Gitweb:
http://git.fedorahosted.org/git/fence.git?p=fence.git;a=commitdiff;h=216875bdc79edd22def7feb6d5a8c73a216191b9
Commit: 216875bdc79edd22def7feb6d5a8c73a216191b9
Parent: f8c79f99a281f93807a9cb3289a8da6897b17253
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Nov 16 10:29:40 2010 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Tue Nov 16 10:29:40 2010 -0600
fenced: use post_join_delay after cluster join
When the cluster has lost quorum due to a node failure,
the next event is generally a cluster node join which
gives the cluster quorum again. With quorum, fenced
begins fencing any failed nodes, applying post_fail_delay
since the last cpg event was a node failure. In this
case, however, post_join_delay is more appropriate, since
there is a good chance that the nodes being fenced are about to join.
Detect the case where a node joining the cluster gives it
quorum, and use post_join_delay instead.
bz 624844
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
fence/fenced/fd.h | 1 +
fence/fenced/main.c | 1 +
fence/fenced/member_cman.c | 12 ++++++++++++
fence/fenced/recover.c | 5 ++++-
4 files changed, 18 insertions(+), 1 deletions(-)
diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h
index f3c3696..1182ef5 100644
--- a/fence/fenced/fd.h
+++ b/fence/fenced/fd.h
@@ -59,6 +59,7 @@ extern int daemon_quit;
extern int cluster_down;
extern struct list_head domains;
extern int cluster_quorate;
+extern int cluster_quorate_from_last_update;
extern uint32_t cluster_ringid_seq;
extern uint64_t quorate_time;
extern int our_nodeid;
diff --git a/fence/fenced/main.c b/fence/fenced/main.c
index adbac6b..580afce 100644
--- a/fence/fenced/main.c
+++ b/fence/fenced/main.c
@@ -1016,6 +1016,7 @@ int daemon_quit;
int cluster_down;
struct list_head domains;
int cluster_quorate;
+int cluster_quorate_from_last_update;
uint32_t cluster_ringid_seq;
uint64_t quorate_time;
int our_nodeid;
diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c
index e97794d..cf32818 100644
--- a/fence/fenced/member_cman.c
+++ b/fence/fenced/member_cman.c
@@ -53,6 +53,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
uint32_t *node_list)
{
int prev_quorate = cluster_quorate;
+ int removed = 0, added = 0;
int i;
cluster_quorate = quorate;
@@ -75,6 +76,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
log_debug("cluster node %d removed seq %u",
old_nodes[i], cluster_ringid_seq);
node_history_cluster_remove(old_nodes[i]);
+ removed++;
}
}
@@ -83,8 +85,18 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
log_debug("cluster node %d added seq %u",
quorum_nodes[i], cluster_ringid_seq);
node_history_cluster_add(quorum_nodes[i]);
+ added++;
}
}
+
+ if (removed) { /* a node failed in this update: keep post_fail_delay */
+ cluster_quorate_from_last_update = 0;
+ } else if (added) { /* join-only update */
+ if (!prev_quorate && cluster_quorate) /* cluster_quorate == quorate here, so compare against prev_quorate */
+ cluster_quorate_from_last_update = 1;
+ else
+ cluster_quorate_from_last_update = 0;
+ }
}
static quorum_callbacks_t quorum_callbacks =
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index 25e719c..1847246 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -196,7 +196,7 @@ void delay_fencing(struct fd *fd, int node_join)
if (list_empty(&fd->victims))
return;
- if (node_join) {
+ if (node_join || cluster_quorate_from_last_update) {
delay = cfgd_post_join_delay;
delay_type = "post_join_delay";
} else {
@@ -204,6 +204,9 @@ void delay_fencing(struct fd *fd, int node_join)
delay_type = "post_fail_delay";
}
+ log_debug("delay %s %d quorate_from_last_update %d",
+ delay_type, delay, cluster_quorate_from_last_update);
+
if (delay == 0)
goto out;