Gitweb: http://git.fedorahosted.org/git/fence.git?p=fence.git;a=commitdiff;h=216875b... Commit: 216875bdc79edd22def7feb6d5a8c73a216191b9 Parent: f8c79f99a281f93807a9cb3289a8da6897b17253 Author: David Teigland teigland@redhat.com AuthorDate: Tue Nov 16 10:29:40 2010 -0600 Committer: David Teigland teigland@redhat.com CommitterDate: Tue Nov 16 10:29:40 2010 -0600
fenced: use post_join_delay after cluster join
When the cluster has lost quorum due to a node failure, the next event is generally a cluster node join which gives the cluster quorum again. With quorum, fenced begins fencing any failed nodes, applying post_fail_delay since the last cpg event was a node failure. In this case, however, post_join_delay is more appropriate since the chances are good that nodes being fenced will be joining. Detect this case where a node joins the cluster giving it quorum, and use post_join_delay.
bz 624844
Signed-off-by: David Teigland teigland@redhat.com --- fence/fenced/fd.h | 1 + fence/fenced/main.c | 1 + fence/fenced/member_cman.c | 12 ++++++++++++ fence/fenced/recover.c | 5 ++++- 4 files changed, 18 insertions(+), 1 deletions(-)
diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h index f3c3696..1182ef5 100644 --- a/fence/fenced/fd.h +++ b/fence/fenced/fd.h @@ -59,6 +59,7 @@ extern int daemon_quit; extern int cluster_down; extern struct list_head domains; extern int cluster_quorate; +extern int cluster_quorate_from_last_update; extern uint32_t cluster_ringid_seq; extern uint64_t quorate_time; extern int our_nodeid; diff --git a/fence/fenced/main.c b/fence/fenced/main.c index adbac6b..580afce 100644 --- a/fence/fenced/main.c +++ b/fence/fenced/main.c @@ -1016,6 +1016,7 @@ int daemon_quit; int cluster_down; struct list_head domains; int cluster_quorate; +int cluster_quorate_from_last_update; uint32_t cluster_ringid_seq; uint64_t quorate_time; int our_nodeid; diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c index e97794d..cf32818 100644 --- a/fence/fenced/member_cman.c +++ b/fence/fenced/member_cman.c @@ -53,6 +53,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate, uint32_t *node_list) { int prev_quorate = cluster_quorate; + int removed = 0, added = 0; int i;
cluster_quorate = quorate; @@ -75,6 +76,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate, log_debug("cluster node %d removed seq %u", old_nodes[i], cluster_ringid_seq); node_history_cluster_remove(old_nodes[i]); + removed++; } }
@@ -83,8 +85,18 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate, log_debug("cluster node %d added seq %u", quorum_nodes[i], cluster_ringid_seq); node_history_cluster_add(quorum_nodes[i]); + added++; } } + + if (removed) { + cluster_quorate_from_last_update = 0; + } else if (added) { + if (!prev_quorate && cluster_quorate) + cluster_quorate_from_last_update = 1; + else + cluster_quorate_from_last_update = 0; + } }
static quorum_callbacks_t quorum_callbacks = diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c index 25e719c..1847246 100644 --- a/fence/fenced/recover.c +++ b/fence/fenced/recover.c @@ -196,7 +196,7 @@ void delay_fencing(struct fd *fd, int node_join) if (list_empty(&fd->victims)) return;
- if (node_join) { + if (node_join || cluster_quorate_from_last_update) { delay = cfgd_post_join_delay; delay_type = "post_join_delay"; } else { @@ -204,6 +204,9 @@ void delay_fencing(struct fd *fd, int node_join) delay_type = "post_fail_delay"; }
+ log_debug("delay %s %d quorate_from_last_update %d", + delay_type, delay, cluster_quorate_from_last_update); + if (delay == 0) goto out;
cluster-commits@lists.fedorahosted.org