cluster: RHEL6 - cman|fenced: Fix node killing in case of a 2node cluster that suffers brief network outage

11 Jan 2013

Gitweb:        http://git.fedorahosted.org/git/?p=cluster.git;a=commitdiff;h=05bc9dc0345966...
Commit:        05bc9dc0345966bfbc6ade0ea9520206379d0068
Parent:        f5f29115b364e590a6f6fb1f560a4a268ab23fff
Author:        Christine Caulfield ccaulfie@redhat.com
AuthorDate:    Fri Jan 11 13:41:15 2013 +0000
Committer:     Christine Caulfield ccaulfie@redhat.com
CommitterDate: Fri Jan 11 13:41:15 2013 +0000
cman|fenced: Fix node killing in case of a 2node cluster that suffers brief network outage
This patch fixes a rare but nasty condition in cman and fenced. In a 2node cluster,
if the network splits for a period of time longer than the token timeout but
shorter than the time needed to fence a node, then both nodes can send 'kill'
packets to each other, with the effect that both nodes' cmans will quit,
leaving no operational cluster.
This patch adds a check for a 2node cluster and only sends a 'kill' packet
to the node with the higher nodeid, thus ensuring a predictable response
to such events and ensuring that services can continue to run.
The cman part of this patch is only active if Disallowed is enabled,
which is rare; the fenced part is more likely to be invoked.
rhbz#893925
Signed-off-by: David Teigland teigland@redhat.com
Signed-off-by: Christine Caulfield ccaulfie@redhat.com
Acked-By: Fabio M. Di Nitto fdinitto@redhat.com
---
 cman/daemon/commands.c     |   23 +++++++++++++++++++++--
 fence/fenced/config.c      |    3 +++
 fence/fenced/fd.h          |    1 +
 fence/fenced/main.c        |    1 +
 fence/fenced/member_cman.c |   11 +++++++++++
 5 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/cman/daemon/commands.c b/cman/daemon/commands.c
index 83a2f4a..f8f8114 100644
--- a/cman/daemon/commands.c
+++ b/cman/daemon/commands.c
@@ -2059,9 +2059,28 @@ static void do_process_transition(int nodeid, char *data)
    		/* Don't duplicate messages */
    		if (node->state != NODESTATE_AISONLY) {
    			if (cluster_is_quorate) {
-					log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with existing state", node->name);
    				node->state = NODESTATE_AISONLY;
-					send_kill(nodeid, CLUSTER_KILL_REJOIN);
+
+					/* Oh, this gets even more complicated. Don't send a KILL message if we are in a two_node
+					 * cluster and that node has a lower node ID than us.
+					 * This allows fencing time to start up and caters for the situation where
+					 * a node rejoins REALLY quickly, before fencing has had time to work.
+					 * I've split this up a bit partly for clarity, but mainly to allow us to
+					 * print out helpful messages as to what we are up to here.
+					 */
+					if (two_node) {
+						if (node->node_id > us->node_id) {
+							log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with existing state and has higher node ID", node->name);
+							send_kill(nodeid, CLUSTER_KILL_REJOIN);
+						}
+						else {
+							log_printf(LOG_CRIT, "Not killing node %s despite it rejoining the cluster with existing state, it has a lower node ID", node->name);
+						}
+					}
+					else {
+						log_printf(LOG_CRIT, "Killing node %s because it has rejoined the cluster with existing state", node->name);
+						send_kill(nodeid, CLUSTER_KILL_REJOIN);
+					}
    			}
    			else {
    				log_printf(LOG_CRIT, "Node %s not joined to cman because it has existing state", node->name);
diff --git a/fence/fenced/config.c b/fence/fenced/config.c
index 66610ef..651ea8b 100644
--- a/fence/fenced/config.c
+++ b/fence/fenced/config.c
@@ -95,6 +95,7 @@ void read_ccs_int(const char *path, int *config_val)
 #define OVERRIDE_PATH_PATH "/cluster/fence_daemon/@override_path"
 #define OVERRIDE_TIME_PATH "/cluster/fence_daemon/@override_time"
 #define METHOD_NAME_PATH "/cluster/clusternodes/clusternode[@name="%s"]/fence/method[%d]/@name"
+#define TWO_NODE_PATH "/cluster/cman/two_node"
static int count_methods(char *victim)
 {
@@ -139,6 +140,8 @@ int read_ccs(struct fd *fd)
    if (!optd_clean_start)
    	read_ccs_int(CLEAN_START_PATH, &cfgd_clean_start);
+	read_ccs_int(TWO_NODE_PATH, &two_node_mode);
+
    reread_ccs();
if (!optd_override_path) {
diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h
index 205836c..34a6c7f 100644
--- a/fence/fenced/fd.h
+++ b/fence/fenced/fd.h
@@ -75,6 +75,7 @@ extern char dump_buf[FENCED_DUMP_SIZE];
 extern int dump_point;
 extern int dump_wrap;
 extern int group_mode;
+extern int two_node_mode;
extern void daemon_dump_save(void);
diff --git a/fence/fenced/main.c b/fence/fenced/main.c
index 8e4f10b..924b8c9 100644
--- a/fence/fenced/main.c
+++ b/fence/fenced/main.c
@@ -1101,4 +1101,5 @@ char dump_buf[FENCED_DUMP_SIZE];
 int dump_point;
 int dump_wrap;
 int group_mode;
+int two_node_mode;
diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c
index a7f4341..ced4272 100644
--- a/fence/fenced/member_cman.c
+++ b/fence/fenced/member_cman.c
@@ -32,6 +32,17 @@ void kick_node_from_cluster(int nodeid)
    	log_error("telling cman to shut down cluster locally");
    	cman_shutdown(ch_admin, CMAN_SHUTDOWN_ANYWAY);
    } else {
+
+		/* in a two_node cluster where both nodes maintain quorum
+		 * by themselves during a partition+merge, both will kick
+		 * the other, which can leave both dead and unfenced.
+		 * this delay should help */
+
+		if (two_node_mode && our_nodeid > nodeid) {
+			log_debug("kick_node_from_cluster %d delay", nodeid);
+			sleep(5);
+		}
+
    	log_error("telling cman to remove nodeid %d from cluster",
    		  nodeid);
    	cman_kill_node(ch_admin, nodeid);

    

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

cluster: RHEL6 - cman|fenced: Fix node killing in case of a 2node cluster that suffers brief network outage