[dlm/f19] Fixes related to parallel startup

David Teigland teigland at fedoraproject.org
Tue Jun 25 19:39:06 UTC 2013


commit fe7437c45bad647aeb11af497a668aacf108e78f
Author: David Teigland <teigland at redhat.com>
Date:   Tue Jun 25 14:38:44 2013 -0500

    Fixes related to parallel startup

 0001-man-fix-dlm.conf-man-page.patch               |   48 +++++++++
 ...d-daemon_fence_work-should-wait-for-confc.patch |   33 ++++++
 ...d-exclude-fencing-work-during-set_protoco.patch |  104 ++++++++++++++++++++
 ...dlm_controld-unify-fence-delay-variations.patch |   99 +++++++++++++++++++
 dlm.spec                                           |   19 +++-
 5 files changed, 299 insertions(+), 4 deletions(-)
---
diff --git a/0001-man-fix-dlm.conf-man-page.patch b/0001-man-fix-dlm.conf-man-page.patch
new file mode 100644
index 0000000..a859a01
--- /dev/null
+++ b/0001-man-fix-dlm.conf-man-page.patch
@@ -0,0 +1,48 @@
+From 41f4121768ab4948898945007f49168acfac6c9f Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland at redhat.com>
+Date: Fri, 17 May 2013 11:06:12 -0500
+Subject: [PATCH 1/4] man: fix dlm.conf man page
+
+Signed-off-by: David Teigland <teigland at redhat.com>
+---
+ dlm_controld/dlm.conf.5 | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/dlm_controld/dlm.conf.5 b/dlm_controld/dlm.conf.5
+index 95f74b9..793435a 100644
+--- a/dlm_controld/dlm.conf.5
++++ b/dlm_controld/dlm.conf.5
+@@ -349,15 +349,15 @@ Example of nodeid 1 as master of all resources:
+ 
+ lockspace foo nodir=1
+ .br
+-master node=1
++master    foo node=1
+ 
+ Example of nodeid's 1 and 2 as masters of all resources:
+ 
+ lockspace foo nodir=1
+ .br
+-master node=1
++master    foo node=1
+ .br
+-master node=2
++master    foo node=2
+ 
+ Lock management will be partitioned among the available masters.  There
+ can be any number of masters defined.  The designated master nodes will
+@@ -376,9 +376,9 @@ can also be assigned to master nodes, e.g.
+ 
+ lockspace foo nodir=1
+ .br
+-master node=1 weight=2
++master    foo node=1 weight=2
+ .br
+-master node=2 weight=1
++master    foo node=2 weight=1
+ 
+ In which case node 1 will master 2/3 of the total resources and node 2
+ will master the other 1/3.
+-- 
+1.8.1.rc1.5.g7e0651a
+
diff --git a/0002-dlm_controld-daemon_fence_work-should-wait-for-confc.patch b/0002-dlm_controld-daemon_fence_work-should-wait-for-confc.patch
new file mode 100644
index 0000000..98d9500
--- /dev/null
+++ b/0002-dlm_controld-daemon_fence_work-should-wait-for-confc.patch
@@ -0,0 +1,33 @@
+From 5bdbe083ed23abc955309ea23fd6f008852b05b8 Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland at redhat.com>
+Date: Tue, 25 Jun 2013 11:04:49 -0500
+Subject: [PATCH 2/4] dlm_controld: daemon_fence_work should wait for confchg
+
+If daemon_last_join_monotime has not yet been initialized
+by the first confchg, then daemon_fence_work() should
+wait for that to happen.
+
+Signed-off-by: David Teigland <teigland at redhat.com>
+---
+ dlm_controld/daemon_cpg.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
+index 8c4cff2..0758560 100644
+--- a/dlm_controld/daemon_cpg.c
++++ b/dlm_controld/daemon_cpg.c
+@@ -865,6 +865,11 @@ static void daemon_fence_work(void)
+ 		if (!opt(enable_startup_fencing_ind))
+ 			continue;
+ 
++		if (!daemon_last_join_monotime) {
++			log_debug("fence startup %d wait for confchg", node->nodeid);
++			continue;
++		}
++
+ 		if (monotime() - daemon_last_join_monotime < opt(post_join_delay_ind)) {
+ 			log_debug("fence startup %d delay %d from %llu",
+ 				  node->nodeid, opt(post_join_delay_ind),
+-- 
+1.8.1.rc1.5.g7e0651a
+
diff --git a/0003-dlm_controld-exclude-fencing-work-during-set_protoco.patch b/0003-dlm_controld-exclude-fencing-work-during-set_protoco.patch
new file mode 100644
index 0000000..9f77ec6
--- /dev/null
+++ b/0003-dlm_controld-exclude-fencing-work-during-set_protoco.patch
@@ -0,0 +1,104 @@
+From f367c91ac4f4f8012456f4916c0c99b00d8569c4 Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland at redhat.com>
+Date: Tue, 25 Jun 2013 11:02:22 -0500
+Subject: [PATCH 3/4] dlm_controld: exclude fencing work during set_protocol
+
+During set_protocol we only want to process protocol
+messages, and only process fencing status later during
+main daemon processing.
+
+Signed-off-by: David Teigland <teigland at redhat.com>
+---
+ dlm_controld/daemon_cpg.c | 26 ++++++++++++++++++++++++--
+ dlm_controld/dlm_daemon.h |  1 +
+ dlm_controld/main.c       |  4 ++++
+ 3 files changed, 29 insertions(+), 2 deletions(-)
+
+diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
+index 0758560..0db48f5 100644
+--- a/dlm_controld/daemon_cpg.c
++++ b/dlm_controld/daemon_cpg.c
+@@ -766,6 +766,9 @@ static void daemon_fence_work(void)
+ 	int retry = 0;
+ 	uint32_t flags;
+ 
++	if (!daemon_fence_allow)
++		return;
++
+ 	if (daemon_ringid_wait) {
+ 		/* We've seen a nodedown confchg callback, but not the
+ 		   corresponding ringid callback. */
+@@ -1811,6 +1814,7 @@ int set_protocol(void)
+ {
+ 	struct protocol proto;
+ 	struct pollfd pollfd;
++	cs_error_t error;
+ 	int sent_proposal = 0;
+ 	int rv;
+ 
+@@ -1860,8 +1864,17 @@ int set_protocol(void)
+ 			return -1;
+ 		}
+ 
+-		if (pollfd.revents & POLLIN)
+-			process_cpg_daemon(0);
++		if (pollfd.revents & POLLIN) {
++			/*
++			 * don't use process_cpg_daemon() here: dispatch only one
++			 * thing at a time, because during set_protocol we only
++			 * want to handle protocol related things.
++			 */
++
++			error = cpg_dispatch(cpg_handle_daemon, CS_DISPATCH_ONE);
++			if (error != CS_OK)
++				log_error("daemon cpg_dispatch one error %d", error);
++		}
+ 		if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
+ 			log_error("set_protocol poll revents %u",
+ 				  pollfd.revents);
+@@ -1927,6 +1940,15 @@ static void deliver_cb_daemon(cpg_handle_t handle,
+ 	hd = (struct dlm_header *)data;
+ 	dlm_header_in(hd);
+ 
++	if (!daemon_fence_allow && hd->type != DLM_MSG_PROTOCOL) {
++		/* don't think this will happen; if it does we may
++		   need to verify that it's correct to ignore these
++		   messages instead of saving them to process after
++		   allow is set */
++		log_debug("deliver_cb_daemon ignore non proto msg %d", hd->type);
++		return;
++	}
++
+ 	switch (hd->type) {
+ 	case DLM_MSG_PROTOCOL:
+ 		receive_protocol(hd, len);
+diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
+index 11a4777..dbe22ba 100644
+--- a/dlm_controld/dlm_daemon.h
++++ b/dlm_controld/dlm_daemon.h
+@@ -162,6 +162,7 @@ EXTERN int daemon_quit;
+ EXTERN int cluster_down;
+ EXTERN int poll_lockspaces;
+ EXTERN unsigned int retry_fencing;
++EXTERN int daemon_fence_allow;
+ EXTERN int poll_fs;
+ EXTERN int poll_ignore_plock;
+ EXTERN int poll_drop_plock;
+diff --git a/dlm_controld/main.c b/dlm_controld/main.c
+index 8fb16ef..287b82d 100644
+--- a/dlm_controld/main.c
++++ b/dlm_controld/main.c
+@@ -1021,6 +1021,10 @@ static void loop(void)
+ 	sd_notify(0, "READY=1");
+ #endif
+ 
++	/* We want to wait for our protocol to be set before
++	   we start to process fencing. */
++	daemon_fence_allow = 1;
++
+ 	for (;;) {
+ 		rv = poll(pollfd, client_maxi + 1, poll_timeout);
+ 		if (rv == -1 && errno == EINTR) {
+-- 
+1.8.1.rc1.5.g7e0651a
+
diff --git a/0004-dlm_controld-unify-fence-delay-variations.patch b/0004-dlm_controld-unify-fence-delay-variations.patch
new file mode 100644
index 0000000..2a99b13
--- /dev/null
+++ b/0004-dlm_controld-unify-fence-delay-variations.patch
@@ -0,0 +1,99 @@
+From 2548250de3991f1f0aca297bbd072b525a132841 Mon Sep 17 00:00:00 2001
+From: David Teigland <teigland at redhat.com>
+Date: Tue, 25 Jun 2013 13:18:47 -0500
+Subject: [PATCH 4/4] dlm_controld: unify fence delay variations
+
+The fence delay period begins after a node joins the
+cluster or joins the daemon cpg.  Apply this delay
+to both startup and normal fencing.
+
+Signed-off-by: David Teigland <teigland at redhat.com>
+---
+ dlm_controld/daemon_cpg.c | 15 +++++++--------
+ dlm_controld/dlm_daemon.h |  2 +-
+ dlm_controld/member.c     |  2 +-
+ 3 files changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/dlm_controld/daemon_cpg.c b/dlm_controld/daemon_cpg.c
+index 0db48f5..d88cd46 100644
+--- a/dlm_controld/daemon_cpg.c
++++ b/dlm_controld/daemon_cpg.c
+@@ -107,7 +107,6 @@ static int daemon_remove_count;
+ static int daemon_ringid_wait;
+ static struct cpg_ring_id daemon_ringid;
+ static int daemon_fence_pid;
+-static uint64_t daemon_last_join_monotime;
+ static uint32_t last_join_seq;
+ static uint32_t send_fipu_seq;
+ static int wait_clear_fipu;
+@@ -868,15 +867,15 @@ static void daemon_fence_work(void)
+ 		if (!opt(enable_startup_fencing_ind))
+ 			continue;
+ 
+-		if (!daemon_last_join_monotime) {
+-			log_debug("fence startup %d wait for confchg", node->nodeid);
++		if (!fence_delay_begin) {
++			log_debug("fence startup %d wait for initial delay", node->nodeid);
+ 			continue;
+ 		}
+ 
+-		if (monotime() - daemon_last_join_monotime < opt(post_join_delay_ind)) {
++		if (monotime() - fence_delay_begin < opt(post_join_delay_ind)) {
+ 			log_debug("fence startup %d delay %d from %llu",
+ 				  node->nodeid, opt(post_join_delay_ind),
+-				  (unsigned long long)daemon_last_join_monotime);
++				  (unsigned long long)fence_delay_begin);
+ 			retry = 1;
+ 			continue;
+ 		}
+@@ -959,10 +958,10 @@ static void daemon_fence_work(void)
+ 		   time between it joining the cluster (giving cluster quorum)
+ 		   and joining the daemon cpg, which allows it to bypass fencing */
+ 
+-		if (monotime() - cluster_last_join_monotime < opt(post_join_delay_ind)) {
++		if (monotime() - fence_delay_begin < opt(post_join_delay_ind)) {
+ 			log_debug("fence request %d delay %d from %llu",
+ 				  node->nodeid, opt(post_join_delay_ind),
+-				  (unsigned long long)cluster_last_join_monotime);
++				  (unsigned long long)fence_delay_begin);
+ 			node->delay_fencing = 1;
+ 			retry = 1;
+ 			continue;
+@@ -2042,7 +2041,7 @@ static void confchg_cb_daemon(cpg_handle_t handle,
+ 			node->daemon_member = 1;
+ 			node->daemon_add_time = now;
+ 
+-			daemon_last_join_monotime = now;
++			fence_delay_begin = now;
+ 			last_join_seq++;
+ 
+ 			/* a joining node shows prev members in joined list */
+diff --git a/dlm_controld/dlm_daemon.h b/dlm_controld/dlm_daemon.h
+index dbe22ba..62508ea 100644
+--- a/dlm_controld/dlm_daemon.h
++++ b/dlm_controld/dlm_daemon.h
+@@ -171,7 +171,7 @@ EXTERN int plock_ci;
+ EXTERN struct list_head lockspaces;
+ EXTERN int cluster_quorate;
+ EXTERN int cluster_two_node;
+-EXTERN uint64_t cluster_last_join_monotime;
++EXTERN uint64_t fence_delay_begin;
+ EXTERN uint64_t cluster_quorate_monotime;
+ EXTERN uint64_t cluster_joined_monotime;
+ EXTERN uint64_t cluster_joined_walltime;
+diff --git a/dlm_controld/member.c b/dlm_controld/member.c
+index fca3248..d4031ee 100644
+--- a/dlm_controld/member.c
++++ b/dlm_controld/member.c
+@@ -151,7 +151,7 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
+ 				  quorum_nodes[i], cluster_ringid_seq);
+ 			add_cluster_node(quorum_nodes[i], now);
+ 
+-			cluster_last_join_monotime = now;
++			fence_delay_begin = now;
+ 
+ 			err = corosync_cfg_get_node_addrs(ch, quorum_nodes[i],
+ 							  MAX_NODE_ADDRESSES,
+-- 
+1.8.1.rc1.5.g7e0651a
+
diff --git a/dlm.spec b/dlm.spec
index 2d47827..1276841 100644
--- a/dlm.spec
+++ b/dlm.spec
@@ -1,6 +1,6 @@
 Name:           dlm
 Version:        4.0.1
-Release:        1%{?dist}
+Release:        2%{?dist}
 License:        GPLv2 and GPLv2+ and LGPLv2+
 # For a breakdown of the licensing, see README.license
 Group:          System Environment/Kernel
@@ -12,9 +12,13 @@ BuildRequires:  pacemaker-libs-devel >= 1.1.7
 BuildRequires:  libxml2-devel
 BuildRequires:  systemd-units
 BuildRequires:  systemd-devel
-Source0:	http://people.redhat.com/teigland/%{name}-%{version}.tar.gz
+Source0:        http://git.fedorahosted.org/cgit/dlm.git/snapshot/%{name}-%{version}.tar.gz
+
+Patch0: 0001-man-fix-dlm.conf-man-page.patch
+Patch1: 0002-dlm_controld-daemon_fence_work-should-wait-for-confc.patch
+Patch2: 0003-dlm_controld-exclude-fencing-work-during-set_protoco.patch
+Patch3: 0004-dlm_controld-unify-fence-delay-variations.patch
 
-#Patch0: 0001-foo.patch
 
 %if 0%{?rhel}
 ExclusiveArch: i686 x86_64
@@ -33,7 +37,11 @@ The kernel dlm requires a user daemon to control membership.
 
 %prep
 %setup -q
-#%patch0 -p1 -b .0001-foo.patch
+
+%patch0 -p1 -b .0001-man-fix-dlm.conf-man-page.patch
+%patch1 -p1 -b .0002-dlm_controld-daemon_fence_work-should-wait-for-confc.patch
+%patch2 -p1 -b .0003-dlm_controld-exclude-fencing-work-during-set_protoco.patch
+%patch3 -p1 -b .0004-dlm_controld-unify-fence-delay-variations.patch
 
 %build
 # upstream does not require configure
@@ -105,6 +113,9 @@ developing applications that use %{name}.
 %{_libdir}/pkgconfig/*.pc
 
 %changelog
+* Tue Jun 25 2013 David Teigland <teigland at redhat.com> - 4.0.1-2
+- Fixes related to parallel startup
+
 * Wed Mar 06 2013 David Teigland <teigland at redhat.com> - 4.0.1-1
 - New usptream release, fencing fixes
 


More information about the scm-commits mailing list