[corosync] Import fixes from upstream

Jan Friesse honzaf at fedoraproject.org
Thu Sep 8 08:09:50 UTC 2011


commit f0be19d9dbf28c2dff0cefde0821f60398d59009
Author: Jan Friesse <jfriesse at redhat.com>
Date:   Thu Sep 8 10:00:35 2011 +0200

    Import fixes from upstream
    
    Signed-off-by: Jan Friesse <jfriesse at redhat.com>

 ...emb_join-messages-during-flush-operations.patch |   58 ++++++++
 corosync.spec                                      |   14 ++-
 ...dle-endless-loop-if-all-ifaces-are-faulty.patch |   85 ++++++++++++
 ...igher-threshold-in-passive-mode-for-mcast.patch |  140 ++++++++++++++++++++
 totemconfig-change-minimum-RRP-threshold.patch     |   30 ++++
 5 files changed, 326 insertions(+), 1 deletions(-)
---
diff --git a/Ignore-memb_join-messages-during-flush-operations.patch b/Ignore-memb_join-messages-during-flush-operations.patch
new file mode 100644
index 0000000..906f28b
--- /dev/null
+++ b/Ignore-memb_join-messages-during-flush-operations.patch
@@ -0,0 +1,58 @@
+From be608c050247e5f9c8266b8a0f9803cc0a3dc881 Mon Sep 17 00:00:00 2001
+From: Steven Dake <sdake at redhat.com>
+Date: Tue, 30 Aug 2011 22:25:21 -0700
+Subject: [PATCH] Ignore memb_join messages during flush operations
+
+A memb_join operation that occurs during flushing can result in an
+entry into the GATHER state from the RECOVERY state.  This results in the
+regular sort queue being used instead of the recovery sort queue, causing
+a segfault.
+
+Signed-off-by: Steven Dake <sdake at redhat.com>
+Reviewed-by: Jan Friesse <jfriesse at redhat.com>
+(cherry picked from commit 48ffa8892daac18935d96ae46a72aebe2fb70430)
+---
+ exec/totemudp.c |   13 +++++++++++++
+ 1 files changed, 13 insertions(+), 0 deletions(-)
+
+diff --git a/exec/totemudp.c b/exec/totemudp.c
+index 96849b7..0c12b56 100644
+--- a/exec/totemudp.c
++++ b/exec/totemudp.c
+@@ -90,6 +90,8 @@
+ #define BIND_STATE_REGULAR	1
+ #define BIND_STATE_LOOPBACK	2
+ 
++#define MESSAGE_TYPE_MCAST	1
++
+ #define HMAC_HASH_SIZE 20
+ struct security_header {
+ 	unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */
+@@ -1172,6 +1174,7 @@ static int net_deliver_fn (
+ 	int res = 0;
+ 	unsigned char *msg_offset;
+ 	unsigned int size_delv;
++	char *message_type;
+ 
+ 	if (instance->flushing == 1) {
+ 		iovec = &instance->totemudp_iov_recv_flush;
+@@ -1234,6 +1237,16 @@ static int net_deliver_fn (
+ 	}
+ 
+ 	/*
++	 * Drop all non-mcast messages (more specifically join
++	 * messages should be dropped)
++	 */
++	message_type = (char *)msg_offset;
++	if (instance->flushing == 1 && *message_type != MESSAGE_TYPE_MCAST) {
++		iovec->iov_len = FRAME_SIZE_MAX;
++		return (0);
++	}
++	
++	/*
+ 	 * Handle incoming message
+ 	 */
+ 	instance->totemudp_deliver_fn (
+-- 
+1.7.1
+
diff --git a/corosync.spec b/corosync.spec
index 7f3ff28..9bd7946 100644
--- a/corosync.spec
+++ b/corosync.spec
@@ -14,12 +14,17 @@
 Name: corosync
 Summary: The Corosync Cluster Engine and Application Programming Interfaces
 Version: 1.4.1
-Release: 1%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
+Release: 2%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
 License: BSD
 Group: System Environment/Base
 URL: http://ftp.corosync.org
 Source0: ftp://ftp:user@ftp.corosync.org/downloads/%{name}-%{version}/%{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}.tar.gz
 
+Patch0: rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch
+Patch1: rrp-Higher-threshold-in-passive-mode-for-mcast.patch
+Patch2: Ignore-memb_join-messages-during-flush-operations.patch
+Patch3: totemconfig-change-minimum-RRP-threshold.patch
+
 # Runtime bits
 Requires: corosynclib = %{version}-%{release}
 Requires(pre): /usr/sbin/useradd
@@ -50,6 +55,10 @@ BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 
 %prep
 %setup -q -n %{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}
+%patch0 -p1
+%patch1 -p1
+%patch2 -p1
+%patch3 -p1
 
 %build
 %if %{buildtrunk}
@@ -277,6 +286,9 @@ The Corosync Cluster Engine APIs.
 %{_mandir}/man8/sam_overview.8*
 
 %changelog
+* Thu Sep 08 2011 Jan Friesse <jfriesse at redhat.com> - 1.4.1-2
+- Add upstream fixes
+
 * Tue Jul 26 2011 Jan Friesse <jfriesse at redhat.com> - 1.4.1-1
 - New upstream release
 
diff --git a/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch b/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch
new file mode 100644
index 0000000..7c70e9b
--- /dev/null
+++ b/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch
@@ -0,0 +1,85 @@
+From dc862e15cc084926eccc5e1ff3241611c0cb54f0 Mon Sep 17 00:00:00 2001
+From: Jan Friesse <jfriesse at redhat.com>
+Date: Mon, 29 Aug 2011 10:44:05 +0200
+Subject: [PATCH] rrp: Handle endless loop if all ifaces are faulty
+
+If all interfaces were faulty, passive_mcast_flush_send and related
+functions ended in an endless loop. This is now handled: if there is no
+live interface, the message is dropped.
+
+Signed-off-by: Jan Friesse <jfriesse at redhat.com>
+Reviewed by: Steven Dake <sdake at redhat.com>
+(cherry picked from commit 0eade8de79b6e5b28e91604d4d460627c7a61ddd)
+---
+ exec/totemrrp.c |   29 ++++++++++++++++++++---------
+ 1 files changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/exec/totemrrp.c b/exec/totemrrp.c
+index 83292ad..a5abb1b 100644
+--- a/exec/totemrrp.c
++++ b/exec/totemrrp.c
+@@ -1015,12 +1015,16 @@ static void passive_mcast_flush_send (
+ 	unsigned int msg_len)
+ {
+ 	struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
++	int i = 0;
+ 
+ 	do {
+ 		passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
+-	} while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1);
++		i++;
++	} while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
+ 
+-	totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++	if (i <= instance->interface_count) {
++		totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++	}
+ }
+ 
+ static void passive_mcast_noflush_send (
+@@ -1029,13 +1033,16 @@ static void passive_mcast_noflush_send (
+ 	unsigned int msg_len)
+ {
+ 	struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
++	int i = 0;
+ 
+ 	do {
+ 		passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
+-	} while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1);
+-
++		i++;
++	} while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
+ 
+-	totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++	if (i <= instance->interface_count) {
++		totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++	}
+ }
+ 
+ static void passive_token_recv (
+@@ -1070,14 +1077,18 @@ static void passive_token_send (
+ 	unsigned int msg_len)
+ {
+ 	struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
++	int i = 0;
+ 
+ 	do {
+ 		passive_instance->token_xmit_iface = (passive_instance->token_xmit_iface + 1) % instance->interface_count;
+-	} while (passive_instance->faulty[passive_instance->token_xmit_iface] == 1);
++		i++;
++	} while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->token_xmit_iface] == 1));
+ 
+-	totemnet_token_send (
+-		instance->net_handles[passive_instance->token_xmit_iface],
+-		msg, msg_len);
++	if (i <= instance->interface_count) {
++		totemnet_token_send (
++		    instance->net_handles[passive_instance->token_xmit_iface],
++		    msg, msg_len);
++	}
+ 
+ }
+ 
+-- 
+1.7.1
+
diff --git a/rrp-Higher-threshold-in-passive-mode-for-mcast.patch b/rrp-Higher-threshold-in-passive-mode-for-mcast.patch
new file mode 100644
index 0000000..3b52bac
--- /dev/null
+++ b/rrp-Higher-threshold-in-passive-mode-for-mcast.patch
@@ -0,0 +1,140 @@
+From 4e32c3112a2f13a302709d72b0ae989287a48563 Mon Sep 17 00:00:00 2001
+From: Jan Friesse <jfriesse at redhat.com>
+Date: Mon, 29 Aug 2011 15:09:52 +0200
+Subject: [PATCH] rrp: Higher threshold in passive mode for mcast
+
+There were too many false positives with passive mode rrp when a high
+number of messages were received.
+
+The patch adds a new configurable variable, rrp_problem_count_mcast_threshold,
+which defaults to 10 times rrp_problem_count_threshold and is used as the
+threshold for multicast packets in passive mode. The variable is
+unused in active mode.
+
+Signed-off-by: Jan Friesse <jfriesse at redhat.com>
+Reviewed by: Steven Dake <sdake at redhat.com>
+(cherry picked from commit 752239eaa1edd68695a6e40bcde60471f34a02fd)
+---
+ exec/totemconfig.c             |   11 +++++++++++
+ exec/totemrrp.c                |    6 ++++--
+ exec/totemsrp.c                |    3 +++
+ include/corosync/totem/totem.h |    2 ++
+ man/corosync.conf.5            |    8 ++++++++
+ 5 files changed, 28 insertions(+), 2 deletions(-)
+
+diff --git a/exec/totemconfig.c b/exec/totemconfig.c
+index 80ca182..f767f69 100644
+--- a/exec/totemconfig.c
++++ b/exec/totemconfig.c
+@@ -213,6 +213,8 @@ static void totem_volatile_config_read (
+ 
+ 	objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold);
+ 
++	objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold);
++
+ 	objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout);
+ 
+ 	objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed);
+@@ -667,12 +669,21 @@ int totem_config_validate (
+ 	if (totem_config->rrp_problem_count_threshold == 0) {
+ 		totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT;
+ 	}
++	if (totem_config->rrp_problem_count_mcast_threshold == 0) {
++		totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10;
++	}
+ 	if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
+ 		snprintf (local_error_reason, sizeof(local_error_reason),
+ 			"The RRP problem count threshold (%d problem count) may not be less then (%d problem count).",
+ 			totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
+ 		goto parse_error;
+ 	}
++	if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
++		snprintf (local_error_reason, sizeof(local_error_reason),
++			"The RRP multicast problem count threshold (%d problem count) may not be less then (%d problem count).",
++			totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
++		goto parse_error;
++	}
+ 	if (totem_config->rrp_token_expired_timeout == 0) {
+ 		totem_config->rrp_token_expired_timeout =
+ 			totem_config->token_retransmit_timeout;
+diff --git a/exec/totemrrp.c b/exec/totemrrp.c
+index a5abb1b..616d0d5 100644
+--- a/exec/totemrrp.c
++++ b/exec/totemrrp.c
+@@ -890,14 +890,17 @@ static void passive_monitor (
+ 	unsigned int max;
+ 	unsigned int i;
+ 	unsigned int min_all, min_active;
++	unsigned int threshold;
+ 
+ 	/*
+ 	 * Monitor for failures
+ 	 */
+ 	if (is_token_recv_count) {
+ 		recv_count = passive_instance->token_recv_count;
++		threshold = rrp_instance->totem_config->rrp_problem_count_threshold;
+ 	} else {
+ 		recv_count = passive_instance->mcast_recv_count;
++		threshold = rrp_instance->totem_config->rrp_problem_count_mcast_threshold;
+ 	}
+ 
+ 	recv_count[iface_no] += 1;
+@@ -959,8 +962,7 @@ static void passive_monitor (
+ 
+ 	for (i = 0; i < rrp_instance->interface_count; i++) {
+ 		if ((passive_instance->faulty[i] == 0) &&
+-			(max - recv_count[i] >
+-			rrp_instance->totem_config->rrp_problem_count_threshold)) {
++		    (max - recv_count[i] > threshold)) {
+ 			passive_instance->faulty[i] = 1;
+ 			poll_timer_add (rrp_instance->poll_handle,
+ 				rrp_instance->totem_config->rrp_autorecovery_check_timeout,
+diff --git a/exec/totemsrp.c b/exec/totemsrp.c
+index 40460e0..6981ac1 100644
+--- a/exec/totemsrp.c
++++ b/exec/totemsrp.c
+@@ -858,6 +858,9 @@ int totemsrp_initialize (
+ 		"RRP threshold (%d problem count)\n",
+ 		totem_config->rrp_problem_count_threshold);
+ 	log_printf (instance->totemsrp_log_level_debug,
++		"RRP multicast threshold (%d problem count)\n",
++		totem_config->rrp_problem_count_mcast_threshold);
++	log_printf (instance->totemsrp_log_level_debug,
+ 		"RRP automatic recovery check timeout (%d ms)\n",
+ 		totem_config->rrp_autorecovery_check_timeout);
+ 	log_printf (instance->totemsrp_log_level_debug,
+diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
+index f3ac9cc..4dce3b3 100644
+--- a/include/corosync/totem/totem.h
++++ b/include/corosync/totem/totem.h
+@@ -143,6 +143,8 @@ struct totem_config {
+ 
+ 	unsigned int rrp_problem_count_threshold;
+ 
++	unsigned int rrp_problem_count_mcast_threshold;
++
+ 	unsigned int rrp_autorecovery_check_timeout;
+ 
+ 	char rrp_mode[TOTEM_RRP_MODE_BYTES];
+diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
+index b6f769e..78eb2bb 100644
+--- a/man/corosync.conf.5
++++ b/man/corosync.conf.5
+@@ -472,6 +472,14 @@ may occur.
+ The default is 10 problem counts.
+ 
+ .TP
++rrp_problem_count_mcast_threshold
++This specifies the number of times a problem is detected with multicast before
++setting the link faulty for passive rrp mode. This variable is unused in active
++rrp mode.
++
++The default is 10 times rrp_problem_count_threshold.
++
++.TP
+ rrp_token_expired_timeout
+ This specifies the time in milliseconds to increment the problem counter for
+ the redundant ring protocol after not having received a token from all rings
+-- 
+1.7.1
+
diff --git a/totemconfig-change-minimum-RRP-threshold.patch b/totemconfig-change-minimum-RRP-threshold.patch
new file mode 100644
index 0000000..0c70451
--- /dev/null
+++ b/totemconfig-change-minimum-RRP-threshold.patch
@@ -0,0 +1,30 @@
+From b1aba94732edc2ff084b7dd559a08b687f464ed0 Mon Sep 17 00:00:00 2001
+From: Jan Friesse <jfriesse at redhat.com>
+Date: Thu, 8 Sep 2011 09:40:04 +0200
+Subject: [PATCH] totemconfig: change minimum RRP threshold
+
+The RRP threshold can be a value lower than 5.
+
+Signed-off-by: Jan Friesse <jfriesse at redhat.com>
+Reviewed-by: Fabio M. Di Nitto <fdinitto at redhat.com>
+(cherry picked from commit f6c2a8dab786c50ece36dd3424e258e93a1000d3)
+---
+ exec/totemconfig.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/exec/totemconfig.c b/exec/totemconfig.c
+index f767f69..a475bb3 100644
+--- a/exec/totemconfig.c
++++ b/exec/totemconfig.c
+@@ -82,7 +82,7 @@
+ #define MISS_COUNT_CONST			5
+ #define RRP_PROBLEM_COUNT_TIMEOUT		2000
+ #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT	10
+-#define RRP_PROBLEM_COUNT_THRESHOLD_MIN		5
++#define RRP_PROBLEM_COUNT_THRESHOLD_MIN		2
+ #define RRP_AUTORECOVERY_CHECK_TIMEOUT		1000
+ 
+ static char error_string_response[512];
+-- 
+1.7.1
+


More information about the scm-commits mailing list