[corosync] Import fixes from upstream
Jan Friesse
honzaf at fedoraproject.org
Thu Sep 8 08:09:50 UTC 2011
commit f0be19d9dbf28c2dff0cefde0821f60398d59009
Author: Jan Friesse <jfriesse at redhat.com>
Date: Thu Sep 8 10:00:35 2011 +0200
Import fixes from upstream
Signed-off-by: Jan Friesse <jfriesse at redhat.com>
...emb_join-messages-during-flush-operations.patch | 58 ++++++++
corosync.spec | 14 ++-
...dle-endless-loop-if-all-ifaces-are-faulty.patch | 85 ++++++++++++
...igher-threshold-in-passive-mode-for-mcast.patch | 140 ++++++++++++++++++++
totemconfig-change-minimum-RRP-threshold.patch | 30 ++++
5 files changed, 326 insertions(+), 1 deletions(-)
---
diff --git a/Ignore-memb_join-messages-during-flush-operations.patch b/Ignore-memb_join-messages-during-flush-operations.patch
new file mode 100644
index 0000000..906f28b
--- /dev/null
+++ b/Ignore-memb_join-messages-during-flush-operations.patch
@@ -0,0 +1,58 @@
+From be608c050247e5f9c8266b8a0f9803cc0a3dc881 Mon Sep 17 00:00:00 2001
+From: Steven Dake <sdake at redhat.com>
+Date: Tue, 30 Aug 2011 22:25:21 -0700
+Subject: [PATCH] Ignore memb_join messages during flush operations
+
+A memb_join operation that occurs during flushing can result in an
+entry into the GATHER state from the RECOVERY state. This results in the
+regular sort queue being used instead of the recovery sort queue, resulting
+in a segfault.
+
+Signed-off-by: Steven Dake <sdake at redhat.com>
+Reviewed-by: Jan Friesse <jfriesse at redhat.com>
+(cherry picked from commit 48ffa8892daac18935d96ae46a72aebe2fb70430)
+---
+ exec/totemudp.c | 13 +++++++++++++
+ 1 files changed, 13 insertions(+), 0 deletions(-)
+
+diff --git a/exec/totemudp.c b/exec/totemudp.c
+index 96849b7..0c12b56 100644
+--- a/exec/totemudp.c
++++ b/exec/totemudp.c
+@@ -90,6 +90,8 @@
+ #define BIND_STATE_REGULAR 1
+ #define BIND_STATE_LOOPBACK 2
+
++#define MESSAGE_TYPE_MCAST 1
++
+ #define HMAC_HASH_SIZE 20
+ struct security_header {
+ unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */
+@@ -1172,6 +1174,7 @@ static int net_deliver_fn (
+ int res = 0;
+ unsigned char *msg_offset;
+ unsigned int size_delv;
++ char *message_type;
+
+ if (instance->flushing == 1) {
+ iovec = &instance->totemudp_iov_recv_flush;
+@@ -1234,6 +1237,16 @@ static int net_deliver_fn (
+ }
+
+ /*
++ * Drop all non-mcast messages (more specifically join
++ * messages should be dropped)
++ */
++ message_type = (char *)msg_offset;
++ if (instance->flushing == 1 && *message_type != MESSAGE_TYPE_MCAST) {
++ iovec->iov_len = FRAME_SIZE_MAX;
++ return (0);
++ }
++
++ /*
+ * Handle incoming message
+ */
+ instance->totemudp_deliver_fn (
+--
+1.7.1
+
diff --git a/corosync.spec b/corosync.spec
index 7f3ff28..9bd7946 100644
--- a/corosync.spec
+++ b/corosync.spec
@@ -14,12 +14,17 @@
Name: corosync
Summary: The Corosync Cluster Engine and Application Programming Interfaces
Version: 1.4.1
-Release: 1%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
+Release: 2%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: BSD
Group: System Environment/Base
URL: http://ftp.corosync.org
Source0: ftp://ftp:user@ftp.corosync.org/downloads/%{name}-%{version}/%{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}.tar.gz
+Patch0: rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch
+Patch1: rrp-Higher-threshold-in-passive-mode-for-mcast.patch
+Patch2: Ignore-memb_join-messages-during-flush-operations.patch
+Patch3: totemconfig-change-minimum-RRP-threshold.patch
+
# Runtime bits
Requires: corosynclib = %{version}-%{release}
Requires(pre): /usr/sbin/useradd
@@ -50,6 +55,10 @@ BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
%prep
%setup -q -n %{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}
+%patch0 -p1
+%patch1 -p1
+%patch2 -p1
+%patch3 -p1
%build
%if %{buildtrunk}
@@ -277,6 +286,9 @@ The Corosync Cluster Engine APIs.
%{_mandir}/man8/sam_overview.8*
%changelog
+* Thu Sep 08 2011 Jan Friesse <jfriesse at redhat.com> - 1.4.1-2
+- Add upstream fixes
+
* Tue Jul 26 2011 Jan Friesse <jfriesse at redhat.com> - 1.4.1-1
- New upstream release
diff --git a/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch b/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch
new file mode 100644
index 0000000..7c70e9b
--- /dev/null
+++ b/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch
@@ -0,0 +1,85 @@
+From dc862e15cc084926eccc5e1ff3241611c0cb54f0 Mon Sep 17 00:00:00 2001
+From: Jan Friesse <jfriesse at redhat.com>
+Date: Mon, 29 Aug 2011 10:44:05 +0200
+Subject: [PATCH] rrp: Handle endless loop if all ifaces are faulty
+
+If all interfaces were faulty, passive_mcast_flush_send and related
+functions ended in an endless loop. This is now handled: if there is no
+live interface, the message is dropped.
+
+Signed-off-by: Jan Friesse <jfriesse at redhat.com>
+Reviewed-by: Steven Dake <sdake at redhat.com>
+(cherry picked from commit 0eade8de79b6e5b28e91604d4d460627c7a61ddd)
+---
+ exec/totemrrp.c | 29 ++++++++++++++++++++---------
+ 1 files changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/exec/totemrrp.c b/exec/totemrrp.c
+index 83292ad..a5abb1b 100644
+--- a/exec/totemrrp.c
++++ b/exec/totemrrp.c
+@@ -1015,12 +1015,16 @@ static void passive_mcast_flush_send (
+ unsigned int msg_len)
+ {
+ struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
++ int i = 0;
+
+ do {
+ passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
+- } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1);
++ i++;
++ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
+
+- totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++ if (i <= instance->interface_count) {
++ totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++ }
+ }
+
+ static void passive_mcast_noflush_send (
+@@ -1029,13 +1033,16 @@ static void passive_mcast_noflush_send (
+ unsigned int msg_len)
+ {
+ struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
++ int i = 0;
+
+ do {
+ passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
+- } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1);
+-
++ i++;
++ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
+
+- totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++ if (i <= instance->interface_count) {
++ totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
++ }
+ }
+
+ static void passive_token_recv (
+@@ -1070,14 +1077,18 @@ static void passive_token_send (
+ unsigned int msg_len)
+ {
+ struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
++ int i = 0;
+
+ do {
+ passive_instance->token_xmit_iface = (passive_instance->token_xmit_iface + 1) % instance->interface_count;
+- } while (passive_instance->faulty[passive_instance->token_xmit_iface] == 1);
++ i++;
++ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->token_xmit_iface] == 1));
+
+- totemnet_token_send (
+- instance->net_handles[passive_instance->token_xmit_iface],
+- msg, msg_len);
++ if (i <= instance->interface_count) {
++ totemnet_token_send (
++ instance->net_handles[passive_instance->token_xmit_iface],
++ msg, msg_len);
++ }
+
+ }
+
+--
+1.7.1
+
diff --git a/rrp-Higher-threshold-in-passive-mode-for-mcast.patch b/rrp-Higher-threshold-in-passive-mode-for-mcast.patch
new file mode 100644
index 0000000..3b52bac
--- /dev/null
+++ b/rrp-Higher-threshold-in-passive-mode-for-mcast.patch
@@ -0,0 +1,140 @@
+From 4e32c3112a2f13a302709d72b0ae989287a48563 Mon Sep 17 00:00:00 2001
+From: Jan Friesse <jfriesse at redhat.com>
+Date: Mon, 29 Aug 2011 15:09:52 +0200
+Subject: [PATCH] rrp: Higher threshold in passive mode for mcast
+
+There were too many false positives with passive mode rrp when a high
+number of messages was received.
+
+Patch adds new configurable variable rrp_problem_count_mcast_threshold
+which is by default 10 times rrp_problem_count_threshold and this is
+used as threshold for multicast packets in passive mode. Variable is
+unused in active mode.
+
+Signed-off-by: Jan Friesse <jfriesse at redhat.com>
+Reviewed-by: Steven Dake <sdake at redhat.com>
+(cherry picked from commit 752239eaa1edd68695a6e40bcde60471f34a02fd)
+---
+ exec/totemconfig.c | 11 +++++++++++
+ exec/totemrrp.c | 6 ++++--
+ exec/totemsrp.c | 3 +++
+ include/corosync/totem/totem.h | 2 ++
+ man/corosync.conf.5 | 8 ++++++++
+ 5 files changed, 28 insertions(+), 2 deletions(-)
+
+diff --git a/exec/totemconfig.c b/exec/totemconfig.c
+index 80ca182..f767f69 100644
+--- a/exec/totemconfig.c
++++ b/exec/totemconfig.c
+@@ -213,6 +213,8 @@ static void totem_volatile_config_read (
+
+ objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold);
+
++ objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold);
++
+ objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout);
+
+ objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed);
+@@ -667,12 +669,21 @@ int totem_config_validate (
+ if (totem_config->rrp_problem_count_threshold == 0) {
+ totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT;
+ }
++ if (totem_config->rrp_problem_count_mcast_threshold == 0) {
++ totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10;
++ }
+ if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
+ snprintf (local_error_reason, sizeof(local_error_reason),
+ "The RRP problem count threshold (%d problem count) may not be less then (%d problem count).",
+ totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
+ goto parse_error;
+ }
++ if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
++ snprintf (local_error_reason, sizeof(local_error_reason),
++ "The RRP multicast problem count threshold (%d problem count) may not be less then (%d problem count).",
++ totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
++ goto parse_error;
++ }
+ if (totem_config->rrp_token_expired_timeout == 0) {
+ totem_config->rrp_token_expired_timeout =
+ totem_config->token_retransmit_timeout;
+diff --git a/exec/totemrrp.c b/exec/totemrrp.c
+index a5abb1b..616d0d5 100644
+--- a/exec/totemrrp.c
++++ b/exec/totemrrp.c
+@@ -890,14 +890,17 @@ static void passive_monitor (
+ unsigned int max;
+ unsigned int i;
+ unsigned int min_all, min_active;
++ unsigned int threshold;
+
+ /*
+ * Monitor for failures
+ */
+ if (is_token_recv_count) {
+ recv_count = passive_instance->token_recv_count;
++ threshold = rrp_instance->totem_config->rrp_problem_count_threshold;
+ } else {
+ recv_count = passive_instance->mcast_recv_count;
++ threshold = rrp_instance->totem_config->rrp_problem_count_mcast_threshold;
+ }
+
+ recv_count[iface_no] += 1;
+@@ -959,8 +962,7 @@ static void passive_monitor (
+
+ for (i = 0; i < rrp_instance->interface_count; i++) {
+ if ((passive_instance->faulty[i] == 0) &&
+- (max - recv_count[i] >
+- rrp_instance->totem_config->rrp_problem_count_threshold)) {
++ (max - recv_count[i] > threshold)) {
+ passive_instance->faulty[i] = 1;
+ poll_timer_add (rrp_instance->poll_handle,
+ rrp_instance->totem_config->rrp_autorecovery_check_timeout,
+diff --git a/exec/totemsrp.c b/exec/totemsrp.c
+index 40460e0..6981ac1 100644
+--- a/exec/totemsrp.c
++++ b/exec/totemsrp.c
+@@ -858,6 +858,9 @@ int totemsrp_initialize (
+ "RRP threshold (%d problem count)\n",
+ totem_config->rrp_problem_count_threshold);
+ log_printf (instance->totemsrp_log_level_debug,
++ "RRP multicast threshold (%d problem count)\n",
++ totem_config->rrp_problem_count_mcast_threshold);
++ log_printf (instance->totemsrp_log_level_debug,
+ "RRP automatic recovery check timeout (%d ms)\n",
+ totem_config->rrp_autorecovery_check_timeout);
+ log_printf (instance->totemsrp_log_level_debug,
+diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
+index f3ac9cc..4dce3b3 100644
+--- a/include/corosync/totem/totem.h
++++ b/include/corosync/totem/totem.h
+@@ -143,6 +143,8 @@ struct totem_config {
+
+ unsigned int rrp_problem_count_threshold;
+
++ unsigned int rrp_problem_count_mcast_threshold;
++
+ unsigned int rrp_autorecovery_check_timeout;
+
+ char rrp_mode[TOTEM_RRP_MODE_BYTES];
+diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
+index b6f769e..78eb2bb 100644
+--- a/man/corosync.conf.5
++++ b/man/corosync.conf.5
+@@ -472,6 +472,14 @@ may occur.
+ The default is 10 problem counts.
+
+ .TP
++rrp_problem_count_mcast_threshold
++This specifies the number of times a problem is detected with multicast before
++setting the link faulty for passive rrp mode. This variable is unused in active
++rrp mode.
++
++The default is 10 times rrp_problem_count_threshold.
++
++.TP
+ rrp_token_expired_timeout
+ This specifies the time in milliseconds to increment the problem counter for
+ the redundant ring protocol after not having received a token from all rings
+--
+1.7.1
+
diff --git a/totemconfig-change-minimum-RRP-threshold.patch b/totemconfig-change-minimum-RRP-threshold.patch
new file mode 100644
index 0000000..0c70451
--- /dev/null
+++ b/totemconfig-change-minimum-RRP-threshold.patch
@@ -0,0 +1,30 @@
+From b1aba94732edc2ff084b7dd559a08b687f464ed0 Mon Sep 17 00:00:00 2001
+From: Jan Friesse <jfriesse at redhat.com>
+Date: Thu, 8 Sep 2011 09:40:04 +0200
+Subject: [PATCH] totemconfig: change minimum RRP threshold
+
+RRP threshold can be a value lower than 5.
+
+Signed-off-by: Jan Friesse <jfriesse at redhat.com>
+Reviewed-by: Fabio M. Di Nitto <fdinitto at redhat.com>
+(cherry picked from commit f6c2a8dab786c50ece36dd3424e258e93a1000d3)
+---
+ exec/totemconfig.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/exec/totemconfig.c b/exec/totemconfig.c
+index f767f69..a475bb3 100644
+--- a/exec/totemconfig.c
++++ b/exec/totemconfig.c
+@@ -82,7 +82,7 @@
+ #define MISS_COUNT_CONST 5
+ #define RRP_PROBLEM_COUNT_TIMEOUT 2000
+ #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10
+-#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 5
++#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 2
+ #define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000
+
+ static char error_string_response[512];
+--
+1.7.1
+
More information about the scm-commits
mailing list