fence_sanlock/fence_sanlockd.c | 31 +++++++++++++++++++++++--------
init.d/fence_sanlockd | 7 +++++--
2 files changed, 28 insertions(+), 10 deletions(-)
New commits:
commit d9a896a1f68bd605a1d8f3560efda8c8c7f3ba17
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Sep 28 15:59:18 2012 -0500
fence_sanlock: use SIGHUP for shutdown
Have the fence_sanlockd init script use SIGHUP instead of
SIGTERM to tell fence_sanlockd to shut down, because the
sanlock daemon already uses SIGTERM to tell fence_sanlockd
that the lockspace has failed.
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fence_sanlock/fence_sanlockd.c b/fence_sanlock/fence_sanlockd.c
index ec0f862..9d4d03d 100644
--- a/fence_sanlock/fence_sanlockd.c
+++ b/fence_sanlock/fence_sanlockd.c
@@ -31,10 +31,6 @@
#include "sanlock_direct.h"
#include "wdmd.h"
-/*
- * TODO: shutdown checks
- */
-
#define MAX_HOSTS 128 /* keep in sync with fence_sanlock definition */
#define LIVE_INTERVAL 5
@@ -45,7 +41,8 @@
static char *prog_name = (char *)"fence_sanlockd";
static int we_are_victim;
-static int shutdown;
+static int init_shutdown;
+static int lockspace_recovery;
static int daemon_debug;
static int our_host_id;
static char path[PATH_MAX];
@@ -219,8 +216,12 @@ static void process_signals(int ci)
return;
}
+ if (fdsi.ssi_signo == SIGHUP) {
+ init_shutdown = 1;
+ }
+
if (fdsi.ssi_signo == SIGTERM) {
- shutdown = 1;
+ lockspace_recovery = 1;
}
if (fdsi.ssi_signo == SIGUSR1) {
@@ -235,6 +236,7 @@ static int setup_signals(void)
sigemptyset(&mask);
sigaddset(&mask, SIGTERM);
+ sigaddset(&mask, SIGHUP);
sigaddset(&mask, SIGUSR1);
rv = sigprocmask(SIG_BLOCK, &mask, NULL);
@@ -557,7 +559,7 @@ int main(int argc, char *argv[])
now = monotime();
- if (shutdown) {
+ if (init_shutdown) {
/*
* FIXME: how to be sure that it's safe for us to shut
* down? i.e. nothing is running that needs fencing?
@@ -580,6 +582,10 @@ int main(int argc, char *argv[])
* (dlm_controld is not running)
* - /sys/kernel/dlm/ is empty
* (lockspaces do not exist in the kernel)
+ *
+ * The init script has to use SIGHUP to stop us instead
+ * of SIGTERM because the sanlock daemon uses SIGTERM to
+ * tell us that the lockspace has failed.
*/
log_error("shutdown");
rv = wdmd_test_live(con, 0, 0);
@@ -587,6 +593,15 @@ int main(int argc, char *argv[])
log_error("wdmd_test_live 0 error %d", rv);
break;
+ } else if (lockspace_recovery) {
+ /*
+ * The sanlock daemon sends SIGTERM when the lockspace
+ * host_id cannot be renewed for a while and it enters
+ * recovery.
+ */
+ log_error("sanlock renewals failed, our watchdog will fire");
+ break;
+
} else if (we_are_victim) {
/*
* The sanlock daemon has seen someone request our
@@ -646,7 +661,7 @@ int main(int argc, char *argv[])
log_error("wdmd_test_live error %d", rv);
}
- if (we_are_victim) {
+ if (we_are_victim || lockspace_recovery) {
poll_timeout = 10000;
} else {
sleep_seconds = live_time + LIVE_INTERVAL - monotime();
diff --git a/init.d/fence_sanlockd b/init.d/fence_sanlockd
index 8780480..c7210d7 100755
--- a/init.d/fence_sanlockd
+++ b/init.d/fence_sanlockd
@@ -77,8 +77,11 @@ stop() {
PID=$(pidofproc -p $runfile $prog)
+ # We have to use SIGHUP to mean stop because sanlock
+ # uses SIGTERM to mean that the lockspace failed.
+
echo -n $"Sending stop signal $prog ($PID): "
- killproc -p $runfile $prog -TERM
+ killproc -p $runfile $prog -HUP
retval=$?
echo
@@ -86,7 +89,7 @@ stop() {
return $retval
fi
- # fence_sanlockd won't see the SIGTERM if it's
+ # fence_sanlockd won't see the SIGHUP if it's
# still waiting for config from the fifo, so
# send invalid config to the fifo to make it fail.
Show replies by date