Branch 'fence_sanlock' - fence_sanlock/fence_sanlockd.c init.d/fence_sanlockd
by David Teigland
fence_sanlock/fence_sanlockd.c | 31 +++++++++++++++++++++++--------
init.d/fence_sanlockd | 7 +++++--
2 files changed, 28 insertions(+), 10 deletions(-)
New commits:
commit d9a896a1f68bd605a1d8f3560efda8c8c7f3ba17
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Sep 28 15:59:18 2012 -0500
fence_sanlock: use SIGHUP for shutdown
Have the fence_sanlockd init script use SIGHUP to
tell the daemon to shut down instead of SIGTERM
because the sanlock daemon uses SIGTERM to tell
the daemon that the lockspace has failed.
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fence_sanlock/fence_sanlockd.c b/fence_sanlock/fence_sanlockd.c
index ec0f862..9d4d03d 100644
--- a/fence_sanlock/fence_sanlockd.c
+++ b/fence_sanlock/fence_sanlockd.c
@@ -31,10 +31,6 @@
#include "sanlock_direct.h"
#include "wdmd.h"
-/*
- * TODO: shutdown checks
- */
-
#define MAX_HOSTS 128 /* keep in sync with fence_sanlock definition */
#define LIVE_INTERVAL 5
@@ -45,7 +41,8 @@
static char *prog_name = (char *)"fence_sanlockd";
static int we_are_victim;
-static int shutdown;
+static int init_shutdown;
+static int lockspace_recovery;
static int daemon_debug;
static int our_host_id;
static char path[PATH_MAX];
@@ -219,8 +216,12 @@ static void process_signals(int ci)
return;
}
+ if (fdsi.ssi_signo == SIGHUP) {
+ init_shutdown = 1;
+ }
+
if (fdsi.ssi_signo == SIGTERM) {
- shutdown = 1;
+ lockspace_recovery = 1;
}
if (fdsi.ssi_signo == SIGUSR1) {
@@ -235,6 +236,7 @@ static int setup_signals(void)
sigemptyset(&mask);
sigaddset(&mask, SIGTERM);
+ sigaddset(&mask, SIGHUP);
sigaddset(&mask, SIGUSR1);
rv = sigprocmask(SIG_BLOCK, &mask, NULL);
@@ -557,7 +559,7 @@ int main(int argc, char *argv[])
now = monotime();
- if (shutdown) {
+ if (init_shutdown) {
/*
* FIXME: how to be sure that it's safe for us to shut
* down? i.e. nothing is running that needs fencing?
@@ -580,6 +582,10 @@ int main(int argc, char *argv[])
* (dlm_controld is not running)
* - /sys/kernel/dlm/ is empty
* (lockspaces do not exist in the kernel)
+ *
+ * The init script has to use SIGHUP to stop us instead
+ * of SIGTERM because the sanlock daemon uses SIGTERM to
+ * tell us that the lockspace has failed.
*/
log_error("shutdown");
rv = wdmd_test_live(con, 0, 0);
@@ -587,6 +593,15 @@ int main(int argc, char *argv[])
log_error("wdmd_test_live 0 error %d", rv);
break;
+ } else if (lockspace_recovery) {
+ /*
+ * The sanlock daemon sends SIGTERM when the lockspace
+ * host_id cannot be renewed for a while and it enters
+ * recovery.
+ */
+ log_error("sanlock renewals failed, our watchdog will fire");
+ break;
+
} else if (we_are_victim) {
/*
* The sanlock daemon has seen someone request our
@@ -646,7 +661,7 @@ int main(int argc, char *argv[])
log_error("wdmd_test_live error %d", rv);
}
- if (we_are_victim) {
+ if (we_are_victim || lockspace_recovery) {
poll_timeout = 10000;
} else {
sleep_seconds = live_time + LIVE_INTERVAL - monotime();
diff --git a/init.d/fence_sanlockd b/init.d/fence_sanlockd
index 8780480..c7210d7 100755
--- a/init.d/fence_sanlockd
+++ b/init.d/fence_sanlockd
@@ -77,8 +77,11 @@ stop() {
PID=$(pidofproc -p $runfile $prog)
+ # We have to use SIGHUP to mean stop because sanlock
+ # uses SIGTERM to mean that the lockspace failed.
+
echo -n $"Sending stop signal $prog ($PID): "
- killproc -p $runfile $prog -TERM
+ killproc -p $runfile $prog -HUP
retval=$?
echo
@@ -86,7 +89,7 @@ stop() {
return $retval
fi
- # fence_sanlockd won't see the SIGTERM if it's
+ # fence_sanlockd won't see the SIGHUP if it's
# still waiting for config from the fifo, so
# send invalid config to the fifo to make it fail.
11 years
Branch 'fence_sanlock' - fence_sanlock/Makefile fence_sanlock/fence_sanlock.8 fence_sanlock/fence_sanlockd.8
by David Teigland
fence_sanlock/Makefile | 3
fence_sanlock/fence_sanlock.8 | 203 +++++++++++++++++++++++++++++++++++++++++
fence_sanlock/fence_sanlockd.8 | 39 +++++++
3 files changed, 245 insertions(+)
New commits:
commit 732f8998d2b68ad4363989f6aadd2225d538527f
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Sep 28 15:18:32 2012 -0500
fence_sanlock: add man pages
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/fence_sanlock/Makefile b/fence_sanlock/Makefile
index 3c0752c..99a9a5a 100644
--- a/fence_sanlock/Makefile
+++ b/fence_sanlock/Makefile
@@ -53,4 +53,7 @@ MANDIR=/usr/share/man
.PHONY: install
install: all
$(INSTALL) -d $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -d $(DESTDIR)/$(MANDIR)/man8
$(INSTALL) -c -m 755 $(TARGET1) $(TARGET2) $(DESTDIR)/$(BINDIR)
+ $(INSTALL) -m 644 fence_sanlock.8 $(DESTDIR)/$(MANDIR)/man8/
+ $(INSTALL) -m 644 fence_sanlockd.8 $(DESTDIR)/$(MANDIR)/man8/
diff --git a/fence_sanlock/fence_sanlock.8 b/fence_sanlock/fence_sanlock.8
new file mode 100644
index 0000000..3c1d472
--- /dev/null
+++ b/fence_sanlock/fence_sanlock.8
@@ -0,0 +1,203 @@
+.TH FENCE_SANLOCK 8 2012-09-26
+
+.SH NAME
+fence_sanlock \- fence agent using watchdog and shared storage leases
+
+.SH SYNOPSIS
+.B fence_sanlock
+[OPTIONS]
+
+.SH DESCRIPTION
+fence_sanlock uses the watchdog device to reset nodes, in conjunction with
+three daemons: fence_sanlockd, sanlock, and wdmd.
+
+The watchdog device, controlled through /dev/watchdog, is available when a
+watchdog kernel module is loaded. A module should be loaded for the
+available hardware. If no hardware watchdog is available, or no module is
+loaded, the "softdog" module will be loaded, which emulates a hardware
+watchdog device.
+
+Shared storage must be configured for sanlock to use from all hosts. This
+is generally an lvm lv (non-clustered), but could be another block device,
+or NFS file. The storage should be 1GB of fully allocated space. After
+being created, the storage must be initialized with the command:
+.br
+# fence_sanlock -o sanlock_init -p /path/to/storage
+
+The fence_sanlock agent uses sanlock leases on shared storage to verify
+that hosts have been reset, and to notify fenced nodes that are still
+running, that they should be reset.
+
+The fence_sanlockd init script starts the wdmd, sanlock and fence_sanlockd
+daemons before the cluster or fencing systems are started (e.g. cman,
+corosync and fenced). The fence_sanlockd daemon is started with the -w
+option so it waits for the path and host_id options to be provided when
+they are available.
+
+Unfencing must be configured for fence_sanlock in cluster.conf. The cman
+init script does unfencing by running fence_node -U, which in turn runs
+fence_sanlock with the "on" action and local path and host_id values taken
+from cluster.conf. fence_sanlock in turn passes the path and host_id
+values to the waiting fence_sanlockd daemon. With these values,
+fence_sanlockd joins the sanlock lockspace and acquires a resource lease
+for the local host. It can take several minutes to complete these
+unfencing steps.
+
+Once unfencing is complete, the node is a member of the sanlock lockspace
+named "fence" and the node's fence_sanlockd process holds a resource lease
+named "hN", where N is the node's host_id. (To verify this, run the
+commands "sanlock client status" and "sanlock client host_status", which
+show state from the sanlock daemon, or "sanlock direct dump <path>" which
+shows state from shared storage.)
+
+When fence_sanlock fences a node, it tries to acquire that node's resource
+lease. sanlock will not grant the lease until the owner (the node being
+fenced) has been reset by its watchdog device. The time it takes to
+acquire the lease is 140 seconds from the victim's last lockspace renewal
+timestamp on the shared storage. Once acquired, the victim's lease is
+released, and fencing completes successfully.
+
+Live nodes being fenced
+
+When a live node is being fenced, fence_sanlock will continually fail to
+acquire the victim's lease, because the victim continues to renew its
+lockspace membership on storage, and the fencing node sees it is alive.
+This is by design. As long as the victim is alive, it must continue to
+renew its lockspace membership on storage. The victim must not allow the
+remote fence_sanlock to acquire its lease and consider it fenced while it
+is still alive.
+
+At the same time, a victim knows that when it is being fenced, it should
+be reset to avoid blocking recovery of the rest of the cluster. To
+communicate this, fence_sanlock makes a "request" on storage for the
+victim's resource lease. On the victim, fence_sanlockd, which holds the
+resource lease, is configured to receive SIGUSR1 from sanlock if anyone
+requests its lease. Upon receiving the signal, fence_sanlockd knows that
+it is a fencing victim. In response to this, fence_sanlockd allows its
+wdmd connection to expire, which in turn causes the watchdog device to
+fire, resetting the node.
+
+The watchdog reset will obviously have the effect of stopping the victim's
+lockspace membership renewals. Once the renewals stop, fence_sanlock will
+finally be able to acquire the victim's lease after waiting a fixed time
+from the final lockspace renewal.
+
+Loss of shared storage
+
+If access to shared storage with sanlock leases is lost for 80 seconds,
+sanlock is not able to renew the lockspace membership, and enters
+recovery. This causes sanlock clients holding leases, such as
+fence_sanlockd, to be notified that their leases are being lost. In
+response, fence_sanlockd must reset the node, much as if it was being
+fenced.
+
+Daemons killed/crashed/hung
+
+If the sanlock or fence_sanlockd daemons are killed abnormally, or crash or
+hang, their wdmd connections will expire, causing the watchdog device to
+fire, resetting the node. fence_sanlock from another node will then run
+and acquire the victim's resource lease. If the wdmd daemon is killed
+abnormally or crashes or hangs, it will not pet the watchdog device,
+causing it to fire and reset the node.
+
+Time Values
+
+The specific time periods referenced above, e.g. 140, 80, are based on
+the default sanlock i/o timeout of 10 seconds. If sanlock is configured
+to use a different i/o timeout, these numbers will be different.
+
+.SH OPTIONS
+
+.BI \-o " action"
+ The agent action:
+
+.IP
+.B on
+.br
+Enable the local node to be fenced. Used by unfencing.
+
+.IP
+.B off
+.br
+Disable another node.
+
+.IP
+.B status
+.br
+Test if a node is on or off. A node is on if its lease is held, and off
+if its lease is free.
+
+.IP
+.B metadata
+.br
+Print xml description of required parameters.
+
+.IP
+.B sanlock_init
+.br
+Initialize sanlock leases on shared storage.
+
+.PP
+
+.BI \-p " path"
+ The path to shared storage with sanlock leases.
+
+.PP
+
+.BI \-i " host_id"
+ The host_id, from 1-128.
+
+.SH STDIN PARAMETERS
+
+Options can be passed on stdin, with the format key=val. Each key=val
+pair is separated by a new line.
+
+action=on|off|status
+.br
+See \-o
+
+path=/path/to/shared/storage
+.br
+See \-p
+
+host_id=num
+.br
+See \-i
+
+.SH FILES
+
+Example cluster.conf configuration for fence_sanlock:
+
+.nf
+<clusternode name="node01" nodeid="1">
+ <fence>
+ <method name="1">
+ <device name="wd" host_id="1"/>
+ </method>
+ </fence>
+ <unfence>
+ <device name="wd" host_id="1" action="on"/>
+ </unfence>
+</clusternode>
+
+<clusternode name="node02" nodeid="2">
+ <fence>
+ <method name="1">
+ <device name="wd" host_id="2"/>
+ </method>
+ </fence>
+ <unfence>
+ <device name="wd" host_id="2" action="on"/>
+ </unfence>
+</clusternode>
+
+<fencedevice name="wd" agent="fence_sanlock" device="/dev/fence/leases"/>
+.fi
+
+.SH SEE ALSO
+.BR fence_sanlockd (8),
+.BR sanlock (8),
+.BR wdmd (8),
+.BR fence_node (8),
+.BR fenced (8)
+
diff --git a/fence_sanlock/fence_sanlockd.8 b/fence_sanlock/fence_sanlockd.8
new file mode 100644
index 0000000..33ce051
--- /dev/null
+++ b/fence_sanlock/fence_sanlockd.8
@@ -0,0 +1,39 @@
+.TH FENCE_SANLOCKD 8 2012-09-26
+
+.SH NAME
+fence_sanlockd \- daemon for fence_sanlock agent
+
+.SH SYNOPSIS
+.B fence_sanlockd
+[OPTIONS]
+
+.SH DESCRIPTION
+The fence_sanlockd daemon is used by the fence_sanlock agent.
+See
+.BR fence_sanlock (8),
+for full description.
+
+.SH OPTIONS
+
+.B \-D
+ Enable debugging to stderr and don't fork.
+
+.BI \-p " path"
+ Path to shared storage with sanlock leases.
+
+.BI \-i " host_id"
+ Local sanlock host_id (1-128).
+
+.B \-w
+ Wait for fence_sanlockd -s to send options (p,i).
+
+.B \-s
+ Send options (p,i) to waiting fence_sanlockd -w.
+
+.SH SEE ALSO
+.BR fence_sanlock (8),
+.BR sanlock (8),
+.BR wdmd (8),
+.BR fence_node (8),
+.BR fenced (8)
+
11 years
Changes to 'work2'
by David Teigland
New branch 'work2' available with the following commits:
commit 3cc457d215713035cf9aa2024841184913c350d7
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Thu Sep 27 12:06:03 2012 -0400
sanlock: use signal handler for all terminating signals
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
commit b08e8726c7e854921ec6d8d590c77d4e4d18c4b0
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Thu Sep 27 12:06:02 2012 -0400
python: fix the tuple parsing for py_add_lockspace
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
commit abaf80bbf21b9c771684def60f73f3f1a287bc20
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Thu Sep 27 15:46:59 2012 -0400
sanlock: configuring the core dump output
All the setrlimit calls have been moved to a new setup_limits function
that is called regardless of the presence of the -U and -G options.
The new RLIMIT_CORE limit is set to allow the core dump to be written
in case of a crash (for more details: man core).
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
commit aff1f7713ca8f3836e1b76611b3222628f9c2b84
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Sep 26 14:02:00 2012 -0500
sanlock: configurable mlockall level
-l 0 -> no mlockall
-l 1 -> mlockall(CURRENT)
-l 2 -> mlockall(CURRENT|FUTURE)
The default is now set to 1 (was 2).
CURRENT|FUTURE results in each pthread_create using
8MB of RSS for the thread stack. CURRENT alone does
not.
Signed-off-by: David Teigland <teigland(a)redhat.com>
11 years
[PATCH] sanlock: configuring the core dump output
by Federico Simoncelli
All the setrlimit calls have been moved to a new setup_limits function
that is called regardless of the presence of the -U and -G options.
The new RLIMIT_CORE limit is set to allow the core dump to be written
in case of a crash (for more details: man core).
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
---
src/main.c | 42 ++++++++++++++++++++++++++++++------------
1 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/src/main.c b/src/main.c
index 552641e..a7922b5 100644
--- a/src/main.c
+++ b/src/main.c
@@ -24,6 +24,7 @@
#include <pwd.h>
#include <grp.h>
#include <sys/types.h>
+#include <sys/prctl.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -1329,19 +1330,10 @@ static void setup_host_name(void)
uuid, name.nodename);
}
-static void setup_groups(void)
+static void setup_limits(void)
{
- int rv, i, j, h;
- int pngroups, sngroups, ngroups_max;
- gid_t *pgroup, *sgroup;
- struct rlimit rlim;
-
- if (!com.uname || !com.gname)
- return;
-
- /* before switching to a different user/group we must configure
- the limits for memlock and rtprio */
- rlim.rlim_cur = rlim.rlim_max= -1;
+ int rv;
+ struct rlimit rlim = { .rlim_cur = -1, .rlim_max= -1 };
rv = setrlimit(RLIMIT_MEMLOCK, &rlim);
if (rv < 0) {
@@ -1355,6 +1347,22 @@ static void setup_groups(void)
exit(EXIT_FAILURE);
}
+ rv = setrlimit(RLIMIT_CORE, &rlim);
+ if (rv < 0) {
+ log_error("cannot set the limits for core dumps %i", errno);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void setup_groups(void)
+{
+ int rv, i, j, h;
+ int pngroups, sngroups, ngroups_max;
+ gid_t *pgroup, *sgroup;
+
+ if (!com.uname || !com.gname)
+ return;
+
ngroups_max = sysconf(_SC_NGROUPS_MAX);
if (ngroups_max < 0) {
log_error("cannot get the max number of groups %i", errno);
@@ -1418,6 +1426,15 @@ static void setup_groups(void)
log_error("cannot set user id to %i errno %i", com.uid, errno);
}
+ /* When a program is owned by a user (group) other than the real user
+ * (group) ID of the process, the PR_SET_DUMPABLE option gets cleared.
+ * See RLIMIT_CORE in setup_limits and man 5 core.
+ */
+ rv = prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
+ if (rv < 0) {
+ log_error("cannot set dumpable process errno %i", com.uid, errno);
+ }
+
out:
free(pgroup);
}
@@ -1577,6 +1594,7 @@ static int do_daemon(void)
}
}
+ setup_limits();
setup_helper();
/* main task never does disk io, so we don't really need to set
--
1.7.1
11 years
[PATCH 1/2] python: fix the tuple parsing for py_add_lockspace
by Federico Simoncelli
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
---
python/sanlock.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/python/sanlock.c b/python/sanlock.c
index cefe3d4..e32fab3 100644
--- a/python/sanlock.c
+++ b/python/sanlock.c
@@ -288,7 +288,7 @@ py_add_lockspace(PyObject *self __unused, PyObject *args, PyObject *keywds)
memset(&ls, 0, sizeof(struct sanlk_lockspace));
/* parse python tuple */
- if (!PyArg_ParseTupleAndKeywords(args, keywds, "sks|ki", kwlist,
+ if (!PyArg_ParseTupleAndKeywords(args, keywds, "sks|kIi", kwlist,
&lockspace, &ls.host_id, &path, &ls.host_id_disk.offset, &iotimeout,
&async)) {
return NULL;
--
1.7.1
11 years
[PATCH 1/2] sanlock: configuring the core dump output
by Federico Simoncelli
All the setrlimit calls have been moved to a new setup_limits function
that is called regardless of the presence of the -U and -G options.
The new RLIMIT_CORE limit is set to allow the core dump to be written
in case of a crash (for more details: man core).
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
---
src/main.c | 32 ++++++++++++++++++++------------
1 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/src/main.c b/src/main.c
index dea78f7..67b3d3c 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1323,19 +1323,10 @@ static void setup_host_name(void)
uuid, name.nodename);
}
-static void setup_groups(void)
+static void setup_limits(void)
{
- int rv, i, j, h;
- int pngroups, sngroups, ngroups_max;
- gid_t *pgroup, *sgroup;
- struct rlimit rlim;
-
- if (!com.uname || !com.gname)
- return;
-
- /* before switching to a different user/group we must configure
- the limits for memlock and rtprio */
- rlim.rlim_cur = rlim.rlim_max= -1;
+ int rv;
+ struct rlimit rlim = { .rlim_cur = -1, .rlim_max= -1 };
rv = setrlimit(RLIMIT_MEMLOCK, &rlim);
if (rv < 0) {
@@ -1349,6 +1340,22 @@ static void setup_groups(void)
exit(EXIT_FAILURE);
}
+ rv = setrlimit(RLIMIT_CORE, &rlim);
+ if (rv < 0) {
+ log_error("cannot set the limits for core dumps %i", errno);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void setup_groups(void)
+{
+ int rv, i, j, h;
+ int pngroups, sngroups, ngroups_max;
+ gid_t *pgroup, *sgroup;
+
+ if (!com.uname || !com.gname)
+ return;
+
ngroups_max = sysconf(_SC_NGROUPS_MAX);
if (ngroups_max < 0) {
log_error("cannot get the max number of groups %i", errno);
@@ -1553,6 +1560,7 @@ static int do_daemon(void)
}
}
+ setup_limits();
setup_helper();
/* main task never does disk io, so we don't really need to set
--
1.7.1
11 years
Changes to 'refs/tags/sanlock-2.5'
by David Teigland
Changes since the dawn of time:
Daniel P. Berrange (15):
Fix const-ness of many APIs.
Fix warnings in watchdog module
Fix function prototypes for no-arg methods
Remove use of 'index' as a variable name
Make many functions static
Fix missing include in logging file
Annotate many unused parameters to avoid warnings
Remove redundant redeclaration of 'to' variable
Fix args to execv()
Remove redundant arg to 'run_command'
Rename optarg to optionarg to avoid clashing with getopt.h
Disable the read_request method since it is unused
Add many more compiler warning flags & safety checks
Hard code a sector size of 512 if the lease volume is a regular file
Ensure libsanlock.so is built with debug/warning flags
David Teigland (376):
sync_manager: initial commit
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: add more logging
sync_manager: misc updates
sync_manager: misc updates
sync_manager: num_hosts/MAX_HOSTS stuff
daemon: reworking notions of resource_id/token_name
sync_manager: resource lockfiles
sync_manager: lease arg processing
sync_manager: Began multiple lease support
sync_manager: use first command line arg as action
sync_manager: leader record changes and verify
sync_manager: clean up released leases
sync_manager: move functions around
sync_manager: add more tool actions
sync_manager: naming changes
sync_manager: separate token index and id
sync_manager: fix index usage and other misc
sync_manager: use pthread cond in acquire
sync_manager: write all log entries
sync_manager: simplify polling
sync_manager: fix waitpid use
sync_manager: acquire can fail early
sync_manager: write log entries at exit
sync_manager: add test program
sync_manager: move secondary pid check
sync_manager: fix disk paxos contention
devcount: fix verify checks
sync_manager: add GPL license file
sync_manager: fix leader block offsets
sync_manager: increase COMMAND_MAX
sync_manager: renewal should verify
sync_manager: use sector size from libblkid
sync_manager: use a real checksum function
sync_manager: add libblkid to spec file
sync_manager: print status info
sync_manager: one watchdog file per lease
sync_manager: lease_threads handle watchdog files
sync_manager: fix/add some text/comments
sync_manager: refactor read/write
sync_manager: move disk io functions
sync_manager: remove token arg
sync_manager: rename paxos_disk sync_disk
sync_manager: add aio read/write
sync_manager: make io_timeout_seconds a diskio arg
sync_manager: forgot to add new files
sync_manager: use log thread
sync_manager: client connections
sync_manager: connection processing
sync_manager: send/recv pid
sync_manager: add write_sectors
sync_manager: restructuring
sync_manager: write_sectors code factoring
sync_manager: daemonize
sync_manager: releasing leases
sync_manager: async releasing
sync_manager: release fixes
sync_manager: add direct and indirect acquire/release
sync_manager: reacquire resources
sync_manager: move code
sync_manager: same pid may reacquire resource
sync_manager: lease migration
sync_manager: handle client errors
sync_manager: improve error handling
sync_manager: host_id leases
sync_manager: remove empty files
sync_manager: print initialization info
sync_manager: rename files
sync_manager: clean up header org
sync_manager: delta_lease implementation
sync_manager: accept offset units
sync_manager: fix up init output
sync_manager: put back watchdog calls
sync_manager: fix start_host_id error paths
sync_manager: add log_error's for watchdog file errors
sync_manager: actual timeouts
sync_manager: change timeouts on cmd line
sanlock: create new external api
sanlock: build libsanlock
sanlock: use MAX_LEASES everywhere
sanlock: add libvirt plugin
sanlock plugin: couple minor fixes
sanlock: clean up /var file names
sanlock plugin: fix symbol needed by libvirt
sanlock: add some debug output
sanlock plugin: fix uuid copy
sanlock plugin: fix names
sanlock: add "owner_name"
sanlock: fix renewal checks
sanlock: clean up host_id types
sanlock: set_host_id command
sanlock: fix killing pids
sanlock: add status command
sanlock: set version to 1.0
sanlock: delta_lease cleanup
sanlock: changing num_hosts
sanlock: add dump command
sanlock: renewal timings
sanlock: add direct option
sanlock: check for watchdog file
sanlock: recovery fixes
lock_driver_sanlock: fix compile problems
sanlock: improve command options
sanlock: tidying help text
sanlock: move binary to /usr/sbin
sanlock: add init script
sanlock: fix sigterm shutdown
sanlock: init stop
sanlock: add wdtest command
sanlock.spec: new url
lock_driver_sanlock: remove close
sanlock: introduce lockspaces
lock_driver_sanlock: remove files
sanlock: better logging functions
sanlock: misc log message
sanlock.spec: sbin not libexec
sanlock init: remove watchdog reference
wdmd: watchdog multiplexing daemon
sanlock: add code to use wdmd
sanlock/wdmd: use wdmd in sanlock
sanlock/wdmd: add copyright header to source files
sanlock: rename sanlock source dir
sanlock: move tests dir
move COPYING file
wdmd: use signalfd for signal handling
Fix Makefile comments
wdmd: fix daemon debug option
wdmd: add init script
sanlock.spec: updates
sanlock.spec: src dir
sanlock: build with uninstalled libwdmd
sanlock: version 1.1
sanlock: high priority options
wdmd: high priority options
sanlock: return migration state
sanlock: migration.txt describes libvirt/sanlock steps
libsanlock: include admin functions
sanlock: fix host_id expiration check
sanlock: migration working
devcount: migrate test
sanlock: setowner improvements
sanlock: migrate to target fix
sanlock: fix wdmd stop order
sanlock: various fixes
sanlock: remove wdtest
sanlock: remove migration
sanlock: clean up command return data
sanlock: add resource string conversion functions
sanlock: rework internal structs
devcount: add relock test
sanlock: fix release and inquire
sanlock: add_lockspace EEXIST
sanlock: rework client handling
sanlock: clean up warnings
sanlock: debug message changes
sanlock: add lockspace checks
wdmd: enable test scripts
sanlock: add str_to_lockspace to lib
WIP devcount migrate
devcount: new migrate test
sanlock: read_id and live_id commands
sanlock: check lockspace name and host_id
sanlock: remove remaining cluster_mode
sanlock: add libsanlock_direct
devcountn: start multiple devcount tests
devcount: small changes
sanlock: new return values
sanlock: misc changes and fix
sanlock: log error of full bad block
sanlock: interval between renewal checks
sanlock: renewal changes
sanlock: fix log_dump
sanlock: fix find_client_pid
sanlock: fix host_id reads from paxos_acquire
sanlock: init with one write
devcount: improve output
devcount: new pause/resume
devcount: add expire test
sanlock: correct paxos usage
sanlock: direct read_leader
sanlock: paxos delays
sanlock: use thread pool
sanlock: client status output format changes
sanlock: fix inquire of dead pid
sanlock: use native linux aio
sanlock: i/o changes
sanlock: aio changes
sanlock: reduce paxos acquire read ops
sanlock: quiet error case
sanlock: don't free aio buf until event completes
sanlock: io timeout related changes
sanlock: read dblocks in single aligned io
sanlock: add sanlock_restrict api
sanlock: add sanlock_direct_sector_size api
sanlock: add checksum to dblocks
sanlock: fix init restart
sanlock: don't release tokens in dead lockspace
sanlock: fix adding lockspace
sanlock: official 1MB/8MB alignment
devcount: use aio in init
libsanlock: link with LDFLAGS
sanlock: increase version to 1.3
sanlock/wdmd: shut up warnings
sanlock: fix libwdmd linking
remove spec file
sanlock: use a completed read after renewal timeout
sanlock: use unique host name in delta leases
sanlock: remove sector_size api
sanlock: abort delta wait on shutdown
sanlock: fix add_lockspace failure
sanlk_load: add new test
sanlock: fix recv and inquire
sanlock: initial pid_dead check in acquire
sanlock: release 1.4
sanlock: generate a uuid for host id
sanlock: return -EINPROGRESS from add_lockspace
sanlk_load: periodically kill and replace a pid
sanlock: zero num_hosts uses DEFAULT_MAX_HOSTS
tests: misc changes
sanlock: break paxos_acquire wait loop
sanlock: increase log line to 512 bytes
sanlock: change a log_error to log_debug
sanlock: fail host_id when corrupted
sanlock: release 1.5
sanlock: release 1.6
sanlock: handle colon escaping in path strings
wdmd: add option for high priority
wdmd: use accept4 with SOCK_NONBLOCK
wdmd: tidy sun_addr snprintf
wdmd: pid and sock file changes
wdmd: add man page
wdmd: disable test scripts
sanlock: use accept4 with SOCK_NONBLOCK
sanlock: tidy sun_addr snprintf
sanlock: add explicit -luuid
sanlock: pid and sock file changes
sanlock: add man page
sanlock/wdmd: improve mkdir of run dir
wdmd: new build flags
sanlock: new build flags
sanlock/wdmd: use monotonic time
sanlock: build with pie
sanlock/wdmd: nonblocking listening/accept
sanlock: add missing monotime files
sanlock: update man page
sanlock: man page update
sanlock: update man page and help text
sanlock: print connections limit
release: sanlock 1.7
makefile: install mode for man pages
sanlock: read align_size in renewal
sanlock: check other host_id leases
sanlock: minor fixes and cleanups
sanlock: add request api/cmd
sanlock: crc code tidying
sanlock/wdmd: add license header to files
sanlock: create libsanlock_client
sanlock: move client code
remove COPYING file
sanlock: remove internal header from sanlock_sock
libsanlock_client: use LGPLv2+
libwdmd: use LGPLv2+
sanlock_rv.h: switch to LGPLv2+
README.license: document licenses
sanlock: client align and init
sanlock: write request record
sanlock: request struct magic and version numbers
sanlock: set bitmap for requests
sanlock: examine resource requests
sanlock: update man page
sanlock: remove old comment
sanlock: renaming functions
sanlock: optimize paxos wait
sanlock: use flags in struct resource
sanlock: restructure lockspace checks
sanlock: remove BLOCK_WD force mode
libsanlock: fix function stubs
sanlock: new status and host_status
sanlock: improve status output
sanlock: status output sorting
sanlock: SIGTERM and SIGKILL for REQ_KILL_PID
sanlock: add flag RESTRICT_SIGKILL
sanlock: setup fewer aio events
sanlock: move cmd processing
sanlock: suppress log messages
sanlock: rename source files
sanlock: improve killing pids
sanlock: fix log_dump
version 1.8
sanlock: quick host_id reacquire
sanlock: add force option to shutdown command
simpler copyright line
default to GPLv2+ for our original files
sanlock: improve daemon permission errors
add systemd files
fixing up init stuff
release 1.9
sanlock: setmode
Revert "sanlock: setmode"
sanlock: fix error exit
sanlock: fix debug line
init scripts: fix path to restorecon
sanlock: shared mode for leases
sanlock: fix missing close_disks
sanlock: fix problem in paxos
sanlock: add paxos sanity check
sanlock: fix leaking fd
sanlock: change to paxos algorithm
sanlock: change ondisk version
sanlock: remove log noise
sanlock: ignore SIGTERM if lockspaces exist
sanlock: quiet more error messages
sanlk_load: munging variable names
sanlk_load: add shared locks
sanlock: kill client connection on recv error
sanlk_load: ignore release errors
sanlock: set sector_size in shared tokens
sanlock man page: mention SH
release 2.0
sanlock: clean up warnings
sanlock: fix inquire state string
sanlock: retry transient sh failures
sanlock: status for all shared tokens
sanlock: add a logrotate file
release 2.1
python: fix path strncpy
sanlock: remove physical sector size requirement
sanlock: add sample sysconfig file
sanlock: ASYNC flag for lockspace add and rem
sanlock: UNUSED flag for lockspace rem
release 2.2
sanlock.log: empty file to install from rpm
Revert "sanlock.log: empty file to install from rpm"
sanlock: remove limits.conf
sysconfig: remove user option
init: root user
release 2.3
latest spec file
sanlock.spec: fix exclusive arch
sanlock/wdmd: use /var/log/subsys/file
daemon: skip setup_groups when no -U or -G
sanlock.spec: require useradd and groupadd
sanlock.spec updated
daemon: don't put struct space on stack
daemon: fix add_lockspace that has been removed
daemon: fix inquire lver
daemon: graceful pid handling
Revert "init: make explicit to run sanlock as root"
remove spec file
release 2.4
daemon: include resource.h
daemon: fix lockfile ownership
wdmd: use lockfile mode 644
sanlock: use lockfile mode 644
wdmd: use shm_open to prevent restart
sanlock: remove umask 0
sanlock/wdmd: remove global connection
daemon: extend grace time
wdmd: close device when test fails
wdmd: preemptive close before test fails
wdmd: pet after reopen and use 1 sec interval after failure
sanlock: base kill sig on last renewal
clientn: add tests
tests: add test-recovery script
sanlock: fix paxos acquire host_id check
daemon: use helper for examine request kill
sanlock: show host_status for all lockspaces
sanlock: fix status of lockspaces in add and rem
sanlock: adjustable io timeouts
sanlock: add direct next_free command
sanlock: request force_mode 2 is SIGUSR1
sanlock: print escape chars in path from status
release 2.5
David Weber (2):
Fix order of linking
Install another symlink to shared library
Fabio M. Di Nitto (5):
build: sanlock should link with libsanlock
build: install shared lib header files and fix DESTDIR usage
build: drop rpm target
spec file: do first cut for total spec file
build: fix linking with libsanlock and install target
Federico Simoncelli (55):
rpm: sync specfile with fedora
rpm: add sanlock_admin.h header
rpm: add the lib package, install the initscripts
python: remove unused python code
python: add python binding
python: release the gil during sanlock operations
python: wrap sanlock extension with a module
rpm: add python binding package
python: pass a lockspace copy to str_to_lockspace
makefile: fix install typo
rpm: add sanlock_direct header
python: add sanlock init functions and exception
direct: close disks after initialization
python: register process only once
daemon: configurable socket permissions
rpm: add sanlock user and group
python: exceptions must contain the errno
rpm: add missing libaio-devel dependency
rpm: add daemon options in the init file
python: add missing aio library
python: add get_alignment function
libs: include libsanlock_direct into libsanlock
python: align num_hosts and max_hosts defaults
python: expose sanlock file descriptor
python: improve error reporting
python: parse lockspaces and resources natively
python: add usage example
python: initial support for sanlock errors
python: document the sanlock module
python: module cleanup
build: fix documentation install path
client: return appropriate errno on failure
python: expose errno in the exception
python: acquire leases for other processes
sanlock: implement the inq_lockspace command
python: add the inq_lockspace command binding
misc: run sanlock daemon as sanlock user
wdmd: use getopt to parse the command line
wdmd: make socket group ownership configurable
python: add shared resource parameter to acquire
python: release leases for other processes
python: ASYNC flag for lockspace add and rem
python: UNUSED flag for lockspace rem
log: add full time and date to the log messages
sanlock: set the supplementary groups at startup
log: add the thread id in the log messages
sanlock: fix an rv check typo in setup_groups
init: add a stop timeout for the sanlock service
build: add a generic rpm spec file
init: make explicit to run sanlock as root
wdmd: load the softdog module when needed
sanlock: WAIT flag for sanlock_inq_lockspace
daemon: drop root privileges and use the helper
init: use checkpid when stopping the services
sanlock: expose sanlock path import and export utils
Saggi Mizrahi (21):
Added the beginning of the testing and debugging tools
Better handling of max hosts
sync_manager: Updated tests to work with new lease struct
sync_manager: fixed skipping first arg in command
sync_manager: acquire and release actions
sync_manager: minor fixes
sync_manager: renamed stuff
sync_manager: made acquire synchronous again
sync_manager: added set_host_id action
sync_manager: use kill(0) for secondary pid check
sync_manager: make rpm and install
sync_manager: spec file update
sync_manager: Allow longer resource names
sync_manager: allow repeated set_host_id
sync_manager: Added escaping for the leases arg
sync_manager: Created the python bindings for sync_manager
sync_manager: listener socket permissions
sync_manager: Updated python binding and tests
sync_manager: Made 'token' a const in log_level
sync_manager: refactor messaging system
sync_manager: use getsockopt PEERCRED
11 years
src/main.c wdmd/main.c
by David Teigland
src/main.c | 2 +-
wdmd/main.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
New commits:
commit 9bd053ea5bf54a896f1b5bce1e3d58abfbd007a9
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Sep 24 10:35:09 2012 -0500
release 2.5
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/main.c b/src/main.c
index f9d6546..dea78f7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -53,7 +53,7 @@
#include "helper.h"
#include "timeouts.h"
-#define RELEASE_VERSION "2.4"
+#define RELEASE_VERSION "2.5"
#define SIGRUNPATH 100 /* anything that's not SIGTERM/SIGKILL */
diff --git a/wdmd/main.c b/wdmd/main.c
index 2e41e91..ce6579c 100644
--- a/wdmd/main.c
+++ b/wdmd/main.c
@@ -41,7 +41,7 @@
#define GNUC_UNUSED __attribute__((__unused__))
#endif
-#define RELEASE_VERSION "2.4"
+#define RELEASE_VERSION "2.5"
#define DEFAULT_TEST_INTERVAL 10
#define RECOVER_TEST_INTERVAL 1
11 years
Changes to 'fence_sanlock'
by David Teigland
New branch 'fence_sanlock' available with the following commits:
commit 59a67bbeb30a75e874c3e9a2a6f83c6c5d997c90
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Sep 20 17:16:06 2012 -0500
fence_sanlockd: use init script
Start/stop fence_sanlockd with init script.
It is started with -w, which means wait for options.
Later (from cman start), fence_node -U runs fence_sanlock -o on,
which calls fence_sanlockd -s to send the running daemon the
-p and -i options.
Signed-off-by: David Teigland <teigland(a)redhat.com>
commit f8de4dc7bb3fcfe7873dfa08428ef894aa175de8
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Sep 20 14:06:00 2012 -0500
fence_sanlock: variable alignment
- make lease size depend on disk sector size
- clarify fence_sanlock read_leader behavior
Signed-off-by: David Teigland <teigland(a)redhat.com>
commit b8c524881b9e753a8b81892c844fb9e3a4e3f9b7
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Date: Wed Sep 19 09:12:31 2012 +0200
fence_sanlock: unify code and silence some operations
make a common function to get leader info
redirect some errors to /dev/null
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
commit 583a6f865488034cec105ab113777f6b80067485
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Date: Wed Sep 19 08:28:22 2012 +0200
fence_sanlock: improve action_on error checking and report
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
commit 7fae84b38cc2791b64e83605786166572d70cbe6
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Sep 18 13:47:00 2012 -0500
fence_sanlock: various fixes and changes
- change device arg name to path
- add some metadata descriptions
- in off, make request after first acquire fails
- in off, always succeed after acquire works
- in off, check if victim re-acquired cleanly
- in off, check if already off at start
- in status, use ^timestamp
Signed-off-by: David Teigland <teigland(a)redhat.com>
commit b9e28873fbcd11ef6418758c6bf38e32524b0a76
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Date: Tue Sep 18 10:54:40 2012 +0200
fence_sanlock: add basic metadata to integrate with ccs_config_validate
drop node="" option (unused)
TODO:
- add proper short/long description
- add call in spec file:
%post -n fence_sanlock
ccs_update_schema > /dev/null 2>&1 ||:
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
commit 97a190b2ffb9a70ebd1c732db46199a2c0f6ef72
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Date: Tue Sep 18 10:37:49 2012 +0200
fence_sanlock: don't restart fence_sanlockd if it's already running
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
commit 154eb0d259887605c03e295b1e187629bace58a1
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Date: Mon Sep 17 15:10:16 2012 +0200
fence_sanlock: cleanup shell usage
add common code/sections
improve error handling
add a few more FIXME points as reminders
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
commit 62604e2920b6c22303aff33ce0d5e21570a4e720
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Date: Mon Sep 17 10:16:29 2012 +0200
fence_sanlockd: add missing \n
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
commit d32fccf6a29acc2a0b759eb8a9736a963a9beb05
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
Date: Mon Sep 17 10:15:08 2012 +0200
fence_sanlock: add install bits
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
commit 3c561fb027e9c9773e9671b755f3fa5c5e33f94c
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Sep 14 10:31:37 2012 -0500
fence_sanlock: new code
A fence agent that uses /dev/watchdog to reset hosts.
Per-host sanlock leases on shared storage are used:
- for hosts to detect that they are being fenced by
someone if they haven't actually failed
- for hosts to verify that fenced hosts have been
reset by seeing their host_id leases expire (not
be renewed on disk)
- for hosts to know how long to wait after fencing
before they can proceed, based on acquiring the
expired lease of the fenced host
Signed-off-by: David Teigland <teigland(a)redhat.com>
11 years