Changes to 'helper'
by David Teigland
New branch 'helper' available with the following commits:
commit 5e2b5cc8cde79b233af9768c0ece8083c47a1f84
Author: David Teigland <teigland(a)redhat.com>
Date: Thu May 24 11:07:17 2012 -0500
daemon: graceful pid handling
Add the "killpath" api to set a program/script for
"killing" a pid/connection, where killing could
be anything that either causes the pid to exit,
or causes the pid to be put into a safe state and
releases its leases, e.g. pausing a vm.
Add a helper process to fork/exec the killpaths.
If pids need to be killed, a configured killpath will
first be run instead of kill(SIGTERM). If the pid
has not exited or had its leases released before
the existing SIGTERM time limit, then sanlock will
escalate to SIGKILL.
This allows for the following:
- pids running with leases in lockspace
- lockspace storage fails (i/o errors, host_id cannot be renewed)
- sanlock runs killpath for each pid
- killpath does inquire, saves state, releases leases
- sanlock finds no more pids with leases in lockspace
- sanlock releases lockspace
- pids remain, but without leases
later:
- lockspace storage is restored
- add_lockspace reacquires host_id
- leases are reacquired for each pid using saved state
- pids running with leases again
This sequence is used by the following test:
cd tests
cp killpath /root/killpath_pause
On host_id 1
./clientn 2 init /dev/bull/leases
./clientn 2 start /dev/bull/leases 1 /root/killpath_pause
./clientn 2 error /dev/bull/leases
./clientn 2 resume /dev/bull/leases 1
If between error and resume, host_id 2 completes:
./clientn 2 start /dev/bull/leases 2 /root/killpath_pause
Then, the resume on host_id 1 will fail and the sanlk_client
pids will be killed.
Signed-off-by: David Teigland <teigland(a)redhat.com>
11 years, 5 months
src/resource.c
by David Teigland
src/resource.c | 4 ++++
1 file changed, 4 insertions(+)
New commits:
commit 8910c06e7cc1afa676b311a0208468ddd98a0450
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Jun 29 10:23:31 2012 -0500
daemon: fix inquire lver
The lver returned by inquire was always 0 because it
was not being copied back to the token after acquire.
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/resource.c b/src/resource.c
index cef0765..1ef03b2 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -654,6 +654,10 @@ int acquire_token(struct task *task, struct token *token)
memcpy(&r->leader, &leader, sizeof(struct leader_record));
+ /* copy lver into token because inquire looks there for it */
+ if (!(token->acquire_flags & SANLK_RES_SHARED))
+ token->r.lver = leader.lver;
+
if (token->acquire_flags & SANLK_RES_SHARED) {
rv = set_mode_block(task, token, token->host_id,
token->host_generation, MBLOCK_SHARED);
11 years, 5 months
4 commits - python/sanlock.c sanlock.spec src/cmd.c src/delta_lease.c src/lockspace.c src/lockspace.h src/sanlock_admin.h src/sanlock_internal.h
by David Teigland
python/sanlock.c | 25 ++++++++++-----
sanlock.spec | 9 ++++-
src/cmd.c | 35 +++++++++++++--------
src/delta_lease.c | 80 ++++++++++++++++++++++++++++++++++---------------
src/lockspace.c | 69 +++++++++++++++++++++++++++++++-----------
src/lockspace.h | 4 +-
src/sanlock_admin.h | 3 +
src/sanlock_internal.h | 10 ++++++
8 files changed, 171 insertions(+), 64 deletions(-)
New commits:
commit fee5d9c932d549acb8ba119704b3eb3eecdb60ef
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Thu Jun 21 18:44:35 2012 +0000
sanlock: WAIT flag for sanlock_inq_lockspace
New SANLK_INQ_WAIT for sanlock_inq_lockspace().
When the SANLK_INQ_WAIT flag is used the command will block and wait for
the ongoing procedure (add_lockspace/rem_lockspace) to complete before
returning the status.
sanlock_inq_lockspace() still returns:
0 lockspace exists and is not being added or removed
-EINPROGRESS add or rem is in progress, when the flag SANLK_INQ_WAIT
is not used
-ENOENT no lockspace with matching name and disk location is
found
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
diff --git a/python/sanlock.c b/python/sanlock.c
index bfce46c..d953c90 100644
--- a/python/sanlock.c
+++ b/python/sanlock.c
@@ -314,36 +314,45 @@ py_add_lockspace(PyObject *self __unused, PyObject *args, PyObject *keywds)
/* inq_lockspace */
PyDoc_STRVAR(pydoc_inq_lockspace, "\
-inq_lockspace(lockspace, host_id, path, offset=0)\n\
+inq_lockspace(lockspace, host_id, path, offset=0, wait=False)\n\
Return True if the sanlock daemon currently owns the host_id in lockspace,\n\
False otherwise. The special value None is returned when the daemon is\n\
-still in the process of acquiring or releasing the host_id.");
+still in the process of acquiring or releasing the host_id. If the wait\n\
+flag is set to True the function will block until the host_id is either\n\
+acquired or released.");
static PyObject *
py_inq_lockspace(PyObject *self __unused, PyObject *args, PyObject *keywds)
{
- int rv;
+ int rv, waitrs = 0, flags = 0;
const char *lockspace, *path;
struct sanlk_lockspace ls;
- static char *kwlist[] = {"lockspace", "host_id", "path", "offset", NULL};
+ static char *kwlist[] = {"lockspace", "host_id", "path", "offset",
+ "wait", NULL};
/* initialize lockspace structure */
memset(&ls, 0, sizeof(struct sanlk_lockspace));
/* parse python tuple */
- if (!PyArg_ParseTupleAndKeywords(args, keywds, "sks|k", kwlist,
- &lockspace, &ls.host_id, &path, &ls.host_id_disk.offset)) {
+ if (!PyArg_ParseTupleAndKeywords(args, keywds, "sks|ki", kwlist,
+ &lockspace, &ls.host_id, &path, &ls.host_id_disk.offset,
+ &waitrs)) {
return NULL;
}
+ /* prepare sanlock_inq_lockspace flags */
+ if (waitrs) {
+ flags |= SANLK_INQ_WAIT;
+ }
+
/* prepare sanlock names */
strncpy(ls.name, lockspace, SANLK_NAME_LEN);
strncpy(ls.host_id_disk.path, path, SANLK_PATH_LEN - 1);
/* add sanlock lockspace (gil disabled) */
Py_BEGIN_ALLOW_THREADS
- rv = sanlock_inq_lockspace(&ls, 0);
+ rv = sanlock_inq_lockspace(&ls, flags);
Py_END_ALLOW_THREADS
if (rv == 0) {
@@ -362,7 +371,7 @@ py_inq_lockspace(PyObject *self __unused, PyObject *args, PyObject *keywds)
PyDoc_STRVAR(pydoc_rem_lockspace, "\
rem_lockspace(lockspace, host_id, path, offset=0, async=False, unused=False)\n\
Remove a lockspace, releasing the acquired host_id. If async is True the\n\
-function will return immediatly and the status can be checked using\n\
+function will return immediately and the status can be checked using\n\
inq_lockspace. If unused is True the command will fail (EBUSY) if there is\n\
at least one acquired resource in the lockspace (instead of automatically\n\
release it).");
diff --git a/src/cmd.c b/src/cmd.c
index 514a13d..d03dec5 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -891,6 +891,7 @@ static void cmd_add_lockspace(struct cmd_args *ca)
static void cmd_inq_lockspace(struct cmd_args *ca)
{
struct sanlk_lockspace lockspace;
+ int waitrs = ca->header.cmd_flags & SANLK_INQ_WAIT;
int fd, rv, result;
fd = client[ca->ci_in].fd;
@@ -903,13 +904,21 @@ static void cmd_inq_lockspace(struct cmd_args *ca)
goto reply;
}
- log_debug("cmd_inq_lockspace %d,%d %.48s:%llu:%s:%llu",
+ log_debug("cmd_inq_lockspace %d,%d %.48s:%llu:%s:%llu flags %x",
ca->ci_in, fd, lockspace.name,
(unsigned long long)lockspace.host_id,
lockspace.host_id_disk.path,
- (unsigned long long)lockspace.host_id_disk.offset);
+ (unsigned long long)lockspace.host_id_disk.offset,
+ ca->header.cmd_flags);
+
+ while (1) {
+ result = inq_lockspace(&lockspace);
+ if ((result != -EINPROGRESS) || !(waitrs)) {
+ break;
+ }
+ sleep(1);
+ }
- result = inq_lockspace(&lockspace);
reply:
log_debug("cmd_inq_lockspace %d,%d done %d", ca->ci_in, fd, result);
diff --git a/src/sanlock_admin.h b/src/sanlock_admin.h
index c5a3a0f..5134faa 100644
--- a/src/sanlock_admin.h
+++ b/src/sanlock_admin.h
@@ -17,6 +17,9 @@
#define SANLK_REM_ASYNC 0x00000001
#define SANLK_REM_UNUSED 0x00000002
+/* inq flags */
+#define SANLK_INQ_WAIT 0x00000001
+
/*
* add_lockspace returns:
* 0: the lockspace has been added successfully
commit 85de31c2efab2a37fb1837a97af7df5694f8cf1f
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Jun 21 14:40:54 2012 -0500
daemon: fix add_lockspace that has been removed
If rem_lockspace happens
- after the lease_thread from add_lockspace has
acquired the lease, and
- before the space struct is moved to spaces list
then the struct space is freed, which leaves the
lease_thread using a freed struct space. The next
time the lease_thread tries to renew, it will
probably complain about bad info in struct space
and/or segfault.
rem_lockspace also needs to check for the lockspace
on the spaces_add list, otherwise it will return
immediately, before the remove, if the lockspace
is being added.
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 9db9453..a7ca28c 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -201,19 +201,21 @@ int delta_lease_acquire(struct task *task,
uint64_t new_ts;
int i, error, rv, delay, delta_large_delay;
- log_space(sp, "delta_acquire %llu begin", (unsigned long long)host_id);
+ log_space(sp, "delta_acquire begin %.48s:%llu",
+ sp->space_name, (unsigned long long)host_id);
error = delta_lease_leader_read(task, disk, space_name, host_id, &leader,
"delta_acquire_begin");
- if (error < 0)
+ if (error < 0) {
+ log_space(sp, "delta_acquire leader_read1 error %d", error);
return error;
+ }
if (leader.timestamp == LEASE_FREE)
goto write_new;
if (!strncmp(leader.resource_name, our_host_name, NAME_ID_SIZE)) {
- log_space(sp, "delta_acquire %llu fast reacquire",
- (unsigned long long)host_id);
+ log_space(sp, "delta_acquire fast reacquire");
goto write_new;
}
@@ -243,22 +245,27 @@ int delta_lease_acquire(struct task *task,
while (1) {
memcpy(&leader1, &leader, sizeof(struct leader_record));
- log_space(sp, "delta_acquire %llu delta_large_delay %d delay %d",
- (unsigned long long)host_id, delta_large_delay, delay);
+ log_space(sp, "delta_acquire delta_large_delay %d delay %d",
+ delta_large_delay, delay);
/* TODO: we could reread every several seconds to see if
it has changed, so we can abort more quickly if so */
for (i = 0; i < delay; i++) {
- if (sp->external_remove || external_shutdown)
+ if (sp->external_remove || external_shutdown) {
+ log_space(sp, "delta_acquire abort1 remove %d shutdown %d",
+ sp->external_remove, external_shutdown);
return SANLK_ERROR;
+ }
sleep(1);
}
error = delta_lease_leader_read(task, disk, space_name, host_id,
&leader, "delta_acquire_wait");
- if (error < 0)
+ if (error < 0) {
+ log_space(sp, "delta_acquire leader_read2 error %d", error);
return error;
+ }
if (!memcmp(&leader1, &leader, sizeof(struct leader_record)))
break;
@@ -266,7 +273,7 @@ int delta_lease_acquire(struct task *task,
if (leader.timestamp == LEASE_FREE)
break;
- log_erros(sp, "delta_acquire %llu busy %llu %llu %llu %.48s",
+ log_erros(sp, "delta_acquire host_id %llu busy1 %llu %llu %llu %.48s",
(unsigned long long)host_id,
(unsigned long long)leader.owner_id,
(unsigned long long)leader.owner_generation,
@@ -283,8 +290,7 @@ int delta_lease_acquire(struct task *task,
snprintf(leader.resource_name, NAME_ID_SIZE, "%s", our_host_name);
leader.checksum = leader_checksum(&leader);
- log_space(sp, "delta_acquire %llu write %llu %llu %llu %.48s",
- (unsigned long long)host_id,
+ log_space(sp, "delta_acquire write %llu %llu %llu %.48s",
(unsigned long long)leader.owner_id,
(unsigned long long)leader.owner_generation,
(unsigned long long)leader.timestamp,
@@ -292,28 +298,34 @@ int delta_lease_acquire(struct task *task,
rv = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
task, "delta_leader");
- if (rv < 0)
+ if (rv < 0) {
+ log_space(sp, "delta_acquire write error %d", rv);
return rv;
+ }
memcpy(&leader1, &leader, sizeof(struct leader_record));
delay = 2 * task->io_timeout_seconds;
- log_space(sp, "delta_acquire %llu delta_short_delay %d",
- (unsigned long long)host_id, delay);
+ log_space(sp, "delta_acquire delta_short_delay %d", delay);
for (i = 0; i < delay; i++) {
- if (sp->external_remove || external_shutdown)
+ if (sp->external_remove || external_shutdown) {
+ log_space(sp, "delta_acquire abort2 remove %d shutdown %d",
+ sp->external_remove, external_shutdown);
return SANLK_ERROR;
+ }
sleep(1);
}
error = delta_lease_leader_read(task, disk, space_name, host_id, &leader,
"delta_acquire_check");
- if (error < 0)
+ if (error < 0) {
+ log_space(sp, "delta_acquire leader_read3 error %d", error);
return error;
+ }
if (memcmp(&leader1, &leader, sizeof(struct leader_record))) {
- log_erros(sp, "delta_acquire %llu busy %llu %llu %llu %.48s",
+ log_erros(sp, "delta_acquire host_id %llu busy2 %llu %llu %llu %.48s",
(unsigned long long)host_id,
(unsigned long long)leader.owner_id,
(unsigned long long)leader.owner_generation,
@@ -322,6 +334,11 @@ int delta_lease_acquire(struct task *task,
return SANLK_HOSTID_BUSY;
}
+ log_space(sp, "delta_acquire done %llu %llu %llu",
+ (unsigned long long)leader.owner_id,
+ (unsigned long long)leader.owner_generation,
+ (unsigned long long)leader.timestamp);
+
memcpy(leader_ret, &leader, sizeof(struct leader_record));
return SANLK_OK;
}
@@ -343,8 +360,10 @@ int delta_lease_renew(struct task *task,
uint64_t host_id, id_offset, new_ts;
int rv, iobuf_len, sector_size, io_timeout_save;
- if (!leader_last)
+ if (!leader_last) {
+ log_erros(sp, "delta_renew no leader_last");
return -EINVAL;
+ }
*read_result = SANLK_ERROR;
@@ -356,8 +375,11 @@ int delta_lease_renew(struct task *task,
/* offset of our leader_record */
id_offset = (host_id - 1) * sector_size;
- if (id_offset > iobuf_len)
+ if (id_offset > iobuf_len) {
+ log_erros(sp, "delta_renew bad offset %llu iobuf_len %d",
+ (unsigned long long)id_offset, iobuf_len);
return -EINVAL;
+ }
/* if the previous renew timed out in this initial read, and that read
@@ -441,8 +463,10 @@ int delta_lease_renew(struct task *task,
memcpy(&leader, task->iobuf+id_offset, sizeof(struct leader_record));
rv = verify_leader(disk, space_name, host_id, &leader, "delta_renew");
- if (rv < 0)
+ if (rv < 0) {
+ log_erros(sp, "delta_renew verify_leader error %d", rv);
return rv;
+ }
/* We can't always memcmp(&leader, leader_last) because previous writes
may have timed out and we don't know if they were actually written
@@ -500,8 +524,10 @@ int delta_lease_renew(struct task *task,
task->io_timeout_seconds = io_timeout_save;
- if (rv < 0)
+ if (rv < 0) {
+ log_erros(sp, "delta_renew write error %d", rv);
return rv;
+ }
/* the paper shows doing a delay and another read here, but it seems
unnecessary since we do the same at the beginning of the next renewal */
@@ -526,7 +552,8 @@ int delta_lease_release(struct task *task,
host_id = leader_last->owner_id;
- log_space(sp, "delta_release %llu begin", (unsigned long long)host_id);
+ log_space(sp, "delta_release begin %.48s:%llu",
+ sp->space_name, (unsigned long long)host_id);
memcpy(&leader, leader_last, sizeof(struct leader_record));
leader.timestamp = LEASE_FREE;
@@ -534,8 +561,15 @@ int delta_lease_release(struct task *task,
rv = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
task, "delta_leader");
- if (rv < 0)
+ if (rv < 0) {
+ log_space(sp, "delta_release write error %d", rv);
return rv;
+ }
+
+ log_space(sp, "delta_release done %llu %llu %llu",
+ (unsigned long long)leader.owner_id,
+ (unsigned long long)leader.owner_generation,
+ (unsigned long long)leader.timestamp);
memcpy(leader_ret, &leader, sizeof(struct leader_record));
return SANLK_OK;
diff --git a/src/lockspace.c b/src/lockspace.c
index c39a036..6618f16 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -704,21 +704,42 @@ int add_lockspace_wait(struct space *sp)
/* the thread exits right away if acquire fails */
pthread_join(sp->thread, NULL);
rv = result;
+ log_space(sp, "add_lockspace fail lease_status %d", result);
goto fail_del;
}
- /* once we move sp to spaces list, tokens can begin using it,
- and the main loop will begin monitoring its renewals */
+ /* Once we move sp to spaces list, tokens can begin using it,
+ the main loop will begin monitoring its renewals, and will
+ handle removing it. */
pthread_mutex_lock(&spaces_mutex);
if (sp->external_remove || external_shutdown) {
- rv = -1;
pthread_mutex_unlock(&spaces_mutex);
+ log_space(sp, "add_lockspace undo remove %d shutdown %d",
+ sp->external_remove, external_shutdown);
+
+ /* We've caught a remove/shutdown just before completing
+ the add process. Don't complete it, but reverse the
+ add, leaving the sp on spaces_add while reversing.
+ Do the same thing that main_loop would do, except we
+ don't have to go through killing_pids and checking for
+ all_pids_dead since this lockspace has never been on
+ the spaces list, so it could not have been used yet. */
+
+ pthread_mutex_lock(&sp->mutex);
+ sp->thread_stop = 1;
+ unlink_watchdog_file(sp);
+ pthread_mutex_unlock(&sp->mutex);
+ pthread_join(sp->thread, NULL);
+ rv = -1;
+ log_space(sp, "add_lockspace undo complete");
goto fail_del;
+ } else {
+ list_move(&sp->list, &spaces);
+ log_space(sp, "add_lockspace done");
+ pthread_mutex_unlock(&spaces_mutex);
+ return 0;
}
- list_move(&sp->list, &spaces);
- pthread_mutex_unlock(&spaces_mutex);
- return 0;
fail_del:
pthread_mutex_lock(&spaces_mutex);
@@ -775,8 +796,12 @@ int rem_lockspace_start(struct sanlk_lockspace *ls, unsigned int *space_id)
sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
&spaces_add, NULL, NULL);
if (sp) {
+ /* add_lockspace will be aborted and undone and the sp will
+ not be moved to the spaces list */
sp->external_remove = 1;
+ id = sp->space_id;
pthread_mutex_unlock(&spaces_mutex);
+ *space_id = id;
rv = 0;
goto out;
}
@@ -805,7 +830,6 @@ int rem_lockspace_start(struct sanlk_lockspace *ls, unsigned int *space_id)
sp->external_remove = 1;
id = sp->space_id;
pthread_mutex_unlock(&spaces_mutex);
-
*space_id = id;
rv = 0;
out:
@@ -822,7 +846,7 @@ int rem_lockspace_wait(struct sanlk_lockspace *ls, unsigned int space_id)
while (1) {
pthread_mutex_lock(&spaces_mutex);
sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
- &spaces, &spaces_rem, NULL);
+ &spaces, &spaces_rem, &spaces_add);
if (sp && (sp->space_id == space_id))
done = 0;
else
commit f179f842de746a221b16c4f7466468365847c94c
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Jun 18 16:56:57 2012 -0500
daemon: don't put struct space on stack
struct space has become very large, and it was still
kept on the stack unnecessarily in a few places.
Use a new space_info struct and copy the necessary
fields.
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/cmd.c b/src/cmd.c
index 6417df0..514a13d 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -88,7 +88,7 @@ static int check_new_tokens_space(struct client *cl,
struct token *new_tokens[],
int new_tokens_count)
{
- struct space space;
+ struct space_info spi;
struct token *token;
int i, rv, empty_slots = 0;
@@ -107,9 +107,9 @@ static int check_new_tokens_space(struct client *cl,
for (i = 0; i < new_tokens_count; i++) {
token = new_tokens[i];
- rv = _lockspace_info(token->r.lockspace_name, &space);
+ rv = _lockspace_info(token->r.lockspace_name, &spi);
- if (!rv && !space.killing_pids && space.host_id == token->host_id)
+ if (!rv && !spi.killing_pids && spi.host_id == token->host_id)
continue;
return -ENOSPC;
@@ -125,7 +125,7 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
struct token *new_tokens[SANLK_MAX_RESOURCES];
struct sanlk_resource res;
struct sanlk_options opt;
- struct space space;
+ struct space_info spi;
char *opt_str;
int token_len, disks_len;
int fd, rv, i, j, empty_slots, lvl;
@@ -290,23 +290,23 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
for (i = 0; i < new_tokens_count; i++) {
token = new_tokens[i];
- rv = lockspace_info(token->r.lockspace_name, &space);
- if (rv < 0 || space.killing_pids) {
+ rv = lockspace_info(token->r.lockspace_name, &spi);
+ if (rv < 0 || spi.killing_pids) {
log_errot(token, "cmd_acquire %d,%d,%d invalid lockspace "
"found %d failed %d name %.48s",
- cl_ci, cl_fd, cl_pid, rv, space.killing_pids,
+ cl_ci, cl_fd, cl_pid, rv, spi.killing_pids,
token->r.lockspace_name);
result = -ENOSPC;
goto done;
}
- token->host_id = space.host_id;
- token->host_generation = space.host_generation;
+ token->host_id = spi.host_id;
+ token->host_generation = spi.host_generation;
token->pid = cl_pid;
if (cl->restrict & SANLK_RESTRICT_SIGKILL)
token->flags |= T_RESTRICT_SIGKILL;
/* save a record of what this token_id is for later debugging */
- log_level(space.space_id, token->token_id, NULL, LOG_WARNING,
+ log_level(spi.space_id, token->token_id, NULL, LOG_WARNING,
"resource %.48s:%.48s:%.256s:%llu%s for %d,%d,%d",
token->r.lockspace_name,
token->r.name,
diff --git a/src/lockspace.c b/src/lockspace.c
index 0e9ad87..c39a036 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -92,25 +92,33 @@ struct space *find_lockspace(char *name)
return _search_space(name, NULL, 0, &spaces, &spaces_rem, &spaces_add);
}
-int _lockspace_info(char *space_name, struct space *sp_out)
+int _lockspace_info(char *space_name, struct space_info *spi)
{
struct space *sp;
list_for_each_entry(sp, &spaces, list) {
if (strncmp(sp->space_name, space_name, NAME_ID_SIZE))
continue;
- memcpy(sp_out, sp, sizeof(struct space));
+
+ /* keep this in sync with any new fields added to
+ struct space_info */
+
+ spi->space_id = sp->space_id;
+ spi->host_id = sp->host_id;
+ spi->host_generation = sp->host_generation;
+ spi->killing_pids = sp->killing_pids;
+
return 0;
}
return -1;
}
-int lockspace_info(char *space_name, struct space *sp_out)
+int lockspace_info(char *space_name, struct space_info *spi)
{
int rv;
pthread_mutex_lock(&spaces_mutex);
- rv = _lockspace_info(space_name, sp_out);
+ rv = _lockspace_info(space_name, spi);
pthread_mutex_unlock(&spaces_mutex);
return rv;
@@ -118,14 +126,17 @@ int lockspace_info(char *space_name, struct space *sp_out)
int lockspace_disk(char *space_name, struct sync_disk *disk)
{
- struct space space;
- int rv;
+ struct space *sp;
+ int rv = -1;
pthread_mutex_lock(&spaces_mutex);
- rv = _lockspace_info(space_name, &space);
- if (!rv) {
- memcpy(disk, &space.host_id_disk, sizeof(struct sync_disk));
+ list_for_each_entry(sp, &spaces, list) {
+ if (strncmp(sp->space_name, space_name, NAME_ID_SIZE))
+ continue;
+
+ memcpy(disk, &sp->host_id_disk, sizeof(struct sync_disk));
disk->fd = -1;
+ rv = 0;
}
pthread_mutex_unlock(&spaces_mutex);
diff --git a/src/lockspace.h b/src/lockspace.h
index cf456ed..80055d6 100644
--- a/src/lockspace.h
+++ b/src/lockspace.h
@@ -10,8 +10,8 @@
#define __HOST_ID__H__
struct space *find_lockspace(char *name);
-int _lockspace_info(char *space_name, struct space *sp_out);
-int lockspace_info(char *space_name, struct space *sp_out);
+int _lockspace_info(char *space_name, struct space_info *spi);
+int lockspace_info(char *space_name, struct space_info *spi);
int lockspace_disk(char *space_name, struct sync_disk *disk);
int host_info(char *space_name, uint64_t host_id, struct host_status *hs_out);
int host_status_set_bit(char *space_name, uint64_t host_id);
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index d3ab366..2188a95 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -155,6 +155,16 @@ struct space {
struct host_status host_status[DEFAULT_MAX_HOSTS];
};
+/* Update lockspace_info() to copy any fields from struct space
+ to space_info */
+
+struct space_info {
+ uint32_t space_id;
+ uint64_t host_id;
+ uint64_t host_generation;
+ int killing_pids;
+};
+
/*
* Example of watchdog behavior when host_id renewals fail, assuming
* that sanlock cannot successfully kill the pids it is supervising that
commit ac569ab9ab7b55b7bcd3433cde0648993aebbd98
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Jun 18 16:29:44 2012 -0500
sanlock.spec updated
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/sanlock.spec b/sanlock.spec
index b2534c4..56b861c 100644
--- a/sanlock.spec
+++ b/sanlock.spec
@@ -1,6 +1,6 @@
Name: sanlock
Version: 2.3
-Release: 3%{?dist}
+Release: 4%{?dist}
Summary: A shared disk lock manager
Group: System Environment/Base
@@ -13,6 +13,10 @@ Requires(pre): /usr/sbin/groupadd
Requires(pre): /usr/sbin/useradd
Source0: https://fedorahosted.org/releases/s/a/sanlock/%{name}-%{version}.tar.gz
+%if 0%{?fedora} >= 16
+BuildRequires: systemd-units
+%endif
+
%description
sanlock uses disk paxos to manage leases on shared storage.
Hosts connected to a common SAN can use this to synchronize their
@@ -177,6 +181,9 @@ developing applications that use %{name}.
%{_includedir}/sanlock_direct.h
%changelog
+* Mon Jun 18 2012 David Teigland <teigland(a)redhat.com> - 2.3-4
+- Require systemd-units
+
* Fri Jun 15 2012 David Teigland <teigland(a)redhat.com> - 2.3-3
- Require useradd and groupadd
11 years, 5 months
[PATCH] sanlock: WAIT flag for sanlock_inq_lockspace
by Federico Simoncelli
New SANLK_INQ_WAIT for sanlock_inq_lockspace().
When the SANLK_INQ_WAIT flag is used the command will block and wait for
the ongoing procedure (add_lockspace/rem_lockspace) to complete before
returning the status.
sanlock_inq_lockspace() still returns:
0 lockspace exists and is not being added or removed
-EINPROGRESS add or rem is in progress, when the flag SANLK_INQ_WAIT
is not used
-ENOENT no lockspace with matching name and disk location is
found
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
---
python/sanlock.c | 25 +++++++++++++++++--------
src/cmd.c | 15 ++++++++++++---
src/sanlock_admin.h | 3 +++
3 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/python/sanlock.c b/python/sanlock.c
index bfce46c..d953c90 100644
--- a/python/sanlock.c
+++ b/python/sanlock.c
@@ -314,36 +314,45 @@ py_add_lockspace(PyObject *self __unused, PyObject *args, PyObject *keywds)
/* inq_lockspace */
PyDoc_STRVAR(pydoc_inq_lockspace, "\
-inq_lockspace(lockspace, host_id, path, offset=0)\n\
+inq_lockspace(lockspace, host_id, path, offset=0, wait=False)\n\
Return True if the sanlock daemon currently owns the host_id in lockspace,\n\
False otherwise. The special value None is returned when the daemon is\n\
-still in the process of acquiring or releasing the host_id.");
+still in the process of acquiring or releasing the host_id. If the wait\n\
+flag is set to True the function will block until the host_id is either\n\
+acquired or released.");
static PyObject *
py_inq_lockspace(PyObject *self __unused, PyObject *args, PyObject *keywds)
{
- int rv;
+ int rv, waitrs = 0, flags = 0;
const char *lockspace, *path;
struct sanlk_lockspace ls;
- static char *kwlist[] = {"lockspace", "host_id", "path", "offset", NULL};
+ static char *kwlist[] = {"lockspace", "host_id", "path", "offset",
+ "wait", NULL};
/* initialize lockspace structure */
memset(&ls, 0, sizeof(struct sanlk_lockspace));
/* parse python tuple */
- if (!PyArg_ParseTupleAndKeywords(args, keywds, "sks|k", kwlist,
- &lockspace, &ls.host_id, &path, &ls.host_id_disk.offset)) {
+ if (!PyArg_ParseTupleAndKeywords(args, keywds, "sks|ki", kwlist,
+ &lockspace, &ls.host_id, &path, &ls.host_id_disk.offset,
+ &waitrs)) {
return NULL;
}
+ /* prepare sanlock_inq_lockspace flags */
+ if (waitrs) {
+ flags |= SANLK_INQ_WAIT;
+ }
+
/* prepare sanlock names */
strncpy(ls.name, lockspace, SANLK_NAME_LEN);
strncpy(ls.host_id_disk.path, path, SANLK_PATH_LEN - 1);
/* add sanlock lockspace (gil disabled) */
Py_BEGIN_ALLOW_THREADS
- rv = sanlock_inq_lockspace(&ls, 0);
+ rv = sanlock_inq_lockspace(&ls, flags);
Py_END_ALLOW_THREADS
if (rv == 0) {
@@ -362,7 +371,7 @@ py_inq_lockspace(PyObject *self __unused, PyObject *args, PyObject *keywds)
PyDoc_STRVAR(pydoc_rem_lockspace, "\
rem_lockspace(lockspace, host_id, path, offset=0, async=False, unused=False)\n\
Remove a lockspace, releasing the acquired host_id. If async is True the\n\
-function will return immediatly and the status can be checked using\n\
+function will return immediately and the status can be checked using\n\
inq_lockspace. If unused is True the command will fail (EBUSY) if there is\n\
at least one acquired resource in the lockspace (instead of automatically\n\
release it).");
diff --git a/src/cmd.c b/src/cmd.c
index 6417df0..de82ff7 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -891,6 +891,7 @@ static void cmd_add_lockspace(struct cmd_args *ca)
static void cmd_inq_lockspace(struct cmd_args *ca)
{
struct sanlk_lockspace lockspace;
+ int waitrs = ca->header.cmd_flags & SANLK_INQ_WAIT;
int fd, rv, result;
fd = client[ca->ci_in].fd;
@@ -903,13 +904,21 @@ static void cmd_inq_lockspace(struct cmd_args *ca)
goto reply;
}
- log_debug("cmd_inq_lockspace %d,%d %.48s:%llu:%s:%llu",
+ log_debug("cmd_inq_lockspace %d,%d %.48s:%llu:%s:%llu flags %x",
ca->ci_in, fd, lockspace.name,
(unsigned long long)lockspace.host_id,
lockspace.host_id_disk.path,
- (unsigned long long)lockspace.host_id_disk.offset);
+ (unsigned long long)lockspace.host_id_disk.offset,
+ ca->header.cmd_flags);
+
+ while (1) {
+ result = inq_lockspace(&lockspace);
+ if ((result != -EINPROGRESS) || !(waitrs)) {
+ break;
+ }
+ sleep(1);
+ }
- result = inq_lockspace(&lockspace);
reply:
log_debug("cmd_inq_lockspace %d,%d done %d", ca->ci_in, fd, result);
diff --git a/src/sanlock_admin.h b/src/sanlock_admin.h
index c5a3a0f..5134faa 100644
--- a/src/sanlock_admin.h
+++ b/src/sanlock_admin.h
@@ -17,6 +17,9 @@
#define SANLK_REM_ASYNC 0x00000001
#define SANLK_REM_UNUSED 0x00000002
+/* inq flags */
+#define SANLK_INQ_WAIT 0x00000001
+
/*
* add_lockspace returns:
* 0: the lockspace has been added successfully
--
1.7.1
11 years, 5 months
sanlock.spec
by David Teigland
sanlock.spec | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
New commits:
commit d98ec76595e90df6031535d05199c45ccecafb52
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Jun 15 09:28:12 2012 -0500
sanlock.spec: require useradd and groupadd
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/sanlock.spec b/sanlock.spec
index 9a5a586..b2534c4 100644
--- a/sanlock.spec
+++ b/sanlock.spec
@@ -1,6 +1,6 @@
Name: sanlock
Version: 2.3
-Release: 2%{?dist}
+Release: 3%{?dist}
Summary: A shared disk lock manager
Group: System Environment/Base
@@ -9,12 +9,10 @@ URL: https://fedorahosted.org/sanlock/
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildRequires: libblkid-devel libaio-devel python python-devel
Requires: %{name}-lib = %{version}-%{release}
+Requires(pre): /usr/sbin/groupadd
+Requires(pre): /usr/sbin/useradd
Source0: https://fedorahosted.org/releases/s/a/sanlock/%{name}-%{version}.tar.gz
-%if 0%{?rhel}
-ExclusiveArch: x86_64
-%endif
-
%description
sanlock uses disk paxos to manage leases on shared storage.
Hosts connected to a common SAN can use this to synchronize their
@@ -179,6 +177,9 @@ developing applications that use %{name}.
%{_includedir}/sanlock_direct.h
%changelog
+* Fri Jun 15 2012 David Teigland <teigland(a)redhat.com> - 2.3-3
+- Require useradd and groupadd
+
* Mon Jun 04 2012 David Teigland <teigland(a)redhat.com> - 2.3-2
- Remove exclusive arch
11 years, 5 months
src/main.c
by David Teigland
src/main.c | 3 +++
1 file changed, 3 insertions(+)
New commits:
commit cffb3056b959a23444632c9473f75ba9980052d8
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Jun 14 10:48:15 2012 -0500
daemon: skip setup_groups when no -U or -G
setting up groups uses user name and gid.
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/src/main.c b/src/main.c
index abc35fd..e9d8f40 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1188,6 +1188,9 @@ static void setup_groups(void)
int pngroups, sngroups, ngroups_max;
gid_t *pgroup, *sgroup;
+ if (!com.uname || !com.gname)
+ return;
+
ngroups_max = sysconf(_SC_NGROUPS_MAX);
if (ngroups_max < 0) {
log_error("cannot get the max number of groups %i", errno);
11 years, 5 months
2 commits - init.d/sanlock init.d/wdmd
by David Teigland
init.d/sanlock | 2 +-
init.d/wdmd | 7 +++++++
2 files changed, 8 insertions(+), 1 deletion(-)
New commits:
commit b6dc3c761c75a0411a607544a1b76955dbe9a1a7
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Thu Jun 14 09:41:12 2012 +0000
wdmd: load the softdog module when needed
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
diff --git a/init.d/wdmd b/init.d/wdmd
index 9ec7b8c..19fc3ae 100644
--- a/init.d/wdmd
+++ b/init.d/wdmd
@@ -39,6 +39,13 @@ start() {
[ -x /sbin/restorecon ] && restorecon /var/run/$prog
fi
+ if [ ! -c /dev/watchdog ]; then
+ echo -n $"Loading the softdog kernel module: "
+ modprobe softdog && udevadm settle
+ [ -c /dev/watchdog ] && success || failure
+ echo
+ fi
+
echo -n $"Starting $prog: "
daemon $prog $WDMDOPTS
retval=$?
commit 6cead337ff6c065a983e9d4e539fb28f3b812498
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Thu Jun 14 09:41:11 2012 +0000
init: make explicit to run sanlock as root
Adding the explicit user (root) used to run sanlock makes runuser
acquire the supplementary groups for the process also at boot time.
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
diff --git a/init.d/sanlock b/init.d/sanlock
index 636d581..7521ae5 100644
--- a/init.d/sanlock
+++ b/init.d/sanlock
@@ -40,7 +40,7 @@ start() {
fi
echo -n $"Starting $prog: "
- daemon $prog daemon $SANLOCKOPTS
+ daemon --user root $prog daemon $SANLOCKOPTS
retval=$?
echo
[ $retval -eq 0 ] && touch $lockfile
11 years, 5 months
[PATCH 1/2] init: make explicit to run sanlock as root
by Federico Simoncelli
Adding the explicit user (root) used to run sanlock makes runuser
acquire the supplementary groups for the process also at boot time.
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
---
init.d/sanlock | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/init.d/sanlock b/init.d/sanlock
index 636d581..7521ae5 100644
--- a/init.d/sanlock
+++ b/init.d/sanlock
@@ -40,7 +40,7 @@ start() {
fi
echo -n $"Starting $prog: "
- daemon $prog daemon $SANLOCKOPTS
+ daemon --user root $prog daemon $SANLOCKOPTS
retval=$?
echo
[ $retval -eq 0 ] && touch $lockfile
--
1.7.1
11 years, 5 months
init.d/sanlock init.d/wdmd
by David Teigland
init.d/sanlock | 10 ++++++----
init.d/wdmd | 6 ++++--
2 files changed, 10 insertions(+), 6 deletions(-)
New commits:
commit e25453ccd46d265de1d5ef720e40977786a6fb1b
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Jun 11 10:58:49 2012 -0500
sanlock/wdmd: use /var/lock/subsys/file
sanlock was not using it at all, and wdmd was
using it incorrectly.
bz 830848
Signed-off-by: David Teigland <teigland(a)redhat.com>
diff --git a/init.d/sanlock b/init.d/sanlock
index ae5843b..636d581 100644
--- a/init.d/sanlock
+++ b/init.d/sanlock
@@ -22,7 +22,8 @@
. /etc/rc.d/init.d/functions
prog="sanlock"
-lockfile="/var/run/$prog/$prog.pid"
+runfile="/var/run/$prog/$prog.pid"
+lockfile="/var/lock/subsys/$prog"
exec="/usr/sbin/$prog"
SANLOCKUSER="sanlock"
@@ -42,13 +43,13 @@ start() {
daemon $prog daemon $SANLOCKOPTS
retval=$?
echo
-
+ [ $retval -eq 0 ] && touch $lockfile
return $retval
}
stop() {
echo -n $"Sending stop signal $prog: "
- killproc -p $lockfile $prog -TERM
+ killproc -p $runfile $prog -TERM
retval=$?
echo
@@ -58,7 +59,7 @@ stop() {
echo -n $"Waiting for $prog to stop:"
timeout=10
- while [ -e $lockfile ]; do
+ while [ -e $runfile ]; do
sleep 1
timeout=$((timeout - 1))
if [ "$timeout" -le 0 ]; then
@@ -68,6 +69,7 @@ stop() {
done
success; echo
+ rm -f $lockfile
return $retval
}
diff --git a/init.d/wdmd b/init.d/wdmd
index d5334c4..9ec7b8c 100644
--- a/init.d/wdmd
+++ b/init.d/wdmd
@@ -22,7 +22,8 @@
. /etc/rc.d/init.d/functions
prog="wdmd"
-lockfile="/var/run/$prog/$prog.pid"
+runfile="/var/run/$prog/$prog.pid"
+lockfile="/var/lock/subsys/$prog"
exec="/usr/sbin/$prog"
WDMDGROUP="sanlock"
@@ -48,10 +49,11 @@ start() {
stop() {
echo -n $"Stopping $prog: "
- killproc -p $lockfile $prog -TERM
+ killproc -p $runfile $prog -TERM
retval=$?
echo
[ $retval -eq 0 ] && rm -f $lockfile
+ return $retval
}
restart() {
11 years, 5 months