killing running vms when sanlock daemon terminated
by Jim Fehlig
[ resend after subscribing to list ]
Hi,
I've been playing with sanlock on SUSE distros and noticed that running
VMs with disk leases controlled by sanlock are killed (leaving filesystems,
databases, and block devices in an unclean state) when the sanlock
daemon is terminated. Actually, I noticed this when simply updating the
package, which caused a restart of the daemon.
Is there a way to control this behavior? I couldn't find anything with a
quick peek at the docs and code.
Regards,
Jim
11 years, 7 months
src/client.c src/cmd.c src/direct.c src/leader.h src/lockspace.c src/main.c src/paxos_lease.c src/resource.c src/resource.h src/sanlock.h src/sanlock_internal.h src/sanlock_resource.h src/sanlock_sock.h
by David Teigland
src/client.c | 59 +-------
src/cmd.c | 357 -------------------------------------------------
src/direct.c | 28 ---
src/leader.h | 12 -
src/lockspace.c | 3
src/main.c | 42 -----
src/paxos_lease.c | 4
src/resource.c | 7
src/resource.h | 6
src/sanlock.h | 1
src/sanlock_internal.h | 3
src/sanlock_resource.h | 13 -
src/sanlock_sock.h | 2
13 files changed, 28 insertions(+), 509 deletions(-)
New commits:
commit ed73691de0ae82451b47fca2d5b019dac195cf9e
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Jan 26 11:50:29 2012 -0600
Revert "sanlock: setmode"
The different and incompatible behaviors of
these modal resources compared with normal resources make them
difficult to use, so we will look at implementing
SH/EX differently.
This reverts commit 1c4a22bda66c26202d00619254e1b2c06c8027c7.
diff --git a/src/client.c b/src/client.c
index d54aaf8..6875bb6 100644
--- a/src/client.c
+++ b/src/client.c
@@ -61,7 +61,7 @@ static int connect_socket(int *sock_fd)
}
static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
- uint32_t data, uint32_t data2, uint64_t data64)
+ uint32_t data, uint32_t data2)
{
struct sm_header header;
int rv;
@@ -73,7 +73,6 @@ static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
header.length = sizeof(header) + datalen;
header.data = data;
header.data2 = data2;
- header.data64 = data64;
rv = send(sock, (void *) &header, sizeof(struct sm_header), 0);
if (rv < 0)
@@ -92,7 +91,7 @@ int send_command(int cmd, uint32_t data)
if (rv < 0)
return rv;
- rv = send_header(sock, cmd, 0, 0, data, 0, 0);
+ rv = send_header(sock, cmd, 0, 0, data, 0);
if (rv < 0) {
close(sock);
return rv;
@@ -125,7 +124,7 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t flags)
if (rv < 0)
return rv;
- rv = send_header(fd, cmd, flags, sizeof(struct sanlk_lockspace), 0, 0, 0);
+ rv = send_header(fd, cmd, flags, sizeof(struct sanlk_lockspace), 0, 0);
if (rv < 0)
goto out;
@@ -164,7 +163,7 @@ int sanlock_align(struct sanlk_disk *disk)
if (rv < 0)
return rv;
- rv = send_header(fd, SM_CMD_ALIGN, 0, sizeof(struct sanlk_disk), 0, 0, 0);
+ rv = send_header(fd, SM_CMD_ALIGN, 0, sizeof(struct sanlk_disk), 0, 0);
if (rv < 0)
goto out;
@@ -202,7 +201,7 @@ int sanlock_init(struct sanlk_lockspace *ls,
sizeof(struct sanlk_disk) * res->num_disks;
}
- rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts, 0);
+ rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts);
if (rv < 0)
goto out;
@@ -266,7 +265,7 @@ int sanlock_register(void)
if (rv < 0)
return rv;
- rv = send_header(sock, SM_CMD_REGISTER, 0, 0, 0, 0, 0);
+ rv = send_header(sock, SM_CMD_REGISTER, 0, 0, 0, 0);
if (rv < 0) {
close(sock);
return rv;
@@ -279,7 +278,7 @@ int sanlock_restrict(int sock, uint32_t flags)
{
int rv;
- rv = send_header(sock, SM_CMD_RESTRICT, flags, 0, 0, -1, 0);
+ rv = send_header(sock, SM_CMD_RESTRICT, flags, 0, 0, -1);
if (rv < 0)
return rv;
@@ -334,7 +333,7 @@ int sanlock_acquire(int sock, int pid, uint32_t flags, int res_count,
fd = sock;
}
- rv = send_header(fd, SM_CMD_ACQUIRE, flags, datalen, res_count, data2, 0);
+ rv = send_header(fd, SM_CMD_ACQUIRE, flags, datalen, res_count, data2);
if (rv < 0)
return rv;
@@ -403,7 +402,7 @@ int sanlock_inquire(int sock, int pid, uint32_t flags, int *res_count,
fd = sock;
}
- rv = send_header(fd, SM_CMD_INQUIRE, flags, 0, 0, data2, 0);
+ rv = send_header(fd, SM_CMD_INQUIRE, flags, 0, 0, data2);
if (rv < 0)
return rv;
@@ -477,7 +476,7 @@ int sanlock_release(int sock, int pid, uint32_t flags, int res_count,
datalen = res_count * sizeof(struct sanlk_resource);
- rv = send_header(fd, SM_CMD_RELEASE, flags, datalen, res_count, data2, 0);
+ rv = send_header(fd, SM_CMD_RELEASE, flags, datalen, res_count, data2);
if (rv < 0)
goto out;
@@ -508,7 +507,7 @@ int sanlock_request(uint32_t flags, uint32_t force_mode,
if (rv < 0)
return rv;
- rv = send_header(fd, SM_CMD_REQUEST, flags, datalen, force_mode, 0, 0);
+ rv = send_header(fd, SM_CMD_REQUEST, flags, datalen, force_mode, 0);
if (rv < 0)
goto out;
@@ -553,7 +552,7 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
data = (char *)res;
}
- rv = send_header(fd, cmd, flags, datalen, 0, 0, 0);
+ rv = send_header(fd, cmd, flags, datalen, 0, 0);
if (rv < 0)
goto out;
@@ -569,40 +568,6 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
return rv;
}
-int sanlock_setmode(uint32_t flags, uint64_t host_id, int mode,
- struct sanlk_resource *res)
-{
- int fd, rv, datalen;
-
- datalen = sizeof(struct sanlk_resource) +
- sizeof(struct sanlk_disk) * res->num_disks;
-
- rv = connect_socket(&fd);
- if (rv < 0)
- return rv;
-
- rv = send_header(fd, SM_CMD_SETMODE, flags, datalen, mode, 0, host_id);
- if (rv < 0)
- goto out;
-
- rv = send(fd, res, sizeof(struct sanlk_resource), 0);
- if (rv < 0) {
- rv = -errno;
- goto out;
- }
-
- rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0);
- if (rv < 0) {
- rv = -errno;
- goto out;
- }
-
- rv = recv_result(fd);
- out:
- close(fd);
- return rv;
-}
-
/*
* convert from struct sanlk_resource to string with format:
* <lockspace_name>:<resource_name>:<path>:<offset>[:<path>:<offset>...]:<lver>
diff --git a/src/cmd.c b/src/cmd.c
index 58994a2..3cef77d 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -350,13 +350,6 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
result = rv;
goto done;
}
- save_resource_lver(token, token->leader.lver);
-
- /* TODO: fail and return an error if this resource
- has LEADER_FL_MODE in which case only setmode is allowed.
- It may be better to detect this right when we first read the
- leader record in paxos_lease_acquire */
-
acquire_count++;
}
@@ -773,8 +766,9 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
if (rv != disks_len) {
+ free(token);
result = -ENOTCONN;
- goto reply_free;
+ goto reply;
}
/* zero out pad1 and pad2, see WARNING above */
@@ -793,20 +787,18 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
error = request_token(task, token, force_mode, &owner_id);
if (error < 0) {
result = error;
- goto reply_free;
+ goto reply;
}
result = 0;
if (!token->acquire_lver && !force_mode)
- goto reply_free;
+ goto reply;
if (owner_id)
host_status_set_bit(token->r.lockspace_name, owner_id);
-
- reply_free:
- free(token);
reply:
+ free(token);
log_debug("cmd_request %d,%d done %d", ca->ci_in, fd, result);
send_result(fd, &ca->header, result);
@@ -862,340 +854,6 @@ static void cmd_examine(struct task *task GNUC_UNUSED, struct cmd_args *ca)
client_resume(ca->ci_in);
}
-/* return 1 (is alive) to force a failure if we don't have enough
- knowledge to know it's really not alive. Later we could have this sit and
- wait (like paxos_lease_acquire) until we have waited long enough or have
- enough knowledge to say it's safely dead (unless of course we find it is
- alive while waiting) */
-
-static int host_live(struct task *task, char *lockspace_name, uint64_t host_id, uint64_t gen)
-{
- struct host_status hs;
- uint64_t now;
- int rv;
-
- rv = host_info(lockspace_name, host_id, &hs);
- if (rv) {
- log_debug("host_live %llu %llu yes host_info %d",
- (unsigned long long)host_id, (unsigned long long)gen, rv);
- return 1;
- }
-
- if (!hs.last_check) {
- log_debug("host_live %llu %llu yes unchecked",
- (unsigned long long)host_id, (unsigned long long)gen);
- return 1;
- }
-
- /* the host_id lease is free, not being used */
- if (!hs.timestamp) {
- log_debug("host_live %llu %llu no lease free",
- (unsigned long long)host_id, (unsigned long long)gen);
- return 0;
- }
-
- if (hs.owner_generation > gen) {
- log_debug("host_live %llu %llu no old gen %llu",
- (unsigned long long)host_id, (unsigned long long)gen,
- (unsigned long long)hs.owner_generation);
- return 0;
- }
-
- now = monotime();
-
- if (!hs.last_live && (now - hs.first_check > task->host_dead_seconds)) {
- log_debug("host_live %llu %llu no first_check %llu",
- (unsigned long long)host_id, (unsigned long long)gen,
- (unsigned long long)hs.first_check);
- return 0;
- }
-
- if (hs.last_live && (now - hs.last_live > task->host_dead_seconds)) {
- log_debug("host_live %llu %llu no last_live %llu",
- (unsigned long long)host_id, (unsigned long long)gen,
- (unsigned long long)hs.last_live);
- return 0;
- }
-
- log_debug("host_live %llu %llu yes recent first_check %llu last_live %llu",
- (unsigned long long)host_id, (unsigned long long)gen,
- (unsigned long long)hs.first_check,
- (unsigned long long)hs.last_live);
-
- return 1;
-}
-
-/*
- * What this is aiming to do is:
- * cmd_acquire();
- * for all mblocks, if any mblock.mode is incompatible with mode
- * if mblock is for dead host_id, clear mblock.mode, continue
- * if mblock is for live host_id, return -EAGAIN
- * write mblock.mode and mblock.generation for host_id
- * cmd_release();
- */
-
-static void cmd_setmode(struct task *task, struct cmd_args *ca)
-{
- struct token *token;
- struct sync_disk *disk;
- struct sanlk_resource res;
- struct space space;
- struct mode_block *mb;
- char *iobuf, **p_iobuf;
- char *rbuf, *wbuf;
- uint64_t set_hostid, set_gen;
- int iobuf_len;
- int set_mode;
- int token_len, disks_len;
- int i, j, fd, rv, result;
-
- fd = client[ca->ci_in].fd;
-
- /* the two args from sanlock_setmode() */
- set_hostid = ca->header.data64; /* TODO: add to struct */
- set_mode = ca->header.data;
-
- /* receiving and setting up token (copied from cmd_request) */
-
- rv = recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL);
- if (rv != sizeof(struct sanlk_resource)) {
- log_error("cmd_setmode %d,%d recv %d %d",
- ca->ci_in, fd, rv, errno);
- result = -ENOTCONN;
- goto reply;
- }
-
- if (!res.num_disks || res.num_disks > SANLK_MAX_DISKS) {
- result = -ERANGE;
- goto reply;
- }
-
- disks_len = res.num_disks * sizeof(struct sync_disk);
- token_len = sizeof(struct token) + disks_len;
-
- token = malloc(token_len);
- if (!token) {
- result = -ENOMEM;
- goto reply;
- }
-
- memset(token, 0, token_len);
- token->disks = (struct sync_disk *)&token->r.disks[0]; /* shorthand */
- token->r.num_disks = res.num_disks;
- memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
- memcpy(token->r.name, res.name, SANLK_NAME_LEN);
-
- token->acquire_lver = res.lver;
- token->acquire_data64 = res.data64;
- token->acquire_data32 = res.data32;
- token->acquire_flags = res.flags;
-
- /*
- * receive sanlk_disk's / sync_disk's
- *
- * WARNING: as a shortcut, this requires that sync_disk and
- * sanlk_disk match; this is the reason for the pad fields
- * in sanlk_disk (TODO: let these differ?)
- */
-
- rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
- if (rv != disks_len) {
- result = -ENOTCONN;
- goto reply_token;
- }
-
- /* zero out pad1 and pad2, see WARNING above */
- for (j = 0; j < token->r.num_disks; j++) {
- token->disks[j].sector_size = 0;
- token->disks[j].fd = -1;
- }
-
- log_debug("cmd_setmode %d,%d host_id %llu mode %u %.48s:%.48s:%.256s:%llu",
- ca->ci_in, fd,
- (unsigned long long)set_hostid, set_mode,
- token->r.lockspace_name,
- token->r.name,
- token->disks[0].path,
- (unsigned long long)token->r.disks[0].offset);
-
- /* find what our own host_id and generation are for this lockspace */
-
- rv = lockspace_info(token->r.lockspace_name, &space);
- if (rv < 0 || space.killing_pids) {
- log_error("cmd_setmode %d,%d invalid lockspace "
- "found %d failed %d name %.48s",
- ca->ci_in, fd, rv, space.killing_pids,
- token->r.lockspace_name);
- result = -ENOSPC;
- goto reply_token;
- }
- token->host_id = space.host_id;
- token->host_generation = space.host_generation;
-
- if (!set_hostid)
- set_hostid = token->host_id;
- if (token->host_id == set_hostid)
- set_gen = token->host_generation;
- else
- set_gen = 0;
-
- /* only allow clearing mode of host_id's that are not ours
- (not sure this will be needed, but it may be useful) */
-
- if (token->host_id != set_hostid && set_mode != SANLK_MODE_NL) {
- log_error("cmd_setmode %d,%d host_id %llu set hostid %llu mode %d",
- ca->ci_in, fd, (unsigned long long)token->host_id,
- (unsigned long long)set_hostid, set_mode);
- result = -EINVAL;
- goto reply_token;
- }
-
- rv = acquire_token(task, token, 0, 0);
- if (rv < 0) {
- log_error("cmd_setmode %d,%d acquire error %d", ca->ci_in, fd, rv);
- result = rv;
- goto reply_token;
- }
-
- if (!(token->leader.flags & LEADER_FL_MODE)) {
- /* a resource lease must be initialized with the MODE
- flag for setmode to work */
- log_error("cmd_setmode %d,%d no-mode resource", ca->ci_in, fd);
- result = -EINVAL;
- goto reply_rel;
- }
-
- /*
- * Read the entire lease area, which includes all host's sectors.
- * Check mblock in each host sector for other incompatible lock modes.
- * If we find one, but the host is dead, then clear it and continue
- * checking. If no conflicts are found, write the requested mode
- * in the mblock.
- */
-
- /* only keep modes on the first disk */
- disk = &token->disks[0];
-
- rv = open_disk(disk);
- if (rv < 0) {
- result = rv;
- goto reply_rel;
- }
-
- iobuf_len = direct_align(disk);
-
- p_iobuf = &iobuf;
-
- rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
- if (rv)
- goto reply_close;
-
- memset(iobuf, 0, iobuf_len);
-
- rv = read_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task);
- if (rv < 0) {
- if (rv != SANLK_AIO_TIMEOUT)
- free(iobuf);
- result = rv;
- goto reply_close;
- }
-
- wbuf = malloc(disk->sector_size);
- if (!wbuf) {
- result = -ENOMEM;
- free(iobuf);
- goto reply_close;
- }
-
- if (set_mode == SANLK_MODE_NL)
- goto do_write;
-
- for (i = 0; i < token->leader.num_hosts; i++) {
- if (i+1 == set_hostid)
- continue;
-
- /*
- * The sector_nr for host_id N is:
- * 1 leader block + 1 request block + (N-1) host blocks.
- * The mode_block is DBLOCK_MAX_LEN into the sector
- *
- * rbuf is the start of the sector (where the paxos_dblock
- * struct exists, which we aren't modifying) for host_id i+1.
- */
-
- rbuf = iobuf + ((2 + i) * disk->sector_size);
- mb = (struct mode_block *)(rbuf + DBLOCK_MAX_LEN);
-
- if (mb->mode == SANLK_MODE_NL)
- continue;
-
- if (mb->mode == SANLK_MODE_SH && set_mode == SANLK_MODE_SH)
- continue;
-
- /* incompatible locks */
-
- if (host_live(task, token->r.lockspace_name, i+1, mb->generation)) {
- log_error("cmd_setmode %d,%d mode conflict host_id %d gen %llu mode %u",
- ca->ci_in, fd, i+1, (unsigned long long)mb->generation, mb->mode);
- result = -EAGAIN;
- goto reply_free;
- }
-
- /* clear mode in dead host's sector */
-
- log_error("cmd_setmode %d,%d clear dead host_id %d gen %llu mode %u",
- ca->ci_in, fd, i+1, (unsigned long long)mb->generation, mb->mode);
-
- memcpy(wbuf, rbuf, disk->sector_size);
- mb = (struct mode_block *)(wbuf + DBLOCK_MAX_LEN);
- mb->mode = SANLK_MODE_NL;
- mb->generation = 0;
-
- rv = write_sector(disk, 2 + i, wbuf, disk->sector_size,
- task, "mblock");
- if (rv < 0) {
- result = rv;
- goto reply_free;
- }
- }
-
- log_debug("cmd_setmode %d,%d write host_id %llu gen %llu mode %d",
- ca->ci_in, fd, (unsigned long long)set_hostid,
- (unsigned long long)set_gen, set_mode);
-
- do_write:
- rbuf = iobuf + ((2 + set_hostid - 1) * disk->sector_size);
- memcpy(wbuf, rbuf, disk->sector_size);
- mb = (struct mode_block *)(wbuf + DBLOCK_MAX_LEN);
- mb->mode = set_mode;
- mb->generation = set_gen;
-
- rv = write_sector(disk, 2 + set_hostid - 1, wbuf, disk->sector_size,
- task, "mblock");
- if (rv < 0) {
- result = rv;
- goto reply_free;
- }
-
- result = 0;
-
- reply_free:
- free(wbuf);
- free(iobuf);
- reply_close:
- close_disks(disk, 1);
- reply_rel:
- release_token(task, token);
- reply_token:
- free(token);
- reply:
- log_debug("cmd_setmode %d,%d done %d", ca->ci_in, fd, result);
-
- send_result(fd, &ca->header, result);
- client_resume(ca->ci_in);
-}
-
static void cmd_add_lockspace(struct cmd_args *ca)
{
struct sanlk_lockspace lockspace;
@@ -1453,8 +1111,6 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca)
memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
memcpy(token->r.name, res.name, SANLK_NAME_LEN);
- token->acquire_flags = res.flags;
-
/*
* receive sanlk_disk's / sync_disk's
*
@@ -1540,9 +1196,6 @@ void call_cmd_thread(struct task *task, struct cmd_args *ca)
case SM_CMD_EXAMINE_RESOURCE:
cmd_examine(task, ca);
break;
- case SM_CMD_SETMODE:
- cmd_setmode(task, ca);
- break;
};
}
diff --git a/src/direct.c b/src/direct.c
index 263f6c7..8394e0a 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -102,8 +102,6 @@ static int do_paxos_action(int action, struct task *task,
memcpy(token->r.lockspace_name, res->lockspace_name, SANLK_NAME_LEN);
memcpy(token->r.name, res->name, SANLK_NAME_LEN);
- token->acquire_flags = res->flags;
-
/* WARNING sync_disk == sanlk_disk */
memcpy(token->disks, &res->disks, disks_len);
@@ -428,15 +426,6 @@ int direct_read_leader(struct task *task,
int test_id_bit(int host_id, char *bitmap);
-static const char *mode_str(int mode)
-{
- if (mode == 3)
- return "SH";
- if (mode == 5)
- return "EX";
- return "??";
-}
-
int direct_dump(struct task *task, char *dump_path, int force_mode)
{
char *data, *bitmap;
@@ -444,8 +433,6 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
struct leader_record *lr;
struct request_record *rr;
struct sync_disk sd;
- char *pd;
- struct mode_block *mb;
char sname[NAME_ID_SIZE+1];
char rname[NAME_ID_SIZE+1];
uint64_t sector_nr;
@@ -556,21 +543,6 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
(unsigned long long)rr->lver, rr->force_mode);
}
printf("\n");
-
- if (lr->flags & LEADER_FL_MODE) {
- for (i = 0; i < lr->num_hosts; i++) {
- pd = data + ((2 + i) * sd.sector_size);
- mb = (struct mode_block *)(pd + DBLOCK_MAX_LEN);
-
- if (!mb->mode)
- continue;
-
- printf(" ");
- printf("%s %04d %04llu\n",
- mode_str(mb->mode), i+1,
- (unsigned long long)mb->generation);
- }
- }
} else {
break;
}
diff --git a/src/leader.h b/src/leader.h
index f850ef8..e7304b0 100644
--- a/src/leader.h
+++ b/src/leader.h
@@ -42,12 +42,10 @@
#define LEADER_CHECKSUM_LEN 168
#define LEASE_FREE 0
-#define LEADER_FL_MODE 0x00000001
-
struct leader_record {
uint32_t magic;
uint32_t version;
- uint32_t flags;
+ uint32_t unused0;
uint32_t sector_size;
uint64_t num_hosts;
uint64_t max_hosts;
@@ -83,12 +81,4 @@ struct request_record {
uint32_t force_mode;
};
-#define DBLOCK_MAX_LEN 128
-
-struct mode_block {
- uint32_t mode;
- uint32_t unused;
- uint64_t generation;
-};
-
#endif
diff --git a/src/lockspace.c b/src/lockspace.c
index dc89d90..188758c 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -262,9 +262,6 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
hs = &sp->host_status[i];
hs->last_check = now;
- if (!hs->first_check)
- hs->first_check = now;
-
leader = (struct leader_record *)(buf + (i * disk->sector_size));
if (hs->owner_id == leader->owner_id &&
diff --git a/src/main.c b/src/main.c
index 39a3b58..7ec689e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1002,7 +1002,6 @@ static void process_connection(int ci)
case SM_CMD_ALIGN:
case SM_CMD_INIT_LOCKSPACE:
case SM_CMD_INIT_RESOURCE:
- case SM_CMD_SETMODE:
rv = client_suspend(ci);
if (rv < 0)
return;
@@ -1368,8 +1367,7 @@ static void print_usage(void)
printf("sanlock client host_status -s LOCKSPACE [-D]\n");
printf("sanlock client log_dump\n");
printf("sanlock client shutdown [-f 0|1]\n");
- printf("sanlock client init -s LOCKSPACE\n");
- printf("sanlock client init -r RESOURCE [-M]\n");
+ printf("sanlock client init -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock client align -s LOCKSPACE\n");
printf("sanlock client add_lockspace -s LOCKSPACE\n");
printf("sanlock client inq_lockspace -s LOCKSPACE\n");
@@ -1380,11 +1378,9 @@ static void print_usage(void)
printf("sanlock client inquire -p <pid>\n");
printf("sanlock client request -r RESOURCE -f <force_mode>\n");
printf("sanlock client examine -r RESOURCE | -s LOCKSPACE\n");
- printf("sanlock client setmode -r RESOURCE -m <lock_mode>\n");
printf("\n");
printf("sanlock direct <action> [-a 0|1] [-o 0|1]\n");
- printf("sanlock direct init -s LOCKSPACE\n");
- printf("sanlock direct init -r RESOURCE [-M]\n");
+ printf("sanlock direct init -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock direct read_leader -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock direct read_id -s LOCKSPACE\n");
printf("sanlock direct live_id -s LOCKSPACE\n");
@@ -1421,7 +1417,6 @@ static int read_command_line(int argc, char *argv[])
char *arg1 = argv[1];
char *act;
int i, j, len, begin_command = 0;
- int init_mode = 0;
if (argc < 2 || !strcmp(arg1, "help") || !strcmp(arg1, "--help") ||
!strcmp(arg1, "-h")) {
@@ -1492,8 +1487,6 @@ static int read_command_line(int argc, char *argv[])
com.action = ACT_REQUEST;
else if (!strcmp(act, "examine"))
com.action = ACT_EXAMINE;
- else if (!strcmp(act, "setmode"))
- com.action = ACT_SETMODE;
else if (!strcmp(act, "align"))
com.action = ACT_CLIENT_ALIGN;
else if (!strcmp(act, "init"))
@@ -1553,25 +1546,13 @@ static int read_command_line(int argc, char *argv[])
optchar = p[1];
i++;
- /*
- * options that do not have optionarg
- */
-
+ /* the only option that does not have optionarg */
if (optchar == 'D') {
com.debug = 1;
log_stderr_priority = LOG_DEBUG;
continue;
}
- if (optchar == 'M') {
- init_mode = 1;
- continue;
- }
-
- /*
- * options that require arg
- */
-
if (i >= argc) {
log_tool("option '%c' requires arg", optchar);
exit(EXIT_FAILURE);
@@ -1621,11 +1602,7 @@ static int read_command_line(int argc, char *argv[])
com.num_hosts = atoi(optionarg);
break;
case 'm':
- if (com.action == ACT_SETMODE) {
- com.lock_mode = atoi(optionarg);
- } else {
- com.max_hosts = atoi(optionarg);
- }
+ com.max_hosts = atoi(optionarg);
break;
case 'p':
com.pid = atoi(optionarg);
@@ -1670,9 +1647,6 @@ static int read_command_line(int argc, char *argv[])
i++;
}
- if (init_mode)
- com.res_args[0]->flags |= SANLK_RES_MODE;
-
/*
* the remaining args are for the command
*
@@ -1857,13 +1831,6 @@ static int do_client(void)
log_tool("examine done %d", rv);
break;
- case ACT_SETMODE:
- log_tool("setmode %d", com.lock_mode);
- /* TODO: add optional host_id */
- rv = sanlock_setmode(0, 0, com.lock_mode, com.res_args[0]);
- log_tool("setmode done %d", rv);
- break;
-
case ACT_CLIENT_ALIGN:
log_tool("align");
rv = sanlock_align(&com.lockspace.host_id_disk);
@@ -1916,7 +1883,6 @@ static int do_direct(void)
log_tool("read_leader done %d", rv);
log_tool("magic 0x%0x", leader.magic);
log_tool("version 0x%x", leader.version);
- log_tool("flags 0x%x", leader.flags);
log_tool("sector_size %u", leader.sector_size);
log_tool("num_hosts %llu",
(unsigned long long)leader.num_hosts);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index f55f3bb..a55933d 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -1517,10 +1517,6 @@ int paxos_lease_init(struct task *task,
leader->timestamp = LEASE_FREE;
strncpy(leader->space_name, token->r.lockspace_name, NAME_ID_SIZE);
strncpy(leader->resource_name, token->r.name, NAME_ID_SIZE);
-
- if (token->acquire_flags & SANLK_RES_MODE)
- leader->flags = LEADER_FL_MODE;
-
leader->checksum = leader_checksum(leader);
rr = (struct request_record *)(iobuf + sector_size);
diff --git a/src/resource.c b/src/resource.c
index ea9d7bb..9002f06 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -83,7 +83,7 @@ static struct resource *find_resource(struct token *token,
return NULL;
}
-void save_resource_lver(struct token *token, uint64_t lver)
+static void save_resource_lver(struct token *token, uint64_t lver)
{
struct resource *r;
@@ -190,14 +190,15 @@ int acquire_token(struct task *task, struct token *token,
close_disks(token->disks, token->r.num_disks);
log_token(token, "acquire rv %d lver %llu at %llu", rv,
- (unsigned long long)leader_ret.lver,
- (unsigned long long)leader_ret.timestamp);
+ (unsigned long long)token->leader.lver,
+ (unsigned long long)token->leader.timestamp);
if (rv < 0)
return rv;
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
token->r.lver = token->leader.lver;
+ save_resource_lver(token, token->leader.lver);
return rv; /* SANLK_OK */
}
diff --git a/src/resource.h b/src/resource.h
index 6f8edee..72de40a 100644
--- a/src/resource.h
+++ b/src/resource.h
@@ -6,8 +6,8 @@
* of the GNU General Public License v2 or (at your option) any later version.
*/
-#ifndef __RESOURCE_H__
-#define __RESOURCE_H__
+#ifndef __TOKEN_MANAGER_H__
+#define __TOKEN_MANAGER_H__
int acquire_token(struct task *task, struct token *token,
uint64_t acquire_lver, int new_num_hosts);
@@ -22,8 +22,6 @@ int request_token(struct task *task, struct token *token, uint32_t force_mode,
int add_resource(struct token *token, int pid, uint32_t cl_restrict);
void del_resource(struct token *token);
-void save_resource_lver(struct token *token, uint64_t lver);
-
int set_resource_examine(char *space_name, char *res_name);
int setup_token_manager(void);
diff --git a/src/sanlock.h b/src/sanlock.h
index 7e49122..a56bb4e 100644
--- a/src/sanlock.h
+++ b/src/sanlock.h
@@ -53,7 +53,6 @@ struct sanlk_disk {
#define SANLK_RES_LVER 0x1 /* lver field is set */
#define SANLK_RES_NUM_HOSTS 0x2 /* data32 field is new num_hosts */
-#define SANLK_RES_MODE 0x4
struct sanlk_resource {
char lockspace_name[SANLK_NAME_LEN]; /* terminating \0 not required */
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 519faf3..eeeb897 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -117,7 +117,6 @@ struct lease_status {
};
struct host_status {
- uint64_t first_check; /* local monotime */
uint64_t last_check; /* local monotime */
uint64_t last_live; /* local monotime */
uint64_t last_req; /* local monotime */
@@ -531,7 +530,6 @@ struct command_line {
int max_hosts; /* -m */
int res_count;
uint32_t force_mode;
- int lock_mode;
char our_host_name[SANLK_NAME_LEN+1];
char *dump_path;
struct sanlk_lockspace lockspace; /* -s LOCKSPACE */
@@ -570,7 +568,6 @@ enum {
ACT_CLIENT_INIT,
ACT_CLIENT_ALIGN,
ACT_EXAMINE,
- ACT_SETMODE,
};
EXTERN int external_shutdown;
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index bb64d27..3991094 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -52,19 +52,6 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
struct sanlk_resource *res);
/*
- * Set a host's mode for the resource. A standard lease is acquired, the
- * mode is set if compatible with existing modes, the lease is released.
- * The resource lease used here is not associated with a pid.
- */
-
-#define SANLK_MODE_NL 0
-#define SANLK_MODE_SH 3
-#define SANLK_MODE_EX 5
-
-int sanlock_setmode(uint32_t flags, uint64_t host_id, int mode,
- struct sanlk_resource *res);
-
-/*
* Functions to convert between string and struct resource formats.
* All allocate space for returned data that the caller must free.
*/
diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h
index b5c80dc..aa9e46a 100644
--- a/src/sanlock_sock.h
+++ b/src/sanlock_sock.h
@@ -36,7 +36,6 @@ enum {
SM_CMD_EXAMINE_RESOURCE = 16,
SM_CMD_HOST_STATUS = 17,
SM_CMD_INQ_LOCKSPACE = 18,
- SM_CMD_SETMODE = 19,
};
struct sm_header {
@@ -48,7 +47,6 @@ struct sm_header {
uint32_t seq;
uint32_t data;
uint32_t data2;
- uint64_t data64;
};
#define SANLK_STATE_MAXSTR 4096
11 years, 8 months
src/client.c src/cmd.c src/direct.c src/leader.h src/lockspace.c src/main.c src/paxos_lease.c src/resource.c src/resource.h src/sanlock.h src/sanlock_internal.h src/sanlock_resource.h src/sanlock_sock.h
by David Teigland
src/client.c | 59 ++++++--
src/cmd.c | 357 ++++++++++++++++++++++++++++++++++++++++++++++++-
src/direct.c | 28 +++
src/leader.h | 12 +
src/lockspace.c | 3
src/main.c | 42 +++++
src/paxos_lease.c | 4
src/resource.c | 7
src/resource.h | 6
src/sanlock.h | 1
src/sanlock_internal.h | 3
src/sanlock_resource.h | 13 +
src/sanlock_sock.h | 2
13 files changed, 509 insertions(+), 28 deletions(-)
New commits:
commit 1c4a22bda66c26202d00619254e1b2c06c8027c7
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Jan 24 16:56:20 2012 -0600
sanlock: setmode
A resource lease is used to keep track of per-host shared/exclusive
modes associated with the resource. The resource lease is not
associated with a pid like normal resource leases are.
The standard resource lease is acquired/released internally
by setmode when modifying the list of per-host modes.
Only the setmode command is allowed on resources initialized
with MODE, and setmode is not allowed on resources not
initialized with MODE.
- initialize a resource with the MODE flag (or -M)
- use the setmode api/cmd with NL, SH or EX on the resource
(NL is unlocked)
- if no incompatible modes are set, setmode will succeed,
otherwise it returns -EAGAIN
- if an incompatible mode is set, but was set by a host_id+generation
that is now free, stale or dead, that incompatible mode will be cleared
Example command line usage:
> sanlock direct init -s foo:0:/dev/bull/leases:0
> sanlock direct init -M -r foo:bar:/dev/bull/leases:1048576
node-01> sanlock daemon
node-01> sanlock add_lockspace -s foo:1:/dev/bull/leases:0
node-01> sanlock setmode -r foo:bar:/dev/bull/leases:1048576 -m 5
setmode 5
setmode done 0
node-02> sanlock daemon
node-02> sanlock add_lockspace -s foo:2:/dev/bull/leases:0
node-02> sanlock setmode -r foo:bar:/dev/bull/leases:1048576 -m 3
setmode 3
setmode done -11
> sanlock direct dump /dev/bull/leases
offset lockspace resource timestamp own gen lver
00000000 foo a055eb92-2a6a-46fa-ae86-1ba74c0dfee4.node-01 0000698786 0001 0003
00000512 foo f30eed72-8608-443d-b390-aebe931ada85.node-02 0000698034 0002 0001
01048576 foo bar 0000000000 0002 0001 8
EX 0001 0003
diff --git a/src/client.c b/src/client.c
index 6875bb6..d54aaf8 100644
--- a/src/client.c
+++ b/src/client.c
@@ -61,7 +61,7 @@ static int connect_socket(int *sock_fd)
}
static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
- uint32_t data, uint32_t data2)
+ uint32_t data, uint32_t data2, uint64_t data64)
{
struct sm_header header;
int rv;
@@ -73,6 +73,7 @@ static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
header.length = sizeof(header) + datalen;
header.data = data;
header.data2 = data2;
+ header.data64 = data64;
rv = send(sock, (void *) &header, sizeof(struct sm_header), 0);
if (rv < 0)
@@ -91,7 +92,7 @@ int send_command(int cmd, uint32_t data)
if (rv < 0)
return rv;
- rv = send_header(sock, cmd, 0, 0, data, 0);
+ rv = send_header(sock, cmd, 0, 0, data, 0, 0);
if (rv < 0) {
close(sock);
return rv;
@@ -124,7 +125,7 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t flags)
if (rv < 0)
return rv;
- rv = send_header(fd, cmd, flags, sizeof(struct sanlk_lockspace), 0, 0);
+ rv = send_header(fd, cmd, flags, sizeof(struct sanlk_lockspace), 0, 0, 0);
if (rv < 0)
goto out;
@@ -163,7 +164,7 @@ int sanlock_align(struct sanlk_disk *disk)
if (rv < 0)
return rv;
- rv = send_header(fd, SM_CMD_ALIGN, 0, sizeof(struct sanlk_disk), 0, 0);
+ rv = send_header(fd, SM_CMD_ALIGN, 0, sizeof(struct sanlk_disk), 0, 0, 0);
if (rv < 0)
goto out;
@@ -201,7 +202,7 @@ int sanlock_init(struct sanlk_lockspace *ls,
sizeof(struct sanlk_disk) * res->num_disks;
}
- rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts);
+ rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts, 0);
if (rv < 0)
goto out;
@@ -265,7 +266,7 @@ int sanlock_register(void)
if (rv < 0)
return rv;
- rv = send_header(sock, SM_CMD_REGISTER, 0, 0, 0, 0);
+ rv = send_header(sock, SM_CMD_REGISTER, 0, 0, 0, 0, 0);
if (rv < 0) {
close(sock);
return rv;
@@ -278,7 +279,7 @@ int sanlock_restrict(int sock, uint32_t flags)
{
int rv;
- rv = send_header(sock, SM_CMD_RESTRICT, flags, 0, 0, -1);
+ rv = send_header(sock, SM_CMD_RESTRICT, flags, 0, 0, -1, 0);
if (rv < 0)
return rv;
@@ -333,7 +334,7 @@ int sanlock_acquire(int sock, int pid, uint32_t flags, int res_count,
fd = sock;
}
- rv = send_header(fd, SM_CMD_ACQUIRE, flags, datalen, res_count, data2);
+ rv = send_header(fd, SM_CMD_ACQUIRE, flags, datalen, res_count, data2, 0);
if (rv < 0)
return rv;
@@ -402,7 +403,7 @@ int sanlock_inquire(int sock, int pid, uint32_t flags, int *res_count,
fd = sock;
}
- rv = send_header(fd, SM_CMD_INQUIRE, flags, 0, 0, data2);
+ rv = send_header(fd, SM_CMD_INQUIRE, flags, 0, 0, data2, 0);
if (rv < 0)
return rv;
@@ -476,7 +477,7 @@ int sanlock_release(int sock, int pid, uint32_t flags, int res_count,
datalen = res_count * sizeof(struct sanlk_resource);
- rv = send_header(fd, SM_CMD_RELEASE, flags, datalen, res_count, data2);
+ rv = send_header(fd, SM_CMD_RELEASE, flags, datalen, res_count, data2, 0);
if (rv < 0)
goto out;
@@ -507,7 +508,7 @@ int sanlock_request(uint32_t flags, uint32_t force_mode,
if (rv < 0)
return rv;
- rv = send_header(fd, SM_CMD_REQUEST, flags, datalen, force_mode, 0);
+ rv = send_header(fd, SM_CMD_REQUEST, flags, datalen, force_mode, 0, 0);
if (rv < 0)
goto out;
@@ -552,7 +553,7 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
data = (char *)res;
}
- rv = send_header(fd, cmd, flags, datalen, 0, 0);
+ rv = send_header(fd, cmd, flags, datalen, 0, 0, 0);
if (rv < 0)
goto out;
@@ -568,6 +569,40 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
return rv;
}
+int sanlock_setmode(uint32_t flags, uint64_t host_id, int mode,
+ struct sanlk_resource *res)
+{
+ int fd, rv, datalen;
+
+ datalen = sizeof(struct sanlk_resource) +
+ sizeof(struct sanlk_disk) * res->num_disks;
+
+ rv = connect_socket(&fd);
+ if (rv < 0)
+ return rv;
+
+ rv = send_header(fd, SM_CMD_SETMODE, flags, datalen, mode, 0, host_id);
+ if (rv < 0)
+ goto out;
+
+ rv = send(fd, res, sizeof(struct sanlk_resource), 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ rv = recv_result(fd);
+ out:
+ close(fd);
+ return rv;
+}
+
/*
* convert from struct sanlk_resource to string with format:
* <lockspace_name>:<resource_name>:<path>:<offset>[:<path>:<offset>...]:<lver>
diff --git a/src/cmd.c b/src/cmd.c
index 3cef77d..58994a2 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -350,6 +350,13 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
result = rv;
goto done;
}
+ save_resource_lver(token, token->leader.lver);
+
+ /* TODO: fail and return an error if this resource
+ has LEADER_FL_MODE in which case only setmode is allowed.
+ It may be better to detect this right when we first read the
+ leader record in paxos_lease_acquire */
+
acquire_count++;
}
@@ -766,9 +773,8 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
if (rv != disks_len) {
- free(token);
result = -ENOTCONN;
- goto reply;
+ goto reply_free;
}
/* zero out pad1 and pad2, see WARNING above */
@@ -787,18 +793,20 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
error = request_token(task, token, force_mode, &owner_id);
if (error < 0) {
result = error;
- goto reply;
+ goto reply_free;
}
result = 0;
if (!token->acquire_lver && !force_mode)
- goto reply;
+ goto reply_free;
if (owner_id)
host_status_set_bit(token->r.lockspace_name, owner_id);
- reply:
+
+ reply_free:
free(token);
+ reply:
log_debug("cmd_request %d,%d done %d", ca->ci_in, fd, result);
send_result(fd, &ca->header, result);
@@ -854,6 +862,340 @@ static void cmd_examine(struct task *task GNUC_UNUSED, struct cmd_args *ca)
client_resume(ca->ci_in);
}
+/* return 1 (is alive) to force a failure if we don't have enough
+ knowledge to know it's really not alive. Later we could have this sit and
+ wait (like paxos_lease_acquire) until we have waited long enough or have
+ enough knowledge to say it's safely dead (unless of course we find it is
+ alive while waiting) */
+
+static int host_live(struct task *task, char *lockspace_name, uint64_t host_id, uint64_t gen)
+{
+ struct host_status hs;
+ uint64_t now;
+ int rv;
+
+ rv = host_info(lockspace_name, host_id, &hs);
+ if (rv) {
+ log_debug("host_live %llu %llu yes host_info %d",
+ (unsigned long long)host_id, (unsigned long long)gen, rv);
+ return 1;
+ }
+
+ if (!hs.last_check) {
+ log_debug("host_live %llu %llu yes unchecked",
+ (unsigned long long)host_id, (unsigned long long)gen);
+ return 1;
+ }
+
+ /* the host_id lease is free, not being used */
+ if (!hs.timestamp) {
+ log_debug("host_live %llu %llu no lease free",
+ (unsigned long long)host_id, (unsigned long long)gen);
+ return 0;
+ }
+
+ if (hs.owner_generation > gen) {
+ log_debug("host_live %llu %llu no old gen %llu",
+ (unsigned long long)host_id, (unsigned long long)gen,
+ (unsigned long long)hs.owner_generation);
+ return 0;
+ }
+
+ now = monotime();
+
+ if (!hs.last_live && (now - hs.first_check > task->host_dead_seconds)) {
+ log_debug("host_live %llu %llu no first_check %llu",
+ (unsigned long long)host_id, (unsigned long long)gen,
+ (unsigned long long)hs.first_check);
+ return 0;
+ }
+
+ if (hs.last_live && (now - hs.last_live > task->host_dead_seconds)) {
+ log_debug("host_live %llu %llu no last_live %llu",
+ (unsigned long long)host_id, (unsigned long long)gen,
+ (unsigned long long)hs.last_live);
+ return 0;
+ }
+
+ log_debug("host_live %llu %llu yes recent first_check %llu last_live %llu",
+ (unsigned long long)host_id, (unsigned long long)gen,
+ (unsigned long long)hs.first_check,
+ (unsigned long long)hs.last_live);
+
+ return 1;
+}
+
+/*
+ * What this is aiming to do is:
+ * cmd_acquire();
+ * for all mblocks, if any mblock.mode is incompatible with mode
+ * if mblock is for dead host_id, clear mblock.mode, continue
+ * if mblock is for live host_id, return -EAGAIN
+ * write mblock.mode and mblock.generation for host_id
+ * cmd_release();
+ */
+
+static void cmd_setmode(struct task *task, struct cmd_args *ca)
+{
+ struct token *token;
+ struct sync_disk *disk;
+ struct sanlk_resource res;
+ struct space space;
+ struct mode_block *mb;
+ char *iobuf, **p_iobuf;
+ char *rbuf, *wbuf;
+ uint64_t set_hostid, set_gen;
+ int iobuf_len;
+ int set_mode;
+ int token_len, disks_len;
+ int i, j, fd, rv, result;
+
+ fd = client[ca->ci_in].fd;
+
+ /* the two args from sanlock_setmode() */
+ set_hostid = ca->header.data64; /* TODO: add to struct */
+ set_mode = ca->header.data;
+
+ /* receiving and setting up token (copied from cmd_request) */
+
+ rv = recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL);
+ if (rv != sizeof(struct sanlk_resource)) {
+ log_error("cmd_setmode %d,%d recv %d %d",
+ ca->ci_in, fd, rv, errno);
+ result = -ENOTCONN;
+ goto reply;
+ }
+
+ if (!res.num_disks || res.num_disks > SANLK_MAX_DISKS) {
+ result = -ERANGE;
+ goto reply;
+ }
+
+ disks_len = res.num_disks * sizeof(struct sync_disk);
+ token_len = sizeof(struct token) + disks_len;
+
+ token = malloc(token_len);
+ if (!token) {
+ result = -ENOMEM;
+ goto reply;
+ }
+
+ memset(token, 0, token_len);
+ token->disks = (struct sync_disk *)&token->r.disks[0]; /* shorthand */
+ token->r.num_disks = res.num_disks;
+ memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
+ memcpy(token->r.name, res.name, SANLK_NAME_LEN);
+
+ token->acquire_lver = res.lver;
+ token->acquire_data64 = res.data64;
+ token->acquire_data32 = res.data32;
+ token->acquire_flags = res.flags;
+
+ /*
+ * receive sanlk_disk's / sync_disk's
+ *
+ * WARNING: as a shortcut, this requires that sync_disk and
+ * sanlk_disk match; this is the reason for the pad fields
+ * in sanlk_disk (TODO: let these differ?)
+ */
+
+ rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
+ if (rv != disks_len) {
+ result = -ENOTCONN;
+ goto reply_token;
+ }
+
+ /* zero out pad1 and pad2, see WARNING above */
+ for (j = 0; j < token->r.num_disks; j++) {
+ token->disks[j].sector_size = 0;
+ token->disks[j].fd = -1;
+ }
+
+ log_debug("cmd_setmode %d,%d host_id %llu mode %u %.48s:%.48s:%.256s:%llu",
+ ca->ci_in, fd,
+ (unsigned long long)set_hostid, set_mode,
+ token->r.lockspace_name,
+ token->r.name,
+ token->disks[0].path,
+ (unsigned long long)token->r.disks[0].offset);
+
+ /* find what our own host_id and generation are for this lockspace */
+
+ rv = lockspace_info(token->r.lockspace_name, &space);
+ if (rv < 0 || space.killing_pids) {
+ log_error("cmd_setmode %d,%d invalid lockspace "
+ "found %d failed %d name %.48s",
+ ca->ci_in, fd, rv, space.killing_pids,
+ token->r.lockspace_name);
+ result = -ENOSPC;
+ goto reply_token;
+ }
+ token->host_id = space.host_id;
+ token->host_generation = space.host_generation;
+
+ if (!set_hostid)
+ set_hostid = token->host_id;
+ if (token->host_id == set_hostid)
+ set_gen = token->host_generation;
+ else
+ set_gen = 0;
+
+ /* only allow clearing mode of host_id's that are not ours
+ (not sure this will be needed, but it may be useful) */
+
+ if (token->host_id != set_hostid && set_mode != SANLK_MODE_NL) {
+ log_error("cmd_setmode %d,%d host_id %llu set hostid %llu mode %d",
+ ca->ci_in, fd, (unsigned long long)token->host_id,
+ (unsigned long long)set_hostid, set_mode);
+ result = -EINVAL;
+ goto reply_token;
+ }
+
+ rv = acquire_token(task, token, 0, 0);
+ if (rv < 0) {
+ log_error("cmd_setmode %d,%d acquire error %d", ca->ci_in, fd, rv);
+ result = rv;
+ goto reply_token;
+ }
+
+ if (!(token->leader.flags & LEADER_FL_MODE)) {
+ /* a resource lease must be initialized with the MODE
+ flag for setmode to work */
+ log_error("cmd_setmode %d,%d no-mode resource", ca->ci_in, fd);
+ result = -EINVAL;
+ goto reply_rel;
+ }
+
+ /*
+ * Read the entire lease area, which includes all hosts' sectors.
+ * Check mblock in each host sector for other incompatible lock modes.
+ * If we find one, but the host is dead, then clear it and continue
+ * checking. If no conflicts are found, write the requested mode
+ * in the mblock.
+ */
+
+ /* only keep modes on the first disk */
+ disk = &token->disks[0];
+
+ rv = open_disk(disk);
+ if (rv < 0) {
+ result = rv;
+ goto reply_rel;
+ }
+
+ iobuf_len = direct_align(disk);
+
+ p_iobuf = &iobuf;
+
+ rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+ if (rv)
+ goto reply_close;
+
+ memset(iobuf, 0, iobuf_len);
+
+ rv = read_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task);
+ if (rv < 0) {
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
+ result = rv;
+ goto reply_close;
+ }
+
+ wbuf = malloc(disk->sector_size);
+ if (!wbuf) {
+ result = -ENOMEM;
+ free(iobuf);
+ goto reply_close;
+ }
+
+ if (set_mode == SANLK_MODE_NL)
+ goto do_write;
+
+ for (i = 0; i < token->leader.num_hosts; i++) {
+ if (i+1 == set_hostid)
+ continue;
+
+ /*
+ * The sector_nr for host_id N is:
+ * 1 leader block + 1 request block + (N-1) host blocks.
+ * The mode_block is DBLOCK_MAX_LEN into the sector
+ *
+ * rbuf is the start of the sector (where the paxos_dblock
+ * struct exists, which we aren't modifying) for host_id i+1.
+ */
+
+ rbuf = iobuf + ((2 + i) * disk->sector_size);
+ mb = (struct mode_block *)(rbuf + DBLOCK_MAX_LEN);
+
+ if (mb->mode == SANLK_MODE_NL)
+ continue;
+
+ if (mb->mode == SANLK_MODE_SH && set_mode == SANLK_MODE_SH)
+ continue;
+
+ /* incompatible locks */
+
+ if (host_live(task, token->r.lockspace_name, i+1, mb->generation)) {
+ log_error("cmd_setmode %d,%d mode conflict host_id %d gen %llu mode %u",
+ ca->ci_in, fd, i+1, (unsigned long long)mb->generation, mb->mode);
+ result = -EAGAIN;
+ goto reply_free;
+ }
+
+ /* clear mode in dead host's sector */
+
+ log_error("cmd_setmode %d,%d clear dead host_id %d gen %llu mode %u",
+ ca->ci_in, fd, i+1, (unsigned long long)mb->generation, mb->mode);
+
+ memcpy(wbuf, rbuf, disk->sector_size);
+ mb = (struct mode_block *)(wbuf + DBLOCK_MAX_LEN);
+ mb->mode = SANLK_MODE_NL;
+ mb->generation = 0;
+
+ rv = write_sector(disk, 2 + i, wbuf, disk->sector_size,
+ task, "mblock");
+ if (rv < 0) {
+ result = rv;
+ goto reply_free;
+ }
+ }
+
+ log_debug("cmd_setmode %d,%d write host_id %llu gen %llu mode %d",
+ ca->ci_in, fd, (unsigned long long)set_hostid,
+ (unsigned long long)set_gen, set_mode);
+
+ do_write:
+ rbuf = iobuf + ((2 + set_hostid - 1) * disk->sector_size);
+ memcpy(wbuf, rbuf, disk->sector_size);
+ mb = (struct mode_block *)(wbuf + DBLOCK_MAX_LEN);
+ mb->mode = set_mode;
+ mb->generation = set_gen;
+
+ rv = write_sector(disk, 2 + set_hostid - 1, wbuf, disk->sector_size,
+ task, "mblock");
+ if (rv < 0) {
+ result = rv;
+ goto reply_free;
+ }
+
+ result = 0;
+
+ reply_free:
+ free(wbuf);
+ free(iobuf);
+ reply_close:
+ close_disks(disk, 1);
+ reply_rel:
+ release_token(task, token);
+ reply_token:
+ free(token);
+ reply:
+ log_debug("cmd_setmode %d,%d done %d", ca->ci_in, fd, result);
+
+ send_result(fd, &ca->header, result);
+ client_resume(ca->ci_in);
+}
+
static void cmd_add_lockspace(struct cmd_args *ca)
{
struct sanlk_lockspace lockspace;
@@ -1111,6 +1453,8 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca)
memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
memcpy(token->r.name, res.name, SANLK_NAME_LEN);
+ token->acquire_flags = res.flags;
+
/*
* receive sanlk_disk's / sync_disk's
*
@@ -1196,6 +1540,9 @@ void call_cmd_thread(struct task *task, struct cmd_args *ca)
case SM_CMD_EXAMINE_RESOURCE:
cmd_examine(task, ca);
break;
+ case SM_CMD_SETMODE:
+ cmd_setmode(task, ca);
+ break;
};
}
diff --git a/src/direct.c b/src/direct.c
index 8394e0a..263f6c7 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -102,6 +102,8 @@ static int do_paxos_action(int action, struct task *task,
memcpy(token->r.lockspace_name, res->lockspace_name, SANLK_NAME_LEN);
memcpy(token->r.name, res->name, SANLK_NAME_LEN);
+ token->acquire_flags = res->flags;
+
/* WARNING sync_disk == sanlk_disk */
memcpy(token->disks, &res->disks, disks_len);
@@ -426,6 +428,15 @@ int direct_read_leader(struct task *task,
int test_id_bit(int host_id, char *bitmap);
+static const char *mode_str(int mode)
+{
+ if (mode == 3)
+ return "SH";
+ if (mode == 5)
+ return "EX";
+ return "??";
+}
+
int direct_dump(struct task *task, char *dump_path, int force_mode)
{
char *data, *bitmap;
@@ -433,6 +444,8 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
struct leader_record *lr;
struct request_record *rr;
struct sync_disk sd;
+ char *pd;
+ struct mode_block *mb;
char sname[NAME_ID_SIZE+1];
char rname[NAME_ID_SIZE+1];
uint64_t sector_nr;
@@ -543,6 +556,21 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
(unsigned long long)rr->lver, rr->force_mode);
}
printf("\n");
+
+ if (lr->flags & LEADER_FL_MODE) {
+ for (i = 0; i < lr->num_hosts; i++) {
+ pd = data + ((2 + i) * sd.sector_size);
+ mb = (struct mode_block *)(pd + DBLOCK_MAX_LEN);
+
+ if (!mb->mode)
+ continue;
+
+ printf(" ");
+ printf("%s %04d %04llu\n",
+ mode_str(mb->mode), i+1,
+ (unsigned long long)mb->generation);
+ }
+ }
} else {
break;
}
diff --git a/src/leader.h b/src/leader.h
index e7304b0..f850ef8 100644
--- a/src/leader.h
+++ b/src/leader.h
@@ -42,10 +42,12 @@
#define LEADER_CHECKSUM_LEN 168
#define LEASE_FREE 0
+#define LEADER_FL_MODE 0x00000001
+
struct leader_record {
uint32_t magic;
uint32_t version;
- uint32_t unused0;
+ uint32_t flags;
uint32_t sector_size;
uint64_t num_hosts;
uint64_t max_hosts;
@@ -81,4 +83,12 @@ struct request_record {
uint32_t force_mode;
};
+#define DBLOCK_MAX_LEN 128
+
+struct mode_block {
+ uint32_t mode;
+ uint32_t unused;
+ uint64_t generation;
+};
+
#endif
diff --git a/src/lockspace.c b/src/lockspace.c
index 188758c..dc89d90 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -262,6 +262,9 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
hs = &sp->host_status[i];
hs->last_check = now;
+ if (!hs->first_check)
+ hs->first_check = now;
+
leader = (struct leader_record *)(buf + (i * disk->sector_size));
if (hs->owner_id == leader->owner_id &&
diff --git a/src/main.c b/src/main.c
index 7ec689e..39a3b58 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1002,6 +1002,7 @@ static void process_connection(int ci)
case SM_CMD_ALIGN:
case SM_CMD_INIT_LOCKSPACE:
case SM_CMD_INIT_RESOURCE:
+ case SM_CMD_SETMODE:
rv = client_suspend(ci);
if (rv < 0)
return;
@@ -1367,7 +1368,8 @@ static void print_usage(void)
printf("sanlock client host_status -s LOCKSPACE [-D]\n");
printf("sanlock client log_dump\n");
printf("sanlock client shutdown [-f 0|1]\n");
- printf("sanlock client init -s LOCKSPACE | -r RESOURCE\n");
+ printf("sanlock client init -s LOCKSPACE\n");
+ printf("sanlock client init -r RESOURCE [-M]\n");
printf("sanlock client align -s LOCKSPACE\n");
printf("sanlock client add_lockspace -s LOCKSPACE\n");
printf("sanlock client inq_lockspace -s LOCKSPACE\n");
@@ -1378,9 +1380,11 @@ static void print_usage(void)
printf("sanlock client inquire -p <pid>\n");
printf("sanlock client request -r RESOURCE -f <force_mode>\n");
printf("sanlock client examine -r RESOURCE | -s LOCKSPACE\n");
+ printf("sanlock client setmode -r RESOURCE -m <lock_mode>\n");
printf("\n");
printf("sanlock direct <action> [-a 0|1] [-o 0|1]\n");
- printf("sanlock direct init -s LOCKSPACE | -r RESOURCE\n");
+ printf("sanlock direct init -s LOCKSPACE\n");
+ printf("sanlock direct init -r RESOURCE [-M]\n");
printf("sanlock direct read_leader -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock direct read_id -s LOCKSPACE\n");
printf("sanlock direct live_id -s LOCKSPACE\n");
@@ -1417,6 +1421,7 @@ static int read_command_line(int argc, char *argv[])
char *arg1 = argv[1];
char *act;
int i, j, len, begin_command = 0;
+ int init_mode = 0;
if (argc < 2 || !strcmp(arg1, "help") || !strcmp(arg1, "--help") ||
!strcmp(arg1, "-h")) {
@@ -1487,6 +1492,8 @@ static int read_command_line(int argc, char *argv[])
com.action = ACT_REQUEST;
else if (!strcmp(act, "examine"))
com.action = ACT_EXAMINE;
+ else if (!strcmp(act, "setmode"))
+ com.action = ACT_SETMODE;
else if (!strcmp(act, "align"))
com.action = ACT_CLIENT_ALIGN;
else if (!strcmp(act, "init"))
@@ -1546,13 +1553,25 @@ static int read_command_line(int argc, char *argv[])
optchar = p[1];
i++;
- /* the only option that does not have optionarg */
+ /*
+ * options that do not have optionarg
+ */
+
if (optchar == 'D') {
com.debug = 1;
log_stderr_priority = LOG_DEBUG;
continue;
}
+ if (optchar == 'M') {
+ init_mode = 1;
+ continue;
+ }
+
+ /*
+ * options that require arg
+ */
+
if (i >= argc) {
log_tool("option '%c' requires arg", optchar);
exit(EXIT_FAILURE);
@@ -1602,7 +1621,11 @@ static int read_command_line(int argc, char *argv[])
com.num_hosts = atoi(optionarg);
break;
case 'm':
- com.max_hosts = atoi(optionarg);
+ if (com.action == ACT_SETMODE) {
+ com.lock_mode = atoi(optionarg);
+ } else {
+ com.max_hosts = atoi(optionarg);
+ }
break;
case 'p':
com.pid = atoi(optionarg);
@@ -1647,6 +1670,9 @@ static int read_command_line(int argc, char *argv[])
i++;
}
+ if (init_mode)
+ com.res_args[0]->flags |= SANLK_RES_MODE;
+
/*
* the remaining args are for the command
*
@@ -1831,6 +1857,13 @@ static int do_client(void)
log_tool("examine done %d", rv);
break;
+ case ACT_SETMODE:
+ log_tool("setmode %d", com.lock_mode);
+ /* TODO: add optional host_id */
+ rv = sanlock_setmode(0, 0, com.lock_mode, com.res_args[0]);
+ log_tool("setmode done %d", rv);
+ break;
+
case ACT_CLIENT_ALIGN:
log_tool("align");
rv = sanlock_align(&com.lockspace.host_id_disk);
@@ -1883,6 +1916,7 @@ static int do_direct(void)
log_tool("read_leader done %d", rv);
log_tool("magic 0x%0x", leader.magic);
log_tool("version 0x%x", leader.version);
+ log_tool("flags 0x%x", leader.flags);
log_tool("sector_size %u", leader.sector_size);
log_tool("num_hosts %llu",
(unsigned long long)leader.num_hosts);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index a55933d..f55f3bb 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -1517,6 +1517,10 @@ int paxos_lease_init(struct task *task,
leader->timestamp = LEASE_FREE;
strncpy(leader->space_name, token->r.lockspace_name, NAME_ID_SIZE);
strncpy(leader->resource_name, token->r.name, NAME_ID_SIZE);
+
+ if (token->acquire_flags & SANLK_RES_MODE)
+ leader->flags = LEADER_FL_MODE;
+
leader->checksum = leader_checksum(leader);
rr = (struct request_record *)(iobuf + sector_size);
diff --git a/src/resource.c b/src/resource.c
index 9002f06..ea9d7bb 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -83,7 +83,7 @@ static struct resource *find_resource(struct token *token,
return NULL;
}
-static void save_resource_lver(struct token *token, uint64_t lver)
+void save_resource_lver(struct token *token, uint64_t lver)
{
struct resource *r;
@@ -190,15 +190,14 @@ int acquire_token(struct task *task, struct token *token,
close_disks(token->disks, token->r.num_disks);
log_token(token, "acquire rv %d lver %llu at %llu", rv,
- (unsigned long long)token->leader.lver,
- (unsigned long long)token->leader.timestamp);
+ (unsigned long long)leader_ret.lver,
+ (unsigned long long)leader_ret.timestamp);
if (rv < 0)
return rv;
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
token->r.lver = token->leader.lver;
- save_resource_lver(token, token->leader.lver);
return rv; /* SANLK_OK */
}
diff --git a/src/resource.h b/src/resource.h
index 72de40a..6f8edee 100644
--- a/src/resource.h
+++ b/src/resource.h
@@ -6,8 +6,8 @@
* of the GNU General Public License v2 or (at your option) any later version.
*/
-#ifndef __TOKEN_MANAGER_H__
-#define __TOKEN_MANAGER_H__
+#ifndef __RESOURCE_H__
+#define __RESOURCE_H__
int acquire_token(struct task *task, struct token *token,
uint64_t acquire_lver, int new_num_hosts);
@@ -22,6 +22,8 @@ int request_token(struct task *task, struct token *token, uint32_t force_mode,
int add_resource(struct token *token, int pid, uint32_t cl_restrict);
void del_resource(struct token *token);
+void save_resource_lver(struct token *token, uint64_t lver);
+
int set_resource_examine(char *space_name, char *res_name);
int setup_token_manager(void);
diff --git a/src/sanlock.h b/src/sanlock.h
index a56bb4e..7e49122 100644
--- a/src/sanlock.h
+++ b/src/sanlock.h
@@ -53,6 +53,7 @@ struct sanlk_disk {
#define SANLK_RES_LVER 0x1 /* lver field is set */
#define SANLK_RES_NUM_HOSTS 0x2 /* data32 field is new num_hosts */
+#define SANLK_RES_MODE 0x4
struct sanlk_resource {
char lockspace_name[SANLK_NAME_LEN]; /* terminating \0 not required */
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index eeeb897..519faf3 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -117,6 +117,7 @@ struct lease_status {
};
struct host_status {
+ uint64_t first_check; /* local monotime */
uint64_t last_check; /* local monotime */
uint64_t last_live; /* local monotime */
uint64_t last_req; /* local monotime */
@@ -530,6 +531,7 @@ struct command_line {
int max_hosts; /* -m */
int res_count;
uint32_t force_mode;
+ int lock_mode;
char our_host_name[SANLK_NAME_LEN+1];
char *dump_path;
struct sanlk_lockspace lockspace; /* -s LOCKSPACE */
@@ -568,6 +570,7 @@ enum {
ACT_CLIENT_INIT,
ACT_CLIENT_ALIGN,
ACT_EXAMINE,
+ ACT_SETMODE,
};
EXTERN int external_shutdown;
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index 3991094..bb64d27 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -52,6 +52,19 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
struct sanlk_resource *res);
/*
+ * Set a host's mode for the resource. A standard lease is acquired, the
+ * mode is set if compatible with existing modes, the lease is released.
+ * The resource lease used here is not associated with a pid.
+ */
+
+#define SANLK_MODE_NL 0
+#define SANLK_MODE_SH 3
+#define SANLK_MODE_EX 5
+
+int sanlock_setmode(uint32_t flags, uint64_t host_id, int mode,
+ struct sanlk_resource *res);
+
+/*
* Functions to convert between string and struct resource formats.
* All allocate space for returned data that the caller must free.
*/
diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h
index aa9e46a..b5c80dc 100644
--- a/src/sanlock_sock.h
+++ b/src/sanlock_sock.h
@@ -36,6 +36,7 @@ enum {
SM_CMD_EXAMINE_RESOURCE = 16,
SM_CMD_HOST_STATUS = 17,
SM_CMD_INQ_LOCKSPACE = 18,
+ SM_CMD_SETMODE = 19,
};
struct sm_header {
@@ -47,6 +48,7 @@ struct sm_header {
uint32_t seq;
uint32_t data;
uint32_t data2;
+ uint64_t data64;
};
#define SANLK_STATE_MAXSTR 4096
11 years, 8 months
Changes to 'setmode'
by David Teigland
New branch 'setmode' available with the following commits:
commit 0d7f9fa0e42da357995b2b7f25b0990c5cda3ab3
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Jan 24 16:56:20 2012 -0600
sanlock: add setmode
11 years, 8 months
Changes to 'refs/tags/sanlock-1.9'
by David Teigland
Changes since the dawn of time:
Daniel P. Berrange (15):
Fix const-ness of many APIs.
Fix warnings in watchdog module
Fix function prototypes for no-arg methods
Remove use of 'index' as a variable name
Make many functions static
Fix missing include in logging file
Annotate many unused parameters to avoid warnings
Remove redundant redeclaration of 'to' variable
Fix args to execv()
Remove redundant arg to 'run_command'
Rename optarg to optionarg to avoid clashing with getopt.h
Disable the read_request method since it is unused
Add many more compiler warning flags & safety checks
Hard code a sector size of 512 if the lease volume is a regular file
Ensure libsanlock.so is built with debug/warning flags
David Teigland (300):
sync_manager: initial commit
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: add more logging
sync_manager: misc updates
sync_manager: misc updates
sync_manager: num_hosts/MAX_HOSTS stuff
daemon: reworking notions of resource_id/token_name
sync_manager: resource lockfiles
sync_manager: lease arg processing
sync_manager: Began multiple lease support
sync_manager: use first command line arg as action
sync_manager: leader record changes and verify
sync_manager: clean up released leases
sync_manager: move functions around
sync_manager: add more tool actions
sync_manager: naming changes
sync_manager: separate token index and id
sync_manager: fix index usage and other misc
sync_manager: use pthread cond in acquire
sync_manager: write all log entries
sync_manager: simplify polling
sync_manager: fix waitpid use
sync_manager: acquire can fail early
sync_manager: write log entries at exit
sync_manager: add test program
sync_manager: move secondary pid check
sync_manager: fix disk paxos contention
devcount: fix verify checks
sync_manager: add GPL license file
sync_manager: fix leader block offsets
sync_manager: increase COMMAND_MAX
sync_manager: renewal should verify
sync_manager: use sector size from libblkid
sync_manager: use a real checksum function
sync_manager: add libblkid to spec file
sync_manager: print status info
sync_manager: one watchdog file per lease
sync_manager: lease_threads handle watchdog files
sync_manager: fix/add some text/comments
sync_manager: refactor read/write
sync_manager: move disk io functions
sync_manager: remove token arg
sync_manager: rename paxos_disk sync_disk
sync_manager: add aio read/write
sync_manager: make io_timeout_seconds a diskio arg
sync_manager: forgot to add new files
sync_manager: use log thread
sync_manager: client connections
sync_manager: connection processing
sync_manager: send/recv pid
sync_manager: add write_sectors
sync_manager: restructuring
sync_manager: write_sectors code factoring
sync_manager: daemonize
sync_manager: releasing leases
sync_manager: async releasing
sync_manager: release fixes
sync_manager: add direct and indirect acquire/release
sync_manager: reacquire resources
sync_manager: move code
sync_manager: same pid may reacquire resource
sync_manager: lease migration
sync_manager: handle client errors
sync_manager: improve error handling
sync_manager: host_id leases
sync_manager: remove empty files
sync_manager: print initialization info
sync_manager: rename files
sync_manager: clean up header org
sync_manager: delta_lease implementation
sync_manager: accept offset units
sync_manager: fix up init output
sync_manager: put back watchdog calls
sync_manager: fix start_host_id error paths
sync_manager: add log_error's for watchdog file errors
sync_manager: actual timeouts
sync_manager: change timeouts on cmd line
sanlock: create new external api
sanlock: build libsanlock
sanlock: use MAX_LEASES everywhere
sanlock: add libvirt plugin
sanlock plugin: couple minor fixes
sanlock: clean up /var file names
sanlock plugin: fix symbol needed by libvirt
sanlock: add some debug output
sanlock plugin: fix uuid copy
sanlock plugin: fix names
sanlock: add "owner_name"
sanlock: fix renewal checks
sanlock: clean up host_id types
sanlock: set_host_id command
sanlock: fix killing pids
sanlock: add status command
sanlock: set version to 1.0
sanlock: delta_lease cleanup
sanlock: changing num_hosts
sanlock: add dump command
sanlock: renewal timings
sanlock: add direct option
sanlock: check for watchdog file
sanlock: recovery fixes
lock_driver_sanlock: fix compile problems
sanlock: improve command options
sanlock: tidying help text
sanlock: move binary to /usr/sbin
sanlock: add init script
sanlock: fix sigterm shutdown
sanlock: init stop
sanlock: add wdtest command
sanlock.spec: new url
lock_driver_sanlock: remove close
sanlock: introduce lockspaces
lock_driver_sanlock: remove files
sanlock: better logging functions
sanlock: misc log message
sanlock.spec: sbin not libexec
sanlock init: remove watchdog reference
wdmd: watchdog multiplexing daemon
sanlock: add code to use wdmd
sanlock/wdmd: use wdmd in sanlock
sanlock/wdmd: add copyright header to source files
sanlock: rename sanlock source dir
sanlock: move tests dir
move COPYING file
wdmd: use signalfd for signal handling
Fix Makefile comments
wdmd: fix daemon debug option
wdmd: add init script
sanlock.spec: updates
sanlock.spec: src dir
sanlock: build with uninstalled libwdmd
sanlock: version 1.1
sanlock: high priority options
wdmd: high priority options
sanlock: return migration state
sanlock: migration.txt describes libvirt/sanlock steps
libsanlock: include admin functions
sanlock: fix host_id expiration check
sanlock: migration working
devcount: migrate test
sanlock: setowner improvements
sanlock: migrate to target fix
sanlock: fix wdmd stop order
sanlock: various fixes
sanlock: remove wdtest
sanlock: remove migration
sanlock: clean up command return data
sanlock: add resource string conversion functions
sanlock: rework internal structs
devcount: add relock test
sanlock: fix release and inquire
sanlock: add_lockspace EEXIST
sanlock: rework client handling
sanlock: clean up warnings
sanlock: debug message changes
sanlock: add lockspace checks
wdmd: enable test scripts
sanlock: add str_to_lockspace to lib
WIP devcount migrate
devcount: new migrate test
sanlock: read_id and live_id commands
sanlock: check lockspace name and host_id
sanlock: remove remaining cluster_mode
sanlock: add libsanlock_direct
devcountn: start multiple devcount tests
devcount: small changes
sanlock: new return values
sanlock: misc changes and fix
sanlock: log error of full bad block
sanlock: interval between renewal checks
sanlock: renewal changes
sanlock: fix log_dump
sanlock: fix find_client_pid
sanlock: fix host_id reads from paxos_acquire
sanlock: init with one write
devcount: improve output
devcount: new pause/resume
devcount: add expire test
sanlock: correct paxos usage
sanlock: direct read_leader
sanlock: paxos delays
sanlock: use thread pool
sanlock: client status output format changes
sanlock: fix inquire of dead pid
sanlock: use native linux aio
sanlock: i/o changes
sanlock: aio changes
sanlock: reduce paxos acquire read ops
sanlock: quiet error case
sanlock: don't free aio buf until event completes
sanlock: io timeout related changes
sanlock: read dblocks in single aligned io
sanlock: add sanlock_restrict api
sanlock: add sanlock_direct_sector_size api
sanlock: add checksum to dblocks
sanlock: fix init restart
sanlock: don't release tokens in dead lockspace
sanlock: fix adding lockspace
sanlock: official 1MB/8MB alignment
devcount: use aio in init
libsanlock: link with LDFLAGS
sanlock: increase version to 1.3
sanlock/wdmd: shut up warnings
sanlock: fix libwdmd linking
remove spec file
sanlock: use a completed read after renewal timeout
sanlock: use unique host name in delta leases
sanlock: remove sector_size api
sanlock: abort delta wait on shutdown
sanlock: fix add_lockspace failure
sanlk_load: add new test
sanlock: fix recv and inquire
sanlock: initial pid_dead check in acquire
sanlock: release 1.4
sanlock: generate a uuid for host id
sanlock: return -EINPROGRESS from add_lockspace
sanlk_load: periodically kill and replace a pid
sanlock: zero num_hosts uses DEFAULT_MAX_HOSTS
tests: misc changes
sanlock: break paxos_acquire wait loop
sanlock: increase log line to 512 bytes
sanlock: change a log_error to log_debug
sanlock: fail host_id when corrupted
sanlock: release 1.5
sanlock: release 1.6
sanlock: handle colon escaping in path strings
wdmd: add option for high priority
wdmd: use accept4 with SOCK_NONBLOCK
wdmd: tidy sun_addr snprintf
wdmd: pid and sock file changes
wdmd: add man page
wdmd: disable test scripts
sanlock: use accept4 with SOCK_NONBLOCK
sanlock: tidy sun_addr snprintf
sanlock: add explicit -luuid
sanlock: pid and sock file changes
sanlock: add man page
sanlock/wdmd: improve mkdir of run dir
wdmd: new build flags
sanlock: new build flags
sanlock/wdmd: use monotonic time
sanlock: build with pie
sanlock/wdmd: nonblocking listening/accept
sanlock: add missing monotime files
sanlock: update man page
sanlock: man page update
sanlock: update man page and help text
sanlock: print connections limit
release: sanlock 1.7
makefile: install mode for man pages
sanlock: read align_size in renewal
sanlock: check other host_id leases
sanlock: minor fixes and cleanups
sanlock: add request api/cmd
sanlock: crc code tidying
sanlock/wdmd: add license header to files
sanlock: create libsanlock_client
sanlock: move client code
remove COPYING file
sanlock: remove internal header from sanlock_sock
libsanlock_client: use LGPLv2+
libwdmd: use LGPLv2+
sanlock_rv.h: switch to LGPLv2+
README.license: document licenses
sanlock: client align and init
sanlock: write request record
sanlock: request struct magic and version numbers
sanlock: set bitmap for requests
sanlock: examine resource requests
sanlock: update man page
sanlock: remove old comment
sanlock: renaming functions
sanlock: optimize paxos wait
sanlock: use flags in struct resource
sanlock: restructure lockspace checks
sanlock: remove BLOCK_WD force mode
libsanlock: fix function stubs
sanlock: new status and host_status
sanlock: improve status output
sanlock: status output sorting
sanlock: SIGTERM and SIGKILL for REQ_KILL_PID
sanlock: add flag RESTRICT_SIGKILL
sanlock: setup fewer aio events
sanlock: move cmd processing
sanlock: suppress log messages
sanlock: rename source files
sanlock: improve killing pids
sanlock: fix log_dump
version 1.8
sanlock: quick host_id reacquire
sanlock: add force option to shutdown command
simpler copyright line
default to GPLv2+ for our original files
sanlock: improve daemon permission errors
add systemd files
fixing up init stuff
release 1.9
David Weber (2):
Fix order of linking
Install another symlink to shared library
Fabio M. Di Nitto (5):
build: sanlock should link with libsanlock
build: install shared lib header files and fix DESTDIR usage
build: drop rpm target
spec file: do first cut for total spec file
build: fix linking with libsanlock and install target
Federico Simoncelli (39):
rpm: sync specfile with fedora
rpm: add sanlock_admin.h header
rpm: add the lib package, install the initscripts
python: remove unused python code
python: add python binding
python: release the gil during sanlock operations
python: wrap sanlock extension with a module
rpm: add python binding package
python: pass a lockspace copy to str_to_lockspace
makefile: fix install typo
rpm: add sanlock_direct header
python: add sanlock init functions and exception
direct: close disks after initialization
python: register process only once
daemon: configurable socket permissions
rpm: add sanlock user and group
python: exceptions must contain the errno
rpm: add missing libaio-devel dependency
rpm: add daemon options in the init file
python: add missing aio library
python: add get_alignment function
libs: include libsanlock_direct into libsanlock
python: align num_hosts and max_hosts defaults
python: expose sanlock file descriptor
python: improve error reporting
python: parse lockspaces and resources natively
python: add usage example
python: initial support for sanlock errors
python: document the sanlock module
python: module cleanup
build: fix documentation install path
client: return appropriate errno on failure
python: expose errno in the exception
python: acquire leases for other processes
sanlock: implement the inq_lockspace command
python: add the inq_lockspace command binding
misc: run sanlock daemon as sanlock user
wdmd: use getopt to parse the command line
wdmd: make socket group ownership configurable
Saggi Mizrahi (21):
Added the begining of the testing and debugging tools
Better handling of max hosts
sync_manager: Updated tests to work with new lease struct
sync_manager: fixed skipping first arg in command
sync_manager: acquire and release actions
sync_manager: minor fixes
sync_manager: renamed stuff
sync_manager: made acquire synchronous again
sync_manager: added set_host_id action
sync_manager: use kill(0) for secondary pid check
sync_manager: make rpm and install
sync_manager: spec file update
sync_manager: Allow longer resource names
sync_manager: allow repeated set_host_id
sync_manager: Added escaping for the leases arg
sync_manager: Created the python bindings for sync_manager
sync_manager: listener socket permissions
sync_manager: Updated python binding and tests
sync_manager: Made 'token' a const in log_level
sync_manager: refactor messaging system
sync_manager: use getsockopt PEERCRED
11 years, 8 months
2 commits - init.d/wdmd src/main.c wdmd/main.c
by David Teigland
init.d/wdmd | 7 ++
src/main.c | 2
wdmd/main.c | 142 ++++++++++++++++++++++++++++++++----------------------------
3 files changed, 84 insertions(+), 67 deletions(-)
New commits:
commit ae407deb9c64ba4c1d3dfcb58d993050d64c314e
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Fri Jan 13 17:08:01 2012 +0000
wdmd: make socket group ownership configurable
diff --git a/init.d/wdmd b/init.d/wdmd
index 1adb03a..a952976 100644
--- a/init.d/wdmd
+++ b/init.d/wdmd
@@ -25,6 +25,11 @@ prog="wdmd"
lockfile="/var/run/$prog/$prog.pid"
exec="/usr/sbin/$prog"
+WDMDGROUP="sanlock"
+WDMDOPTS="-G $WDMDGROUP"
+
+[ -f /etc/sysconfig/$prog ] && . /etc/sysconfig/$prog
+
start() {
[ -x $exec ] || exit 5
@@ -34,7 +39,7 @@ start() {
fi
echo -n $"Starting $prog: "
- daemon $prog
+ daemon $prog $WDMDOPTS
retval=$?
echo
[ $retval -eq 0 ] && touch $lockfile
diff --git a/src/main.c b/src/main.c
index f77bf3a..0e47b2e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1272,7 +1272,7 @@ static int group_to_gid(char *arg)
gr = getgrnam(arg);
if (gr == NULL) {
log_error("group '%s' not found, "
- "using uid: %i", arg, DEFAULT_SOCKET_UID);
+ "using uid: %i", arg, DEFAULT_SOCKET_GID);
return DEFAULT_SOCKET_GID;
}
diff --git a/wdmd/main.c b/wdmd/main.c
index 924bb86..4c19fc7 100644
--- a/wdmd/main.c
+++ b/wdmd/main.c
@@ -13,6 +13,7 @@
#include <getopt.h>
#include <stdint.h>
#include <stddef.h>
+#include <grp.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
@@ -46,11 +47,15 @@
#define DEFAULT_FIRE_TIMEOUT 60
#define DEFAULT_HIGH_PRIORITY 1
+#define DEFAULT_SOCKET_GID 0
+#define DEFAULT_SOCKET_MODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP)
+
static int test_interval = DEFAULT_TEST_INTERVAL;
static int fire_timeout = DEFAULT_FIRE_TIMEOUT;
static int high_priority = DEFAULT_HIGH_PRIORITY;
static int daemon_quit;
static int daemon_debug;
+static int socket_gid;
static time_t last_keepalive;
static char lockfile_path[PATH_MAX];
static int dev_fd;
@@ -327,7 +332,14 @@ static int setup_listener_socket(int *listener_socket)
return rv;
}
- rv = fchmod(s, 666);
+ rv = chmod(addr.sun_path, DEFAULT_SOCKET_MODE);
+ if (rv < 0) {
+ rv = -errno;
+ close(s);
+ return rv;
+ }
+
+ rv = chown(addr.sun_path, -1, socket_gid);
if (rv < 0) {
rv = -errno;
close(s);
@@ -872,6 +884,20 @@ static void setup_priority(void)
}
}
+static int group_to_gid(char *arg)
+{
+ struct group *gr;
+
+ gr = getgrnam(arg);
+ if (gr == NULL) {
+ log_error("group '%s' not found, "
+ "using uid: %i", arg, DEFAULT_SOCKET_GID);
+ return DEFAULT_SOCKET_GID;
+ }
+
+ return gr->gr_gid;
+}
+
static void print_usage_and_exit(int status)
{
printf("Usage:\n");
@@ -881,6 +907,7 @@ static void print_usage_and_exit(int status)
printf("-D debug: no fork and print all logging to stderr\n");
printf("-H <num> use high priority features (1 yes, 0 no, default %d)\n",
DEFAULT_HIGH_PRIORITY);
+ printf("-G <groupname> group ownership for the socket\n");
exit(status);
}
@@ -922,7 +949,8 @@ int main(int argc, char *argv[])
{0, 0, 0, 0 }
};
- c = getopt_long(argc, argv, "hVDH:", long_options, &option_index);
+ c = getopt_long(argc, argv, "hVDH:G:",
+ long_options, &option_index);
if (c == -1)
break;
@@ -936,6 +964,9 @@ int main(int argc, char *argv[])
case 'D':
daemon_debug = 1;
break;
+ case 'G':
+ socket_gid = group_to_gid(optarg);
+ break;
case 'H':
high_priority = atoi(optarg);
break;
commit 6240b410f38b20a3cb02c1004bd040b956637041
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Fri Jan 13 17:08:00 2012 +0000
wdmd: use getopt to parse the command line
diff --git a/wdmd/main.c b/wdmd/main.c
index b3e9d67..924bb86 100644
--- a/wdmd/main.c
+++ b/wdmd/main.c
@@ -10,6 +10,7 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
+#include <getopt.h>
#include <stdint.h>
#include <stddef.h>
#include <fcntl.h>
@@ -39,6 +40,8 @@
#define GNUC_UNUSED __attribute__((__unused__))
#endif
+#define RELEASE_VERSION "1.8"
+
#define DEFAULT_TEST_INTERVAL 10
#define DEFAULT_FIRE_TIMEOUT 60
#define DEFAULT_HIGH_PRIORITY 1
@@ -869,15 +872,25 @@ static void setup_priority(void)
}
}
-static void print_usage(void)
+static void print_usage_and_exit(int status)
{
printf("Usage:\n");
printf("wdmd [options]\n\n");
- printf("version print version\n");
- printf("help print usage\n");
+ printf("--version, -V print version\n");
+ printf("--help, -h print usage\n");
printf("-D debug: no fork and print all logging to stderr\n");
printf("-H <num> use high priority features (1 yes, 0 no, default %d)\n",
DEFAULT_HIGH_PRIORITY);
+ exit(status);
+}
+
+static void print_version_and_exit(void)
+{
+ printf("wdmd version %s tests_built%s%s%s\n", RELEASE_VERSION,
+ scripts_built ? scripts_built : "",
+ client_built ? client_built : "",
+ files_built ? files_built : "");
+ exit(0);
}
/* If wdmd exits abnormally, /dev/watchdog will eventually fire, and clients
@@ -888,14 +901,9 @@ static void print_usage(void)
would be for wdmd to fail starting if it found a pid file left over from
its previous run. */
-#define RELEASE_VERSION "1.8"
-
int main(int argc, char *argv[])
{
- char optchar;
- char *optionarg;
- char *p;
- int i, rv;
+ int rv;
/*
* TODO:
@@ -904,61 +912,34 @@ int main(int argc, char *argv[])
* -f <num> enable test files (1 yes, 0 no, default ...)
*/
- if ((argc > 1) &&
- !strcmp(argv[1], "version")) {
- printf("wdmd version %s tests_built%s%s%s\n", RELEASE_VERSION,
- scripts_built ? scripts_built : "",
- client_built ? client_built : "",
- files_built ? files_built : "");
- return 0;
- }
-
- if ((argc > 1) &&
- (!strcmp(argv[1], "help") || !strcmp(argv[1], "--help") || !strcmp(argv[1], "-h"))) {
- print_usage();
- return 0;
- }
-
- for (i = 1; i < argc; ) {
- p = argv[i];
-
- if ((p[0] != '-') || (strlen(p) != 2)) {
- fprintf(stderr, "unknown option %s\n", p);
- fprintf(stderr, "space required before option value\n");
- exit(EXIT_FAILURE);
- }
-
- optchar = p[1];
- i++;
-
- /* the only option that does not have optionarg */
- if (optchar == 'D') {
- daemon_debug = 1;
- continue;
- }
-
- if (i >= argc) {
- fprintf(stderr, "option '%c' requires arg\n", optchar);
- exit(EXIT_FAILURE);
- }
-
- optionarg = argv[i];
-
- switch (optchar) {
- case 'H':
- high_priority = atoi(optionarg);
- break;
- default:
- fprintf(stderr, "unknown option: %c\n", optchar);
- exit(EXIT_FAILURE);
- }
-
- i++;
- }
-
- if ((argc > 1) &&
- !strcmp(argv[1], "-D")) {
- daemon_debug = 1;
+ while (1) {
+ int c;
+ int option_index = 0;
+
+ static struct option long_options[] = {
+ {"help", no_argument, 0, 'h' },
+ {"version", no_argument, 0, 'V' },
+ {0, 0, 0, 0 }
+ };
+
+ c = getopt_long(argc, argv, "hVDH:", long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'h':
+ print_usage_and_exit(0);
+ break;
+ case 'V':
+ print_version_and_exit();
+ break;
+ case 'D':
+ daemon_debug = 1;
+ break;
+ case 'H':
+ high_priority = atoi(optarg);
+ break;
+ }
}
if (!daemon_debug) {
11 years, 8 months
src/Makefile
by David Teigland
src/Makefile | 2 ++
1 file changed, 2 insertions(+)
New commits:
commit 8d440397ef8ba9730016e9e34035f59b1afdc0c6
Author: David Weber <wb(a)munzinger.de>
Date: Wed Jan 11 09:27:15 2012 -0600
Install another symlink to shared library
Needed for packaging on Ubuntu
diff --git a/src/Makefile b/src/Makefile
index f7cf6cd..05baa90 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -119,5 +119,7 @@ install: all
$(INSTALL) -c -m 755 $(LIBSO_CLIENT_TARGET) $(DESTDIR)/$(LIBDIR)
cp -a $(LIB_ENTIRE_TARGET).so $(DESTDIR)/$(LIBDIR)
cp -a $(LIB_CLIENT_TARGET).so $(DESTDIR)/$(LIBDIR)
+ cp -a $(LIB_ENTIRE_TARGET).so.$(SOMAJOR) $(DESTDIR)/$(LIBDIR)
+ cp -a $(LIB_CLIENT_TARGET).so.$(SOMAJOR) $(DESTDIR)/$(LIBDIR)
$(INSTALL) -c -m 644 $(HEADER_TARGET) $(DESTDIR)/$(HEADIR)
$(INSTALL) -m 644 $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8/
11 years, 8 months
[Patch] Fix symlink
by David Weber
Another patch to get things working more smoothly on Ubuntu.
A symlink was created and linked against, but it was never installed.
Please check and commit.
You can of course also drop linking against it in the first place if you prefer that approach more.
One problem that still exists, by the way, is the LIBDIR:
/usr/lib64 isn't included in LD_PATH on Ubuntu. The best solution would be to use a configure script, but I don't know if I will have time to do that. Maybe somebody wants to pick up that job?
David
11 years, 8 months
init.d/sanlock src/main.c
by David Teigland
init.d/sanlock | 5 +++--
src/main.c | 5 ++++-
2 files changed, 7 insertions(+), 3 deletions(-)
New commits:
commit 44b154b51be21e04fa7a8097fa7c2b2711ed21ad
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Tue Jan 10 17:02:06 2012 +0000
misc: run sanlock daemon as sanlock user
Signed-off-by: Federico Simoncelli <fsimonce(a)redhat.com>
diff --git a/init.d/sanlock b/init.d/sanlock
index b2682ef..ceac659 100644
--- a/init.d/sanlock
+++ b/init.d/sanlock
@@ -25,7 +25,8 @@ prog="sanlock"
lockfile="/var/run/$prog/$prog.pid"
exec="/usr/sbin/$prog"
-SANLOCKOPTS="-U sanlock -G sanlock"
+SANLOCKUSER="sanlock"
+SANLOCKOPTS="-U $SANLOCKUSER -G $SANLOCKUSER"
[ -f /etc/sysconfig/$prog ] && . /etc/sysconfig/$prog
@@ -38,7 +39,7 @@ start() {
fi
echo -n $"Starting $prog: "
- daemon $prog daemon $SANLOCKOPTS
+ daemon --user=$SANLOCKUSER $prog daemon $SANLOCKOPTS
retval=$?
echo
[ $retval -eq 0 ]
diff --git a/src/main.c b/src/main.c
index 2ab0aae..f77bf3a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1066,8 +1066,11 @@ static int setup_listener(void)
goto exit_fail;
rv = chown(addr.sun_path, com.uid, com.gid);
- if (rv < 0)
+ if (rv < 0) {
+ log_error("could not set socket %s permissions: %s",
+ addr.sun_path, strerror(errno));
goto exit_fail;
+ }
rv = listen(fd, 5);
if (rv < 0)
11 years, 8 months