src/client.c | 59 ++++++--
src/cmd.c | 357 ++++++++++++++++++++++++++++++++++++++++++++++++-
src/direct.c | 28 +++
src/leader.h | 12 +
src/lockspace.c | 3
src/main.c | 42 +++++
src/paxos_lease.c | 4
src/resource.c | 7
src/resource.h | 6
src/sanlock.h | 1
src/sanlock_internal.h | 3
src/sanlock_resource.h | 13 +
src/sanlock_sock.h | 2
13 files changed, 509 insertions(+), 28 deletions(-)
New commits:
commit 1c4a22bda66c26202d00619254e1b2c06c8027c7
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Jan 24 16:56:20 2012 -0600
sanlock: setmode
A resource lease is used to keep track of per-host shared/exclusive
modes associated with the resource. The resource lease is not
associated with a pid like normal resource leases are.
The standard resource lease is acquired/released internally
by setmode when modifying the list of per-host modes.
Only the setmode command is allowed on resources initialized
with MODE, and setmode is not allowed on resources not
initialized with MODE.
- initialize a resource with the MODE flag (or -M)
- use the setmode api/cmd with NL, SH or EX on the resource
(NL is unlocked)
- if no incompatible modes are set, setmode will succeed,
otherwise it returns -EAGAIN
- if an incompatible mode is set, but was set by a host_id+generation
that is now free, stale or dead, that incompatible mode will be cleared
Example command line usage:
> sanlock direct init -s foo:0:/dev/bull/leases:0
> sanlock direct init -M -r foo:bar:/dev/bull/leases:1048576
node-01> sanlock daemon
node-01> sanlock add_lockspace -s foo:1:/dev/bull/leases:0
node-01> sanlock setmode -r foo:bar:/dev/bull/leases:1048576 -m 5
setmode 5
setmode done 0
node-02> sanlock daemon
node-02> sanlock add_lockspace -s foo:2:/dev/bull/leases:0
node-02> sanlock setmode -r foo:bar:/dev/bull/leases:1048576 -m 3
setmode 3
setmode done -11
> sanlock direct dump /dev/bull/leases
offset lockspace resource timestamp own gen lver
00000000 foo a055eb92-2a6a-46fa-ae86-1ba74c0dfee4.node-01 0000698786 0001 0003
00000512 foo f30eed72-8608-443d-b390-aebe931ada85.node-02 0000698034 0002 0001
01048576 foo bar 0000000000 0002 0001 8
EX 0001 0003
diff --git a/src/client.c b/src/client.c
index 6875bb6..d54aaf8 100644
--- a/src/client.c
+++ b/src/client.c
@@ -61,7 +61,7 @@ static int connect_socket(int *sock_fd)
}
static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
- uint32_t data, uint32_t data2)
+ uint32_t data, uint32_t data2, uint64_t data64)
{
struct sm_header header;
int rv;
@@ -73,6 +73,7 @@ static int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
header.length = sizeof(header) + datalen;
header.data = data;
header.data2 = data2;
+ header.data64 = data64;
rv = send(sock, (void *) &header, sizeof(struct sm_header), 0);
if (rv < 0)
@@ -91,7 +92,7 @@ int send_command(int cmd, uint32_t data)
if (rv < 0)
return rv;
- rv = send_header(sock, cmd, 0, 0, data, 0);
+ rv = send_header(sock, cmd, 0, 0, data, 0, 0);
if (rv < 0) {
close(sock);
return rv;
@@ -124,7 +125,7 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t flags)
if (rv < 0)
return rv;
- rv = send_header(fd, cmd, flags, sizeof(struct sanlk_lockspace), 0, 0);
+ rv = send_header(fd, cmd, flags, sizeof(struct sanlk_lockspace), 0, 0, 0);
if (rv < 0)
goto out;
@@ -163,7 +164,7 @@ int sanlock_align(struct sanlk_disk *disk)
if (rv < 0)
return rv;
- rv = send_header(fd, SM_CMD_ALIGN, 0, sizeof(struct sanlk_disk), 0, 0);
+ rv = send_header(fd, SM_CMD_ALIGN, 0, sizeof(struct sanlk_disk), 0, 0, 0);
if (rv < 0)
goto out;
@@ -201,7 +202,7 @@ int sanlock_init(struct sanlk_lockspace *ls,
sizeof(struct sanlk_disk) * res->num_disks;
}
- rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts);
+ rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts, 0);
if (rv < 0)
goto out;
@@ -265,7 +266,7 @@ int sanlock_register(void)
if (rv < 0)
return rv;
- rv = send_header(sock, SM_CMD_REGISTER, 0, 0, 0, 0);
+ rv = send_header(sock, SM_CMD_REGISTER, 0, 0, 0, 0, 0);
if (rv < 0) {
close(sock);
return rv;
@@ -278,7 +279,7 @@ int sanlock_restrict(int sock, uint32_t flags)
{
int rv;
- rv = send_header(sock, SM_CMD_RESTRICT, flags, 0, 0, -1);
+ rv = send_header(sock, SM_CMD_RESTRICT, flags, 0, 0, -1, 0);
if (rv < 0)
return rv;
@@ -333,7 +334,7 @@ int sanlock_acquire(int sock, int pid, uint32_t flags, int res_count,
fd = sock;
}
- rv = send_header(fd, SM_CMD_ACQUIRE, flags, datalen, res_count, data2);
+ rv = send_header(fd, SM_CMD_ACQUIRE, flags, datalen, res_count, data2, 0);
if (rv < 0)
return rv;
@@ -402,7 +403,7 @@ int sanlock_inquire(int sock, int pid, uint32_t flags, int *res_count,
fd = sock;
}
- rv = send_header(fd, SM_CMD_INQUIRE, flags, 0, 0, data2);
+ rv = send_header(fd, SM_CMD_INQUIRE, flags, 0, 0, data2, 0);
if (rv < 0)
return rv;
@@ -476,7 +477,7 @@ int sanlock_release(int sock, int pid, uint32_t flags, int res_count,
datalen = res_count * sizeof(struct sanlk_resource);
- rv = send_header(fd, SM_CMD_RELEASE, flags, datalen, res_count, data2);
+ rv = send_header(fd, SM_CMD_RELEASE, flags, datalen, res_count, data2, 0);
if (rv < 0)
goto out;
@@ -507,7 +508,7 @@ int sanlock_request(uint32_t flags, uint32_t force_mode,
if (rv < 0)
return rv;
- rv = send_header(fd, SM_CMD_REQUEST, flags, datalen, force_mode, 0);
+ rv = send_header(fd, SM_CMD_REQUEST, flags, datalen, force_mode, 0, 0);
if (rv < 0)
goto out;
@@ -552,7 +553,7 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
data = (char *)res;
}
- rv = send_header(fd, cmd, flags, datalen, 0, 0);
+ rv = send_header(fd, cmd, flags, datalen, 0, 0, 0);
if (rv < 0)
goto out;
@@ -568,6 +569,40 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
return rv;
}
+/* Send SM_CMD_SETMODE (mode in header.data, host_id in header.data64)
+   followed by res and its disks; returns recv_result() or a negative error. */
+int sanlock_setmode(uint32_t flags, uint64_t host_id, int mode,
+		    struct sanlk_resource *res)
+{
+	size_t res_len = sizeof(struct sanlk_resource);
+	size_t disks_len = sizeof(struct sanlk_disk) * res->num_disks;
+	int datalen = res_len + disks_len;
+	int fd, rv;
+
+	rv = connect_socket(&fd);
+	if (rv < 0)
+		return rv;
+
+	rv = send_header(fd, SM_CMD_SETMODE, flags, datalen, mode, 0, host_id);
+	if (rv < 0)
+		goto out;
+
+	/* payload: the resource struct, then its disk array */
+	if (send(fd, res, res_len, 0) < 0) {
+		rv = -errno;
+		goto out;
+	}
+	if (send(fd, res->disks, disks_len, 0) < 0) {
+		rv = -errno;
+		goto out;
+	}
+
+	rv = recv_result(fd);
+ out:
+	close(fd);
+	return rv;
+}
+
/*
* convert from struct sanlk_resource to string with format:
* <lockspace_name>:<resource_name>:<path>:<offset>[:<path>:<offset>...]:<lver>
diff --git a/src/cmd.c b/src/cmd.c
index 3cef77d..58994a2 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -350,6 +350,13 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
result = rv;
goto done;
}
+ save_resource_lver(token, token->leader.lver);
+
+ /* TODO: fail and return an error if this resource
+ has LEADER_FL_MODE in which case only setmode is allowed.
+ It may be better to detect this right when we first read the
+ leader record in paxos_lease_acquire */
+
acquire_count++;
}
@@ -766,9 +773,8 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
if (rv != disks_len) {
- free(token);
result = -ENOTCONN;
- goto reply;
+ goto reply_free;
}
/* zero out pad1 and pad2, see WARNING above */
@@ -787,18 +793,20 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
error = request_token(task, token, force_mode, &owner_id);
if (error < 0) {
result = error;
- goto reply;
+ goto reply_free;
}
result = 0;
if (!token->acquire_lver && !force_mode)
- goto reply;
+ goto reply_free;
if (owner_id)
host_status_set_bit(token->r.lockspace_name, owner_id);
- reply:
+
+ reply_free:
free(token);
+ reply:
log_debug("cmd_request %d,%d done %d", ca->ci_in, fd, result);
send_result(fd, &ca->header, result);
@@ -854,6 +862,340 @@ static void cmd_examine(struct task *task GNUC_UNUSED, struct cmd_args *ca)
client_resume(ca->ci_in);
}
+/*
+ * Decide whether host_id, at generation gen, may still be running.
+ * Returns 1 (alive) whenever there is not enough knowledge to say it is
+ * really dead, which forces the caller to fail.  Returns 0 only when the
+ * host_id lease is free, is owned with a newer generation, or the host was
+ * last seen alive (or, if never seen alive, first checked) more than
+ * task->host_dead_seconds ago.  Later this could sit and wait (like
+ * paxos_lease_acquire) until enough is known to call it safely dead.
+ */
+static int host_live(struct task *task, char *lockspace_name, uint64_t host_id, uint64_t gen)
+{
+	struct host_status hs;
+	unsigned long long id = host_id;
+	unsigned long long g = gen;
+	uint64_t now, seen;
+	int rv;
+
+	rv = host_info(lockspace_name, host_id, &hs);
+	if (rv) {
+		log_debug("host_live %llu %llu yes host_info %d", id, g, rv);
+		return 1;
+	}
+
+	/* never checked: nothing to base a "dead" verdict on */
+	if (!hs.last_check) {
+		log_debug("host_live %llu %llu yes unchecked", id, g);
+		return 1;
+	}
+
+	/* the host_id lease is free, not being used */
+	if (!hs.timestamp) {
+		log_debug("host_live %llu %llu no lease free", id, g);
+		return 0;
+	}
+
+	/* the lease owner's generation is newer than the one asked about */
+	if (hs.owner_generation > gen) {
+		log_debug("host_live %llu %llu no old gen %llu", id, g,
+			  (unsigned long long)hs.owner_generation);
+		return 0;
+	}
+
+	now = monotime();
+
+	/* measure from last_live, or from first_check if never seen alive */
+	seen = hs.last_live ? hs.last_live : hs.first_check;
+	if (now - seen > task->host_dead_seconds) {
+		if (hs.last_live) {
+			log_debug("host_live %llu %llu no last_live %llu", id, g,
+				  (unsigned long long)hs.last_live);
+		} else {
+			log_debug("host_live %llu %llu no first_check %llu", id, g,
+				  (unsigned long long)hs.first_check);
+		}
+		return 0;
+	}
+
+	log_debug("host_live %llu %llu yes recent first_check %llu last_live %llu",
+		  id, g, (unsigned long long)hs.first_check,
+		  (unsigned long long)hs.last_live);
+	return 1;
+}
+
+/*
+ * Set the mode (NL, SH or EX) stored for one host_id on a resource lease
+ * initialized with the MODE flag, replying via send_result().  Steps:
+ *   acquire the resource lease
+ *   for all mblocks, if any mblock.mode is incompatible with mode
+ *     if mblock is for dead host_id, clear mblock.mode, continue
+ *     if mblock is for live host_id, return -EAGAIN
+ *   write mblock.mode and mblock.generation for host_id
+ *   release the resource lease
+ */
+static void cmd_setmode(struct task *task, struct cmd_args *ca)
+{
+	struct token *token;
+	struct sync_disk *disk;
+	struct sanlk_resource res;
+	struct space space;
+	struct mode_block *mb;
+	char *iobuf, **p_iobuf;
+	char *rbuf, *wbuf;
+	uint64_t set_hostid, set_gen;
+	int iobuf_len;
+	int set_mode;
+	int token_len, disks_len;
+	int i, j, fd, rv, result;
+
+	fd = client[ca->ci_in].fd;
+	/* the two args from sanlock_setmode() */
+	set_hostid = ca->header.data64; /* TODO: add to struct */
+	set_mode = ca->header.data;
+
+	/* receiving and setting up token (copied from cmd_request) */
+	rv = recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL);
+	if (rv != sizeof(struct sanlk_resource)) {
+		log_error("cmd_setmode %d,%d recv %d %d",
+			  ca->ci_in, fd, rv, errno);
+		result = -ENOTCONN;
+		goto reply;
+	}
+
+	if (!res.num_disks || res.num_disks > SANLK_MAX_DISKS) {
+		result = -ERANGE;
+		goto reply;
+	}
+
+	disks_len = res.num_disks * sizeof(struct sync_disk);
+	token_len = sizeof(struct token) + disks_len;
+	token = malloc(token_len);
+	if (!token) {
+		result = -ENOMEM;
+		goto reply;
+	}
+
+	memset(token, 0, token_len);
+	token->disks = (struct sync_disk *)&token->r.disks[0]; /* shorthand */
+	token->r.num_disks = res.num_disks;
+	memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
+	memcpy(token->r.name, res.name, SANLK_NAME_LEN);
+
+	token->acquire_lver = res.lver;
+	token->acquire_data64 = res.data64;
+	token->acquire_data32 = res.data32;
+	token->acquire_flags = res.flags;
+
+	/*
+	 * receive sanlk_disk's / sync_disk's
+	 *
+	 * WARNING: as a shortcut, this requires that sync_disk and
+	 * sanlk_disk match; this is the reason for the pad fields
+	 * in sanlk_disk (TODO: let these differ?)
+	 */
+	rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
+	if (rv != disks_len) {
+		result = -ENOTCONN;
+		goto reply_token;
+	}
+
+	/* zero out pad1 and pad2, see WARNING above */
+	for (j = 0; j < token->r.num_disks; j++) {
+		token->disks[j].sector_size = 0;
+		token->disks[j].fd = -1;
+	}
+
+	log_debug("cmd_setmode %d,%d host_id %llu mode %u %.48s:%.48s:%.256s:%llu",
+		  ca->ci_in, fd,
+		  (unsigned long long)set_hostid, set_mode,
+		  token->r.lockspace_name,
+		  token->r.name,
+		  token->disks[0].path,
+		  (unsigned long long)token->r.disks[0].offset);
+
+	/* client supplied: only NL, SH and EX may be stored on disk */
+	if (set_mode != SANLK_MODE_NL && set_mode != SANLK_MODE_SH &&
+	    set_mode != SANLK_MODE_EX) {
+		log_error("cmd_setmode %d,%d invalid mode %d", ca->ci_in, fd, set_mode);
+		result = -EINVAL;
+		goto reply_token;
+	}
+
+	/* find what our own host_id and generation are for this lockspace */
+	memset(&space, 0, sizeof(space)); /* killing_pids is logged even on error */
+	rv = lockspace_info(token->r.lockspace_name, &space);
+	if (rv < 0 || space.killing_pids) {
+		log_error("cmd_setmode %d,%d invalid lockspace "
+			  "found %d failed %d name %.48s",
+			  ca->ci_in, fd, rv, space.killing_pids,
+			  token->r.lockspace_name);
+		result = -ENOSPC;
+		goto reply_token;
+	}
+	token->host_id = space.host_id;
+	token->host_generation = space.host_generation;
+
+	if (!set_hostid)
+		set_hostid = token->host_id;
+	if (token->host_id == set_hostid)
+		set_gen = token->host_generation;
+	else
+		set_gen = 0;
+
+	/* only allow clearing mode of host_id's that are not ours
+	   (not sure this will be needed, but it may be useful) */
+	if (token->host_id != set_hostid && set_mode != SANLK_MODE_NL) {
+		log_error("cmd_setmode %d,%d host_id %llu set hostid %llu mode %d",
+			  ca->ci_in, fd, (unsigned long long)token->host_id,
+			  (unsigned long long)set_hostid, set_mode);
+		result = -EINVAL;
+		goto reply_token;
+	}
+
+	rv = acquire_token(task, token, 0, 0);
+	if (rv < 0) {
+		log_error("cmd_setmode %d,%d acquire error %d", ca->ci_in, fd, rv);
+		result = rv;
+		goto reply_token;
+	}
+
+	if (!(token->leader.flags & LEADER_FL_MODE)) {
+		/* a resource lease must be initialized with the MODE
+		   flag for setmode to work */
+		log_error("cmd_setmode %d,%d no-mode resource", ca->ci_in, fd);
+		result = -EINVAL;
+		goto reply_rel;
+	}
+
+	/* client supplied, selects the sector copied and written below */
+	if (!set_hostid || set_hostid > token->leader.num_hosts) {
+		log_error("cmd_setmode %d,%d invalid host_id %llu num_hosts %llu", ca->ci_in, fd,
+			  (unsigned long long)set_hostid, (unsigned long long)token->leader.num_hosts);
+		result = -EINVAL;
+		goto reply_rel;
+	}
+
+	/*
+	 * Read the entire lease area, which includes all host's sectors.
+	 * Check mblock in each host sector for other incompatible lock modes.
+	 * If we find one, but the host is dead, then clear it and continue
+	 * checking. If no conflicts are found, write the requested mode
+	 * in the mblock.
+	 */
+
+	/* only keep modes on the first disk */
+	disk = &token->disks[0];
+	rv = open_disk(disk);
+	if (rv < 0) {
+		result = rv;
+		goto reply_rel;
+	}
+
+	iobuf_len = direct_align(disk);
+	p_iobuf = &iobuf;
+	rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+	if (rv) {
+		result = -rv; /* posix_memalign returns a positive errno value */
+		goto reply_close;
+	}
+	memset(iobuf, 0, iobuf_len);
+
+	rv = read_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task);
+	if (rv < 0) {
+		if (rv != SANLK_AIO_TIMEOUT)
+			free(iobuf);
+		result = rv;
+		goto reply_close;
+	}
+
+	wbuf = malloc(disk->sector_size);
+	if (!wbuf) {
+		result = -ENOMEM;
+		free(iobuf);
+		goto reply_close;
+	}
+
+	if (set_mode == SANLK_MODE_NL)
+		goto do_write;
+
+	for (i = 0; i < token->leader.num_hosts; i++) {
+		if (i+1 == set_hostid)
+			continue;
+		/*
+		 * The sector_nr for host_id N is:
+		 * 1 leader block + 1 request block + (N-1) host blocks.
+		 * The mode_block is DBLOCK_MAX_LEN into the sector
+		 *
+		 * rbuf is the start of the sector (where the paxos_dblock
+		 * struct exists, which we aren't modifying) for host_id i+1.
+		 */
+		rbuf = iobuf + ((2 + i) * disk->sector_size);
+		mb = (struct mode_block *)(rbuf + DBLOCK_MAX_LEN);
+
+		if (mb->mode == SANLK_MODE_NL)
+			continue;
+		if (mb->mode == SANLK_MODE_SH && set_mode == SANLK_MODE_SH)
+			continue;
+
+		/* incompatible locks */
+		if (host_live(task, token->r.lockspace_name, i+1, mb->generation)) {
+			log_error("cmd_setmode %d,%d mode conflict host_id %d gen %llu mode %u",
+				  ca->ci_in, fd, i+1, (unsigned long long)mb->generation, mb->mode);
+			result = -EAGAIN;
+			goto reply_free;
+		}
+
+		/* clear mode in dead host's sector */
+		log_error("cmd_setmode %d,%d clear dead host_id %d gen %llu mode %u",
+			  ca->ci_in, fd, i+1, (unsigned long long)mb->generation, mb->mode);
+		memcpy(wbuf, rbuf, disk->sector_size);
+		mb = (struct mode_block *)(wbuf + DBLOCK_MAX_LEN);
+		mb->mode = SANLK_MODE_NL;
+		mb->generation = 0;
+		rv = write_sector(disk, 2 + i, wbuf, disk->sector_size,
+				  task, "mblock");
+		if (rv < 0) {
+			result = rv;
+			goto reply_free;
+		}
+	}
+
+	log_debug("cmd_setmode %d,%d write host_id %llu gen %llu mode %d",
+		  ca->ci_in, fd, (unsigned long long)set_hostid,
+		  (unsigned long long)set_gen, set_mode);
+
+ do_write:
+	rbuf = iobuf + ((2 + set_hostid - 1) * disk->sector_size);
+	memcpy(wbuf, rbuf, disk->sector_size);
+	mb = (struct mode_block *)(wbuf + DBLOCK_MAX_LEN);
+	mb->mode = set_mode;
+	mb->generation = set_gen;
+	rv = write_sector(disk, 2 + set_hostid - 1, wbuf, disk->sector_size,
+			  task, "mblock");
+	if (rv < 0) {
+		result = rv;
+		goto reply_free;
+	}
+
+	result = 0;
+
+ reply_free:
+	free(wbuf);
+	free(iobuf);
+ reply_close:
+	close_disks(disk, 1);
+ reply_rel:
+	release_token(task, token);
+ reply_token:
+	free(token);
+ reply:
+	log_debug("cmd_setmode %d,%d done %d", ca->ci_in, fd, result);
+	send_result(fd, &ca->header, result);
+	client_resume(ca->ci_in);
+}
+
static void cmd_add_lockspace(struct cmd_args *ca)
{
struct sanlk_lockspace lockspace;
@@ -1111,6 +1453,8 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca)
memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
memcpy(token->r.name, res.name, SANLK_NAME_LEN);
+ token->acquire_flags = res.flags;
+
/*
* receive sanlk_disk's / sync_disk's
*
@@ -1196,6 +1540,9 @@ void call_cmd_thread(struct task *task, struct cmd_args *ca)
case SM_CMD_EXAMINE_RESOURCE:
cmd_examine(task, ca);
break;
+ case SM_CMD_SETMODE:
+ cmd_setmode(task, ca);
+ break;
};
}
diff --git a/src/direct.c b/src/direct.c
index 8394e0a..263f6c7 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -102,6 +102,8 @@ static int do_paxos_action(int action, struct task *task,
memcpy(token->r.lockspace_name, res->lockspace_name, SANLK_NAME_LEN);
memcpy(token->r.name, res->name, SANLK_NAME_LEN);
+ token->acquire_flags = res->flags;
+
/* WARNING sync_disk == sanlk_disk */
memcpy(token->disks, &res->disks, disks_len);
@@ -426,6 +428,15 @@ int direct_read_leader(struct task *task,
int test_id_bit(int host_id, char *bitmap);
+static const char *mode_str(int mode)
+{
+	switch (mode) {
+	case 3: return "SH";	/* same value as SANLK_MODE_SH (shared) */
+	case 5: return "EX";	/* same value as SANLK_MODE_EX (exclusive) */
+	default: return "??";	/* any other value */
+	}
+}
+
int direct_dump(struct task *task, char *dump_path, int force_mode)
{
char *data, *bitmap;
@@ -433,6 +444,8 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
struct leader_record *lr;
struct request_record *rr;
struct sync_disk sd;
+ char *pd;
+ struct mode_block *mb;
char sname[NAME_ID_SIZE+1];
char rname[NAME_ID_SIZE+1];
uint64_t sector_nr;
@@ -543,6 +556,21 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
(unsigned long long)rr->lver, rr->force_mode);
}
printf("\n");
+
+ if (lr->flags & LEADER_FL_MODE) {
+ for (i = 0; i < lr->num_hosts; i++) {
+ pd = data + ((2 + i) * sd.sector_size);
+ mb = (struct mode_block *)(pd + DBLOCK_MAX_LEN);
+
+ if (!mb->mode)
+ continue;
+
+ printf(" ");
+ printf("%s %04d %04llu\n",
+ mode_str(mb->mode), i+1,
+ (unsigned long long)mb->generation);
+ }
+ }
} else {
break;
}
diff --git a/src/leader.h b/src/leader.h
index e7304b0..f850ef8 100644
--- a/src/leader.h
+++ b/src/leader.h
@@ -42,10 +42,12 @@
#define LEADER_CHECKSUM_LEN 168
#define LEASE_FREE 0
+#define LEADER_FL_MODE 0x00000001
+
struct leader_record {
uint32_t magic;
uint32_t version;
- uint32_t unused0;
+ uint32_t flags;
uint32_t sector_size;
uint64_t num_hosts;
uint64_t max_hosts;
@@ -81,4 +83,12 @@ struct request_record {
uint32_t force_mode;
};
+#define DBLOCK_MAX_LEN 128 /* byte offset of the mode_block from the start of a host's sector */
+
+struct mode_block {
+ uint32_t mode; /* setmode stores a SANLK_MODE_* value here; 0 (NL) is unlocked */
+ uint32_t unused;
+ uint64_t generation; /* host generation written with the mode; 0 when cleared */
+};
+
#endif
diff --git a/src/lockspace.c b/src/lockspace.c
index 188758c..dc89d90 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -262,6 +262,9 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
hs = &sp->host_status[i];
hs->last_check = now;
+ if (!hs->first_check)
+ hs->first_check = now;
+
leader = (struct leader_record *)(buf + (i * disk->sector_size));
if (hs->owner_id == leader->owner_id &&
diff --git a/src/main.c b/src/main.c
index 7ec689e..39a3b58 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1002,6 +1002,7 @@ static void process_connection(int ci)
case SM_CMD_ALIGN:
case SM_CMD_INIT_LOCKSPACE:
case SM_CMD_INIT_RESOURCE:
+ case SM_CMD_SETMODE:
rv = client_suspend(ci);
if (rv < 0)
return;
@@ -1367,7 +1368,8 @@ static void print_usage(void)
printf("sanlock client host_status -s LOCKSPACE [-D]\n");
printf("sanlock client log_dump\n");
printf("sanlock client shutdown [-f 0|1]\n");
- printf("sanlock client init -s LOCKSPACE | -r RESOURCE\n");
+ printf("sanlock client init -s LOCKSPACE\n");
+ printf("sanlock client init -r RESOURCE [-M]\n");
printf("sanlock client align -s LOCKSPACE\n");
printf("sanlock client add_lockspace -s LOCKSPACE\n");
printf("sanlock client inq_lockspace -s LOCKSPACE\n");
@@ -1378,9 +1380,11 @@ static void print_usage(void)
printf("sanlock client inquire -p <pid>\n");
printf("sanlock client request -r RESOURCE -f <force_mode>\n");
printf("sanlock client examine -r RESOURCE | -s LOCKSPACE\n");
+ printf("sanlock client setmode -r RESOURCE -m <lock_mode>\n");
printf("\n");
printf("sanlock direct <action> [-a 0|1] [-o 0|1]\n");
- printf("sanlock direct init -s LOCKSPACE | -r RESOURCE\n");
+ printf("sanlock direct init -s LOCKSPACE\n");
+ printf("sanlock direct init -r RESOURCE [-M]\n");
printf("sanlock direct read_leader -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock direct read_id -s LOCKSPACE\n");
printf("sanlock direct live_id -s LOCKSPACE\n");
@@ -1417,6 +1421,7 @@ static int read_command_line(int argc, char *argv[])
char *arg1 = argv[1];
char *act;
int i, j, len, begin_command = 0;
+ int init_mode = 0;
if (argc < 2 || !strcmp(arg1, "help") || !strcmp(arg1, "--help") ||
!strcmp(arg1, "-h")) {
@@ -1487,6 +1492,8 @@ static int read_command_line(int argc, char *argv[])
com.action = ACT_REQUEST;
else if (!strcmp(act, "examine"))
com.action = ACT_EXAMINE;
+ else if (!strcmp(act, "setmode"))
+ com.action = ACT_SETMODE;
else if (!strcmp(act, "align"))
com.action = ACT_CLIENT_ALIGN;
else if (!strcmp(act, "init"))
@@ -1546,13 +1553,25 @@ static int read_command_line(int argc, char *argv[])
optchar = p[1];
i++;
- /* the only option that does not have optionarg */
+ /*
+ * options that do not have optionarg
+ */
+
if (optchar == 'D') {
com.debug = 1;
log_stderr_priority = LOG_DEBUG;
continue;
}
+ if (optchar == 'M') {
+ init_mode = 1;
+ continue;
+ }
+
+ /*
+ * options that require arg
+ */
+
if (i >= argc) {
log_tool("option '%c' requires arg", optchar);
exit(EXIT_FAILURE);
@@ -1602,7 +1621,11 @@ static int read_command_line(int argc, char *argv[])
com.num_hosts = atoi(optionarg);
break;
case 'm':
- com.max_hosts = atoi(optionarg);
+ if (com.action == ACT_SETMODE) {
+ com.lock_mode = atoi(optionarg);
+ } else {
+ com.max_hosts = atoi(optionarg);
+ }
break;
case 'p':
com.pid = atoi(optionarg);
@@ -1647,6 +1670,9 @@ static int read_command_line(int argc, char *argv[])
i++;
}
+ if (init_mode)
+ com.res_args[0]->flags |= SANLK_RES_MODE;
+
/*
* the remaining args are for the command
*
@@ -1831,6 +1857,13 @@ static int do_client(void)
log_tool("examine done %d", rv);
break;
+ case ACT_SETMODE:
+ log_tool("setmode %d", com.lock_mode);
+ /* TODO: add optional host_id */
+ rv = sanlock_setmode(0, 0, com.lock_mode, com.res_args[0]);
+ log_tool("setmode done %d", rv);
+ break;
+
case ACT_CLIENT_ALIGN:
log_tool("align");
rv = sanlock_align(&com.lockspace.host_id_disk);
@@ -1883,6 +1916,7 @@ static int do_direct(void)
log_tool("read_leader done %d", rv);
log_tool("magic 0x%0x", leader.magic);
log_tool("version 0x%x", leader.version);
+ log_tool("flags 0x%x", leader.flags);
log_tool("sector_size %u", leader.sector_size);
log_tool("num_hosts %llu",
(unsigned long long)leader.num_hosts);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index a55933d..f55f3bb 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -1517,6 +1517,10 @@ int paxos_lease_init(struct task *task,
leader->timestamp = LEASE_FREE;
strncpy(leader->space_name, token->r.lockspace_name, NAME_ID_SIZE);
strncpy(leader->resource_name, token->r.name, NAME_ID_SIZE);
+
+ if (token->acquire_flags & SANLK_RES_MODE)
+ leader->flags = LEADER_FL_MODE;
+
leader->checksum = leader_checksum(leader);
rr = (struct request_record *)(iobuf + sector_size);
diff --git a/src/resource.c b/src/resource.c
index 9002f06..ea9d7bb 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -83,7 +83,7 @@ static struct resource *find_resource(struct token *token,
return NULL;
}
-static void save_resource_lver(struct token *token, uint64_t lver)
+void save_resource_lver(struct token *token, uint64_t lver)
{
struct resource *r;
@@ -190,15 +190,14 @@ int acquire_token(struct task *task, struct token *token,
close_disks(token->disks, token->r.num_disks);
log_token(token, "acquire rv %d lver %llu at %llu", rv,
- (unsigned long long)token->leader.lver,
- (unsigned long long)token->leader.timestamp);
+ (unsigned long long)leader_ret.lver,
+ (unsigned long long)leader_ret.timestamp);
if (rv < 0)
return rv;
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
token->r.lver = token->leader.lver;
- save_resource_lver(token, token->leader.lver);
return rv; /* SANLK_OK */
}
diff --git a/src/resource.h b/src/resource.h
index 72de40a..6f8edee 100644
--- a/src/resource.h
+++ b/src/resource.h
@@ -6,8 +6,8 @@
* of the GNU General Public License v2 or (at your option) any later version.
*/
-#ifndef __TOKEN_MANAGER_H__
-#define __TOKEN_MANAGER_H__
+#ifndef __RESOURCE_H__
+#define __RESOURCE_H__
int acquire_token(struct task *task, struct token *token,
uint64_t acquire_lver, int new_num_hosts);
@@ -22,6 +22,8 @@ int request_token(struct task *task, struct token *token, uint32_t force_mode,
int add_resource(struct token *token, int pid, uint32_t cl_restrict);
void del_resource(struct token *token);
+void save_resource_lver(struct token *token, uint64_t lver);
+
int set_resource_examine(char *space_name, char *res_name);
int setup_token_manager(void);
diff --git a/src/sanlock.h b/src/sanlock.h
index a56bb4e..7e49122 100644
--- a/src/sanlock.h
+++ b/src/sanlock.h
@@ -53,6 +53,7 @@ struct sanlk_disk {
#define SANLK_RES_LVER 0x1 /* lver field is set */
#define SANLK_RES_NUM_HOSTS 0x2 /* data32 field is new num_hosts */
+#define SANLK_RES_MODE 0x4
struct sanlk_resource {
char lockspace_name[SANLK_NAME_LEN]; /* terminating \0 not required */
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index eeeb897..519faf3 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -117,6 +117,7 @@ struct lease_status {
};
struct host_status {
+ uint64_t first_check; /* local monotime */
uint64_t last_check; /* local monotime */
uint64_t last_live; /* local monotime */
uint64_t last_req; /* local monotime */
@@ -530,6 +531,7 @@ struct command_line {
int max_hosts; /* -m */
int res_count;
uint32_t force_mode;
+ int lock_mode;
char our_host_name[SANLK_NAME_LEN+1];
char *dump_path;
struct sanlk_lockspace lockspace; /* -s LOCKSPACE */
@@ -568,6 +570,7 @@ enum {
ACT_CLIENT_INIT,
ACT_CLIENT_ALIGN,
ACT_EXAMINE,
+ ACT_SETMODE,
};
EXTERN int external_shutdown;
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index 3991094..bb64d27 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -52,6 +52,19 @@ int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
struct sanlk_resource *res);
/*
+ * Set a host's mode for the resource. A standard lease is acquired, the
+ * mode is set if compatible with existing modes, the lease is released.
+ * The resource lease used here is not associated with a pid.
+ */
+
+#define SANLK_MODE_NL 0
+#define SANLK_MODE_SH 3
+#define SANLK_MODE_EX 5
+
+int sanlock_setmode(uint32_t flags, uint64_t host_id, int mode,
+ struct sanlk_resource *res);
+
+/*
* Functions to convert between string and struct resource formats.
* All allocate space for returned data that the caller must free.
*/
diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h
index aa9e46a..b5c80dc 100644
--- a/src/sanlock_sock.h
+++ b/src/sanlock_sock.h
@@ -36,6 +36,7 @@ enum {
SM_CMD_EXAMINE_RESOURCE = 16,
SM_CMD_HOST_STATUS = 17,
SM_CMD_INQ_LOCKSPACE = 18,
+ SM_CMD_SETMODE = 19,
};
struct sm_header {
@@ -47,6 +48,7 @@ struct sm_header {
uint32_t seq;
uint32_t data;
uint32_t data2;
+ uint64_t data64;
};
#define SANLK_STATE_MAXSTR 4096