src/client.c | 216 ++++++++++++++++++++++++++++++++++++++++++------- src/cmd.c | 192 ++++++++++++++++++++++++++++++++++++++++--- src/delta_lease.c | 59 ++++++++++++- src/delta_lease.h | 7 + src/direct.c | 49 +++++------ src/direct.h | 11 +- src/direct_lib.c | 36 +++++++- src/lockspace.c | 2 src/main.c | 60 +++++++++++-- src/paxos_lease.c | 32 +++++++ src/paxos_lease.h | 4 src/sanlock.8 | 41 ++++++--- src/sanlock_admin.h | 77 +++++++++++++++++ src/sanlock_direct.h | 18 ++++ src/sanlock_internal.h | 1 src/sanlock_sock.h | 40 ++++----- 16 files changed, 729 insertions(+), 116 deletions(-)
New commits: commit 185bc1f850ead0092d20d6dacc647a40ebdd1205 Author: David Teigland teigland@redhat.com Date: Wed Nov 21 09:14:18 2012 -0600
sanlock: read and write apis for lockspace and resource
- write_lockspace includes an arg to specify the io timeout written in the disk record.
- read_lockspace provides the io timeout from the disk record.
- read_lockspace/resource only require path and offset input. If host_id is zero or unspecified, host_id 1 is used. If other parameters are specified, they are used and verified.
# sanlock client init -s s1:0:/dev/vg/leases:0 # sanlock client init -r s1:r1:/dev/vg/leases:1048576
# sanlock client read -s ::/dev/vg/leases:0 s s1:1:/dev/vg/leases:0 io_timeout 10
# sanlock client read -r ::/dev/vg/leases:1048576 r s1:r1:/dev/vg/leases:1048576:0
Signed-off-by: David Teigland teigland@redhat.com
diff --git a/src/client.c b/src/client.c index 48e1505..ae73226 100644 --- a/src/client.c +++ b/src/client.c @@ -187,50 +187,157 @@ int sanlock_align(struct sanlk_disk *disk) return rv; }
-int sanlock_init(struct sanlk_lockspace *ls, - struct sanlk_resource *res, - int max_hosts, int num_hosts) +int sanlock_read_lockspace(struct sanlk_lockspace *ls, uint32_t flags, uint32_t *io_timeout) { - int rv, fd, cmd, datalen; + struct sm_header h; + int rv, fd;
- if (!ls && !res) + if (!ls || !ls->host_id_disk.path[0]) return -EINVAL;
rv = connect_socket(&fd); if (rv < 0) return rv;
- if (ls && ls->host_id_disk.path[0]) { - cmd = SM_CMD_INIT_LOCKSPACE; - datalen = sizeof(struct sanlk_lockspace); - } else { - cmd = SM_CMD_INIT_RESOURCE; - datalen = sizeof(struct sanlk_resource) + - sizeof(struct sanlk_disk) * res->num_disks; + rv = send_header(fd, SM_CMD_READ_LOCKSPACE, flags, + sizeof(struct sanlk_lockspace), + 0, 0); + if (rv < 0) + goto out; + + rv = send(fd, ls, sizeof(struct sanlk_lockspace), 0); + if (rv < 0) { + rv = -errno; + goto out; + } + + /* receive result, io_timeout and ls struct */ + + memset(&h, 0, sizeof(struct sm_header)); + + rv = recv(fd, &h, sizeof(h), MSG_WAITALL); + if (rv < 0) { + rv = -errno; + goto out; }
- rv = send_header(fd, cmd, 0, datalen, max_hosts, num_hosts); + if (rv != sizeof(h)) { + rv = -1; + goto out; + } + + rv = (int)h.data; if (rv < 0) goto out;
- if (ls) { - rv = send(fd, ls, sizeof(struct sanlk_lockspace), 0); - if (rv < 0) { - rv = -errno; - goto out; - } - } else { - rv = send(fd, res, sizeof(struct sanlk_resource), 0); - if (rv < 0) { - rv = -errno; - goto out; - } + rv = recv(fd, ls, sizeof(struct sanlk_lockspace), MSG_WAITALL); + if (rv < 0) { + rv = -errno; + goto out; + }
- rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0); - if (rv < 0) { - rv = -errno; - goto out; - } + if (rv != sizeof(struct sanlk_lockspace)) { + rv = -1; + goto out; + } + + *io_timeout = h.data2; + rv = (int)h.data; + out: + close(fd); + return rv; +} + +int sanlock_read_resource(struct sanlk_resource *res, uint32_t flags) +{ + struct sm_header h; + int rv, fd; + + if (!res || !res->num_disks || res->num_disks > SANLK_MAX_DISKS || + !res->disks[0].path[0]) + return -EINVAL; + + rv = connect_socket(&fd); + if (rv < 0) + return rv; + + rv = send_header(fd, SM_CMD_READ_RESOURCE, flags, + sizeof(struct sanlk_resource) + + sizeof(struct sanlk_disk) * res->num_disks, + 0, 0); + if (rv < 0) + goto out; + + rv = send(fd, res, sizeof(struct sanlk_resource), 0); + if (rv < 0) { + rv = -errno; + goto out; + } + + rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0); + if (rv < 0) { + rv = -errno; + goto out; + } + + /* receive result and res struct */ + + memset(&h, 0, sizeof(struct sm_header)); + + rv = recv(fd, &h, sizeof(h), MSG_WAITALL); + if (rv < 0) { + rv = -errno; + goto out; + } + + if (rv != sizeof(h)) { + rv = -1; + goto out; + } + + rv = (int)h.data; + if (rv < 0) + goto out; + + rv = recv(fd, res, sizeof(struct sanlk_resource), MSG_WAITALL); + if (rv < 0) { + rv = -errno; + goto out; + } + + if (rv != sizeof(struct sanlk_resource)) { + rv = -1; + goto out; + } + + rv = (int)h.data; + out: + close(fd); + return rv; +} + +int sanlock_write_lockspace(struct sanlk_lockspace *ls, int max_hosts, + uint32_t flags, uint32_t io_timeout) +{ + int rv, fd; + + if (!ls || !ls->host_id_disk.path[0]) + return -EINVAL; + + rv = connect_socket(&fd); + if (rv < 0) + return rv; + + rv = send_header(fd, SM_CMD_WRITE_LOCKSPACE, flags, + sizeof(struct sanlk_lockspace), + max_hosts, io_timeout); + if (rv < 0) + goto out; + + rv = send(fd, ls, sizeof(struct sanlk_lockspace), 0); + if (rv < 0) { + rv = -errno; + goto out; }
rv = recv_result(fd); @@ -239,6 +346,55 @@ int sanlock_init(struct sanlk_lockspace *ls, return rv; }
+int sanlock_write_resource(struct sanlk_resource *res, + int max_hosts, int num_hosts, uint32_t flags) +{ + int rv, fd; + + if (!res || !res->num_disks || res->num_disks > SANLK_MAX_DISKS || + !res->disks[0].path[0]) + return -EINVAL; + + rv = connect_socket(&fd); + if (rv < 0) + return rv; + + rv = send_header(fd, SM_CMD_WRITE_RESOURCE, flags, + sizeof(struct sanlk_resource) + + sizeof(struct sanlk_disk) * res->num_disks, + max_hosts, num_hosts); + if (rv < 0) + goto out; + + rv = send(fd, res, sizeof(struct sanlk_resource), 0); + if (rv < 0) { + rv = -errno; + goto out; + } + + rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0); + if (rv < 0) { + rv = -errno; + goto out; + } + + rv = recv_result(fd); + out: + close(fd); + return rv; +} + +/* old api */ +int sanlock_init(struct sanlk_lockspace *ls, + struct sanlk_resource *res, + int max_hosts, int num_hosts) +{ + if (ls) + return sanlock_write_lockspace(ls, max_hosts, 0, 0); + else + return sanlock_write_resource(res, max_hosts, num_hosts, 0); +} + /* src has colons unescaped, dst should have them escaped with backslash */
size_t sanlock_path_export(char *dst, const char *src, size_t dstlen) diff --git a/src/cmd.c b/src/cmd.c index d443e92..18b9db0 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -1106,23 +1106,180 @@ static void cmd_align(struct task *task GNUC_UNUSED, struct cmd_args *ca) client_resume(ca->ci_in); }
-static void cmd_init_lockspace(struct task *task, struct cmd_args *ca) +static void cmd_read_lockspace(struct task *task, struct cmd_args *ca) +{ + struct sm_header h; + struct sanlk_lockspace lockspace; + struct sync_disk sd; + uint64_t host_id; + int io_timeout = 0; + int fd, rv, result; + + fd = client[ca->ci_in].fd; + + rv = recv(fd, &lockspace, sizeof(struct sanlk_lockspace), MSG_WAITALL); + if (rv != sizeof(struct sanlk_lockspace)) { + log_error("cmd_read_lockspace %d,%d recv %d %d", + ca->ci_in, fd, rv, errno); + result = -ENOTCONN; + goto reply; + } + + if (!lockspace.host_id) + host_id = 1; + else + host_id = lockspace.host_id; + + log_debug("cmd_read_lockspace %d,%d %llu %s:%llu", + ca->ci_in, fd, + (unsigned long long)host_id, + lockspace.host_id_disk.path, + (unsigned long long)lockspace.host_id_disk.offset); + + if (!lockspace.host_id_disk.path[0]) { + result = -ENODEV; + goto reply; + } + + memset(&sd, 0, sizeof(struct sync_disk)); + memcpy(&sd, &lockspace.host_id_disk, sizeof(struct sanlk_disk)); + sd.fd = -1; + + rv = open_disk(&sd); + if (rv < 0) { + result = -ENODEV; + goto reply; + } + + /* sets ls->name and io_timeout */ + result = delta_read_lockspace(task, &sd, host_id, &lockspace, + DEFAULT_IO_TIMEOUT, &io_timeout); + if (result == SANLK_OK) + result = 0; + + close_disks(&sd, 1); + reply: + log_debug("cmd_read_lockspace %d,%d done %d", ca->ci_in, fd, result); + + memcpy(&h, &ca->header, sizeof(struct sm_header)); + h.data = result; + h.data2 = io_timeout; + h.length = sizeof(h) + sizeof(lockspace); + send(fd, &h, sizeof(h), MSG_NOSIGNAL); + send(fd, &lockspace, sizeof(lockspace), MSG_NOSIGNAL); + client_resume(ca->ci_in); +} + +static void cmd_read_resource(struct task *task, struct cmd_args *ca) +{ + struct sm_header h; + struct sanlk_resource res; + struct token *token = NULL; + int token_len, disks_len; + int j, fd, rv, result; + + fd = client[ca->ci_in].fd; + + /* receiving and setting up token copied from cmd_acquire */ + + rv = 
recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL); + if (rv != sizeof(struct sanlk_resource)) { + log_error("cmd_read_resource %d,%d recv %d %d", + ca->ci_in, fd, rv, errno); + result = -ENOTCONN; + goto reply; + } + + if (!res.num_disks || res.num_disks > SANLK_MAX_DISKS) { + result = -ERANGE; + goto reply; + } + + disks_len = res.num_disks * sizeof(struct sync_disk); + token_len = sizeof(struct token) + disks_len; + + token = malloc(token_len); + if (!token) { + result = -ENOMEM; + goto reply; + } + memset(token, 0, token_len); + token->disks = (struct sync_disk *)&token->r.disks[0]; /* shorthand */ + token->r.num_disks = res.num_disks; + memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN); + memcpy(token->r.name, res.name, SANLK_NAME_LEN); + + /* + * receive sanlk_disk's / sync_disk's + * + * WARNING: as a shortcut, this requires that sync_disk and + * sanlk_disk match; this is the reason for the pad fields + * in sanlk_disk (TODO: let these differ?) + */ + + rv = recv(fd, token->disks, disks_len, MSG_WAITALL); + if (rv != disks_len) { + result = -ENOTCONN; + goto reply; + } + + /* zero out pad1 and pad2, see WARNING above */ + for (j = 0; j < token->r.num_disks; j++) { + token->disks[j].sector_size = 0; + token->disks[j].fd = -1; + } + + log_debug("cmd_read_resource %d,%d %.256s:%llu", + ca->ci_in, fd, + token->disks[0].path, + (unsigned long long)token->r.disks[0].offset); + + rv = open_disks(token->disks, token->r.num_disks); + if (rv < 0) { + result = rv; + goto reply; + } + + token->io_timeout = DEFAULT_IO_TIMEOUT; + + /* sets res.lockspace_name, res.name, res.lver */ + result = paxos_read_resource(task, token, &res); + if (result == SANLK_OK) + result = 0; + + close_disks(token->disks, token->r.num_disks); + reply: + if (token) + free(token); + log_debug("cmd_read_resource %d,%d done %d", ca->ci_in, fd, result); + + memcpy(&h, &ca->header, sizeof(struct sm_header)); + h.data = result; + h.data2 = 0; + h.length = sizeof(h) + 
sizeof(res); + send(fd, &h, sizeof(h), MSG_NOSIGNAL); + send(fd, &res, sizeof(res), MSG_NOSIGNAL); + client_resume(ca->ci_in); +} + +static void cmd_write_lockspace(struct task *task, struct cmd_args *ca) { struct sanlk_lockspace lockspace; struct sync_disk sd; int fd, rv, result; + int io_timeout = DEFAULT_IO_TIMEOUT;
fd = client[ca->ci_in].fd;
rv = recv(fd, &lockspace, sizeof(struct sanlk_lockspace), MSG_WAITALL); if (rv != sizeof(struct sanlk_lockspace)) { - log_error("cmd_init_lockspace %d,%d recv %d %d", + log_error("cmd_write_lockspace %d,%d recv %d %d", ca->ci_in, fd, rv, errno); result = -ENOTCONN; goto reply; }
- log_debug("cmd_init_lockspace %d,%d %.48s:%llu:%s:%llu", + log_debug("cmd_write_lockspace %d,%d %.48s:%llu:%s:%llu", ca->ci_in, fd, lockspace.name, (unsigned long long)lockspace.host_id, lockspace.host_id_disk.path, @@ -1143,17 +1300,20 @@ static void cmd_init_lockspace(struct task *task, struct cmd_args *ca) goto reply; }
- result = delta_lease_init(task, DEFAULT_IO_TIMEOUT, &sd, lockspace.name, ca->header.data); + if (ca->header.data2) + io_timeout = ca->header.data2; + + result = delta_lease_init(task, io_timeout, &sd, lockspace.name, ca->header.data);
close_disks(&sd, 1); reply: - log_debug("cmd_init_lockspace %d,%d done %d", ca->ci_in, fd, result); + log_debug("cmd_write_lockspace %d,%d done %d", ca->ci_in, fd, result);
send_result(fd, &ca->header, result); client_resume(ca->ci_in); }
-static void cmd_init_resource(struct task *task, struct cmd_args *ca) +static void cmd_write_resource(struct task *task, struct cmd_args *ca) { struct token *token = NULL; struct sanlk_resource res; @@ -1166,7 +1326,7 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca)
rv = recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL); if (rv != sizeof(struct sanlk_resource)) { - log_error("cmd_init_resource %d,%d recv %d %d", + log_error("cmd_write_resource %d,%d recv %d %d", ca->ci_in, fd, rv, errno); result = -ENOTCONN; goto reply; @@ -1211,7 +1371,7 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca) token->disks[j].fd = -1; }
- log_debug("cmd_init_resource %d,%d %.48s:%.48s:%.256s:%llu", + log_debug("cmd_write_resource %d,%d %.48s:%.48s:%.256s:%llu", ca->ci_in, fd, token->r.lockspace_name, token->r.name, @@ -1232,7 +1392,7 @@ static void cmd_init_resource(struct task *task, struct cmd_args *ca) reply: if (token) free(token); - log_debug("cmd_init_resource %d,%d done %d", ca->ci_in, fd, result); + log_debug("cmd_write_resource %d,%d done %d", ca->ci_in, fd, result);
send_result(fd, &ca->header, result); client_resume(ca->ci_in); @@ -1329,11 +1489,17 @@ void call_cmd_thread(struct task *task, struct cmd_args *ca) case SM_CMD_ALIGN: cmd_align(task, ca); break; - case SM_CMD_INIT_LOCKSPACE: - cmd_init_lockspace(task, ca); + case SM_CMD_WRITE_LOCKSPACE: + cmd_write_lockspace(task, ca); + break; + case SM_CMD_WRITE_RESOURCE: + cmd_write_resource(task, ca); + break; + case SM_CMD_READ_LOCKSPACE: + cmd_read_lockspace(task, ca); break; - case SM_CMD_INIT_RESOURCE: - cmd_init_resource(task, ca); + case SM_CMD_READ_RESOURCE: + cmd_read_resource(task, ca); break; case SM_CMD_EXAMINE_LOCKSPACE: case SM_CMD_EXAMINE_RESOURCE: diff --git a/src/delta_lease.c b/src/delta_lease.c index bad5e63..e0fe372 100644 --- a/src/delta_lease.c +++ b/src/delta_lease.c @@ -148,6 +148,43 @@ static int verify_leader(struct sync_disk *disk, return result; }
+ +/* read the lockspace name and io_timeout given the disk location */ + +int delta_read_lockspace(struct task *task, + struct sync_disk *disk, + uint64_t host_id, + struct sanlk_lockspace *ls, + int io_timeout, + int *io_timeout_ret) +{ + struct leader_record leader; + char *space_name; + int rv, error; + + /* host_id N is block offset N-1 */ + + memset(&leader, 0, sizeof(struct leader_record)); + + rv = read_sectors(disk, host_id - 1, 1, (char *)&leader, sizeof(struct leader_record), + task, io_timeout, "read_lockspace"); + if (rv < 0) + return rv; + + if (!ls->name[0]) + space_name = leader.space_name; + + error = verify_leader(disk, space_name, host_id, &leader, "read_lockspace"); + + if (error == SANLK_OK) { + memcpy(ls->name, leader.space_name, SANLK_NAME_LEN); + ls->host_id = host_id; + *io_timeout_ret = leader.io_timeout; + } + + return error; +} + int delta_lease_leader_read(struct task *task, int io_timeout, struct sync_disk *disk, char *space_name, @@ -618,6 +655,9 @@ int delta_lease_init(struct task *task, if (!max_hosts) max_hosts = DEFAULT_MAX_HOSTS;
+ if (!io_timeout) + io_timeout = DEFAULT_IO_TIMEOUT; + align_size = direct_align(disk); if (align_size < 0) return align_size; @@ -647,16 +687,27 @@ int delta_lease_init(struct task *task, leader->io_timeout = io_timeout; strncpy(leader->space_name, space_name, NAME_ID_SIZE); leader->checksum = leader_checksum(leader); + + /* make the first record invalid so we can do a single atomic + write below to commit the whole thing */ + if (!i) + leader->magic = 0; }
rv = write_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task, io_timeout); + if (rv < 0) + goto out; + + /* commit the whole lockspace by making the first record valid */ + + leader = (struct leader_record *)iobuf; + leader->magic = DELTA_DISK_MAGIC;
+ rv = write_iobuf(disk->fd, disk->offset, iobuf, disk->sector_size, task, io_timeout); + out: if (rv != SANLK_AIO_TIMEOUT) free(iobuf);
- if (rv < 0) - return rv; - - return 0; + return rv; }
diff --git a/src/delta_lease.h b/src/delta_lease.h index 4c21267..f015d1e 100644 --- a/src/delta_lease.h +++ b/src/delta_lease.h @@ -48,4 +48,11 @@ int delta_lease_init(struct task *task, char *space_name, int max_hosts);
+int delta_read_lockspace(struct task *task, + struct sync_disk *disk, + uint64_t host_id, + struct sanlk_lockspace *ls, + int io_timeout, + int *io_timeout_ret); + #endif diff --git a/src/direct.c b/src/direct.c index ab39569..7308c82 100644 --- a/src/direct.c +++ b/src/direct.c @@ -91,6 +91,9 @@ static int do_paxos_action(int action, struct task *task, int io_timeout, int disks_len, token_len; int j, rv = 0;
+ if (!io_timeout) + io_timeout = DEFAULT_IO_TIMEOUT; + disks_len = res->num_disks * sizeof(struct sync_disk); token_len = sizeof(struct token) + disks_len;
@@ -199,6 +202,9 @@ static int do_delta_action(int action,
memset(bitmap, 0, sizeof(bitmap));
+ if (!io_timeout) + io_timeout = DEFAULT_IO_TIMEOUT; + /* for log_space in delta functions */ memset(&space, 0, sizeof(space)); space.io_timeout = io_timeout; @@ -392,36 +398,31 @@ int direct_align(struct sync_disk *disk) return -EINVAL; }
-/* - * sanlock direct init [-s LOCKSPACE] [-r RESOURCE] - * - * Note: host_id not used for init, whatever is given in LOCKSPACE - * is ignored - */ - -int direct_init(struct task *task, - int io_timeout, - struct sanlk_lockspace *ls, - struct sanlk_resource *res, - int max_hosts, int num_hosts) +/* io_timeout is written to leader record and used for the write call itself */ +int direct_write_lockspace(struct task *task, struct sanlk_lockspace *ls, + int max_hosts, uint32_t io_timeout) { - int rv = -1; + if (!ls) + return -1;
- if (ls && ls->host_id_disk.path[0]) { - rv = do_delta_action(ACT_DIRECT_INIT, task, io_timeout, ls, max_hosts, NULL, NULL); + return do_delta_action(ACT_DIRECT_INIT, task, io_timeout, ls, + max_hosts, NULL, NULL); +}
- } else if (res) { - if (!res->num_disks) - return -ENODEV; +int direct_write_resource(struct task *task, struct sanlk_resource *res, + int max_hosts, int num_hosts) +{ + if (!res) + return -1;
- if (!res->disks[0].path[0]) - return -ENODEV; + if (!res->num_disks) + return -ENODEV;
- rv = do_paxos_action(ACT_DIRECT_INIT, task, io_timeout, res, - max_hosts, num_hosts, 0, 0, NULL); - } + if (!res->disks[0].path[0]) + return -ENODEV;
- return rv; + return do_paxos_action(ACT_DIRECT_INIT, task, 0, res, + max_hosts, num_hosts, 0, 0, NULL); }
int direct_read_leader(struct task *task, diff --git a/src/direct.h b/src/direct.h index bd71096..f6371ed 100644 --- a/src/direct.h +++ b/src/direct.h @@ -45,10 +45,13 @@ int direct_live_id(struct task *task, int io_timeout,
int direct_align(struct sync_disk *disk);
-int direct_init(struct task *task, int io_timeout, - struct sanlk_lockspace *ls, - struct sanlk_resource *res, - int max_hosts, int num_hosts); +/* io_timeout is written in the leader record and used for the + write call itself */ +int direct_write_lockspace(struct task *task, struct sanlk_lockspace *ls, + int max_hosts, uint32_t io_timeout); + +int direct_write_resource(struct task *task, struct sanlk_resource *res, + int max_hosts, int num_hosts);
int direct_read_leader(struct task *task, int io_timeout, struct sanlk_lockspace *ls, diff --git a/src/direct_lib.c b/src/direct_lib.c index 750ae71..c5b6dc0 100644 --- a/src/direct_lib.c +++ b/src/direct_lib.c @@ -123,6 +123,37 @@ int sanlock_direct_live_id(struct sanlk_lockspace *ls, return rv; }
+int sanlock_direct_write_lockspace(struct sanlk_lockspace *ls, int max_hosts, + uint32_t flags GNUC_UNUSED, uint32_t io_timeout) +{ + struct task task; + int rv; + + setup_task_lib(&task, 1); + + rv = direct_write_lockspace(&task, ls, max_hosts, io_timeout); + + close_task_aio(&task); + + return rv; +} + +int sanlock_direct_write_resource(struct sanlk_resource *res, + int max_hosts, int num_hosts, + uint32_t flags GNUC_UNUSED) +{ + struct task task; + int rv; + + setup_task_lib(&task, 1); + + rv = direct_write_resource(&task, res, max_hosts, num_hosts); + + close_task_aio(&task); + + return rv; +} + int sanlock_direct_init(struct sanlk_lockspace *ls, struct sanlk_resource *res, int max_hosts, int num_hosts, int use_aio) @@ -132,7 +163,10 @@ int sanlock_direct_init(struct sanlk_lockspace *ls,
setup_task_lib(&task, use_aio);
- rv = direct_init(&task, DEFAULT_IO_TIMEOUT, ls, res, max_hosts, num_hosts); + if (ls) + rv = direct_write_lockspace(&task, ls, max_hosts, 0); + else + rv = direct_write_resource(&task, res, max_hosts, num_hosts);
close_task_aio(&task);
diff --git a/src/lockspace.c b/src/lockspace.c index 469b66f..8e05fa5 100644 --- a/src/lockspace.c +++ b/src/lockspace.c @@ -384,7 +384,7 @@ static void *lockspace_thread(void *arg_in) struct space *sp; struct leader_record leader; uint64_t delta_begin, last_success = 0; - int rv, delta_length, renewal_interval; + int rv, delta_length, renewal_interval = 0; int id_renewal_seconds, id_renewal_fail_seconds; int acquire_result, delta_result, read_result; int opened = 0; diff --git a/src/main.c b/src/main.c index 2ce26a3..37b20db 100644 --- a/src/main.c +++ b/src/main.c @@ -1157,8 +1157,10 @@ static void process_connection(int ci) case SM_CMD_EXAMINE_RESOURCE: case SM_CMD_EXAMINE_LOCKSPACE: case SM_CMD_ALIGN: - case SM_CMD_INIT_LOCKSPACE: - case SM_CMD_INIT_RESOURCE: + case SM_CMD_WRITE_LOCKSPACE: + case SM_CMD_WRITE_RESOURCE: + case SM_CMD_READ_LOCKSPACE: + case SM_CMD_READ_RESOURCE: rv = client_suspend(ci); if (rv < 0) return; @@ -1779,6 +1781,7 @@ static void print_usage(void) printf("sanlock client log_dump\n"); printf("sanlock client shutdown [-f 0|1]\n"); printf("sanlock client init -s LOCKSPACE | -r RESOURCE\n"); + printf("sanlock client read -s LOCKSPACE | -r RESOURCE\n"); printf("sanlock client align -s LOCKSPACE\n"); printf("sanlock client add_lockspace -s LOCKSPACE\n"); printf("sanlock client inq_lockspace -s LOCKSPACE\n"); @@ -1902,6 +1905,10 @@ static int read_command_line(int argc, char *argv[]) com.action = ACT_CLIENT_ALIGN; else if (!strcmp(act, "init")) com.action = ACT_CLIENT_INIT; + else if (!strcmp(act, "write")) + com.action = ACT_CLIENT_INIT; + else if (!strcmp(act, "read")) + com.action = ACT_CLIENT_READ; else { log_tool("client action "%s" is unknown", act); exit(EXIT_FAILURE); @@ -2117,6 +2124,8 @@ static int do_client(void) struct sanlk_resource **res_args = NULL; struct sanlk_resource *res; char *res_state = NULL; + char *res_str = NULL; + uint32_t io_timeout = 0; int i, fd, rv = 0;
if (com.action == ACT_COMMAND || com.action == ACT_ACQUIRE) { @@ -2271,14 +2280,45 @@ static int do_client(void) case ACT_CLIENT_INIT: log_tool("init"); if (com.lockspace.host_id_disk.path[0]) - rv = sanlock_init(&com.lockspace, NULL, - com.max_hosts, com.num_hosts); + rv = sanlock_write_lockspace(&com.lockspace, + com.max_hosts, 0, + com.io_timeout_arg); else - rv = sanlock_init(NULL, com.res_args[0], - com.max_hosts, com.num_hosts); + rv = sanlock_write_resource(com.res_args[0], + com.max_hosts, + com.num_hosts, 0); log_tool("init done %d", rv); break;
+ case ACT_CLIENT_READ: + if (com.lockspace.host_id_disk.path[0]) + rv = sanlock_read_lockspace(&com.lockspace, 0, &io_timeout); + else + rv = sanlock_read_resource(com.res_args[0], 0); + + if (rv < 0) { + log_tool("read error %d", rv); + break; + } + + if (com.lockspace.host_id_disk.path[0]) { + log_tool("s %.48s:%llu:%s:%llu", + com.lockspace.name, + (unsigned long long)com.lockspace.host_id, + com.lockspace.host_id_disk.path, + (unsigned long long)com.lockspace.host_id_disk.offset); + log_tool("io_timeout %u", io_timeout); + } else { + rv = sanlock_res_to_str(com.res_args[0], &res_str); + if (rv < 0) { + log_tool("res_to_str error %d", rv); + break; + } + log_tool("r %s", res_str); + free(res_str); + } + break; + default: log_tool("action not implemented"); rv = -1; @@ -2299,8 +2339,12 @@ static int do_direct(void)
switch (com.action) { case ACT_DIRECT_INIT: - rv = direct_init(&main_task, com.io_timeout_arg, &com.lockspace, - com.res_args[0], com.max_hosts, com.num_hosts); + if (com.lockspace.host_id_disk.path[0]) + rv = direct_write_lockspace(&main_task, &com.lockspace, + com.max_hosts, com.io_timeout_arg); + else + rv = direct_write_resource(&main_task, com.res_args[0], + com.max_hosts, com.num_hosts); log_tool("init done %d", rv); break;
diff --git a/src/paxos_lease.c b/src/paxos_lease.c index 0561730..577ed0b 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -692,6 +692,38 @@ static int leaders_match(struct leader_record *a, struct leader_record *b) return 0; }
+/* read the lockspace name and resource name given the disk location */ + +int paxos_read_resource(struct task *task, + struct token *token, + struct sanlk_resource *res) +{ + struct leader_record leader; + int rv; + + memset(&leader, 0, sizeof(struct leader_record)); + + rv = read_leader(task, token, &token->disks[0], &leader); + if (rv < 0) + return rv; + + if (!res->lockspace_name[0]) + memcpy(token->r.lockspace_name, leader.space_name, NAME_ID_SIZE); + + if (!res->name[0]) + memcpy(token->r.name, leader.resource_name, NAME_ID_SIZE); + + rv = verify_leader(token, &token->disks[0], &leader, "read_resource"); + + if (rv == SANLK_OK) { + memcpy(res->lockspace_name, leader.space_name, NAME_ID_SIZE); + memcpy(res->name, leader.resource_name, NAME_ID_SIZE); + res->lver = leader.lver; + } + + return rv; +} + static int _leader_read_one(struct task *task, struct token *token, struct leader_record *leader_ret, diff --git a/src/paxos_lease.h b/src/paxos_lease.h index 6e7e833..99df087 100644 --- a/src/paxos_lease.h +++ b/src/paxos_lease.h @@ -42,4 +42,8 @@ int paxos_lease_request_read(struct task *task, struct token *token, int paxos_lease_request_write(struct task *task, struct token *token, struct request_record *rr);
+int paxos_read_resource(struct task *task, + struct token *token, + struct sanlk_resource *res); + #endif diff --git a/src/sanlock.8 b/src/sanlock.8 index a6bf6c4..59ef392 100644 --- a/src/sanlock.8 +++ b/src/sanlock.8 @@ -242,9 +242,6 @@ use mlockall (0 none, 1 current, 2 current and future) .BR -a " 0|1" use async i/o
-.BI -o " sec" -io timeout in seconds - .B "sanlock client" .I action [options] @@ -273,11 +270,28 @@ command will be ignored if any lockspaces exist. With the force option released, and lockspaces removed.
.BR "sanlock client init -s" " LOCKSPACE" -.br + +Tell the sanlock daemon to initialize a lockspace on disk. The -o option +can be used to specify the io timeout to be written in the host_id leases. +(Also see sanlock direct init.) + .BR "sanlock client init -r" " RESOURCE"
-Tell the sanlock daemon to initialize storage for lease areas. -(See sanlock direct init.) +Tell the sanlock daemon to initialize a resource lease on disk. +(Also see sanlock direct init.) + +.BR "sanlock client read -s" " LOCKSPACE" + +Tell the sanlock daemon to read a lockspace from disk. Only the +LOCKSPACE path and offset are required. If host_id is zero, the first +record at offset (host_id 1) is used. The complete LOCKSPACE and io +timeout are printed. (Also see sanlock direct read_leader and read_id.) + +.BR "sanlock client read -r" " RESOURCE" + +Tell the sanlock daemon to read a resource lease from disk. Only the +RESOURCE path and offset are required. The complete RESOURCE is printed. +(Also see sanlock direct read_leader.)
.BR "sanlock client align -s" " LOCKSPACE"
@@ -287,11 +301,14 @@ path. Only path is used from the LOCKSPACE argument. .BR "sanlock client add_lockspace -s" " LOCKSPACE"
Tell the sanlock daemon to acquire the specified host_id in the lockspace. -This will allow resources to be acquired in the lockspace. +This will allow resources to be acquired in the lockspace. The -o option +can be used to specify the io timeout of the acquiring host, and will be +written in the host_id lease.
.BR "sanlock client inq_lockspace -s" " LOCKSPACE"
-Ask to the sanlock daemon weather the lockspace is acquired or not. +Inquire about the state of the lockspace in the sanlock daemon, whether +it is being added or removed, or is joined.
.BR "sanlock client rem_lockspace -s" " LOCKSPACE"
@@ -359,7 +376,8 @@ lockspace, or initialize storage for one resource (paxos) lease. Both options require 1MB of space. The host_id in the LOCKSPACE string is not relevant to initialization, so the value is ignored. (The default of 2000 host_ids can be changed for special cases using the -n num_hosts and -m -max_hosts options.) +max_hosts options.) With -s, the -o option specifies the io timeout to be +written in the host_id leases.
.BR "sanlock direct read_leader -s" " LOCKSPACE" .br @@ -374,9 +392,8 @@ the single sector of a delta lease, or the first sector of a paxos lease.
read_id reads a host_id and prints the owner. live_id reads a host_id once a second until it the timestamp or owner change (prints live 1), or -until host_dead_seconds (prints live 0). (host_dead_seconds is derived -from the io_timeout option. The live 0|1 conclusion will not match the -sanlock daemon's conclusion unless the configured timeouts match.) +until host_dead_seconds (prints live 0). (host_dead_seconds is based on +the owner's io timeout.) ./" .P ./" .BR "sanlock direct acquire_id -s" " LOCKSPACE" ./" .br diff --git a/src/sanlock_admin.h b/src/sanlock_admin.h index 4544c21..14b1e7a 100644 --- a/src/sanlock_admin.h +++ b/src/sanlock_admin.h @@ -70,10 +70,87 @@ int sanlock_align(struct sanlk_disk *disk); * Use max_hosts = 0 for default value. * Use num_hosts = 0 for default value. * Provide either lockspace or resource, not both + * + * (Old api, see write_lockspace/resource) */
int sanlock_init(struct sanlk_lockspace *ls, struct sanlk_resource *res, int max_hosts, int num_hosts);
+/* + * write a lockspace to disk + * + * the sanlock daemon writes max_hosts lockspace leader records to disk + * + * the lockspace will support up to max_hosts using the lockspace at once + * + * use max_hosts = 0 for default value + * + * the first host_id (1) (the first record at offset) is the last + * leader record written, so read_lockspace of host_id 1 will fail + * until the entire write_lockspace is complete. + */ + +int sanlock_write_lockspace(struct sanlk_lockspace *ls, int max_hosts, + uint32_t flags, uint32_t io_timeout); + +/* + * read one host's lockspace record from disk + * + * the sanlock daemon reads one lockspace leader record from disk + * + * the minimum input is path and offset + * + * if name is specified and does not match the leader record name, + * SANLK_LEADER_LOCKSPACE is returned + * + * if name is not specified, it is filled in with the value from disk + * + * if host_id is zero, host_id 1 is used (the first record at offset) + * + * if there is no delta lease magic number found at the host_id location, + * SANLK_LEADER_MAGIC is returned + * + * on success, zero is returned and + * io_timeout and the entire sanlk_lockspace struct are written to + */ + +int sanlock_read_lockspace(struct sanlk_lockspace *ls, + uint32_t flags, uint32_t *io_timeout); + +/* + * format a resource lease area on disk + * + * the sanlock daemon writes a resource lease area to disk + * + * use max_hosts = 0 for default value + * use num_hosts = 0 for default value + */ + +int sanlock_write_resource(struct sanlk_resource *res, + int max_hosts, int num_hosts, uint32_t flags); + +/* + * read a resource lease from disk + * + * the sanlock daemon reads the lease's leader record from disk + * + * the minimum input is one disk with path and offset + * + * if lockspace name is specified and does not match the leader record + * lockspace name, SANLK_LEADER_LOCKSPACE is returned + * + * if resource name is specified and does not match the leader record + * resource 
name, SANLK_LEADER_RESOURCE is returned + * + * if there is no paxos lease magic number found in the leader record, + * SANLK_LEADER_MAGIC is returned + * + * on success, zero is returned and + * the entire sanlk_resource struct is written to (res->disks is not changed) + */ + +int sanlock_read_resource(struct sanlk_resource *res, uint32_t flags); + #endif diff --git a/src/sanlock_direct.h b/src/sanlock_direct.h index 0f0cf18..cbdec46 100644 --- a/src/sanlock_direct.h +++ b/src/sanlock_direct.h @@ -32,6 +32,8 @@ int sanlock_direct_live_id(struct sanlk_lockspace *ls, * Use max_hosts = 0 for default value. * Use num_hosts = 0 for default value. * Provide either lockspace or resource, not both + * + * (Old api, see write_lockspace/resource) */
int sanlock_direct_init(struct sanlk_lockspace *ls, @@ -39,6 +41,22 @@ int sanlock_direct_init(struct sanlk_lockspace *ls, int max_hosts, int num_hosts, int use_aio);
/* + * write a lockspace to disk + * (also see sanlock_write_lockspace) + */ + +int sanlock_direct_write_lockspace(struct sanlk_lockspace *ls, int max_hosts, + uint32_t flags, uint32_t io_timeout); + +/* + * format a resource lease area on disk + * (also see sanlock_write_resource) + */ + +int sanlock_direct_write_resource(struct sanlk_resource *res, + int max_hosts, int num_hosts, uint32_t flags); + +/* * Returns the alignment in bytes required by sanlock_direct_init() * (1MB for disks with 512 sectors, 8MB for disks with 4096 sectors) */ diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index 65496ff..414ceb8 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -309,6 +309,7 @@ enum { ACT_NEXT_FREE, ACT_READ_LEADER, ACT_CLIENT_INIT, + ACT_CLIENT_READ, ACT_CLIENT_ALIGN, ACT_EXAMINE, }; diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h index 5f8fcf0..fbabbb2 100644 --- a/src/sanlock_sock.h +++ b/src/sanlock_sock.h @@ -18,25 +18,27 @@ #define MAX_CLIENT_MSG (1024 * 1024) /* TODO: this is random */
enum { - SM_CMD_REGISTER = 1, - SM_CMD_ADD_LOCKSPACE = 2, - SM_CMD_REM_LOCKSPACE = 3, - SM_CMD_SHUTDOWN = 4, - SM_CMD_STATUS = 5, - SM_CMD_LOG_DUMP = 6, - SM_CMD_ACQUIRE = 7, - SM_CMD_RELEASE = 8, - SM_CMD_INQUIRE = 9, - SM_CMD_RESTRICT = 10, - SM_CMD_REQUEST = 11, - SM_CMD_ALIGN = 12, - SM_CMD_INIT_LOCKSPACE = 13, - SM_CMD_INIT_RESOURCE = 14, - SM_CMD_EXAMINE_LOCKSPACE = 15, - SM_CMD_EXAMINE_RESOURCE = 16, - SM_CMD_HOST_STATUS = 17, - SM_CMD_INQ_LOCKSPACE = 18, - SM_CMD_KILLPATH = 19, + SM_CMD_REGISTER = 1, + SM_CMD_ADD_LOCKSPACE = 2, + SM_CMD_REM_LOCKSPACE = 3, + SM_CMD_SHUTDOWN = 4, + SM_CMD_STATUS = 5, + SM_CMD_LOG_DUMP = 6, + SM_CMD_ACQUIRE = 7, + SM_CMD_RELEASE = 8, + SM_CMD_INQUIRE = 9, + SM_CMD_RESTRICT = 10, + SM_CMD_REQUEST = 11, + SM_CMD_ALIGN = 12, + SM_CMD_EXAMINE_LOCKSPACE = 13, + SM_CMD_EXAMINE_RESOURCE = 14, + SM_CMD_HOST_STATUS = 15, + SM_CMD_INQ_LOCKSPACE = 16, + SM_CMD_KILLPATH = 17, + SM_CMD_WRITE_LOCKSPACE = 18, + SM_CMD_WRITE_RESOURCE = 19, + SM_CMD_READ_LOCKSPACE = 20, + SM_CMD_READ_RESOURCE = 21, };
struct sm_header {
sanlock-devel@lists.fedorahosted.org