This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
commit e9e4ddc974d3692bc174b3710a4a380d2bbada7d
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Mon Mar 17 12:19:14 2025 -0500
sanlock: add new acquire function that returns lease owner
If sanlock_acquire2() fails because another host owns the lease,
then info about the owner host is returned with the error.
---
src/client.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++
src/cmd.c | 53 ++++++++++++++++++++++--
src/lockspace.c | 2 +-
src/lockspace.h | 2 +
src/main.c | 32 +++++++++++---
src/resource.c | 19 +++++++--
src/resource.h | 2 +-
src/sanlock_resource.h | 6 +++
src/sanlock_sock.h | 1 +
9 files changed, 212 insertions(+), 15 deletions(-)
diff --git a/src/client.c b/src/client.c
index f8d7d64..33228b3 100644
--- a/src/client.c
+++ b/src/client.c
@@ -167,6 +167,27 @@ retry:
return (int)h.data;
}
+/*
+ * Receive one reply header from the daemon.
+ *
+ * fd:       connected daemon socket
+ * h_data:   if not NULL, set to the header's data field (the command result)
+ * h_length: if not NULL, set to the header's length field; the acquire2
+ *           reply sets it to the header size plus the size of the owner
+ *           payload when that payload follows the header
+ *
+ * Returns 0 on success, -errno if recv() fails, or -1 if fewer bytes than
+ * a full header were received.
+ */
+static int recv_header(int fd, uint32_t *h_data, uint32_t *h_length)
+{
+	struct sm_header h;
+	ssize_t rv;
+
+	memset(&h, 0, sizeof(h));
+retry:
+	rv = recv(fd, &h, sizeof(h), MSG_WAITALL);
+	if (rv == -1 && errno == EINTR)
+		goto retry;
+	if (rv < 0)
+		return -errno;
+	if ((size_t)rv != sizeof(h))
+		return -1;
+
+	if (h_data)
+		*h_data = h.data;
+	if (h_length)
+		*h_length = h.length;
+
+	return 0;
+}
+
static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t flags, uint32_t data)
{
int rv, fd;
@@ -1115,6 +1136,95 @@ int sanlock_killpath(int sock, uint32_t flags, const char *path, char *args)
return rv;
}
+/*
+ * Ask the daemon (SM_CMD_ACQUIRE2) to acquire one resource lease and, when
+ * the daemon includes it in the reply, return info about the current owner.
+ *
+ * sock:       registered connection to use for acquiring a lease for self,
+ *             or -1 to open a new connection and acquire for "pid"
+ * pid:        registered pid to acquire for; only used when sock is -1
+ * flags:      sent to the daemon in the command header
+ * res:        resource to acquire; must have exactly one disk
+ * opt_in:     optional; zeroed options are sent when NULL
+ * owner_host: optional; filled in only when the reply carries owner info
+ * owner_name: optional; set to a malloc'ed string (caller frees) only when
+ *             the reply carries a non-empty owner name, otherwise untouched
+ *
+ * Returns the daemon's result for the command, -EINVAL for a bad res,
+ * or a negative value if talking to the daemon fails.
+ */
+int sanlock_acquire2(int sock, int pid, uint32_t flags,
+		     struct sanlk_resource *res,
+		     struct sanlk_options *opt_in,
+		     struct sanlk_host *owner_host,
+		     char **owner_name)
+{
+	struct sanlk_options opt = {0};
+	struct sanlk_host host = {0};
+	char namebuf[SANLK_NAME_LEN+1] = {0};
+	char *nameret;
+	uint32_t cmd_result = 0;
+	uint32_t reply_len = 0;
+	size_t namelen;
+	int rv, fd, data2;
+	int datalen;
+
+	if (!res || res->num_disks != 1)
+		return -EINVAL;
+
+	if (opt_in)
+		memcpy(&opt, opt_in, sizeof(struct sanlk_options));
+
+	datalen = sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk) + sizeof(struct sanlk_options);
+
+	if (sock == -1) {
+		/* connect to daemon and ask it to acquire a lease for
+		   another registered pid */
+
+		data2 = pid;
+
+		rv = connect_socket(&fd);
+		if (rv < 0)
+			return rv;
+	} else {
+		/* use our own existing registered connection and ask daemon
+		   to acquire a lease for self */
+
+		data2 = -1;
+		fd = sock;
+	}
+
+	rv = send_header(fd, SM_CMD_ACQUIRE2, flags, datalen, 1, data2);
+	if (rv < 0)
+		goto out;
+
+	rv = send_data(fd, res, sizeof(struct sanlk_resource), 0);
+	if (rv < 0) {
+		rv = -1;
+		goto out;
+	}
+
+	rv = send_data(fd, res->disks, sizeof(struct sanlk_disk), 0);
+	if (rv < 0) {
+		rv = -1;
+		goto out;
+	}
+
+	rv = send_data(fd, &opt, sizeof(struct sanlk_options), 0);
+	if (rv < 0) {
+		rv = -1;
+		goto out;
+	}
+
+	rv = recv_header(fd, &cmd_result, &reply_len);
+	if (rv < 0)
+		goto out;
+
+	/*
+	 * The owner payload is optional.  cmd_acquire() appends it, and
+	 * grows the header length to cover it, only when the acquire failed
+	 * because another host owns the lease.  All other replies go through
+	 * send_result() (presumably header only -- TODO confirm), so reading
+	 * the payload unconditionally with MSG_WAITALL would block forever
+	 * on a successful acquire.
+	 */
+	if (reply_len >= sizeof(struct sm_header) + sizeof(host) + SANLK_NAME_LEN) {
+		rv = recv_data(fd, &host, sizeof(host), MSG_WAITALL);
+		if (rv < 0)
+			goto out;
+
+		/* namebuf has one extra zeroed byte, so it stays terminated */
+		rv = recv_data(fd, namebuf, SANLK_NAME_LEN, MSG_WAITALL);
+		if (rv < 0)
+			goto out;
+
+		/* publish owner info only after the whole payload arrived */
+		if (owner_host)
+			memcpy(owner_host, &host, sizeof(host));
+
+		if (owner_name && namebuf[0]) {
+			namelen = strlen(namebuf);
+			nameret = malloc(namelen + 1);
+			/* best effort: if allocation fails the name is
+			   dropped and the acquire result is still returned */
+			if (nameret) {
+				memcpy(nameret, namebuf, namelen + 1);
+				*owner_name = nameret;
+			}
+		}
+	}
+
+	rv = (int)cmd_result;
+
+out:
+	if (sock == -1)
+		close(fd);
+	return rv;
+}
+
int sanlock_acquire(int sock, int pid, uint32_t flags, int res_count,
struct sanlk_resource *res_args[],
struct sanlk_options *opt_in)
diff --git a/src/cmd.c b/src/cmd.c
index b00f9de..d4f6c8a 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -190,6 +190,7 @@ static const char *acquire_error_str(int error)
static void cmd_acquire(struct task *task, struct cmd_args *ca, uint32_t cmd)
{
+ struct sm_header h = { 0 };
struct client *cl;
struct token *token = NULL;
struct token *new_tokens[SANLK_MAX_RESOURCES];
@@ -197,8 +198,11 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca, uint32_t cmd)
struct sanlk_resource res;
struct sanlk_options opt;
struct space_info spi;
+ struct sanlk_host owner_host = { 0 };
char killpath[SANLK_HELPER_PATH_LEN];
char killargs[SANLK_HELPER_ARGS_LEN];
+ char owner_host_name[SANLK_NAME_LEN+1] = { 0 };
+ int owner_host_send = 0;
int token_len, disks_len;
int fd, rv, i, j, empty_slots, lvl;
int alloc_count = 0, acquire_count = 0;
@@ -216,8 +220,9 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca, uint32_t cmd)
new_tokens_count = ca->header.data;
- log_cmd(cmd, "cmd_acquire %d,%d,%d ci_in %d fd %d count %d flags %x",
- cl_ci, cl_fd, cl_pid, ca->ci_in, fd, new_tokens_count, ca->header.cmd_flags);
+ log_cmd(cmd, "cmd_acquire %d,%d,%d ci_in %d fd %d count %d flags %x %c",
+ cl_ci, cl_fd, cl_pid, ca->ci_in, fd, new_tokens_count, ca->header.cmd_flags,
+ (cmd == SM_CMD_ACQUIRE2) ? '2' : ' ');
if (new_tokens_count > SANLK_MAX_RESOURCES) {
log_error("cmd_acquire %d,%d,%d new %d max %d",
@@ -418,7 +423,7 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca, uint32_t cmd)
for (i = 0; i < new_tokens_count; i++) {
token = new_tokens[i];
- rv = acquire_token(task, token, ca->header.cmd_flags, killpath, killargs);
+ rv = acquire_token(task, token, ca->header.cmd_flags, killpath, killargs, &owner_host);
if (rv < 0) {
switch (rv) {
case -EEXIST:
@@ -430,6 +435,10 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca, uint32_t cmd)
case SANLK_ACQUIRE_OWNED:
case SANLK_ACQUIRE_OTHER:
case SANLK_ACQUIRE_OWNED_RETRY:
+ if ((cmd == SM_CMD_ACQUIRE2) &&
+ (owner_host.host_id > 0) &&
+ (owner_host.host_id <= DEFAULT_MAX_HOSTS))
+ owner_host_send = 1;
lvl = com.quiet_fail ? LOG_DEBUG : LOG_ERR;
break;
default:
@@ -528,6 +537,29 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca, uint32_t cmd)
}
/* goto reply after mutex unlock */
}
+
+ /*
+ * sanlock_acquire2() returns info about the owner when the
+ * lease fails because it's held by another host. Most of
+ * that owner info comes from the leader returned by
+ * paxos_lease_acquire(), but two additional bits of info
+ * are added here: the host status flag (indicating the
+ * liveness state) and the owner name.
+ * This is done here with spaces_mutex held because the
+ * lookup/use of sp requires that.
+ */
+ if (owner_host_send) {
+ struct space *sp;
+ struct host_status *hs;
+
+ sp = find_lockspace(res.lockspace_name);
+ if (sp) {
+ hs = &sp->host_status[owner_host.host_id-1];
+ owner_host.flags = get_host_flag(sp, hs);
+ memcpy(owner_host_name, hs->owner_name, NAME_ID_SIZE);
+ }
+ }
+
pthread_mutex_unlock(&cl->mutex);
pthread_mutex_unlock(&spaces_mutex);
@@ -568,7 +600,19 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca, uint32_t cmd)
reply:
if (!recv_done)
client_recv_all(ca->ci_in, &ca->header, pos);
- send_result(ca->ci_in, fd, &ca->header, result);
+
+ if (owner_host_send) {
+ memcpy(&h, &ca->header, sizeof(struct sm_header));
+ h.version = SM_PROTO;
+ h.data = result;
+ h.length = sizeof(h) + sizeof(owner_host) + SANLK_NAME_LEN;
+ send_all(fd, &h, sizeof(h), MSG_NOSIGNAL);
+ send_all(fd, &owner_host, sizeof(owner_host), MSG_NOSIGNAL);
+ send_all(fd, owner_host_name, SANLK_NAME_LEN, MSG_NOSIGNAL);
+ } else {
+ send_result(ca->ci_in, fd, &ca->header, result);
+ }
+
client_resume(ca->ci_in);
}
@@ -2177,6 +2221,7 @@ void call_cmd_thread(struct task *task, struct cmd_args *ca)
switch (cmd) {
case SM_CMD_ACQUIRE:
+ case SM_CMD_ACQUIRE2:
cmd_acquire(task, ca, cmd);
break;
case SM_CMD_RELEASE:
diff --git a/src/lockspace.c b/src/lockspace.c
index 0756241..100bd44 100644
--- a/src/lockspace.c
+++ b/src/lockspace.c
@@ -1598,7 +1598,7 @@ int get_lockspaces(char *buf, int *len, int *count, int maxlen)
* After 80 seconds, we'd return FAIL. After 140 seconds we'd return DEAD.
*/
-static uint32_t get_host_flag(struct space *sp, struct host_status *hs)
+uint32_t get_host_flag(struct space *sp, struct host_status *hs)
{
uint32_t other_io_timeout;
int other_host_fail_seconds, other_host_dead_seconds;
diff --git a/src/lockspace.h b/src/lockspace.h
index 790a5cc..07d7cb3 100644
--- a/src/lockspace.h
+++ b/src/lockspace.h
@@ -83,4 +83,6 @@ int lockspace_set_config(struct sanlk_lockspace *ls, uint32_t flags, uint32_t cm
int lockspace_begin_rindex_op(char *space_name, int rindex_op, struct space_info *spi);
int lockspace_clear_rindex_op(char *space_name);
+/* Exported so cmd_acquire() can report the host status (liveness) flag of a lease owner. */
+uint32_t get_host_flag(struct space *sp, struct host_status *hs);
+
#endif
diff --git a/src/main.c b/src/main.c
index 6259c73..f9fa3b0 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1240,7 +1240,7 @@ static void process_cmd_thread_registered(int ci_in, struct sm_header *h_recv)
goto out;
}
- if (cl->kill_count && h_recv->cmd == SM_CMD_ACQUIRE) {
+ if (cl->kill_count && (h_recv->cmd == SM_CMD_ACQUIRE || h_recv->cmd == SM_CMD_ACQUIRE2)) {
/* when pid is being killed, we want killpath to be able
to inquire and release for it */
log_error("cmd %d %d,%d,%d kill_count %d",
@@ -1250,7 +1250,7 @@ static void process_cmd_thread_registered(int ci_in, struct sm_header *h_recv)
}
if (cl->cmd_active) {
- if (com.quiet_fail && cl->cmd_active == SM_CMD_ACQUIRE) {
+ if (com.quiet_fail && (cl->cmd_active == SM_CMD_ACQUIRE || cl->cmd_active == SM_CMD_ACQUIRE2)) {
result = -EBUSY;
goto out;
}
@@ -1399,6 +1399,7 @@ static void process_connection(int ci)
case SM_CMD_INQUIRE:
case SM_CMD_CONVERT:
case SM_CMD_KILLPATH:
+ case SM_CMD_ACQUIRE2:
/* the main_loop needs to ignore this connection
while the thread is working on it */
rv = client_suspend(ci);
@@ -2312,7 +2313,7 @@ static void print_usage(void)
printf("sanlock client add_lockspace -s LOCKSPACE\n");
printf("sanlock client inq_lockspace -s LOCKSPACE\n");
printf("sanlock client rem_lockspace -s LOCKSPACE\n");
- printf("sanlock client command -r RESOURCE -c <path> <args>\n");
+ printf("sanlock client command -r RESOURCE [-h 0|1] -c <path> <args>\n");
printf("sanlock client acquire -r RESOURCE -p|-C <id>\n");
printf("sanlock client convert -r RESOURCE -p|-C <id>\n");
printf("sanlock client release -r RESOURCE -p|-C <id>\n");
@@ -2600,7 +2601,7 @@ static int read_command_line(int argc, char *argv[])
com.wait = atoi(optionarg);
break;
case 'h':
- if (com.action == ACT_GETS || com.action == ACT_CLIENT_READ)
+ if (com.action == ACT_GETS || com.action == ACT_CLIENT_READ || com.action == ACT_COMMAND)
com.get_hosts = atoi(optionarg);
else
com.high_priority = atoi(optionarg);
@@ -2801,6 +2802,8 @@ const char *cmd_num_to_str(int cmd)
return "status";
case SM_CMD_ACQUIRE:
return "acquire";
+ case SM_CMD_ACQUIRE2:
+ return "acquire2";
case SM_CMD_RELEASE:
return "release";
case SM_CMD_INQUIRE:
@@ -2888,6 +2891,8 @@ uint32_t cmd_str_to_num(const char *str)
return SM_CMD_STATUS;
if (!strcmp(str, "acquire"))
return SM_CMD_ACQUIRE;
+ if (!strcmp(str, "acquire2"))
+ return SM_CMD_ACQUIRE2;
if (!strcmp(str, "release"))
return SM_CMD_RELEASE;
if (!strcmp(str, "inquire"))
@@ -3440,6 +3445,8 @@ static int do_client(void)
struct sanlk_host_event he;
struct sanlk_resource **res_args = NULL;
struct sanlk_resource *res;
+ struct sanlk_host owner = { 0 };
+ char *owner_name = NULL;
char *res_state = NULL;
uint32_t flags = 0;
uint32_t config_cmd = 0;
@@ -3499,11 +3506,24 @@ static int do_client(void)
flags |= com.orphan ? SANLK_ACQUIRE_ORPHAN : 0;
log_tool("acquire fd %d", fd);
- rv = sanlock_acquire(fd, -1, flags, com.res_count, com.res_args, NULL);
+
+ if (com.get_hosts)
+ rv = sanlock_acquire2(fd, -1, flags, com.res_args[0], NULL, &owner, &owner_name);
+ else
+ rv = sanlock_acquire(fd, -1, flags, com.res_count, com.res_args, NULL);
+
log_tool("acquire done %d", rv);
- if (rv < 0)
+ if (rv < 0) {
+ if (com.get_hosts && (owner.host_id || owner_name)) {
+ log_tool("owner: host_id %llu generation %llu timestamp %llu flags %x name %s",
+ (unsigned long long)owner.host_id, (unsigned long long)owner.generation,
+ (unsigned long long)owner.timestamp, owner.flags, owner_name);
+ if (owner_name)
+ free(owner_name);
+ }
goto out;
+ }
if (!command[0]) {
while (1)
diff --git a/src/resource.c b/src/resource.c
index ee8b9dc..88c33ab 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -1795,7 +1795,8 @@ int convert_token(struct task *task, struct sanlk_resource *res, struct token *c
}
int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags,
- char *killpath, char *killargs)
+ char *killpath, char *killargs,
+ struct sanlk_host *owner_host)
{
struct leader_record leader;
struct paxos_dblock dblock;
@@ -2031,11 +2032,23 @@ int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags,
return SANLK_ACQUIRE_SHRETRY;
}
if (com.quiet_fail)
- log_token(token, "acquire_token error: %d held", rv);
+ log_token(token, "acquire_token error: %d held by %llu %llu", rv,
+ (unsigned long long)leader.owner_id,
+ (unsigned long long)leader.owner_generation);
else
- log_errot(token, "acquire_token error: %d held", rv);
+ log_errot(token, "acquire_token error: %d held by %llu %llu", rv,
+ (unsigned long long)leader.owner_id,
+ (unsigned long long)leader.owner_generation);
/* We must not write zero dblock values from write_host_block()! */
release_token_nodisk_opened(task, token);
+
+ if (owner_host) {
+ owner_host->host_id = leader.owner_id;
+ owner_host->generation = leader.owner_generation;
+ owner_host->timestamp = leader.timestamp;
+ owner_host->io_timeout = leader.io_timeout;
+ }
+
return rv;
}
diff --git a/src/resource.h b/src/resource.h
index 73c13cd..f01fa6a 100644
--- a/src/resource.h
+++ b/src/resource.h
@@ -31,7 +31,7 @@ int convert_token(struct task *task, struct sanlk_resource *res, struct token *c
/* locks resource_mutex */
int acquire_token(struct task *task, struct token *token, uint32_t cmd_flags,
- char *killpath, char *killargs);
+ char *killpath, char *killargs, struct sanlk_host *owner_host);
/* locks resource_mutex */
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index e4cf682..7f0f6b7 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -150,6 +150,12 @@ int sanlock_acquire(int sock, int pid, uint32_t flags, int res_count,
struct sanlk_resource *res_args[],
struct sanlk_options *opt_in);
+/*
+ * Acquire one resource lease.  If the acquire fails because another host
+ * owns the lease, info about the owner host is returned with the error.
+ *
+ * res must describe exactly one disk, otherwise -EINVAL is returned.
+ * sock == -1: connect to the daemon and acquire for the registered pid;
+ * otherwise acquire for the caller over its registered connection sock.
+ * opt_in may be NULL.
+ * owner_host receives the owner info sent by the daemon.
+ * *owner_name, when set, is malloc'ed and must be freed by the caller.
+ */
+int sanlock_acquire2(int sock, int pid, uint32_t flags,
+ struct sanlk_resource *res,
+ struct sanlk_options *opt_in,
+ struct sanlk_host *owner_host,
+ char **owner_name);
+
int sanlock_release(int sock, int pid, uint32_t flags, int res_count,
struct sanlk_resource *res_args[]);
diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h
index e252f83..10ad24b 100644
--- a/src/sanlock_sock.h
+++ b/src/sanlock_sock.h
@@ -60,6 +60,7 @@ enum {
SM_CMD_CREATE_RESOURCE = 38,
SM_CMD_DELETE_RESOURCE = 39,
SM_CMD_REBUILD_RINDEX = 40,
+ SM_CMD_ACQUIRE2 = 41,
};
#define SM_CB_GET_EVENT 1
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.