[sanlock] 02/02: sanlock: skip delay when same host acquires delta
lease
by pagure@pagure.io
This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
commit 1144bda2d8d5c4eb8259915d78157bb2e6ca0aa5
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri May 10 11:53:06 2024 -0500
sanlock: skip delay when same host acquires delta lease
When the previous delta lease owner matches our host name, and
our host name includes the local product_uuid, then allow reacquiring
the delta lease without a delay, even if it's not free (assumption
is this host did not release it cleanly previously.)
---
src/delta_lease.c | 93 +++++++++++++++++++++++++++++++-------------------
src/main.c | 71 +++++++++++++++++++++++++++-----------
src/sanlock.8 | 1 +
src/sanlock_internal.h | 3 ++
4 files changed, 114 insertions(+), 54 deletions(-)
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 4ac63ef..55a5bba 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -340,7 +340,8 @@ int delta_lease_acquire(struct task *task,
uint32_t checksum;
int other_io_timeout, other_host_dead_seconds, other_id_renewal_seconds;
int i, error, rv, delay, delta_large_delay;
- int fast_free_delay = 0;
+ int is_free, is_same;
+ int no_delay = 0;
log_space(sp, "delta_acquire begin %.48s:%llu",
sp->space_name, (unsigned long long)host_id);
@@ -353,37 +354,58 @@ int delta_lease_acquire(struct task *task,
}
other_io_timeout = leader.io_timeout;
-
- if (!other_io_timeout) {
- log_erros(sp, "delta_acquire use own io_timeout %d", sp->io_timeout);
+ if (!other_io_timeout)
other_io_timeout = sp->io_timeout;
- } else if (other_io_timeout != sp->io_timeout) {
- log_erros(sp, "delta_acquire other_io_timeout %u our %u",
- leader.io_timeout, sp->io_timeout);
- }
/*
- * This delay-free reacquire could probably be used whenever
- * our_host_name matches the current leader.resource_name,
- * regardless of wheter the lease is free (i.e. we cleanly
- * released the lease last time we held it.)
+ * If the delta lease is free, and the prev owner matches our host
+ * name, then reacquire with no delay.
+ *
+ * If the delta lease is not free, and the prev owner matches our host
+ * name, and our host name is from product_uuid, then reacquire with no
+ * delay. Assumption here is that the delta lease is not free because
+ * this host did not release it cleanly last time. The non-free lease
+ * generally indicates it's being used by someone, and we should
+ * monitor it for a renewal period for updates. But, we skip this
+ * monitoring given the certainty that comes from the owner being
+ * the product_uuid, and the assumption that our product_uuid will not
+ * be used by another host.
+ *
+ * If the prev owner was not our name, we delay for a renewal period
+ * to monitor for current use from another host.
+ *
+ * If the lease is not free, we delay and monitor it for a renewal period
+ * (except when owner is our product_uuid per above.)
+ *
+ * If the lease is not free, and the owner does not match our host
+ * name, then use a long delay to monitor for other hosts using it,
+ * or to ensure a prev host using this lease is dead.
*/
- if (!strncmp(leader.resource_name, our_host_name, NAME_ID_SIZE) &&
- (leader.timestamp == LEASE_FREE)) {
- log_space(sp, "delta_acquire free fast reacquire");
- fast_free_delay = 1;
- goto write_new;
- }
- if (leader.timestamp == LEASE_FREE)
- goto write_new;
+ is_free = (leader.timestamp == LEASE_FREE);
+ is_same = !memcmp(leader.resource_name, our_host_name, NAME_ID_SIZE);
+
+ if (!is_same)
+ log_debug("delta_acquire new owner %.48s old owner %.48s", our_host_name, leader.resource_name);
+
+ if (is_same || is_free) {
+ if (is_same && is_free)
+ no_delay = 1;
+ if (is_same && our_host_name_matches_product_uuid)
+ no_delay = 1;
+
+ log_space(sp, "delta_acquire %s owner, %s free, %s our_product_uuid, %s delay, other_io_timeout %d",
+ is_same ? "same" : "new",
+ is_free ? "is" : "not",
+ our_host_name_matches_product_uuid ? "is" : "not",
+ no_delay ? "no" : "short",
+ other_io_timeout);
- if (!strncmp(leader.resource_name, our_host_name, NAME_ID_SIZE)) {
- log_space(sp, "delta_acquire fast reacquire");
goto write_new;
}
- /* we need to ensure that a host_id cannot be acquired and released
+ /*
+ * we need to ensure that a host_id cannot be acquired and released
* sooner than host_dead_seconds because the change in host_id
* ownership affects the host_id "liveness" determination used by paxos
* leases, and the ownership of paxos leases cannot change until after
@@ -399,18 +421,15 @@ int delta_lease_acquire(struct task *task,
* paxos leases cannot change ownership until a min of
* host_dead_seconds to ensure the watchdog has fired. So, the timeout
* we use here must be the max of the delta delay (D+6d) and
- * host_dead_seconds */
-
- /*
- * delay = task->host_dead_seconds;
- * delta_large_delay = task->id_renewal_seconds + (6 * task->io_timeout_seconds);
- * if (delta_large_delay > delay)
- * delay = delta_large_delay;
+ * host_dead_seconds.
*/
other_host_dead_seconds = calc_host_dead_seconds(other_io_timeout);
other_id_renewal_seconds = calc_id_renewal_seconds(other_io_timeout);
+ log_space(sp, "delta_acquire other_host %.48s, timestamp %llu, other_io_timeout %d",
+ leader.resource_name, (unsigned long long)leader.timestamp, other_io_timeout);
+
delay = other_host_dead_seconds;
delta_large_delay = other_id_renewal_seconds + (6 * other_io_timeout);
if (delta_large_delay > delay)
@@ -458,11 +477,12 @@ int delta_lease_acquire(struct task *task,
write_new:
new_ts = monotime();
+ leader.version = DELTA_DISK_VERSION_MAJOR | DELTA_DISK_VERSION_MINOR;
leader.timestamp = new_ts;
leader.io_timeout = (sp->io_timeout & 0x00FF);
leader.owner_id = host_id;
leader.owner_generation++;
- snprintf(leader.resource_name, NAME_ID_SIZE, "%s", our_host_name);
+ memcpy(leader.resource_name, our_host_name, NAME_ID_SIZE);
leader.checksum = 0; /* set below */
log_space(sp, "delta_acquire write %llu %llu %llu %.48s",
@@ -489,10 +509,12 @@ int delta_lease_acquire(struct task *task,
memcpy(&leader1, &leader, sizeof(struct leader_record));
- if (fast_free_delay)
- delay = 1;
- else
- delay = 2 * other_io_timeout;
+ if (no_delay) {
+ usleep(10000);
+ goto reread;
+ }
+
+ delay = 2 * other_io_timeout;
log_space(sp, "delta_acquire delta_short_delay %d", delay);
for (i = 0; i < delay; i++) {
@@ -504,6 +526,7 @@ int delta_lease_acquire(struct task *task,
sleep(1);
}
+reread:
error = delta_lease_leader_read(task, sp->sector_size, sp->io_timeout, disk, space_name, host_id, &leader,
"delta_acquire_check");
if (error < 0) {
diff --git a/src/main.c b/src/main.c
index eb906c9..5404554 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1486,17 +1486,20 @@ int get_rand(int a, int b)
return a + (int) (((float)(b - a + 1)) * rv / (RAND_MAX+1.0));
}
-static void read_product_uuid(char *buf, size_t buf_size)
+static void read_product_uuid(void)
{
+ FILE *fp;
char full[256] = { 0 };
int len;
- FILE *fp;
+ int i, j;
+
+ memset(our_product_uuid, 0, sizeof(our_product_uuid));
+ memset(our_product_uuid_compact, 0, sizeof(our_product_uuid_compact));
if (!(fp = fopen("/sys/devices/virtual/dmi/id/product_uuid", "r")))
return;
if (!fgets(full, sizeof(full), fp)) {
- buf[0] = '\0';
fclose(fp);
return;
}
@@ -1509,13 +1512,14 @@ static void read_product_uuid(char *buf, size_t buf_size)
if (len && full[len - 1] == '\n')
full[--len] = '\0';
+ len = strlen(full);
+
/*
* Randomly pick 16 as a minimum legitimate size for a product_uuid
* (expected to be 36 for a proper uuid including dashes)
*/
if (len < 16) {
log_debug("Ignore product_uuid that is too short %d (%s)", len, full);
- buf[0] = '\0';
return;
}
@@ -1525,27 +1529,34 @@ static void read_product_uuid(char *buf, size_t buf_size)
*/
if (len > SANLK_NAME_LEN) {
log_debug("Ignore product_uuid that is too long %d (%s)", len, full);
- buf[0] = '\0';
return;
}
- /*
- * buf_size is NAME_LEN+1 for easy printing (+1 is \0),
- * the actual size used in leader_record is NAME_LEN
- * with no required termination.
- */
- memcpy(buf, full, SANLK_NAME_LEN);
+ memcpy(our_product_uuid, full, SANLK_NAME_LEN);
+
+ if (strchr(our_product_uuid, '-')) {
+ for (i = 0, j = 0; i < len; i++) {
+ if (our_product_uuid[i] == '-')
+ continue;
+ our_product_uuid_compact[j++] = our_product_uuid[i];
+ }
+
+ if (strlen(our_product_uuid_compact) < 16) {
+ log_debug("Ignore compact product_uuid that is too short (%s)", our_product_uuid_compact);
+ memset(our_product_uuid_compact, 0, sizeof(our_product_uuid_compact));
+ }
+ }
}
static void setup_host_name(void)
{
char our_host_name_long[1024] = { 0 }; /* temp buf for snprintf, then memcpy to _global */
- char product_uuid[SANLK_NAME_LEN+1] = { 0 };
char rand_uuid[37] = { 0 };
struct utsname name = { 0 };
uuid_t uu;
- memset(&our_host_name_global, 0, sizeof(our_host_name_global));
+ read_product_uuid();
+ uname(&name);
/*
* Get host name value from:
@@ -1557,20 +1568,42 @@ static void setup_host_name(void)
if (com.our_host_name_opt[0]) {
memcpy(our_host_name_global, com.our_host_name_opt, SANLK_NAME_LEN);
+
+ /*
+ * user could configure our_host_name using product_uuid,
+ * in which case we can enable the no delay optimization
+ * when using product_uuid.
+ */
+
+ if (our_product_uuid[0] &&
+ !strncmp(our_host_name_global, our_product_uuid, strlen(our_product_uuid)))
+ our_host_name_matches_product_uuid = 1;
+
+ if (our_product_uuid_compact[0] &&
+ !strncmp(our_host_name_global, our_product_uuid_compact, strlen(our_product_uuid_compact)))
+ our_host_name_matches_product_uuid = 1;
+
if (strlen(com.our_host_name_opt) > SANLK_NAME_LEN)
- log_warn("our_host_name shortened from config %s to: %s",
- com.our_host_name_opt, our_host_name_global);
+ log_warn("our_host_name shortened from config %s len %ld to: %s",
+ com.our_host_name_opt, strlen(com.our_host_name_opt), our_host_name_global);
else
log_debug("our_host_name set from config: %s", our_host_name_global);
+
+ if (our_host_name_matches_product_uuid)
+ log_debug("our_host_name uses product_uuid");
return;
}
- read_product_uuid(product_uuid, sizeof(product_uuid));
- uname(&name);
+ /*
+ * A typical 36 char uuid, plus a '.' separator, leaves space for the first
+ * 11 characters of the local nodename, which is included at the end mainly
+ * to help with readability/debugging.
+ */
- if (product_uuid[0]) {
- snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", product_uuid, name.nodename);
+ if (our_product_uuid[0]) {
+ snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", our_product_uuid, name.nodename);
memcpy(our_host_name_global, our_host_name_long, SANLK_NAME_LEN);
+ our_host_name_matches_product_uuid = 1;
log_debug("our_host_name set from product_uuid: %s", our_host_name_global);
} else {
memset(rand_state, 0, sizeof(rand_state));
diff --git a/src/sanlock.8 b/src/sanlock.8
index cddf23f..9b71059 100644
--- a/src/sanlock.8
+++ b/src/sanlock.8
@@ -1350,6 +1350,7 @@ command line (-e), sanlock attempts to set our_host_name from
/sys/devices/virtual/dmi/id/product_uuid. If that is not available,
sanlock generates a random uuid to use as our_host_name. Using a
fixed our_host_name value will reduce delays when using a lockspace.
+Using product_uuid will reduce delays further.
.IP \[bu] 2
renewal_read_extend_sec = <seconds>
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 5de92b5..dda7d25 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -457,7 +457,10 @@ enum {
};
EXTERN int external_shutdown;
+EXTERN int our_host_name_matches_product_uuid;
EXTERN char our_host_name_global[SANLK_NAME_LEN+1];
+EXTERN char our_product_uuid[SANLK_NAME_LEN+1];
+EXTERN char our_product_uuid_compact[SANLK_NAME_LEN+1]; /* dash chars omitted from uuid */
EXTERN int kill_count_max;
EXTERN int is_helper;
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
6 days, 23 hours
[sanlock] 01/02: sanlock: improve handling our_host_name
by pagure@pagure.io
This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
commit 59d8d870b339ef9655169e74a03f7741d4efbb9f
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Thu May 9 11:28:48 2024 -0500
sanlock: improve handling our_host_name
---
src/main.c | 87 +++++++++++++++++++++++++++++++++++---------------
src/sanlock.8 | 10 ++++--
src/sanlock_internal.h | 2 +-
3 files changed, 71 insertions(+), 28 deletions(-)
diff --git a/src/main.c b/src/main.c
index 403e358..eb906c9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1488,28 +1488,64 @@ int get_rand(int a, int b)
static void read_product_uuid(char *buf, size_t buf_size)
{
+ char full[256] = { 0 };
+ int len;
FILE *fp;
- size_t len;
if (!(fp = fopen("/sys/devices/virtual/dmi/id/product_uuid", "r")))
return;
- if (!fgets(buf, buf_size, fp))
+ if (!fgets(full, sizeof(full), fp)) {
+ buf[0] = '\0';
+ fclose(fp);
return;
+ }
+ fclose(fp);
- if ((len = strlen(buf)) && buf[len - 1] == '\n')
- buf[--len] = '\0';
+ full[sizeof(full)-1] = '\0';
- fclose(fp);
+ len = strlen(full);
+
+ if (len && full[len - 1] == '\n')
+ full[--len] = '\0';
+
+ /*
+ * Randomly pick 16 as a minimum legitimate size for a product_uuid
+ * (expected to be 36 for a proper uuid including dashes)
+ */
+ if (len < 16) {
+ log_debug("Ignore product_uuid that is too short %d (%s)", len, full);
+ buf[0] = '\0';
+ return;
+ }
+
+ /*
+ * We can't trust that only a portion of the product_uuid
+ * would be unique.
+ */
+ if (len > SANLK_NAME_LEN) {
+ log_debug("Ignore product_uuid that is too long %d (%s)", len, full);
+ buf[0] = '\0';
+ return;
+ }
+
+ /*
+ * buf_size is NAME_LEN+1 for easy printing (+1 is \0),
+ * the actual size used in leader_record is NAME_LEN
+ * with no required termination.
+ */
+ memcpy(buf, full, SANLK_NAME_LEN);
}
static void setup_host_name(void)
{
+ char our_host_name_long[1024] = { 0 }; /* temp buf for snprintf, then memcpy to _global */
char product_uuid[SANLK_NAME_LEN+1] = { 0 };
- char uuid[37];
- struct utsname name;
+ char rand_uuid[37] = { 0 };
+ struct utsname name = { 0 };
uuid_t uu;
- int ret;
+
+ memset(&our_host_name_global, 0, sizeof(our_host_name_global));
/*
* Get host name value from:
@@ -1519,31 +1555,32 @@ static void setup_host_name(void)
* 4. generate random uuid that won't collide with another host
*/
- if (com.our_host_name[0]) {
- memcpy(our_host_name_global, com.our_host_name, SANLK_NAME_LEN);
+ if (com.our_host_name_opt[0]) {
+ memcpy(our_host_name_global, com.our_host_name_opt, SANLK_NAME_LEN);
+ if (strlen(com.our_host_name_opt) > SANLK_NAME_LEN)
+ log_warn("our_host_name shortened from config %s to: %s",
+ com.our_host_name_opt, our_host_name_global);
+ else
+ log_debug("our_host_name set from config: %s", our_host_name_global);
return;
}
- memset(&our_host_name_global, 0, sizeof(our_host_name_global));
- memset(product_uuid, 0, sizeof(product_uuid));
- memset(&name, 0, sizeof(name));
- memset(&uuid, 0, sizeof(uuid));
-
read_product_uuid(product_uuid, sizeof(product_uuid));
uname(&name);
if (product_uuid[0]) {
- ret = snprintf(our_host_name_global, SANLK_NAME_LEN, "%s.", product_uuid);
+ snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", product_uuid, name.nodename);
+ memcpy(our_host_name_global, our_host_name_long, SANLK_NAME_LEN);
+ log_debug("our_host_name set from product_uuid: %s", our_host_name_global);
} else {
memset(rand_state, 0, sizeof(rand_state));
initstate(time(NULL), rand_state, sizeof(rand_state));
uuid_generate(uu);
- uuid_unparse_lower(uu, uuid);
- ret = snprintf(our_host_name_global, SANLK_NAME_LEN, "%s.", uuid);
+ uuid_unparse_lower(uu, rand_uuid);
+ snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", rand_uuid, name.nodename);
+ memcpy(our_host_name_global, our_host_name_long, SANLK_NAME_LEN);
+ log_debug("our_host_name set from uuid_generate: %s", our_host_name_global);
}
-
- if (ret < SANLK_NAME_LEN)
- memcpy(our_host_name_global+ret, name.nodename, SANLK_NAME_LEN-ret);
}
static void setup_limits(void)
@@ -2170,8 +2207,8 @@ static void print_usage(void)
printf(" -l <num> use mlockall (0 none, 1 current, 2 current and future) (%d)\n", DEFAULT_MLOCK_LEVEL);
printf(" -b <sec> seconds a host id bit will remain set in delta lease bitmap\n");
printf(" (default: 6 * io_timeout)\n");
- printf(" -e <str> local host name used in delta leases\n");
- printf(" (default: generate new uuid)\n");
+ printf(" -e <str> local host name used in delta leases, max len 48\n");
+ printf(" (default: product_uuid or randomly generated uuid)\n");
printf("\n");
printf("sanlock client <action> [options]\n");
printf("sanlock client status [-D] [-o p|s]\n");
@@ -2514,7 +2551,7 @@ static int read_command_line(int argc, char *argv[])
if (com.rindex_op) {
parse_arg_rentry(optionarg);
} else {
- strncpy(com.our_host_name, optionarg, NAME_ID_SIZE);
+ strncpy(com.our_host_name_opt, optionarg, sizeof(com.our_host_name_opt)-1);
com.he_event = strtoull(optionarg, NULL, 0);
}
break;
@@ -2910,7 +2947,7 @@ static void read_config_file(void)
} else if (!strcmp(str, "our_host_name")) {
memset(str, 0, sizeof(str));
get_val_str(line, str);
- memcpy(com.our_host_name, str, NAME_ID_SIZE);
+ strncpy(com.our_host_name_opt, str, sizeof(com.our_host_name_opt)-1);
} else if (!strcmp(str, "renewal_read_extend_sec")) {
/* zero is a valid setting so we need the _set field to say it's set */
diff --git a/src/sanlock.8 b/src/sanlock.8
index 6466689..cddf23f 100644
--- a/src/sanlock.8
+++ b/src/sanlock.8
@@ -612,7 +612,7 @@ use mlockall (0 none, 1 current, 2 current and future)
seconds a host id bit will remain set in delta lease bitmap
.BI -e " str"
-local host name used in delta leases
+unique local host name used in delta leases as host_id owner
.\" non-aio is untested and may not work
.\" .BR \-a " 0|1"
@@ -1343,7 +1343,13 @@ See -G
.IP \[bu] 2
our_host_name = <str>
.br
-See -e
+A unique name that a host uses to ensure exclusive ownership of a
+lockspace host_id (delta lease owner.) The maximum length is 48
+characters. If no value is provided in sanlock.conf or on the
+command line (-e), sanlock attempts to set our_host_name from
+/sys/devices/virtual/dmi/id/product_uuid. If that is not available,
+sanlock generates a random uuid to use as our_host_name. Using a
+fixed our_host_name value will reduce delays when using a lockspace.
.IP \[bu] 2
renewal_read_extend_sec = <seconds>
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index a8ffdf5..5de92b5 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -393,7 +393,7 @@ struct command_line {
int renewal_history_size;
int renewal_read_extend_sec_set; /* 1 if renewal_read_extend_sec is configured */
uint32_t renewal_read_extend_sec;
- char our_host_name[SANLK_NAME_LEN+1];
+ char our_host_name_opt[256]; /* max SANLK_NAME_LEN will be used */
char *file_path;
char *dump_path;
int rindex_op;
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
6 days, 23 hours