This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
commit 1144bda2d8d5c4eb8259915d78157bb2e6ca0aa5
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri May 10 11:53:06 2024 -0500
sanlock: skip delay when same host acquires delta lease
When the previous delta lease owner matches our host name, and
our host name includes the local product_uuid, then allow reacquiring
the delta lease without a delay, even if it's not free (the assumption
is that this host previously failed to release it cleanly.)
---
src/delta_lease.c | 93 +++++++++++++++++++++++++++++++-------------------
src/main.c | 71 +++++++++++++++++++++++++++-----------
src/sanlock.8 | 1 +
src/sanlock_internal.h | 3 ++
4 files changed, 114 insertions(+), 54 deletions(-)
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 4ac63ef..55a5bba 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -340,7 +340,8 @@ int delta_lease_acquire(struct task *task,
uint32_t checksum;
int other_io_timeout, other_host_dead_seconds, other_id_renewal_seconds;
int i, error, rv, delay, delta_large_delay;
- int fast_free_delay = 0;
+ int is_free, is_same;
+ int no_delay = 0;
log_space(sp, "delta_acquire begin %.48s:%llu",
sp->space_name, (unsigned long long)host_id);
@@ -353,37 +354,58 @@ int delta_lease_acquire(struct task *task,
}
other_io_timeout = leader.io_timeout;
-
- if (!other_io_timeout) {
- log_erros(sp, "delta_acquire use own io_timeout %d", sp->io_timeout);
+ if (!other_io_timeout)
other_io_timeout = sp->io_timeout;
- } else if (other_io_timeout != sp->io_timeout) {
- log_erros(sp, "delta_acquire other_io_timeout %u our %u",
- leader.io_timeout, sp->io_timeout);
- }
/*
- * This delay-free reacquire could probably be used whenever
- * our_host_name matches the current leader.resource_name,
- * regardless of wheter the lease is free (i.e. we cleanly
- * released the lease last time we held it.)
+ * If the delta lease is free, and the prev owner matches our host
+ * name, then reacquire with no delay.
+ *
+ * If the delta lease is not free, and the prev owner matches our host
+ * name, and our host name is from product_uuid, then reacquire with no
+ * delay. Assumption here is that the delta lease is not free because
+ * this host did not release it cleanly last time. The non-free lease
+ * generally indicates it's being used by someone, and we should
+ * monitor it for a renewal period for updates. But, we skip this
+ * monitoring given the certainty that comes from the owner being
+ * the product_uuid, and the assumption that our product_uuid will not
+ * be used by another host.
+ *
+ * If the prev owner was not our name, we delay for a renewal period
+ * to monitor for current use from another host.
+ *
+ * If the lease is not free, we delay for a renewal period to monitor
+ * it for updates (except when owner is our product_uuid per above.)
+ *
+ * If the lease is not free, and the owner does not match our host
+ * name, then use a long delay to monitor for other hosts using it,
+ * or to ensure a prev host using this lease is dead.
*/
- if (!strncmp(leader.resource_name, our_host_name, NAME_ID_SIZE) &&
- (leader.timestamp == LEASE_FREE)) {
- log_space(sp, "delta_acquire free fast reacquire");
- fast_free_delay = 1;
- goto write_new;
- }
- if (leader.timestamp == LEASE_FREE)
- goto write_new;
+ is_free = (leader.timestamp == LEASE_FREE);
+ is_same = !memcmp(leader.resource_name, our_host_name, NAME_ID_SIZE);
+
+ if (!is_same)
+ log_debug("delta_acquire new owner %.48s old owner %.48s", our_host_name, leader.resource_name);
+
+ if (is_same || is_free) {
+ if (is_same && is_free)
+ no_delay = 1;
+ if (is_same && our_host_name_matches_product_uuid)
+ no_delay = 1;
+
+ log_space(sp, "delta_acquire %s owner, %s free, %s our_product_uuid, %s delay, other_io_timeout %d",
+ is_same ? "same" : "new",
+ is_free ? "is" : "not",
+ our_host_name_matches_product_uuid ? "is" : "not",
+ no_delay ? "no" : "short",
+ other_io_timeout);
- if (!strncmp(leader.resource_name, our_host_name, NAME_ID_SIZE)) {
- log_space(sp, "delta_acquire fast reacquire");
goto write_new;
}
- /* we need to ensure that a host_id cannot be acquired and released
+ /*
+ * we need to ensure that a host_id cannot be acquired and released
* sooner than host_dead_seconds because the change in host_id
* ownership affects the host_id "liveness" determination used by paxos
* leases, and the ownership of paxos leases cannot change until after
@@ -399,18 +421,15 @@ int delta_lease_acquire(struct task *task,
* paxos leases cannot change ownership until a min of
* host_dead_seconds to ensure the watchdog has fired. So, the timeout
* we use here must be the max of the delta delay (D+6d) and
- * host_dead_seconds */
-
- /*
- * delay = task->host_dead_seconds;
- * delta_large_delay = task->id_renewal_seconds + (6 * task->io_timeout_seconds);
- * if (delta_large_delay > delay)
- * delay = delta_large_delay;
+ * host_dead_seconds.
*/
other_host_dead_seconds = calc_host_dead_seconds(other_io_timeout);
other_id_renewal_seconds = calc_id_renewal_seconds(other_io_timeout);
+ log_space(sp, "delta_acquire other_host %.48s, timestamp %llu, other_io_timeout %d",
+ leader.resource_name, (unsigned long long)leader.timestamp, other_io_timeout);
+
delay = other_host_dead_seconds;
delta_large_delay = other_id_renewal_seconds + (6 * other_io_timeout);
if (delta_large_delay > delay)
@@ -458,11 +477,12 @@ int delta_lease_acquire(struct task *task,
write_new:
new_ts = monotime();
+ leader.version = DELTA_DISK_VERSION_MAJOR | DELTA_DISK_VERSION_MINOR;
leader.timestamp = new_ts;
leader.io_timeout = (sp->io_timeout & 0x00FF);
leader.owner_id = host_id;
leader.owner_generation++;
- snprintf(leader.resource_name, NAME_ID_SIZE, "%s", our_host_name);
+ memcpy(leader.resource_name, our_host_name, NAME_ID_SIZE);
leader.checksum = 0; /* set below */
log_space(sp, "delta_acquire write %llu %llu %llu %.48s",
@@ -489,10 +509,12 @@ int delta_lease_acquire(struct task *task,
memcpy(&leader1, &leader, sizeof(struct leader_record));
- if (fast_free_delay)
- delay = 1;
- else
- delay = 2 * other_io_timeout;
+ if (no_delay) {
+ usleep(10000);
+ goto reread;
+ }
+
+ delay = 2 * other_io_timeout;
log_space(sp, "delta_acquire delta_short_delay %d", delay);
for (i = 0; i < delay; i++) {
@@ -504,6 +526,7 @@ int delta_lease_acquire(struct task *task,
sleep(1);
}
+reread:
error = delta_lease_leader_read(task, sp->sector_size, sp->io_timeout, disk, space_name, host_id, &leader,
"delta_acquire_check");
if (error < 0) {
diff --git a/src/main.c b/src/main.c
index eb906c9..5404554 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1486,17 +1486,20 @@ int get_rand(int a, int b)
return a + (int) (((float)(b - a + 1)) * rv / (RAND_MAX+1.0));
}
-static void read_product_uuid(char *buf, size_t buf_size)
+static void read_product_uuid(void)
{
+ FILE *fp;
char full[256] = { 0 };
int len;
- FILE *fp;
+ int i, j;
+
+ memset(our_product_uuid, 0, sizeof(our_product_uuid));
+ memset(our_product_uuid_compact, 0, sizeof(our_product_uuid_compact));
if (!(fp = fopen("/sys/devices/virtual/dmi/id/product_uuid", "r")))
return;
if (!fgets(full, sizeof(full), fp)) {
- buf[0] = '\0';
fclose(fp);
return;
}
@@ -1509,13 +1512,14 @@ static void read_product_uuid(char *buf, size_t buf_size)
if (len && full[len - 1] == '\n')
full[--len] = '\0';
+ len = strlen(full);
+
/*
* Randomly pick 16 as a minimum legitimate size for a product_uuid
* (expected to be 36 for a proper uuid including dashes)
*/
if (len < 16) {
log_debug("Ignore product_uuid that is too short %d (%s)", len, full);
- buf[0] = '\0';
return;
}
@@ -1525,27 +1529,34 @@ static void read_product_uuid(char *buf, size_t buf_size)
*/
if (len > SANLK_NAME_LEN) {
log_debug("Ignore product_uuid that is too long %d (%s)", len, full);
- buf[0] = '\0';
return;
}
- /*
- * buf_size is NAME_LEN+1 for easy printing (+1 is \0),
- * the actual size used in leader_record is NAME_LEN
- * with no required termination.
- */
- memcpy(buf, full, SANLK_NAME_LEN);
+ memcpy(our_product_uuid, full, SANLK_NAME_LEN);
+
+ if (strchr(our_product_uuid, '-')) {
+ for (i = 0, j = 0; i < len; i++) {
+ if (our_product_uuid[i] == '-')
+ continue;
+ our_product_uuid_compact[j++] = our_product_uuid[i];
+ }
+
+ if (strlen(our_product_uuid_compact) < 16) {
+ log_debug("Ignore compact product_uuid that is too short (%s)", our_product_uuid_compact);
+ memset(our_product_uuid_compact, 0, sizeof(our_product_uuid_compact));
+ }
+ }
}
static void setup_host_name(void)
{
char our_host_name_long[1024] = { 0 }; /* temp buf for snprintf, then memcpy to _global */
- char product_uuid[SANLK_NAME_LEN+1] = { 0 };
char rand_uuid[37] = { 0 };
struct utsname name = { 0 };
uuid_t uu;
- memset(&our_host_name_global, 0, sizeof(our_host_name_global));
+ read_product_uuid();
+ uname(&name);
/*
* Get host name value from:
@@ -1557,20 +1568,42 @@ static void setup_host_name(void)
if (com.our_host_name_opt[0]) {
memcpy(our_host_name_global, com.our_host_name_opt, SANLK_NAME_LEN);
+
+ /*
+ * user could configure our_host_name using product_uuid,
+ * in which case we can enable the no delay optimization
+ * when using product_uuid.
+ */
+
+ if (our_product_uuid[0] &&
+ !strncmp(our_host_name_global, our_product_uuid, strlen(our_product_uuid)))
+ our_host_name_matches_product_uuid = 1;
+
+ if (our_product_uuid_compact[0] &&
+ !strncmp(our_host_name_global, our_product_uuid_compact, strlen(our_product_uuid_compact)))
+ our_host_name_matches_product_uuid = 1;
+
if (strlen(com.our_host_name_opt) > SANLK_NAME_LEN)
- log_warn("our_host_name shortened from config %s to: %s",
- com.our_host_name_opt, our_host_name_global);
+ log_warn("our_host_name shortened from config %s len %ld to: %s",
+ com.our_host_name_opt, strlen(com.our_host_name_opt), our_host_name_global);
else
log_debug("our_host_name set from config: %s", our_host_name_global);
+
+ if (our_host_name_matches_product_uuid)
+ log_debug("our_host_name uses product_uuid");
return;
}
- read_product_uuid(product_uuid, sizeof(product_uuid));
- uname(&name);
+ /*
+ * A typical 36 char uuid, plus a '.' separator, leaves space for the first
+ * 11 characters of the local nodename, which is included at the end mainly
+ * to help with readability/debugging.
+ */
- if (product_uuid[0]) {
- snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", product_uuid, name.nodename);
+ if (our_product_uuid[0]) {
+ snprintf(our_host_name_long, sizeof(our_host_name_long), "%s.%s", our_product_uuid, name.nodename);
memcpy(our_host_name_global, our_host_name_long, SANLK_NAME_LEN);
+ our_host_name_matches_product_uuid = 1;
log_debug("our_host_name set from product_uuid: %s", our_host_name_global);
} else {
memset(rand_state, 0, sizeof(rand_state));
diff --git a/src/sanlock.8 b/src/sanlock.8
index cddf23f..9b71059 100644
--- a/src/sanlock.8
+++ b/src/sanlock.8
@@ -1350,6 +1350,7 @@ command line (-e), sanlock attempts to set our_host_name from
/sys/devices/virtual/dmi/id/product_uuid. If that is not available,
sanlock generates a random uuid to use as our_host_name. Using a
fixed our_host_name value will reduce delays when using a lockspace.
+Using product_uuid will reduce delays further.
.IP \[bu] 2
renewal_read_extend_sec = <seconds>
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 5de92b5..dda7d25 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -457,7 +457,10 @@ enum {
};
EXTERN int external_shutdown;
+EXTERN int our_host_name_matches_product_uuid;
EXTERN char our_host_name_global[SANLK_NAME_LEN+1];
+EXTERN char our_product_uuid[SANLK_NAME_LEN+1];
+EXTERN char our_product_uuid_compact[SANLK_NAME_LEN+1]; /* dash chars omitted from uuid */
EXTERN int kill_count_max;
EXTERN int is_helper;
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.