src/host_id.c | 100 +++++++++++++++++++--------------- src/host_id.h | 11 ++- src/main.c | 6 +- src/paxos_lease.c | 144 +++++++++++++++++++++++++++---------------------- src/sanlock.8 | 96 +++++++++++++++++++++++++++++++- src/sanlock_internal.h | 6 +- 6 files changed, 241 insertions(+), 122 deletions(-)
New commits: commit 63faee433069cbb9e3c16a85c3a66c2be0d33e75 Author: David Teigland teigland@redhat.com Date: Fri Sep 2 16:17:21 2011 -0500
sanlock: optimize paxos wait
When paxos_lease_acquire waits for host_dead_seconds for a delta lease to expire, the start of our waiting time can be the last timestamp update we saved from the host during our host_id renewal. This avoids always waiting for the full host_dead_seconds and could often mean waiting no additional time.
diff --git a/src/host_id.c b/src/host_id.c index ecab8fd..1b82eeb 100644 --- a/src/host_id.c +++ b/src/host_id.c @@ -236,6 +236,29 @@ int host_status_set_bit(char *space_name, uint64_t host_id) return 0; }
+int host_info(char *space_name, uint64_t host_id, struct host_status *hs_out) +{ + struct space *sp; + int found = 0; + + if (!host_id || host_id > DEFAULT_MAX_HOSTS) + return -EINVAL; + + pthread_mutex_lock(&spaces_mutex); + list_for_each_entry(sp, &spaces, list) { + if (strncmp(sp->space_name, space_name, NAME_ID_SIZE)) + continue; + memcpy(hs_out, &sp->host_status[host_id-1], sizeof(struct host_status)); + found = 1; + break; + } + pthread_mutex_unlock(&spaces_mutex); + + if (!found) + return -ENOSPC; + return 0; +} + static void create_bitmap(struct task *task, struct space *sp, char *bitmap) { uint64_t now; @@ -263,17 +286,6 @@ static void create_bitmap(struct task *task, struct space *sp, char *bitmap) pthread_mutex_unlock(&sp->mutex); }
-/* - * when entering the monitor loop in paxos_lease, once - * last_check - last_live > host_dead_seconds, it's expired - * - * at local time t=last_live, we read timstamp=X - * at local time t=last_check, we read timestamp=X - * so once the difference between last_live and last_check - * is > host_dead_seconds, the host has not renewed it's - * timestamp in host_dead_seconds. - */ - void check_other_leases(struct task *task, struct space *sp, char *buf) { struct leader_record *leader; diff --git a/src/host_id.h b/src/host_id.h index 5fab69d..fba2d03 100644 --- a/src/host_id.h +++ b/src/host_id.h @@ -14,6 +14,7 @@ int _lockspace_info(char *space_name, struct space *sp_out); int lockspace_info(char *space_name, struct space *sp_out); int lockspace_disk(char *space_name, struct sync_disk *disk); void block_watchdog_updates(char *space_name); +int host_info(char *space_name, uint64_t host_id, struct host_status *hs_out); int host_status_set_bit(char *space_name, uint64_t host_id); int host_status_clear_bit(char *space_name, uint64_t host_id); int test_id_bit(int host_id, char *bitmap); diff --git a/src/paxos_lease.c b/src/paxos_lease.c index 7323196..e7494e8 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -1011,15 +1011,16 @@ int paxos_lease_acquire(struct task *task, struct leader_record new_leader; struct paxos_dblock our_dblock; struct paxos_dblock dblock; - time_t start; + struct host_status hs; + uint64_t wait_start, now; + uint64_t last_timestamp; uint64_t next_lver; uint64_t our_mbal = 0; - uint64_t last_timestamp = 0; int copy_cur_leader = 0; int disk_open = 0; int error, rv, us;
- log_token(token, "paxos_acquire begin acquire_lver %llu flags %x", + log_token(token, "paxos_acquire begin lver %llu flags %x", (unsigned long long)acquire_lver, flags); restart:
@@ -1063,10 +1064,6 @@ int paxos_lease_acquire(struct task *task, * its watchdog has triggered and we can go for the paxos lease. */
- log_token(token, "paxos_acquire check owner_id %llu gen %llu", - (unsigned long long)cur_leader.owner_id, - (unsigned long long)cur_leader.owner_generation); - if (!disk_open) { memset(&host_id_disk, 0, sizeof(host_id_disk));
@@ -1085,17 +1082,28 @@ int paxos_lease_acquire(struct task *task, error = SANLK_ACQUIRE_IDDISK; goto out; } + }
- log_token(token, "paxos_acquire lockspace %.48s " - "path %s offset %llu sector_size %u fd %d", - cur_leader.space_name, - host_id_disk.path, - (unsigned long long)host_id_disk.offset, - host_id_disk.sector_size, - host_id_disk.fd); + rv = host_info(cur_leader.space_name, cur_leader.owner_id, &hs); + if (!rv && hs.last_check && hs.last_live && + hs.owner_id == cur_leader.owner_id && + hs.owner_generation == cur_leader.owner_generation) { + wait_start = hs.last_live; + last_timestamp = hs.timestamp; + } else { + wait_start = monotime(); + last_timestamp = 0; }
- start = monotime(); + log_token(token, "paxos_acquire owner %llu %llu %llu " + "host_status %llu %llu %llu wait_start %llu", + (unsigned long long)cur_leader.owner_id, + (unsigned long long)cur_leader.owner_generation, + (unsigned long long)cur_leader.timestamp, + (unsigned long long)hs.owner_id, + (unsigned long long)hs.owner_generation, + (unsigned long long)hs.timestamp, + (unsigned long long)wait_start);
while (1) { error = delta_lease_leader_read(task, &host_id_disk, @@ -1104,9 +1112,14 @@ int paxos_lease_acquire(struct task *task, &host_id_leader, "paxos_acquire"); if (error < 0) { - log_errot(token, "paxos_acquire host_id %llu read %d", + log_errot(token, "paxos_acquire owner %llu %llu %llu " + "delta read %d fd %d path %s off %llu ss %u", (unsigned long long)cur_leader.owner_id, - error); + (unsigned long long)cur_leader.owner_generation, + (unsigned long long)cur_leader.timestamp, + error, host_id_disk.fd, host_id_disk.path, + (unsigned long long)host_id_disk.offset, + host_id_disk.sector_size); goto out; }
@@ -1116,62 +1129,82 @@ int paxos_lease_acquire(struct task *task, and acquiring cannot take less than host_dead_seconds */
if (host_id_leader.timestamp == LEASE_FREE) { - log_token(token, "paxos_acquire host_id %llu free", + log_token(token, "paxos_acquire owner %llu delta free", (unsigned long long)cur_leader.owner_id); goto run; }
/* another host has acquired the host_id of the host that owned this paxos lease; acquiring a host_id also cannot be - done in less than host_dead_seconds */ - - if (host_id_leader.owner_id != cur_leader.owner_id) { - log_token(token, "paxos_acquire host_id %llu owner %llu", - (unsigned long long)cur_leader.owner_id, - (unsigned long long)host_id_leader.owner_id); - goto run; - } + done in less than host_dead_seconds, or
- /* the host_id that owns this lease may be alive, but it + the host_id that owns this lease may be alive, but it owned the lease in a previous generation without freeing it, and no longer owns it */
- if (host_id_leader.owner_generation > cur_leader.owner_generation) { - log_token(token, "paxos_acquire host_id %llu " - "generation now %llu old %llu", + if (host_id_leader.owner_id != cur_leader.owner_id || + host_id_leader.owner_generation > cur_leader.owner_generation) { + log_errot(token, "paxos_acquire owner %llu %llu %llu " + "delta %llu %llu %llu mismatch", (unsigned long long)cur_leader.owner_id, + (unsigned long long)cur_leader.owner_generation, + (unsigned long long)cur_leader.timestamp, + (unsigned long long)host_id_leader.owner_id, (unsigned long long)host_id_leader.owner_generation, - (unsigned long long)cur_leader.owner_generation); + (unsigned long long)host_id_leader.timestamp); goto run; }
- /* if the owner hasn't renewed its host_id lease for - host_dead_seconds then its watchdog should have fired - by now */ - - if (monotime() - start > task->host_dead_seconds) { - log_token(token, "paxos_acquire host_id %llu expired %llu", - (unsigned long long)cur_leader.owner_id, - (unsigned long long)host_id_leader.timestamp); - goto run; + if (!last_timestamp) { + last_timestamp = host_id_leader.timestamp; + goto skip_live_check; }
/* the owner is renewing its host_id so it's alive */
- if (last_timestamp && (host_id_leader.timestamp != last_timestamp)) { + if (host_id_leader.timestamp != last_timestamp) { if (flags & PAXOS_ACQUIRE_QUIET_FAIL) { - log_token(token, "paxos_acquire host_id %llu alive", - (unsigned long long)cur_leader.owner_id); + log_token(token, "paxos_acquire owner %llu " + "delta %llu %llu %llu alive", + (unsigned long long)cur_leader.owner_id, + (unsigned long long)host_id_leader.owner_id, + (unsigned long long)host_id_leader.owner_generation, + (unsigned long long)host_id_leader.timestamp); } else { - log_errot(token, "paxos_acquire host_id %llu alive", - (unsigned long long)cur_leader.owner_id); + log_errot(token, "paxos_acquire owner %llu " + "delta %llu %llu %llu alive", + (unsigned long long)cur_leader.owner_id, + (unsigned long long)host_id_leader.owner_id, + (unsigned long long)host_id_leader.owner_generation, + (unsigned long long)host_id_leader.timestamp); } error = SANLK_ACQUIRE_IDLIVE; goto out; }
- last_timestamp = host_id_leader.timestamp;
+ /* if the owner hasn't renewed its host_id lease for + host_dead_seconds then its watchdog should have fired + by now */ + + now = monotime(); + + if (now - wait_start > task->host_dead_seconds) { + log_token(token, "paxos_acquire owner %llu %llu %llu " + "delta %llu %llu %llu dead %llu-%llu>%d", + (unsigned long long)cur_leader.owner_id, + (unsigned long long)cur_leader.owner_generation, + (unsigned long long)cur_leader.timestamp, + (unsigned long long)host_id_leader.owner_id, + (unsigned long long)host_id_leader.owner_generation, + (unsigned long long)host_id_leader.timestamp, + (unsigned long long)now, + (unsigned long long)wait_start, + task->host_dead_seconds); + goto run; + } + + skip_live_check: /* TODO: test with sleep(2) here */ sleep(1);
commit af80a0f1ef076010ad98c9fb57e9188918269d61 Author: David Teigland teigland@redhat.com Date: Thu Sep 1 17:28:32 2011 -0500
sanlock: renaming functions
get_space_info() -> lockspace_info() struct host_info -> struct host_status
diff --git a/src/host_id.c b/src/host_id.c index badccd9..ecab8fd 100644 --- a/src/host_id.c +++ b/src/host_id.c @@ -123,7 +123,7 @@ static struct space *_search_space(char *name, return NULL; }
-int _get_space_info(char *space_name, struct space *sp_out) +int _lockspace_info(char *space_name, struct space *sp_out) { struct space *sp;
@@ -136,35 +136,24 @@ int _get_space_info(char *space_name, struct space *sp_out) return -1; }
-int get_space_info(char *space_name, struct space *sp_out) +int lockspace_info(char *space_name, struct space *sp_out) { int rv;
pthread_mutex_lock(&spaces_mutex); - rv = _get_space_info(space_name, sp_out); + rv = _lockspace_info(space_name, sp_out); pthread_mutex_unlock(&spaces_mutex);
return rv; }
-void block_watchdog_updates(char *space_name) -{ - struct space *sp; - - pthread_mutex_lock(&spaces_mutex); - sp = _search_space(space_name, NULL, 0, &spaces, NULL, NULL); - if (sp) - sp->block_watchdog_updates = 1; - pthread_mutex_unlock(&spaces_mutex); -} - -int host_id_disk_info(char *name, struct sync_disk *disk) +int lockspace_disk(char *space_name, struct sync_disk *disk) { struct space space; int rv;
pthread_mutex_lock(&spaces_mutex); - rv = _get_space_info(name, &space); + rv = _lockspace_info(space_name, &space); if (!rv) { memcpy(disk, &space.host_id_disk, sizeof(struct sync_disk)); disk->fd = -1; @@ -174,6 +163,17 @@ int host_id_disk_info(char *name, struct sync_disk *disk) return rv; }
+void block_watchdog_updates(char *space_name) +{ + struct space *sp; + + pthread_mutex_lock(&spaces_mutex); + sp = _search_space(space_name, NULL, 0, &spaces, NULL, NULL); + if (sp) + sp->block_watchdog_updates = 1; + pthread_mutex_unlock(&spaces_mutex); +} + #if 0 static void clear_bit(int host_id, char *bitmap) { @@ -210,7 +210,7 @@ int test_id_bit(int host_id, char *bitmap) return (*byte & mask); }
-int host_info_set_bit(char *space_name, uint64_t host_id) +int host_status_set_bit(char *space_name, uint64_t host_id) { struct space *sp; int found = 0; @@ -231,7 +231,7 @@ int host_info_set_bit(char *space_name, uint64_t host_id) return -ENOSPC;
pthread_mutex_lock(&sp->mutex); - sp->host_info[host_id-1].set_bit_time = monotime(); + sp->host_status[host_id-1].set_bit_time = monotime(); pthread_mutex_unlock(&sp->mutex); return 0; } @@ -249,12 +249,12 @@ static void create_bitmap(struct task *task, struct space *sp, char *bitmap) if (i+1 == sp->host_id) continue;
- if (!sp->host_info[i].set_bit_time) + if (!sp->host_status[i].set_bit_time) continue;
- if (now - sp->host_info[i].set_bit_time > task->request_finish_seconds) { + if (now - sp->host_status[i].set_bit_time > task->request_finish_seconds) { log_space(sp, "bitmap clear host_id %d", i+1); - sp->host_info[i].set_bit_time = 0; + sp->host_status[i].set_bit_time = 0; } else { set_id_bit(i+1, bitmap, &c); log_space(sp, "bitmap set host_id %d byte %x", i+1, c); @@ -278,7 +278,7 @@ void check_other_leases(struct task *task, struct space *sp, char *buf) { struct leader_record *leader; struct sync_disk *disk; - struct host_info *info; + struct host_status *hs; char *bitmap; uint64_t now; int i, new; @@ -292,21 +292,21 @@ void check_other_leases(struct task *task, struct space *sp, char *buf) if (i+1 == sp->host_id) continue;
- info = &sp->host_info[i]; - info->last_check = now; + hs = &sp->host_status[i]; + hs->last_check = now;
leader = (struct leader_record *)(buf + (i * disk->sector_size));
- if (info->owner_id == leader->owner_id && - info->owner_generation == leader->owner_generation && - info->timestamp == leader->timestamp) { + if (hs->owner_id == leader->owner_id && + hs->owner_generation == leader->owner_generation && + hs->timestamp == leader->timestamp) { continue; }
- info->owner_id = leader->owner_id; - info->owner_generation = leader->owner_generation; - info->timestamp = leader->timestamp; - info->last_live = now; + hs->owner_id = leader->owner_id; + hs->owner_generation = leader->owner_generation; + hs->timestamp = leader->timestamp; + hs->last_live = now;
bitmap = (char *)leader + HOSTID_BITMAP_OFFSET;
@@ -316,11 +316,11 @@ void check_other_leases(struct task *task, struct space *sp, char *buf) /* this host has made a request for us, we won't take a new request from this host for another request_finish_seconds */
- if (now - info->last_req < task->request_finish_seconds) + if (now - hs->last_req < task->request_finish_seconds) continue;
log_space(sp, "request from host_id %d", i+1); - info->last_req = now; + hs->last_req = now; new = 1; }
diff --git a/src/host_id.h b/src/host_id.h index ea5c225..5fab69d 100644 --- a/src/host_id.h +++ b/src/host_id.h @@ -10,12 +10,12 @@ #define __HOST_ID__H__
int print_space_state(struct space *sp, char *str); -int _get_space_info(char *space_name, struct space *sp_out); -int get_space_info(char *space_name, struct space *sp_out); +int _lockspace_info(char *space_name, struct space *sp_out); +int lockspace_info(char *space_name, struct space *sp_out); +int lockspace_disk(char *space_name, struct sync_disk *disk); void block_watchdog_updates(char *space_name); -int host_id_disk_info(char *name, struct sync_disk *disk); -int host_info_set_bit(char *space_name, uint64_t host_id); -int host_info_clear_bit(char *space_name, uint64_t host_id); +int host_status_set_bit(char *space_name, uint64_t host_id); +int host_status_clear_bit(char *space_name, uint64_t host_id); int test_id_bit(int host_id, char *bitmap); int check_our_lease(struct task *task, struct space *sp, int *check_all, char *check_buf); void check_other_leases(struct task *task, struct space *sp, char *buf); diff --git a/src/main.c b/src/main.c index ef98a45..f30960b 100644 --- a/src/main.c +++ b/src/main.c @@ -734,7 +734,7 @@ static int check_new_tokens_space(struct client *cl, for (i = 0; i < new_tokens_count; i++) { token = new_tokens[i];
- rv = _get_space_info(token->r.lockspace_name, &space); + rv = _lockspace_info(token->r.lockspace_name, &space);
if (!rv && !space.killing_pids && space.host_id == token->host_id) continue; @@ -928,7 +928,7 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
for (i = 0; i < new_tokens_count; i++) { token = new_tokens[i]; - rv = get_space_info(token->r.lockspace_name, &space); + rv = lockspace_info(token->r.lockspace_name, &space); if (rv < 0 || space.killing_pids) { log_errot(token, "cmd_acquire %d,%d,%d invalid lockspace " "found %d failed %d name %.48s", @@ -1429,7 +1429,7 @@ static void cmd_request(struct task *task, struct cmd_args *ca) goto reply;
if (owner_id) - host_info_set_bit(token->r.lockspace_name, owner_id); + host_status_set_bit(token->r.lockspace_name, owner_id); reply: free(token); log_debug("cmd_request %d,%d done %d", ca->ci_in, fd, result); diff --git a/src/paxos_lease.c b/src/paxos_lease.c index 1d2edf0..7323196 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -1070,7 +1070,7 @@ int paxos_lease_acquire(struct task *task, if (!disk_open) { memset(&host_id_disk, 0, sizeof(host_id_disk));
- rv = host_id_disk_info(cur_leader.space_name, &host_id_disk); + rv = lockspace_disk(cur_leader.space_name, &host_id_disk); if (rv < 0) { log_errot(token, "paxos_acquire no lockspace info %.48s", cur_leader.space_name); diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index 3d78a0c..70c696d 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -103,7 +103,7 @@ struct lease_status { char *renewal_read_buf; };
-struct host_info { +struct host_status { uint64_t last_check; /* local monotime */ uint64_t last_live; /* local monotime */ uint64_t last_req; /* local monotime */ @@ -126,11 +126,11 @@ struct space { int external_remove; int block_watchdog_updates; int thread_stop; + int wd_fd; pthread_t thread; pthread_mutex_t mutex; /* protects lease_status, thread_stop */ struct lease_status lease_status; - int wd_fd; - struct host_info host_info[DEFAULT_MAX_HOSTS]; + struct host_status host_status[DEFAULT_MAX_HOSTS]; };
/*
commit b6e7633e8c5a276be7acd8da5e9188c93bd18ce2 Author: David Teigland teigland@redhat.com Date: Thu Sep 1 17:10:00 2011 -0500
sanlock: remove old comment
We don't use real-time timestamps any longer, only monotonic timestamps. The comment about comparing real timestamps among nodes being a problem due to out-of-sync date/time settings is no longer relevant. Comparing monotonic timestamps among nodes is even less feasible.
diff --git a/src/paxos_lease.c b/src/paxos_lease.c index b5a7282..1d2edf0 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -1147,16 +1147,7 @@ int paxos_lease_acquire(struct task *task,
/* if the owner hasn't renewed its host_id lease for host_dead_seconds then its watchdog should have fired - by now - - if we trust that the clocks are in sync among hosts, then this - check could be: if (time() - host_id_leader.timestamp > - task->host_dead_seconds), but if the clocks are out of sync, - this check would easily give two hosts the lease. - - N.B. we need to be careful about ever comparing local time - to a time value we read off disk from another node that may - have different time. */ + by now */
if (monotime() - start > task->host_dead_seconds) { log_token(token, "paxos_acquire host_id %llu expired %llu", @@ -1164,14 +1155,6 @@ int paxos_lease_acquire(struct task *task, (unsigned long long)host_id_leader.timestamp); goto run; } -#if 0 - if (time(NULL) - host_id_leader.timestamp > task->host_dead_seconds) { - log_token(token, "paxos_acquire host_id %llu expired %llu", - (unsigned long long)cur_leader.owner_id, - (unsigned long long)host_id_leader.timestamp); - goto run; - } -#endif
/* the owner is renewing its host_id so it's alive */
commit 5859f32c8503e21db9bece4024341b4111dbc1c9 Author: David Teigland teigland@redhat.com Date: Thu Sep 1 16:30:40 2011 -0500
sanlock: update man page
diff --git a/src/sanlock.8 b/src/sanlock.8 index 6a9787a..f004159 100644 --- a/src/sanlock.8 +++ b/src/sanlock.8 @@ -257,6 +257,18 @@ Print the sanlock daemon internal debug log. Tell the sanlock daemon to exit. Any registered processes will be killed, their resource leases released, and lockspaces removed.
+.BR "sanlock client init -s" " LOCKSPACE" +.br +.BR "sanlock client init -r" " RESOURCE" + +Tell the sanlock daemon to initialize storage for lease areas. +(See sanlock direct init.) + +.BR "sanlock client align -s" " LOCKSPACE" + +Tell the sanlock daemon to report the required lease alignment for a storage +path. Only path is used from the LOCKSPACE argument. + .BR "sanlock client add_lockspace -s" " LOCKSPACE"
Tell the sanlock daemon to acquire the specified host_id in the lockspace. @@ -292,6 +304,23 @@ Print the resource leases held the given pid. The format is a versioned RESOURCE string "RESOURCE:lver" where lver is the version of the lease held.
+.BR "sanlock client request -r" " RESOURCE " \ +\fB-f\fP " " \fIforce_mode\fP + +Request the owner of a resource do something specified by force_mode. A +versioned RESOURCE:lver string must be used with a greater version than is +presently held. Zero lver and force_mode clears the request. + +.BR "sanlock client examine -r" " RESOURCE" + +Examine the request record for the currently held resource lease and carry +out the action specified by the requested force_mode. + +.BR "sanlock client examine -s" " LOCKSPACE" + +Examine requests for all resource leases currently held in the named +lockspace. Only lockspace_name is used from the LOCKSPACE argument. + .B "sanlock direct" .I action [options] @@ -353,7 +382,9 @@ sanlock daemon's conclusion unless the configured timeouts match.) .BI "sanlock direct dump" " path" \ \fR[\fP\fB:\fP\fIoffset\fP\fR]\fP
-Read disk sectors and print leader records for delta or paxos leases. +Read disk sectors and print leader records for delta or paxos leases. Add +-f 1 to print the request record values for paxos leases, and host_ids set +in delta lease bitmaps.
.SS LOCKSPACE option string @@ -381,8 +412,13 @@ RESOURCE option string .IR path " path to storage reserved for leases" .br .IR offset " offset on path (bytes)" -.br -.IR lver " optional leader version" + +.SS +RESOURCE option string with version + +.BR -r " " \fIlockspace_name\fP:\fIresource_name\fP:\fIpath\fP:\fIoffset\fP:\fIlver\fP +.P +.IR lver " leader version"
.SS Defaults
@@ -392,6 +428,60 @@ shows the default values for the options above. .B sanlock version shows the build version.
+.SH USAGE + +.SS Request/Examine + +The first part of making a request for a resource is writing the request +record of the resource (the sector following the leader record). To make +a successful request: +.IP \(bu 3 +RESOURCE:lver must be greater than the lver presently held by the other +host. This implies the leader record must be read to discover the lver, +prior to making a request. +.IP \(bu 3 +RESOURCE:lver must be greater than or equal to the lver presently +written to the request record. Two hosts may write a new request at the +same time for the same lver, in which case both would succeed, but the +force_mode from the last would win. +.IP \(bu 3 +The force_mode must be greater than zero. +.IP \(bu 3 +To unconditionally clear the request record (set both lver and +force_mode to 0), make request with RESOURCE:0 and force_mode 0. +.PP + +The owner of the requested resource will not know of the request unless it +is explicitly told to examine its resources via the "examine" api/command, +or otherwise notified. + +The second part of making a request is notifying the resource lease owner +that it should examine the request records of its resource leases. The +notification will cause the lease owner to automatically run the +equivalent of "sanlock client examine -s LOCKSPACE" for the lockspace of +the requested resource. + +The notification is made using a bitmap in each host_id delta lease. Each +bit represents each of the possible host_ids (1-2000). If host A wants to +notify host B to examine its resources, A sets the bit in its own bitmap +that corresponds to the host_id of B. When B next renews its delta lease, +it reads the delta leases for all hosts and checks each bitmap to see if +its own host_id has been set. It finds the bit for its own host_id set in +A's bitmap, and examines its resource request records. (The bit remains +set in A's bitmap for request_finish_seconds.)
+ +\fIforce_mode\fP determines the action the resource lease owner should +take: + +\fB1\fP (KILL_PID): kill the process holding the resource lease. When the +process has exited, the resource lease will be released, and can then be +acquired by anyone. + +\fB2\fP (BLOCK_WD): stop updating the watchdog (/dev/watchdog keepalive +via wdmd_test_live) for the lockspace, which will lead to /dev/watchdog +firing and resetting the host. The resource lease can be acquired after +the timeout for a failed host. + .SH SEE ALSO .BR wdmd (8)
sanlock-devel@lists.fedorahosted.org