This is an automated email from the git hooks/post-receive script.
teigland pushed a commit to branch master
in repository sanlock.
commit ab58c897e547c77721a52c1833cc3865ecaad3b3
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Nov 20 15:00:54 2017 -0600
sanlock: improve log messages in paxos code
A lot of messages related to "interesting" conditions
(which may want later inspection) were being written
to /var/log/messages. Change these to be written only
to sanlock.log or the memory debug buffer.
Log more paxos details so that the behavior of the
algorithm can be more fully reconstructed.
---
src/client.c | 4 +-
src/main.c | 5 +
src/paxos_lease.c | 333 +++++++++++++++++++++++++++++++++++++++----------
src/paxos_lease.h | 1 +
src/resource.c | 3 +
src/sanlock.conf | 3 +
src/sanlock_internal.h | 1 +
7 files changed, 280 insertions(+), 70 deletions(-)
diff --git a/src/client.c b/src/client.c
index 45aa06a..ad78c21 100644
--- a/src/client.c
+++ b/src/client.c
@@ -2014,6 +2014,8 @@ const char *sanlock_strerror(int rv)
return "Lease write error in dblock";
case SANLK_DBLOCK_MBAL:
return "Lease was acquired by another host in current ballot";
+ case SANLK_DBLOCK_LVER:
+ return "Lease was acquired by another host in new ballot";
case SANLK_DBLOCK_CHECKSUM:
return "Lease checksum error in dblock";
case SANLK_LEADER_READ:
@@ -2036,8 +2038,6 @@ const char *sanlock_strerror(int rv)
return "Lease num_hosts is incorrect";
case SANLK_LEADER_CHECKSUM:
return "Lease checksum error in leader";
- case SANLK_ACQUIRE_LVER:
- return "Lease leader version is unmatching";
case SANLK_ACQUIRE_LOCKSPACE:
return "Lease lockspace is not found";
case SANLK_ACQUIRE_IDDISK:
diff --git a/src/main.c b/src/main.c
index 13fec0d..0117183 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2351,6 +2351,10 @@ static void read_config_file(void)
} else if (!strcmp(str, "renewal_history_size")) {
get_val_int(line, &val);
com.renewal_history_size = val;
+
+ } else if (!strcmp(str, "paxos_debug_all")) {
+ get_val_int(line, &val);
+ com.paxos_debug_all = val;
}
}
@@ -3215,6 +3219,7 @@ int main(int argc, char *argv[])
com.renewal_read_extend_sec_set = 0;
com.renewal_read_extend_sec = 0;
com.renewal_history_size = DEFAULT_RENEWAL_HISTORY_SIZE;
+ com.paxos_debug_all = 0;
if (getgrnam("sanlock") && getpwnam("sanlock")) {
com.uname = (char *)"sanlock";
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 210543f..a1f43e3 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -35,6 +35,22 @@
uint32_t crc32c(uint32_t crc, uint8_t *data, size_t length);
int get_rand(int a, int b);
+/*
+ * BK_DEBUG_SIZE: size of buffer to hold ballot debug info,
+ * this can't be larger than LOG_STR_LEN 512
+ * BK_STR_SIZE: the max length of a dblock string for one host
+ * BK_DEBUG_COUNT: the max number of hosts for which we'll copy
+ * dblock info
+ *
+ * BK_DEBUG_COUNT * BK_STR_SIZE + extra debug text that comes before
+ * the dblock info needs to be less than BK_DEBUG_SIZE.
+ * Be very careful about increasing BK_DEBUG_COUNT because the use
+ * of strncat depends on it.
+ */
+#define BK_DEBUG_SIZE 512
+#define BK_DEBUG_COUNT 4
+#define BK_STR_SIZE 80
+
static uint32_t roundup_power_of_two(uint32_t val)
{
val--;
@@ -427,10 +443,13 @@ static int verify_dblock(struct token *token, struct paxos_dblock
*pd, uint32_t
* host1 fail
*/
-static int run_ballot(struct task *task, struct token *token, int num_hosts,
- uint64_t next_lver, uint64_t our_mbal,
+static int run_ballot(struct task *task, struct token *token, uint32_t flags,
+ int num_hosts, uint64_t next_lver, uint64_t our_mbal,
struct paxos_dblock *dblock_out)
{
+ char bk_debug[BK_DEBUG_SIZE];
+ char bk_str[BK_STR_SIZE];
+ int bk_debug_count;
struct paxos_dblock dblock;
struct paxos_dblock bk_in;
struct paxos_dblock bk_max;
@@ -477,7 +496,7 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
* component is greater than dblock[p].mbal."
*/
- log_token(token, "ballot %llu phase1 mbal %llu",
+ log_token(token, "ballot %llu phase1 write mbal %llu",
(unsigned long long)next_lver,
(unsigned long long)our_mbal);
@@ -491,6 +510,7 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
num_writes = 0;
for (d = 0; d < num_disks; d++) {
+ /* acquire io: write 1 */
rv = write_dblock(task, token, &token->disks[d], token->host_id,
&dblock);
if (rv < 0)
continue;
@@ -504,6 +524,9 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
goto out;
}
+ memset(bk_debug, 0, sizeof(bk_debug));
+ bk_debug_count = 0;
+
num_reads = 0;
for (d = 0; d < num_disks; d++) {
@@ -513,6 +536,7 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
continue;
memset(iobuf[d], 0, iobuf_len);
+ /* acquire io: read 2 */
rv = read_iobuf(disk->fd, disk->offset, iobuf[d], iobuf_len, task,
token->io_timeout, NULL);
if (rv == SANLK_AIO_TIMEOUT)
iobuf[d] = NULL;
@@ -528,6 +552,28 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
paxos_dblock_in(bk_end, &bk_in);
bk = &bk_in;
+ if (bk_in.mbal && ((flags & PAXOS_ACQUIRE_DEBUG_ALL) || (bk_in.lver >=
dblock.lver))) {
+ if (bk_debug_count >= BK_DEBUG_COUNT) {
+ log_token(token, "ballot %llu phase1 read %s",
+ (unsigned long long)next_lver, bk_debug);
+ memset(bk_debug, 0, sizeof(bk_debug));
+ bk_debug_count = 0;
+ }
+
+ memset(bk_str, 0, sizeof(bk_str));
+ snprintf(bk_str, BK_STR_SIZE, "%d:%llu:%llu:%llu:%llu:%llu:%llu:%x,", q,
+ (unsigned long long)bk_in.mbal,
+ (unsigned long long)bk_in.bal,
+ (unsigned long long)bk_in.inp,
+ (unsigned long long)bk_in.inp2,
+ (unsigned long long)bk_in.inp3,
+ (unsigned long long)bk_in.lver,
+ bk_in.flags);
+ bk_str[BK_STR_SIZE-1] = '\0';
+ strncat(bk_debug, bk_str, BK_STR_SIZE-1);
+ bk_debug_count++;
+ }
+
rv = verify_dblock(token, bk, checksum);
if (rv < 0)
continue;
@@ -538,10 +584,26 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
continue;
if (bk->lver > dblock.lver) {
- /* I don't think this should happen */
- log_errot(token, "ballot %llu larger1 lver[%d] %llu",
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "ballot %llu abort1 larger lver in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu
"
+ "our dblock %llu:%llu:%llu:%llu:%llu:%llu",
(unsigned long long)next_lver, q,
- (unsigned long long)bk->lver);
+ (unsigned long long)bk->mbal,
+ (unsigned long long)bk->bal,
+ (unsigned long long)bk->inp,
+ (unsigned long long)bk->inp2,
+ (unsigned long long)bk->inp3,
+ (unsigned long long)bk->lver,
+ (unsigned long long)dblock.mbal,
+ (unsigned long long)dblock.bal,
+ (unsigned long long)dblock.inp,
+ (unsigned long long)dblock.inp2,
+ (unsigned long long)dblock.inp3,
+ (unsigned long long)dblock.lver);
+
+ log_token(token, "ballot %llu phase1 read %s",
+ (unsigned long long)next_lver, bk_debug);
+
error = SANLK_DBLOCK_LVER;
goto out;
}
@@ -549,10 +611,26 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
/* see "It aborts the ballot" in comment above */
if (bk->mbal > dblock.mbal) {
- log_errot(token, "ballot %llu abort1 mbal %llu mbal[%d] %llu",
- (unsigned long long)next_lver,
- (unsigned long long)our_mbal, q,
- (unsigned long long)bk->mbal);
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "ballot %llu abort1 larger mbal in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu
"
+ "our dblock %llu:%llu:%llu:%llu:%llu:%llu",
+ (unsigned long long)next_lver, q,
+ (unsigned long long)bk->mbal,
+ (unsigned long long)bk->bal,
+ (unsigned long long)bk->inp,
+ (unsigned long long)bk->inp2,
+ (unsigned long long)bk->inp3,
+ (unsigned long long)bk->lver,
+ (unsigned long long)dblock.mbal,
+ (unsigned long long)dblock.bal,
+ (unsigned long long)dblock.inp,
+ (unsigned long long)dblock.inp2,
+ (unsigned long long)dblock.inp3,
+ (unsigned long long)dblock.lver);
+
+ log_token(token, "ballot %llu phase1 read %s",
+ (unsigned long long)next_lver, bk_debug);
+
error = SANLK_DBLOCK_MBAL;
goto out;
}
@@ -576,6 +654,9 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
}
}
+ log_token(token, "ballot %llu phase1 read %s",
+ (unsigned long long)next_lver, bk_debug);
+
if (!majority_disks(num_disks, num_reads)) {
log_errot(token, "ballot %llu dblock read error %d",
(unsigned long long)next_lver, rv);
@@ -614,8 +695,7 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
dblock.checksum = 0; /* set after paxos_dblock_out */
if (bk_max.inp) {
- /* not a problem, but interesting to see, so use log_error */
- log_errot(token, "ballot %llu choose bk_max[%d] lver %llu mbal %llu bal %llu inp
%llu %llu %llu",
+ log_token(token, "ballot %llu choose bk_max[%d] lver %llu mbal %llu bal %llu inp
%llu %llu %llu",
(unsigned long long)next_lver, q_max,
(unsigned long long)bk_max.lver,
(unsigned long long)bk_max.mbal,
@@ -634,7 +714,7 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
phase2 = 1;
- log_token(token, "ballot %llu phase2 bal %llu inp %llu %llu %llu q_max %d",
+ log_token(token, "ballot %llu phase2 write bal %llu inp %llu %llu %llu q_max
%d",
(unsigned long long)dblock.lver,
(unsigned long long)dblock.bal,
(unsigned long long)dblock.inp,
@@ -645,6 +725,7 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
num_writes = 0;
for (d = 0; d < num_disks; d++) {
+ /* acquire io: write 2 */
rv = write_dblock(task, token, &token->disks[d], token->host_id,
&dblock);
if (rv < 0)
continue;
@@ -658,6 +739,9 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
goto out;
}
+ memset(bk_debug, 0, sizeof(bk_debug));
+ bk_debug_count = 0;
+
num_reads = 0;
for (d = 0; d < num_disks; d++) {
@@ -667,6 +751,7 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
continue;
memset(iobuf[d], 0, iobuf_len);
+ /* acquire io: read 3 */
rv = read_iobuf(disk->fd, disk->offset, iobuf[d], iobuf_len, task,
token->io_timeout, NULL);
if (rv == SANLK_AIO_TIMEOUT)
iobuf[d] = NULL;
@@ -682,6 +767,28 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
paxos_dblock_in(bk_end, &bk_in);
bk = &bk_in;
+ if (bk->mbal && ((flags & PAXOS_ACQUIRE_DEBUG_ALL) || (bk->lver
>= dblock.lver))) {
+ if (bk_debug_count >= BK_DEBUG_COUNT) {
+ log_token(token, "ballot %llu phase2 read %s",
+ (unsigned long long)next_lver, bk_debug);
+ memset(bk_debug, 0, sizeof(bk_debug));
+ bk_debug_count = 0;
+ }
+
+ memset(bk_str, 0, sizeof(bk_str));
+ snprintf(bk_str, BK_STR_SIZE, "%d:%llu:%llu:%llu:%llu:%llu:%llu:%x,", q,
+ (unsigned long long)bk->mbal,
+ (unsigned long long)bk->bal,
+ (unsigned long long)bk->inp,
+ (unsigned long long)bk->inp2,
+ (unsigned long long)bk->inp3,
+ (unsigned long long)bk->lver,
+ bk->flags);
+ bk_str[BK_STR_SIZE-1] = '\0';
+ strncat(bk_debug, bk_str, BK_STR_SIZE-1);
+ bk_debug_count++;
+ }
+
rv = verify_dblock(token, bk, checksum);
if (rv < 0)
continue;
@@ -698,22 +805,26 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
* also be caught the the bk->mbal > dblock.mbal condition
* below.
*/
- log_errot(token, "ballot %llu larger2 lver[%d] %llu dblock %llu",
- (unsigned long long)next_lver, q,
- (unsigned long long)bk->lver,
- (unsigned long long)dblock.lver);
- log_errot(token, "ballot %llu larger2 mbal[%d] %llu dblock %llu",
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "ballot %llu abort2 larger lver in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu
"
+ "our dblock %llu:%llu:%llu:%llu:%llu:%llu",
(unsigned long long)next_lver, q,
(unsigned long long)bk->mbal,
- (unsigned long long)dblock.mbal);
- log_errot(token, "ballot %llu larger2 inp[%d] %llu %llu %llu dblock %llu %llu
%llu",
- (unsigned long long)next_lver, q,
+ (unsigned long long)bk->bal,
(unsigned long long)bk->inp,
(unsigned long long)bk->inp2,
(unsigned long long)bk->inp3,
+ (unsigned long long)bk->lver,
+ (unsigned long long)dblock.mbal,
+ (unsigned long long)dblock.bal,
(unsigned long long)dblock.inp,
(unsigned long long)dblock.inp2,
- (unsigned long long)dblock.inp3);
+ (unsigned long long)dblock.inp3,
+ (unsigned long long)dblock.lver);
+
+ log_token(token, "ballot %llu phase2 read %s",
+ (unsigned long long)next_lver, bk_debug);
+
error = SANLK_DBLOCK_LVER;
goto out;
}
@@ -721,16 +832,35 @@ static int run_ballot(struct task *task, struct token *token, int
num_hosts,
/* see "It aborts the ballot" in comment above */
if (bk->mbal > dblock.mbal) {
- log_errot(token, "ballot %llu abort2 mbal %llu mbal[%d] %llu",
- (unsigned long long)next_lver,
- (unsigned long long)our_mbal, q,
- (unsigned long long)bk->mbal);
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "ballot %llu abort2 larger mbal in bk[%d] %llu:%llu:%llu:%llu:%llu:%llu
"
+ "our dblock %llu:%llu:%llu:%llu:%llu:%llu",
+ (unsigned long long)next_lver, q,
+ (unsigned long long)bk->mbal,
+ (unsigned long long)bk->bal,
+ (unsigned long long)bk->inp,
+ (unsigned long long)bk->inp2,
+ (unsigned long long)bk->inp3,
+ (unsigned long long)bk->lver,
+ (unsigned long long)dblock.mbal,
+ (unsigned long long)dblock.bal,
+ (unsigned long long)dblock.inp,
+ (unsigned long long)dblock.inp2,
+ (unsigned long long)dblock.inp3,
+ (unsigned long long)dblock.lver);
+
+ log_token(token, "ballot %llu phase2 read %s",
+ (unsigned long long)next_lver, bk_debug);
+
error = SANLK_DBLOCK_MBAL;
goto out;
}
}
}
+ log_token(token, "ballot %llu phase2 read %s",
+ (unsigned long long)next_lver, bk_debug);
+
if (!majority_disks(num_disks, num_reads)) {
log_errot(token, "ballot %llu dblock read2 error %d",
(unsigned long long)next_lver, rv);
@@ -1115,13 +1245,18 @@ int paxos_lease_leader_read(struct task *task,
static int _lease_read_one(struct task *task,
struct token *token,
+ uint32_t flags,
struct sync_disk *disk,
struct leader_record *leader_ret,
struct paxos_dblock *our_dblock,
uint64_t *max_mbal,
int *max_q,
- const char *caller)
+ const char *caller,
+ int log_bk_vals)
{
+ char bk_debug[BK_DEBUG_SIZE];
+ char bk_str[BK_STR_SIZE];
+ int bk_debug_count;
struct leader_record leader_end;
struct paxos_dblock our_dblock_end;
struct paxos_dblock bk;
@@ -1162,6 +1297,9 @@ static int _lease_read_one(struct task *task,
if (rv < 0)
goto out;
+ memset(bk_debug, 0, sizeof(bk_debug));
+ bk_debug_count = 0;
+
for (q = 0; q < leader_ret->num_hosts; q++) {
bk_end = (struct paxos_dblock *)(iobuf + ((2 + q) * sector_size));
@@ -1169,6 +1307,29 @@ static int _lease_read_one(struct task *task,
paxos_dblock_in(bk_end, &bk);
+ if (log_bk_vals && bk.mbal &&
+ ((flags & PAXOS_ACQUIRE_DEBUG_ALL) || (bk.lver >= leader_ret->lver))) {
+ if (bk_debug_count >= BK_DEBUG_COUNT) {
+ log_token(token, "leader %llu dblocks %s",
+ (unsigned long long)leader_ret->lver, bk_debug);
+ memset(bk_debug, 0, sizeof(bk_debug));
+ bk_debug_count = 0;
+ }
+
+ memset(bk_str, 0, sizeof(bk_str));
+ snprintf(bk_str, BK_STR_SIZE, "%d:%llu:%llu:%llu:%llu:%llu:%llu:%x,", q,
+ (unsigned long long)bk.mbal,
+ (unsigned long long)bk.bal,
+ (unsigned long long)bk.inp,
+ (unsigned long long)bk.inp2,
+ (unsigned long long)bk.inp3,
+ (unsigned long long)bk.lver,
+ bk.flags);
+ bk_str[BK_STR_SIZE-1] = '\0';
+ strncat(bk_debug, bk_str, BK_STR_SIZE-1);
+ bk_debug_count++;
+ }
+
rv = verify_dblock(token, &bk, checksum);
if (rv < 0)
goto out;
@@ -1181,6 +1342,14 @@ static int _lease_read_one(struct task *task,
*max_mbal = tmp_mbal;
*max_q = tmp_q;
+ if (log_bk_vals)
+ log_token(token, "leader %llu owner %llu %llu %llu dblocks %s",
+ (unsigned long long)leader_ret->lver,
+ (unsigned long long)leader_ret->owner_id,
+ (unsigned long long)leader_ret->owner_generation,
+ (unsigned long long)leader_ret->timestamp,
+ bk_debug);
+
out:
if (rv != SANLK_AIO_TIMEOUT)
free(iobuf);
@@ -1191,6 +1360,7 @@ static int _lease_read_one(struct task *task,
static int _lease_read_num(struct task *task,
struct token *token,
+ uint32_t flags,
struct leader_record *leader_ret,
struct paxos_dblock *our_dblock,
uint64_t *max_mbal,
@@ -1226,8 +1396,8 @@ static int _lease_read_num(struct task *task,
num_reads = 0;
for (d = 0; d < num_disks; d++) {
- rv = _lease_read_one(task, token, &token->disks[d], &leader_one,
- &dblock_one, &mbal_one, &q_one, caller);
+ rv = _lease_read_one(task, token, flags, &token->disks[d], &leader_one,
+ &dblock_one, &mbal_one, &q_one, caller, 0);
if (rv < 0)
continue;
@@ -1295,19 +1465,19 @@ static int _lease_read_num(struct task *task,
* values from it.
*/
-static int paxos_lease_read(struct task *task, struct token *token,
+static int paxos_lease_read(struct task *task, struct token *token, uint32_t flags,
struct leader_record *leader_ret,
- uint64_t *max_mbal, const char *caller)
+ uint64_t *max_mbal, const char *caller, int log_bk_vals)
{
struct paxos_dblock our_dblock;
int rv, q = -1;
if (token->r.num_disks > 1)
- rv = _lease_read_num(task, token,
+ rv = _lease_read_num(task, token, flags,
leader_ret, &our_dblock, max_mbal, &q, caller);
else
- rv = _lease_read_one(task, token, &token->disks[0],
- leader_ret, &our_dblock, max_mbal, &q, caller);
+ rv = _lease_read_one(task, token, flags, &token->disks[0],
+ leader_ret, &our_dblock, max_mbal, &q, caller, log_bk_vals);
if (rv == SANLK_OK)
log_token(token, "%s leader %llu owner %llu %llu %llu max mbal[%d] %llu "
@@ -1415,7 +1585,7 @@ int paxos_lease_acquire(struct task *task,
uint64_t max_mbal;
uint64_t num_mbal;
uint64_t our_mbal;
- int copy_cur_leader = 0;
+ int copy_cur_leader;
int disk_open = 0;
int error, rv, us;
int other_io_timeout, other_host_dead_seconds;
@@ -1426,8 +1596,11 @@ int paxos_lease_acquire(struct task *task,
flags, (unsigned long long)acquire_lver, new_num_hosts);
restart:
+ memset(&tmp_leader, 0, sizeof(tmp_leader));
+ copy_cur_leader = 0;
- error = paxos_lease_read(task, token, &cur_leader, &max_mbal,
"paxos_acquire");
+ /* acquire io: read 1 */
+ error = paxos_lease_read(task, token, flags, &cur_leader, &max_mbal,
"paxos_acquire", 1);
if (error < 0)
goto out;
@@ -1596,21 +1769,11 @@ int paxos_lease_acquire(struct task *task,
if ((host_id_leader.timestamp != last_timestamp) ||
(hs.last_live && (hs.last_check == hs.last_live))) {
- if (flags & PAXOS_ACQUIRE_QUIET_FAIL) {
- log_token(token, "paxos_acquire owner %llu "
- "delta %llu %llu %llu alive",
- (unsigned long long)cur_leader.owner_id,
- (unsigned long long)host_id_leader.owner_id,
- (unsigned long long)host_id_leader.owner_generation,
- (unsigned long long)host_id_leader.timestamp);
- } else {
- log_errot(token, "paxos_acquire owner %llu "
- "delta %llu %llu %llu alive",
- (unsigned long long)cur_leader.owner_id,
- (unsigned long long)host_id_leader.owner_id,
- (unsigned long long)host_id_leader.owner_generation,
- (unsigned long long)host_id_leader.timestamp);
- }
+ log_token(token, "paxos_acquire owner %llu delta %llu %llu %llu alive",
+ (unsigned long long)cur_leader.owner_id,
+ (unsigned long long)host_id_leader.owner_id,
+ (unsigned long long)host_id_leader.owner_generation,
+ (unsigned long long)host_id_leader.timestamp);
memcpy(leader_ret, &cur_leader, sizeof(struct leader_record));
/* It's possible that the live owner has released the
@@ -1629,7 +1792,8 @@ int paxos_lease_acquire(struct task *task,
cur_leader.owner_id, &owner_dblock);
if (!rv && (owner_dblock.flags & DBLOCK_FL_RELEASED)) {
/* not an error, but interesting to see */
- log_errot(token, "paxos_acquire owner %llu %llu %llu writer %llu owner dblock
released",
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "paxos_acquire owner %llu %llu %llu writer %llu owner dblock
released",
(unsigned long long)cur_leader.owner_id,
(unsigned long long)cur_leader.owner_generation,
(unsigned long long)cur_leader.timestamp,
@@ -1684,12 +1848,26 @@ int paxos_lease_acquire(struct task *task,
goto out;
}
+ /*
+ * In this while loop we are waiting for an indication that the
+ * current owner is alive or dead, but if we see the leader
+ * owner change in the meantime, we'll restart the entire
+ * process.
+ */
+
error = paxos_lease_leader_read(task, token, &tmp_leader,
"paxos_acquire");
if (error < 0)
goto out;
if (memcmp(&cur_leader, &tmp_leader, sizeof(struct leader_record))) {
- log_token(token, "paxos_acquire restart leader changed");
+ log_token(token, "paxos_acquire restart leader changed1 from "
+ "%llu %llu %llu to %llu %llu %llu",
+ (unsigned long long)cur_leader.owner_id,
+ (unsigned long long)cur_leader.owner_generation,
+ (unsigned long long)cur_leader.timestamp,
+ (unsigned long long)tmp_leader.owner_id,
+ (unsigned long long)tmp_leader.owner_generation,
+ (unsigned long long)tmp_leader.timestamp);
goto restart;
}
}
@@ -1730,6 +1908,7 @@ int paxos_lease_acquire(struct task *task,
copy_cur_leader = 0;
memcpy(&tmp_leader, &cur_leader, sizeof(struct leader_record));
} else {
+ /* acquire io: read 1 (for retry) */
error = paxos_lease_leader_read(task, token, &tmp_leader,
"paxos_acquire");
if (error < 0)
goto out;
@@ -1743,9 +1922,10 @@ int paxos_lease_acquire(struct task *task,
if (tmp_leader.owner_id == token->host_id &&
tmp_leader.owner_generation == token->host_generation) {
- /* not a problem, but interesting to see, so use log_error */
+ /* not a problem, but interesting to see */
- log_errot(token, "paxos_acquire %llu owner is our inp "
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "paxos_acquire %llu owner is our inp "
"%llu %llu %llu commited by %llu",
(unsigned long long)next_lver,
(unsigned long long)tmp_leader.owner_id,
@@ -1756,9 +1936,10 @@ int paxos_lease_acquire(struct task *task,
memcpy(leader_ret, &tmp_leader, sizeof(struct leader_record));
error = SANLK_OK;
} else {
- /* not a problem, but interesting to see, so use log_error */
+ /* not a problem, but interesting to see */
- log_errot(token, "paxos_acquire %llu owner is %llu %llu %llu",
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "paxos_acquire %llu owner is %llu %llu %llu",
(unsigned long long)next_lver,
(unsigned long long)tmp_leader.owner_id,
(unsigned long long)tmp_leader.owner_generation,
@@ -1778,9 +1959,13 @@ int paxos_lease_acquire(struct task *task,
* next_lver is 65, but the current lver on disk is 66, causing us to
* we fail in the larger1 check.)
*/
- log_token(token, "paxos_acquire stale next_lver %llu now %llu owner %llu %llu
%llu",
+ log_token(token, "paxos_acquire %llu restart new lver %llu from "
+ "%llu %llu %llu to %llu %llu %llu",
(unsigned long long)next_lver,
(unsigned long long)tmp_leader.lver,
+ (unsigned long long)cur_leader.owner_id,
+ (unsigned long long)cur_leader.owner_generation,
+ (unsigned long long)cur_leader.timestamp,
(unsigned long long)tmp_leader.owner_id,
(unsigned long long)tmp_leader.owner_generation,
(unsigned long long)tmp_leader.timestamp);
@@ -1788,21 +1973,26 @@ int paxos_lease_acquire(struct task *task,
}
if (memcmp(&cur_leader, &tmp_leader, sizeof(struct leader_record))) {
- /* I don't think this should ever happen. */
- log_errot(token, "paxos_acquire restart leader changed2");
+ log_token(token, "paxos_acquire %llu restart leader changed2 from "
+ "%llu %llu %llu to %llu %llu %llu",
+ (unsigned long long)next_lver,
+ (unsigned long long)cur_leader.owner_id,
+ (unsigned long long)cur_leader.owner_generation,
+ (unsigned long long)cur_leader.timestamp,
+ (unsigned long long)tmp_leader.owner_id,
+ (unsigned long long)tmp_leader.owner_generation,
+ (unsigned long long)tmp_leader.timestamp);
goto restart;
}
- error = run_ballot(task, token, cur_leader.num_hosts, next_lver, our_mbal,
- &dblock);
+ error = run_ballot(task, token, flags, cur_leader.num_hosts, next_lver, our_mbal,
&dblock);
if ((error == SANLK_DBLOCK_MBAL) || (error == SANLK_DBLOCK_LVER)) {
us = get_rand(0, 1000000);
if (us < 0)
us = token->host_id * 100;
- /* not a problem, but interesting to see, so use log_error */
- log_errot(token, "paxos_acquire %llu retry delay %d us",
+ log_token(token, "paxos_acquire %llu retry delay %d us",
(unsigned long long)next_lver, us);
usleep(us);
@@ -1858,14 +2048,15 @@ int paxos_lease_acquire(struct task *task,
}
if (new_leader.owner_id != token->host_id) {
- /* not a problem, but interesting to see, so use log_error */
+ /* not a problem, but interesting to see */
/* It's possible that we commit an outdated owner id/gen here.
If we go back to the top and retry, we may find that the
owner host_id is alive but with a newer generation, and
we'd be able to get the lease by running the ballot again. */
- log_errot(token, "ballot %llu commit other owner %llu %llu %llu",
+ log_level(0, token->token_id, NULL, LOG_WARNING,
+ "ballot %llu commit other owner %llu %llu %llu",
(unsigned long long)new_leader.lver,
(unsigned long long)new_leader.owner_id,
(unsigned long long)new_leader.owner_generation,
@@ -1999,6 +2190,14 @@ int paxos_lease_release(struct task *task,
return 0;
}
+ /*
+ * When we were the writer of our own leader record, then
+ * releasing the lease includes both setting the REALEASED flag
+ * in our dblock and clearing out timestamp in the leader.
+ * When we reread the leader here in release, we should find
+ * it the same as we last saw in acquire.
+ */
+
if (leader.lver != last->lver) {
log_errot(token, "paxos_release other lver "
"last lver %llu owner %llu %llu %llu writer %llu %llu %llu "
@@ -2038,7 +2237,6 @@ int paxos_lease_release(struct task *task,
(unsigned long long)leader.write_id,
(unsigned long long)leader.write_generation,
(unsigned long long)leader.write_timestamp);
-
return SANLK_RELEASE_OWNER;
}
@@ -2061,7 +2259,6 @@ int paxos_lease_release(struct task *task,
(unsigned long long)leader.write_id,
(unsigned long long)leader.write_generation,
(unsigned long long)leader.write_timestamp);
-
return SANLK_RELEASE_OWNER;
}
diff --git a/src/paxos_lease.h b/src/paxos_lease.h
index cce85ed..e2c2484 100644
--- a/src/paxos_lease.h
+++ b/src/paxos_lease.h
@@ -13,6 +13,7 @@
#define PAXOS_ACQUIRE_QUIET_FAIL 0x00000002
#define PAXOS_ACQUIRE_SHARED 0x00000004
#define PAXOS_ACQUIRE_OWNER_NOWAIT 0x00000008
+#define PAXOS_ACQUIRE_DEBUG_ALL 0x00000010
uint32_t leader_checksum(struct leader_record *lr);
diff --git a/src/resource.c b/src/resource.c
index 81b6d99..af8f91c 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -654,6 +654,9 @@ static int acquire_disk(struct task *task, struct token *token,
if (com.quiet_fail)
flags |= PAXOS_ACQUIRE_QUIET_FAIL;
+ if (com.paxos_debug_all)
+ flags |= PAXOS_ACQUIRE_DEBUG_ALL;
+
if (token->acquire_flags & SANLK_RES_SHARED)
flags |= PAXOS_ACQUIRE_SHARED;
diff --git a/src/sanlock.conf b/src/sanlock.conf
index af753d1..4debc6b 100644
--- a/src/sanlock.conf
+++ b/src/sanlock.conf
@@ -43,4 +43,7 @@
#
# renewal_read_extend_sec = <seconds>
# command line: n/a
+#
+# paxos_debug_all = 0
+# command line: n/a
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 5d799b9..4cee6c1 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -304,6 +304,7 @@ struct command_line {
int debug;
int debug_renew;
int quiet_fail;
+ int paxos_debug_all;
int wait;
int use_watchdog;
int high_priority; /* -h */
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.