src/direct_lib.c | 11 +--------- src/diskio.c | 20 +++++++++++++++++++ src/diskio.h | 1 src/host_id.c | 41 ++++++++++++++++++--------------------- src/host_id.h | 2 - src/main.c | 17 ++++++++++++++-- src/paxos_lease.c | 51 +++++++++++++++++++++++++++++++++++++++---------- src/paxos_lease.h | 5 +++- src/sanlock_internal.h | 2 + src/sanlock_rv.h | 2 + src/token_manager.c | 6 ++++- 11 files changed, 112 insertions(+), 46 deletions(-)
New commits: commit 4292a8dd2fefd8cb950e77f3b459524bd456e3fa Author: David Teigland teigland@redhat.com Date: Wed Apr 20 17:31:11 2011 -0500
sanlock: fix host_id reads from paxos_acquire
the threads doing paxos_acquire were copying the host_id_disk, including fd's, and reading the host_id disk using those fd's, (in use by the host_id thread) instead of opening the host_id disk themselves to read from. This probably explains why the host_id leader reads sometimes returned the wrong data.
Also add -R and -Q options to enable renewal debugging and quiet acquire failure messages.
diff --git a/src/direct_lib.c b/src/direct_lib.c index 09503cc..2c168ee 100644 --- a/src/direct_lib.c +++ b/src/direct_lib.c @@ -23,16 +23,9 @@ void log_level(int space_id GNUC_UNUSED, int token_id GNUC_UNUSED, { }
-int host_id_leader_read(struct timeout *ti GNUC_UNUSED, - char *space_name GNUC_UNUSED, - uint64_t host_id GNUC_UNUSED, - struct leader_record *leader_ret GNUC_UNUSED); +int host_id_disk_info(char *name GNUC_UNUSED, struct sync_disk *disk GNUC_UNUSED);
-int host_id_leader_read(struct timeout *ti GNUC_UNUSED, - char *space_name GNUC_UNUSED, - uint64_t host_id GNUC_UNUSED, - struct leader_record *leader_ret GNUC_UNUSED) +int host_id_disk_info(char *name GNUC_UNUSED, struct sync_disk *disk GNUC_UNUSED) { return -1; } - diff --git a/src/diskio.c b/src/diskio.c index 78a5fee..d55f40f 100644 --- a/src/diskio.c +++ b/src/diskio.c @@ -74,6 +74,26 @@ void close_disks(struct sync_disk *disks, int num_disks) close(disks[d].fd); }
+int open_disks_fd(struct sync_disk *disks, int num_disks) +{ + struct sync_disk *disk; + int num_opens = 0; + int d, fd; + + for (d = 0; d < num_disks; d++) { + disk = &disks[d]; + fd = open(disk->path, O_RDWR | O_DIRECT | O_SYNC, 0); + if (fd < 0) { + log_error("open error %d %s", fd, disk->path); + continue; + } + + disk->fd = fd; + num_opens++; + } + return num_opens; +} + /* return number of opened disks */
int open_disks(struct sync_disk *disks, int num_disks) diff --git a/src/diskio.h b/src/diskio.h index 24d55a3..3ec3b27 100644 --- a/src/diskio.h +++ b/src/diskio.h @@ -11,6 +11,7 @@
void close_disks(struct sync_disk *disks, int num_disks); int open_disks(struct sync_disk *disks, int num_disks); +int open_disks_fd(struct sync_disk *disks, int num_disks);
int write_sector(const struct sync_disk *disk, uint64_t sector_nr, const char *data, int data_len, int io_timeout_seconds, diff --git a/src/host_id.c b/src/host_id.c index 2ac381b..fcd9b8b 100644 --- a/src/host_id.c +++ b/src/host_id.c @@ -102,23 +102,20 @@ int get_space_info(char *space_name, struct space *sp_out) return rv; }
-int host_id_leader_read(struct timeout *ti, - char *space_name, uint64_t host_id, - struct leader_record *leader_ret) +int host_id_disk_info(char *name, struct sync_disk *disk) { struct space space; int rv;
- rv = get_space_info(space_name, &space); - if (rv < 0) - return rv; - - rv = delta_lease_leader_read(ti, &space.host_id_disk, space_name, - host_id, leader_ret, "host_id"); - if (rv < 0) - return rv; + pthread_mutex_lock(&spaces_mutex); + rv = _get_space_info(name, &space); + if (!rv) { + memcpy(disk, &space.host_id_disk, sizeof(struct sync_disk)); + disk->fd = -1; + } + pthread_mutex_unlock(&spaces_mutex);
- return 0; + return rv; }
/* @@ -146,10 +143,10 @@ int host_id_check(struct space *sp) gap, (unsigned long long)last_success); }
- /* - log_space(sp, "host_id_check good %d %llu", - gap, (unsigned long long)last_success); - */ + if (com.debug_renew > 1) { + log_space(sp, "host_id_check good %d %llu", + gap, (unsigned long long)last_success); + }
return 1; } @@ -248,12 +245,12 @@ static void *host_id_thread(void *arg_in) gap = last_success - sp->lease_status.renewal_last_success; sp->lease_status.renewal_last_success = last_success;
- /* - log_space(sp, "host_id %llu renewed %llu len %d interval %d", - (unsigned long long)host_id, - (unsigned long long)last_success, - delta_length, gap); - */ + if (com.debug_renew) { + log_space(sp, "host_id %llu renewed %llu len %d interval %d", + (unsigned long long)host_id, + (unsigned long long)last_success, + delta_length, gap); + }
if (!sp->thread_stop) update_watchdog_file(sp, last_success); diff --git a/src/host_id.h b/src/host_id.h index ae230c8..f8e551d 100644 --- a/src/host_id.h +++ b/src/host_id.h @@ -12,7 +12,7 @@ int print_space_state(struct space *sp, char *str); int _get_space_info(char *space_name, struct space *sp_out); int get_space_info(char *space_name, struct space *sp_out); -int host_id_leader_read(struct timeout *ti, char *space_name, uint64_t host_id, struct leader_record *leader_ret); +int host_id_disk_info(char *name, struct sync_disk *disk); int host_id_check(struct space *sp); int add_space(struct space *sp); int rem_space(char *name, struct sync_disk *disk, uint64_t host_id); diff --git a/src/main.c b/src/main.c index d5e3067..d30dd00 100644 --- a/src/main.c +++ b/src/main.c @@ -920,8 +920,13 @@ static void *cmd_acquire_thread(void *args_in)
rv = acquire_token(token, acquire_lver, new_num_hosts); if (rv < 0) { - log_errot(token, "cmd_acquire %d,%d,%d paxos_lease %d", - cl_ci, cl_fd, cl_pid, rv); + if (rv == SANLK_LIVE_LEADER && com.quiet_fail) { + log_token(token, "cmd_acquire %d,%d,%d paxos_lease %d", + cl_ci, cl_fd, cl_pid, rv); + } else { + log_errot(token, "cmd_acquire %d,%d,%d paxos_lease %d", + cl_ci, cl_fd, cl_pid, rv); + } result = rv; goto done; } @@ -2273,6 +2278,8 @@ static void print_usage(void) printf("\n"); printf("daemon\n"); printf(" -D debug: no fork and print all logging to stderr\n"); + printf(" -R <num> debug renewal: log debug info about renewals\n"); + printf(" -Q <num> quiet error messages for common lock contention\n"); printf(" -L <level> write logging at level and up to logfile (-1 none)\n"); printf(" -S <level> write logging at level and up to syslog (-1 none)\n"); printf(" -w <num> use watchdog through wdmd (1 yes, 0 no, default %d)\n", DEFAULT_USE_WATCHDOG); @@ -2488,6 +2495,12 @@ static int read_command_line(int argc, char *argv[]) optionarg = argv[i];
switch (optchar) { + case 'Q': + com.quiet_fail = atoi(optionarg); + break; + case 'R': + com.debug_renew = atoi(optionarg); + break; case 'L': log_logfile_priority = atoi(optionarg); break; diff --git a/src/paxos_lease.c b/src/paxos_lease.c index a11e020..d882706 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -26,6 +26,7 @@ #include "log.h" #include "crc32c.h" #include "host_id.h" +#include "delta_lease.h" #include "paxos_lease.h"
/* @@ -658,7 +659,7 @@ static int write_new_leader(struct timeout *ti, */
int paxos_lease_acquire(struct timeout *ti, - struct token *token, int force, + struct token *token, uint32_t flags, struct leader_record *leader_ret, uint64_t acquire_lver, int new_num_hosts) @@ -666,19 +667,20 @@ int paxos_lease_acquire(struct timeout *ti, struct leader_record prev_leader; struct leader_record new_leader; struct leader_record host_id_leader; + struct sync_disk host_id_disk; struct paxos_dblock dblock; time_t start; uint64_t last_timestamp = 0; - int error; + int error, rv, disk_open = 0;
- log_token(token, "paxos_acquire begin lver %llu force %d", - (unsigned long long)acquire_lver, force); + log_token(token, "paxos_acquire begin lver %llu flags %x", + (unsigned long long)acquire_lver, flags);
error = paxos_lease_leader_read(ti, token, &prev_leader, "paxos_acquire"); if (error < 0) goto out;
- if (force) + if (flags & PAXOS_ACQUIRE_FORCE) goto run;
if (prev_leader.timestamp == LEASE_FREE) { @@ -703,12 +705,31 @@ int paxos_lease_acquire(struct timeout *ti, log_token(token, "paxos_acquire check owner_id %llu", (unsigned long long)prev_leader.owner_id);
+ memset(&host_id_disk, 0, sizeof(host_id_disk)); + + rv = host_id_disk_info(prev_leader.space_name, &host_id_disk); + if (rv < 0) { + log_errot(token, "paxos_acquire no lockspace info %.48s", + prev_leader.space_name); + error = SANLK_BAD_SPACE_NAME; + goto out; + } + + disk_open = open_disks_fd(&host_id_disk, 1); + if (disk_open != 1) { + log_errot(token, "paxos_acquire cannot open host_id_disk"); + error = SANLK_BAD_SPACE_DISK; + goto out; + } + start = time(NULL);
while (1) { - error = host_id_leader_read(ti, prev_leader.space_name, - prev_leader.owner_id, - &host_id_leader); + error = delta_lease_leader_read(ti, &host_id_disk, + prev_leader.space_name, + prev_leader.owner_id, + &host_id_leader, + "paxos_acquire"); if (error < 0) { log_errot(token, "paxos_acquire host_id %llu read %d", (unsigned long long)prev_leader.owner_id, @@ -782,8 +803,13 @@ int paxos_lease_acquire(struct timeout *ti, /* the owner is renewing its host_id so it's alive */
if (last_timestamp && (host_id_leader.timestamp != last_timestamp)) { - log_errot(token, "paxos_acquire host_id %llu alive", - (unsigned long long)prev_leader.owner_id); + if (flags & PAXOS_ACQUIRE_QUIET_FAIL) { + log_token(token, "paxos_acquire host_id %llu alive", + (unsigned long long)prev_leader.owner_id); + } else { + log_errot(token, "paxos_acquire host_id %llu alive", + (unsigned long long)prev_leader.owner_id); + } error = SANLK_LIVE_LEADER; goto out; } @@ -857,8 +883,13 @@ int paxos_lease_acquire(struct timeout *ti, goto out;
memcpy(leader_ret, &new_leader, sizeof(struct leader_record)); + out: log_token(token, "paxos_acquire done %d", error); + + if (disk_open) + close_disks(&host_id_disk, 1); + return error; }
diff --git a/src/paxos_lease.h b/src/paxos_lease.h index b384561..89eca2b 100644 --- a/src/paxos_lease.h +++ b/src/paxos_lease.h @@ -9,12 +9,15 @@ #ifndef __PAXOS_LEASE_H__ #define __PAXOS_LEASE_H__
+#define PAXOS_ACQUIRE_FORCE 0x00000001 +#define PAXOS_ACQUIRE_QUIET_FAIL 0x00000002 + uint32_t leader_checksum(struct leader_record *lr); int majority_disks(struct token *token, int num); int paxos_lease_leader_read(struct timeout *ti, struct token *token, struct leader_record *leader_ret, const char *caller); -int paxos_lease_acquire(struct timeout *ti, struct token *token, int force, +int paxos_lease_acquire(struct timeout *ti, struct token *token, uint32_t flags, struct leader_record *leader_ret, uint64_t acquire_lver, int new_num_hosts); diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h index f2bbbe3..f8c4e2f 100644 --- a/src/sanlock_internal.h +++ b/src/sanlock_internal.h @@ -268,6 +268,8 @@ struct command_line { int type; /* COM_ */ int action; /* ACT_ */ int debug; + int debug_renew; + int quiet_fail; int use_watchdog; int high_priority; int uid; /* -U */ diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h index 53c6c4f..614e08c 100644 --- a/src/sanlock_rv.h +++ b/src/sanlock_rv.h @@ -40,5 +40,7 @@ #define SANLK_REACQUIRE_LVER -227 #define SANLK_BAD_LOCKSPACE -228 #define SANLK_OTHER_OWNER -229 +#define SANLK_BAD_SPACE_NAME -230 +#define SANLK_BAD_SPACE_DISK -231
#endif diff --git a/src/token_manager.c b/src/token_manager.c index 3b19a5d..1d215ff 100644 --- a/src/token_manager.c +++ b/src/token_manager.c @@ -123,8 +123,12 @@ int acquire_token(struct token *token, uint64_t acquire_lver, { struct leader_record leader_ret; int rv; + uint32_t flags = 0;
- rv = paxos_lease_acquire(&to, token, 0, &leader_ret, acquire_lver, + if (com.quiet_fail) + flags |= PAXOS_ACQUIRE_QUIET_FAIL; + + rv = paxos_lease_acquire(&to, token, flags, &leader_ret, acquire_lver, new_num_hosts);
token->acquire_result = rv;
sanlock-devel@lists.fedorahosted.org