src/client_admin.c | 16 ++++++++-------- src/client_msg.c | 14 +++++++------- src/client_resource.c | 24 ++++++++++++------------ src/diskio.c | 16 ++++++++-------- src/host_id.c | 16 ++++++++-------- src/main.c | 2 +- src/paxos_lease.c | 38 +++++++++++++++++--------------------- src/watchdog.c | 4 +++- 8 files changed, 64 insertions(+), 66 deletions(-)
New commits: commit 67679d023967542150f66f8a2758810e05593cf6 Author: David Teigland teigland@redhat.com Date: Tue Mar 15 16:23:38 2011 -0500
sanlock: various fixes
- when freeing a disk lease in paxos_lease_release, use the proper paxos_lease_leader_read so we don't try to free the lease if everything isn't ok (e.g. we were trying to free it even after failing to read it.)
- quit doing "return -errno;" everywhere. There were some places where it resulted in returning 0 when we shouldn't (e.g. when recv returns 0 after receiving nothing because the connection was closed), which can be very bad (e.g. from sanlock_acquire(), indicating the lease is acquired when it isn't).
- finally (I think) properly synchronize unlink_watchdog_file and update_watchdog_file
diff --git a/src/client_admin.c b/src/client_admin.c index acd9dba..e801694 100644 --- a/src/client_admin.c +++ b/src/client_admin.c @@ -45,7 +45,7 @@ int sanlock_shutdown(void)
rv = recv(fd, &h, sizeof(h), MSG_WAITALL); if (rv != sizeof(h)) - rv = -errno; + rv = -1; else rv = 0;
@@ -67,7 +67,7 @@ int sanlock_log_dump(void)
rv = recv(fd, &h, sizeof(h), MSG_WAITALL); if (rv != sizeof(h)) { - rv = -errno; + rv = -1; goto out; }
@@ -82,7 +82,7 @@ int sanlock_log_dump(void)
rv = recv(fd, buf, len, MSG_WAITALL); if (rv != len) { - rv = -errno; + rv = -1; goto out; }
@@ -178,7 +178,7 @@ int sanlock_status(int debug)
rv = recv(fd, &h, sizeof(h), MSG_WAITALL); if (rv != sizeof(h)) - return -errno; + return -1;
while (1) { @@ -186,12 +186,12 @@ int sanlock_status(int debug) if (!rv) break; if (rv != sizeof(st)) - return -errno; + return -1;
if (st.str_len) { rv = recv(fd, str, st.str_len, MSG_WAITALL); if (rv != st.str_len) - return -errno; + return -1; }
switch (st.type) { @@ -228,7 +228,7 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t flags)
rv = send(fd, (void *)ls, sizeof(struct sanlk_lockspace), 0); if (rv < 0) { - rv = -errno; + rv = -1; goto out; }
@@ -236,7 +236,7 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t flags)
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL); if (rv != sizeof(h)) { - rv = -errno; + rv = -1; goto out; }
diff --git a/src/client_msg.c b/src/client_msg.c index 5dc6673..d752af6 100644 --- a/src/client_msg.c +++ b/src/client_msg.c @@ -50,7 +50,7 @@ int setup_listener_socket(int *listener_socket)
s = socket(AF_LOCAL, SOCK_STREAM, 0); if (s < 0) - return -errno; + return -1;
rv = get_socket_address(&addr); if (rv < 0) @@ -59,21 +59,21 @@ int setup_listener_socket(int *listener_socket) unlink(addr.sun_path); rv = bind(s, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)); if (rv < 0) { - rv = -errno; + rv = -1; close(s); return rv; }
rv = listen(s, 5); if (rv < 0) { - rv = -errno; + rv = -1; close(s); return rv; }
rv = fchmod(s, 666); if (rv < 0) { - rv = -errno; + rv = -1; close(s); return rv; } @@ -88,7 +88,7 @@ int connect_socket(int *sock_fd)
s = socket(AF_LOCAL, SOCK_STREAM, 0); if (s < 0) - return -errno; + return -1;
rv = get_socket_address(&addr); if (rv < 0) @@ -96,7 +96,7 @@ int connect_socket(int *sock_fd)
rv = connect(s, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)); if (rv < 0) { - rv = -errno; + rv = -1; close(s); return rv; } @@ -118,7 +118,7 @@ int send_header(int sock, int cmd, int datalen, uint32_t data, uint32_t data2)
rv = send(sock, (void *) &header, sizeof(struct sm_header), 0); if (rv < 0) - return -errno; + return -1;
return 0; } diff --git a/src/client_resource.c b/src/client_resource.c index ab17b7e..ef19326 100644 --- a/src/client_resource.c +++ b/src/client_resource.c @@ -104,27 +104,27 @@ int sanlock_acquire(int sock, int pid, int res_count, res = res_args[i]; rv = send(fd, res, sizeof(struct sanlk_resource), 0); if (rv < 0) { - rv = -errno; + rv = -1; goto out; }
rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0); if (rv < 0) { - rv = -errno; + rv = -1; goto out; } }
rv = send(fd, &opt, sizeof(struct sanlk_options), 0); if (rv < 0) { - rv = -errno; + rv = -1; goto out; }
if (opt.len) { rv = send(fd, opt_in->str, opt.len, 0); if (rv < 0) { - rv = -errno; + rv = -1; goto out; } } @@ -133,7 +133,7 @@ int sanlock_acquire(int sock, int pid, int res_count,
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL); if (rv != sizeof(h)) { - rv = -errno; + rv = -1; goto out; }
@@ -177,7 +177,7 @@ int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char **state)
rv = send(fd, &target_host_id, sizeof(uint64_t), 0); if (rv < 0) { - rv = -errno; + rv = -1; goto out; }
@@ -185,7 +185,7 @@ int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char **state)
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL); if (rv != sizeof(h)) { - rv = -errno; + rv = -1; goto out; }
@@ -199,7 +199,7 @@ int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char **state) rv = recv(fd, reply_str, len, MSG_WAITALL); if (rv != len) { free(reply_str); - rv = -errno; + rv = -1; goto out; }
@@ -255,7 +255,7 @@ int sanlock_release(int sock, int pid, int res_count, for (i = 0; i < res_count; i++) { rv = send(fd, res_args[i], sizeof(struct sanlk_resource), 0); if (rv < 0) { - rv = -errno; + rv = -1; goto out; } } @@ -265,13 +265,13 @@ int sanlock_release(int sock, int pid, int res_count,
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL); if (rv != sizeof(h)) { - rv = -errno; + rv = -1; goto out; }
rv = recv(fd, &results, sizeof(int) * res_count, MSG_WAITALL); if (rv != sizeof(int) * res_count) { - rv = -errno; + rv = -1; goto out; }
@@ -317,7 +317,7 @@ int sanlock_setowner(int sock, int pid)
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL); if (rv != sizeof(h)) { - rv = -errno; + rv = -1; goto out; }
diff --git a/src/diskio.c b/src/diskio.c index a1bf92c..16b615a 100644 --- a/src/diskio.c +++ b/src/diskio.c @@ -149,14 +149,14 @@ static int do_write(int fd, uint64_t offset, const char *buf, int len)
ret = lseek(fd, offset, SEEK_SET); if (ret != offset) - return -errno; + return -1;
retry: rv = write(fd, buf + pos, len); if (rv == -1 && errno == EINTR) goto retry; if (rv < 0) - return -errno; + return -1;
/* if (rv != len && len == sector_size) return error? partial sector writes should not happen AFAIK, and @@ -178,7 +178,7 @@ static int do_read(int fd, uint64_t offset, char *buf, int len)
ret = lseek(fd, offset, SEEK_SET); if (ret != offset) - return -errno; + return -1;
while (pos < len) { rv = read(fd, buf + pos, len - pos); @@ -187,7 +187,7 @@ static int do_read(int fd, uint64_t offset, char *buf, int len) if (rv == -1 && errno == EINTR) continue; if (rv < 0) - return -errno; + return -1; pos += rv; }
@@ -215,7 +215,7 @@ static int do_write_aio(int fd, uint64_t offset, char *buf, int len,
rv = aio_write(&cb); if (rv < 0) - return -errno; + return -1;
rv = aio_suspend(&p_cb, 1, &ts); if (!rv) @@ -225,7 +225,7 @@ static int do_write_aio(int fd, uint64_t offset, char *buf, int len,
rv = aio_cancel(fd, &cb); if (rv < 0) - return -errno; + return -1;
if (rv == AIO_ALLDONE) return 0; @@ -264,7 +264,7 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len, int io_timeo
rv = aio_read(&cb); if (rv < 0) - return -errno; + return -1;
rv = aio_suspend(&p_cb, 1, &ts); if (!rv) @@ -274,7 +274,7 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len, int io_timeo
rv = aio_cancel(fd, &cb); if (rv < 0) - return -errno; + return -1;
if (rv == AIO_ALLDONE) return 0; diff --git a/src/host_id.c b/src/host_id.c index f6f7b83..b3dc51f 100644 --- a/src/host_id.c +++ b/src/host_id.c @@ -235,6 +235,7 @@ static void *host_id_thread(void *arg_in)
result = delta_lease_renew(sp, &sp->host_id_disk, sp->space_name, our_host_id, sp->host_id, &leader); + dl_result = result; t = leader.timestamp;
pthread_mutex_lock(&sp->mutex); @@ -251,20 +252,19 @@ static void *host_id_thread(void *arg_in) sp->lease_status.max_renewal_interval = good_diff; sp->lease_status.max_renewal_time = t; } - } - pthread_mutex_unlock(&sp->mutex);
- if (result < 0) { - log_erros(sp, "host_id %llu renewal error %d last good %llu", - (unsigned long long)sp->host_id, result, - (unsigned long long)sp->lease_status.renewal_good_time); - } else { log_space(sp, "host_id %llu renewal %llu interval %d", (unsigned long long)sp->host_id, (unsigned long long)t, good_diff);
- update_watchdog_file(sp, t); + if (!sp->thread_stop) + update_watchdog_file(sp, t); + } else { + log_erros(sp, "host_id %llu renewal error %d last good %llu", + (unsigned long long)sp->host_id, result, + (unsigned long long)sp->lease_status.renewal_good_time); } + pthread_mutex_unlock(&sp->mutex); }
/* unlink called below to get it done ASAP */ diff --git a/src/main.c b/src/main.c index b80f87f..91c930c 100644 --- a/src/main.c +++ b/src/main.c @@ -397,9 +397,9 @@ static int main_loop(void) log_space(sp, "set thread_stop"); pthread_mutex_lock(&sp->mutex); sp->thread_stop = 1; + unlink_watchdog_file(sp); pthread_cond_broadcast(&sp->cond); pthread_mutex_unlock(&sp->mutex); - unlink_watchdog_file(sp); list_move(&sp->list, &spaces_remove); } else { kill_pids(sp); diff --git a/src/paxos_lease.c b/src/paxos_lease.c index f693f62..9f815d6 100644 --- a/src/paxos_lease.c +++ b/src/paxos_lease.c @@ -854,44 +854,40 @@ int paxos_lease_release(struct token *token, struct leader_record *leader_last, struct leader_record *leader_ret) { - struct leader_record new_leader; - int rv, d; + struct leader_record leader; int error;
- for (d = 0; d < token->num_disks; d++) { - memset(&new_leader, 0, sizeof(struct leader_record)); - - rv = read_leader(&token->disks[d], &new_leader); - if (rv < 0) - continue; + error = paxos_lease_leader_read(token, &leader); + if (error < 0) { + log_errot(token, "release error cannot read leader"); + goto out; + }
- if (memcmp(&new_leader, leader_last, - sizeof(struct leader_record))) { - log_errot(token, "release error leader changed"); - return DP_BAD_LEADER; - } + if (memcmp(&leader, leader_last, sizeof(struct leader_record))) { + log_errot(token, "release error leader changed"); + return DP_BAD_LEADER; }
- if (new_leader.owner_id != token->host_id) { + if (leader.owner_id != token->host_id) { log_errot(token, "release error other owner_id %llu", - (unsigned long long)new_leader.owner_id); + (unsigned long long)leader.owner_id); return DP_OTHER_OWNER; }
- if (new_leader.next_owner_id) { + if (leader.next_owner_id) { log_errot(token, "release error next_owner_id %llu", - (unsigned long long)new_leader.next_owner_id); + (unsigned long long)leader.next_owner_id); return DP_LEADER_MIGRATE; }
- new_leader.timestamp = LEASE_FREE; - new_leader.checksum = leader_checksum(&new_leader); + leader.timestamp = LEASE_FREE; + leader.checksum = leader_checksum(&leader);
- error = write_new_leader(token, &new_leader); + error = write_new_leader(token, &leader); if (error < 0) goto out;
- memcpy(leader_ret, &new_leader, sizeof(struct leader_record)); + memcpy(leader_ret, &leader, sizeof(struct leader_record)); out: return error; } diff --git a/src/watchdog.c b/src/watchdog.c index 563654e..e92ad74 100644 --- a/src/watchdog.c +++ b/src/watchdog.c @@ -102,6 +102,8 @@ void unlink_watchdog_file(struct space *sp) if (!options.use_watchdog) return;
+ log_space(sp, "wdmd_test_live 0 0 to disable"); + rv = wdmd_test_live(sp->wd_fd, 0, 0); if (rv < 0) log_erros(sp, "wdmd_test_live failed %d", rv); @@ -209,7 +211,7 @@ static int do_write(int fd, void *buf, size_t count) if (rv == -1 && errno == EINTR) goto retry; if (rv < 0) { - return -errno; + return -1; }
if (rv != count) {
sanlock-devel@lists.fedorahosted.org