src/client_admin.c | 16 ++++++++--------
src/client_msg.c | 14 +++++++-------
src/client_resource.c | 24 ++++++++++++------------
src/diskio.c | 16 ++++++++--------
src/host_id.c | 16 ++++++++--------
src/main.c | 2 +-
src/paxos_lease.c | 38 +++++++++++++++++---------------------
src/watchdog.c | 4 +++-
8 files changed, 64 insertions(+), 66 deletions(-)
New commits:
commit 67679d023967542150f66f8a2758810e05593cf6
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Mar 15 16:23:38 2011 -0500
sanlock: various fixes
- when freeing a disk lease in paxos_lease_release, use
the proper paxos_lease_leader_read so we don't try to
free the lease if everything isn't ok (e.g. we were
trying to free it even after failing to read it.)
- quit doing "return -errno;" everywhere. There were some
places where it resulted in returning 0 when we shouldn't
(e.g. when recv returns 0, having received nothing because the
connection was closed), which can be very bad (e.g. from
sanlock_acquire(), indicating the lease is acquired when it
isn't).
- finally (I think) properly synchronize unlink_watchdog_file and
update_watchdog_file
diff --git a/src/client_admin.c b/src/client_admin.c
index acd9dba..e801694 100644
--- a/src/client_admin.c
+++ b/src/client_admin.c
@@ -45,7 +45,7 @@ int sanlock_shutdown(void)
rv = recv(fd, &h, sizeof(h), MSG_WAITALL);
if (rv != sizeof(h))
- rv = -errno;
+ rv = -1;
else
rv = 0;
@@ -67,7 +67,7 @@ int sanlock_log_dump(void)
rv = recv(fd, &h, sizeof(h), MSG_WAITALL);
if (rv != sizeof(h)) {
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -82,7 +82,7 @@ int sanlock_log_dump(void)
rv = recv(fd, buf, len, MSG_WAITALL);
if (rv != len) {
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -178,7 +178,7 @@ int sanlock_status(int debug)
rv = recv(fd, &h, sizeof(h), MSG_WAITALL);
if (rv != sizeof(h))
- return -errno;
+ return -1;
while (1) {
@@ -186,12 +186,12 @@ int sanlock_status(int debug)
if (!rv)
break;
if (rv != sizeof(st))
- return -errno;
+ return -1;
if (st.str_len) {
rv = recv(fd, str, st.str_len, MSG_WAITALL);
if (rv != st.str_len)
- return -errno;
+ return -1;
}
switch (st.type) {
@@ -228,7 +228,7 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t
flags)
rv = send(fd, (void *)ls, sizeof(struct sanlk_lockspace), 0);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -236,7 +236,7 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t
flags)
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL);
if (rv != sizeof(h)) {
- rv = -errno;
+ rv = -1;
goto out;
}
diff --git a/src/client_msg.c b/src/client_msg.c
index 5dc6673..d752af6 100644
--- a/src/client_msg.c
+++ b/src/client_msg.c
@@ -50,7 +50,7 @@ int setup_listener_socket(int *listener_socket)
s = socket(AF_LOCAL, SOCK_STREAM, 0);
if (s < 0)
- return -errno;
+ return -1;
rv = get_socket_address(&addr);
if (rv < 0)
@@ -59,21 +59,21 @@ int setup_listener_socket(int *listener_socket)
unlink(addr.sun_path);
rv = bind(s, (struct sockaddr *) &addr, sizeof(struct sockaddr_un));
if (rv < 0) {
- rv = -errno;
+ rv = -1;
close(s);
return rv;
}
rv = listen(s, 5);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
close(s);
return rv;
}
rv = fchmod(s, 666);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
close(s);
return rv;
}
@@ -88,7 +88,7 @@ int connect_socket(int *sock_fd)
s = socket(AF_LOCAL, SOCK_STREAM, 0);
if (s < 0)
- return -errno;
+ return -1;
rv = get_socket_address(&addr);
if (rv < 0)
@@ -96,7 +96,7 @@ int connect_socket(int *sock_fd)
rv = connect(s, (struct sockaddr *) &addr, sizeof(struct sockaddr_un));
if (rv < 0) {
- rv = -errno;
+ rv = -1;
close(s);
return rv;
}
@@ -118,7 +118,7 @@ int send_header(int sock, int cmd, int datalen, uint32_t data,
uint32_t data2)
rv = send(sock, (void *) &header, sizeof(struct sm_header), 0);
if (rv < 0)
- return -errno;
+ return -1;
return 0;
}
diff --git a/src/client_resource.c b/src/client_resource.c
index ab17b7e..ef19326 100644
--- a/src/client_resource.c
+++ b/src/client_resource.c
@@ -104,27 +104,27 @@ int sanlock_acquire(int sock, int pid, int res_count,
res = res_args[i];
rv = send(fd, res, sizeof(struct sanlk_resource), 0);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
goto out;
}
rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
goto out;
}
}
rv = send(fd, &opt, sizeof(struct sanlk_options), 0);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
goto out;
}
if (opt.len) {
rv = send(fd, opt_in->str, opt.len, 0);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
goto out;
}
}
@@ -133,7 +133,7 @@ int sanlock_acquire(int sock, int pid, int res_count,
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL);
if (rv != sizeof(h)) {
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -177,7 +177,7 @@ int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char
**state)
rv = send(fd, &target_host_id, sizeof(uint64_t), 0);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -185,7 +185,7 @@ int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char
**state)
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL);
if (rv != sizeof(h)) {
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -199,7 +199,7 @@ int sanlock_migrate(int sock, int pid, uint64_t target_host_id, char
**state)
rv = recv(fd, reply_str, len, MSG_WAITALL);
if (rv != len) {
free(reply_str);
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -255,7 +255,7 @@ int sanlock_release(int sock, int pid, int res_count,
for (i = 0; i < res_count; i++) {
rv = send(fd, res_args[i], sizeof(struct sanlk_resource), 0);
if (rv < 0) {
- rv = -errno;
+ rv = -1;
goto out;
}
}
@@ -265,13 +265,13 @@ int sanlock_release(int sock, int pid, int res_count,
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL);
if (rv != sizeof(h)) {
- rv = -errno;
+ rv = -1;
goto out;
}
rv = recv(fd, &results, sizeof(int) * res_count, MSG_WAITALL);
if (rv != sizeof(int) * res_count) {
- rv = -errno;
+ rv = -1;
goto out;
}
@@ -317,7 +317,7 @@ int sanlock_setowner(int sock, int pid)
rv = recv(fd, &h, sizeof(struct sm_header), MSG_WAITALL);
if (rv != sizeof(h)) {
- rv = -errno;
+ rv = -1;
goto out;
}
diff --git a/src/diskio.c b/src/diskio.c
index a1bf92c..16b615a 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -149,14 +149,14 @@ static int do_write(int fd, uint64_t offset, const char *buf, int
len)
ret = lseek(fd, offset, SEEK_SET);
if (ret != offset)
- return -errno;
+ return -1;
retry:
rv = write(fd, buf + pos, len);
if (rv == -1 && errno == EINTR)
goto retry;
if (rv < 0)
- return -errno;
+ return -1;
/* if (rv != len && len == sector_size) return error?
partial sector writes should not happen AFAIK, and
@@ -178,7 +178,7 @@ static int do_read(int fd, uint64_t offset, char *buf, int len)
ret = lseek(fd, offset, SEEK_SET);
if (ret != offset)
- return -errno;
+ return -1;
while (pos < len) {
rv = read(fd, buf + pos, len - pos);
@@ -187,7 +187,7 @@ static int do_read(int fd, uint64_t offset, char *buf, int len)
if (rv == -1 && errno == EINTR)
continue;
if (rv < 0)
- return -errno;
+ return -1;
pos += rv;
}
@@ -215,7 +215,7 @@ static int do_write_aio(int fd, uint64_t offset, char *buf, int len,
rv = aio_write(&cb);
if (rv < 0)
- return -errno;
+ return -1;
rv = aio_suspend(&p_cb, 1, &ts);
if (!rv)
@@ -225,7 +225,7 @@ static int do_write_aio(int fd, uint64_t offset, char *buf, int len,
rv = aio_cancel(fd, &cb);
if (rv < 0)
- return -errno;
+ return -1;
if (rv == AIO_ALLDONE)
return 0;
@@ -264,7 +264,7 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len,
int io_timeo
rv = aio_read(&cb);
if (rv < 0)
- return -errno;
+ return -1;
rv = aio_suspend(&p_cb, 1, &ts);
if (!rv)
@@ -274,7 +274,7 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len,
int io_timeo
rv = aio_cancel(fd, &cb);
if (rv < 0)
- return -errno;
+ return -1;
if (rv == AIO_ALLDONE)
return 0;
diff --git a/src/host_id.c b/src/host_id.c
index f6f7b83..b3dc51f 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -235,6 +235,7 @@ static void *host_id_thread(void *arg_in)
result = delta_lease_renew(sp, &sp->host_id_disk, sp->space_name,
our_host_id, sp->host_id, &leader);
+ dl_result = result;
t = leader.timestamp;
pthread_mutex_lock(&sp->mutex);
@@ -251,20 +252,19 @@ static void *host_id_thread(void *arg_in)
sp->lease_status.max_renewal_interval = good_diff;
sp->lease_status.max_renewal_time = t;
}
- }
- pthread_mutex_unlock(&sp->mutex);
- if (result < 0) {
- log_erros(sp, "host_id %llu renewal error %d last good %llu",
- (unsigned long long)sp->host_id, result,
- (unsigned long long)sp->lease_status.renewal_good_time);
- } else {
log_space(sp, "host_id %llu renewal %llu interval %d",
(unsigned long long)sp->host_id,
(unsigned long long)t, good_diff);
- update_watchdog_file(sp, t);
+ if (!sp->thread_stop)
+ update_watchdog_file(sp, t);
+ } else {
+ log_erros(sp, "host_id %llu renewal error %d last good %llu",
+ (unsigned long long)sp->host_id, result,
+ (unsigned long long)sp->lease_status.renewal_good_time);
}
+ pthread_mutex_unlock(&sp->mutex);
}
/* unlink called below to get it done ASAP */
diff --git a/src/main.c b/src/main.c
index b80f87f..91c930c 100644
--- a/src/main.c
+++ b/src/main.c
@@ -397,9 +397,9 @@ static int main_loop(void)
log_space(sp, "set thread_stop");
pthread_mutex_lock(&sp->mutex);
sp->thread_stop = 1;
+ unlink_watchdog_file(sp);
pthread_cond_broadcast(&sp->cond);
pthread_mutex_unlock(&sp->mutex);
- unlink_watchdog_file(sp);
list_move(&sp->list, &spaces_remove);
} else {
kill_pids(sp);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index f693f62..9f815d6 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -854,44 +854,40 @@ int paxos_lease_release(struct token *token,
struct leader_record *leader_last,
struct leader_record *leader_ret)
{
- struct leader_record new_leader;
- int rv, d;
+ struct leader_record leader;
int error;
- for (d = 0; d < token->num_disks; d++) {
- memset(&new_leader, 0, sizeof(struct leader_record));
-
- rv = read_leader(&token->disks[d], &new_leader);
- if (rv < 0)
- continue;
+ error = paxos_lease_leader_read(token, &leader);
+ if (error < 0) {
+ log_errot(token, "release error cannot read leader");
+ goto out;
+ }
- if (memcmp(&new_leader, leader_last,
- sizeof(struct leader_record))) {
- log_errot(token, "release error leader changed");
- return DP_BAD_LEADER;
- }
+ if (memcmp(&leader, leader_last, sizeof(struct leader_record))) {
+ log_errot(token, "release error leader changed");
+ return DP_BAD_LEADER;
}
- if (new_leader.owner_id != token->host_id) {
+ if (leader.owner_id != token->host_id) {
log_errot(token, "release error other owner_id %llu",
- (unsigned long long)new_leader.owner_id);
+ (unsigned long long)leader.owner_id);
return DP_OTHER_OWNER;
}
- if (new_leader.next_owner_id) {
+ if (leader.next_owner_id) {
log_errot(token, "release error next_owner_id %llu",
- (unsigned long long)new_leader.next_owner_id);
+ (unsigned long long)leader.next_owner_id);
return DP_LEADER_MIGRATE;
}
- new_leader.timestamp = LEASE_FREE;
- new_leader.checksum = leader_checksum(&new_leader);
+ leader.timestamp = LEASE_FREE;
+ leader.checksum = leader_checksum(&leader);
- error = write_new_leader(token, &new_leader);
+ error = write_new_leader(token, &leader);
if (error < 0)
goto out;
- memcpy(leader_ret, &new_leader, sizeof(struct leader_record));
+ memcpy(leader_ret, &leader, sizeof(struct leader_record));
out:
return error;
}
diff --git a/src/watchdog.c b/src/watchdog.c
index 563654e..e92ad74 100644
--- a/src/watchdog.c
+++ b/src/watchdog.c
@@ -102,6 +102,8 @@ void unlink_watchdog_file(struct space *sp)
if (!options.use_watchdog)
return;
+ log_space(sp, "wdmd_test_live 0 0 to disable");
+
rv = wdmd_test_live(sp->wd_fd, 0, 0);
if (rv < 0)
log_erros(sp, "wdmd_test_live failed %d", rv);
@@ -209,7 +211,7 @@ static int do_write(int fd, void *buf, size_t count)
if (rv == -1 && errno == EINTR)
goto retry;
if (rv < 0) {
- return -errno;
+ return -1;
}
if (rv != count) {