src/delta_lease.c | 53 +++++++++++++++++++++++++---
src/diskio.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++-
src/diskio.h | 4 ++
src/host_id.c | 2 -
src/sanlock_internal.h | 1
tests/Makefile | 2 -
6 files changed, 143 insertions(+), 9 deletions(-)
New commits:
commit 04ad78d30c275c66d096883971ee62b0affb2340
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Jun 9 12:26:06 2011 -0500
sanlock: use a completed read after renewal timeout
instead of discarding the result of the previous read
and trying the same thing again.
diff --git a/src/delta_lease.c b/src/delta_lease.c
index b4a1df7..b5624c1 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -168,7 +168,33 @@ int delta_lease_leader_read(struct task *task,
rv = read_sectors(disk, host_id - 1, 1, (char *)&leader, sizeof(struct
leader_record),
task, "delta_leader");
if (rv < 0)
- return SANLK_LEADER_READ;
+ return rv;
+
+ error = verify_leader(disk, space_name, host_id, &leader, caller);
+
+ memcpy(leader_ret, &leader, sizeof(struct leader_record));
+ return error;
+}
+
+static int delta_lease_leader_reap(struct task *task,
+ struct sync_disk *disk,
+ char *space_name,
+ uint64_t host_id,
+ struct leader_record *leader_ret,
+ const char *caller)
+{
+ struct leader_record leader;
+ int rv, error;
+
+ /* host_id N is block offset N-1 */
+
+ memset(&leader, 0, sizeof(struct leader_record));
+ memset(leader_ret, 0, sizeof(struct leader_record));
+
+ rv = read_sectors_reap(disk, host_id - 1, 1, (char *)&leader, sizeof(struct
leader_record),
+ task, "delta_leader");
+ if (rv < 0)
+ return rv;
error = verify_leader(disk, space_name, host_id, &leader, caller);
@@ -301,15 +327,30 @@ int delta_lease_renew(struct task *task,
int io_timeout_save;
int error;
- /* TODO: if the previous renew timed out in this initial read, and that
- * read is now complete, we could just use the result from that read
- * here instead of ignoring it and doing another. */
+ /* if the previous renew timed out in this initial read, and that read
+ is now complete, we can use that result here instead of discarding
+ it and doing another. */
- error = delta_lease_leader_read(task, disk, space_name, host_id, &leader,
- "delta_renew_begin");
+ if (prev_result == SANLK_AIO_TIMEOUT && task->read_timeout) {
+ error = delta_lease_leader_reap(task, disk, space_name, host_id,
+ &leader, "delta_renew_reap");
+
+ log_space(sp, "delta_renew reap %d", error);
+
+ if (error == SANLK_OK) {
+ task->read_timeout = NULL;
+ goto read_done;
+ }
+ }
+
+ task->read_timeout = NULL;
+
+ error = delta_lease_leader_read(task, disk, space_name, host_id,
+ &leader, "delta_renew_read");
if (error < 0)
return error;
+ read_done:
if (!our_host_id_generation)
our_host_id_generation = leader.owner_generation;
diff --git a/src/diskio.c b/src/diskio.c
index f386470..bda0e47 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -426,6 +426,9 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
} else {
/* aicb->used and aicb->buf both remain set */
rv = SANLK_AIO_TIMEOUT;
+
+ if (cmd == IO_CMD_PREAD)
+ task->read_timeout = aicb;
}
out:
return rv;
@@ -651,12 +654,13 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
{
char *iobuf, **p_iobuf;
uint64_t offset;
- int iobuf_len = sector_count * disk->sector_size;
+ int iobuf_len;
int rv;
if (!disk->sector_size)
return -EINVAL;
+ iobuf_len = sector_count * disk->sector_size;
offset = disk->offset + (sector_nr * disk->sector_size);
p_iobuf = &iobuf;
@@ -685,3 +689,87 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
return rv;
}
+/* Try to reap the event of a previously timed out read_sectors.
+ A task's last timed out read is saved in task->read_timeout. */
+
+int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
+ uint32_t sector_count, char *data, int data_len,
+ struct task *task, const char *blktype GNUC_UNUSED)
+{
+ struct timespec ts;
+ struct aicb *aicb;
+ struct iocb *iocb;
+ struct io_event event;
+ char *iobuf;
+ uint64_t offset;
+ int iobuf_len;
+ int rv;
+
+ iobuf_len = sector_count * disk->sector_size;
+ offset = disk->offset + (sector_nr * disk->sector_size);
+
+ aicb = task->read_timeout;
+ iocb = &aicb->iocb;
+ iobuf = iocb->u.c.buf;
+
+ if (!aicb->used)
+ return -EINVAL;
+ if (iocb->aio_fildes != disk->fd)
+ return -EINVAL;
+ if (iocb->u.c.nbytes != iobuf_len)
+ return -EINVAL;
+ if (iocb->u.c.offset != offset)
+ return -EINVAL;
+ if (iocb->aio_lio_opcode != IO_CMD_PREAD)
+ return -EINVAL;
+
+ memset(&ts, 0, sizeof(struct timespec));
+ ts.tv_nsec = 500000000; /* half a second */
+ retry:
+ memset(&event, 0, sizeof(event));
+
+ rv = io_getevents(task->aio_ctx, 1, 1, &event, &ts);
+ if (rv == -EINTR)
+ goto retry;
+ if (rv < 0) {
+ log_error("reap aio %s io_getevents error %d", task->name, rv);
+ goto out;
+ }
+ if (rv == 1) {
+ struct iocb *ev_iocb = event.obj;
+ struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
+
+ ev_aicb->used = 0;
+
+ if (ev_iocb != iocb) {
+ log_error("reap aio %s other iocb %p event result %ld %ld",
+ task->name, ev_iocb, event.res, event.res2);
+ free(ev_aicb->buf);
+ ev_aicb->buf = NULL;
+ goto retry;
+ }
+ if ((int)event.res < 0) {
+ log_error("reap aio %s event result %ld %ld",
+ task->name, event.res, event.res2);
+ rv = event.res;
+ goto out;
+ }
+ if (event.res != iobuf_len) {
+ log_error("reap aio %s event len %d result %lu %lu",
+ task->name, iobuf_len, event.res, event.res2);
+ rv = -EMSGSIZE;
+ goto out;
+ }
+
+ rv = 0;
+ memcpy(data, iobuf, data_len);
+ free(iobuf);
+ goto out;
+ }
+
+ /* timed out again */
+ rv = SANLK_AIO_TIMEOUT;
+ out:
+ return rv;
+}
+
diff --git a/src/diskio.h b/src/diskio.h
index dd32958..a54ffaf 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -31,4 +31,8 @@ int write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
uint32_t sector_count, char *data, int data_len,
struct task *task, const char *blktype);
+
+int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
+ uint32_t sector_count, char *data, int data_len,
+ struct task *task, const char *blktype);
#endif
diff --git a/src/host_id.c b/src/host_id.c
index 0ae3138..3075d3f 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -276,7 +276,7 @@ static void *lockspace_thread(void *arg_in)
} else {
/* don't spin too quickly if renew is failing
immediately and repeatedly */
- usleep(200000);
+ usleep(500000);
}
last_attempt = time(NULL);
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 62b3f68..6201085 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -444,6 +444,7 @@ struct task {
int use_aio;
int cb_size;
io_context_t aio_ctx;
+ struct aicb *read_timeout;
struct aicb *callbacks;
};
diff --git a/tests/Makefile b/tests/Makefile
index e656606..4041a8e 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -21,7 +21,7 @@ CFLAGS += -D_GNU_SOURCE -g \
-fasynchronous-unwind-tables \
-fdiagnostics-show-option
-LDFLAGS = -lrt -laio -lblkid -lsanlock -lsanlock_direct
+LDFLAGS = -lrt -laio -lblkid -lsanlock
all: $(TARGET)