src/delta_lease.c | 4 ++++
src/diskio.c | 35 +++++++++++++++++++++++++++--------
src/main.c | 3 +++
src/paxos_lease.c | 11 ++++++++++-
src/sanlock_internal.h | 1 +
src/sanlock_rv.h | 1 +
6 files changed, 46 insertions(+), 9 deletions(-)
New commits:
commit f6775efb0dea32a830fb24f6b2555aa7b7b27c23
Author: David Teigland <teigland(a)redhat.com>
Date: Mon May 23 14:05:28 2011 -0500
sanlock: don't free aio buf until event completes
This is pretty basic, I should have noticed this earlier.
diff --git a/src/delta_lease.c b/src/delta_lease.c
index a9640c9..9a808c8 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -399,6 +399,10 @@ int delta_lease_init(struct task *task,
}
rv = write_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task);
+
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
+
if (rv < 0)
return rv;
diff --git a/src/diskio.c b/src/diskio.c
index aee02e4..72bd7d8 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -269,11 +269,22 @@ static struct aicb *find_callback_slot(struct task *task)
struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
ev_aicb->used = 0;
+
+ log_error("aio %s clear iocb %p event result %ld %ld",
+ task->name, ev_iocb, event.res, event.res2);
goto find;
}
return NULL;
}
+/*
+ * If this function returns SANLK_AIO_TIMEOUT, it means the io has timed out
+ * and the event for the timed out io has not been reaped; the caller cannot
+ * free the buf it passed in. It will be freed by a subsequent call when the
+ * event is reaped. (Using my own error value here because I'm not certain
+ * what values we might return from event.res.)
+ */
+
static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
struct task *task, int cmd)
{
@@ -306,8 +317,9 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
task->io_count++;
- /* don't reuse aicb->iocb until we reap the event for it */
+ /* don't reuse aicb->iocb or free the buf until we reap the event */
aicb->used = 1;
+ aicb->buf = buf;
memset(&ts, 0, sizeof(struct timespec));
ts.tv_sec = task->io_timeout_seconds;
@@ -330,6 +342,8 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
if (ev_iocb != iocb) {
log_error("aio %s other iocb %p event result %ld %ld",
task->name, ev_iocb, event.res, event.res2);
+ free(ev_aicb->buf);
+ ev_aicb->buf = NULL;
goto retry;
}
if ((int)event.res < 0) {
@@ -368,9 +382,11 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
rv = io_cancel(task->aio_ctx, iocb, &event);
if (!rv) {
+ aicb->used = 0;
rv = -ECANCELED;
- } else if (rv > 0) {
- rv = -EILSEQ;
+ } else {
+ /* aicb->used and aicb->buf both remain set */
+ rv = SANLK_AIO_TIMEOUT;
}
out:
return rv;
@@ -514,7 +530,7 @@ static int _write_sectors(const struct sync_disk *disk, uint64_t
sector_nr,
if (rv) {
log_error("write_sectors %s posix_memalign rv %d %s",
blktype, rv, disk->path);
- rv = -1;
+ rv = -ENOMEM;
goto out;
}
@@ -522,11 +538,13 @@ static int _write_sectors(const struct sync_disk *disk, uint64_t
sector_nr,
memcpy(iobuf, data, data_len);
rv = write_iobuf(disk->fd, offset, iobuf, iobuf_len, task);
- if (rv < 0)
+ if (rv < 0) {
log_error("write_sectors %s offset %llu rv %d %s",
blktype, (unsigned long long)offset, rv, disk->path);
+ }
- free(iobuf);
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
out:
return rv;
}
@@ -608,7 +626,7 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
if (rv) {
log_error("read_sectors %s posix_memalign rv %d %s",
blktype, rv, disk->path);
- rv = -1;
+ rv = -ENOMEM;
goto out;
}
@@ -622,7 +640,8 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
blktype, (unsigned long long)offset, rv, disk->path);
}
- free(iobuf);
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
out:
return rv;
}
diff --git a/src/main.c b/src/main.c
index c4ed5f8..26e8196 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1578,6 +1578,9 @@ void setup_task(struct task *task, int cb_size)
log_error("setup_task aio disabled %d", rv);
}
+/* TODO: do we need/want to go through all task->callbacks that are still used
+ and wait to reap events for them before doing io_destroy? */
+
void close_task(struct task *task)
{
if (task->use_aio)
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index b13f104..0322685 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -802,7 +802,8 @@ static int _leader_dblock_read_single(struct task *task,
memcpy(our_dblock, iobuf + (sector_size * (host_id + 1)),
sizeof(struct paxos_dblock));
out:
- free(iobuf);
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
return rv;
}
@@ -1385,6 +1386,7 @@ int paxos_lease_init(struct task *task,
char *iobuf, **p_iobuf;
struct leader_record *leader;
int iobuf_len;
+ int aio_timeout = 0;
int rv, d;
iobuf_len = token->disks[0].sector_size * (2 + max_hosts);
@@ -1411,10 +1413,17 @@ int paxos_lease_init(struct task *task,
for (d = 0; d < token->r.num_disks; d++) {
rv = write_iobuf(token->disks[d].fd, token->disks[d].offset,
iobuf, iobuf_len, task);
+
+ if (rv == SANLK_AIO_TIMEOUT)
+ aio_timeout = 1;
+
if (rv < 0)
return rv;
}
+ if (!aio_timeout)
+ free(iobuf);
+
return 0;
}
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 5144188..f4efb6a 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -259,6 +259,7 @@ struct sm_header {
struct aicb {
int used;
+ char *buf;
struct iocb iocb;
};
diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h
index 85db9c3..c027828 100644
--- a/src/sanlock_rv.h
+++ b/src/sanlock_rv.h
@@ -12,6 +12,7 @@
#define SANLK_OK 1
#define SANLK_NONE 0 /* unused */
#define SANLK_ERROR -201
+#define SANLK_AIO_TIMEOUT -202
/* run_ballot */