src/paxos_lease.c
by David Teigland
src/paxos_lease.c | 160 ++++++++++++++++++++++++++++++++++--------------------
1 file changed, 102 insertions(+), 58 deletions(-)
New commits:
commit d233f91e2d58bce9ed038719b8ee31ed93ff7f92
Author: David Teigland <teigland(a)redhat.com>
Date: Fri May 27 17:38:34 2011 -0500
sanlock: read dblocks in single aligned io
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index b784fdb..e9adcfb 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -43,6 +43,18 @@ struct paxos_dblock {
uint64_t lver;
};
+static uint32_t roundup_power_of_two(uint32_t val)
+{
+ val--;
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ val++;
+ return val;
+}
+
int majority_disks(struct token *token, int num)
{
int num_disks = token->r.num_disks;
@@ -117,6 +129,7 @@ static int read_dblock(struct task *task,
return rv;
}
+#if 0
static int read_dblocks(struct task *task,
struct sync_disk *disk,
struct paxos_dblock *pds,
@@ -134,23 +147,6 @@ static int read_dblocks(struct task *task,
goto out;
}
-
- /* TODO: the actual read io should start at offset 0, and the len should
- be rounded up to the next power of two. Then copy pds starting at
- data + (2 * ss).
-
- data_len = next_po2((2 + pds_count)*ss)
-
- for (i = 0; i < pds_count; i++)
- memcpy(&pds[i], data + ((2+i)*ss));
-
- TODO2: return the data to the caller, let them use it directly
- and then free it, instead of copying data into their buf; also
- removes bk[num_hosts] from the stack of the callers, which could
- get too big with large num_hosts.
- */
-
-
/* 2 = 1 leader block + 1 request block */
rv = read_sectors(disk, 2, pds_count, data, data_len,
@@ -172,6 +168,7 @@ static int read_dblocks(struct task *task,
out:
return rv;
}
+#endif
static int read_leader(struct task *task,
struct sync_disk *disk,
@@ -255,13 +252,36 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
uint64_t next_lver, uint64_t our_mbal,
struct paxos_dblock *dblock_out)
{
- struct paxos_dblock bk[num_hosts];
- struct paxos_dblock bk_max;
struct paxos_dblock dblock;
+ struct paxos_dblock bk_max;
+ struct paxos_dblock *bk;
+ struct sync_disk *disk;
+ char *iobuf[MAX_DISKS];
+ char **p_iobuf[MAX_DISKS];
int num_disks = token->r.num_disks;
int num_writes, num_reads;
+ int sector_size = token->disks[0].sector_size;
+ int sector_count;
+ int iobuf_len;
int d, q, rv;
int q_max = -1;
+ int error;
+
+ sector_count = roundup_power_of_two(num_hosts + 2);
+
+ iobuf_len = sector_count * sector_size;
+
+ if (!iobuf_len)
+ return -EINVAL;
+
+ for (d = 0; d < num_disks; d++) {
+ p_iobuf[d] = &iobuf[d];
+
+ rv = posix_memalign((void *)p_iobuf[d], getpagesize(), iobuf_len);
+ if (rv)
+ return rv;
+ }
+
/*
* phase 1
@@ -296,53 +316,67 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
if (!majority_disks(token, num_writes)) {
log_errot(token, "ballot %llu dblock write error %d",
(unsigned long long)next_lver, rv);
- return SANLK_DBLOCK_WRITE;
+ error = SANLK_DBLOCK_WRITE;
+ goto out;
}
num_reads = 0;
for (d = 0; d < num_disks; d++) {
- rv = read_dblocks(task, &token->disks[d], bk, num_hosts);
+ disk = &token->disks[d];
+
+ if (!iobuf[d])
+ continue;
+ memset(iobuf[d], 0, iobuf_len);
+
+ rv = read_iobuf(disk->fd, disk->offset, iobuf[d], iobuf_len, task);
+ if (rv == SANLK_AIO_TIMEOUT)
+ iobuf[d] = NULL;
if (rv < 0)
continue;
num_reads++;
+
for (q = 0; q < num_hosts; q++) {
- if (bk[q].lver < dblock.lver)
+ bk = (struct paxos_dblock *)(iobuf[d] + ((2 + q)*sector_size));
+
+ if (bk->lver < dblock.lver)
continue;
- if (bk[q].lver > dblock.lver) {
+ if (bk->lver > dblock.lver) {
/* I don't think this should happen */
log_errot(token, "ballot %llu larger1 lver[%d] %llu",
(unsigned long long)next_lver, q,
- (unsigned long long)bk[q].lver);
- return SANLK_DBLOCK_LVER;
+ (unsigned long long)bk->lver);
+ error = SANLK_DBLOCK_LVER;
+ goto out;
}
/* see "It aborts the ballot" in comment above */
- if (bk[q].mbal > dblock.mbal) {
+ if (bk->mbal > dblock.mbal) {
log_errot(token, "ballot %llu abort1 mbal %llu mbal[%d] %llu",
(unsigned long long)next_lver,
(unsigned long long)our_mbal, q,
- (unsigned long long)bk[q].mbal);
- return SANLK_DBLOCK_MBAL;
+ (unsigned long long)bk->mbal);
+ error = SANLK_DBLOCK_MBAL;
+ goto out;
}
/* see choosing inp for phase 2 in comment below */
- if (!bk[q].inp)
+ if (!bk->inp)
continue;
- if (!bk[q].bal) {
+ if (!bk->bal) {
log_errot(token, "ballot %llu zero bal inp[%d] %llu",
(unsigned long long)next_lver, q,
- (unsigned long long)bk[q].inp);
+ (unsigned long long)bk->inp);
continue;
}
- if (bk[q].bal > bk_max.bal) {
- bk_max = bk[q];
+ if (bk->bal > bk_max.bal) {
+ bk_max = *bk;
q_max = q;
}
}
@@ -351,7 +385,8 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
if (!majority_disks(token, num_reads)) {
log_errot(token, "ballot %llu dblock read error %d",
(unsigned long long)next_lver, rv);
- return SANLK_DBLOCK_READ;
+ error = SANLK_DBLOCK_READ;
+ goto out;
}
@@ -422,37 +457,50 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
if (!majority_disks(token, num_writes)) {
log_errot(token, "ballot %llu our dblock write2 error %d",
(unsigned long long)next_lver, rv);
- return SANLK_DBLOCK_WRITE;
+ error = SANLK_DBLOCK_WRITE;
+ goto out;
}
num_reads = 0;
for (d = 0; d < num_disks; d++) {
- rv = read_dblocks(task, &token->disks[d], bk, num_hosts);
+ disk = &token->disks[d];
+
+ if (!iobuf[d])
+ continue;
+ memset(iobuf[d], 0, iobuf_len);
+
+ rv = read_iobuf(disk->fd, disk->offset, iobuf[d], iobuf_len, task);
+ if (rv == SANLK_AIO_TIMEOUT)
+ iobuf[d] = NULL;
if (rv < 0)
continue;
num_reads++;
for (q = 0; q < num_hosts; q++) {
- if (bk[q].lver < dblock.lver)
+ bk = (struct paxos_dblock *)(iobuf[d] + ((2 + q)*sector_size));
+
+ if (bk->lver < dblock.lver)
continue;
- if (bk[q].lver > dblock.lver) {
+ if (bk->lver > dblock.lver) {
/* I don't think this should happen */
log_errot(token, "ballot %llu larger2 lver[%d] %llu",
(unsigned long long)next_lver, q,
- (unsigned long long)bk[q].lver);
- return SANLK_DBLOCK_LVER;
+ (unsigned long long)bk->lver);
+ error = SANLK_DBLOCK_LVER;
+ goto out;
}
/* see "It aborts the ballot" in comment above */
- if (bk[q].mbal > dblock.mbal) {
+ if (bk->mbal > dblock.mbal) {
log_errot(token, "ballot %llu abort2 mbal %llu mbal[%d] %llu",
(unsigned long long)next_lver,
(unsigned long long)our_mbal, q,
- (unsigned long long)bk[q].mbal);
- return SANLK_DBLOCK_MBAL;
+ (unsigned long long)bk->mbal);
+ error = SANLK_DBLOCK_MBAL;
+ goto out;
}
}
}
@@ -460,14 +508,22 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
if (!majority_disks(token, num_reads)) {
log_errot(token, "ballot %llu dblock read2 error %d",
(unsigned long long)next_lver, rv);
- return SANLK_DBLOCK_READ;
+ error = SANLK_DBLOCK_READ;
+ goto out;
}
/* "When it completes phase 2, p has committed dblock[p].inp." */
memcpy(dblock_out, &dblock, sizeof(struct paxos_dblock));
-
- return SANLK_OK;
+ error = SANLK_OK;
+ out:
+ for (d = 0; d < num_disks; d++) {
+ /* don't free iobufs that have timed out */
+ if (!iobuf[d])
+ continue;
+ free(iobuf[d]);
+ }
+ return error;
}
uint32_t leader_checksum(struct leader_record *lr)
@@ -744,18 +800,6 @@ int paxos_lease_leader_read(struct task *task,
return rv;
}
-static uint32_t roundup_power_of_two(uint32_t val)
-{
- val--;
- val |= val >> 1;
- val |= val >> 2;
- val |= val >> 4;
- val |= val >> 8;
- val |= val >> 16;
- val++;
- return val;
-}
-
static int _leader_dblock_read_single(struct task *task,
struct token *token,
struct leader_record *leader_ret,
12 years, 4 months
python/setup.py
by David Teigland
python/setup.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
New commits:
commit b888cad00cd43192bd5d4a1dab85f052dd73c859
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Fri May 27 06:42:22 2011 -0400
python: add missing aio library
diff --git a/python/setup.py b/python/setup.py
index f74076f..db36dc7 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -6,7 +6,7 @@
from distutils.core import setup, Extension
-sanlocklib = ['sanlock', 'sanlock_direct', 'blkid', 'rt']
+sanlocklib = ['sanlock', 'sanlock_direct', 'blkid', 'rt', 'aio']
sanlockmod = Extension(name = 'sanlockmod',
sources = ['sanlockmod.c'],
include_dirs = ['../src'],
12 years, 4 months
src/delta_lease.c src/diskio.c src/main.c src/paxos_lease.c src/sanlock_internal.h src/sanlock_rv.h
by David Teigland
src/delta_lease.c | 4 ++++
src/diskio.c | 35 +++++++++++++++++++++++++++--------
src/main.c | 3 +++
src/paxos_lease.c | 11 ++++++++++-
src/sanlock_internal.h | 1 +
src/sanlock_rv.h | 1 +
6 files changed, 46 insertions(+), 9 deletions(-)
New commits:
commit f6775efb0dea32a830fb24f6b2555aa7b7b27c23
Author: David Teigland <teigland(a)redhat.com>
Date: Mon May 23 14:05:28 2011 -0500
sanlock: don't free aio buf until event completes
This is pretty basic; I should have noticed it earlier.
diff --git a/src/delta_lease.c b/src/delta_lease.c
index a9640c9..9a808c8 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -399,6 +399,10 @@ int delta_lease_init(struct task *task,
}
rv = write_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task);
+
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
+
if (rv < 0)
return rv;
diff --git a/src/diskio.c b/src/diskio.c
index aee02e4..72bd7d8 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -269,11 +269,22 @@ static struct aicb *find_callback_slot(struct task *task)
struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
ev_aicb->used = 0;
+
+ log_error("aio %s clear iocb %p event result %ld %ld",
+ task->name, ev_iocb, event.res, event.res2);
goto find;
}
return NULL;
}
+/*
+ * If this function returns SANLK_AIO_TIMEOUT, it means the io has timed out
+ * and the event for the timed out io has not been reaped; the caller cannot
+ * free the buf it passed in. It will be freed by a subsequent call when the
+ * event is reaped. (Using my own error value here because I'm not certain
+ * what values we might return from event.res.)
+ */
+
static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
struct task *task, int cmd)
{
@@ -306,8 +317,9 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
task->io_count++;
- /* don't reuse aicb->iocb until we reap the event for it */
+ /* don't reuse aicb->iocb or free the buf until we reap the event */
aicb->used = 1;
+ aicb->buf = buf;
memset(&ts, 0, sizeof(struct timespec));
ts.tv_sec = task->io_timeout_seconds;
@@ -330,6 +342,8 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
if (ev_iocb != iocb) {
log_error("aio %s other iocb %p event result %ld %ld",
task->name, ev_iocb, event.res, event.res2);
+ free(ev_aicb->buf);
+ ev_aicb->buf = NULL;
goto retry;
}
if ((int)event.res < 0) {
@@ -368,9 +382,11 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
rv = io_cancel(task->aio_ctx, iocb, &event);
if (!rv) {
+ aicb->used = 0;
rv = -ECANCELED;
- } else if (rv > 0) {
- rv = -EILSEQ;
+ } else {
+ /* aicb->used and aicb->buf both remain set */
+ rv = SANLK_AIO_TIMEOUT;
}
out:
return rv;
@@ -514,7 +530,7 @@ static int _write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
if (rv) {
log_error("write_sectors %s posix_memalign rv %d %s",
blktype, rv, disk->path);
- rv = -1;
+ rv = -ENOMEM;
goto out;
}
@@ -522,11 +538,13 @@ static int _write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
memcpy(iobuf, data, data_len);
rv = write_iobuf(disk->fd, offset, iobuf, iobuf_len, task);
- if (rv < 0)
+ if (rv < 0) {
log_error("write_sectors %s offset %llu rv %d %s",
blktype, (unsigned long long)offset, rv, disk->path);
+ }
- free(iobuf);
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
out:
return rv;
}
@@ -608,7 +626,7 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
if (rv) {
log_error("read_sectors %s posix_memalign rv %d %s",
blktype, rv, disk->path);
- rv = -1;
+ rv = -ENOMEM;
goto out;
}
@@ -622,7 +640,8 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
blktype, (unsigned long long)offset, rv, disk->path);
}
- free(iobuf);
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
out:
return rv;
}
diff --git a/src/main.c b/src/main.c
index c4ed5f8..26e8196 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1578,6 +1578,9 @@ void setup_task(struct task *task, int cb_size)
log_error("setup_task aio disabled %d", rv);
}
+/* TODO: do we need/want to go through all task->callbacks that are still used
+ and wait to reap events for them before doing io_destroy? */
+
void close_task(struct task *task)
{
if (task->use_aio)
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index b13f104..0322685 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -802,7 +802,8 @@ static int _leader_dblock_read_single(struct task *task,
memcpy(our_dblock, iobuf + (sector_size * (host_id + 1)),
sizeof(struct paxos_dblock));
out:
- free(iobuf);
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(iobuf);
return rv;
}
@@ -1385,6 +1386,7 @@ int paxos_lease_init(struct task *task,
char *iobuf, **p_iobuf;
struct leader_record *leader;
int iobuf_len;
+ int aio_timeout = 0;
int rv, d;
iobuf_len = token->disks[0].sector_size * (2 + max_hosts);
@@ -1411,10 +1413,17 @@ int paxos_lease_init(struct task *task,
for (d = 0; d < token->r.num_disks; d++) {
rv = write_iobuf(token->disks[d].fd, token->disks[d].offset,
iobuf, iobuf_len, task);
+
+ if (rv == SANLK_AIO_TIMEOUT)
+ aio_timeout = 1;
+
if (rv < 0)
return rv;
}
+ if (!aio_timeout)
+ free(iobuf);
+
return 0;
}
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 5144188..f4efb6a 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -259,6 +259,7 @@ struct sm_header {
struct aicb {
int used;
+ char *buf;
struct iocb iocb;
};
diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h
index 85db9c3..c027828 100644
--- a/src/sanlock_rv.h
+++ b/src/sanlock_rv.h
@@ -12,6 +12,7 @@
#define SANLK_OK 1
#define SANLK_NONE 0 /* unused */
#define SANLK_ERROR -201
+#define SANLK_AIO_TIMEOUT -202
/* run_ballot */
12 years, 4 months
src/main.c src/token_manager.c
by David Teigland
src/main.c | 5 +++--
src/token_manager.c | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
New commits:
commit c1f8306a1f5bfa219bb014251f4d8f50f27c65ab
Author: David Teigland <teigland(a)redhat.com>
Date: Thu May 19 13:07:37 2011 -0500
sanlock: quiet error case
Silence the add_resource -17 error condition when run with -Q 1,
since we don't care about it during testing.
diff --git a/src/main.c b/src/main.c
index bfa6a1c..c4ed5f8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -898,8 +898,9 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
token = new_tokens[i];
rv = add_resource(token, cl_pid);
if (rv < 0) {
- log_errot(token, "cmd_acquire %d,%d,%d add_resource %d",
- cl_ci, cl_fd, cl_pid, rv);
+ if (!com.quiet_fail)
+ log_errot(token, "cmd_acquire %d,%d,%d add_resource %d",
+ cl_ci, cl_fd, cl_pid, rv);
result = rv;
goto done;
}
diff --git a/src/token_manager.c b/src/token_manager.c
index 12d14c5..4803c08 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -74,8 +74,9 @@ int add_resource(struct token *token, int pid)
r = find_resource(token, &dispose_resources);
if (r) {
- log_errot(token, "add_resource disposed");
- rv = -EEXIST;
+ if (!com.quiet_fail)
+ log_errot(token, "add_resource disposed");
+ rv = -EAGAIN;
goto out;
}
12 years, 4 months
src/diskio.c src/diskio.h src/paxos_lease.c
by David Teigland
src/diskio.c | 20 ++-
src/diskio.h | 3
src/paxos_lease.c | 273 +++++++++++++++++++++++++++++++++++++++++++++---------
3 files changed, 248 insertions(+), 48 deletions(-)
New commits:
commit 2258184931ae60d8beabec68a9e852ae886767e9
Author: David Teigland <teigland(a)redhat.com>
Date: Wed May 18 17:27:57 2011 -0500
sanlock: reduce paxos acquire read ops
Eliminate two read I/Os in the paxos acquire function:
- read both the leader record and our dblock in one multi-sector I/O
instead of two single-sector reads
- the first time through the ballot retry loop, the leader does
not need to be reread
diff --git a/src/diskio.c b/src/diskio.c
index d0e2873..aee02e4 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -570,6 +570,18 @@ int write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
iobuf_len, task, blktype);
}
+/* read aligned io buffer */
+
+int read_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len, struct task *task)
+{
+ if (task && task->use_aio == 1)
+ return do_read_aio_linux(fd, offset, iobuf, iobuf_len, task);
+ else if (task && task->use_aio == 2)
+ return do_read_aio_posix(fd, offset, iobuf, iobuf_len, task);
+ else
+ return do_read(fd, offset, iobuf, iobuf_len, task);
+}
+
/* read sector_count sectors starting with sector_nr, where sector_nr
is a logical sector number within the sync_disk. the caller will
generally want to look at the first N bytes of each sector.
@@ -602,13 +614,7 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
memset(iobuf, 0, iobuf_len);
- if (task && task->use_aio == 1)
- rv = do_read_aio_linux(disk->fd, offset, iobuf, iobuf_len, task);
- else if (task && task->use_aio == 2)
- rv = do_read_aio_posix(disk->fd, offset, iobuf, iobuf_len, task);
- else
- rv = do_read(disk->fd, offset, iobuf, iobuf_len, task);
-
+ rv = read_iobuf(disk->fd, offset, iobuf, iobuf_len, task);
if (!rv) {
memcpy(data, iobuf, data_len);
} else {
diff --git a/src/diskio.h b/src/diskio.h
index e6717a5..c6b2762 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -16,6 +16,9 @@ int open_disks_fd(struct sync_disk *disks, int num_disks);
int write_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
struct task *task);
+int read_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
+ struct task *task);
+
int write_sector(const struct sync_disk *disk, uint64_t sector_nr,
const char *data, int data_len,
struct task *task, const char *blktype);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 6aec60e..b13f104 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -134,6 +134,23 @@ static int read_dblocks(struct task *task,
goto out;
}
+
+ /* TODO: the actual read io should start at offset 0, and the len should
+ be rounded up to the next power of two. Then copy pds starting at
+ data + (2 * ss).
+
+ data_len = next_po2((2 + pds_count)*ss)
+
+ for (i = 0; i < pds_count; i++)
+ memcpy(&pds[i], data + ((2+i)*ss));
+
+ TODO2: return the data to the caller, let them use it directly
+ and then free it, instead of copying data into their buf; also
+ removes bk[num_hosts] from the stack of the callers, which could
+ get too big with large num_hosts.
+ */
+
+
/* 2 = 1 leader block + 1 request block */
rv = read_sectors(disk, 2, pds_count, data, data_len,
@@ -581,10 +598,32 @@ static int leaders_match(struct leader_record *a, struct leader_record *b)
return 0;
}
-int paxos_lease_leader_read(struct task *task,
- struct token *token,
- struct leader_record *leader_ret,
- const char *caller)
+static int _leader_read_single(struct task *task,
+ struct token *token,
+ struct leader_record *leader_ret,
+ const char *caller)
+{
+ struct leader_record leader;
+ int rv;
+
+ memset(&leader, 0, sizeof(struct leader_record));
+
+ rv = read_leader(task, &token->disks[0], &leader);
+ if (rv < 0)
+ return rv;
+
+ rv = verify_leader(token, &token->disks[0], &leader, caller);
+
+ /* copy what we read even if verify finds a problem */
+
+ memcpy(leader_ret, &leader, sizeof(struct leader_record));
+ return rv;
+}
+
+static int _leader_read_multiple(struct task *task,
+ struct token *token,
+ struct leader_record *leader_ret,
+ const char *caller)
{
struct leader_record leader;
struct leader_record *leaders;
@@ -670,12 +709,6 @@ int paxos_lease_leader_read(struct task *task,
goto fail;
}
- log_token(token, "%s leader %llu owner %llu %llu %llu", caller,
- (unsigned long long)leader.lver,
- (unsigned long long)leader.owner_id,
- (unsigned long long)leader.owner_generation,
- (unsigned long long)leader.timestamp);
-
memcpy(leader_ret, &leader, sizeof(struct leader_record));
return SANLK_OK;
@@ -686,6 +719,168 @@ int paxos_lease_leader_read(struct task *task,
return error;
}
+int paxos_lease_leader_read(struct task *task,
+ struct token *token,
+ struct leader_record *leader_ret,
+ const char *caller)
+{
+ int rv;
+
+ /* _leader_read_multiple works fine for the single disk case, but
+ we can cut out a bunch of stuff when we know there's one disk */
+
+ if (token->r.num_disks > 1)
+ rv = _leader_read_multiple(task, token, leader_ret, caller);
+ else
+ rv = _leader_read_single(task, token, leader_ret, caller);
+
+ if (rv == SANLK_OK)
+ log_token(token, "%s leader %llu owner %llu %llu %llu", caller,
+ (unsigned long long)leader_ret->lver,
+ (unsigned long long)leader_ret->owner_id,
+ (unsigned long long)leader_ret->owner_generation,
+ (unsigned long long)leader_ret->timestamp);
+
+ return rv;
+}
+
+static uint32_t roundup_power_of_two(uint32_t val)
+{
+ val--;
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ val++;
+ return val;
+}
+
+static int _leader_dblock_read_single(struct task *task,
+ struct token *token,
+ struct leader_record *leader_ret,
+ struct paxos_dblock *our_dblock,
+ const char *caller)
+{
+ struct sync_disk *disk = &token->disks[0];
+ char *iobuf, **p_iobuf;
+ uint32_t host_id = token->host_id;
+ int sector_size = disk->sector_size;
+ int sector_count;
+ int rv, iobuf_len;
+
+ /* sector 0: leader record
+ sector 1: empty
+ sector 2: dblock host_id 1
+ sector 3: dblock host_id 2
+ sector 4: dblock host_id 3
+ for host_id N we need to read N+2 sectors */
+
+ sector_count = roundup_power_of_two(host_id + 2);
+
+ iobuf_len = sector_count * sector_size;
+
+ if (!iobuf_len)
+ return -EINVAL;
+
+ p_iobuf = &iobuf;
+
+ rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+ if (rv)
+ return rv;
+
+ memset(iobuf, 0, iobuf_len);
+
+ rv = read_iobuf(disk->fd, disk->offset, iobuf, iobuf_len, task);
+ if (rv < 0)
+ goto out;
+
+ memcpy(leader_ret, iobuf, sizeof(struct leader_record));
+
+ rv = verify_leader(token, &token->disks[0], leader_ret, caller);
+
+ memcpy(our_dblock, iobuf + (sector_size * (host_id + 1)),
+ sizeof(struct paxos_dblock));
+ out:
+ free(iobuf);
+ return rv;
+}
+
+/* TODO: the point of a combined leader+dblock read is to reduce iops by
+ reading the leader and our dblock in a single read covering both, which
+ this function obviously does not do. */
+
+static int _leader_dblock_read_multiple(struct task *task,
+ struct token *token,
+ struct leader_record *leader_ret,
+ struct paxos_dblock *our_dblock,
+ const char *caller)
+{
+ struct paxos_dblock dblock;
+ uint64_t our_mbal = 0;
+ int d, num_reads;
+ int rv;
+
+ rv = _leader_read_multiple(task, token, leader_ret, caller);
+ if (rv < 0)
+ return rv;
+
+ num_reads = 0;
+
+ for (d = 0; d < token->r.num_disks; d++) {
+ rv = read_dblock(task, &token->disks[d], token->host_id, &dblock);
+ if (rv < 0)
+ continue;
+ num_reads++;
+
+ if (dblock.mbal > our_mbal) {
+ our_mbal = dblock.mbal;
+ memcpy(our_dblock, &dblock, sizeof(struct paxos_dblock));
+ }
+ }
+
+ if (!num_reads) {
+ log_errot(token, "paxos_acquire cannot read our dblock %d", rv);
+ rv = SANLK_DBLOCK_READ;
+ }
+
+ return rv;
+}
+
+/* read the leader_record and our own dblock in a single larger read op
+ instead of two smaller read ops */
+
+static int paxos_lease_leader_dblock_read(struct task *task,
+ struct token *token,
+ struct leader_record *leader_ret,
+ struct paxos_dblock *our_dblock,
+ const char *caller)
+{
+ int rv;
+
+ if (token->r.num_disks > 1)
+ rv = _leader_dblock_read_multiple(task, token, leader_ret, our_dblock, caller);
+ else
+ rv = _leader_dblock_read_single(task, token, leader_ret, our_dblock, caller);
+
+ if (rv == SANLK_OK)
+ log_token(token, "%s leader %llu owner %llu %llu %llu "
+ "our_dblock %llu %llu %llu %llu %llu %llu",
+ caller,
+ (unsigned long long)leader_ret->lver,
+ (unsigned long long)leader_ret->owner_id,
+ (unsigned long long)leader_ret->owner_generation,
+ (unsigned long long)leader_ret->timestamp,
+ (unsigned long long)our_dblock->mbal,
+ (unsigned long long)our_dblock->bal,
+ (unsigned long long)our_dblock->inp,
+ (unsigned long long)our_dblock->inp2,
+ (unsigned long long)our_dblock->inp3,
+ (unsigned long long)our_dblock->lver);
+
+ return rv;
+}
+
/* return a random int between a and b inclusive */
static int get_rand(int a, int b)
@@ -741,28 +936,34 @@ int paxos_lease_acquire(struct task *task,
uint64_t acquire_lver,
int new_num_hosts)
{
+ struct sync_disk host_id_disk;
+ struct leader_record host_id_leader;
struct leader_record cur_leader;
struct leader_record tmp_leader;
struct leader_record new_leader;
- struct leader_record host_id_leader;
- struct sync_disk host_id_disk;
+ struct paxos_dblock our_dblock;
struct paxos_dblock dblock;
time_t start;
uint64_t next_lver;
uint64_t our_mbal = 0;
uint64_t last_timestamp = 0;
- int error, rv, d, us, num_reads, disk_open = 0;
+ int copy_cur_leader = 0;
+ int disk_open = 0;
+ int error, rv, us;
log_token(token, "paxos_acquire begin acquire_lver %llu flags %x",
(unsigned long long)acquire_lver, flags);
restart:
- error = paxos_lease_leader_read(task, token, &cur_leader, "paxos_acquire");
+ error = paxos_lease_leader_dblock_read(task, token, &cur_leader, &our_dblock,
+ "paxos_acquire");
if (error < 0)
goto out;
- if (flags & PAXOS_ACQUIRE_FORCE)
+ if (flags & PAXOS_ACQUIRE_FORCE) {
+ copy_cur_leader = 1;
goto run;
+ }
if (acquire_lver && cur_leader.lver != acquire_lver) {
log_errot(token, "paxos_acquire acquire_lver %llu cur_leader %llu",
@@ -775,6 +976,7 @@ int paxos_lease_acquire(struct task *task,
if (cur_leader.timestamp == LEASE_FREE) {
log_token(token, "paxos_acquire leader %llu free",
(unsigned long long)cur_leader.lver);
+ copy_cur_leader = 1;
goto run;
}
@@ -783,6 +985,7 @@ int paxos_lease_acquire(struct task *task,
log_token(token, "paxos_acquire already owner id %llu gen %llu",
(unsigned long long)token->host_id,
(unsigned long long)token->host_generation);
+ copy_cur_leader = 1;
goto run;
}
@@ -941,41 +1144,29 @@ int paxos_lease_acquire(struct task *task,
* next_lver is derived from cur_leader with a zero or timed out owner.
* We need to monitor the leader record to see if another host commits
* a new leader_record with next_lver.
+ *
+ * TODO: may not need to increase mbal if dblock.inp and inp2 match
+ * current host_id and generation?
*/
next_lver = cur_leader.lver + 1;
- num_reads = 0;
-
- for (d = 0; d < token->r.num_disks; d++) {
- rv = read_dblock(task, &token->disks[d], token->host_id, &dblock);
- if (rv < 0)
- continue;
- num_reads++;
-
- if (dblock.mbal > our_mbal)
- our_mbal = dblock.mbal;
- }
-
- if (!num_reads) {
- log_errot(token, "paxos_acquire cannot read our dblock %d", rv);
- error = SANLK_DBLOCK_READ;
- goto out;
- }
-
- /* TODO: may not need to increase mbal if dblock.inp and inp2 match
- current host_id and generation? */
-
- if (!our_mbal)
+ if (!our_dblock.mbal)
our_mbal = token->host_id;
else
- our_mbal += cur_leader.max_hosts;
+ our_mbal = our_dblock.mbal + cur_leader.max_hosts;
retry_ballot:
- error = paxos_lease_leader_read(task, token, &tmp_leader, "paxos_acquire");
- if (error < 0)
- goto out;
+ if (copy_cur_leader) {
+ /* reusing the initial read removes an iop in the common case */
+ copy_cur_leader = 0;
+ memcpy(&tmp_leader, &cur_leader, sizeof(struct leader_record));
+ } else {
+ error = paxos_lease_leader_read(task, token, &tmp_leader, "paxos_acquire");
+ if (error < 0)
+ goto out;
+ }
if (tmp_leader.lver == next_lver) {
/*
12 years, 4 months
src/direct.c src/direct_lib.c src/diskio.c src/host_id.c src/main.c src/Makefile src/sanlock_internal.h src/token_manager.c tests/devcount.c tests/Makefile
by David Teigland
src/Makefile | 5 -
src/direct.c | 89 --------------------------
src/direct_lib.c | 107 ++++++++++++++++++++++++++++++++
src/diskio.c | 163 +++++++++++++++++++++++++++++++++----------------
src/host_id.c | 3
src/main.c | 71 ++++++++++++++-------
src/sanlock_internal.h | 18 +++++
src/token_manager.c | 3
tests/Makefile | 2
tests/devcount.c | 4 -
10 files changed, 294 insertions(+), 171 deletions(-)
New commits:
commit dc2b3c5982f7c46ebf93dc3ef58c894fb626c7af
Author: David Teigland <teigland(a)redhat.com>
Date: Tue May 17 15:58:15 2011 -0500
sanlock: aio changes
Move iocb structs off stack, into task struct, and don't reuse an
iocb until we reap an event for it. Also add task names so aio log
messages can show which tasks have errors.
Also enable both linux and posix aio (default linux). If one has
problems in a given situation, the other may not.
diff --git a/src/Makefile b/src/Makefile
index cade592..a93ced1 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -63,10 +63,9 @@ CFLAGS += -D_GNU_SOURCE -g \
-Wp,-D_FORTIFY_SOURCE=2 \
-fexceptions \
-fasynchronous-unwind-tables \
- -fdiagnostics-show-option \
- -DLINUX_AIO
+ -fdiagnostics-show-option
-CMD_LDFLAGS = -lpthread -laio -lblkid -lsanlock -lwdmd
+CMD_LDFLAGS = -lpthread -lrt -laio -lblkid -lsanlock -lwdmd
all: $(SHLIB_TARGET) $(CMD_TARGET) $(SHLIB2_TARGET)
diff --git a/src/direct.c b/src/direct.c
index 5ce625d..a95750d 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -29,7 +29,6 @@
#include "direct.h"
#include "paxos_lease.h"
#include "delta_lease.h"
-#include "sanlock_direct.h"
/*
* cli: sanlock direct init
@@ -78,17 +77,6 @@
* delta_lease_renew()
*/
-
-/* TODO: include from sanlock_internal */
-static struct task task_default = {
- DEFAULT_USE_AIO,
- DEFAULT_IO_TIMEOUT_SECONDS,
- DEFAULT_HOST_ID_TIMEOUT_SECONDS,
- DEFAULT_HOST_ID_RENEWAL_SECONDS,
- DEFAULT_HOST_ID_RENEWAL_FAIL_SECONDS,
- DEFAULT_HOST_ID_RENEWAL_WARN_SECONDS,
- 0 };
-
static int do_paxos_action(int action, struct task *task,
struct sanlk_resource *res,
int max_hosts, int num_hosts,
@@ -313,31 +301,6 @@ int direct_read_id(struct task *task,
return rv;
}
-int sanlock_direct_read_id(struct sanlk_lockspace *ls,
- uint64_t *timestamp,
- uint64_t *owner_id,
- uint64_t *owner_generation,
- int use_aio)
-{
- struct task task = task_default;
- task.use_aio = use_aio;
- int rv;
-
- if (use_aio) {
- memset(&task.aio_ctx, 0, sizeof(task.aio_ctx));
- rv = io_setup(1, &task.aio_ctx);
- if (rv < 0)
- return rv;
- }
-
- rv = direct_read_id(&task, ls, timestamp, owner_id, owner_generation);
-
- if (use_aio)
- io_destroy(task.aio_ctx);
-
- return rv;
-}
-
int direct_live_id(struct task *task,
struct sanlk_lockspace *ls,
uint64_t *timestamp,
@@ -390,32 +353,6 @@ int direct_live_id(struct task *task,
return 0;
}
-int sanlock_direct_live_id(struct sanlk_lockspace *ls,
- uint64_t *timestamp,
- uint64_t *owner_id,
- uint64_t *owner_generation,
- int *live,
- int use_aio)
-{
- struct task task = task_default;
- task.use_aio = use_aio;
- int rv;
-
- if (use_aio) {
- memset(&task.aio_ctx, 0, sizeof(task.aio_ctx));
- rv = io_setup(1, &task.aio_ctx);
- if (rv < 0)
- return rv;
- }
-
- rv = direct_live_id(&task, ls, timestamp, owner_id, owner_generation, live);
-
- if (use_aio)
- io_destroy(task.aio_ctx);
-
- return rv;
-}
-
/*
* sanlock direct init -n <num_hosts> [-s LOCKSPACE] [-r RESOURCE]
*
@@ -453,32 +390,6 @@ int direct_init(struct task *task,
return rv;
}
-int sanlock_direct_init(struct sanlk_lockspace *ls,
- struct sanlk_resource *res,
- int max_hosts, int num_hosts, int use_aio)
-{
- struct task task = task_default;
- task.use_aio = use_aio;
- int rv;
-
- if (use_aio) {
- memset(&task.aio_ctx, 0, sizeof(task.aio_ctx));
- rv = io_setup(1, &task.aio_ctx);
- if (rv < 0)
- return rv;
- }
-
- if (!max_hosts)
- max_hosts = DEFAULT_MAX_HOSTS;
-
- rv = direct_init(&task, ls, res, max_hosts, num_hosts);
-
- if (use_aio)
- io_destroy(task.aio_ctx);
-
- return rv;
-}
-
int direct_read_leader(struct task *task,
struct sanlk_lockspace *ls,
struct sanlk_resource *res,
diff --git a/src/direct_lib.c b/src/direct_lib.c
index 2c168ee..5045026 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -12,8 +12,11 @@
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
+#include <errno.h>
#include "sanlock_internal.h"
+#include "sanlock_direct.h"
+#include "direct.h"
void log_level(int space_id GNUC_UNUSED, int token_id GNUC_UNUSED,
int level GNUC_UNUSED, const char *fmt GNUC_UNUSED, ...);
@@ -29,3 +32,107 @@ int host_id_disk_info(char *name GNUC_UNUSED, struct sync_disk *disk GNUC_UNUSED
{
return -1;
}
+
+static void setup_task_lib(struct task *task, int use_aio)
+{
+ int rv;
+
+ memset(task, 0, sizeof(struct task));
+
+ sprintf(task->name, "%s", "lib");
+
+ task->io_timeout_seconds = DEFAULT_IO_TIMEOUT_SECONDS;
+ task->host_id_timeout_seconds = DEFAULT_HOST_ID_TIMEOUT_SECONDS;
+ task->host_id_renewal_seconds = DEFAULT_HOST_ID_RENEWAL_SECONDS;
+ task->host_id_renewal_fail_seconds = DEFAULT_HOST_ID_RENEWAL_FAIL_SECONDS;
+ task->host_id_renewal_warn_seconds = DEFAULT_HOST_ID_RENEWAL_WARN_SECONDS;
+
+ task->use_aio = use_aio;
+
+ if (task->use_aio) {
+ rv = io_setup(LIB_AIO_CB_SIZE, &task->aio_ctx);
+ if (rv < 0)
+ goto fail;
+
+ task->cb_size = LIB_AIO_CB_SIZE;
+ task->callbacks = malloc(LIB_AIO_CB_SIZE * sizeof(struct aicb));
+ if (!task->callbacks) {
+ rv = -ENOMEM;
+ goto fail_setup;
+ }
+ memset(task->callbacks, 0, LIB_AIO_CB_SIZE * sizeof(struct aicb));
+ }
+ return;
+
+ fail_setup:
+ io_destroy(task->aio_ctx);
+ fail:
+ task->use_aio = 0;
+}
+
+static void close_task_lib(struct task *task)
+{
+ if (task->use_aio)
+ io_destroy(task->aio_ctx);
+
+ if (task->callbacks)
+ free(task->callbacks);
+ task->callbacks = NULL;
+}
+
+int sanlock_direct_read_id(struct sanlk_lockspace *ls,
+ uint64_t *timestamp,
+ uint64_t *owner_id,
+ uint64_t *owner_generation,
+ int use_aio)
+{
+ struct task task;
+ int rv;
+
+ setup_task_lib(&task, use_aio);
+
+ rv = direct_read_id(&task, ls, timestamp, owner_id, owner_generation);
+
+ close_task_lib(&task);
+
+ return rv;
+}
+
+int sanlock_direct_live_id(struct sanlk_lockspace *ls,
+ uint64_t *timestamp,
+ uint64_t *owner_id,
+ uint64_t *owner_generation,
+ int *live,
+ int use_aio)
+{
+ struct task task;
+ int rv;
+
+ setup_task_lib(&task, use_aio);
+
+ rv = direct_live_id(&task, ls, timestamp, owner_id, owner_generation, live);
+
+ close_task_lib(&task);
+
+ return rv;
+}
+
+int sanlock_direct_init(struct sanlk_lockspace *ls,
+ struct sanlk_resource *res,
+ int max_hosts, int num_hosts, int use_aio)
+{
+ struct task task;
+ int rv;
+
+ setup_task_lib(&task, use_aio);
+
+ if (!max_hosts)
+ max_hosts = DEFAULT_MAX_HOSTS;
+
+ rv = direct_init(&task, ls, res, max_hosts, num_hosts);
+
+ close_task_lib(&task);
+
+ return rv;
+}
+
diff --git a/src/diskio.c b/src/diskio.c
index 1046f03..d0e2873 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -23,11 +23,8 @@
#include <sys/stat.h>
#include <blkid/blkid.h>
-#ifdef LINUX_AIO
-#include <libaio.h>
-#else /* POSIX_AIO */
-#include <aio.h>
-#endif
+#include <libaio.h> /* linux aio */
+#include <aio.h> /* posix aio */
#include "sanlock_internal.h"
#include "diskio.h"
@@ -180,12 +177,15 @@ int open_disks(struct sync_disk *disks, int num_disks)
return 0;
}
-static int do_write(int fd, uint64_t offset, const char *buf, int len)
+static int do_write(int fd, uint64_t offset, const char *buf, int len, struct task *task)
{
off_t ret;
int rv;
int pos = 0;
+ if (task)
+ task->io_count++;
+
ret = lseek(fd, offset, SEEK_SET);
if (ret != offset)
return -1;
@@ -210,11 +210,14 @@ static int do_write(int fd, uint64_t offset, const char *buf, int len)
return 0;
}
-static int do_read(int fd, uint64_t offset, char *buf, int len)
+static int do_read(int fd, uint64_t offset, char *buf, int len, struct task *task)
{
off_t ret;
int rv, pos = 0;
+ if (task)
+ task->io_count++;
+
ret = lseek(fd, offset, SEEK_SET);
if (ret != offset)
return -1;
@@ -233,34 +236,81 @@ static int do_read(int fd, uint64_t offset, char *buf, int len)
return 0;
}
-#ifdef LINUX_AIO
-static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
- struct task *task, int cmd)
+static struct aicb *find_callback_slot(struct task *task)
{
struct timespec ts;
- struct iocb cb;
- struct iocb *p_cb;
struct io_event event;
+ int cleared = 0;
int rv;
+ int i;
+
+ find:
+ for (i = 0; i < task->cb_size; i++) {
+ if (task->callbacks[i].used)
+ continue;
+ return &task->callbacks[i];
+ }
+
+ if (cleared++)
+ return NULL;
memset(&ts, 0, sizeof(struct timespec));
ts.tv_sec = task->io_timeout_seconds;
+ retry:
+ memset(&event, 0, sizeof(event));
- memset(&cb, 0, sizeof(cb));
- p_cb = &cb;
+ rv = io_getevents(task->aio_ctx, 1, 1, &event, &ts);
+ if (rv == -EINTR)
+ goto retry;
+ if (rv < 0)
+ return NULL;
+ if (rv == 1) {
+ struct iocb *ev_iocb = event.obj;
+ struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
- cb.aio_fildes = fd;
- cb.aio_lio_opcode = cmd;
- cb.u.c.buf = buf;
- cb.u.c.nbytes = len;
- cb.u.c.offset = offset;
+ ev_aicb->used = 0;
+ goto find;
+ }
+ return NULL;
+}
- rv = io_submit(task->aio_ctx, 1, &p_cb);
+static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
+ struct task *task, int cmd)
+{
+ struct timespec ts;
+ struct aicb *aicb;
+ struct iocb *iocb;
+ struct io_event event;
+ int rv;
+
+ /* I expect this pre-emptively catches the io_submit EAGAIN case */
+
+ aicb = find_callback_slot(task);
+ if (!aicb)
+ return -ENOENT;
+
+ iocb = &aicb->iocb;
+
+ memset(iocb, 0, sizeof(struct iocb));
+ iocb->aio_fildes = fd;
+ iocb->aio_lio_opcode = cmd;
+ iocb->u.c.buf = buf;
+ iocb->u.c.nbytes = len;
+ iocb->u.c.offset = offset;
+
+ rv = io_submit(task->aio_ctx, 1, &iocb);
if (rv < 0) {
- log_error("aio %d io_submit error %d", cmd, rv);
+ log_error("aio %s io_submit error %d", task->name, rv);
goto out;
}
+ task->io_count++;
+
+ /* don't reuse aicb->iocb until we reap the event for it */
+ aicb->used = 1;
+
+ memset(&ts, 0, sizeof(struct timespec));
+ ts.tv_sec = task->io_timeout_seconds;
retry:
memset(&event, 0, sizeof(event));
@@ -268,23 +318,29 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
if (rv == -EINTR)
goto retry;
if (rv < 0) {
- log_error("aio %d io_getevents error %d", cmd, rv);
+ log_error("aio %s io_getevents error %d", task->name, rv);
goto out;
}
if (rv == 1) {
- if (event.obj != p_cb) {
- log_error("aio %d event for other io retry", cmd);
+ struct iocb *ev_iocb = event.obj;
+ struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
+
+ ev_aicb->used = 0;
+
+ if (ev_iocb != iocb) {
+ log_error("aio %s other iocb %p event result %ld %ld",
+ task->name, ev_iocb, event.res, event.res2);
goto retry;
}
if ((int)event.res < 0) {
- log_error("aio %d event res error %ld %ld",
- cmd, event.res, event.res2);
+ log_error("aio %s event result %ld %ld",
+ task->name, event.res, event.res2);
rv = event.res;
goto out;
}
if (event.res != len) {
- log_error("aio %d event len %d error %lu %lu",
- cmd, len, event.res, event.res2);
+ log_error("aio %s event len %d result %lu %lu",
+ task->name, len, event.res, event.res2);
rv = -EMSGSIZE;
goto out;
}
@@ -297,37 +353,39 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
/* Timed out waiting for result. If cancel fails, we could try to retry
io_getevents indefinitely, but that removes the whole point of using
aio, which is the timeout. So, we need to be prepared to reap the
- event the next time we call io_getevents for a different i/o. */
+ event the next time we call io_getevents for a different i/o. We
+ can't reuse the iocb for this timed out io until we get an event for
+ it because we need to compare the iocb to event.obj to distinguish
+ events for separate submissions.
- rv = io_cancel(task->aio_ctx, &cb, &event);
- if (!rv) {
- log_error("aio %d canceled", cmd);
- rv = -ECANCELED;
- goto out;
- }
-
- /* <phro> dct: io_cancel doesn't work, in general. you are very
+ <phro> dct: io_cancel doesn't work, in general. you are very
likely going to get -EINVAL from that call */
- log_error("aio %d error %d", cmd, rv);
+ task->to_count++;
+
+ log_error("aio %s iocb %p timeout %u io_count %u", task->name, iocb,
+ task->to_count, task->io_count);
- if (rv > 0)
+ rv = io_cancel(task->aio_ctx, iocb, &event);
+ if (!rv) {
+ rv = -ECANCELED;
+ } else if (rv > 0) {
rv = -EILSEQ;
+ }
out:
return rv;
}
-static int do_write_aio(int fd, uint64_t offset, char *buf, int len, struct task *task)
+static int do_write_aio_linux(int fd, uint64_t offset, char *buf, int len, struct task *task)
{
return do_linux_aio(fd, offset, buf, len, task, IO_CMD_PWRITE);
}
-static int do_read_aio(int fd, uint64_t offset, char *buf, int len, struct task *task)
+static int do_read_aio_linux(int fd, uint64_t offset, char *buf, int len, struct task *task)
{
return do_linux_aio(fd, offset, buf, len, task, IO_CMD_PREAD);
}
-#else
-static int do_write_aio(int fd, uint64_t offset, char *buf, int len, struct task *task)
+static int do_write_aio_posix(int fd, uint64_t offset, char *buf, int len, struct task *task)
{
struct timespec ts;
struct aiocb cb;
@@ -376,7 +434,7 @@ static int do_write_aio(int fd, uint64_t offset, char *buf, int len, struct task
return -1;
}
-static int do_read_aio(int fd, uint64_t offset, char *buf, int len, struct task *task)
+static int do_read_aio_posix(int fd, uint64_t offset, char *buf, int len, struct task *task)
{
struct timespec ts;
struct aiocb cb;
@@ -423,16 +481,17 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len, struct task
/* undefined error condition */
return -1;
}
-#endif
/* write aligned io buffer */
int write_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len, struct task *task)
{
- if (task && task->use_aio)
- return do_write_aio(fd, offset, iobuf, iobuf_len, task);
+ if (task && task->use_aio == 1)
+ return do_write_aio_linux(fd, offset, iobuf, iobuf_len, task);
+ else if (task && task->use_aio == 2)
+ return do_write_aio_posix(fd, offset, iobuf, iobuf_len, task);
else
- return do_write(fd, offset, iobuf, iobuf_len);
+ return do_write(fd, offset, iobuf, iobuf_len, task);
}
static int _write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
@@ -543,10 +602,12 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
memset(iobuf, 0, iobuf_len);
- if (task && task->use_aio)
- rv = do_read_aio(disk->fd, offset, iobuf, iobuf_len, task);
+ if (task && task->use_aio == 1)
+ rv = do_read_aio_linux(disk->fd, offset, iobuf, iobuf_len, task);
+ else if (task && task->use_aio == 2)
+ rv = do_read_aio_posix(disk->fd, offset, iobuf, iobuf_len, task);
else
- rv = do_read(disk->fd, offset, iobuf, iobuf_len);
+ rv = do_read(disk->fd, offset, iobuf, iobuf_len, task);
if (!rv) {
memcpy(data, iobuf, data_len);
diff --git a/src/host_id.c b/src/host_id.c
index 97cf7d7..9cfe35f 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -167,7 +167,8 @@ static void *host_id_thread(void *arg_in)
host_id = sp->host_id;
memcpy(&space_name, sp->space_name, NAME_ID_SIZE);
- setup_task(&task);
+ setup_task(&task, HOSTID_AIO_CB_SIZE);
+ memcpy(task.name, sp->space_name, NAME_ID_SIZE);
last_attempt = time(NULL);
diff --git a/src/main.c b/src/main.c
index 6ad4c85..bfa6a1c 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1430,12 +1430,13 @@ static void call_cmd(struct task *task, struct cmd_args *ca)
};
}
-static void *thread_pool_worker(void *data GNUC_UNUSED)
+static void *thread_pool_worker(void *data)
{
struct task task;
struct cmd_args *ca;
- setup_task(&task);
+ setup_task(&task, WORKER_AIO_CB_SIZE);
+ snprintf(task.name, NAME_ID_SIZE, "worker%ld", (long)data);
pthread_mutex_lock(&pool.mutex);
@@ -1484,7 +1485,8 @@ static int thread_pool_add_work(struct cmd_args *ca)
list_add_tail(&ca->list, &pool.work_data);
if (!pool.free_workers && pool.num_workers < pool.max_workers) {
- rv = pthread_create(&th, NULL, thread_pool_worker, &pool);
+ rv = pthread_create(&th, NULL, thread_pool_worker,
+ (void *)(long)pool.num_workers);
if (rv < 0) {
list_del(&ca->list);
pthread_mutex_unlock(&pool.mutex);
@@ -1522,7 +1524,8 @@ static int thread_pool_create(int min_workers, int max_workers)
pool.max_workers = max_workers;
for (i = 0; i < min_workers; i++) {
- rv = pthread_create(&th, NULL, thread_pool_worker, &pool);
+ rv = pthread_create(&th, NULL, thread_pool_worker,
+ (void *)(long)i);
if (rv < 0)
break;
pool.num_workers++;
@@ -1534,27 +1537,54 @@ static int thread_pool_create(int min_workers, int max_workers)
return rv;
}
-void setup_task(struct task *task)
+void setup_task(struct task *task, int cb_size)
{
int rv;
- memcpy(task, &main_task, sizeof(struct task));
+ memset(task, 0, sizeof(struct task));
- memset(&task->aio_ctx, 0, sizeof(io_context_t));
+ /* inherit configured timeouts from main_task */
+
+ task->use_aio = main_task.use_aio;
+ task->io_timeout_seconds = main_task.io_timeout_seconds;
+ task->host_id_timeout_seconds = main_task.host_id_timeout_seconds;
+ task->host_id_renewal_seconds = main_task.host_id_renewal_seconds;
+ task->host_id_renewal_fail_seconds = main_task.host_id_renewal_fail_seconds;
+ task->host_id_renewal_warn_seconds = main_task.host_id_renewal_warn_seconds;
if (task->use_aio) {
- rv = io_setup(1, &task->aio_ctx);
- if (rv < 0) {
- log_error("io_setup error %d, use_aio=0", rv);
- task->use_aio = 0;
+ if (!cb_size)
+ goto fail;
+
+ rv = io_setup(cb_size, &task->aio_ctx);
+ if (rv < 0)
+ goto fail;
+
+ task->cb_size = cb_size;
+ task->callbacks = malloc(cb_size * sizeof(struct aicb));
+ if (!task->callbacks) {
+ rv = -ENOMEM;
+ goto fail_setup;
}
+ memset(task->callbacks, 0, cb_size * sizeof(struct aicb));
}
+ return;
+
+ fail_setup:
+ io_destroy(task->aio_ctx);
+ fail:
+ task->use_aio = 0;
+ log_error("setup_task aio disabled %d", rv);
}
void close_task(struct task *task)
{
if (task->use_aio)
io_destroy(task->aio_ctx);
+
+ if (task->callbacks)
+ free(task->callbacks);
+ task->callbacks = NULL;
}
static int print_daemon_state(char *str)
@@ -2129,9 +2159,10 @@ static int do_daemon(void)
umask(0);
}
- /* in the daemon, the main_task should never do disk i/o, so we do not
- need to call io_setup() on main_task.aio_ctx */
-
+ /* no setup_task(&main_task) to set up aio in daemon because the daemon
+ main task should never do disk i/o. We do leave main_task.use_aio
+ set because other tasks copy their use_aio setting from there. */
+
rv = client_alloc();
if (rv < 0)
return rv;
@@ -2858,15 +2889,10 @@ static int do_direct(void)
int rv;
/* for direct commands, the main_task does disk i/o, so set up
- main_task.aio_ctx */
+ main_task for aio */
- if (main_task.use_aio) {
- rv = io_setup(1, &main_task.aio_ctx);
- if (rv < 0) {
- log_tool("io_setup error %d, use_aio=0", rv);
- main_task.use_aio = 0;
- }
- }
+ setup_task(&main_task, DIRECT_AIO_CB_SIZE);
+ sprintf(main_task.name, "%s", "main_direct");
switch (com.action) {
case ACT_INIT:
@@ -2990,6 +3016,7 @@ int main(int argc, char *argv[])
com.pid = -1;
memset(&main_task, 0, sizeof(main_task));
+ sprintf(main_task.name, "%s", "main");
main_task.use_aio = DEFAULT_USE_AIO;
main_task.io_timeout_seconds = DEFAULT_IO_TIMEOUT_SECONDS;
main_task.host_id_timeout_seconds = DEFAULT_HOST_ID_TIMEOUT_SECONDS;
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 7a02b0a..5144188 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -251,14 +251,30 @@ struct sm_header {
#define DEFAULT_HOST_ID_RENEWAL_FAIL_SECONDS 30
#define DEFAULT_HOST_ID_RENEWAL_WARN_SECONDS 25
+#define HOSTID_AIO_CB_SIZE 64
+#define WORKER_AIO_CB_SIZE 8
+#define DIRECT_AIO_CB_SIZE 8
+#define RELEASE_AIO_CB_SIZE 64
+#define LIB_AIO_CB_SIZE 8
+
+struct aicb {
+ int used;
+ struct iocb iocb;
+};
+
struct task {
+ char name[NAME_ID_SIZE+1];
int use_aio;
int io_timeout_seconds;
int host_id_timeout_seconds;
int host_id_renewal_seconds;
int host_id_renewal_fail_seconds;
int host_id_renewal_warn_seconds;
+ unsigned int io_count;
+ unsigned int to_count;
+ int cb_size;
io_context_t aio_ctx;
+ struct aicb *callbacks;
};
EXTERN struct task main_task;
@@ -322,7 +338,7 @@ enum {
};
/* main.c */
-void setup_task(struct task *task);
+void setup_task(struct task *task, int cb_size);
void close_task(struct task *task);
#endif
diff --git a/src/token_manager.c b/src/token_manager.c
index 43a6507..12d14c5 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -190,7 +190,8 @@ static void *async_release_thread(void *arg GNUC_UNUSED)
struct resource *r;
struct token *token;
- setup_task(&task);
+ setup_task(&task, RELEASE_AIO_CB_SIZE);
+ sprintf(task.name, "%s", "release");
while (1) {
pthread_mutex_lock(&resource_mutex);
diff --git a/tests/Makefile b/tests/Makefile
index 80f5126..e656606 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -21,7 +21,7 @@ CFLAGS += -D_GNU_SOURCE -g \
-fasynchronous-unwind-tables \
-fdiagnostics-show-option
-LDFLAGS = -laio -lblkid -lsanlock -lsanlock_direct
+LDFLAGS = -lrt -laio -lblkid -lsanlock -lsanlock_direct
all: $(TARGET)
diff --git a/tests/devcount.c b/tests/devcount.c
index fc7a1fb..f7d049e 100644
--- a/tests/devcount.c
+++ b/tests/devcount.c
@@ -570,9 +570,9 @@ static int do_relock(int argc, char *argv[])
}
run_more:
- /* let the child run for 10 seconds before stopping it */
+ /* let the child run for 20 seconds before stopping it */
- for (i = 0; i < 10; i++) {
+ for (i = 0; i < 20; i++) {
rv = waitpid(pid, &status, WNOHANG);
if (rv == pid)
break;
12 years, 4 months
2 commits - init.d/sanlock sanlock.spec
by David Teigland
init.d/sanlock | 4 +++-
sanlock.spec | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
New commits:
commit 06694036af7c1dd6b06066498eaf0e7cea00858f
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Mon May 16 11:54:04 2011 -0400
rpm: add daemon options in the init file
diff --git a/init.d/sanlock b/init.d/sanlock
index d787d3c..c05fa9c 100644
--- a/init.d/sanlock
+++ b/init.d/sanlock
@@ -25,10 +25,12 @@ prog="sanlock"
lockfile="/var/run/sanlock/$prog.pid"
exec="/usr/sbin/$prog"
+[ -f /etc/sysconfig/$prog ] && . /etc/sysconfig/$prog
+
start() {
[ -x $exec ] || exit 5
echo -n $"Starting $prog: "
- daemon $prog daemon
+ daemon $prog daemon $SANLOCKOPTS
retval=$?
echo
[ $retval -eq 0 ] && touch $lockfile
commit 8de031767e59f98b237ea8991f1c00afb51cbc81
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Fri May 13 04:30:42 2011 -0400
rpm: add missing libaio-devel dependency
diff --git a/sanlock.spec b/sanlock.spec
index 2b8798e..c4cb013 100644
--- a/sanlock.spec
+++ b/sanlock.spec
@@ -11,7 +11,7 @@ URL: https://fedorahosted.org/sanlock/
Source0: https://fedorahosted.org/releases/s/a/sanlock/%{name}-%{version}.tar.bz2
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-BuildRequires: libblkid-devel
+BuildRequires: libblkid-devel, libaio-devel
Requires: %{name}-lib = %{version}-%{release}
12 years, 4 months
src/diskio.c src/main.c src/Makefile
by David Teigland
src/Makefile | 5 ++--
src/diskio.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
src/main.c | 2 +
3 files changed, 69 insertions(+), 3 deletions(-)
New commits:
commit 0096325c6f7fa1d32bc11d2a8608d747f87f8312
Author: David Teigland <teigland(a)redhat.com>
Date: Tue May 10 15:06:53 2011 -0500
sanlock: use native linux aio
instead of posix aio
diff --git a/src/Makefile b/src/Makefile
index b3c3ade..cade592 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -63,9 +63,10 @@ CFLAGS += -D_GNU_SOURCE -g \
-Wp,-D_FORTIFY_SOURCE=2 \
-fexceptions \
-fasynchronous-unwind-tables \
- -fdiagnostics-show-option
+ -fdiagnostics-show-option \
+ -DLINUX_AIO
-CMD_LDFLAGS = -lpthread -lrt -lblkid -lsanlock -lwdmd
+CMD_LDFLAGS = -lpthread -laio -lblkid -lsanlock -lwdmd
all: $(SHLIB_TARGET) $(CMD_TARGET) $(SHLIB2_TARGET)
diff --git a/src/diskio.c b/src/diskio.c
index fe55a6b..9ca4abd 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -21,9 +21,14 @@
#include <sys/types.h>
#include <sys/time.h>
#include <sys/stat.h>
-#include <aio.h>
#include <blkid/blkid.h>
+#ifdef LINUX_AIO
+#include <libaio.h>
+#else /* POSIX_AIO */
+#include <aio.h>
+#endif
+
#include "sanlock_internal.h"
#include "diskio.h"
#include "log.h"
@@ -209,6 +214,63 @@ static int do_read(int fd, uint64_t offset, char *buf, int len)
return 0;
}
+#ifdef LINUX_AIO
+static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
+ int io_timeout_seconds, int cmd)
+{
+ io_context_t ctx;
+ struct io_event event;
+ struct timespec ts;
+ struct iocb cb;
+ struct iocb *p_cb;
+ int rv;
+
+ memset(&ctx, 0, sizeof(ctx));
+ rv = io_setup(1, &ctx);
+ if (rv < 0)
+ return rv;
+
+ memset(&cb, 0, sizeof(cb));
+ p_cb = &cb;
+
+ cb.aio_fildes = fd;
+ cb.aio_lio_opcode = cmd;
+ cb.u.c.buf = buf;
+ cb.u.c.nbytes = len;
+ cb.u.c.offset = offset;
+
+ rv = io_submit(ctx, 1, &p_cb);
+ if (rv < 0)
+ goto out;
+
+ memset(&event, 0, sizeof(event));
+
+ memset(&ts, 0, sizeof(struct timespec));
+ ts.tv_sec = io_timeout_seconds;
+
+ rv = io_getevents(ctx, 1, 1, &event, &ts);
+ if (rv == 1) {
+ rv = 0;
+ goto out;
+ }
+ rv = -EIO;
+ out:
+ io_destroy(ctx);
+ return rv;
+}
+
+static int do_write_aio(int fd, uint64_t offset, char *buf, int len,
+ int io_timeout_seconds)
+{
+ return do_linux_aio(fd, offset, buf, len, io_timeout_seconds, IO_CMD_PWRITE);
+}
+static int do_read_aio(int fd, uint64_t offset, char *buf, int len,
+ int io_timeout_seconds)
+{
+ return do_linux_aio(fd, offset, buf, len, io_timeout_seconds, IO_CMD_PREAD);
+}
+
+#else
static int do_write_aio(int fd, uint64_t offset, char *buf, int len,
int io_timeout_seconds)
{
@@ -306,6 +368,7 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len, int io_timeo
/* undefined error condition */
return -1;
}
+#endif
/* write aligned io buffer */
diff --git a/src/main.c b/src/main.c
index 911379a..b6cd451 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2564,6 +2564,8 @@ static int read_command_line(int argc, char *argv[])
/* the only action that has an option without dash-letter prefix */
if (com.action == ACT_DUMP) {
+ if (argc < 4)
+ exit(EXIT_FAILURE);
optionarg = argv[i++];
com.dump_path = strdup(optionarg);
}
12 years, 4 months
2 commits - src/client_admin.c src/main.c
by David Teigland
src/client_admin.c | 10 ++++++----
src/main.c | 9 +++++++--
2 files changed, 13 insertions(+), 6 deletions(-)
New commits:
commit 21101ea0bfdf186e4946e7a25ad6484e51072713
Author: David Teigland <teigland(a)redhat.com>
Date: Mon May 9 16:37:12 2011 -0500
sanlock: fix inquire of dead pid
return an error if the pid is dead by the time
we get to process the inquire
diff --git a/src/main.c b/src/main.c
index c0b36d8..911379a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1192,7 +1192,7 @@ static void cmd_inquire(struct cmd_args *ca)
struct sm_header h;
struct token *token;
struct client *cl;
- char *state, *str;
+ char *state = NULL, *str;
int state_maxlen = 0, state_strlen = 0;
int res_count = 0, cat_count = 0;
int fd, i, rv, pid_dead;
@@ -1209,6 +1209,11 @@ static void cmd_inquire(struct cmd_args *ca)
pthread_mutex_lock(&cl->mutex);
+ if (cl->pid_dead) {
+ result = -ESTALE;
+ goto done;
+ }
+
for (i = 0; i < SANLK_MAX_RESOURCES; i++) {
if (cl->tokens[i])
res_count++;
commit 2e8b735e75c056ca25983fe209529e7167027f45
Author: David Teigland <teigland(a)redhat.com>
Date: Mon May 9 16:01:08 2011 -0500
sanlock: client status output format changes
diff --git a/src/client_admin.c b/src/client_admin.c
index c488b50..fc3fa4a 100644
--- a/src/client_admin.c
+++ b/src/client_admin.c
@@ -111,8 +111,9 @@ static void status_lockspace(int fd, struct sanlk_state *st, char *str, int debu
rv = recv(fd, &lockspace, sizeof(lockspace), MSG_WAITALL);
- printf("lockspace %.48s host_id %llu %s:%llu\n",
- lockspace.name, (unsigned long long)lockspace.host_id,
+ printf(" lockspace %.48s:%llu:%s:%llu\n",
+ lockspace.name,
+ (unsigned long long)lockspace.host_id,
lockspace.host_id_disk.path,
(unsigned long long)lockspace.host_id_disk.offset);
@@ -137,14 +138,15 @@ static void status_resource(int fd, struct sanlk_state *st, char *str, int debug
rv = recv(fd, &resource, sizeof(resource), MSG_WAITALL);
- printf(" %.48s %.48s\n", resource.lockspace_name, resource.name);
+ printf(" resource %.48s:%.48s", resource.lockspace_name, resource.name);
for (i = 0; i < resource.num_disks; i++) {
rv = recv(fd, &disk, sizeof(disk), MSG_WAITALL);
- printf(" %s:%llu\n",
+ printf(":%s:%llu",
disk.path, (unsigned long long)disk.offset);
}
+ printf("\n");
if (st->str_len && debug)
print_debug(str, st->str_len);
diff --git a/src/main.c b/src/main.c
index 1c3a8e9..c0b36d8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1554,7 +1554,7 @@ static int print_daemon_state(char *str)
"host_id_renewal=%d "
"host_id_renewal_fail=%d "
"host_id_renewal_warn=%d "
- "host_id_timeout=%d ",
+ "host_id_timeout=%d",
to.use_aio,
to.io_timeout_seconds,
to.host_id_renewal_seconds,
12 years, 4 months
Changes to 'refs/tags/sanlock-1.2.0'
by Chris Feist
Changes since the dawn of time:
Daniel P. Berrange (15):
Fix const-ness of many APIs.
Fix warnings in watchdog module
Fix function prototypes for no-arg methods
Remove use of 'index' as a variable name
Make many functions static
Fix missing include in logging file
Annotate many unused parameters to avoid warnings
Remove redundant redeclaration of 'to' variable
Fix args to execv()
Remove redundant arg to 'run_command'
Rename optarg to optionarg to avoid clashing with getopt.h
Disable the read_request method since it is unused
Add many more compiler warning flags & safety checks
Hard code a sector size of 512 if the lease volume is a regular file
Ensure libsanlock.so is built with debug/warning flags
David Teigland (185):
sync_manager: initial commit
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: add more logging
sync_manager: misc updates
sync_manager: misc updates
sync_manager: num_hosts/MAX_HOSTS stuff
daemon: reworking notions of resource_id/token_name
sync_manager: resource lockfiles
sync_manager: lease arg processing
sync_manager: Began multiple lease support
sync_manager: use first command line arg as action
sync_manager: leader record changes and verify
sync_manager: clean up released leases
sync_manager: move functions around
sync_manager: add more tool actions
sync_manager: naming changes
sync_manager: separate token index and id
sync_manager: fix index usage and other misc
sync_manager: use pthread cond in acquire
sync_manager: write all log entries
sync_manager: simplify polling
sync_manager: fix waitpid use
sync_manager: acquire can fail early
sync_manager: write log entries at exit
sync_manager: add test program
sync_manager: move secondary pid check
sync_manager: fix disk paxos contention
devcount: fix verify checks
sync_manager: add GPL license file
sync_manager: fix leader block offsets
sync_manager: increase COMMAND_MAX
sync_manager: renewal should verify
sync_manager: use sector size from libblkid
sync_manager: use a real checksum function
sync_manager: add libblkid to spec file
sync_manager: print status info
sync_manager: one watchdog file per lease
sync_manager: lease_threads handle watchdog files
sync_manager: fix/add some text/comments
sync_manager: refactor read/write
sync_manager: move disk io functions
sync_manager: remove token arg
sync_manager: rename paxos_disk sync_disk
sync_manager: add aio read/write
sync_manager: make io_timeout_seconds a diskio arg
sync_manager: forgot to add new files
sync_manager: use log thread
sync_manager: client connections
sync_manager: connection processing
sync_manager: send/recv pid
sync_manager: add write_sectors
sync_manager: restructuring
sync_manager: write_sectors code factoring
sync_manager: daemonize
sync_manager: releasing leases
sync_manager: async releasing
sync_manager: release fixes
sync_manager: add direct and indirect acquire/release
sync_manager: reacquire resources
sync_manager: move code
sync_manager: same pid may reacquire resource
sync_manager: lease migration
sync_manager: handle client errors
sync_manager: improve error handling
sync_manager: host_id leases
sync_manager: remove empty files
sync_manager: print initialization info
sync_manager: rename files
sync_manager: clean up header org
sync_manager: delta_lease implementation
sync_manager: accept offset units
sync_manager: fix up init output
sync_manager: put back watchdog calls
sync_manager: fix start_host_id error paths
sync_manager: add log_error's for watchdog file errors
sync_manager: actual timeouts
sync_manager: change timeouts on cmd line
sanlock: create new external api
sanlock: build libsanlock
sanlock: use MAX_LEASES everywhere
sanlock: add libvirt plugin
sanlock plugin: couple minor fixes
sanlock: clean up /var file names
sanlock plugin: fix symbol needed by libvirt
sanlock: add some debug output
sanlock plugin: fix uuid copy
sanlock plugin: fix names
sanlock: add "owner_name"
sanlock: fix renewal checks
sanlock: clean up host_id types
sanlock: set_host_id command
sanlock: fix killing pids
sanlock: add status command
sanlock: set version to 1.0
sanlock: delta_lease cleanup
sanlock: changing num_hosts
sanlock: add dump command
sanlock: renewal timings
sanlock: add direct option
sanlock: check for watchdog file
sanlock: recovery fixes
lock_driver_sanlock: fix compile problems
sanlock: improve command options
sanlock: tidying help text
sanlock: move binary to /usr/sbin
sanlock: add init script
sanlock: fix sigterm shutdown
sanlock: init stop
sanlock: add wdtest command
sanlock.spec: new url
lock_driver_sanlock: remove close
sanlock: introduce lockspaces
lock_driver_sanlock: remove files
sanlock: better logging functions
sanlock: misc log message
sanlock.spec: sbin not libexec
sanlock init: remove watchdog reference
wdmd: watchdog multiplexing daemon
sanlock: add code to use wdmd
sanlock/wdmd: use wdmd in sanlock
sanlock/wdmd: add copyright header to source files
sanlock: rename sanlock source dir
sanlock: move tests dir
move COPYING file
wdmd: use signalfd for signal handling
Fix Makefile comments
wdmd: fix daemon debug option
wdmd: add init script
sanlock.spec: updates
sanlock.spec: src dir
sanlock: build with uninstalled libwdmd
sanlock: version 1.1
sanlock: high priority options
wdmd: high priority options
sanlock: return migration state
sanlock: migration.txt describes libvirt/sanlock steps
libsanlock: include admin functions
sanlock: fix host_id expiration check
sanlock: migration working
devcount: migrate test
sanlock: setowner improvements
sanlock: migrate to target fix
sanlock: fix wdmd stop order
sanlock: various fixes
sanlock: remove wdtest
sanlock: remove migration
sanlock: clean up command return data
sanlock: add resource string conversion functions
sanlock: rework internal structs
devcount: add relock test
sanlock: fix release and inquire
sanlock: add_lockspace EEXIST
sanlock: rework client handling
sanlock: clean up warnings
sanlock: debug message changes
sanlock: add lockspace checks
wdmd: enable test scripts
sanlock: add str_to_lockspace to lib
WIP devcount migrate
devcount: new migrate test
sanlock: read_id and live_id commands
sanlock: check lockspace name and host_id
sanlock: remove remaining cluster_mode
sanlock: add libsanlock_direct
devcountn: start multiple devcount tests
devcount: small changes
sanlock: new return values
sanlock: misc changes and fix
sanlock: log error of full bad block
sanlock: interval between renewal checks
sanlock: renewal changes
sanlock: fix log_dump
sanlock: fix find_client_pid
sanlock: fix host_id reads from paxos_acquire
sanlock: init with one write
devcount: improve output
devcount: new pause/resume
devcount: add expire test
sanlock: correct paxos usage
sanlock: direct read_leader
sanlock: paxos delays
sanlock: use thread pool
Fabio M. Di Nitto (5):
build: sanlock should link with libsanlock
build: install shared lib header files and fix DESTDIR usage
build: drop rpm target
spec file: do first cut for total spec file
build: fix linking with libsanlock and install target
Federico Simoncelli (17):
rpm: sync specfile with fedora
rpm: add sanlock_admin.h header
rpm: add the lib package, install the initscripts
python: remove unused python code
python: add python binding
python: release the gil during sanlock operations
python: wrap sanlock extension with a module
rpm: add python binding package
python: pass a lockspace copy to str_to_lockspace
makefile: fix install typo
rpm: add sanlock_direct header
python: add sanlock init functions and exception
direct: close disks after initialization
python: register process only once
daemon: configurable socket permissions
rpm: add sanlock user and group
python: exceptions must contain the errno
Saggi Mizrahi (21):
Added the begining of the testing and debugging tools
Better handling of max hosts
sync_manager: Updated tests to work with new lease struct
sync_manager: fixed skipping first arg in command
sync_manager: acquire and release actions
sync_manager: minor fixes
sync_manager: renamed stuff
sync_manager: made acquire synchronous again
sync_manager: added set_host_id action
sync_manager: use kill(0) for secondary pid check
sync_manager: make rpm and install
sync_manager: spec file update
sync_manager: Allow longer resource names
sync_manager: allow repeated set_host_id
sync_manager: Added escaping for the leases arg
sync_manager: Created the python bindings for sync_manager
sync_manager: listener socket permissions
sync_manager: Updated python binding and tests
sync_manager: Made 'token' a const in log_level
sync_manager: refactor messaging system
sync_manager: use getsockopt PEERCRED
12 years, 4 months