src/delta_lease.c | 50 ++++++++++++++++++++----------------------
src/diskio.c | 18 +++++++++++----
src/diskio.h | 3 ++
src/paxos_lease.c | 63 ++++++++++++++++++++++++++----------------------------
4 files changed, 71 insertions(+), 63 deletions(-)
New commits:
commit a7863d7d95a44de102637c6a27bffdd534487bee
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Apr 21 10:58:26 2011 -0500
sanlock: init with one write
Do one big write of all sectors during init,
instead of many single-sector writes
(for both lockspace and resource).
diff --git a/src/delta_lease.c b/src/delta_lease.c
index bf55469..63fb2a0 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -373,42 +373,40 @@ int delta_lease_init(struct timeout *ti,
char *space_name,
int max_hosts)
{
- struct leader_record leader;
+ struct leader_record *leader;
+ char *iobuf, **p_iobuf;
+ int iobuf_len;
int i, rv;
- uint64_t bb, be, sb, se;
- uint32_t ss;
- ss = disk->sector_size;
- bb = disk->offset;
- be = disk->offset + (disk->sector_size * max_hosts) - 1;
- sb = bb / ss;
- se = be / ss;
+ iobuf_len = disk->sector_size * max_hosts;
- memset(&leader, 0, sizeof(struct leader_record));
+ p_iobuf = &iobuf;
- leader.magic = DELTA_DISK_MAGIC;
- leader.version = DELTA_DISK_VERSION_MAJOR | DELTA_DISK_VERSION_MINOR;
- leader.sector_size = disk->sector_size;
- leader.max_hosts = 1;
- leader.timestamp = LEASE_FREE;
- strncpy(leader.space_name, space_name, NAME_ID_SIZE);
+ rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+ if (rv)
+ return rv;
+
+ memset(iobuf, 0, iobuf_len);
/* host_id N is block offset N-1 */
for (i = 0; i < max_hosts; i++) {
- memset(leader.resource_name, 0, NAME_ID_SIZE);
- snprintf(leader.resource_name, NAME_ID_SIZE, "host_id_%d", i+1);
- leader.checksum = leader_checksum(&leader);
-
- rv = write_sector(disk, i, (char *)&leader, sizeof(struct leader_record),
- ti->io_timeout_seconds, ti->use_aio, "delta_leader");
-
- if (rv < 0) {
- log_tool("delta_init write_sector %d rv %d", i, rv);
- return rv;
- }
+ leader = (struct leader_record *)(iobuf + (i * disk->sector_size));
+ leader->magic = DELTA_DISK_MAGIC;
+ leader->version = DELTA_DISK_VERSION_MAJOR | DELTA_DISK_VERSION_MINOR;
+ leader->sector_size = disk->sector_size;
+ leader->max_hosts = 1;
+ leader->timestamp = LEASE_FREE;
+ strncpy(leader->space_name, space_name, NAME_ID_SIZE);
+ snprintf(leader->resource_name, NAME_ID_SIZE, "host_id_%d", i+1);
+ leader->checksum = leader_checksum(leader);
}
+ rv = write_iobuf(disk->fd, disk->offset, iobuf, iobuf_len,
+ ti->io_timeout_seconds, ti->use_aio);
+ if (rv < 0)
+ return rv;
+
return 0;
}
diff --git a/src/diskio.c b/src/diskio.c
index d55f40f..fe55a6b 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -307,6 +307,17 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len, int io_timeo
return -1;
}
+/* write aligned io buffer */
+
+int write_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
+ int io_timeout_seconds, int use_aio)
+{
+ if (use_aio)
+ return do_write_aio(fd, offset, iobuf, iobuf_len, io_timeout_seconds);
+ else
+ return do_write(fd, offset, iobuf, iobuf_len);
+}
+
static int _write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
uint32_t sector_count GNUC_UNUSED,
const char *data, int data_len,
@@ -332,11 +343,8 @@ static int _write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
memset(iobuf, 0, iobuf_len);
memcpy(iobuf, data, data_len);
- if (use_aio)
- rv = do_write_aio(disk->fd, offset, iobuf, iobuf_len, io_timeout_seconds);
- else
- rv = do_write(disk->fd, offset, iobuf, iobuf_len);
-
+ rv = write_iobuf(disk->fd, offset, iobuf, iobuf_len,
+ io_timeout_seconds, use_aio);
if (rv < 0)
log_error("write_sectors %s offset %llu rv %d %s",
blktype, (unsigned long long)offset, rv, disk->path);
diff --git a/src/diskio.h b/src/diskio.h
index 3ec3b27..a034e4c 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -13,6 +13,9 @@ void close_disks(struct sync_disk *disks, int num_disks);
int open_disks(struct sync_disk *disks, int num_disks);
int open_disks_fd(struct sync_disk *disks, int num_disks);
+int write_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
+ int io_timeout_seconds, int use_aio);
+
int write_sector(const struct sync_disk *disk, uint64_t sector_nr,
const char *data, int data_len, int io_timeout_seconds,
int use_aio, const char *blktype);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index d882706..c846d47 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -84,6 +84,7 @@ static int write_dblock(struct timeout *ti,
return rv;
}
+#if 0
static int write_request(struct timeout *ti,
struct sync_disk *disk, struct request_record *rr)
{
@@ -93,6 +94,7 @@ static int write_request(struct timeout *ti,
ti->io_timeout_seconds, ti->use_aio, "request");
return rv;
}
+#endif
static int write_leader(struct timeout *ti,
struct sync_disk *disk, struct leader_record *lr)
@@ -988,43 +990,40 @@ int paxos_lease_release(struct timeout *ti,
int paxos_lease_init(struct timeout *ti,
struct token *token, int num_hosts, int max_hosts)
{
- struct leader_record leader;
- struct request_record req;
- struct paxos_dblock dblock;
- int d, q;
- uint32_t offset, ss;
- uint64_t bb, be, sb, se;
-
- offset = token->disks[0].offset;
- ss = token->disks[0].sector_size;
- bb = offset;
- be = offset + (ss * (max_hosts + 2) - 1);
- sb = bb / ss;
- se = be / ss;
-
- memset(&leader, 0, sizeof(struct leader_record));
- memset(&req, 0, sizeof(struct request_record));
- memset(&dblock, 0, sizeof(struct paxos_dblock));
+ char *iobuf, **p_iobuf;
+ struct leader_record *leader;
+ int iobuf_len;
+ int rv, d;
- leader.magic = PAXOS_DISK_MAGIC;
- leader.version = PAXOS_DISK_VERSION_MAJOR | PAXOS_DISK_VERSION_MINOR;
- leader.sector_size = token->disks[0].sector_size;
- leader.num_hosts = num_hosts;
- leader.max_hosts = max_hosts;
- leader.timestamp = LEASE_FREE;
- strncpy(leader.space_name, token->r.lockspace_name, NAME_ID_SIZE);
- strncpy(leader.resource_name, token->r.name, NAME_ID_SIZE);
- leader.checksum = leader_checksum(&leader);
+ iobuf_len = token->disks[0].sector_size * (2 + max_hosts);
+
+ p_iobuf = &iobuf;
+
+ rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+ if (rv)
+ return rv;
+
+ memset(iobuf, 0, iobuf_len);
+
+ leader = (struct leader_record *)iobuf;
+ leader->magic = PAXOS_DISK_MAGIC;
+ leader->version = PAXOS_DISK_VERSION_MAJOR | PAXOS_DISK_VERSION_MINOR;
+ leader->sector_size = token->disks[0].sector_size;
+ leader->num_hosts = num_hosts;
+ leader->max_hosts = max_hosts;
+ leader->timestamp = LEASE_FREE;
+ strncpy(leader->space_name, token->r.lockspace_name, NAME_ID_SIZE);
+ strncpy(leader->resource_name, token->r.name, NAME_ID_SIZE);
+ leader->checksum = leader_checksum(leader);
for (d = 0; d < token->r.num_disks; d++) {
- write_leader(ti, &token->disks[d], &leader);
- write_request(ti, &token->disks[d], &req);
- for (q = 0; q < max_hosts; q++)
- write_dblock(ti, &token->disks[d], q, &dblock);
+ rv = write_iobuf(token->disks[d].fd, token->disks[d].offset,
+ iobuf, iobuf_len,
+ ti->io_timeout_seconds, ti->use_aio);
+ if (rv < 0)
+ return rv;
}
- /* TODO: return error if cannot initialize majority of disks */
-
return 0;
}