src/main.c
by David Teigland
src/main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
New commits:
commit 57a59a7256df858cace94cfab6c10aa1b0a4cf56
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Jun 21 10:35:20 2011 -0500
sanlock: release 1.4
diff --git a/src/main.c b/src/main.c
index 0b531bf..d110f4e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2326,7 +2326,7 @@ static void parse_arg_timeout(char *optstr)
}
#endif
-#define RELEASE_VERSION "1.3"
+#define RELEASE_VERSION "1.4"
/*
* daemon: acquires leases for the local host_id, associates them with a local
11 years, 9 months
2 commits - src/main.c tests/sanlk_load.c
by David Teigland
src/main.c | 34 +++++++++++++------
tests/sanlk_load.c | 91 ++++++++++++++++++++++++++++++++++++++++++-----------
2 files changed, 97 insertions(+), 28 deletions(-)
New commits:
commit 021048e33e4c260c85ba6ab50645523725c385b0
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Jun 20 16:29:25 2011 -0500
sanlock: initial pid_dead check in acquire
If cmd_acquire has been queued for a while waiting for
the thread pool, the pid may already be dead by the time
cmd_acquire begins. In that case don't go to the trouble
of acquiring leases before checking if the pid is dead.
diff --git a/src/main.c b/src/main.c
index 3d407fb..0b531bf 100644
--- a/src/main.c
+++ b/src/main.c
@@ -761,6 +761,12 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
}
pthread_mutex_lock(&cl->mutex);
+ if (cl->pid_dead) {
+ result = -ESTALE;
+ pthread_mutex_unlock(&cl->mutex);
+ goto done;
+ }
+
empty_slots = 0;
for (i = 0; i < SANLK_MAX_RESOURCES; i++) {
if (!cl->tokens[i])
commit 0d8078864b74173a181d65dcf217549639bd81db
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Jun 20 15:32:42 2011 -0500
sanlock: fix recv and inquire
- recv correct number of remaining bytes after aborting cmd_acquire
- don't return partial/bogus string from cmd_inquire when the
pid has no resources
diff --git a/src/main.c b/src/main.c
index fec794e..3d407fb 100644
--- a/src/main.c
+++ b/src/main.c
@@ -635,22 +635,25 @@ static void client_recv_all(int ci, struct sm_header *h_recv, int pos)
{
char trash[64];
int rem = h_recv->length - sizeof(struct sm_header) - pos;
- int rv, total = 0;
+ int rv, error = 0, total = 0;
if (!rem)
return;
while (1) {
rv = recv(client[ci].fd, trash, sizeof(trash), MSG_DONTWAIT);
+ if (rv == -1)
+ error = errno;
if (rv <= 0)
break;
total += rv;
- if (total > MAX_CLIENT_MSG)
+ if (total >= rem)
break;
}
- log_debug("recv_all ci %d rem %d total %d", ci, rem, total);
+ log_error("recv_all %d,%d,%d pos %d rv %d error %d rem %d total %d",
+ ci, client[ci].fd, client[ci].pid, pos, rv, error, rem, total);
}
static void release_cl_tokens(struct task *task, struct client *cl)
@@ -1060,15 +1063,15 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
}
reply:
+ if (!recv_done)
+ client_recv_all(ca->ci_in, &ca->header, pos);
+
memcpy(&h, &ca->header, sizeof(struct sm_header));
h.length = sizeof(h);
h.data = result;
h.data2 = 0;
send(fd, &h, sizeof(h), MSG_NOSIGNAL);
- if (!recv_done)
- client_recv_all(ca->ci_in, &ca->header, pos);
-
client_resume(ca->ci_in);
}
@@ -1214,6 +1217,11 @@ static void cmd_inquire(struct task *task, struct cmd_args *ca)
res_count++;
}
+ if (!res_count) {
+ result = 0;
+ goto done;
+ }
+
state_maxlen = res_count * (SANLK_MAX_RES_STR + 1);
state = malloc(state_maxlen);
@@ -1282,8 +1290,8 @@ static void cmd_inquire(struct task *task, struct cmd_args *ca)
cl->cmd_active = 0;
pthread_mutex_unlock(&cl->mutex);
- log_debug("cmd_inquire %d,%d,%d result %d pid_dead %d count %d strlen %d",
- cl_ci, cl_fd, cl_pid, result, pid_dead, res_count, state_strlen);
+ log_debug("cmd_inquire %d,%d,%d result %d pid_dead %d res_count %d cat_count %d strlen %d",
+ cl_ci, cl_fd, cl_pid, result, pid_dead, res_count, cat_count, state_strlen);
if (pid_dead) {
release_cl_tokens(task, cl);
@@ -1888,14 +1896,14 @@ static void process_cmd_thread_resource(int ci_in, struct sm_header *h_recv)
return;
fail:
+ client_recv_all(ci_in, h_recv, 0);
+
memcpy(&h, h_recv, sizeof(struct sm_header));
h.length = sizeof(h);
h.data = result;
h.data2 = 0;
send(client[ci_in].fd, &h, sizeof(h), MSG_NOSIGNAL);
- client_recv_all(ci_in, h_recv, 0);
-
client_resume(ci_in);
if (ca)
free(ca);
diff --git a/tests/sanlk_load.c b/tests/sanlk_load.c
index 27d2221..6c55ffb 100644
--- a/tests/sanlk_load.c
+++ b/tests/sanlk_load.c
@@ -15,6 +15,7 @@
#include <limits.h>
#include <time.h>
#include <signal.h>
+#include <syslog.h>
#include "sanlock.h"
#include "sanlock_admin.h"
@@ -25,12 +26,15 @@
#define LEASE_SIZE ONEMB
#define MAX_LS_COUNT 64
-#define MAX_RES_COUNT 64
+#define MAX_RES_COUNT 512
+#define MAX_PID_COUNT 256
#define DEFAULT_LS_COUNT 4
#define DEFAULT_RES_COUNT 4
#define DEFAULT_PID_COUNT 4
#define MAX_RV 300
+int debug = 0;
+char error_buf[4096];
char lock_disk_base[PATH_MAX];
int lock_state[MAX_LS_COUNT][MAX_RES_COUNT];
int ls_count = DEFAULT_LS_COUNT;
@@ -39,17 +43,19 @@ int pid_count = DEFAULT_PID_COUNT;
int our_hostid;
int acquire_rv[MAX_RV];
int release_rv[MAX_RV];
-int debug = 0;
#define log_debug(fmt, args...) \
do { \
- if (debug) printf("%llu " fmt "\n", (unsigned long long)time(NULL), ##args); \
+ if (debug) printf("%lu " fmt "\n", time(NULL), ##args); \
} while (0)
#define log_error(fmt, args...) \
do { \
- printf("ERROR %llu " fmt "\n", (unsigned long long)time(NULL), ##args); \
+ memset(error_buf, 0, sizeof(error_buf)); \
+ snprintf(error_buf, 4095, "%ld " fmt "\n", time(NULL), ##args); \
+ printf("ERROR: %s\n", error_buf); \
+ syslog(LOG_ERR, "%s", error_buf); \
} while (0)
@@ -58,7 +64,7 @@ static int get_rand(int a, int b)
return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0));
}
-static void save_rv(int rv, int acquire)
+static void save_rv(int pid, int rv, int acquire)
{
if (rv > 0)
goto fail;
@@ -79,16 +85,18 @@ static void save_rv(int rv, int acquire)
return;
fail:
- log_error("save_rv %d %d", rv, acquire);
- while (1)
+ log_error("%d save_rv %d %d", pid, rv, acquire);
+ while (1) {
sleep(10);
+ printf("%lu %d ERROR save_rv %d %d", time(NULL), pid, rv, acquire);
+ }
}
static void display_rv(int pid)
{
int i;
- printf("%llu %d results acquire ", (unsigned long long)time(NULL), pid);
+ printf("%lu %d results acquire ", time(NULL), pid);
for (i = 0; i < MAX_RV; i++) {
if (acquire_rv[i])
printf("%d:%d ", i, acquire_rv[i]);
@@ -118,15 +126,36 @@ static void dump_lock_state(int pid)
static int check_lock_state(int pid, int result, int count, char *res_state)
{
char buf[128];
- char *found;
+ char *found = NULL;
int found_count = 0;
int none_count = 0;
int bad_count = 0;
int i, j;
+ memset(buf, 0, sizeof(buf));
+
if (result < 0)
goto fail;
+ if (!count) {
+ if (res_state) {
+ log_error("%d check_lock_state zero count res_state %s",
+ pid, res_state);
+ }
+ for (i = 0; i < ls_count; i++) {
+ for (j = 0; j < res_count; j++) {
+ if (lock_state[i][j]) {
+ bad_count++;
+ log_error("%d check_lock_state zero count %d %d lock", pid, i, j);
+ }
+ }
+ }
+
+ if (bad_count)
+ goto fail;
+ return 0;
+ }
+
for (i = 0; i < ls_count; i++) {
for (j = 0; j < res_count; j++) {
memset(buf, 0, sizeof(buf));
@@ -140,7 +169,6 @@ static int check_lock_state(int pid, int result, int count, char *res_state)
none_count++;
} else {
bad_count++;
-
log_error("%d check_lock_state %s lock_state %d res_state %s",
pid, buf, lock_state[i][j], res_state);
}
@@ -163,8 +191,11 @@ static int check_lock_state(int pid, int result, int count, char *res_state)
dump_lock_state(pid);
- while (1)
+ while (1) {
sleep(10);
+ printf("%lu %d ERROR check_lock_state result %d count %d found %d bad %d res_state %s",
+ time(NULL), pid, result, count, found_count, bad_count, res_state);
+ }
}
static int add_lockspaces(void)
@@ -172,6 +203,8 @@ static int add_lockspaces(void)
struct sanlk_lockspace ls;
int i, rv;
+ printf("adding %d lockspaces...\n", ls_count);
+
for (i = 0; i < ls_count; i++) {
memset(&ls, 0, sizeof(ls));
sprintf(ls.host_id_disk.path, "%s%d", lock_disk_base, i);
@@ -185,9 +218,9 @@ static int add_lockspaces(void)
return -1;
}
- log_debug("add lockspace %s:%llu:%s:%d",
- ls.name, (unsigned long long)ls.host_id,
- ls.host_id_disk.path, 0);
+ printf("add lockspace %s:%llu:%s:%d\n",
+ ls.name, (unsigned long long)ls.host_id,
+ ls.host_id_disk.path, 0);
}
return 0;
@@ -220,7 +253,7 @@ static int do_one(int pid, int fd, int ls1, int res1, int *full, int acquire)
log_debug("%d %s %d,%d = %d",
pid, acquire ? "acquire" : "release", ls1, res1, rv);
- save_rv(rv, acquire);
+ save_rv(pid, rv, acquire);
return rv;
}
@@ -269,7 +302,7 @@ static int do_two(int pid, int fd, int ls1, int res1, int ls2, int res2, int *fu
log_debug("%d %s %d,%d %d,%d = %d",
pid, acquire ? "acquire" : "release", ls1, res1, ls2, res2, rv);
- save_rv(rv, acquire);
+ save_rv(pid, rv, acquire);
free(res_args);
return rv;
@@ -303,7 +336,7 @@ static int release_all(int pid, int fd)
log_debug("%d release all = %d", pid, rv);
- save_rv(rv, 0);
+ save_rv(pid, rv, 0);
return rv;
}
@@ -456,12 +489,25 @@ void get_options(int argc, char *argv[])
break;
case 's':
ls_count = atoi(optionarg);
+ if (ls_count > MAX_LS_COUNT) {
+ log_error("max ls_count %d", MAX_LS_COUNT);
+ exit(-1);
+ }
break;
case 'r':
res_count = atoi(optionarg);
+ if (res_count > MAX_RES_COUNT) {
+ log_error("max res_count %d", MAX_RES_COUNT);
+ exit(-1);
+ }
break;
case 'p':
pid_count = atoi(optionarg);
+ if (pid_count > MAX_PID_COUNT) {
+ log_error("max pid_count %d", MAX_PID_COUNT);
+ exit(-1);
+ }
+ break;
default:
log_error("unknown option: %c", optchar);
exit(EXIT_FAILURE);
@@ -473,7 +519,7 @@ void get_options(int argc, char *argv[])
int do_rand(int argc, char *argv[])
{
- int children[pid_count];
+ int children[MAX_PID_COUNT];
int run_count = 0;
int i, rv, pid, status;
@@ -488,9 +534,16 @@ int do_rand(int argc, char *argv[])
if (rv < 0)
return rv;
+ printf("forking %d pids...\n", pid_count);
+
for (i = 0; i < pid_count; i++) {
pid = fork();
+ if (pid < 0) {
+ log_error("fork %d failed %d run_count %d", i, errno, run_count);
+ break;
+ }
+
if (!pid) {
do_rand_child();
exit(-1);
@@ -500,6 +553,8 @@ int do_rand(int argc, char *argv[])
run_count++;
}
+ printf("children running\n");
+
while (run_count) {
pid = wait(&status);
if (pid > 0)
11 years, 9 months
src/token_manager.c tests/Makefile tests/sanlk_load.c
by David Teigland
src/token_manager.c | 3
tests/Makefile | 9
tests/sanlk_load.c | 609 ++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 618 insertions(+), 3 deletions(-)
New commits:
commit fd36a8f91b0eb40dac1f3d93f26e9115136438de
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Jun 17 16:34:23 2011 -0500
sanlk_load: add new test
diff --git a/src/token_manager.c b/src/token_manager.c
index c0fc485..0c85197 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -68,7 +68,8 @@ int add_resource(struct token *token, int pid)
r = find_resource(token, &resources);
if (r) {
- log_errot(token, "add_resource name exists");
+ if (!com.quiet_fail)
+ log_errot(token, "add_resource name exists");
rv = -EEXIST;
goto out;
}
diff --git a/tests/Makefile b/tests/Makefile
index 4041a8e..77b60c2 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,6 +1,8 @@
TARGET = devcount
+TARGET2 = sanlk_load
SOURCE = devcount.c
+SOURCE2 = sanlk_load.c
CFLAGS += -D_GNU_SOURCE -g \
-Wall \
@@ -23,11 +25,14 @@ CFLAGS += -D_GNU_SOURCE -g \
LDFLAGS = -lrt -laio -lblkid -lsanlock
-all: $(TARGET)
+all: $(TARGET) $(TARGET2)
$(TARGET): $(SOURCE)
$(CC) $(CFLAGS) $(LDFLAGS) $(SOURCE) -o $@ -L. -L../src
+$(TARGET2): $(SOURCE2)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(SOURCE2) -o $@ -L. -L../src
+
clean:
- rm -f *.o *.so *.so.* $(TARGET)
+ rm -f *.o *.so *.so.* $(TARGET) $(TARGET2)
diff --git a/tests/sanlk_load.c b/tests/sanlk_load.c
new file mode 100644
index 0000000..27d2221
--- /dev/null
+++ b/tests/sanlk_load.c
@@ -0,0 +1,609 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/un.h>
+#include <sys/mount.h>
+#include <sys/signalfd.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <time.h>
+#include <signal.h>
+
+#include "sanlock.h"
+#include "sanlock_admin.h"
+#include "sanlock_resource.h"
+#include "sanlock_direct.h"
+
+#define ONEMB 1048576
+#define LEASE_SIZE ONEMB
+
+#define MAX_LS_COUNT 64
+#define MAX_RES_COUNT 64
+#define DEFAULT_LS_COUNT 4
+#define DEFAULT_RES_COUNT 4
+#define DEFAULT_PID_COUNT 4
+#define MAX_RV 300
+
+char lock_disk_base[PATH_MAX];
+int lock_state[MAX_LS_COUNT][MAX_RES_COUNT];
+int ls_count = DEFAULT_LS_COUNT;
+int res_count = DEFAULT_RES_COUNT;
+int pid_count = DEFAULT_PID_COUNT;
+int our_hostid;
+int acquire_rv[MAX_RV];
+int release_rv[MAX_RV];
+int debug = 0;
+
+
+#define log_debug(fmt, args...) \
+do { \
+ if (debug) printf("%llu " fmt "\n", (unsigned long long)time(NULL), ##args); \
+} while (0)
+
+#define log_error(fmt, args...) \
+do { \
+ printf("ERROR %llu " fmt "\n", (unsigned long long)time(NULL), ##args); \
+} while (0)
+
+
+static int get_rand(int a, int b)
+{
+ return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0));
+}
+
+static void save_rv(int rv, int acquire)
+{
+ if (rv > 0)
+ goto fail;
+ if (-rv > MAX_RV)
+ goto fail;
+
+ if (acquire) {
+ if (!rv)
+ acquire_rv[0]++;
+ else
+ acquire_rv[-rv]++;
+ } else {
+ if (!rv)
+ release_rv[0]++;
+ else
+ release_rv[-rv]++;
+ }
+ return;
+
+ fail:
+ log_error("save_rv %d %d", rv, acquire);
+ while (1)
+ sleep(10);
+}
+
+static void display_rv(int pid)
+{
+ int i;
+
+ printf("%llu %d results acquire ", (unsigned long long)time(NULL), pid);
+ for (i = 0; i < MAX_RV; i++) {
+ if (acquire_rv[i])
+ printf("%d:%d ", i, acquire_rv[i]);
+ }
+
+ printf("release ");
+ for (i = 0; i < MAX_RV; i++) {
+ if (release_rv[i])
+ printf("%d:%d ", i, release_rv[i]);
+ }
+ printf("\n");
+}
+
+static void dump_lock_state(int pid)
+{
+ int i, j;
+
+ for (i = 0; i < ls_count; i++) {
+ for (j = 0; j < res_count; j++) {
+ if (!lock_state[i][j])
+ continue;
+ log_error("%d lockspace%d:resource%d", pid, i, j);
+ }
+ }
+}
+
+static int check_lock_state(int pid, int result, int count, char *res_state)
+{
+ char buf[128];
+ char *found;
+ int found_count = 0;
+ int none_count = 0;
+ int bad_count = 0;
+ int i, j;
+
+ if (result < 0)
+ goto fail;
+
+ for (i = 0; i < ls_count; i++) {
+ for (j = 0; j < res_count; j++) {
+ memset(buf, 0, sizeof(buf));
+ sprintf(buf, "lockspace%d:resource%d:", i, j);
+
+ found = strstr(res_state, buf);
+
+ if (found && lock_state[i][j]) {
+ found_count++;
+ } else if (!found && !lock_state[i][j]) {
+ none_count++;
+ } else {
+ bad_count++;
+
+ log_error("%d check_lock_state %s lock_state %d res_state %s",
+ pid, buf, lock_state[i][j], res_state);
+ }
+ }
+ }
+
+ if ((found_count != count) || bad_count)
+ goto fail;
+
+ if (res_state)
+ free(res_state);
+ return 0;
+
+ fail:
+ log_error("%d check_lock_state result %d count %d res_state %s",
+ pid, result, count, res_state);
+
+ log_error("%d check_lock_state found %d none %d bad %d",
+ pid, found_count, none_count, bad_count);
+
+ dump_lock_state(pid);
+
+ while (1)
+ sleep(10);
+}
+
+static int add_lockspaces(void)
+{
+ struct sanlk_lockspace ls;
+ int i, rv;
+
+ for (i = 0; i < ls_count; i++) {
+ memset(&ls, 0, sizeof(ls));
+ sprintf(ls.host_id_disk.path, "%s%d", lock_disk_base, i);
+ sprintf(ls.name, "lockspace%d", i);
+ ls.host_id = our_hostid;
+
+ rv = sanlock_add_lockspace(&ls, 0);
+ if (rv < 0) {
+ log_error("sanlock_add_lockspace error %d %s", rv,
+ ls.host_id_disk.path);
+ return -1;
+ }
+
+ log_debug("add lockspace %s:%llu:%s:%d",
+ ls.name, (unsigned long long)ls.host_id,
+ ls.host_id_disk.path, 0);
+ }
+
+ return 0;
+}
+
+static int do_one(int pid, int fd, int ls1, int res1, int *full, int acquire)
+{
+ char buf1[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+ struct sanlk_resource *r1;
+ int rv;
+
+ memset(buf1, 0, sizeof(buf1));
+ r1 = (struct sanlk_resource *)&buf1;
+
+ sprintf(r1->lockspace_name, "lockspace%d", ls1);
+ sprintf(r1->name, "resource%d", res1);
+ sprintf(r1->disks[0].path, "%s%d", lock_disk_base, ls1);
+ r1->disks[0].offset = (res1+1)*LEASE_SIZE;
+ r1->num_disks = 1;
+
+ if (acquire) {
+ rv = sanlock_acquire(fd, -1, 0, 1, &r1, NULL);
+
+ if (rv == -E2BIG || rv == -ENOENT)
+ *full = 1;
+ } else {
+ rv = sanlock_release(fd, -1, 0, 1, &r1);
+ }
+
+ log_debug("%d %s %d,%d = %d",
+ pid, acquire ? "acquire" : "release", ls1, res1, rv);
+
+ save_rv(rv, acquire);
+
+ return rv;
+}
+
+static int do_two(int pid, int fd, int ls1, int res1, int ls2, int res2, int *full, int acquire)
+{
+ char buf1[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+ char buf2[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+ struct sanlk_resource *r1;
+ struct sanlk_resource *r2;
+ struct sanlk_resource **res_args;
+ int rv;
+
+ res_args = malloc(2 * sizeof(struct sanlk_resource *));
+ if (!res_args)
+ return -ENOMEM;
+
+ memset(buf1, 0, sizeof(buf1));
+ memset(buf2, 0, sizeof(buf2));
+ r1 = (struct sanlk_resource *)&buf1;
+ r2 = (struct sanlk_resource *)&buf2;
+ res_args[0] = r1;
+ res_args[1] = r2;
+
+ sprintf(r1->lockspace_name, "lockspace%d", ls1);
+ sprintf(r1->name, "resource%d", res1);
+ sprintf(r1->disks[0].path, "%s%d", lock_disk_base, ls1);
+ r1->disks[0].offset = (res1+1)*LEASE_SIZE;
+ r1->num_disks = 1;
+
+ sprintf(r2->lockspace_name, "lockspace%d", ls2);
+ sprintf(r2->name, "resource%d", res2);
+ sprintf(r2->disks[0].path, "%s%d", lock_disk_base, ls2);
+ r2->disks[0].offset = (res2+1)*LEASE_SIZE;
+ r2->num_disks = 1;
+
+ if (acquire) {
+ rv = sanlock_acquire(fd, -1, 0, 2, res_args, NULL);
+
+ if (rv == -E2BIG || rv == -ENOENT)
+ *full = 1;
+ } else {
+ rv = sanlock_release(fd, -1, 0, 2, res_args);
+ }
+
+ log_debug("%d %s %d,%d %d,%d = %d",
+ pid, acquire ? "acquire" : "release", ls1, res1, ls2, res2, rv);
+
+ save_rv(rv, acquire);
+
+ free(res_args);
+ return rv;
+}
+
+static int acquire_one(int pid, int fd, int ls1, int res1, int *full)
+{
+ return do_one(pid, fd, ls1, res1, full, 1);
+}
+
+static int acquire_two(int pid, int fd, int ls1, int res1, int ls2, int res2, int *full)
+{
+ return do_two(pid, fd, ls1, res1, ls2, res2, full, 1);
+}
+
+static int release_one(int pid, int fd, int ls1, int res1)
+{
+ return do_one(pid, fd, ls1, res1, NULL, 0);
+}
+
+static int release_two(int pid, int fd, int ls1, int res1, int ls2, int res2)
+{
+ return do_two(pid, fd, ls1, res1, ls2, res2, NULL, 0);
+}
+
+static int release_all(int pid, int fd)
+{
+ int rv;
+
+ rv = sanlock_release(fd, -1, SANLK_REL_ALL, 0, NULL);
+
+ log_debug("%d release all = %d", pid, rv);
+
+ save_rv(rv, 0);
+
+ return rv;
+}
+
+static void inquire_all(int pid, int fd)
+{
+ int rv, count = 0;
+ char *state = NULL;
+
+ rv = sanlock_inquire(fd, -1, 0, &count, &state);
+
+ log_debug("%d inquire all = %d", pid, rv);
+
+ check_lock_state(pid, rv, count, state);
+}
+
+int do_rand_child(void)
+{
+ int ls1, ls2, res1, res2, state1, state2, full;
+ int fd, rv;
+ int iter = 1;
+ int pid = getpid();
+
+ srandom(pid);
+
+ memset(lock_state, 0, sizeof(lock_state));
+
+ fd = sanlock_register();
+ if (fd < 0) {
+ log_error("%d sanlock_register error %d", pid, fd);
+ exit(-1);
+ }
+
+ while (1) {
+ ls1 = get_rand(0, ls_count-1);
+ res1 = get_rand(0, res_count-1);
+ state1 = lock_state[ls1][res1];
+
+ ls2 = -1;
+ res2 = -1;
+ state2 = -1;
+
+ if (get_rand(1, 3) == 2) {
+ ls2 = get_rand(0, ls_count-1);
+ res2 = get_rand(0, res_count-1);
+ state2 = lock_state[ls2][res2];
+
+ if (ls1 == ls2 && res1 == res2) {
+ ls2 = -1;
+ res2 = -1;
+ state2 = -1;
+ }
+ }
+
+ full = 0;
+
+ if (state1 == 0 && state2 == 0) {
+ /* both picks are unlocked, lock both together */
+
+ rv = acquire_two(pid, fd, ls1, res1, ls2, res2, &full);
+ if (!rv) {
+ lock_state[ls1][res1] = 1;
+ lock_state[ls2][res2] = 1;
+ }
+ state1 = -1;
+ state2 = -1;
+ }
+ if (state1 == 1 && state2 == 1) {
+ /* both picks are locked, unlock both together */
+
+ rv = release_two(pid, fd, ls1, res1, ls2, res2);
+ if (!rv) {
+ lock_state[ls1][res1] = 0;
+ lock_state[ls2][res2] = 0;
+ }
+ state1 = -1;
+ state2 = -1;
+ }
+ if (state1 == 0) {
+ rv = acquire_one(pid, fd, ls1, res1, &full);
+ if (!rv)
+ lock_state[ls1][res1] = 1;
+ }
+ if (state2 == 0) {
+ rv = acquire_one(pid, fd, ls2, res2, &full);
+ if (!rv)
+ lock_state[ls2][res2] = 1;
+ }
+ if (state1 == 1) {
+ rv = release_one(pid, fd, ls1, res1);
+ if (!rv)
+ lock_state[ls1][res1] = 0;
+ }
+ if (state2 == 1) {
+ rv = release_one(pid, fd, ls2, res2);
+ if (!rv)
+ lock_state[ls2][res2] = 0;
+ }
+ if (full) {
+ rv = release_all(pid, fd);
+ if (!rv)
+ memset(lock_state, 0, sizeof(lock_state));
+ }
+ if ((iter % 10) == 0) {
+ display_rv(pid);
+ inquire_all(pid, fd);
+ }
+ iter++;
+ }
+}
+
+/*
+ * sanlk_load rand <lock_disk_base> -i <host_id> [-D -s <ls_count> -r <res_count> -p <pid_count>]
+ */
+
+void get_options(int argc, char *argv[])
+{
+ char optchar;
+ char *optionarg;
+ char *p;
+ int i = 3;
+
+ for (; i < argc; ) {
+ p = argv[i];
+
+ if ((p[0] != '-') || (strlen(p) != 2)) {
+ log_error("unknown option %s", p);
+ log_error("space required before option value");
+ exit(EXIT_FAILURE);
+ }
+
+ optchar = p[1];
+ i++;
+
+ if (optchar == 'D') {
+ debug = 1;
+ continue;
+ }
+
+ if (i >= argc) {
+ log_error("option '%c' requires arg", optchar);
+ exit(EXIT_FAILURE);
+ }
+
+ optionarg = argv[i];
+
+ switch (optchar) {
+ case 'i':
+ our_hostid = atoi(optionarg);
+ break;
+ case 's':
+ ls_count = atoi(optionarg);
+ break;
+ case 'r':
+ res_count = atoi(optionarg);
+ break;
+ case 'p':
+ pid_count = atoi(optionarg);
+ default:
+ log_error("unknown option: %c", optchar);
+ exit(EXIT_FAILURE);
+ }
+
+ i++;
+ }
+}
+
+int do_rand(int argc, char *argv[])
+{
+ int children[pid_count];
+ int run_count = 0;
+ int i, rv, pid, status;
+
+ if (argc < 5)
+ return -1;
+
+ strcpy(lock_disk_base, argv[2]);
+
+ get_options(argc, argv);
+
+ rv = add_lockspaces();
+ if (rv < 0)
+ return rv;
+
+ for (i = 0; i < pid_count; i++) {
+ pid = fork();
+
+ if (!pid) {
+ do_rand_child();
+ exit(-1);
+ }
+
+ children[i] = pid;
+ run_count++;
+ }
+
+ while (run_count) {
+ pid = wait(&status);
+ if (pid > 0)
+ run_count--;
+ }
+
+ /*
+ * periodically:
+ * - kill a random pid, and fork a new one to replace it
+ * - rem a random ls and add it again
+ */
+
+ return 0;
+}
+
+/*
+ * sanlk_load init <lock_disk_base> [<ls_count> <res_count>]
+ * lock_disk_base = /dev/vg/foo
+ *
+ * sanlock direct init -s lockspace0:0:/dev/vg/foo0:0
+ * sanlock direct init -r lockspace0:resource0:/dev/vg/foo0:1M
+ * sanlock direct init -r lockspace0:resource1:/dev/vg/foo0:2M
+ * ...
+ * sanlock direct init -s lockspace1:0:/dev/vg/foo1:0
+ * sanlock direct init -r lockspace1:resource0:/dev/vg/foo1:1M
+ * sanlock direct init -r lockspace1:resource1:/dev/vg/foo1:2M
+ * ...
+ */
+
+#define INIT_NUM_HOSTS 8
+
+int do_init(int argc, char *argv[])
+{
+ char resbuf[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
+ struct sanlk_resource *res;
+ struct sanlk_lockspace ls;
+ int i, j, rv;
+
+ if (argc < 3)
+ return -1;
+
+ strcpy(lock_disk_base, argv[2]);
+
+ if (argc > 3)
+ ls_count = atoi(argv[3]);
+ if (argc > 4)
+ res_count = atoi(argv[4]);
+
+ for (i = 0; i < ls_count; i++) {
+
+ memset(&ls, 0, sizeof(ls));
+ sprintf(ls.host_id_disk.path, "%s%d", lock_disk_base, i);
+ sprintf(ls.name, "lockspace%d", i);
+
+ rv = sanlock_direct_init(&ls, NULL, 0, INIT_NUM_HOSTS, 1);
+ if (rv < 0) {
+ printf("sanlock_direct_init lockspace error %d %s\n", rv,
+ ls.host_id_disk.path);
+ return -1;
+ }
+
+ for (j = 0; j < res_count; j++) {
+
+ memset(resbuf, 0, sizeof(resbuf));
+ res = (struct sanlk_resource *)&resbuf;
+
+ strcpy(res->lockspace_name, ls.name);
+ sprintf(res->name, "resource%d", j);
+ res->num_disks = 1;
+ strcpy(res->disks[0].path, ls.host_id_disk.path);
+ res->disks[0].offset = (j+1)*LEASE_SIZE;
+
+ rv = sanlock_direct_init(NULL, res, 0, INIT_NUM_HOSTS, 0);
+ if (rv < 0) {
+ printf("sanlock_direct_init resource error %d\n", rv);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int rv = -1;
+
+ if (argc < 2)
+ goto out;
+
+ if (!strcmp(argv[1], "init"))
+ rv = do_init(argc, argv);
+
+ else if (!strcmp(argv[1], "rand"))
+ rv = do_rand(argc, argv);
+
+ if (!rv)
+ return 0;
+
+ out:
+ printf("sanlk_load init <lock_disk_base> [<ls_count> <res_count>]\n");
+ printf("\n");
+ printf("sanlk_load rand\n");
+ printf("\n");
+ return -1;
+}
+
11 years, 9 months
src/host_id.c
by David Teigland
src/host_id.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
New commits:
commit 869484c207b3644b8b59b43c86d8bd44c41badc5
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Jun 15 11:06:30 2011 -0500
sanlock: fix add_lockspace failure
the previous patch did not remove the sp struct
from spaces_add list
diff --git a/src/host_id.c b/src/host_id.c
index 33ffca4..8f2bfa8 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -411,10 +411,7 @@ int add_lockspace(struct sanlk_lockspace *ls)
rv = pthread_create(&sp->thread, NULL, lockspace_thread, sp);
if (rv < 0) {
log_erros(sp, "add_lockspace create thread failed");
- pthread_mutex_lock(&spaces_mutex);
- list_del(&sp->list);
- pthread_mutex_unlock(&spaces_mutex);
- goto fail_free;
+ goto fail_del;
}
while (1) {
@@ -430,7 +427,7 @@ int add_lockspace(struct sanlk_lockspace *ls)
/* the thread exits right away if acquire fails */
pthread_join(sp->thread, NULL);
rv = result;
- goto fail_free;
+ goto fail_del;
}
/* once we move sp to spaces list, tokens can begin using it,
@@ -440,12 +437,16 @@ int add_lockspace(struct sanlk_lockspace *ls)
if (sp->external_remove || external_shutdown) {
rv = -1;
pthread_mutex_unlock(&spaces_mutex);
- goto fail_free;
+ goto fail_del;
}
list_move(&sp->list, &spaces);
pthread_mutex_unlock(&spaces_mutex);
return 0;
+ fail_del:
+ pthread_mutex_lock(&spaces_mutex);
+ list_del(&sp->list);
+ pthread_mutex_unlock(&spaces_mutex);
fail_free:
free(sp);
return rv;
11 years, 9 months
src/delta_lease.c src/direct_lib.c src/host_id.c src/main.c src/sanlock_internal.h
by David Teigland
src/delta_lease.c | 20 +++++++++++++++-----
src/direct_lib.c | 1 +
src/host_id.c | 8 +++++++-
src/main.c | 1 -
src/sanlock_internal.h | 1 +
5 files changed, 24 insertions(+), 7 deletions(-)
New commits:
commit 27d26b9ef1b68d28e8333ebc3ebea0d489f83f99
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Jun 15 10:49:23 2011 -0500
sanlock: abort delta wait on shutdown
if the daemon is shutdown or the lockspace is removed
during one of the long add_lockspace/delta_acquire delays,
quit and return an error
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 6806f94..a2683ad 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -215,7 +215,7 @@ int delta_lease_acquire(struct task *task,
struct leader_record leader;
struct leader_record leader1;
uint64_t new_ts;
- int error, delay, delta_large_delay;
+ int i, error, delay, delta_large_delay;
log_space(sp, "delta_acquire %llu begin", (unsigned long long)host_id);
@@ -253,12 +253,17 @@ int delta_lease_acquire(struct task *task,
while (1) {
memcpy(&leader1, &leader, sizeof(struct leader_record));
+ log_space(sp, "delta_acquire %llu delta_large_delay %d delay %d",
+ (unsigned long long)host_id, delta_large_delay, delay);
+
/* TODO: we could reread every several seconds to see if
it has changed, so we can abort more quickly if so */
- log_space(sp, "delta_acquire %llu delta_large_delay %d delay %d",
- (unsigned long long)host_id, delta_large_delay, delay);
- sleep(delay);
+ for (i = 0; i < delay; i++) {
+ if (sp->external_remove || external_shutdown)
+ return SANLK_ERROR;
+ sleep(1);
+ }
error = delta_lease_leader_read(task, disk, space_name, host_id,
&leader, "delta_acquire_wait");
@@ -305,7 +310,12 @@ int delta_lease_acquire(struct task *task,
delay = 2 * task->io_timeout_seconds;
log_space(sp, "delta_acquire %llu delta_short_delay %d",
(unsigned long long)host_id, delay);
- sleep(delay);
+
+ for (i = 0; i < delay; i++) {
+ if (sp->external_remove || external_shutdown)
+ return SANLK_ERROR;
+ sleep(1);
+ }
error = delta_lease_leader_read(task, disk, space_name, host_id, &leader,
"delta_acquire_check");
diff --git a/src/direct_lib.c b/src/direct_lib.c
index e31e514..f0a3dcb 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -14,6 +14,7 @@
#include <stddef.h>
#include <errno.h>
+#define EXTERN
#include "sanlock_internal.h"
#include "sanlock_direct.h"
#include "diskio.h"
diff --git a/src/host_id.c b/src/host_id.c
index 3586d34..33ffca4 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -437,6 +437,11 @@ int add_lockspace(struct sanlk_lockspace *ls)
and the main loop will begin monitoring its renewals */
pthread_mutex_lock(&spaces_mutex);
+ if (sp->external_remove || external_shutdown) {
+ rv = -1;
+ pthread_mutex_unlock(&spaces_mutex);
+ goto fail_free;
+ }
list_move(&sp->list, &spaces);
pthread_mutex_unlock(&spaces_mutex);
return 0;
@@ -465,8 +470,9 @@ int rem_lockspace(struct sanlk_lockspace *ls)
sp = _search_space(ls->name, (struct sync_disk *)&ls->host_id_disk, ls->host_id,
&spaces_add, NULL, NULL);
if (sp) {
+ sp->external_remove = 1;
pthread_mutex_unlock(&spaces_mutex);
- rv = -EAGAIN;
+ rv = 0;
goto out;
}
diff --git a/src/main.c b/src/main.c
index e75e320..fec794e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -79,7 +79,6 @@ static struct pollfd *pollfd = NULL;
static char command[COMMAND_MAX];
static int cmd_argc;
static char **cmd_argv;
-static int external_shutdown;
static unsigned int token_id_counter = 1;
struct cmd_args {
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 560bbda..c38ebf8 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -514,6 +514,7 @@ enum {
ACT_READ_LEADER,
};
+EXTERN int external_shutdown;
EXTERN char our_host_name_global[SANLK_NAME_LEN+1];
#endif
11 years, 9 months
src/direct_lib.c src/sanlock_direct.h tests/devcount.c
by David Teigland
src/direct_lib.c | 18 ------------------
src/sanlock_direct.h | 6 ------
tests/devcount.c | 14 +-------------
3 files changed, 1 insertion(+), 37 deletions(-)
New commits:
commit f72071848ad54e9450e78bde9e5ea935b19d85f7
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Jun 14 16:17:00 2011 -0500
sanlock: remove sector_size api
I can't think of a case where this should really be used.
The align api should be used to determine proper offsets.
diff --git a/src/direct_lib.c b/src/direct_lib.c
index fa26736..e31e514 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -111,24 +111,6 @@ int sanlock_direct_init(struct sanlk_lockspace *ls,
return rv;
}
-int sanlock_direct_sector_size(struct sanlk_disk *disk_in)
-{
- struct sync_disk disk;
- int rv;
-
- memset(&disk, 0, sizeof(disk));
-
- memcpy(disk.path, disk_in->path, SANLK_PATH_LEN);
-
- rv = open_disk(&disk);
- if (rv < 0)
- return rv;
-
- close(disk.fd);
-
- return disk.sector_size;
-}
-
int sanlock_direct_align(struct sanlk_disk *disk_in)
{
struct sync_disk disk;
diff --git a/src/sanlock_direct.h b/src/sanlock_direct.h
index d0976f6..83f1522 100644
--- a/src/sanlock_direct.h
+++ b/src/sanlock_direct.h
@@ -39,12 +39,6 @@ int sanlock_direct_init(struct sanlk_lockspace *ls,
int max_hosts, int num_hosts, int use_aio);
/*
- * Returns sector size in bytes, -1 on error
- */
-
-int sanlock_direct_sector_size(struct sanlk_disk *disk);
-
-/*
* Returns the alignment in bytes required by sanlock_direct_init()
* (1MB for disks with 512 sectors, 8MB for disks with 4096 sectors)
*/
diff --git a/tests/devcount.c b/tests/devcount.c
index ee9a9dd..67ce943 100644
--- a/tests/devcount.c
+++ b/tests/devcount.c
@@ -1358,7 +1358,7 @@ int do_init(int argc, char *argv[])
struct sanlk_resource *res;
struct sanlk_lockspace ls;
char command[4096];
- int rv, ss, align_size;
+ int rv, align_size;
if (argc < 4)
return -1;
@@ -1397,13 +1397,6 @@ int do_init(int argc, char *argv[])
memset(&disk, 0, sizeof(disk));
strcpy(disk.path, argv[2]);
- ss = sanlock_direct_sector_size(&disk);
- if (ss < 0) {
- printf("sanlock_direct_sector_size %s error %d\n",
- disk.path, ss);
- return -1;
- }
-
align_size = sanlock_direct_align(&disk);
if (align_size != LEASE_SIZE) {
printf("sanlock_direct align %s error %d\n",
@@ -1411,11 +1404,6 @@ int do_init(int argc, char *argv[])
return -1;
}
- if (ss != 512) {
- printf("unsupported sector size %d\n", ss);
- return -1;
- }
-
memset(&ls, 0, sizeof(ls));
strcpy(ls.name, "devcount");
strcpy(ls.host_id_disk.path, argv[2]);
11 years, 9 months
src/delta_lease.c src/delta_lease.h src/direct.c src/direct.h src/direct_lib.c src/host_id.c src/host_id.h src/main.c src/paxos_lease.c src/sanlock_internal.h src/sanlock_rv.h
by David Teigland
src/delta_lease.c | 120 +++++++++++++++++++++++++++++--------------------
src/delta_lease.h | 38 ++++++---------
src/direct.c | 36 ++++++++------
src/direct.h | 3 -
src/direct_lib.c | 7 ++
src/host_id.c | 67 ++++++++++++++++++++++-----
src/host_id.h | 1
src/main.c | 8 ++-
src/paxos_lease.c | 11 +---
src/sanlock_internal.h | 3 +
src/sanlock_rv.h | 3 -
11 files changed, 192 insertions(+), 105 deletions(-)
New commits:
commit 00f59855385474053dca7cc59bebd4c7c343aa94
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Jun 10 17:31:32 2011 -0500
sanlock: use unique host name in delta leases
Also return an error for add_lockspace if we fail to
acquire the delta_lease (instead of retrying forever).
diff --git a/src/delta_lease.c b/src/delta_lease.c
index b5624c1..6806f94 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -76,7 +76,6 @@ static int verify_leader(struct sync_disk *disk,
const char *caller)
{
struct leader_record leader_rr;
- char resource_name[NAME_ID_SIZE];
uint32_t sum;
int result, rv;
@@ -112,18 +111,6 @@ static int verify_leader(struct sync_disk *disk,
goto fail;
}
- memset(resource_name, 0, NAME_ID_SIZE);
- snprintf(resource_name, NAME_ID_SIZE, "host_id_%llu",
- (unsigned long long)host_id);
-
- if (strncmp(lr->resource_name, resource_name, NAME_ID_SIZE)) {
- log_error("verify_leader %llu wrong resource name %.48s %.48s %s",
- (unsigned long long)host_id,
- lr->resource_name, resource_name, disk->path);
- result = SANLK_LEADER_RESOURCE;
- goto fail;
- }
-
sum = leader_checksum(lr);
if (lr->checksum != sum) {
@@ -202,19 +189,26 @@ static int delta_lease_leader_reap(struct task *task,
return error;
}
-/* TODO: do we need to set the watchdog to expire in host_dead_seconds just
- * before we do the write here? The algorithm depends on io timeouts to
- * protect against this write happening at a latest possible time, but since
- * our ios don't ever really timeout reliably, we need to timeout in
- * host_dead_seconds.
- * And can we touch the watchdog immediately after the write, or do we
- * need to wait for the read to complete also? */
+/*
+ * delta_lease_acquire:
+ * set the owner of host_id to our_host_name.
+ *
+ * paxos_lease_acquire:
+ * set the owner of resource_name to host_id.
+ *
+ * our_host_name is a unique host identifier used to detect when two different
+ * hosts are trying to acquire the same host_id (since both will be using the
+ * same host_id, that host_id won't work to distinguish between them.) We copy
+ * our_host_name into leader.resource_name, so in a sense the owner_id and
+ * resource_name fields of the leader_record switch functions: the common
+ * resource is the owner_id, and the distinguishing id is the resource_name.
+ */
int delta_lease_acquire(struct task *task,
struct space *sp,
struct sync_disk *disk,
char *space_name,
- uint64_t our_host_id,
+ char *our_host_name,
uint64_t host_id,
struct leader_record *leader_ret)
{
@@ -230,7 +224,6 @@ int delta_lease_acquire(struct task *task,
if (error < 0)
return error;
- retry:
if (leader.timestamp == LEASE_FREE)
goto write_new;
@@ -260,8 +253,11 @@ int delta_lease_acquire(struct task *task,
while (1) {
memcpy(&leader1, &leader, sizeof(struct leader_record));
- log_space(sp, "delta_acquire delta_large_delay %d delay %d",
- delta_large_delay, delay);
+ /* TODO: we could reread every several seconds to see if
+ it has changed, so we can abort more quickly if so */
+
+ log_space(sp, "delta_acquire %llu delta_large_delay %d delay %d",
+ (unsigned long long)host_id, delta_large_delay, delay);
sleep(delay);
error = delta_lease_leader_read(task, disk, space_name, host_id,
@@ -275,25 +271,40 @@ int delta_lease_acquire(struct task *task,
if (leader.timestamp == LEASE_FREE)
break;
- /* TODO: fail and return an error? */
+ log_erros(sp, "delta_acquire %llu busy %llu %llu %llu %.48s",
+ (unsigned long long)host_id,
+ (unsigned long long)leader.owner_id,
+ (unsigned long long)leader.owner_generation,
+ (unsigned long long)leader.timestamp,
+ leader.resource_name);
+ return SANLK_HOSTID_BUSY;
}
write_new:
new_ts = time(NULL);
leader.timestamp = new_ts;
- leader.owner_id = our_host_id;
+ leader.owner_id = host_id;
leader.owner_generation++;
+ snprintf(leader.resource_name, NAME_ID_SIZE, "%s", our_host_name);
leader.checksum = leader_checksum(&leader);
- log_space(sp, "delta_acquire write new %llu", (unsigned long long)new_ts);
+ log_space(sp, "delta_acquire %llu write %llu %llu %llu %.48s",
+ (unsigned long long)host_id,
+ (unsigned long long)leader.owner_id,
+ (unsigned long long)leader.owner_generation,
+ (unsigned long long)leader.timestamp,
+ leader.resource_name);
error = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
task, "delta_leader");
if (error < 0)
return error;
+ memcpy(&leader1, &leader, sizeof(struct leader_record));
+
delay = 2 * task->io_timeout_seconds;
- log_space(sp, "delta_acquire delta_short_delay %d", delay);
+ log_space(sp, "delta_acquire %llu delta_short_delay %d",
+ (unsigned long long)host_id, delay);
sleep(delay);
error = delta_lease_leader_read(task, disk, space_name, host_id, &leader,
@@ -301,32 +312,39 @@ int delta_lease_acquire(struct task *task,
if (error < 0)
return error;
- if ((leader.timestamp != new_ts) || (leader.owner_id != our_host_id))
- goto retry;
+ if (memcmp(&leader1, &leader, sizeof(struct leader_record))) {
+ log_erros(sp, "delta_acquire %llu busy %llu %llu %llu %.48s",
+ (unsigned long long)host_id,
+ (unsigned long long)leader.owner_id,
+ (unsigned long long)leader.owner_generation,
+ (unsigned long long)leader.timestamp,
+ leader.resource_name);
+ return SANLK_HOSTID_BUSY;
+ }
memcpy(leader_ret, &leader, sizeof(struct leader_record));
return SANLK_OK;
}
-/* our_host_id and host_id will always be the same, i.e. we
- only ever try to acquire/renew our own host_id */
-
int delta_lease_renew(struct task *task,
struct space *sp,
struct sync_disk *disk,
char *space_name,
- uint64_t our_host_id,
- uint64_t our_host_id_generation,
- uint64_t host_id,
int prev_result,
struct leader_record *leader_last,
struct leader_record *leader_ret)
{
struct leader_record leader;
+ uint64_t host_id;
uint64_t new_ts;
int io_timeout_save;
int error;
+ if (!leader_last)
+ return -EINVAL;
+
+ host_id = leader_last->owner_id;
+
/* if the previous renew timed out in this initial read, and that read
is now complete, we can use that result here instead of discarding
it and doing another. */
@@ -335,7 +353,8 @@ int delta_lease_renew(struct task *task,
error = delta_lease_leader_reap(task, disk, space_name, host_id,
&leader, "delta_renew_reap");
- log_space(sp, "delta_renew reap %d", error);
+ log_space(sp, "delta_renew %llu reap %d",
+ (unsigned long long)host_id, error);
if (error == SANLK_OK) {
task->read_timeout = NULL;
@@ -351,11 +370,14 @@ int delta_lease_renew(struct task *task,
return error;
read_done:
- if (!our_host_id_generation)
- our_host_id_generation = leader.owner_generation;
-
- if (leader.owner_id != our_host_id ||
- leader.owner_generation != our_host_id_generation) {
+ /* We can't always memcmp(&leader, leader_last) because previous writes
+ may have timed out and we don't know if they were actually written
+ or not. We can definitely verify that we're still the owner,
+ though, which is the main thing we need to know. */
+
+ if (leader.owner_id != leader_last->owner_id ||
+ leader.owner_generation != leader_last->owner_generation ||
+ memcmp(leader.resource_name, leader_last->resource_name, NAME_ID_SIZE)) {
log_erros(sp, "delta_renew %llu not owner", (unsigned long long)host_id);
log_leader_error(0, space_name, host_id, disk, leader_last, "delta_renew_last");
log_leader_error(0, space_name, host_id, disk, &leader, "delta_renew_read");
@@ -380,9 +402,9 @@ int delta_lease_renew(struct task *task,
leader.checksum = leader_checksum(&leader);
/* extend io timeout for this one write; we need to give this write
- * every chance to succeed, and there's no point in letting it time
- * out. there's nothing we would do but retry it, and timing out and
- * retrying unnecessarily would probably be counter productive. */
+ every chance to succeed, and there's no point in letting it time
+ out. there's nothing we would do but retry it, and timing out and
+ retrying unnecessarily would probably be counter productive. */
io_timeout_save = task->io_timeout_seconds;
task->io_timeout_seconds = task->host_dead_seconds;
@@ -430,13 +452,18 @@ int delta_lease_release(struct task *task,
struct space *sp,
struct sync_disk *disk,
char *space_name GNUC_UNUSED,
- uint64_t host_id,
struct leader_record *leader_last,
struct leader_record *leader_ret)
{
struct leader_record leader;
+ uint64_t host_id;
int error;
+ if (!leader_last)
+ return -EINVAL;
+
+ host_id = leader_last->owner_id;
+
log_space(sp, "delta_release %llu begin", (unsigned long long)host_id);
memcpy(&leader, leader_last, sizeof(struct leader_record));
@@ -491,7 +518,6 @@ int delta_lease_init(struct task *task,
leader->max_hosts = 1;
leader->timestamp = LEASE_FREE;
strncpy(leader->space_name, space_name, NAME_ID_SIZE);
- snprintf(leader->resource_name, NAME_ID_SIZE, "host_id_%d", i+1);
leader->checksum = leader_checksum(leader);
}
diff --git a/src/delta_lease.h b/src/delta_lease.h
index b264286..9206c76 100644
--- a/src/delta_lease.h
+++ b/src/delta_lease.h
@@ -17,31 +17,27 @@ int delta_lease_leader_read(struct task *task,
const char *caller);
int delta_lease_acquire(struct task *task,
- struct space *sp,
- struct sync_disk *disk,
- char *space_name,
- uint64_t our_host_id,
- uint64_t host_id,
- struct leader_record *leader_ret);
+ struct space *sp,
+ struct sync_disk *disk,
+ char *space_name,
+ char *our_host_name,
+ uint64_t host_id,
+ struct leader_record *leader_ret);
int delta_lease_renew(struct task *task,
- struct space *sp,
- struct sync_disk *disk,
- char *space_name,
- uint64_t our_host_id,
- uint64_t our_host_id_generation,
- uint64_t host_id,
- int prev_result,
- struct leader_record *leader_last,
- struct leader_record *leader_ret);
+ struct space *sp,
+ struct sync_disk *disk,
+ char *space_name,
+ int prev_result,
+ struct leader_record *leader_last,
+ struct leader_record *leader_ret);
int delta_lease_release(struct task *task,
- struct space *sp,
- struct sync_disk *disk,
- char *space_name,
- uint64_t host_id,
- struct leader_record *leader_last,
- struct leader_record *leader_ret);
+ struct space *sp,
+ struct sync_disk *disk,
+ char *space_name GNUC_UNUSED,
+ struct leader_record *leader_last,
+ struct leader_record *leader_ret);
int delta_lease_init(struct task *task,
struct sync_disk *disk,
diff --git a/src/direct.c b/src/direct.c
index 280c5c7..50d5cbf 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -185,6 +185,7 @@ static int do_delta_action(int action,
struct task *task,
struct sanlk_lockspace *ls,
int max_hosts,
+ char *our_host_name,
struct leader_record *leader_ret)
{
struct leader_record leader;
@@ -214,16 +215,20 @@ static int do_delta_action(int action,
case ACT_ACQUIRE_ID:
rv = delta_lease_acquire(task, &space, &sd,
ls->name,
- ls->host_id,
+ our_host_name,
ls->host_id,
&leader);
break;
case ACT_RENEW_ID:
+ rv = delta_lease_leader_read(task, &sd,
+ ls->name,
+ ls->host_id,
+ &leader,
+ "direct_renew");
+ if (rv < 0)
+ return rv;
rv = delta_lease_renew(task, &space, &sd,
ls->name,
- ls->host_id,
- 0, /* local_host_generaion */
- ls->host_id,
-1,
&leader,
&leader);
@@ -238,8 +243,8 @@ static int do_delta_action(int action,
return rv;
rv = delta_lease_release(task, &space, &sd,
ls->name,
- ls->host_id,
- &leader, &leader);
+ &leader,
+ &leader);
break;
case ACT_READ_ID:
case ACT_READ_LEADER:
@@ -269,19 +274,20 @@ static int do_delta_action(int action,
* sanlock client add_lockspace|rem_lockspace -s LOCKSPACE
*/
-int direct_acquire_id(struct task *task, struct sanlk_lockspace *ls)
+int direct_acquire_id(struct task *task, struct sanlk_lockspace *ls,
+ char *our_host_name)
{
- return do_delta_action(ACT_ACQUIRE_ID, task, ls, -1, NULL);
+ return do_delta_action(ACT_ACQUIRE_ID, task, ls, -1, our_host_name, NULL);
}
int direct_release_id(struct task *task, struct sanlk_lockspace *ls)
{
- return do_delta_action(ACT_RELEASE_ID, task, ls, -1, NULL);
+ return do_delta_action(ACT_RELEASE_ID, task, ls, -1, NULL, NULL);
}
int direct_renew_id(struct task *task, struct sanlk_lockspace *ls)
{
- return do_delta_action(ACT_RENEW_ID, task, ls, -1, NULL);
+ return do_delta_action(ACT_RENEW_ID, task, ls, -1, NULL, NULL);
}
int direct_read_id(struct task *task,
@@ -295,7 +301,7 @@ int direct_read_id(struct task *task,
memset(&leader, 0, sizeof(struct leader_record));
- rv = do_delta_action(ACT_READ_ID, task, ls, -1, &leader);
+ rv = do_delta_action(ACT_READ_ID, task, ls, -1, NULL, &leader);
*timestamp = leader.timestamp;
*owner_id = leader.owner_id;
@@ -316,7 +322,7 @@ int direct_live_id(struct task *task,
time_t start;
int rv;
- rv = do_delta_action(ACT_READ_ID, task, ls, -1, &leader_begin);
+ rv = do_delta_action(ACT_READ_ID, task, ls, -1, NULL, &leader_begin);
if (rv < 0)
return rv;
@@ -325,7 +331,7 @@ int direct_live_id(struct task *task,
while (1) {
sleep(1);
- rv = do_delta_action(ACT_READ_ID, task, ls, -1, &leader);
+ rv = do_delta_action(ACT_READ_ID, task, ls, -1, NULL, &leader);
if (rv < 0)
return rv;
@@ -381,7 +387,7 @@ int direct_init(struct task *task,
int rv = -1;
if (ls && ls->host_id_disk.path[0]) {
- rv = do_delta_action(ACT_INIT, task, ls, max_hosts, NULL);
+ rv = do_delta_action(ACT_INIT, task, ls, max_hosts, NULL, NULL);
} else if (res) {
if (!num_hosts)
@@ -411,7 +417,7 @@ int direct_read_leader(struct task *task,
int rv = -1;
if (ls && ls->host_id_disk.path[0])
- rv = do_delta_action(ACT_READ_LEADER, task, ls, -1, leader_ret);
+ rv = do_delta_action(ACT_READ_LEADER, task, ls, -1, NULL, leader_ret);
else if (res)
rv = do_paxos_action(ACT_READ_LEADER, task, res,
diff --git a/src/direct.h b/src/direct.h
index 952cc00..a4af3b8 100644
--- a/src/direct.h
+++ b/src/direct.h
@@ -20,7 +20,8 @@ int direct_release(struct task *task,
struct sanlk_resource *res,
struct leader_record *leader_ret);
-int direct_acquire_id(struct task *task, struct sanlk_lockspace *ls);
+int direct_acquire_id(struct task *task, struct sanlk_lockspace *ls,
+ char *our_host_name);
int direct_release_id(struct task *task, struct sanlk_lockspace *ls);
int direct_renew_id(struct task *task, struct sanlk_lockspace *ls);
diff --git a/src/direct_lib.c b/src/direct_lib.c
index 50c75b2..fa26736 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -35,6 +35,13 @@ int host_id_disk_info(char *name GNUC_UNUSED, struct sync_disk *disk GNUC_UNUSED
return -1;
}
+int get_rand(int a, int b);
+
+int get_rand(int a, int b)
+{
+ return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0));
+}
+
static void setup_task_lib(struct task *task, int use_aio, int io_timeout_sec)
{
memset(task, 0, sizeof(struct task));
diff --git a/src/host_id.c b/src/host_id.c
index 3075d3f..3586d34 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -21,6 +21,7 @@
#include <syslog.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/utsname.h>
#include "sanlock_internal.h"
#include "diskio.h"
@@ -33,6 +34,10 @@
static unsigned int space_id_counter = 1;
+static struct random_data rand_data;
+static char rand_state[32];
+static pthread_mutex_t rand_mutex = PTHREAD_MUTEX_INITIALIZER;
+
struct list_head spaces;
struct list_head spaces_add;
struct list_head spaces_rem;
@@ -195,9 +200,7 @@ static void *lockspace_thread(void *arg_in)
{
struct task task;
struct space *sp;
- char space_name[NAME_ID_SIZE];
struct leader_record leader;
- uint64_t our_host_id, our_host_id_generation;
time_t last_attempt, last_success;
int rv, result, delta_length, gap;
int delta_result = 0;
@@ -205,8 +208,6 @@ static void *lockspace_thread(void *arg_in)
int stop = 0;
sp = (struct space *)arg_in;
- our_host_id = sp->host_id;
- memcpy(&space_name, sp->space_name, NAME_ID_SIZE);
setup_task_timeouts(&task, main_task.io_timeout_seconds);
setup_task_aio(&task, main_task.use_aio, HOSTID_AIO_CB_SIZE);
@@ -222,8 +223,9 @@ static void *lockspace_thread(void *arg_in)
}
opened = 1;
- result = delta_lease_acquire(&task, sp, &sp->host_id_disk, space_name,
- our_host_id, our_host_id, &leader);
+ result = delta_lease_acquire(&task, sp, &sp->host_id_disk,
+ sp->space_name, our_host_name_global,
+ sp->host_id, &leader);
delta_result = result;
delta_length = time(NULL) - last_attempt;
@@ -257,7 +259,6 @@ static void *lockspace_thread(void *arg_in)
goto out;
sp->host_generation = leader.owner_generation;
- our_host_id_generation = leader.owner_generation;
while (1) {
if (stop)
@@ -282,9 +283,8 @@ static void *lockspace_thread(void *arg_in)
last_attempt = time(NULL);
result = delta_lease_renew(&task, sp, &sp->host_id_disk,
- space_name, our_host_id,
- our_host_id_generation, our_host_id,
- delta_result, &leader, &leader);
+ sp->space_name, delta_result,
+ &leader, &leader);
delta_result = result;
delta_length = time(NULL) - last_attempt;
@@ -324,8 +324,8 @@ static void *lockspace_thread(void *arg_in)
close_watchdog_file(sp);
out:
if (delta_result == SANLK_OK)
- delta_lease_release(&task, sp, &sp->host_id_disk, space_name,
- our_host_id, &leader, &leader);
+ delta_lease_release(&task, sp, &sp->host_id_disk,
+ sp->space_name, &leader, &leader);
if (opened)
close(sp->host_id_disk.fd);
@@ -550,10 +550,53 @@ void free_lockspaces(int wait)
pthread_mutex_unlock(&spaces_mutex);
}
+/* return a random int between a and b inclusive */
+
+int get_rand(int a, int b)
+{
+ int32_t val;
+ int rv;
+
+ pthread_mutex_lock(&rand_mutex);
+ rv = random_r(&rand_data, &val);
+ pthread_mutex_unlock(&rand_mutex);
+ if (rv < 0)
+ return rv;
+
+ return a + (int) (((float)(b - a + 1)) * val / (RAND_MAX+1.0));
+}
+
void setup_spaces(void)
{
+ struct utsname name;
+ struct timeval tv;
+
INIT_LIST_HEAD(&spaces);
INIT_LIST_HEAD(&spaces_add);
INIT_LIST_HEAD(&spaces_rem);
+
+ memset(rand_state, 0, sizeof(rand_state));
+ memset(&rand_data, 0, sizeof(rand_data));
+
+ initstate_r(time(NULL), rand_state, sizeof(rand_state), &rand_data);
+
+ /* use host name from command line */
+
+ if (com.our_host_name[0]) {
+ memcpy(our_host_name_global, com.our_host_name, SANLK_NAME_LEN);
+ return;
+ }
+
+ /* make up something that's likely to be different among hosts */
+
+ memset(&our_host_name_global, 0, sizeof(our_host_name_global));
+ uname(&name);
+ gettimeofday(&tv, NULL);
+
+ snprintf(our_host_name_global, NAME_ID_SIZE, "%llu.%llu.%d.%s",
+ (unsigned long long)tv.tv_sec,
+ (unsigned long long)tv.tv_usec,
+ get_rand(1, RAND_MAX-1),
+ name.nodename);
}
diff --git a/src/host_id.h b/src/host_id.h
index 5cc8b50..703cb3f 100644
--- a/src/host_id.h
+++ b/src/host_id.h
@@ -18,5 +18,6 @@ int add_lockspace(struct sanlk_lockspace *ls);
int rem_lockspace(struct sanlk_lockspace *ls);
void free_lockspaces(int wait);
void setup_spaces(void);
+int get_rand(int a, int b);
#endif
diff --git a/src/main.c b/src/main.c
index 5ac6c54..e75e320 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1511,11 +1511,13 @@ static int print_daemon_state(char *str)
memset(str, 0, SANLK_STATE_MAXSTR);
snprintf(str, SANLK_STATE_MAXSTR-1,
+ "our_host_name=%s "
"use_aio=%d "
"io_timeout=%d "
"id_renewal=%d "
"id_renewal_fail=%d "
"id_renewal_warn=%d",
+ our_host_name_global,
main_task.use_aio,
main_task.io_timeout_seconds,
main_task.id_renewal_seconds,
@@ -2623,6 +2625,9 @@ static int read_command_line(int argc, char *argv[])
case 'p':
com.pid = atoi(optionarg);
break;
+ case 'e':
+ strncpy(com.our_host_name, optionarg, NAME_ID_SIZE);
+ break;
case 'i':
com.local_host_id = atoll(optionarg);
break;
@@ -2888,7 +2893,8 @@ static int do_direct(void)
break;
case ACT_ACQUIRE_ID:
- rv = direct_acquire_id(&main_task, &com.lockspace);
+ rv = direct_acquire_id(&main_task, &com.lockspace,
+ com.our_host_name);
log_tool("acquire_id done %d", rv);
break;
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index e842b93..82010db 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -963,13 +963,6 @@ static int paxos_lease_leader_dblock_read(struct task *task,
return rv;
}
-/* return a random int between a and b inclusive */
-
-static int get_rand(int a, int b)
-{
- return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0));
-}
-
static int write_new_leader(struct task *task,
struct token *token,
struct leader_record *nl,
@@ -1287,9 +1280,13 @@ int paxos_lease_acquire(struct task *task,
if (error == SANLK_DBLOCK_MBAL) {
us = get_rand(0, 1000000);
+ if (us < 0)
+ us = token->host_id * 100;
+
/* not a problem, but interesting to see, so use log_error */
log_errot(token, "paxos_acquire %llu retry delay %d us",
(unsigned long long)next_lver, us);
+
usleep(us);
our_mbal += cur_leader.max_hosts;
goto retry_ballot;
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 6201085..560bbda 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -480,6 +480,7 @@ struct command_line {
int num_hosts; /* -n */
int max_hosts; /* -m */
int res_count;
+ char our_host_name[SANLK_NAME_LEN+1];
char *dump_path;
struct sanlk_lockspace lockspace; /* -s LOCKSPACE */
struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; /* -r RESOURCE */
@@ -513,5 +514,7 @@ enum {
ACT_READ_LEADER,
};
+EXTERN char our_host_name_global[SANLK_NAME_LEN+1];
+
#endif
diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h
index 6b0510f..9683477 100644
--- a/src/sanlock_rv.h
+++ b/src/sanlock_rv.h
@@ -47,9 +47,10 @@
#define SANLK_RELEASE_LVER -250
#define SANLK_RELEASE_OWNER -251
-/* delta_lease_renew */
+/* delta_lease_renew, delta_lease_acquire */
#define SANLK_RENEW_OWNER -260
#define SANLK_RENEW_DIFF -261
+#define SANLK_HOSTID_BUSY -262
#endif
11 years, 9 months
Changes to 'dct-work'
by David Teigland
New branch 'dct-work' available with the following commits:
commit aa437fc8964e533fe5114a17acd84f544962bd22
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Jun 10 17:31:32 2011 -0500
sanlock: use unique host name in delta leases
11 years, 9 months
src/delta_lease.c src/diskio.c src/diskio.h src/host_id.c src/sanlock_internal.h tests/Makefile
by David Teigland
src/delta_lease.c | 53 +++++++++++++++++++++++++---
src/diskio.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++-
src/diskio.h | 4 ++
src/host_id.c | 2 -
src/sanlock_internal.h | 1
tests/Makefile | 2 -
6 files changed, 143 insertions(+), 9 deletions(-)
New commits:
commit 04ad78d30c275c66d096883971ee62b0affb2340
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Jun 9 12:26:06 2011 -0500
sanlock: use a completed read after renewal timeout
instead of discarding the result of the previous read
and trying the same thing again.
diff --git a/src/delta_lease.c b/src/delta_lease.c
index b4a1df7..b5624c1 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -168,7 +168,33 @@ int delta_lease_leader_read(struct task *task,
rv = read_sectors(disk, host_id - 1, 1, (char *)&leader, sizeof(struct leader_record),
task, "delta_leader");
if (rv < 0)
- return SANLK_LEADER_READ;
+ return rv;
+
+ error = verify_leader(disk, space_name, host_id, &leader, caller);
+
+ memcpy(leader_ret, &leader, sizeof(struct leader_record));
+ return error;
+}
+
+static int delta_lease_leader_reap(struct task *task,
+ struct sync_disk *disk,
+ char *space_name,
+ uint64_t host_id,
+ struct leader_record *leader_ret,
+ const char *caller)
+{
+ struct leader_record leader;
+ int rv, error;
+
+ /* host_id N is block offset N-1 */
+
+ memset(&leader, 0, sizeof(struct leader_record));
+ memset(leader_ret, 0, sizeof(struct leader_record));
+
+ rv = read_sectors_reap(disk, host_id - 1, 1, (char *)&leader, sizeof(struct leader_record),
+ task, "delta_leader");
+ if (rv < 0)
+ return rv;
error = verify_leader(disk, space_name, host_id, &leader, caller);
@@ -301,15 +327,30 @@ int delta_lease_renew(struct task *task,
int io_timeout_save;
int error;
- /* TODO: if the previous renew timed out in this initial read, and that
- * read is now complete, we could just use the result from that read
- * here instead of ignoring it and doing another. */
+ /* if the previous renew timed out in this initial read, and that read
+ is now complete, we can use that result here instead of discarding
+ it and doing another. */
- error = delta_lease_leader_read(task, disk, space_name, host_id, &leader,
- "delta_renew_begin");
+ if (prev_result == SANLK_AIO_TIMEOUT && task->read_timeout) {
+ error = delta_lease_leader_reap(task, disk, space_name, host_id,
+ &leader, "delta_renew_reap");
+
+ log_space(sp, "delta_renew reap %d", error);
+
+ if (error == SANLK_OK) {
+ task->read_timeout = NULL;
+ goto read_done;
+ }
+ }
+
+ task->read_timeout = NULL;
+
+ error = delta_lease_leader_read(task, disk, space_name, host_id,
+ &leader, "delta_renew_read");
if (error < 0)
return error;
+ read_done:
if (!our_host_id_generation)
our_host_id_generation = leader.owner_generation;
diff --git a/src/diskio.c b/src/diskio.c
index f386470..bda0e47 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -426,6 +426,9 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
} else {
/* aicb->used and aicb->buf both remain set */
rv = SANLK_AIO_TIMEOUT;
+
+ if (cmd == IO_CMD_PREAD)
+ task->read_timeout = aicb;
}
out:
return rv;
@@ -651,12 +654,13 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
{
char *iobuf, **p_iobuf;
uint64_t offset;
- int iobuf_len = sector_count * disk->sector_size;
+ int iobuf_len;
int rv;
if (!disk->sector_size)
return -EINVAL;
+ iobuf_len = sector_count * disk->sector_size;
offset = disk->offset + (sector_nr * disk->sector_size);
p_iobuf = &iobuf;
@@ -685,3 +689,87 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
return rv;
}
+/* Try to reap the event of a previously timed out read_sectors.
+ A task's last timed out read is saved in task->read_timeout. */
+
+int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
+ uint32_t sector_count, char *data, int data_len,
+ struct task *task, const char *blktype GNUC_UNUSED)
+{
+ struct timespec ts;
+ struct aicb *aicb;
+ struct iocb *iocb;
+ struct io_event event;
+ char *iobuf;
+ uint64_t offset;
+ int iobuf_len;
+ int rv;
+
+ iobuf_len = sector_count * disk->sector_size;
+ offset = disk->offset + (sector_nr * disk->sector_size);
+
+ aicb = task->read_timeout;
+ iocb = &aicb->iocb;
+ iobuf = iocb->u.c.buf;
+
+ if (!aicb->used)
+ return -EINVAL;
+ if (iocb->aio_fildes != disk->fd)
+ return -EINVAL;
+ if (iocb->u.c.nbytes != iobuf_len)
+ return -EINVAL;
+ if (iocb->u.c.offset != offset)
+ return -EINVAL;
+ if (iocb->aio_lio_opcode != IO_CMD_PREAD)
+ return -EINVAL;
+
+ memset(&ts, 0, sizeof(struct timespec));
+ ts.tv_nsec = 500000000; /* half a second */
+ retry:
+ memset(&event, 0, sizeof(event));
+
+ rv = io_getevents(task->aio_ctx, 1, 1, &event, &ts);
+ if (rv == -EINTR)
+ goto retry;
+ if (rv < 0) {
+ log_error("reap aio %s io_getevents error %d", task->name, rv);
+ goto out;
+ }
+ if (rv == 1) {
+ struct iocb *ev_iocb = event.obj;
+ struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
+
+ ev_aicb->used = 0;
+
+ if (ev_iocb != iocb) {
+ log_error("reap aio %s other iocb %p event result %ld %ld",
+ task->name, ev_iocb, event.res, event.res2);
+ free(ev_aicb->buf);
+ ev_aicb->buf = NULL;
+ goto retry;
+ }
+ if ((int)event.res < 0) {
+ log_error("reap aio %s event result %ld %ld",
+ task->name, event.res, event.res2);
+ rv = event.res;
+ goto out;
+ }
+ if (event.res != iobuf_len) {
+ log_error("reap aio %s event len %d result %lu %lu",
+ task->name, iobuf_len, event.res, event.res2);
+ rv = -EMSGSIZE;
+ goto out;
+ }
+
+ rv = 0;
+ memcpy(data, iobuf, data_len);
+ free(iobuf);
+ goto out;
+ }
+
+ /* timed out again */
+ rv = SANLK_AIO_TIMEOUT;
+ out:
+ return rv;
+}
+
diff --git a/src/diskio.h b/src/diskio.h
index dd32958..a54ffaf 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -31,4 +31,8 @@ int write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
uint32_t sector_count, char *data, int data_len,
struct task *task, const char *blktype);
+
+int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
+ uint32_t sector_count, char *data, int data_len,
+ struct task *task, const char *blktype);
#endif
diff --git a/src/host_id.c b/src/host_id.c
index 0ae3138..3075d3f 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -276,7 +276,7 @@ static void *lockspace_thread(void *arg_in)
} else {
/* don't spin too quickly if renew is failing
immediately and repeatedly */
- usleep(200000);
+ usleep(500000);
}
last_attempt = time(NULL);
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 62b3f68..6201085 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -444,6 +444,7 @@ struct task {
int use_aio;
int cb_size;
io_context_t aio_ctx;
+ struct aicb *read_timeout;
struct aicb *callbacks;
};
diff --git a/tests/Makefile b/tests/Makefile
index e656606..4041a8e 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -21,7 +21,7 @@ CFLAGS += -D_GNU_SOURCE -g \
-fasynchronous-unwind-tables \
-fdiagnostics-show-option
-LDFLAGS = -lrt -laio -lblkid -lsanlock -lsanlock_direct
+LDFLAGS = -lrt -laio -lblkid -lsanlock
all: $(TARGET)
11 years, 9 months