tests/devcount.c tests/devcount-dmsetup
by David Teigland
tests/devcount-dmsetup | 28 ++++++
tests/devcount.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 231 insertions(+), 3 deletions(-)
New commits:
commit 1ee905695cc9053412fe58aca267e1f0cd69f0fe
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Apr 26 16:15:53 2011 -0500
devcount: add expire test
forces host_id renewals to expire using dmsetup
diff --git a/tests/devcount-dmsetup b/tests/devcount-dmsetup
new file mode 100755
index 0000000..0a92f76
--- /dev/null
+++ b/tests/devcount-dmsetup
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+if [ $# -le 1 ]; then
+ echo "num $#"
+ echo ""
+ echo "devcount-dmsetup save <dev>"
+ echo ""
+ echo "devcount-dmsetup error <dev>"
+ echo ""
+ echo "devcount-dmsetup linear <dev>"
+ echo ""
+fi
+
+cmd=$1
+dev=$2
+
+if [ "$cmd" == "save" ]; then
+ rm -f /tmp/table-linear.txt
+ rm -f /tmp/table-error.txt
+ dmsetup table $dev > /tmp/table-linear.txt
+ sed "s/linear/error/" /tmp/table-linear.txt > /tmp/table-error.txt
+ exit 0
+fi
+
+dmsetup suspend $dev
+dmsetup load $dev /tmp/table-$cmd.txt
+dmsetup resume $dev
+
diff --git a/tests/devcount.c b/tests/devcount.c
index 4ad389f..fc7a1fb 100644
--- a/tests/devcount.c
+++ b/tests/devcount.c
@@ -380,7 +380,7 @@ static int do_count(int argc, char *argv[])
if (write_seconds && (our_we->time - start >= write_seconds))
break;
- if (check_pause(pause_fd)) {
+ if (!(writes % 64) && check_pause(pause_fd)) {
print_our_we(count_path, our_pid, writes, our_we, "pause");
fprintf(stderr, "we_are_paused\n");
raise(SIGSTOP);
@@ -436,7 +436,7 @@ static int do_count(int argc, char *argv[])
return -1;
}
-static void add_lockspace(void)
+static int add_lockspace(void)
{
int rv;
@@ -446,7 +446,11 @@ static void add_lockspace(void)
lockspace.host_id = our_hostid;
rv = sanlock_add_lockspace(&lockspace, 0);
- printf("%d sanlock_add_lockspace %d\n", getpid(), rv);
+
+ log_debug("%s p %d sanlock_add_lockspace %d",
+ lock_path, getpid(), rv);
+
+ return rv;
}
/*
@@ -641,6 +645,7 @@ static int do_relock(int argc, char *argv[])
kill_child:
kill_pid(pid);
+ log_debug("%s p %d killed c %d", count_path, parent_pid, pid);
dead_child:
close(c2p[0]);
close(c2p[1]);
@@ -1126,6 +1131,7 @@ static int do_migrate(int argc, char *argv[])
write_migrate_incoming(state); /* to dest */
kill_pid(pid);
+ log_debug("%s p %d killed c %d", count_path, parent_pid, pid);
close(c2p[0]);
close(c2p[1]);
free(state);
@@ -1137,6 +1143,195 @@ static int do_migrate(int argc, char *argv[])
return -1;
}
+/*
+ * dmsetup table /dev/bull/lock1 > /tmp/table-linear.txt
+ * sed "s/linear/error/" /tmp/table-linear.txt > /tmp/table-error.txt
+ *
+ * dmsetup suspend /dev/bull/lock1
+ * dmsetup load /dev/bull/lock1 /tmp/table-error.txt
+ * dmsetup resume /dev/bull/lock1
+ *
+ * dmsetup suspend /dev/bull/lock1
+ * dmsetup load /dev/bull/lock1 /tmp/table-linear.txt
+ * dmsetup resume /dev/bull/lock1
+ */
+
+static void dmsetup_save_lock_disk(void)
+{
+ char cmd[128];
+ sprintf(cmd, "./devcount-dmsetup save %s", lock_path);
+ system(cmd);
+}
+
+static void dmsetup_error_lock_disk(void)
+{
+ char cmd[128];
+ sprintf(cmd, "./devcount-dmsetup error %s", lock_path);
+ system(cmd);
+}
+
+static void dmsetup_linear_lock_disk(void)
+{
+ char cmd[128];
+ sprintf(cmd, "./devcount-dmsetup linear %s", lock_path);
+ system(cmd);
+}
+
+int do_expire(int argc, char *argv[])
+{
+ char *av[COUNT_ARGS+1];
+ struct sanlk_resource *res;
+ uint32_t parent_pid = getpid();
+ int i, j, pid, rv, sock, len, status;
+ int c2p[2];
+ char result[5];
+
+ if (argc < LOCK_ARGS)
+ return -1;
+
+ count_offset = 0;
+
+ strcpy(lock_path, argv[2]);
+ strcpy(count_path, argv[4]);
+ our_hostid = atoi(argv[7]);
+
+ dmsetup_save_lock_disk();
+
+ add_lockspace();
+
+ len = sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk);
+ res = malloc(len);
+ memset(res, 0, len);
+ strcpy(res->lockspace_name, lockspace.name);
+ snprintf(res->name, SANLK_NAME_LEN, "resource%s", count_path);
+ res->name[SANLK_NAME_LEN-1] = '\0';
+ res->num_disks = 1;
+ strncpy(res->disks[0].path, lock_path, SANLK_PATH_LEN);
+ res->disks[0].path[SANLK_PATH_LEN-1] = '\0';
+ res->disks[0].offset = 1024000;
+
+ /*
+ * argv[0] = devcount
+ * argv[1] = expire
+ * argv[2] = <lock_disk>
+ * argv[3] = rw
+ * start copying at argv[3]
+ */
+
+ j = 0;
+ av[j++] = strdup(argv[0]);
+ for (i = 3; i < LOCK_ARGS; i++)
+ av[j++] = strdup(argv[i]);
+ av[j] = NULL;
+
+ while (1) {
+ pipe(c2p);
+
+ pid = fork();
+ if (!pid) {
+ int child_pid = getpid();
+
+ sock = sanlock_register();
+ if (sock < 0) {
+ log_error("%s c %d sanlock_register error %d",
+ count_path, child_pid, sock);
+ exit(-1);
+ }
+
+ /* this acquire can take up to 90 seconds waiting for
+ the host_id of the owner to time out */
+
+ log_debug("%s c %d sanlock_acquire begin",
+ count_path, child_pid);
+
+ rv = sanlock_acquire(sock, -1, 0, 1, &res, NULL);
+ if (rv < 0) {
+ log_debug("%s c %d sanlock_acquire error %d",
+ count_path, child_pid, rv);
+
+ /* all hosts are trying to acquire so we
+ expect this to acquire only sometimes;
+ TODO: exit with an error for some rv's */
+
+ write(c2p[1], "fail", 4);
+ close(c2p[0]);
+ close(c2p[1]);
+ exit(0);
+ }
+ log_debug("%s c %d sanlock_acquire done",
+ count_path, child_pid);
+
+ write(c2p[1], "good", 4);
+ close(c2p[0]);
+ close(c2p[1]);
+ execv(av[0], av);
+ perror("execv devcount problem");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&result, 0, sizeof(result));
+ read(c2p[0], &result, 4);
+ close(c2p[0]);
+ close(c2p[1]);
+
+ if (strstr(result, "fail")) {
+ /* we expect child to exit when it fails to acquire the
+ lock because it's held by someone else */
+ waitpid(pid, &status, 0);
+ goto dead_child;
+ }
+
+ /* this test should be run with sec2 set to some large value
+ that won't run out before sanlock daemon kills rw */
+
+ sleep(rand_int(6, 100));
+
+ dmsetup_error_lock_disk();
+ log_debug("%s p %d disable %s", count_path, parent_pid, lock_path);
+
+ /* sanlock daemon kills pid when the renewals fail; after the
+ kill it will try to release the resource lease, which will
+ also fail if the resource lease is on the same disk as the
+ host_id lease. Other nodes trying to get pid's resource
+ lease are watching our host_id for 90 seconds, after which
+ they will take pid's resource lease. If the resource lease
+ is on a different disk, the daemon will be able to release
+ it after the kill, and another node will be able to take it
+ immediately after that, without watching our host_id for 90
+ seconds */
+
+ /* other nodes can't rely on the daemon being able to kill rw,
+ so they need to wait 90 seconds to ensure that the watchdog
+ has killed the host before taking pid's resource lease.
+ In a different test, have the daemon kill fail, causing rw
+ to continue running until the watchdog fires, after which
+ another host will take pid's resource lease */
+
+ waitpid(pid, &status, 0);
+ log_debug("%s p %d waitpid c %d done", count_path, parent_pid, pid);
+
+ sleep(rand_int(0, 3));
+
+ dmsetup_linear_lock_disk();
+ log_debug("%s p %d enable %s", count_path, parent_pid, lock_path);
+
+ log_debug("%s p %d sanlock_add_lockspace begin",
+ lock_path, parent_pid);
+ while (1) {
+ sleep(1);
+ rv = add_lockspace();
+ if (!rv)
+ break;
+ }
+ dead_child:
+ sleep(rand_int(0, 1));
+ }
+
+ printf("test failed...\n");
+ sleep(1000000);
+ return -1;
+}
+
/*
* devcount init <lock_disk> <count_disk>
* sanlock direct init -n 8 -s devcount:0:<lock_disk>:0
@@ -1248,6 +1443,9 @@ int main(int argc, char *argv[])
else if (!strcmp(argv[1], "migrate"))
rv = do_migrate(argc, argv);
+ else if (!strcmp(argv[1], "expire"))
+ rv = do_expire(argc, argv);
+
if (!rv)
return 0;
@@ -1281,10 +1479,12 @@ int main(int argc, char *argv[])
printf("devcount wrap <lock_disk> rw <count_disk> <sec1> <sec2> <hostid>\n");
printf(" sanlock add_lockspace -s devcount:<hostid>:<lock_disk>:0\n");
printf(" sanlock_acquire, exec devcount rw\n");
+ printf("\n");
printf("devcount migrate <lock_disk> rw <count_disk> <sec1> <sec2> <hostid> <max_hostid>\n");
printf(" sanlock add_lockspace -s devcount:<hostid>:<lock_disk>:0\n");
printf(" loop around fork, sanlock_acquire, exec devcount rw\n");
printf("\n");
+ printf("devcount expire <lock_disk> rw <count_disk> <sec1> <sec2> <hostid>\n");
printf("\n");
return -1;
}
12 years, 5 months
tests/devcount.c
by David Teigland
tests/devcount.c | 154 +++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 128 insertions(+), 26 deletions(-)
New commits:
commit b064a8e3b3f39524776caf67fa56675ddc7ad4ab
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Apr 21 16:48:37 2011 -0500
devcount: new pause/resume
A last write from the rw process could hit the disk after kill(SIGSTOP)
returns in the parent. Use a new method to pause the rw child
process, which should guarantee no last write after the parent
completes the pause.
diff --git a/tests/devcount.c b/tests/devcount.c
index e2dfc92..4ad389f 100644
--- a/tests/devcount.c
+++ b/tests/devcount.c
@@ -2,6 +2,7 @@
#include <sys/wait.h>
#include <sys/un.h>
#include <sys/mount.h>
+#include <sys/signalfd.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdio.h>
@@ -55,12 +56,45 @@ do { \
} while (0)
+static int kill_pid(int pid)
+{
+ int rv, status;
+
+ kill(pid, SIGKILL);
+
+ while (1) {
+ rv = waitpid(pid, &status, 0);
+ if (rv < 0)
+ return -1;
+ if (rv != pid)
+ return -2;
+
+ if (WIFEXITED(status))
+ return 0;
+ }
+}
+
/* kill(pid, SIGSTOP) would be nice, but that won't guarantee
- the pid has finished all i/o when it returns */
+ the pid has finished all i/o when it returns.
+
+ Instead, we send SIGUSR1, which child sees after it's done
+ with a (synchronous) write, and calls SIGSTOP on itself */
-static void pause_pid(int pid)
+static void pause_pid(int pid, int child_stderr)
{
- kill(pid, SIGSTOP);
+ char buf[64];
+ int rv;
+
+ kill(pid, SIGUSR1);
+
+ /* child prints "we_are_paused" to stderr before stopping */
+
+ memset(buf, 0, sizeof(buf));
+
+ rv = read(child_stderr, buf, sizeof(buf));
+
+ if (!strstr(buf, "we_are_paused"))
+ log_error("pause_pid %d buf %s", pid, buf);
}
static void resume_pid(int pid)
@@ -68,6 +102,40 @@ static void resume_pid(int pid)
kill(pid, SIGCONT);
}
+static int check_pause(int fd)
+{
+ struct signalfd_siginfo fdsi;
+ ssize_t rv;
+
+ rv = read(fd, &fdsi, sizeof(struct signalfd_siginfo));
+ if (rv != sizeof(struct signalfd_siginfo)) {
+ return 0;
+ }
+ if (fdsi.ssi_signo == SIGUSR1) {
+ return 1;
+ }
+ return 0;
+}
+
+static int setup_pause(void)
+{
+ sigset_t mask;
+ int fd, rv;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGUSR1);
+
+ rv = sigprocmask(SIG_BLOCK, &mask, NULL);
+ if (rv < 0)
+ return rv;
+
+ fd = signalfd(-1, &mask, SFD_NONBLOCK);
+ if (fd < 0)
+ return -errno;
+
+ return fd;
+}
+
static int rand_int(int a, int b)
{
return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0));
@@ -100,12 +168,14 @@ void print_entries(char *path, int pid, char *buf)
}
}
-void print_our_we(char *path, int pid, int writes, struct entry *our_we)
+void print_our_we(char *path, int pid, int writes, struct entry *our_we,
+ const char *stage)
{
- log_debug("%s c %d w %d index %d turn %u time %llu %u:%llu:%llu "
+ log_debug("%s c %d %s w %d index %d turn %u time %llu %u:%llu:%llu "
"last %u %llu %u:%llu:%llu",
path,
pid,
+ stage,
writes,
our_hostid - 1,
our_we->turn,
@@ -133,6 +203,7 @@ static int do_count(int argc, char *argv[])
char *rbuf, **p_rbuf, *wbuf, **p_wbuf, *vbuf, **p_vbuf;
struct entry *re, *max_re, *our_we;
int i, fd, rv, max_i;
+ int pause_fd;
time_t start;
uint32_t our_pid = getpid();
uint32_t max_turn;
@@ -143,6 +214,8 @@ static int do_count(int argc, char *argv[])
if (argc < COUNT_ARGS)
return -1;
+ pause_fd = setup_pause();
+
strcpy(count_path, argv[2]);
sec1 = atoi(argv[3]);
sec2 = atoi(argv[4]);
@@ -287,7 +360,7 @@ static int do_count(int argc, char *argv[])
}
writes = 1;
- print_our_we(count_path, our_pid, writes, our_we);
+ print_our_we(count_path, our_pid, writes, our_we, "begin");
start = time(NULL);
@@ -306,9 +379,17 @@ static int do_count(int argc, char *argv[])
if (write_seconds && (our_we->time - start >= write_seconds))
break;
+
+ if (check_pause(pause_fd)) {
+ print_our_we(count_path, our_pid, writes, our_we, "pause");
+ fprintf(stderr, "we_are_paused\n");
+ raise(SIGSTOP);
+ /* this shouldn't appear until parent does kill(SIGCONT) */
+ print_our_we(count_path, our_pid, writes, our_we, "resume");
+ }
}
- print_our_we(count_path, our_pid, writes, our_we);
+ print_our_we(count_path, our_pid, writes, our_we, "end");
if (turn_file) {
fprintf(turn_file, "turn %03u start %llu end %llu host %u pid %u\n",
@@ -402,6 +483,7 @@ static int do_relock(int argc, char *argv[])
char *av[COUNT_ARGS+1];
struct sanlk_resource *res, *res_inq;
int i, j, pid, rv, sock, len, status;
+ int c2p[2]; /* child to parent */
int res_count;
uint32_t parent_pid = getpid();
uint64_t lver;
@@ -444,6 +526,8 @@ static int do_relock(int argc, char *argv[])
av[j] = NULL;
while (1) {
+ pipe(c2p);
+
pid = fork();
if (!pid) {
int child_pid = getpid();
@@ -470,6 +554,12 @@ static int do_relock(int argc, char *argv[])
log_debug("%s c %d sanlock_acquire done",
count_path, child_pid);
+ /* make child's stderr go to parent c2p[0] */
+ close(2);
+ dup(c2p[1]);
+ close(c2p[0]);
+ close(c2p[1]);
+
execv(av[0], av);
perror("execv devcount problem");
exit(EXIT_FAILURE);
@@ -488,10 +578,8 @@ static int do_relock(int argc, char *argv[])
/* we expect child to exit when it fails to acquire the lock
because it's held by someone else, or rw run time is up */
- if (rv == pid) {
- sleep(rand_int(0, 1));
- continue;
- }
+ if (rv == pid)
+ goto dead_child;
rv = sanlock_inquire(-1, pid, 0, &res_count, &state);
if (rv < 0) {
@@ -514,7 +602,8 @@ static int do_relock(int argc, char *argv[])
free(res_inq);
free(state);
- pause_pid(pid);
+ pause_pid(pid, c2p[0]);
+ log_debug("%s p %d paused c %d", count_path, parent_pid, pid);
rv = sanlock_release(-1, pid, SANLK_REL_ALL, 0, NULL);
if (rv < 0) {
@@ -551,8 +640,10 @@ static int do_relock(int argc, char *argv[])
count_path, parent_pid, pid, (unsigned long long)lver, rv);
kill_child:
- kill(pid, SIGKILL);
- waitpid(pid, &status, 0);
+ kill_pid(pid);
+ dead_child:
+ close(c2p[0]);
+ close(c2p[1]);
sleep(rand_int(0, 1));
}
@@ -900,8 +991,9 @@ static int do_migrate(int argc, char *argv[])
{
char *av[MIGRATE_ARGS+1];
struct sanlk_resource *res;
- int i, j, pid, rv, sock, len, status, init;
- int pfd[2];
+ int i, j, pid, rv, sock, len, init;
+ int p2c[2]; /* parent to child */
+ int c2p[2]; /* child to parent */
int res_count;
uint32_t parent_pid = getpid();
uint64_t lver;
@@ -945,7 +1037,9 @@ static int do_migrate(int argc, char *argv[])
av[j] = NULL;
while (1) {
- pipe(pfd);
+ pipe(p2c);
+ pipe(c2p);
+
pid = fork();
if (!pid) {
int child_pid = getpid();
@@ -960,12 +1054,18 @@ static int do_migrate(int argc, char *argv[])
log_debug("%s c %d wait", count_path, child_pid);
- read(pfd[0], &junk, 1);
- close(pfd[0]);
- close(pfd[1]);
+ read(p2c[0], &junk, 1);
+ close(p2c[0]);
+ close(p2c[1]);
log_debug("%s c %d begin", count_path, child_pid);
+ /* make child's stderr go to parent c2p[0] */
+ close(2);
+ dup(c2p[1]);
+ close(c2p[0]);
+ close(c2p[1]);
+
execv(av[0], av);
perror("execv devcount problem");
exit(EXIT_FAILURE);
@@ -992,9 +1092,9 @@ static int do_migrate(int argc, char *argv[])
(unsigned long long)lver);
/* tell child to resume */
- write(pfd[1], "\n", 1);
- close(pfd[0]);
- close(pfd[1]);
+ write(p2c[1], "\n", 1);
+ close(p2c[0]);
+ close(p2c[1]);
/* let the child run for 10 seconds before stopping it;
if the child exits before the 10 seconds, the sanlock_inquire
@@ -1011,7 +1111,8 @@ static int do_migrate(int argc, char *argv[])
log_debug("%s p %d sanlock_inquire c %d done",
count_path, parent_pid, pid);
- pause_pid(pid);
+ pause_pid(pid, c2p[0]);
+ log_debug("%s p %d paused c %d", count_path, parent_pid, pid);
rv = sanlock_release(-1, pid, SANLK_REL_ALL, 0, NULL);
if (rv < 0) {
@@ -1024,8 +1125,9 @@ static int do_migrate(int argc, char *argv[])
write_migrate_incoming(state); /* to dest */
- kill(pid, SIGKILL);
- waitpid(pid, &status, 0);
+ kill_pid(pid);
+ close(c2p[0]);
+ close(c2p[1]);
free(state);
}
12 years, 5 months
tests/devcount.c
by David Teigland
tests/devcount.c | 159 ++++++++++++++++++++++++++++++++++---------------------
1 file changed, 100 insertions(+), 59 deletions(-)
New commits:
commit a40ff4b5a41b6d33c5249253b0046fed290926f4
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Apr 21 14:58:13 2011 -0500
devcount: improve output
so it's easier to match multiple devcount processes
with the sanlock debug output
diff --git a/tests/devcount.c b/tests/devcount.c
index dcfe60a..e2dfc92 100644
--- a/tests/devcount.c
+++ b/tests/devcount.c
@@ -44,6 +44,30 @@ struct entry {
uint64_t last_count;
};
+#define log_debug(fmt, args...) \
+do { \
+ printf("%llu " fmt "\n", (unsigned long long)time(NULL), ##args); \
+} while (0)
+
+#define log_error(fmt, args...) \
+do { \
+ printf("ERROR %llu " fmt "\n", (unsigned long long)time(NULL), ##args); \
+} while (0)
+
+
+/* kill(pid, SIGSTOP) would be nice, but that won't guarantee
+ the pid has finished all i/o when it returns */
+
+static void pause_pid(int pid)
+{
+ kill(pid, SIGSTOP);
+}
+
+static void resume_pid(int pid)
+{
+ kill(pid, SIGCONT);
+}
+
static int rand_int(int a, int b)
{
return a + (int) (((float)(b - a + 1)) * random() / (RAND_MAX+1.0));
@@ -51,14 +75,15 @@ static int rand_int(int a, int b)
/* 64 byte entry: can fit up to 8 nodes in a 512 byte block */
-void print_entries(int pid, char *buf)
+void print_entries(char *path, int pid, char *buf)
{
struct entry *e = (struct entry *)buf;
int i;
for (i = 0; i < (512 / sizeof(struct entry)); i++) {
- printf("%d index %d turn %u time %llu %u:%llu:%llu "
- "last %u %llu %u:%llu:%llu\n",
+ log_error("%s c %d index %d turn %u time %llu %u:%llu:%llu "
+ "last %u %llu %u:%llu:%llu",
+ path,
pid,
i,
e->turn,
@@ -75,10 +100,11 @@ void print_entries(int pid, char *buf)
}
}
-void print_our_we(int pid, int writes, struct entry *our_we)
+void print_our_we(char *path, int pid, int writes, struct entry *our_we)
{
- printf("%d w %d index %d turn %u time %llu %u:%llu:%llu "
- "last %u %llu %u:%llu:%llu\n",
+ log_debug("%s c %d w %d index %d turn %u time %llu %u:%llu:%llu "
+ "last %u %llu %u:%llu:%llu",
+ path,
pid,
writes,
our_hostid - 1,
@@ -196,10 +222,10 @@ static int do_count(int argc, char *argv[])
}
if (memcmp(rbuf, vbuf, 512)) {
- printf("%d rbuf:\n", our_pid);
- print_entries(our_pid, rbuf);
- printf("%d vbuf:\n", our_pid);
- print_entries(our_pid, vbuf);
+ log_error("%s c %d rbuf:", count_path, our_pid);
+ print_entries(count_path, our_pid, rbuf);
+ log_error("%s c %d vbuf:", count_path, our_pid);
+ print_entries(count_path, our_pid, vbuf);
goto fail;
}
}
@@ -225,8 +251,8 @@ static int do_count(int argc, char *argv[])
}
if (max_turn != max_re->turn) {
- printf("%d max_turn %d max_re->turn %d\n", our_pid,
- max_turn, max_re->turn);
+ log_error("%s c %d max_turn %d max_re->turn %d\n",
+ count_path, our_pid, max_turn, max_re->turn);
goto fail;
}
@@ -261,7 +287,7 @@ static int do_count(int argc, char *argv[])
}
writes = 1;
- print_our_we(our_pid, writes, our_we);
+ print_our_we(count_path, our_pid, writes, our_we);
start = time(NULL);
@@ -282,7 +308,7 @@ static int do_count(int argc, char *argv[])
break;
}
- print_our_we(our_pid, writes, our_we);
+ print_our_we(count_path, our_pid, writes, our_we);
if (turn_file) {
fprintf(turn_file, "turn %03u start %llu end %llu host %u pid %u\n",
@@ -313,10 +339,10 @@ static int do_count(int argc, char *argv[])
}
if (memcmp(rbuf, vbuf, 512)) {
- printf("%d rbuf:\n", our_pid);
- print_entries(our_pid, rbuf);
- printf("%d vbuf:\n", our_pid);
- print_entries(our_pid, vbuf);
+ log_error("%s c %d rbuf:", count_path, our_pid);
+ print_entries(count_path, our_pid, rbuf);
+ log_error("%s c %d vbuf:", count_path, our_pid);
+ print_entries(count_path, our_pid, vbuf);
goto fail;
}
}
@@ -424,8 +450,8 @@ static int do_relock(int argc, char *argv[])
sock = sanlock_register();
if (sock < 0) {
- printf("%d sanlock_register error %d\n",
- child_pid, sock);
+ log_error("%s c %d sanlock_register error %d",
+ count_path, child_pid, sock);
exit(-1);
}
@@ -434,14 +460,15 @@ static int do_relock(int argc, char *argv[])
rv = sanlock_acquire(sock, -1, 0, 1, &res, NULL);
if (rv < 0) {
- printf("%d sanlock_acquire error %d\n",
- child_pid, rv);
+ log_debug("%s c %d sanlock_acquire error %d",
+ count_path, child_pid, rv);
/* all hosts are trying to acquire so we
expect this to acquire only sometimes;
TODO: exit with an error for some rv's */
exit(0);
}
- printf("%d sanlock_acquire done\n", child_pid);
+ log_debug("%s c %d sanlock_acquire done",
+ count_path, child_pid);
execv(av[0], av);
perror("execv devcount problem");
@@ -458,7 +485,7 @@ static int do_relock(int argc, char *argv[])
sleep(1);
}
- /* we expect child to exit when it fails go acquire the lock
+ /* we expect child to exit when it fails to acquire the lock
because it's held by someone else, or rw run time is up */
if (rv == pid) {
@@ -469,32 +496,36 @@ static int do_relock(int argc, char *argv[])
rv = sanlock_inquire(-1, pid, 0, &res_count, &state);
if (rv < 0) {
/* pid may have exited */
- printf("%d sanlock_inquire error %d\n", parent_pid, rv);
+ log_error("%s p %d sanlock_inquire c %d error %d",
+ count_path, parent_pid, pid, rv);
goto run_more;
}
rv = sanlock_str_to_res(state, &res_inq);
if (rv < 0) {
- printf("sanlock_str_to_res error %d %s\n", rv, state);
+ log_error("%s p %d sanlock_str_to_res error %d %s",
+ count_path, parent_pid, rv, state);
goto fail;
}
lver = res_inq->lver;
- printf("%d sanlock_inquire %llu done\n", parent_pid,
- (unsigned long long)lver);
+ log_debug("%s p %d sanlock_inquire c %d lver %llu done",
+ count_path, parent_pid, pid, (unsigned long long)lver);
free(res_inq);
free(state);
- kill(pid, SIGSTOP);
+ pause_pid(pid);
rv = sanlock_release(-1, pid, SANLK_REL_ALL, 0, NULL);
if (rv < 0) {
/* pid may have exited */
- printf("%d sanlock_release error %d\n", parent_pid, rv);
- goto run_more;
+ log_error("%s p %d sanlock_release c %d error %d",
+ count_path, parent_pid, pid, rv);
+ goto kill_child;
}
- printf("%d sanlock_release done\n", parent_pid);
+ log_debug("%s p %d sanlock_release c %d done",
+ count_path, parent_pid, pid);
/* give a chance to someone else to acquire the lock in here */
usleep(1000000);
@@ -506,19 +537,22 @@ static int do_relock(int argc, char *argv[])
if (!rv) {
/* we got the lock back in the same version */
- printf("%d sanlock_acquire %llu done\n", parent_pid,
- (unsigned long long)lver);
+ log_debug("%s p %d sanlock_acquire c %d lver %llu done",
+ count_path, parent_pid, pid,
+ (unsigned long long)lver);
- kill(pid, SIGCONT);
+ resume_pid(pid);
goto run_more;
}
/* someone got the lock between our release and reacquire */
- printf("%d sanlock_acquire %llu error %d\n", parent_pid,
- (unsigned long long)lver, rv);
+ log_debug("%s p %d sanlock_acquire c %d lver %llu error %d",
+ count_path, parent_pid, pid, (unsigned long long)lver, rv);
+ kill_child:
kill(pid, SIGKILL);
+ waitpid(pid, &status, 0);
sleep(rand_int(0, 1));
}
@@ -583,21 +617,22 @@ static int do_lock(int argc, char *argv[])
sock = sanlock_register();
if (sock < 0) {
- printf("%d sanlock_register error %d\n",
- child_pid, sock);
+ log_error("%s c %d sanlock_register error %d",
+ count_path, child_pid, sock);
exit(-1);
}
rv = sanlock_acquire(sock, -1, 0, 1, &res, NULL);
if (rv < 0) {
- printf("%d sanlock_acquire error %d\n",
- child_pid, rv);
+ log_debug("%s c %d sanlock_acquire error %d",
+ count_path, child_pid, rv);
/* all hosts are trying to acquire so we
expect this to acquire only sometimes;
TODO: exit with an error for some rv's */
exit(0);
}
- printf("%d sanlock_acquire done\n", child_pid);
+ log_debug("%s c %d sanlock_acquire done",
+ count_path, child_pid);
execv(av[0], av);
perror("execv devcount problem");
@@ -661,19 +696,21 @@ static int do_wrap(int argc, char *argv[])
sock = sanlock_register();
if (sock < 0) {
- printf("%d sanlock_register error %d\n", pid, sock);
+ log_error("%s c %d sanlock_register error %d",
+ count_path, pid, sock);
exit(-1);
}
rv = sanlock_acquire(sock, -1, 0, 1, &res, NULL);
if (rv < 0) {
- printf("%d sanlock_acquire error %d\n", pid, rv);
+ log_error("%s c %d sanlock_acquire error %d",
+ count_path, pid, rv);
/* all hosts are trying to acquire so we
expect this to acquire only sometimes;
TODO: exit with an error for some rv's */
exit(0);
}
- printf("%d sanlock_acquire done\n", pid);
+ log_debug("%s c %d sanlock_acquire done", count_path, pid);
execv(av[0], av);
perror("execv devcount problem");
@@ -916,18 +953,18 @@ static int do_migrate(int argc, char *argv[])
sock = sanlock_register();
if (sock < 0) {
- printf("%d sanlock_register error %d\n",
- child_pid, sock);
+ log_error("%s c %d sanlock_register error %d",
+ count_path, child_pid, sock);
exit(-1);
}
- printf("%d pause\n", child_pid);
+ log_debug("%s c %d wait", count_path, child_pid);
read(pfd[0], &junk, 1);
close(pfd[0]);
close(pfd[1]);
- printf("%d resume\n", child_pid);
+ log_debug("%s c %d begin", count_path, child_pid);
execv(av[0], av);
perror("execv devcount problem");
@@ -946,11 +983,13 @@ static int do_migrate(int argc, char *argv[])
rv = sanlock_acquire(-1, pid, 0, 1, &res, NULL);
if (rv < 0) {
- printf("%d sanlock_acquire error %d\n", parent_pid, rv);
+ log_error("%s p %d sanlock_acquire c %d error %d",
+ count_path, parent_pid, pid, rv);
exit(0);
}
- printf("%d sanlock_acquire done init %d lver %llu\n", parent_pid,
- init, (unsigned long long)lver);
+ log_debug("%s p %d sanlock_acquire c %d init %d lver %llu done",
+ count_path, parent_pid, pid, init,
+ (unsigned long long)lver);
/* tell child to resume */
write(pfd[1], "\n", 1);
@@ -965,26 +1004,28 @@ static int do_migrate(int argc, char *argv[])
rv = sanlock_inquire(-1, pid, 0, &res_count, &state);
if (rv < 0) {
- printf("%d sanlock_inquire error %d\n", parent_pid, rv);
+ log_error("%s p %d sanlock_inquire c %d error %d",
+ count_path, parent_pid, pid, rv);
goto fail;
}
- printf("%d sanlock_inquire done\n", parent_pid);
+ log_debug("%s p %d sanlock_inquire c %d done",
+ count_path, parent_pid, pid);
- kill(pid, SIGSTOP);
+ pause_pid(pid);
rv = sanlock_release(-1, pid, SANLK_REL_ALL, 0, NULL);
if (rv < 0) {
- printf("%d sanlock_release error %d\n", parent_pid, rv);
+ log_error("%s p %d sanlock_release c %d error %d",
+ count_path, parent_pid, pid, rv);
goto fail;
}
- printf("%d sanlock_release done\n", parent_pid);
+ log_debug("%s p %d sanlock_release c %d done",
+ count_path, parent_pid, pid);
write_migrate_incoming(state); /* to dest */
kill(pid, SIGKILL);
-
waitpid(pid, &status, 0);
-
free(state);
}
12 years, 5 months
src/delta_lease.c src/diskio.c src/diskio.h src/paxos_lease.c
by David Teigland
src/delta_lease.c | 50 ++++++++++++++++++++----------------------
src/diskio.c | 18 +++++++++++----
src/diskio.h | 3 ++
src/paxos_lease.c | 63 ++++++++++++++++++++++++++----------------------------
4 files changed, 71 insertions(+), 63 deletions(-)
New commits:
commit a7863d7d95a44de102637c6a27bffdd534487bee
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Apr 21 10:58:26 2011 -0500
sanlock: init with one write
Do one big write of all sectors during init,
instead of many single-sector writes
(for both lockspace and resource).
diff --git a/src/delta_lease.c b/src/delta_lease.c
index bf55469..63fb2a0 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -373,42 +373,40 @@ int delta_lease_init(struct timeout *ti,
char *space_name,
int max_hosts)
{
- struct leader_record leader;
+ struct leader_record *leader;
+ char *iobuf, **p_iobuf;
+ int iobuf_len;
int i, rv;
- uint64_t bb, be, sb, se;
- uint32_t ss;
- ss = disk->sector_size;
- bb = disk->offset;
- be = disk->offset + (disk->sector_size * max_hosts) - 1;
- sb = bb / ss;
- se = be / ss;
+ iobuf_len = disk->sector_size * max_hosts;
- memset(&leader, 0, sizeof(struct leader_record));
+ p_iobuf = &iobuf;
- leader.magic = DELTA_DISK_MAGIC;
- leader.version = DELTA_DISK_VERSION_MAJOR | DELTA_DISK_VERSION_MINOR;
- leader.sector_size = disk->sector_size;
- leader.max_hosts = 1;
- leader.timestamp = LEASE_FREE;
- strncpy(leader.space_name, space_name, NAME_ID_SIZE);
+ rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+ if (rv)
+ return rv;
+
+ memset(iobuf, 0, iobuf_len);
/* host_id N is block offset N-1 */
for (i = 0; i < max_hosts; i++) {
- memset(leader.resource_name, 0, NAME_ID_SIZE);
- snprintf(leader.resource_name, NAME_ID_SIZE, "host_id_%d", i+1);
- leader.checksum = leader_checksum(&leader);
-
- rv = write_sector(disk, i, (char *)&leader, sizeof(struct leader_record),
- ti->io_timeout_seconds, ti->use_aio, "delta_leader");
-
- if (rv < 0) {
- log_tool("delta_init write_sector %d rv %d", i, rv);
- return rv;
- }
+ leader = (struct leader_record *)(iobuf + (i * disk->sector_size));
+ leader->magic = DELTA_DISK_MAGIC;
+ leader->version = DELTA_DISK_VERSION_MAJOR | DELTA_DISK_VERSION_MINOR;
+ leader->sector_size = disk->sector_size;
+ leader->max_hosts = 1;
+ leader->timestamp = LEASE_FREE;
+ strncpy(leader->space_name, space_name, NAME_ID_SIZE);
+ snprintf(leader->resource_name, NAME_ID_SIZE, "host_id_%d", i+1);
+ leader->checksum = leader_checksum(leader);
}
+ rv = write_iobuf(disk->fd, disk->offset, iobuf, iobuf_len,
+ ti->io_timeout_seconds, ti->use_aio);
+ if (rv < 0)
+ return rv;
+
return 0;
}
diff --git a/src/diskio.c b/src/diskio.c
index d55f40f..fe55a6b 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -307,6 +307,17 @@ static int do_read_aio(int fd, uint64_t offset, char *buf, int len, int io_timeo
return -1;
}
+/* write aligned io buffer */
+
+int write_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
+ int io_timeout_seconds, int use_aio)
+{
+ if (use_aio)
+ return do_write_aio(fd, offset, iobuf, iobuf_len, io_timeout_seconds);
+ else
+ return do_write(fd, offset, iobuf, iobuf_len);
+}
+
static int _write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
uint32_t sector_count GNUC_UNUSED,
const char *data, int data_len,
@@ -332,11 +343,8 @@ static int _write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
memset(iobuf, 0, iobuf_len);
memcpy(iobuf, data, data_len);
- if (use_aio)
- rv = do_write_aio(disk->fd, offset, iobuf, iobuf_len, io_timeout_seconds);
- else
- rv = do_write(disk->fd, offset, iobuf, iobuf_len);
-
+ rv = write_iobuf(disk->fd, offset, iobuf, iobuf_len,
+ io_timeout_seconds, use_aio);
if (rv < 0)
log_error("write_sectors %s offset %llu rv %d %s",
blktype, (unsigned long long)offset, rv, disk->path);
diff --git a/src/diskio.h b/src/diskio.h
index 3ec3b27..a034e4c 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -13,6 +13,9 @@ void close_disks(struct sync_disk *disks, int num_disks);
int open_disks(struct sync_disk *disks, int num_disks);
int open_disks_fd(struct sync_disk *disks, int num_disks);
+int write_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
+ int io_timeout_seconds, int use_aio);
+
int write_sector(const struct sync_disk *disk, uint64_t sector_nr,
const char *data, int data_len, int io_timeout_seconds,
int use_aio, const char *blktype);
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index d882706..c846d47 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -84,6 +84,7 @@ static int write_dblock(struct timeout *ti,
return rv;
}
+#if 0
static int write_request(struct timeout *ti,
struct sync_disk *disk, struct request_record *rr)
{
@@ -93,6 +94,7 @@ static int write_request(struct timeout *ti,
ti->io_timeout_seconds, ti->use_aio, "request");
return rv;
}
+#endif
static int write_leader(struct timeout *ti,
struct sync_disk *disk, struct leader_record *lr)
@@ -988,43 +990,40 @@ int paxos_lease_release(struct timeout *ti,
int paxos_lease_init(struct timeout *ti,
struct token *token, int num_hosts, int max_hosts)
{
- struct leader_record leader;
- struct request_record req;
- struct paxos_dblock dblock;
- int d, q;
- uint32_t offset, ss;
- uint64_t bb, be, sb, se;
-
- offset = token->disks[0].offset;
- ss = token->disks[0].sector_size;
- bb = offset;
- be = offset + (ss * (max_hosts + 2) - 1);
- sb = bb / ss;
- se = be / ss;
-
- memset(&leader, 0, sizeof(struct leader_record));
- memset(&req, 0, sizeof(struct request_record));
- memset(&dblock, 0, sizeof(struct paxos_dblock));
+ char *iobuf, **p_iobuf;
+ struct leader_record *leader;
+ int iobuf_len;
+ int rv, d;
- leader.magic = PAXOS_DISK_MAGIC;
- leader.version = PAXOS_DISK_VERSION_MAJOR | PAXOS_DISK_VERSION_MINOR;
- leader.sector_size = token->disks[0].sector_size;
- leader.num_hosts = num_hosts;
- leader.max_hosts = max_hosts;
- leader.timestamp = LEASE_FREE;
- strncpy(leader.space_name, token->r.lockspace_name, NAME_ID_SIZE);
- strncpy(leader.resource_name, token->r.name, NAME_ID_SIZE);
- leader.checksum = leader_checksum(&leader);
+ iobuf_len = token->disks[0].sector_size * (2 + max_hosts);
+
+ p_iobuf = &iobuf;
+
+ rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+ if (rv)
+ return rv;
+
+ memset(iobuf, 0, iobuf_len);
+
+ leader = (struct leader_record *)iobuf;
+ leader->magic = PAXOS_DISK_MAGIC;
+ leader->version = PAXOS_DISK_VERSION_MAJOR | PAXOS_DISK_VERSION_MINOR;
+ leader->sector_size = token->disks[0].sector_size;
+ leader->num_hosts = num_hosts;
+ leader->max_hosts = max_hosts;
+ leader->timestamp = LEASE_FREE;
+ strncpy(leader->space_name, token->r.lockspace_name, NAME_ID_SIZE);
+ strncpy(leader->resource_name, token->r.name, NAME_ID_SIZE);
+ leader->checksum = leader_checksum(leader);
for (d = 0; d < token->r.num_disks; d++) {
- write_leader(ti, &token->disks[d], &leader);
- write_request(ti, &token->disks[d], &req);
- for (q = 0; q < max_hosts; q++)
- write_dblock(ti, &token->disks[d], q, &dblock);
+ rv = write_iobuf(token->disks[d].fd, token->disks[d].offset,
+ iobuf, iobuf_len,
+ ti->io_timeout_seconds, ti->use_aio);
+ if (rv < 0)
+ return rv;
}
- /* TODO: return error if cannot initialize majority of disks */
-
return 0;
}
12 years, 5 months
src/direct_lib.c src/diskio.c src/diskio.h src/host_id.c src/host_id.h src/main.c src/paxos_lease.c src/paxos_lease.h src/sanlock_internal.h src/sanlock_rv.h src/token_manager.c
by David Teigland
src/direct_lib.c | 11 +---------
src/diskio.c | 20 +++++++++++++++++++
src/diskio.h | 1
src/host_id.c | 41 ++++++++++++++++++---------------------
src/host_id.h | 2 -
src/main.c | 17 ++++++++++++++--
src/paxos_lease.c | 51 +++++++++++++++++++++++++++++++++++++++----------
src/paxos_lease.h | 5 +++-
src/sanlock_internal.h | 2 +
src/sanlock_rv.h | 2 +
src/token_manager.c | 6 ++++-
11 files changed, 112 insertions(+), 46 deletions(-)
New commits:
commit 4292a8dd2fefd8cb950e77f3b459524bd456e3fa
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Apr 20 17:31:11 2011 -0500
sanlock: fix host_id reads from paxos_acquire
the threads doing paxos_acquire were copying the host_id_disk,
including fd's, and reading the host_id disk using those fd's
(in use by the host_id thread) instead of opening the host_id
disk themselves to read from. This probably explains why the
host_id leader reads sometimes returned the wrong data.
Also add -R and -Q options to enable renewal debugging and
quiet acquire failure messages.
diff --git a/src/direct_lib.c b/src/direct_lib.c
index 09503cc..2c168ee 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -23,16 +23,9 @@ void log_level(int space_id GNUC_UNUSED, int token_id GNUC_UNUSED,
{
}
-int host_id_leader_read(struct timeout *ti GNUC_UNUSED,
- char *space_name GNUC_UNUSED,
- uint64_t host_id GNUC_UNUSED,
- struct leader_record *leader_ret GNUC_UNUSED);
+int host_id_disk_info(char *name GNUC_UNUSED, struct sync_disk *disk GNUC_UNUSED);
-int host_id_leader_read(struct timeout *ti GNUC_UNUSED,
- char *space_name GNUC_UNUSED,
- uint64_t host_id GNUC_UNUSED,
- struct leader_record *leader_ret GNUC_UNUSED)
+int host_id_disk_info(char *name GNUC_UNUSED, struct sync_disk *disk GNUC_UNUSED)
{
return -1;
}
-
diff --git a/src/diskio.c b/src/diskio.c
index 78a5fee..d55f40f 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -74,6 +74,26 @@ void close_disks(struct sync_disk *disks, int num_disks)
close(disks[d].fd);
}
+int open_disks_fd(struct sync_disk *disks, int num_disks)
+{
+ struct sync_disk *disk;
+ int num_opens = 0;
+ int d, fd;
+
+ for (d = 0; d < num_disks; d++) {
+ disk = &disks[d];
+ fd = open(disk->path, O_RDWR | O_DIRECT | O_SYNC, 0);
+ if (fd < 0) {
+ log_error("open error %d %s", fd, disk->path);
+ continue;
+ }
+
+ disk->fd = fd;
+ num_opens++;
+ }
+ return num_opens;
+}
+
/* return number of opened disks */
int open_disks(struct sync_disk *disks, int num_disks)
diff --git a/src/diskio.h b/src/diskio.h
index 24d55a3..3ec3b27 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -11,6 +11,7 @@
void close_disks(struct sync_disk *disks, int num_disks);
int open_disks(struct sync_disk *disks, int num_disks);
+int open_disks_fd(struct sync_disk *disks, int num_disks);
int write_sector(const struct sync_disk *disk, uint64_t sector_nr,
const char *data, int data_len, int io_timeout_seconds,
diff --git a/src/host_id.c b/src/host_id.c
index 2ac381b..fcd9b8b 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -102,23 +102,20 @@ int get_space_info(char *space_name, struct space *sp_out)
return rv;
}
-int host_id_leader_read(struct timeout *ti,
- char *space_name, uint64_t host_id,
- struct leader_record *leader_ret)
+int host_id_disk_info(char *name, struct sync_disk *disk)
{
struct space space;
int rv;
- rv = get_space_info(space_name, &space);
- if (rv < 0)
- return rv;
-
- rv = delta_lease_leader_read(ti, &space.host_id_disk, space_name,
- host_id, leader_ret, "host_id");
- if (rv < 0)
- return rv;
+ pthread_mutex_lock(&spaces_mutex);
+ rv = _get_space_info(name, &space);
+ if (!rv) {
+ memcpy(disk, &space.host_id_disk, sizeof(struct sync_disk));
+ disk->fd = -1;
+ }
+ pthread_mutex_unlock(&spaces_mutex);
- return 0;
+ return rv;
}
/*
@@ -146,10 +143,10 @@ int host_id_check(struct space *sp)
gap, (unsigned long long)last_success);
}
- /*
- log_space(sp, "host_id_check good %d %llu",
- gap, (unsigned long long)last_success);
- */
+ if (com.debug_renew > 1) {
+ log_space(sp, "host_id_check good %d %llu",
+ gap, (unsigned long long)last_success);
+ }
return 1;
}
@@ -248,12 +245,12 @@ static void *host_id_thread(void *arg_in)
gap = last_success - sp->lease_status.renewal_last_success;
sp->lease_status.renewal_last_success = last_success;
- /*
- log_space(sp, "host_id %llu renewed %llu len %d interval %d",
- (unsigned long long)host_id,
- (unsigned long long)last_success,
- delta_length, gap);
- */
+ if (com.debug_renew) {
+ log_space(sp, "host_id %llu renewed %llu len %d interval %d",
+ (unsigned long long)host_id,
+ (unsigned long long)last_success,
+ delta_length, gap);
+ }
if (!sp->thread_stop)
update_watchdog_file(sp, last_success);
diff --git a/src/host_id.h b/src/host_id.h
index ae230c8..f8e551d 100644
--- a/src/host_id.h
+++ b/src/host_id.h
@@ -12,7 +12,7 @@
int print_space_state(struct space *sp, char *str);
int _get_space_info(char *space_name, struct space *sp_out);
int get_space_info(char *space_name, struct space *sp_out);
-int host_id_leader_read(struct timeout *ti, char *space_name, uint64_t host_id, struct leader_record *leader_ret);
+int host_id_disk_info(char *name, struct sync_disk *disk);
int host_id_check(struct space *sp);
int add_space(struct space *sp);
int rem_space(char *name, struct sync_disk *disk, uint64_t host_id);
diff --git a/src/main.c b/src/main.c
index d5e3067..d30dd00 100644
--- a/src/main.c
+++ b/src/main.c
@@ -920,8 +920,13 @@ static void *cmd_acquire_thread(void *args_in)
rv = acquire_token(token, acquire_lver, new_num_hosts);
if (rv < 0) {
- log_errot(token, "cmd_acquire %d,%d,%d paxos_lease %d",
- cl_ci, cl_fd, cl_pid, rv);
+ if (rv == SANLK_LIVE_LEADER && com.quiet_fail) {
+ log_token(token, "cmd_acquire %d,%d,%d paxos_lease %d",
+ cl_ci, cl_fd, cl_pid, rv);
+ } else {
+ log_errot(token, "cmd_acquire %d,%d,%d paxos_lease %d",
+ cl_ci, cl_fd, cl_pid, rv);
+ }
result = rv;
goto done;
}
@@ -2273,6 +2278,8 @@ static void print_usage(void)
printf("\n");
printf("daemon\n");
printf(" -D debug: no fork and print all logging to stderr\n");
+ printf(" -R <num> debug renewal: log debug info about renewals\n");
+ printf(" -Q <num> quiet error messages for common lock contention\n");
printf(" -L <level> write logging at level and up to logfile (-1 none)\n");
printf(" -S <level> write logging at level and up to syslog (-1 none)\n");
printf(" -w <num> use watchdog through wdmd (1 yes, 0 no, default %d)\n", DEFAULT_USE_WATCHDOG);
@@ -2488,6 +2495,12 @@ static int read_command_line(int argc, char *argv[])
optionarg = argv[i];
switch (optchar) {
+ case 'Q':
+ com.quiet_fail = atoi(optionarg);
+ break;
+ case 'R':
+ com.debug_renew = atoi(optionarg);
+ break;
case 'L':
log_logfile_priority = atoi(optionarg);
break;
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index a11e020..d882706 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -26,6 +26,7 @@
#include "log.h"
#include "crc32c.h"
#include "host_id.h"
+#include "delta_lease.h"
#include "paxos_lease.h"
/*
@@ -658,7 +659,7 @@ static int write_new_leader(struct timeout *ti,
*/
int paxos_lease_acquire(struct timeout *ti,
- struct token *token, int force,
+ struct token *token, uint32_t flags,
struct leader_record *leader_ret,
uint64_t acquire_lver,
int new_num_hosts)
@@ -666,19 +667,20 @@ int paxos_lease_acquire(struct timeout *ti,
struct leader_record prev_leader;
struct leader_record new_leader;
struct leader_record host_id_leader;
+ struct sync_disk host_id_disk;
struct paxos_dblock dblock;
time_t start;
uint64_t last_timestamp = 0;
- int error;
+ int error, rv, disk_open = 0;
- log_token(token, "paxos_acquire begin lver %llu force %d",
- (unsigned long long)acquire_lver, force);
+ log_token(token, "paxos_acquire begin lver %llu flags %x",
+ (unsigned long long)acquire_lver, flags);
error = paxos_lease_leader_read(ti, token, &prev_leader, "paxos_acquire");
if (error < 0)
goto out;
- if (force)
+ if (flags & PAXOS_ACQUIRE_FORCE)
goto run;
if (prev_leader.timestamp == LEASE_FREE) {
@@ -703,12 +705,31 @@ int paxos_lease_acquire(struct timeout *ti,
log_token(token, "paxos_acquire check owner_id %llu",
(unsigned long long)prev_leader.owner_id);
+ memset(&host_id_disk, 0, sizeof(host_id_disk));
+
+ rv = host_id_disk_info(prev_leader.space_name, &host_id_disk);
+ if (rv < 0) {
+ log_errot(token, "paxos_acquire no lockspace info %.48s",
+ prev_leader.space_name);
+ error = SANLK_BAD_SPACE_NAME;
+ goto out;
+ }
+
+ disk_open = open_disks_fd(&host_id_disk, 1);
+ if (disk_open != 1) {
+ log_errot(token, "paxos_acquire cannot open host_id_disk");
+ error = SANLK_BAD_SPACE_DISK;
+ goto out;
+ }
+
start = time(NULL);
while (1) {
- error = host_id_leader_read(ti, prev_leader.space_name,
- prev_leader.owner_id,
- &host_id_leader);
+ error = delta_lease_leader_read(ti, &host_id_disk,
+ prev_leader.space_name,
+ prev_leader.owner_id,
+ &host_id_leader,
+ "paxos_acquire");
if (error < 0) {
log_errot(token, "paxos_acquire host_id %llu read %d",
(unsigned long long)prev_leader.owner_id,
@@ -782,8 +803,13 @@ int paxos_lease_acquire(struct timeout *ti,
/* the owner is renewing its host_id so it's alive */
if (last_timestamp && (host_id_leader.timestamp != last_timestamp)) {
- log_errot(token, "paxos_acquire host_id %llu alive",
- (unsigned long long)prev_leader.owner_id);
+ if (flags & PAXOS_ACQUIRE_QUIET_FAIL) {
+ log_token(token, "paxos_acquire host_id %llu alive",
+ (unsigned long long)prev_leader.owner_id);
+ } else {
+ log_errot(token, "paxos_acquire host_id %llu alive",
+ (unsigned long long)prev_leader.owner_id);
+ }
error = SANLK_LIVE_LEADER;
goto out;
}
@@ -857,8 +883,13 @@ int paxos_lease_acquire(struct timeout *ti,
goto out;
memcpy(leader_ret, &new_leader, sizeof(struct leader_record));
+
out:
log_token(token, "paxos_acquire done %d", error);
+
+ if (disk_open)
+ close_disks(&host_id_disk, 1);
+
return error;
}
diff --git a/src/paxos_lease.h b/src/paxos_lease.h
index b384561..89eca2b 100644
--- a/src/paxos_lease.h
+++ b/src/paxos_lease.h
@@ -9,12 +9,15 @@
#ifndef __PAXOS_LEASE_H__
#define __PAXOS_LEASE_H__
+#define PAXOS_ACQUIRE_FORCE 0x00000001
+#define PAXOS_ACQUIRE_QUIET_FAIL 0x00000002
+
uint32_t leader_checksum(struct leader_record *lr);
int majority_disks(struct token *token, int num);
int paxos_lease_leader_read(struct timeout *ti,
struct token *token, struct leader_record *leader_ret,
const char *caller);
-int paxos_lease_acquire(struct timeout *ti, struct token *token, int force,
+int paxos_lease_acquire(struct timeout *ti, struct token *token, uint32_t flags,
struct leader_record *leader_ret,
uint64_t acquire_lver,
int new_num_hosts);
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index f2bbbe3..f8c4e2f 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -268,6 +268,8 @@ struct command_line {
int type; /* COM_ */
int action; /* ACT_ */
int debug;
+ int debug_renew;
+ int quiet_fail;
int use_watchdog;
int high_priority;
int uid; /* -U */
diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h
index 53c6c4f..614e08c 100644
--- a/src/sanlock_rv.h
+++ b/src/sanlock_rv.h
@@ -40,5 +40,7 @@
#define SANLK_REACQUIRE_LVER -227
#define SANLK_BAD_LOCKSPACE -228
#define SANLK_OTHER_OWNER -229
+#define SANLK_BAD_SPACE_NAME -230
+#define SANLK_BAD_SPACE_DISK -231
#endif
diff --git a/src/token_manager.c b/src/token_manager.c
index 3b19a5d..1d215ff 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -123,8 +123,12 @@ int acquire_token(struct token *token, uint64_t acquire_lver,
{
struct leader_record leader_ret;
int rv;
+ uint32_t flags = 0;
- rv = paxos_lease_acquire(&to, token, 0, &leader_ret, acquire_lver,
+ if (com.quiet_fail)
+ flags |= PAXOS_ACQUIRE_QUIET_FAIL;
+
+ rv = paxos_lease_acquire(&to, token, flags, &leader_ret, acquire_lver,
new_num_hosts);
token->acquire_result = rv;
12 years, 5 months
src/main.c tests/devcount.c
by David Teigland
src/main.c | 51 ++++++++++++++++++++++-----------------------------
tests/devcount.c | 13 ++++++++-----
2 files changed, 30 insertions(+), 34 deletions(-)
New commits:
commit c411d521d6560f0e015211571f98931c1f792b40
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Apr 20 15:44:39 2011 -0500
sanlock: fix find_client_pid
don't release the pthread mutex between finding a
matching pid for a ci, and using that ci.
diff --git a/src/main.c b/src/main.c
index 5468990..d5e3067 100644
--- a/src/main.c
+++ b/src/main.c
@@ -296,23 +296,6 @@ static int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci))
return -1;
}
-static int find_client_pid(int pid)
-{
- struct client *cl;
- int i;
-
- for (i = 0; i < client_size; i++) {
- cl = &client[i];
- pthread_mutex_lock(&cl->mutex);
- if (cl->used && cl->pid == pid) {
- pthread_mutex_unlock(&cl->mutex);
- return i;
- }
- pthread_mutex_unlock(&cl->mutex);
- }
- return -1;
-}
-
static int get_peer_pid(int fd, int *pid)
{
struct ucred cred;
@@ -1704,30 +1687,40 @@ static void process_cmd_thread_resource(int ci_in, struct sm_header *h_recv)
struct sm_header h;
struct client *cl;
int result = 0;
- int rv, ci_target;
+ int rv, i, ci_target;
+
+ ca = malloc(sizeof(struct cmd_args));
+ if (!ca) {
+ result = -ENOMEM;
+ goto fail;
+ }
if (h_recv->data2 != -1) {
/* lease for another registered client with pid specified by data2 */
- ci_target = find_client_pid(h_recv->data2);
+ ci_target = -1;
+
+ for (i = 0; i < client_size; i++) {
+ cl = &client[i];
+ pthread_mutex_lock(&cl->mutex);
+ if (cl->pid != h_recv->data2) {
+ pthread_mutex_unlock(&cl->mutex);
+ continue;
+ }
+ ci_target = i;
+ break;
+ }
if (ci_target < 0) {
result = -ESRCH;
goto fail;
}
} else {
/* lease for this registered client */
- ci_target = ci_in;
- }
- ca = malloc(sizeof(struct cmd_args));
- if (!ca) {
- result = -ENOMEM;
- goto fail;
+ ci_target = ci_in;
+ cl = &client[ci_target];
+ pthread_mutex_lock(&cl->mutex);
}
- cl = &client[ci_target];
-
- pthread_mutex_lock(&cl->mutex);
-
if (!cl->used) {
log_error("cmd %d %d,%d,%d not used",
h_recv->cmd, ci_target, cl->fd, cl->pid);
diff --git a/tests/devcount.c b/tests/devcount.c
index 074b495..dcfe60a 100644
--- a/tests/devcount.c
+++ b/tests/devcount.c
@@ -1001,6 +1001,8 @@ static int do_migrate(int argc, char *argv[])
* dd if=/dev/zero of=<count_disk> bs=512 count=24
*/
+#define INIT_NUM_HOSTS 8
+
int do_init(int argc, char *argv[])
{
char resbuf[sizeof(struct sanlk_resource) + sizeof(struct sanlk_disk)];
@@ -1020,8 +1022,8 @@ int do_init(int argc, char *argv[])
memset(command, 0, sizeof(command));
snprintf(command, sizeof(command),
- "sanlock direct init -n 8 -s devcount:0:%s:0",
- argv[2]);
+ "sanlock direct init -n %d -s devcount:0:%s:0",
+ INIT_NUM_HOSTS, argv[2]);
printf("%s\n", command);
@@ -1033,7 +1035,8 @@ int do_init(int argc, char *argv[])
snprintf(command, sizeof(command),
- "sanlock direct init -n 8 -r devcount:resource%s:%s:1024000",
+ "sanlock direct init -n %d -r devcount:resource%s:%s:1024000",
+ INIT_NUM_HOSTS,
argv[3],
argv[2]);
@@ -1045,7 +1048,7 @@ int do_init(int argc, char *argv[])
strcpy(ls.name, "devcount");
strcpy(ls.host_id_disk.path, argv[2]);
- rv = sanlock_direct_init(&ls, NULL, 0, 8, 0);
+ rv = sanlock_direct_init(&ls, NULL, 0, INIT_NUM_HOSTS, 0);
if (rv < 0) {
printf("sanlock_direct_init lockspace error %d\n", rv);
return -1;
@@ -1059,7 +1062,7 @@ int do_init(int argc, char *argv[])
strcpy(res->disks[0].path, argv[2]);
res->disks[0].offset = 1024000;
- rv = sanlock_direct_init(NULL, res, 0, 8, 0);
+ rv = sanlock_direct_init(NULL, res, 0, INIT_NUM_HOSTS, 0);
if (rv < 0) {
printf("sanlock_direct_init resource error %d\n", rv);
return -1;
12 years, 5 months
src/client_admin.c src/log.c src/log.h src/main.c
by David Teigland
src/client_admin.c | 26 ++++++++++----------------
src/log.c | 7 +------
src/log.h | 2 +-
src/main.c | 8 ++------
4 files changed, 14 insertions(+), 29 deletions(-)
New commits:
commit 6899ed3210b8fdab6b1b8535c54ece954e60a856
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Apr 20 11:55:54 2011 -0500
sanlock: fix log_dump
by not bothering with the size
diff --git a/src/client_admin.c b/src/client_admin.c
index 2c90ad1..c488b50 100644
--- a/src/client_admin.c
+++ b/src/client_admin.c
@@ -47,8 +47,8 @@ int sanlock_shutdown(void)
int sanlock_log_dump(void)
{
struct sm_header h;
- char *buf;
- int fd, rv, len;
+ char buf[4096];
+ int fd, rv;
fd = send_command(SM_CMD_LOG_DUMP, 0);
if (fd < 0)
@@ -62,23 +62,17 @@ int sanlock_log_dump(void)
goto out;
}
- len = h.length - sizeof(h);
+ while (1) {
+ memset(buf, 0, sizeof(buf));
- buf = malloc(len);
- if (!buf) {
- rv = -ENOMEM;
- goto out;
- }
- memset(buf, 0, len);
+ rv = recv(fd, buf, sizeof(buf) - 1, MSG_WAITALL);
- rv = recv(fd, buf, len, MSG_WAITALL);
- if (rv != len) {
- rv = -1;
- goto out;
+ if (rv > 0)
+ printf("%s", buf);
+ else
+ break;
}
-
- rv = 0;
- printf("%s\n", buf);
+ printf("\n");
out:
close(fd);
return rv;
diff --git a/src/log.c b/src/log.c
index ee58898..884507e 100644
--- a/src/log.c
+++ b/src/log.c
@@ -175,15 +175,10 @@ static void write_dropped(int level, int num)
write_entry(level, str);
}
-void write_log_dump(int fd, struct sm_header *hd)
+void write_log_dump(int fd)
{
pthread_mutex_lock(&log_mutex);
- hd->length = sizeof(struct sm_header);
- hd->length += log_wrap ? SM_LOG_DUMP_SIZE : log_point;
-
- send(fd, hd, sizeof(struct sm_header), MSG_DONTWAIT);
-
if (log_wrap)
send(fd, log_dump + log_point, SM_LOG_DUMP_SIZE - log_point, MSG_DONTWAIT);
diff --git a/src/log.h b/src/log.h
index b065787..1aef3b2 100644
--- a/src/log.h
+++ b/src/log.h
@@ -14,7 +14,7 @@ void log_level(int space_id, int token_id, int level, const char *fmt, ...)
int setup_logging(void);
void close_logging(void);
-void write_log_dump(int fd, struct sm_header *hd);
+void write_log_dump(int fd);
#define log_debug(fmt, args...) log_level(0, 0, LOG_DEBUG, fmt, ##args)
#define log_space(space, fmt, args...) log_level(space->space_id, 0, LOG_DEBUG, fmt, ##args)
diff --git a/src/main.c b/src/main.c
index c1e8abd..5468990 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1642,13 +1642,9 @@ static void cmd_status(int fd, struct sm_header *h_recv)
static void cmd_log_dump(int fd, struct sm_header *h_recv)
{
- struct sm_header h;
-
- memcpy(&h, h_recv, sizeof(struct sm_header));
-
- /* can't send header until taking log_mutex to find the length */
+ send(fd, h_recv, sizeof(struct sm_header), MSG_DONTWAIT);
- write_log_dump(fd, &h);
+ write_log_dump(fd);
}
static void process_cmd_thread_lockspace(int ci_in, struct sm_header *h_recv)
12 years, 5 months
sanlock.spec
by David Teigland
sanlock.spec | 4 ++++
1 file changed, 4 insertions(+)
New commits:
commit 044aeaa2ff7877ec00fb0aae6845972267da5dca
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Wed Apr 20 13:59:34 2011 +0100
rpm: add sanlock user and group
diff --git a/sanlock.spec b/sanlock.spec
index 5b5fdee..2b8798e 100644
--- a/sanlock.spec
+++ b/sanlock.spec
@@ -47,6 +47,10 @@ install -D -m 755 init.d/wdmd $RPM_BUILD_ROOT/%{_initddir}/wdmd
%clean
rm -rf $RPM_BUILD_ROOT
+%pre
+/usr/sbin/useradd -c "Sanlock" -s /sbin/nologin -r \
+ -d /var/run/sanlock sanlock 2> /dev/null || :
+
%post
/sbin/chkconfig --add sanlock
/sbin/chkconfig --add wdmd
12 years, 5 months
src/client_msg.c src/client_msg.h src/main.c src/sanlock_internal.h
by David Teigland
src/client_msg.c | 35 ++++++++++++++++++-----------------
src/client_msg.h | 3 ++-
src/main.c | 43 +++++++++++++++++++++++++++++++++++++++++--
src/sanlock_internal.h | 5 +++++
4 files changed, 66 insertions(+), 20 deletions(-)
New commits:
commit d8805d428ff7c8994a43cc5265eb5379f4db1057
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Wed Apr 20 13:59:33 2011 +0100
daemon: configurable socket permissions
diff --git a/src/client_msg.c b/src/client_msg.c
index 3240222..11d5ab5 100644
--- a/src/client_msg.c
+++ b/src/client_msg.c
@@ -43,7 +43,8 @@ static int get_socket_address(struct sockaddr_un *addr)
return 0;
}
-int setup_listener_socket(int *listener_socket)
+int setup_listener_socket(int *listener_socket,
+ uid_t owner, gid_t group, mode_t mode)
{
int rv, s;
struct sockaddr_un addr;
@@ -58,27 +59,27 @@ int setup_listener_socket(int *listener_socket)
unlink(addr.sun_path);
rv = bind(s, (struct sockaddr *) &addr, sizeof(struct sockaddr_un));
- if (rv < 0) {
- rv = -1;
- close(s);
- return rv;
- }
+ if (rv < 0)
+ goto exit_fail;
+
+ rv = chmod(addr.sun_path, mode);
+ if (rv < 0)
+ goto exit_fail;
+
+ rv = chown(addr.sun_path, owner, group);
+ if (rv < 0)
+ goto exit_fail;
rv = listen(s, 5);
- if (rv < 0) {
- rv = -1;
- close(s);
- return rv;
- }
+ if (rv < 0)
+ goto exit_fail;
- rv = fchmod(s, 666);
- if (rv < 0) {
- rv = -1;
- close(s);
- return rv;
- }
*listener_socket = s;
return 0;
+
+ exit_fail:
+ close(s);
+ return -1;
}
int connect_socket(int *sock_fd)
diff --git a/src/client_msg.h b/src/client_msg.h
index 12b206a..828acf5 100644
--- a/src/client_msg.h
+++ b/src/client_msg.h
@@ -40,7 +40,8 @@ struct sanlk_state {
char str[0]; /* string of internal state */
};
-int setup_listener_socket(int *listener_socket);
+int setup_listener_socket(int *listener_socket,
+ uid_t owner, gid_t group, mode_t mode);
int connect_socket(int *sock_fd);
int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
uint32_t data, uint32_t data2);
diff --git a/src/main.c b/src/main.c
index 5587957..c1e8abd 100644
--- a/src/main.c
+++ b/src/main.c
@@ -21,6 +21,8 @@
#include <pthread.h>
#include <poll.h>
#include <sched.h>
+#include <pwd.h>
+#include <grp.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
@@ -1956,7 +1958,7 @@ static int setup_listener(void)
{
int rv, fd, ci;
- rv = setup_listener_socket(&fd);
+ rv = setup_listener_socket(&fd, com.uid, com.gid, DEFAULT_SOCKET_MODE);
if (rv < 0)
return rv;
@@ -2004,7 +2006,7 @@ static void setup_priority(void)
rv = sched_get_priority_max(SCHED_RR);
if (rv < 0) {
- log_error("could not get max scheduler priority err %d", errno);
+ log_error("could not get max scheduler priority err %d", errno);
return;
}
@@ -2087,6 +2089,34 @@ static int do_daemon(void)
return rv;
}
+static int user_to_uid(char *arg)
+{
+ struct passwd *pw;
+
+ pw = getpwnam(arg);
+ if (pw == NULL) {
+ log_error("user '%s' not found, "
+ "using uid: %i", arg, DEFAULT_SOCKET_UID);
+ return DEFAULT_SOCKET_UID;
+ }
+
+ return pw->pw_uid;
+}
+
+static int group_to_gid(char *arg)
+{
+ struct group *gr;
+
+ gr = getgrnam(arg);
+ if (gr == NULL) {
+ log_error("group '%s' not found, "
+ "using uid: %i", arg, DEFAULT_SOCKET_UID);
+ return DEFAULT_SOCKET_GID;
+ }
+
+ return gr->gr_gid;
+}
+
static int parse_arg_lockspace(char *arg)
{
sanlock_str_to_lockspace(arg, &com.lockspace);
@@ -2511,6 +2541,13 @@ static int read_command_line(int argc, char *argv[])
parse_arg_resource(optionarg); /* com.res_args[] */
break;
+ case 'U':
+ com.uid = user_to_uid(optionarg);
+ break;
+ case 'G':
+ com.gid = group_to_gid(optionarg);
+ break;
+
case 'c':
begin_command = 1;
break;
@@ -2782,6 +2819,8 @@ int main(int argc, char *argv[])
com.max_hosts = DEFAULT_MAX_HOSTS;
com.use_watchdog = DEFAULT_USE_WATCHDOG;
com.high_priority = DEFAULT_HIGH_PRIORITY;
+ com.uid = DEFAULT_SOCKET_UID;
+ com.gid = DEFAULT_SOCKET_GID;
com.pid = -1;
to.use_aio = DEFAULT_USE_AIO;
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index f0108ea..f2bbbe3 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -260,6 +260,9 @@ EXTERN struct timeout to;
#define DEFAULT_USE_WATCHDOG 1
#define DEFAULT_HIGH_PRIORITY 1
+#define DEFAULT_SOCKET_UID 0
+#define DEFAULT_SOCKET_GID 0
+#define DEFAULT_SOCKET_MODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP)
struct command_line {
int type; /* COM_ */
@@ -267,6 +270,8 @@ struct command_line {
int debug;
int use_watchdog;
int high_priority;
+ int uid; /* -U */
+ int gid; /* -G */
int pid; /* -p */
uint64_t local_host_id; /* -i */
uint64_t local_host_generation; /* -g */
12 years, 5 months
src/delta_lease.c src/host_id.c src/host_id.h src/main.c src/sanlock_internal.h
by David Teigland
src/delta_lease.c | 14 +++
src/host_id.c | 181 ++++++++++++++++++++++++-------------------------
src/host_id.h | 2
src/main.c | 4 -
src/sanlock_internal.h | 14 +--
5 files changed, 107 insertions(+), 108 deletions(-)
New commits:
commit 8fc52ae57a456e158ccec97f85e3055b8650ab12
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Apr 20 11:15:53 2011 -0500
sanlock: renewal changes
Remove the pthread cond var and cond_timedwait and
replace with a much simpler sleep. This seems to
eliminates the erratic renewal behavior, in addition
to making the code simpler.
In the renewal thread, only use the leader timestamp
when the renewal was successful.
Add/change debugging messages related to renewals.
Use local variables in renewal thread to guard against
corrupting the disk if a bug causes the sp struct to be
corrupted.
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 19a8b61..bf55469 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -281,7 +281,7 @@ int delta_lease_renew(struct timeout *ti,
uint64_t host_id,
struct leader_record *leader_ret)
{
- struct leader_record leader;
+ struct leader_record leader, leader_read;
uint64_t new_ts;
int error, delay;
@@ -316,13 +316,23 @@ int delta_lease_renew(struct timeout *ti,
/* log_space(sp, "delta_renew sleep 2d %d", delay); */
sleep(delay);
- error = delta_lease_leader_read(ti, disk, space_name, host_id, &leader,
+ error = delta_lease_leader_read(ti, disk, space_name, host_id, &leader_read,
"delta_renew_check");
if (error < 0)
return error;
+ /*
if ((leader.timestamp != new_ts) || (leader.owner_id != our_host_id))
return SANLK_BAD_LEADER;
+ */
+
+ if (memcmp(&leader, &leader_read, sizeof(struct leader_record))) {
+ log_erros(sp, "delta_renew %llu reread mismatch",
+ (unsigned long long)host_id);
+ log_leader_error(0, space_name, host_id, disk, &leader, "delta_renew_write");
+ log_leader_error(0, space_name, host_id, disk, &leader_read, "delta_renew_reread");
+ return SANLK_BAD_LEADER;
+ }
memcpy(leader_ret, &leader, sizeof(struct leader_record));
return SANLK_OK;
diff --git a/src/host_id.c b/src/host_id.c
index 928d57b..2ac381b 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -44,29 +44,19 @@ int print_space_state(struct space *sp, char *str)
"killing_pids=%d "
"acquire_last_result=%d "
"renewal_last_result=%d "
- "release_last_result=%d "
- "acquire_last_time=%llu "
- "acquire_good_time=%llu "
- "renewal_last_time=%llu "
- "renewal_good_time=%llu "
- "release_last_time=%llu "
- "release_good_time=%llu "
- "max_renewal_time=%llu "
- "max_renewal_interval=%d",
+ "acquire_last_attempt=%llu "
+ "acquire_last_success=%llu "
+ "renewal_last_attempt=%llu "
+ "renewal_last_success=%llu",
sp->space_id,
(unsigned long long)sp->host_generation,
sp->killing_pids,
sp->lease_status.acquire_last_result,
sp->lease_status.renewal_last_result,
- sp->lease_status.release_last_result,
- (unsigned long long)sp->lease_status.acquire_last_time,
- (unsigned long long)sp->lease_status.acquire_good_time,
- (unsigned long long)sp->lease_status.renewal_last_time,
- (unsigned long long)sp->lease_status.renewal_good_time,
- (unsigned long long)sp->lease_status.release_last_time,
- (unsigned long long)sp->lease_status.release_good_time,
- (unsigned long long)sp->lease_status.max_renewal_time,
- sp->lease_status.max_renewal_interval);
+ (unsigned long long)sp->lease_status.acquire_last_attempt,
+ (unsigned long long)sp->lease_status.acquire_last_success,
+ (unsigned long long)sp->lease_status.renewal_last_attempt,
+ (unsigned long long)sp->lease_status.renewal_last_success);
return strlen(str);
}
@@ -135,54 +125,65 @@ int host_id_leader_read(struct timeout *ti,
* check if our_host_id_thread has renewed within timeout
*/
-int host_id_renewed(struct space *sp)
+int host_id_check(struct space *sp)
{
- uint64_t good_time;
- int good_diff;
+ uint64_t last_success;
+ int gap;
pthread_mutex_lock(&sp->mutex);
- good_time = sp->lease_status.renewal_good_time;
+ last_success = sp->lease_status.renewal_last_success;
pthread_mutex_unlock(&sp->mutex);
- good_diff = time(NULL) - good_time;
+ gap = time(NULL) - last_success;
- if (good_diff >= to.host_id_renewal_fail_seconds) {
- log_erros(sp, "host_id_renewed failed %d", good_diff);
+ if (gap >= to.host_id_renewal_fail_seconds) {
+ log_erros(sp, "host_id_check failed %d", gap);
return 0;
}
- if (good_diff >= to.host_id_renewal_warn_seconds) {
- log_erros(sp, "host_id_renewed warning %d last good %llu",
- good_diff,
- (unsigned long long)good_time);
+ if (gap >= to.host_id_renewal_warn_seconds) {
+ log_erros(sp, "host_id_check warning %d last_success %llu",
+ gap, (unsigned long long)last_success);
}
+ /*
+ log_space(sp, "host_id_check good %d %llu",
+ gap, (unsigned long long)last_success);
+ */
+
return 1;
}
static void *host_id_thread(void *arg_in)
{
+ struct space *sp;
+ char space_name[NAME_ID_SIZE];
+ struct sync_disk host_id_disk;
struct leader_record leader;
- struct timespec renew_time;
- struct space *sp = (struct space *)arg_in;
- uint64_t our_host_id;
- uint64_t t;
- uint64_t good_time;
- int good_diff;
- int rv, stop, result, dl_result;
+ uint64_t host_id;
+ time_t last_attempt, last_success;
+ int rv, stop = 0, result, delta_result, delta_length, gap;
- our_host_id = sp->host_id;
+ sp = (struct space *)arg_in;
+ host_id = sp->host_id;
+ memcpy(&space_name, sp->space_name, NAME_ID_SIZE);
+ memcpy(&host_id_disk, &sp->host_id_disk, sizeof(host_id_disk));
- result = delta_lease_acquire(&to, sp, &sp->host_id_disk, sp->space_name,
- our_host_id, sp->host_id, &leader);
- dl_result = result;
- t = leader.timestamp;
+ last_attempt = time(NULL);
+
+ result = delta_lease_acquire(&to, sp, &host_id_disk, space_name,
+ host_id, host_id, &leader);
+ delta_result = result;
+ delta_length = time(NULL) - last_attempt;
+
+ if (result == SANLK_OK)
+ last_success = leader.timestamp;
/* we need to start the watchdog after we acquire the host_id but
before we allow any pid's to begin running */
if (result == SANLK_OK) {
- rv = create_watchdog_file(sp, t);
+ rv = create_watchdog_file(sp, last_success);
if (rv < 0) {
log_erros(sp, "create_watchdog failed %d", rv);
result = SANLK_ERROR;
@@ -191,92 +192,86 @@ static void *host_id_thread(void *arg_in)
pthread_mutex_lock(&sp->mutex);
sp->lease_status.acquire_last_result = result;
- sp->lease_status.acquire_last_time = t;
+ sp->lease_status.acquire_last_attempt = last_attempt;
if (result == SANLK_OK)
- sp->lease_status.acquire_good_time = t;
+ sp->lease_status.acquire_last_success = last_success;
sp->lease_status.renewal_last_result = result;
- sp->lease_status.renewal_last_time = t;
+ sp->lease_status.renewal_last_attempt = last_attempt;
if (result == SANLK_OK)
- sp->lease_status.renewal_good_time = t;
- pthread_cond_broadcast(&sp->cond);
+ sp->lease_status.renewal_last_success = last_success;
pthread_mutex_unlock(&sp->mutex);
if (result < 0) {
log_erros(sp, "host_id %llu acquire failed %d",
- (unsigned long long)sp->host_id, result);
+ (unsigned long long)host_id, result);
goto out;
}
log_erros(sp, "host_id %llu generation %llu acquire %llu",
- (unsigned long long)sp->host_id,
+ (unsigned long long)host_id,
(unsigned long long)leader.owner_generation,
- (unsigned long long)t);
+ (unsigned long long)leader.timestamp);
sp->host_generation = leader.owner_generation;
- good_time = t;
- good_diff = 0;
- renew_time.tv_sec = t;
-
while (1) {
+ if (stop)
+ break;
+
+ sleep(1);
+
pthread_mutex_lock(&sp->mutex);
- renew_time.tv_sec += to.host_id_renewal_seconds;
- rv = 0;
- while (!sp->thread_stop && rv == 0) {
- rv = pthread_cond_timedwait(&sp->cond,
- &sp->mutex,
- &renew_time);
- }
stop = sp->thread_stop;
pthread_mutex_unlock(&sp->mutex);
+
if (stop)
break;
- clock_gettime(CLOCK_REALTIME, &renew_time);
+ if (time(NULL) - last_success < to.host_id_renewal_seconds)
+ continue;
+
+ last_attempt = time(NULL);
+
+ result = delta_lease_renew(&to, sp, &host_id_disk, space_name,
+ host_id, host_id, &leader);
+ delta_result = result;
+ delta_length = time(NULL) - last_attempt;
- result = delta_lease_renew(&to, sp, &sp->host_id_disk,
- sp->space_name, our_host_id,
- sp->host_id, &leader);
- dl_result = result;
- t = leader.timestamp;
+ if (result == SANLK_OK)
+ last_success = leader.timestamp;
pthread_mutex_lock(&sp->mutex);
sp->lease_status.renewal_last_result = result;
- sp->lease_status.renewal_last_time = t;
+ sp->lease_status.renewal_last_attempt = last_attempt;
if (result == SANLK_OK) {
- sp->lease_status.renewal_good_time = t;
-
- good_diff = t - good_time;
- good_time = t;
-
- if (good_diff > sp->lease_status.max_renewal_interval) {
- sp->lease_status.max_renewal_interval = good_diff;
- sp->lease_status.max_renewal_time = t;
- }
+ gap = last_success - sp->lease_status.renewal_last_success;
+ sp->lease_status.renewal_last_success = last_success;
/*
- log_space(sp, "host_id %llu renewal %llu interval %d",
- (unsigned long long)sp->host_id,
- (unsigned long long)t, good_diff);
+ log_space(sp, "host_id %llu renewed %llu len %d interval %d",
+ (unsigned long long)host_id,
+ (unsigned long long)last_success,
+ delta_length, gap);
*/
if (!sp->thread_stop)
- update_watchdog_file(sp, t);
+ update_watchdog_file(sp, last_success);
} else {
- log_erros(sp, "host_id %llu renewal error %d last good %llu",
- (unsigned long long)sp->host_id, result,
- (unsigned long long)sp->lease_status.renewal_good_time);
+ log_erros(sp, "host_id %llu renewal error %d len %d last_success %llu",
+ (unsigned long long)host_id, result, delta_length,
+ (unsigned long long)sp->lease_status.renewal_last_success);
}
+ stop = sp->thread_stop;
pthread_mutex_unlock(&sp->mutex);
}
/* unlink called below to get it done ASAP */
close_watchdog_file(sp);
out:
- if (dl_result == SANLK_OK)
- delta_lease_release(&to, sp, &sp->host_id_disk, sp->space_name,
- sp->host_id, &leader, &leader);
+ if (delta_result == SANLK_OK)
+ delta_lease_release(&to, sp, &host_id_disk, space_name,
+ host_id, &leader, &leader);
return NULL;
}
@@ -334,12 +329,14 @@ int add_space(struct space *sp)
goto fail_close;
}
- pthread_mutex_lock(&sp->mutex);
- while (!sp->lease_status.acquire_last_result) {
- pthread_cond_wait(&sp->cond, &sp->mutex);
+ while (1) {
+ pthread_mutex_lock(&sp->mutex);
+ result = sp->lease_status.acquire_last_result;
+ pthread_mutex_unlock(&sp->mutex);
+ if (result)
+ break;
+ sleep(1);
}
- result = sp->lease_status.acquire_last_result;
- pthread_mutex_unlock(&sp->mutex);
if (result != SANLK_OK) {
/* the thread exits right away if acquire fails */
diff --git a/src/host_id.h b/src/host_id.h
index eec3780..ae230c8 100644
--- a/src/host_id.h
+++ b/src/host_id.h
@@ -13,7 +13,7 @@ int print_space_state(struct space *sp, char *str);
int _get_space_info(char *space_name, struct space *sp_out);
int get_space_info(char *space_name, struct space *sp_out);
int host_id_leader_read(struct timeout *ti, char *space_name, uint64_t host_id, struct leader_record *leader_ret);
-int host_id_renewed(struct space *sp);
+int host_id_check(struct space *sp);
int add_space(struct space *sp);
int rem_space(char *name, struct sync_disk *disk, uint64_t host_id);
void clear_spaces(int wait);
diff --git a/src/main.c b/src/main.c
index a5d251b..5587957 100644
--- a/src/main.c
+++ b/src/main.c
@@ -579,7 +579,6 @@ static int main_loop(void)
pthread_mutex_lock(&sp->mutex);
sp->thread_stop = 1;
unlink_watchdog_file(sp);
- pthread_cond_broadcast(&sp->cond);
pthread_mutex_unlock(&sp->mutex);
list_move(&sp->list, &spaces_remove);
} else {
@@ -588,7 +587,7 @@ static int main_loop(void)
check_interval = RECOVERY_CHECK_INTERVAL;
} else {
if (external_shutdown || sp->external_remove ||
- !host_id_renewed(sp)) {
+ !host_id_check(sp)) {
log_space(sp, "set killing_pids");
sp->killing_pids = 1;
kill_pids(sp);
@@ -1345,7 +1344,6 @@ static void *cmd_add_lockspace_thread(void *args_in)
memcpy(&sp->host_id_disk, &lockspace.host_id_disk,
sizeof(struct sanlk_disk));
pthread_mutex_init(&sp->mutex, NULL);
- pthread_cond_init(&sp->cond, NULL);
pthread_mutex_lock(&spaces_mutex);
sp->space_id = space_id_counter++;
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 60ffb2d..f0108ea 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -91,15 +91,10 @@ struct token {
struct lease_status {
int acquire_last_result;
int renewal_last_result;
- int release_last_result;
- int max_renewal_interval;
- uint64_t acquire_last_time;
- uint64_t acquire_good_time;
- uint64_t renewal_last_time;
- uint64_t renewal_good_time;
- uint64_t release_last_time;
- uint64_t release_good_time;
- uint64_t max_renewal_time;
+ uint64_t acquire_last_attempt;
+ uint64_t acquire_last_success;
+ uint64_t renewal_last_attempt;
+ uint64_t renewal_last_success;
};
struct space {
@@ -114,7 +109,6 @@ struct space {
int thread_stop;
pthread_t thread;
pthread_mutex_t mutex; /* protects lease_status, thread_stop */
- pthread_cond_t cond;
struct lease_status lease_status;
int wd_fd;
};
12 years, 5 months