src/client.c src/host_id.c src/host_id.h src/main.c src/sanlock_internal.h src/sanlock_resource.h src/sanlock_sock.h src/token_manager.c src/token_manager.h
by David Teigland
src/client.c | 39 ++++++++
src/host_id.c | 31 +++---
src/host_id.h | 1
src/main.c | 78 ++++++++++++++--
src/sanlock_internal.h | 15 +--
src/sanlock_resource.h | 18 ++-
src/sanlock_sock.h | 2
src/token_manager.c | 238 +++++++++++++++++++++++++++++++++++++++++--------
src/token_manager.h | 2
9 files changed, 359 insertions(+), 65 deletions(-)
New commits:
commit 5f333741f2093f92113e0f0e1adc452d1e20be4c
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Aug 31 16:00:53 2011 -0500
sanlock: examine resource requests
When we find our host_id set in another host's delta lease,
or when the examine API/command is called, scan all resource
request blocks for any requests we should process.
diff --git a/src/client.c b/src/client.c
index 3115e07..4987867 100644
--- a/src/client.c
+++ b/src/client.c
@@ -690,6 +690,45 @@ int sanlock_request(uint32_t flags, uint32_t force_mode,
return rv;
}
+int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
+ struct sanlk_resource *res)
+{
+ char *data;
+ int rv, fd, cmd, datalen;
+
+ if (!ls && !res)
+ return -EINVAL;
+
+ rv = connect_socket(&fd);
+ if (rv < 0)
+ return rv;
+
+ if (ls && ls->host_id_disk.path[0]) {
+ cmd = SM_CMD_EXAMINE_LOCKSPACE;
+ datalen = sizeof(struct sanlk_lockspace);
+ data = (char *)ls;
+ } else {
+ cmd = SM_CMD_EXAMINE_RESOURCE;
+ datalen = sizeof(struct sanlk_resource);
+ data = (char *)res;
+ }
+
+ rv = send_header(fd, cmd, flags, datalen, 0, 0);
+ if (rv < 0)
+ goto out;
+
+ rv = send(fd, data, datalen, 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ rv = recv_result(fd);
+ out:
+ close(fd);
+ return rv;
+}
+
/*
* convert from struct sanlk_resource to string with format:
* <lockspace_name>:<resource_name>:<path>:<offset>[:<path>:<offset>...]:<lver>
diff --git a/src/host_id.c b/src/host_id.c
index a1cc468..badccd9 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -32,6 +32,7 @@
#include "watchdog.h"
#include "task.h"
#include "direct.h"
+#include "token_manager.h"
static unsigned int space_id_counter = 1;
@@ -146,6 +147,17 @@ int get_space_info(char *space_name, struct space *sp_out)
return rv;
}
+void block_watchdog_updates(char *space_name)
+{
+ struct space *sp;
+
+ pthread_mutex_lock(&spaces_mutex);
+ sp = _search_space(space_name, NULL, 0, &spaces, NULL, NULL);
+ if (sp)
+ sp->block_watchdog_updates = 1;
+ pthread_mutex_unlock(&spaces_mutex);
+}
+
int host_id_disk_info(char *name, struct sync_disk *disk)
{
struct space space;
@@ -269,7 +281,7 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
struct host_info *info;
char *bitmap;
uint64_t now;
- int i, new = 0;
+ int i, new;
disk = &sp->host_id_disk;
@@ -309,18 +321,11 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
log_space(sp, "request from host_id %d", i+1);
info->last_req = now;
- new++;
+ new = 1;
}
- /* TODO: add a thread that will periodically scan spaces and
- for any with req_count > req_check, scan request blocks for
- all locally held paxos leases in that lockspace. */
-
- if (new) {
- pthread_mutex_lock(&sp->mutex);
- sp->req_count++;
- pthread_mutex_unlock(&sp->mutex);
- }
+ if (new)
+ set_resource_examine(sp->space_name, NULL);
}
/*
@@ -546,7 +551,9 @@ static void *lockspace_thread(void *arg_in)
* pet the watchdog
*/
- if (delta_result == SANLK_OK && !sp->thread_stop)
+ if (delta_result == SANLK_OK &&
+ !sp->thread_stop &&
+ !sp->block_watchdog_updates)
update_watchdog_file(sp, last_success);
pthread_mutex_unlock(&sp->mutex);
diff --git a/src/host_id.h b/src/host_id.h
index 3a289c2..ea5c225 100644
--- a/src/host_id.h
+++ b/src/host_id.h
@@ -12,6 +12,7 @@
int print_space_state(struct space *sp, char *str);
int _get_space_info(char *space_name, struct space *sp_out);
int get_space_info(char *space_name, struct space *sp_out);
+void block_watchdog_updates(char *space_name);
int host_id_disk_info(char *name, struct sync_disk *disk);
int host_info_set_bit(char *space_name, uint64_t host_id);
int host_info_clear_bit(char *space_name, uint64_t host_id);
diff --git a/src/main.c b/src/main.c
index 8b354b2..ef98a45 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1438,6 +1438,55 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
client_resume(ca->ci_in);
}
+static void cmd_examine(struct task *task GNUC_UNUSED, struct cmd_args *ca)
+{
+ union {
+ struct sanlk_resource r;
+ struct sanlk_lockspace s;
+ } buf;
+ struct sanlk_resource *res = NULL;
+ struct sanlk_lockspace *ls = NULL;
+ char *space_name = NULL;
+ char *res_name = NULL;
+ int fd, rv, result, count = 0, datalen;
+
+ fd = client[ca->ci_in].fd;
+
+ if (ca->header.cmd == SM_CMD_EXAMINE_RESOURCE) {
+ datalen = sizeof(struct sanlk_resource);
+ res = &buf.r;
+ } else {
+ datalen = sizeof(struct sanlk_lockspace);
+ ls = &buf.s;
+ }
+
+ rv = recv(fd, &buf, datalen, MSG_WAITALL);
+ if (rv != datalen) {
+ log_error("cmd_examine %d,%d recv %d %d",
+ ca->ci_in, fd, rv, errno);
+ result = -ENOTCONN;
+ goto reply;
+ }
+
+ if (res) {
+ space_name = res->lockspace_name;
+ res_name = res->name;
+ } else {
+ space_name = ls->name;
+ }
+
+ log_debug("cmd_examine %d,%d %.48s %.48s",
+ ca->ci_in, fd, space_name, res_name ? res_name : "");
+
+ count = set_resource_examine(space_name, res_name);
+ result = 0;
+ reply:
+ log_debug("cmd_examine %d,%d done %d", ca->ci_in, fd, count);
+
+ send_result(fd, &ca->header, result);
+ client_resume(ca->ci_in);
+}
+
static void cmd_add_lockspace(struct cmd_args *ca)
{
struct sanlk_lockspace lockspace;
@@ -1698,6 +1747,10 @@ static void call_cmd(struct task *task, struct cmd_args *ca)
case SM_CMD_INIT_RESOURCE:
cmd_init_resource(task, ca);
break;
+ case SM_CMD_EXAMINE_LOCKSPACE:
+ case SM_CMD_EXAMINE_RESOURCE:
+ cmd_examine(task, ca);
+ break;
};
}
@@ -2277,6 +2330,8 @@ static void process_connection(int ci)
case SM_CMD_ADD_LOCKSPACE:
case SM_CMD_REM_LOCKSPACE:
case SM_CMD_REQUEST:
+ case SM_CMD_EXAMINE_RESOURCE:
+ case SM_CMD_EXAMINE_LOCKSPACE:
case SM_CMD_ALIGN:
case SM_CMD_INIT_LOCKSPACE:
case SM_CMD_INIT_RESOURCE:
@@ -2741,8 +2796,7 @@ static void print_usage(void)
printf("sanlock client status [-D]\n");
printf("sanlock client log_dump\n");
printf("sanlock client shutdown\n");
- printf("sanlock client init -s LOCKSPACE\n");
- printf("sanlock client init -r RESOURCE\n");
+ printf("sanlock client init -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock client align -s LOCKSPACE\n");
printf("sanlock client add_lockspace -s LOCKSPACE\n");
printf("sanlock client rem_lockspace -s LOCKSPACE\n");
@@ -2750,13 +2804,12 @@ static void print_usage(void)
printf("sanlock client acquire -r RESOURCE -p <pid>\n");
printf("sanlock client release -r RESOURCE -p <pid>\n");
printf("sanlock client inquire -p <pid>\n");
- printf("sanlock client request -r RESOURCE\n");
+ printf("sanlock client request -r RESOURCE -f <force_mode>\n");
+ printf("sanlock client examine -r RESOURCE | -s LOCKSPACE\n");
printf("\n");
printf("sanlock direct <action> [-a 0|1] [-o 0|1]\n");
- printf("sanlock direct init -s LOCKSPACE\n");
- printf("sanlock direct init -r RESOURCE\n");
- printf("sanlock direct read_leader -s LOCKSPACE\n");
- printf("sanlock direct read_leader -r RESOURCE\n");
+ printf("sanlock direct init -s LOCKSPACE | -r RESOURCE\n");
+ printf("sanlock direct read_leader -s LOCKSPACE | -r RESOURCE\n");
printf("sanlock direct read_id -s LOCKSPACE\n");
printf("sanlock direct live_id -s LOCKSPACE\n");
printf("sanlock direct dump <path>[:<offset>]\n");
@@ -2856,6 +2909,8 @@ static int read_command_line(int argc, char *argv[])
com.action = ACT_INQUIRE;
else if (!strcmp(act, "request"))
com.action = ACT_REQUEST;
+ else if (!strcmp(act, "examine"))
+ com.action = ACT_EXAMINE;
else if (!strcmp(act, "align"))
com.action = ACT_CLIENT_ALIGN;
else if (!strcmp(act, "init"))
@@ -3177,6 +3232,15 @@ static int do_client(void)
log_tool("request done %d", rv);
break;
+ case ACT_EXAMINE:
+ log_tool("examine");
+ if (com.lockspace.host_id_disk.path[0])
+ rv = sanlock_examine(0, &com.lockspace, NULL);
+ else
+ rv = sanlock_examine(0, NULL, com.res_args[0]);
+ log_tool("examine done %d", rv);
+ break;
+
case ACT_CLIENT_ALIGN:
log_tool("align");
rv = sanlock_align(&com.lockspace.host_id_disk);
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 4e69988..3d78a0c 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -24,6 +24,7 @@
#include "sanlock.h"
#include "sanlock_rv.h"
+#include "sanlock_resource.h"
#include "leader.h"
#include "list.h"
#include "monotime.h"
@@ -60,9 +61,11 @@ struct sync_disk {
int fd; /* sanlk_disk pad2 */
};
-/* Once token and token->disks are initialized by the main loop, the only
- fields that are modified are disk fd's by open_disks() in the lease
- threads. */
+/*
+ * There are two different wrappers around a sanlk_resource:
+ * 'struct token' keeps track of resources per-client, client.tokens[]
+ * 'struct resource' keeps track of resources globally, resources list
+ */
struct token {
/* values copied from acquire res arg */
@@ -121,13 +124,12 @@ struct space {
int space_dead;
int killing_pids;
int external_remove;
+ int block_watchdog_updates;
int thread_stop;
pthread_t thread;
pthread_mutex_t mutex; /* protects lease_status, thread_stop */
struct lease_status lease_status;
int wd_fd;
- uint32_t req_count;
- uint32_t req_check;
struct host_info host_info[DEFAULT_MAX_HOSTS];
};
@@ -420,7 +422,7 @@ struct space {
#define HOSTID_AIO_CB_SIZE 64
#define WORKER_AIO_CB_SIZE 8
#define DIRECT_AIO_CB_SIZE 8
-#define RELEASE_AIO_CB_SIZE 64
+#define RESOURCE_AIO_CB_SIZE 64
#define LIB_AIO_CB_SIZE 8
struct aicb {
@@ -520,6 +522,7 @@ enum {
ACT_READ_LEADER,
ACT_CLIENT_INIT,
ACT_CLIENT_ALIGN,
+ ACT_EXAMINE,
};
EXTERN int external_shutdown;
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index 2005022..4998c16 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -20,9 +20,17 @@
* registered pid
*/
-int sanlock_register(void);
+/* restrict flags */
+#define SANLK_RESTRICT_ALL 0x00000001
+
+/* release flags */
+#define SANLK_REL_ALL 0x00000001
-#define SANLK_RESTRICT_ALL 0x1
+/* request flags */
+#define SANLK_REQ_KILL_PID 0x00000001
+#define SANLK_REQ_BLOCK_WD 0x00000002
+
+int sanlock_register(void);
int sanlock_restrict(int sock, uint32_t flags);
@@ -30,8 +38,6 @@ int sanlock_acquire(int sock, int pid, uint32_t flags, int res_count,
struct sanlk_resource *res_args[],
struct sanlk_options *opt_in);
-#define SANLK_REL_ALL 0x1
-
int sanlock_release(int sock, int pid, uint32_t flags, int res_count,
struct sanlk_resource *res_args[]);
@@ -41,12 +47,14 @@ int sanlock_inquire(int sock, int pid, uint32_t flags, int *res_count,
int sanlock_request(uint32_t flags, uint32_t force_mode,
struct sanlk_resource *res);
+int sanlock_examine(uint32_t flags, struct sanlk_lockspace *ls,
+ struct sanlk_resource *res);
+
/*
* Functions to convert between string and struct resource formats.
* All allocate space for returned data that the caller must free.
*/
-
/*
* convert from struct sanlk_resource to string with format:
* <lockspace_name>:<resource_name>:<path>:<offset>[:<path>:<offset>...]:<lver>
diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h
index 32c3511..5c6a004 100644
--- a/src/sanlock_sock.h
+++ b/src/sanlock_sock.h
@@ -32,6 +32,8 @@ enum {
SM_CMD_ALIGN = 12,
SM_CMD_INIT_LOCKSPACE = 13,
SM_CMD_INIT_RESOURCE = 14,
+ SM_CMD_EXAMINE_LOCKSPACE = 15,
+ SM_CMD_EXAMINE_RESOURCE = 16,
};
struct sm_header {
diff --git a/src/token_manager.c b/src/token_manager.c
index b786e12..4d447a6 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -19,6 +19,7 @@
#include <pthread.h>
#include <time.h>
#include <syslog.h>
+#include <signal.h>
#include <sys/types.h>
#include <sys/time.h>
@@ -28,41 +29,92 @@
#include "paxos_lease.h"
#include "token_manager.h"
#include "task.h"
+#include "host_id.h"
static struct list_head resources;
static struct list_head dispose_resources;
static pthread_mutex_t resource_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t resource_cond = PTHREAD_COND_INITIALIZER;
-static pthread_t release_thread;
-static int release_thread_stop;
+static pthread_t resource_pt;
+static int resource_thread_stop;
+static int resource_examine;
struct resource {
struct list_head list;
- char space_name[NAME_ID_SIZE+1];
- char resource_name[NAME_ID_SIZE+1];
struct token *token;
int pid;
+ int examine;
+ uint64_t lver;
+ struct sanlk_resource r;
};
+int set_resource_examine(char *space_name, char *res_name)
+{
+ struct resource *r;
+ int count = 0;
+
+ pthread_mutex_lock(&resource_mutex);
+ list_for_each_entry(r, &resources, list) {
+ if (strncmp(r->r.lockspace_name, space_name, NAME_ID_SIZE))
+ continue;
+ if (res_name && strncmp(r->r.name, res_name, NAME_ID_SIZE))
+ continue;
+ r->examine = 1;
+ resource_examine = 1;
+ count++;
+ }
+ if (count)
+ pthread_cond_signal(&resource_cond);
+ pthread_mutex_unlock(&resource_mutex);
+
+ return count;
+}
+
+static struct resource *find_resource_examine(void)
+{
+ struct resource *r;
+
+ list_for_each_entry(r, &resources, list) {
+ if (r->examine)
+ return r;
+ }
+ return NULL;
+}
+
static struct resource *find_resource(struct token *token,
struct list_head *head)
{
struct resource *r;
list_for_each_entry(r, head, list) {
- if (strncmp(r->space_name, token->r.lockspace_name, NAME_ID_SIZE))
+ if (strncmp(r->r.lockspace_name, token->r.lockspace_name, NAME_ID_SIZE))
continue;
- if (strncmp(r->resource_name, token->r.name, NAME_ID_SIZE))
+ if (strncmp(r->r.name, token->r.name, NAME_ID_SIZE))
continue;
return r;
}
return NULL;
}
+static void save_resource_lver(struct token *token, uint64_t lver)
+{
+ struct resource *r;
+
+ pthread_mutex_lock(&resource_mutex);
+ r = find_resource(token, &resources);
+ if (r)
+ r->lver = lver;
+ pthread_mutex_unlock(&resource_mutex);
+
+ if (!r)
+ log_errot(token, "save_resource_lver no r");
+
+}
+
int add_resource(struct token *token, int pid)
{
struct resource *r;
- int rv;
+ int rv, disks_len, r_len;
pthread_mutex_lock(&resource_mutex);
@@ -82,15 +134,17 @@ int add_resource(struct token *token, int pid)
goto out;
}
- r = malloc(sizeof(struct resource));
+ disks_len = token->r.num_disks * sizeof(struct sync_disk);
+ r_len = sizeof(struct resource) + disks_len;
+
+ r = malloc(r_len);
if (!r) {
rv = -ENOMEM;
goto out;
}
-
- memset(r, 0, sizeof(struct resource));
- strncpy(r->space_name, token->r.lockspace_name, NAME_ID_SIZE);
- strncpy(r->resource_name, token->r.name, NAME_ID_SIZE);
+ memset(r, 0, r_len);
+ memcpy(&r->r, &token->r, sizeof(struct sanlk_resource));
+ memcpy(&r->r.disks, &token->r.disks, disks_len);
r->token = token;
r->pid = pid;
list_add_tail(&r->list, &resources);
@@ -152,6 +206,8 @@ int acquire_token(struct task *task, struct token *token,
if (rv < 0)
return rv;
+ save_resource_lver(token, token->leader.lver);
+
memcpy(&token->leader, &leader_ret, sizeof(struct leader_record));
token->r.lver = token->leader.lver;
return rv; /* SANLK_OK */
@@ -246,7 +302,7 @@ int request_token(struct task *task, struct token *token, uint32_t force_mode,
goto out;
}
-req_write:
+ req_write:
req.version = REQ_DISK_VERSION_MAJOR | REQ_DISK_VERSION_MINOR;
req.lver = token->acquire_lver;
req.force_mode = force_mode;
@@ -262,46 +318,158 @@ req_write:
return rv;
}
-/* thread that releases tokens of pid's that die */
+static int examine_token(struct task *task, struct token *token,
+ struct request_record *req_out)
+{
+ struct request_record req;
+ int rv;
+
+ memset(&req, 0, sizeof(req));
+
+ rv = open_disks(token->disks, token->r.num_disks);
+ if (!majority_disks(token, rv)) {
+ log_debug("request open_disk error %s", token->disks[0].path);
+ return -ENODEV;
+ }
+
+ rv = paxos_lease_request_read(task, token, &req);
+ if (rv < 0)
+ goto out;
-static void *async_release_thread(void *arg GNUC_UNUSED)
+ if (req.magic != REQ_DISK_MAGIC) {
+ rv = SANLK_REQUEST_MAGIC;
+ goto out;
+ }
+
+ if ((req.version & 0xFFFF0000) != REQ_DISK_VERSION_MAJOR) {
+ rv = SANLK_REQUEST_VERSION;
+ goto out;
+ }
+
+ memcpy(req_out, &req, sizeof(struct request_record));
+ out:
+ close_disks(token->disks, token->r.num_disks);
+
+ log_debug("examine rv %d lver %llu mode %u",
+ rv, (unsigned long long)req.lver, req.force_mode);
+
+ return rv;
+}
+
+/*
+ * - releases tokens of pid's that die
+ * - examines request blocks of resources
+ */
+
+static void *resource_thread(void *arg GNUC_UNUSED)
{
struct task task;
struct resource *r;
- struct token *token;
+ struct token *token, *tt = NULL;
+ struct request_record req;
+ uint64_t lver;
+ int rv, j, pid, tt_len;
memset(&task, 0, sizeof(struct task));
setup_task_timeouts(&task, main_task.io_timeout_seconds);
- setup_task_aio(&task, main_task.use_aio, RELEASE_AIO_CB_SIZE);
- sprintf(task.name, "%s", "release");
+ setup_task_aio(&task, main_task.use_aio, RESOURCE_AIO_CB_SIZE);
+ sprintf(task.name, "%s", "resource");
+
+ /* a fake/tmp token struct we copy necessary res info into,
+ because other functions take a token struct arg */
+
+ tt_len = sizeof(struct token) + (SANLK_MAX_DISKS * sizeof(struct sync_disk));
+ tt = malloc(tt_len);
+ if (!tt) {
+ log_error("resource_thread tt malloc error");
+ goto out;
+ }
+ memset(tt, 0, tt_len);
+ tt->disks = (struct sync_disk *)&tt->r.disks[0];
while (1) {
pthread_mutex_lock(&resource_mutex);
- while (list_empty(&dispose_resources)) {
- if (release_thread_stop) {
+ while (list_empty(&dispose_resources) && !resource_examine) {
+ if (resource_thread_stop) {
pthread_mutex_unlock(&resource_mutex);
goto out;
}
pthread_cond_wait(&resource_cond, &resource_mutex);
}
- r = list_first_entry(&dispose_resources, struct resource, list);
- pthread_mutex_unlock(&resource_mutex);
+ if (!list_empty(&dispose_resources)) {
+ r = list_first_entry(&dispose_resources, struct resource, list);
+ pthread_mutex_unlock(&resource_mutex);
- token = r->token;
- release_token(&task, token);
+ token = r->token;
+ release_token(&task, token);
- /* we don't want to remove r from dispose_list until after the
- lease is released because we don't want a new token for
- the same resource to be added and attempt to acquire
- the lease until after it's been released */
+ /* we don't want to remove r from dispose_list until after the
+ lease is released because we don't want a new token for
+ the same resource to be added and attempt to acquire
+ the lease until after it's been released */
- pthread_mutex_lock(&resource_mutex);
- _del_resource(r);
- pthread_mutex_unlock(&resource_mutex);
- free(token);
+ pthread_mutex_lock(&resource_mutex);
+ _del_resource(r);
+ pthread_mutex_unlock(&resource_mutex);
+ free(token);
+
+ } else if (resource_examine) {
+ r = find_resource_examine();
+ if (!r) {
+ resource_examine = 0;
+ pthread_mutex_unlock(&resource_mutex);
+ continue;
+ }
+ r->examine = 0;
+
+ /* we can't safely access r->token here, and
+ r may be freed after we release mutex, so copy
+ everything we need before unlocking mutex */
+
+ pid = r->pid;
+ lver = r->lver;
+ memcpy(&tt->r, &r->r, sizeof(struct sanlk_resource));
+ memcpy(&tt->r.disks, &r->r.disks, r->r.num_disks * sizeof(struct sync_disk));
+ pthread_mutex_unlock(&resource_mutex);
+
+ for (j = 0; j < tt->r.num_disks; j++) {
+ tt->disks[j].sector_size = 0;
+ tt->disks[j].fd = -1;
+ }
+
+ rv = examine_token(&task, tt, &req);
+
+ if (rv != SANLK_OK)
+ continue;
+
+ if (!req.force_mode || !req.lver)
+ continue;
+
+ if (req.lver <= lver) {
+ log_debug("examine req lver %llu our lver %llu",
+ (unsigned long long)req.lver,
+ (unsigned long long)lver);
+ continue;
+ }
+
+ if (req.force_mode == SANLK_REQ_KILL_PID) {
+ /* look up r again to check it still exists and
+ pid is same? */
+
+ log_error("req_kill_pid %d %.48s:%.48s", pid,
+ tt->r.lockspace_name, tt->r.name);
+ kill(pid, SIGKILL);
+
+ } else if (req.force_mode == SANLK_REQ_BLOCK_WD) {
+ log_error("req_block_wd %.48s", tt->r.lockspace_name);
+ block_watchdog_updates(tt->r.lockspace_name);
+ }
+ }
}
out:
+ if (tt)
+ free(tt);
close_task_aio(&task);
return NULL;
}
@@ -332,7 +500,7 @@ int setup_token_manager(void)
INIT_LIST_HEAD(&resources);
INIT_LIST_HEAD(&dispose_resources);
- rv = pthread_create(&release_thread, NULL, async_release_thread, NULL);
+ rv = pthread_create(&resource_pt, NULL, resource_thread, NULL);
if (rv)
return -1;
return 0;
@@ -341,9 +509,9 @@ int setup_token_manager(void)
void close_token_manager(void)
{
pthread_mutex_lock(&resource_mutex);
- release_thread_stop = 1;
+ resource_thread_stop = 1;
pthread_cond_signal(&resource_cond);
pthread_mutex_unlock(&resource_mutex);
- pthread_join(release_thread, NULL);
+ pthread_join(resource_pt, NULL);
}
diff --git a/src/token_manager.h b/src/token_manager.h
index 7825642..da34baa 100644
--- a/src/token_manager.h
+++ b/src/token_manager.h
@@ -22,6 +22,8 @@ int request_token(struct task *task, struct token *token, uint32_t force_mode,
int add_resource(struct token *token, int pid);
void del_resource(struct token *token);
+int set_resource_examine(char *space_name, char *res_name);
+
int setup_token_manager(void);
void close_token_manager(void);
12 years, 3 months
2 commits - src/delta_lease.c src/delta_lease.h src/direct.c src/direct_lib.c src/host_id.c src/host_id.h src/leader.h src/main.c src/paxos_lease.c src/sanlock_internal.h src/sanlock_rv.h src/token_manager.c
by David Teigland
src/delta_lease.c | 33 ++++++++++++-----
src/delta_lease.h | 1
src/direct.c | 21 +++++++++--
src/direct_lib.c | 15 +++++++
src/host_id.c | 92 ++++++++++++++++++++++++++++++++++++++++++-------
src/host_id.h | 3 +
src/leader.h | 7 +++
src/main.c | 8 +---
src/paxos_lease.c | 12 +++++-
src/sanlock_internal.h | 1
src/sanlock_rv.h | 8 +++-
src/token_manager.c | 24 +++++++++++-
12 files changed, 188 insertions(+), 37 deletions(-)
New commits:
commit ae88c11978cc17f70309c150a1e3115f7f824f1e
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Aug 29 17:37:39 2011 -0500
sanlock: set bitmap for requests
After a request_record for a lease is written,
set the bit of the current lease owner in our
own host_id bitmap the next time we renew.
Leave the bit set for request_finish_seconds.
diff --git a/src/delta_lease.c b/src/delta_lease.c
index f8dc729..53cbd5e 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -324,17 +324,18 @@ int delta_lease_renew(struct task *task,
struct space *sp,
struct sync_disk *disk,
char *space_name,
+ char *bitmap,
int prev_result,
int *read_result,
struct leader_record *leader_last,
struct leader_record *leader_ret)
{
struct leader_record leader;
- uint64_t host_id, offset;
- uint64_t new_ts;
char **p_iobuf;
- int iobuf_len, io_timeout_save;
- int rv;
+ char **p_wbuf;
+ char *wbuf;
+ uint64_t host_id, id_offset, new_ts;
+ int rv, iobuf_len, sector_size, io_timeout_save;
if (!leader_last)
return -EINVAL;
@@ -345,9 +346,11 @@ int delta_lease_renew(struct task *task,
iobuf_len = sp->align_size;
+ sector_size = disk->sector_size;
+
/* offset of our leader_record */
- offset = (host_id - 1) * disk->sector_size;
- if (offset > iobuf_len)
+ id_offset = (host_id - 1) * sector_size;
+ if (id_offset > iobuf_len)
return -EINVAL;
@@ -429,7 +432,7 @@ int delta_lease_renew(struct task *task,
read_done:
*read_result = SANLK_OK;
- memcpy(&leader, task->iobuf+offset, sizeof(struct leader_record));
+ memcpy(&leader, task->iobuf+id_offset, sizeof(struct leader_record));
rv = verify_leader(disk, space_name, host_id, &leader, "delta_renew");
if (rv < 0)
@@ -466,6 +469,16 @@ int delta_lease_renew(struct task *task,
leader.timestamp = new_ts;
leader.checksum = leader_checksum(&leader);
+ p_wbuf = &wbuf;
+ rv = posix_memalign((void *)p_wbuf, getpagesize(), sector_size);
+ if (rv) {
+ log_erros(sp, "dela_renew write memalign rv %d", rv);
+ return -ENOMEM;
+ }
+ memset(wbuf, 0, sector_size);
+ memcpy(wbuf, &leader, sizeof(struct leader_record));
+ memcpy(wbuf+LEADER_RECORD_MAX, bitmap, HOSTID_BITMAP_SIZE);
+
/* extend io timeout for this one write; we need to give this write
every chance to succeed, and there's no point in letting it time
out. there's nothing we would do but retry it, and timing out and
@@ -474,8 +487,10 @@ int delta_lease_renew(struct task *task,
io_timeout_save = task->io_timeout_seconds;
task->io_timeout_seconds = task->host_dead_seconds;
- rv = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
- task, "delta_leader");
+ rv = write_iobuf(disk->fd, disk->offset+id_offset, wbuf, sector_size, task);
+
+ if (rv != SANLK_AIO_TIMEOUT)
+ free(wbuf);
task->io_timeout_seconds = io_timeout_save;
diff --git a/src/delta_lease.h b/src/delta_lease.h
index 4a0cad2..4739cc3 100644
--- a/src/delta_lease.h
+++ b/src/delta_lease.h
@@ -28,6 +28,7 @@ int delta_lease_renew(struct task *task,
struct space *sp,
struct sync_disk *disk,
char *space_name,
+ char *bitmap,
int prev_result,
int *read_result,
struct leader_record *leader_last,
diff --git a/src/direct.c b/src/direct.c
index 6383787..a095803 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -191,8 +191,11 @@ static int do_delta_action(int action,
struct leader_record leader;
struct sync_disk sd;
struct space space;
+ char bitmap[HOSTID_BITMAP_SIZE];
int read_result, rv;
+ memset(bitmap, 0, sizeof(bitmap));
+
/* for log_space in delta functions */
memset(&space, 0, sizeof(space));
@@ -229,6 +232,7 @@ static int do_delta_action(int action,
return rv;
rv = delta_lease_renew(task, &space, &sd,
ls->name,
+ bitmap,
-1,
&read_result,
&leader,
@@ -420,9 +424,11 @@ int direct_read_leader(struct task *task,
return rv;
}
+int test_id_bit(int host_id, char *bitmap);
+
int direct_dump(struct task *task, char *dump_path, int force_mode)
{
- char *data;
+ char *data, *bitmap;
char *colon, *off_str;
struct leader_record *lr;
struct request_record *rr;
@@ -431,7 +437,7 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
char rname[NAME_ID_SIZE+1];
uint64_t sector_nr;
int sector_count, datalen, align_size;
- int i, rv;
+ int i, rv, b;
memset(&sd, 0, sizeof(struct sync_disk));
@@ -503,14 +509,21 @@ int direct_dump(struct task *task, char *dump_path, int force_mode)
strncpy(sname, lr->space_name, NAME_ID_SIZE);
strncpy(rname, lr->resource_name, NAME_ID_SIZE);
- printf("%08llu %36s %48s %010llu %04llu %04llu\n",
+ printf("%08llu %36s %48s %010llu %04llu %04llu",
(unsigned long long)((sector_nr + i) * sd.sector_size),
sname, rname,
(unsigned long long)lr->timestamp,
(unsigned long long)lr->owner_id,
(unsigned long long)lr->owner_generation);
- /* TODO: if force_mode, print host_ids set in bitmap */
+ if (force_mode) {
+ bitmap = (char *)lr + LEADER_RECORD_MAX;
+ for (b = 0; b < DEFAULT_MAX_HOSTS; b++) {
+ if (test_id_bit(b+1, bitmap))
+ printf(" %d", b+1);
+ }
+ }
+ printf("\n");
}
} else if (lr->magic == PAXOS_DISK_MAGIC) {
strncpy(sname, lr->space_name, NAME_ID_SIZE);
diff --git a/src/direct_lib.c b/src/direct_lib.c
index 2cb79c3..e2c7d7a 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -38,6 +38,21 @@ int host_id_disk_info(char *name GNUC_UNUSED, struct sync_disk *disk GNUC_UNUSED
return -1;
}
+/* copied from host_id.c */
+
+int test_id_bit(int host_id, char *bitmap);
+
+int test_id_bit(int host_id, char *bitmap)
+{
+ char *byte = bitmap + ((host_id - 1) / 8);
+ unsigned int bit = (host_id - 1) % 8;
+ char mask;
+
+ mask = 1 << bit;
+
+ return (*byte & mask);
+}
+
int get_rand(int a, int b);
int get_rand(int a, int b)
diff --git a/src/host_id.c b/src/host_id.c
index 0688b93..a1cc468 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -170,22 +170,85 @@ static void clear_bit(int host_id, char *bitmap)
*byte &= ~bit;
}
+#endif
-static void set_bit(int host_id, char *bitmap)
+static void set_id_bit(int host_id, char *bitmap, char *c)
{
char *byte = bitmap + ((host_id - 1) / 8);
- unsigned int bit = host_id % 8;
+ unsigned int bit = (host_id - 1) % 8;
+ char mask;
+
+ mask = 1 << bit;
- *byte |= bit;
+ *byte |= mask;
+
+ *c = *byte;
}
-#endif
-static int test_bit(int host_id, char *bitmap)
+/* FIXME: another copy in direct_lib.c */
+
+int test_id_bit(int host_id, char *bitmap)
{
char *byte = bitmap + ((host_id - 1) / 8);
- unsigned int bit = host_id % 8;
+ unsigned int bit = (host_id - 1) % 8;
+ char mask;
+
+ mask = 1 << bit;
- return *byte & bit;
+ return (*byte & mask);
+}
+
+int host_info_set_bit(char *space_name, uint64_t host_id)
+{
+ struct space *sp;
+ int found = 0;
+
+ if (!host_id || host_id > DEFAULT_MAX_HOSTS)
+ return -EINVAL;
+
+ pthread_mutex_lock(&spaces_mutex);
+ list_for_each_entry(sp, &spaces, list) {
+ if (strncmp(sp->space_name, space_name, NAME_ID_SIZE))
+ continue;
+ found = 1;
+ break;
+ }
+ pthread_mutex_unlock(&spaces_mutex);
+
+ if (!found)
+ return -ENOSPC;
+
+ pthread_mutex_lock(&sp->mutex);
+ sp->host_info[host_id-1].set_bit_time = monotime();
+ pthread_mutex_unlock(&sp->mutex);
+ return 0;
+}
+
+static void create_bitmap(struct task *task, struct space *sp, char *bitmap)
+{
+ uint64_t now;
+ int i;
+ char c;
+
+ now = monotime();
+
+ pthread_mutex_lock(&sp->mutex);
+ for (i = 0; i < DEFAULT_MAX_HOSTS; i++) {
+ if (i+1 == sp->host_id)
+ continue;
+
+ if (!sp->host_info[i].set_bit_time)
+ continue;
+
+ if (now - sp->host_info[i].set_bit_time > task->request_finish_seconds) {
+ log_space(sp, "bitmap clear host_id %d", i+1);
+ sp->host_info[i].set_bit_time = 0;
+ } else {
+ set_id_bit(i+1, bitmap, &c);
+ log_space(sp, "bitmap set host_id %d byte %x", i+1, c);
+ }
+ }
+ pthread_mutex_unlock(&sp->mutex);
}
/*
@@ -206,7 +269,7 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
struct host_info *info;
char *bitmap;
uint64_t now;
- int i, new;
+ int i, new = 0;
disk = &sp->host_id_disk;
@@ -235,7 +298,7 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
bitmap = (char *)leader + HOSTID_BITMAP_OFFSET;
- if (!test_bit(sp->host_id, bitmap))
+ if (!test_id_bit(sp->host_id, bitmap))
continue;
/* this host has made a request for us, we won't take a new
@@ -246,7 +309,7 @@ void check_other_leases(struct task *task, struct space *sp, char *buf)
log_space(sp, "request from host_id %d", i+1);
info->last_req = now;
- new = 1;
+ new++;
}
/* TODO: add a thread that will periodically scan spaces and
@@ -331,6 +394,7 @@ static int corrupt_result(int result)
static void *lockspace_thread(void *arg_in)
{
+ char bitmap[HOSTID_BITMAP_SIZE];
struct task task;
struct space *sp;
struct leader_record leader;
@@ -441,11 +505,15 @@ static void *lockspace_thread(void *arg_in)
* and the length of time between successful renewals
*/
+ memset(bitmap, 0, sizeof(bitmap));
+ create_bitmap(&task, sp, bitmap);
+
delta_begin = monotime();
delta_result = delta_lease_renew(&task, sp, &sp->host_id_disk,
- sp->space_name, delta_result,
- &read_result, &leader, &leader);
+ sp->space_name, bitmap,
+ delta_result, &read_result,
+ &leader, &leader);
delta_length = monotime() - delta_begin;
if (delta_result == SANLK_OK) {
diff --git a/src/host_id.h b/src/host_id.h
index 69cb43f..3a289c2 100644
--- a/src/host_id.h
+++ b/src/host_id.h
@@ -13,6 +13,9 @@ int print_space_state(struct space *sp, char *str);
int _get_space_info(char *space_name, struct space *sp_out);
int get_space_info(char *space_name, struct space *sp_out);
int host_id_disk_info(char *name, struct sync_disk *disk);
+int host_info_set_bit(char *space_name, uint64_t host_id);
+int host_info_clear_bit(char *space_name, uint64_t host_id);
+int test_id_bit(int host_id, char *bitmap);
int check_our_lease(struct task *task, struct space *sp, int *check_all, char *check_buf);
void check_other_leases(struct task *task, struct space *sp, char *buf);
int add_lockspace(struct sanlk_lockspace *ls);
diff --git a/src/leader.h b/src/leader.h
index e808283..48189bc 100644
--- a/src/leader.h
+++ b/src/leader.h
@@ -68,6 +68,7 @@ struct leader_record {
#define LEADER_RECORD_MAX 256
#define HOSTID_BITMAP_OFFSET 256
+#define HOSTID_BITMAP_SIZE 256
#define REQ_DISK_MAGIC 0x08292011
#define REQ_DISK_VERSION_MAJOR 0x00010000
diff --git a/src/main.c b/src/main.c
index a4ba642..8b354b2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1425,15 +1425,11 @@ static void cmd_request(struct task *task, struct cmd_args *ca)
result = 0;
- /* zero lver and force mode clears the req, don't set bitmap */
-
if (!token->acquire_lver && !force_mode)
goto reply;
-#if 0
- if (owner_id)
- host_bitmap_set(token, owner_id);
-#endif
+ if (owner_id)
+ host_info_set_bit(token->r.lockspace_name, owner_id);
reply:
free(token);
log_debug("cmd_request %d,%d done %d", ca->ci_in, fd, result);
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index cce1947..4e69988 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -107,6 +107,7 @@ struct host_info {
uint64_t owner_id;
uint64_t owner_generation;
uint64_t timestamp; /* remote monotime */
+ uint64_t set_bit_time;
};
struct space {
commit aeedc5d7668710b019d703ccc85b8b82d53a168b
Author: David Teigland <teigland(a)redhat.com>
Date: Mon Aug 29 14:17:23 2011 -0500
sanlock: request struct magic and version numbers
standard fields for on disk struct
diff --git a/src/leader.h b/src/leader.h
index c62dced..e808283 100644
--- a/src/leader.h
+++ b/src/leader.h
@@ -69,7 +69,13 @@ struct leader_record {
#define LEADER_RECORD_MAX 256
#define HOSTID_BITMAP_OFFSET 256
+#define REQ_DISK_MAGIC 0x08292011
+#define REQ_DISK_VERSION_MAJOR 0x00010000
+#define REQ_DISK_VERSION_MINOR 0x00000001
+
struct request_record {
+ uint32_t magic;
+ uint32_t version;
uint64_t lver;
uint32_t force_mode;
};
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 0834d95..b5a7282 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -1461,7 +1461,9 @@ int paxos_lease_init(struct task *task,
{
char *iobuf, **p_iobuf;
struct leader_record *leader;
+ struct request_record *rr;
int iobuf_len;
+ int sector_size;
int align_size;
int aio_timeout = 0;
int rv, d;
@@ -1471,11 +1473,13 @@ int paxos_lease_init(struct task *task,
if (!max_hosts)
max_hosts = DEFAULT_MAX_HOSTS;
+ sector_size = token->disks[0].sector_size;
+
align_size = direct_align(&token->disks[0]);
if (align_size < 0)
return align_size;
- if (token->disks[0].sector_size * (2 + max_hosts) > align_size)
+ if (sector_size * (2 + max_hosts) > align_size)
return -E2BIG;
iobuf_len = align_size;
@@ -1491,7 +1495,7 @@ int paxos_lease_init(struct task *task,
leader = (struct leader_record *)iobuf;
leader->magic = PAXOS_DISK_MAGIC;
leader->version = PAXOS_DISK_VERSION_MAJOR | PAXOS_DISK_VERSION_MINOR;
- leader->sector_size = token->disks[0].sector_size;
+ leader->sector_size = sector_size;
leader->num_hosts = num_hosts;
leader->max_hosts = max_hosts;
leader->timestamp = LEASE_FREE;
@@ -1499,6 +1503,10 @@ int paxos_lease_init(struct task *task,
strncpy(leader->resource_name, token->r.name, NAME_ID_SIZE);
leader->checksum = leader_checksum(leader);
+ rr = (struct request_record *)(iobuf + sector_size);
+ rr->magic = REQ_DISK_MAGIC;
+ rr->version = REQ_DISK_VERSION_MAJOR | REQ_DISK_VERSION_MINOR;
+
for (d = 0; d < token->r.num_disks; d++) {
rv = write_iobuf(token->disks[d].fd, token->disks[d].offset,
iobuf, iobuf_len, task);
diff --git a/src/sanlock_rv.h b/src/sanlock_rv.h
index 8edd492..3fd53c8 100644
--- a/src/sanlock_rv.h
+++ b/src/sanlock_rv.h
@@ -48,7 +48,6 @@
#define SANLK_RELEASE_LVER -250
#define SANLK_RELEASE_OWNER -251
-#define SANLK_REQUEST_LVER -252
/* delta_lease_renew, delta_lease_acquire */
@@ -56,4 +55,11 @@
#define SANLK_RENEW_DIFF -261
#define SANLK_HOSTID_BUSY -262
+/* request_token */
+
+#define SANLK_REQUEST_MAGIC -270
+#define SANLK_REQUEST_VERSION -271
+#define SANLK_REQUEST_OLD -272
+#define SANLK_REQUEST_LVER -273
+
#endif
diff --git a/src/token_manager.c b/src/token_manager.c
index b75b974..b786e12 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -201,7 +201,7 @@ int request_token(struct task *task, struct token *token, uint32_t force_mode,
}
if (!token->acquire_lver && !force_mode)
- goto do_req;
+ goto req_read;
rv = paxos_lease_leader_read(task, token, &leader, "request");
if (rv < 0)
@@ -216,20 +216,38 @@ int request_token(struct task *task, struct token *token, uint32_t force_mode,
*owner_id = leader.owner_id;
if (leader.lver >= token->acquire_lver) {
- rv = SANLK_ACQUIRE_LVER;
+ rv = SANLK_REQUEST_OLD;
goto out;
}
+ req_read:
rv = paxos_lease_request_read(task, token, &req);
if (rv < 0)
goto out;
+ if (req.magic != REQ_DISK_MAGIC) {
+ rv = SANLK_REQUEST_MAGIC;
+ goto out;
+ }
+
+ if ((req.version & 0xFFFF0000) != REQ_DISK_VERSION_MAJOR) {
+ rv = SANLK_REQUEST_VERSION;
+ goto out;
+ }
+
+ if (!token->acquire_lver && !force_mode)
+ goto req_write;
+
+ /* > instead of >= so multiple hosts can request the same
+ version at once and all succeed */
+
if (req.lver > token->acquire_lver) {
rv = SANLK_REQUEST_LVER;
goto out;
}
- do_req:
+req_write:
+ req.version = REQ_DISK_VERSION_MAJOR | REQ_DISK_VERSION_MINOR;
req.lver = token->acquire_lver;
req.force_mode = force_mode;
12 years, 3 months
3 commits - src/client_admin.c src/client_resource.c src/crc32c.c src/crc32c.h src/main.c src/monotime.c src/paxos_lease.c src/sanlock_internal.h src/sanlock_resource.h src/sanlock_sock.c src/sanlock_sock.h src/task.c src/token_manager.c src/token_manager.h wdmd/wdmd_sock.c wdmd/wdmd_sock.h
by David Teigland
src/client_admin.c | 4 -
src/client_resource.c | 44 +++++++++++++++
src/crc32c.c | 17 +-----
src/crc32c.h | 34 ------------
src/main.c | 136 +++++++++++++++++++++++++++++++++++++++++++++----
src/monotime.c | 8 ++
src/paxos_lease.c | 9 +--
src/sanlock_internal.h | 13 ----
src/sanlock_resource.h | 2
src/sanlock_sock.c | 8 ++
src/sanlock_sock.h | 12 ++++
src/task.c | 8 ++
src/token_manager.c | 45 ++++++++++++++++
src/token_manager.h | 3 +
wdmd/wdmd_sock.c | 8 ++
wdmd/wdmd_sock.h | 8 ++
16 files changed, 284 insertions(+), 75 deletions(-)
New commits:
commit aad12eec511f1ac442d7a6996db942959463a7e0
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Aug 25 18:18:24 2011 -0500
sanlock/wdmd: add license header to files
diff --git a/src/monotime.c b/src/monotime.c
index 79e7a58..f76a716 100644
--- a/src/monotime.c
+++ b/src/monotime.c
@@ -1,3 +1,11 @@
+/*
+ * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
#include <unistd.h>
#include <stdint.h>
#include <time.h>
diff --git a/src/sanlock_sock.c b/src/sanlock_sock.c
index dea7c34..5393017 100644
--- a/src/sanlock_sock.c
+++ b/src/sanlock_sock.c
@@ -1,3 +1,11 @@
+/*
+ * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
#include <inttypes.h>
#include <unistd.h>
#include <stdio.h>
diff --git a/src/task.c b/src/task.c
index d54dabe..9635f92 100644
--- a/src/task.c
+++ b/src/task.c
@@ -1,3 +1,11 @@
+/*
+ * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
#include <inttypes.h>
#include <unistd.h>
#include <stdio.h>
diff --git a/wdmd/wdmd_sock.c b/wdmd/wdmd_sock.c
index ef2923e..203c28e 100644
--- a/wdmd/wdmd_sock.c
+++ b/wdmd/wdmd_sock.c
@@ -1,3 +1,11 @@
+/*
+ * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
#include <inttypes.h>
#include <unistd.h>
#include <stdio.h>
diff --git a/wdmd/wdmd_sock.h b/wdmd/wdmd_sock.h
index cf2f4b1..02c4c04 100644
--- a/wdmd/wdmd_sock.h
+++ b/wdmd/wdmd_sock.h
@@ -1,3 +1,11 @@
+/*
+ * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
#ifndef __WDMD_SOCK_H__
#define __WDMD_SOCK_H__
commit 1786fc2d9fe2ca84a508cd8b4546ef9e55b38a6f
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Aug 25 18:09:06 2011 -0500
sanlock: crc code tidying
- just use original license that was copied with the code
- remove the header
- change unsigned char const arg to uint8_t
diff --git a/src/crc32c.c b/src/crc32c.c
index 3edab07..6eaf703 100644
--- a/src/crc32c.c
+++ b/src/crc32c.c
@@ -1,11 +1,3 @@
-/*
- * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
/*
* Copied from the btrfs-progs source code, which...
* Copied from the kernel source code, lib/libcrc32c.c.
@@ -14,11 +6,9 @@
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
- *
*/
#include <unistd.h>
#include <stdint.h>
-#include "crc32c.h"
/*
* This is the CRC-32C table
@@ -101,11 +91,12 @@ static const uint32_t crc32c_table[256] = {
* crc using table.
*/
-uint32_t crc32c_le(uint32_t crc, unsigned char const *data, size_t length)
+uint32_t crc32c(uint32_t crc, uint8_t *data, size_t length);
+
+uint32_t crc32c(uint32_t crc, uint8_t *data, size_t length)
{
while (length--)
- crc =
- crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
+ crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
return crc;
}
diff --git a/src/crc32c.h b/src/crc32c.h
deleted file mode 100644
index ae2a340..0000000
--- a/src/crc32c.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
-/*
- * Copyright (C) 2007 Red Hat. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __CRC32C__
-#define __CRC32C__
-
-uint32_t crc32c_le(uint32_t seed, unsigned char const *data, size_t length);
-
-#define crc32c(seed, data, length) crc32c_le(seed, (unsigned char const *)data, length)
-#endif
-
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 1d9d7a3..570b5d7 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -25,11 +25,11 @@
#include "diskio.h"
#include "direct.h"
#include "log.h"
-#include "crc32c.h"
#include "host_id.h"
#include "delta_lease.h"
#include "paxos_lease.h"
+uint32_t crc32c(uint32_t crc, uint8_t *data, size_t length);
int get_rand(int a, int b);
struct request_record {
@@ -208,7 +208,7 @@ static int read_request(struct task *task,
static uint32_t dblock_checksum(struct paxos_dblock *pd)
{
- return crc32c((uint32_t)~1, (char *)pd, DBLOCK_CHECKSUM_LEN);
+ return crc32c((uint32_t)~1, (uint8_t *)pd, DBLOCK_CHECKSUM_LEN);
}
static int verify_dblock(struct token *token, struct paxos_dblock *pd)
@@ -566,7 +566,7 @@ static int run_ballot(struct task *task, struct token *token, int num_hosts,
uint32_t leader_checksum(struct leader_record *lr)
{
- return crc32c((uint32_t)~1, (char *)lr, LEADER_CHECKSUM_LEN);
+ return crc32c((uint32_t)~1, (uint8_t *)lr, LEADER_CHECKSUM_LEN);
}
static void log_leader_error(int result,
commit 38496d10f78664727bf1f4419db2ed0980c6ee96
Author: David Teigland <teigland(a)redhat.com>
Date: Thu Aug 25 17:09:15 2011 -0500
sanlock: add request api/cmd
the actual implementation is not done
diff --git a/src/client_admin.c b/src/client_admin.c
index 94645ec..cb743b3 100644
--- a/src/client_admin.c
+++ b/src/client_admin.c
@@ -212,11 +212,11 @@ static int cmd_lockspace(int cmd, struct sanlk_lockspace *ls, uint32_t flags)
rv = send_header(fd, cmd, flags, sizeof(struct sanlk_lockspace), 0, 0);
if (rv < 0)
- return rv;
+ goto out;
rv = send(fd, (void *)ls, sizeof(struct sanlk_lockspace), 0);
if (rv < 0) {
- rv = -1;
+ rv = -errno;
goto out;
}
diff --git a/src/client_resource.c b/src/client_resource.c
index 18f185f..0a02115 100644
--- a/src/client_resource.c
+++ b/src/client_resource.c
@@ -325,6 +325,50 @@ int sanlock_release(int sock, int pid, uint32_t flags, int res_count,
return rv;
}
+int sanlock_request(uint32_t flags, uint32_t force_mode,
+ struct sanlk_resource *res)
+{
+ struct sm_header h;
+ int fd, rv, datalen;
+
+ datalen = sizeof(struct sanlk_resource) +
+ sizeof(struct sanlk_disk) * res->num_disks;
+
+ rv = connect_socket(&fd);
+ if (rv < 0)
+ return rv;
+
+ rv = send_header(fd, SM_CMD_REQUEST, flags, datalen, force_mode, 0);
+ if (rv < 0)
+ goto out;
+
+ rv = send(fd, res, sizeof(struct sanlk_resource), 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ rv = send(fd, res->disks, sizeof(struct sanlk_disk) * res->num_disks, 0);
+ if (rv < 0) {
+ rv = -errno;
+ goto out;
+ }
+
+ /* get result */
+
+ memset(&h, 0, sizeof(h));
+
+ rv = recv(fd, &h, sizeof(h), MSG_WAITALL);
+ if (rv != sizeof(h)) {
+ rv = -1;
+ goto out;
+ }
+ rv = (int)h.data;
+ out:
+ close(fd);
+ return rv;
+}
+
/*
* convert from struct sanlk_resource to string with format:
* <lockspace_name>:<resource_name>:<path>:<offset>[:<path>:<offset>...]:<lver>
diff --git a/src/main.c b/src/main.c
index 3f0f448..6e0721a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1343,6 +1343,105 @@ static void cmd_inquire(struct task *task, struct cmd_args *ca)
client_resume(ca->ci_in);
}
+static void cmd_request(struct task *task, struct cmd_args *ca)
+{
+ struct sm_header h;
+ struct token *token;
+ struct sanlk_resource res;
+ uint64_t owner_id;
+ uint32_t force_mode;
+ int token_len, disks_len;
+ int j, fd, rv, result;
+
+ fd = client[ca->ci_in].fd;
+
+ force_mode = ca->header.data;
+
+ /* receiving and setting up token copied from cmd_acquire */
+
+ rv = recv(fd, &res, sizeof(struct sanlk_resource), MSG_WAITALL);
+ if (rv != sizeof(struct sanlk_resource)) {
+ log_error("cmd_request %d,%d recv %d %d",
+ ca->ci_in, fd, rv, errno);
+ result = -ENOTCONN;
+ goto reply;
+ }
+
+ if (!res.num_disks || res.num_disks > MAX_DISKS) {
+ result = -ERANGE;
+ goto reply;
+ }
+
+ disks_len = res.num_disks * sizeof(struct sync_disk);
+ token_len = sizeof(struct token) + disks_len;
+
+ token = malloc(token_len);
+ if (!token) {
+ result = -ENOMEM;
+ goto reply;
+ }
+
+ memset(token, 0, token_len);
+ token->disks = (struct sync_disk *)&token->r.disks[0]; /* shorthand */
+ token->r.num_disks = res.num_disks;
+ memcpy(token->r.lockspace_name, res.lockspace_name, SANLK_NAME_LEN);
+ memcpy(token->r.name, res.name, SANLK_NAME_LEN);
+
+ token->acquire_lver = res.lver;
+ token->acquire_data64 = res.data64;
+ token->acquire_data32 = res.data32;
+ token->acquire_flags = res.flags;
+
+ /*
+ * receive sanlk_disk's / sync_disk's
+ *
+ * WARNING: as a shortcut, this requires that sync_disk and
+ * sanlk_disk match; this is the reason for the pad fields
+ * in sanlk_disk (TODO: let these differ?)
+ */
+
+ rv = recv(fd, token->disks, disks_len, MSG_WAITALL);
+ if (rv != disks_len) {
+ free(token);
+ result = -ENOTCONN;
+ goto reply;
+ }
+
+ /* zero out pad1 and pad2, see WARNING above */
+ for (j = 0; j < token->r.num_disks; j++) {
+ token->disks[j].sector_size = 0;
+ token->disks[j].fd = -1;
+ }
+
+ log_debug("cmd_request %d,%d force_mode %u %.48s:%.48s:%.256s:%llu",
+ ca->ci_in, fd, force_mode,
+ token->r.lockspace_name,
+ token->r.name,
+ token->disks[0].path,
+ (unsigned long long)token->r.disks[0].offset);
+
+ result = request_token(task, token, force_mode, &owner_id);
+
+ if (result < 0)
+ goto reply;
+
+#if 0
+ host_bitmap_set(token, owner_id);
+#endif
+
+ reply:
+ free(token);
+ log_debug("cmd_request %d,%d done %d", ca->ci_in, fd, result);
+
+ memcpy(&h, &ca->header, sizeof(struct sm_header));
+ h.length = sizeof(h);
+ h.data = result;
+ h.data2 = 0;
+ send(fd, &h, sizeof(h), MSG_NOSIGNAL);
+
+ client_resume(ca->ci_in);
+}
+
static void cmd_add_lockspace(struct cmd_args *ca)
{
struct sm_header h;
@@ -1422,6 +1521,9 @@ static void call_cmd(struct task *task, struct cmd_args *ca)
case SM_CMD_INQUIRE:
cmd_inquire(task, ca);
break;
+ case SM_CMD_REQUEST:
+ cmd_request(task, ca);
+ break;
case SM_CMD_ADD_LOCKSPACE:
strcpy(client[ca->ci_in].owner_name, "add_lockspace");
cmd_add_lockspace(ca);
@@ -1770,7 +1872,10 @@ static void cmd_restrict(int ci, int fd, struct sm_header *h_recv)
send(fd, &h, sizeof(h), MSG_NOSIGNAL);
}
-static void process_cmd_thread_lockspace(int ci_in, struct sm_header *h_recv)
+/* cmd comes from a transient client/fd set up just to pass the cmd,
+ and is not being done on behalf of another registered client/fd */
+
+static void process_cmd_thread_unregistered(int ci_in, struct sm_header *h_recv)
{
struct cmd_args *ca;
struct sm_header h;
@@ -1784,12 +1889,7 @@ static void process_cmd_thread_lockspace(int ci_in, struct sm_header *h_recv)
ca->ci_in = ci_in;
memcpy(&ca->header, h_recv, sizeof(struct sm_header));
- if (h_recv->cmd == SM_CMD_ADD_LOCKSPACE)
- strcpy(client[ci_in].owner_name, "add_lockspace");
- else if (h_recv->cmd == SM_CMD_REM_LOCKSPACE)
- strcpy(client[ci_in].owner_name, "rem_lockspace");
- else
- strcpy(client[ci_in].owner_name, "cmd_lockspace");
+ snprintf(client[ci_in].owner_name, SANLK_NAME_LEN, "cmd%d", h_recv->cmd);
rv = thread_pool_add_work(ca);
if (rv < 0)
@@ -1807,7 +1907,10 @@ static void process_cmd_thread_lockspace(int ci_in, struct sm_header *h_recv)
close(client[ci_in].fd);
}
-static void process_cmd_thread_resource(int ci_in, struct sm_header *h_recv)
+/* cmd either comes from a registered client/fd,
+ or is targeting a registered client/fd */
+
+static void process_cmd_thread_registered(int ci_in, struct sm_header *h_recv)
{
struct cmd_args *ca;
struct sm_header h;
@@ -2024,10 +2127,11 @@ static void process_connection(int ci)
break;
case SM_CMD_ADD_LOCKSPACE:
case SM_CMD_REM_LOCKSPACE:
+ case SM_CMD_REQUEST:
rv = client_suspend(ci);
if (rv < 0)
return;
- process_cmd_thread_lockspace(ci, &h);
+ process_cmd_thread_unregistered(ci, &h);
break;
case SM_CMD_ACQUIRE:
case SM_CMD_RELEASE:
@@ -2037,7 +2141,7 @@ static void process_connection(int ci)
rv = client_suspend(ci);
if (rv < 0)
return;
- process_cmd_thread_resource(ci, &h);
+ process_cmd_thread_registered(ci, &h);
break;
default:
log_error("ci %d cmd %d unknown", ci, h.cmd);
@@ -2491,6 +2595,7 @@ static void print_usage(void)
printf("sanlock client acquire -r RESOURCE -p <pid>\n");
printf("sanlock client release -r RESOURCE -p <pid>\n");
printf("sanlock client inquire -p <pid>\n");
+ printf("sanlock client request -r RESOURCE\n");
printf("\n");
printf("sanlock direct <action> [-a 0|1] [-o 0|1]\n");
printf("sanlock direct init -s LOCKSPACE\n");
@@ -2594,6 +2699,8 @@ static int read_command_line(int argc, char *argv[])
com.action = ACT_RELEASE;
else if (!strcmp(act, "inquire"))
com.action = ACT_INQUIRE;
+ else if (!strcmp(act, "request"))
+ com.action = ACT_REQUEST;
else {
log_tool("client action \"%s\" is unknown", act);
exit(EXIT_FAILURE);
@@ -2715,6 +2822,9 @@ static int read_command_line(int argc, char *argv[])
case 'g':
com.local_host_generation = atoll(optionarg);
break;
+ case 'f':
+ com.force_mode = strtoul(optionarg, NULL, 0);
+ break;
case 's':
parse_arg_lockspace(optionarg); /* com.lockspace */
break;
@@ -2902,6 +3012,12 @@ static int do_client(void)
log_tool("\"%s\"", res_state);
break;
+ case ACT_REQUEST:
+ log_tool("request");
+ rv = sanlock_request(0, com.force_mode, com.res_args[0]);
+ log_tool("request done %d", rv);
+ break;
+
default:
log_tool("action not implemented");
rv = -1;
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 2a65c60..1d9d7a3 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -34,7 +34,7 @@ int get_rand(int a, int b);
struct request_record {
uint64_t lver;
- uint8_t force_mode;
+ uint32_t force_mode;
};
#define DBLOCK_CHECKSUM_LEN 48 /* ends before checksum field */
@@ -190,7 +190,6 @@ static int read_leader(struct task *task,
return rv;
}
-
#if 0
static int read_request(struct task *task,
struct sync_disk *disk,
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 0356807..7395805 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -138,17 +138,6 @@ struct space {
struct host_info host_info[DEFAULT_MAX_HOSTS];
};
-struct sm_header {
- uint32_t magic;
- uint32_t version;
- uint32_t cmd;
- uint32_t cmd_flags;
- uint32_t length;
- uint32_t seq;
- uint32_t data;
- uint32_t data2;
-};
-
/*
* Example of watchdog behavior when host_id renewals fail, assuming
* that sanlock cannot successfully kill the pids it is supervising that
@@ -502,6 +491,7 @@ struct command_line {
int num_hosts; /* -n */
int max_hosts; /* -m */
int res_count;
+ uint32_t force_mode;
char our_host_name[SANLK_NAME_LEN+1];
char *dump_path;
struct sanlk_lockspace lockspace; /* -s LOCKSPACE */
@@ -526,6 +516,7 @@ enum {
ACT_ACQUIRE,
ACT_RELEASE,
ACT_INQUIRE,
+ ACT_REQUEST,
ACT_ACQUIRE_ID,
ACT_RELEASE_ID,
ACT_RENEW_ID,
diff --git a/src/sanlock_resource.h b/src/sanlock_resource.h
index 11f9ace..dd1f878 100644
--- a/src/sanlock_resource.h
+++ b/src/sanlock_resource.h
@@ -37,6 +37,8 @@ int sanlock_release(int sock, int pid, uint32_t flags, int res_count,
int sanlock_inquire(int sock, int pid, uint32_t flags, int *res_count,
char **res_state);
+int sanlock_request(uint32_t flags, uint32_t force_mode,
+ struct sanlk_resource *res);
/*
* Functions to convert between string and struct resource formats.
diff --git a/src/sanlock_sock.h b/src/sanlock_sock.h
index 154665a..1afa4df 100644
--- a/src/sanlock_sock.h
+++ b/src/sanlock_sock.h
@@ -22,6 +22,18 @@ enum {
SM_CMD_RELEASE = 8,
SM_CMD_INQUIRE = 9,
SM_CMD_RESTRICT = 10,
+ SM_CMD_REQUEST = 11,
+};
+
+struct sm_header {
+ uint32_t magic;
+ uint32_t version;
+ uint32_t cmd; /* SM_CMD_ */
+ uint32_t cmd_flags;
+ uint32_t length;
+ uint32_t seq;
+ uint32_t data;
+ uint32_t data2;
};
#define SANLK_STATE_MAXSTR 4096
diff --git a/src/token_manager.c b/src/token_manager.c
index df5d632..977e74f 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -185,6 +185,51 @@ int release_token(struct task *task, struct token *token)
return rv; /* SANLK_OK */
}
+int request_token(struct task *task, struct token *token, uint32_t force_mode,
+ uint64_t *owner_id)
+{
+#if 0
+ struct leader_record leader;
+ struct request_record req;
+ int rv;
+
+ rv = open_disks(token->disks, token->r.num_disks);
+ if (!majority_disks(token, rv)) {
+ log_errot(token, "request open_disk error %s", token->disks[0].path);
+ return -ENODEV;
+ }
+
+ rv = paxos_lease_leader_read(task, token, &leader, "request");
+ if (rv < 0)
+ goto out;
+
+ if (leader.timestamp == LEASE_FREE) {
+ }
+
+ if (leader.lver >= token->acquire_lver) {
+ }
+
+ *owner_id = leader.owner_id;
+
+ rv = paxos_lease_request_read(task, token, &req);
+ if (rv < 0)
+ goto out;
+
+ if (req.lver >= token->acquire_lver) {
+ goto out;
+ }
+
+ req.lver = token->acquire_lver;
+ req.force_mode = force_mode;
+
+ rv = paxos_lease_request_write(task, token, &req);
+ out:
+ close_disks(token->disks, token->r.num_disks);
+ return rv;
+#endif
+ return -1;
+}
+
/* thread that releases tokens of pid's that die */
static void *async_release_thread(void *arg GNUC_UNUSED)
diff --git a/src/token_manager.h b/src/token_manager.h
index e860336..7825642 100644
--- a/src/token_manager.h
+++ b/src/token_manager.h
@@ -16,6 +16,9 @@ int release_token(struct task *task, struct token *token);
void release_token_async(struct token *token);
+int request_token(struct task *task, struct token *token, uint32_t force_mode,
+ uint64_t *owner_id);
+
int add_resource(struct token *token, int pid);
void del_resource(struct token *token);
12 years, 3 months
src/client_resource.c src/delta_lease.c src/diskio.c src/host_id.c src/paxos_lease.c
by David Teigland
src/client_resource.c | 9 +++++++--
src/delta_lease.c | 2 ++
src/diskio.c | 5 +++++
src/host_id.c | 5 +++++
src/paxos_lease.c | 12 ++++++------
5 files changed, 25 insertions(+), 8 deletions(-)
New commits:
commit 39b47eeee5edb2d69088d83e23fc83fc1c62c046
Author: David Teigland <teigland(a)redhat.com>
Date: Tue Aug 23 11:57:39 2011 -0500
sanlock: minor fixes and cleanups
handle null str arg in str_to_lockspace
handle error from direct_align
free mem from leaders_read_multiple (not using this function)
diff --git a/src/client_resource.c b/src/client_resource.c
index eeffae4..18f185f 100644
--- a/src/client_resource.c
+++ b/src/client_resource.c
@@ -536,6 +536,7 @@ int sanlock_args_to_state(int res_count,
free(str);
}
+ /* caller to free state */
*res_state = state;
return 0;
}
@@ -592,6 +593,7 @@ int sanlock_state_to_args(char *res_state,
memset(str, 0, sizeof(str));
}
+ /* caller to free res_count res and args */
*res_count = arg_count;
*res_args = args;
return 0;
@@ -617,6 +619,9 @@ int sanlock_str_to_lockspace(char *str, struct sanlk_lockspace *ls)
char *offset = NULL;
int i;
+ if (!str)
+ return -EINVAL;
+
for (i = 0; i < strlen(str); i++) {
if (str[i] == '\\') {
i++;
@@ -646,8 +651,8 @@ int sanlock_str_to_lockspace(char *str, struct sanlk_lockspace *ls)
offset++;
}
- if (str)
- strncpy(ls->name, str, NAME_ID_SIZE);
+ strncpy(ls->name, str, NAME_ID_SIZE);
+
if (host_id)
ls->host_id = atoll(host_id);
if (path)
diff --git a/src/delta_lease.c b/src/delta_lease.c
index 12386ce..f8dc729 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -538,6 +538,8 @@ int delta_lease_init(struct task *task,
max_hosts = DEFAULT_MAX_HOSTS;
align_size = direct_align(disk);
+ if (align_size < 0)
+ return align_size;
if (disk->sector_size * max_hosts > align_size)
return -E2BIG;
diff --git a/src/diskio.c b/src/diskio.c
index c336b3b..bd8f72e 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -153,6 +153,11 @@ int open_disk(struct sync_disk *disk)
}
align_size = direct_align(disk);
+ if (align_size < 0) {
+ rv = align_size;
+ close(fd);
+ goto fail;
+ }
if (disk->offset % align_size) {
rv = -EBADSLT;
diff --git a/src/host_id.c b/src/host_id.c
index a16194e..b7faef2 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -356,6 +356,11 @@ static void *lockspace_thread(void *arg_in)
opened = 1;
sp->align_size = direct_align(&sp->host_id_disk);
+ if (sp->align_size < 0) {
+ log_erros(sp, "direct_align error");
+ acquire_result = sp->align_size;
+ goto set_status;
+ }
sp->lease_status.renewal_read_buf = malloc(sp->align_size);
if (!sp->lease_status.renewal_read_buf) {
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 0816a64..2a65c60 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -779,7 +779,7 @@ static int _leader_read_multiple(struct task *task,
if (!majority_disks(token, num_reads)) {
log_errot(token, "%s leader read error %d", caller, rv);
error = SANLK_LEADER_READ;
- goto fail;
+ goto out;
}
/* check that a majority of disks have the same leader */
@@ -801,13 +801,11 @@ static int _leader_read_multiple(struct task *task,
if (!found) {
log_errot(token, "%s leader inconsistent", caller);
error = SANLK_LEADER_DIFF;
- goto fail;
+ goto out;
}
- memcpy(leader_ret, &leader, sizeof(struct leader_record));
- return SANLK_OK;
-
- fail:
+ error = SANLK_OK;
+ out:
memcpy(leader_ret, &leader, sizeof(struct leader_record));
free(leaders);
free(leader_reps);
@@ -1481,6 +1479,8 @@ int paxos_lease_init(struct task *task,
max_hosts = DEFAULT_MAX_HOSTS;
align_size = direct_align(&token->disks[0]);
+ if (align_size < 0)
+ return align_size;
if (token->disks[0].sector_size * (2 + max_hosts) > align_size)
return -E2BIG;
12 years, 3 months
python/sanlock.c
by David Teigland
python/sanlock.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
New commits:
commit 11336fcfd7ab759849e24d27ccece8f5478af9b7
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Fri Aug 19 13:27:12 2011 +0000
python: expose errno in the exception
diff --git a/python/sanlock.c b/python/sanlock.c
index 161499c..9c1bfa0 100644
--- a/python/sanlock.c
+++ b/python/sanlock.c
@@ -435,6 +435,23 @@ exit_fail:
return NULL;
}
+/* exception_errno */
+PyDoc_STRVAR(pydoc_errno, "exception errno");
+
+static PyObject *
+py_exception_errno(PyObject *self, PyBaseExceptionObject *exc_obj)
+{
+ PyObject *exc_errno;
+
+ exc_errno = PyTuple_GetItem(exc_obj->args, 0);
+
+ if (exc_errno == NULL)
+ return NULL;
+
+ Py_INCREF(exc_errno);
+ return exc_errno;
+}
+
static PyMethodDef
sanlock_methods[] = {
{"register", py_register, METH_NOARGS, pydoc_register},
@@ -450,9 +467,43 @@ sanlock_methods[] = {
{NULL, NULL, 0, NULL}
};
+static PyMethodDef
+sanlock_exception = {
+ "errno", (PyCFunction) py_exception_errno, METH_O, pydoc_errno
+};
+
+static void
+initexception(void)
+{
+ int rv;
+ PyObject *dict, *func, *meth;
+
+ dict = PyDict_New();
+
+ if (dict == NULL)
+ return;
+
+ func = PyCFunction_New(&sanlock_exception, NULL);
+ meth = PyObject_CallFunction((PyObject *) &PyProperty_Type, "O", func);
+ Py_DECREF(func);
+
+ if (meth == NULL)
+ return;
+
+ rv = PyDict_SetItemString(dict, sanlock_exception.ml_name, meth);
+ Py_DECREF(meth);
+
+ if (rv < 0)
+ return;
+
+ py_exception = PyErr_NewException("sanlock.SanlockException", NULL, dict);
+ Py_DECREF(dict);
+}
+
PyMODINIT_FUNC
initsanlock(void)
{
+
py_module = Py_InitModule4("sanlock",
sanlock_methods, pydoc_sanlock, NULL, PYTHON_API_VERSION);
@@ -461,7 +512,11 @@ initsanlock(void)
return;
/* Initializing sanlock exception */
- py_exception = PyErr_NewException("sanlock.SanlockException", NULL, NULL);
+ initexception();
+
+ if (py_exception == NULL)
+ return;
+
Py_INCREF(py_exception);
PyModule_AddObject(py_module, "SanlockException", py_exception);
}
12 years, 3 months
2 commits - src/client_msg.c src/delta_lease.c src/delta_lease.h src/direct.c src/host_id.c src/host_id.h src/leader.h src/main.c src/sanlock_internal.h src/task.c
by David Teigland
src/client_msg.c | 6 -
src/delta_lease.c | 9 -
src/delta_lease.h | 1
src/direct.c | 3
src/host_id.c | 276 +++++++++++++++++++++++++++++++++++++------------
src/host_id.h | 3
src/leader.h | 8 +
src/main.c | 39 ++++--
src/sanlock_internal.h | 21 +++
src/task.c | 1
10 files changed, 280 insertions(+), 87 deletions(-)
New commits:
commit 258db5c545d4f9ac8d8a041a298867e6a950b4f1
Author: Federico Simoncelli <fsimonce(a)redhat.com>
Date: Wed Aug 17 15:35:37 2011 -0500
client: return appropriate errno on failure
diff --git a/src/client_msg.c b/src/client_msg.c
index 21b18db..96ad970 100644
--- a/src/client_msg.c
+++ b/src/client_msg.c
@@ -38,7 +38,7 @@ int connect_socket(int *sock_fd)
s = socket(AF_LOCAL, SOCK_STREAM, 0);
if (s < 0)
- return -1;
+ return -errno;
rv = sanlock_socket_address(&addr);
if (rv < 0)
@@ -46,7 +46,7 @@ int connect_socket(int *sock_fd)
rv = connect(s, (struct sockaddr *) &addr, sizeof(struct sockaddr_un));
if (rv < 0) {
- rv = -1;
+ rv = -errno;
close(s);
return rv;
}
@@ -70,7 +70,7 @@ int send_header(int sock, int cmd, uint32_t cmd_flags, int datalen,
rv = send(sock, (void *) &header, sizeof(struct sm_header), 0);
if (rv < 0)
- return -1;
+ return -errno;
return 0;
}
commit 9fc87ead232eb7ac07dfb98be91d18ce772f4ccb
Author: David Teigland <teigland(a)redhat.com>
Date: Wed Aug 17 15:22:17 2011 -0500
sanlock: check other host_id leases
keep track of the owner and timestamp changes of other host_id
leases, and check if our host_id bit has been set in their
bitmap (there's no code yet that would set it)
diff --git a/src/delta_lease.c b/src/delta_lease.c
index b210d7f..12386ce 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -325,6 +325,7 @@ int delta_lease_renew(struct task *task,
struct sync_disk *disk,
char *space_name,
int prev_result,
+ int *read_result,
struct leader_record *leader_last,
struct leader_record *leader_ret)
{
@@ -338,12 +339,11 @@ int delta_lease_renew(struct task *task,
if (!leader_last)
return -EINVAL;
+ *read_result = SANLK_ERROR;
+
host_id = leader_last->owner_id;
- /* read all delta leases */
- iobuf_len = direct_align(disk);
- if (iobuf_len <= 0)
- return -EINVAL;
+ iobuf_len = sp->align_size;
/* offset of our leader_record */
offset = (host_id - 1) * disk->sector_size;
@@ -428,6 +428,7 @@ int delta_lease_renew(struct task *task,
}
read_done:
+ *read_result = SANLK_OK;
memcpy(&leader, task->iobuf+offset, sizeof(struct leader_record));
rv = verify_leader(disk, space_name, host_id, &leader, "delta_renew");
diff --git a/src/delta_lease.h b/src/delta_lease.h
index 9206c76..4a0cad2 100644
--- a/src/delta_lease.h
+++ b/src/delta_lease.h
@@ -29,6 +29,7 @@ int delta_lease_renew(struct task *task,
struct sync_disk *disk,
char *space_name,
int prev_result,
+ int *read_result,
struct leader_record *leader_last,
struct leader_record *leader_ret);
diff --git a/src/direct.c b/src/direct.c
index 8b09fe8..f8f15db 100644
--- a/src/direct.c
+++ b/src/direct.c
@@ -191,7 +191,7 @@ static int do_delta_action(int action,
struct leader_record leader;
struct sync_disk sd;
struct space space;
- int rv;
+ int read_result, rv;
/* for log_space in delta functions */
memset(&space, 0, sizeof(space));
@@ -230,6 +230,7 @@ static int do_delta_action(int action,
rv = delta_lease_renew(task, &space, &sd,
ls->name,
-1,
+ &read_result,
&leader,
&leader);
break;
diff --git a/src/host_id.c b/src/host_id.c
index 67e532a..a16194e 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -31,6 +31,7 @@
#include "host_id.h"
#include "watchdog.h"
#include "task.h"
+#include "direct.h"
static unsigned int space_id_counter = 1;
@@ -161,11 +162,107 @@ int host_id_disk_info(char *name, struct sync_disk *disk)
return rv;
}
+static void clear_bit(int host_id, char *bitmap)
+{
+ char *byte = bitmap + ((host_id - 1) / 8);
+ unsigned int bit = host_id % 8;
+
+ *byte &= ~bit;
+}
+
+static void set_bit(int host_id, char *bitmap)
+{
+ char *byte = bitmap + ((host_id - 1) / 8);
+ unsigned int bit = host_id % 8;
+
+ *byte |= bit;
+}
+
+static int test_bit(int host_id, char *bitmap)
+{
+ char *byte = bitmap + ((host_id - 1) / 8);
+ unsigned int bit = host_id % 8;
+
+ return *byte & bit;
+}
+
+/*
+ * when entering the monitor loop in paxos_lease, once
+ * last_check - last_live > host_dead_seconds, it's expired
+ *
+ * at local time t=last_live, we read timestamp=X
+ * at local time t=last_check, we read timestamp=X
+ * so once the difference between last_live and last_check
+ * is > host_dead_seconds, the host has not renewed its
+ * timestamp in host_dead_seconds.
+ */
+
+void check_other_leases(struct task *task, struct space *sp, char *buf)
+{
+ struct leader_record *leader;
+ struct sync_disk *disk;
+ struct host_info *info;
+ char *bitmap;
+ uint64_t now;
+ int i, new;
+
+ disk = &sp->host_id_disk;
+
+ now = monotime();
+ new = 0;
+
+ for (i = 0; i < DEFAULT_MAX_HOSTS; i++) {
+ if (i+1 == sp->host_id)
+ continue;
+
+ info = &sp->host_info[i];
+ info->last_check = now;
+
+ leader = (struct leader_record *)(buf + (i * disk->sector_size));
+
+ if (info->owner_id == leader->owner_id &&
+ info->owner_generation == leader->owner_generation &&
+ info->timestamp == leader->timestamp) {
+ continue;
+ }
+
+ info->owner_id = leader->owner_id;
+ info->owner_generation = leader->owner_generation;
+ info->timestamp = leader->timestamp;
+ info->last_live = now;
+
+ bitmap = (char *)leader + HOSTID_BITMAP_OFFSET;
+
+ if (!test_bit(sp->host_id, bitmap))
+ continue;
+
+ /* this host has made a request for us, we won't take a new
+ request from this host for another request_finish_seconds */
+
+ if (now - info->last_req < task->request_finish_seconds)
+ continue;
+
+ log_space(sp, "request from host_id %d", i+1);
+ info->last_req = now;
+ new = 1;
+ }
+
+ /* TODO: add a thread that will periodically scan spaces and
+ for any with req_count > req_check, scan request blocks for
+ all locally held paxos leases in that lockspace. */
+
+ if (new) {
+ pthread_mutex_lock(&sp->mutex);
+ sp->req_count++;
+ pthread_mutex_unlock(&sp->mutex);
+ }
+}
+
/*
* check if our_host_id_thread has renewed within timeout
*/
-int host_id_check(struct task *task, struct space *sp)
+int check_our_lease(struct task *task, struct space *sp, int *check_all, char *check_buf)
{
uint64_t last_success;
int corrupt_result;
@@ -174,31 +271,39 @@ int host_id_check(struct task *task, struct space *sp)
pthread_mutex_lock(&sp->mutex);
last_success = sp->lease_status.renewal_last_success;
corrupt_result = sp->lease_status.corrupt_result;
+
+ if (sp->lease_status.renewal_read_count > sp->lease_status.renewal_read_check) {
+ /* main loop will pass this buf to check_other_leases next */
+ sp->lease_status.renewal_read_check = sp->lease_status.renewal_read_count;
+ *check_all = 1;
+ if (check_buf)
+ memcpy(check_buf, sp->lease_status.renewal_read_buf, sp->align_size);
+ }
pthread_mutex_unlock(&sp->mutex);
if (corrupt_result) {
- log_erros(sp, "host_id_check corrupt %d", corrupt_result);
- return 0;
+ log_erros(sp, "check_our_lease corrupt %d", corrupt_result);
+ return -1;
}
gap = monotime() - last_success;
if (gap >= task->id_renewal_fail_seconds) {
- log_erros(sp, "host_id_check failed %d", gap);
- return 0;
+ log_erros(sp, "check_our_lease failed %d", gap);
+ return -1;
}
if (gap >= task->id_renewal_warn_seconds) {
- log_erros(sp, "host_id_check warning %d last_success %llu",
+ log_erros(sp, "check_our_lease warning %d last_success %llu",
gap, (unsigned long long)last_success);
}
if (com.debug_renew > 1) {
- log_space(sp, "host_id_check good %d %llu",
+ log_space(sp, "check_our_lease good %d %llu",
gap, (unsigned long long)last_success);
}
- return 1;
+ return 0;
}
/* If a renewal result is one of the listed errors, it means our
@@ -227,9 +332,9 @@ static void *lockspace_thread(void *arg_in)
struct task task;
struct space *sp;
struct leader_record leader;
- time_t last_attempt, last_success;
- int rv, result, delta_length, gap;
- int delta_result = 0;
+ uint64_t delta_begin, last_success;
+ int rv, delta_length, renewal_interval;
+ int acquire_result, delta_result, read_result;
int opened = 0;
int stop = 0;
@@ -240,49 +345,64 @@ static void *lockspace_thread(void *arg_in)
setup_task_aio(&task, main_task.use_aio, HOSTID_AIO_CB_SIZE);
memcpy(task.name, sp->space_name, NAME_ID_SIZE);
- last_attempt = monotime();
+ delta_begin = monotime();
rv = open_disk(&sp->host_id_disk);
if (rv < 0) {
log_erros(sp, "open_disk %s error %d", sp->host_id_disk.path, rv);
- result = -ENODEV;
+ acquire_result = -ENODEV;
goto set_status;
}
opened = 1;
- result = delta_lease_acquire(&task, sp, &sp->host_id_disk,
- sp->space_name, our_host_name_global,
- sp->host_id, &leader);
- delta_result = result;
- delta_length = monotime() - last_attempt;
+ sp->align_size = direct_align(&sp->host_id_disk);
+
+ sp->lease_status.renewal_read_buf = malloc(sp->align_size);
+ if (!sp->lease_status.renewal_read_buf) {
+ acquire_result = -ENOMEM;
+ goto set_status;
+ }
+
+ /*
+ * acquire the delta lease
+ */
+
+ delta_begin = monotime();
+
+ delta_result = delta_lease_acquire(&task, sp, &sp->host_id_disk,
+ sp->space_name, our_host_name_global,
+ sp->host_id, &leader);
+ delta_length = monotime() - delta_begin;
- if (result == SANLK_OK)
+ if (delta_result == SANLK_OK)
last_success = leader.timestamp;
+ acquire_result = delta_result;
+
/* we need to start the watchdog after we acquire the host_id but
before we allow any pid's to begin running */
- if (result == SANLK_OK) {
+ if (delta_result == SANLK_OK) {
rv = create_watchdog_file(sp, last_success);
if (rv < 0) {
log_erros(sp, "create_watchdog failed %d", rv);
- result = SANLK_ERROR;
+ acquire_result = SANLK_ERROR;
}
}
set_status:
pthread_mutex_lock(&sp->mutex);
- sp->lease_status.acquire_last_result = result;
- sp->lease_status.acquire_last_attempt = last_attempt;
- if (result == SANLK_OK)
+ sp->lease_status.acquire_last_result = acquire_result;
+ sp->lease_status.acquire_last_attempt = delta_begin;
+ if (delta_result == SANLK_OK)
sp->lease_status.acquire_last_success = last_success;
- sp->lease_status.renewal_last_result = result;
- sp->lease_status.renewal_last_attempt = last_attempt;
- if (result == SANLK_OK)
+ sp->lease_status.renewal_last_result = acquire_result;
+ sp->lease_status.renewal_last_attempt = delta_begin;
+ if (delta_result == SANLK_OK)
sp->lease_status.renewal_last_success = last_success;
pthread_mutex_unlock(&sp->mutex);
- if (result < 0)
+ if (acquire_result < 0)
goto out;
sp->host_generation = leader.owner_generation;
@@ -291,10 +411,14 @@ static void *lockspace_thread(void *arg_in)
pthread_mutex_lock(&sp->mutex);
stop = sp->thread_stop;
pthread_mutex_unlock(&sp->mutex);
-
if (stop)
break;
+
+ /*
+ * wait between each renewal
+ */
+
if (monotime() - last_success < task.id_renewal_seconds) {
sleep(1);
continue;
@@ -304,54 +428,69 @@ static void *lockspace_thread(void *arg_in)
usleep(500000);
}
- last_attempt = monotime();
- result = delta_lease_renew(&task, sp, &sp->host_id_disk,
- sp->space_name, delta_result,
- &leader, &leader);
- delta_result = result;
- delta_length = monotime() - last_attempt;
+ /*
+ * do a renewal, measuring length of time spent in renewal,
+ * and the length of time between successful renewals
+ */
+
+ delta_begin = monotime();
- if (result == SANLK_OK)
+ delta_result = delta_lease_renew(&task, sp, &sp->host_id_disk,
+ sp->space_name, delta_result,
+ &read_result, &leader, &leader);
+ delta_length = monotime() - delta_begin;
+
+ if (delta_result == SANLK_OK) {
+ renewal_interval = leader.timestamp - last_success;
last_success = leader.timestamp;
+ }
+
+
+ /*
+ * publish the results
+ */
pthread_mutex_lock(&sp->mutex);
- sp->lease_status.renewal_last_result = result;
- sp->lease_status.renewal_last_attempt = last_attempt;
+ sp->lease_status.renewal_last_result = delta_result;
+ sp->lease_status.renewal_last_attempt = delta_begin;
- if (result == SANLK_OK) {
- gap = last_success - sp->lease_status.renewal_last_success;
+ if (delta_result == SANLK_OK)
sp->lease_status.renewal_last_success = last_success;
- if (delta_length > task.id_renewal_seconds) {
- log_erros(sp, "renewed %llu delta_length %d too long",
- (unsigned long long)last_success,
- delta_length);
- } else if (com.debug_renew) {
- log_space(sp, "renewed %llu delta_length %d interval %d",
- (unsigned long long)last_success,
- delta_length, gap);
- }
-
- if (!sp->thread_stop)
- update_watchdog_file(sp, last_success);
- } else {
- log_erros(sp, "renewal error %d delta_length %d last_success %llu",
- result, delta_length,
- (unsigned long long)sp->lease_status.renewal_last_success);
+ if (delta_result != SANLK_OK && !sp->lease_status.corrupt_result)
+ sp->lease_status.corrupt_result = corrupt_result(delta_result);
- if (!sp->lease_status.corrupt_result)
- sp->lease_status.corrupt_result = corrupt_result(result);
+ if (read_result == SANLK_OK && task.iobuf) {
+ memcpy(sp->lease_status.renewal_read_buf, task.iobuf, sp->align_size);
+ sp->lease_status.renewal_read_count++;
}
+
+
+ /*
+ * pet the watchdog
+ */
+
+ if (delta_result == SANLK_OK && !sp->thread_stop)
+ update_watchdog_file(sp, last_success);
+
pthread_mutex_unlock(&sp->mutex);
- /* TODO: pass off all the delta leases we read (in task->iobuf)
- for analysis by another thread */
/*
- if (result == SANLK_OK)
- queue_delta_lease_analysis(sp, task->iobuf);
- */
+ * log the results
+ */
+
+ if (delta_result != SANLK_OK) {
+ log_erros(sp, "renewal error %d delta_length %d last_success %llu",
+ delta_result, delta_length, (unsigned long long)last_success);
+ } else if (delta_length > task.id_renewal_seconds) {
+ log_erros(sp, "renewed %llu delta_length %d too long",
+ (unsigned long long)last_success, delta_length);
+ } else if (com.debug_renew) {
+ log_space(sp, "renewed %llu delta_length %d interval %d",
+ (unsigned long long)last_success, delta_length, renewal_interval);
+ }
}
/* unlink called below to get it done ASAP */
@@ -368,6 +507,13 @@ static void *lockspace_thread(void *arg_in)
return NULL;
}
+static void free_sp(struct space *sp)
+{
+ if (sp->lease_status.renewal_read_buf)
+ free(sp->lease_status.renewal_read_buf);
+ free(sp);
+}
+
/*
* When this function returns, it needs to be safe to begin processing lease
* requests and allowing pid's to run, so we need to own our host_id, and the
@@ -498,7 +644,7 @@ int add_lockspace(struct sanlk_lockspace *ls)
list_del(&sp->list);
pthread_mutex_unlock(&spaces_mutex);
fail_free:
- free(sp);
+ free_sp(sp);
return rv;
}
@@ -601,7 +747,7 @@ void free_lockspaces(int wait)
if (!rv) {
log_space(sp, "free lockspace");
list_del(&sp->list);
- free(sp);
+ free_sp(sp);
}
}
pthread_mutex_unlock(&spaces_mutex);
diff --git a/src/host_id.h b/src/host_id.h
index 5cc8b50..69cb43f 100644
--- a/src/host_id.h
+++ b/src/host_id.h
@@ -13,7 +13,8 @@ int print_space_state(struct space *sp, char *str);
int _get_space_info(char *space_name, struct space *sp_out);
int get_space_info(char *space_name, struct space *sp_out);
int host_id_disk_info(char *name, struct sync_disk *disk);
-int host_id_check(struct task *task, struct space *sp);
+int check_our_lease(struct task *task, struct space *sp, int *check_all, char *check_buf);
+void check_other_leases(struct task *task, struct space *sp, char *buf);
int add_lockspace(struct sanlk_lockspace *ls);
int rem_lockspace(struct sanlk_lockspace *ls);
void free_lockspaces(int wait);
diff --git a/src/leader.h b/src/leader.h
index dca9b62..2830ae1 100644
--- a/src/leader.h
+++ b/src/leader.h
@@ -17,7 +17,7 @@
#define PAXOS_DISK_MAGIC 0x06152010
#define PAXOS_DISK_VERSION_MAJOR 0x00050000
-#define PAXOS_DISK_VERSION_MINOR 0x00000001
+#define PAXOS_DISK_VERSION_MINOR 0x00000001
#define DELTA_DISK_MAGIC 0x12212010
#define DELTA_DISK_VERSION_MAJOR 0x00030000
@@ -63,4 +63,10 @@ struct leader_record {
uint64_t write_timestamp; /* for extra info, debug */
};
+/* leader_record can use first 256 bytes of a sector,
+ bitmap uses the last 256 bytes */
+
+#define LEADER_RECORD_MAX 256
+#define HOSTID_BITMAP_OFFSET 256
+
#endif
diff --git a/src/main.c b/src/main.c
index c17b18f..3f0f448 100644
--- a/src/main.c
+++ b/src/main.c
@@ -546,7 +546,9 @@ static int main_loop(void)
struct timeval now, last_check;
int poll_timeout, check_interval;
unsigned int ms;
- int i, rv, empty, space_dead;
+ int i, rv, empty, check_all;
+ char *check_buf = NULL;
+ int check_buf_len = 0;
gettimeofday(&last_check, NULL);
poll_timeout = STANDARD_CHECK_INTERVAL;
@@ -582,9 +584,21 @@ static int main_loop(void)
continue;
}
last_check = now;
+ check_interval = STANDARD_CHECK_INTERVAL;
pthread_mutex_lock(&spaces_mutex);
list_for_each_entry_safe(sp, safe, &spaces, list) {
+ check_all = 0;
+
+ if (sp->align_size > check_buf_len) {
+ if (check_buf)
+ free(check_buf);
+ check_buf_len = sp->align_size;
+ check_buf = malloc(check_buf_len);
+ }
+ if (check_buf)
+ memset(check_buf, 0, check_buf_len);
+
if (sp->killing_pids) {
if (all_pids_dead(sp)) {
log_space(sp, "set thread_stop");
@@ -596,24 +610,26 @@ static int main_loop(void)
} else {
kill_pids(sp);
}
- check_interval = RECOVERY_CHECK_INTERVAL;
} else {
- space_dead = !host_id_check(&main_task, sp);
+ rv = check_our_lease(&main_task, sp,
+ &check_all, check_buf);
- if (space_dead || external_shutdown ||
- sp->external_remove) {
- log_space(sp, "set killing_pids dead %d "
+ if (rv || external_shutdown || sp->external_remove) {
+ log_space(sp, "set killing_pids check %d "
"shutdown %d remove %d",
- space_dead, external_shutdown,
+ rv, external_shutdown,
sp->external_remove);
- sp->space_dead = space_dead;
+ sp->space_dead = 1;
sp->killing_pids = 1;
kill_pids(sp);
- check_interval = RECOVERY_CHECK_INTERVAL;
- } else {
- check_interval = STANDARD_CHECK_INTERVAL;
}
}
+
+ if (!sp->killing_pids && check_all)
+ check_other_leases(&main_task, sp, check_buf);
+
+ if (sp->killing_pids)
+ check_interval = RECOVERY_CHECK_INTERVAL;
}
empty = list_empty(&spaces);
pthread_mutex_unlock(&spaces_mutex);
@@ -3018,6 +3034,7 @@ int main(int argc, char *argv[])
int rv;
BUILD_BUG_ON(sizeof(struct sanlk_disk) != sizeof(struct sync_disk));
+ BUILD_BUG_ON(sizeof(struct leader_record) > LEADER_RECORD_MAX);
memset(&com, 0, sizeof(com));
com.use_watchdog = DEFAULT_USE_WATCHDOG;
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 7f6f740..0356807 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -102,14 +102,28 @@ struct lease_status {
uint64_t acquire_last_success;
uint64_t renewal_last_attempt;
uint64_t renewal_last_success;
+
+ uint32_t renewal_read_count;
+ uint32_t renewal_read_check;
+ char *renewal_read_buf;
+};
+
+struct host_info {
+ uint64_t last_check; /* local monotime */
+ uint64_t last_live; /* local monotime */
+ uint64_t last_req; /* local monotime */
+ uint64_t owner_id;
+ uint64_t owner_generation;
+ uint64_t timestamp; /* remote monotime */
};
struct space {
+ struct list_head list;
char space_name[NAME_ID_SIZE];
uint64_t host_id;
uint64_t host_generation;
struct sync_disk host_id_disk;
- struct list_head list;
+ int align_size;
int space_id; /* used to refer to this space instance in log messages */
int space_dead;
int killing_pids;
@@ -119,6 +133,9 @@ struct space {
pthread_mutex_t mutex; /* protects lease_status, thread_stop */
struct lease_status lease_status;
int wd_fd;
+ uint32_t req_count;
+ uint32_t req_check;
+ struct host_info host_info[DEFAULT_MAX_HOSTS];
};
struct sm_header {
@@ -440,6 +457,8 @@ struct task {
int host_dead_seconds; /* calculated */
+ int request_finish_seconds; /* calculated */
+
unsigned int io_count; /* stats */
unsigned int to_count; /* stats */
diff --git a/src/task.c b/src/task.c
index c0ab44b..d54dabe 100644
--- a/src/task.c
+++ b/src/task.c
@@ -51,6 +51,7 @@ void setup_task_timeouts(struct task *task, int io_timeout_arg)
task->id_renewal_fail_seconds = id_renewal_fail_seconds;
task->id_renewal_warn_seconds = id_renewal_warn_seconds;
task->host_dead_seconds = host_dead_seconds;
+ task->request_finish_seconds = 3 * id_renewal_seconds; /* random */
/* the rest are calculated as needed in place */
/* hack to make just main thread log this info */
12 years, 3 months
src/delta_lease.c src/direct_lib.c src/diskio.c src/diskio.h src/host_id.c src/host_id.h src/leader.h src/log.c src/log.h src/main.c src/paxos_lease.c src/sanlock_internal.h src/task.c src/token_manager.c
by David Teigland
src/delta_lease.c | 196 ++++++++++++++++++++++++++++---------------------
src/direct_lib.c | 2
src/diskio.c | 75 +++++++++---------
src/diskio.h | 17 +++-
src/host_id.c | 77 ++++---------------
src/host_id.h | 1
src/leader.h | 2
src/log.c | 18 ++--
src/log.h | 23 +++--
src/main.c | 95 ++++++++++++++++++++---
src/paxos_lease.c | 2
src/sanlock_internal.h | 3
src/task.c | 72 ++++++++++++++++--
src/token_manager.c | 1
14 files changed, 358 insertions(+), 226 deletions(-)
New commits:
commit e58da9c2426a712575de29d77453ef2fb6e48c79
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Aug 12 11:18:17 2011 -0500
sanlock: read align_size in renewal
Read all delta leases when renewing our own, rather
than reading only our delta lease sector. This is
in preparation for using the info in the other delta
leases.
Also includes
- only permit -a 0|1 (not 2)
- remove bogus error message about corrupt renewal result
- record every new lockspace and resource in /var/log/sanlock.log
by default so the space_id/token_id values can later be
translated to actual lockspace/resource
- improvements to aio related error messages
- initializing task structs to zero
- don't reread sectors due to verify error to avoid
interfering with renewal aio
- free iobuf in find_callback_slot
- initialize our_host_name_global early in main so it
won't be blank if no cmd line option is given
- wait for all aio to complete in close_task_aio before
destroying aio context
diff --git a/src/delta_lease.c b/src/delta_lease.c
index bf5ad82..b210d7f 100644
--- a/src/delta_lease.c
+++ b/src/delta_lease.c
@@ -35,6 +35,12 @@
/* delta_leases are a series max_hosts leader_records, one leader per sector,
host N's delta_lease is the leader_record in sectors N-1 */
+/*
+ * variable names:
+ * rv: success is 0, failure is < 0
+ * error: success is 1 (SANLK_OK), failure is < 0
+ */
+
static void log_leader_error(int result,
char *space_name,
uint64_t host_id,
@@ -75,9 +81,8 @@ static int verify_leader(struct sync_disk *disk,
struct leader_record *lr,
const char *caller)
{
- struct leader_record leader_rr;
uint32_t sum;
- int result, rv;
+ int result;
if (lr->magic != DELTA_DISK_MAGIC) {
log_error("verify_leader %llu wrong magic %x %s",
@@ -126,6 +131,10 @@ static int verify_leader(struct sync_disk *disk,
fail:
log_leader_error(result, space_name, host_id, disk, lr, caller);
+ /*
+ struct leader_record leader_rr;
+ int rv;
+
memset(&leader_rr, 0, sizeof(leader_rr));
rv = read_sectors(disk, host_id - 1, 1, (char *)&leader_rr,
@@ -133,6 +142,7 @@ static int verify_leader(struct sync_disk *disk,
NULL, "delta_verify");
log_leader_error(rv, space_name, host_id, disk, &leader_rr, "delta_verify");
+ */
return result;
}
@@ -163,32 +173,6 @@ int delta_lease_leader_read(struct task *task,
return error;
}
-static int delta_lease_leader_reap(struct task *task,
- struct sync_disk *disk,
- char *space_name,
- uint64_t host_id,
- struct leader_record *leader_ret,
- const char *caller)
-{
- struct leader_record leader;
- int rv, error;
-
- /* host_id N is block offset N-1 */
-
- memset(&leader, 0, sizeof(struct leader_record));
- memset(leader_ret, 0, sizeof(struct leader_record));
-
- rv = read_sectors_reap(disk, host_id - 1, 1, (char *)&leader, sizeof(struct leader_record),
- task, "delta_leader");
- if (rv < 0)
- return rv;
-
- error = verify_leader(disk, space_name, host_id, &leader, caller);
-
- memcpy(leader_ret, &leader, sizeof(struct leader_record));
- return error;
-}
-
/*
* delta_lease_acquire:
* set the owner of host_id to our_host_name.
@@ -215,7 +199,7 @@ int delta_lease_acquire(struct task *task,
struct leader_record leader;
struct leader_record leader1;
uint64_t new_ts;
- int i, error, delay, delta_large_delay;
+ int i, error, rv, delay, delta_large_delay;
log_space(sp, "delta_acquire %llu begin", (unsigned long long)host_id);
@@ -300,10 +284,10 @@ int delta_lease_acquire(struct task *task,
(unsigned long long)leader.timestamp,
leader.resource_name);
- error = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
- task, "delta_leader");
- if (error < 0)
- return error;
+ rv = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
+ task, "delta_leader");
+ if (rv < 0)
+ return rv;
memcpy(&leader1, &leader, sizeof(struct leader_record));
@@ -345,41 +329,111 @@ int delta_lease_renew(struct task *task,
struct leader_record *leader_ret)
{
struct leader_record leader;
- uint64_t host_id;
+ uint64_t host_id, offset;
uint64_t new_ts;
- int io_timeout_save;
- int error;
+ char **p_iobuf;
+ int iobuf_len, io_timeout_save;
+ int rv;
if (!leader_last)
return -EINVAL;
host_id = leader_last->owner_id;
+ /* read all delta leases */
+ iobuf_len = direct_align(disk);
+ if (iobuf_len <= 0)
+ return -EINVAL;
+
+ /* offset of our leader_record */
+ offset = (host_id - 1) * disk->sector_size;
+ if (offset > iobuf_len)
+ return -EINVAL;
+
+
/* if the previous renew timed out in this initial read, and that read
is now complete, we can use that result here instead of discarding
it and doing another. */
- if (prev_result == SANLK_AIO_TIMEOUT && task->read_timeout) {
- error = delta_lease_leader_reap(task, disk, space_name, host_id,
- &leader, "delta_renew_reap");
+ if (prev_result == SANLK_AIO_TIMEOUT) {
+ if (!task->read_iobuf_timeout_aicb) {
+ /* shouldn't happen, when do_linux_aio returned AIO_TIMEOUT
+ it should have set read_iobuf_timeout_aicb */
+ log_erros(sp, "delta_renew reap no aicb");
+ goto skip_reap;
+ }
+
+ if (!task->iobuf) {
+ /* shouldn't happen */
+ log_erros(sp, "delta_renew reap no iobuf");
+ goto skip_reap;
+ }
- log_space(sp, "delta_renew %llu reap %d",
- (unsigned long long)host_id, error);
+ rv = read_iobuf_reap(disk->fd, disk->offset,
+ task->iobuf, iobuf_len, task);
- if (error == SANLK_OK) {
- task->read_timeout = NULL;
+ log_space(sp, "delta_renew reap %d", rv);
+
+ if (!rv) {
+ task->read_iobuf_timeout_aicb = NULL;
goto read_done;
}
+ skip_reap:
+ /* abandon the previous timed out read and try a new
+ one from scratch. the current task->iobuf mem will
+ freed when timeout_aicb completes sometime */
+
+ task->read_iobuf_timeout_aicb = NULL;
+ task->iobuf = NULL;
}
- task->read_timeout = NULL;
+ if (task->read_iobuf_timeout_aicb) {
+ /* this could happen get here if there was another read between
+ renewal reads, which timed out and caused
+ read_iobuf_timeout_aicb to be set; I don't think there are
+ any cases where that would happen, though. we could avoid
+ this confusion by passing back the timed out aicb along with
+ SANLK_AIO_TIMEOUT, and only save the timed out aicb when we
+ want to try to reap it later. */
+
+ log_space(sp, "delta_renew timeout_aicb is unexpectedly %p iobuf %p",
+ task->read_iobuf_timeout_aicb, task->iobuf);
+ task->read_iobuf_timeout_aicb = NULL;
+ task->iobuf = NULL;
+ }
- error = delta_lease_leader_read(task, disk, space_name, host_id,
- &leader, "delta_renew_read");
- if (error < 0)
- return error;
+ if (!task->iobuf) {
+ /* this will happen the first time renew is called, and after
+ a timed out renewal read fails to be reaped (see
+ task->iobuf = NULL above) */
+
+ p_iobuf = &task->iobuf;
+
+ rv = posix_memalign((void *)p_iobuf, getpagesize(), iobuf_len);
+ if (rv) {
+ log_erros(sp, "dela_renew memalign rv %d", rv);
+ rv = -ENOMEM;
+ }
+ }
+
+ rv = read_iobuf(disk->fd, disk->offset, task->iobuf, iobuf_len, task);
+ if (rv) {
+ /* the next time delta_lease_renew() is called, prev_result
+ will be this rv. If this rv is SANLK_AIO_TIMEOUT, we'll
+ try to reap the event */
+
+ log_erros(sp, "delta_renew read rv %d offset %llu %s",
+ rv, (unsigned long long)disk->offset, disk->path);
+ return rv;
+ }
read_done:
+ memcpy(&leader, task->iobuf+offset, sizeof(struct leader_record));
+
+ rv = verify_leader(disk, space_name, host_id, &leader, "delta_renew");
+ if (rv < 0)
+ return rv;
+
/* We can't always memcmp(&leader, leader_last) because previous writes
may have timed out and we don't know if they were actually written
or not. We can definately verify that we're still the owner,
@@ -388,7 +442,7 @@ int delta_lease_renew(struct task *task,
if (leader.owner_id != leader_last->owner_id ||
leader.owner_generation != leader_last->owner_generation ||
memcmp(leader.resource_name, leader_last->resource_name, NAME_ID_SIZE)) {
- log_erros(sp, "delta_renew %llu not owner", (unsigned long long)host_id);
+ log_erros(sp, "delta_renew not owner");
log_leader_error(0, space_name, host_id, disk, leader_last, "delta_renew_last");
log_leader_error(0, space_name, host_id, disk, &leader, "delta_renew_read");
return SANLK_RENEW_OWNER;
@@ -396,7 +450,7 @@ int delta_lease_renew(struct task *task,
if (prev_result == SANLK_OK &&
memcmp(&leader, leader_last, sizeof(struct leader_record))) {
- log_erros(sp, "delta_renew %llu reread mismatch", (unsigned long long)host_id);
+ log_erros(sp, "delta_renew reread mismatch");
log_leader_error(0, space_name, host_id, disk, leader_last, "delta_renew_last");
log_leader_error(0, space_name, host_id, disk, &leader, "delta_renew_read");
return SANLK_RENEW_DIFF;
@@ -419,41 +473,17 @@ int delta_lease_renew(struct task *task,
io_timeout_save = task->io_timeout_seconds;
task->io_timeout_seconds = task->host_dead_seconds;
- error = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
- task, "delta_leader");
+ rv = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
+ task, "delta_leader");
task->io_timeout_seconds = io_timeout_save;
- if (error < 0)
- return error;
+ if (rv < 0)
+ return rv;
-#if 0
/* the paper shows doing a delay and another read here, but it seems
unnecessary since we do the same at the beginning of the next renewal */
- delay = 2 * task->io_timeout_seconds;
- /* log_space(sp, "delta_renew sleep 2d %d", delay); */
- sleep(delay);
-
- error = delta_lease_leader_read(task, disk, space_name, host_id, &leader_read,
- "delta_renew_check");
- if (error < 0)
- return error;
-
- /*
- if ((leader.timestamp != new_ts) || (leader.owner_id != our_host_id))
- return SANLK_BAD_LEADER;
- */
-
- if (memcmp(&leader, &leader_read, sizeof(struct leader_record))) {
- log_erros(sp, "delta_renew %llu reread mismatch",
- (unsigned long long)host_id);
- log_leader_error(0, space_name, host_id, disk, &leader, "delta_renew_write");
- log_leader_error(0, space_name, host_id, disk, &leader_read, "delta_renew_reread");
- return SANLK_RENEW_DIFF;
- }
-#endif
-
memcpy(leader_ret, &leader, sizeof(struct leader_record));
return SANLK_OK;
}
@@ -467,7 +497,7 @@ int delta_lease_release(struct task *task,
{
struct leader_record leader;
uint64_t host_id;
- int error;
+ int rv;
if (!leader_last)
return -EINVAL;
@@ -480,10 +510,10 @@ int delta_lease_release(struct task *task,
leader.timestamp = LEASE_FREE;
leader.checksum = leader_checksum(&leader);
- error = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
- task, "delta_leader");
- if (error < 0)
- return error;
+ rv = write_sector(disk, host_id - 1, (char *)&leader, sizeof(struct leader_record),
+ task, "delta_leader");
+ if (rv < 0)
+ return rv;
memcpy(leader_ret, &leader, sizeof(struct leader_record));
return SANLK_OK;
diff --git a/src/direct_lib.c b/src/direct_lib.c
index 71b19e9..2cb79c3 100644
--- a/src/direct_lib.c
+++ b/src/direct_lib.c
@@ -22,9 +22,11 @@
#include "task.h"
void log_level(int space_id GNUC_UNUSED, int token_id GNUC_UNUSED,
+ char *name GNUC_UNUSED,
int level GNUC_UNUSED, const char *fmt GNUC_UNUSED, ...);
void log_level(int space_id GNUC_UNUSED, int token_id GNUC_UNUSED,
+ char *name GNUC_UNUSED,
int level GNUC_UNUSED, const char *fmt GNUC_UNUSED, ...)
{
}
diff --git a/src/diskio.c b/src/diskio.c
index bda0e47..c336b3b 100644
--- a/src/diskio.c
+++ b/src/diskio.c
@@ -306,10 +306,11 @@ static struct aicb *find_callback_slot(struct task *task)
struct iocb *ev_iocb = event.obj;
struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld old free",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2);
ev_aicb->used = 0;
-
- log_error("aio %s clear iocb %p event result %ld %ld",
- task->name, ev_iocb, event.res, event.res2);
+ free(ev_aicb->buf);
+ ev_aicb->buf = NULL;
goto find;
}
return NULL;
@@ -349,7 +350,8 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
rv = io_submit(task->aio_ctx, 1, &iocb);
if (rv < 0) {
- log_error("aio %s io_submit error %d", task->name, rv);
+ log_taske(task, "aio submit %p:%p:%p rv %d fd %d cmd %d",
+ aicb, iocb, buf, rv, fd, cmd);
goto out;
}
@@ -368,7 +370,8 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
if (rv == -EINTR)
goto retry;
if (rv < 0) {
- log_error("aio %s io_getevents error %d", task->name, rv);
+ log_taske(task, "aio getevent %p:%p:%p rv %d",
+ aicb, iocb, buf, rv);
goto out;
}
if (rv == 1) {
@@ -378,21 +381,21 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
ev_aicb->used = 0;
if (ev_iocb != iocb) {
- log_error("aio %s other iocb %p event result %ld %ld",
- task->name, ev_iocb, event.res, event.res2);
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld other free",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2);
free(ev_aicb->buf);
ev_aicb->buf = NULL;
goto retry;
}
if ((int)event.res < 0) {
- log_error("aio %s event result %ld %ld",
- task->name, event.res, event.res2);
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld match res",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2);
rv = event.res;
goto out;
}
if (event.res != len) {
- log_error("aio %s event len %d result %lu %lu",
- task->name, len, event.res, event.res2);
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld match len %d",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2, len);
rv = -EMSGSIZE;
goto out;
}
@@ -415,9 +418,8 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
task->to_count++;
- log_error("aio %s iocb %p timeout sec %d count %u ios %u",
- task->name, iocb, task->io_timeout_seconds,
- task->to_count, task->io_count);
+ log_taske(task, "aio timeout %p:%p:%p sec %d to_count %d",
+ aicb, iocb, buf, task->io_timeout_seconds, task->to_count);
rv = io_cancel(task->aio_ctx, iocb, &event);
if (!rv) {
@@ -428,7 +430,7 @@ static int do_linux_aio(int fd, uint64_t offset, char *buf, int len,
rv = SANLK_AIO_TIMEOUT;
if (cmd == IO_CMD_PREAD)
- task->read_timeout = aicb;
+ task->read_iobuf_timeout_aicb = aicb;
}
out:
return rv;
@@ -438,6 +440,7 @@ static int do_write_aio_linux(int fd, uint64_t offset, char *buf, int len, struc
{
return do_linux_aio(fd, offset, buf, len, task, IO_CMD_PWRITE);
}
+
static int do_read_aio_linux(int fd, uint64_t offset, char *buf, int len, struct task *task)
{
return do_linux_aio(fd, offset, buf, len, task, IO_CMD_PREAD);
@@ -689,32 +692,26 @@ int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
return rv;
}
-/* Try to reap the event of a previously timed out read_sectors.
- A task's last timed out read is saved in task->read_timeout. */
+/* Try to reap the event of a previously timed out read_iobuf.
+ The aicb used in a task's last timed out read_iobuf is
+ task->read_iobuf_timeout_aicb . */
-int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
- uint32_t sector_count, char *data, int data_len,
- struct task *task, const char *blktype GNUC_UNUSED)
+int read_iobuf_reap(int fd, uint64_t offset, char *iobuf, int iobuf_len, struct task *task)
{
struct timespec ts;
struct aicb *aicb;
struct iocb *iocb;
struct io_event event;
- char *iobuf;
- uint64_t offset;
- int iobuf_len;
int rv;
- iobuf_len = sector_count * disk->sector_size;
- offset = disk->offset + (sector_nr * disk->sector_size);
-
- aicb = task->read_timeout;
+ aicb = task->read_iobuf_timeout_aicb;
iocb = &aicb->iocb;
- iobuf = iocb->u.c.buf;
if (!aicb->used)
return -EINVAL;
- if (iocb->aio_fildes != disk->fd)
+ if (iocb->aio_fildes != fd)
+ return -EINVAL;
+ if (iocb->u.c.buf != iobuf)
return -EINVAL;
if (iocb->u.c.nbytes != iobuf_len)
return -EINVAL;
@@ -732,7 +729,8 @@ int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
if (rv == -EINTR)
goto retry;
if (rv < 0) {
- log_error("reap aio %s io_getevents error %d", task->name, rv);
+ log_taske(task, "aio getevent %p:%p:%p rv %d r",
+ aicb, iocb, iobuf, rv);
goto out;
}
if (rv == 1) {
@@ -742,28 +740,29 @@ int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
ev_aicb->used = 0;
if (ev_iocb != iocb) {
- log_error("reap aio %s other iocb %p event result %ld %ld",
- task->name, ev_iocb, event.res, event.res2);
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld other free r",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2);
free(ev_aicb->buf);
ev_aicb->buf = NULL;
goto retry;
}
if ((int)event.res < 0) {
- log_error("reap aio %s event result %ld %ld",
- task->name, event.res, event.res2);
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld match res r",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2);
rv = event.res;
goto out;
}
if (event.res != iobuf_len) {
- log_error("reap aio %s event len %d result %lu %lu",
- task->name, iobuf_len, event.res, event.res2);
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld match len %d r",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2, iobuf_len);
rv = -EMSGSIZE;
goto out;
}
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld match reap",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2);
+
rv = 0;
- memcpy(data, iobuf, data_len);
- free(iobuf);
goto out;
}
diff --git a/src/diskio.h b/src/diskio.h
index a54ffaf..4ce0632 100644
--- a/src/diskio.h
+++ b/src/diskio.h
@@ -14,12 +14,25 @@ int open_disk(struct sync_disk *disks);
int open_disks(struct sync_disk *disks, int num_disks);
int open_disks_fd(struct sync_disk *disks, int num_disks);
+/*
+ * iobuf functions require the caller to allocate iobuf using posix_memalign
+ * and pass it into the function
+ */
+
int write_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
struct task *task);
int read_iobuf(int fd, uint64_t offset, char *iobuf, int iobuf_len,
struct task *task);
+int read_iobuf_reap(int fd, uint64_t offset, char *iobuf, int iobuf_len,
+ struct task *task);
+
+/*
+ * sector functions allocate an iobuf themselves, copy into it for write, use it
+ * for io, copy out of it for read, and free it
+ */
+
int write_sector(const struct sync_disk *disk, uint64_t sector_nr,
const char *data, int data_len,
struct task *task, const char *blktype);
@@ -31,8 +44,4 @@ int write_sectors(const struct sync_disk *disk, uint64_t sector_nr,
int read_sectors(const struct sync_disk *disk, uint64_t sector_nr,
uint32_t sector_count, char *data, int data_len,
struct task *task, const char *blktype);
-
-int read_sectors_reap(const struct sync_disk *disk, uint64_t sector_nr,
- uint32_t sector_count, char *data, int data_len,
- struct task *task, const char *blktype);
#endif
diff --git a/src/host_id.c b/src/host_id.c
index 7341d65..67e532a 100644
--- a/src/host_id.c
+++ b/src/host_id.c
@@ -21,9 +21,7 @@
#include <syslog.h>
#include <sys/types.h>
#include <sys/time.h>
-#include <sys/utsname.h>
#include <sys/un.h>
-#include <uuid/uuid.h>
#include "sanlock_internal.h"
#include "sanlock_sock.h"
@@ -36,10 +34,6 @@
static unsigned int space_id_counter = 1;
-static struct random_data rand_data;
-static char rand_state[32];
-static pthread_mutex_t rand_mutex = PTHREAD_MUTEX_INITIALIZER;
-
struct list_head spaces;
struct list_head spaces_add;
struct list_head spaces_rem;
@@ -241,6 +235,7 @@ static void *lockspace_thread(void *arg_in)
sp = (struct space *)arg_in;
+ memset(&task, 0, sizeof(struct task));
setup_task_timeouts(&task, main_task.io_timeout_seconds);
setup_task_aio(&task, main_task.use_aio, HOSTID_AIO_CB_SIZE);
memcpy(task.name, sp->space_name, NAME_ID_SIZE);
@@ -293,9 +288,6 @@ static void *lockspace_thread(void *arg_in)
sp->host_generation = leader.owner_generation;
while (1) {
- if (stop)
- break;
-
pthread_mutex_lock(&sp->mutex);
stop = sp->thread_stop;
pthread_mutex_unlock(&sp->mutex);
@@ -348,14 +340,18 @@ static void *lockspace_thread(void *arg_in)
result, delta_length,
(unsigned long long)sp->lease_status.renewal_last_success);
- if (!sp->lease_status.corrupt_result) {
+ if (!sp->lease_status.corrupt_result)
sp->lease_status.corrupt_result = corrupt_result(result);
- log_erros(sp, "renewal error %d is corruption",
- sp->lease_status.corrupt_result);
- }
}
- stop = sp->thread_stop;
pthread_mutex_unlock(&sp->mutex);
+
+ /* TODO: pass off all the delta leases we read (in task->iobuf)
+ for analysis by another thread */
+
+ /*
+ if (result == SANLK_OK)
+ queue_delta_lease_analysis(sp, task->iobuf);
+ */
}
/* unlink called below to get it done ASAP */
@@ -454,6 +450,14 @@ int add_lockspace(struct sanlk_lockspace *ls)
list_add(&sp->list, &spaces_add);
pthread_mutex_unlock(&spaces_mutex);
+ /* save a record of what this space_id is for later debugging */
+ log_level(sp->space_id, 0, NULL, LOG_WARNING,
+ "lockspace %.48s:%llu:%.256s:%llu",
+ sp->space_name,
+ (unsigned long long)sp->host_id,
+ sp->host_id_disk.path,
+ (unsigned long long)sp->host_id_disk.offset);
+
rv = pthread_create(&sp->thread, NULL, lockspace_thread, sp);
if (rv < 0) {
log_erros(sp, "add_lockspace create thread failed");
@@ -603,55 +607,10 @@ void free_lockspaces(int wait)
pthread_mutex_unlock(&spaces_mutex);
}
-/* return a random int between a and b inclusive */
-
-int get_rand(int a, int b)
-{
- int32_t val;
- int rv;
-
- pthread_mutex_lock(&rand_mutex);
- rv = random_r(&rand_data, &val);
- pthread_mutex_unlock(&rand_mutex);
- if (rv < 0)
- return rv;
-
- return a + (int) (((float)(b - a + 1)) * val / (RAND_MAX+1.0));
-}
-
void setup_spaces(void)
{
- struct utsname name;
- char uuid[37];
- uuid_t uu;
-
INIT_LIST_HEAD(&spaces);
INIT_LIST_HEAD(&spaces_add);
INIT_LIST_HEAD(&spaces_rem);
-
- memset(rand_state, 0, sizeof(rand_state));
- memset(&rand_data, 0, sizeof(rand_data));
-
- initstate_r(time(NULL), rand_state, sizeof(rand_state), &rand_data);
-
- /* use host name from command line */
-
- if (com.our_host_name[0]) {
- memcpy(our_host_name_global, com.our_host_name, SANLK_NAME_LEN);
- return;
- }
-
- /* make up something that's likely to be different among hosts */
-
- memset(&our_host_name_global, 0, sizeof(our_host_name_global));
- memset(&name, 0, sizeof(name));
- memset(&uuid, 0, sizeof(uuid));
-
- uname(&name);
- uuid_generate(uu);
- uuid_unparse_lower(uu, uuid);
-
- snprintf(our_host_name_global, NAME_ID_SIZE, "%s.%s",
- uuid, name.nodename);
}
diff --git a/src/host_id.h b/src/host_id.h
index 703cb3f..5cc8b50 100644
--- a/src/host_id.h
+++ b/src/host_id.h
@@ -18,6 +18,5 @@ int add_lockspace(struct sanlk_lockspace *ls);
int rem_lockspace(struct sanlk_lockspace *ls);
void free_lockspaces(int wait);
void setup_spaces(void);
-int get_rand(int a, int b);
#endif
diff --git a/src/leader.h b/src/leader.h
index e100bb6..dca9b62 100644
--- a/src/leader.h
+++ b/src/leader.h
@@ -21,7 +21,7 @@
#define DELTA_DISK_MAGIC 0x12212010
#define DELTA_DISK_VERSION_MAJOR 0x00030000
-#define DELTA_DISK_VERSION_MINOR 0x00000001
+#define DELTA_DISK_VERSION_MINOR 0x00000002
/* for all disk structures:
uint64 aligned on 8 byte boundaries,
diff --git a/src/log.c b/src/log.c
index e753ee6..3cb4064 100644
--- a/src/log.c
+++ b/src/log.c
@@ -100,7 +100,7 @@ static void _log_save_ent(int level, int len)
* logfile and/or syslog (so callers don't block writing messages to files)
*/
-void log_level(int space_id, int token_id, int level, const char *fmt, ...)
+void log_level(int space_id, int token_id, char *name_in, int level, const char *fmt, ...)
{
va_list ap;
char name[NAME_ID_SIZE + 1];
@@ -109,19 +109,19 @@ void log_level(int space_id, int token_id, int level, const char *fmt, ...)
memset(name, 0, sizeof(name));
- if (!space_id && !token_id)
- snprintf(name, NAME_ID_SIZE, "-");
- else if (space_id && !token_id)
- snprintf(name, NAME_ID_SIZE, "s%u", space_id);
+ if (space_id && !token_id)
+ snprintf(name, NAME_ID_SIZE, "s%u ", space_id);
else if (!space_id && token_id)
- snprintf(name, NAME_ID_SIZE, "t%u", token_id);
+ snprintf(name, NAME_ID_SIZE, "r%u ", token_id);
else if (space_id && token_id)
- snprintf(name, NAME_ID_SIZE, "s%u:t%u", space_id, token_id);
+ snprintf(name, NAME_ID_SIZE, "s%u:r%u ", space_id, token_id);
+ else if (name_in)
+ snprintf(name, NAME_ID_SIZE, "%.8s ", name_in);
pthread_mutex_lock(&log_mutex);
- ret = snprintf(log_str + pos, len - pos, "%ld %s ",
- time(NULL), name);
+ ret = snprintf(log_str + pos, len - pos, "%llu %s",
+ (unsigned long long)monotime(), name);
pos += ret;
va_start(ap, fmt);
diff --git a/src/log.h b/src/log.h
index 1aef3b2..eacb91b 100644
--- a/src/log.h
+++ b/src/log.h
@@ -9,22 +9,25 @@
#ifndef __LOG_H__
#define __LOG_H__
-void log_level(int space_id, int token_id, int level, const char *fmt, ...)
- __attribute__((format(printf, 4, 5)));
+void log_level(int space_id, int token_id, char *name_in, int level, const char *fmt, ...)
+ __attribute__((format(printf, 5, 6)));
int setup_logging(void);
void close_logging(void);
void write_log_dump(int fd);
-#define log_debug(fmt, args...) log_level(0, 0, LOG_DEBUG, fmt, ##args)
-#define log_space(space, fmt, args...) log_level(space->space_id, 0, LOG_DEBUG, fmt, ##args)
-#define log_token(token, fmt, args...) log_level(0, token->token_id, LOG_DEBUG, fmt, ##args)
-#define log_spoke(space, token, fmt, args...) log_level(space->space_id, token->token_id, LOG_DEBUG, fmt, ##args)
+#define log_debug(fmt, args...) log_level(0, 0, NULL, LOG_DEBUG, fmt, ##args)
+#define log_space(space, fmt, args...) log_level(space->space_id, 0, NULL, LOG_DEBUG, fmt, ##args)
+#define log_token(token, fmt, args...) log_level(0, token->token_id, NULL, LOG_DEBUG, fmt, ##args)
+#define log_spoke(space, token, fmt, args...) log_level(space->space_id, token->token_id, NULL, LOG_DEBUG, fmt, ##args)
-#define log_error(fmt, args...) log_level(0, 0, LOG_ERR, fmt, ##args)
-#define log_erros(space, fmt, args...) log_level(space->space_id, 0, LOG_ERR, fmt, ##args)
-#define log_errot(token, fmt, args...) log_level(0, token->token_id, LOG_ERR, fmt, ##args)
-#define log_errst(space, token, fmt, args...) log_level(space->space_id, token->token_id, LOG_ERR, fmt, ##args)
+#define log_error(fmt, args...) log_level(0, 0, NULL, LOG_ERR, fmt, ##args)
+#define log_erros(space, fmt, args...) log_level(space->space_id, 0, NULL, LOG_ERR, fmt, ##args)
+#define log_errot(token, fmt, args...) log_level(0, token->token_id, NULL, LOG_ERR, fmt, ##args)
+#define log_errst(space, token, fmt, args...) log_level(space->space_id, token->token_id, NULL, LOG_ERR, fmt, ##args)
+
+#define log_taske(task, fmt, args...) log_level(0, 0, task->name, LOG_ERR, fmt, ##args)
+#define log_taskd(task, fmt, args...) log_level(0, 0, task->name, LOG_DEBUG, fmt, ##args)
/* use log_tool for tool actions (non-daemon), and for daemon until
logging is set up */
diff --git a/src/main.c b/src/main.c
index 1ea5a36..c17b18f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -30,6 +30,9 @@
#include <sys/time.h>
#include <sys/un.h>
#include <sys/mman.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <uuid/uuid.h>
#define EXTERN
#include "sanlock_internal.h"
@@ -48,9 +51,9 @@
#include "sanlock_admin.h"
/* priorities are LOG_* from syslog.h */
-int log_logfile_priority = LOG_ERR;
+int log_logfile_priority = LOG_WARNING;
int log_syslog_priority = LOG_ERR;
-int log_stderr_priority = LOG_ERR;
+int log_stderr_priority = -1; /* -D sets this to LOG_DEBUG */
struct client {
int used;
@@ -107,6 +110,10 @@ extern struct list_head spaces;
extern struct list_head spaces_rem;
extern pthread_mutex_t spaces_mutex;
+static struct random_data rand_data;
+static char rand_state[32];
+static pthread_mutex_t rand_mutex = PTHREAD_MUTEX_INITIALIZER;
+
/* FIXME: add a mutex for client array so we don't try to expand it
while a cmd thread is using it. Or, with a thread pool we know
when cmd threads are running and can expand when none are. */
@@ -853,16 +860,6 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
token->token_id = token_id_counter++;
new_tokens[i] = token;
alloc_count++;
-
- /* We use the token_id in log messages because the combination
- * of full length space_name+resource_name in each log message
- * would make excessively long lines. */
-
- log_token(token, "cmd_acquire %d,%d,%d %.48s:%.48s:%s:%llu",
- cl_ci, cl_fd, cl_pid,
- token->r.lockspace_name, token->r.name,
- token->disks[0].path,
- (unsigned long long)token->disks[0].offset);
}
rv = recv(fd, &opt, sizeof(struct sanlk_options), MSG_WAITALL);
@@ -916,6 +913,15 @@ static void cmd_acquire(struct task *task, struct cmd_args *ca)
}
token->host_id = space.host_id;
token->host_generation = space.host_generation;
+
+ /* save a record of what this token_id is for later debugging */
+ log_level(space.space_id, token->token_id, NULL, LOG_WARNING,
+ "resource %.48s:%.48s:%.256s:%llu for %d,%d,%d",
+ token->r.lockspace_name,
+ token->r.name,
+ token->r.disks[0].path,
+ (unsigned long long)token->r.disks[0].offset,
+ cl_ci, cl_fd, cl_pid);
}
for (i = 0; i < new_tokens_count; i++) {
@@ -1416,6 +1422,7 @@ static void *thread_pool_worker(void *data)
struct task task;
struct cmd_args *ca;
+ memset(&task, 0, sizeof(struct task));
setup_task_timeouts(&task, main_task.io_timeout_seconds);
setup_task_aio(&task, main_task.use_aio, WORKER_AIO_CB_SIZE);
snprintf(task.name, NAME_ID_SIZE, "worker%ld", (long)data);
@@ -2118,6 +2125,56 @@ static void setup_priority(void)
}
}
+/* return a random int between a and b inclusive */
+
+int get_rand(int a, int b);
+
+int get_rand(int a, int b)
+{
+ int32_t val;
+ int rv;
+
+ pthread_mutex_lock(&rand_mutex);
+ rv = random_r(&rand_data, &val);
+ pthread_mutex_unlock(&rand_mutex);
+ if (rv < 0)
+ return rv;
+
+ return a + (int) (((float)(b - a + 1)) * val / (RAND_MAX+1.0));
+}
+
+static void setup_host_name(void)
+{
+ struct utsname name;
+ char uuid[37];
+ uuid_t uu;
+
+ memset(rand_state, 0, sizeof(rand_state));
+ memset(&rand_data, 0, sizeof(rand_data));
+
+ initstate_r(time(NULL), rand_state, sizeof(rand_state), &rand_data);
+
+ /* use host name from command line */
+
+ if (com.our_host_name[0]) {
+ memcpy(our_host_name_global, com.our_host_name, SANLK_NAME_LEN);
+ return;
+ }
+
+ /* make up something that's likely to be different among hosts */
+
+ memset(&our_host_name_global, 0, sizeof(our_host_name_global));
+ memset(&name, 0, sizeof(name));
+ memset(&uuid, 0, sizeof(uuid));
+
+ uname(&name);
+ uuid_generate(uu);
+ uuid_unparse_lower(uu, uuid);
+
+ snprintf(our_host_name_global, NAME_ID_SIZE, "%s.%s",
+ uuid, name.nodename);
+}
+
static int do_daemon(void)
{
struct sigaction act;
@@ -2157,9 +2214,13 @@ static int do_daemon(void)
setup_logging();
- log_error("sanlock daemon started aio %d %d renew %d %d",
+ setup_host_name();
+
+ log_error("sanlock daemon started aio %d %d renew %d %d host %s time %llu",
main_task.use_aio, main_task.io_timeout_seconds,
- main_task.id_renewal_seconds, main_task.id_renewal_fail_seconds);
+ main_task.id_renewal_seconds, main_task.id_renewal_fail_seconds,
+ our_host_name_global,
+ (unsigned long long)time(NULL));
setup_priority();
@@ -2601,6 +2662,8 @@ static int read_command_line(int argc, char *argv[])
break;
case 'a':
com.aio_arg = atoi(optionarg);
+ if (com.aio_arg && com.aio_arg != 1)
+ com.aio_arg = 1;
break;
case 't':
com.max_worker_threads = atoi(optionarg);
@@ -2895,8 +2958,10 @@ static int do_direct(void)
break;
case ACT_ACQUIRE_ID:
+ setup_host_name();
+
rv = direct_acquire_id(&main_task, &com.lockspace,
- com.our_host_name);
+ our_host_name_global);
log_tool("acquire_id done %d", rv);
break;
diff --git a/src/paxos_lease.c b/src/paxos_lease.c
index 25165f1..0816a64 100644
--- a/src/paxos_lease.c
+++ b/src/paxos_lease.c
@@ -30,6 +30,8 @@
#include "delta_lease.h"
#include "paxos_lease.h"
+int get_rand(int a, int b);
+
struct request_record {
uint64_t lver;
uint8_t force_mode;
diff --git a/src/sanlock_internal.h b/src/sanlock_internal.h
index 833822e..7f6f740 100644
--- a/src/sanlock_internal.h
+++ b/src/sanlock_internal.h
@@ -445,8 +445,9 @@ struct task {
int use_aio;
int cb_size;
+ char *iobuf;
io_context_t aio_ctx;
- struct aicb *read_timeout;
+ struct aicb *read_iobuf_timeout_aicb;
struct aicb *callbacks;
};
diff --git a/src/task.c b/src/task.c
index 5a04248..c0ab44b 100644
--- a/src/task.c
+++ b/src/task.c
@@ -114,14 +114,76 @@ void setup_task_aio(struct task *task, int use_aio, int cb_size)
task->use_aio = 0;
}
-/* TODO: do we need/want to go through all task->callbacks that are still used
- and wait to reap events for them before doing io_destroy? */
-
void close_task_aio(struct task *task)
{
- if (task->use_aio)
- io_destroy(task->aio_ctx);
+ struct timespec ts;
+ struct io_event event;
+ uint64_t last_warn;
+ int rv, i, used, warn;
+
+ if (!task->use_aio)
+ goto skip_aio;
+
+ memset(&ts, 0, sizeof(struct timespec));
+ ts.tv_sec = task->io_timeout_seconds;
+
+ last_warn = time(NULL);
+
+ /* wait for all outstanding aio to complete before
+ destroying aio context, freeing iocb and buffers */
+
+ while (1) {
+ warn = 0;
+
+ if (time(NULL) - last_warn >= task->io_timeout_seconds) {
+ last_warn = time(NULL);
+ warn = 1;
+ }
+
+ used = 0;
+
+ for (i = 0; i < task->cb_size; i++) {
+ if (!task->callbacks[i].used)
+ continue;
+ used++;
+
+ if (!warn)
+ continue;
+ log_taske(task, "close_task_aio %d %p busy",
+ i, &task->callbacks[i]);
+ }
+
+ if (!used)
+ break;
+
+ memset(&event, 0, sizeof(event));
+
+ rv = io_getevents(task->aio_ctx, 1, 1, &event, &ts);
+ if (rv == -EINTR)
+ continue;
+ if (rv < 0)
+ break;
+ if (rv == 1) {
+ struct iocb *ev_iocb = event.obj;
+ struct aicb *ev_aicb = container_of(ev_iocb, struct aicb, iocb);
+
+ if (ev_aicb->buf == task->iobuf)
+ task->iobuf = NULL;
+
+ log_taske(task, "aio collect %p:%p:%p result %ld:%ld close free",
+ ev_aicb, ev_iocb, ev_aicb->buf, event.res, event.res2);
+
+ ev_aicb->used = 0;
+ free(ev_aicb->buf);
+ ev_aicb->buf = NULL;
+ }
+ }
+ io_destroy(task->aio_ctx);
+
+ if (task->iobuf)
+ free(task->iobuf);
+ skip_aio:
if (task->callbacks)
free(task->callbacks);
task->callbacks = NULL;
diff --git a/src/token_manager.c b/src/token_manager.c
index 0c85197..df5d632 100644
--- a/src/token_manager.c
+++ b/src/token_manager.c
@@ -193,6 +193,7 @@ static void *async_release_thread(void *arg GNUC_UNUSED)
struct resource *r;
struct token *token;
+ memset(&task, 0, sizeof(struct task));
setup_task_timeouts(&task, main_task.io_timeout_seconds);
setup_task_aio(&task, main_task.use_aio, RELEASE_AIO_CB_SIZE);
sprintf(task.name, "%s", "release");
12 years, 4 months
src/Makefile wdmd/Makefile
by David Teigland
src/Makefile | 2 +-
wdmd/Makefile | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
New commits:
commit e771dcd6076791ace7b2a65677b17dda5457bce6
Author: David Teigland <teigland(a)redhat.com>
Date: Fri Aug 5 14:49:35 2011 -0500
makefile: install mode for man pages
diff --git a/src/Makefile b/src/Makefile
index d4768b2..0301892 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -103,4 +103,4 @@ install: all
$(INSTALL) -c -m 755 $(SHLIB_TARGET) $(DESTDIR)/$(LIBDIR)
cp -a $(LIB_TARGET).so $(DESTDIR)/$(LIBDIR)
$(INSTALL) -c -m 644 $(HEADER_TARGET) $(DESTDIR)/$(HEADIR)
- $(INSTALL) $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8/
+ $(INSTALL) -m 644 $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8/
diff --git a/wdmd/Makefile b/wdmd/Makefile
index 99a9678..8adaf1a 100644
--- a/wdmd/Makefile
+++ b/wdmd/Makefile
@@ -86,4 +86,4 @@ install: all
cp -a $(LIB_TARGET).so $(DESTDIR)/$(LIBDIR)
cp -a $(LIB_TARGET).so.$(SOMAJOR) $(DESTDIR)/$(LIBDIR)
$(INSTALL) -c -m 644 $(HEADER_TARGET) $(DESTDIR)/$(HEADIR)
- $(INSTALL) $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8
+ $(INSTALL) -m 644 $(MAN_TARGET) $(DESTDIR)/$(MANDIR)/man8
12 years, 4 months
Changes to 'refs/tags/sanlock-1.7'
by David Teigland
Changes since the dawn of time:
Daniel P. Berrange (15):
Fix const-ness of many APIs.
Fix warnings in watchdog module
Fix function prototypes for no-arg methods
Remove use of 'index' as a variable name
Make many functions static
Fix missing include in logging file
Annotate many unused parameters to avoid warnings
Remove redundant redeclaration of 'to' variable
Fix args to execv()
Remove redundant arg to 'run_command'
Rename optarg to optionarg to avoid clashing with getopt.h
Disable the read_request method since it is unused
Add many more compiler warning flags & safety checks
Hard code a sector size of 512 if the lease volume is a regular file
Ensure libsanlock.so is built with debug/warning flags
David Teigland (252):
sync_manager: initial commit
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: add more logging
sync_manager: misc updates
sync_manager: misc updates
sync_manager: num_hosts/MAX_HOSTS stuff
daemon: reworking notions of resource_id/token_name
sync_manager: resource lockfiles
sync_manager: lease arg processing
sync_manager: Began multiple lease support
sync_manager: use first command line arg as action
sync_manager: leader record changes and verify
sync_manager: clean up released leases
sync_manager: move functions around
sync_manager: add more tool actions
sync_manager: naming changes
sync_manager: separate token index and id
sync_manager: fix index usage and other misc
sync_manager: use pthread cond in acquire
sync_manager: write all log entries
sync_manager: simplify polling
sync_manager: fix waitpid use
sync_manager: acquire can fail early
sync_manager: write log entries at exit
sync_manager: add test program
sync_manager: move secondary pid check
sync_manager: fix disk paxos contention
devcount: fix verify checks
sync_manager: add GPL license file
sync_manager: fix leader block offsets
sync_manager: increase COMMAND_MAX
sync_manager: renewal should verify
sync_manager: use sector size from libblkid
sync_manager: use a real checksum function
sync_manager: add libblkid to spec file
sync_manager: print status info
sync_manager: one watchdog file per lease
sync_manager: lease_threads handle watchdog files
sync_manager: fix/add some text/comments
sync_manager: refactor read/write
sync_manager: move disk io functions
sync_manager: remove token arg
sync_manager: rename paxos_disk sync_disk
sync_manager: add aio read/write
sync_manager: make io_timeout_seconds a diskio arg
sync_manager: forgot to add new files
sync_manager: use log thread
sync_manager: client connections
sync_manager: connection processing
sync_manager: send/recv pid
sync_manager: add write_sectors
sync_manager: restructuring
sync_manager: write_sectors code factoring
sync_manager: daemonize
sync_manager: releasing leases
sync_manager: async releasing
sync_manager: release fixes
sync_manager: add direct and indirect acquire/release
sync_manager: reacquire resources
sync_manager: move code
sync_manager: same pid may reacquire resource
sync_manager: lease migration
sync_manager: handle client errors
sync_manager: improve error handling
sync_manager: host_id leases
sync_manager: remove empty files
sync_manager: print initialization info
sync_manager: rename files
sync_manager: clean up header org
sync_manager: delta_lease implementation
sync_manager: accept offset units
sync_manager: fix up init output
sync_manager: put back watchdog calls
sync_manager: fix start_host_id error paths
sync_manager: add log_error's for watchdog file errors
sync_manager: actual timeouts
sync_manager: change timeouts on cmd line
sanlock: create new external api
sanlock: build libsanlock
sanlock: use MAX_LEASES everywhere
sanlock: add libvirt plugin
sanlock plugin: couple minor fixes
sanlock: clean up /var file names
sanlock plugin: fix symbol needed by libvirt
sanlock: add some debug output
sanlock plugin: fix uuid copy
sanlock plugin: fix names
sanlock: add "owner_name"
sanlock: fix renewal checks
sanlock: clean up host_id types
sanlock: set_host_id command
sanlock: fix killing pids
sanlock: add status command
sanlock: set version to 1.0
sanlock: delta_lease cleanup
sanlock: changing num_hosts
sanlock: add dump command
sanlock: renewal timings
sanlock: add direct option
sanlock: check for watchdog file
sanlock: recovery fixes
lock_driver_sanlock: fix compile problems
sanlock: improve command options
sanlock: tidying help text
sanlock: move binary to /usr/sbin
sanlock: add init script
sanlock: fix sigterm shutdown
sanlock: init stop
sanlock: add wdtest command
sanlock.spec: new url
lock_driver_sanlock: remove close
sanlock: introduce lockspaces
lock_driver_sanlock: remove files
sanlock: better logging functions
sanlock: misc log message
sanlock.spec: sbin not libexec
sanlock init: remove watchdog reference
wdmd: watchdog multiplexing daemon
sanlock: add code to use wdmd
sanlock/wdmd: use wdmd in sanlock
sanlock/wdmd: add copyright header to source files
sanlock: rename sanlock source dir
sanlock: move tests dir
move COPYING file
wdmd: use signalfd for signal handling
Fix Makefile comments
wdmd: fix daemon debug option
wdmd: add init script
sanlock.spec: updates
sanlock.spec: src dir
sanlock: build with uninstalled libwdmd
sanlock: version 1.1
sanlock: high priority options
wdmd: high priority options
sanlock: return migration state
sanlock: migration.txt describes libvirt/sanlock steps
libsanlock: include admin functions
sanlock: fix host_id expiration check
sanlock: migration working
devcount: migrate test
sanlock: setowner improvements
sanlock: migrate to target fix
sanlock: fix wdmd stop order
sanlock: various fixes
sanlock: remove wdtest
sanlock: remove migration
sanlock: clean up command return data
sanlock: add resource string conversion functions
sanlock: rework internal structs
devcount: add relock test
sanlock: fix release and inquire
sanlock: add_lockspace EEXIST
sanlock: rework client handling
sanlock: clean up warnings
sanlock: debug message changes
sanlock: add lockspace checks
wdmd: enable test scripts
sanlock: add str_to_lockspace to lib
WIP devcount migrate
devcount: new migrate test
sanlock: read_id and live_id commands
sanlock: check lockspace name and host_id
sanlock: remove remaining cluster_mode
sanlock: add libsanlock_direct
devcountn: start multiple devcount tests
devcount: small changes
sanlock: new return values
sanlock: misc changes and fix
sanlock: log error of full bad block
sanlock: interval between renewal checks
sanlock: renewal changes
sanlock: fix log_dump
sanlock: fix find_client_pid
sanlock: fix host_id reads from paxos_acquire
sanlock: init with one write
devcount: improve output
devcount: new pause/resume
devcount: add expire test
sanlock: correct paxos usage
sanlock: direct read_leader
sanlock: paxos delays
sanlock: use thread pool
sanlock: client status output format changes
sanlock: fix inquire of dead pid
sanlock: use native linux aio
sanlock: i/o changes
sanlock: aio changes
sanlock: reduce paxos acquire read ops
sanlock: quiet error case
sanlock: don't free aio buf until event completes
sanlock: io timeout related changes
sanlock: read dblocks in single aligned io
sanlock: add sanlock_restrict api
sanlock: add sanlock_direct_sector_size api
sanlock: add checksum to dblocks
sanlock: fix init restart
sanlock: don't release tokens in dead lockspace
sanlock: fix adding lockspace
sanlock: official 1MB/8MB alignment
devcount: use aio in init
libsanlock: link with LDFLAGS
sanlock: increase version to 1.3
sanlock/wdmd: shut up warnings
sanlock: fix libwdmd linking
remove spec file
sanlock: use a completed read after renewal timeout
sanlock: use unique host name in delta leases
sanlock: remove sector_size api
sanlock: abort delta wait on shutdown
sanlock: fix add_lockspace failure
sanlk_load: add new test
sanlock: fix recv and inquire
sanlock: initial pid_dead check in acquire
sanlock: release 1.4
sanlock: generate a uuid for host id
sanlock: return -EINPROGRESS from add_lockspace
sanlk_load: periodically kill and replace a pid
sanlock: zero num_hosts uses DEFAULT_MAX_HOSTS
tests: misc changes
sanlock: break paxos_acquire wait loop
sanlock: increase log line to 512 bytes
sanlock: change a log_error to log_debug
sanlock: fail host_id when corrupted
sanlock: release 1.5
sanlock: release 1.6
sanlock: handle colon escaping in path strings
wdmd: add option for high priority
wdmd: use accept4 with SOCK_NONBLOCK
wdmd: tidy sun_addr snprintf
wdmd: pid and sock file changes
wdmd: add man page
wdmd: disable test scripts
sanlock: use accept4 with SOCK_NONBLOCK
sanlock: tidy sun_addr snprintf
sanlock: add explicit -luuid
sanlock: pid and sock file changes
sanlock: add man page
sanlock/wdmd: improve mkdir of run dir
wdmd: new build flags
sanlock: new build flags
sanlock/wdmd: use monotonic time
sanlock: build with pie
sanlock/wdmd: nonblocking listening/accept
sanlock: add missing monotime files
sanlock: update man page
sanlock: man page update
sanlock: update man page and help text
sanlock: print connections limit
release: sanlock 1.7
Fabio M. Di Nitto (5):
build: sanlock should link with libsanlock
build: install shared lib header files and fix DESTDIR usage
build: drop rpm target
spec file: do first cut for total spec file
build: fix linking with libsanlock and install target
Federico Simoncelli (31):
rpm: sync specfile with fedora
rpm: add sanlock_admin.h header
rpm: add the lib package, install the initscripts
python: remove unused python code
python: add python binding
python: release the gil during sanlock operations
python: wrap sanlock extension with a module
rpm: add python binding package
python: pass a lockspace copy to str_to_lockspace
makefile: fix install typo
rpm: add sanlock_direct header
python: add sanlock init functions and exception
direct: close disks after initialization
python: register process only once
daemon: configurable socket permissions
rpm: add sanlock user and group
python: exceptions must contain the errno
rpm: add missing libaio-devel dependency
rpm: add daemon options in the init file
python: add missing aio library
python: add get_alignment function
libs: include libsanlock_direct into libsanlock
python: align num_hosts and max_hosts defaults
python: expose sanlock file descriptor
python: improve error reporting
python: parse lockspaces and resources natively
python: add usage example
python: initial support for sanlock errors
python: document the sanlock module
python: module cleanup
build: fix documentation install path
Saggi Mizrahi (21):
Added the beginning of the testing and debugging tools
Better handling of max hosts
sync_manager: Updated tests to work with new lease struct
sync_manager: fixed skipping first arg in command
sync_manager: acquire and release actions
sync_manager: minor fixes
sync_manager: renamed stuff
sync_manager: made acquire synchronous again
sync_manager: added set_host_id action
sync_manager: use kill(0) for secondary pid check
sync_manager: make rpm and install
sync_manager: spec file update
sync_manager: Allow longer resource names
sync_manager: allow repeated set_host_id
sync_manager: Added escaping for the leases arg
sync_manager: Created the python bindings for sync_manager
sync_manager: listener socket permissions
sync_manager: Updated python binding and tests
sync_manager: Made 'token' a const in log_level
sync_manager: refactor messaging system
sync_manager: use getsockopt PEERCRED
12 years, 4 months
Changes to 'refs/tags/sanlock-1.6'
by David Teigland
Changes since the dawn of time:
Daniel P. Berrange (15):
Fix const-ness of many APIs.
Fix warnings in watchdog module
Fix function prototypes for no-arg methods
Remove use of 'index' as a variable name
Make many functions static
Fix missing include in logging file
Annotate many unused parameters to avoid warnings
Remove redundant redeclaration of 'to' variable
Fix args to execv()
Remove redundant arg to 'run_command'
Rename optarg to optionarg to avoid clashing with getopt.h
Disable the read_request method since it is unused
Add many more compiler warning flags & safety checks
Hard code a sector size of 512 if the lease volume is a regular file
Ensure libsanlock.so is built with debug/warning flags
David Teigland (228):
sync_manager: initial commit
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: misc updates
sync_manager: add more logging
sync_manager: misc updates
sync_manager: misc updates
sync_manager: num_hosts/MAX_HOSTS stuff
daemon: reworking notions of resource_id/token_name
sync_manager: resource lockfiles
sync_manager: lease arg processing
sync_manager: Began multiple lease support
sync_manager: use first command line arg as action
sync_manager: leader record changes and verify
sync_manager: clean up released leases
sync_manager: move functions around
sync_manager: add more tool actions
sync_manager: naming changes
sync_manager: separate token index and id
sync_manager: fix index usage and other misc
sync_manager: use pthread cond in acquire
sync_manager: write all log entries
sync_manager: simplify polling
sync_manager: fix waitpid use
sync_manager: acquire can fail early
sync_manager: write log entries at exit
sync_manager: add test program
sync_manager: move secondary pid check
sync_manager: fix disk paxos contention
devcount: fix verify checks
sync_manager: add GPL license file
sync_manager: fix leader block offsets
sync_manager: increase COMMAND_MAX
sync_manager: renewal should verify
sync_manager: use sector size from libblkid
sync_manager: use a real checksum function
sync_manager: add libblkid to spec file
sync_manager: print status info
sync_manager: one watchdog file per lease
sync_manager: lease_threads handle watchdog files
sync_manager: fix/add some text/comments
sync_manager: refactor read/write
sync_manager: move disk io functions
sync_manager: remove token arg
sync_manager: rename paxos_disk sync_disk
sync_manager: add aio read/write
sync_manager: make io_timeout_seconds a diskio arg
sync_manager: forgot to add new files
sync_manager: use log thread
sync_manager: client connections
sync_manager: connection processing
sync_manager: send/recv pid
sync_manager: add write_sectors
sync_manager: restructuring
sync_manager: write_sectors code factoring
sync_manager: daemonize
sync_manager: releasing leases
sync_manager: async releasing
sync_manager: release fixes
sync_manager: add direct and indirect acquire/release
sync_manager: reacquire resources
sync_manager: move code
sync_manager: same pid may reacquire resource
sync_manager: lease migration
sync_manager: handle client errors
sync_manager: improve error handling
sync_manager: host_id leases
sync_manager: remove empty files
sync_manager: print initialization info
sync_manager: rename files
sync_manager: clean up header org
sync_manager: delta_lease implementation
sync_manager: accept offset units
sync_manager: fix up init output
sync_manager: put back watchdog calls
sync_manager: fix start_host_id error paths
sync_manager: add log_error's for watchdog file errors
sync_manager: actual timeouts
sync_manager: change timeouts on cmd line
sanlock: create new external api
sanlock: build libsanlock
sanlock: use MAX_LEASES everywhere
sanlock: add libvirt plugin
sanlock plugin: couple minor fixes
sanlock: clean up /var file names
sanlock plugin: fix symbol needed by libvirt
sanlock: add some debug output
sanlock plugin: fix uuid copy
sanlock plugin: fix names
sanlock: add "owner_name"
sanlock: fix renewal checks
sanlock: clean up host_id types
sanlock: set_host_id command
sanlock: fix killing pids
sanlock: add status command
sanlock: set version to 1.0
sanlock: delta_lease cleanup
sanlock: changing num_hosts
sanlock: add dump command
sanlock: renewal timings
sanlock: add direct option
sanlock: check for watchdog file
sanlock: recovery fixes
lock_driver_sanlock: fix compile problems
sanlock: improve command options
sanlock: tidying help text
sanlock: move binary to /usr/sbin
sanlock: add init script
sanlock: fix sigterm shutdown
sanlock: init stop
sanlock: add wdtest command
sanlock.spec: new url
lock_driver_sanlock: remove close
sanlock: introduce lockspaces
lock_driver_sanlock: remove files
sanlock: better logging functions
sanlock: misc log message
sanlock.spec: sbin not libexec
sanlock init: remove watchdog reference
wdmd: watchdog multiplexing daemon
sanlock: add code to use wdmd
sanlock/wdmd: use wdmd in sanlock
sanlock/wdmd: add copyright header to source files
sanlock: rename sanlock source dir
sanlock: move tests dir
move COPYING file
wdmd: use signalfd for signal handling
Fix Makefile comments
wdmd: fix daemon debug option
wdmd: add init script
sanlock.spec: updates
sanlock.spec: src dir
sanlock: build with uninstalled libwdmd
sanlock: version 1.1
sanlock: high priority options
wdmd: high priority options
sanlock: return migration state
sanlock: migration.txt describes libvirt/sanlock steps
libsanlock: include admin functions
sanlock: fix host_id expiration check
sanlock: migration working
devcount: migrate test
sanlock: setowner improvements
sanlock: migrate to target fix
sanlock: fix wdmd stop order
sanlock: various fixes
sanlock: remove wdtest
sanlock: remove migration
sanlock: clean up command return data
sanlock: add resource string conversion functions
sanlock: rework internal structs
devcount: add relock test
sanlock: fix release and inquire
sanlock: add_lockspace EEXIST
sanlock: rework client handling
sanlock: clean up warnings
sanlock: debug message changes
sanlock: add lockspace checks
wdmd: enable test scripts
sanlock: add str_to_lockspace to lib
WIP devcount migrate
devcount: new migrate test
sanlock: read_id and live_id commands
sanlock: check lockspace name and host_id
sanlock: remove remaining cluster_mode
sanlock: add libsanlock_direct
devcountn: start multiple devcount tests
devcount: small changes
sanlock: new return values
sanlock: misc changes and fix
sanlock: log error of full bad block
sanlock: interval between renewal checks
sanlock: renewal changes
sanlock: fix log_dump
sanlock: fix find_client_pid
sanlock: fix host_id reads from paxos_acquire
sanlock: init with one write
devcount: improve output
devcount: new pause/resume
devcount: add expire test
sanlock: correct paxos usage
sanlock: direct read_leader
sanlock: paxos delays
sanlock: use thread pool
sanlock: client status output format changes
sanlock: fix inquire of dead pid
sanlock: use native linux aio
sanlock: i/o changes
sanlock: aio changes
sanlock: reduce paxos acquire read ops
sanlock: quiet error case
sanlock: don't free aio buf until event completes
sanlock: io timeout related changes
sanlock: read dblocks in single aligned io
sanlock: add sanlock_restrict api
sanlock: add sanlock_direct_sector_size api
sanlock: add checksum to dblocks
sanlock: fix init restart
sanlock: don't release tokens in dead lockspace
sanlock: fix adding lockspace
sanlock: official 1MB/8MB alignment
devcount: use aio in init
libsanlock: link with LDFLAGS
sanlock: increase version to 1.3
sanlock/wdmd: shut up warnings
sanlock: fix libwdmd linking
remove spec file
sanlock: use a completed read after renewal timeout
sanlock: use unique host name in delta leases
sanlock: remove sector_size api
sanlock: abort delta wait on shutdown
sanlock: fix add_lockspace failure
sanlk_load: add new test
sanlock: fix recv and inquire
sanlock: initial pid_dead check in acquire
sanlock: release 1.4
sanlock: generate a uuid for host id
sanlock: return -EINPROGRESS from add_lockspace
sanlk_load: periodically kill and replace a pid
sanlock: zero num_hosts uses DEFAULT_MAX_HOSTS
tests: misc changes
sanlock: break paxos_acquire wait loop
sanlock: increase log line to 512 bytes
sanlock: change a log_error to log_debug
sanlock: fail host_id when corrupted
sanlock: release 1.5
sanlock: release 1.6
Fabio M. Di Nitto (5):
build: sanlock should link with libsanlock
build: install shared lib header files and fix DESTDIR usage
build: drop rpm target
spec file: do first cut for total spec file
build: fix linking with libsanlock and install target
Federico Simoncelli (25):
rpm: sync specfile with fedora
rpm: add sanlock_admin.h header
rpm: add the lib package, install the initscripts
python: remove unused python code
python: add python binding
python: release the gil during sanlock operations
python: wrap sanlock extension with a module
rpm: add python binding package
python: pass a lockspace copy to str_to_lockspace
makefile: fix install typo
rpm: add sanlock_direct header
python: add sanlock init functions and exception
direct: close disks after initialization
python: register process only once
daemon: configurable socket permissions
rpm: add sanlock user and group
python: exceptions must contain the errno
rpm: add missing libaio-devel dependency
rpm: add daemon options in the init file
python: add missing aio library
python: add get_alignment function
libs: include libsanlock_direct into libsanlock
python: align num_hosts and max_hosts defaults
python: expose sanlock file descriptor
python: improve error reporting
Saggi Mizrahi (21):
Added the beginning of the testing and debugging tools
Better handling of max hosts
sync_manager: Updated tests to work with new lease struct
sync_manager: fixed skipping first arg in command
sync_manager: acquire and release actions
sync_manager: minor fixes
sync_manager: renamed stuff
sync_manager: made acquire synchronous again
sync_manager: added set_host_id action
sync_manager: use kill(0) for secondary pid check
sync_manager: make rpm and install
sync_manager: spec file update
sync_manager: Allow longer resource names
sync_manager: allow repeated set_host_id
sync_manager: Added escaping for the leases arg
sync_manager: Created the python bindings for sync_manager
sync_manager: listener socket permissions
sync_manager: Updated python binding and tests
sync_manager: Made 'token' a const in log_level
sync_manager: refactor messaging system
sync_manager: use getsockopt PEERCRED
12 years, 4 months