cluster: RHEL54 - cman/groupd/dlm_controld/gfs_controld: work around ipc deadlock
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 1673920d5adf8019ddbe880b507b398b1c254c6e
Parent: 6adfa2e69f30d1773035ce495b1a1e0aae618196
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Mon Feb 8 15:26:03 2010 -0600
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Fri Feb 26 12:10:10 2010 -0600
cman/groupd/dlm_controld/gfs_controld: work around ipc deadlock
bz 561892
When there are many gfs file systems (more than about 120) in a two
node cluster, and one of the nodes fails, groupd on the
remaining node can deadlock with dlm_controld and gfs_controld.
The problem is caused by so much communication (lots of fs's)
being sent between groupd and the other daemons so quickly
(no other nodes to synchronize with), that the unix socket
buffers fill up, causing both daemons to be blocked writing
stop/start/stop_done/start_done messages to the other.
Since the daemons are single threaded, being blocked on write
means that neither will read to unblock the other.
To determine if you're having this problem, you can strace
groupd, dlm_controld and gfs_controld, and notice that they
are blocked writing strings starting with "stop" or "start".
group_tool will hang since groupd is blocked.
The solution has three main parts:
1. dlm_controld queues its stop_done and start_done messages
and waits to send them to groupd until groupd is finished
sending all the stop/start messages.
2. gfs_controld does the same, but only for stop_done messages
(start_done messages are already naturally delayed here)
3. groupd skips sending finish messages to dlm_controld,
since dlm_controld does not use them for anything
Each of these changes in behavior is disabled by default and
needs to be configured explicitly:
1. <dlm delay_done="2"/> in cluster.conf
2. <gfs_controld delay_done="2"/> in cluster.conf
3. SKIP_DLM_FINISH=1 in /etc/sysconfig/cman
(adds the -f0 option to groupd, because groupd doesn't read
options from cluster.conf)
The delay_done values are in seconds. If the same problem
persists with these settings, values of 4 or 8 might help.
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
cman/init.d/cman | 20 +++++-
group/daemon/main.c | 14 ++++-
group/dlm_controld/action.c | 38 ++++++++++
group/dlm_controld/deadlock.c | 1 -
group/dlm_controld/dlm_daemon.h | 12 +++
group/dlm_controld/group.c | 138 +++++++++++++++++++++++++++++++++++---
group/dlm_controld/main.c | 75 +++++++++++++++++++-
group/dlm_controld/member_cman.c | 1 -
group/gfs_controld/group.c | 55 +++++++++++++++
group/gfs_controld/lock_dlm.h | 10 +++
group/gfs_controld/main.c | 60 ++++++++++++++++-
group/gfs_controld/recover.c | 6 ++-
12 files changed, 408 insertions(+), 22 deletions(-)
diff --git a/cman/init.d/cman b/cman/init.d/cman
index b900f13..8194f4b 100755
--- a/cman/init.d/cman
+++ b/cman/init.d/cman
@@ -31,6 +31,11 @@
# The default is 60 seconds
[ -z "$CMAN_SHUTDOWN_TIMEOUT" ] && CMAN_SHUTDOWN_TIMEOUT=60
+# SKIP_DLM_FINISH -- setting to 1 will cause groupd to be started with
+# -f0, which causes groupd to not send finish callbacks to dlm_controld
+# as part of a workaround for bz 561892.
+[ -z "$SKIP_DLM_FINISH" ] && SKIP_DLM_FINISH=0
+
# FENCED_START_TIMEOUT -- amount of time to wait for starting fenced
# before giving up. If FENCED_START_TIMEOUT is positive, then we will
# wait FENCED_START_TIMEOUT seconds before giving up and failing when
@@ -61,6 +66,7 @@
[ -n "$NODENAME" ] && cman_join_opts+=" -n $NODENAME"
+
load_modules()
{
errmsg=$( /sbin/modprobe configfs 2>&1 ) || return 1
@@ -145,21 +151,31 @@ start_qdiskd()
start_daemons()
{
status groupd &> /dev/null
- if [ $? -ne 0 ]; then
- errmsg=$( /sbin/groupd 2>&1 ) || return 1
+ if [ $? -ne 0 ]
+ then
+ if [ $SKIP_DLM_FINISH -eq 1 ]
+ then
+ errmsg=$( /sbin/groupd -f0 2>&1 ) || return 1
+ else
+ errmsg=$( /sbin/groupd 2>&1 ) || return 1
+ fi
fi
+
status fenced &> /dev/null
if [ $? -ne 0 ]; then
errmsg=$( /sbin/fenced 2>&1 ) || return 1
fi
+
status dlm_controld &> /dev/null
if [ $? -ne 0 ]; then
errmsg=$( /sbin/dlm_controld 2>&1 ) || return 1
fi
+
status gfs_controld &> /dev/null
if [ $? -ne 0 ]; then
errmsg=$( /sbin/gfs_controld 2>&1 ) || return 1
fi
+
return 0
}
diff --git a/group/daemon/main.c b/group/daemon/main.c
index f5dcc88..7a4fca4 100644
--- a/group/daemon/main.c
+++ b/group/daemon/main.c
@@ -15,7 +15,7 @@
#include "gd_internal.h"
-#define OPTION_STRING "Dhs:Vv"
+#define OPTION_STRING "Dhs:f:Vv"
#define LOCKFILE_NAME "/var/run/groupd.pid"
#define LOG_FILE "/var/log/groupd.log"
@@ -27,6 +27,7 @@ uint32_t gd_event_nr;
char *our_name;
int our_nodeid;
int cman_quorate;
+int dlm_finish = 1;
static int client_maxi;
static int client_size = 0;
@@ -314,6 +315,12 @@ void app_start(app_t *a)
void app_finish(app_t *a)
{
char buf[GROUPD_MSGLEN];
+
+ if (!strncmp(client[a->client].type, "dlm", 3) && !dlm_finish) {
+ log_group(a->g, "skip finish");
+ return;
+ }
+
snprintf(buf, sizeof(buf), "finish %s %d",
a->g->name, a->current_event->event_nr);
app_action(a, buf);
@@ -919,6 +926,7 @@ static void print_usage(void)
printf(" -D Enable debugging code and don't fork\n");
printf(" -h Print this help, then exit\n");
printf(" -s [0|1] Enable (or disable) shutdown mode\n");
+ printf(" -f [0|1] Send finish callbacks to dlm_controld, default 1\n");
printf(" -V Print program version information, then exit\n");
}
@@ -945,6 +953,10 @@ static void decode_arguments(int argc, char **argv)
groupd_shutdown_opt = atoi(optarg);
break;
+ case 'f':
+ dlm_finish = atoi(optarg);
+ break;
+
case 'v':
groupd_debug_verbose++;
break;
diff --git a/group/dlm_controld/action.c b/group/dlm_controld/action.c
index 34e84fe..a7ea3a7 100644
--- a/group/dlm_controld/action.c
+++ b/group/dlm_controld/action.c
@@ -968,6 +968,43 @@ static void set_debug(int cd)
set_configfs_debug(rv);
}
+#define DELAY_DONE_PATH "/cluster/dlm/@delay_done"
+
+static int get_ccs_delay_done(int cd)
+{
+ char path[PATH_MAX], *str;
+ int error, rv;
+
+ memset(path, 0, PATH_MAX);
+ sprintf(path, DELAY_DONE_PATH);
+
+ error = ccs_get(cd, path, &str);
+ if (error || !str)
+ return -1;
+
+ rv = atoi(str);
+
+ if (rv < 0) {
+ log_error("invalid delay_done from ccs");
+ rv = -1;
+ }
+
+ free(str);
+ log_error("ccs dlm/delay_done %d", rv);
+ return rv;
+}
+
+static void set_delay_done(int cd)
+{
+ int rv;
+
+ rv = get_ccs_delay_done(cd);
+ if (rv < 0)
+ return;
+
+ delay_done_cb = rv;
+}
+
void set_ccs_options(void)
{
int cd;
@@ -979,6 +1016,7 @@ void set_ccs_options(void)
set_protocol(cd);
set_timewarn(cd);
set_debug(cd);
+ set_delay_done(cd);
ccs_disconnect(cd);
}
diff --git a/group/dlm_controld/deadlock.c b/group/dlm_controld/deadlock.c
index f21beda..0b1538a 100644
--- a/group/dlm_controld/deadlock.c
+++ b/group/dlm_controld/deadlock.c
@@ -15,7 +15,6 @@
int deadlock_enabled = 0;
-extern struct list_head lockspaces;
extern int our_nodeid;
static SaCkptHandleT global_ckpt_h;
diff --git a/group/dlm_controld/dlm_daemon.h b/group/dlm_controld/dlm_daemon.h
index c164a81..0037d32 100644
--- a/group/dlm_controld/dlm_daemon.h
+++ b/group/dlm_controld/dlm_daemon.h
@@ -63,6 +63,16 @@ extern int daemon_debug_opt;
extern int kernel_debug_opt;
extern char daemon_debug_buf[256];
+extern int delay_done_cb;
+extern struct list_head stop_done_list;
+extern int stop_done_entries;
+extern struct list_head start_done_list;
+extern int start_done_entries;
+extern struct list_head lockspaces;
+extern int lockspace_count;
+extern uint64_t last_stop_time;
+extern uint64_t last_start_time;
+
#define log_debug(fmt, args...) \
do { \
snprintf(daemon_debug_buf, 255, "%ld " fmt "\n", time(NULL), ##args); \
@@ -123,6 +133,8 @@ char *nodeid2name(int nodeid);
/* group.c */
int setup_groupd(void);
void process_groupd(int ci);
+void push_stop_done(void);
+void push_start_done(void);
/* main.c */
int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci));
diff --git a/group/dlm_controld/group.c b/group/dlm_controld/group.c
index 700edac..3024bef 100644
--- a/group/dlm_controld/group.c
+++ b/group/dlm_controld/group.c
@@ -24,6 +24,7 @@
do the processing within the callback function itself */
group_handle_t gh;
+
static int cb_action;
static char cb_name[MAX_GROUP_NAME_LEN+1];
static int cb_event_nr;
@@ -31,6 +32,7 @@ static unsigned int cb_id;
static int cb_type;
static int cb_member_count;
static int cb_members[MAX_GROUP_MEMBERS];
+static int last_action;
static void stop_cbfn(group_handle_t h, void *private, char *name)
{
@@ -116,10 +118,108 @@ static unsigned int replace_zero_global_id(char *name)
return new_id;
}
+struct save_done {
+ struct list_head list;
+ int event_nr;
+ char name[MAX_GROUP_NAME_LEN+1];
+};
+
+void push_stop_done(void)
+{
+ struct save_done *sd, *safe;
+ int count;
+
+ if (stop_done_entries > 1)
+ log_error("push_stop_done begin %d", stop_done_entries);
+
+ count = 0;
+
+ list_for_each_entry_safe(sd, safe, &stop_done_list, list) {
+ group_stop_done(gh, sd->name);
+ list_del(&sd->list);
+ free(sd);
+ stop_done_entries--;
+ count++;
+ }
+
+ if (count > 1)
+ log_error("push_stop_done end %d", count);
+}
+
+/* only queue if the last action is also stop? */
+
+static int queue_stop_done(char *name)
+{
+ struct save_done *sd;
+
+ if (!delay_done_cb)
+ return -1;
+
+ if (last_action != DO_STOP)
+ return -1;
+
+ sd = malloc(sizeof(struct save_done));
+ if (!sd)
+ return -1;
+
+ memset(sd, 0, sizeof(struct save_done));
+ strcpy(sd->name, name);
+ list_add_tail(&sd->list, &stop_done_list);
+ stop_done_entries++;
+
+ return 0;
+}
+
+void push_start_done(void)
+{
+ struct save_done *sd, *safe;
+ int count;
+
+ if (start_done_entries > 1)
+ log_error("push_start_done begin %d", start_done_entries);
+
+ count = 0;
+
+ list_for_each_entry_safe(sd, safe, &start_done_list, list) {
+ group_start_done(gh, sd->name, sd->event_nr);
+ list_del(&sd->list);
+ free(sd);
+ start_done_entries--;
+ count++;
+ }
+
+ if (count > 1)
+ log_error("push_start_done end %d", count);
+}
+
+static int queue_start_done(char *name, int event_nr)
+{
+ struct save_done *sd;
+
+ if (!delay_done_cb)
+ return -1;
+
+ if (last_action != DO_START)
+ return -1;
+
+ sd = malloc(sizeof(struct save_done));
+ if (!sd)
+ return -1;
+
+ memset(sd, 0, sizeof(struct save_done));
+ strcpy(sd->name, name);
+ sd->event_nr = event_nr;
+ list_add_tail(&sd->list, &start_done_list);
+ start_done_entries++;
+
+ return 0;
+}
+
void process_groupd(int ci)
{
struct lockspace *ls;
int error = 0, val;
+ int rv;
group_dispatch(gh);
@@ -137,7 +237,10 @@ void process_groupd(int ci)
case DO_STOP:
log_debug("groupd callback: stop %s", cb_name);
set_control(cb_name, 0);
- group_stop_done(gh, cb_name);
+ rv = queue_stop_done(cb_name);
+ if (rv < 0)
+ group_stop_done(gh, cb_name);
+ last_stop_time = time(NULL);
break;
case DO_START:
@@ -154,20 +257,27 @@ void process_groupd(int ci)
/* the dlm doesn't need/use a "finish" stage following
start, so we can just do start_done immediately */
- group_start_done(gh, cb_name, cb_event_nr);
- if (!ls->joining)
+ if (!ls->joining) {
+ rv = queue_start_done(cb_name, cb_event_nr);
+ if (rv < 0)
+ group_start_done(gh, cb_name, cb_event_nr);
+ last_start_time = time(NULL);
break;
+ } else {
+ group_start_done(gh, cb_name, cb_event_nr);
- ls->joining = 0;
- log_debug("join event done %s", cb_name);
+ ls->joining = 0;
+ log_debug("join event done %s", cb_name);
- /* this causes the dlm_new_lockspace() call (typically from
- mount) to complete */
- set_event_done(cb_name, 0);
+ /* this causes the dlm_new_lockspace() call (typically from
+ mount) to complete */
+ set_event_done(cb_name, 0);
- join_deadlock_cpg(ls);
- break;
+ join_deadlock_cpg(ls);
+ last_start_time = time(NULL);
+ break;
+ }
case DO_SETID:
log_debug("groupd callback: set_id %s %x", cb_name, cb_id);
@@ -195,6 +305,7 @@ void process_groupd(int ci)
set_event_done(cb_name, val);
leave_deadlock_cpg(ls);
list_del(&ls->list);
+ lockspace_count--;
free(ls);
break;
@@ -206,6 +317,8 @@ void process_groupd(int ci)
error = -EINVAL;
}
+ last_action = cb_action;
+
cb_action = 0;
out:
return;
@@ -215,6 +328,11 @@ int setup_groupd(void)
{
int rv;
+ INIT_LIST_HEAD(&stop_done_list);
+ INIT_LIST_HEAD(&start_done_list);
+ stop_done_entries = 0;
+ start_done_entries = 0;
+
gh = group_init(NULL, "dlm", 1, &callbacks, GROUPD_TIMEOUT);
if (!gh) {
log_error("group_init error %p %d", gh, errno);
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index 1588605..d47ea63 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -17,15 +17,13 @@
#include <linux/dlm.h>
#include <linux/dlm_netlink.h>
-#define OPTION_STRING "KDhVd:"
+#define OPTION_STRING "KDhVd:y:"
#define LOCKFILE_NAME "/var/run/dlm_controld.pid"
#define DEADLOCK_CHECK_SECS 10
#define NALLOC 16
-struct list_head lockspaces;
-
extern group_handle_t gh;
extern int deadlock_enabled;
@@ -265,6 +263,7 @@ static void process_uevent(int ci)
ls->joining = 1;
list_add(&ls->list, &lockspaces);
+ lockspace_count++;
rv = group_join(gh, argv[3]);
@@ -611,8 +610,11 @@ void cluster_dead(int ci)
static int loop(void)
{
int rv, i;
+ int poll_timeout = -1;
void (*workfn) (int ci);
void (*deadfn) (int ci);
+ uint64_t push_begin = 0;
+ uint64_t now;
rv = setup_listener();
if (rv < 0)
@@ -646,7 +648,7 @@ static int loop(void)
for_loop:
for (;;) {
- rv = poll(pollfd, client_maxi + 1, -1);
+ rv = poll(pollfd, client_maxi + 1, poll_timeout);
if (rv == -1 && errno == EINTR) {
if (daemon_quit && list_empty(&lockspaces)) {
clear_configfs();
@@ -672,6 +674,56 @@ static int loop(void)
deadfn(i);
}
}
+
+ if (delay_done_cb && !list_empty(&stop_done_list)) {
+ if (!push_begin) {
+ push_begin = time(NULL);
+ poll_timeout = 1000;
+ }
+ now = time(NULL);
+
+ if ((stop_done_entries == lockspace_count) ||
+ (now - push_begin >= delay_done_cb)) {
+ if (stop_done_entries > 1) {
+ log_error("stop_done entries %d "
+ "count %d begin %llu "
+ "now %llu last stop %llu",
+ stop_done_entries,
+ lockspace_count,
+ (unsigned long long)push_begin,
+ (unsigned long long)now,
+ (unsigned long long)last_stop_time);
+ }
+ push_stop_done();
+ push_begin = 0;
+ poll_timeout = -1;
+ }
+ }
+
+ if (delay_done_cb && !list_empty(&start_done_list)) {
+ if (!push_begin) {
+ push_begin = time(NULL);
+ poll_timeout = 1000;
+ }
+ now = time(NULL);
+
+ if ((start_done_entries == lockspace_count) ||
+ (now - push_begin >= delay_done_cb)) {
+ if (start_done_entries > 1) {
+ log_error("start_done entries %d "
+ "count %d begin %llu "
+ "now %llu last start %llu",
+ start_done_entries,
+ lockspace_count,
+ (unsigned long long)push_begin,
+ (unsigned long long)now,
+ (unsigned long long)last_start_time);
+ }
+ push_start_done();
+ push_begin = 0;
+ poll_timeout = -1;
+ }
+ }
}
rv = 0;
out:
@@ -754,6 +806,7 @@ static void print_usage(void)
#endif
printf(" -D Enable debugging code and don't fork\n");
printf(" -K Enable kernel dlm debugging messages\n");
+ printf(" -y <sec> Delay done callbacks to groupd by this many seconds, default 0\n");
printf(" -h Print this help, then exit\n");
printf(" -V Print program version information, then exit\n");
}
@@ -780,6 +833,10 @@ static void decode_arguments(int argc, char **argv)
print_usage();
exit(EXIT_SUCCESS);
break;
+
+ case 'y':
+ delay_done_cb = atoi(optarg);
+ break;
#if DEADLOCK
case 'd':
deadlock_enabled = atoi(optarg);
@@ -844,6 +901,7 @@ int main(int argc, char **argv)
prog_name = argv[0];
INIT_LIST_HEAD(&lockspaces);
+ lockspace_count = 0;
decode_arguments(argc, argv);
@@ -871,4 +929,13 @@ char *prog_name;
int daemon_debug_opt;
char daemon_debug_buf[256];
int kernel_debug_opt;
+int delay_done_cb = 0;
+struct list_head stop_done_list;
+int stop_done_entries;
+struct list_head start_done_list;
+int start_done_entries;
+struct list_head lockspaces;
+int lockspace_count;
+uint64_t last_stop_time;
+uint64_t last_start_time;
diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c
index 1ce180c..b37e4ec 100644
--- a/group/dlm_controld/member_cman.c
+++ b/group/dlm_controld/member_cman.c
@@ -19,7 +19,6 @@ static cman_node_t old_nodes[MAX_NODES];
static int old_node_count;
static cman_node_t cman_nodes[MAX_NODES];
static int cman_node_count;
-extern struct list_head lockspaces;
static int is_member(cman_node_t *node_list, int count, int nodeid)
{
diff --git a/group/gfs_controld/group.c b/group/gfs_controld/group.c
index 3717579..d2fff7c 100644
--- a/group/gfs_controld/group.c
+++ b/group/gfs_controld/group.c
@@ -23,6 +23,7 @@ static unsigned int cb_id;
static int cb_type;
static int cb_member_count;
static int cb_members[MAX_GROUP_MEMBERS];
+static int last_action;
int do_stop(struct mountgroup *mg);
int do_finish(struct mountgroup *mg);
@@ -109,6 +110,55 @@ static unsigned int replace_zero_global_id(char *name)
return new_id;
}
+struct save_done {
+ struct list_head list;
+ char name[MAX_GROUP_NAME_LEN+1];
+};
+
+void push_stop_done(void)
+{
+ struct save_done *sd, *safe;
+ int count;
+
+ if (stop_done_entries > 1)
+ log_error("push_stop_done begin %d", stop_done_entries);
+
+ count = 0;
+
+ list_for_each_entry_safe(sd, safe, &stop_done_list, list) {
+ group_stop_done(gh, sd->name);
+ list_del(&sd->list);
+ free(sd);
+ stop_done_entries--;
+ count++;
+ }
+
+ if (count > 1)
+ log_error("push_stop_done end %d", count);
+}
+
+int queue_stop_done(char *name)
+{
+ struct save_done *sd;
+
+ if (!delay_done_cb)
+ return -1;
+
+ if (last_action != DO_STOP)
+ return -1;
+
+ sd = malloc(sizeof(struct save_done));
+ if (!sd)
+ return -1;
+
+ memset(sd, 0, sizeof(struct save_done));
+ strcpy(sd->name, name);
+ list_add_tail(&sd->list, &stop_done_list);
+ stop_done_entries++;
+
+ return 0;
+}
+
int process_groupd(void)
{
struct mountgroup *mg;
@@ -132,6 +182,7 @@ int process_groupd(void)
switch (cb_action) {
case DO_STOP:
+ last_stop_time = time(NULL);
log_debug("groupd cb: stop %s", cb_name);
mg->last_callback = DO_STOP;
mg->last_stop = mg->last_start;
@@ -170,6 +221,7 @@ int process_groupd(void)
error = -EINVAL;
}
+ last_action = cb_action;
out:
cb_action = 0;
return error;
@@ -179,6 +231,9 @@ int setup_groupd(void)
{
int rv;
+ INIT_LIST_HEAD(&stop_done_list);
+ stop_done_entries = 0;
+
gh = group_init(NULL, LOCK_DLM_GROUP_NAME, LOCK_DLM_GROUP_LEVEL,
&callbacks, 10);
if (!gh) {
diff --git a/group/gfs_controld/lock_dlm.h b/group/gfs_controld/lock_dlm.h
index 746d0c7..cc12e1c 100644
--- a/group/gfs_controld/lock_dlm.h
+++ b/group/gfs_controld/lock_dlm.h
@@ -74,6 +74,13 @@ extern char dump_buf[DUMP_SIZE];
extern int dump_point;
extern int dump_wrap;
+extern int delay_done_cb;
+extern struct list_head stop_done_list;
+extern int stop_done_entries;
+extern struct list_head mounts;
+extern int mountgroup_count;
+extern uint64_t last_stop_time;
+
extern void daemon_dump_save(void);
#define log_debug(fmt, args...) \
@@ -283,6 +290,9 @@ int setup_plocks(void);
int process_plocks(void);
void exit_cman(void);
+void push_stop_done(void);
+int queue_stop_done(char *name);
+
int do_mount(int ci, char *dir, char *type, char *proto, char *table,
char *options, char *dev, struct mountgroup **mg_ret);
int do_unmount(int ci, char *dir, int mnterr);
diff --git a/group/gfs_controld/main.c b/group/gfs_controld/main.c
index 7293938..5a70f60 100644
--- a/group/gfs_controld/main.c
+++ b/group/gfs_controld/main.c
@@ -41,7 +41,6 @@ struct client {
int another_mount;
};
-extern struct list_head mounts;
extern struct list_head withdrawn_mounts;
extern group_handle_t gh;
@@ -545,6 +544,8 @@ int setup_uevent(void)
int loop(void)
{
int rv, i, f, error, poll_timeout = -1, ignore_plocks_fd = 0;
+ uint64_t push_begin = 0;
+ uint64_t now;
rv = listen_fd = setup_listen();
if (rv < 0)
@@ -655,6 +656,31 @@ int loop(void)
}
}
}
+
+ if (delay_done_cb && !list_empty(&stop_done_list)) {
+ if (!push_begin) {
+ push_begin = time(NULL);
+ poll_timeout = 1000;
+ }
+ now = time(NULL);
+
+ if ((stop_done_entries == mountgroup_count) ||
+ (now - push_begin >= delay_done_cb)) {
+ if (stop_done_entries > 1) {
+ log_error("stop_done entries %d "
+ "count %d begin %llu "
+ "now %llu last stop %llu",
+ stop_done_entries,
+ mountgroup_count,
+ (unsigned long long)push_begin,
+ (unsigned long long)now,
+ (unsigned long long)last_stop_time);
+ }
+ push_stop_done();
+ push_begin = 0;
+ poll_timeout = -1;
+ }
+ }
}
rv = 0;
out:
@@ -666,11 +692,12 @@ int loop(void)
#define DROP_RESOURCES_TIME_PATH "/cluster/gfs_controld/@drop_resources_time"
#define DROP_RESOURCES_COUNT_PATH "/cluster/gfs_controld/@drop_resources_count"
#define DROP_RESOURCES_AGE_PATH "/cluster/gfs_controld/@drop_resources_age"
+#define DELAY_DONE_PATH "/cluster/gfs_controld/@delay_done"
static void set_ccs_config(void)
{
char path[PATH_MAX], *str;
- int i = 0, cd, error;
+ int i = 0, cd, error, rv;
while ((cd = ccs_connect()) < 0) {
sleep(1);
@@ -738,6 +765,23 @@ static void set_ccs_config(void)
}
if (str)
free(str);
+
+ memset(path, 0, PATH_MAX);
+ snprintf(path, PATH_MAX, "%s", DELAY_DONE_PATH);
+ str = NULL;
+
+ error = ccs_get(cd, path, &str);
+ if (!error) {
+ rv = atoi(str);
+ if (rv < 0) {
+ log_error("invalid delay_done from ccs");
+ } else {
+ delay_done_cb = rv;
+ log_error("ccs gfs_controld/delay_done %d", rv);
+ }
+ }
+ if (str)
+ free(str);
}
static void lockfile(void)
@@ -824,6 +868,7 @@ static void print_usage(void)
printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_COUNT);
printf(" -a <ms> drop resources age (milliseconds)\n");
printf(" Default is %u\n", DEFAULT_DROP_RESOURCES_AGE);
+ printf(" -y <sec> Delay done callbacks to groupd by this many seconds, default 0\n");
printf(" -h Print this help, then exit\n");
printf(" -V Print program version information, then exit\n");
}
@@ -881,6 +926,10 @@ static void decode_arguments(int argc, char **argv)
opt_drop_resources_age = 1;
break;
+ case 'y':
+ delay_done_cb = atoi(optarg);
+ break;
+
case 'h':
print_usage();
exit(EXIT_SUCCESS);
@@ -946,6 +995,7 @@ int main(int argc, char **argv)
INIT_LIST_HEAD(&mounts);
INIT_LIST_HEAD(&withdrawn_mounts);
+ mountgroup_count = 0;
config_no_withdraw = DEFAULT_NO_WITHDRAW;
config_no_plock = DEFAULT_NO_PLOCK;
@@ -1007,4 +1057,10 @@ char daemon_debug_buf[256];
char dump_buf[DUMP_SIZE];
int dump_point;
int dump_wrap;
+int delay_done_cb = 0;
+struct list_head stop_done_list;
+int stop_done_entries;
+struct list_head mounts;
+int mountgroup_count;
+uint64_t last_stop_time;
diff --git a/group/gfs_controld/recover.c b/group/gfs_controld/recover.c
index 52d96ff..3eec64f 100644
--- a/group/gfs_controld/recover.c
+++ b/group/gfs_controld/recover.c
@@ -1676,6 +1676,7 @@ int do_mount(int ci, char *dir, char *type, char *proto, char *table,
}
list_add(&mg->list, &mounts);
+ mountgroup_count++;
group_join(gh, name);
rv = 0;
out:
@@ -1996,6 +1997,7 @@ int do_unmount(int ci, char *dir, int mnterr)
free(mp);
if (list_empty(&mg->mountpoints)) {
list_del(&mg->list);
+ mountgroup_count--;
free(mg);
}
return 0;
@@ -2342,7 +2344,9 @@ int do_stop(struct mountgroup *mg)
}
}
out:
- group_stop_done(gh, mg->name);
+ rv = queue_stop_done(mg->name);
+ if (rv < 0)
+ group_stop_done(gh, mg->name);
return 0;
}
14 years, 1 month
cluster: STABLE3 - ccs: Remove non-existent commands from ccs_tool man page.
by Christine Caulfield
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 3e03a4a55527718f200f57bec6766e5f3d2ac8a5
Parent: bed786ea60f0d63d97b06f8c830600e069900528
Author: Christine Caulfield <ccaulfie(a)redhat.com>
AuthorDate: Mon Mar 29 15:11:55 2010 +0100
Committer: Christine Caulfield <ccaulfie(a)redhat.com>
CommitterDate: Mon Mar 29 15:11:55 2010 +0100
ccs: Remove non-existent commands from ccs_tool man page.
The ccs_tool 'upgrade' and 'update' commands no longer exist, but they
were still mentioned in the man page.
Signed-off-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
config/tools/man/ccs_tool.8 | 15 +--------------
1 files changed, 1 insertions(+), 14 deletions(-)
diff --git a/config/tools/man/ccs_tool.8 b/config/tools/man/ccs_tool.8
index ef13406..dd409e4 100644
--- a/config/tools/man/ccs_tool.8
+++ b/config/tools/man/ccs_tool.8
@@ -23,19 +23,6 @@ Print the version information.
sub\-commands have their own options, see below for more detail
.SH "COMMANDS"
-.TP
-\fBupdate\fP \fI<xml file>\fP
-This command is used to update the config file that ccsd is working with
-while the cman cluster is operational (i.e. online). Run this on a single
-machine to update cluster.conf on all current cluster members. This also
-notifies cman of the new config version.
-
-.TP
-\fBupgrade\fP \fI<location>\fP
-This command is used to upgrade an old CCS format archive to the new
-xml format. \fI<location>\fP is the location of the old archive,
-which can be either a block device archive or a file archive. The
-converted configuration will be printed to stdout.
.TP
\fBaddnode\fP [options] \fI<node> [<fenceoption=value>]...\fP
@@ -182,4 +169,4 @@ have them.
.SH "SEE ALSO"
-ccs(7), ccsd(8), cluster.conf(5)
+cluster.conf(5)
14 years, 1 month
cluster: STABLE3 - rgmanager: Fix 2+ simultaneous relocation crash
by Lon Hohberger
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: bed786ea60f0d63d97b06f8c830600e069900528
Parent: 9f3163869167555201a408d2e1e1a2890b04d9b3
Author: Lon Hohberger <lhh(a)redhat.com>
AuthorDate: Fri Mar 26 16:57:45 2010 -0400
Committer: Lon Hohberger <lhh(a)redhat.com>
CommitterDate: Mon Mar 29 09:46:38 2010 -0400
rgmanager: Fix 2+ simultaneous relocation crash
If you tried to relocate a service twice at the same time,
rgmanager would crash because it would close the
connection twice.
This was reproduced on 3.0.9
Resolves: bz577856
Signed-off-by: Lon Hohberger <lhh(a)redhat.com>
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
rgmanager/src/daemons/rg_thread.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/rgmanager/src/daemons/rg_thread.c b/rgmanager/src/daemons/rg_thread.c
index 5d22968..517f8fe 100644
--- a/rgmanager/src/daemons/rg_thread.c
+++ b/rgmanager/src/daemons/rg_thread.c
@@ -717,7 +717,7 @@ rt_enqueue_request(const char *resgroupname, int request,
/* EWOULDBLOCK */
pthread_mutex_unlock(resgroup->rt_queue_mutex);
pthread_mutex_unlock(&reslist_mutex);
- return -1;
+ return 0;
}
ret = rq_queue_request(resgroup->rt_queue, resgroup->rt_name,
14 years, 1 month
cluster: STABLE3 - dlm: bump libdlmcontrol sominor
by Fabio M. Di Nitto
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 9f3163869167555201a408d2e1e1a2890b04d9b3
Parent: bc01b7a1a057aaed39fde3daa7f484de276cedcb
Author: Fabio M. Di Nitto <fdinitto(a)redhat.com>
AuthorDate: Fri Mar 26 18:56:20 2010 +0100
Committer: Fabio M. Di Nitto <fdinitto(a)redhat.com>
CommitterDate: Fri Mar 26 18:56:20 2010 +0100
dlm: bump libdlmcontrol sominor
new symbol has been added
Signed-off-by: Fabio M. Di Nitto <fdinitto(a)redhat.com>
---
dlm/libdlmcontrol/Makefile | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/dlm/libdlmcontrol/Makefile b/dlm/libdlmcontrol/Makefile
index 28faedd..e2c320c 100644
--- a/dlm/libdlmcontrol/Makefile
+++ b/dlm/libdlmcontrol/Makefile
@@ -2,6 +2,8 @@ TARGET= libdlmcontrol
OBJS= main.o
+SOMINOR=1
+
include ../../make/defines.mk
include $(OBJDIR)/make/libs.mk
include $(OBJDIR)/make/cobj.mk
14 years, 1 month
cluster: STABLE3 - cman: move fnv hash function into its own file
by Christine Caulfield
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: bc01b7a1a057aaed39fde3daa7f484de276cedcb
Parent: 980cb15f64083b3f7fc2e74b5e43588616a7e282
Author: Christine Caulfield <ccaulfie(a)redhat.com>
AuthorDate: Fri Mar 26 11:44:39 2010 +0000
Committer: Christine Caulfield <ccaulfie(a)redhat.com>
CommitterDate: Fri Mar 26 11:44:39 2010 +0000
cman: move fnv hash function into its own file
And add it to the COPYRIGHT exceptions file too.
Signed-off-by: Christine Caulfield <ccaulfie(a)redhat.com>
---
cman/daemon/Makefile | 3 +-
cman/daemon/cman-preconfig.c | 105 +++--------------------------------------
cman/daemon/fnvhash.c | 93 +++++++++++++++++++++++++++++++++++++
cman/daemon/fnvhash.h | 1 +
doc/COPYRIGHT | 6 ++
5 files changed, 110 insertions(+), 98 deletions(-)
diff --git a/cman/daemon/Makefile b/cman/daemon/Makefile
index 9a495eb..1329de3 100644
--- a/cman/daemon/Makefile
+++ b/cman/daemon/Makefile
@@ -23,7 +23,8 @@ OBJS1= daemon.o \
barrier.o \
cmanconfig.o
-OBJS2= cman-preconfig.o
+OBJS2= cman-preconfig.o \
+ fnvhash.o
${TARGET1}: ${OBJS1}
$(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LDFLAGS)
diff --git a/cman/daemon/cman-preconfig.c b/cman/daemon/cman-preconfig.c
index 3339f1d..e8248b4 100644
--- a/cman/daemon/cman-preconfig.c
+++ b/cman/daemon/cman-preconfig.c
@@ -25,6 +25,7 @@
#define OBJDB_API struct objdb_iface_ver0
#include "cnxman-socket.h"
#include "nodelist.h"
+#include "fnvhash.h"
#define MAX_PATH_LEN PATH_MAX
@@ -284,99 +285,6 @@ static int add_ifaddr(struct objdb_iface_ver0 *objdb, char *mcast, char *ifaddr,
return ret;
}
-
-/***
- *
- * Fowler/Noll/Vo hash
- *
- * The basis of this hash algorithm was taken from an idea sent
- * as reviewer comments to the IEEE POSIX P1003.2 committee by:
- *
- * Phong Vo (http://www.research.att.com/info/kpv/)
- * Glenn Fowler (http://www.research.att.com/~gsf/)
- *
- * In a subsequent ballot round:
- *
- * Landon Curt Noll (http://www.isthe.com/chongo/)
- *
- * improved on their algorithm. Some people tried this hash
- * and found that it worked rather well. In an EMail message
- * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash.
- *
- * FNV hashes are designed to be fast while maintaining a low
- * collision rate. The FNV speed allows one to quickly hash lots
- * of data while maintaining a reasonable collision rate. See:
- *
- * http://www.isthe.com/chongo/tech/comp/fnv/index.html
- *
- * for more details as well as other forms of the FNV hash.
- ***
- *
- * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
- * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
- *
- ***
- *
- * Please do not copyright this code. This code is in the public domain.
- *
- * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
- * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- *
- * By:
- * chongo <Landon Curt Noll> /\oo/\
- * http://www.isthe.com/chongo/
- *
- * Share and Enjoy! :-)
- */
-
-/*
- * Modified to be a little more simple to understand and to provide a 16 bit
- * value rather then 32 bit for cluster id generation
- *
- * sdake(a)redhat.com
- */
-
-/* 32 bit magic FNV-1a prime */
-#define FNV_32_PRIME ((uint32_t)0x01000193)
-
-/* Default initialization for FNV-1a */
-#define FNV_32_INIT ((uint32_t)0x811c9dc5)
-
-static uint16_t generate_hashed_cluster_id(char *str)
-{
- unsigned char *s = (unsigned char *)str;
- uint32_t hval = FNV_32_INIT;
- uint32_t ret;
-
- /*
- * FNV-1a hash each octet in the buffer
- */
- while (*s) {
- /*
- * xor the bottom with the current octet
- */
- hval ^= (uint32_t)*s++;
- /*
- * multiply by the 32 bit FNV magic prime mod 2^32
- */
- hval *= FNV_32_PRIME;
- }
-
- /*
- * Use XOR folding as recommended by authors of algorithm
- * to create a different hash size that is a power of two
- */
- ret = (hval >> 16) ^ (hval & 0xFFFF);
-
- sprintf(error_reason, "Generated hashed cluster id for '%s' is %d\n", str, ret);
- return (ret);
-}
-
static uint16_t generate_cluster_id(char *name)
{
int i;
@@ -386,7 +294,6 @@ static uint16_t generate_cluster_id(char *name)
value <<= 1;
value += name[i];
}
- sprintf(error_reason, "Generated cluster id for '%s' is %d\n", name, value & 0xFFFF);
return value & 0xFFFF;
}
@@ -1012,9 +919,11 @@ static int set_noccs_defaults(struct objdb_iface_ver0 *objdb)
if (!cluster_id) {
if (use_hashed_cluster_id)
- cluster_id = generate_hashed_cluster_id(cluster_name);
+ cluster_id = fnv_hash(cluster_name);
else
cluster_id = generate_cluster_id(cluster_name);
+
+ sprintf(error_reason, "Generated cluster id for '%s' is %d\n", cluster_name, cluster_id);
}
if (!nodename_env) {
@@ -1206,9 +1115,11 @@ static int get_cman_globals(struct objdb_iface_ver0 *objdb)
if (!cluster_id) {
if (use_hashed_cluster_id)
- cluster_id = generate_hashed_cluster_id(cluster_name);
+ cluster_id = fnv_hash(cluster_name);
else
cluster_id = generate_cluster_id(cluster_name);
+
+ sprintf(error_reason, "Generated cluster id for '%s' is %d\n", cluster_name, cluster_id);
}
}
objdb->object_find_destroy(find_handle);
@@ -1311,7 +1222,7 @@ static void setup_old_compat(struct objdb_iface_ver0 *objdb, hdb_handle_t cluste
hdb_handle_t totem_handle;
hdb_handle_t gfs_handle;
char *value;
-
+
use_hashed_cluster_id = 0;
/* Set groupd to backwards compatibility mode */
diff --git a/cman/daemon/fnvhash.c b/cman/daemon/fnvhash.c
new file mode 100644
index 0000000..47e221a
--- /dev/null
+++ b/cman/daemon/fnvhash.c
@@ -0,0 +1,93 @@
+#include <stdint.h>
+#include "fnvhash.h"
+
+/***
+ *
+ * Fowler/Noll/Vo hash
+ *
+ * The basis of this hash algorithm was taken from an idea sent
+ * as reviewer comments to the IEEE POSIX P1003.2 committee by:
+ *
+ * Phong Vo (http://www.research.att.com/info/kpv/)
+ * Glenn Fowler (http://www.research.att.com/~gsf/)
+ *
+ * In a subsequent ballot round:
+ *
+ * Landon Curt Noll (http://www.isthe.com/chongo/)
+ *
+ * improved on their algorithm. Some people tried this hash
+ * and found that it worked rather well. In an EMail message
+ * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash.
+ *
+ * FNV hashes are designed to be fast while maintaining a low
+ * collision rate. The FNV speed allows one to quickly hash lots
+ * of data while maintaining a reasonable collision rate. See:
+ *
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html
+ *
+ * for more details as well as other forms of the FNV hash.
+ ***
+ *
+ * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
+ * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
+ *
+ ***
+ *
+ * Please do not copyright this code. This code is in the public domain.
+ *
+ * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
+ * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ *
+ * By:
+ * chongo <Landon Curt Noll> /\oo/\
+ * http://www.isthe.com/chongo/
+ *
+ * Share and Enjoy! :-)
+ */
+
+/*
+ * Modified to be a little simpler to understand and to provide a 16 bit
+ * value rather than 32 bit for cluster id generation
+ *
+ * sdake(a)redhat.com
+ */
+
+/* 32 bit magic FNV-1a prime */
+#define FNV_32_PRIME ((uint32_t)0x01000193)
+
+/* Default initialization for FNV-1a */
+#define FNV_32_INIT ((uint32_t)0x811c9dc5)
+
+uint16_t fnv_hash(char *str)
+{
+ unsigned char *s = (unsigned char *)str;
+ uint32_t hval = FNV_32_INIT;
+ uint32_t ret;
+
+ /*
+ * FNV-1a hash each octet in the buffer
+ */
+ while (*s) {
+ /*
+ * xor the bottom with the current octet
+ */
+ hval ^= (uint32_t)*s++;
+ /*
+ * multiply by the 32 bit FNV magic prime mod 2^32
+ */
+ hval *= FNV_32_PRIME;
+ }
+
+ /*
+ * Use XOR folding as recommended by authors of algorithm
+ * to create a different hash size that is a power of two
+ */
+ ret = (hval >> 16) ^ (hval & 0xFFFF);
+
+ return (ret);
+}
diff --git a/cman/daemon/fnvhash.h b/cman/daemon/fnvhash.h
new file mode 100644
index 0000000..65e9c11
--- /dev/null
+++ b/cman/daemon/fnvhash.h
@@ -0,0 +1 @@
+uint16_t fnv_hash(char *str);
diff --git a/doc/COPYRIGHT b/doc/COPYRIGHT
index 313587b..55331b3 100644
--- a/doc/COPYRIGHT
+++ b/doc/COPYRIGHT
@@ -27,6 +27,12 @@ cman/qdisk/scandisk.{c,h}:
Original design by: Joel Becker <Joel.Becker at oracle.com> and
Fabio M. Di Nitto <fdinitto at redhat.com>
+cman/daemon/fnvhash.c
+ This code is in the public domain.
+ Phong Vo (http://www.research.att.com/info/kpv/)
+ Glenn Fowler (http://www.research.att.com/~gsf/)
+ Landon Curt Noll (http://www.isthe.com/chongo/)
+
dlm/doc/example.c:
Author: Daniel Phillips <phillips at redhat.com>
14 years, 1 month
dlm: master - dlm_controld: avoid full plock unlock when no resource exists
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=29ddcc7d...
Commit: 29ddcc7de91828a7d96cd2f07dd054daba7b1e1f
Parent: 27b09badd40a2d1500500fa6945aeb532f75bd13
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Mar 19 13:29:17 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Thu Mar 25 14:00:12 2010 -0500
dlm_controld: avoid full plock unlock when no resource exists
When a plock unlock operation is read, we can immediately return if
the resource does not exist. If there were any locks to unlock, the
resource would exist. This optimization avoids creating/caching the
resource and sending the unlock message unnecessarily.
(When a process holding an *flock* exits without unlocking it, the
vfs cleanup code sends us a full plock unlock on the file, even if
the process has never used plocks. This means we can potentially
see a lot of plock activity for processes that never touch plocks.
This patch reduces it, although it would be nice to detect that a
process has no plocks earlier, while still in the kernel.)
bz 575103
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/plock.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
index 7e5b735..981e9be 100644
--- a/group/dlm_controld/plock.c
+++ b/group/dlm_controld/plock.c
@@ -1451,7 +1451,7 @@ void process_plocks(int ci)
struct dlm_plock_info info;
struct timeval now;
uint64_t usec;
- int rv;
+ int create, rv;
if (limit_plocks()) {
poll_ignore_plock = 1;
@@ -1506,7 +1506,9 @@ void process_plocks(int ci)
plock_rate_delays = 0;
}
- rv = find_resource(ls, info.number, 1, &r);
+ create = (info.optype == DLM_PLOCK_OP_UNLOCK) ? 0 : 1;
+
+ rv = find_resource(ls, info.number, create, &r);
if (rv)
goto fail;
14 years, 1 month
cluster: STABLE3 - dlm_controld/gfs_controld: avoid full plock unlock when no resource exists
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 980cb15f64083b3f7fc2e74b5e43588616a7e282
Parent: 7b8f7da923c5f18e6c5e688ee6a2db981e51f22d
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Fri Mar 19 13:29:17 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Thu Mar 25 13:52:12 2010 -0500
dlm_controld/gfs_controld: avoid full plock unlock when no resource exists
When a plock unlock operation is read, we can immediately return if
the resource does not exist. If there were any locks to unlock, the
resource would exist. This optimization avoids creating/caching the
resource and sending the unlock message unnecessarily.
(When a process holding an *flock* exits without unlocking it, the
vfs cleanup code sends us a full plock unlock on the file, even if
the process has never used plocks. This means we can potentially
see a lot of plock activity for processes that never touch plocks.
This patch reduces it, although it would be nice to detect that a
process has no plocks earlier, while still in the kernel.)
bz 575103
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/plock.c | 6 ++++--
group/gfs_controld/plock.c | 6 ++++--
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/group/dlm_controld/plock.c b/group/dlm_controld/plock.c
index 3f20f7f..b11341d 100644
--- a/group/dlm_controld/plock.c
+++ b/group/dlm_controld/plock.c
@@ -1452,7 +1452,7 @@ void process_plocks(int ci)
struct dlm_plock_info info;
struct timeval now;
uint64_t usec;
- int rv;
+ int create, rv;
if (limit_plocks()) {
poll_ignore_plock = 1;
@@ -1507,7 +1507,9 @@ void process_plocks(int ci)
plock_rate_delays = 0;
}
- rv = find_resource(ls, info.number, 1, &r);
+ create = (info.optype == DLM_PLOCK_OP_UNLOCK) ? 0 : 1;
+
+ rv = find_resource(ls, info.number, create, &r);
if (rv)
goto fail;
diff --git a/group/gfs_controld/plock.c b/group/gfs_controld/plock.c
index 7a83a08..39113cf 100644
--- a/group/gfs_controld/plock.c
+++ b/group/gfs_controld/plock.c
@@ -1492,7 +1492,7 @@ void process_plocks(int ci)
struct dlm_plock_info info;
struct timeval now;
uint64_t usec;
- int rv;
+ int create, rv;
if (limit_plocks()) {
poll_ignore_plock = 1;
@@ -1547,7 +1547,9 @@ void process_plocks(int ci)
plock_rate_delays = 0;
}
- rv = find_resource(mg, info.number, 1, &r);
+ create = (info.optype == DLM_PLOCK_OP_UNLOCK) ? 0 : 1;
+
+ rv = find_resource(mg, info.number, create, &r);
if (rv)
goto fail;
14 years, 1 month
dlm: master - dlm_controld: add more fs_notified debugging
by David Teigland
Gitweb: http://git.fedorahosted.org/git/dlm.git?p=dlm.git;a=commitdiff;h=27b09bad...
Commit: 27b09badd40a2d1500500fa6945aeb532f75bd13
Parent: bcaea3259de4dc961cdf38f06c3635f5ecc5ba8d
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Mar 23 15:57:48 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Tue Mar 23 15:59:35 2010 -0500
dlm_controld: add more fs_notified debugging
To help figure out what's gone wrong if/when an fs_controld
goes into an fs_notified loop, which we see from time to time.
bz 576335
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index d5245ce..ef0b741 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -506,8 +506,10 @@ static void node_history_fail(struct lockspace *ls, int nodeid,
if (cfgd_enable_quorum && !cfgd_enable_fencing)
node->check_quorum = 1;
- if (ls->fs_registered)
+ if (ls->fs_registered) {
+ log_group(ls, "check_fs nodeid %d set", nodeid);
node->check_fs = 1;
+ }
node->removed_seq = cg->seq; /* for queries */
node->failed_reason = reason; /* for queries */
@@ -635,6 +637,7 @@ static int check_fs_done(struct lockspace *ls)
continue;
if (node->fs_notified) {
+ log_group(ls, "check_fs nodeid %d clear", node->nodeid);
node->check_fs = 0;
} else {
log_group(ls, "check_fs nodeid %d needs fs notify",
@@ -2305,6 +2308,7 @@ int set_fs_notified(struct lockspace *ls, int nodeid)
return -EAGAIN;
}
+ log_group(ls, "set_fs_notified nodeid %d", nodeid);
node->fs_notified = 1;
return 0;
}
14 years, 1 month
cluster: STABLE3 - dlm_controld: add more fs_notified debugging
by David Teigland
Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=...
Commit: 7b8f7da923c5f18e6c5e688ee6a2db981e51f22d
Parent: 7fde51f8331e11e1a8cfcaa00bc90c9cc5d420f5
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Tue Mar 23 15:57:48 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Tue Mar 23 15:59:14 2010 -0500
dlm_controld: add more fs_notified debugging
To help figure out what's gone wrong if/when an fs_controld
goes into an fs_notified loop, which we see from time to time.
bz 576335
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/dlm_controld/cpg.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index e01ecc2..20d59d5 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -506,8 +506,10 @@ static void node_history_fail(struct lockspace *ls, int nodeid,
if (cfgd_enable_quorum && !cfgd_enable_fencing)
node->check_quorum = 1;
- if (ls->fs_registered)
+ if (ls->fs_registered) {
+ log_group(ls, "check_fs nodeid %d set", nodeid);
node->check_fs = 1;
+ }
node->removed_seq = cg->seq; /* for queries */
node->failed_reason = reason; /* for queries */
@@ -635,6 +637,7 @@ static int check_fs_done(struct lockspace *ls)
continue;
if (node->fs_notified) {
+ log_group(ls, "check_fs nodeid %d clear", node->nodeid);
node->check_fs = 0;
} else {
log_group(ls, "check_fs nodeid %d needs fs notify",
@@ -2344,6 +2347,7 @@ int set_fs_notified(struct lockspace *ls, int nodeid)
return -EAGAIN;
}
+ log_group(ls, "set_fs_notified nodeid %d", nodeid);
node->fs_notified = 1;
return 0;
}
14 years, 1 month