Gitweb:
http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=844ff74c533b72f24789f78aa82acb321fadc5bb
Commit: 844ff74c533b72f24789f78aa82acb321fadc5bb
Parent: f1be533f910238ab5350e1a63b2ee18f548bff6b
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Wed Sep 16 16:03:46 2009 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Wed Sep 16 16:03:46 2009 -0500
gfs_controld: copy some fenced changes
from the fenced commit bcc5fdef8473d99399c624a7bc15423a2af645c1
. copy some naming changes
. copy some logging changes
. copy some new checks for accepting start messages:
  - check for a start already matched to a struct change
  - check that a node was a cluster member prior to the
    creation of the struct change
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/gfs_controld/cpg-new.c | 170 +++++++++++++++++++++++++++++++++----
group/gfs_controld/gfs_daemon.h | 16 ++--
group/gfs_controld/main.c | 14 ++--
group/gfs_controld/member_cman.c | 6 +-
4 files changed, 170 insertions(+), 36 deletions(-)
diff --git a/group/gfs_controld/cpg-new.c b/group/gfs_controld/cpg-new.c
index d08ca03..b3f25ae 100644
--- a/group/gfs_controld/cpg-new.c
+++ b/group/gfs_controld/cpg-new.c
@@ -126,6 +126,9 @@ struct node {
int withdraw;
int send_withdraw_ack;
+ uint64_t cluster_add_time;
+ uint64_t cluster_remove_time;
+
struct protocol proto;
};
@@ -158,6 +161,7 @@ struct change {
int we_joined;
uint32_t seq; /* used as a reference for debugging, and for queries */
uint32_t combined_seq; /* for queries */
+ uint64_t create_time;
};
struct save_msg {
@@ -167,7 +171,7 @@ struct save_msg {
};
static int dlmcontrol_fd;
-static int daemon_cpg_fd;
+static int cpg_fd_daemon;
static struct protocol our_protocol;
static struct list_head daemon_nodes;
static struct cpg_address daemon_member[MAX_NODES];
@@ -261,6 +265,59 @@ static void apply_changes_recovery(struct mountgroup *mg);
static void send_withdraw_acks(struct mountgroup *mg);
static void leave_mountgroup(struct mountgroup *mg, int mnterr);
+static void log_config(const struct cpg_name *group_name,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries)
+{
+ char m_buf[128];
+ char j_buf[32];
+ char l_buf[32];
+ size_t i, len, pos;
+ int ret;
+
+ memset(m_buf, 0, sizeof(m_buf));
+ memset(j_buf, 0, sizeof(j_buf));
+ memset(l_buf, 0, sizeof(l_buf));
+
+ len = sizeof(m_buf);
+ pos = 0;
+ for (i = 0; i < member_list_entries; i++) {
+ ret = snprintf(m_buf + pos, len - pos, " %d",
+ member_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ len = sizeof(j_buf);
+ pos = 0;
+ for (i = 0; i < joined_list_entries; i++) {
+ ret = snprintf(j_buf + pos, len - pos, " %d",
+ joined_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ len = sizeof(l_buf);
+ pos = 0;
+ for (i = 0; i < left_list_entries; i++) {
+ ret = snprintf(l_buf + pos, len - pos, " %d",
+ left_list[i].nodeid);
+ if (ret >= len - pos)
+ break;
+ pos += ret;
+ }
+
+ log_debug("%s conf %zu %zu %zu memb%s join%s left%s", group_name->value,
+ member_list_entries, joined_list_entries, left_list_entries,
+ m_buf, j_buf, l_buf);
+}
+
static const char *msg_name(int type)
{
switch (type) {
@@ -470,7 +527,45 @@ static void node_history_init(struct mountgroup *mg, int nodeid,
node->nodeid = nodeid;
node->add_time = 0;
list_add_tail(&node->list, &mg->node_history);
- node->added_seq = cg->seq; /* for queries */
+
+ if (cg)
+ node->added_seq = cg->seq; /* for queries */
+}
+
+void node_history_cluster_add(int nodeid)
+{
+ struct mountgroup *mg;
+ struct node *node;
+
+ list_for_each_entry(mg, &mountgroups, list) {
+ node_history_init(mg, nodeid, NULL);
+
+ node = get_node_history(mg, nodeid);
+ if (!node) {
+ log_error("node_history_cluster_add no nodeid %d",
+ nodeid);
+ return;
+ }
+
+ node->cluster_add_time = time(NULL);
+ }
+}
+
+void node_history_cluster_remove(int nodeid)
+{
+ struct mountgroup *mg;
+ struct node *node;
+
+ list_for_each_entry(mg, &mountgroups, list) {
+ node = get_node_history(mg, nodeid);
+ if (!node) {
+ log_error("node_history_cluster_remove no nodeid %d",
+ nodeid);
+ return;
+ }
+
+ node->cluster_remove_time = time(NULL);
+ }
}
static void node_history_start(struct mountgroup *mg, int nodeid)
@@ -857,6 +952,7 @@ static int match_change(struct mountgroup *mg, struct change *cg,
{
struct id_info *id;
struct member *memb;
+ struct node *node;
uint32_t seq = hd->msgdata;
int i, members_mismatch;
@@ -880,6 +976,30 @@ static int match_change(struct mountgroup *mg, struct change *cg,
return 0;
}
+ if (memb->start && hd->type == GFS_MSG_START) {
+ log_group(mg, "match_change %d:%u skip %u already start",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ /* a node's start can't match a change if the node joined the cluster
+ more recently than the change was created */
+
+ node = get_node_history(mg, hd->nodeid);
+ if (!node) {
+ log_group(mg, "match_change %d:%u skip cg %u no node history",
+ hd->nodeid, seq, cg->seq);
+ return 0;
+ }
+
+ if (node->cluster_add_time > cg->create_time) {
+ log_group(mg, "match_change %d:%u skip cg %u created %llu "
+ "cluster add %llu", hd->nodeid, seq, cg->seq,
+ (unsigned long long)cg->create_time,
+ (unsigned long long)node->cluster_add_time);
+ return 0;
+ }
+
/* verify this is the right change by matching the counts
and the nodeids of the current members */
@@ -1016,7 +1136,7 @@ static void receive_start(struct mountgroup *mg, struct gfs_header *hd, int len)
added = is_added(mg, hd->nodeid);
- if (added && mi->started_count) {
+ if (added && mi->started_count && mg->started_count) {
log_error("receive_start %d:%u add node with started_count %u",
hd->nodeid, seq, mi->started_count);
@@ -1687,11 +1807,11 @@ static void create_old_nodes(struct mountgroup *mg)
return;
}
- node->jid = id->jid;
+ node->jid = id->jid;
node->kernel_mount_done = !!(id->flags & IDI_MOUNT_DONE);
node->kernel_mount_error = !!(id->flags & IDI_MOUNT_ERROR);
- node->ro = !!(id->flags & IDI_MOUNT_RO);
- node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR);
+ node->ro = !!(id->flags & IDI_MOUNT_RO);
+ node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR);
j = malloc(sizeof(struct journal));
if (!j) {
@@ -1749,7 +1869,7 @@ static void create_new_nodes(struct mountgroup *mg)
}
node->jid = JID_NONE;
- node->ro = !!(id->flags & IDI_MOUNT_RO);
+ node->ro = !!(id->flags & IDI_MOUNT_RO);
node->spectator = !!(id->flags & IDI_MOUNT_SPECTATOR);
log_group(mg, "create_new_nodes %d ro %d spect %d",
@@ -2033,7 +2153,7 @@ static void sync_state(struct mountgroup *mg)
/* Normal case where nodes join an established group that completed
first recovery sometime in the past. Existing nodes that weren't
around during first recovery come through here, and new nodes
- being added in this cycle come through here. */
+ being added in this cycle come through here. */
if (mg->first_recovery_needed) {
/* shouldn't happen */
@@ -2349,6 +2469,7 @@ static int add_change(struct mountgroup *mg,
INIT_LIST_HEAD(&cg->removed);
INIT_LIST_HEAD(&cg->saved_messages);
cg->state = CGST_WAIT_CONDITIONS;
+ cg->create_time = time(NULL);
cg->seq = ++mg->change_seq;
if (!cg->seq)
cg->seq = ++mg->change_seq;
@@ -2432,7 +2553,8 @@ static int add_change(struct mountgroup *mg,
return error;
}
-static int we_left(const struct cpg_address *left_list, size_t left_list_entries)
+static int we_left(const struct cpg_address *left_list,
+ size_t left_list_entries)
{
int i;
@@ -2456,6 +2578,10 @@ static void confchg_cb(cpg_handle_t handle,
struct change *cg;
int rv;
+ log_config(group_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
mg = find_mg_handle(handle);
if (!mg) {
log_error("confchg_cb no mountgroup for cpg %s",
@@ -2591,7 +2717,7 @@ static cpg_callbacks_t cpg_callbacks = {
.cpg_confchg_fn = confchg_cb,
};
-static void process_mountgroup_cpg(int ci)
+static void process_cpg_mountgroup(int ci)
{
struct mountgroup *mg;
cpg_error_t error;
@@ -2634,7 +2760,7 @@ int gfs_join_mountgroup(struct mountgroup *mg)
cpg_fd_get(h, &fd);
- ci = client_add(fd, process_mountgroup_cpg, NULL);
+ ci = client_add(fd, process_cpg_mountgroup, NULL);
mg->cpg_handle = h;
mg->cpg_client = ci;
@@ -3047,7 +3173,7 @@ int set_protocol(void)
int rv;
memset(&pollfd, 0, sizeof(pollfd));
- pollfd.fd = daemon_cpg_fd;
+ pollfd.fd = cpg_fd_daemon;
pollfd.events = POLLIN;
while (1) {
@@ -3093,7 +3219,7 @@ int set_protocol(void)
}
if (pollfd.revents & POLLIN)
- process_cpg(0);
+ process_cpg_daemon(0);
if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
log_error("set_protocol poll revents %u",
pollfd.revents);
@@ -3139,6 +3265,8 @@ int set_protocol(void)
our_protocol.kernel_max[0],
our_protocol.kernel_max[1],
our_protocol.kernel_max[2]);
+
+ send_protocol(&our_protocol);
return 0;
}
@@ -3182,6 +3310,10 @@ static void confchg_cb_daemon(cpg_handle_t handle,
{
int i;
+ log_config(group_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
if (joined_list_entries)
send_protocol(&our_protocol);
@@ -3199,7 +3331,7 @@ static cpg_callbacks_t cpg_callbacks_daemon = {
.cpg_confchg_fn = confchg_cb_daemon,
};
-void process_cpg(int ci)
+void process_cpg_daemon(int ci)
{
cpg_error_t error;
@@ -3208,7 +3340,7 @@ void process_cpg(int ci)
log_error("daemon cpg_dispatch error %d", error);
}
-int setup_cpg(void)
+int setup_cpg_daemon(void)
{
cpg_error_t error;
cpg_handle_t h;
@@ -3231,7 +3363,7 @@ int setup_cpg(void)
return -1;
}
- cpg_fd_get(h, &daemon_cpg_fd);
+ cpg_fd_get(h, &cpg_fd_daemon);
cpg_handle_daemon = h;
@@ -3252,15 +3384,15 @@ int setup_cpg(void)
goto fail;
}
- log_debug("setup_cpg %d", daemon_cpg_fd);
- return daemon_cpg_fd;
+ log_debug("setup_cpg_daemon %d", cpg_fd_daemon);
+ return cpg_fd_daemon;
fail:
cpg_finalize(h);
return -1;
}
-void close_cpg(void)
+void close_cpg_daemon(void)
{
struct mountgroup *mg;
cpg_error_t error;
diff --git a/group/gfs_controld/gfs_daemon.h b/group/gfs_controld/gfs_daemon.h
index 8880b42..a69385b 100644
--- a/group/gfs_controld/gfs_daemon.h
+++ b/group/gfs_controld/gfs_daemon.h
@@ -70,7 +70,7 @@ extern int poll_ignore_plock;
extern int plock_fd;
extern int plock_ci;
extern struct list_head mountgroups;
-extern int cman_quorate;
+extern int cluster_quorate;
extern int our_nodeid;
extern char *clustername;
extern char daemon_debug_buf[256];
@@ -221,9 +221,9 @@ int read_ccs_int(const char *path, int *config_val);
void read_ccs_nodir(struct mountgroup *mg, char *buf);
/* cpg-new.c */
-int setup_cpg(void);
-void close_cpg(void);
-void process_cpg(int ci);
+int setup_cpg_daemon(void);
+void close_cpg_daemon(void);
+void process_cpg_daemon(int ci);
int setup_dlmcontrol(void);
void process_dlmcontrol(int ci);
int set_protocol(void);
@@ -240,6 +240,8 @@ int set_mountgroups(int *count, struct gfsc_mountgroup **mgs_out);
int set_mountgroup_nodes(struct mountgroup *mg, int option, int *node_count,
struct gfsc_node **nodes_out);
void free_mg(struct mountgroup *mg);
+void node_history_cluster_add(int nodeid);
+void node_history_cluster_remove(int nodeid);
/* cpg-old.c */
int setup_cpg_old(void);
@@ -294,9 +296,9 @@ void process_connection(int ci);
void cluster_dead(int ci);
/* member_cman.c */
-int setup_cman(void);
-void close_cman(void);
-void process_cman(int ci);
+int setup_cluster(void);
+void close_cluster(void);
+void process_cluster(int ci);
void kick_node_from_cluster(int nodeid);
/* plock.c */
diff --git a/group/gfs_controld/main.c b/group/gfs_controld/main.c
index ae59d80..30f44eb 100644
--- a/group/gfs_controld/main.c
+++ b/group/gfs_controld/main.c
@@ -1131,10 +1131,10 @@ static void loop(void)
goto out;
client_add(rv, process_listener, NULL);
- rv = setup_cman();
+ rv = setup_cluster();
if (rv < 0)
goto out;
- client_add(rv, process_cman, cluster_dead);
+ client_add(rv, process_cluster, cluster_dead);
rv = setup_ccs();
if (rv < 0)
@@ -1184,10 +1184,10 @@ static void loop(void)
* code in: cpg-new.c
*/
- rv = setup_cpg();
+ rv = setup_cpg_daemon();
if (rv < 0)
goto out;
- client_add(rv, process_cpg, cluster_dead);
+ client_add(rv, process_cpg_daemon, cluster_dead);
rv = set_protocol();
if (rv < 0)
@@ -1291,7 +1291,7 @@ static void loop(void)
}
out:
if (group_mode == GROUP_LIBCPG)
- close_cpg();
+ close_cpg_daemon();
else if (group_mode == GROUP_LIBGROUP) {
close_plocks();
close_cpg_old();
@@ -1300,7 +1300,7 @@ static void loop(void)
close_groupd();
close_logging();
close_ccs();
- close_cman();
+ close_cluster();
if (!list_empty(&mountgroups))
log_error("mountgroups abandoned");
@@ -1565,7 +1565,7 @@ int poll_dlm;
int plock_fd;
int plock_ci;
struct list_head mountgroups;
-int cman_quorate;
+int cluster_quorate;
int our_nodeid;
char *clustername;
char daemon_debug_buf[256];
diff --git a/group/gfs_controld/member_cman.c b/group/gfs_controld/member_cman.c
index f536d30..0f78097 100644
--- a/group/gfs_controld/member_cman.c
+++ b/group/gfs_controld/member_cman.c
@@ -36,7 +36,7 @@ static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
}
}
-void process_cman(int ci)
+void process_cluster(int ci)
{
int rv;
@@ -45,7 +45,7 @@ void process_cman(int ci)
cluster_dead(0);
}
-int setup_cman(void)
+int setup_cluster(void)
{
cman_node_t node;
int rv, fd;
@@ -119,7 +119,7 @@ int setup_cman(void)
return fd;
}
-void close_cman(void)
+void close_cluster(void)
{
cman_finish(ch);
cman_finish(ch_admin);