Gitweb:
http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=e8cc459b6ac847c1dd92a17ce833b74e46228ab0
Commit: e8cc459b6ac847c1dd92a17ce833b74e46228ab0
Parent: 4010dd2e3ffaa95630b74a889737aa6eef37d50e
Author: David Teigland <teigland(a)redhat.com>
AuthorDate: Mon Aug 16 16:20:10 2010 -0500
Committer: David Teigland <teigland(a)redhat.com>
CommitterDate: Tue Aug 17 14:52:39 2010 -0500
gfs_controld: fix plock owner in unmount
When a node owns any plock resources on a file system and that
fs is unmounted, the remaining nodes do nothing to change the
owner value on those resources. Any process that attempts to
access those plock resources will become stuck and require a
reboot. The fix is to change the owner to 0 (unowned) on any
resources owned by a node that unmounts.
bz 624554
Signed-off-by: David Teigland <teigland(a)redhat.com>
---
group/gfs_controld/lock_dlm.h | 1 +
group/gfs_controld/plock.c | 31 +++++++++++++++++++++++++++++++
group/gfs_controld/recover.c | 9 +++++++++
3 files changed, 41 insertions(+), 0 deletions(-)
diff --git a/group/gfs_controld/lock_dlm.h b/group/gfs_controld/lock_dlm.h
index cc12e1c..0a15fe1 100644
--- a/group/gfs_controld/lock_dlm.h
+++ b/group/gfs_controld/lock_dlm.h
@@ -314,6 +314,7 @@ void retrieve_plocks(struct mountgroup *mg);
int dump_plocks(char *name, int fd);
void process_saved_plocks(struct mountgroup *mg);
void purge_plocks(struct mountgroup *mg, int nodeid, int unmount);
+void remove_resource_owner(struct mountgroup *mg, int nodeid);
int unlink_checkpoint(struct mountgroup *mg);
void update_dmsetup_wait(void);
diff --git a/group/gfs_controld/plock.c b/group/gfs_controld/plock.c
index 0d8c82d..4330a2c 100644
--- a/group/gfs_controld/plock.c
+++ b/group/gfs_controld/plock.c
@@ -2365,6 +2365,37 @@ void purge_plocks(struct mountgroup *mg, int nodeid, int unmount)
unlink_checkpoint(mg);
}
+/* when a node unmounts we need to remove it as the owner of any resources */
+
+void remove_resource_owner(struct mountgroup *mg, int nodeid)
+{
+ struct resource *r, *r2;
+ int rem = 0;
+
+ if (!config_plock_ownership)
+ return;
+
+ list_for_each_entry_safe(r, r2, &mg->resources, list) {
+ if (r->owner == nodeid) {
+ log_plock(mg, "rem owner %d from %llu",
+ nodeid, (unsigned long long)r->number);
+ r->owner = 0;
+ r->flags |= R_GOT_UNOWN;
+ rem++;
+
+ /* should probably wait to do this until after
+ the finish barrier when we know everyone has
+ changed owner to 0 */
+ send_pending_plocks(mg, r);
+ }
+ }
+
+ if (rem)
+ mg->last_plock_time = time(NULL);
+
+ log_group(mg, "removed owner %d from %d resources", nodeid, rem);
+}
+
int dump_plocks(char *name, int fd)
{
struct mountgroup *mg;
diff --git a/group/gfs_controld/recover.c b/group/gfs_controld/recover.c
index 931b357..597516c 100644
--- a/group/gfs_controld/recover.c
+++ b/group/gfs_controld/recover.c
@@ -2576,6 +2576,14 @@ void reset_unfinished_recoveries(struct mountgroup *mg)
}
}
+void reset_plock_resources(struct mountgroup *mg)
+{
+ struct mg_member *memb;
+
+ list_for_each_entry(memb, &mg->members_gone, list)
+ remove_resource_owner(mg, memb->nodeid);
+}
+
/*
old method:
A is rw mount, B mounts rw
@@ -2623,6 +2631,7 @@ void do_start(struct mountgroup *mg, int type, int member_count, int *nodeids)
recover_members(mg, member_count, nodeids, &pos, &neg);
reset_unfinished_recoveries(mg);
+ reset_plock_resources(mg);
if (mg->init) {
if (member_count == 1)