Gitweb: http://git.fedorahosted.org/git/cluster.git?p=cluster.git;a=commitdiff;h=633... Commit: 6330bbe58b809aaf5dffc4be87e632b427a4b568 Parent: 082a641b2f7122ba86b2773131aa2c77e31680b5 Author: Lon Hohberger lhh@redhat.com AuthorDate: Wed Apr 14 17:29:54 2010 -0400 Committer: Lon Hohberger lhh@redhat.com CommitterDate: Wed May 5 11:47:07 2010 -0400
rgmanager: Kill processes correctly w/ force_unmount
The killMountProcesses function was written about 10 years ago. It was designed to work with lsof or fuser, and to log messages for each process killed. This is not a bad idea. The problem is that parsing the output of either is error-prone, particularly when mount points are similar to other directories on the system.
A far less error-prone method of cleaning up a mount point is to use 'fuser -kvm' on it. Not only is this less error-prone, it's a good bit faster at doing its job than iterating through output in a shell script.
This patch makes force_unmount very reliable at killing the correct processes, but we lose the logging functionality. This is a fair trade-off, because the killMountProcesses function has had several bugs over the years, and those bugs have caused a number of problems.
Resolves: bz555901
Signed-off-by: Lon Hohberger lhh@redhat.com --- rgmanager/src/resources/clusterfs.sh | 113 +-------------------------- rgmanager/src/resources/fs.sh | 140 +++++----------------------------- rgmanager/src/resources/netfs.sh | 128 +++---------------------------- 3 files changed, 37 insertions(+), 344 deletions(-)
diff --git a/rgmanager/src/resources/clusterfs.sh b/rgmanager/src/resources/clusterfs.sh index bf2a3d1..3ca7a30 100755 --- a/rgmanager/src/resources/clusterfs.sh +++ b/rgmanager/src/resources/clusterfs.sh @@ -492,113 +492,6 @@ isAlive()
# -# killMountProcesses device mount_point -# -# Using lsof or fuser try to unmount the mount by killing of the processes -# that might be keeping it busy. -# -killMountProcesses() -{ - typeset -i ret=$SUCCESS - typeset have_lsof="" - typeset have_fuser="" - typeset try - - if [ $# -ne 1 ]; then - ocf_log err \ - "Usage: killMountProcesses mount_point" - return $FAIL - fi - - typeset mp=$1 - - ocf_log notice "Forcefully unmounting $mp" - - # - # Not all distributions have lsof. If not use fuser. If it - # does, try both. - # - file=$(which lsof 2>/dev/null) - if [ -f "$file" ]; then - have_lsof=$YES - fi - - file=$(which fuser 2>/dev/null) - if [ -f "$file" ]; then - have_fuser=$YES - fi - - if [ -z "$have_lsof" -a -z "$have_fuser" ]; then - ocf_log warn \ - "Cannot forcefully unmount $mp; cannot find lsof or fuser commands" - return $FAIL - fi - - for try in 1 2 3; do - if [ -n "$have_lsof" ]; then - # - # Use lsof to free up mount point - # - while read command pid user - do - if [ -z "$pid" ]; then - continue - fi - - if [ $try -eq 1 ]; then - ocf_log warn \ - "killing process $pid ($user $command $mp)" - elif [ $try -eq 3 ]; then - ocf_log crit \ - "Could not clean up mountpoint $mp" - ret=$FAIL - fi - - if [ $try -gt 1 ]; then - kill -9 $pid - else - kill -TERM $pid - fi - done < <(lsof -b 2>/dev/null | \ - grep -E "$mp(/.*|)$" | \ - awk '{print $1,$2,$3}' | \ - sort -u -k 1,3) - elif [ -n "$have_fuser" ]; then - # - # Use fuser to free up mount point - # - while read command pid user - do - if [ -z "$pid" ]; then - continue - fi - - if [ $try -eq 1 ]; then - ocf_log warn \ - "killing process $pid ($user $command $mp)" - elif [ $try -eq 3 ]; then - ocf_log crit \ - "Could not clean up mount point $mp" - ret=$FAIL - fi - - if [ $try -gt 1 ]; then - kill -9 $pid - else - kill -TERM $pid - fi - done < <(fuser -vm $mp | \ - grep -v PID | \ - sed 's;^'$mp';;' | \ - awk '{print $4,$2,$1}' | \ - sort -u -k 1,3) - fi - done - - return $ret -} - -# # startFilesystem # 
startFilesystem() { @@ -880,7 +773,11 @@ stop: Could not match $OCF_RESKEY_device with a real device" umount_failed=yes
if [ "$force_umount" ]; then - killMountProcesses $mp + if [ $try -eq 1 ]; then + fuser -TERM -kvm "$mp" + else + fuser -kvm "$mp" + fi fi
if [ $try -ge $max_tries ]; then diff --git a/rgmanager/src/resources/fs.sh b/rgmanager/src/resources/fs.sh index 5b3a3bc..e438894 100755 --- a/rgmanager/src/resources/fs.sh +++ b/rgmanager/src/resources/fs.sh @@ -570,6 +570,8 @@ isMounted () { typeset mp tmp_mp typeset dev tmp_dev typeset ret=$FAIL + typeset found=1 + typeset poss_mp
if [ $# -ne 2 ]; then ocf_log err "Usage: isMounted device mount_point" @@ -581,7 +583,7 @@ isMounted () { if [ -z "$dev" ]; then ocf_log err \ "fs (isMounted): Could not match $1 with a real device" - return $FAIL + return $OCF_ERR_ARGS fi
if [ -h "$2" ]; then @@ -607,14 +609,21 @@ isMounted () { # Check to see if its mounted in the right # place # - if [ -n "$tmp_mp" -a "$tmp_mp" != "$mp" ]; then - ocf_log warn \ -"Device $dev is mounted on $tmp_mp instead of $mp" + if [ -n "$tmp_mp" ]; then + if [ "$tmp_mp" != "$mp" ]; then + poss_mp=$tmp_mp + else + found=0 + fi fi ret=$YES fi done < /proc/mounts
+ if [ $ret -eq $YES ] && [ $found -ne 0 ]; then + ocf_log warn "Device $dev is mounted on $poss_mp instead of $mp" + fi + return $ret }
@@ -679,114 +688,6 @@ isAlive()
# -# killMountProcesses mount_point -# -# Using lsof or fuser try to unmount the mount by killing of the processes -# that might be keeping it busy. -# -killMountProcesses() -{ - typeset -i ret=$SUCCESS - typeset have_lsof="" - typeset have_fuser="" - typeset try - - if [ $# -ne 1 ]; then - ocf_log err \ - "Usage: killMountProcesses mount_point" - return $FAIL - fi - - typeset mp=$1 - - ocf_log notice "Forcefully unmounting $mp" - - # - # Not all distributions have lsof. If not use fuser. If it - # does, try both. - # - file=$(which lsof 2>/dev/null) - if [ -f "$file" ]; then - have_lsof=$YES - fi - - file=$(which fuser 2>/dev/null) - if [ -f "$file" ]; then - have_fuser=$YES - fi - - if [ -z "$have_lsof" -a -z "$have_fuser" ]; then - ocf_log warn \ - "Cannot forcefully unmount $mp; cannot find lsof or fuser commands" - return $FAIL - fi - - for try in 1 2 3; do - if [ -n "$have_lsof" ]; then - # - # Use lsof to free up mount point - # - while read command pid user - do - if [ -z "$pid" ]; then - continue - fi - - if [ $try -eq 1 ]; then - ocf_log warn \ - "killing process $pid ($user $command $mp)" - elif [ $try -eq 3 ]; then - ocf_log crit \ - "Could not clean up mountpoint $mp" - ret=$FAIL - fi - - if [ $try -gt 1 ]; then - kill -9 $pid - else - kill -TERM $pid - fi - done < <(lsof -bn 2>/dev/null | \ - grep -E "$mp(/.*|)$" | \ - awk '{print $1,$2,$3}' | \ - sort -u -k 1,3) - elif [ -n "$have_fuser" ]; then - # - # Use fuser to free up mount point - # - while read command pid user - do - if [ -z "$pid" ]; then - continue - fi - - if [ $try -eq 1 ]; then - ocf_log warn \ - "killing process $pid ($user $command $mp)" - elif [ $try -eq 3 ]; then - ocf_log crit \ - "Could not clean up mount point $mp" - ret=$FAIL - fi - - if [ $try -gt 1 ]; then - kill -9 $pid - else - kill -TERM $pid - fi - done < <(fuser -vm $mp | \ - grep -v PID | \ - sed 's;^'$mp';;' | \ - awk '{print $4,$2,$1}' | \ - sort -u -k 1,3) - fi - done - - return $ret -} - - -# # Decide which quota 
options are enabled and return a string # which we can pass to quotaon # @@ -950,7 +851,7 @@ startFilesystem: Creating mount point $mp for device $dev" ;; $NO) # not mounted, continue ;; - $FAIL) + *) return $FAIL ;; esac @@ -1184,8 +1085,9 @@ stop: Could not match $OCF_RESKEY_device with a real device" umount_failed=yes
if [ "$force_umount" ]; then - killMountProcesses $mp if [ $try -eq 1 ]; then + fuser -TERM -kvm "$mp" + if [ "$OCF_RESKEY_nfslock" = "yes" ] || \ [ "$OCF_RESKEY_nfslock" = "1" ]; then ocf_log warning \ @@ -1197,15 +1099,11 @@ stop: Could not match $OCF_RESKEY_device with a real device" notify_list_store $mp/.clumanager/statd nfslock_reclaim=1 fi + else + fuser -kvm "$mp" fi fi
- if [ $try -ge $max_tries ]; then - done=$YES - else - sleep $sleep_time - let try=try+1 - fi ;; *) return $FAIL @@ -1214,7 +1112,7 @@ stop: Could not match $OCF_RESKEY_device with a real device"
if [ $try -ge $max_tries ]; then done=$YES - elif [ "$done" -ne "$YES" ]; then + elif [ "$done" != "$YES" ]; then sleep $sleep_time let try=try+1 fi diff --git a/rgmanager/src/resources/netfs.sh b/rgmanager/src/resources/netfs.sh index c683ee6..108d6f3 100755 --- a/rgmanager/src/resources/netfs.sh +++ b/rgmanager/src/resources/netfs.sh @@ -349,113 +349,6 @@ isMounted () { }
# -# killMountProcesses mount_point -# -# Using lsof or fuser try to unmount the mount by killing of the processes -# that might be keeping it busy. -# -killMountProcesses() -{ - typeset -i ret=$SUCCESS - typeset have_lsof="" - typeset have_fuser="" - typeset try - - if [ $# -ne 1 ]; then - ocf_log err \ - "Usage: killMountProcesses mount_point" - return $FAIL - fi - - typeset mp=$1 - - ocf_log notice "Forcefully unmounting $mp" - - # - # Not all distributions have lsof. If not use fuser. If it - # does, try both. - # - file=$(which lsof 2>/dev/null) - if [ -f "$file" ]; then - have_lsof=$YES - fi - - file=$(which fuser 2>/dev/null) - if [ -f "$file" ]; then - have_fuser=$YES - fi - - if [ -z "$have_lsof" -a -z "$have_fuser" ]; then - ocf_log warn \ - "Cannot forcefully unmount $mp; cannot find lsof or fuser commands" - return $FAIL - fi - - for try in 1 2 3; do - if [ -n "$have_lsof" ]; then - # - # Use lsof to free up mount point - # - while read command pid user - do - if [ -z "$pid" ]; then - continue - fi - - if [ $try -eq 1 ]; then - ocf_log warn \ - "killing process $pid ($user $command $mp)" - elif [ $try -eq 3 ]; then - ocf_log crit \ - "Could not clean up mountpoint $mp" - ret=$FAIL - fi - - if [ $try -gt 1 ]; then - kill -9 $pid - else - kill -TERM $pid - fi - done < <(lsof -w -bn 2>/dev/null | \ - grep -w -E "$mp(/.*|)$" | \ - awk '{print $1,$2,$3}' | \ - sort -u -k 1,3) - elif [ -n "$have_fuser" ]; then - # - # Use fuser to free up mount point - # - while read command pid user - do - if [ -z "$pid" ]; then - continue - fi - - if [ $try -eq 1 ]; then - ocf_log warn \ - "killing process $pid ($user $command $mp)" - elif [ $try -eq 3 ]; then - ocf_log crit \ - "Could not clean up mount point $mp" - ret=$FAIL - fi - - if [ $try -gt 1 ]; then - kill -9 $pid - else - kill -TERM $pid - fi - done < <(fuser -vm $mp | \ - grep -v PID | \ - sed 's;^'$mp';;' | \ - awk '{print $4,$2,$1}' | \ - sort -u -k 1,3) - fi - done - - return $ret -} - -# # startNFSFilesystem 
# startNFSFilesystem() { @@ -637,15 +530,20 @@ stopNFSFilesystem() {
umount_failed=yes
- if [ "$force_umount" ]; then - killMountProcesses $mp - fi + if [ "$force_umount" ]; then + if [ $try -eq 1 ]; then + fuser -TERM -kvm "$mp" + else + fuser -kvm "$mp" + fi + fi
- if [ $try -ge $max_tries ]; then - done=$YES - else - sleep $sleep_time - let try=try+1 + + if [ $try -ge $max_tries ]; then + done=$YES + else + sleep $sleep_time + let try=try+1 fi ;; *)
cluster-commits@lists.fedorahosted.org