Dan Kenigsberg has uploaded a new change for review.
Change subject: fencing: use deathSignal to keep scripts at bay
......................................................................
fencing: use deathSignal to keep scripts at bay
Before this patch, the fencing code used a knowingly-raceful code to
wait for pending fencing scripts, and kill them when Vdsm exited.
This patch makes sure that deathSignal is sent to pending scripts if
Vdsm exits or crashes.
We require a newer version of python-cpopen that raises a proper
exception when an executable is missing.
Change-Id: I068acf5ca0ad0813aaef660770de79958bd0b763
Signed-off-by: Dan Kenigsberg <danken(a)redhat.com>
---
M vdsm.spec.in
M vdsm/API.py
2 files changed, 24 insertions(+), 36 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/97/22997/1
diff --git a/vdsm.spec.in b/vdsm.spec.in
index d42b1b9..2d3a1b4 100644
--- a/vdsm.spec.in
+++ b/vdsm.spec.in
@@ -104,7 +104,7 @@
Requires: python-netaddr
Requires: python-inotify
Requires: python-argparse
-Requires: python-cpopen >= 1.2.3-4
+Requires: python-cpopen >= 1.2.4
Requires: python-ethtool >= 0.6-3
Requires: %{name}-python-zombiereaper = %{version}-%{release}
Requires: rpm-python
diff --git a/vdsm/API.py b/vdsm/API.py
index 3cf3133..f76bd7b 100644
--- a/vdsm/API.py
+++ b/vdsm/API.py
@@ -1078,26 +1078,15 @@
action can be one of (status, on, off, reboot)."""
@utils.traceback(on=self.log.name)
- def waitForPid(p, inp):
- """ Wait until p.pid exits. Kill it if vdsm exists before.
"""
- p.stdin.write(inp)
- p.stdin.close()
- while p.poll() is None:
- if not self._cif._enabled:
- self.log.debug('killing fence script pid %s', p.pid)
- os.kill(p.pid, signal.SIGTERM)
- time.sleep(1)
- try:
- # improbable race: p.pid may now belong to another
- # process
- os.kill(p.pid, signal.SIGKILL)
- except:
- pass
- return
- time.sleep(1)
- self.log.debug('rc %s inp %s out %s err %s', p.returncode,
- hidePasswd(inp),
- p.stdout.read(), p.stderr.read())
+ def fence(script, inp):
+ # non-status actions are sent asyncronously. deathSignal is set to
+ # make sure that no stray fencing scripts are left behind if Vdsm
+ # crashes.
+ rc, out, err = utils.execCmd(
+ [script], deathSignal=signal.SIGTERM,
+ data=inp)
+ self.log.debug('rc %s inp %s out %s err %s', rc,
+ hidePasswd(inp), out, err)
def hidePasswd(text):
cleantext = ''
@@ -1116,15 +1105,6 @@
script = constants.EXT_FENCE_PREFIX + agent
- try:
- p = subprocess.Popen([script], stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE, close_fds=True)
- except OSError as e:
- if e.errno == os.errno.ENOENT:
- return errCode['fenceAgent']
- raise
-
inp = ('agent=fence_%s\nipaddr=%s\nlogin=%s\noption=%s\n'
'passwd=%s\n') % (agent, addr, username, action, password)
if port != '':
@@ -1132,24 +1112,32 @@
if utils.tobool(secure):
inp += 'secure=yes\n'
inp += options
+
if action == 'status':
- out, err = p.communicate(inp)
- self.log.debug('rc %s in %s out %s err %s', p.returncode,
+ try:
+ rc, out, err = utils.execCmd(
+ [script], deathSignal=signal.SIGTERM,
+ data=inp)
+ except OSError as e:
+ if e.errno == os.errno.ENOENT:
+ return errCode['fenceAgent']
+ raise
+ self.log.debug('rc %s in %s out %s err %s', rc,
hidePasswd(inp), out, err)
- if not 0 <= p.returncode <= 2:
+ if not 0 <= rc <= 2:
return {'status': {'code': 1,
'message': out + err}}
message = doneCode['message']
- if p.returncode == 0:
+ if rc == 0:
power = 'on'
- elif p.returncode == 2:
+ elif rc == 2:
power = 'off'
else:
power = 'unknown'
message = out + err
return {'status': {'code': 0, 'message': message},
'power': power}
- threading.Thread(target=waitForPid, args=(p, inp)).start()
+ threading.Thread(target=fence, args=(script, inp)).start()
return {'status': doneCode}
def ping(self):
--
To view, visit
http://gerrit.ovirt.org/22997
To unsubscribe, visit
http://gerrit.ovirt.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I068acf5ca0ad0813aaef660770de79958bd0b763
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Dan Kenigsberg <danken(a)redhat.com>