Dan Kenigsberg has uploaded a new change for review.
Change subject: fencing: use deathSignal to keep scripts at bay ......................................................................
fencing: use deathSignal to keep scripts at bay
Before this patch, the fencing code used a knowingly-raceful code to wait for pending fencing scripts, and kill them when Vdsm exited. This patch makes sure that deathSignal is sent to pending scripts if Vdsm exits or crashes.
We require a newer version of python-cpopen that raises a proper exception when an executable is missing.
Change-Id: I068acf5ca0ad0813aaef660770de79958bd0b763 Signed-off-by: Dan Kenigsberg danken@redhat.com --- M vdsm.spec.in M vdsm/API.py 2 files changed, 24 insertions(+), 36 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/97/22997/1
diff --git a/vdsm.spec.in b/vdsm.spec.in index d42b1b9..2d3a1b4 100644 --- a/vdsm.spec.in +++ b/vdsm.spec.in @@ -104,7 +104,7 @@ Requires: python-netaddr Requires: python-inotify Requires: python-argparse -Requires: python-cpopen >= 1.2.3-4 +Requires: python-cpopen >= 1.2.4 Requires: python-ethtool >= 0.6-3 Requires: %{name}-python-zombiereaper = %{version}-%{release} Requires: rpm-python diff --git a/vdsm/API.py b/vdsm/API.py index 3cf3133..f76bd7b 100644 --- a/vdsm/API.py +++ b/vdsm/API.py @@ -1078,26 +1078,15 @@ action can be one of (status, on, off, reboot)."""
@utils.traceback(on=self.log.name) - def waitForPid(p, inp): - """ Wait until p.pid exits. Kill it if vdsm exists before. """ - p.stdin.write(inp) - p.stdin.close() - while p.poll() is None: - if not self._cif._enabled: - self.log.debug('killing fence script pid %s', p.pid) - os.kill(p.pid, signal.SIGTERM) - time.sleep(1) - try: - # improbable race: p.pid may now belong to another - # process - os.kill(p.pid, signal.SIGKILL) - except: - pass - return - time.sleep(1) - self.log.debug('rc %s inp %s out %s err %s', p.returncode, - hidePasswd(inp), - p.stdout.read(), p.stderr.read()) + def fence(script, inp): + # non-status actions are sent asyncronously. deathSignal is set to + # make sure that no stray fencing scripts are left behind if Vdsm + # crashes. + rc, out, err = utils.execCmd( + [script], deathSignal=signal.SIGTERM, + data=inp) + self.log.debug('rc %s inp %s out %s err %s', rc, + hidePasswd(inp), out, err)
def hidePasswd(text): cleantext = '' @@ -1116,15 +1105,6 @@
script = constants.EXT_FENCE_PREFIX + agent
- try: - p = subprocess.Popen([script], stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, close_fds=True) - except OSError as e: - if e.errno == os.errno.ENOENT: - return errCode['fenceAgent'] - raise - inp = ('agent=fence_%s\nipaddr=%s\nlogin=%s\noption=%s\n' 'passwd=%s\n') % (agent, addr, username, action, password) if port != '': @@ -1132,24 +1112,32 @@ if utils.tobool(secure): inp += 'secure=yes\n' inp += options + if action == 'status': - out, err = p.communicate(inp) - self.log.debug('rc %s in %s out %s err %s', p.returncode, + try: + rc, out, err = utils.execCmd( + [script], deathSignal=signal.SIGTERM, + data=inp) + except OSError as e: + if e.errno == os.errno.ENOENT: + return errCode['fenceAgent'] + raise + self.log.debug('rc %s in %s out %s err %s', rc, hidePasswd(inp), out, err) - if not 0 <= p.returncode <= 2: + if not 0 <= rc <= 2: return {'status': {'code': 1, 'message': out + err}} message = doneCode['message'] - if p.returncode == 0: + if rc == 0: power = 'on' - elif p.returncode == 2: + elif rc == 2: power = 'off' else: power = 'unknown' message = out + err return {'status': {'code': 0, 'message': message}, 'power': power} - threading.Thread(target=waitForPid, args=(p, inp)).start() + threading.Thread(target=fence, args=(script, inp)).start() return {'status': doneCode}
def ping(self):