Francesco Romani has uploaded a new change for review.
Change subject: vm: graphdev: update the _devices field
......................................................................
vm: graphdev: update the _devices field
This patch lets the graphics device instances in _devices
be updated with the fresh data coming from libvirt.
Change-Id: Ie6a2133742a5515dc1077cd935ad7d1e37410e44
Signed-off-by: Francesco Romani <fromani(a)redhat.com>
---
M vdsm/virt/vm.py
1 file changed, 10 insertions(+), 3 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/32/27932/1
diff --git a/vdsm/virt/vm.py b/vdsm/virt/vm.py
index 2c4ad7e..e977ff3 100644
--- a/vdsm/virt/vm.py
+++ b/vdsm/virt/vm.py
@@ -4945,13 +4945,20 @@
updated = False
for gxml in graphicsXml:
graphicsType = gxml.getAttribute('type')
+ port = gxml.getAttribute('port')
+ tlsPort = gxml.getAttribute('tlsPort')
+
+ for d in self._devices[GRAPHICS_DEVICES]:
+ if d.device == graphicsType:
+ d.port = port
+ d.tlsPort = tlsPort
+ break
for dev in self.conf['devices']:
- if dev.get('device') == graphicsType:
- port = gxml.getAttribute('port')
+ if (dev.get('type') == GRAPHICS_DEVICES and
+ dev.get('device') == graphicsType):
if port:
dev['port'] = port
- tlsPort = gxml.getAttribute('tlsPort')
if tlsPort:
dev['tlsPort'] = tlsPort
if not updated:
--
To view, visit http://gerrit.ovirt.org/27932
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie6a2133742a5515dc1077cd935ad7d1e37410e44
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Francesco Romani <fromani(a)redhat.com>
Yoav Kleinberger has uploaded a new change for review.
Change subject: tests: fix wrong use of assertions
......................................................................
tests: fix wrong use of assertions
Previously, the test used assertEquals to check the return code of
subprocesses spawned by the test itself. This resulted in a reported
test failure, when it should have reported an error.
To fix this, I replaced execCmd, which does not belong in test code (we
should not use vdsm code to write the tests that check vdsm), with a new
testrunner.runCommand which is easier to use, and simplified the test.
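For illustration, a minimal sketch of how test code is expected to use the
new helper (names follow the patch below; the exportfs invocation is only an
example, not part of this change):

    from testrunner import runCommand, TestRunnerError

    def exportNFS(path):
        # A non-zero exit raises TestRunnerError, which nose reports as a
        # test error (broken test environment) rather than a test failure.
        return runCommand('exportfs', '-o', 'rw,insecure,sync',
                          '127.0.0.1:%s' % path)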
Change-Id: I693923e6dcaa05bc1479db814c0b7696b3536c9c
Signed-off-by: Yoav Kleinberger <ykleinbe(a)redhat.com>
---
M tests/functional/storageTests.py
M tests/testrunner.py
2 files changed, 34 insertions(+), 20 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/24/28124/1
diff --git a/tests/functional/storageTests.py b/tests/functional/storageTests.py
index 80ba312..0a19776 100644
--- a/tests/functional/storageTests.py
+++ b/tests/functional/storageTests.py
@@ -32,6 +32,9 @@
from testrunner import VdsmTestCase as TestCaseBase
from testrunner import permutations, expandPermutations
from testrunner import TEMPDIR
+from testrunner import runCommand
+from testrunner import TestRunnerError
+
try:
import rtslib
except ImportError:
@@ -40,7 +43,6 @@
import storage.sd
import storage.storage_exception as se
import storage.volume
-from storage.misc import execCmd
from storage.mount import Mount
from vdsm.config import config
@@ -372,9 +374,7 @@
if "rtslib" not in globals().keys():
raise SkipTest("python-rtslib is not installed.")
- cmd = [_modprobe.cmd, "iscsi_target_mod"]
- rc, out, err = execCmd(cmd, sudo=True)
- asserts.assertEquals(rc, 0)
+ runCommand(_modprobe.cmd, "iscsi_target_mod")
# mount the configfs for rtslib if it is not mounted
m = Mount('configfs', '/sys/kernel/config')
@@ -522,21 +522,15 @@
def exportNFS(path):
- rc, out, err = execCmd([_exportfs.cmd, '-o', 'rw,insecure,sync',
- '127.0.0.1:%s' % path])
- return rc
+ runCommand(_exportfs.cmd, '-o', 'rw,insecure,sync', '127.0.0.1:%s' % path)
def unexportNFS(path):
- rc, out, err = execCmd([_exportfs.cmd, '-u', '127.0.0.1:%s' % path])
- return rc
+ runCommand(_exportfs.cmd, '-u', '127.0.0.1:%s' % path)
def listNFS():
- rc, out, err = execCmd([_exportfs.cmd])
- if rc != 0:
- raise RuntimeError("Can not list NFS export: %s\n" % err)
- return out
+ return runCommand([_exportfs.cmd])
def cleanNFSLeftovers(pathPrefix):
@@ -545,10 +539,13 @@
for line in exports:
export = line.split(" ", 1)[0]
if fnmatch.fnmatch(export, pathPattern):
- if unexportNFS(export) == 0:
- shutil.rmtree(export, ignore_errors=True)
+ try:
+ unexportNFS(export)
+ except TestRunnerError as e:
+ logging.warning("Failed to unexport NFS entry %s: %s",
+ export, e)
else:
- logging.warning("Failed to unexport NFS entry %s", export)
+ shutil.rmtree(export, ignore_errors=True)
class NFSServer(BackendServer):
@@ -574,10 +571,8 @@
rollback.prependDefer(undo)
os.chown(path, uid, gid)
os.chmod(path, 0o775)
- self.asserts.assertEquals(0, exportNFS(path))
- undo = lambda path=path: self.asserts.assertEquals(
- 0, unexportNFS(path))
- rollback.prependDefer(undo)
+ exportNFS(path)
+ rollback.prependDefer(unexportNFS, path)
connections[uuid] = {'type': 'nfs',
'params': {'export': '127.0.0.1:%s' % path}}
diff --git a/tests/testrunner.py b/tests/testrunner.py
index ac0f90f..6f99a96 100644
--- a/tests/testrunner.py
+++ b/tests/testrunner.py
@@ -26,12 +26,21 @@
import shutil
import tempfile
from contextlib import contextmanager
+import subprocess
from vdsm import utils
from nose import config
from nose import core
from nose import result
+
+
+class TestRunnerError(Exception):
+ """
+ This is an error in the test itself (e.g. could not clean up some temporary
+ file). It is not an error in the tested code.
+ """
+
# Monkey patch pthreading in necessary
if sys.version_info[0] == 2:
@@ -383,6 +392,16 @@
raise AssertionError(msg)
+def runCommand(*args):
+ process = subprocess.Popen(args, stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+ output, error = process.communicate()
+ if process.returncode != 0:
+ raise TestRunnerError("subprocess failed: returncode=%s: stderr=%r" %
+ (process.returncode, error))
+ return output
+
+
if __name__ == '__main__':
if "--help" in sys.argv:
print("testrunner options:\n"
--
To view, visit http://gerrit.ovirt.org/28124
Gerrit-MessageType: newchange
Gerrit-Change-Id: I693923e6dcaa05bc1479db814c0b7696b3536c9c
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Yoav Kleinberger <ykleinbe(a)redhat.com>
Nir Soffer has uploaded a new change for review.
Change subject: multipath: Fix devices rescanning
......................................................................
multipath: Fix devices rescanning
When rescanning devices, we used to update ISCSI devices, and then run
multipath to update device mapping. However, we never rescanned FC
devices.
Since commit dbf2089488 (Jul 9 2013), the multipath call was changed to use
the -r flag, forcing a reload of the device map. This was tested as a fix
for a case where a new LUN is created on the storage server while a host is
connected, and the new device is not available when issuing the
getDeviceList command. According to a comment on gerrit, the change was
tested for ISCSI and FC storage types, but there is no documentation of
the testing procedure. The related bug has no information about what was
tested.
We currently have two bugs related to multipath rescanning:
- Bug 1078879 tells us that invoking multipath with the -r flag sometimes
  triggers a segfault in the multipathd daemon. In the bug, a multipath
  developer suggests that as long as the multipathd daemon is running,
  there is no need to invoke multipath to detect new devices, and
  "multipath -r really isn't useful for much of anything".
- Bug 1071654 tells us that device rescanning is broken on FC storage
  domains (although the -r flag is used). The bug suggests using the
  issue_lip sysfs API, which is also recommended in the RHEL storage
  administration manual.
This patch removes the -r flag, which seems to be useless currently, and
adds the missing FC rescan using the recommended sysfs API.
To be on the safe side, I left the multipath call as it has been since the
first multipath commit in 2009. We will work further with kernel and
multipath developers on removing this call if it is indeed unneeded.
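For reference, a minimal sketch of the issue_lip based FC rescan described
above, assuming FC_HOST_MASK expands to /sys/class/fc_host/host* (the actual
implementation is in the hba.py hunk below):

    import glob
    import logging

    def rescanFcHosts():
        # Writing '1' to issue_lip triggers a Loop Initialization Protocol,
        # after which the SCSI layer picks up added and removed devices.
        # Requires root; some drivers do not support this operation.
        for path in glob.glob('/sys/class/fc_host/host*/issue_lip'):
            try:
                with open(path, 'w') as f:
                    f.write('1')
            except IOError as e:
                logging.error("Error issuing lip on %s: %s", path, e)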
Bug-Url: https://bugzilla.redhat.com/1078879
Bug-Url: https://bugzilla.redhat.com/1071654
Relates-to: http://gerrit.ovirt.org/17263
Change-Id: I7699504f9771232ee0b880f9c83a51fd5b90f40e
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
M vdsm/storage/hba.py
M vdsm/storage/multipath.py
M vdsm/supervdsmServer
3 files changed, 34 insertions(+), 3 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/22/27122/1
diff --git a/vdsm/storage/hba.py b/vdsm/storage/hba.py
index da3feef..050bec5 100644
--- a/vdsm/storage/hba.py
+++ b/vdsm/storage/hba.py
@@ -36,6 +36,31 @@
NODE_NAME = "node_name"
+def rescan():
+ """
+ Rescan HBAs connections, updating available devices.
+
+ This operation performs a Loop Initialization Protocol (LIP) and then
+ scans the interconnect and causes the SCSI layer to be updated to reflect
+ the devices currently on the bus. A LIP is, essentially, a bus reset, and
+ will cause device addition and removal.
+
+ Bear in mind that issue_lip is an asynchronous operation. The command may
+ complete before the entire scan has completed.
+
+ Note: Must be executed as root.
+ TODO: Some drivers do not support this operation.
+ """
+ log.info("Rescanning HBAs")
+ for path in glob.glob(FC_HOST_MASK + '/issue_lip'):
+ log.debug("Issuing lip %s", path)
+ try:
+ with open(path, 'w') as f:
+ f.write('1')
+ except IOError as e:
+ log.error("Error issuing lip: %s", e)
+
+
def getiSCSIInitiators():
"""
Get iSCSI initiator name from the default location.
diff --git a/vdsm/storage/multipath.py b/vdsm/storage/multipath.py
index 82d435d..ba98866 100644
--- a/vdsm/storage/multipath.py
+++ b/vdsm/storage/multipath.py
@@ -102,12 +102,12 @@
Should only be called from hsm._rescanDevices()
"""
- # First ask iSCSI to rescan all its sessions
+ # First rescan iSCSI and FCP connections
iscsi.rescan()
+ supervdsm.getProxy().hbaRescan()
# Now let multipath daemon pick up new devices
- cmd = [constants.EXT_MULTIPATH, "-r"]
- misc.execCmd(cmd, sudo=True)
+ misc.execCmd([constants.EXT_MULTIPATH], sudo=True)
def isEnabled():
diff --git a/vdsm/supervdsmServer b/vdsm/supervdsmServer
index ffcedb8..dc6f602 100755
--- a/vdsm/supervdsmServer
+++ b/vdsm/supervdsmServer
@@ -58,6 +58,8 @@
from network import sourceroutethread
from network.api import (addNetwork, delNetwork, editNetwork, setupNetworks,
setSafeNetworkConfig)
+
+from storage import hba
from network.tc import setPortMirroring, unsetPortMirroring
from storage.multipath import getScsiSerial as _getScsiSerial
from storage.iscsi import getDevIscsiInfo as _getdeviSCSIinfo
@@ -253,6 +255,10 @@
return setSafeNetworkConfig()
@logDecorator
+ def hbaRescan(self):
+ return hba.rescan()
+
+ @logDecorator
def udevTrigger(self, guid):
self.__udevReloadRules(guid)
cmd = [EXT_UDEVADM, 'trigger', '--verbose', '--action', 'change',
--
To view, visit http://gerrit.ovirt.org/27122
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7699504f9771232ee0b880f9c83a51fd5b90f40e
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Nir Soffer <nsoffer(a)redhat.com>
Nir Soffer has uploaded a new change for review.
Change subject: domainMonitor: Extract domain monitoring methods
......................................................................
domainMonitor: Extract domain monitoring methods
The _monitorDomain method was extremely long and was doing too much.
This patch breaks it into multiple small methods to make it easier to
understand and modify.
One of the planned changes is to allow _monitorDomain to abort in the
middle if a monitor is stopped. Implementing this in the original method
would only make it worse.
This patch (hopefully) does not change the behavior of the monitor.
Change-Id: I87ac6a82e560bc4360a3bc3f6f6fd94623678cc2
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
M vdsm/storage/domainMonitor.py
1 file changed, 111 insertions(+), 73 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/14/27714/1
diff --git a/vdsm/storage/domainMonitor.py b/vdsm/storage/domainMonitor.py
index 8a50b4f..8057910 100644
--- a/vdsm/storage/domainMonitor.py
+++ b/vdsm/storage/domainMonitor.py
@@ -174,66 +174,20 @@
self.log.debug("Stopping domain monitor for %s", self.sdUUID)
- # If this is an ISO domain we didn't acquire the host id and releasing
- # it is superfluous.
- if self.domain and not self.isIsoDomain:
- try:
- self.domain.releaseHostId(self.hostId, unused=True)
- except:
- self.log.debug("Unable to release the host id %s for domain "
- "%s", self.hostId, self.sdUUID, exc_info=True)
+ if self._shouldReleaseHostId():
+ self._releaseHostId()
def _monitorDomain(self):
self.nextStatus.clear()
- if time() - self.lastRefresh > self.refreshTime:
- # Refreshing the domain object in order to pick up changes as,
- # for example, the domain upgrade.
- self.log.debug("Refreshing domain %s", self.sdUUID)
- sdCache.manuallyRemoveDomain(self.sdUUID)
- self.lastRefresh = time()
+ if self._shouldRefreshDomain():
+ self._refreshDomain()
try:
- # We should produce the domain inside the monitoring loop because
- # it might take some time and we don't want to slow down the thread
- # start (and anything else that relies on that as for example
- # updateMonitoringThreads). It also needs to be inside the loop
- # since it might fail and we want keep trying until we succeed or
- # the domain is deactivated.
- if self.domain is None:
- self.domain = sdCache.produce(self.sdUUID)
-
- if self.isIsoDomain is None:
- # The isIsoDomain assignment is delayed because the isoPrefix
- # discovery might fail (if the domain suddenly disappears) and
- # we could risk to never try to set it again.
- isIsoDomain = self.domain.isISO()
- if isIsoDomain:
- self.isoPrefix = self.domain.getIsoDomainImagesDir()
- self.isIsoDomain = isIsoDomain
-
- self.domain.selftest()
-
- self.nextStatus.readDelay = self.domain.getReadDelay()
-
- stats = self.domain.getStats()
- self.nextStatus.diskUtilization = (stats["disktotal"],
- stats["diskfree"])
-
- self.nextStatus.vgMdUtilization = (stats["mdasize"],
- stats["mdafree"])
-
- self.nextStatus.vgMdHasEnoughFreeSpace = stats["mdavalid"]
- self.nextStatus.vgMdFreeBelowThreashold = stats["mdathreshold"]
-
- masterStats = self.domain.validateMaster()
- self.nextStatus.masterValid = masterStats['valid']
- self.nextStatus.masterMounted = masterStats['mount']
-
- self.nextStatus.hasHostId = self.domain.hasHostId(self.hostId)
- self.nextStatus.isoPrefix = self.isoPrefix
- self.nextStatus.version = self.domain.getVersion()
-
+ self._initializeDomain()
+ self._checkDomain()
+ self._checkReadDelay()
+ self._collectStats()
except Exception as e:
self.log.error("Error while collecting domain %s monitoring "
"information", self.sdUUID, exc_info=True)
@@ -243,29 +197,113 @@
self.nextStatus.valid = (self.nextStatus.error is None)
if self._statusDidChange():
- self.log.debug("Domain %s changed its status to %s", self.sdUUID,
- "Valid" if self.nextStatus.valid else "Invalid")
-
- try:
- self.domainMonitor.onDomainStateChange.emit(
- self.sdUUID, self.nextStatus.valid)
- except:
- self.log.warn("Could not emit domain state change event",
- exc_info=True)
-
+ self._notifyStatusChanges()
self.firstChange = False
- # An ISO domain can be shared by multiple pools
- if (not self.isIsoDomain and self.nextStatus.valid
- and self.nextStatus.hasHostId is False):
- try:
- self.domain.acquireHostId(self.hostId, async=True)
- except:
- self.log.debug("Unable to issue the acquire host id %s "
- "request for domain %s", self.hostId,
- self.sdUUID, exc_info=True)
+ if self._shouldAcquireHostId():
+ self._acquireHostId()
self.status.update(self.nextStatus)
+ # Notifying status changes
+
def _statusDidChange(self):
return self.firstChange or self.status.valid != self.nextStatus.valid
+
+ def _notifyStatusChanges(self):
+ self.log.debug("Domain %s changed its status to %s", self.sdUUID,
+ "Valid" if self.nextStatus.valid else "Invalid")
+ try:
+ self.domainMonitor.onDomainStateChange.emit(
+ self.sdUUID, self.nextStatus.valid)
+ except:
+ self.log.warn("Could not emit domain state change event",
+ exc_info=True)
+
+ # Refreshing domain
+
+ def _shouldRefreshDomain(self):
+ return time() - self.lastRefresh > self.refreshTime
+
+ def _refreshDomain(self):
+ # Refreshing the domain object in order to pick up changes as,
+ # for example, the domain upgrade.
+ self.log.debug("Refreshing domain %s", self.sdUUID)
+ sdCache.manuallyRemoveDomain(self.sdUUID)
+ self.lastRefresh = time()
+
+ # Deferred initialization
+
+ def _initializeDomain(self):
+ # We should produce the domain inside the monitoring loop because
+ # it might take some time and we don't want to slow down the thread
+ # start (and anything else that relies on that as for example
+ # updateMonitoringThreads). It also needs to be inside the loop
+ # since it might fail and we want keep trying until we succeed or
+ # the domain is deactivated.
+ if self.domain is None:
+ self.domain = sdCache.produce(self.sdUUID)
+
+ if self.isIsoDomain is None:
+ # The isIsoDomain assignment is delayed because the isoPrefix
+ # discovery might fail (if the domain suddenly disappears) and
+ # we could risk to never try to set it again.
+ isIsoDomain = self.domain.isISO()
+ if isIsoDomain:
+ self.isoPrefix = self.domain.getIsoDomainImagesDir()
+ self.isIsoDomain = isIsoDomain
+
+ # Collecting monitoring info
+
+ def _checkDomain(self):
+ self.domain.selftest()
+
+ def _checkReadDelay(self):
+ self.nextStatus.readDelay = self.domain.getReadDelay()
+
+ def _collectStats(self):
+ stats = self.domain.getStats()
+ self.nextStatus.diskUtilization = (stats["disktotal"],
+ stats["diskfree"])
+
+ self.nextStatus.vgMdUtilization = (stats["mdasize"],
+ stats["mdafree"])
+
+ self.nextStatus.vgMdHasEnoughFreeSpace = stats["mdavalid"]
+ self.nextStatus.vgMdFreeBelowThreashold = stats["mdathreshold"]
+
+ masterStats = self.domain.validateMaster()
+ self.nextStatus.masterValid = masterStats['valid']
+ self.nextStatus.masterMounted = masterStats['mount']
+
+ self.nextStatus.hasHostId = self.domain.hasHostId(self.hostId)
+ self.nextStatus.isoPrefix = self.isoPrefix
+ self.nextStatus.version = self.domain.getVersion()
+
+ # Managing host id
+
+ def _shouldAcquireHostId(self):
+ # An ISO domain can be shared by multiple pools
+ return (not self.isIsoDomain and
+ self.nextStatus.valid and
+ self.nextStatus.hasHostId is False)
+
+ def _shouldReleaseHostId(self):
+ # If this is an ISO domain we didn't acquire the host id and releasing
+ # it is superfluous.
+ return self.domain and not self.isIsoDomain
+
+ def _acquireHostId(self):
+ try:
+ self.domain.acquireHostId(self.hostId, async=True)
+ except:
+ self.log.debug("Unable to issue the acquire host id %s "
+ "request for domain %s", self.hostId,
+ self.sdUUID, exc_info=True)
+
+ def _releaseHostId(self):
+ try:
+ self.domain.releaseHostId(self.hostId, unused=True)
+ except:
+ self.log.debug("Unable to release the host id %s for domain "
+ "%s", self.hostId, self.sdUUID, exc_info=True)
--
To view, visit http://gerrit.ovirt.org/27714
Gerrit-MessageType: newchange
Gerrit-Change-Id: I87ac6a82e560bc4360a3bc3f6f6fd94623678cc2
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Nir Soffer <nsoffer(a)redhat.com>
Nir Soffer has uploaded a new change for review.
Change subject: domainMonitor: Stop domain monitors concurrently
......................................................................
domainMonitor: Stop domain monitors concurrently
Deactivating a host connected to a lot of storage domains takes too much
time. This is caused by stopping each monitor thread and waiting until
the thread exits before stopping the next. Looking at the log, we can
see that while we stop one monitor thread, other monitor threads wake up
and continue to monitor their domains.
If you try to stop vdsm after moving a host to maintenance, vdsm will be
killed after 10 seconds. If some domain monitors did not exit, you will
have stale leases held by sanlock. This will lead to a failure to upgrade
this host, and requires a reboot.
This patch changes the way monitors are stopped, so we stop all monitors
without waiting, which wakes up all sleeping monitors at once and signals
busy monitors to abort. Then we wait until all monitor threads exit. To
help a monitor thread exit as soon as possible, we check whether the
monitor was stopped at key points in the _monitorDomain method (a minimal
sketch of this pattern follows below).
I tested this patch on Fedora 19 machines connected to 30 storage
domains. With this patch, the time to complete disconnectStoragePool
dropped from 70 to 13 seconds. Most of this time is spent in stopping
monitoring. Stopping all 30 monitor threads took 10 milliseconds, and
waiting until the monitor threads exited took 10 seconds.
Note that this change cannot guarantee that all monitor threads will
exit in a timely manner, since some of them may be blocked, reading from
inaccessible storage. But it does ensure that in the common case
monitors stop and exit immediately.
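A minimal sketch of the stop-then-wait pattern described above (method names
follow the patch below; illustrative only):

    def stopAllMonitors(monitors):
        # Signal every monitor first, so all of them wake up and start
        # shutting down in parallel...
        for monitor in monitors:
            monitor.stop()       # sets the stop event, does not join
        # ...then join them, so the total wait is bounded by the slowest
        # monitor rather than the sum of all monitors.
        for monitor in monitors:
            monitor.wait()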
Change-Id: Ia12f137f1ed055316767e1a9384d8982720bc564
Signed-off-by: Nir Soffer <nsoffer(a)redhat.com>
---
M vdsm/storage/domainMonitor.py
M vdsm/storage/sp.py
2 files changed, 81 insertions(+), 33 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/73/27573/1
diff --git a/vdsm/storage/domainMonitor.py b/vdsm/storage/domainMonitor.py
index 8a50b4f..7b82777 100644
--- a/vdsm/storage/domainMonitor.py
+++ b/vdsm/storage/domainMonitor.py
@@ -104,9 +104,9 @@
# The domain should be added only after it succesfully started
self._domains[sdUUID] = domainThread
- def stopMonitoring(self, sdUUID):
+ def stopMonitoring(self, sdUUID, wait=True):
# The domain monitor issues events that might become raceful if
- # stopMonitoring doesn't stop until the thread exits.
+ # you don't wait until a monitor thread exits.
# Eg: when a domain is detached the domain monitor is stopped and
# the host id is released. If the monitor didn't actually exit it
# might respawn a new acquire host id.
@@ -114,17 +114,33 @@
try:
self._domains[sdUUID].stop()
except KeyError:
- return
+ self.log.warning("No monitor for %s", sdUUID)
+ else:
+ if wait:
+ self.waitForMonitor(sdUUID)
- del self._domains[sdUUID]
+ def waitForMonitor(self, sdUUID):
+ # If you are stopping multiple monitors, you want to invoke
+ # stopMonitoring with wait=False, and wait on them after all of the
+ # monitors are stopped.
+ self.log.info("Waiting for %s", sdUUID)
+ try:
+ self._domains[sdUUID].wait()
+ except KeyError:
+ self.log.warning("No monitor for %s", sdUUID)
+ else:
+ del self._domains[sdUUID]
def getStatus(self, sdUUID):
return self._domains[sdUUID].getStatus()
def close(self):
self.log.info("Stopping domain monitors")
- for sdUUID in self._domains.keys():
- self.stopMonitoring(sdUUID)
+ domains = self._domains.keys()
+ for sdUUID in domains:
+ self.stopMonitoring(sdUUID, wait=False)
+ for sdUUID in domains:
+ self.waitForMonitor(sdUUID)
class DomainMonitorThread(object):
@@ -152,10 +168,11 @@
def start(self):
self.thread.start()
- def stop(self, wait=True):
+ def stop(self):
self.stopEvent.set()
- if wait:
- self.thread.join()
+
+ def wait(self):
+ self.thread.join()
def getStatus(self):
return self.status.copy()
@@ -164,7 +181,7 @@
def _monitorLoop(self):
self.log.debug("Starting domain monitor for %s", self.sdUUID)
- while not self.stopEvent.is_set():
+ while self._running():
try:
self._monitorDomain()
except:
@@ -189,6 +206,11 @@
if time() - self.lastRefresh > self.refreshTime:
# Refreshing the domain object in order to pick up changes as,
# for example, the domain upgrade.
+
+ # Don't try to refresh if stopped
+ if not self._running():
+ return
+
self.log.debug("Refreshing domain %s", self.sdUUID)
sdCache.manuallyRemoveDomain(self.sdUUID)
self.lastRefresh = time()
@@ -212,9 +234,21 @@
self.isoPrefix = self.domain.getIsoDomainImagesDir()
self.isIsoDomain = isIsoDomain
+ # Don't wait for storage if stopped
+ if not self._running():
+ return
+
self.domain.selftest()
+ # Don't wait for storage if stopped
+ if not self._running():
+ return
+
self.nextStatus.readDelay = self.domain.getReadDelay()
+
+ # I could have been stopped while waiting for storage
+ if not self._running():
+ return
stats = self.domain.getStats()
self.nextStatus.diskUtilization = (stats["disktotal"],
@@ -242,6 +276,10 @@
self.nextStatus.checkTime = time()
self.nextStatus.valid = (self.nextStatus.error is None)
+ # Do not emit notifications if stopped
+ if not self._running():
+ return
+
if self._statusDidChange():
self.log.debug("Domain %s changed its status to %s", self.sdUUID,
"Valid" if self.nextStatus.valid else "Invalid")
@@ -254,6 +292,10 @@
exc_info=True)
self.firstChange = False
+
+ # Do not try to acquire host id if stopped
+ if not self._running():
+ return
# An ISO domain can be shared by multiple pools
if (not self.isIsoDomain and self.nextStatus.valid
@@ -269,3 +311,6 @@
def _statusDidChange(self):
return self.firstChange or self.status.valid != self.nextStatus.valid
+
+ def _running(self):
+ return not self.stopEvent.is_set()
diff --git a/vdsm/storage/sp.py b/vdsm/storage/sp.py
index 4a07c6a..7cd77ca 100644
--- a/vdsm/storage/sp.py
+++ b/vdsm/storage/sp.py
@@ -641,8 +641,11 @@
@unsecured
def stopMonitoringDomains(self):
- for sdUUID in self.domainMonitor.poolMonitoredDomains:
- self.domainMonitor.stopMonitoring(sdUUID)
+ domains = self.domainMonitor.poolMonitoredDomains
+ for sdUUID in domains:
+ self.domainMonitor.stopMonitoring(sdUUID, wait=False)
+ for sdUUID in domains:
+ self.domainMonitor.waitForMonitor(sdUUID)
return True
@unsecured
@@ -1389,29 +1392,29 @@
def updateMonitoringThreads(self):
# domain list it's list of sdUUID:status
# sdUUID1:status1,sdUUID2:status2,...
- activeDomains = self.getDomains(activeOnly=True)
- monitoredDomains = self.domainMonitor.poolMonitoredDomains
+ activeDomains = set(self.getDomains(activeOnly=True))
+ monitoredDomains = set(self.domainMonitor.poolMonitoredDomains)
- for sdUUID in monitoredDomains:
- if sdUUID not in activeDomains:
- try:
- self.domainMonitor.stopMonitoring(sdUUID)
- self.log.debug("Storage Pool `%s` stopped monitoring "
- "domain `%s`", self.spUUID, sdUUID)
- except se.StorageException:
- self.log.error("Unexpected error while trying to stop "
- "monitoring domain `%s`", sdUUID,
- exc_info=True)
+ monitorsToStop = monitoredDomains - activeDomains
- for sdUUID in activeDomains:
- if sdUUID not in monitoredDomains:
- try:
- self.domainMonitor.startMonitoring(sdUUID, self.id)
- self.log.debug("Storage Pool `%s` started monitoring "
- "domain `%s`", self.spUUID, sdUUID)
- except se.StorageException:
- self.log.error("Unexpected error while trying to monitor "
- "domain `%s`", sdUUID, exc_info=True)
+ for sdUUID in monitorsToStop:
+ self.domainMonitor.stopMonitoring(sdUUID, wait=False)
+
+ for sdUUID in monitorsToStop:
+ self.domainMonitor.waitForMonitor(sdUUID)
+ self.log.debug("Storage Pool `%s` stopped monitoring "
+ "domain `%s`", self.spUUID, sdUUID)
+
+ monitorsToStart = activeDomains - monitoredDomains
+
+ for sdUUID in monitorsToStart:
+ try:
+ self.domainMonitor.startMonitoring(sdUUID, self.id)
+ self.log.debug("Storage Pool `%s` started monitoring "
+ "domain `%s`", self.spUUID, sdUUID)
+ except se.StorageException:
+ self.log.error("Unexpected error while trying to monitor "
+ "domain `%s`", sdUUID, exc_info=True)
@unsecured
def getDomains(self, activeOnly=False):
--
To view, visit http://gerrit.ovirt.org/27573
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia12f137f1ed055316767e1a9384d8982720bc564
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Nir Soffer <nsoffer(a)redhat.com>
Maor Lipchuk has uploaded a new change for review.
Change subject: ifcfg: Log unhandled exception for new Thread
......................................................................
ifcfg: Log unhandled exception for new Thread
Adding a traceback log for unhandled exceptions when opening a new
thread, so it will not die silently.
Since the logger of the ifcfg instance is inaccessible from the decorator,
we use the default root logger.
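The decorator used here is vdsm's utils.traceback; for illustration, a
minimal sketch of what such a decorator typically looks like (an assumption,
not the actual vdsm implementation):

    import logging
    from functools import wraps

    def traceback(log=None):
        logger = log or logging.getLogger()   # default root logger
        def decorator(f):
            @wraps(f)
            def wrapper(*args, **kwargs):
                try:
                    return f(*args, **kwargs)
                except Exception:
                    # Log the full traceback so a thread target does not die
                    # silently, then re-raise.
                    logger.exception("Unhandled exception in %s", f.__name__)
                    raise
            return wrapper
        return decorator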
Change-Id: I2ce44b4586e85438898fcdcd2d62d80813caa5ba
Signed-off-by: Maor Lipchuk <mlipchuk(a)redhat.com>
---
M vdsm/netconf/ifcfg.py
1 file changed, 1 insertion(+), 0 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/85/23085/1
diff --git a/vdsm/netconf/ifcfg.py b/vdsm/netconf/ifcfg.py
index 52b95b1..8431e80 100644
--- a/vdsm/netconf/ifcfg.py
+++ b/vdsm/netconf/ifcfg.py
@@ -738,6 +738,7 @@
def ifup(iface, async=False):
"Bring up an interface"
+ @utils.traceback()
def _ifup(netIf):
rc, out, err = utils.execCmd([constants.EXT_IFUP, netIf], raw=False)
--
To view, visit http://gerrit.ovirt.org/23085
Gerrit-MessageType: newchange
Gerrit-Change-Id: I2ce44b4586e85438898fcdcd2d62d80813caa5ba
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Maor Lipchuk <mlipchuk(a)redhat.com>