Hello Dan Kenigsberg,
I'd like you to do a code review. Please visit
to review the following change.
Change subject: vm: fix _dom access when QEMU is dying ......................................................................
vm: fix _dom access when QEMU is dying
commit 8fedf8bde3c28edb07add23c3e9b72681cb48e49 introduced a tiny window for races between libvirt notifications (vm._onQemuDeath) and polling for stats from engine.
This is demonstrated by bz1073478, but it really boils down to how events are serialized. In this case, a stats request from engine is being answered while QEMU is being reported down and the _dom handle is being shut down, we end up with an uncaught exception.
This patch address the reported race between polling and qemu death. The window of vulnerability is (supposed to be) tiny and the issue itself is hard to reproduce.
Sampling thread is theorically vulnerable to the a race of the same type, but this problem will be addressed in a separate patch.
Change-Id: Ibac9e75a484b228c67d5432b71e965bdf317b89c Bug-Url: https://bugzilla.redhat.com/1073478 Signed-off-by: Francesco Romani fromani@redhat.com Reviewed-on: http://gerrit.ovirt.org/26539 Reviewed-by: Dan Kenigsberg danken@redhat.com --- M tests/vmTests.py M vdsm/vm.py 2 files changed, 70 insertions(+), 2 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/24/27724/1
diff --git a/tests/vmTests.py b/tests/vmTests.py index e061d42..0a3d440 100644 --- a/tests/vmTests.py +++ b/tests/vmTests.py @@ -17,18 +17,22 @@ # # Refer to the README and COPYING files for full details of the license # +from contextlib import contextmanager import re import shutil import tempfile import xml.etree.ElementTree as ET
+import libvirt + import vm from vdsm import constants from testrunner import VdsmTestCase as TestCaseBase +from testrunner import namedTemporaryDir import caps from vdsm import utils from vdsm import libvirtconnection -from monkeypatch import MonkeyPatch +from monkeypatch import MonkeyPatch, MonkeyPatchScope from vmTestsData import CONF_TO_DOMXML_X86_64 from vmTestsData import CONF_TO_DOMXML_PPC64
@@ -36,6 +40,11 @@ class ConnectionMock: def domainEventRegisterAny(self, *arg): pass + + +class FakeDomain: + def info(self): + raise libvirt.libvirtError(defmsg='')
class TestVm(TestCaseBase): @@ -655,3 +664,56 @@ lambda: "fc25cbbe-5520-4f83-b82e-1541914753d9") def testBuildCmdLinePPC64(self): self.assertBuildCmdLine(CONF_TO_DOMXML_PPC64) + + +class FakeGuestAgent(object): + def getGuestInfo(self): + return { + 'username': 'Unknown', + 'session': 'Unknown', + 'memUsage': 0, + 'appsList': [], + 'guestIPs': [], + 'guestFQDN': '', + 'disksUsage': [], + 'netIfaces': [], + 'memoryStats': {}, + 'guestCPUCount': -1} + + +@contextmanager +def FakeVM(params=None, devices=None): + with namedTemporaryDir() as tmpDir: + with MonkeyPatchScope([(constants, 'P_VDSM_RUN', tmpDir + '/'), + (libvirtconnection, 'get', + lambda x: ConnectionMock())]): + vmParams = {'vmId': 'TESTING'} + vmParams.update({} if params is None else params) + fake = vm.Vm(None, vmParams) + fake.guestAgent = FakeGuestAgent() + fake.conf['devices'] = [] if devices is None else devices + yield fake + + +class TestVmStatsThread(TestCaseBase): + VM_PARAMS = {'displayPort': -1, 'displaySecurePort': -1, + 'display': 'qxl', 'displayIp': '127.0.0.1', + 'vmType': 'kvm', 'memSize': 1024} + + DEV_BALLOON = [{'type': 'balloon', 'specParams': {'model': 'virtio'}}] + + def testGetStatsNoDom(self): + # bz1073478 - main case + with FakeVM(self.VM_PARAMS, self.DEV_BALLOON) as fake: + self.assertEqual(fake._dom, None) + res = fake.getStats() + self.assertIn('balloonInfo', res) + self.assertIn('balloon_cur', res['balloonInfo']) + + def testGetStatsDomInfoFail(self): + # bz1073478 - extra case + with FakeVM(self.VM_PARAMS, self.DEV_BALLOON) as fake: + fake._dom = FakeDomain() + res = fake.getStats() + self.assertIn('balloonInfo', res) + self.assertIn('balloon_cur', res['balloonInfo']) diff --git a/vdsm/vm.py b/vdsm/vm.py index 6834c61..69f7cb4 100644 --- a/vdsm/vm.py +++ b/vdsm/vm.py @@ -4671,7 +4671,13 @@ max_mem = int(self.conf.get('memSize')) * 1024 min_mem = int(self.conf.get('memGuaranteedSize', '0')) * 1024 target_mem = dev.get('target', max_mem) - cur_mem = self._dom.info()[2] + try: + cur_mem = self._dom.info()[2] + except (AttributeError, libvirt.libvirtError): + # _dom may be None (race on shutdown) + self.log.exception( + 'failed to retrieve the balloon stats') + cur_mem = 0 return {'balloon_max': str(max_mem), 'balloon_cur': str(cur_mem), 'balloon_min': str(min_mem),