Francesco Romani has uploaded a new change for review.
Change subject: virt: stats: move VmStatsThread away from vm.py ......................................................................
virt: stats: move VmStatsThread away from vm.py
This patch moves VmStatsThread from vm.py to sampling.py where it fits better.
isVdsmImage is moved to utils.py to accommodate the needs of both sampling.py and vm.py.
Change-Id: Icd18288b94c7593ddd2a5e6a1314b6be7a7d8f92 Signed-off-by: Francesco Romani fromani@redhat.com --- M lib/vdsm/utils.py M tests/vmTests.py M vdsm/virt/sampling.py M vdsm/virt/vm.py 4 files changed, 283 insertions(+), 271 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/92/26792/1
diff --git a/lib/vdsm/utils.py b/lib/vdsm/utils.py index cd178da..c80341f 100644 --- a/lib/vdsm/utils.py +++ b/lib/vdsm/utils.py @@ -105,6 +105,18 @@ return stat.S_ISBLK(os.stat(path).st_mode)
+def isVdsmImage(drive): + """ + Tell if drive looks like a vdsm image + + :param drive: drive to check + :type drive: dict or vm.Drive + :return: bool + """ + required = ('domainID', 'imageID', 'poolID', 'volumeID') + return all(k in drive for k in required) + + def touchFile(filePath): """ http://www.unix.com/man-page/POSIX/1posix/touch/ diff --git a/tests/vmTests.py b/tests/vmTests.py index 17967c2..44ef984 100644 --- a/tests/vmTests.py +++ b/tests/vmTests.py @@ -25,6 +25,7 @@ import tempfile import xml.etree.ElementTree as ET
+from virt import sampling from virt import vm from virt import vmexitreason from vdsm import constants @@ -860,7 +861,7 @@ GBPS = 10 ** 9 / 8 MAC = '52:54:00:59:F5:3F' with FakeVM() as fake: - mock_stats_thread = vm.VmStatsThread(fake) + mock_stats_thread = sampling.VmStatsThread(fake) res = mock_stats_thread._getNicStats( name='vnettest', model='virtio', mac=MAC, start_sample=(2 ** 64 - 15 * GBPS, 1, 2, 3, 0, 4, 5, 6), diff --git a/vdsm/virt/sampling.py b/vdsm/virt/sampling.py index 389bb65..7df9b36 100644 --- a/vdsm/virt/sampling.py +++ b/vdsm/virt/sampling.py @@ -33,10 +33,14 @@ import errno import ethtool
+import libvirt + from vdsm import utils from vdsm import netinfo -from vdsm.ipwrapper import getLinks +from vdsm.config import config from vdsm.constants import P_VDSM_RUN +from vdsm.ipwrapper import getLinks +from vdsm.utils import isVdsmImage
_THP_STATE_PATH = '/sys/kernel/mm/transparent_hugepage/enabled' if not os.path.exists(_THP_STATE_PATH): @@ -523,3 +527,259 @@ stats['txDropped'] = txDropped
return stats + + +class VmStatsThread(AdvancedStatsThread): + MBPS_TO_BPS = 10 ** 6 / 8 + + def __init__(self, vm): + AdvancedStatsThread.__init__(self, log=vm.log, daemon=True) + self._vm = vm + + self.highWrite = ( + AdvancedStatsFunction( + self._highWrite, + config.getint('vars', 'vm_watermark_interval'))) + self.updateVolumes = ( + AdvancedStatsFunction( + self._updateVolumes, + config.getint('irs', 'vol_size_sample_interval'))) + + self.sampleCpu = ( + AdvancedStatsFunction( + self._sampleCpu, + config.getint('vars', 'vm_sample_cpu_interval'), + config.getint('vars', 'vm_sample_cpu_window'))) + self.sampleDisk = ( + AdvancedStatsFunction( + self._sampleDisk, + config.getint('vars', 'vm_sample_disk_interval'), + config.getint('vars', 'vm_sample_disk_window'))) + self.sampleDiskLatency = ( + AdvancedStatsFunction( + self._sampleDiskLatency, + config.getint('vars', 'vm_sample_disk_latency_interval'), + config.getint('vars', 'vm_sample_disk_latency_window'))) + self.sampleNet = ( + AdvancedStatsFunction( + self._sampleNet, + config.getint('vars', 'vm_sample_net_interval'), + config.getint('vars', 'vm_sample_net_window'))) + + self.addStatsFunction( + self.highWrite, self.updateVolumes, self.sampleCpu, + self.sampleDisk, self.sampleDiskLatency, self.sampleNet) + + def _highWrite(self): + if not self._vm.isDisksStatsCollectionEnabled(): + # Avoid queries from storage during recovery process + return + self._vm.extendDrivesIfNeeded() + + def _updateVolumes(self): + if not self._vm.isDisksStatsCollectionEnabled(): + # Avoid queries from storage during recovery process + return + + for vmDrive in self._vm.getDiskDevices(): + self._vm.updateDriveVolume(vmDrive) + + def _sampleCpu(self): + cpuStats = self._vm._dom.getCPUStats(True, 0) + return cpuStats[0] + + def _sampleDisk(self): + diskSamples = {} + # Avoid queries from storage during recovery process + if self._vm.isDisksStatsCollectionEnabled(): + for vmDrive in self._vm.getDiskDevices(): + 
diskSamples[vmDrive.name] = self._vm._dom.blockStats( + vmDrive.name) + return diskSamples + + def _sampleDiskLatency(self): + # {'wr_total_times': 0L, 'rd_operations': 9638L, + # 'flush_total_times': 0L,'rd_total_times': 7622718001L, + # 'rd_bytes': 85172430L, 'flush_operations': 0L, + # 'wr_operations': 0L, 'wr_bytes': 0L} + diskLatency = {} + # Avoid queries from storage during recovery process + if self._vm.isDisksStatsCollectionEnabled(): + for vmDrive in self._vm.getDiskDevices(): + diskLatency[vmDrive.name] = self._vm._dom.blockStatsFlags( + vmDrive.name, flags=libvirt.VIR_TYPED_PARAM_STRING_OKAY) + return diskLatency + + def _sampleNet(self): + netSamples = {} + for nic in self._vm.getNicDevices(): + netSamples[nic.name] = self._vm._dom.interfaceStats(nic.name) + return netSamples + + def _diff(self, prev, curr, val): + return prev[val] - curr[val] + + def _usagePercentage(self, val, sampleInterval): + return 100 * val / sampleInterval / 1000 ** 3 + + def _getCpuStats(self, stats): + try: + sInfo, eInfo, sampleInterval = self.sampleCpu.getStats() + except NotEnoughSamplesError: + return + + try: + stats['cpuSys'] = self._usagePercentage( + self._diff(eInfo, sInfo, 'user_time') + + self._diff(eInfo, sInfo, 'system_time'), + sampleInterval) + stats['cpuUser'] = self._usagePercentage( + self._diff(eInfo, sInfo, 'cpu_time') + - self._diff(eInfo, sInfo, 'user_time') + - self._diff(eInfo, sInfo, 'system_time'), + sampleInterval) + + except (TypeError, ZeroDivisionError) as e: + self._log.debug("CPU stats not available: %s", e) + stats['cpuUser'] = 0.0 + stats['cpuSys'] = 0.0 + + @classmethod + def _getNicStats(cls, name, model, mac, + start_sample, end_sample, interval): + ifSpeed = [100, 1000][model in ('e1000', 'virtio')] + + ifStats = {'macAddr': mac, + 'name': name, + 'speed': str(ifSpeed), + 'state': 'unknown'} + + ifStats['rxErrors'] = str(end_sample[2]) + ifStats['rxDropped'] = str(end_sample[3]) + ifStats['txErrors'] = str(end_sample[6]) + 
ifStats['txDropped'] = str(end_sample[7]) + + ifRxBytes = (100.0 * + ((end_sample[0] - start_sample[0]) % 2 ** 32) / + interval / ifSpeed / cls.MBPS_TO_BPS) + ifTxBytes = (100.0 * + ((end_sample[4] - start_sample[4]) % 2 ** 32) / + interval / ifSpeed / cls.MBPS_TO_BPS) + + ifStats['rxRate'] = '%.1f' % ifRxBytes + ifStats['txRate'] = '%.1f' % ifTxBytes + + return ifStats + + def _getNetworkStats(self, stats): + stats['network'] = {} + try: + sInfo, eInfo, sampleInterval = self.sampleNet.getStats() + except NotEnoughSamplesError: + return + + for nic in self._vm.getNicDevices(): + if nic.name.startswith('hostdev'): + continue + + # may happen if nic is a new hot-plugged one + if nic.name not in sInfo or nic.name not in eInfo: + continue + + stats['network'][nic.name] = self._getNicStats( + nic.name, nic.nicModel, nic.macAddr, + sInfo[nic.name], eInfo[nic.name], sampleInterval) + + def _getDiskStats(self, stats): + try: + sInfo, eInfo, sampleInterval = self.sampleDisk.getStats() + except NotEnoughSamplesError: + return + + for vmDrive in self._vm.getDiskDevices(): + dName = vmDrive.name + dStats = {} + try: + dStats = {'truesize': str(vmDrive.truesize), + 'apparentsize': str(vmDrive.apparentsize)} + if isVdsmImage(vmDrive): + dStats['imageID'] = vmDrive.imageID + elif "GUID" in vmDrive: + dStats['lunGUID'] = vmDrive.GUID + dStats['readRate'] = ((eInfo[dName][1] - sInfo[dName][1]) / + sampleInterval) + dStats['writeRate'] = ((eInfo[dName][3] - sInfo[dName][3]) / + sampleInterval) + except (AttributeError, KeyError, TypeError, ZeroDivisionError): + self._log.debug("Disk %s stats not available", dName) + + stats[dName] = dStats + + def _getDiskLatency(self, stats): + try: + sInfo, eInfo, sampleInterval = self.sampleDiskLatency.getStats() + except NotEnoughSamplesError: + return + + def _avgLatencyCalc(sData, eData): + readLatency = (0 if not (eData['rd_operations'] - + sData['rd_operations']) + else (eData['rd_total_times'] - + sData['rd_total_times']) / + 
(eData['rd_operations'] - + sData['rd_operations'])) + writeLatency = (0 if not (eData['wr_operations'] - + sData['wr_operations']) + else (eData['wr_total_times'] - + sData['wr_total_times']) / + (eData['wr_operations'] - + sData['wr_operations'])) + flushLatency = (0 if not (eData['flush_operations'] - + sData['flush_operations']) + else (eData['flush_total_times'] - + sData['flush_total_times']) / + (eData['flush_operations'] - + sData['flush_operations'])) + return str(readLatency), str(writeLatency), str(flushLatency) + + for vmDrive in self._vm.getDeskDevices(): + dName = vmDrive.name + dLatency = {'readLatency': '0', + 'writeLatency': '0', + 'flushLatency': '0'} + try: + (dLatency['readLatency'], + dLatency['writeLatency'], + dLatency['flushLatency']) = _avgLatencyCalc(sInfo[dName], + eInfo[dName]) + except (KeyError, TypeError): + self._log.debug("Disk %s latency not available", dName) + else: + stats[dName].update(dLatency) + + def get(self): + stats = {} + + try: + stats['statsAge'] = time.time() - self.getLastSampleTime() + except TypeError: + self._log.debug("Stats age not available") + stats['statsAge'] = -1.0 + + self._getCpuStats(stats) + self._getNetworkStats(stats) + self._getDiskStats(stats) + self._getDiskLatency(stats) + + return stats + + def handleStatsException(self, ex): + # We currently handle only libvirt exceptions + if not hasattr(ex, "get_error_code"): + return False + + # We currently handle only the missing domain exception + if ex.get_error_code() != libvirt.VIR_ERR_NO_DOMAIN: + return False + + return True diff --git a/vdsm/virt/vm.py b/vdsm/virt/vm.py index 42025c0..92a179e 100644 --- a/vdsm/virt/vm.py +++ b/vdsm/virt/vm.py @@ -44,6 +44,7 @@ from vdsm.config import config from vdsm.define import ERROR, NORMAL, doneCode, errCode from vdsm.netinfo import DUMMY_BRIDGE +from vdsm.utils import isVdsmImage from storage import outOfProcess as oop from storage import sd from storage import fileUtils @@ -85,18 +86,6 @@ CONSOLE_DEVICES = 
'console' SMARTCARD_DEVICES = 'smartcard' TPM_DEVICES = 'tpm' - - -def isVdsmImage(drive): - """ - Tell if drive looks like a vdsm image - - :param drive: drive to check - :type drive: dict or vm.Drive - :return: bool - """ - required = ('domainID', 'imageID', 'poolID', 'volumeID') - return all(k in drive for k in required)
def _filterSnappableDiskDevices(diskDeviceXmlElements): @@ -165,262 +154,6 @@
class UpdatePortMirroringError(Exception): pass - - -class VmStatsThread(sampling.AdvancedStatsThread): - MBPS_TO_BPS = 10 ** 6 / 8 - - def __init__(self, vm): - sampling.AdvancedStatsThread.__init__(self, log=vm.log, daemon=True) - self._vm = vm - - self.highWrite = ( - sampling.AdvancedStatsFunction( - self._highWrite, - config.getint('vars', 'vm_watermark_interval'))) - self.updateVolumes = ( - sampling.AdvancedStatsFunction( - self._updateVolumes, - config.getint('irs', 'vol_size_sample_interval'))) - - self.sampleCpu = ( - sampling.AdvancedStatsFunction( - self._sampleCpu, - config.getint('vars', 'vm_sample_cpu_interval'), - config.getint('vars', 'vm_sample_cpu_window'))) - self.sampleDisk = ( - sampling.AdvancedStatsFunction( - self._sampleDisk, - config.getint('vars', 'vm_sample_disk_interval'), - config.getint('vars', 'vm_sample_disk_window'))) - self.sampleDiskLatency = ( - sampling.AdvancedStatsFunction( - self._sampleDiskLatency, - config.getint('vars', 'vm_sample_disk_latency_interval'), - config.getint('vars', 'vm_sample_disk_latency_window'))) - self.sampleNet = ( - sampling.AdvancedStatsFunction( - self._sampleNet, - config.getint('vars', 'vm_sample_net_interval'), - config.getint('vars', 'vm_sample_net_window'))) - - self.addStatsFunction( - self.highWrite, self.updateVolumes, self.sampleCpu, - self.sampleDisk, self.sampleDiskLatency, self.sampleNet) - - def _highWrite(self): - if not self._vm.isDisksStatsCollectionEnabled(): - # Avoid queries from storage during recovery process - return - self._vm.extendDrivesIfNeeded() - - def _updateVolumes(self): - if not self._vm.isDisksStatsCollectionEnabled(): - # Avoid queries from storage during recovery process - return - - for vmDrive in self._vm._devices[DISK_DEVICES]: - self._vm.updateDriveVolume(vmDrive) - - def _sampleCpu(self): - cpuStats = self._vm._dom.getCPUStats(True, 0) - return cpuStats[0] - - def _sampleDisk(self): - diskSamples = {} - # Avoid queries from storage during recovery process - if 
self._vm.isDisksStatsCollectionEnabled(): - for vmDrive in self._vm._devices[DISK_DEVICES]: - diskSamples[vmDrive.name] = self._vm._dom.blockStats( - vmDrive.name) - return diskSamples - - def _sampleDiskLatency(self): - # {'wr_total_times': 0L, 'rd_operations': 9638L, - # 'flush_total_times': 0L,'rd_total_times': 7622718001L, - # 'rd_bytes': 85172430L, 'flush_operations': 0L, - # 'wr_operations': 0L, 'wr_bytes': 0L} - diskLatency = {} - # Avoid queries from storage during recovery process - if self._vm.isDisksStatsCollectionEnabled(): - for vmDrive in self._vm._devices[DISK_DEVICES]: - diskLatency[vmDrive.name] = self._vm._dom.blockStatsFlags( - vmDrive.name, flags=libvirt.VIR_TYPED_PARAM_STRING_OKAY) - return diskLatency - - def _sampleNet(self): - netSamples = {} - for nic in self._vm._devices[NIC_DEVICES]: - netSamples[nic.name] = self._vm._dom.interfaceStats(nic.name) - return netSamples - - def _diff(self, prev, curr, val): - return prev[val] - curr[val] - - def _usagePercentage(self, val, sampleInterval): - return 100 * val / sampleInterval / 1000 ** 3 - - def _getCpuStats(self, stats): - try: - sInfo, eInfo, sampleInterval = self.sampleCpu.getStats() - except sampling.NotEnoughSamplesError: - return - - try: - stats['cpuSys'] = self._usagePercentage( - self._diff(eInfo, sInfo, 'user_time') + - self._diff(eInfo, sInfo, 'system_time'), - sampleInterval) - stats['cpuUser'] = self._usagePercentage( - self._diff(eInfo, sInfo, 'cpu_time') - - self._diff(eInfo, sInfo, 'user_time') - - self._diff(eInfo, sInfo, 'system_time'), - sampleInterval) - - except (TypeError, ZeroDivisionError) as e: - self._log.debug("CPU stats not available: %s", e) - stats['cpuUser'] = 0.0 - stats['cpuSys'] = 0.0 - - @classmethod - def _getNicStats(cls, name, model, mac, - start_sample, end_sample, interval): - ifSpeed = [100, 1000][model in ('e1000', 'virtio')] - - ifStats = {'macAddr': mac, - 'name': name, - 'speed': str(ifSpeed), - 'state': 'unknown'} - - ifStats['rxErrors'] = 
str(end_sample[2]) - ifStats['rxDropped'] = str(end_sample[3]) - ifStats['txErrors'] = str(end_sample[6]) - ifStats['txDropped'] = str(end_sample[7]) - - ifRxBytes = (100.0 * - ((end_sample[0] - start_sample[0]) % 2 ** 32) / - interval / ifSpeed / cls.MBPS_TO_BPS) - ifTxBytes = (100.0 * - ((end_sample[4] - start_sample[4]) % 2 ** 32) / - interval / ifSpeed / cls.MBPS_TO_BPS) - - ifStats['rxRate'] = '%.1f' % ifRxBytes - ifStats['txRate'] = '%.1f' % ifTxBytes - - return ifStats - - def _getNetworkStats(self, stats): - stats['network'] = {} - try: - sInfo, eInfo, sampleInterval = self.sampleNet.getStats() - except sampling.NotEnoughSamplesError: - return - - for nic in self._vm._devices[NIC_DEVICES]: - if nic.name.startswith('hostdev'): - continue - - # may happen if nic is a new hot-plugged one - if nic.name not in sInfo or nic.name not in eInfo: - continue - - stats['network'][nic.name] = self._getNicStats( - nic.name, nic.nicModel, nic.macAddr, - sInfo[nic.name], eInfo[nic.name], sampleInterval) - - def _getDiskStats(self, stats): - try: - sInfo, eInfo, sampleInterval = self.sampleDisk.getStats() - except sampling.NotEnoughSamplesError: - return - - for vmDrive in self._vm._devices[DISK_DEVICES]: - dName = vmDrive.name - dStats = {} - try: - dStats = {'truesize': str(vmDrive.truesize), - 'apparentsize': str(vmDrive.apparentsize)} - if isVdsmImage(vmDrive): - dStats['imageID'] = vmDrive.imageID - elif "GUID" in vmDrive: - dStats['lunGUID'] = vmDrive.GUID - dStats['readRate'] = ((eInfo[dName][1] - sInfo[dName][1]) / - sampleInterval) - dStats['writeRate'] = ((eInfo[dName][3] - sInfo[dName][3]) / - sampleInterval) - except (AttributeError, KeyError, TypeError, ZeroDivisionError): - self._log.debug("Disk %s stats not available", dName) - - stats[dName] = dStats - - def _getDiskLatency(self, stats): - try: - sInfo, eInfo, sampleInterval = self.sampleDiskLatency.getStats() - except sampling.NotEnoughSamplesError: - return - - def _avgLatencyCalc(sData, eData): - 
readLatency = (0 if not (eData['rd_operations'] - - sData['rd_operations']) - else (eData['rd_total_times'] - - sData['rd_total_times']) / - (eData['rd_operations'] - - sData['rd_operations'])) - writeLatency = (0 if not (eData['wr_operations'] - - sData['wr_operations']) - else (eData['wr_total_times'] - - sData['wr_total_times']) / - (eData['wr_operations'] - - sData['wr_operations'])) - flushLatency = (0 if not (eData['flush_operations'] - - sData['flush_operations']) - else (eData['flush_total_times'] - - sData['flush_total_times']) / - (eData['flush_operations'] - - sData['flush_operations'])) - return str(readLatency), str(writeLatency), str(flushLatency) - - for vmDrive in self._vm._devices[DISK_DEVICES]: - dName = vmDrive.name - dLatency = {'readLatency': '0', - 'writeLatency': '0', - 'flushLatency': '0'} - try: - (dLatency['readLatency'], - dLatency['writeLatency'], - dLatency['flushLatency']) = _avgLatencyCalc(sInfo[dName], - eInfo[dName]) - except (KeyError, TypeError): - self._log.debug("Disk %s latency not available", dName) - else: - stats[dName].update(dLatency) - - def get(self): - stats = {} - - try: - stats['statsAge'] = time.time() - self.getLastSampleTime() - except TypeError: - self._log.debug("Stats age not available") - stats['statsAge'] = -1.0 - - self._getCpuStats(stats) - self._getNetworkStats(stats) - self._getDiskStats(stats) - self._getDiskLatency(stats) - - return stats - - def handleStatsException(self, ex): - # We currently handle only libvirt exceptions - if not hasattr(ex, "get_error_code"): - return False - - # We currently handle only the missing domain exception - if ex.get_error_code() != libvirt.VIR_ERR_NO_DOMAIN: - return False - - return True
class TimeoutError(libvirt.libvirtError): @@ -2654,7 +2387,7 @@ return domxml.toxml()
def _initVmStats(self): - self._vmStats = VmStatsThread(self) + self._vmStats = sampling.VmStatsThread(self) self._vmStats.start() self._guestEventTime = self._startTime
@@ -4923,6 +4656,12 @@ hooks.before_vm_migrate_destination(srcDomXML, self.conf) return True
+ def getDiskDevices(self): + return self._devices[DISK_DEVICES] + + def getNicDevices(self): + return self._devices[NIC_DEVICES] +
# A little unrelated hack to make xml.dom.minidom.Document.toprettyxml() # not wrap Text node with whitespace.