Francesco Romani has uploaded a new change for review.
Change subject: sampling: vm: use bulk-stats-style sample format ......................................................................
sampling: vm: use bulk-stats-style sample format
Use the bulk stats style format for samples. This makes room for a followup patch which wants to add statistics gathering using bulk stats.
Since the data is just translated and not changed, there is no change in behaviour, but (maybe?) a small performance penalty.
Change-Id: Ic43b1ecb5e3503f22d3b45576310a169de852b47 Signed-off-by: Francesco Romani fromani@redhat.com --- M vdsm/numaUtils.py M vdsm/virt/vm.py 2 files changed, 119 insertions(+), 43 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/31/36731/1
diff --git a/vdsm/numaUtils.py b/vdsm/numaUtils.py index 6207816..1d6d970 100644 --- a/vdsm/numaUtils.py +++ b/vdsm/numaUtils.py @@ -92,9 +92,10 @@ def _getVcpuRuntimePinMap(sample): vCpuRuntimePinMap = {} if sample: - vCpuInfos = sample if sample is not None else [] - for vCpuInfo in vCpuInfos: - vCpuRuntimePinMap[vCpuInfo[0]] = vCpuInfo[3] + for idx in xrange(sample['vcpu.maximum']): + if 'vcpu.%d.state' % idx in sample: + vCpuRuntimePinMap[idx] = sample.get( + 'vcpu.%d.physical' % idx, -1) return vCpuRuntimePinMap
diff --git a/vdsm/virt/vm.py b/vdsm/virt/vm.py index 132f09f..27d6d0f 100644 --- a/vdsm/virt/vm.py +++ b/vdsm/virt/vm.py @@ -260,16 +260,16 @@
try: stats['cpuSys'] = self._usagePercentage( - self._diff(eInfo, sInfo, 'user_time') + - self._diff(eInfo, sInfo, 'system_time'), + self._diff(eInfo, sInfo, 'cpu.user') + + self._diff(eInfo, sInfo, 'cpu.system'), interval) stats['cpuUser'] = self._usagePercentage( - self._diff(eInfo, sInfo, 'cpu_time') - - self._diff(eInfo, sInfo, 'user_time') - - self._diff(eInfo, sInfo, 'system_time'), + self._diff(eInfo, sInfo, 'cpu.time') + - self._diff(eInfo, sInfo, 'cpu.user') + - self._diff(eInfo, sInfo, 'cpu.system'), interval)
- except (TypeError, ZeroDivisionError) as e: + except (KeyError, TypeError, ZeroDivisionError) as e: self._log.exception("CPU stats not available: %s", e)
def _getBalloonStats(self, stats, sample): @@ -292,7 +292,7 @@ 'balloon_max': str(max_mem), 'balloon_min': str( int(self._vm.conf.get('memGuaranteedSize', '0')) * 1024), - 'balloon_cur': str(sample), + 'balloon_cur': str(sample['balloon.current']), 'balloon_target': str(balloon_target) })
@@ -336,7 +336,9 @@
@classmethod def _getNicStats(cls, name, model, mac, - start_sample, end_sample, interval): + start_sample, start_index, + end_sample, end_index, + interval): ifSpeed = [100, 1000][model in ('e1000', 'virtio')]
ifStats = {'macAddr': mac, @@ -344,16 +346,22 @@ 'speed': str(ifSpeed), 'state': 'unknown'}
- ifStats['rxErrors'] = str(end_sample[2]) - ifStats['rxDropped'] = str(end_sample[3]) - ifStats['txErrors'] = str(end_sample[6]) - ifStats['txDropped'] = str(end_sample[7]) + ifStats['rxErrors'] = str(end_sample['net.%d.rx.errs' % end_index]) + ifStats['rxDropped'] = str(end_sample['net.%d.rx.drop' % end_index]) + ifStats['txErrors'] = str(end_sample['net.%d.tx.errs' % end_index]) + ifStats['txDropped'] = str(end_sample['net.%d.tx.drop' % end_index])
+ rxDelta = ( + end_sample['net.%d.rx.bytes' % end_index] - + start_sample['net.%d.rx.bytes' % start_index]) ifRxBytes = (100.0 * - ((end_sample[0] - start_sample[0]) % 2 ** 32) / + (rxDelta % 2 ** 32) / interval / ifSpeed / cls.MBPS_TO_BPS) + txDelta = ( + end_sample['net.%d.tx.bytes' % end_index] - + start_sample['net.%d.tx.bytes' % start_index]) ifTxBytes = (100.0 * - ((end_sample[4] - start_sample[4]) % 2 ** 32) / + (txDelta % 2 ** 32) / interval / ifSpeed / cls.MBPS_TO_BPS)
ifStats['rxRate'] = '%.1f' % ifRxBytes @@ -371,13 +379,14 @@ if nic.name.startswith('hostdev'): continue
+ sIdx = _findBulkStatIndex('net', nic.name, sInfo) + eIdx = _findBulkStatIndex('net', nic.name, eInfo) # may happen if nic is a new hot-plugged one - if nic.name not in sInfo or nic.name not in eInfo: + if sIdx is None or eIdx is None: continue - stats['network'][nic.name] = self._getNicStats( nic.name, nic.nicModel, nic.macAddr, - sInfo[nic.name], eInfo[nic.name], interval) + sInfo, sIdx, eInfo, eIdx, sampleInterval)
def _getDiskStats(self, stats, sInfo, eInfo, interval): for vmDrive in self._vm.getDiskDevices(): @@ -392,12 +401,14 @@ dStats['imageID'] = vmDrive.imageID elif "GUID" in vmDrive: dStats['lunGUID'] = vmDrive.GUID - if (sInfo and vmDrive.name in sInfo and - eInfo and vmDrive.name in eInfo): + sIdx = _findBulkStatIndex('block', vmDrive.name, sInfo) + eIdx = _findBulkStatIndex('block', vmDrive.name, eInfo) + if sIdx is not None and eIdx is not None: # will be None if sampled during recovery - dStats.update(_calcDiskRate(vmDrive, sInfo, eInfo, - interval)) - dStats.update(_calcDiskLatency(vmDrive, sInfo, eInfo)) + dStats.update(_calcDiskRate(sInfo, sIdx, + eInfo, eIdx, + sampleInterval)) + dStats.update(_calcDiskLatency(sInfo, sIdx, eInfo, eIdx))
except (AttributeError, TypeError, ZeroDivisionError): self._log.exception("Disk %s stats not available", @@ -712,22 +723,33 @@ return infos
def _getSampleCpu(self): - return self.sampleCpu.getStats() + stats = self.sampleCpu.getStats() + return { + 'cpu.user': stats['user_time'], + 'cpu.system': stats['system_time'], + 'cpu.time': stats['cpu_time']}
def _getSampleNet(self): - return self.sampleNet.getStats() + sInfo, eInfo, interval = self.sampleNet.getStats() + return (_translateNetSample(sInfo), + _translateNetSample(eInfo), + interval)
def _getSampleDisk(self): - return self.sampleDisk.getStats() + sInfo, eInfo, interval = self.sampleDisk.getStats() + return (_translateDiskSample(sInfo), + _translateDiskSample(eInfo), + interval)
def _getSampleBalloon(self): - return self.sampleBalloon.getLastSample() + return { + 'balloon.current': self.sampleBalloon.getLastSample()}
def _getSampleVmJobs(self): return self.sampleVmJobs.getLastSample()
def _getSampleVcpu(self): - return self.sampleVcpuPinning.getLastSample() + return _translateVcpuSample(self.sampleVcpuPinning.getLastSample())
def _getSampleCpuTune(self): return self.sampleCpuTune.getLastSample() @@ -744,33 +766,86 @@ return True
-def _calcDiskRate(vmDrive, sInfo, eInfo, sampleInterval): +def _findBulkStatIndex(group, value, stats, attr='name'): + if stats: + for idx in xrange(stats['%s.count' % group]): + if stats['%s.%d.%s' % (group, idx, attr)] == value: + return idx + return None + + +def _translateNetSample(oldInfo): + newInfo = {'net.count': len(oldInfo)} + for idx, (name, values) in enumerate(oldInfo.iteritems()): + newInfo['net.%d.name' % idx] = name + newInfo['net.%d.rx.bytes' % (idx)] = values[0] + newInfo['net.%d.rx.pkts' % (idx)] = values[1] + newInfo['net.%d.rx.errs' % (idx)] = values[2] + newInfo['net.%d.rx.drop' % (idx)] = values[3] + newInfo['net.%d.tx.bytes' % (idx)] = values[4] + newInfo['net.%d.tx.pkts' % (idx)] = values[5] + newInfo['net.%d.tx.errs' % (idx)] = values[6] + newInfo['net.%d.tx.drop' % (idx)] = values[7] + return newInfo + + +def _translateDiskSample(oldInfo): + newInfo = {'block.count': len(oldInfo)} + for idx, (name values) in enumerate(oldInfo.iteritems()): + newInfo['block.%d.name' % idx] = name + newInfo['block.%d.rd.times' % idx] = values['rd_total_times'] + newInfo['block.%d.wr.times' % idx] = values['wr_total_times'] + newInfo['block.%d.fl.times' % idx] = values['flush_total_times'] + newInfo['block.%d.rd.reqs' % idx] = values['rd_operations'] + newInfo['block.%d.wr.reqs' % idx] = values['wr_operations'] + newInfo['block.%d.fl.reqs' % idx] = values['flush_operations'] + newInfo['block.%d.rd.bytes' % idx] = values['rd_bytes'] + newInfo['block.%d.wr.bytes' % idx] = values['wr_bytes'] + return newInfo + + +def _translateVcpuSample(oldInfo): + newInfo = {'vcpu.current': len(oldInfo)} + vcpuMax = 0 + for (vcpuNum, vcpuState, vcpuTime, pcpuNum) in oldInfo: + vcpuMax = max(vcpuMax, vcpuNum) + newInfo['vcpu.%d.state' % vcpuNum] = vcpuState + newInfo['vcpu.%d.time' % vcpuNum] = vcpuTime + newInfo['vcpu.%d.physical' % vcpuNum] = pcpuNum + # yep, just guesswork + newInfo['vcpu.maximum'] = vcpuMax + return newInfo + + +def _calcDiskRate(sInfo, sIdx, eInfo, eIdx, 
sampleInterval): return { 'readRate': ( - (eInfo[vmDrive.name]['rd_bytes'] - - sInfo[vmDrive.name]['rd_bytes']) + (eInfo['block.%d.rd.bytes' % eIdx] - + sInfo['block.%d.rd.bytes' % sIdx]) / sampleInterval), 'writeRate': ( - (eInfo[vmDrive.name]['wr_bytes'] - - sInfo[vmDrive.name]['wr_bytes']) + (eInfo['block.%d.wr.bytes' % eIdx] - + sInfo['block.%d.wr.bytes' % sIdx]) / sampleInterval)}
-def _calcDiskLatency(vmDrive, sInfo, eInfo): - dname = vmDrive.name - +def _calcDiskLatency(sInfo, sIdx, eInfo, eIdx): def compute_latency(ltype): - ops = ltype + '_operations' - operations = eInfo[dname][ops] - sInfo[dname][ops] + ops = ltype + '.reqs' + operations = ( + eInfo['block.%d.%s' % (eIdx, ops)] - + sInfo['block.%d.%s' % (sIdx, ops)]) if not operations: return 0 - times = ltype + '_total_times' - elapsed_time = eInfo[dname][times] - sInfo[dname][times] + times = ltype + '.times' + elapsed_time = ( + eInfo['block.%d.%s' % (eIdx, times)] - + sInfo['block.%d.%s' % (sIdx, times)]) return elapsed_time / operations
return {'readLatency': str(compute_latency('rd')), 'writeLatency': str(compute_latency('wr')), - 'flushLatency': str(compute_latency('flush'))} + 'flushLatency': str(compute_latency('fl'))}
class TimeoutError(libvirt.libvirtError):