Milan Zamazal has uploaded a new change for review.
Change subject: virt: Distinguish between switching to and entering post-copy migration ......................................................................
virt: Distinguish between switching to and entering post-copy migration
Switching to migration post-copy mode has two phases: Asking for switching to post-copy and actually switching to post-copy. The transition period should be short (if it is not then it is a QEMU/libvirt bug), so we needn't care about it much.
But on various checks we should distinguish between the post-copy request phase and the post-copy switch phase. For instance, we should block canceling migrations very early to avoid timing issues and possibly canceling a post-copy migration. On the other hand we should report a true VM state to Engine, so we can report post-copy only after the VM actually enters it.
Change-Id: I88dddeab84a609ebd3d5d9139724adff9cdb1352 Signed-off-by: Milan Zamazal mzamazal@redhat.com Bug-Url: https://bugzilla.redhat.com/1354343 --- M vdsm/virt/migration.py M vdsm/virt/vm.py 2 files changed, 29 insertions(+), 13 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/43/64143/7
diff --git a/vdsm/virt/migration.py b/vdsm/virt/migration.py index df478cd..2c73375 100644 --- a/vdsm/virt/migration.py +++ b/vdsm/virt/migration.py @@ -150,9 +150,9 @@ return self._mode == MODE_FILE
@property - def in_post_copy(self): + def post_copy_initiated(self): return (self._monitorThread is not None and - self._monitorThread.in_post_copy) + self._monitorThread.post_copy_initiated)
def getStat(self): """ @@ -644,7 +644,7 @@ self.progress = None self._conv_schedule = conv_schedule self._use_conv_schedule = use_conv_schedule - self._in_post_copy = False + self._post_copy_initiated = False self.downtime_thread = _FakeThreadInterface() self._thread = concurrent.thread(self.run)
@@ -659,8 +659,8 @@ return MonitorThread._MIGRATION_MONITOR_INTERVAL > 0
@property - def in_post_copy(self): - return self._in_post_copy + def post_copy_initiated(self): + return self._post_copy_initiated
@utils.traceback() def run(self): @@ -709,7 +709,7 @@ progress = Progress.from_job_stats(job_stats)
now = time.time() - if self._in_post_copy: + if self._post_copy_initiated: # Post-copy mode is a final state of a migration -- it either # completes or fails and stops the VM, there is no way to # continue with the migration in either case. So we won't @@ -718,8 +718,15 @@ # abort action after the post-copy action in the schedule, for # the case when it's not possible to switch to the post-copy # mode for some reason. - self._vm.log.debug('Post-copy migration still in progress: %d', - progress.data_remaining) + if self._vm.in_post_copy: + # If in_post-copy flag is false then we are in the interim + # phase (which should be short) between initiating the + # post-copy migration and the actual start of the post-copy + # migration. Nothing needs to be done in that case. + self._vm.log.debug( + 'Post-copy migration still in progress: %d', + progress.data_remaining + ) elif not self._use_conv_schedule and\ (0 < migrationMaxTime < now - self._startTime): self._vm.log.warn('The migration took %d seconds which is ' @@ -741,7 +748,7 @@ ' Refer to RHBZ#919201.', progress.data_remaining / Mbytes, lowmark / Mbytes)
- if not self._in_post_copy and\ + if not self._post_copy_initiated and\ lastDataRemaining is not None and\ lastDataRemaining < progress.data_remaining: iterationCount += 1 @@ -802,8 +809,8 @@ vm.log.info('Switching to post-copy migration') ret = vm.switch_migration_to_post_copy() if ret >= 0: - self._in_post_copy = True - if not self._in_post_copy: + self._post_copy_initiated = True + if not self._post_copy_initiated: # Do nothing for now; the next action will be invoked after # a while vm.log.warn('Failed to switch to post-copy migration') diff --git a/vdsm/virt/vm.py b/vdsm/virt/vm.py index 0f2ece8..f806c45 100644 --- a/vdsm/virt/vm.py +++ b/vdsm/virt/vm.py @@ -309,6 +309,7 @@ self._numaInfo = {} self._vmJobs = None self._clientPort = '' + self._in_post_copy = False
@property def start_time(self): @@ -317,6 +318,10 @@ @property def domain(self): return self._domain + + @property + def in_post_copy(self): + return self._in_post_copy
def _get_lastStatus(self): # note that we don't use _statusLock here. One of the reasons is the @@ -1199,7 +1204,7 @@ if self.lastStatus == vmstatus.DOWN: stats.update(self._getDownVmStats()) else: - if self.isMigrating() and self._migrationSourceThread.in_post_copy: + if self.isMigrating() and self._in_post_copy: # Stats are on the destination during post-copy migration, # except for migration progress, which is always on the source. stats['migrationProgress'] = self._get_vm_migration_progress() @@ -1428,7 +1433,8 @@
def migrateCancel(self): with self._post_copy_lock: - if self._migrationSourceThread.in_post_copy: + if self._migrationSourceThread.post_copy_initiated or \ + self._in_post_copy: return response.error( 'migCancelErr', message=("Can't cancel migration in post-copy mode") @@ -4181,6 +4187,9 @@ pass else: hooks.after_vm_pause(domxml, self.conf) + elif detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY: + self._in_post_copy = True + self.log.debug("Migration entered post-copy mode") elif detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED: # This can happen on both the ends of the migration. # After a failed post-copy migration, the VM remains in a