Adam Litke has uploaded a new change for review.
Change subject: storage: Fix abort race in SDM.copy_data
......................................................................
storage: Fix abort race in SDM.copy_data
The copy_data job supports aborting the qemuimg process. If such a
process has been started we kill it. If a process hasn't yet been
created we do nothing but place the job in aborting state. Later before
creating the qemuimg command we want to check if we have been asked to
abort. If so, raise ActionStopped instead of continuing.
Change-Id: I0c7d9aadb981e8b851a70c7ad0fdc0d75b46dc51
Signed-off-by: Adam Litke <alitke(a)redhat.com>
---
M tests/storage_sdm_copy_data_test.py
M vdsm/storage/sdm/api/copy_data.py
2 files changed, 44 insertions(+), 9 deletions(-)
git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/50/65150/1
diff --git a/tests/storage_sdm_copy_data_test.py b/tests/storage_sdm_copy_data_test.py
index 6b116d5..0c6589d 100644
--- a/tests/storage_sdm_copy_data_test.py
+++ b/tests/storage_sdm_copy_data_test.py
@@ -21,6 +21,7 @@
import threading
from contextlib import contextmanager
+from functools import partial
from fakelib import FakeScheduler
from monkeypatch import MonkeyPatchScope
@@ -285,6 +286,35 @@
self.assertEqual(sc.ILLEGAL_VOL, dst_vol.getLegality())
self.assertEqual(gen_id, dst_vol.getMetaParam(sc.GENERATION))
+ @permutations((('file',), ('block',)))
+ def test_abort_before_copy(self, env_type):
+ def _fake_run(job_instance, real_run):
+ job_instance._status = jobs.STATUS.ABORTING
+ real_run()
+
+ fmt = sc.RAW_FORMAT
+ with self.get_vols(env_type, fmt, fmt) as (src_chain, dst_chain):
+ src_vol = src_chain[0]
+ dst_vol = dst_chain[0]
+ gen_id = dst_vol.getMetaParam(sc.GENERATION)
+ source = dict(endpoint_type='div', sd_id=src_vol.sdUUID,
+ img_id=src_vol.imgUUID, vol_id=src_vol.volUUID,
+ generation=0)
+ dest = dict(endpoint_type='div', sd_id=dst_vol.sdUUID,
+ img_id=dst_vol.imgUUID, vol_id=dst_vol.volUUID,
+ generation=gen_id)
+ fake_convert = FakeQemuConvertChecker(src_vol, dst_vol,
+ error=RuntimeError)
+ with MonkeyPatchScope([(qemuimg, 'convert', fake_convert)]):
+ job_id = make_uuid()
+ job = storage.sdm.api.copy_data.Job(job_id, 0, source, dest)
+ # Simulate status changing to aborting right after _run called
+ job._run = partial(_fake_run, job, job._run)
+ job.run()
+ self.assertEqual(jobs.STATUS.ABORTED, job.status)
+ self.assertEqual(sc.LEGAL_VOL, dst_vol.getLegality())
+ self.assertEqual(gen_id, dst_vol.getMetaParam(sc.GENERATION))
+
def test_wrong_generation(self):
fmt = sc.RAW_FORMAT
with self.get_vols('block', fmt, fmt) as (src_chain, dst_chain):
@@ -318,20 +348,23 @@
self.ready_event = threading.Event()
def __call__(self, *args, **kwargs):
- assert sc.LEGAL_VOL == self.src_vol.getLegality()
- assert sc.ILLEGAL_VOL == self.dst_vol.getLegality()
- return FakeQemuImgOperation(self.ready_event, self.wait_for_abort,
+ return FakeQemuImgOperation(self.src_vol, self.dst_vol,
+ self.ready_event, self.wait_for_abort,
self.error)
class FakeQemuImgOperation(object):
- def __init__(self, ready_event, wait_for_abort, error):
+ def __init__(self, src_vol, dst_vol, ready_event, wait_for_abort, error):
+ self.src_vol = src_vol
+ self.dst_vol = dst_vol
self.ready_event = ready_event
self.wait_for_abort = wait_for_abort
self.error = error
self.abort_event = threading.Event()
def start(self):
+ assert sc.LEGAL_VOL == self.src_vol.getLegality()
+ assert sc.ILLEGAL_VOL == self.dst_vol.getLegality()
self.ready_event.set()
def abort(self):
diff --git a/vdsm/storage/sdm/api/copy_data.py b/vdsm/storage/sdm/api/copy_data.py
index 3dc6fff..7182f49 100644
--- a/vdsm/storage/sdm/api/copy_data.py
+++ b/vdsm/storage/sdm/api/copy_data.py
@@ -26,6 +26,7 @@
from vdsm import jobs
from vdsm import properties
from vdsm import qemuimg
+from vdsm.common import exception
from vdsm.storage import constants as sc
from vdsm.storage import guarded
from vdsm.storage import workarounds
@@ -60,12 +61,9 @@
self._operation.abort()
def _run(self):
+ print "called: status=%r" % self.status
with guarded.context(self._source.locks + self._dest.locks):
with self._source.prepare(), self._dest.prepare():
- # Do not start copying if we have already been aborted
- if self._status == jobs.STATUS.ABORTED:
- return
-
# Workaround for volumes containing VM configuration info that
# were created with invalid vdsm metadata.
if self._source.is_invalid_vm_conf_disk():
@@ -74,7 +72,10 @@
src_format = self._source.qemu_format
dst_format = self._dest.qemu_format
- with self._dest.volume_operation():
+ with self._status_lock:
+ # Do not start copying if we have already been aborted
+ if self._status == jobs.STATUS.ABORTING:
+ raise exception.ActionStopped()
self._operation = qemuimg.convert(
self._source.path,
self._dest.path,
@@ -82,6 +83,7 @@
dstFormat=dst_format,
backing=self._dest.backing_path,
backingFormat=self._dest.backing_qemu_format)
+ with self._dest.volume_operation():
self._operation.start()
self._operation.wait_for_completion()
--
To view, visit
https://gerrit.ovirt.org/65150
To unsubscribe, visit
https://gerrit.ovirt.org/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I0c7d9aadb981e8b851a70c7ad0fdc0d75b46dc51
Gerrit-PatchSet: 1
Gerrit-Project: vdsm
Gerrit-Branch: master
Gerrit-Owner: Adam Litke <alitke(a)redhat.com>