[openstack-nova] avoid and cater for missing libvirt instance images (#801791)

Pádraig Brady pbrady at fedoraproject.org
Fri Mar 16 23:35:12 UTC 2012


commit 5689fb32a4a76cd597e37f6f36d15695c8b64560
Author: Pádraig Brady <P at draigBrady.com>
Date:   Fri Mar 16 23:17:14 2012 +0000

    avoid and cater for missing libvirt instance images (#801791)
    
    Note the cause of the missing disk images may not be fixed
    (as documented in the second upstream bug referenced in
    the bug report).  But this does fix potential issues and
    works around the specific issue of the compute service
    not starting in this state.

 ...Fix-backing-file-cp-resize-race-condition.patch |   67 ++++++
 ...tomic-manipulation-of-libvirt-disk-images.patch |  218 ++++++++++++++++++++
 ...ompute-service-to-start-with-missing-libv.patch |   77 +++++++
 openstack-nova.spec                                |    9 +-
 4 files changed, 370 insertions(+), 1 deletions(-)
---
diff --git a/0009-Fix-backing-file-cp-resize-race-condition.patch b/0009-Fix-backing-file-cp-resize-race-condition.patch
new file mode 100644
index 0000000..561aa6f
--- /dev/null
+++ b/0009-Fix-backing-file-cp-resize-race-condition.patch
@@ -0,0 +1,67 @@
+From 32505c980956003214dfacbe363e3e020e635076 Mon Sep 17 00:00:00 2001
+From: Anthony Young <sleepsonthefloor at gmail.com>
+Date: Tue, 13 Mar 2012 14:44:31 -0700
+Subject: [PATCH] Fix backing file cp/resize race condition.
+
+ * Fixes bug 953831
+
+Change-Id: I39950b16c9b76159b16203f7b8b504cae5475516
+---
+ nova/virt/libvirt/connection.py |   37 +++++++++++++++++++++----------------
+ 1 files changed, 21 insertions(+), 16 deletions(-)
+
+diff --git a/nova/virt/libvirt/connection.py b/nova/virt/libvirt/connection.py
+index 2785a5d..6626ff6 100644
+--- a/nova/virt/libvirt/connection.py
++++ b/nova/virt/libvirt/connection.py
+@@ -46,6 +46,7 @@ import multiprocessing
+ import os
+ import shutil
+ import sys
++import time
+ import uuid
+ 
+ from eventlet import greenthread
+@@ -1005,22 +1006,26 @@ class LibvirtConnection(driver.ComputeDriver):
+                 # For raw it's quicker to just generate outside the cache
+                 call_if_not_exists(target, fn, *args, **kwargs)
+ 
+-            if cow:
+-                cow_base = base
+-                if size:
+-                    size_gb = size / (1024 * 1024 * 1024)
+-                    cow_base += "_%d" % size_gb
+-                    if not os.path.exists(cow_base):
+-                        libvirt_utils.copy_image(base, cow_base)
+-                        disk.extend(cow_base, size)
+-                libvirt_utils.create_cow_image(cow_base, target)
+-            elif not generating:
+-                libvirt_utils.copy_image(base, target)
+-                # Resize after the copy, as it's usually much faster
+-                # to make sparse updates, rather than potentially
+-                # naively copying the whole image file.
+-                if size:
+-                    disk.extend(target, size)
++            @utils.synchronized(base)
++            def copy_and_extend(cow, generating, base, target, size):
++                if cow:
++                    cow_base = base
++                    if size:
++                        size_gb = size / (1024 * 1024 * 1024)
++                        cow_base += "_%d" % size_gb
++                        if not os.path.exists(cow_base):
++                            libvirt_utils.copy_image(base, cow_base)
++                            disk.extend(cow_base, size)
++                    libvirt_utils.create_cow_image(cow_base, target)
++                elif not generating:
++                    libvirt_utils.copy_image(base, target)
++                    # Resize after the copy, as it's usually much faster
++                    # to make sparse updates, rather than potentially
++                    # naively copying the whole image file.
++                    if size:
++                        disk.extend(target, size)
++
++            copy_and_extend(cow, generating, base, target, size)
+ 
+     @staticmethod
+     def _create_local(target, local_size, unit='G',
diff --git a/0010-ensure-atomic-manipulation-of-libvirt-disk-images.patch b/0010-ensure-atomic-manipulation-of-libvirt-disk-images.patch
new file mode 100644
index 0000000..3416fd2
--- /dev/null
+++ b/0010-ensure-atomic-manipulation-of-libvirt-disk-images.patch
@@ -0,0 +1,218 @@
+From 81ef3436c52f84816a341d19348a464412ce7dc9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?P=C3=A1draig=20Brady?= <pbrady at redhat.com>
+Date: Fri, 16 Mar 2012 03:43:49 +0000
+Subject: [PATCH] ensure atomic manipulation of libvirt disk images
+
+This pattern could probably be used elsewhere,
+but only libvirt disk images are considered for now.
+This change ensures there are no stale files left
+anywhere in the path from glance, through the libvirt image cache.
+These could cause subsequent operational errors either
+directly or indirectly through disk wastage.
+
+* nova/utils.py: Add a new remove_on_error() context manager
+that is used to remove the passed PATH on a raised exception.
+* nova/virt/images.py: Ensure temporary downloaded and
+converted images are protected.
+* nova/virt/libvirt/connection.py: Ensure all the images in
+the image cache and instance dirs are protected.
+
+Change-Id: I81a5407665a6998128c0dee41387ef00ebddeb4d
+---
+ nova/utils.py                   |   21 +++++++++--
+ nova/virt/images.py             |   69 +++++++++++++++++----------------------
+ nova/virt/libvirt/connection.py |   16 ++++++---
+ 3 files changed, 57 insertions(+), 49 deletions(-)
+
+diff --git a/nova/utils.py b/nova/utils.py
+index a224b38..3ca6aa1 100644
+--- a/nova/utils.py
++++ b/nova/utils.py
+@@ -21,6 +21,7 @@
+ 
+ import contextlib
+ import datetime
++import errno
+ import functools
+ import hashlib
+ import inspect
+@@ -920,8 +921,8 @@ def cleanup_file_locks():
+             continue
+         try:
+             stat_info = os.stat(os.path.join(FLAGS.lock_path, filename))
+-        except OSError as (errno, strerror):
+-            if errno == 2:  # doesn't exist
++        except OSError as e:
++            if e.errno == errno.ENOENT:
+                 continue
+             else:
+                 raise
+@@ -940,8 +941,8 @@ def delete_if_exists(pathname):
+ 
+     try:
+         os.unlink(pathname)
+-    except OSError as (errno, strerror):
+-        if errno == 2:  # doesn't exist
++    except OSError as e:
++        if e.errno == errno.ENOENT:
+             return
+         else:
+             raise
+@@ -1247,6 +1248,18 @@ def logging_error(message):
+             LOG.exception(message)
+ 
+ 
++ at contextlib.contextmanager
++def remove_on_error(path):
++    """Protect code that wants to operate on PATH atomically.
++    Any exception will cause PATH to be removed.
++    """
++    try:
++        yield
++    except Exception:
++        with save_and_reraise_exception():
++            delete_if_exists(path)
++
++
+ def make_dev_path(dev, partition=None, base='/dev'):
+     """Return a path to a particular device.
+ 
+diff --git a/nova/virt/images.py b/nova/virt/images.py
+index 12510c7..4ae47d5 100644
+--- a/nova/virt/images.py
++++ b/nova/virt/images.py
+@@ -52,18 +52,10 @@ def fetch(context, image_href, path, _user_id, _project_id):
+     #             checked before we got here.
+     (image_service, image_id) = nova.image.get_image_service(context,
+                                                              image_href)
+-    try:
++    with utils.remove_on_error(path):
+         with open(path, "wb") as image_file:
+             metadata = image_service.get(context, image_id, image_file)
+-    except Exception:
+-        with utils.save_and_reraise_exception():
+-            try:
+-                os.unlink(path)
+-            except OSError, e:
+-                if e.errno != errno.ENOENT:
+-                    LOG.warn("unable to remove stale image '%s': %s" %
+-                             (path, e.strerror))
+-    return metadata
++            return metadata
+ 
+ 
+ def fetch_to_raw(context, image_href, path, user_id, project_id):
+@@ -86,37 +78,36 @@ def fetch_to_raw(context, image_href, path, user_id, project_id):
+ 
+         return(data)
+ 
+-    data = _qemu_img_info(path_tmp)
+-
+-    fmt = data.get("file format")
+-    if fmt is None:
+-        os.unlink(path_tmp)
+-        raise exception.ImageUnacceptable(
+-            reason=_("'qemu-img info' parsing failed."), image_id=image_href)
+-
+-    if "backing file" in data:
+-        backing_file = data['backing file']
+-        os.unlink(path_tmp)
+-        raise exception.ImageUnacceptable(image_id=image_href,
+-            reason=_("fmt=%(fmt)s backed by: %(backing_file)s") % locals())
+-
+-    if fmt != "raw" and FLAGS.force_raw_images:
+-        staged = "%s.converted" % path
+-        LOG.debug("%s was %s, converting to raw" % (image_href, fmt))
+-        out, err = utils.execute('qemu-img', 'convert', '-O', 'raw',
+-                                 path_tmp, staged)
+-        os.unlink(path_tmp)
+-
+-        data = _qemu_img_info(staged)
+-        if data.get('file format', None) != "raw":
+-            os.unlink(staged)
++    with utils.remove_on_error(path_tmp):
++        data = _qemu_img_info(path_tmp)
++
++        fmt = data.get("file format")
++        if fmt is None:
++            raise exception.ImageUnacceptable(
++                reason=_("'qemu-img info' parsing failed."),
++                image_id=image_href)
++
++        if "backing file" in data:
++            backing_file = data['backing file']
+             raise exception.ImageUnacceptable(image_id=image_href,
+-                reason=_("Converted to raw, but format is now %s") %
+-                data.get('file format', None))
++                reason=_("fmt=%(fmt)s backed by: %(backing_file)s") % locals())
++
++        if fmt != "raw" and FLAGS.force_raw_images:
++            staged = "%s.converted" % path
++            LOG.debug("%s was %s, converting to raw" % (image_href, fmt))
++            with utils.remove_on_error(staged):
++                out, err = utils.execute('qemu-img', 'convert', '-O', 'raw',
++                                         path_tmp, staged)
++
++                data = _qemu_img_info(staged)
++                if data.get('file format', None) != "raw":
++                    raise exception.ImageUnacceptable(image_id=image_href,
++                        reason=_("Converted to raw, but format is now %s") %
++                        data.get('file format', None))
+ 
+-        os.rename(staged, path)
++                os.rename(staged, path)
+ 
+-    else:
+-        os.rename(path_tmp, path)
++        else:
++            os.rename(path_tmp, path)
+ 
+     return metadata
+diff --git a/nova/virt/libvirt/connection.py b/nova/virt/libvirt/connection.py
+index 6626ff6..9a8b094 100644
+--- a/nova/virt/libvirt/connection.py
++++ b/nova/virt/libvirt/connection.py
+@@ -998,7 +998,8 @@ class LibvirtConnection(driver.ComputeDriver):
+             @utils.synchronized(fname)
+             def call_if_not_exists(base, fn, *args, **kwargs):
+                 if not os.path.exists(base):
+-                    fn(target=base, *args, **kwargs)
++                    with utils.remove_on_error(base):
++                        fn(target=base, *args, **kwargs)
+ 
+             if cow or not generating:
+                 call_if_not_exists(base, fn, *args, **kwargs)
+@@ -1014,8 +1015,9 @@ class LibvirtConnection(driver.ComputeDriver):
+                         size_gb = size / (1024 * 1024 * 1024)
+                         cow_base += "_%d" % size_gb
+                         if not os.path.exists(cow_base):
+-                            libvirt_utils.copy_image(base, cow_base)
+-                            disk.extend(cow_base, size)
++                            with utils.remove_on_error(cow_base):
++                                libvirt_utils.copy_image(base, cow_base)
++                                disk.extend(cow_base, size)
+                     libvirt_utils.create_cow_image(cow_base, target)
+                 elif not generating:
+                     libvirt_utils.copy_image(base, target)
+@@ -1025,7 +1027,8 @@ class LibvirtConnection(driver.ComputeDriver):
+                     if size:
+                         disk.extend(target, size)
+ 
+-            copy_and_extend(cow, generating, base, target, size)
++            with utils.remove_on_error(target):
++                copy_and_extend(cow, generating, base, target, size)
+ 
+     @staticmethod
+     def _create_local(target, local_size, unit='G',
+@@ -1185,8 +1188,9 @@ class LibvirtConnection(driver.ComputeDriver):
+                               project_id=instance['project_id'],)
+         elif config_drive:
+             label = 'config'
+-            self._create_local(basepath('disk.config'), 64, unit='M',
+-                               fs_format='msdos', label=label)  # 64MB
++            with utils.remove_on_error(basepath('disk.config')):
++                self._create_local(basepath('disk.config'), 64, unit='M',
++                                   fs_format='msdos', label=label)  # 64MB
+ 
+         if instance['key_data']:
+             key = str(instance['key_data'])
diff --git a/0011-allow-the-compute-service-to-start-with-missing-libv.patch b/0011-allow-the-compute-service-to-start-with-missing-libv.patch
new file mode 100644
index 0000000..f2fe217
--- /dev/null
+++ b/0011-allow-the-compute-service-to-start-with-missing-libv.patch
@@ -0,0 +1,77 @@
+From 012603b3f4494f571771514d51c3868cb0930ad6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?P=C3=A1draig=20Brady?= <pbrady at redhat.com>
+Date: Fri, 16 Mar 2012 16:00:43 +0000
+Subject: [PATCH] allow the compute service to start with missing libvirt
+ disks
+
+* nova/virt/libvirt/connection.py: Program defensively and handle
+the case of missing instance disks and log the error rather than
+propagating that exception up (which triggers nova.service to fail).
+* Fixes bug 957110
+
+Change-Id: I1a414f56661843ff6b886e6ebf6f614fcb5a5f31
+---
+ nova/virt/libvirt/connection.py |   26 +++++++++++++++++---------
+ 1 files changed, 17 insertions(+), 9 deletions(-)
+
+diff --git a/nova/virt/libvirt/connection.py b/nova/virt/libvirt/connection.py
+index 9a8b094..de23573 100644
+--- a/nova/virt/libvirt/connection.py
++++ b/nova/virt/libvirt/connection.py
+@@ -39,6 +39,7 @@ Supports KVM, LXC, QEMU, UML, and XEN.
+ 
+ """
+ 
++import errno
+ import hashlib
+ import functools
+ import glob
+@@ -2112,6 +2113,10 @@ class LibvirtConnection(driver.ComputeDriver):
+                           locals())
+                 continue
+ 
++            # get the real disk size or
++            # raise a localized error if image is unavailable
++            dk_size = int(os.path.getsize(path))
++
+             disk_type = driver_nodes[cnt].get('type')
+             if disk_type == "qcow2":
+                 out, err = utils.execute('qemu-img', 'info', path)
+@@ -2121,13 +2126,9 @@ class LibvirtConnection(driver.ComputeDriver):
+                     if i.strip().find('virtual size') >= 0]
+                 virt_size = int(size[0])
+ 
+-                # real disk size:
+-                dk_size = int(os.path.getsize(path))
+-
+                 # backing file:(actual path:)
+                 backing_file = libvirt_utils.get_disk_backing_file(path)
+             else:
+-                dk_size = int(os.path.getsize(path))
+                 backing_file = ""
+                 virt_size = 0
+ 
+@@ -2155,11 +2156,18 @@ class LibvirtConnection(driver.ComputeDriver):
+         instances_name = self.list_instances()
+         instances_sz = 0
+         for i_name in instances_name:
+-            disk_infos = utils.loads(self.get_instance_disk_info(i_name))
+-            for info in disk_infos:
+-                i_vt_sz = int(info['virt_disk_size'])
+-                i_dk_sz = int(info['disk_size'])
+-                instances_sz += i_vt_sz - i_dk_sz
++            try:
++                disk_infos = utils.loads(self.get_instance_disk_info(i_name))
++                for info in disk_infos:
++                    i_vt_sz = int(info['virt_disk_size'])
++                    i_dk_sz = int(info['disk_size'])
++                    instances_sz += i_vt_sz - i_dk_sz
++            except OSError as e:
++                if e.errno == errno.ENOENT:
++                    LOG.error(_("Getting disk size of %(i_name)s: %(e)s") %
++                              locals())
++                else:
++                    raise
+ 
+         # Disk available least size
+         available_least_size = dk_sz_gb * (1024 ** 3) - instances_sz
diff --git a/openstack-nova.spec b/openstack-nova.spec
index 1580254..0016708 100644
--- a/openstack-nova.spec
+++ b/openstack-nova.spec
@@ -42,6 +42,9 @@ Patch0005: 0005-Fix-_sync_power_states-to-obtain-correct-state.patch
 Patch0006: 0006-nonblocking-libvirt-mode-using-tpool.patch
 Patch0007: 0007-Fixes-xml-representation-of-ext_srv_attr-extension.patch
 Patch0008: 0008-fix-useexisting-deprecation-warnings.patch
+Patch0009: 0009-Fix-backing-file-cp-resize-race-condition.patch
+Patch0010: 0010-ensure-atomic-manipulation-of-libvirt-disk-images.patch
+Patch0011: 0011-allow-the-compute-service-to-start-with-missing-libv.patch
 
 BuildArch:        noarch
 BuildRequires:    intltool
@@ -185,6 +188,9 @@ This package contains documentation files for nova.
 %patch0006 -p1
 %patch0007 -p1
 %patch0008 -p1
+%patch0009 -p1
+%patch0010 -p1
+%patch0011 -p1
 
 find . \( -name .gitignore -o -name .placeholder \) -delete
 
@@ -375,11 +381,12 @@ fi
 %endif
 
 %changelog
-* Fri Mar  8 2012 Pádraig Brady <P at draigBrady.com> - 2012.1-0.8.e4
+* Fri Mar 16 2012 Pádraig Brady <P at draigBrady.com> - 2012.1-0.8.e4
 - Include an upstream fix for errors logged when syncing power states
 - Support non blocking libvirt operations
 - Fix an exception when querying a server through the API (#803905)
 - Suppress deprecation warnings with db sync at install (#801302)
+- Avoid and cater for missing libvirt instance images (#801791)
 
 * Fri Mar  6 2012 Alan Pevec <apevec at redhat.com> - 2012.1-0.7.e4
 - Fixup permissions on nova config files


More information about the scm-commits mailing list