[yum/f18] Fix non-removal of corrupt metadata files. BZ 908870.

James Antill james at fedoraproject.org
Wed Feb 20 00:43:51 UTC 2013


commit eb9900ac1ab8e696c199d06eee27d9b520cc5eee
Author: James Antill <james at and.org>
Date:   Tue Feb 19 19:41:07 2013 -0500

    Fix non-removal of corrupt metadata files. BZ 908870.

 BZ-908870-MD-files-bad.patch |  241 ++++++++++++++++++++++++++++++++++++++++++
 yum.spec                     |    7 +-
 2 files changed, 247 insertions(+), 1 deletions(-)
---
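The first commit in the bundled patch deletes a downloaded metadata file whenever the download or its checksum check fails and the file on disk is already at or above the expected size. Without that, a later "reget" keeps resuming on top of a corrupt full-size file and the same bad data is reused on every run (BZ 908870). The same commit also unlinks a metadata file inside the checkMD() callback when its checksum does not match, so a corrupt copy fetched from one mirror is not silently reused when yum falls back to the next mirror. A minimal standalone sketch of the delete-on-failure check, using plain os calls rather than the misc.stat_f()/misc.unlink_f() helpers the real code below relies on:

    import os

    def _del_dl_file(local, size):
        """Delete a downloaded file unless it is still smaller than expected."""
        try:
            sd = os.stat(local)
        except OSError:
            return          # File doesn't exist, nothing to clean up.

        if size and sd.st_size < size:
            return          # Partial download; a reget can still finish it.

        # The file is the expected size (or bigger) yet still failed its
        # download/check, so remove it to force a clean re-download next time.
        try:
            os.unlink(local)
        except OSError:
            pass
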
diff --git a/BZ-908870-MD-files-bad.patch b/BZ-908870-MD-files-bad.patch
new file mode 100644
index 0000000..c8d79a8
--- /dev/null
+++ b/BZ-908870-MD-files-bad.patch
@@ -0,0 +1,241 @@
+commit 83fcbe745c3ee8f5f0fa29626a86c824db059b22
+Author: James Antill <james at and.org>
+Date:   Thu Feb 7 12:58:07 2013 -0500
+
+    Fix problems with mirrors like wtfnix.com, delete bad MD files.
+
+diff --git a/yum/yumRepo.py b/yum/yumRepo.py
+index dfcf8f9..efbc42a 100644
+--- a/yum/yumRepo.py
++++ b/yum/yumRepo.py
+@@ -940,6 +941,7 @@ Insufficient space in download directory %s
+                                     range=(start, end),
+                                     )
+             except URLGrabError, e:
++                self._del_dl_file(local, size)
+                 errstr = "failed to retrieve %s from %s\nerror was %s" % (relative, self, e)
+                 if self.mirrorurls:
+                     errstr +="\n  You could try running: yum clean expire-cache"
+@@ -961,6 +963,7 @@ Insufficient space in download directory %s
+                                            **kwargs
+                                            )
+             except URLGrabError, e:
++                self._del_dl_file(local, size)
+                 errstr = "failure: %s from %s: %s" % (relative, self, e)
+                 errors = getattr(e, 'errors', None)
+                 raise Errors.NoMoreMirrorsRepoError(errstr, errors)
+@@ -1652,6 +1655,18 @@ Insufficient space in download directory %s
+             raise URLGrabError(-1, 'repomd.xml does not match metalink for %s' %
+                                self)
+ 
++    def _del_dl_file(self, local, size):
++        """ Delete a downloaded file if it's the correct size. """
++
++        sd = misc.stat_f(local)
++        if not sd: # File doesn't exist...
++            return
++
++        if size and sd.st_size < size:
++            return # Still more to get...
++
++        # Is the correct size, or too big ... delete it so we'll try again.
++        misc.unlink_f(local)
+ 
+     def checkMD(self, fn, mdtype, openchecksum=False):
+         """check the metadata type against its checksum"""
+@@ -1681,7 +1696,7 @@ Insufficient space in download directory %s
+         if size is not None:
+             size = int(size)
+ 
+-        if fast:
++        if fast and skip_old_DBMD_check:
+             fsize = misc.stat_f(file)
+             if fsize is None: # File doesn't exist...
+                 return None
+@@ -1756,16 +1771,21 @@ Insufficient space in download directory %s
+ 
+         try:
+             def checkfunc(obj):
+-                self.checkMD(obj, mdtype)
++                try:
++                    self.checkMD(obj, mdtype)
++                except URLGrabError:
++                    #  Don't share MD among mirrors, in theory we could use:
++                    #     self._del_dl_file(local, int(thisdata.size))
++                    # ...but this is safer.
++                    misc.unlink_f(obj.filename)
++                    raise
+                 self.retrieved[mdtype] = 1
+             text = "%s/%s" % (self, mdtype)
+             if thisdata.size is None:
+                 reget = None
+             else:
+                 reget = 'simple'
+-                if os.path.exists(local):
+-                    if os.stat(local).st_size >= int(thisdata.size):
+-                        misc.unlink_f(local)
++                self._del_dl_file(local, int(thisdata.size))
+             local = self._getFile(relative=remote,
+                                   local=local, 
+                                   copy_local=1,
+commit c148eb10b798270b3d15087433c8efb2a79a69d0
+Author: James Antill <james at and.org>
+Date:   Mon Feb 18 16:17:06 2013 -0500
+
+    Use xattr data as well as file size for "fast checksumming".
+
+diff --git a/yum/yumRepo.py b/yum/yumRepo.py
+index efbc42a..8c38093 100644
+--- a/yum/yumRepo.py
++++ b/yum/yumRepo.py
+@@ -52,15 +52,54 @@ import stat
+ import errno
+ import tempfile
+ 
+-#  If you want yum to _always_ check the MD .sqlite files then set this to
+-# False (this doesn't affect .xml files or .sqilte files derived from them).
+-# With this as True yum will only check when a new repomd.xml or
+-# new MD is downloaded.
+-#  Note that with atomic MD, we can't have old MD lying around anymore so
+-# the only way we need this check is if someone does something like:
+-#   cp primary.sqlite /var/cache/yum/blah
+-# ...at which point you lose.
+-skip_old_DBMD_check = True
++# This is unused now, probably nothing uses it but it was global/public.
++skip_old_DBMD_check = False
++
++try:
++    import xattr
++    if not hasattr(xattr, 'get') or not hasattr(xattr, 'set'):
++        xattr = None # This is a "newer" API.
++except ImportError:
++    xattr = None
++
++#  The problem we are trying to solve here is that:
++#
++# 1. We rarely want to be downloading MD/pkgs/etc.
++# 2. We want to check those files are valid (match checksums) when we do
++#    download them.
++# 3. We _really_ don't want to checksum all the files everytime we
++#    run (100s of MBs).
++# 4. We can continue to download files from bad mirrors, or retry files due to
++#    C-c etc.
++#
++# ...we used to solve this by just checking the file size, and assuming the
++# files had been downloaded and checksumed as correct if that matched. But that
++# was error prone on bad mirrors, so now we store the checksum in an
++# xattr ... this does mean that if you can't store xattrs (Eg. NFS) you will
++# rechecksum everything constantly.
++
++def _xattr_get_chksum(filename, chktype):
++    if not xattr:
++        return None
++
++    try:
++        ret = xattr.get(filename, 'user.yum.checksum.' + chktype)
++    except: # Documented to be "EnvironmentError", but make sure
++        return None
++
++    return ret
++
++def _xattr_set_chksum(filename, chktype, chksum):
++    if not xattr:
++        return None
++
++    try:
++        xattr.set(filename, 'user.yum.checksum.' + chktype, chksum)
++    except:
++        return False # Data too long. = IOError ... ignore everything.
++
++    return True
++
+ 
+ warnings.simplefilter("ignore", Errors.YumFutureDeprecationWarning)
+ 
+@@ -228,7 +267,7 @@ class YumPackageSack(packageSack.PackageSack):
+         # get rid of all this stuff we don't need now
+         del repo.cacheHandler
+ 
+-    def _check_uncompressed_db_gen(self, repo, mdtype, fast=True):
++    def _check_uncompressed_db_gen(self, repo, mdtype):
+         """return file name of db in gen/ dir if good, None if not"""
+ 
+         mydbdata         = repo.repoXML.getData(mdtype)
+@@ -238,7 +277,7 @@ class YumPackageSack(packageSack.PackageSack):
+         db_un_fn         = mdtype + '.sqlite'
+ 
+         if not repo._checkMD(compressed_fn, mdtype, data=mydbdata,
+-                             check_can_fail=fast, fast=fast):
++                             check_can_fail=True):
+             return None
+ 
+         ret = misc.repo_gen_decompress(compressed_fn, db_un_fn,
+@@ -261,8 +300,7 @@ class YumPackageSack(packageSack.PackageSack):
+         result = None
+ 
+         if os.path.exists(db_un_fn):
+-            if skip_old_DBMD_check and repo._using_old_MD:
+-                return db_un_fn
++
+ 
+             try:
+                 repo.checkMD(db_un_fn, mdtype, openchecksum=True)
+@@ -296,7 +334,6 @@ class YumRepository(Repository, config.RepoConf):
+                                               # eventually want
+         self.repoMDFile = 'repodata/repomd.xml'
+         self._repoXML = None
+-        self._using_old_MD = None
+         self._oldRepoMDData = {}
+         self.cache = 0
+         self.mirrorlistparsed = 0
+@@ -1407,7 +1444,6 @@ Insufficient space in download directory %s
+             self._revertOldRepoXML()
+             return False
+ 
+-        self._using_old_MD = caching
+         if caching:
+             return False # Skip any work.
+ 
+@@ -1673,7 +1709,7 @@ Insufficient space in download directory %s
+         return self._checkMD(fn, mdtype, openchecksum)
+ 
+     def _checkMD(self, fn, mdtype, openchecksum=False,
+-                 data=None, check_can_fail=False, fast=False):
++                 data=None, check_can_fail=False):
+         """ Internal function, use .checkMD() from outside yum. """
+ 
+         thisdata = data # So the argument name is nicer
+@@ -1696,17 +1732,15 @@ Insufficient space in download directory %s
+         if size is not None:
+             size = int(size)
+ 
+-        if fast and skip_old_DBMD_check:
++        l_csum = _xattr_get_chksum(file, r_ctype)
++        if l_csum:
+             fsize = misc.stat_f(file)
+-            if fsize is None: # File doesn't exist...
+-                return None
+-            if size is None:
+-                return 1
+-            if size == fsize.st_size:
+-                return 1
+-            if check_can_fail:
+-                return None
+-            raise URLGrabError(-1, 'Metadata file does not match size')
++            if fsize is not None: # We just got an xattr, so it should be there
++                if size is None and l_csum == r_csum:
++                    return 1
++                if size == fsize.st_size and l_csum == r_csum:
++                    return 1
++            # Anything goes wrong, run the checksums as normal...
+ 
+         try: # get the local checksum
+             l_csum = self._checksum(r_ctype, file, datasize=size)
+@@ -1716,6 +1750,7 @@ Insufficient space in download directory %s
+             raise URLGrabError(-3, 'Error performing checksum')
+ 
+         if l_csum == r_csum:
++            _xattr_set_chksum(file, r_ctype, l_csum)
+             return 1
+         else:
+             if check_can_fail:
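The second commit above replaces the old size-only "fast check" for the .sqlite metadata with a checksum cached in an extended attribute: once a file has been checksummed successfully, the result is stored under user.yum.checksum.<type>, and later runs accept the file only when both its size and the cached checksum still match (hence the new pyxattr dependency in the spec change below). A rough sketch of the two helpers, assuming the pyxattr module's get/set API; the real functions in the patch are _xattr_get_chksum() and _xattr_set_chksum():

    try:
        import xattr                     # pyxattr
        if not hasattr(xattr, 'get') or not hasattr(xattr, 'set'):
            xattr = None                 # needs the newer get/set API
    except ImportError:
        xattr = None

    def cached_checksum(filename, chktype):
        """Return the checksum stored on filename, or None if unavailable."""
        if xattr is None:
            return None
        try:
            return xattr.get(filename, 'user.yum.checksum.' + chktype)
        except EnvironmentError:
            return None                  # no xattr stored, or fs lacks xattrs

    def store_checksum(filename, chktype, chksum):
        """Remember a verified checksum so later runs can skip re-hashing."""
        if xattr is None:
            return False
        try:
            xattr.set(filename, 'user.yum.checksum.' + chktype, chksum)
        except EnvironmentError:
            return False                 # e.g. NFS without user xattr support
        return True

On an xattr-capable filesystem the stored value can be inspected with something like "getfattr -n user.yum.checksum.sha256 /var/cache/yum/<repo>/gen/primary.sqlite" (checksum type and path here are only examples). Filesystems without user xattr support simply fall back to full re-checksumming, as the comment block in the patch notes.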
diff --git a/yum.spec b/yum.spec
index 3c04245..15c7050 100644
--- a/yum.spec
+++ b/yum.spec
@@ -34,6 +34,7 @@ Patch20: yum-manpage-files.patch
 Patch21: yum-completion-helper.patch
 Patch22: BZ-881756-include-langpacks.patch
 Patch23: BZ-885139-not-enough-arguments.patch
+Patch24: BZ-908870-MD-files-bad.patch
 
 URL: http://yum.baseurl.org/
 BuildArchitectures: noarch
@@ -60,7 +61,7 @@ Requires: yum-metadata-parser >= 1.1.0
 Requires: pygpgme
 # rawhide is >= 0.5.3-7.fc18 ... as this is added.
 Requires: pyliblzma
-
+Requires: pyxattr
 
 Conflicts: rpm >= 5-0
 # Zif is a re-implementation of yum in C, however:
@@ -150,6 +151,7 @@ Install this package if you want auto yum updates nightly via cron.
 %patch21 -p1
 %patch22 -p1
 %patch23 -p1
+%patch24 -p1
 %patch1 -p1
 
 %build
@@ -325,6 +327,9 @@ exit 0
 %endif
 
 %changelog
+* Tue Feb 19 2013 James Antill <james at fedoraproject.org> - 3.4.3-51
+- Fix non-removal of corrupt metadata files. BZ 908870.
+
 * Tue Feb 19 2013 Zdenek Pavlas <zpavlas at redhat.com> - 3.4.3-50
 - Revert last commit, bump the obsoleted range.  BZ 905438
 

