[PATCH] distgit: Add a script to make the new paths in the lookaside cache

Mathieu Bridon bochecha at fedoraproject.org
Tue Jun 30 16:49:21 UTC 2015


From: Mathieu Bridon <bochecha at daitauha.fr>

We are migrating from the following path scheme:
    /%(srpmname)s/%(filename)s/%(hash)s/%(filename)s

To:
    /%(srpmname)s/%(filename)s/%(hashtype)s/%(hash)s/%(filename)s

As a result, we need to hardlink all the files existing under the old
path to their new path.

This script does just that.

Given that it should only ever be run once anyway, it is added as a
file to the distgit role, but not set to be installed anywhere.
---
 roles/distgit/files/make-new-lookaside-links | 177 +++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100755 roles/distgit/files/make-new-lookaside-links

diff --git a/roles/distgit/files/make-new-lookaside-links b/roles/distgit/files/make-new-lookaside-links
new file mode 100755
index 0000000..c5c9b5d
--- /dev/null
+++ b/roles/distgit/files/make-new-lookaside-links
@@ -0,0 +1,177 @@
+#!/usr/bin/python
+
+
+# Copyright (c) 2015 - Mathieu Bridon <bochecha at daitauha.fr>
+#
+# This script is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This script is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this script.  If not, see <http://www.gnu.org/licenses/>.
+
+
+import argparse
+import errno
+import hashlib
+import os
+import sys
+
+
+def get_args():
+    """Parse the command line of the script.
+
+    Returns:
+        The ``argparse`` namespace with three attributes: ``perform``
+        (False unless ``--perform`` is given), ``link_hashtype`` ('md5'
+        or 'sha512', 'md5' by default) and ``lookasideroot`` (the
+        positional path to the root of the lookaside cache).
+    """
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('--perform', action='store_true', default=False,
+                        help="Actually do the hardlinking (default is to "
+                             "report only)")
+    # Adjacent string literals are joined as-is, so the first chunk must
+    # carry its own trailing space.
+    parser.add_argument('--link-hashtype', default='md5',
+                        choices=('md5', 'sha512'),
+                        help='The hash type to use in the new path of the '
+                             'hardlink. (default: "md5")')
+    parser.add_argument('lookasideroot',
+                        help="The full path to the root of the lookaside "
+                             "cache")
+
+    return parser.parse_args()
+
+
+def info(msg):
+    """Write ``msg`` followed by a newline to standard output."""
+    line = "%s\n" % msg
+    sys.stdout.write(line)
+
+
+def error(msg):
+    """Write ``msg`` to standard error, prefixed with "ERROR: "."""
+    line = "ERROR: %s\n" % msg
+    sys.stderr.write(line)
+
+
+def die(msg):
+    """Write ``msg`` to standard error as "FATAL: ..." and exit with 1."""
+    line = "FATAL: %s\n" % msg
+    sys.stderr.write(line)
+    sys.exit(1)
+
+
+def get_file_hash(full_path, hashtype):
+    """Return the hex digest of the file at ``full_path``.
+
+    ``hashtype`` is an algorithm name handed to ``hashlib.new``. The file
+    is opened in binary mode and read in 4096-byte chunks rather than
+    loaded in one go.
+    """
+    digest = hashlib.new(hashtype)
+
+    with open(full_path, 'rb') as stream:
+        # iter() with a sentinel keeps calling read() until it returns
+        # the empty bytes string, i.e. until end of file.
+        for block in iter(lambda: stream.read(4096), b''):
+            digest.update(block)
+
+    return digest.hexdigest()
+
+
+def verify_source(dir, expected_name, expected_hash, hashtype):
+    """Check that ``dir`` holds exactly one correctly named, intact file.
+
+    Args:
+        dir: directory expected to contain a single source file.
+        expected_name: the name that single entry must have.
+        expected_hash: the hex digest the file content must have.
+        hashtype: hash algorithm name passed on to ``get_file_hash``.
+
+    Returns:
+        The path of the source file (``dir`` joined with
+        ``expected_name``).
+
+    Raises:
+        Exception: if ``dir`` is empty, holds more than one entry, or its
+            only entry is not named ``expected_name``.
+
+    A digest mismatch is not raised as an exception: it is reported
+    through ``die()``, which terminates the script.
+    """
+    entries = os.listdir(dir)
+    count = len(entries)
+
+    if count == 0:
+        raise Exception("No source file in %s" % dir)
+
+    if count > 1:
+        raise Exception("Multiple source files in %s: %s" % (dir, entries))
+
+    found_name = entries[0]
+
+    if found_name != expected_name:
+        raise Exception("Badly named source file in %s: %s"
+                        % (dir, found_name))
+
+    source_path = os.path.join(dir, expected_name)
+    actual_hash = get_file_hash(source_path, hashtype)
+
+    if actual_hash != expected_hash:
+        die("Invalid %s for %s: %s"
+            % (hashtype.upper(), source_path, actual_hash))
+
+    return source_path
+
+
+def makedirs(dir):
+    """Create ``dir`` and any missing parents; an existing directory is fine.
+
+    Raises:
+        OSError: if the creation fails for any reason other than ``dir``
+            already being a directory. This includes ``dir`` existing as
+            something that is not a directory (e.g. a regular file).
+    """
+    try:
+        os.makedirs(dir)
+
+    except OSError as e:
+        # EEXIST is also reported when the path exists but is not a
+        # directory; only an actual directory counts as success.
+        if e.errno != errno.EEXIST or not os.path.isdir(dir):
+            # A bare raise keeps the original traceback.
+            raise
+
+
+def hardlink(src, dst):
+    """Make ``dst`` a hardlink of ``src``, creating parent directories.
+
+    If ``dst`` already exists and is the very same file as ``src`` (same
+    device and inode), nothing is touched. If something else exists at
+    ``dst``, it is removed and replaced by the hardlink.
+
+    Raises:
+        OSError: if a directory or the link cannot be created, or if
+            either path cannot be examined or removed.
+    """
+    makedirs(os.path.dirname(dst))
+
+    try:
+        os.link(src, dst)
+
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            # A bare raise keeps the original traceback.
+            raise
+
+        # lstat() rather than stat(): compare the directory entries
+        # themselves, without following symlinks.
+        if os.path.samestat(os.lstat(src), os.lstat(dst)):
+            # Already linked (e.g. the script is being run again): leave
+            # it alone instead of removing and recreating the same link,
+            # which would briefly leave nothing at dst.
+            return
+
+        # Something else exists at the new-style path: overwrite it with
+        # a hardlink.
+        os.unlink(dst)
+        os.link(src, dst)
+
+
+def _list_entries(path):
+    """Return the entries of directory ``path``; on failure report it and
+    return an empty list."""
+    try:
+        return os.listdir(path)
+
+    except OSError as e:
+        error(e)
+        return []
+
+
+def _verify_new_style(hashtype_dir, source_name, hashtype):
+    """Verify every file stored under a new-style hashtype directory."""
+    for file_hash in _list_entries(hashtype_dir):
+        try:
+            verify_source(os.path.join(hashtype_dir, file_hash),
+                          source_name, file_hash, hashtype)
+        except Exception as e:
+            error(e)
+
+
+def main(root, link_hashtype, perform=False):
+    """Walk the lookaside cache and hardlink old-style paths to new ones.
+
+    The tree under ``root`` is read as
+    ``<package>/<source name>/<entry>/<source name>``. An entry named
+    'md5' or 'sha512' is a new-style hashtype directory whose content is
+    only verified. Any other entry is treated as an old-style MD5 hash:
+    the file is verified, then linked to
+    ``<package>/<source name>/<link_hashtype>/<hash>/<source name>``.
+
+    Args:
+        root: path to the root of the lookaside cache; the process
+            changes its working directory to it.
+        link_hashtype: hash type used in the new path ('md5' or
+            'sha512'). When it is not 'md5' the hash is recomputed from
+            the file content.
+        perform: when False (the default) the links are only reported,
+            not created.
+
+    Failing to enter or list ``root`` goes through ``die()``. Entries
+    that cannot be listed or verified are reported with ``error()`` and
+    skipped.
+    """
+    try:
+        os.chdir(root)
+        info("All future paths relative to %s" % root)
+        # List the new working directory rather than ``root`` itself: a
+        # relative ``root`` no longer points to the cache after chdir.
+        pkg_names = os.listdir(os.curdir)
+
+    except OSError as e:
+        die(e)
+
+    for pkg_name in pkg_names:
+        for source_name in _list_entries(pkg_name):
+            source_dir = os.path.join(pkg_name, source_name)
+
+            for entry in _list_entries(source_dir):
+                if entry in ('md5', 'sha512'):
+                    # This is not a hash, but a new-style path containing
+                    # the hashtype. Let's just verify what it contains
+                    _verify_new_style(os.path.join(source_dir, entry),
+                                      source_name, entry)
+                    continue
+
+                # This is what is used for hashes which are not under a
+                # hashtype folder
+                hashtype = 'md5'
+
+                try:
+                    source_path = verify_source(
+                        os.path.join(source_dir, entry), source_name, entry,
+                        hashtype)
+                except Exception as e:
+                    error(e)
+                    continue
+
+                if link_hashtype != hashtype:
+                    new_hash = get_file_hash(source_path, link_hashtype)
+
+                else:
+                    new_hash = entry
+
+                new_path = os.path.join(source_dir, link_hashtype, new_hash,
+                                        source_name)
+                info("Hardlinking: %s to %s" % (source_path, new_path))
+
+                if perform:
+                    hardlink(source_path, new_path)
+
+
+if __name__ == '__main__':
+    # Script entry point: parse the command line, process the cache, then
+    # exit with a success status.
+    options = get_args()
+    main(options.lookasideroot, options.link_hashtype,
+         perform=options.perform)
+    sys.exit(0)
-- 
2.4.3



More information about the rel-eng mailing list