Freeze Break: fix pkgdb_sync_git_branches.py for git being too clever

Ralph Bean rbean at redhat.com
Fri Mar 6 21:17:27 UTC 2015


On Fri, Mar 06, 2015 at 12:24:57PM -0700, Kevin Fenzi wrote:
> ...and we discovered a problem with this version... 
> 
> Look for a revised one soon. ;( 
> 
> kevin

All right, here's a fixed version.

We discussed it further on IRC.  This version simplifies the way we shell out,
which makes it less error-prone, but it also introduces some more complexity in
the form of a threadpool.  On its own, the new method of assessing existing
branches (running `git branch` in each repo) took something like 900 seconds to
complete.  The threadpool cuts that down significantly, to 120 seconds.



--- pkgdb_sync_git_branches.py.orig 2015-03-06 15:49:32.421638745 -0500
+++ pkgdb_sync_git_branches.py.mine 2015-03-06 16:07:19.308145295 -0500
@@ -26,8 +26,10 @@
 
 """
 
+import multiprocessing.pool
 import os
 import subprocess
+import time
 
 import requests
 
@@ -52,6 +54,7 @@
 MKBRANCH = '/usr/local/bin/mkbranch'
 SETUP_PACKAGE = '/usr/local/bin/setup_git_package'
 
+THREADS = 20
 VERBOSE = False
 
 
@@ -63,7 +66,7 @@
     pass
 
 
-def _invoke(program, args):
+def _invoke(program, args, cwd=None):
     '''Run a command and raise an exception if an error occurred.
 
     :arg program: The program to invoke
@@ -75,27 +78,25 @@
     cmdLine.extend(args)
     if VERBOSE:
         print ' '.join(cmdLine)
+        print '  in', cwd
 
-    if VERBOSE:
-        program = subprocess.Popen(
-            cmdLine, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    else:
-        program = subprocess.Popen(cmdLine, stderr=subprocess.STDOUT)
+    program = subprocess.Popen(
+        cmdLine, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd)
+
+    stdout, stderr = program.communicate()
 
-    retCode = program.wait()
-    if retCode != 0:
+    if program.returncode != 0:
         e = ProcessError()
-        e.returnCode = retCode
+        e.returnCode = program.returncode
         e.cmd = ' '.join(cmdLine)
-        if VERBOSE:
-            output = program.stdout.read()
-            e.message = 'Error, "%s" returned %s: %s' % (
-                e.cmd, e.returnCode, output)
-            print e.message
-        else:
-            e.message = 'Error, "%s" returned %s' % (e.cmd, e.returnCode)
+        e.cwd = cwd
+        e.message = 'Error, "%s" (in %r) returned %s\n  stdout: %s\n  stderr: %s' % (
+            e.cmd, e.cwd, e.returnCode, stdout, stderr)
+        print e.message
         raise e
 
+    return stdout.strip()
+
 
 def _create_branch(pkgname, branch):
     '''Create a specific branch for a package.
@@ -104,34 +105,33 @@
     :arg branch: Name of the branch to create
 
     '''
+    branch = branch.replace('*', '').strip()
     if branch == 'master':
         print 'ERROR: Proudly refusing to create master branch. Invalid repo?'
         print 'INFO: Please check %s repo' % pkgname
         return
 
-    branchpath = os.path.join(
-        GIT_FOLDER, '%s.git' % pkgname, 'refs/heads', branch)
-    if not os.path.exists(branchpath):
-        try:
-            _invoke(MKBRANCH, [branch, pkgname])
-        except ProcessError, e:
-            if e.returnCode == 255:
-                # This is a warning, not an error
-                return
-            raise
-        finally:
-            fedmsg.publish(
-                topic='branch',
-                modname='git',
-                msg=dict(
-                    agent='pkgdb',
-                    name=pkgname,
-                    branch=branch,
-                ),
-            )
-    elif VERBOSE:
-            print 'Was asked to create branch %s of package %s, but it '\
-                'already exists' % (pkgname, branch)
+    branches = get_git_branch(pkgname)
+    if branch in branches:
+       print 'ERROR: Refusing to create a branch %s that exists' % branch
+       return
+
+    try:
+        _invoke(MKBRANCH, [branch, pkgname])
+        fedmsg.publish(
+            topic='branch',
+            modname='git',
+            msg=dict(
+                agent='pkgdb',
+                name=pkgname,
+                branch=branch,
+            ),
+        )
+    except ProcessError, e:
+        if e.returnCode == 255:
+            # This is a warning, not an error
+            return
+        raise
 
 
 def pkgdb_pkg_branch():
@@ -167,8 +167,11 @@
         print 'Could not find %s' % git_folder
         return set()
 
-    head_folder = os.path.join(git_folder, 'refs', 'heads')
-    return set(os.listdir(head_folder))
+    branches = [
+       lclbranch.replace('*', '').strip()
+       for lclbranch in _invoke('git', ['branch'], cwd=git_folder).split('\n')
+    ]
+    return set(branches)
 
 
 def branch_package(pkgname, branches):
@@ -182,10 +185,11 @@
         print 'Fixing package %s for branches %s' % (pkgname, branches)
 
     # Create the devel branch if necessary
-    if not os.path.exists(
-            os.path.join(GIT_FOLDER, '%s.git' % pkgname)):
+    exists = os.path.exists(os.path.join(GIT_FOLDER, '%s.git' % pkgname))
+    if not exists or 'master' not in get_git_branch(pkgname):
         _invoke(SETUP_PACKAGE, [pkgname])
-        branches.remove('master')  # SETUP_PACKAGE creates master
+        if 'master' in branches:
+            branches.remove('master')  # SETUP_PACKAGE creates master
         fedmsg.publish(
             topic='branch',
             modname='git',
@@ -209,10 +213,12 @@
 
     local_pkgs = set(os.listdir(GIT_FOLDER))
     local_pkgs = set([it.replace('.git', '') for it in local_pkgs])
+    print "Found %i local packages" % len(local_pkgs)
 
     pkgdb_info = pkgdb_pkg_branch()
 
     pkgdb_pkgs = set(pkgdb_info.keys())
+    print "Found %i pkgdb packages" % len(pkgdb_pkgs)
 
     ## Commented out as we keep the git of retired packages while they won't
     ## show up in the information retrieved from pkgdb.
@@ -225,19 +231,36 @@
         print 'Some packages are present in pkgdb but not locally:'
         print ', '.join(sorted(pkgdb_pkgs - local_pkgs))
 
+
+    print "Finding the lists of local branches for local repos."
+    start = time.time()
+    if THREADS == 1:
+        git_branch_lookup = map(get_git_branch, sorted(pkgdb_info))
+    else:
+        threadpool = multiprocessing.pool.ThreadPool(processes=THREADS)
+        git_branch_lookup = threadpool.map(get_git_branch, sorted(pkgdb_info))
+
+    # Zip that list of results up into a lookup dict.
+    git_branch_lookup = dict(zip(sorted(pkgdb_info), git_branch_lookup))
+
+    print "Found all local git branches in %0.2fs" % (time.time() - start)
+
     tofix = set()
     for pkg in sorted(pkgdb_info):
         pkgdb_branches = pkgdb_info[pkg]
-        git_branches = get_git_branch(pkg)
+        git_branches = git_branch_lookup[pkg]
         diff = (pkgdb_branches - git_branches)
         if diff:
             print '%s missing: %s' % (pkg, ','.join(sorted(diff)))
+            print pkgdb_branches, git_branches
             tofix.add(pkg)
             branch_package(pkg, diff)
 
     if tofix:
         print 'Packages fixed (%s): %s' % (
             len(tofix), ', '.join(sorted(tofix)))
+    else:
+        print 'Didn\'t find any packages to fix.'
 
 
 if __name__ == '__main__':
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <http://lists.fedoraproject.org/pipermail/infrastructure/attachments/20150306/05a7219e/attachment.sig>


More information about the infrastructure mailing list