Mash multilib optimization

Toshio Kuratomi a.badger at gmail.com
Sun Apr 26 12:47:42 UTC 2015


Hey guys, I wasn't able to get mash to run locally so I never got profiling
of it done this vacation.  But I did see several obvious ways to optimize
the multilib code.  So here's a patch that will probably help make that
piece of the puzzle faster.

There's a few conceptual changes:

1) Only create the lists of packages and files once, when we create the
   class rather than every time we instantiate it (or worse, everytime we
   run the select() method which is what it was doing before).
2) Use frozensets() instead of lists wherever we're doing a containment test
   ('string' in set_of_packages).  Sets are a hashed lookup whereas lists
   have to be searched linearly.
3) Reduce the calls to fnmatch.  fnmatch uses a regex under the hood so it's
   not the cheapest operation in the world (although the regex compilation
   is cached so it's not the worst thing either).  I was able to change
   a few fnmatches into containment tests instead and others I was able to
   put behind a cheaper conditional which skips those tests altogether if
   the cheaper conditional is satisfied

Without profiling a mash run I don't know how much this will speed up
mashing but talking to people it seemed like the multilib portion was taking
30-45 minutes to complete and since this was all low hanging fruit it seemed
like a good place to start.

-Toshio
-------------- next part --------------
From f79bcbce632192c86c402308c218c4133cef6691 Mon Sep 17 00:00:00 2001
From: Toshio Kuratomi <toshio at fedoraproject.org>
Date: Sun, 26 Apr 2015 05:45:57 -0700
Subject: [PATCH] Multilib mashing optimization

* Only create the lists of packages and directories once, when the class
  is created (not at instantiation or worse, everytime the method is
  invoked)
* Use sets instead of lists for containment tests
* Reduce calling of fnmatch
---
 mash/multilib.py | 172 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 96 insertions(+), 76 deletions(-)

diff --git a/mash/multilib.py b/mash/multilib.py
index f2594b0..9cb37d7 100644
--- a/mash/multilib.py
+++ b/mash/multilib.py
@@ -11,15 +11,18 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
+import os
 from fnmatch import fnmatch
 
-class MultilibMethod:
+class MultilibMethod(object):
+    PREFER_64 = frozenset(( 'gdb', 'frysk', 'systemtap', 'systemtap-runtime', 'ltrace', 'strace' ))
+
     def __init__(self, dummy):
         self.name = 'base'
+
     def select(self, po):
-        prefer_64 = [ 'gdb', 'frysk', 'systemtap', 'systemtap-runtime', 'ltrace', 'strace' ]
         if po.arch.find('64') != -1:
-            if po.name in prefer_64:
+            if po.name in self.PREFER_64:
                 return True
             if po.name.startswith('kernel'):
                 for (p_name, p_flag, (p_e, p_v, p_r)) in po.provides:
@@ -27,20 +30,20 @@ class MultilibMethod:
                         return True
         return False
 
-class NoMultilibMethod:
+class NoMultilibMethod(object):
     def __init__(self, dummy):
         self.name = 'none'
-        
+
     def select(self, po):
         return False
 
 class AllMultilibMethod(MultilibMethod):
     def __init__(self, dummy):
         self.name = 'all'
-    
+
     def select(self, po):
         return True
-    
+
 class FileMultilibMethod(MultilibMethod):
     def __init__(self, file):
         self.name = 'file'
@@ -53,16 +56,17 @@ class FileMultilibMethod(MultilibMethod):
                 line = line.strip()
                 if not line.startswith('#'):
                     self.list.append(line)
-    
+
     def select(self, po):
         for item in self.list:
             if fnmatch(po.name, item):
                 return True
         return False
 
-class KernelMultilibMethod:
+class KernelMultilibMethod(object):
     def __init__(self, dummy):
         self.name = 'base'
+
     def select(self, po):
         if po.arch.find('64') != -1:
             if po.name.startswith('kernel'):
@@ -71,28 +75,52 @@ class KernelMultilibMethod:
                         return True
         return False
 
-class YabootMultilibMethod:
+class YabootMultilibMethod(object):
     def __init__(self, dummy):
         self.name = 'base'
+
     def select(self, po):
         if po.arch in ['ppc'] :
             if po.name.startswith('yaboot'):
                 return True
         return False
 
+
 class RuntimeMultilibMethod(MultilibMethod):
+    BLACKLIST = frozenset('tomcat-native')
+    WHITELIST = frozenset(('libgnat', 'wine', 'lmms-vst', 'nspluginwrapper', 'libflashsupport', 'valgrind', 'perl-libs',  'redhat-lsb', 'yaboot', 'syslinux-extlinux-nonlinux', 'syslinux-nonlinux', 'syslinux-tftpboot'))
+    ROOTLIBDIRS = frozenset(('/lib', '/lib64'))
+    USRLIBDIRS = frozenset(('/usr/lib', '/usr/lib64'))
+    LIBDIRS = ROOTLIBDIRS.union(USRLIBDIRS)
+    OPROFILEDIRS = frozenset(('/usr/lib/oprofile', '/usr/lib64/oprofile'))
+    WINEDIRS = frozenset(('/usr/lib/wine', '/usr/lib64/wine'))
+    SANEDIRS = frozenset(('/usr/lib/sane', '/usr/lib64/sane'))
+
+    by_dir = set()
+
+    # alsa, dri, gtk-accessibility, scim-bridge-gtk, krb5, sasl, vdpau
+    by_dir.update(frozenset(os.path.join('/usr/lib', p) for p in ('alsa-lib',
+        'dri', 'gtk-2.0/modules', 'gtk-2.0/immodules', 'krb5/plugins',
+        'sasl2', 'vdpau')))
+    by_dir.update(frozenset(os.path.join('/usr/lib64', p) for p in ('alsa-lib',
+        'dri', 'gtk-2.0/modules', 'gtk-2.0/immodules', 'krb5/plugins',
+        'sasl2', 'vdpau')))
+
+    # pam
+    by_dir.update(frozenset(os.path.join(p, 'security') for p in ROOTLIBDIRS))
+
+    # lsb
+    by_dir.add('/etc/lsb-release.d')
+
     def __init__(self, dummy):
         self.name = 'runtime'
-    
+
     def select(self, po):
-        libdirs = [ '/usr/lib', '/usr/lib64', '/lib', '/lib64' ]
-        blacklist = [ 'tomcat-native' ]
-        whitelist = [ 'libgnat', 'wine', 'lmms-vst', 'nspluginwrapper', 'libflashsupport', 'valgrind', 'perl-libs', 'redhat-lsb', 'yaboot', 'syslinux-extlinux-nonlinux', 'syslinux-nonlinux', 'syslinux-tftpboot' ]
-        if po.name in blacklist:
+        if po.name in self.BLACKLIST:
             return False
-        if po.name in whitelist:
+        if po.name in self.WHITELIST:
             return True
-        if MultilibMethod.select(self,po):
+        if MultilibMethod.select(self, po):
             return True
         if po.name.startswith('kernel'):
             for (p_name, p_flag, (p_e, p_v, p_r)) in po.provides:
@@ -100,57 +128,49 @@ class RuntimeMultilibMethod(MultilibMethod):
                     return False
         for file in po.returnFileEntries():
             (dirname, filename) = file.rsplit('/', 1)
+
             # libraries in standard dirs
-            if dirname in libdirs and fnmatch(filename, '*.so.*'):
-                return True
-            # dri
-            if dirname in [ '/usr/lib/dri', '/usr/lib64/dri' ]:
-                return True
-            # vdpau
-            if dirname in [ '/usr/lib/vdpau', '/usr/lib64/vdpau' ]:
-                return True
-            # krb5
-            if dirname in [ '/usr/lib/krb5/plugins', '/usr/lib64/krb5/plugins' ]:
-                return True
-            # pam
-            if dirname in [ '/lib/security', '/lib64/security' ]:
-                return True
-            # sasl
-            if dirname in [ '/usr/lib/sasl2', '/usr/lib64/sasl2' ]:
-                return True
-            # nss
-            if dirname in [ '/lib', '/lib64' ] and filename.startswith('libnss_'):
-                return True
-            # alsa
-            if dirname in [ '/usr/lib/alsa-lib', '/usr/lib64/alsa-lib' ]:
+            if dirname in self.LIBDIRS and fnmatch(filename, '*.so.*'):
                 return True
-            # lsb
-            if dirname == '/etc/lsb-release.d':
+            if dirname in self.by_dir:
                 return True
             # mysql, qt, etc.
             if dirname == '/etc/ld.so.conf.d' and filename.endswith('.conf'):
                 return True
-	    # gtk2-engines
-	    if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/engines'):
-		return True
-            # accessibility
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/modules'):
+            # nss (Some nss modules end in .so instead of .so.X)
+            # db (db modules end in .so instead of .so.X)
+            if dirname in self.ROOTLIBDIRS and (filename.startswith('libnss_') or filename.startswith('libdb-')):
                 return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/modules'):
-                return True
-	    # scim-bridge-gtk	
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/immodules'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/immodules'):
-                return True
-            # images
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/loaders'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gdk-pixbuf-2.0/*/loaders'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/printbackends'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/filesystems'):
+            # Optimization:
+            # No tests beyond here for things outside of LIBDIRS
+            if not dirname.startswith(tuple(self.USRLIBDIRS)):
+                # The dirname does not start with a USRLIBDIR so we can test
+                # the next file
+                continue
+
+            if dirname.startswith(('/usr/lib/gtk-2.0', '/usr/lib64/gtk-2.0')):
+                # gtk2-engines
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/engines'):
+                    return True
+                # accessibility
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/modules'):
+                    return True
+                # scim-bridge-gtk
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/immodules'):
+                    return True
+                # images
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/loaders'):
+                    return True
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/printbackends'):
+                    return True
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/filesystems'):
+                    return True
+                # Optimization:
+                # No tests beyond here for things in /usr/lib*/gtk-2.0
+                continue
+
+            # gstreamer
+            if dirname.startswith(('/usr/lib/gstreamer-', '/usr/lib64/gstreamer-')):
                 return True
             # qt/kde fun
             if fnmatch(dirname, '/usr/lib*/qt*/plugins/*'):
@@ -160,39 +180,39 @@ class RuntimeMultilibMethod(MultilibMethod):
             # qml
             if fnmatch(dirname, '/usr/lib*/qt5/qml/*'):
                 return True
-            # gstreamer
-            if fnmatch(dirname, '/usr/lib*/gstreamer-*'):
+            # images
+            if fnmatch(dirname, '/usr/lib*/gdk-pixbuf-2.0/*/loaders'):
                 return True
             # xine-lib
             if fnmatch(dirname, '/usr/lib*/xine/plugins/*'):
                 return True
             # oprofile
-            if fnmatch(dirname, '/usr/lib*/oprofile') and fnmatch(filename, '*.so.*'):
+            if dirname in self.OPROFILEDIRS and fnmatch(filename, '*.so.*'):
                 return True
             # wine
-            if fnmatch(dirname, '/usr/lib*/wine') and filename.endswith('.so'):
-                return True
-            # db
-            if dirname in [ '/lib', '/lib64' ] and filename.startswith('libdb-'):
+            if dirname in self.WINEDIRS and filename.endswith('.so'):
                 return True
             # sane drivers
-            if dirname in [ '/usr/lib/sane', '/usr/lib64/sane' ] and filename.startswith('libsane-'):
+            if dirname in self.SANEDIRS and filename.startswith('libsane-'):
                 return True
         return False
 
 class DevelMultilibMethod(RuntimeMultilibMethod):
+    BLACKLIST = frozenset(('dmraid-devel', 'kdeutils-devel', 'mkinitrd-devel',
+        'java-1.5.0-gcj-devel', 'java-1.7.0-icedtea-devel', 'php-devel',
+        'java-1.6.0-openjdk-devel', 'java-1.7.0-openjdk-devel',
+        'java-1.8.0-openjdk-devel' ))
+    WHITELIST = frozenset()
+
     def __init__(self, dummy):
         self.name = 'devel'
-    
+
     def select(self, po):
-        blacklist = ['dmraid-devel', 'kdeutils-devel', 'mkinitrd-devel', 'java-1.5.0-gcj-devel', 'java-1.7.0-icedtea-devel', 'php-devel', 'java-1.6.0-openjdk-devel',
-                     'java-1.7.0-openjdk-devel', 'java-1.8.0-openjdk-devel' ]
-        whitelist = []
-        if po.name in blacklist:
+        if po.name in self.BLACKLIST:
             return False
-        if po.name in whitelist:
+        if po.name in self.WHITELIST:
             return True
-        if RuntimeMultilibMethod.select(self,po):
+        if RuntimeMultilibMethod.select(self, po):
             return True
         if po.name.startswith('ghc-'):
             return False
-- 
2.1.0

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://lists.fedoraproject.org/pipermail/rel-eng/attachments/20150426/93c9fc3e/attachment.sig>


More information about the rel-eng mailing list