Rebased Mash Multilib Optimization

Toshio Kuratomi a.badger at gmail.com
Wed May 6 19:34:28 UTC 2015


Attaching rebased mash multilib optimization patch.  In testing with
dgilmore today, this reduced the multilib section of mash to about 1.5
minutes of wall-clock time, which is a 50% savings.

-Toshio
-------------- next part --------------
From 30f3fc732fe4bd25fbb1e249136b0855b4f54ec2 Mon Sep 17 00:00:00 2001
From: Toshio Kuratomi <toshio at fedoraproject.org>
Date: Wed, 6 May 2015 12:14:15 -0700
Subject: [PATCH] Multilib mashing optimization

* Only create the lists of packages and directories once, when the class
  is created (not at instantiation or, worse, every time the method is
  invoked)
* Use sets instead of lists for containment tests
* Reduce calling of fnmatch
* Bypass groups of tests when we know earlier that they will never be true
---
 mash/config.py   |  56 +++++++++++++++++++---
 mash/multilib.py | 141 ++++++++++++++++++++++++++++++-------------------------
 2 files changed, 127 insertions(+), 70 deletions(-)

diff --git a/mash/config.py b/mash/config.py
index 57b6668..a2f928a 100644
--- a/mash/config.py
+++ b/mash/config.py
@@ -19,6 +19,48 @@ import string
 from ConfigParser import RawConfigParser
 
 from yum import config
+from yum.misc import read_in_items_from_dot_dir
+
+class SetOption(config.Option):
+    """An option that contains a set of strings.
+
+       This is a port of :class:`yum.config.ListOption` to return sets
+    """
+    def __init__(self, default=None, parse_default=False):
+        if default is None:
+            default = set()
+        super(SetOption, self).__init__(default, parse_default)
+
+    def parse(self, s):
+        """Convert a string from the config file into a set.  parses
+        globdir:paths as foo.d-style dirs.
+
+        :param s: The string to be converted to a set.  Commas and
+            whitespace are used as separators for the set.
+        :return: *s* converted to a set
+        """
+        # we need to allow for the '\n[whitespace]' continuation - easier
+        # to sub the \n with a space and then read the lines
+        s = s.replace('\n', ' ')
+        s = s.replace(',', ' ')
+        results = set()
+        for item in s.split():
+            if item.startswith('glob:'):
+                thisglob = item.replace('glob:', '')
+                results.update(read_in_items_from_dot_dir(thisglob))
+                continue
+            results.add(item)
+
+        return results
+
+    def tostring(self, value):
+        """Convert a set of strings to a string value.  This does the
+        opposite of the :meth:`parse` method above.
+
+        :param value: a list of values
+        :return: string representation of input
+        """
+        return '\n '.join(value)
 
 class MashConfig(config.BaseConfig):
     rpm_path = config.Option('Mash')
@@ -29,23 +71,23 @@ class MashConfig(config.BaseConfig):
     multilib = config.BoolOption(True)
     multilib_method = config.Option('devel')
     multilib_file = config.Option()
-    multilib_devel_whitelist = config.ListOption()
-    multilib_devel_blacklist = config.ListOption([
+    multilib_devel_whitelist = SetOption()
+    multilib_devel_blacklist = SetOption(set((
         'dmraid-devel', 'kdeutils-devel', 'mkinitrd-devel',
         'java-1.5.0-gcj-devel', 'java-1.7.0-icedtea-devel',
         'php-devel', 'java-1.6.0-openjdk-devel',
         'java-1.7.0-openjdk-devel', 'java-1.8.0-openjdk-devel',
         'httpd-devel',
-    ])
-    multilib_runtime_whitelist = config.ListOption([
+    )))
+    multilib_runtime_whitelist = SetOption(set((
         'libgnat', 'wine', 'lmms-vst', 'nspluginwrapper',
         'libflashsupport', 'valgrind', 'perl-libs', 'redhat-lsb',
         'yaboot', 'syslinux-extlinux-nonlinux', 'syslinux-nonlinux',
         'syslinux-tftpboot',
-    ])
-    multilib_runtime_blacklist = config.ListOption([
+    )))
+    multilib_runtime_blacklist = SetOption(set((
         'tomcat-native', 'php', 'httpd',
-    ])
+    )))
     arches = config.ListOption()
     keys = config.ListOption()
     configdir = config.Option('/etc/mash')
diff --git a/mash/multilib.py b/mash/multilib.py
index e18b87b..b3b7ce2 100644
--- a/mash/multilib.py
+++ b/mash/multilib.py
@@ -11,15 +11,18 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
+import os
 from fnmatch import fnmatch
 
-class MultilibMethod:
+class MultilibMethod(object):
+    PREFER_64 = frozenset(( 'gdb', 'frysk', 'systemtap', 'systemtap-runtime', 'ltrace', 'strace' ))
+
     def __init__(self, config):
         self.name = 'base'
+
     def select(self, po):
-        prefer_64 = [ 'gdb', 'frysk', 'systemtap', 'systemtap-runtime', 'ltrace', 'strace' ]
         if po.arch.find('64') != -1:
-            if po.name in prefer_64:
+            if po.name in self.PREFER_64:
                 return True
             if po.name.startswith('kernel'):
                 for (p_name, p_flag, (p_e, p_v, p_r)) in po.provides:
@@ -27,7 +30,7 @@ class MultilibMethod:
                         return True
         return False
 
-class NoMultilibMethod:
+class NoMultilibMethod(object):
     def __init__(self, config):
         self.name = 'none'
 
@@ -40,7 +43,7 @@ class AllMultilibMethod(MultilibMethod):
 
     def select(self, po):
         return True
-    
+
 class FileMultilibMethod(MultilibMethod):
     def __init__(self, config):
         file = config.multilib_file
@@ -54,14 +57,14 @@ class FileMultilibMethod(MultilibMethod):
                 line = line.strip()
                 if not line.startswith('#'):
                     self.list.append(line)
-    
+
     def select(self, po):
         for item in self.list:
             if fnmatch(po.name, item):
                 return True
         return False
 
-class KernelMultilibMethod:
+class KernelMultilibMethod(object):
     def __init__(self, config):
         self.name = 'base'
 
@@ -73,7 +76,7 @@ class KernelMultilibMethod:
                         return True
         return False
 
-class YabootMultilibMethod:
+class YabootMultilibMethod(object):
     def __init__(self, config):
         self.name = 'base'
 
@@ -83,18 +86,41 @@ class YabootMultilibMethod:
                 return True
         return False
 
+
 class RuntimeMultilibMethod(MultilibMethod):
+    ROOTLIBDIRS = frozenset(('/lib', '/lib64'))
+    USRLIBDIRS = frozenset(('/usr/lib', '/usr/lib64'))
+    LIBDIRS = ROOTLIBDIRS.union(USRLIBDIRS)
+    OPROFILEDIRS = frozenset(('/usr/lib/oprofile', '/usr/lib64/oprofile'))
+    WINEDIRS = frozenset(('/usr/lib/wine', '/usr/lib64/wine'))
+    SANEDIRS = frozenset(('/usr/lib/sane', '/usr/lib64/sane'))
+
+    by_dir = set()
+
+    # alsa, dri, gtk-accessibility, scim-bridge-gtk, krb5, sasl, vdpau
+    by_dir.update(frozenset(os.path.join('/usr/lib', p) for p in ('alsa-lib',
+        'dri', 'gtk-2.0/modules', 'gtk-2.0/immodules', 'krb5/plugins',
+        'sasl2', 'vdpau')))
+    by_dir.update(frozenset(os.path.join('/usr/lib64', p) for p in ('alsa-lib',
+        'dri', 'gtk-2.0/modules', 'gtk-2.0/immodules', 'krb5/plugins',
+        'sasl2', 'vdpau')))
+
+    # pam
+    by_dir.update(frozenset(os.path.join(p, 'security') for p in ROOTLIBDIRS))
+
+    # lsb
+    by_dir.add('/etc/lsb-release.d')
+
     def __init__(self, config):
         self.name = 'runtime'
         self.config = config
 
     def select(self, po):
-        libdirs = [ '/usr/lib', '/usr/lib64', '/lib', '/lib64' ]
         if po.name in self.config.multilib_runtime_blacklist:
             return False
         if po.name in self.config.multilib_runtime_whitelist:
             return True
-        if MultilibMethod.select(self,po):
+        if MultilibMethod.select(self, po):
             return True
         if po.name.startswith('kernel'):
             for (p_name, p_flag, (p_e, p_v, p_r)) in po.provides:
@@ -102,57 +128,49 @@ class RuntimeMultilibMethod(MultilibMethod):
                     return False
         for file in po.returnFileEntries():
             (dirname, filename) = file.rsplit('/', 1)
+
             # libraries in standard dirs
-            if dirname in libdirs and fnmatch(filename, '*.so.*'):
-                return True
-            # dri
-            if dirname in [ '/usr/lib/dri', '/usr/lib64/dri' ]:
-                return True
-            # vdpau
-            if dirname in [ '/usr/lib/vdpau', '/usr/lib64/vdpau' ]:
-                return True
-            # krb5
-            if dirname in [ '/usr/lib/krb5/plugins', '/usr/lib64/krb5/plugins' ]:
-                return True
-            # pam
-            if dirname in [ '/lib/security', '/lib64/security' ]:
+            if dirname in self.LIBDIRS and fnmatch(filename, '*.so.*'):
                 return True
-            # sasl
-            if dirname in [ '/usr/lib/sasl2', '/usr/lib64/sasl2' ]:
-                return True
-            # nss
-            if dirname in [ '/lib', '/lib64' ] and filename.startswith('libnss_'):
-                return True
-            # alsa
-            if dirname in [ '/usr/lib/alsa-lib', '/usr/lib64/alsa-lib' ]:
-                return True
-            # lsb
-            if dirname == '/etc/lsb-release.d':
+            if dirname in self.by_dir:
                 return True
             # mysql, qt, etc.
             if dirname == '/etc/ld.so.conf.d' and filename.endswith('.conf'):
                 return True
-	    # gtk2-engines
-	    if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/engines'):
-		return True
-            # accessibility
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/modules'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/modules'):
-                return True
-	    # scim-bridge-gtk	
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/immodules'):
+            # nss (Some nss modules end in .so instead of .so.X)
+            # db (db modules end in .so instead of .so.X)
+            if dirname in self.ROOTLIBDIRS and (filename.startswith('libnss_') or filename.startswith('libdb-')):
                 return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/immodules'):
-                return True
-            # images
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/loaders'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gdk-pixbuf-2.0/*/loaders'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/printbackends'):
-                return True
-            if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/filesystems'):
+            # Optimization:
+            # All tests beyond here are for things in USRLIBDIRS
+            if not dirname.startswith(tuple(self.USRLIBDIRS)):
+                # The dirname does not start with a USRLIBDIR so we can move
+                # on to the next file
+                continue
+
+            if dirname.startswith(('/usr/lib/gtk-2.0', '/usr/lib64/gtk-2.0')):
+                # gtk2-engines
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/engines'):
+                    return True
+                # accessibility
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/modules'):
+                    return True
+                # scim-bridge-gtk
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/immodules'):
+                    return True
+                # images
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/loaders'):
+                    return True
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/printbackends'):
+                    return True
+                if fnmatch(dirname, '/usr/lib*/gtk-2.0/*/filesystems'):
+                    return True
+                # Optimization:
+                # No tests beyond here for things in /usr/lib*/gtk-2.0
+                continue
+
+            # gstreamer
+            if dirname.startswith(('/usr/lib/gstreamer-', '/usr/lib64/gstreamer-')):
                 return True
             # qt/kde fun
             if fnmatch(dirname, '/usr/lib*/qt*/plugins/*'):
@@ -162,23 +180,20 @@ class RuntimeMultilibMethod(MultilibMethod):
             # qml
             if fnmatch(dirname, '/usr/lib*/qt5/qml/*'):
                 return True
-            # gstreamer
-            if fnmatch(dirname, '/usr/lib*/gstreamer-*'):
+            # images
+            if fnmatch(dirname, '/usr/lib*/gdk-pixbuf-2.0/*/loaders'):
                 return True
             # xine-lib
             if fnmatch(dirname, '/usr/lib*/xine/plugins/*'):
                 return True
             # oprofile
-            if fnmatch(dirname, '/usr/lib*/oprofile') and fnmatch(filename, '*.so.*'):
+            if dirname in self.OPROFILEDIRS and fnmatch(filename, '*.so.*'):
                 return True
             # wine
-            if fnmatch(dirname, '/usr/lib*/wine') and filename.endswith('.so'):
-                return True
-            # db
-            if dirname in [ '/lib', '/lib64' ] and filename.startswith('libdb-'):
+            if dirname in self.WINEDIRS and filename.endswith('.so'):
                 return True
             # sane drivers
-            if dirname in [ '/usr/lib/sane', '/usr/lib64/sane' ] and filename.startswith('libsane-'):
+            if dirname in self.SANEDIRS and filename.startswith('libsane-'):
                 return True
         return False
 
@@ -192,7 +207,7 @@ class DevelMultilibMethod(RuntimeMultilibMethod):
             return False
         if po.name in self.config.multilib_devel_whitelist:
             return True
-        if RuntimeMultilibMethod.select(self,po):
+        if RuntimeMultilibMethod.select(self, po):
             return True
         if po.name.startswith('ghc-'):
             return False
-- 
2.1.0

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: not available
URL: <http://lists.fedoraproject.org/pipermail/buildsys/attachments/20150506/ac34c584/attachment.sig>


More information about the buildsys mailing list