[PATCH 2/4] sources: Move to the new file format

Mathieu Bridon bochecha at fedoraproject.org
Tue Feb 17 03:33:44 UTC 2015


From: Mathieu Bridon <bochecha at daitauha.fr>

We are eventually going to move away from md5 for the sources. However,
in order to make the migration (and future ones) easier, we want each
line of the 'sources' file to indicate which hash function was used to
compute the hash of the file.

Fortunately, the md5sum/sha512sum/... utilities support two file formats
as their inputs and outputs:

  * the current format:
    `ahash  afile`

  * the BSD-style format, obtained with the `--tag` option:
    `HASHTYPE (afile) = ahash`

This second format is perfect for us, so this commit moves our 'sources'
file handling to it.

A couple of notes:

* we preserve compatibility with existing files, so lines in the old
  format are still read
* we now only ever write lines in the new format, which means that when
  reading an existing file, all currently present lines will be converted to
  the new format when writing back to the file
---
 src/pyrpkg/sources.py | 21 ++++++++++--
 test/test_sources.py  | 92 ++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 99 insertions(+), 14 deletions(-)

diff --git a/src/pyrpkg/sources.py b/src/pyrpkg/sources.py
index e987fa6..340c3e1 100644
--- a/src/pyrpkg/sources.py
+++ b/src/pyrpkg/sources.py
@@ -2,7 +2,12 @@
 Our so-called sources file is simple text-based line-oriented file format.
 
 Each line represents one source file and is in the same format as the output
-of commands like `md5sum filename`:
+of commands like `md5sum --tag filename`:
+
+    hashtype (filename) = hash
+
+To preserve backwards compatibility, lines can also be in the older format,
+which corresponds to the output of commands like `md5sum filename`:
 
     hash  filename
 
@@ -15,6 +20,10 @@ import os
 import re
 
 
+LINE_PATTERN = re.compile(
+    r'^(?P<hashtype>[^ ]+?) \((?P<file>[^ )]+?)\) = (?P<hash>[^ ]+?)$')
+
+
 class MalformedLineError(Exception):
     pass
 
@@ -41,6 +50,12 @@ class SourcesFile(object):
         if not stripped:
             return
 
+        m = LINE_PATTERN.match(stripped)
+        if m is not None:
+            return SourceFileEntry(m.group('hashtype'), m.group('file'),
+                                   m.group('hash'))
+
+        # Try falling back on the old format
         try:
             hash, file = stripped.split('  ', 1)
 
@@ -68,7 +83,9 @@ class SourceFileEntry(object):
             self.file = file
 
     def __str__(self):
-        return '%s  %s\n' % (self.hash, self.file)
+        return '%s (%s) = %s\n' % (self.hashtype.upper(), self.file,
+                                   self.hash)
+
 
     def __eq__(self, other):
         return ((self.hashtype, self.hash, self.file) ==
diff --git a/test/test_sources.py b/test/test_sources.py
index 2b281e7..fb91b3e 100644
--- a/test/test_sources.py
+++ b/test/test_sources.py
@@ -16,7 +16,7 @@ sys.path = old_path
 class SourceFileEntryTestCase(unittest.TestCase):
     def test_entry(self):
         e = sources.SourceFileEntry('md5', 'afile', 'ahash')
-        expected = 'ahash  afile\n'
+        expected = 'MD5 (afile) = ahash\n'
         self.assertEqual(str(e), expected)
 
 
@@ -43,10 +43,23 @@ class SourcesFileTestCase(unittest.TestCase):
         entry = s.parse_line('    \n')
         self.assertIsNone(entry)
 
-    def test_parse_entry_line(self):
+    def test_parse_old_style_line(self):
         s = sources.SourcesFile(self.sourcesfile)
 
         line = 'ahash  afile\n'
+        newline = 'MD5 (afile) = ahash\n'
+        entry = s.parse_line(line)
+
+        self.assertTrue(isinstance(entry, sources.SourceFileEntry))
+        self.assertEqual(entry.hashtype, 'md5')
+        self.assertEqual(entry.hash, 'ahash')
+        self.assertEqual(entry.file, 'afile')
+        self.assertEqual(str(entry), newline)
+
+    def test_parse_entry_line(self):
+        s = sources.SourcesFile(self.sourcesfile)
+
+        line = 'MD5 (afile) = ahash\n'
         entry = s.parse_line(line)
 
         self.assertTrue(isinstance(entry, sources.SourceFileEntry))
@@ -61,6 +74,8 @@ class SourcesFileTestCase(unittest.TestCase):
         lines = ['ahash',
                  'ahash  ',
                  'ahash afile',
+                 'SHA512 (afile) = ahash garbage',
+                 'MD5 SHA512 (afile) = ahash',
                  ]
 
         for line in lines:
@@ -76,8 +91,23 @@ class SourcesFileTestCase(unittest.TestCase):
         s = sources.SourcesFile(self.sourcesfile)
         self.assertEqual(len(s.entries), 0)
 
-    def test_open_existing_file(self):
+    def test_open_existing_file_with_old_style_lines(self):
         lines = ['ahash  afile\n', 'anotherhash  anotherfile\n']
+        newlines = ['MD5 (afile) = ahash\n',
+                    'MD5 (anotherfile) = anotherhash\n']
+
+        with open(self.sourcesfile, 'w') as f:
+            for line in lines:
+                f.write(line)
+
+        s = sources.SourcesFile(self.sourcesfile)
+
+        for i, entry in enumerate(s.entries):
+            self.assertTrue(isinstance(entry, sources.SourceFileEntry))
+            self.assertEqual(str(entry), newlines[i])
+
+    def test_open_existing_file(self):
+        lines = ['MD5 (afile) = ahash\n', 'MD5 (anotherfile) = anotherhash\n']
 
         with open(self.sourcesfile, 'w') as f:
             for line in lines:
@@ -89,6 +119,44 @@ class SourcesFileTestCase(unittest.TestCase):
             self.assertTrue(isinstance(entry, sources.SourceFileEntry))
             self.assertEqual(str(entry), lines[i])
 
+    def test_open_existing_file_with_mixed_lines(self):
+        lines = ['ahash  afile\n',
+                 'anotherhash  anotherfile\n',
+                 'MD5 (thirdfile) = thirdhash\n',
+                 ]
+        expected = [
+            'MD5 (afile) = ahash\n',
+            'MD5 (anotherfile) = anotherhash\n',
+            'MD5 (thirdfile) = thirdhash\n',
+            ]
+
+        with open(self.sourcesfile, 'w') as f:
+            for line in lines:
+                f.write(line)
+
+        s = sources.SourcesFile(self.sourcesfile)
+
+        for i, entry in enumerate(s.entries):
+            self.assertTrue(isinstance(entry, sources.SourceFileEntry))
+            self.assertEqual(str(entry), expected[i])
+
+    def test_open_existing_file_with_identical_entries_old_and_new(self):
+        lines = ['ahash  afile\n',
+                 'MD5 (afile) = ahash\n',
+                 ]
+
+        with open(self.sourcesfile, 'w') as f:
+            for line in lines:
+                f.write(line)
+
+        s = sources.SourcesFile(self.sourcesfile)
+
+        self.assertEqual(len(s.entries), 1)
+        self.assertEqual(s.entries[0].hashtype, 'md5')
+        self.assertEqual(s.entries[0].file, 'afile')
+        self.assertEqual(s.entries[0].hash, 'ahash')
+        self.assertEqual(str(s.entries[0]), lines[-1])
+
     def test_open_existing_file_with_wrong_line(self):
         line = 'some garbage here\n'
 
@@ -104,11 +172,11 @@ class SourcesFileTestCase(unittest.TestCase):
 
         s.add_entry('md5', 'afile', 'ahash')
         self.assertEqual(len(s.entries), 1)
-        self.assertEqual(str(s.entries[-1]), 'ahash  afile\n')
+        self.assertEqual(str(s.entries[-1]), 'MD5 (afile) = ahash\n')
 
         s.add_entry('md5', 'anotherfile', 'anotherhash')
         self.assertEqual(len(s.entries), 2)
-        self.assertEqual(str(s.entries[-1]), 'anotherhash  anotherfile\n')
+        self.assertEqual(str(s.entries[-1]), 'MD5 (anotherfile) = anotherhash\n')
 
     def test_add_entry_twice(self):
         s = sources.SourcesFile(self.sourcesfile)
@@ -116,7 +184,7 @@ class SourcesFileTestCase(unittest.TestCase):
 
         s.add_entry('md5', 'afile', 'ahash')
         self.assertEqual(len(s.entries), 1)
-        self.assertEqual(str(s.entries[-1]), 'ahash  afile\n')
+        self.assertEqual(str(s.entries[-1]), 'MD5 (afile) = ahash\n')
 
         s.add_entry('md5', 'afile', 'ahash')
         self.assertEqual(len(s.entries), 1)
@@ -133,8 +201,8 @@ class SourcesFileTestCase(unittest.TestCase):
              lines = f.readlines()
 
         self.assertEqual(len(lines), 2)
-        self.assertEqual(lines[0], 'ahash  afile\n')
-        self.assertEqual(lines[1], 'anotherhash  anotherfile\n')
+        self.assertEqual(lines[0], 'MD5 (afile) = ahash\n')
+        self.assertEqual(lines[1], 'MD5 (anotherfile) = anotherhash\n')
 
     def test_write_adding_a_line(self):
         lines = ['ahash  afile\n', 'anotherhash  anotherfile\n']
@@ -151,9 +219,9 @@ class SourcesFileTestCase(unittest.TestCase):
              lines = f.readlines()
 
         self.assertEqual(len(lines), 3)
-        self.assertEqual(lines[0], 'ahash  afile\n')
-        self.assertEqual(lines[1], 'anotherhash  anotherfile\n')
-        self.assertEqual(lines[2], 'thirdhash  thirdfile\n')
+        self.assertEqual(lines[0], 'MD5 (afile) = ahash\n')
+        self.assertEqual(lines[1], 'MD5 (anotherfile) = anotherhash\n')
+        self.assertEqual(lines[2], 'MD5 (thirdfile) = thirdhash\n')
 
     def test_write_over(self):
         lines = ['ahash  afile\n', 'anotherhash  anotherfile\n']
@@ -170,7 +238,7 @@ class SourcesFileTestCase(unittest.TestCase):
              lines = f.readlines()
 
         self.assertEqual(len(lines), 1)
-        self.assertEqual(lines[0], 'thirdhash  thirdfile\n')
+        self.assertEqual(lines[0], 'MD5 (thirdfile) = thirdhash\n')
 
 
 if __name__ == '__main__':
-- 
2.1.0



More information about the buildsys mailing list