From: Mathieu Bridon <bochecha(a)daitauha.fr>
We are eventually going to move away from md5 for the sources. To make
that migration (and future ones) easier, we want each line of the
'sources' file to indicate which hash function was used to compute the
hash of the file.
Fortunately, the md5sum/sha512sum/... utilities support two file formats
as their inputs and outputs:
* the current format:
`ahash afile`
* the BSD-style format, obtained with the `--tag` option:
`HASHTYPE (afile) = ahash`
This second format is perfect for us, so this commit moves our 'sources'
file handling to it.
A couple of notes:
* we preserve compatibility with existing files, so lines in the old
format are still read
* we now only ever write lines in the new format, which means that all
lines read from an existing file will be converted to the new format
when the file is written back
---
src/pyrpkg/sources.py | 21 ++++++++++--
test/test_sources.py | 92 ++++++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 99 insertions(+), 14 deletions(-)
diff --git a/src/pyrpkg/sources.py b/src/pyrpkg/sources.py
index e987fa6..340c3e1 100644
--- a/src/pyrpkg/sources.py
+++ b/src/pyrpkg/sources.py
@@ -2,7 +2,12 @@
Our so-called sources file is simple text-based line-oriented file format.
Each line represents one source file and is in the same format as the output
-of commands like `md5sum filename`:
+of commands like `md5sum --tag filename`:
+
+ hashtype (filename) = hash
+
+To preserve backwards compatibility, lines can also be in the older format,
+which corresponds to the output of commands like `md5sum filename`:
hash filename
@@ -15,6 +20,10 @@ import os
import re
+LINE_PATTERN = re.compile(
+ r'^(?P<hashtype>[^ ]+?) \((?P<file>[^ )]+?)\) = (?P<hash>[^ ]+?)$')
+
+
class MalformedLineError(Exception):
pass
@@ -41,6 +50,12 @@ class SourcesFile(object):
if not stripped:
return
+ m = LINE_PATTERN.match(stripped)
+ if m is not None:
+ return SourceFileEntry(m.group('hashtype'), m.group('file'),
+ m.group('hash'))
+
+ # Try falling back on the old format
try:
hash, file = stripped.split(' ', 1)
@@ -68,7 +83,9 @@ class SourceFileEntry(object):
self.file = file
def __str__(self):
- return '%s %s\n' % (self.hash, self.file)
+ return '%s (%s) = %s\n' % (self.hashtype.upper(), self.file,
+ self.hash)
+
def __eq__(self, other):
return ((self.hashtype, self.hash, self.file) ==
diff --git a/test/test_sources.py b/test/test_sources.py
index 2b281e7..fb91b3e 100644
--- a/test/test_sources.py
+++ b/test/test_sources.py
@@ -16,7 +16,7 @@ sys.path = old_path
class SourceFileEntryTestCase(unittest.TestCase):
def test_entry(self):
e = sources.SourceFileEntry('md5', 'afile', 'ahash')
- expected = 'ahash afile\n'
+ expected = 'MD5 (afile) = ahash\n'
self.assertEqual(str(e), expected)
@@ -43,10 +43,23 @@ class SourcesFileTestCase(unittest.TestCase):
entry = s.parse_line(' \n')
self.assertIsNone(entry)
- def test_parse_entry_line(self):
+ def test_parse_old_style_line(self):
s = sources.SourcesFile(self.sourcesfile)
line = 'ahash afile\n'
+ newline = 'MD5 (afile) = ahash\n'
+ entry = s.parse_line(line)
+
+ self.assertTrue(isinstance(entry, sources.SourceFileEntry))
+ self.assertEqual(entry.hashtype, 'md5')
+ self.assertEqual(entry.hash, 'ahash')
+ self.assertEqual(entry.file, 'afile')
+ self.assertEqual(str(entry), newline)
+
+ def test_parse_entry_line(self):
+ s = sources.SourcesFile(self.sourcesfile)
+
+ line = 'MD5 (afile) = ahash\n'
entry = s.parse_line(line)
self.assertTrue(isinstance(entry, sources.SourceFileEntry))
@@ -61,6 +74,8 @@ class SourcesFileTestCase(unittest.TestCase):
lines = ['ahash',
'ahash ',
'ahash afile',
+ 'SHA512 (afile) = ahash garbage',
+ 'MD5 SHA512 (afile) = ahash',
]
for line in lines:
@@ -76,8 +91,23 @@ class SourcesFileTestCase(unittest.TestCase):
s = sources.SourcesFile(self.sourcesfile)
self.assertEqual(len(s.entries), 0)
- def test_open_existing_file(self):
+ def test_open_existing_file_with_old_style_lines(self):
lines = ['ahash afile\n', 'anotherhash anotherfile\n']
+ newlines = ['MD5 (afile) = ahash\n',
+ 'MD5 (anotherfile) = anotherhash\n']
+
+ with open(self.sourcesfile, 'w') as f:
+ for line in lines:
+ f.write(line)
+
+ s = sources.SourcesFile(self.sourcesfile)
+
+ for i, entry in enumerate(s.entries):
+ self.assertTrue(isinstance(entry, sources.SourceFileEntry))
+ self.assertEqual(str(entry), newlines[i])
+
+ def test_open_existing_file(self):
+ lines = ['MD5 (afile) = ahash\n', 'MD5 (anotherfile) = anotherhash\n']
with open(self.sourcesfile, 'w') as f:
for line in lines:
@@ -89,6 +119,44 @@ class SourcesFileTestCase(unittest.TestCase):
self.assertTrue(isinstance(entry, sources.SourceFileEntry))
self.assertEqual(str(entry), lines[i])
+ def test_open_existing_file_with_mixed_lines(self):
+ lines = ['ahash afile\n',
+ 'anotherhash anotherfile\n',
+ 'MD5 (thirdfile) = thirdhash\n',
+ ]
+ expected = [
+ 'MD5 (afile) = ahash\n',
+ 'MD5 (anotherfile) = anotherhash\n',
+ 'MD5 (thirdfile) = thirdhash\n',
+ ]
+
+ with open(self.sourcesfile, 'w') as f:
+ for line in lines:
+ f.write(line)
+
+ s = sources.SourcesFile(self.sourcesfile)
+
+ for i, entry in enumerate(s.entries):
+ self.assertTrue(isinstance(entry, sources.SourceFileEntry))
+ self.assertEqual(str(entry), expected[i])
+
+ def test_open_existing_file_with_identical_entries_old_and_new(self):
+ lines = ['ahash afile\n',
+ 'MD5 (afile) = ahash\n',
+ ]
+
+ with open(self.sourcesfile, 'w') as f:
+ for line in lines:
+ f.write(line)
+
+ s = sources.SourcesFile(self.sourcesfile)
+
+ self.assertEqual(len(s.entries), 1)
+ self.assertEqual(s.entries[0].hashtype, 'md5')
+ self.assertEqual(s.entries[0].file, 'afile')
+ self.assertEqual(s.entries[0].hash, 'ahash')
+ self.assertEqual(str(s.entries[0]), lines[-1])
+
def test_open_existing_file_with_wrong_line(self):
line = 'some garbage here\n'
@@ -104,11 +172,11 @@ class SourcesFileTestCase(unittest.TestCase):
s.add_entry('md5', 'afile', 'ahash')
self.assertEqual(len(s.entries), 1)
- self.assertEqual(str(s.entries[-1]), 'ahash afile\n')
+ self.assertEqual(str(s.entries[-1]), 'MD5 (afile) = ahash\n')
s.add_entry('md5', 'anotherfile', 'anotherhash')
self.assertEqual(len(s.entries), 2)
- self.assertEqual(str(s.entries[-1]), 'anotherhash anotherfile\n')
+ self.assertEqual(str(s.entries[-1]), 'MD5 (anotherfile) = anotherhash\n')
def test_add_entry_twice(self):
s = sources.SourcesFile(self.sourcesfile)
@@ -116,7 +184,7 @@ class SourcesFileTestCase(unittest.TestCase):
s.add_entry('md5', 'afile', 'ahash')
self.assertEqual(len(s.entries), 1)
- self.assertEqual(str(s.entries[-1]), 'ahash afile\n')
+ self.assertEqual(str(s.entries[-1]), 'MD5 (afile) = ahash\n')
s.add_entry('md5', 'afile', 'ahash')
self.assertEqual(len(s.entries), 1)
@@ -133,8 +201,8 @@ class SourcesFileTestCase(unittest.TestCase):
lines = f.readlines()
self.assertEqual(len(lines), 2)
- self.assertEqual(lines[0], 'ahash afile\n')
- self.assertEqual(lines[1], 'anotherhash anotherfile\n')
+ self.assertEqual(lines[0], 'MD5 (afile) = ahash\n')
+ self.assertEqual(lines[1], 'MD5 (anotherfile) = anotherhash\n')
def test_write_adding_a_line(self):
lines = ['ahash afile\n', 'anotherhash anotherfile\n']
@@ -151,9 +219,9 @@ class SourcesFileTestCase(unittest.TestCase):
lines = f.readlines()
self.assertEqual(len(lines), 3)
- self.assertEqual(lines[0], 'ahash afile\n')
- self.assertEqual(lines[1], 'anotherhash anotherfile\n')
- self.assertEqual(lines[2], 'thirdhash thirdfile\n')
+ self.assertEqual(lines[0], 'MD5 (afile) = ahash\n')
+ self.assertEqual(lines[1], 'MD5 (anotherfile) = anotherhash\n')
+ self.assertEqual(lines[2], 'MD5 (thirdfile) = thirdhash\n')
def test_write_over(self):
lines = ['ahash afile\n', 'anotherhash anotherfile\n']
@@ -170,7 +238,7 @@ class SourcesFileTestCase(unittest.TestCase):
lines = f.readlines()
self.assertEqual(len(lines), 1)
- self.assertEqual(lines[0], 'thirdhash thirdfile\n')
+ self.assertEqual(lines[0], 'MD5 (thirdfile) = thirdhash\n')
if __name__ == '__main__':
--
2.1.0