aboutsummaryrefslogtreecommitdiff
path: root/load_changelog.py
diff options
context:
space:
mode:
author: Ruben Rodriguez <ruben@trisquel.info> 2015-07-01 14:51:05 -0500
committer: Ruben Rodriguez <ruben@trisquel.info> 2015-07-01 14:51:05 -0500
commit: 3dc769bd4237b439c32b1dc4ad128e7cda480a15 (patch)
tree: 17fc26f06c67a16c92b11be91cd678ba689570ba /load_changelog.py
parent: 2a2a0d49ecccabe971bb0a39e6206bb47cd17168 (diff)
Parse the changelog files and extract the uploader name and date.
The load_changelog.py script parses the changelog files in the same way as load_copyright.py and stores the results (package name, version, author/uploader and date) in cl.h5. That information is later used by export.py to fill in the 'License verified by' and 'License verified date' fields with more accurate information than 'Debian' and today().
Diffstat (limited to 'load_changelog.py')
-rw-r--r-- load_changelog.py 46
1 files changed, 46 insertions, 0 deletions
diff --git a/load_changelog.py b/load_changelog.py
new file mode 100644
index 0000000..448c087
--- /dev/null
+++ b/load_changelog.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from debian import changelog
+import pandas as pd
+import os
+import sys
+from dateutil import parser
+
def import_one(pkgname, fh):
    """Parse one Debian changelog into a single-row DataFrame.

    Args:
        pkgname: Package name derived from the file path. It is only used
            to label the diagnostic printed when parsing fails; the stored
            package name comes from the changelog itself.
        fh: Open file object (or whatever ``changelog.Changelog`` accepts)
            holding the changelog text.

    Returns:
        A ``pandas.DataFrame`` with one row and the columns ``_srcpkg``,
        ``version``, ``date`` (formatted ``YYYY-MM-DD``) and ``author``,
        read from the parsed changelog's ``package``, ``version``, ``date``
        and ``author`` attributes. ``None`` if the changelog could not be
        parsed or its date could not be interpreted.
    """
    try:
        c = changelog.Changelog(fh)
        # NOTE(review): presumably these attributes describe the most recent
        # changelog entry -- confirm against the python-debian docs.
        date = parser.parse(c.date).strftime('%Y-%m-%d')
        df = pd.DataFrame([{'_srcpkg':c.package, 'version':c.version, 'date':date, 'author':c.author}])
    except Exception as err:
        # Stay best-effort (one broken changelog must not abort the whole
        # run), but do not swallow KeyboardInterrupt/SystemExit as a bare
        # ``except:`` would, and leave a trace of what was skipped.
        sys.stderr.write('skipping %s: %r\n' % (pkgname, err))
        return None
    return df
+
def get_pkgname(path):
    """Return the package name encoded in a changelog path.

    Walks up from the end of ``path``, skipping any trailing components
    named ``current`` or ``changelog.txt``, and returns the first component
    that is neither. For example ``hello/current/changelog.txt`` gives
    ``hello``.

    Args:
        path: Filesystem path of a changelog file or one of its parent
            directories.

    Returns:
        The first path component (from the right) that is not ``current``
        or ``changelog.txt``; an empty string when no such component
        exists.
    """
    # A trailing separator would make os.path.split() report an empty
    # basename, which used to be returned as the "package name".
    remaining = path.rstrip(os.sep)

    while True:
        remaining, base = os.path.split(remaining)
        if base not in ('current', 'changelog.txt'):
            return base
+
def main(paths):
    """Parse every changelog in ``paths`` and store the results in cl.h5.

    For each path the package name and path are printed, the file is parsed
    with ``import_one``, and every successfully parsed row is collected.
    The combined table is printed and written to the HDF5 file ``cl.h5``
    under the key ``cl_versions``.

    Args:
        paths: Iterable of changelog file paths.

    Returns:
        None. When no changelog could be parsed (including an empty
        ``paths``), a note is written to stderr and ``cl.h5`` is left
        untouched.
    """
    frames = []

    for path in paths:
        pkgname = get_pkgname(path)
        # Single-argument print() emits the same text on Python 2 and 3.
        print('%s %s' % (pkgname, path))
        # Close each changelog as soon as it has been parsed instead of
        # leaking one open handle per input file.
        with open(path) as fh:
            data = import_one(pkgname, fh)

        if data is not None:
            frames.append(data)

    if not frames:
        # pd.concat() raises ValueError on an empty list.
        sys.stderr.write('no changelog could be parsed; cl.h5 not written\n')
        return

    versions = pd.concat(frames)
    print(versions)
    store = pd.HDFStore('cl.h5')
    try:
        store['cl_versions'] = versions
    finally:
        # Release the HDF5 file even if the write fails.
        store.close()
+
# Script entry point: every command-line argument is passed on as a path.
if __name__ == '__main__':
    cli_paths = sys.argv[1:]
    main(cli_paths)
+