aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ruben Rodriguez <ruben@trisquel.info> 2015-07-01 14:51:05 -0500
committer Ruben Rodriguez <ruben@trisquel.info> 2015-07-01 14:51:05 -0500
commit 3dc769bd4237b439c32b1dc4ad128e7cda480a15 (patch)
tree 17fc26f06c67a16c92b11be91cd678ba689570ba
parent 2a2a0d49ecccabe971bb0a39e6206bb47cd17168 (diff)
Parse the changelog files and extract the uploader name and date.
The load_changelog.py script parses the changelog files in the same way as load_copyright.py and stores the results (package name, version, author/uploader and date) in cl.h5. That information is later used by export.py to fill in the 'License verified by' and 'License verified date' fields with more accurate information than 'Debian' and today().
-rw-r--r-- export.py 15
-rw-r--r-- load_changelog.py 46
2 files changed, 57 insertions, 4 deletions
diff --git a/export.py b/export.py
index 5f358cb..8bd72bd 100644
--- a/export.py
+++ b/export.py
@@ -60,6 +60,10 @@ class PkgData(object):
self.licenses = cp_store['licenses']
cp_store.close()
+ cl_store = pd.HDFStore('cl.h5')
+ self.cl = cl_store['cl_versions']
+ cl_store.close()
+
def nouni(s):
return s.encode('utf8') if isinstance(s, unicode) else s
@@ -122,7 +126,7 @@ def get_license_map():
return map
-def srcpkg_extract_licenses(header, filess, licenses):
+def srcpkg_extract_licenses(header, filess, licenses, cl_date, cl_uploader):
# XXX: generate template from header stanza
# XXX: flag CC licenses
# XXX: check all License stanzas were included
@@ -174,8 +178,8 @@ def srcpkg_extract_licenses(header, filess, licenses):
yield Template('Project license', [
('License', canon),
('License copyright', cp),
- ('License verified by', 'Debian'),
- ('License verified date', today()),
+ ('License verified by', 'Debian: %s' % cl_uploader),
+ ('License verified date', cl_date),
('License note', txt)])
def parse_person(s):
@@ -284,8 +288,11 @@ def export_srcpkgs(data, name, srcpkg_names):
people.extend(list(extract_people(pkg_cps)))
res.extend(list(extract_resources(pkg_cps)))
+ pkg_cl = data.cl[data.cl['_srcpkg'] == srcpkg]
+ cl_date = pkg_cl['date'][0]
+ cl_uploader = pkg_cl['author'][0]
for template in srcpkg_extract_licenses(
- pkg_cps, pkg_cpf, pkg_licenses):
+ pkg_cps, pkg_cpf, pkg_licenses, cl_date, cl_uploader):
# XXX: eliminate duplicates
yield template
diff --git a/load_changelog.py b/load_changelog.py
new file mode 100644
index 0000000..448c087
--- /dev/null
+++ b/load_changelog.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from debian import changelog
+import pandas as pd
+import os
+import sys
+from dateutil import parser
+
+def import_one(pkgname, fh):
+ try:
+ c = changelog.Changelog(fh)
+ date = parser.parse(c.date).strftime('%Y-%m-%d')
+ df = pd.DataFrame([{'_srcpkg':c.package, 'version':c.version, 'date':date, 'author':c.author}])
+ except:
+ return
+ return (df)
+
+def get_pkgname(path):
+ (dir, base) = os.path.split(path)
+
+ if base in ('current', 'changelog.txt'):
+ return get_pkgname(dir)
+ else:
+ return base
+
+def main(paths):
+ versions = []
+
+ for path in paths:
+ pkgname = get_pkgname(path)
+ print pkgname, path
+ data = import_one(pkgname, file(path))
+
+ if data is not None:
+ versions.append(data)
+
+ versions = pd.concat(versions)
+ print versions
+ store = pd.HDFStore('cl.h5')
+ store['cl_versions'] = versions
+ store.close()
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
+