diff options
-rw-r--r-- | export.py | 15 | ||||
-rw-r--r-- | load_changelog.py | 46 |
2 files changed, 57 insertions, 4 deletions
@@ -60,6 +60,10 @@ class PkgData(object): self.licenses = cp_store['licenses'] cp_store.close() + cl_store = pd.HDFStore('cl.h5') + self.cl = cl_store['cl_versions'] + cl_store.close() + def nouni(s): return s.encode('utf8') if isinstance(s, unicode) else s @@ -122,7 +126,7 @@ def get_license_map(): return map -def srcpkg_extract_licenses(header, filess, licenses): +def srcpkg_extract_licenses(header, filess, licenses, cl_date, cl_uploader): # XXX: generate template from header stanza # XXX: flag CC licenses # XXX: check all License stanzas were included @@ -174,8 +178,8 @@ def srcpkg_extract_licenses(header, filess, licenses): yield Template('Project license', [ ('License', canon), ('License copyright', cp), - ('License verified by', 'Debian'), - ('License verified date', today()), + ('License verified by', 'Debian: %s' % cl_uploader), + ('License verified date', cl_date), ('License note', txt)]) def parse_person(s): @@ -284,8 +288,11 @@ def export_srcpkgs(data, name, srcpkg_names): people.extend(list(extract_people(pkg_cps))) res.extend(list(extract_resources(pkg_cps))) + pkg_cl = data.cl[data.cl['_srcpkg'] == srcpkg] + cl_date = pkg_cl['date'][0] + cl_uploader = pkg_cl['author'][0] for template in srcpkg_extract_licenses( - pkg_cps, pkg_cpf, pkg_licenses): + pkg_cps, pkg_cpf, pkg_licenses, cl_date, cl_uploader): # XXX: eliminate duplicates yield template diff --git a/load_changelog.py b/load_changelog.py new file mode 100644 index 0000000..448c087 --- /dev/null +++ b/load_changelog.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from debian import changelog +import pandas as pd +import os +import sys +from dateutil import parser + +def import_one(pkgname, fh): + try: + c = changelog.Changelog(fh) + date = parser.parse(c.date).strftime('%Y-%m-%d') + df = pd.DataFrame([{'_srcpkg':c.package, 'version':c.version, 'date':date, 'author':c.author}]) + except: + return + return (df) + +def get_pkgname(path): + (dir, base) = os.path.split(path) + + if base in ('current', 'changelog.txt'): + return get_pkgname(dir) + else: + return base + +def main(paths): + versions = [] + + for path in paths: + pkgname = get_pkgname(path) + print pkgname, path + data = import_one(pkgname, file(path)) + + if data is not None: + versions.append(data) + + versions = pd.concat(versions) + print versions + store = pd.HDFStore('cl.h5') + store['cl_versions'] = versions + store.close() + +if __name__ == '__main__': + main(sys.argv[1:]) + |