diff options
author | Ruben Rodriguez <ruben@trisquel.info> | 2015-07-01 14:51:05 -0500 |
---|---|---|
committer | Ruben Rodriguez <ruben@trisquel.info> | 2015-07-01 14:51:05 -0500 |
commit | 3dc769bd4237b439c32b1dc4ad128e7cda480a15 (patch) | |
tree | 17fc26f06c67a16c92b11be91cd678ba689570ba | |
parent | 2a2a0d49ecccabe971bb0a39e6206bb47cd17168 (diff) |
Parse the changelog files and extract the uploader name and date.
The load_changelog.py script parses the changelog files in the same way as load_copyright.py
and stores the results (package name, version, author/uploader and date) in cl.h5.
That information is later used by export.py to fill in the 'License verified by' and
'License verified date' fields with more accurate information than the previous values, 'Debian' and today().
-rw-r--r-- | export.py | 15 | ||||
-rw-r--r-- | load_changelog.py | 46 |
2 files changed, 57 insertions, 4 deletions
@@ -60,6 +60,10 @@ class PkgData(object): self.licenses = cp_store['licenses'] cp_store.close() + cl_store = pd.HDFStore('cl.h5') + self.cl = cl_store['cl_versions'] + cl_store.close() + def nouni(s): return s.encode('utf8') if isinstance(s, unicode) else s @@ -122,7 +126,7 @@ def get_license_map(): return map -def srcpkg_extract_licenses(header, filess, licenses): +def srcpkg_extract_licenses(header, filess, licenses, cl_date, cl_uploader): # XXX: generate template from header stanza # XXX: flag CC licenses # XXX: check all License stanzas were included @@ -174,8 +178,8 @@ def srcpkg_extract_licenses(header, filess, licenses): yield Template('Project license', [ ('License', canon), ('License copyright', cp), - ('License verified by', 'Debian'), - ('License verified date', today()), + ('License verified by', 'Debian: %s' % cl_uploader), + ('License verified date', cl_date), ('License note', txt)]) def parse_person(s): @@ -284,8 +288,11 @@ def export_srcpkgs(data, name, srcpkg_names): people.extend(list(extract_people(pkg_cps))) res.extend(list(extract_resources(pkg_cps))) + pkg_cl = data.cl[data.cl['_srcpkg'] == srcpkg] + cl_date = pkg_cl['date'][0] + cl_uploader = pkg_cl['author'][0] for template in srcpkg_extract_licenses( - pkg_cps, pkg_cpf, pkg_licenses): + pkg_cps, pkg_cpf, pkg_licenses, cl_date, cl_uploader): # XXX: eliminate duplicates yield template diff --git a/load_changelog.py b/load_changelog.py new file mode 100644 index 0000000..448c087 --- /dev/null +++ b/load_changelog.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from debian import changelog +import pandas as pd +import os +import sys +from dateutil import parser + +def import_one(pkgname, fh): + try: + c = changelog.Changelog(fh) + date = parser.parse(c.date).strftime('%Y-%m-%d') + df = pd.DataFrame([{'_srcpkg':c.package, 'version':c.version, 'date':date, 'author':c.author}]) + except: + return + return (df) + +def get_pkgname(path): + (dir, base) = os.path.split(path) + + 
if base in ('current', 'changelog.txt'): + return get_pkgname(dir) + else: + return base + +def main(paths): + versions = [] + + for path in paths: + pkgname = get_pkgname(path) + print pkgname, path + data = import_one(pkgname, file(path)) + + if data is not None: + versions.append(data) + + versions = pd.concat(versions) + print versions + store = pd.HDFStore('cl.h5') + store['cl_versions'] = versions + store.close() + +if __name__ == '__main__': + main(sys.argv[1:]) + |