From 0574ae301512e3d68922fd037826e74c9e54e521 Mon Sep 17 00:00:00 2001 From: Dafydd Harries Date: Mon, 18 Mar 2013 14:06:53 -0400 Subject: first pass at license export --- export.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/export.py b/export.py index 7e1d840..934f70f 100644 --- a/export.py +++ b/export.py @@ -1,3 +1,4 @@ +# encoding: utf8 import re import sys @@ -6,6 +7,8 @@ import textwrap import numpy import pandas as pd +import license + def concat(xss): all = [] @@ -45,6 +48,23 @@ def munge_description(s): return '\n\n'.join( textwrap.fill(para.lstrip().replace('\n', ''), 65) for para in paras) +def get_license_map(): + map = {} + + for para in file('license_map').read().split('\n\n'): + if not para: + continue + + match = re.match('\[([^\]]+)\]', para) + assert match, para + canonical = match.group(1) + aliases = para[match.end():].lstrip().splitlines() + + for alias in aliases: + map[alias] = canonical + + return map + def export(pkgs, descs, cps, cpf, name): pkg_cps = cps[cps['Upstream-Name'] == name] srcpkg_names = list(pkg_cps['_srcpkg']) @@ -89,9 +109,43 @@ def export(pkgs, descs, cps, cpf, name): ('Status', ''), ('Is GNU', 'No')]) - print Template('Project license', [ - ('License', ''), - ('License note', '')]) + lmap = get_license_map() + + for srcpkg in srcpkg_names: + pkg_cpf = cpf[cpf['_srcpkg'] == srcpkg] + #licenses = license.parse_licenses(list(pkg_cpf['_license'])) + #licenses = [ + # license.parse_licenses(row['_license']) + # for (_ix, row) in pkg_cpf.iterrows()] + #print licenses + #all = set(concat(l.flatten() for l in licenses)) + + for (_ix, files) in pkg_cpf.iterrows(): + lname = files['_license'].strip() + + if '\n' not in lname: + # Looks like license text is present. + txt = files['License'] + else: + # License information is a stub. 
+ # XXX: look it up + txt = lname + + canon = lmap.get(lname.lower(), 'Other') + cp = ''.join( + u'© %s\n' % line + for line in files['Copyright'].splitlines()) + cp = cp.encode('utf8') + txt = txt.encode('utf8') + + print Template('Project license', [ + ('License', canon), + ('License note', (cp + '\n' + txt))]) + + # XXX: eliminate duplicates + # XXX: check all License stanzas were included + # XXX: generate template from header stanza + # XXX: flag CC licenses print Template('Person', [ ('Real name', ''), -- cgit v1.2.3