"""Summary of the ``License:`` fields found in ``Files:`` clauses.

Reads the ``cp_files`` table from the HDF5 store ``cp.h5``, counts how often
each value of its ``_license`` column occurs (ignoring case) and prints one
line per license, most frequent first.

Provenance: this script was introduced as ``license_summary.py`` by commit
97e0bc2e76ee25633944de3d62eaa00380fcb6c4 (Dafydd Harries, 2013-03-18,
"add tool for generating summary of licenses").
"""

from collections import Counter
from operator import itemgetter

import pandas as pd

# Location and layout of the input data.
STORE_PATH = 'cp.h5'
FILES_KEY = 'cp_files'
LICENSE_COLUMN = '_license'


def histogram(values):
    """Count case-insensitive occurrences of each string in *values*.

    Args:
        values: Iterable of strings. Every item must provide ``.lower()``;
            a non-string item (for example a missing value) raises
            ``AttributeError``.

    Returns:
        A plain ``dict`` mapping each lower-cased value to the number of
        times it occurred.
    """
    return dict(Counter(value.lower() for value in values))


def main():
    """Print the license histogram, most frequent license first."""
    # Open read-only and close the store as soon as the table is loaded:
    # this tool only reports, it must never create or modify the file.
    with pd.HDFStore(STORE_PATH, mode='r') as store:
        cpf = store[FILES_KEY]

    licenses = histogram(cpf[LICENSE_COLUMN])

    by_count = sorted(licenses.items(), key=itemgetter(1), reverse=True)
    for name, count in by_count:
        # No manual UTF-8 encoding here: on Python 3 ``print`` encodes text
        # for the output stream itself, and encoding first would print the
        # ``b'...'`` repr instead of the license name.
        print('%-40s %6d' % (name, count))


if __name__ == '__main__':
    main()