diff options
author | Dafydd Harries <daf@rhydd.org> | 2013-03-18 00:28:40 -0400 |
---|---|---|
committer | Dafydd Harries <daf@rhydd.org> | 2013-03-18 00:28:40 -0400 |
commit | 97e0bc2e76ee25633944de3d62eaa00380fcb6c4 (patch) | |
tree | e090203827d06dc54fefaf0f1f9ae433585451d4 | |
parent | 8818a5e2944f2724d3e7dc0ba6f3e432dac7c1c5 (diff) |
add tool for generating summary of licenses
-rw-r--r-- | license_summary.py | 21 |
1 files changed, 21 insertions, 0 deletions
"""license_summary.py: summary of the License: fields found in Files: clauses.

Reads the ``_license`` column of the ``cp_files`` table stored in the HDF5
file ``cp.h5``, counts each value case-insensitively, and prints one
``<license> <count>`` row per distinct value, most frequent first.

The script runs unchanged on Python 2 and Python 3.
"""

from collections import Counter

import pandas as pd

# `unicode` on Python 2, `str` on Python 3.
_TEXT_TYPE = type(u'')


def histogram(values):
    """Return a plain dict mapping each lower-cased value to its frequency.

    values: an iterable of strings (anything with a ``.lower()`` method).
    """
    # Convert back to a plain dict so that looking up a missing key still
    # raises KeyError, exactly like the original hand-rolled histogram.
    return dict(Counter(v.lower() for v in values))


def ranked(hist):
    """Return the ``(name, count)`` pairs of *hist*, most frequent first.

    Ties are broken alphabetically by name so that the output is
    reproducible instead of depending on dict iteration order.
    """
    return sorted(hist.items(), key=lambda item: (-item[1], item[0]))


def format_row(name, count):
    """Format one summary row: name left-aligned in 40 columns, then count."""
    # Python 2 only (there `str is bytes`): encode unicode text to UTF-8 so
    # that printing does not fail on a non-UTF-8 or piped stdout. Byte
    # strings are passed through untouched; encoding them would trigger an
    # implicit ASCII decode and fail on non-ASCII data. On Python 3 the text
    # is printed as-is rather than as a ``b'...'`` repr.
    if str is bytes and isinstance(name, _TEXT_TYPE):
        name = name.encode('utf8')
    return '%-40s %6d' % (name, count)


def load_licenses(path='cp.h5', key='cp_files', column='_license'):
    """Return column *column* of the table stored under *key* in *path*.

    The store is opened read-only (so a missing file is reported as an error
    rather than being created empty) and is always closed again.
    """
    store = pd.HDFStore(path, mode='r')
    try:
        return store[key][column]
    finally:
        store.close()


def main(path='cp.h5', key='cp_files', column='_license'):
    """Print the license histogram for the given store, table and column."""
    licenses = histogram(load_licenses(path, key, column))

    for name, count in ranked(licenses):
        print(format_row(name, count))


if __name__ == '__main__':
    main()