about summary refs log tree commit diff
diff options
context:
space:
mode:
author Dafydd Harries <daf@rhydd.org> 2013-03-18 00:28:40 -0400
committer Dafydd Harries <daf@rhydd.org> 2013-03-18 00:28:40 -0400
commit 97e0bc2e76ee25633944de3d62eaa00380fcb6c4 (patch)
tree e090203827d06dc54fefaf0f1f9ae433585451d4
parent 8818a5e2944f2724d3e7dc0ba6f3e432dac7c1c5 (diff)
add tool for generating summary of licenses
-rw-r--r-- license_summary.py 21
1 files changed, 21 insertions, 0 deletions
diff --git a/license_summary.py b/license_summary.py
new file mode 100644
index 0000000..7e7899c
--- /dev/null
+++ b/license_summary.py
@@ -0,0 +1,21 @@
+# summary of the License: fields found in Files: clauses
+
+import pandas as pd
+
+store = pd.HDFStore('cp.h5')  # open the HDF5 store 'cp.h5' (relative path)
+cpf = store['cp_files']  # object saved under key 'cp_files'; presumably one row per Files: clause -- confirm
+
+def histogram(values):  # return {lowercased value: number of occurrences}
+    hist = {}
+
+    for v in values:
+        v_ = v.lower()  # fold case so 'GPL-2+' and 'gpl-2+' share one bucket
+        hist[v_] = hist.get(v_, 0) + 1  # a missing key starts from 0
+
+    return hist
+
+licenses = histogram(cpf['_license'])  # case-insensitive tally of the values in cpf['_license']
+
+for (k, v) in sorted(licenses.iteritems(), key=lambda x: x[1], reverse=True):  # highest count first
+    print '%-40s %6d' % (k.encode('utf8'), v)  # Python 2: encode the name to UTF-8 bytes before formatting
+