blob: e5edaba24859391b6017bde59674f0aa72adfe60 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
# summary of the License: fields found in Files: clauses
import pandas as pd
def histogram(values):
    """Count how often each value occurs, ignoring case.

    Each item of ``values`` is lower-cased with its ``lower()`` method
    before counting, so ``'MIT'`` and ``'mit'`` share one bucket.

    :param values: iterable of string-like items (anything with ``lower()``).
    :returns: dict mapping each lower-cased value to its occurrence count.
    """
    hist = {}
    for v in values:
        v_ = v.lower()
        hist[v_] = hist.get(v_, 0) + 1
    return hist
if __name__ == '__main__':
    # Print a table of license names and their counts, most frequent first,
    # from the '_license' column of the 'cp_files' table in cp.h5.
    store = pd.HDFStore('cp.h5')
    try:
        cpf = store['cp_files']
    finally:
        # Release the HDF5 file handle even if the lookup fails.
        store.close()
    # .items() yields (license, count) pairs; converting the dict itself to a
    # list would keep only the keys and break the unpacking/sort below.
    licenses = list(histogram(cpf['_license']).items())
    for (k, v) in sorted(licenses, key=lambda x: x[1], reverse=True):
        # Parenthesized single-argument print behaves the same as the
        # Python 2 print statement and also parses under Python 3.
        print('%-40s %6d' % (k.encode('utf8'), v))
|