aboutsummaryrefslogtreecommitdiff
path: root/license_summary.py
blob: e5edaba24859391b6017bde59674f0aa72adfe60 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# summary of the License: fields found in Files: clauses

import pandas as pd

def histogram(values):
    """Count how often each value occurs, ignoring case.

    Every element of *values* is lower-cased (so each element must provide
    a ``lower()`` method) and the occurrences of the resulting keys are
    tallied.

    Returns a plain dict mapping each lower-cased value to its count.
    """
    counts = {}

    for raw in values:
        key = raw.lower()
        if key in counts:
            counts[key] += 1
        else:
            # First time this (lower-cased) value has been seen.
            counts[key] = 1

    return counts

if __name__ == '__main__':
    # Tally the '_license' column of the 'cp_files' table stored in cp.h5
    # and print one "<license> <count>" row per distinct (lower-cased)
    # value, most frequent first.
    store = pd.HDFStore('cp.h5')
    try:
        cpf = store['cp_files']
    finally:
        # Always release the HDF5 file handle, even if the lookup raises.
        store.close()

    # items() yields (license, count) pairs.  Iterating the dict directly
    # would give only the keys, which breaks both the count-based sort key
    # and the (k, v) unpacking below.
    licenses = list(histogram(cpf['_license']).items())

    for (k, v) in sorted(licenses, key=lambda x: x[1], reverse=True):
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the print() function.
        print('%-40s %6d' % (k.encode('utf8'), v))