diff options
-rw-r--r-- | license_summary.py | 13 | ||||
-rw-r--r-- | load_descriptions.py | 11 | ||||
-rw-r--r-- | load_packages.py | 25 | ||||
-rw-r--r-- | pkg_summary.py | 7 |
4 files changed, 30 insertions, 26 deletions
diff --git a/license_summary.py b/license_summary.py index 7e7899c..e5edaba 100644 --- a/license_summary.py +++ b/license_summary.py @@ -2,9 +2,6 @@ import pandas as pd -store = pd.HDFStore('cp.h5') -cpf = store['cp_files'] - def histogram(values): hist = {} @@ -14,8 +11,12 @@ def histogram(values): return hist -licenses = histogram(cpf['_license']) +if __name__ == '__main__': + store = pd.HDFStore('cp.h5') + cpf = store['cp_files'] + + licenses = list(histogram(cpf['_license'])) -for (k, v) in sorted(licenses.iteritems(), key=lambda x: x[1], reverse=True): - print '%-40s %6d' % (k.encode('utf8'), v) + for (k, v) in sorted(licenses, key=lambda x: x[1], reverse=True): + print '%-40s %6d' % (k.encode('utf8'), v) diff --git a/load_descriptions.py b/load_descriptions.py index 36d6257..14d0ee4 100644 --- a/load_descriptions.py +++ b/load_descriptions.py @@ -4,9 +4,10 @@ import sys import debian.deb822 import pandas as pd -descs = debian.deb822.Packages.iter_paragraphs(sys.stdin) -df = pd.DataFrame([dict(p) for p in descs]) -store = pd.HDFStore('pkg.h5') -store['descriptions'] = df -store.close() +if __name__ == '__main__': + descs = debian.deb822.Packages.iter_paragraphs(sys.stdin) + df = pd.DataFrame([dict(p) for p in descs]) + store = pd.HDFStore('pkg.h5') + store['descriptions'] = df + store.close() diff --git a/load_packages.py b/load_packages.py index c02eabd..423b227 100644 --- a/load_packages.py +++ b/load_packages.py @@ -4,19 +4,20 @@ import sys import debian.deb822 import pandas as pd -packages = debian.deb822.Packages.iter_paragraphs(sys.stdin) -df = pd.DataFrame([dict(p) for p in packages]) -store = pd.HDFStore('pkg.h5') +if __name__ == '__main__': + packages = debian.deb822.Packages.iter_paragraphs(sys.stdin) + df = pd.DataFrame([dict(p) for p in packages]) + store = pd.HDFStore('pkg.h5') -# No 'Source' field means that it has the same value as the 'Package' field. -# Set this explicitly. -nosrc = df['Source'].isnull() -df['Source'][nosrc] = df[nosrc]['Package'] -assert sum(pd.isnull(df['Source'])) == 0 + # No 'Source' field means that it has the same value as the 'Package' + # field. Set this explicitly. + nosrc = df['Source'].isnull() + df['Source'][nosrc] = df[nosrc]['Package'] + assert sum(pd.isnull(df['Source'])) == 0 -print df + print df -store = pd.HDFStore('pkg.h5') -store['packages'] = df -store.close() + store = pd.HDFStore('pkg.h5') + store['packages'] = df + store.close() diff --git a/pkg_summary.py b/pkg_summary.py index f404c56..a700c65 100644 --- a/pkg_summary.py +++ b/pkg_summary.py @@ -1,6 +1,7 @@ import pandas as pd -store = pd.HDFStore('pkg.h5') -print store.handle -print store['packages'] +if __name__ == '__main__': + store = pd.HDFStore('pkg.h5') + print store.handle + print store['packages'] |