# Provenance: load_packages.py as first added by commit
# 7ac7787095c3fe6c730a16523d7e00785c6c40cd ("import", Dafydd Harries,
# Tue, 12 Mar 2013 19:53:28 -0400).
"""Load Debian package paragraphs from stdin into an HDF5 store.

The script parses deb822 ``Packages`` paragraphs from standard input,
builds a pandas DataFrame with one row per paragraph, makes the
``Source`` field explicit for every row, prints the frame and saves it
under the ``packages`` key of ``pkg.h5``.
"""

import sys

import pandas as pd

# Output location and key used when persisting the package table.
STORE_PATH = 'pkg.h5'
STORE_KEY = 'packages'


def read_packages(stream):
    """Parse deb822 package paragraphs from *stream* into a DataFrame.

    Each paragraph becomes one row; fields absent from a paragraph are
    left as missing values in that row.
    """
    # Imported here rather than at module level so that the pandas-only
    # helper below stays importable without python-debian installed.
    import debian.deb822

    paragraphs = debian.deb822.Packages.iter_paragraphs(stream)
    return pd.DataFrame([dict(p) for p in paragraphs])


def fill_missing_source(df):
    """Return a copy of *df* in which every row has a ``Source`` value.

    No 'Source' field means that it has the same value as the 'Package'
    field, so missing entries are filled in from ``Package``.  The input
    frame is left untouched.

    Raises:
        ValueError: if *df* has no ``Package`` column, or if some row
            still lacks a ``Source`` after filling (i.e. its ``Package``
            is missing as well).
    """
    if 'Package' not in df.columns:
        raise ValueError("input contains no 'Package' field")

    result = df.copy()
    if 'Source' in result.columns:
        # Assign the whole column at once: chained indexing such as
        # df['Source'][mask] = ... may write to a temporary copy and
        # leave the frame unchanged.
        result['Source'] = result['Source'].fillna(result['Package'])
    else:
        # Not a single paragraph carried a Source field.
        result['Source'] = result['Package']

    unresolved = int(result['Source'].isnull().sum())
    if unresolved:
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        raise ValueError(
            "%d package(s) have neither 'Source' nor 'Package'" % unresolved
        )
    return result


def main(stream=None, store_path=STORE_PATH):
    """Read packages from *stream* (stdin by default), print and store them."""
    if stream is None:
        stream = sys.stdin

    df = fill_missing_source(read_packages(stream))

    # Single-argument call form prints identically on Python 2 and 3.
    print(df)

    # Open the store once, and close it even if the write fails.
    with pd.HDFStore(store_path) as store:
        store[STORE_KEY] = df


if __name__ == '__main__':
    main()