diff options
Diffstat (limited to 'load_packages.py')
-rw-r--r-- | load_packages.py | 22 |
1 files changed, 22 insertions, 0 deletions
"""Load Debian package paragraphs from stdin into an HDF5 store.

Reads deb822 ``Packages`` paragraphs from standard input, builds a pandas
DataFrame with one row per paragraph, fills in missing ``Source`` values
from the ``Package`` column, prints the frame, and saves it under the
``packages`` key of ``pkg.h5`` in the current working directory.
"""

import sys

import pandas as pd


def fill_missing_source(df):
    """Fill missing ``Source`` values in *df* from its ``Package`` column.

    No 'Source' field means that it has the same value as the 'Package'
    field; this sets that value explicitly.  The frame is modified in place.

    Args:
        df: DataFrame with a ``Package`` column and, optionally, a
            ``Source`` column.

    Returns:
        The same DataFrame object, for convenience.

    Raises:
        KeyError: if *df* has no ``Package`` column.
        ValueError: if a row still has no ``Source`` after filling (i.e. it
            is missing ``Package`` as well).
    """
    if 'Source' in df.columns:
        nosrc = df['Source'].isnull()
        # Single .loc assignment instead of df['Source'][nosrc] = ...:
        # chained indexing may write to a temporary copy and silently leave
        # the original frame unchanged.
        df.loc[nosrc, 'Source'] = df.loc[nosrc, 'Package']
    else:
        # None of the paragraphs carried a 'Source' field, so the column
        # does not exist yet; every source equals its package name.
        df['Source'] = df['Package']

    # Explicit check rather than `assert`, which is stripped under -O.
    if df['Source'].isnull().any():
        raise ValueError("some packages have neither 'Source' nor 'Package'")
    return df


def main():
    """Parse Packages paragraphs from stdin and store them in pkg.h5."""
    # Imported here so fill_missing_source() stays importable on systems
    # without python-debian installed.
    import debian.deb822

    packages = debian.deb822.Packages.iter_paragraphs(sys.stdin)
    df = pd.DataFrame([dict(p) for p in packages])
    fill_missing_source(df)

    # Parenthesised form prints identically on Python 2 and Python 3.
    print(df)

    # Open the store once, only when there is something to write, and make
    # sure the handle is released even if the write fails.
    store = pd.HDFStore('pkg.h5')
    try:
        store['packages'] = df
    finally:
        store.close()


if __name__ == '__main__':
    main()