diff options
author | Dafydd Harries <daf@rhydd.org> | 2013-03-12 19:53:28 -0400 |
---|---|---|
committer | Dafydd Harries <daf@rhydd.org> | 2013-03-12 19:53:28 -0400 |
commit | 7ac7787095c3fe6c730a16523d7e00785c6c40cd (patch) | |
tree | 56b9c3cbc55dd53dc60f9eb640e28ce595460314 /load_packages.py |
import
Diffstat (limited to 'load_packages.py')
-rw-r--r-- | load_packages.py | 22 |
1 files changed, 22 insertions, 0 deletions
# load_packages.py -- introduced as a new file (mode 100644) by this commit.
# The listing below is a corrected, reformatted version of that script.
"""Load package paragraphs from stdin into an HDF5 store.

Reads deb822-formatted package paragraphs from standard input using
``debian.deb822.Packages``, builds a pandas DataFrame with one row per
paragraph, fills in missing 'Source' values from 'Package', prints the
resulting frame, and saves it under the key 'packages' in 'pkg.h5'.
"""

import sys

import debian.deb822
import pandas as pd

packages = debian.deb822.Packages.iter_paragraphs(sys.stdin)
df = pd.DataFrame([dict(p) for p in packages])

# No 'Source' field means that it has the same value as the 'Package' field.
# Set this explicitly.
nosrc = df['Source'].isnull()
# Assign through a single .loc call: chained indexing
# (df['Source'][nosrc] = ...) may write to a temporary copy and leave df
# unchanged.
df.loc[nosrc, 'Source'] = df.loc[nosrc, 'Package']
# Sanity check: every row must now carry a 'Source' value.
assert not df['Source'].isnull().any()

# Parenthesised form prints identically on Python 2 and Python 3.
print(df)

# Open the store once, and close it even if the write fails.
store = pd.HDFStore('pkg.h5')
try:
    store['packages'] = df
finally:
    store.close()