aboutsummaryrefslogtreecommitdiff
path: root/load_packages.py
diff options
context:
space:
mode:
author Dafydd Harries <daf@rhydd.org> 2013-03-12 19:53:28 -0400
committer Dafydd Harries <daf@rhydd.org> 2013-03-12 19:53:28 -0400
commit 7ac7787095c3fe6c730a16523d7e00785c6c40cd (patch)
tree 56b9c3cbc55dd53dc60f9eb640e28ce595460314 /load_packages.py
import
Diffstat (limited to 'load_packages.py')
-rw-r--r-- load_packages.py | 22
1 files changed, 22 insertions, 0 deletions
diff --git a/load_packages.py b/load_packages.py
new file mode 100644
index 0000000..c02eabd
--- /dev/null
+++ b/load_packages.py
@@ -0,0 +1,22 @@
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+packages = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+df = pd.DataFrame([dict(p) for p in packages])
+store = pd.HDFStore('pkg.h5')
+
+# No 'Source' field means that it has the same value as the 'Package' field.
+# Set this explicitly.
+nosrc = df['Source'].isnull()
+df['Source'][nosrc] = df[nosrc]['Package']
+assert sum(pd.isnull(df['Source'])) == 0
+
+print df
+
+store = pd.HDFStore('pkg.h5')
+store['packages'] = df
+store.close()
+