about summary refs log tree commit diff
path: root/load_sources.py
diff options
context:
space:
mode:
Diffstat (limited to 'load_sources.py')
-rw-r--r-- load_sources.py | 36
1 files changed, 36 insertions, 0 deletions
diff --git a/load_sources.py b/load_sources.py
new file mode 100644
index 0000000..f420796
--- /dev/null
+++ b/load_sources.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+def pkg(para):
+    """Reduce one control-file paragraph to the fields kept per package.
+
+    ``para`` is anything ``dict()`` accepts (here, a deb822 paragraph).
+
+    Returns a dict that always has ``'_srcpkg'`` and, when the paragraph
+    has a ``'Files'`` field, also ``'srcfile'``.
+
+    Raises ``KeyError`` when neither ``'Source'`` nor ``'Package'`` is
+    present, and ``IndexError`` when ``'Files'`` has fewer newline- or
+    space-separated pieces than the fixed indices below expect.
+    """
+    fields = dict(para)
+    record = {}
+
+    if 'Files' in fields:
+        # Third newline-separated piece, fourth space-separated token.
+        # NOTE(review): presumably the value starts with a newline and each
+        # entry reads " <checksum> <size> <name>", which would make this the
+        # name of the second listed file -- confirm against real input.
+        entries = fields['Files'].split('\n')
+        tokens = entries[2].split(' ')
+        record['srcfile'] = tokens[3]
+
+    if 'Source' not in fields:
+        # Without a 'Source' field the source package is named the same as
+        # the 'Package' field.
+        record['_srcpkg'] = fields['Package']
+    else:
+        # 'Source' may carry a trailing version after a space; keep only
+        # the name in front of it.
+        record['_srcpkg'] = fields['Source'].split(' ')[0]
+
+    return record
+
+if __name__ == '__main__':
+    # Read control-file paragraphs from stdin and turn each one into a row
+    # via pkg(); the rows become the columns of a single DataFrame.
+    packages = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+    df = pd.DataFrame([pkg(p) for p in packages])
+
+    # Parenthesised single-argument form prints the same text under
+    # Python 2 and also parses under Python 3.
+    print(df)
+
+    # Open the HDF5 store exactly once, and close it even if the write
+    # fails, so no file handle is left dangling.
+    store = pd.HDFStore('pkg.h5')
+    try:
+        store['sources'] = df
+    finally:
+        store.close()
+