aboutsummaryrefslogtreecommitdiff
path: root/load_sources.py
diff options
context:
space:
mode:
author: Ruben Rodriguez <ruben@trisquel.info> 2015-07-17 14:10:11 -0500
committer: Ruben Rodriguez <ruben@trisquel.info> 2015-07-17 14:10:11 -0500
commit: 39b340faf35b1d59aae37b13b4ed850d50be1d5f (patch)
tree: 216369260086f133dd8ed1067b4328ee4ce6f722 /load_sources.py
parent: ce66672fa7400a9fe87d767698f629941b309b8e (diff)
Parse Sources file to extract information about the source tarball url
Diffstat (limited to 'load_sources.py')
-rw-r--r--load_sources.py36
1 files changed, 36 insertions, 0 deletions
diff --git a/load_sources.py b/load_sources.py
new file mode 100644
index 0000000..f420796
--- /dev/null
+++ b/load_sources.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
def pkg(para):
    """Extract source-package information from one control-file paragraph.

    para -- a mapping (anything accepted by ``dict()``) of field names to
            raw string values.  'Files', when present, is expected to be
            the raw multi-line field text: one "<checksum> <size> <name>"
            entry per continuation line, after an empty first line.

    Returns a dict with:
      '_srcpkg' -- the source package name: the 'Source' field with any
                   trailing version stripped, or the 'Package' field when
                   there is no 'Source' field.
      'srcfile' -- the file name of the second entry listed in 'Files'.
                   Only present when 'Files' has a well-formed second
                   entry.

    Raises KeyError when the paragraph has neither 'Source' nor 'Package'.
    """
    d = dict(para)
    a = dict()

    if 'Files' in d:
        lines = d['Files'].split('\n')
        # lines[0] is the (empty) remainder of the "Files:" line itself, so
        # lines[2] is the second listed file.  Guard the lookup instead of
        # indexing blindly: paragraphs listing a single file used to raise
        # IndexError here.
        if len(lines) > 2:
            # Split on any run of whitespace so that column padding does not
            # shift the position of the file name.
            fields = lines[2].split()
            if len(fields) >= 3:
                a['srcfile'] = fields[2]

    if 'Source' in d:
        # Source fields sometimes have the source version number; strip it.
        a['_srcpkg'] = d['Source'].split(' ')[0]
    else:
        # No 'Source' field means that it has the same value as the 'Package'
        # field.
        a['_srcpkg'] = d['Package']

    return a
+
if __name__ == '__main__':
    # Read control-file paragraphs from stdin and reduce each one to the
    # fields pkg() extracts.  pkg() splits 'Files' as a raw string, so the
    # paragraphs are consumed exactly as this parser class yields them.
    packages = debian.deb822.Packages.iter_paragraphs(sys.stdin)
    df = pd.DataFrame([pkg(p) for p in packages])

    # Parenthesised form prints the same thing under Python 2 and is also
    # valid syntax under Python 3.
    print(df)

    # Open the HDF5 store once (it used to be opened twice, leaking the first
    # handle) and make sure it is closed even if the write fails.
    store = pd.HDFStore('pkg.h5')
    try:
        store['sources'] = df
    finally:
        store.close()
+