about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ruben Rodriguez <ruben@trisquel.info> 2015-07-17 14:10:11 -0500
committer Ruben Rodriguez <ruben@trisquel.info> 2015-07-17 14:10:11 -0500
commit 39b340faf35b1d59aae37b13b4ed850d50be1d5f (patch)
tree 216369260086f133dd8ed1067b4328ee4ce6f722
parent ce66672fa7400a9fe87d767698f629941b309b8e (diff)
Parse Sources file to extract information about the source tarball url
-rw-r--r-- export.py 12
-rw-r--r-- load_sources.py 36
2 files changed, 48 insertions, 0 deletions
diff --git a/export.py b/export.py
index 8bd72bd..3a93602 100644
--- a/export.py
+++ b/export.py
@@ -52,6 +52,7 @@ class PkgData(object):
pkg_store = pd.HDFStore('pkg.h5')
self.pkgs = pkg_store['packages']
self.descs = pkg_store['descriptions']
+ self.srcs = pkg_store['sources']
pkg_store.close()
cp_store = pd.HDFStore('cp.h5')
@@ -224,6 +225,15 @@ def export_srcpkgs(data, name, srcpkg_names):
for srcpkg in srcpkg_names])
versions = {}
+ srcfiles=""
+ for srcpkg in srcpkg_names:
+ srcfile = data.srcs[data.srcs['_srcpkg'] == srcpkg]['srcfile'].values[0]
+ letter = srcfile[0]
+ if srcfile[:3] == 'lib':
+ letter = srcfile[:4]
+ srcfile = 'http://ftp.debian.org/debian/pool/main/%s/%s/%s' % (letter,srcpkg,srcfile)
+ srcfiles = srcfiles + srcfile + " "
+
for (_i, pkg) in binpkgs.iterrows():
versions[pkg['_srcpkg']] = pkg['Version']
@@ -267,6 +277,8 @@ def export_srcpkgs(data, name, srcpkg_names):
('Computer languages', ', '.join(langs)),
('Status', ''),
('Is GNU', 'No'),
+ ('Version identifier', pkg['Version']),
+ ('Version download', srcfiles),
('Submitted by', 'Debian import'),
('Submitted date', today())])
diff --git a/load_sources.py b/load_sources.py
new file mode 100644
index 0000000..f420796
--- /dev/null
+++ b/load_sources.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+def pkg(para):
+ d = dict(para)
+ a = dict()
+
+ if 'Files' in d:
+ a['srcfile']=d['Files'].split('\n')[2].split(' ')[3]
+
+ if 'Source' in d:
+ # Source fields sometimes have the source version number; strip it.
+ a['_srcpkg'] = d['Source'].split(' ')[0]
+ else:
+ # No 'Source' field means that it has the same value as the 'Package'
+ # field.
+ a['_srcpkg'] = d['Package']
+
+ return a
+
+if __name__ == '__main__':
+ packages = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+ df = pd.DataFrame([pkg(p) for p in packages])
+ store = pd.HDFStore('pkg.h5')
+
+ print df
+
+ store = pd.HDFStore('pkg.h5')
+ store['sources'] = df
+ store.close()
+