diff options
author | Ruben Rodriguez <ruben@trisquel.info> | 2015-07-17 14:10:11 -0500 |
---|---|---|
committer | Ruben Rodriguez <ruben@trisquel.info> | 2015-07-17 14:10:11 -0500 |
commit | 39b340faf35b1d59aae37b13b4ed850d50be1d5f (patch) | |
tree | 216369260086f133dd8ed1067b4328ee4ce6f722 | |
parent | ce66672fa7400a9fe87d767698f629941b309b8e (diff) |
Parse Sources file to extract information about the source tarball url
-rw-r--r-- | export.py | 12 | ||||
-rw-r--r-- | load_sources.py | 36 |
2 files changed, 48 insertions, 0 deletions
```diff
@@ -52,6 +52,7 @@ class PkgData(object):
         pkg_store = pd.HDFStore('pkg.h5')
         self.pkgs = pkg_store['packages']
         self.descs = pkg_store['descriptions']
+        self.srcs = pkg_store['sources']
         pkg_store.close()
 
         cp_store = pd.HDFStore('cp.h5')
@@ -224,6 +225,15 @@ def export_srcpkgs(data, name, srcpkg_names):
         for srcpkg in srcpkg_names])
     versions = {}
 
+    srcfiles=""
+    for srcpkg in srcpkg_names:
+        srcfile = data.srcs[data.srcs['_srcpkg'] == srcpkg]['srcfile'].values[0]
+        letter = srcfile[0]
+        if srcfile[:3] == 'lib':
+            letter = srcfile[:4]
+        srcfile = 'http://ftp.debian.org/debian/pool/main/%s/%s/%s' % (letter,srcpkg,srcfile)
+        srcfiles = srcfiles + srcfile + " "
+
     for (_i, pkg) in binpkgs.iterrows():
         versions[pkg['_srcpkg']] = pkg['Version']
 
@@ -267,6 +277,8 @@ def export_srcpkgs(data, name, srcpkg_names):
         ('Computer languages', ', '.join(langs)),
         ('Status', ''),
         ('Is GNU', 'No'),
+        ('Version identifier', pkg['Version']),
+        ('Version download', srcfiles),
         ('Submitted by', 'Debian import'),
         ('Submitted date', today())])
 
diff --git a/load_sources.py b/load_sources.py
new file mode 100644
index 0000000..f420796
--- /dev/null
+++ b/load_sources.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+def pkg(para):
+    d = dict(para)
+    a = dict()
+
+    if 'Files' in d:
+        a['srcfile']=d['Files'].split('\n')[2].split(' ')[3]
+
+    if 'Source' in d:
+        # Source fields sometimes have the source version number; strip it.
+        a['_srcpkg'] = d['Source'].split(' ')[0]
+    else:
+        # No 'Source' field means that it has the same value as the 'Package'
+        # field.
+        a['_srcpkg'] = d['Package']
+
+    return a
+
+if __name__ == '__main__':
+    packages = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+    df = pd.DataFrame([pkg(p) for p in packages])
+    store = pd.HDFStore('pkg.h5')
+
+    print df
+
+    store = pd.HDFStore('pkg.h5')
+    store['sources'] = df
+    store.close()
+
```