about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dafydd Harries <daf@rhydd.org> 2013-03-18 19:01:14 -0400
committer: Dafydd Harries <daf@rhydd.org> 2013-03-18 19:01:14 -0400
commit: b0c990223f60363d750cfd41420367b8b8088a2a (patch)
tree: 30c0bbbbc8cebb79feef6998cf565723d4ec0398
parent: 3b69e5b4e9f05ffeb87ad74a8d373ea84030192c (diff)
fix bulk package export
-rw-r--r-- export.py 35
1 files changed, 26 insertions, 9 deletions
diff --git a/export.py b/export.py
index 1b132ef..7ddb9d3 100644
--- a/export.py
+++ b/export.py
@@ -31,6 +31,9 @@ def concat(xss):
def today():
return datetime.datetime.now().strftime('%Y-%m-%d')
+def warn(*x):
+ print >>sys.stderr, ('warning:',) + x
+
class PkgData(object):
def __init__(self):
pkg_store = pd.HDFStore('pkg.h5')
@@ -151,13 +154,15 @@ def extract_resources(cp_header):
('Resource kind', 'Download'),
('Resource URL', cp_header[key])])
-def export(data, name):
- pkg_cps = data.cps[data.cps['Upstream-Name'] == name]
- srcpkg_names = list(pkg_cps['_srcpkg'])
- print srcpkg_names
+def export_srcpkgs(data, name, srcpkg_names):
binpkgs = pd.concat([
data.pkgs[data.pkgs['_srcpkg'] == srcpkg]
for srcpkg in srcpkg_names])
+
+ if len(binpkgs) == 0:
+ warn('no binary packages found for', srcpkg_names)
+ return
+
binpkg_names = sorted(binpkgs['Package'], key=len)
print binpkg_names
print list(binpkgs['Package'])
@@ -176,7 +181,7 @@ def export(data, name):
# Heuristic: choose the package with the shortest name.
# We could try to do something smarter, like look for the common
# prefix of the descriptions of all the binary packages.
- descpkg = binpkgs[0]
+ descpkg = binpkg_names[0]
desc = list(data.descs[
data.descs['Package'] == descpkg]['Description-en'])[0]
@@ -235,16 +240,28 @@ def export(data, name):
# ('Resource kind', ''),
# ('Resource URL', '')])
+def export(data, name):
+ pkg_cps = data.cps[data.cps['Upstream-Name'] == name]
+ srcpkg_names = list(pkg_cps['_srcpkg'])
+ print name
+ print 'source packages:', srcpkg_names
+ export_srcpkgs(data, name, srcpkg_names)
+
def main():
data = PkgData()
args = sys.argv[1:]
if len(args) == 0:
- # XXX use upstream names
- srcps = sorted(set(data.pkgs['_srcpkg']))
+ unames = set(data.cps['Upstream-Name'].dropna())
+
+ for uname in unames:
+ export(data, uname)
+
+ no_uname = set(data.cps[
+ data.cps['Upstream-Name'].isnull()]['_srcpkg'])
- for pkgname in srcps[:100]:
- export(data, pkgname)
+ for srcpkg in no_uname:
+ export_srcpkgs(data, srcpkg, [srcpkg])
elif len(args) == 1:
export(data, args[0])
else: