aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDafydd Harries <daf@rhydd.org>2013-03-18 16:02:38 -0400
committerDafydd Harries <daf@rhydd.org>2013-03-18 16:02:38 -0400
commit8d53b7b7ae920bf3a53d10490ab3e79e917c5918 (patch)
treedcf28d2a9d1a85d038cc2b861857042d7600646e
parented5ca84e96e5c84cd35e5a387471eb3165d66174 (diff)
extract/export download URLS
-rw-r--r--export.py30
1 files changed, 26 insertions, 4 deletions
diff --git a/export.py b/export.py
index 91e19a0..d1d3304 100644
--- a/export.py
+++ b/export.py
@@ -9,6 +9,16 @@ import pandas as pd
import license
+download_keys = """
+ Origin
+ Original-Source
+ Source
+ Source-Code
+ X-Origin
+ X-Original-Package
+ X-Source
+ """
+
def concat(xss):
all = []
@@ -93,6 +103,15 @@ def srcpkg_extract_licenses(header, filess):
('License', canon),
('License note', (cp + '\n' + txt))])
+def extract_resources(cp_header):
+ cp_header = cp_header.dropna()
+
+ for key in re.findall('\S+', download_keys):
+ if key in cp_header:
+ yield Template('Resource', [
+ ('Resource kind', 'Download'),
+ ('Resource URL', cp_header[key])])
+
def export(pkgs, descs, cps, cpf, name):
pkg_cps = cps[cps['Upstream-Name'] == name]
srcpkg_names = list(pkg_cps['_srcpkg'])
@@ -137,9 +156,12 @@ def export(pkgs, descs, cps, cpf, name):
('Status', ''),
('Is GNU', 'No')])
+ res = []
+
for srcpkg in srcpkg_names:
pkg_cps = cps[cps['_srcpkg'] == srcpkg].ix[0]
pkg_cpf = cpf[cpf['_srcpkg'] == srcpkg]
+ res.extend(list(extract_resources(pkg_cps)))
#licenses = license.parse_licenses(list(pkg_cpf['_license']))
#licenses = [
# license.parse_licenses(row['_license'])
@@ -151,16 +173,16 @@ def export(pkgs, descs, cps, cpf, name):
# XXX: eliminate duplicates
print template
+ for template in res:
+ # XXX: eliminate duplicates
+ print template
+
print Template('Person', [
('Real name', ''),
('Role', ''),
('Email', ''),
('Resource URL', '')])
- print Template('Resource', [
- ('Resource kind', ''),
- ('Resource URL', '')])
-
#print Template('Software category', [
# ('Resource kind', ''),
# ('Resource URL', '')])