From 61945db94b1007362a2994221c84ec4d0c33b37d Mon Sep 17 00:00:00 2001
From: Dafydd Harries
Date: Sun, 17 Mar 2013 22:14:13 -0400
Subject: extract language information

---
Review notes (this section is ignored when the patch is applied):
 - The line structure of this patch was rebuilt from a copy that had been
   collapsed onto a single line.  Indentation of the context lines is
   inferred and should be checked against export.py before applying.
 - The added code was revised: concat() no longer shadows the builtin
   all(); extract_languages() skips entries without a '::' separator
   instead of raising ValueError and returns a sorted list; the debug
   prints in export() go to stderr instead of stdout.
 - The post-image blob id on the index line predates these revisions.

 export.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/export.py b/export.py
index f28a45d..3106f07 100644
--- a/export.py
+++ b/export.py
@@ -2,8 +2,18 @@
 import re
 import sys
 
+import numpy
 import pandas as pd
 
+def concat(xss):
+    """Flatten an iterable of iterables into a single list."""
+    flat = []
+
+    for xs in xss:
+        flat.extend(xs)
+
+    return flat
+
 class Template(object):
     def __init__(self, name, values):
         self.name = name
@@ -14,6 +24,35 @@ class Template(object):
             self.name,
             '\n'.join(['|' + '%s=%s' % (n, v) for (n, v) in self.values]))
 
+def parse_tags(s):
+    """Split a tag field into a list of tag strings.
+
+    Newlines are removed first, then the text is split on ', '.
+    """
+    return s.replace('\n', '').split(', ')
+
+def extract_languages(tags):
+    """Return the sorted, unique languages named by `tags`.
+
+    A language is taken from an `implemented-in::<lang>` tag or from a
+    `devel::lang:<lang>` tag.  Entries without a `::` separator (such as
+    an empty string) are skipped instead of raising ValueError.
+    """
+    langs = set()
+
+    for tag in tags:
+        (facet, sep, value) = tag.partition('::')
+
+        if not sep:
+            continue
+
+        if facet == 'implemented-in':
+            langs.add(value)
+        elif facet == 'devel' and value.startswith('lang:'):
+            langs.add(value.split(':')[1])
+
+    return sorted(langs)
+
 def export(pkgs, cps, cpf, name):
     pkg_cps = cps[cps['Upstream-Name'] == name]
     srcpkg_names = list(pkg_cps['_srcpkg'])
@@ -25,6 +64,12 @@ def export(pkgs, cps, cpf, name):
     homepages = list(binpkgs['Homepage'])
     # XXX: maybe choose the one that appears the most?
     homepage = homepages[0] if homepages else ''
+    tags = set(concat(
+        [parse_tags(t) for t in binpkgs['Tag'] if not pd.isnull(t)]))
+    # Diagnostics go to stderr so they do not mix with the template
+    # that is printed to stdout below.
+    print >>sys.stderr, tags
+    langs = [s.title() for s in extract_languages(tags)]
+    print >>sys.stderr, langs
 
     print Template('Entry', [
         ('Name', srcpkg),
@@ -35,7 +80,7 @@ def export(pkgs, cps, cpf, name):
         # XXX get this information from apt-file
         ('Component programs', ''),
         ('VCS checkout command', ''),
-        ('Computer languages', ''),
+        ('Computer languages', ', '.join(langs)),
         ('Status', ''),
         ('Is GNU', 'No')])
 
-- 
cgit v1.2.3