diff options
author | Dafydd Harries <daf@rhydd.org> | 2013-03-17 22:14:13 -0400 |
---|---|---|
committer | Dafydd Harries <daf@rhydd.org> | 2013-03-17 22:14:13 -0400 |
commit | 61945db94b1007362a2994221c84ec4d0c33b37d (patch) | |
tree | e7d6069d09f8c2a69e2ef3c24d60ed8fda1e9f34 | |
parent | 216d7e307a6e0413dbd6d18991fc079e148dd88f (diff) |
extract language information
-rw-r--r-- | export.py | 32 |
1 files changed, 31 insertions, 1 deletions
@@ -2,8 +2,17 @@
 import re
 import sys
 
+import numpy
 import pandas as pd
 
+def concat(xss):
+    all = []
+
+    for xs in xss:
+        all.extend(xs)
+
+    return all
+
 class Template(object):
     def __init__(self, name, values):
         self.name = name
@@ -14,6 +23,22 @@ class Template(object):
             self.name,
             '\n'.join(['|' + '%s=%s' % (n, v) for (n, v) in self.values]))
 
+def parse_tags(s):
+    return s.replace('\n', '').split(', ')
+
+def extract_languages(tags):
+    langs = []
+
+    for tag in tags:
+        (a, b) = tag.split('::')
+
+        if a == 'implemented-in':
+            langs.append(b)
+        elif a == 'devel' and b.startswith('lang:'):
+            langs.append(b.split(':')[1])
+
+    return list(set(langs))
+
 def export(pkgs, cps, cpf, name):
     pkg_cps = cps[cps['Upstream-Name'] == name]
     srcpkg_names = list(pkg_cps['_srcpkg'])
@@ -25,6 +50,11 @@ def export(pkgs, cps, cpf, name):
     homepages = list(binpkgs['Homepage'])
     # XXX: maybe choose the one that appears the most?
     homepage = homepages[0] if homepages else ''
+    tags = set(concat(
+        [parse_tags(t) for t in binpkgs['Tag'] if not pd.isnull(t)]))
+    print tags
+    langs = [s.title() for s in extract_languages(tags)]
+    print langs
 
     print Template('Entry', [
         ('Name', srcpkg),
@@ -35,7 +65,7 @@ def export(pkgs, cps, cpf, name):
         # XXX get this information from apt-file
         ('Component programs', ''),
         ('VCS checkout command', ''),
-        ('Computer languages', ''),
+        ('Computer languages', ', '.join(langs)),
         ('Status', ''),
         ('Is GNU', 'No')])
 