aboutsummaryrefslogtreecommitdiff
path: root/export.py
blob: 3106f0702ec32a8008d0cbc4a7d3af9c208b1be5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import re
import sys

import numpy
import pandas as pd

def concat(xss):
    """Flatten one level: return a list of every item of every iterable in xss."""
    return [item for xs in xss for item in xs]

class Template(object):
    """A named template with an ordered list of (field, value) pairs.

    str() renders it as '{{name', one '|field=value' line per pair, and a
    closing '}}' line.
    """

    def __init__(self, name, values):
        # values: sequence of (field name, field value) pairs, kept in order.
        self.name = name
        self.values = values

    def __str__(self):
        field_lines = []

        for (field, value) in self.values:
            field_lines.append('|%s=%s' % (field, value))

        body = '\n'.join(field_lines)

        return '{{%s\n%s\n}}' % (self.name, body)

def parse_tags(s):
    """Split a ', '-separated tag string into a list, ignoring newlines."""
    # The field may be wrapped over several lines; join them before splitting.
    single_line = s.replace('\n', '')
    tags = single_line.split(', ')
    return tags

def extract_languages(tags):
    """Return the distinct languages named by 'facet::value' tags, sorted.

    Two kinds of tag contribute a language:
      * 'implemented-in::<lang>'
      * 'devel::lang:<lang>'

    Tags that do not contain '::' (for example the empty string produced by
    parsing an empty tag field) are skipped instead of raising ValueError.
    The result is sorted so that output is reproducible between runs.
    """
    langs = set()

    for tag in tags:
        (facet, sep, value) = tag.partition('::')

        if not sep:
            # Not a 'facet::value' tag; nothing to extract.
            continue

        if facet == 'implemented-in':
            langs.add(value)
        elif facet == 'devel' and value.startswith('lang:'):
            # 'lang:<name>' -> '<name>'
            langs.add(value.split(':')[1])

    return sorted(langs)

def export(pkgs, cps, cpf, name):
    """Print wiki template stubs for the upstream project called name.

    Selects the rows of cps whose 'Upstream-Name' equals name, collects the
    rows of pkgs whose 'Source' is one of the matching '_srcpkg' values, and
    prints an 'Entry' template followed by blank 'Project license', 'Person'
    and 'Resource' templates. The intermediate source package names, binary
    package names, tags and languages are printed first.

    Args:
        pkgs: package table; columns 'Source', 'Package', 'Homepage' and
            'Tag' are read.
        cps: summary table; columns 'Upstream-Name' and '_srcpkg' are read.
        cpf: not used by this function; kept so callers need not change.
        name: upstream name to export.

    Raises:
        ValueError: if no row of cps has 'Upstream-Name' equal to name.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    pkg_cps = cps[cps['Upstream-Name'] == name]
    srcpkg_names = list(pkg_cps['_srcpkg'])
    print(srcpkg_names)

    if not srcpkg_names:
        # pd.concat() would reject the empty list with a ValueError that
        # does not say which project was being exported.
        raise ValueError('no source package found for %r' % (name,))

    binpkgs = pd.concat([
        pkgs[pkgs['Source'] == srcpkg]
        for srcpkg in srcpkg_names])
    print(list(binpkgs['Package']))

    homepages = list(binpkgs['Homepage'])
    # XXX: maybe choose the one that appears the most?
    homepage = homepages[0] if homepages else ''

    tags = set(concat(
        [parse_tags(t) for t in binpkgs['Tag'] if not pd.isnull(t)]))
    print(tags)
    langs = [s.title() for s in extract_languages(tags)]
    print(langs)

    # The entry is named after the last matching source package. It is read
    # from the list explicitly because a comprehension's loop variable is
    # not visible outside the comprehension on Python 3.
    entry_name = srcpkg_names[-1]

    print(Template('Entry', [
        ('Name', entry_name),
        ('Short description', ''),
        ('Full description', ''),
        ('Homepage', homepage),
        ('User level', ''),
        # XXX get this information from apt-file
        ('Component programs', ''),
        ('VCS checkout command', ''),
        ('Computer languages', ', '.join(langs)),
        ('Status', ''),
        ('Is GNU', 'No')]))

    print(Template('Project license', [
        ('License', ''),
        ('License note', '')]))

    print(Template('Person', [
        ('Real name', ''),
        ('Role', ''),
        ('Email', ''),
        ('Resource URL', '')]))

    print(Template('Resource', [
        ('Resource kind', ''),
        ('Resource URL', '')]))

    # Disabled: a 'Software category' template is not emitted.
    #print(Template('Software category', [
    #    ('Resource kind', ''),
    #    ('Resource URL', '')]))

def main():
    """Load the package tables and export one project or a batch.

    Reads 'packages' from pkg.h5 and 'cp_files' / 'cp_summary' from cp.h5
    (both in the current directory). With no command-line argument, the
    first 100 distinct 'Source' values (in sorted order) are exported; with
    one argument, only that name is exported.

    Raises:
        RuntimeError: if more than one command-line argument is given.
    """
    # try/finally so a missing key does not leave the store open.
    pkg_store = pd.HDFStore('pkg.h5')
    try:
        pkgs = pkg_store['packages']
    finally:
        pkg_store.close()

    cp_store = pd.HDFStore('cp.h5')
    try:
        cpf = cp_store['cp_files']
        cps = cp_store['cp_summary']
    finally:
        cp_store.close()

    args = sys.argv[1:]

    if not args:
        srcps = sorted(set(pkgs['Source']))

        # Batch mode is capped at the first 100 names.
        for pkgname in srcps[:100]:
            export(pkgs, cps, cpf, pkgname)
    elif len(args) == 1:
        export(pkgs, cps, cpf, args[0])
    else:
        raise RuntimeError('usage: %s [NAME]' % (sys.argv[0],))

# Run the exporter only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()