aboutsummaryrefslogtreecommitdiff
path: root/doall.sh
blob: 5a3e48ee3bd1d762fd58ff4ecc4e4aac71f1340e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash

source err
set -x

# Work from the directory that holds this script, wherever it was invoked from.
# The path is quoted so that directories containing whitespace work, dirname
# copes with a script that lives directly in /, and a failed cd aborts the run
# instead of letting the rm -rf below hit an unrelated directory.
cd -- "$(dirname -- "$(readlink -f -- "${BASH_SOURCE[0]}")")" || exit 1


# Remove the results of any previous run and recreate the download directory.
rm -rf -- distfiles output *.log *.h5 metadata.ftp-master.debian.org
mkdir distfiles

# Download the source-package index of stable/main and append it, decompressed,
# to distfiles/Sources.
wget --output-document=- \
    http://ftp.debian.org/debian/dists/stable/main/source/Sources.gz \
    | gzip -dc >> distfiles/Sources
# Further suites, currently disabled:
#wget -O - http://ftp.debian.org/debian/dists/stable-updates/main/source/Sources.gz | gunzip >> distfiles/Sources
#wget -O - http://security.debian.org/debian-security/dists/stable/updates/main/source/Sources.gz | gunzip >> distfiles/Sources

# Unique source package names, one per line; the download-list loops further
# down iterate over this list.
PACKAGES=$(
    grep '^Package:' distfiles/Sources \
        | sort -u \
        | sed 's/Package: //'
)

# Pipe the index through pv into load_sources.py, which reads it on stdin.
pv distfiles/Sources | python load_sources.py

# Download the binary-package indexes of stable/main (architecture-independent
# first, then amd64) and append both, decompressed, to distfiles/Packages.
for arch in all amd64; do
    wget -O - "http://ftp.debian.org/debian/dists/stable/main/binary-${arch}/Packages.gz" \
        | gunzip >> distfiles/Packages
done
# Further suites, currently disabled:
#wget -O - http://ftp.debian.org/debian/dists/stable-updates/main/binary-amd64/Packages.gz | gunzip >> distfiles/Packages
#wget -O - http://security.debian.org/debian-security/dists/stable/updates/main/binary-all/Packages.gz | gunzip >> distfiles/Packages

# Pipe the combined index through pv into load_packages.py (read on stdin).
pv distfiles/Packages | python load_packages.py

# Download the English translation index of stable/main and append it,
# decompressed, to distfiles/Translation-en.
wget --output-document=- \
    http://ftp.debian.org/debian/dists/stable/main/i18n/Translation-en.bz2 \
    | bzip2 -dc >> distfiles/Translation-en
# Further suites, currently disabled:
#wget -O - http://ftp.debian.org/debian/dists/stable-updates/main/i18n/Translation-en.bz2 | bunzip2 >> distfiles/Translation-en
#wget -O - http://security.debian.org/debian-security/dists/stable/updates/main/i18n/Translation-en.bz2 | bunzip2 >> distfiles/Translation-en

# Pipe the file through pv into load_descriptions.py (read on stdin).
pv distfiles/Translation-en | python load_descriptions.py


# Build downloadlist: for every source package, the URLs of its
# stable_copyright and stable_changelog files on metadata.ftp-master.debian.org.
# The URL path is sharded by a prefix of the package name: the first four
# characters for names starting with "lib", the first character otherwise.
rm -f downloadlist*
# $PACKAGES is deliberately unquoted: it is a whitespace-separated list.
for PACKAGE in $PACKAGES; do
    # Parameter expansion instead of echo|cut saves several forks per package,
    # which adds up over the whole archive.
    if [[ $PACKAGE == lib* ]]; then
        LETTER=${PACKAGE:0:4}
    else
        LETTER=${PACKAGE:0:1}
    fi
    printf '%s\n' \
        "http://metadata.ftp-master.debian.org/changelogs/main/$LETTER/$PACKAGE/stable_copyright" \
        "http://metadata.ftp-master.debian.org/changelogs/main/$LETTER/$PACKAGE/stable_changelog"
done > downloadlist    # one redirect for the whole loop, not one open per line


# Fetch every URL in downloadlist, mirroring the remote directory layout under
# metadata.ftp-master.debian.org/ (-x) and skipping files that already exist
# (-nc). wget may return 8 here, because Debian can remove a package between
# the time downloadlist was built and the time its URL is actually fetched.
# A non-zero status is therefore not treated as fatal: retry up to 10 times,
# then judge success by counting the downloaded files below.
for (( attempt = 0; attempt < 10; attempt++ )); do
    if wget -nc -nv -x --continue -i downloadlist; then
        break
    fi
done

# Accept the run if at most 100 of the listed files are missing.
# Emailed 490848@bugs.debian.org 2018-07-06 with subject: 30 missing stable_copyright and stable_changelog files -- see https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=490848#51
if (( $(find metadata.ftp-master.debian.org -type f | wc -l) < $(wc -l < downloadlist) - 100 )); then
    # Diagnostics belong on stderr so they are not mixed into captured stdout.
    echo "$0: error: failed to download enough files" >&2
    exit 1
fi


# Some packages have no stable_copyright file. For each package whose
# stable_copyright was not downloaded, queue the alternative name
# stable_<package>.copyright in downloadlist404.
# $PACKAGES is deliberately unquoted: it is a whitespace-separated list.
for PACKAGE in $PACKAGES; do
    # Same path sharding as the main download list: first four characters for
    # "lib*" packages, first character otherwise (no echo|cut forks needed).
    if [[ $PACKAGE == lib* ]]; then
        LETTER=${PACKAGE:0:4}
    else
        LETTER=${PACKAGE:0:1}
    fi
    if [[ ! -f metadata.ftp-master.debian.org/changelogs/main/$LETTER/$PACKAGE/stable_copyright ]]; then
        echo "http://metadata.ftp-master.debian.org/changelogs/main/$LETTER/$PACKAGE/stable_${PACKAGE}.copyright"
    fi
done >> downloadlist404

# Only call wget when something is actually missing; with nothing queued the
# list is empty and wget would just fail.
if [[ -s downloadlist404 ]]; then
    wget -v -x --continue -i downloadlist404
fi


# Run the copyright and changelog loaders; each one gets its own log file that
# receives both stdout and stderr.
python load_copyright.py > cp_import.log 2>&1
python load_changelog.py > cl_import.log 2>&1

# Run the two exporters in order, with their output left on the terminal.
for exporter in export.py export_json.py; do
    python "$exporter"
done

# Write a report named "broken" listing the exported files that are unusable,
# and delete those files from output/.

# Section 1: empty files. A single find pass both reports and deletes them.
echo empty files: > broken
find output -type f -empty -print -delete >> broken

# Section 2: files that contain no "Project license" line. grep -L prints
# exactly the names of files without a match, so no count parsing is needed
# and output/ is scanned only once.
echo no license: >> broken
mapfile -t unlicensed < <(grep -L "Project license" output/*)
# Guard the rm: with nothing to delete it would fail for lack of operands and
# make the whole script end with a non-zero status.
if (( ${#unlicensed[@]} > 0 )); then
    printf '%s\n' "${unlicensed[@]}" >> broken
    rm -- "${unlicensed[@]}"
fi