aboutsummaryrefslogtreecommitdiff
path: root/src/FSD/Download.hs
blob: 3631ddc257de027dd9ba9b87955dfdab3215c7ea (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
{-
Copyright (C) 2022 Yuchen Pei.

This file is part of fsd.

fsd is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

fsd is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General
Public License for more details.

You should have received a copy of the GNU Affero General Public
License along with fsd.  If not, see <https://www.gnu.org/licenses/>.

-}

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}

module FSD.Download where

import System.FilePath
import Control.Monad
import Data.Function
import Data.List
import Data.Text (Text)
import Data.Text qualified as T
import System.Exit
import System.Process
import System.Random

wgetCommand = "wget"

wgetIndexFileFlags = "-O- 2>/dev/null"

gunzipCommand = "gunzip"

bunzip2Command = "bunzip2"

wgetMetadataFlags = "-N -nv -x -c"

sourcesUrl = "https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz"

packagesUrl = "https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz"

translationUrl = "https://ftp.debian.org/debian/dists/stable/main/i18n/Translation-en.bz2"

data MetadataType = MTChangelog | MTCopyright deriving Eq

metadataUrlFile :: MetadataType -> Int -> String
metadataUrlFile mtype i = toFilename mtype ++ "_urls_" ++ show i
  where toFilename MTChangelog = "changelog"
        toFilename MTCopyright = "copyright"

wgetIndexFileCommand :: FilePath -> (String, String) -> String
wgetIndexFileCommand outputDir (url, filename) =
  wgetCommand ++ " " ++ wgetIndexFileFlags ++ " " ++ url ++ "|" ++ extractCommand ++ " >" ++ (outputDir </> filename) ++ " && echo Done: " ++ filename
  where
    extractCommand = if isSuffixOf ".gz" url then gunzipCommand else bunzip2Command

downloadSources :: FilePath -> IO ExitCode
downloadSources outputDir =
  waitForProcess =<<
  (runCommand $ wgetIndexFileCommand outputDir (sourcesUrl, "Sources"))

downloadPackages :: FilePath -> IO ExitCode
downloadPackages outputDir =
  waitForProcess =<<
  (runCommand $ wgetIndexFileCommand outputDir (packagesUrl, "Packages"))

downloadTranslation :: FilePath -> IO ExitCode
downloadTranslation outputDir =
  waitForProcess =<<
  (runCommand $
   wgetIndexFileCommand outputDir (translationUrl, "Translation-en"))

downloadMetadataFiles :: FilePath -> MetadataType -> Int -> IO [ProcessHandle]
downloadMetadataFiles root mtype nWorkers =
  sequence $
    ( \i ->
        runCommand
          ( wgetCommand ++ " " ++ wgetMetadataFlags
              ++ " -i " ++ (root </> metadataUrlFile mtype i)
              ++ " -P " ++ root
              ++ " && echo Done: worker " ++ show i
          )
    )
      <$> [1 .. nWorkers]

writeWgetListFiles ::  FilePath -> MetadataType -> [Text] -> Int -> IO ()
writeWgetListFiles outputDir mtype packages n = do
  pairs <- zip [1 ..] . fmap T.unlines <$> mapM shuffle (replicate n urls)
  mapM_ (\(i, list) ->
           writeFile (outputDir </> metadataUrlFile mtype i)
           (T.unpack list)) pairs
  where
    urls = metadataUrl mtype <$> packages

shuffle :: [Text] -> IO [Text]
shuffle xs =
  fmap fst . sortBy (compare `on` snd) . zip xs
    <$> replicateM n (randomRIO (0, n * n))
  where
    n = length xs

metadataUrl :: MetadataType -> Text -> Text
metadataUrl mtype pkgName =
 "https://metadata.ftp-master.debian.org/changelogs/main/" <>
  (if T.isPrefixOf "lib" pkgName
     then T.take 4 pkgName else T.take 1 pkgName) <>
  "/" <> pkgName <> "/" <> toFilename mtype
  where
    toFilename MTChangelog = "stable_changelog"
    toFilename MTCopyright = "stable_copyright"