aboutsummaryrefslogtreecommitdiff
path: root/src/FSD/Download.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/FSD/Download.hs')
-rw-r--r--src/FSD/Download.hs118
1 files changed, 118 insertions, 0 deletions
diff --git a/src/FSD/Download.hs b/src/FSD/Download.hs
new file mode 100644
index 0000000..3631ddc
--- /dev/null
+++ b/src/FSD/Download.hs
@@ -0,0 +1,118 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Download where
+
+import System.FilePath
+import Control.Monad
+import Data.Function
+import Data.List
+import Data.Text (Text)
+import Data.Text qualified as T
+import System.Exit
+import System.Process
+import System.Random
+
+wgetCommand = "wget"
+
+wgetIndexFileFlags = "-O- 2>/dev/null"
+
+gunzipCommand = "gunzip"
+
+bunzip2Command = "bunzip2"
+
+wgetMetadataFlags = "-N -nv -x -c"
+
+sourcesUrl = "https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz"
+
+packagesUrl = "https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz"
+
+translationUrl = "https://ftp.debian.org/debian/dists/stable/main/i18n/Translation-en.bz2"
+
+data MetadataType = MTChangelog | MTCopyright deriving Eq
+
+metadataUrlFile :: MetadataType -> Int -> String
+metadataUrlFile mtype i = toFilename mtype ++ "_urls_" ++ show i
+ where toFilename MTChangelog = "changelog"
+ toFilename MTCopyright = "copyright"
+
+wgetIndexFileCommand :: FilePath -> (String, String) -> String
+wgetIndexFileCommand outputDir (url, filename) =
+ wgetCommand ++ " " ++ wgetIndexFileFlags ++ " " ++ url ++ "|" ++ extractCommand ++ " >" ++ (outputDir </> filename) ++ " && echo Done: " ++ filename
+ where
+ extractCommand = if isSuffixOf ".gz" url then gunzipCommand else bunzip2Command
+
+downloadSources :: FilePath -> IO ExitCode
+downloadSources outputDir =
+ waitForProcess =<<
+ (runCommand $ wgetIndexFileCommand outputDir (sourcesUrl, "Sources"))
+
+downloadPackages :: FilePath -> IO ExitCode
+downloadPackages outputDir =
+ waitForProcess =<<
+ (runCommand $ wgetIndexFileCommand outputDir (packagesUrl, "Packages"))
+
+downloadTranslation :: FilePath -> IO ExitCode
+downloadTranslation outputDir =
+ waitForProcess =<<
+ (runCommand $
+ wgetIndexFileCommand outputDir (translationUrl, "Translation-en"))
+
+downloadMetadataFiles :: FilePath -> MetadataType -> Int -> IO [ProcessHandle]
+downloadMetadataFiles root mtype nWorkers =
+ sequence $
+ ( \i ->
+ runCommand
+ ( wgetCommand ++ " " ++ wgetMetadataFlags
+ ++ " -i " ++ (root </> metadataUrlFile mtype i)
+ ++ " -P " ++ root
+ ++ " && echo Done: worker " ++ show i
+ )
+ )
+ <$> [1 .. nWorkers]
+
+writeWgetListFiles :: FilePath -> MetadataType -> [Text] -> Int -> IO ()
+writeWgetListFiles outputDir mtype packages n = do
+ pairs <- zip [1 ..] . fmap T.unlines <$> mapM shuffle (replicate n urls)
+ mapM_ (\(i, list) ->
+ writeFile (outputDir </> metadataUrlFile mtype i)
+ (T.unpack list)) pairs
+ where
+ urls = metadataUrl mtype <$> packages
+
+shuffle :: [Text] -> IO [Text]
+shuffle xs =
+ fmap fst . sortBy (compare `on` snd) . zip xs
+ <$> replicateM n (randomRIO (0, n * n))
+ where
+ n = length xs
+
+metadataUrl :: MetadataType -> Text -> Text
+metadataUrl mtype pkgName =
+ "https://metadata.ftp-master.debian.org/changelogs/main/" <>
+ (if T.isPrefixOf "lib" pkgName
+ then T.take 4 pkgName else T.take 1 pkgName) <>
+ "/" <> pkgName <> "/" <> toFilename mtype
+ where
+ toFilename MTChangelog = "stable_changelog"
+ toFilename MTCopyright = "stable_copyright"