{- Copyright (C) 2022 Yuchen Pei. This file is part of fsd. fsd is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. fsd is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with fsd. If not, see . -} {-# LANGUAGE ImportQualifiedPost #-} {-# LANGUAGE OverloadedStrings #-} module FSD.Download where import System.FilePath import Control.Monad import Data.Function import Data.List import Data.Text (Text) import Data.Text qualified as T import System.Exit import System.Process import System.Random wgetCommand = "wget" wgetIndexFileFlags = "-O- 2>/dev/null" gunzipCommand = "gunzip" bunzip2Command = "bunzip2" wgetMetadataFlags = "-N -nv -x -c" sourcesUrl = "https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz" packagesUrl = "https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz" translationUrl = "https://ftp.debian.org/debian/dists/stable/main/i18n/Translation-en.bz2" data MetadataType = MTChangelog | MTCopyright deriving Eq metadataUrlFile :: MetadataType -> Int -> String metadataUrlFile mtype i = toFilename mtype ++ "_urls_" ++ show i where toFilename MTChangelog = "changelog" toFilename MTCopyright = "copyright" wgetIndexFileCommand :: FilePath -> (String, String) -> String wgetIndexFileCommand outputDir (url, filename) = wgetCommand ++ " " ++ wgetIndexFileFlags ++ " " ++ url ++ "|" ++ extractCommand ++ " >" ++ (outputDir filename) ++ " && echo Done: " ++ filename where extractCommand = if isSuffixOf ".gz" url then gunzipCommand else bunzip2Command downloadSources :: FilePath -> IO ExitCode downloadSources outputDir = waitForProcess =<< (runCommand $ wgetIndexFileCommand outputDir (sourcesUrl, "Sources")) downloadPackages :: FilePath -> IO ExitCode downloadPackages outputDir = waitForProcess =<< (runCommand $ wgetIndexFileCommand outputDir (packagesUrl, "Packages")) downloadTranslation :: FilePath -> IO ExitCode downloadTranslation outputDir = waitForProcess =<< (runCommand $ wgetIndexFileCommand outputDir (translationUrl, "Translation-en")) downloadMetadataFiles :: FilePath -> MetadataType -> Int -> IO [ProcessHandle] downloadMetadataFiles root mtype nWorkers = sequence $ ( \i -> runCommand ( wgetCommand ++ " " ++ wgetMetadataFlags ++ " -i " ++ (root metadataUrlFile mtype i) ++ " -P " ++ root ++ " && echo Done: worker " ++ show i ) ) <$> [1 .. nWorkers] writeWgetListFiles :: FilePath -> MetadataType -> [Text] -> Int -> IO () writeWgetListFiles outputDir mtype packages n = do pairs <- zip [1 ..] . fmap T.unlines <$> mapM shuffle (replicate n urls) mapM_ (\(i, list) -> writeFile (outputDir metadataUrlFile mtype i) (T.unpack list)) pairs where urls = metadataUrl mtype <$> packages shuffle :: [Text] -> IO [Text] shuffle xs = fmap fst . sortBy (compare `on` snd) . zip xs <$> replicateM n (randomRIO (0, n * n)) where n = length xs metadataUrl :: MetadataType -> Text -> Text metadataUrl mtype pkgName = "https://metadata.ftp-master.debian.org/changelogs/main/" <> (if T.isPrefixOf "lib" pkgName then T.take 4 pkgName else T.take 1 pkgName) <> "/" <> pkgName <> "/" <> toFilename mtype where toFilename MTChangelog = "stable_changelog" toFilename MTCopyright = "stable_copyright"