diff options
author | Yuchen Pei <hi@ypei.me> | 2022-05-19 22:23:10 +1000 |
---|---|---|
committer | Yuchen Pei <hi@ypei.me> | 2022-05-19 22:23:10 +1000 |
commit | 3db93bc6f7b46bc322694e6658b8f559433a03c6 (patch) | |
tree | 1273a17e11e0d9888ae57676906c68f8bb77e287 /src/FSD | |
parent | 09c05e86a1096b08eb7483b970207d47d0388665 (diff) |
Replacing the files with a haskell rewrite.
Diffstat (limited to 'src/FSD')
-rw-r--r-- | src/FSD/ChangeLog.hs | 60 | ||||
-rw-r--r-- | src/FSD/Control.hs | 84 | ||||
-rw-r--r-- | src/FSD/Copyright.hs | 106 | ||||
-rw-r--r-- | src/FSD/Db.hs | 211 | ||||
-rw-r--r-- | src/FSD/Download.hs | 118 | ||||
-rw-r--r-- | src/FSD/Package.hs | 61 | ||||
-rw-r--r-- | src/FSD/PackageInfo.hs | 70 | ||||
-rw-r--r-- | src/FSD/Source.hs | 45 | ||||
-rw-r--r-- | src/FSD/Translation.hs | 41 | ||||
-rw-r--r-- | src/FSD/Types.hs | 248 | ||||
-rw-r--r-- | src/FSD/Wiki.hs | 156 |
11 files changed, 1200 insertions, 0 deletions
diff --git a/src/FSD/ChangeLog.hs b/src/FSD/ChangeLog.hs new file mode 100644 index 0000000..195b713 --- /dev/null +++ b/src/FSD/ChangeLog.hs @@ -0,0 +1,60 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. + +-} + +{-# LANGUAGE ImportQualifiedPost #-} +module FSD.ChangeLog where + +import Data.Generics +import Data.Time +import Data.Text qualified as T +import Debian.Changes +import Debian.Version +import Text.Regex.TDFA +import FSD.Types + +readChangeLog :: String -> Maybe ChangeLogEntry +readChangeLog raw = case parseEntry raw of + Left _ -> Nothing + Right (entry, _) -> Just entry + +-- Debian version is an abstract type with no constructor, hence the ugly hack. 
+getVersion :: DebianVersion -> Maybe String +getVersion v = + let match = show v =~ ("\".*\"" :: String) + in if null match then Nothing else Just $ init $ tail match + +convertChangeLogEntry :: ChangeLogEntry -> Maybe FSDChangeLogEntry +convertChangeLogEntry entry = do + version <- getVersion $ logVersion entry + time <- parseTime' $ logDate entry + return $ + FSDChangeLogEntry + (T.pack $ logPackage entry) + (T.pack version) + (T.pack $ logWho entry) + time + where + parseTime' raw = + orElse + (parseTimeM False defaultTimeLocale "%a, %d %b %Y %T %Z" raw) + (parseTimeM False defaultTimeLocale "%a, %e %b %Y %T %Z" raw) + +getChangeLogEntry :: String -> Maybe FSDChangeLogEntry +getChangeLogEntry raw = readChangeLog raw >>= convertChangeLogEntry diff --git a/src/FSD/Control.hs b/src/FSD/Control.hs new file mode 100644 index 0000000..2395f85 --- /dev/null +++ b/src/FSD/Control.hs @@ -0,0 +1,84 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. 
+ +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module FSD.Control where + +import Data.Either +import Data.List.Extra +import Data.Text (Text) +import Data.Text qualified as T +import Debian.Control +import Debug.Trace + +-- import Text.Regex.TDFA + +-- parse sources, packages, translations and copyright; input that fails to parse yields an empty Control +-- https://manpages.debian.org/bullseye/dpkg-dev/deb822.5.en.html +-- https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ + +readControl :: String -> Control +readControl = + fromRight (Control []) . parseControl "" + +-- stripControl . fromRight (Control []) . parseControl "" + +simpleFV :: String -> Text +simpleFV = T.pack + +-- folded field: line breaks and runs of whitespace are not significant, so they are collapsed to single spaces +foldedFV :: String -> Text +foldedFV = T.unwords . T.words . T.pack + +-- multilines: synopsis line and long part of fSynFV glued back together and stripped +-- may erroneously remove first char of first line (we'll see) +multilineFV :: String -> Text +multilineFV raw = + let (first, rest) = fSynFV raw + in T.strip $ T.concat [first, "\n", rest] + +-- whitespace separated list; an empty or blank value yields [] +wsListFV :: String -> [Text] +wsListFV = T.words . foldedFV + +-- line-based lists: one stripped entry per input line +lbListFV :: String -> [Text] +lbListFV = fmap T.strip . T.splitOn "\n" . T.pack + +-- formatted text with synopsis: returns (first line, long part); " ." separator lines of the input become blank lines between paragraphs +fSynFV :: String -> (Text, Text) +fSynFV raw = + let (synopsis, longRaw) = breakOn "\n" raw + paras = + if null longRaw + then [] + else T.splitOn "\n .\n" $ T.pack $ drop 1 longRaw -- drop the line break that ended the synopsis + full = + T.intercalate "\n\n" $ + (T.intercalate "\n" . fmap (T.drop 1) . T.lines) <$> paras -- T.drop 1 removes the one-character indent; unlike T.tail it is total on an empty line + in (T.pack synopsis, full) + +-- name of the first real field of a paragraph, skipping leading comments +firstFieldName :: Paragraph -> Maybe String +firstFieldName (Paragraph []) = Nothing +firstFieldName (Paragraph (Comment _ : xs)) = + firstFieldName (Paragraph xs) +firstFieldName (Paragraph (Field (name, _) : _)) = Just name diff --git a/src/FSD/Copyright.hs b/src/FSD/Copyright.hs new file mode 100644 index 0000000..68b7b50 --- /dev/null +++ b/src/FSD/Copyright.hs @@ -0,0 +1,106 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. 
+ +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. + +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module FSD.Copyright where + +import FSD.Control +import Data.Map hiding (filter) +import Data.Maybe +import Data.Text (Text) +import Data.Text qualified as T +import Debian.Control +import Text.Regex.TDFA +import FSD.Types + +parseCopyright :: Text -> Control -> Maybe (Upstream, Copyright) +parseCopyright package control = + case unControl control of + [] -> Nothing + header : rest -> + -- no parse if header is wrong + if fromMaybe "" (firstFieldName header) /= "Format" + then Nothing + else + Just + ( getUpstream package header, + Copyright package (getLicenses (header : files) licenses) + ) + where + files = filter (\para -> + fromMaybe "" (firstFieldName para) == "Files") rest + licenses = filter + (\para -> + fromMaybe "" (firstFieldName para) == "License") rest + +getUpstream :: Text -> Paragraph -> Upstream +getUpstream package para = + Upstream package uName contacts (SourceUrls source) + where + uName = T.pack <$> fieldValue "Upstream-Name" para + contacts = case fieldValue "Upstream-Contact" para of + Just contact -> parseContacts $ lbListFV contact + Nothing -> [] + source = case fieldValue "Source" para of + Just source -> parseSource $ multilineFV source + Nothing -> [] + +parseContacts :: [Text] -> [Contact] +parseContacts = catMaybes . 
fmap parseContact + +parseContact :: Text -> Maybe Contact +parseContact raw = + case T.unpack raw =~ ("^(.*)<(.*@.*)>$" :: String) :: [[String]] of + [[_, "", email]] -> Just $ Contact Nothing (T.pack email) + [[_, name, email]] -> + Just $ + Contact + (Just $ T.strip $ T.pack name) + (T.pack email) + _ -> Nothing + +parseSource :: Text -> [Text] +parseSource = fmap T.strip . T.splitOn "\n" + +getLicenses :: [Paragraph] -> [Paragraph] -> [LicenseInfo] +getLicenses paras licenses = + catMaybes $ getLicense (getLicenseMap licenses) <$> paras + +getLicense :: Map Text Text -> Paragraph -> Maybe LicenseInfo +getLicense licMap para = case fSynFV <$> fieldValue "License" para of + Just ("", _) -> Nothing + Nothing -> Nothing + Just (name, desc) -> + Just $ + LicenseInfo + (multilineFV <$> fieldValue "Copyright" para) + name + (if T.null desc then Data.Map.lookup name licMap else Just desc) + +getLicenseMap :: [Paragraph] -> Map Text Text +getLicenseMap licenses = fromList $ catMaybes $ toPair <$> licenses + where + toPair license = case fSynFV <$> fieldValue "License" license of + Just ("", _) -> Nothing + Just (_, "") -> Nothing + Just (name, desc) -> Just (name, desc) + Nothing -> Nothing diff --git a/src/FSD/Db.hs b/src/FSD/Db.hs new file mode 100644 index 0000000..38a822c --- /dev/null +++ b/src/FSD/Db.hs @@ -0,0 +1,211 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. 
If not, see <https://www.gnu.org/licenses/>. + +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeOperators #-} + +module FSD.Db where + +import Control.Monad +import FSD.Copyright +import Data.Text (Text) +import Data.Text qualified as T +import Database.SQLite.Simple +import FSD.Types + +newtype FsDb = FsDb {getConn :: Connection} + +withFsDb :: FilePath -> (FsDb -> IO a) -> IO a +withFsDb path f = withConnection path (f . FsDb) + +-- creations + +initConn :: FsDb -> IO () +initConn (FsDb conn) = do + execute_ + conn + "CREATE TABLE IF NOT EXISTS sources \ + \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \ + \, version TEXT NOT NULL \ + \, file TEXT NOT NULL \ + \, homepage TEXT \ + \)" + execute_ + conn + "CREATE TABLE IF NOT EXISTS packages \ + \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \ + \, version TEXT NOT NULL \ + \, homepage TEXT \ + \, tags TEXT NOT NULL \ + \)" + execute_ + conn + "CREATE TABLE IF NOT EXISTS translation \ + \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \ + \, shortDesc TEXT NOT NULL \ + \, fullDesc TEXT NOT NULL \ + \)" + execute_ + conn + "CREATE TABLE IF NOT EXISTS changelog \ + \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \ + \, version TEXT NOT NULL \ + \, author TEXT NOT NULL \ + \, date TEXT NOT NULL \ + \)" + execute_ + conn + "CREATE TABLE IF NOT EXISTS copyright \ + \( id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT \ + \, package TEXT NOT NULL \ + \, entryId INTEGER NOT NULL \ + \, copyright TEXT \ + \, license TEXT NOT NULL \ + \, note TEXT \ + \, CONSTRAINT uniqname UNIQUE (package, entryId) ON CONFLICT REPLACE \ + \)" + execute_ + conn + "CREATE TABLE IF NOT EXISTS upstream \ + \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \ + \, name TEXT \ + \, contacts TEXT \ + \, sources TEXT \ + \)" + +-- insertions + +insertSource :: FsDb -> Source -> IO () +insertSource (FsDb conn) source = + execute conn "INSERT INTO sources VALUES 
(?,?,?,?)" source + +insertPackage :: FsDb -> Package -> IO () +insertPackage (FsDb conn) package = + execute conn "INSERT INTO packages VALUES (?,?,?,?)" package + +insertTranslation :: FsDb -> Translation -> IO () +insertTranslation (FsDb conn) translation = + execute conn "INSERT INTO translation VALUES (?,?,?)" translation + +insertFSDChangeLogEntry :: FsDb -> FSDChangeLogEntry -> IO () +insertFSDChangeLogEntry (FsDb conn) change = + execute conn "INSERT INTO changelog VALUES (?,?,?,?)" change + +insertCopyright :: FsDb -> Copyright -> IO () +insertCopyright db (Copyright package licenses) = + zipWithM_ (insertLicense db package) [0 ..] licenses + +insertUpstream :: FsDb -> Upstream -> IO () +insertUpstream (FsDb conn) upstream = + execute conn "INSERT INTO upstream VALUES (?,?,?,?)" upstream + +insertLicense :: FsDb -> Text -> Int -> LicenseInfo -> IO () +insertLicense (FsDb conn) package idx (LicenseInfo copyright license note) = + execute + conn + "INSERT INTO copyright (package,entryId,copyright,license,note) \ + \VALUES (?,?,?,?,?)" + (package, idx, copyright, license, note) + +-- queries +getDbAll :: FsDb -> IO [Source :. Package :. Translation :. FSDChangeLogEntry :. Upstream] +getDbAll (FsDb conn) = + query_ + conn + "SELECT sources.*,packages.*,translation.*,changelog.*,upstream.* \ + \FROM sources JOIN packages USING (package,version)\ + \ JOIN changelog USING (package,version)\ + \ JOIN translation USING (package)\ + \ JOIN upstream USING (package)\ + \" + +getDbPkgNames :: FsDb -> IO [Text] +getDbPkgNames (FsDb conn) = do + rows <- query_ conn + "SELECT DISTINCT package\ + \ FROM sources JOIN packages USING (package,version)\ + \ JOIN changelog USING (package,version)\ + \ JOIN translation USING (package)\ + \ JOIN upstream USING (package)\ + \ JOIN copyright USING (package)\ + \" + return $ fromOnly <$> rows + +getDbAllTypes :: Text -> FsDb -> IO (Maybe (Source :. Package :. Translation :. FSDChangeLogEntry :. 
Upstream)) +getDbAllTypes pkgName (FsDb conn) = do + results <- query + conn + "SELECT sources.*,packages.*,translation.*,changelog.*,upstream.* \ + \FROM sources JOIN packages USING (package,version)\ + \ JOIN changelog USING (package,version)\ + \ JOIN translation USING (package)\ + \ JOIN upstream USING (package) \ + \WHERE package = ?\ + \" (Only pkgName) + return $ case results of + [source] -> Just source + otherwise -> Nothing + +getDbSource :: Text -> FsDb -> IO (Maybe Source) +getDbSource pkgName (FsDb conn) = do + results <- query conn "SELECT * FROM sources WHERE package = ?" (Only pkgName) + return $ case results of + [source] -> Just source + otherwise -> Nothing + +getDbPackage :: Text -> FsDb -> IO (Maybe Package) +getDbPackage pkgName (FsDb conn) = do + results <- query conn "SELECT * FROM packages WHERE package = ?" (Only pkgName) + return $ case results of + [package] -> Just package + otherwise -> Nothing + +getDbTranslation :: Text -> FsDb -> IO (Maybe Translation) +getDbTranslation pkgName (FsDb conn) = do + results <- query conn "SELECT * FROM translation WHERE package = ?" (Only pkgName) + return $ case results of + [translation] -> Just translation + otherwise -> Nothing + +getDbFSDChangeLogEntry :: Text -> FsDb -> IO (Maybe FSDChangeLogEntry) +getDbFSDChangeLogEntry pkgName (FsDb conn) = do + results <- query conn "SELECT * FROM changelog WHERE package = ?" (Only pkgName) + return $ case results of + [entry] -> Just entry + otherwise -> Nothing + +getDbUpstream :: Text -> FsDb -> IO (Maybe Upstream) +getDbUpstream pkgName (FsDb conn) = do + results <- query conn "SELECT * FROM upstream WHERE package = ?" (Only pkgName) + return $ case results of + [upstream] -> Just upstream + otherwise -> Nothing + +getDbCopyright :: Text -> FsDb -> IO (Maybe Copyright) +getDbCopyright pkgName (FsDb conn) = do + results <- + query + conn + "SELECT copyright,license,note FROM copyright WHERE package = ?" 
+ (Only pkgName) :: + IO [LicenseInfo] + return $ + if (null results) then Nothing else Just $ Copyright pkgName results diff --git a/src/FSD/Download.hs b/src/FSD/Download.hs new file mode 100644 index 0000000..3631ddc --- /dev/null +++ b/src/FSD/Download.hs @@ -0,0 +1,118 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. + +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module FSD.Download where + +import System.FilePath +import Control.Monad +import Data.Function +import Data.List +import Data.Text (Text) +import Data.Text qualified as T +import System.Exit +import System.Process +import System.Random + +wgetCommand = "wget" + +wgetIndexFileFlags = "-O- 2>/dev/null" + +gunzipCommand = "gunzip" + +bunzip2Command = "bunzip2" + +wgetMetadataFlags = "-N -nv -x -c" + +sourcesUrl = "https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz" + +packagesUrl = "https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz" + +translationUrl = "https://ftp.debian.org/debian/dists/stable/main/i18n/Translation-en.bz2" + +data MetadataType = MTChangelog | MTCopyright deriving Eq + +metadataUrlFile :: MetadataType -> Int -> String +metadataUrlFile mtype i = toFilename mtype ++ "_urls_" ++ show i + where toFilename MTChangelog = "changelog" + toFilename MTCopyright = "copyright" + 
+wgetIndexFileCommand :: FilePath -> (String, String) -> String +wgetIndexFileCommand outputDir (url, filename) = + wgetCommand ++ " " ++ wgetIndexFileFlags ++ " " ++ url ++ "|" ++ extractCommand ++ " >" ++ (outputDir </> filename) ++ " && echo Done: " ++ filename + where + extractCommand = if isSuffixOf ".gz" url then gunzipCommand else bunzip2Command + +downloadSources :: FilePath -> IO ExitCode +downloadSources outputDir = + waitForProcess =<< + (runCommand $ wgetIndexFileCommand outputDir (sourcesUrl, "Sources")) + +downloadPackages :: FilePath -> IO ExitCode +downloadPackages outputDir = + waitForProcess =<< + (runCommand $ wgetIndexFileCommand outputDir (packagesUrl, "Packages")) + +downloadTranslation :: FilePath -> IO ExitCode +downloadTranslation outputDir = + waitForProcess =<< + (runCommand $ + wgetIndexFileCommand outputDir (translationUrl, "Translation-en")) + +downloadMetadataFiles :: FilePath -> MetadataType -> Int -> IO [ProcessHandle] +downloadMetadataFiles root mtype nWorkers = + sequence $ + ( \i -> + runCommand + ( wgetCommand ++ " " ++ wgetMetadataFlags + ++ " -i " ++ (root </> metadataUrlFile mtype i) + ++ " -P " ++ root + ++ " && echo Done: worker " ++ show i + ) + ) + <$> [1 .. nWorkers] + +writeWgetListFiles :: FilePath -> MetadataType -> [Text] -> Int -> IO () +writeWgetListFiles outputDir mtype packages n = do + pairs <- zip [1 ..] . fmap T.unlines <$> mapM shuffle (replicate n urls) + mapM_ (\(i, list) -> + writeFile (outputDir </> metadataUrlFile mtype i) + (T.unpack list)) pairs + where + urls = metadataUrl mtype <$> packages + +shuffle :: [Text] -> IO [Text] +shuffle xs = + fmap fst . sortBy (compare `on` snd) . 
zip xs + <$> replicateM n (randomRIO (0, n * n)) + where + n = length xs + +metadataUrl :: MetadataType -> Text -> Text +metadataUrl mtype pkgName = + "https://metadata.ftp-master.debian.org/changelogs/main/" <> + (if T.isPrefixOf "lib" pkgName + then T.take 4 pkgName else T.take 1 pkgName) <> + "/" <> pkgName <> "/" <> toFilename mtype + where + toFilename MTChangelog = "stable_changelog" + toFilename MTCopyright = "stable_copyright" diff --git a/src/FSD/Package.hs b/src/FSD/Package.hs new file mode 100644 index 0000000..8ccd852 --- /dev/null +++ b/src/FSD/Package.hs @@ -0,0 +1,61 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. + +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module FSD.Package where + +import Data.Generics +import Data.Maybe +import Data.Text (Text) +import Data.Text qualified as T +import Debian.Control +import Text.Regex.TDFA +import FSD.Types + +getTags :: Paragraph -> [Text] +getTags para = case fieldValue "Tag" para of + Nothing -> [] + Just tags -> + (T.splitOn ", " . T.pack . concat . 
lines) tags + +{-package version should be from Source field overriding Version field: +Package: pandoc +Source: pandoc (2.9.2.1-1) +Version: 2.9.2.1-1+b1 +-} +getPackage :: Paragraph -> Maybe Package +getPackage para = do + package <- T.pack <$> fieldValue "Package" para + let homepage = T.pack <$> fieldValue "Homepage" para + let tags = getTags para + let srcVersion = extractSrcVersion =<< fieldValue "Source" para + version <- T.pack <$> orElse srcVersion (fieldValue "Version" para) + return $ Package package version homepage (Tags tags) + where + extractSrcVersion :: String -> Maybe String + extractSrcVersion source = + case source =~ (".*\\((.*)\\)$" :: String) :: [[String]] of + [[_, version]] -> Just version + _ -> Nothing + +getPackages :: Control -> [Package] +getPackages = catMaybes . fmap getPackage . unControl diff --git a/src/FSD/PackageInfo.hs b/src/FSD/PackageInfo.hs new file mode 100644 index 0000000..959c225 --- /dev/null +++ b/src/FSD/PackageInfo.hs @@ -0,0 +1,70 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. 
+ +-} + +{-# LANGUAGE ImportQualifiedPost #-} +module FSD.PackageInfo where + +import Data.Generics +import Data.Maybe +import Data.Text qualified as T +import Data.Text (Text) +import Data.Char +import Data.Time +import FSD.Types + +-- Combine the per-package records into one PackageInfo stamped with the +-- current time. Yields Nothing when the records disagree on package name, +-- or when the package or changelog version differs from the source version. +makePackageInfo :: + Source -> + Package -> + Translation -> + FSDChangeLogEntry -> + Upstream -> + Copyright -> + IO (Maybe PackageInfo) +makePackageInfo source package trans change upstream copyright + | samePackage && sameVersion = Just . assemble <$> getCurrentTime + | otherwise = return Nothing + where + -- every record must name the same package as the source record + samePackage = + all + (== sPackage source) + [ pPackage package, + tPackage trans, + clPackage change, + uPackage upstream, + crPackage copyright + ] + -- the binary package and the changelog entry must match the source version + sameVersion = + all (== sVersion source) [pVersion package, clVersion change] + -- the clock is only read once the records are known to be consistent + assemble time = + PackageInfo + { piPackage = sPackage source, + piVersion = sVersion source, + piTimestamp = time, + piFile = sFile source, + piHomepage = orElse (sHomepage source) (pHomepage package), + piTags = unTags $ pTags package, + piShortDesc = tShortDesc trans, + piFullDesc = tFullDesc trans, + piUpstreamName = uName upstream, + piContacts = uContacts upstream, + piSources = unSources $ uSources upstream, + piLicenses = crLicenses copyright, + piCLAuthor = clAuthor change, + piCLTimestamp = clTimestamp change + } diff --git a/src/FSD/Source.hs b/src/FSD/Source.hs new file mode 100644 index 0000000..751ac31 --- /dev/null +++ b/src/FSD/Source.hs @@ -0,0 +1,45 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. + +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module FSD.Source where + +import FSD.Control +import Data.List.Extra +import Data.Maybe +import Data.Text qualified as T +import Debian.Control +import FSD.Types + +-- Build a Source from one paragraph. Package, Version and Files are +-- required; Homepage is optional. +getSource :: Paragraph -> Maybe Source +getSource para = + Source + <$> textField "Package" + <*> textField "Version" + <*> origFile + <*> pure (textField "Homepage") + where + textField name = simpleFV <$> fieldValue name para + origFile = do + entries <- lbListFV <$> fieldValue "Files" para + -- entry 2 of the line list is normally the orig one + entry <- entries !? 2 + -- the file name is the third space-separated token of that entry + T.splitOn " " entry !? 2 + +-- Every paragraph that yields a Source; the others are dropped. +getSources :: Control -> [Source] +getSources = mapMaybe getSource . unControl diff --git a/src/FSD/Translation.hs b/src/FSD/Translation.hs new file mode 100644 index 0000000..d825baa --- /dev/null +++ b/src/FSD/Translation.hs @@ -0,0 +1,41 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. 
+ +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} + +module FSD.Translation where + +import FSD.Control +import Data.List.Extra +import Data.Maybe +import Data.Text (Text) +import Data.Text qualified as T +import Debian.Control +import FSD.Types + +-- Build a Translation from one paragraph. Both Package and Description-en +-- must be present; the description is split by fSynFV into its short and +-- full parts. +getTranslation :: Paragraph -> Maybe Translation +getTranslation para = + assemble + <$> fieldValue "Package" para + <*> fieldValue "Description-en" para + where + assemble name description = + uncurry (Translation (T.pack name)) (fSynFV description) + +-- Every paragraph that yields a Translation; the others are dropped. +getTranslations :: Control -> [Translation] +getTranslations = mapMaybe getTranslation . unControl diff --git a/src/FSD/Types.hs b/src/FSD/Types.hs new file mode 100644 index 0000000..6ac6c12 --- /dev/null +++ b/src/FSD/Types.hs @@ -0,0 +1,248 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. 
+ +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleInstances #-} + +module FSD.Types where + +import Data.Maybe +import Data.Time +import Data.Text (Text) +import Data.Text qualified as T +import Database.SQLite.Simple +import Database.SQLite.Simple.FromField + +{- Source +- obtained from dist/Sources.gz +- sPackage, sVersion: key, package and version, also used for {{Entry|Version identifier, Version download and {{Import|Source link and Source packages +- there's also Uploaders field in Source, but it is not as good as the changelog which is more precise on the version +-} +data Source = Source + {sPackage :: Text, sVersion :: Text, sFile :: Text, sHomepage :: Maybe Text} + deriving (Show) + +instance ToRow Source where + toRow (Source a b c d) = toRow (a, b, c, d) + +instance FromRow Source where + fromRow = Source <$> field <*> field <*> field <*> field + +{- Package +- obtained from dist/Packages.gz +- pPackage, pVersion: key, package and version + - comes from Package and Version +- homepage: used for {{Entry|Homepage URL + - comes from Homepage + - also available in Sources.gz +- tags: used for {{Entry|Computer languages (implementing lang) + - can also be used for use, works-with etc. 
+-} + +data Package = Package + { pPackage :: Text, + pVersion :: Text, + pHomepage :: Maybe Text, + pTags :: Tags + } + deriving (Show) + +newtype Tags = Tags {unTags :: [Text]} deriving (Show, Eq) + +instance ToRow Package where + toRow (Package a b c d) = toRow (a, b, c, T.intercalate "," $ unTags d) -- tags are stored comma-joined in one column + +instance FromRow Package where + fromRow = Package <$> field <*> field <*> field <*> field + +instance FromField Tags where + fromField f = Tags . filter (not . T.null) . T.splitOn "," <$> fromField f -- an empty column (no tags) must decode to [] rather than [""] + +{- Translation +- obtained from dist +- tPackage: key, package name + - comes from Package +- (shortDesc, fullDesc) are for {{Entry|Short description and {{Entry|Full description + - comes from Description-en.syn and .full (formatted text with syn) +-} +data Translation = Translation + { tPackage :: Text, + tShortDesc :: Text, + tFullDesc :: Text + } + deriving (Show) + +instance ToRow Translation where + toRow (Translation a b c) = toRow (a, b, c) + +instance FromRow Translation where + fromRow = Translation <$> field <*> field <*> field + +{- ChangeLog +- obtained from metaftp +- the latest changelog entry +- clPackage and clVersion are keys, ignores on mismatch +- author is used for {{Project license|License verified by +- timeStamp for {{Project license|License verified date +-} +data FSDChangeLogEntry = FSDChangeLogEntry + { clPackage :: Text, + clVersion :: Text, + clAuthor :: Text, + clTimestamp :: UTCTime + } + deriving (Show) + +instance ToRow FSDChangeLogEntry where + toRow (FSDChangeLogEntry a b c d) = toRow (a, b, c, d) + +instance FromRow FSDChangeLogEntry where + fromRow = FSDChangeLogEntry <$> field <*> field <*> field <*> field + +-- copyright types + +{- +- upstreamName is used for {{Entry|Name + - stores upstream package name (canonical name); + - comes from Upstream-Name; defined to be name used by upstream; + - some copyright files use Upstream-Name for contacts (but not often), which is rather hard to detect ("Foo Library" and "John Doe" are of the same format"), 
so we take it at face value +- contacts is used for {{Person|Real name and {{Person|Email with Role=contact + - stores contact name and email address; + - comes from Upstream-Contact (line-based list) + - may contain urls which are ignored +- sources is used for {{Resource|Resource URL with {{Resource|Resource kind=Download + - stores resource urls with download kind; + - comes from Source (formatted text with no synopsis), in practice all in url form + - so we parse it as whitespace separated list of urls +-} + +data Contact = Contact + { coName :: Maybe Text, + coEmail :: Text + } + deriving (Show) + +serializeContacts :: [Contact] -> Text +serializeContacts = T.intercalate ";" . fmap serializeContact -- NOTE: a ";" inside a name or email is not escaped + +serializeContact :: Contact -> Text +serializeContact (Contact name email) = + T.concat [(fromMaybe "" name), ",", email] -- "name,email"; a missing name is written as the empty string + +deserializeContacts :: Text -> [Contact] +deserializeContacts contacts = + if T.null contacts + then [] + else mapMaybe deserializeContact $ T.splitOn ";" contacts + +deserializeContact :: Text -> Maybe Contact +deserializeContact raw = + case T.breakOnEnd "," raw of -- split at the LAST comma so that names containing commas survive the round trip + (_, "") -> Nothing -- empty email + ("", _) -> Nothing -- no separator at all + (nameSep, email) -> + let name = T.dropEnd 1 nameSep in Just $ Contact (if T.null name then Nothing else Just name) email + +data Upstream = Upstream + { uPackage :: Text, + uName :: Maybe Text, + uContacts :: [Contact], + uSources :: SourceUrls + } + deriving (Show) + +newtype SourceUrls = SourceUrls {unSources :: [Text]} deriving (Eq, Show) + +instance ToRow Upstream where + toRow (Upstream a b contacts sources) = + toRow + ( a, + b, + serializeContacts contacts, + T.intercalate "\n" $ unSources sources + ) + +instance FromRow Upstream where + fromRow = Upstream <$> field <*> field <*> field <*> field + +instance FromField SourceUrls where + fromField f = SourceUrls . filter (not . T.null) . T.splitOn "\n" <$> fromField f -- an empty column (no urls) must decode to [] rather than [""] + +instance FromField [Contact] where + fromField f = deserializeContacts <$> fromField f + +{- License Information +- we need to filter out debian/* Files +- 
copyright is used for {{Project license|License copyright
  - comes from Files.Copyright and Header.Copyright (formatted text no syn)
- license is used for {{Project license|License
  - comes from Files.License.syn and Header.License.syn (formatted text with syn)
  - we treat multi-licensing license as one (e.g. "GPL-2+ or MPL" is a valid license field)
- note is used for {{Project license|License note
  - comes from Files.License.full and Header.License.full (formatted text with syn), or in case of missing fulls, License.full (formatted text with syn) for matching syn
-}
data LicenseInfo = LicenseInfo
  { -- | used for {{Project license|License copyright
    lCopyright :: Maybe Text,
    -- | used for {{Project license|License
    lLicense :: Text,
    -- | used for {{Project license|License note
    lNote :: Maybe Text
  }
  deriving Show

-- | Reads three columns in order: copyright, license, note.
instance FromRow LicenseInfo where
  fromRow = do
    copyright <- field
    license <- field
    note <- field
    pure (LicenseInfo copyright license note)

{- Obtained from copyright files
- formatted as https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
- nonformatted files are ignored
-}
data Copyright = Copyright
  { -- | package the copyright file belongs to
    crPackage :: Text,
    -- | one entry per license found in the file
    crLicenses :: [LicenseInfo]
  }
  deriving Show

-- | Everything known about one package, assembled in a single record.
data PackageInfo = PackageInfo
  { -- | package name
    piPackage :: Text,
    -- | package version
    piVersion :: Text,
    -- | time the PackageInfo is created
    piTimestamp :: UTCTime,
    -- | file name of the package
    piFile :: Text,
    -- | homepage, if any
    piHomepage :: Maybe Text,
    -- | package tags
    piTags :: [Text],
    -- | short description
    piShortDesc :: Text,
    -- | full description
    piFullDesc :: Text,
    -- | name used by upstream, if any
    piUpstreamName :: Maybe Text,
    -- | upstream contacts
    piContacts :: [Contact],
    -- | source urls
    piSources :: [Text],
    -- | licenses of the package
    piLicenses :: [LicenseInfo],
    -- | author of the changelog entry
    piCLAuthor :: Text,
    -- | timestamp of the changelog entry
    piCLTimestamp :: UTCTime
  }
  deriving Show

{- TODO
- [X] refactor Copyright into CRHeader (with a chPackage) and CRLicense (with a crPackage)
- refactor so that homepage comes from source overriding package
- refactor so that shortdesc from translations overriding source overriding package
- refactor so that things get assembled into a big ass object writing to various entries
- add a (Debian changelog author) to license verifier
- make sure the db creation works as expected (especially primary key on conflict replace)
- utilise
indexes in db
-}
diff --git a/src/FSD/Wiki.hs b/src/FSD/Wiki.hs
new file mode 100644
index 0000000..78e43f0
--- /dev/null
+++ b/src/FSD/Wiki.hs
@@ -0,0 +1,156 @@
{-
Copyright (C) 2022 Yuchen Pei.

This file is part of fsd.

fsd is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

fsd is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General
Public License for more details.

You should have received a copy of the GNU Affero General Public
License along with fsd.  If not, see <https://www.gnu.org/licenses/>.

-}

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}

module FSD.Wiki (formatWikiEntry) where

import Data.Maybe
import Data.List.Extra
import Data.Text (Text)
import Data.Text qualified as T
import Data.Time
import FSD.Types

-- | A wiki template call: the template name and its (key, value)
-- parameters, in the order they are emitted.
data WikiTemplate = WikiTemplate
  { templateName :: Text,
    templateParams :: [(Text, Text)]
  }

-- | Render one template: a @{{name@ line, one @|key=value@ line per
-- parameter, then a @}}@ line. Every line ends with a newline. Keys and
-- values are emitted verbatim, without any escaping.
formatTemplate :: WikiTemplate -> Text
formatTemplate (WikiTemplate name params) =
  T.unlines $ [T.concat ["{{", name]] ++ fmap formatParam params ++ ["}}"]
  where
    formatParam (key, val) = T.concat ["|", key, "=", val]

-- | The main function that formats a PackageInfo to a wiki entry.
-- Templates are emitted in this order: Entry, Import, Project license(s),
-- Person(s), Resource(s).
formatWikiEntry :: PackageInfo -> Text
formatWikiEntry package =
  formatWikiEntry' $
    [wtEntry package, wtImport package]
      ++ wtLicenses package
      ++ wtPersons package
      ++ wtResources package

type WikiEntry = [WikiTemplate]

-- | Render all templates of an entry and concatenate the results.
formatWikiEntry' :: WikiEntry -> Text
formatWikiEntry' = T.concat . fmap formatTemplate

-- https://directory.fsf.org/wiki/Template:Entry
-- | Build the Entry template of a package.
wtEntry :: PackageInfo -> WikiTemplate
wtEntry package =
  WikiTemplate
    "Entry"
    -- Using the upstream name may cause problems some times, like
    -- 0ad-data which is a separate package from 0ad, but with the same
    -- upstream name (0ad), and can cause overwrite of 0ad info
    [ ("Name", fromMaybe (piPackage package) (piUpstreamName package)),
      ("Short description", piShortDesc package),
      ("Full description", piFullDesc package),
      ("Homepage", fromMaybe "" $ piHomepage package),
      ("Computer languages", formatImplLangs package),
      ("Version identifier", piVersion package),
      ("Version download", getDlUrl package),
      ("Submitted by", "Debian import"),
      ("Submitted date", T.pack $ show $ utctDay $ piTimestamp package)
    ]

-- | Comma-separated values of all tags of the form
-- @implemented-in::<lang>@; other tags are ignored.
formatImplLangs :: PackageInfo -> Text
formatImplLangs package =
  T.intercalate "," $ mapMaybe implLang (piTags package)
  where
    -- Total replacement for the former head / (!? 1) combination: only
    -- tags with facet "implemented-in" and at least one more "::" part
    -- yield a language.
    implLang tag = case T.splitOn "::" tag of
      "implemented-in" : lang : _ -> Just lang
      _ -> Nothing

-- | Download url of the package file under the Debian pool.
getDlUrl :: PackageInfo -> Text
getDlUrl package =
  T.concat
    [ "http://ftp.debian.org/debian/pool/main/",
      poolDir,
      "/",
      name,
      "/",
      file
    ]
  where
    name = piPackage package
    file = piFile package
    -- names starting with "lib" are filed under their first four
    -- characters, all others under their first character
    poolDir
      | "lib" `T.isPrefixOf` name = T.take 4 name
      | otherwise = T.take 1 name

-- TODO
-- https://directory.fsf.org/wiki/Template:Software_category
{-
{{Software category
|Game=game::
|Interface=interface::
|Protocol=protocol::
|Use=use::
...
}}
-}

-- https://directory.fsf.org/wiki/Template:Import
-- | Link to the page of a package on packages.debian.org (stable).
makeLink :: Text -> Text
makeLink name = "http://packages.debian.org/stable/" <> name

-- | Build the Import template of a package.
wtImport :: PackageInfo -> WikiTemplate
wtImport package = WikiTemplate "Import" params
  where
    importDate = T.pack . show . utctDay $ piTimestamp package
    params =
      [ ("Source", "Debian"),
        ("Source link", makeLink (piPackage package)),
        ("Date", importDate)
      ]

-- https://directory.fsf.org/wiki/Template:Project_license
-- | One Project license template per license of the package, all sharing
-- the changelog author and the day of the changelog timestamp.
wtLicenses :: PackageInfo -> [WikiTemplate]
wtLicenses package =
  [wtLicense verifier verifiedOn info | info <- piLicenses package]
  where
    verifier = piCLAuthor package
    verifiedOn = T.pack (show (utctDay (piCLTimestamp package)))

-- | Build one Project license template from the verifier, the
-- verification date and the license information. Missing copyright or
-- note fields are rendered as empty values.
wtLicense :: Text -> Text -> LicenseInfo -> WikiTemplate
wtLicense author time info = WikiTemplate "Project license" params
  where
    orEmpty = fromMaybe ""
    params =
      [ ("License", lLicense info),
        ("License copyright", orEmpty (lCopyright info)),
        ("License verified by", author),
        ("License verified date", time),
        ("License note", orEmpty (lNote info))
      ]

-- https://directory.fsf.org/wiki/Template:Person
data WTPerson = WTPerson
  { wpName :: Text,
    wpRole :: Text,
    wpEmail :: Text
  }

-- | Build a Person template with role "contact"; a missing name is
-- rendered as an empty value.
wtPerson :: Contact -> WikiTemplate
wtPerson (Contact name email) = WikiTemplate "Person" params
  where
    params =
      [ ("Real name", fromMaybe "" name),
        ("Role", "contact"),
        ("Email", email)
      ]

-- | One Person template per contact of the package.
wtPersons :: PackageInfo -> [WikiTemplate]
wtPersons = fmap wtPerson . piContacts

-- https://directory.fsf.org/wiki/Template:Resource
-- | One Resource template of kind "Download" per source url.
wtResources :: PackageInfo -> [WikiTemplate]
wtResources package = [resource url | url <- piSources package]
  where
    resource url =
      WikiTemplate
        "Resource"
        [("Resource kind", "Download"), ("Resource URL", url)]
|