aboutsummaryrefslogtreecommitdiff
path: root/src/FSD
diff options
context:
space:
mode:
authorYuchen Pei <hi@ypei.me>2022-05-19 22:23:10 +1000
committerYuchen Pei <hi@ypei.me>2022-05-19 22:23:10 +1000
commit3db93bc6f7b46bc322694e6658b8f559433a03c6 (patch)
tree1273a17e11e0d9888ae57676906c68f8bb77e287 /src/FSD
parent09c05e86a1096b08eb7483b970207d47d0388665 (diff)
Replacing the files with a haskell rewrite.
Diffstat (limited to 'src/FSD')
-rw-r--r--src/FSD/ChangeLog.hs60
-rw-r--r--src/FSD/Control.hs84
-rw-r--r--src/FSD/Copyright.hs106
-rw-r--r--src/FSD/Db.hs211
-rw-r--r--src/FSD/Download.hs118
-rw-r--r--src/FSD/Package.hs61
-rw-r--r--src/FSD/PackageInfo.hs70
-rw-r--r--src/FSD/Source.hs45
-rw-r--r--src/FSD/Translation.hs41
-rw-r--r--src/FSD/Types.hs248
-rw-r--r--src/FSD/Wiki.hs156
11 files changed, 1200 insertions, 0 deletions
diff --git a/src/FSD/ChangeLog.hs b/src/FSD/ChangeLog.hs
new file mode 100644
index 0000000..195b713
--- /dev/null
+++ b/src/FSD/ChangeLog.hs
@@ -0,0 +1,60 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+module FSD.ChangeLog where
+
+import Data.Generics
+import Data.Time
+import Data.Text qualified as T
+import Debian.Changes
+import Debian.Version
+import Text.Regex.TDFA
+import FSD.Types
+
+readChangeLog :: String -> Maybe ChangeLogEntry
+readChangeLog raw = case parseEntry raw of
+ Left _ -> Nothing
+ Right (entry, _) -> Just entry
+
+-- Debian version is an abstract type with no constructor, hence the ugly hack.
+getVersion :: DebianVersion -> Maybe String
+getVersion v =
+ let match = show v =~ ("\".*\"" :: String)
+ in if null match then Nothing else Just $ init $ tail match
+
+convertChangeLogEntry :: ChangeLogEntry -> Maybe FSDChangeLogEntry
+convertChangeLogEntry entry = do
+ version <- getVersion $ logVersion entry
+ time <- parseTime' $ logDate entry
+ return $
+ FSDChangeLogEntry
+ (T.pack $ logPackage entry)
+ (T.pack version)
+ (T.pack $ logWho entry)
+ time
+ where
+ parseTime' raw =
+ orElse
+ (parseTimeM False defaultTimeLocale "%a, %d %b %Y %T %Z" raw)
+ (parseTimeM False defaultTimeLocale "%a, %e %b %Y %T %Z" raw)
+
+getChangeLogEntry :: String -> Maybe FSDChangeLogEntry
+getChangeLogEntry raw = readChangeLog raw >>= convertChangeLogEntry
diff --git a/src/FSD/Control.hs b/src/FSD/Control.hs
new file mode 100644
index 0000000..2395f85
--- /dev/null
+++ b/src/FSD/Control.hs
@@ -0,0 +1,84 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Control where
+
+import Data.Either
+import Data.List.Extra
+import Data.Text (Text)
+import Data.Text qualified as T
+import Debian.Control
+import Debug.Trace
+
+-- import Text.Regex.TDFA
+
+-- parse sources, packages, translations and copyright
+-- https://manpages.debian.org/bullseye/dpkg-dev/deb822.5.en.html
+-- https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+
+readControl :: String -> Control
+readControl =
+ fromRight (Control []) . parseControl ""
+
+-- stripControl . fromRight (Control []) . parseControl ""
+
+simpleFV :: String -> Text
+simpleFV = T.pack
+
+-- multiple lines with nonsignficant whitespaces
+foldedFV :: String -> Text
+foldedFV = undefined
+
+-- multilines
+-- may erroneously remove first char of first line (we'll see)
+multilineFV :: String -> Text
+multilineFV raw =
+ let (first, rest) = fSynFV raw
+ in T.strip $ T.concat [first, "\n", rest]
+
+-- whitespace separated list
+wsListFV :: String -> [Text]
+wsListFV = T.splitOn " " . foldedFV
+
+-- line-based lists
+lbListFV :: String -> [Text]
+lbListFV = fmap T.strip . T.splitOn "\n" . T.pack
+
+-- formatted text with synopsis
+fSynFV :: String -> (Text, Text)
+fSynFV raw =
+ let (synopsis, longRaw) = breakOn "\n" $ raw
+ paras =
+ if null longRaw
+ then []
+ else T.splitOn "\n .\n" $ T.pack $ tail longRaw
+ full =
+ T.intercalate "\n\n" $
+ (T.intercalate "\n" . fmap T.tail . T.lines) <$> paras
+ in (T.pack synopsis, full)
+
+firstFieldName :: Paragraph -> Maybe String
+firstFieldName (Paragraph []) = Nothing
+firstFieldName (Paragraph (Comment _ : xs)) =
+ firstFieldName (Paragraph xs)
+firstFieldName (Paragraph (Field (name, _) : _)) = Just name
diff --git a/src/FSD/Copyright.hs b/src/FSD/Copyright.hs
new file mode 100644
index 0000000..68b7b50
--- /dev/null
+++ b/src/FSD/Copyright.hs
@@ -0,0 +1,106 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Copyright where
+
+import FSD.Control
+import Data.Map hiding (filter)
+import Data.Maybe
+import Data.Text (Text)
+import Data.Text qualified as T
+import Debian.Control
+import Text.Regex.TDFA
+import FSD.Types
+
+parseCopyright :: Text -> Control -> Maybe (Upstream, Copyright)
+parseCopyright package control =
+ case unControl control of
+ [] -> Nothing
+ header : rest ->
+ -- no parse if header is wrong
+ if fromMaybe "" (firstFieldName header) /= "Format"
+ then Nothing
+ else
+ Just
+ ( getUpstream package header,
+ Copyright package (getLicenses (header : files) licenses)
+ )
+ where
+ files = filter (\para ->
+ fromMaybe "" (firstFieldName para) == "Files") rest
+ licenses = filter
+ (\para ->
+ fromMaybe "" (firstFieldName para) == "License") rest
+
+getUpstream :: Text -> Paragraph -> Upstream
+getUpstream package para =
+ Upstream package uName contacts (SourceUrls source)
+ where
+ uName = T.pack <$> fieldValue "Upstream-Name" para
+ contacts = case fieldValue "Upstream-Contact" para of
+ Just contact -> parseContacts $ lbListFV contact
+ Nothing -> []
+ source = case fieldValue "Source" para of
+ Just source -> parseSource $ multilineFV source
+ Nothing -> []
+
+parseContacts :: [Text] -> [Contact]
+parseContacts = catMaybes . fmap parseContact
+
+parseContact :: Text -> Maybe Contact
+parseContact raw =
+ case T.unpack raw =~ ("^(.*)<(.*@.*)>$" :: String) :: [[String]] of
+ [[_, "", email]] -> Just $ Contact Nothing (T.pack email)
+ [[_, name, email]] ->
+ Just $
+ Contact
+ (Just $ T.strip $ T.pack name)
+ (T.pack email)
+ _ -> Nothing
+
+parseSource :: Text -> [Text]
+parseSource = fmap T.strip . T.splitOn "\n"
+
+getLicenses :: [Paragraph] -> [Paragraph] -> [LicenseInfo]
+getLicenses paras licenses =
+ catMaybes $ getLicense (getLicenseMap licenses) <$> paras
+
+getLicense :: Map Text Text -> Paragraph -> Maybe LicenseInfo
+getLicense licMap para = case fSynFV <$> fieldValue "License" para of
+ Just ("", _) -> Nothing
+ Nothing -> Nothing
+ Just (name, desc) ->
+ Just $
+ LicenseInfo
+ (multilineFV <$> fieldValue "Copyright" para)
+ name
+ (if T.null desc then Data.Map.lookup name licMap else Just desc)
+
+getLicenseMap :: [Paragraph] -> Map Text Text
+getLicenseMap licenses = fromList $ catMaybes $ toPair <$> licenses
+ where
+ toPair license = case fSynFV <$> fieldValue "License" license of
+ Just ("", _) -> Nothing
+ Just (_, "") -> Nothing
+ Just (name, desc) -> Just (name, desc)
+ Nothing -> Nothing
diff --git a/src/FSD/Db.hs b/src/FSD/Db.hs
new file mode 100644
index 0000000..38a822c
--- /dev/null
+++ b/src/FSD/Db.hs
@@ -0,0 +1,211 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeOperators #-}
+
+module FSD.Db where
+
+import Control.Monad
+import FSD.Copyright
+import Data.Text (Text)
+import Data.Text qualified as T
+import Database.SQLite.Simple
+import FSD.Types
+
+newtype FsDb = FsDb {getConn :: Connection}
+
+withFsDb :: FilePath -> (FsDb -> IO a) -> IO a
+withFsDb path f = withConnection path (f . FsDb)
+
+-- creations
+
+initConn :: FsDb -> IO ()
+initConn (FsDb conn) = do
+ execute_
+ conn
+ "CREATE TABLE IF NOT EXISTS sources \
+ \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \
+ \, version TEXT NOT NULL \
+ \, file TEXT NOT NULL \
+ \, homepage TEXT \
+ \)"
+ execute_
+ conn
+ "CREATE TABLE IF NOT EXISTS packages \
+ \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \
+ \, version TEXT NOT NULL \
+ \, homepage TEXT \
+ \, tags TEXT NOT NULL \
+ \)"
+ execute_
+ conn
+ "CREATE TABLE IF NOT EXISTS translation \
+ \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \
+ \, shortDesc TEXT NOT NULL \
+ \, fullDesc TEXT NOT NULL \
+ \)"
+ execute_
+ conn
+ "CREATE TABLE IF NOT EXISTS changelog \
+ \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \
+ \, version TEXT NOT NULL \
+ \, author TEXT NOT NULL \
+ \, date TEXT NOT NULL \
+ \)"
+ execute_
+ conn
+ "CREATE TABLE IF NOT EXISTS copyright \
+ \( id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT \
+ \, package TEXT NOT NULL \
+ \, entryId INTEGER NOT NULL \
+ \, copyright TEXT \
+ \, license TEXT NOT NULL \
+ \, note TEXT \
+ \, CONSTRAINT uniqname UNIQUE (package, entryId) ON CONFLICT REPLACE \
+ \)"
+ execute_
+ conn
+ "CREATE TABLE IF NOT EXISTS upstream \
+ \( package TEXT NOT NULL PRIMARY KEY ON CONFLICT REPLACE \
+ \, name TEXT \
+ \, contacts TEXT \
+ \, sources TEXT \
+ \)"
+
+-- insertions
+
+insertSource :: FsDb -> Source -> IO ()
+insertSource (FsDb conn) source =
+ execute conn "INSERT INTO sources VALUES (?,?,?,?)" source
+
+insertPackage :: FsDb -> Package -> IO ()
+insertPackage (FsDb conn) package =
+ execute conn "INSERT INTO packages VALUES (?,?,?,?)" package
+
+insertTranslation :: FsDb -> Translation -> IO ()
+insertTranslation (FsDb conn) translation =
+ execute conn "INSERT INTO translation VALUES (?,?,?)" translation
+
+insertFSDChangeLogEntry :: FsDb -> FSDChangeLogEntry -> IO ()
+insertFSDChangeLogEntry (FsDb conn) change =
+ execute conn "INSERT INTO changelog VALUES (?,?,?,?)" change
+
+insertCopyright :: FsDb -> Copyright -> IO ()
+insertCopyright db (Copyright package licenses) =
+ zipWithM_ (insertLicense db package) [0 ..] licenses
+
+insertUpstream :: FsDb -> Upstream -> IO ()
+insertUpstream (FsDb conn) upstream =
+ execute conn "INSERT INTO upstream VALUES (?,?,?,?)" upstream
+
+insertLicense :: FsDb -> Text -> Int -> LicenseInfo -> IO ()
+insertLicense (FsDb conn) package idx (LicenseInfo copyright license note) =
+ execute
+ conn
+ "INSERT INTO copyright (package,entryId,copyright,license,note) \
+ \VALUES (?,?,?,?,?)"
+ (package, idx, copyright, license, note)
+
+-- queries
+getDbAll :: FsDb -> IO [Source :. Package :. Translation :. FSDChangeLogEntry :. Upstream]
+getDbAll (FsDb conn) =
+ query_
+ conn
+ "SELECT sources.*,packages.*,translation.*,changelog.*,upstream.* \
+ \FROM sources JOIN packages USING (package,version)\
+ \ JOIN changelog USING (package,version)\
+ \ JOIN translation USING (package)\
+ \ JOIN upstream USING (package)\
+ \"
+
+getDbPkgNames :: FsDb -> IO [Text]
+getDbPkgNames (FsDb conn) = do
+ rows <- query_ conn
+ "SELECT DISTINCT package\
+ \ FROM sources JOIN packages USING (package,version)\
+ \ JOIN changelog USING (package,version)\
+ \ JOIN translation USING (package)\
+ \ JOIN upstream USING (package)\
+ \ JOIN copyright USING (package)\
+ \"
+ return $ fromOnly <$> rows
+
+getDbAllTypes :: Text -> FsDb -> IO (Maybe (Source :. Package :. Translation :. FSDChangeLogEntry :. Upstream))
+getDbAllTypes pkgName (FsDb conn) = do
+ results <- query
+ conn
+ "SELECT sources.*,packages.*,translation.*,changelog.*,upstream.* \
+ \FROM sources JOIN packages USING (package,version)\
+ \ JOIN changelog USING (package,version)\
+ \ JOIN translation USING (package)\
+ \ JOIN upstream USING (package) \
+ \WHERE package = ?\
+ \" (Only pkgName)
+ return $ case results of
+ [source] -> Just source
+ otherwise -> Nothing
+
+getDbSource :: Text -> FsDb -> IO (Maybe Source)
+getDbSource pkgName (FsDb conn) = do
+ results <- query conn "SELECT * FROM sources WHERE package = ?" (Only pkgName)
+ return $ case results of
+ [source] -> Just source
+ otherwise -> Nothing
+
+getDbPackage :: Text -> FsDb -> IO (Maybe Package)
+getDbPackage pkgName (FsDb conn) = do
+ results <- query conn "SELECT * FROM packages WHERE package = ?" (Only pkgName)
+ return $ case results of
+ [package] -> Just package
+ otherwise -> Nothing
+
+getDbTranslation :: Text -> FsDb -> IO (Maybe Translation)
+getDbTranslation pkgName (FsDb conn) = do
+ results <- query conn "SELECT * FROM translation WHERE package = ?" (Only pkgName)
+ return $ case results of
+ [translation] -> Just translation
+ otherwise -> Nothing
+
+getDbFSDChangeLogEntry :: Text -> FsDb -> IO (Maybe FSDChangeLogEntry)
+getDbFSDChangeLogEntry pkgName (FsDb conn) = do
+ results <- query conn "SELECT * FROM changelog WHERE package = ?" (Only pkgName)
+ return $ case results of
+ [entry] -> Just entry
+ otherwise -> Nothing
+
+getDbUpstream :: Text -> FsDb -> IO (Maybe Upstream)
+getDbUpstream pkgName (FsDb conn) = do
+ results <- query conn "SELECT * FROM upstream WHERE package = ?" (Only pkgName)
+ return $ case results of
+ [upstream] -> Just upstream
+ otherwise -> Nothing
+
+getDbCopyright :: Text -> FsDb -> IO (Maybe Copyright)
+getDbCopyright pkgName (FsDb conn) = do
+ results <-
+ query
+ conn
+ "SELECT copyright,license,note FROM copyright WHERE package = ?"
+ (Only pkgName) ::
+ IO [LicenseInfo]
+ return $
+ if (null results) then Nothing else Just $ Copyright pkgName results
diff --git a/src/FSD/Download.hs b/src/FSD/Download.hs
new file mode 100644
index 0000000..3631ddc
--- /dev/null
+++ b/src/FSD/Download.hs
@@ -0,0 +1,118 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Download where
+
+import System.FilePath
+import Control.Monad
+import Data.Function
+import Data.List
+import Data.Text (Text)
+import Data.Text qualified as T
+import System.Exit
+import System.Process
+import System.Random
+
+wgetCommand = "wget"
+
+wgetIndexFileFlags = "-O- 2>/dev/null"
+
+gunzipCommand = "gunzip"
+
+bunzip2Command = "bunzip2"
+
+wgetMetadataFlags = "-N -nv -x -c"
+
+sourcesUrl = "https://ftp.debian.org/debian/dists/stable/main/source/Sources.gz"
+
+packagesUrl = "https://ftp.debian.org/debian/dists/stable/main/binary-amd64/Packages.gz"
+
+translationUrl = "https://ftp.debian.org/debian/dists/stable/main/i18n/Translation-en.bz2"
+
+data MetadataType = MTChangelog | MTCopyright deriving Eq
+
+metadataUrlFile :: MetadataType -> Int -> String
+metadataUrlFile mtype i = toFilename mtype ++ "_urls_" ++ show i
+ where toFilename MTChangelog = "changelog"
+ toFilename MTCopyright = "copyright"
+
+wgetIndexFileCommand :: FilePath -> (String, String) -> String
+wgetIndexFileCommand outputDir (url, filename) =
+ wgetCommand ++ " " ++ wgetIndexFileFlags ++ " " ++ url ++ "|" ++ extractCommand ++ " >" ++ (outputDir </> filename) ++ " && echo Done: " ++ filename
+ where
+ extractCommand = if isSuffixOf ".gz" url then gunzipCommand else bunzip2Command
+
+downloadSources :: FilePath -> IO ExitCode
+downloadSources outputDir =
+ waitForProcess =<<
+ (runCommand $ wgetIndexFileCommand outputDir (sourcesUrl, "Sources"))
+
+downloadPackages :: FilePath -> IO ExitCode
+downloadPackages outputDir =
+ waitForProcess =<<
+ (runCommand $ wgetIndexFileCommand outputDir (packagesUrl, "Packages"))
+
+downloadTranslation :: FilePath -> IO ExitCode
+downloadTranslation outputDir =
+ waitForProcess =<<
+ (runCommand $
+ wgetIndexFileCommand outputDir (translationUrl, "Translation-en"))
+
+downloadMetadataFiles :: FilePath -> MetadataType -> Int -> IO [ProcessHandle]
+downloadMetadataFiles root mtype nWorkers =
+ sequence $
+ ( \i ->
+ runCommand
+ ( wgetCommand ++ " " ++ wgetMetadataFlags
+ ++ " -i " ++ (root </> metadataUrlFile mtype i)
+ ++ " -P " ++ root
+ ++ " && echo Done: worker " ++ show i
+ )
+ )
+ <$> [1 .. nWorkers]
+
+writeWgetListFiles :: FilePath -> MetadataType -> [Text] -> Int -> IO ()
+writeWgetListFiles outputDir mtype packages n = do
+ pairs <- zip [1 ..] . fmap T.unlines <$> mapM shuffle (replicate n urls)
+ mapM_ (\(i, list) ->
+ writeFile (outputDir </> metadataUrlFile mtype i)
+ (T.unpack list)) pairs
+ where
+ urls = metadataUrl mtype <$> packages
+
+shuffle :: [Text] -> IO [Text]
+shuffle xs =
+ fmap fst . sortBy (compare `on` snd) . zip xs
+ <$> replicateM n (randomRIO (0, n * n))
+ where
+ n = length xs
+
+metadataUrl :: MetadataType -> Text -> Text
+metadataUrl mtype pkgName =
+ "https://metadata.ftp-master.debian.org/changelogs/main/" <>
+ (if T.isPrefixOf "lib" pkgName
+ then T.take 4 pkgName else T.take 1 pkgName) <>
+ "/" <> pkgName <> "/" <> toFilename mtype
+ where
+ toFilename MTChangelog = "stable_changelog"
+ toFilename MTCopyright = "stable_copyright"
diff --git a/src/FSD/Package.hs b/src/FSD/Package.hs
new file mode 100644
index 0000000..8ccd852
--- /dev/null
+++ b/src/FSD/Package.hs
@@ -0,0 +1,61 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Package where
+
+import Data.Generics
+import Data.Maybe
+import Data.Text (Text)
+import Data.Text qualified as T
+import Debian.Control
+import Text.Regex.TDFA
+import FSD.Types
+
+getTags :: Paragraph -> [Text]
+getTags para = case fieldValue "Tag" para of
+ Nothing -> []
+ Just tags ->
+ (T.splitOn ", " . T.pack . concat . lines) tags
+
+{-package version should be from Source field overriding Version field:
+Package: pandoc
+Source: pandoc (2.9.2.1-1)
+Version: 2.9.2.1-1+b1
+-}
+getPackage :: Paragraph -> Maybe Package
+getPackage para = do
+ package <- T.pack <$> fieldValue "Package" para
+ let homepage = T.pack <$> fieldValue "Homepage" para
+ let tags = getTags para
+ let srcVersion = extractSrcVersion =<< fieldValue "Source" para
+ version <- T.pack <$> orElse srcVersion (fieldValue "Version" para)
+ return $ Package package version homepage (Tags tags)
+ where
+ extractSrcVersion :: String -> Maybe String
+ extractSrcVersion source =
+ case source =~ (".*\\((.*)\\)$" :: String) :: [[String]] of
+ [[_, version]] -> Just version
+ _ -> Nothing
+
+getPackages :: Control -> [Package]
+getPackages = catMaybes . fmap getPackage . unControl
diff --git a/src/FSD/PackageInfo.hs b/src/FSD/PackageInfo.hs
new file mode 100644
index 0000000..959c225
--- /dev/null
+++ b/src/FSD/PackageInfo.hs
@@ -0,0 +1,70 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+module FSD.PackageInfo where
+
+import Data.Generics
+import Data.Maybe
+import Data.Text qualified as T
+import Data.Text (Text)
+import Data.Char
+import Data.Time
+import FSD.Types
+
+makePackageInfo ::
+ Source ->
+ Package ->
+ Translation ->
+ FSDChangeLogEntry ->
+ Upstream ->
+ Copyright ->
+ IO (Maybe PackageInfo)
+makePackageInfo source package trans change upstream copyright =
+ if or
+ [ sPackage source /= pPackage package,
+ sPackage source /= tPackage trans,
+ sPackage source /= clPackage change,
+ sPackage source /= uPackage upstream,
+ sPackage source /= crPackage copyright,
+ sVersion source /= pVersion package,
+ sVersion source /= clVersion change
+ ]
+ then return Nothing
+ else do
+ time <- getCurrentTime
+ return $
+ Just
+ PackageInfo
+ { piPackage = sPackage source,
+ piVersion = sVersion source,
+ piTimestamp = time,
+ piFile = sFile source,
+ piHomepage = orElse (sHomepage source) (pHomepage package),
+ piTags = unTags $ pTags package,
+ piShortDesc = tShortDesc trans,
+ piFullDesc = tFullDesc trans,
+ piUpstreamName = uName upstream,
+ piContacts = uContacts upstream,
+ piSources = unSources $ uSources upstream,
+ piLicenses = crLicenses copyright,
+ piCLAuthor = clAuthor change,
+ piCLTimestamp = clTimestamp change
+ }
diff --git a/src/FSD/Source.hs b/src/FSD/Source.hs
new file mode 100644
index 0000000..751ac31
--- /dev/null
+++ b/src/FSD/Source.hs
@@ -0,0 +1,45 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Source where
+
+import FSD.Control
+import Data.List.Extra
+import Data.Maybe
+import Data.Text qualified as T
+import Debian.Control
+import FSD.Types
+
+getSource :: Paragraph -> Maybe Source
+getSource para = do
+ package <- simpleFV <$> fieldValue "Package" para
+ version <- simpleFV <$> fieldValue "Version" para
+ fileLines <- lbListFV <$> fieldValue "Files" para
+ -- normally the 2nd file is the orig one
+ fileLine <- fileLines !? 2
+ file <- (T.splitOn " " fileLine) !? 2
+ let homepage = simpleFV <$> fieldValue "Homepage" para
+ return $ Source package version file homepage
+
+getSources :: Control -> [Source]
+getSources = catMaybes . fmap getSource . unControl
diff --git a/src/FSD/Translation.hs b/src/FSD/Translation.hs
new file mode 100644
index 0000000..d825baa
--- /dev/null
+++ b/src/FSD/Translation.hs
@@ -0,0 +1,41 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Translation where
+
+import FSD.Control
+import Data.List.Extra
+import Data.Maybe
+import Data.Text (Text)
+import Data.Text qualified as T
+import Debian.Control
+import FSD.Types
+
+getTranslation :: Paragraph -> Maybe Translation
+getTranslation para = do
+ package <- T.pack <$> fieldValue "Package" para
+ (shortDesc, fullDesc) <- fSynFV <$> fieldValue "Description-en" para
+ return $ Translation package shortDesc fullDesc
+
+getTranslations :: Control -> [Translation]
+getTranslations = catMaybes . fmap getTranslation . unControl
diff --git a/src/FSD/Types.hs b/src/FSD/Types.hs
new file mode 100644
index 0000000..6ac6c12
--- /dev/null
+++ b/src/FSD/Types.hs
@@ -0,0 +1,248 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleInstances #-}
+
+module FSD.Types where
+
+import Data.Maybe
+import Data.Time
+import Data.Text (Text)
+import Data.Text qualified as T
+import Database.SQLite.Simple
+import Database.SQLite.Simple.FromField
+
+{- Source
+- obtained from dist/Sources.gz
+- sPackage, sVersion: key, package and version, also used for {{Entry|Version identifier, Version download and {{Import|Source link and Source packages
+- there's also Uploaders field in Source, but it is not as good as the changelog which is more precise on the version
+-}
+data Source = Source
+ {sPackage :: Text, sVersion :: Text, sFile :: Text, sHomepage :: Maybe Text}
+ deriving (Show)
+
+instance ToRow Source where
+ toRow (Source a b c d) = toRow (a, b, c, d)
+
+instance FromRow Source where
+ fromRow = Source <$> field <*> field <*> field <*> field
+
+{- Package
+- obtained from dist/Packages.gz
+- pPackage, pVersion: key, package and version
+ - comes from Package and Version
+- homepage: used for {{Entry|Homepage URL
+ - comes from Homepage
+ - also available in Sources.gz
+- tags: used for {{Entry|Computer languages (implementing lang)
+ - can also be used for use, works-with etc.
+-}
+
+data Package = Package
+ { pPackage :: Text,
+ pVersion :: Text,
+ pHomepage :: Maybe Text,
+ pTags :: Tags
+ }
+ deriving (Show)
+
+newtype Tags = Tags {unTags :: [Text]} deriving (Show, Eq)
+
+instance ToRow Package where
+ toRow (Package a b c d) = toRow (a, b, c, T.intercalate "," $ unTags d)
+
+instance FromRow Package where
+ fromRow = Package <$> field <*> field <*> field <*> field
+
+instance FromField Tags where
+ fromField f = Tags <$> T.splitOn "," <$> fromField f
+
+{- Translation
+- obtained from dist
+- tPackage: key, package name
+ - comes from Package
+- (shortDesc, fullDesc) are for {{Entry|Short description and {{Entry|Full description
+ - comes from Description-en.syn and .full (formatted text with syn)
+-}
+data Translation = Translation
+ { tPackage :: Text,
+ tShortDesc :: Text,
+ tFullDesc :: Text
+ }
+ deriving (Show)
+
+instance ToRow Translation where
+ toRow (Translation a b c) = toRow (a, b, c)
+
+instance FromRow Translation where
+ fromRow = Translation <$> field <*> field <*> field
+
+{- ChangeLog
+- obtained from metaftp
+- the latest changelog entry
+- clPacakge and clVersion are keys, ignores on mismatch
+- author is used for {{Project license|License verified by
+- timeStamp for {{Project license|License verified date
+-}
+data FSDChangeLogEntry = FSDChangeLogEntry
+ { clPackage :: Text,
+ clVersion :: Text,
+ clAuthor :: Text,
+ clTimestamp :: UTCTime
+ }
+ deriving (Show)
+
+instance ToRow FSDChangeLogEntry where
+ toRow (FSDChangeLogEntry a b c d) = toRow (a, b, c, d)
+
+instance FromRow FSDChangeLogEntry where
+ fromRow = FSDChangeLogEntry <$> field <*> field <*> field <*> field
+
+-- copyright types
+
+{-
+- upstreamName is used for {{Entry|Name
+ - stores upstream package name (canonical name);
+ - comes from Upstream-Name; defined to be name used by upstream;
+ - some copyright files use Upstream-Name for contacts (but not often), which is rather hard to detect ("Foo Library" and "John Doe" are of the same format"), so we take it at face value
+- contacts is used for {{Person|Real name and {{Person|Email with Role=contact
+ - stores contact name and email address;
+ - comes from Upstream-Contact (line-based list)
+ - may contain urls which are ignored
+- sources is used for {{Resource|Resource URL with {{Resource|Resource kind=Download
+ - stores resource urls with download kind;
+ - comes from Source (formatted text with no synopsis), in practice all in url form
+ - so we parse it as whitespace separated list of urls
+-}
+
+data Contact = Contact
+ { coName :: Maybe Text,
+ coEmail :: Text
+ }
+ deriving (Show)
+
+serializeContacts :: [Contact] -> Text
+serializeContacts = T.intercalate ";" . fmap serializeContact
+
+serializeContact :: Contact -> Text
+serializeContact (Contact name email) =
+ T.concat [(fromMaybe "" name), ",", email]
+
+deserializeContacts :: Text -> [Contact]
+deserializeContacts contacts =
+ if T.null contacts
+ then []
+ else catMaybes $ deserializeContact <$> T.splitOn ";" contacts
+
+deserializeContact :: Text -> Maybe Contact
+deserializeContact raw =
+ case T.splitOn "," raw of
+ [_, ""] -> Nothing
+ [name, email] ->
+ Just $ Contact (if T.null name then Nothing else Just name) email
+ otherwise -> Nothing
+
+data Upstream = Upstream
+ { uPackage :: Text,
+ uName :: Maybe Text,
+ uContacts :: [Contact],
+ uSources :: SourceUrls
+ }
+ deriving (Show)
+
+newtype SourceUrls = SourceUrls {unSources :: [Text]} deriving (Eq, Show)
+
+instance ToRow Upstream where
+ toRow (Upstream a b contacts sources) =
+ toRow
+ ( a,
+ b,
+ serializeContacts contacts,
+ T.intercalate "\n" $ unSources sources
+ )
+
+instance FromRow Upstream where
+ fromRow = Upstream <$> field <*> field <*> field <*> field
+
+instance FromField SourceUrls where
+ fromField f = SourceUrls <$> T.splitOn "\n" <$> fromField f
+
+instance FromField [Contact] where
+ fromField f = deserializeContacts <$> fromField f
+
+{- License Information
+- we need to filter out debian/* Files
+- copyright is used for {{Project license|License copyright
+ - comes from Files.Copyright and Header.Copyright (formatted text no syn)
+- license is used for {{Project license|License
+ - comes from Files.License.syn and Header.License.syn (formatted text with syn)
+ - we treat multi-licensing license as one (e.g. "GPL-2+ or MPL" is a valid license field)
+- note is used for {{Project license|License note
+ - comes from Files.License.full and Header.License.full (formatted text with syn), or in case of missing fulls, License.full (formatted text with syn) for matching syn
+-}
+data LicenseInfo = LicenseInfo
+ { lCopyright :: Maybe Text,
+ lLicense :: Text,
+ lNote :: Maybe Text
+ }
+ deriving (Show)
+
+instance FromRow LicenseInfo where
+ fromRow = LicenseInfo <$> field <*> field <*> field
+
+{- Obtained from copyright files
+- formatted as https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+- nonformatted files are ignored
+-}
+data Copyright = Copyright
+ { crPackage :: Text,
+ crLicenses :: [LicenseInfo]
+ }
+ deriving (Show)
+
+data PackageInfo = PackageInfo
+ { piPackage :: Text,
+ piVersion :: Text,
+ piTimestamp :: UTCTime, -- time the PackageInfo is created
+ piFile :: Text,
+ piHomepage :: Maybe Text,
+ piTags :: [Text],
+ piShortDesc :: Text,
+ piFullDesc :: Text,
+ piUpstreamName :: Maybe Text,
+ piContacts :: [Contact],
+ piSources :: [Text],
+ piLicenses :: [LicenseInfo],
+ piCLAuthor :: Text,
+ piCLTimestamp :: UTCTime
+ }
+ deriving (Show)
+
+{- TODO
+- [X] refactor Copyright into CRHeader (with a chPackage) and CRLicense (with a crPackage)
+- refactor so that homepage comes from source overriding package
+- refactor so that shordesc from translations overriding source overriding package
+- refactor so that things get assembled into a big ass object writing to various entries
+- add a (Debian changelog author) to license verifier
+- make sure the db creation works as expected (especially primary key on conflict replace
+- utilise indexes in db
+-}
diff --git a/src/FSD/Wiki.hs b/src/FSD/Wiki.hs
new file mode 100644
index 0000000..78e43f0
--- /dev/null
+++ b/src/FSD/Wiki.hs
@@ -0,0 +1,156 @@
+{-
+Copyright (C) 2022 Yuchen Pei.
+
+This file is part of fsd.
+
+fsd is free software: you can redistribute it and/or modify it under
+the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+fsd is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+Public License for more details.
+
+You should have received a copy of the GNU Affero General Public
+License along with fsd. If not, see <https://www.gnu.org/licenses/>.
+
+-}
+
+{-# LANGUAGE ImportQualifiedPost #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module FSD.Wiki (formatWikiEntry) where
+
+import Data.Maybe
+import Data.List.Extra
+import Data.Text (Text)
+import Data.Text qualified as T
+import Data.Time
+import FSD.Types
+
+data WikiTemplate = WikiTemplate
+ { templateName :: Text,
+ templateParams :: [(Text, Text)]
+ }
+
+formatTemplate :: WikiTemplate -> Text
+formatTemplate (WikiTemplate name params) =
+ T.unlines $
+ (T.concat ["{{", name]) :
+ ( fmap
+ (\(key, val) -> T.concat ["|", key, "=", val])
+ params
+ )
+ ++ ["}}"]
+-- The main function that formats a PackageInfo to a wiki entry
+formatWikiEntry :: PackageInfo -> Text
+formatWikiEntry package =
+ formatWikiEntry' $
+ (wtEntry package) : (wtImport package) : wtLicenses package
+ ++ wtPersons package ++ wtResources package
+
+type WikiEntry = [WikiTemplate]
+
+formatWikiEntry' :: WikiEntry -> Text
+formatWikiEntry' = T.concat . fmap formatTemplate
+
+-- https://directory.fsf.org/wiki/Template:Entry
+wtEntry :: PackageInfo -> WikiTemplate
+wtEntry package =
+ WikiTemplate "Entry"
+ -- this may cause problems some times, like 0ad-data which is a
+ -- separate package from 0ad, but with the same upstream name (0ad),
+ -- and can cause overwrite of 0ad info
+ [ ("Name", fromMaybe (piPackage package) (piUpstreamName package)),
+ ("Short description", piShortDesc package),
+ ("Full description", piFullDesc package),
+ ("Homepage", fromMaybe "" $ piHomepage package),
+ ("Computer languages", formatImplLangs package),
+ ("Version identifier", piVersion package),
+ ("Version download", getDlUrl package),
+ ("Submitted by", "Debian import"),
+ ("Submitted date", T.pack $ show $ utctDay $ piTimestamp package)
+ ]
+
+formatImplLangs :: PackageInfo -> Text
+formatImplLangs package =
+ T.intercalate "," $ catMaybes $ getLang <$> tagList
+ where tags = piTags package
+ tagList = filter (\tag -> head tag == "implemented-in") (T.splitOn "::" <$> tags)
+ getLang xs = xs !? 1
+
+getDlUrl :: PackageInfo -> Text
+getDlUrl package =
+ T.concat
+ ["http://ftp.debian.org/debian/pool/main/"
+ , if T.isPrefixOf "lib" name then T.take 4 name else T.take 1 name
+ , "/", name, "/", file]
+ where
+ name = piPackage package
+ file = piFile package
+
+-- TODO
+-- https://directory.fsf.org/wiki/Template:Software_category
+{-
+{{Software category
+|Game=game::
+|Interface=interface::
+|Protocol=protocol::
+|Use=use::
+...
+}}
+-}
+
+-- https://directory.fsf.org/wiki/Template:Import
+makeLink :: Text -> Text
+makeLink name = T.concat ["http://packages.debian.org/stable/", name]
+
+wtImport :: PackageInfo -> WikiTemplate
+wtImport package =
+ WikiTemplate
+ "Import"
+ [("Source", "Debian"), ("Source link", makeLink $ piPackage package),
+ ("Date", T.pack $ show $ utctDay $ piTimestamp package)]
+
+-- https://directory.fsf.org/wiki/Template:Project_license
+wtLicenses :: PackageInfo -> [WikiTemplate]
+wtLicenses package =
+ wtLicense (piCLAuthor package)
+ (T.pack $ show $ utctDay $ piCLTimestamp package) <$>
+ (piLicenses package)
+
+wtLicense :: Text -> Text -> LicenseInfo -> WikiTemplate
+wtLicense author time info =
+ WikiTemplate
+ "Project license"
+ [ ("License", lLicense info),
+ ("License copyright", fromMaybe "" $ lCopyright info),
+ ("License verified by", author),
+ ("License verified date", time),
+ ("License note", fromMaybe "" $ lNote info)
+ ]
+
+-- https://directory.fsf.org/wiki/Template:Person
+data WTPerson = WTPerson
+ { wpName :: Text,
+ wpRole :: Text,
+ wpEmail :: Text
+ }
+
+wtPerson :: Contact -> WikiTemplate
+wtPerson (Contact name email) =
+ WikiTemplate
+ "Person"
+ [("Real name", fromMaybe "" name), ("Role", "contact"), ("Email", email)]
+
+wtPersons :: PackageInfo -> [WikiTemplate]
+wtPersons info = wtPerson <$> piContacts info
+
+-- https://directory.fsf.org/wiki/Template:Resource
+wtResources :: PackageInfo -> [WikiTemplate]
+wtResources package =
+ (\url -> WikiTemplate "Resource"
+ [("Resource kind", "Download"), ("Resource URL", url)]) <$>
+ piSources package