diff options
Diffstat (limited to 'src/FSD/Types.hs')
-rw-r--r-- | src/FSD/Types.hs | 248 |
1 files changed, 248 insertions, 0 deletions
diff --git a/src/FSD/Types.hs b/src/FSD/Types.hs new file mode 100644 index 0000000..6ac6c12 --- /dev/null +++ b/src/FSD/Types.hs @@ -0,0 +1,248 @@ +{- +Copyright (C) 2022 Yuchen Pei. + +This file is part of fsd. + +fsd is free software: you can redistribute it and/or modify it under +the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +fsd is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +Public License for more details. + +You should have received a copy of the GNU Affero General Public +License along with fsd. If not, see <https://www.gnu.org/licenses/>. + +-} + +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleInstances #-} + +module FSD.Types where + +import Data.Maybe +import Data.Time +import Data.Text (Text) +import Data.Text qualified as T +import Database.SQLite.Simple +import Database.SQLite.Simple.FromField + +{- Source +- obtained from dist/Sources.gz +- sPackage, sVersion: key, package and version, also used for {{Entry|Version identifier, Version download and {{Import|Source link and Source packages +- there's also Uploaders field in Source, but it is not as good as the changelog which is more precise on the version +-} +data Source = Source + {sPackage :: Text, sVersion :: Text, sFile :: Text, sHomepage :: Maybe Text} + deriving (Show) + +instance ToRow Source where + toRow (Source a b c d) = toRow (a, b, c, d) + +instance FromRow Source where + fromRow = Source <$> field <*> field <*> field <*> field + +{- Package +- obtained from dist/Packages.gz +- pPackage, pVersion: key, package and version + - comes from Package and Version +- homepage: used for {{Entry|Homepage URL + - comes from Homepage + - also available in Sources.gz +- tags: used for {{Entry|Computer languages (implementing lang) + - can also be used for use, works-with etc. +-} + +data Package = Package + { pPackage :: Text, + pVersion :: Text, + pHomepage :: Maybe Text, + pTags :: Tags + } + deriving (Show) + +newtype Tags = Tags {unTags :: [Text]} deriving (Show, Eq) + +instance ToRow Package where + toRow (Package a b c d) = toRow (a, b, c, T.intercalate "," $ unTags d) + +instance FromRow Package where + fromRow = Package <$> field <*> field <*> field <*> field + +instance FromField Tags where + fromField f = Tags <$> T.splitOn "," <$> fromField f + +{- Translation +- obtained from dist +- tPackage: key, package name + - comes from Package +- (shortDesc, fullDesc) are for {{Entry|Short description and {{Entry|Full description + - comes from Description-en.syn and .full (formatted text with syn) +-} +data Translation = Translation + { tPackage :: Text, + tShortDesc :: Text, + tFullDesc :: Text + } + deriving (Show) + +instance ToRow Translation where + toRow (Translation a b c) = toRow (a, b, c) + +instance FromRow Translation where + fromRow = Translation <$> field <*> field <*> field + +{- ChangeLog +- obtained from metaftp +- the latest changelog entry +- clPacakge and clVersion are keys, ignores on mismatch +- author is used for {{Project license|License verified by +- timeStamp for {{Project license|License verified date +-} +data FSDChangeLogEntry = FSDChangeLogEntry + { clPackage :: Text, + clVersion :: Text, + clAuthor :: Text, + clTimestamp :: UTCTime + } + deriving (Show) + +instance ToRow FSDChangeLogEntry where + toRow (FSDChangeLogEntry a b c d) = toRow (a, b, c, d) + +instance FromRow FSDChangeLogEntry where + fromRow = FSDChangeLogEntry <$> field <*> field <*> field <*> field + +-- copyright types + +{- +- upstreamName is used for {{Entry|Name + - stores upstream package name (canonical name); + - comes from Upstream-Name; defined to be name used by upstream; + - some copyright files use Upstream-Name for contacts (but not often), which is rather hard to detect ("Foo Library" and "John Doe" are of the same format"), so we take it at face value +- contacts is used for {{Person|Real name and {{Person|Email with Role=contact + - stores contact name and email address; + - comes from Upstream-Contact (line-based list) + - may contain urls which are ignored +- sources is used for {{Resource|Resource URL with {{Resource|Resource kind=Download + - stores resource urls with download kind; + - comes from Source (formatted text with no synopsis), in practice all in url form + - so we parse it as whitespace separated list of urls +-} + +data Contact = Contact + { coName :: Maybe Text, + coEmail :: Text + } + deriving (Show) + +serializeContacts :: [Contact] -> Text +serializeContacts = T.intercalate ";" . fmap serializeContact + +serializeContact :: Contact -> Text +serializeContact (Contact name email) = + T.concat [(fromMaybe "" name), ",", email] + +deserializeContacts :: Text -> [Contact] +deserializeContacts contacts = + if T.null contacts + then [] + else catMaybes $ deserializeContact <$> T.splitOn ";" contacts + +deserializeContact :: Text -> Maybe Contact +deserializeContact raw = + case T.splitOn "," raw of + [_, ""] -> Nothing + [name, email] -> + Just $ Contact (if T.null name then Nothing else Just name) email + otherwise -> Nothing + +data Upstream = Upstream + { uPackage :: Text, + uName :: Maybe Text, + uContacts :: [Contact], + uSources :: SourceUrls + } + deriving (Show) + +newtype SourceUrls = SourceUrls {unSources :: [Text]} deriving (Eq, Show) + +instance ToRow Upstream where + toRow (Upstream a b contacts sources) = + toRow + ( a, + b, + serializeContacts contacts, + T.intercalate "\n" $ unSources sources + ) + +instance FromRow Upstream where + fromRow = Upstream <$> field <*> field <*> field <*> field + +instance FromField SourceUrls where + fromField f = SourceUrls <$> T.splitOn "\n" <$> fromField f + +instance FromField [Contact] where + fromField f = deserializeContacts <$> fromField f + +{- License Information +- we need to filter out debian/* Files +- copyright is used for {{Project license|License copyright + - comes from Files.Copyright and Header.Copyright (formatted text no syn) +- license is used for {{Project license|License + - comes from Files.License.syn and Header.License.syn (formatted text with syn) + - we treat multi-licensing license as one (e.g. "GPL-2+ or MPL" is a valid license field) +- note is used for {{Project license|License note + - comes from Files.License.full and Header.License.full (formatted text with syn), or in case of missing fulls, License.full (formatted text with syn) for matching syn +-} +data LicenseInfo = LicenseInfo + { lCopyright :: Maybe Text, + lLicense :: Text, + lNote :: Maybe Text + } + deriving (Show) + +instance FromRow LicenseInfo where + fromRow = LicenseInfo <$> field <*> field <*> field + +{- Obtained from copyright files +- formatted as https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +- nonformatted files are ignored +-} +data Copyright = Copyright + { crPackage :: Text, + crLicenses :: [LicenseInfo] + } + deriving (Show) + +data PackageInfo = PackageInfo + { piPackage :: Text, + piVersion :: Text, + piTimestamp :: UTCTime, -- time the PackageInfo is created + piFile :: Text, + piHomepage :: Maybe Text, + piTags :: [Text], + piShortDesc :: Text, + piFullDesc :: Text, + piUpstreamName :: Maybe Text, + piContacts :: [Contact], + piSources :: [Text], + piLicenses :: [LicenseInfo], + piCLAuthor :: Text, + piCLTimestamp :: UTCTime + } + deriving (Show) + +{- TODO +- [X] refactor Copyright into CRHeader (with a chPackage) and CRLicense (with a crPackage) +- refactor so that homepage comes from source overriding package +- refactor so that shordesc from translations overriding source overriding package +- refactor so that things get assembled into a big ass object writing to various entries +- add a (Debian changelog author) to license verifier +- make sure the db creation works as expected (especially primary key on conflict replace +- utilise indexes in db +-} |