aboutsummaryrefslogtreecommitdiff
path: root/src/FSD/Types.hs
blob: 6ac6c12fee078b6cf5ace731f608c381398a76e4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
{-
Copyright (C) 2022 Yuchen Pei.

This file is part of fsd.

fsd is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

fsd is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General
Public License for more details.

You should have received a copy of the GNU Affero General Public
License along with fsd.  If not, see <https://www.gnu.org/licenses/>.

-}

{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}

module FSD.Types where

import Data.Maybe
import Data.Time
import Data.Text (Text)
import Data.Text qualified as T
import Database.SQLite.Simple
import Database.SQLite.Simple.FromField

{- Source
- obtained from dist/Sources.gz
- sPackage, sVersion: key, package and version, also used for {{Entry|Version identifier, Version download and {{Import|Source link and Source packages
- there's also Uploaders field in Source, but it is not as good as the changelog which is more precise on the version
-}
data Source = Source
  {sPackage :: Text, sVersion :: Text, sFile :: Text, sHomepage :: Maybe Text}
  deriving (Show)

instance ToRow Source where
  toRow (Source a b c d) = toRow (a, b, c, d)

instance FromRow Source where
  fromRow = Source <$> field <*> field <*> field <*> field

{- Package
- obtained from dist/Packages.gz
- pPackage, pVersion: key, package and version
  - comes from Package and Version
- homepage: used for {{Entry|Homepage URL
  - comes from Homepage
  - also available in Sources.gz
- tags: used for {{Entry|Computer languages (implementing lang)
  - can also be used for use, works-with etc.
-}

data Package = Package
  { pPackage :: Text,
    pVersion :: Text,
    pHomepage :: Maybe Text,
    pTags :: Tags
  }
  deriving (Show)

newtype Tags = Tags {unTags :: [Text]} deriving (Show, Eq)

instance ToRow Package where
  toRow (Package a b c d) = toRow (a, b, c, T.intercalate "," $ unTags d)

instance FromRow Package where
  fromRow = Package <$> field <*> field <*> field <*> field

instance FromField Tags where
  fromField f = Tags <$> T.splitOn "," <$> fromField f

{- Translation
- obtained from dist
- tPackage: key, package name
  - comes from Package
- (shortDesc, fullDesc) are for {{Entry|Short description and {{Entry|Full description
  - comes from Description-en.syn and .full (formatted text with syn)
-}
data Translation = Translation
  { tPackage :: Text,
    tShortDesc :: Text,
    tFullDesc :: Text
  }
  deriving (Show)

instance ToRow Translation where
  toRow (Translation a b c) = toRow (a, b, c)

instance FromRow Translation where
  fromRow = Translation <$> field <*> field <*> field

{- ChangeLog
- obtained from metaftp
- the latest changelog entry
- clPacakge and clVersion are keys, ignores on mismatch
- author is used for {{Project license|License verified by
- timeStamp for {{Project license|License verified date
-}
data FSDChangeLogEntry = FSDChangeLogEntry
  { clPackage :: Text,
    clVersion :: Text,
    clAuthor :: Text,
    clTimestamp :: UTCTime
  }
  deriving (Show)

instance ToRow FSDChangeLogEntry where
  toRow (FSDChangeLogEntry a b c d) = toRow (a, b, c, d)

instance FromRow FSDChangeLogEntry where
  fromRow = FSDChangeLogEntry <$> field <*> field <*> field <*> field

-- copyright types

{-
- upstreamName is used for {{Entry|Name
  - stores upstream package name (canonical name);
  - comes from Upstream-Name; defined to be name used by upstream;
  - some copyright files use Upstream-Name for contacts (but not often), which is rather hard to detect ("Foo Library" and "John Doe" are of the same format"), so we take it at face value
- contacts is used for {{Person|Real name and {{Person|Email with Role=contact
  - stores contact name and email address;
  - comes from Upstream-Contact (line-based list)
  - may contain urls which are ignored
- sources is used for {{Resource|Resource URL with {{Resource|Resource kind=Download
  - stores resource urls with download kind;
  - comes from Source (formatted text with no synopsis), in practice all in url form
  - so we parse it as whitespace separated list of urls
-}

data Contact = Contact
  { coName :: Maybe Text,
    coEmail :: Text
  }
  deriving (Show)

serializeContacts :: [Contact] -> Text
serializeContacts = T.intercalate ";" . fmap serializeContact

serializeContact :: Contact -> Text
serializeContact (Contact name email) =
  T.concat [(fromMaybe "" name), ",", email]

deserializeContacts :: Text -> [Contact]
deserializeContacts contacts =
  if T.null contacts
  then []
  else catMaybes $ deserializeContact <$> T.splitOn ";" contacts

deserializeContact :: Text -> Maybe Contact
deserializeContact raw =
  case T.splitOn "," raw of
    [_, ""] -> Nothing
    [name, email] ->
      Just $ Contact (if T.null name then Nothing else Just name) email
    otherwise -> Nothing

data Upstream = Upstream
  { uPackage :: Text,
    uName :: Maybe Text,
    uContacts :: [Contact],
    uSources :: SourceUrls
  }
  deriving (Show)

newtype SourceUrls = SourceUrls {unSources :: [Text]} deriving (Eq, Show)

instance ToRow Upstream where
  toRow (Upstream a b contacts sources) =
    toRow
      ( a,
        b,
        serializeContacts contacts,
        T.intercalate "\n" $ unSources sources
      )

instance FromRow Upstream where
  fromRow = Upstream <$> field <*> field <*> field <*> field

instance FromField SourceUrls where
  fromField f = SourceUrls <$> T.splitOn "\n" <$> fromField f

instance FromField [Contact] where
  fromField f = deserializeContacts <$> fromField f

{- License Information
- we need to filter out debian/* Files
- copyright is used for {{Project license|License copyright
  - comes from Files.Copyright and Header.Copyright (formatted text no syn)
- license is used for {{Project license|License
  - comes from Files.License.syn and Header.License.syn (formatted text with syn)
  - we treat multi-licensing license as one (e.g. "GPL-2+ or MPL" is a valid license field)
- note is used for {{Project license|License note
  - comes from Files.License.full and Header.License.full (formatted text with syn), or in case of missing fulls, License.full (formatted text with syn) for matching syn
-}
data LicenseInfo = LicenseInfo
  { lCopyright :: Maybe Text,
    lLicense :: Text,
    lNote :: Maybe Text
  }
  deriving (Show)

instance FromRow LicenseInfo where
  fromRow = LicenseInfo <$> field <*> field <*> field

{- Obtained from copyright files
- formatted as https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
- nonformatted files are ignored
-}
data Copyright = Copyright
  { crPackage :: Text,
    crLicenses :: [LicenseInfo]
  }
  deriving (Show)

data PackageInfo = PackageInfo
  { piPackage :: Text,
    piVersion :: Text,
    piTimestamp :: UTCTime, -- time the PackageInfo is created
    piFile :: Text,
    piHomepage :: Maybe Text,
    piTags :: [Text],
    piShortDesc :: Text,
    piFullDesc :: Text,
    piUpstreamName :: Maybe Text,
    piContacts :: [Contact],
    piSources :: [Text],
    piLicenses :: [LicenseInfo],
    piCLAuthor :: Text,
    piCLTimestamp :: UTCTime
  }
  deriving (Show)

{- TODO
- [X] refactor Copyright into CRHeader (with a chPackage) and CRLicense (with a crPackage)
- refactor so that homepage comes from source overriding package
- refactor so that shordesc from translations overriding source overriding package
- refactor so that things get assembled into a big ass object writing to various entries
- add a (Debian changelog author) to license verifier
- make sure the db creation works as expected (especially primary key on conflict replace
- utilise indexes in db
-}