1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
{-
Copyright (C) 2022 Yuchen Pei.
This file is part of f2md.
f2md is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
f2md is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
Public License for more details.
You should have received a copy of the GNU Affero General Public
License along with f2md. If not, see <https://www.gnu.org/licenses/>.
-}
{-# LANGUAGE OverloadedStrings #-}
module F2Md.Import (toMessagesFromUrl) where
import Data.List
import Data.Text.Encoding
import Control.Monad
import Data.Time
import System.Process
import Data.UUID
import Data.UUID.V4
import Data.Char
import Data.Text (Text)
import Text.Feed.Types
import qualified Data.Text as T
import Data.Maybe
import Text.Feed.Query
import Data.Generics
import Text.RSS.Syntax hiding (RSSItem)
import Data.XML.Types
import Text.Feed.Import
import Text.Feed.Types
import F2Md.Types
import Crypto.Hash
toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message]
toMessagesFromUrl url after = do
feed <- parseFeedString <$> (fetchFeed $ T.unpack url)
case feed of
Nothing -> return []
Just feed' -> return $
sortOn (\msg -> zonedTimeToUTC (mDate msg)) $ catMaybes $
toMessage' after (toFeedMetadata feed' url) <$> getFeedItems feed'
wgetCommand :: String
wgetCommand = "wget -O- 2>/dev/null"
fetchFeed :: String -> IO String
fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) ""
toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> Maybe Message
toMessage' after feed item
| isNothing date = Nothing
| isNothing after = Just $ toMessage feed item
| zonedTimeToUTC (fromJust after) >= fromJust date = Nothing
| otherwise = Just $ toMessage feed item
where date = join $ getItemPublishDate item
toMessage :: FeedMetadata -> Item -> Message
toMessage (FeedMetadata title author home url) item =
Message
(fromMaybe title $ orElse (getItemAuthor' item) author)
(fromMaybe "Untitled" $ getItemTitle item)
(fromJust $ join $ getItemPublishDate item) -- default should be current date
messageId
body
url
where body =
(formatBody (getItemLink item) (getItemEnclosure item) $
(getItemContent item) `orElse` (getItemContent' item) `orElse`
(getItemDescription item))
messageId =
"<" <> (T.pack $ show $ hashWith MD5 (encodeUtf8 body)) <> "@" <>
(T.map hyphenateNonAlNum url) <> ".gnu" <> ">"
getItemAuthor' :: Item -> Maybe Text
getItemAuthor' item = case getItemAuthor item of
Just "" -> Nothing
Just author -> Just author
Nothing -> Nothing
getItemContent' :: Item -> Maybe Text
getItemContent' (RSSItem item) =
(fmap (T.concat . elementText) . listToMaybe .
filter (\e -> namePrefix (elementName e) == Just "content"))
$ rssItemOther item
getItemContent' _ = Nothing
formatBody :: Maybe Text -> Maybe (Text, Maybe Text, Maybe Integer) -> Maybe Text -> Text
formatBody link encl desc =
let linkText = case link of
Nothing -> ""
Just link' -> "Link: <" <> link' <> ">\n\n"
enclText = case encl of
Nothing -> ""
Just (url, Nothing, Nothing) -> urlPt url <> "\n\n"
Just (url, Just ty, Nothing) -> urlPt url <> " (" <> ty <> ")\n\n"
Just (url, Nothing, Just len) -> urlPt url <> " (" <> lenPt len <> ")\n\n"
Just (url, Just ty, Just len) ->
urlPt url <> " (" <> ty <> ", " <> lenPt len <> ")\n\n"
where urlPt url = "Enclosure: <" <> url <> ">"
lenPt len = (T.pack $ show len) <> " bytes"
in
linkText <> enclText <> (fromMaybe "" desc)
toFeedMetadata :: Feed -> Text -> FeedMetadata
toFeedMetadata feed url =
FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed) url
hyphenateNonAlNum :: Char -> Char
hyphenateNonAlNum c
| isAlphaNum c = c
| otherwise = '-'
|