aboutsummaryrefslogtreecommitdiff
path: root/src/F2Md/Import.hs
blob: 80dfa54b69db8b14068116cc458e3bb884db79e6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
{-
Copyright (C) 2022 Yuchen Pei.

This file is part of f2md.

f2md is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

f2md is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General
Public License for more details.

You should have received a copy of the GNU Affero General Public
License along with f2md.  If not, see <https://www.gnu.org/licenses/>.

-}

{-# LANGUAGE OverloadedStrings #-}

module F2Md.Import (toMessagesFromUrl) where

import Data.List
import Data.Text.Encoding
import Control.Monad
import Data.Time
import System.Process
import Data.UUID
import Data.UUID.V4
import Data.Char
import Data.Text (Text)
import Text.Feed.Types
import qualified Data.Text as T
import Data.Maybe
import Text.Feed.Query
import Data.Generics
import Text.RSS.Syntax hiding (RSSItem)
import Data.XML.Types
import Text.Feed.Import
import F2Md.Types
import Crypto.Hash

toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message]
toMessagesFromUrl url after = do
  feed <- parseFeedString <$> fetchFeed (T.unpack url)
  case feed of
    Nothing -> return []
    Just feed' -> return $
      sortOn (zonedTimeToUTC . mDate) $ catMaybes $
      toMessage' after (toFeedMetadata feed' url) <$> getFeedItems feed'

wgetCommand :: String
wgetCommand = "wget -O- 2>/dev/null"

fetchFeed :: String -> IO String
fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) ""

toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> Maybe Message
toMessage' after feed item
  | isNothing date = Nothing
  | isNothing after = Just $ toMessage feed item
  | zonedTimeToUTC (fromJust after) >= fromJust date = Nothing
  | otherwise = Just $ toMessage feed item
  where date = join $ getItemPublishDate item

toMessage :: FeedMetadata -> Item -> Message
toMessage (FeedMetadata title author home url) item =
  Message
    (fromMaybe title $ orElse (getItemAuthor' item) author)
    (fromMaybe "Untitled" $ getItemTitle item)
    (fromJust $ join $ getItemPublishDate item) -- default should be current date
    messageId
    body
    url
  where body =
          formatBody (getItemLink item) (getItemEnclosure item) $
            (getItemContent item) `orElse` (getItemContent' item) `orElse`
            (getItemDescription item)
        messageId =
          "<" <> T.pack (show $ hashWith MD5 (encodeUtf8 body)) <> "@" <>
          T.map hyphenateNonAlNum url <> ".gnu" <> ">"


getItemAuthor' :: Item -> Maybe Text
getItemAuthor' item = case getItemAuthor item of
  Just "" -> Nothing
  Just author -> Just author
  Nothing -> Nothing

getItemContent' :: Item -> Maybe Text
getItemContent' (RSSItem item) =
  (fmap (T.concat . elementText) . find (\e -> namePrefix (elementName e) == Just "content"))
  $ rssItemOther item
getItemContent' _ = Nothing

formatBody :: Maybe Text -> Maybe (Text, Maybe Text, Maybe Integer) -> Maybe Text -> Text
formatBody link encl desc =
  let linkText = case link of
        Nothing -> ""
        Just link' -> "Link: <" <> link' <> ">\n\n"
      enclText = case encl of
        Nothing -> ""
        Just (url, Nothing, Nothing) -> urlPt url <> "\n\n"
        Just (url, Just ty, Nothing) -> urlPt url <> " (" <> ty <> ")\n\n"
        Just (url, Nothing, Just len) -> urlPt url <> " (" <> lenPt len <> ")\n\n"
        Just (url, Just ty, Just len) ->
          urlPt url <> " (" <> ty <> ", " <> lenPt len <> ")\n\n"
        where urlPt url = "Enclosure: <" <> url <> ">"
              lenPt len = T.pack (show len) <> " bytes"
  in
    linkText <> enclText <> fromMaybe "" desc

toFeedMetadata :: Feed -> Text -> FeedMetadata
toFeedMetadata feed = FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed)

hyphenateNonAlNum :: Char -> Char
hyphenateNonAlNum c
  | isAlphaNum c = c
  | otherwise = '-'