{-
Copyright (C) 2022 Yuchen Pei.
This file is part of f2md.
f2md is free software: you can redistribute it and/or modify it under
the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
f2md is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
Public License for more details.
You should have received a copy of the GNU Affero General Public
License along with f2md. If not, see .
-}
{-# LANGUAGE OverloadedStrings #-}
module F2Md.Import (toMessagesFromUrl) where
import Data.List
import Data.Text.Encoding
import Control.Monad
import Data.Time
import System.Process
import Data.Char
import Data.Text (Text)
import Text.Feed.Types
import qualified Data.Text as T
import Data.Maybe
import Text.Feed.Query
import Data.Generics
import Text.RSS.Syntax hiding (RSSItem)
import Data.XML.Types
import Text.Feed.Import
import F2Md.Types
import Crypto.Hash
toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message]
toMessagesFromUrl url after = do
feed <- parseFeedString <$> fetchFeed (T.unpack url)
case feed of
Nothing -> return []
Just feed' -> return $
sortOn (zonedTimeToUTC . mDate) $ catMaybes $
toMessage' after (toFeedMetadata feed' url) <$> getFeedItems feed'
wgetCommand :: String
wgetCommand = "wget -O- 2>/dev/null"
fetchFeed :: String -> IO String
fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) ""
toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> Maybe Message
toMessage' after feed item
| isNothing date = Nothing
| isNothing after = Just $ toMessage feed item
| zonedTimeToUTC (fromJust after) >= fromJust date = Nothing
| otherwise = Just $ toMessage feed item
where date = join $ getItemPublishDate item
toMessage :: FeedMetadata -> Item -> Message
toMessage (FeedMetadata title author home url) item =
Message
(fromMaybe title $ orElse (getItemAuthor' item) author)
(fromMaybe "Untitled" $ getItemTitle item)
(fromJust $ join $ getItemPublishDate item) -- default should be current date
messageId
body
url
where body =
formatBody (getItemLink item) (getItemEnclosure item) $
(getItemContent item) `orElse` (getItemContent' item) `orElse`
(getItemDescription item)
messageId =
"<" <> T.pack (show $ hashWith MD5 (encodeUtf8 body)) <> "@" <>
T.map hyphenateNonAlNum url <> ".gnu" <> ">"
getItemAuthor' :: Item -> Maybe Text
getItemAuthor' item = case getItemAuthor item of
Just "" -> Nothing
Just author -> Just author
Nothing -> Nothing
getItemContent' :: Item -> Maybe Text
getItemContent' (RSSItem item) =
(fmap (T.concat . elementText) . find (\e -> namePrefix (elementName e) == Just "content"))
$ rssItemOther item
getItemContent' _ = Nothing
formatBody :: Maybe Text -> Maybe (Text, Maybe Text, Maybe Integer) -> Maybe Text -> Text
formatBody link encl desc =
let linkText = case link of
Nothing -> ""
Just link' -> " "'>Link\n"
enclText = case encl of
Nothing -> ""
Just (url, Nothing, Nothing) -> urlPt url <> "\n"
Just (url, Just ty, Nothing) -> urlPt url <> " (" <> ty <> ")\n"
Just (url, Nothing, Just len) -> urlPt url <> " (" <> lenPt len <> ")\n"
Just (url, Just ty, Just len) ->
urlPt url <> " (" <> ty <> ", " <> lenPt len <> ")\n"
where urlPt url = "Enclosure"
lenPt len = T.pack (show len) <> " bytes"
in
linkText <> enclText <> fromMaybe "" desc
toFeedMetadata :: Feed -> Text -> FeedMetadata
toFeedMetadata feed = FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed)
hyphenateNonAlNum :: Char -> Char
hyphenateNonAlNum c
| isAlphaNum c = c
| otherwise = '-'