{- Copyright (C) 2022 Yuchen Pei. This file is part of f2md. f2md is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. f2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with f2md. If not, see . -} {-# LANGUAGE OverloadedStrings #-} module F2Md.Import (toMessagesFromUrl) where import Data.List import Data.Text.Encoding import Control.Monad import Data.Time import System.Process import Data.UUID import Data.UUID.V4 import Data.Char import Data.Text (Text) import Text.Feed.Types import qualified Data.Text as T import Data.Maybe import Text.Feed.Query import Data.Generics import Text.RSS.Syntax hiding (RSSItem) import Data.XML.Types import Text.Feed.Import import Text.Feed.Types import F2Md.Types import Crypto.Hash toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message] toMessagesFromUrl url after = do feed <- parseFeedString <$> (fetchFeed $ T.unpack url) case feed of Nothing -> return [] Just feed' -> return $ sortOn (\msg -> zonedTimeToUTC (mDate msg)) $ catMaybes $ toMessage' after (toFeedMetadata feed' url) <$> getFeedItems feed' wgetCommand :: String wgetCommand = "wget -O- 2>/dev/null" fetchFeed :: String -> IO String fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) "" toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> Maybe Message toMessage' after feed item | isNothing date = Nothing | isNothing after = Just $ toMessage feed item | zonedTimeToUTC (fromJust after) >= fromJust date = Nothing | otherwise = Just $ toMessage feed item where date = join $ getItemPublishDate item toMessage :: FeedMetadata -> Item -> Message toMessage (FeedMetadata title author home url) item = Message (fromMaybe title $ orElse (getItemAuthor' item) author) (fromMaybe "Untitled" $ getItemTitle item) (fromJust $ join $ getItemPublishDate item) -- default should be current date messageId body url where body = (formatBody (getItemLink item) (getItemEnclosure item) $ (getItemContent item) `orElse` (getItemContent' item) `orElse` (getItemDescription item)) messageId = "<" <> (T.pack $ show $ hashWith MD5 (encodeUtf8 body)) <> "@" <> (T.map hyphenateNonAlNum url) <> ".gnu" <> ">" getItemAuthor' :: Item -> Maybe Text getItemAuthor' item = case getItemAuthor item of Just "" -> Nothing Just author -> Just author Nothing -> Nothing getItemContent' :: Item -> Maybe Text getItemContent' (RSSItem item) = (fmap (T.concat . elementText) . listToMaybe . filter (\e -> namePrefix (elementName e) == Just "content")) $ rssItemOther item getItemContent' _ = Nothing formatBody :: Maybe Text -> Maybe (Text, Maybe Text, Maybe Integer) -> Maybe Text -> Text formatBody link encl desc = let linkText = case link of Nothing -> "" Just link' -> "Link: <" <> link' <> ">\n\n" enclText = case encl of Nothing -> "" Just (url, Nothing, Nothing) -> urlPt url <> "\n\n" Just (url, Just ty, Nothing) -> urlPt url <> " (" <> ty <> ")\n\n" Just (url, Nothing, Just len) -> urlPt url <> " (" <> lenPt len <> ")\n\n" Just (url, Just ty, Just len) -> urlPt url <> " (" <> ty <> ", " <> lenPt len <> ")\n\n" where urlPt url = "Enclosure: <" <> url <> ">" lenPt len = (T.pack $ show len) <> " bytes" in linkText <> enclText <> (fromMaybe "" desc) toFeedMetadata :: Feed -> Text -> FeedMetadata toFeedMetadata feed url = FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed) url hyphenateNonAlNum :: Char -> Char hyphenateNonAlNum c | isAlphaNum c = c | otherwise = '-'