From caf354c7e5bcb5142d8b4358824d22d3de122f34 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Fri, 27 May 2022 18:57:24 +1000 Subject: break apart the code, timestamp read imp. --- src/F2Md/Import.hs | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 src/F2Md/Import.hs (limited to 'src/F2Md/Import.hs') diff --git a/src/F2Md/Import.hs b/src/F2Md/Import.hs new file mode 100644 index 0000000..ee68578 --- /dev/null +++ b/src/F2Md/Import.hs @@ -0,0 +1,102 @@ +{-# LANGUAGE OverloadedStrings #-} + +module F2Md.Import (toMessagesFromFile, toMessagesFromUrl) where + +import Control.Monad +import Data.Time +import System.Process +import Data.UUID +import Data.UUID.V4 +import Data.Char +import Data.Text (Text) +import Text.Feed.Types +import qualified Data.Text as T +import Data.Maybe +import Text.Feed.Query +import Data.Generics +import Text.RSS.Syntax hiding (RSSItem) +import Data.XML.Types +import Text.Feed.Import +import Text.Feed.Types +import F2Md.Types + +toMessagesFromFile :: FilePath -> IO [Message] +toMessagesFromFile file = do + feed <- parseFeedFromFile file + case feed of + Nothing -> return [] + Just feed' -> + mapM (\msg -> toMessage (toFeedMetadata feed') msg) (getFeedItems feed') + +toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message] +toMessagesFromUrl url after = do + feed <- parseFeedString <$> (fetchFeed $ T.unpack url) + case feed of + Nothing -> return [] + Just feed' -> catMaybes <$> + mapM (\msg -> toMessage' after (toFeedMetadata feed') msg) (getFeedItems feed') + +wgetCommand :: String +wgetCommand = "wget -O- 2>/dev/null" + +fetchFeed :: String -> IO String +fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) "" + +toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> IO (Maybe Message) +toMessage' after feed item + | isNothing after = toMessage feed item >>= return . Just + | isNothing date || zonedTimeToUTC (fromJust after) >= fromJust date = + return Nothing + | otherwise = toMessage feed item >>= return . Just + where date = join $ getItemPublishDate item + +toMessage :: FeedMetadata -> Item -> IO Message +toMessage (FeedMetadata title author home) item = do + messageId <- genMessageId (fromMaybe "" home) (snd <$> getItemId item) + return $ Message + (fromMaybe title $ orElse (getItemAuthor' item) author) + (fromMaybe "Untitled" $ getItemTitle item) + (fromMaybe "" $ getItemDate item) -- default should be current date + messageId + (formatBody (getItemLink item) $ + (getItemContent item) `orElse` (getItemContent' item) `orElse` + (getItemDescription item)) +-- title + +getItemAuthor' :: Item -> Maybe Text +getItemAuthor' item = case getItemAuthor item of + Just "" -> Nothing + Just author -> Just author + Nothing -> Nothing + +getItemContent' :: Item -> Maybe Text +getItemContent' (RSSItem item) = + (fmap (T.concat . elementText) . listToMaybe . + filter (\e -> namePrefix (elementName e) == Just "content")) + $ rssItemOther item +getItemContent' _ = Nothing + +formatBody :: Maybe Text -> Maybe Text -> Text +formatBody link desc = + let linkText = case link of + Nothing -> "" + Just link' -> "Link: " <> link' <> "\n\n" in + linkText <> (fromMaybe "" desc) + +toFeedMetadata :: Feed -> FeedMetadata +toFeedMetadata feed = + FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed) + +genMessageId :: Text -> Maybe Text -> IO Text +genMessageId feedLink guid = do + id <- case guid of + Just guid' -> return $ T.map hyphenateNonAlNum guid' + Nothing -> toText <$> nextRandom + return $ + "<" <> id <> "@" <> (T.map hyphenateNonAlNum feedLink) <> ".gnu" <> ">" + +hyphenateNonAlNum :: Char -> Char +hyphenateNonAlNum c + | isAlphaNum c = c + | otherwise = '-' + -- cgit v1.2.3