From b454da8bf8f76e0c75a751a1a6cb93f480305199 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Mon, 30 May 2022 13:31:49 +1000 Subject: several enhancements - logging - sorting by date - adding enclosure - deterministic message id using md5 --- src/F2Md/Config.hs | 34 +++++++++++-------------- src/F2Md/Import.hs | 73 ++++++++++++++++++++++++++++-------------------------- src/Main.hs | 17 ++++++++++--- 3 files changed, 66 insertions(+), 58 deletions(-) (limited to 'src') diff --git a/src/F2Md/Config.hs b/src/F2Md/Config.hs index ef209dc..46a9a75 100644 --- a/src/F2Md/Config.hs +++ b/src/F2Md/Config.hs @@ -4,8 +4,10 @@ module F2Md.Config ( getUserdata , FeedUserdata(..) , updateLastUpdated + , updateLastUpdated' ) where +import Control.Monad import Data.List import F2Md.Utils import F2Md.Types @@ -48,24 +50,18 @@ getLastUpdatedMap :: FilePath -> IO (HM.HashMap Text ZonedTime) getLastUpdatedMap dbPath = fromMaybe (HM.fromList []) <$> (decodeFileStrict =<< expandPath dbPath) -updateLastUpdated :: FilePath -> [FeedUserdata] -> [[Message]] -> IO () -updateLastUpdated path feeds feedMsgs = do +updateLastUpdated' :: FilePath -> FeedUserdata -> [Message] -> IO () +updateLastUpdated' path feed msgs = + unless (null msgs) $ do decoded <- getLastUpdatedMap path - encodeFile path $ foldl' - (\hm (feed, msgs) -> - case msgs of - [] -> hm - h : _ -> HM.insert (fuUrl feed) - (utcToZonedTime (zonedTimeZone $ mDate h) . maximum $ + encodeFile path $ + HM.insert + (fuUrl feed) + (utcToZonedTime (zonedTimeZone $ mDate $ head msgs) . maximum $ (zonedTimeToUTC . mDate) <$> msgs) - hm) - decoded (zip feeds feedMsgs) - -- HM.fromList $ - -- mapMaybe - -- (\(feed, msgs) -> - -- case msgs of - -- [] -> fuLastUpdated feed >>= \date -> return (fuUrl feed, date) - -- h : _ -> - -- Just (fuUrl feed, - -- utcToZonedTime (zonedTimeZone $ mDate h) . maximum $ - -- (zonedTimeToUTC . mDate) <$> msgs)) $ zip feeds feedMsgs + decoded + +updateLastUpdated :: FilePath -> [FeedUserdata] -> [[Message]] -> IO () +updateLastUpdated path feeds feedMsgs = do + mapM_ (\(feed, msgs) -> updateLastUpdated' path feed msgs) + (zip feeds feedMsgs) diff --git a/src/F2Md/Import.hs b/src/F2Md/Import.hs index 3ed4d2f..8dfdb40 100644 --- a/src/F2Md/Import.hs +++ b/src/F2Md/Import.hs @@ -1,7 +1,9 @@ {-# LANGUAGE OverloadedStrings #-} -module F2Md.Import (toMessagesFromFile, toMessagesFromUrl) where +module F2Md.Import (toMessagesFromUrl) where +import Data.List +import Data.Text.Encoding import Control.Monad import Data.Time import System.Process @@ -19,22 +21,16 @@ import Data.XML.Types import Text.Feed.Import import Text.Feed.Types import F2Md.Types - -toMessagesFromFile :: Text -> FilePath -> IO [Message] -toMessagesFromFile url file = do - feed <- parseFeedFromFile file - case feed of - Nothing -> return [] - Just feed' -> - mapM (\msg -> toMessage (toFeedMetadata feed' url) msg) (getFeedItems feed') +import Crypto.Hash toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message] toMessagesFromUrl url after = do feed <- parseFeedString <$> (fetchFeed $ T.unpack url) case feed of Nothing -> return [] - Just feed' -> catMaybes <$> - mapM (\msg -> toMessage' after (toFeedMetadata feed' url) msg) (getFeedItems feed') + Just feed' -> return $ + sortOn (\msg -> zonedTimeToUTC (mDate msg)) $ catMaybes $ + toMessage' after (toFeedMetadata feed' url) <$> getFeedItems feed' wgetCommand :: String wgetCommand = "wget -O- 2>/dev/null" @@ -42,26 +38,31 @@ wgetCommand = "wget -O- 2>/dev/null" fetchFeed :: String -> IO String fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) "" -toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> IO (Maybe Message) +toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> Maybe Message toMessage' after feed item - | isNothing date = return Nothing - | isNothing after = toMessage feed item >>= return . Just - | zonedTimeToUTC (fromJust after) >= fromJust date = return Nothing - | otherwise = toMessage feed item >>= return . Just + | isNothing date = Nothing + | isNothing after = Just $ toMessage feed item + | zonedTimeToUTC (fromJust after) >= fromJust date = Nothing + | otherwise = Just $ toMessage feed item where date = join $ getItemPublishDate item -toMessage :: FeedMetadata -> Item -> IO Message -toMessage (FeedMetadata title author home url) item = do - messageId <- genMessageId (fromMaybe "" home) (snd <$> getItemId item) - return $ Message +toMessage :: FeedMetadata -> Item -> Message +toMessage (FeedMetadata title author home url) item = + Message (fromMaybe title $ orElse (getItemAuthor' item) author) (fromMaybe "Untitled" $ getItemTitle item) (fromJust $ join $ getItemPublishDate item) -- default should be current date messageId - (formatBody (getItemLink item) $ - (getItemContent item) `orElse` (getItemContent' item) `orElse` - (getItemDescription item)) + body url + where body = + (formatBody (getItemLink item) (getItemEnclosure item) $ + (getItemContent item) `orElse` (getItemContent' item) `orElse` + (getItemDescription item)) + messageId = + "<" <> (T.pack $ show $ hashWith MD5 (encodeUtf8 body)) <> "@" <> + (T.map hyphenateNonAlNum url) <> ".gnu" <> ">" + getItemAuthor' :: Item -> Maybe Text getItemAuthor' item = case getItemAuthor item of @@ -76,25 +77,27 @@ getItemContent' (RSSItem item) = $ rssItemOther item getItemContent' _ = Nothing -formatBody :: Maybe Text -> Maybe Text -> Text -formatBody link desc = +formatBody :: Maybe Text -> Maybe (Text, Maybe Text, Maybe Integer) -> Maybe Text -> Text +formatBody link encl desc = let linkText = case link of Nothing -> "" - Just link' -> "Link: " <> link' <> "\n\n" in - linkText <> (fromMaybe "" desc) + Just link' -> "Link: <" <> link' <> ">\n\n" + enclText = case encl of + Nothing -> "" + Just (url, Nothing, Nothing) -> urlPt url <> "\n\n" + Just (url, Just ty, Nothing) -> urlPt url <> " (" <> ty <> ")\n\n" + Just (url, Nothing, Just len) -> urlPt url <> " (" <> lenPt len <> ")\n\n" + Just (url, Just ty, Just len) -> + urlPt url <> " (" <> ty <> ", " <> lenPt len <> ")\n\n" + where urlPt url = "Enclosure: <" <> url <> ">" + lenPt len = (T.pack $ show len) <> " bytes" + in + linkText <> enclText <> (fromMaybe "" desc) toFeedMetadata :: Feed -> Text -> FeedMetadata toFeedMetadata feed url = FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed) url -genMessageId :: Text -> Maybe Text -> IO Text -genMessageId feedLink guid = do - id <- case guid of - Just guid' -> return $ T.map hyphenateNonAlNum guid' - Nothing -> toText <$> nextRandom - return $ - "<" <> id <> "@" <> (T.map hyphenateNonAlNum feedLink) <> ".gnu" <> ">" - hyphenateNonAlNum :: Char -> Char hyphenateNonAlNum c | isAlphaNum c = c diff --git a/src/Main.hs b/src/Main.hs index 133d155..ec602cc 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,5 +1,8 @@ {-# LANGUAGE OverloadedStrings #-} +import qualified Data.Text as T +import Control.Monad.IO.Class +import Control.Monad.Logger import System.Directory import Control.Monad.Extra import F2Md.Types @@ -8,13 +11,19 @@ import F2Md.Export import F2Md.Config import System.FilePath +-- TODO: update db on interruption with the latest date +processFeed :: FilePath -> Maybe FilePath -> FeedUserdata -> LoggingT IO () +processFeed root dbPath feed = do + logInfoN $ "Fetching " <> (fuUrl feed) <> "..." + msgs <- liftIO $ toMessagesFromUrl (fuUrl feed) (fuLastUpdated feed) + logInfoN $ "Writing " <> (T.pack $ show $ length msgs) <> " messages..." + liftIO $ mapM_ (writeMessage root) msgs + whenJust dbPath $ \dbPath' -> liftIO $ updateLastUpdated' dbPath' feed msgs + main :: IO () main = do (feeds, maildir, dbPath) <- getUserdata "./.f2m.json" whenJust maildir $ \root -> do mapM_ (createDirectoryIfMissing True) [root "new", root "cur", root "tmp"] - feedMessages <- sequence $ - (\feed -> toMessagesFromUrl (fuUrl feed) (fuLastUpdated feed)) <$> feeds - mapM_ (writeMessage root) $ concat feedMessages - whenJust dbPath $ \dbPath' -> updateLastUpdated dbPath' feeds feedMessages + mapM_ (runStdoutLoggingT . processFeed root dbPath) feeds -- cgit v1.2.3