aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuchen Pei <hi@ypei.me>2022-05-30 13:31:49 +1000
committerYuchen Pei <hi@ypei.me>2022-05-30 13:31:49 +1000
commitb454da8bf8f76e0c75a751a1a6cb93f480305199 (patch)
tree6379ebefd571e6db50b8f0f0b039cc5f0fbec459
parent20231d809b57c3050fb1cd812ea4c6e66cdaf6fb (diff)
several enhancements
- logging - sorting by date - adding enclosure - deterministic message id using md5
-rw-r--r--f2md.cabal2
-rw-r--r--src/F2Md/Config.hs34
-rw-r--r--src/F2Md/Import.hs73
-rw-r--r--src/Main.hs17
4 files changed, 67 insertions, 59 deletions
diff --git a/f2md.cabal b/f2md.cabal
index e7723fe..28c43ae 100644
--- a/f2md.cabal
+++ b/f2md.cabal
@@ -32,6 +32,6 @@ executable f2md
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends:
- base, uuid, syb, feed, time, text, unix, hostname, random, filepath, directory, extra, xml-types, process, aeson, bytestring, unordered-containers
+ base, uuid, syb, feed, time, text, unix, hostname, random, filepath, directory, extra, xml-types, process, aeson, bytestring, unordered-containers, monad-logger, cryptonite
hs-source-dirs: src
default-language: Haskell2010
diff --git a/src/F2Md/Config.hs b/src/F2Md/Config.hs
index ef209dc..46a9a75 100644
--- a/src/F2Md/Config.hs
+++ b/src/F2Md/Config.hs
@@ -4,8 +4,10 @@ module F2Md.Config
( getUserdata
, FeedUserdata(..)
, updateLastUpdated
+ , updateLastUpdated'
) where
+import Control.Monad
import Data.List
import F2Md.Utils
import F2Md.Types
@@ -48,24 +50,18 @@ getLastUpdatedMap :: FilePath -> IO (HM.HashMap Text ZonedTime)
getLastUpdatedMap dbPath =
fromMaybe (HM.fromList []) <$> (decodeFileStrict =<< expandPath dbPath)
-updateLastUpdated :: FilePath -> [FeedUserdata] -> [[Message]] -> IO ()
-updateLastUpdated path feeds feedMsgs = do
+updateLastUpdated' :: FilePath -> FeedUserdata -> [Message] -> IO ()
+updateLastUpdated' path feed msgs =
+ unless (null msgs) $ do
decoded <- getLastUpdatedMap path
- encodeFile path $ foldl'
- (\hm (feed, msgs) ->
- case msgs of
- [] -> hm
- h : _ -> HM.insert (fuUrl feed)
- (utcToZonedTime (zonedTimeZone $ mDate h) . maximum $
+ encodeFile path $
+ HM.insert
+ (fuUrl feed)
+ (utcToZonedTime (zonedTimeZone $ mDate $ head msgs) . maximum $
(zonedTimeToUTC . mDate) <$> msgs)
- hm)
- decoded (zip feeds feedMsgs)
- -- HM.fromList $
- -- mapMaybe
- -- (\(feed, msgs) ->
- -- case msgs of
- -- [] -> fuLastUpdated feed >>= \date -> return (fuUrl feed, date)
- -- h : _ ->
- -- Just (fuUrl feed,
- -- utcToZonedTime (zonedTimeZone $ mDate h) . maximum $
- -- (zonedTimeToUTC . mDate) <$> msgs)) $ zip feeds feedMsgs
+ decoded
+
+updateLastUpdated :: FilePath -> [FeedUserdata] -> [[Message]] -> IO ()
+updateLastUpdated path feeds feedMsgs = do
+ mapM_ (\(feed, msgs) -> updateLastUpdated' path feed msgs)
+ (zip feeds feedMsgs)
diff --git a/src/F2Md/Import.hs b/src/F2Md/Import.hs
index 3ed4d2f..8dfdb40 100644
--- a/src/F2Md/Import.hs
+++ b/src/F2Md/Import.hs
@@ -1,7 +1,9 @@
{-# LANGUAGE OverloadedStrings #-}
-module F2Md.Import (toMessagesFromFile, toMessagesFromUrl) where
+module F2Md.Import (toMessagesFromUrl) where
+import Data.List
+import Data.Text.Encoding
import Control.Monad
import Data.Time
import System.Process
@@ -19,22 +21,16 @@ import Data.XML.Types
import Text.Feed.Import
import Text.Feed.Types
import F2Md.Types
-
-toMessagesFromFile :: Text -> FilePath -> IO [Message]
-toMessagesFromFile url file = do
- feed <- parseFeedFromFile file
- case feed of
- Nothing -> return []
- Just feed' ->
- mapM (\msg -> toMessage (toFeedMetadata feed' url) msg) (getFeedItems feed')
+import Crypto.Hash
toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message]
toMessagesFromUrl url after = do
feed <- parseFeedString <$> (fetchFeed $ T.unpack url)
case feed of
Nothing -> return []
- Just feed' -> catMaybes <$>
- mapM (\msg -> toMessage' after (toFeedMetadata feed' url) msg) (getFeedItems feed')
+ Just feed' -> return $
+ sortOn (\msg -> zonedTimeToUTC (mDate msg)) $ catMaybes $
+ toMessage' after (toFeedMetadata feed' url) <$> getFeedItems feed'
wgetCommand :: String
wgetCommand = "wget -O- 2>/dev/null"
@@ -42,26 +38,31 @@ wgetCommand = "wget -O- 2>/dev/null"
fetchFeed :: String -> IO String
fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) ""
-toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> IO (Maybe Message)
+toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> Maybe Message
toMessage' after feed item
- | isNothing date = return Nothing
- | isNothing after = toMessage feed item >>= return . Just
- | zonedTimeToUTC (fromJust after) >= fromJust date = return Nothing
- | otherwise = toMessage feed item >>= return . Just
+ | isNothing date = Nothing
+ | isNothing after = Just $ toMessage feed item
+ | zonedTimeToUTC (fromJust after) >= fromJust date = Nothing
+ | otherwise = Just $ toMessage feed item
where date = join $ getItemPublishDate item
-toMessage :: FeedMetadata -> Item -> IO Message
-toMessage (FeedMetadata title author home url) item = do
- messageId <- genMessageId (fromMaybe "" home) (snd <$> getItemId item)
- return $ Message
+toMessage :: FeedMetadata -> Item -> Message
+toMessage (FeedMetadata title author home url) item =
+ Message
(fromMaybe title $ orElse (getItemAuthor' item) author)
(fromMaybe "Untitled" $ getItemTitle item)
(fromJust $ join $ getItemPublishDate item) -- default should be current date
messageId
- (formatBody (getItemLink item) $
- (getItemContent item) `orElse` (getItemContent' item) `orElse`
- (getItemDescription item))
+ body
url
+ where body =
+ (formatBody (getItemLink item) (getItemEnclosure item) $
+ (getItemContent item) `orElse` (getItemContent' item) `orElse`
+ (getItemDescription item))
+ messageId =
+ "<" <> (T.pack $ show $ hashWith MD5 (encodeUtf8 body)) <> "@" <>
+ (T.map hyphenateNonAlNum url) <> ".gnu" <> ">"
+
getItemAuthor' :: Item -> Maybe Text
getItemAuthor' item = case getItemAuthor item of
@@ -76,25 +77,27 @@ getItemContent' (RSSItem item) =
$ rssItemOther item
getItemContent' _ = Nothing
-formatBody :: Maybe Text -> Maybe Text -> Text
-formatBody link desc =
+formatBody :: Maybe Text -> Maybe (Text, Maybe Text, Maybe Integer) -> Maybe Text -> Text
+formatBody link encl desc =
let linkText = case link of
Nothing -> ""
- Just link' -> "Link: " <> link' <> "\n\n" in
- linkText <> (fromMaybe "" desc)
+ Just link' -> "Link: <" <> link' <> ">\n\n"
+ enclText = case encl of
+ Nothing -> ""
+ Just (url, Nothing, Nothing) -> urlPt url <> "\n\n"
+ Just (url, Just ty, Nothing) -> urlPt url <> " (" <> ty <> ")\n\n"
+ Just (url, Nothing, Just len) -> urlPt url <> " (" <> lenPt len <> ")\n\n"
+ Just (url, Just ty, Just len) ->
+ urlPt url <> " (" <> ty <> ", " <> lenPt len <> ")\n\n"
+ where urlPt url = "Enclosure: <" <> url <> ">"
+ lenPt len = (T.pack $ show len) <> " bytes"
+ in
+ linkText <> enclText <> (fromMaybe "" desc)
toFeedMetadata :: Feed -> Text -> FeedMetadata
toFeedMetadata feed url =
FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed) url
-genMessageId :: Text -> Maybe Text -> IO Text
-genMessageId feedLink guid = do
- id <- case guid of
- Just guid' -> return $ T.map hyphenateNonAlNum guid'
- Nothing -> toText <$> nextRandom
- return $
- "<" <> id <> "@" <> (T.map hyphenateNonAlNum feedLink) <> ".gnu" <> ">"
-
hyphenateNonAlNum :: Char -> Char
hyphenateNonAlNum c
| isAlphaNum c = c
diff --git a/src/Main.hs b/src/Main.hs
index 133d155..ec602cc 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -1,5 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
+import qualified Data.Text as T
+import Control.Monad.IO.Class
+import Control.Monad.Logger
import System.Directory
import Control.Monad.Extra
import F2Md.Types
@@ -8,13 +11,19 @@ import F2Md.Export
import F2Md.Config
import System.FilePath
+-- TODO: update db on interruption with the latest date
+processFeed :: FilePath -> Maybe FilePath -> FeedUserdata -> LoggingT IO ()
+processFeed root dbPath feed = do
+ logInfoN $ "Fetching " <> (fuUrl feed) <> "..."
+ msgs <- liftIO $ toMessagesFromUrl (fuUrl feed) (fuLastUpdated feed)
+ logInfoN $ "Writing " <> (T.pack $ show $ length msgs) <> " messages..."
+ liftIO $ mapM_ (writeMessage root) msgs
+ whenJust dbPath $ \dbPath' -> liftIO $ updateLastUpdated' dbPath' feed msgs
+
main :: IO ()
main = do
(feeds, maildir, dbPath) <- getUserdata "./.f2m.json"
whenJust maildir $ \root -> do
mapM_ (createDirectoryIfMissing True)
[root </> "new", root </> "cur", root </> "tmp"]
- feedMessages <- sequence $
- (\feed -> toMessagesFromUrl (fuUrl feed) (fuLastUpdated feed)) <$> feeds
- mapM_ (writeMessage root) $ concat feedMessages
- whenJust dbPath $ \dbPath' -> updateLastUpdated dbPath' feeds feedMessages
+ mapM_ (runStdoutLoggingT . processFeed root dbPath) feeds