aboutsummaryrefslogtreecommitdiff
path: root/src/F2Md/Import.hs
blob: 8dfdb402186ff049a6e1ce254ad74636607ceb00 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
{-# LANGUAGE OverloadedStrings #-}

module F2Md.Import (toMessagesFromUrl) where

import Data.List
import Data.Text.Encoding
import Control.Monad
import Data.Time
import System.Process
import Data.UUID
import Data.UUID.V4
import Data.Char
import Data.Text (Text)
import Text.Feed.Types
import qualified Data.Text as T
import Data.Maybe
import Text.Feed.Query
import Data.Generics
import Text.RSS.Syntax hiding (RSSItem)
import Data.XML.Types
import Text.Feed.Import
import Text.Feed.Types
import F2Md.Types
import Crypto.Hash

toMessagesFromUrl :: Text -> Maybe ZonedTime -> IO [Message]
toMessagesFromUrl url after = do
  feed <- parseFeedString <$> (fetchFeed $ T.unpack url)
  case feed of
    Nothing -> return []
    Just feed' -> return $
      sortOn (\msg -> zonedTimeToUTC (mDate msg)) $ catMaybes $
      toMessage' after (toFeedMetadata feed' url) <$> getFeedItems feed'

wgetCommand :: String
wgetCommand = "wget -O- 2>/dev/null"

fetchFeed :: String -> IO String
fetchFeed url = readCreateProcess (shell $ wgetCommand ++ " " ++ url) ""

toMessage' :: Maybe ZonedTime -> FeedMetadata -> Item -> Maybe Message
toMessage' after feed item
  | isNothing date = Nothing
  | isNothing after = Just $ toMessage feed item
  | zonedTimeToUTC (fromJust after) >= fromJust date = Nothing
  | otherwise = Just $ toMessage feed item
  where date = join $ getItemPublishDate item

toMessage :: FeedMetadata -> Item -> Message
toMessage (FeedMetadata title author home url) item =
  Message
    (fromMaybe title $ orElse (getItemAuthor' item) author)
    (fromMaybe "Untitled" $ getItemTitle item)
    (fromJust $ join $ getItemPublishDate item) -- default should be current date
    messageId
    body
    url
  where body =
          (formatBody (getItemLink item) (getItemEnclosure item) $
            (getItemContent item) `orElse` (getItemContent' item) `orElse`
            (getItemDescription item))
        messageId =
          "<" <> (T.pack $ show $ hashWith MD5 (encodeUtf8 body)) <> "@" <>
          (T.map hyphenateNonAlNum url) <> ".gnu" <> ">"


getItemAuthor' :: Item -> Maybe Text
getItemAuthor' item = case getItemAuthor item of
  Just "" -> Nothing
  Just author -> Just author
  Nothing -> Nothing

getItemContent' :: Item -> Maybe Text
getItemContent' (RSSItem item) =
  (fmap (T.concat . elementText) . listToMaybe .
   filter (\e -> namePrefix (elementName e) == Just "content"))
  $ rssItemOther item
getItemContent' _ = Nothing

formatBody :: Maybe Text -> Maybe (Text, Maybe Text, Maybe Integer) -> Maybe Text -> Text
formatBody link encl desc =
  let linkText = case link of
        Nothing -> ""
        Just link' -> "Link: <" <> link' <> ">\n\n"
      enclText = case encl of
        Nothing -> ""
        Just (url, Nothing, Nothing) -> urlPt url <> "\n\n"
        Just (url, Just ty, Nothing) -> urlPt url <> " (" <> ty <> ")\n\n"
        Just (url, Nothing, Just len) -> urlPt url <> " (" <> lenPt len <> ")\n\n"
        Just (url, Just ty, Just len) ->
          urlPt url <> " (" <> ty <> ", " <> lenPt len <> ")\n\n"
        where urlPt url = "Enclosure: <" <> url <> ">"
              lenPt len = (T.pack $ show len) <> " bytes"
  in
    linkText <> enclText <> (fromMaybe "" desc)

toFeedMetadata :: Feed -> Text -> FeedMetadata
toFeedMetadata feed url =
  FeedMetadata (getFeedTitle feed) (getFeedAuthor feed) (getFeedHome feed) url

hyphenateNonAlNum :: Char -> Char
hyphenateNonAlNum c
  | isAlphaNum c = c
  | otherwise = '-'