summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuchen Pei <hi@ypei.me>2022-09-15 17:45:53 +1000
committerYuchen Pei <hi@ypei.me>2022-09-15 17:45:53 +1000
commit265951e087809e8a45802bc00349d198f3515b19 (patch)
tree29f4c4bfa1a520c0e9754cb48c2c11d98e8d254f
parent33f47d6d86246160cceb14804c6bd9746aad3b1e (diff)
[server] rendering some templates within infobox
also adjusted the parsers to not have overlapping skipspace
-rw-r--r--app/Main.hs2
-rw-r--r--src/Servall/Types.hs5
-rw-r--r--src/Servall/WikiParser.hs90
3 files changed, 67 insertions, 30 deletions
diff --git a/app/Main.hs b/app/Main.hs
index 579ec20..74a03eb 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -183,7 +183,7 @@ getWpSummaryFull name =
fmap (TE.decodeUtf8 . BSL.toStrict) (liftIO $ getApiWpSummary name)
getInfobox :: Text -> Handler (HM.HashMap Text Text)
-getInfobox name = infobox <$> getWikiFormat name
+getInfobox name = fmap (HM.fromList . infobox) (getWikiFormat name)
searchYt :: Text -> Handler [Video]
searchYt query = do
diff --git a/src/Servall/Types.hs b/src/Servall/Types.hs
index ae4f73a..2c49532 100644
--- a/src/Servall/Types.hs
+++ b/src/Servall/Types.hs
@@ -9,14 +9,13 @@ import Data.Aeson ( (.:)
, Value(..)
, object
)
-import qualified Data.HashMap.Lazy as HM
import Data.Text ( Text )
import GHC.Generics ( Generic )
data WikiTemplate = WikiTemplate
- { wtName :: Text
+ { wtName :: Text
-- , wtSubName :: Maybe Text
- , wtFields :: HM.HashMap Text Text
+ , wtArgs :: [(Maybe Text, Text)]
}
deriving Show
diff --git a/src/Servall/WikiParser.hs b/src/Servall/WikiParser.hs
index d509412..b5188a4 100644
--- a/src/Servall/WikiParser.hs
+++ b/src/Servall/WikiParser.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
module Servall.WikiParser
( infobox
, wikiFilter
@@ -11,6 +12,7 @@ import Data.Attoparsec.Text ( Parser
, char
, choice
, many'
+ , many1
, manyTill
, notChar
, parse
@@ -28,10 +30,10 @@ import Data.Char ( isAlpha
, isAlphaNum
)
import Data.Either ( fromRight )
-import qualified Data.HashMap.Lazy as HM
import Data.List ( find )
import Data.Maybe ( catMaybes
, fromMaybe
+ , mapMaybe
, maybeToList
)
import qualified Data.Text as T
@@ -52,39 +54,75 @@ wikilinkPrefix = "wiki:"
parseWikiTemplates :: Text -> Either String [WikiTemplate]
parseWikiTemplates = parseOnly wikiP
-infobox :: Text -> HM.HashMap Text Text
-infobox wiki = maybe
- HM.empty
- wtFields
- (find isInfobox (fromRight [] (parseWikiTemplates wiki)))
+infobox :: Text -> [(Text, Text)]
+infobox wiki = filterNameValues
+ (maybe [] wtArgs (find isInfobox (fromRight [] (parseWikiTemplates wiki))))
+
+filterNameValues :: [(Maybe Text, Text)] -> [(Text, Text)]
+filterNameValues = mapMaybe (\(k, v) -> (, v) <$> k)
isInfobox :: WikiTemplate -> Bool
-isInfobox (WikiTemplate name _) = name == "Infobox"
+isInfobox (WikiTemplate name _) = T.isPrefixOf "Infobox" name
wikiP :: Parser [WikiTemplate]
-wikiP = sepBy templateP (commentP <|> skipSpace)
+wikiP = sepBy templateP (many' (commentP <|> (many1 space >> return ())))
templateP :: Parser WikiTemplate
templateP = do
string "{{"
- name <- tempHeaderP
+ name <- tempHeaderP
+ skipSpace
fields <- many' (choice [tempFieldP, commentP >> return Nothing])
string "}}"
- return $ WikiTemplate name (HM.fromList (catMaybes fields))
+ return $ WikiTemplate name (catMaybes fields)
+
+-- simple rendering of templates in an infobox
+renderInfoTemplate :: WikiTemplate -> Text
+renderInfoTemplate (WikiTemplate "URL" ((Nothing, url) : _)) =
+ "<" <> url <> ">"
+renderInfoTemplate (WikiTemplate "coord" ((Nothing, lat1) : (Nothing, lat2) : (Nothing, lat3) : (Nothing, latd) : (Nothing, lon1) : (Nothing, lon2) : (Nothing, lon3) : (Nothing, lond) : _))
+ = lat1
+ <> "°"
+ <> lat2
+ <> "′"
+ <> lat3
+ <> "\""
+ <> latd
+ <> " "
+ <> lon1
+ <> "°"
+ <> lon2
+ <> "′"
+ <> lon3
+ <> "\""
+ <> lond
+renderInfoTemplate (WikiTemplate header ((Nothing, year) : (Nothing, month) : (Nothing, date) : _))
+ | (header == "start date and age" || header == "Death date and age")
+ = "["
+ <> year
+ <> "-"
+ <> (if (T.length month == 1) then "0" else "")
+ <> month
+ <> "-"
+ <> date
+ <> "]"
+renderInfoTemplate _ = ""
tempHeaderP :: Parser Text
tempHeaderP = do
-- Not sure whether name can contain numbers
- skipSpace
-- Does not consider multiple words like Short description
- name <- takeWhile1 isAlpha
- manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}"))
- return name
+ name1 <- takeWhile1 isAlpha
+ name2 <- manyTill
+ anyChar
+ (lookAhead (skipSpace >> ((char '|' >> return "") <|> string "}}")))
+ return $ name1 <> T.pack name2
-tempFieldP :: Parser (Maybe (Text, Text))
+-- parse a template field
+tempFieldP :: Parser (Maybe (Maybe Text, Text))
tempFieldP = do
- skipSpace >> char '|' >> skipSpace
- choice [namedField, tempFieldValueP >> return Nothing]
+ char '|' >> skipSpace
+ choice [namedField, fmap (Just . (Nothing, )) tempFieldValueP]
where
namedField = do
key <- takeWhile1 (\c -> isAlphaNum c || c `elem` ['_', '-'])
@@ -92,15 +130,15 @@ tempFieldP = do
char '='
skipSpace
value <- tempFieldValueP
- return $ if T.null value then Nothing else Just (key, value)
+ return $ if T.null value then Nothing else Just (Just key, value)
+-- parse a field value
tempFieldValueP :: Parser Text
tempFieldValueP = do
- skipSpace
T.concat <$> many'
(choice
[ skipRefP
- , templateP >> return ""
+ , renderInfoTemplate <$> templateP
, commentP >> return ""
, wikilinkP
, simpleP
@@ -125,7 +163,7 @@ tempFieldValueP = do
commentP :: Parser ()
commentP = do
- skipSpace >> string "<!--" >> untilCommentEnd ""
+ string "<!--" >> untilCommentEnd ""
where
untilCommentEnd :: Text -> Parser ()
untilCommentEnd xs | T.isSuffixOf "--" xs = char '>' >> return ()
@@ -146,7 +184,6 @@ wikilinkP = do
<> (if T.null second then first else T.tail second)
<> "]]"
--- refs
skipRefP :: Parser Text
skipRefP = (ref1 <|> ref2) >> ""
where
@@ -176,7 +213,7 @@ fixHeaderTemplates (Pandoc m blocks) = Pandoc m (go [] blocks) where
infobox' :: Block -> [(Text, Text)]
infobox' (RawBlock (Format "mediawiki") temp) =
fromRight []
- $ (\wt -> if isInfobox wt then HM.toList (wtFields wt) else [])
+ $ (\wt -> if isInfobox wt then filterNameValues (wtArgs wt) else [])
<$> parseOnly templateP temp
infobox' _ = []
@@ -189,9 +226,10 @@ fixInline (Link attr label (url, "wikilink")) =
Link attr label ("wiki:" <> url, "")
fixInline (Note (RawBlock (Format "mediawiki") temp : rest)) =
let
- fields = fromRight HM.empty (wtFields <$> parseOnly templateP temp)
- url = HM.lookup "url" fields
- title = HM.lookup "title" fields
+ fields = (fromRight [] . fmap (filterNameValues . wtArgs))
+ (parseOnly templateP temp)
+ url = lookup "url" fields
+ title = lookup "title" fields
link =
(\url' ->
Link nullAttr (maybeToList (Str <$> title)) (url', fromMaybe "" title)