diff options
author | Yuchen Pei <hi@ypei.me> | 2022-09-15 17:45:53 +1000 |
---|---|---|
committer | Yuchen Pei <hi@ypei.me> | 2022-09-15 17:45:53 +1000 |
commit | 265951e087809e8a45802bc00349d198f3515b19 (patch) | |
tree | 29f4c4bfa1a520c0e9754cb48c2c11d98e8d254f | |
parent | 33f47d6d86246160cceb14804c6bd9746aad3b1e (diff) |
[server] rendering some templates within infobox
also adjusted the parsers to not have overlapping skipspace
-rw-r--r-- | app/Main.hs | 2 | ||||
-rw-r--r-- | src/Servall/Types.hs | 5 | ||||
-rw-r--r-- | src/Servall/WikiParser.hs | 90 |
3 files changed, 67 insertions, 30 deletions
diff --git a/app/Main.hs b/app/Main.hs index 579ec20..74a03eb 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -183,7 +183,7 @@ getWpSummaryFull name = fmap (TE.decodeUtf8 . BSL.toStrict) (liftIO $ getApiWpSummary name) getInfobox :: Text -> Handler (HM.HashMap Text Text) -getInfobox name = infobox <$> getWikiFormat name +getInfobox name = fmap (HM.fromList . infobox) (getWikiFormat name) searchYt :: Text -> Handler [Video] searchYt query = do diff --git a/src/Servall/Types.hs b/src/Servall/Types.hs index ae4f73a..2c49532 100644 --- a/src/Servall/Types.hs +++ b/src/Servall/Types.hs @@ -9,14 +9,13 @@ import Data.Aeson ( (.:) , Value(..) , object ) -import qualified Data.HashMap.Lazy as HM import Data.Text ( Text ) import GHC.Generics ( Generic ) data WikiTemplate = WikiTemplate - { wtName :: Text + { wtName :: Text -- , wtSubName :: Maybe Text - , wtFields :: HM.HashMap Text Text + , wtArgs :: [(Maybe Text, Text)] } deriving Show diff --git a/src/Servall/WikiParser.hs b/src/Servall/WikiParser.hs index d509412..b5188a4 100644 --- a/src/Servall/WikiParser.hs +++ b/src/Servall/WikiParser.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} module Servall.WikiParser ( infobox , wikiFilter @@ -11,6 +12,7 @@ import Data.Attoparsec.Text ( Parser , char , choice , many' + , many1 , manyTill , notChar , parse @@ -28,10 +30,10 @@ import Data.Char ( isAlpha , isAlphaNum ) import Data.Either ( fromRight ) -import qualified Data.HashMap.Lazy as HM import Data.List ( find ) import Data.Maybe ( catMaybes , fromMaybe + , mapMaybe , maybeToList ) import qualified Data.Text as T @@ -52,39 +54,75 @@ wikilinkPrefix = "wiki:" parseWikiTemplates :: Text -> Either String [WikiTemplate] parseWikiTemplates = parseOnly wikiP -infobox :: Text -> HM.HashMap Text Text -infobox wiki = maybe - HM.empty - wtFields - (find isInfobox (fromRight [] (parseWikiTemplates wiki))) +infobox :: Text -> [(Text, Text)] +infobox wiki = filterNameValues + (maybe [] wtArgs (find isInfobox (fromRight [] (parseWikiTemplates wiki)))) + +filterNameValues :: [(Maybe Text, Text)] -> [(Text, Text)] +filterNameValues = mapMaybe (\(k, v) -> (, v) <$> k) isInfobox :: WikiTemplate -> Bool -isInfobox (WikiTemplate name _) = name == "Infobox" +isInfobox (WikiTemplate name _) = T.isPrefixOf "Infobox" name wikiP :: Parser [WikiTemplate] -wikiP = sepBy templateP (commentP <|> skipSpace) +wikiP = sepBy templateP (many' (commentP <|> (many1 space >> return ()))) templateP :: Parser WikiTemplate templateP = do string "{{" - name <- tempHeaderP + name <- tempHeaderP + skipSpace fields <- many' (choice [tempFieldP, commentP >> return Nothing]) string "}}" - return $ WikiTemplate name (HM.fromList (catMaybes fields)) + return $ WikiTemplate name (catMaybes fields) + +-- simple rendering of templates in an infobox +renderInfoTemplate :: WikiTemplate -> Text +renderInfoTemplate (WikiTemplate "URL" ((Nothing, url) : _)) = + "<" <> url <> ">" +renderInfoTemplate (WikiTemplate "coord" ((Nothing, lat1) : (Nothing, lat2) : (Nothing, lat3) : (Nothing, latd) : (Nothing, lon1) : (Nothing, lon2) : (Nothing, lon3) : (Nothing, lond) : _)) + = lat1 + <> "°" + <> lat2 + <> "′" + <> lat3 + <> "\"" + <> latd + <> " " + <> lon1 + <> "°" + <> lon2 + <> "′" + <> lon3 + <> "\"" + <> lond +renderInfoTemplate (WikiTemplate header ((Nothing, year) : (Nothing, month) : (Nothing, date) : _)) + | (header == "start date and age" || header == "Death date and age") + = "[" + <> year + <> "-" + <> (if (T.length month == 1) then "0" else "") + <> month + <> "-" + <> date + <> "]" +renderInfoTemplate _ = "" tempHeaderP :: Parser Text tempHeaderP = do -- Not sure whether name can contain numbers - skipSpace -- Does not consider multiple words like Short description - name <- takeWhile1 isAlpha - manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}")) - return name + name1 <- takeWhile1 isAlpha + name2 <- manyTill + anyChar + (lookAhead (skipSpace >> ((char '|' >> return "") <|> string "}}"))) + return $ name1 <> T.pack name2 -tempFieldP :: Parser (Maybe (Text, Text)) +-- parse a template field +tempFieldP :: Parser (Maybe (Maybe Text, Text)) tempFieldP = do - skipSpace >> char '|' >> skipSpace - choice [namedField, tempFieldValueP >> return Nothing] + char '|' >> skipSpace + choice [namedField, fmap (Just . (Nothing, )) tempFieldValueP] where namedField = do key <- takeWhile1 (\c -> isAlphaNum c || c `elem` ['_', '-']) @@ -92,15 +130,15 @@ tempFieldP = do char '=' skipSpace value <- tempFieldValueP - return $ if T.null value then Nothing else Just (key, value) + return $ if T.null value then Nothing else Just (Just key, value) +-- parse a field value tempFieldValueP :: Parser Text tempFieldValueP = do - skipSpace T.concat <$> many' (choice [ skipRefP - , templateP >> return "" + , renderInfoTemplate <$> templateP , commentP >> return "" , wikilinkP , simpleP @@ -125,7 +163,7 @@ tempFieldValueP = do commentP :: Parser () commentP = do - skipSpace >> string "<!--" >> untilCommentEnd "" + string "<!--" >> untilCommentEnd "" where untilCommentEnd :: Text -> Parser () untilCommentEnd xs | T.isSuffixOf "--" xs = char '>' >> return () @@ -146,7 +184,6 @@ wikilinkP = do <> (if T.null second then first else T.tail second) <> "]]" --- refs skipRefP :: Parser Text skipRefP = (ref1 <|> ref2) >> "" where @@ -176,7 +213,7 @@ fixHeaderTemplates (Pandoc m blocks) = Pandoc m (go [] blocks) where infobox' :: Block -> [(Text, Text)] infobox' (RawBlock (Format "mediawiki") temp) = fromRight [] - $ (\wt -> if isInfobox wt then HM.toList (wtFields wt) else []) + $ (\wt -> if isInfobox wt then filterNameValues (wtArgs wt) else []) <$> parseOnly templateP temp infobox' _ = [] @@ -189,9 +226,10 @@ fixInline (Link attr label (url, "wikilink")) = Link attr label ("wiki:" <> url, "") fixInline (Note (RawBlock (Format "mediawiki") temp : rest)) = let - fields = fromRight HM.empty (wtFields <$> parseOnly templateP temp) - url = HM.lookup "url" fields - title = HM.lookup "title" fields + fields = (fromRight [] . fmap (filterNameValues . wtArgs)) + (parseOnly templateP temp) + url = lookup "url" fields + title = lookup "title" fields link = (\url' -> Link nullAttr (maybeToList (Str <$> title)) (url', fromMaybe "" title) |