{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} module Servall.WikiParser ( infobox , wikiFilter ) where import Control.Applicative ( (<|>) ) import Data.Attoparsec.Combinator ( lookAhead ) import Data.Attoparsec.Text ( Parser , anyChar , char , choice , many' , many1 , manyTill , notChar , parse , parseOnly , sepBy , skipMany , skipSpace , skipWhile , space , string , takeTill , takeWhile1 ) import Data.Char ( isAlpha , isAlphaNum ) import Data.Either ( fromRight ) import Data.List ( find ) import Data.Maybe ( catMaybes , fromMaybe , mapMaybe , maybeToList ) import qualified Data.Text as T import Data.Text ( Text ) import GHC.Generics ( Generic ) import Servall.Types import Text.Pandoc.Definition ( Block(..) , Format(..) , Inline(..) , Pandoc(..) , nullAttr ) import Text.Pandoc.Generic ( topDown ) wikilinkPrefix :: Text wikilinkPrefix = "wiki:" parseWikiTemplates :: Text -> Either String [WikiTemplate] parseWikiTemplates = parseOnly wikiP infobox :: Text -> [(Text, Text)] infobox wiki = filterNameValues (maybe [] wtArgs (find isInfobox (fromRight [] (parseWikiTemplates wiki)))) filterNameValues :: [(Maybe Text, Text)] -> [(Text, Text)] filterNameValues = mapMaybe (\(k, v) -> (, v) <$> k) isInfobox :: WikiTemplate -> Bool isInfobox (WikiTemplate name _) = T.isPrefixOf "Infobox" name wikiP :: Parser [WikiTemplate] wikiP = sepBy templateP (many' (commentP <|> (many1 space >> return ()))) templateP :: Parser WikiTemplate templateP = do string "{{" name <- tempHeaderP skipSpace fields <- many' (choice [tempFieldP, commentP >> return Nothing]) string "}}" return $ WikiTemplate name (catMaybes fields) -- simple rendering of templates in an infobox renderInfoTemplate :: WikiTemplate -> Text renderInfoTemplate (WikiTemplate "URL" ((Nothing, url) : _)) = "<" <> url <> ">" renderInfoTemplate (WikiTemplate "coord" ((Nothing, lat1) : (Nothing, lat2) : (Nothing, lat3) : (Nothing, latd) : (Nothing, lon1) : (Nothing, lon2) : (Nothing, lon3) : (Nothing, lond) : _)) = lat1 <> "°" <> lat2 <> "′" <> lat3 <> "\"" <> latd <> " " <> lon1 <> "°" <> lon2 <> "′" <> lon3 <> "\"" <> lond renderInfoTemplate (WikiTemplate header ((Nothing, year) : (Nothing, month) : (Nothing, date) : _)) | (header == "start date and age" || header == "Death date and age") = "[" <> year <> "-" <> (if (T.length month == 1) then "0" else "") <> month <> "-" <> date <> "]" renderInfoTemplate _ = "" tempHeaderP :: Parser Text tempHeaderP = do -- Not sure whether name can contain numbers -- Does not consider multiple words like Short description name1 <- takeWhile1 isAlpha name2 <- manyTill anyChar (lookAhead (skipSpace >> ((char '|' >> return "") <|> string "}}"))) return $ name1 <> T.pack name2 -- parse a template field tempFieldP :: Parser (Maybe (Maybe Text, Text)) tempFieldP = do char '|' >> skipSpace choice [namedField, fmap (Just . (Nothing, )) tempFieldValueP] where namedField = do key <- takeWhile1 (\c -> isAlphaNum c || c `elem` ['_', '-']) skipSpace char '=' skipSpace value <- tempFieldValueP return $ if T.null value then Nothing else Just (Just key, value) -- parse a field value tempFieldValueP :: Parser Text tempFieldValueP = do T.concat <$> many' (choice [ skipRefP , renderInfoTemplate <$> templateP , commentP >> return "" , wikilinkP , simpleP ] ) where simpleP :: Parser Text simpleP = do t <- T.pack <$> manyTill anyChar (lookAhead ( string "[[" <|> string "{{" <|> string "