diff options
Diffstat (limited to 'app/Main.hs')
-rw-r--r-- | app/Main.hs | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/app/Main.hs b/app/Main.hs new file mode 100644 index 0000000..f9e522b --- /dev/null +++ b/app/Main.hs @@ -0,0 +1,252 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeOperators #-} + +module Main where + +import Control.Applicative ( (<|>) ) +import Control.Lens ( (^.) + , (^?) + ) +import Control.Monad.IO.Class ( liftIO ) +import Data.Aeson ( FromJSON + , ToJSON + , Value + , decode + , encode + ) +import qualified Data.Aeson.KeyMap as KM + ( KeyMap + , lookup + ) +import Data.Aeson.Lens ( AsNumber(..) + , AsValue(..) + , key + ) +import Data.Attoparsec.Combinator ( lookAhead ) +import Data.Attoparsec.Text ( Parser + , anyChar + , char + , choice + , many' + , manyTill + , parse + , parseOnly + , sepBy + , skipMany + , skipSpace + , skipWhile + , space + , string + , takeTill + , takeWhile1 + ) +import qualified Data.ByteString.Lazy as BSL +import Data.Char ( isAlpha + , isAlphaNum + ) +import Data.Either ( fromRight ) +import qualified Data.HashMap.Lazy as HM +import Data.List ( find + , isSuffixOf + ) +import Data.Maybe ( catMaybes + , fromJust + ) +import Data.Proxy ( Proxy(..) ) +import qualified Data.Text as T +import Data.Text ( Text ) +import qualified Data.Text.Encoding as TE +import GHC.Generics ( Generic ) +import Network.Wai.Handler.Warp ( run ) +import Network.Wreq ( Response + , get + , responseBody + ) +import Prelude hiding ( takeWhile ) +import Servant ( (:<|>)(..) + , (:>) + , Application + , Capture + , FromHttpApiData(..) + , Get + , Handler + , JSON + , Server + , serve + ) +import Text.Pandoc ( WrapOption(..) + , WriterOptions(..) + , def + , readMediaWiki + , runIOorExplode + , writeNative + , writeOrg + ) +import Text.Regex.TDFA ( (=~) ) + +type API = Wikipedia + +type Wikipedia + = SearchWikipedia :<|> GetWikiFormat :<|> GetOrgFormat :<|> GetPandocFormat :<|> GetWpSummary :<|> GetInfobox + +type SearchWikipedia + = "wikipedia" :> "search" :> Capture "query" Text :> Get '[JSON] Value + +type GetWikiFormat + = "wikipedia" :> "wiki" :> Capture "name" Text :> Get '[JSON] Text + +type GetOrgFormat + = "wikipedia" :> "org" :> Capture "name" Text :> Get '[JSON] Text + +type GetPandocFormat + = "wikipedia" :> "pandoc" :> Capture "name" Text :> Get '[JSON] Text + +type GetWpSummary + = "wikipedia" :> "summary" :> Capture "name" Text :> Get '[JSON] Value + +type GetInfobox + = "wikipedia" :> "infobox" :> Capture "name" Text :> Get '[JSON] (HM.HashMap Text Text) + +server :: Server API +server = + searchWikipedia + :<|> getWikiFormat + :<|> getOrgFormat + :<|> getPandocFormat + :<|> getWpSummary + :<|> getInfobox + +searchWikipedia :: Text -> Handler Value +searchWikipedia query = do + r <- liftIO $ get + ("https://en.wikipedia.org/w/api.php?action=query&format=json&list=search&srsearch=" + <> (T.unpack query) + ) + return $ fromJust $ decode $ r ^. responseBody + +getWikiFormat :: Text -> Handler Text +getWikiFormat name = do + r <- liftIO $ get + ("https://en.wikipedia.org/wiki/" <> (T.unpack name) <> "?action=raw") + return $ TE.decodeUtf8 $ BSL.toStrict $ r ^. responseBody + +getOrgFormat :: Text -> Handler Text +getOrgFormat name = do + wiki <- getWikiFormat name + liftIO $ runIOorExplode $ readMediaWiki def wiki >>= writeOrg def + { writerWrapText = WrapNone + } + +getPandocFormat :: Text -> Handler Text +getPandocFormat name = do + wiki <- getWikiFormat name + liftIO $ runIOorExplode $ readMediaWiki def wiki >>= writeNative def + { writerWrapText = WrapNone + } + +getWpSummary :: Text -> Handler Value +getWpSummary name = do + r <- liftIO $ get + ("https://en.wikipedia.org/api/rest_v1/page/summary/" <> (T.unpack name)) + return $ fromJust $ decode $ r ^. responseBody + +getInfobox :: Text -> Handler (HM.HashMap Text Text) +getInfobox name = do + wiki <- getWikiFormat name + return $ maybe + HM.empty + wtFields + (find (\(WikiTemplate name _) -> name == "Infobox") + (fromRight [] (parseOnly wikiP wiki)) + ) + +wikiP :: Parser [WikiTemplate] +wikiP = sepBy templateP (commentP <|> skipSpace) + +data WikiTemplate = WikiTemplate + { wtName :: Text + -- , wtSubName :: Maybe Text + , wtFields :: HM.HashMap Text Text + } + deriving (Show, Generic) + +instance ToJSON WikiTemplate +instance FromJSON WikiTemplate + +templateP :: Parser WikiTemplate +templateP = do + string "{{" + name <- tempHeaderP + fields <- many' (choice [tempFieldP, commentP >> return Nothing]) + string "}}" + return $ WikiTemplate name (HM.fromList (catMaybes fields)) + +tempHeaderP :: Parser Text +tempHeaderP = do + -- Not sure whether name can contain numbers + skipSpace + -- Does not consider multiple words like Short description + name <- takeWhile1 isAlpha + manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}")) + return name + +tempFieldP :: Parser (Maybe (Text, Text)) +tempFieldP = do + skipSpace >> char '|' >> skipSpace + choice [namedField, tempFieldValueP >> return Nothing] + where + namedField = do + key <- takeWhile1 (\c -> isAlphaNum c || c == '_') + skipSpace + char '=' + skipSpace + value <- tempFieldValueP + return $ if T.null value then Nothing else Just (key, value) + +tempFieldValueP :: Parser Text +tempFieldValueP = do + skipSpace + T.concat <$> many' + (choice [templateP >> return "", commentP >> return "", wikilinkP, simpleP]) + where + simpleP :: Parser Text + simpleP = do + t <- T.pack <$> manyTill + anyChar + (lookAhead + ( string "[[" + <|> string "{{" + <|> string "<!--" + <|> (char '|' >> return "") + <|> string "}}" + ) + ) + if T.null t then fail "simpleP" else return $ T.unwords $ T.words t + +commentP :: Parser () +commentP = do + skipSpace >> string "<!--" >> untilCommentEnd "" + where + untilCommentEnd :: Text -> Parser () + untilCommentEnd xs | T.isSuffixOf "--" xs = char '>' >> return () + untilCommentEnd _ = do + xs <- takeWhile1 (/= '>') + untilCommentEnd xs + +wikilinkP :: Parser Text +wikilinkP = do + beg <- string "[[" + content <- T.pack <$> manyTill anyChar (string "]]") + return $ beg <> content <> "]]" + +app :: Application +app = serve api server + +api :: Proxy API +api = Proxy + +main :: IO () +main = do + run 5555 app |