{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeOperators #-} module Main where import Control.Applicative ( (<|>) ) import Control.Lens ( (^.) , (^?) ) import Control.Monad.IO.Class ( liftIO ) import Data.Aeson ( FromJSON , ToJSON , Value , decode , encode ) import qualified Data.Aeson.KeyMap as KM ( KeyMap , lookup ) import Data.Aeson.Lens ( AsNumber(..) , AsValue(..) , key ) import Data.Attoparsec.Combinator ( lookAhead ) import Data.Attoparsec.Text ( Parser , anyChar , char , choice , many' , manyTill , parse , parseOnly , sepBy , skipMany , skipSpace , skipWhile , space , string , takeTill , takeWhile1 ) import qualified Data.ByteString.Lazy as BSL import Data.Char ( isAlpha , isAlphaNum ) import Data.Either ( fromRight ) import qualified Data.HashMap.Lazy as HM import Data.List ( find , isSuffixOf ) import Data.Maybe ( catMaybes , fromJust ) import Data.Proxy ( Proxy(..) ) import qualified Data.Text as T import Data.Text ( Text ) import qualified Data.Text.Encoding as TE import GHC.Generics ( Generic ) import Network.Wai.Handler.Warp ( run ) import Network.Wreq ( Response , get , responseBody ) import Prelude hiding ( takeWhile ) import Servant ( (:<|>)(..) , (:>) , Application , Capture , FromHttpApiData(..) , Get , Handler , JSON , PlainText , Server , serve ) import Text.Pandoc ( WrapOption(..) , WriterOptions(..) , def , readMediaWiki , runIOorExplode , writeNative , writeOrg ) import Text.Regex.TDFA ( (=~) ) type API = Wikipedia type Wikipedia = SearchWikipedia :<|> GetWikiFormat :<|> GetOrgFormat :<|> GetPandocFormat :<|> GetWpSummary :<|> GetInfobox -- TODO: fix the problem with plaintext having the wront content-type: text/plain type SearchWikipedia = "wikipedia" :> "search" :> Capture "query" Text :> Get '[PlainText] Text type GetWikiFormat = "wikipedia" :> "wiki" :> Capture "name" Text :> Get '[JSON] Text type GetOrgFormat = "wikipedia" :> "org" :> Capture "name" Text :> Get '[JSON] Text type GetPandocFormat = "wikipedia" :> "pandoc" :> Capture "name" Text :> Get '[JSON] Text type GetWpSummary = "wikipedia" :> "summary" :> Capture "name" Text :> Get '[PlainText] Text type GetInfobox = "wikipedia" :> "infobox" :> Capture "name" Text :> Get '[JSON] (HM.HashMap Text Text) server :: Server API server = searchWikipedia :<|> getWikiFormat :<|> getOrgFormat :<|> getPandocFormat :<|> getWpSummary :<|> getInfobox searchWikipedia :: Text -> Handler Text searchWikipedia query = do r <- liftIO $ get ("https://en.wikipedia.org/w/api.php?action=query&format=json&list=search&srsearch=" <> (T.unpack query) ) return $ TE.decodeUtf8 $ BSL.toStrict $ r ^. responseBody getWikiFormat :: Text -> Handler Text getWikiFormat name = do r <- liftIO $ get ("https://en.wikipedia.org/wiki/" <> (T.unpack name) <> "?action=raw") return $ TE.decodeUtf8 $ BSL.toStrict $ r ^. responseBody getOrgFormat :: Text -> Handler Text getOrgFormat name = do wiki <- getWikiFormat name liftIO $ runIOorExplode $ readMediaWiki def wiki >>= writeOrg def { writerWrapText = WrapNone } getPandocFormat :: Text -> Handler Text getPandocFormat name = do wiki <- getWikiFormat name liftIO $ runIOorExplode $ readMediaWiki def wiki >>= writeNative def { writerWrapText = WrapNone } getWpSummary :: Text -> Handler Text getWpSummary name = do r <- liftIO $ get ("https://en.wikipedia.org/api/rest_v1/page/summary/" <> (T.unpack name)) return $ TE.decodeUtf8 $ BSL.toStrict $ r ^. responseBody getInfobox :: Text -> Handler (HM.HashMap Text Text) getInfobox name = do wiki <- getWikiFormat name return $ maybe HM.empty wtFields (find (\(WikiTemplate name _) -> name == "Infobox") (fromRight [] (parseOnly wikiP wiki)) ) wikiP :: Parser [WikiTemplate] wikiP = sepBy templateP (commentP <|> skipSpace) data WikiTemplate = WikiTemplate { wtName :: Text -- , wtSubName :: Maybe Text , wtFields :: HM.HashMap Text Text } deriving (Show, Generic) instance ToJSON WikiTemplate instance FromJSON WikiTemplate templateP :: Parser WikiTemplate templateP = do string "{{" name <- tempHeaderP fields <- many' (choice [tempFieldP, commentP >> return Nothing]) string "}}" return $ WikiTemplate name (HM.fromList (catMaybes fields)) tempHeaderP :: Parser Text tempHeaderP = do -- Not sure whether name can contain numbers skipSpace -- Does not consider multiple words like Short description name <- takeWhile1 isAlpha manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}")) return name tempFieldP :: Parser (Maybe (Text, Text)) tempFieldP = do skipSpace >> char '|' >> skipSpace choice [namedField, tempFieldValueP >> return Nothing] where namedField = do key <- takeWhile1 (\c -> isAlphaNum c || c == '_') skipSpace char '=' skipSpace value <- tempFieldValueP return $ if T.null value then Nothing else Just (key, value) tempFieldValueP :: Parser Text tempFieldValueP = do skipSpace T.concat <$> many' (choice [templateP >> return "", commentP >> return "", wikilinkP, simpleP]) where simpleP :: Parser Text simpleP = do t <- T.pack <$> manyTill anyChar (lookAhead ( string "[[" <|> string "{{" <|> string "