summaryrefslogtreecommitdiff
path: root/app/Main.hs
diff options
context:
space:
mode:
Diffstat (limited to 'app/Main.hs')
-rw-r--r--app/Main.hs252
1 files changed, 252 insertions, 0 deletions
diff --git a/app/Main.hs b/app/Main.hs
new file mode 100644
index 0000000..f9e522b
--- /dev/null
+++ b/app/Main.hs
@@ -0,0 +1,252 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeOperators #-}
+
+module Main where
+
+import Control.Applicative ( (<|>) )
+import Control.Lens ( (^.)
+ , (^?)
+ )
+import Control.Monad.IO.Class ( liftIO )
+import Data.Aeson ( FromJSON
+ , ToJSON
+ , Value
+ , decode
+ , encode
+ )
+import qualified Data.Aeson.KeyMap as KM
+ ( KeyMap
+ , lookup
+ )
+import Data.Aeson.Lens ( AsNumber(..)
+ , AsValue(..)
+ , key
+ )
+import Data.Attoparsec.Combinator ( lookAhead )
+import Data.Attoparsec.Text ( Parser
+ , anyChar
+ , char
+ , choice
+ , many'
+ , manyTill
+ , parse
+ , parseOnly
+ , sepBy
+ , skipMany
+ , skipSpace
+ , skipWhile
+ , space
+ , string
+ , takeTill
+ , takeWhile1
+ )
+import qualified Data.ByteString.Lazy as BSL
+import Data.Char ( isAlpha
+ , isAlphaNum
+ )
+import Data.Either ( fromRight )
+import qualified Data.HashMap.Lazy as HM
+import Data.List ( find
+ , isSuffixOf
+ )
+import Data.Maybe ( catMaybes
+ , fromJust
+ )
+import Data.Proxy ( Proxy(..) )
+import qualified Data.Text as T
+import Data.Text ( Text )
+import qualified Data.Text.Encoding as TE
+import GHC.Generics ( Generic )
+import Network.Wai.Handler.Warp ( run )
+import Network.Wreq ( Response
+ , get
+ , responseBody
+ )
+import Prelude hiding ( takeWhile )
+import Servant ( (:<|>)(..)
+ , (:>)
+ , Application
+ , Capture
+ , FromHttpApiData(..)
+ , Get
+ , Handler
+ , JSON
+ , Server
+ , serve
+ )
+import Text.Pandoc ( WrapOption(..)
+ , WriterOptions(..)
+ , def
+ , readMediaWiki
+ , runIOorExplode
+ , writeNative
+ , writeOrg
+ )
+import Text.Regex.TDFA ( (=~) )
+
+type API = Wikipedia
+
+type Wikipedia
+ = SearchWikipedia :<|> GetWikiFormat :<|> GetOrgFormat :<|> GetPandocFormat :<|> GetWpSummary :<|> GetInfobox
+
+type SearchWikipedia
+ = "wikipedia" :> "search" :> Capture "query" Text :> Get '[JSON] Value
+
+type GetWikiFormat
+ = "wikipedia" :> "wiki" :> Capture "name" Text :> Get '[JSON] Text
+
+type GetOrgFormat
+ = "wikipedia" :> "org" :> Capture "name" Text :> Get '[JSON] Text
+
+type GetPandocFormat
+ = "wikipedia" :> "pandoc" :> Capture "name" Text :> Get '[JSON] Text
+
+type GetWpSummary
+ = "wikipedia" :> "summary" :> Capture "name" Text :> Get '[JSON] Value
+
+type GetInfobox
+ = "wikipedia" :> "infobox" :> Capture "name" Text :> Get '[JSON] (HM.HashMap Text Text)
+
+server :: Server API
+server =
+ searchWikipedia
+ :<|> getWikiFormat
+ :<|> getOrgFormat
+ :<|> getPandocFormat
+ :<|> getWpSummary
+ :<|> getInfobox
+
+searchWikipedia :: Text -> Handler Value
+searchWikipedia query = do
+ r <- liftIO $ get
+ ("https://en.wikipedia.org/w/api.php?action=query&format=json&list=search&srsearch="
+ <> (T.unpack query)
+ )
+ return $ fromJust $ decode $ r ^. responseBody
+
+getWikiFormat :: Text -> Handler Text
+getWikiFormat name = do
+ r <- liftIO $ get
+ ("https://en.wikipedia.org/wiki/" <> (T.unpack name) <> "?action=raw")
+ return $ TE.decodeUtf8 $ BSL.toStrict $ r ^. responseBody
+
+getOrgFormat :: Text -> Handler Text
+getOrgFormat name = do
+ wiki <- getWikiFormat name
+ liftIO $ runIOorExplode $ readMediaWiki def wiki >>= writeOrg def
+ { writerWrapText = WrapNone
+ }
+
+getPandocFormat :: Text -> Handler Text
+getPandocFormat name = do
+ wiki <- getWikiFormat name
+ liftIO $ runIOorExplode $ readMediaWiki def wiki >>= writeNative def
+ { writerWrapText = WrapNone
+ }
+
+getWpSummary :: Text -> Handler Value
+getWpSummary name = do
+ r <- liftIO $ get
+ ("https://en.wikipedia.org/api/rest_v1/page/summary/" <> (T.unpack name))
+ return $ fromJust $ decode $ r ^. responseBody
+
+getInfobox :: Text -> Handler (HM.HashMap Text Text)
+getInfobox name = do
+ wiki <- getWikiFormat name
+ return $ maybe
+ HM.empty
+ wtFields
+ (find (\(WikiTemplate name _) -> name == "Infobox")
+ (fromRight [] (parseOnly wikiP wiki))
+ )
+
+wikiP :: Parser [WikiTemplate]
+wikiP = sepBy templateP (commentP <|> skipSpace)
+
+data WikiTemplate = WikiTemplate
+ { wtName :: Text
+ -- , wtSubName :: Maybe Text
+ , wtFields :: HM.HashMap Text Text
+ }
+ deriving (Show, Generic)
+
+instance ToJSON WikiTemplate
+instance FromJSON WikiTemplate
+
+templateP :: Parser WikiTemplate
+templateP = do
+ string "{{"
+ name <- tempHeaderP
+ fields <- many' (choice [tempFieldP, commentP >> return Nothing])
+ string "}}"
+ return $ WikiTemplate name (HM.fromList (catMaybes fields))
+
+tempHeaderP :: Parser Text
+tempHeaderP = do
+ -- Not sure whether name can contain numbers
+ skipSpace
+ -- Does not consider multiple words like Short description
+ name <- takeWhile1 isAlpha
+ manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}"))
+ return name
+
+tempFieldP :: Parser (Maybe (Text, Text))
+tempFieldP = do
+ skipSpace >> char '|' >> skipSpace
+ choice [namedField, tempFieldValueP >> return Nothing]
+ where
+ namedField = do
+ key <- takeWhile1 (\c -> isAlphaNum c || c == '_')
+ skipSpace
+ char '='
+ skipSpace
+ value <- tempFieldValueP
+ return $ if T.null value then Nothing else Just (key, value)
+
+tempFieldValueP :: Parser Text
+tempFieldValueP = do
+ skipSpace
+ T.concat <$> many'
+ (choice [templateP >> return "", commentP >> return "", wikilinkP, simpleP])
+ where
+ simpleP :: Parser Text
+ simpleP = do
+ t <- T.pack <$> manyTill
+ anyChar
+ (lookAhead
+ ( string "[["
+ <|> string "{{"
+ <|> string "<!--"
+ <|> (char '|' >> return "")
+ <|> string "}}"
+ )
+ )
+ if T.null t then fail "simpleP" else return $ T.unwords $ T.words t
+
+commentP :: Parser ()
+commentP = do
+ skipSpace >> string "<!--" >> untilCommentEnd ""
+ where
+ untilCommentEnd :: Text -> Parser ()
+ untilCommentEnd xs | T.isSuffixOf "--" xs = char '>' >> return ()
+ untilCommentEnd _ = do
+ xs <- takeWhile1 (/= '>')
+ untilCommentEnd xs
+
+wikilinkP :: Parser Text
+wikilinkP = do
+ beg <- string "[["
+ content <- T.pack <$> manyTill anyChar (string "]]")
+ return $ beg <> content <> "]]"
+
+app :: Application
+app = serve api server
+
+api :: Proxy API
+api = Proxy
+
+main :: IO ()
+main = do
+ run 5555 app