diff options
-rw-r--r-- | app/Main.hs | 176 | ||||
-rw-r--r-- | servall.cabal | 18 | ||||
-rw-r--r-- | src/Servall/Types.hs | 13 | ||||
-rw-r--r-- | src/Servall/WikiParser.hs | 105 |
4 files changed, 198 insertions, 114 deletions
diff --git a/app/Main.hs b/app/Main.hs index f4c7afc..593d243 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -4,8 +4,9 @@ {-# LANGUAGE TypeOperators #-} module Main where - -import Control.Applicative ( (<|>) ) +import Control.Applicative ( (<**>) + , (<|>) + ) import Control.Lens ( (^.) , (^?) ) @@ -24,47 +25,43 @@ import Data.Aeson.Lens ( AsNumber(..) , AsValue(..) , key ) -import Data.Attoparsec.Combinator ( lookAhead ) -import Data.Attoparsec.Text ( Parser - , anyChar - , char - , choice - , many' - , manyTill - , parse - , parseOnly - , sepBy - , skipMany - , skipSpace - , skipWhile - , space - , string - , takeTill - , takeWhile1 - ) import qualified Data.ByteString.Lazy as BSL -import Data.Char ( isAlpha - , isAlphaNum - ) import Data.Either ( fromRight ) import qualified Data.HashMap.Lazy as HM import Data.List ( find , isSuffixOf ) -import Data.Maybe ( catMaybes - , fromJust - ) +import Data.Maybe ( fromJust ) import Data.Proxy ( Proxy(..) ) import qualified Data.Text as T import Data.Text ( Text ) import qualified Data.Text.Encoding as TE +import Data.Text.IO ( hGetContents ) import GHC.Generics ( Generic ) import Network.Wai.Handler.Warp ( run ) import Network.Wreq ( Response , get , responseBody ) +import Options.Applicative ( Parser + , auto + , execParser + , fullDesc + , header + , help + , helper + , info + , long + , metavar + , option + , progDesc + , short + , showDefault + , value + ) import Prelude hiding ( takeWhile ) +import Servall.Types +import Servall.WikiParser ( parseWikiTemplates ) import Servant ( (:<|>)(..) , (:>) , Application @@ -77,6 +74,11 @@ import Servant ( (:<|>)(..) , Server , serve ) +import System.Process ( CreateProcess(..) + , StdStream(..) + , createProcess + , proc + ) import Text.Pandoc ( WrapOption(..) , WriterOptions(..) , def @@ -87,11 +89,13 @@ import Text.Pandoc ( WrapOption(..) ) import Text.Regex.TDFA ( (=~) ) -type API = Wikipedia +type API = Wikipedia :<|> Ytdl type Wikipedia = SearchWikipedia :<|> GetWikiFormat :<|> GetOrgFormat :<|> GetPandocFormat :<|> GetWpSummary :<|> GetInfobox +type Ytdl = YtSearch + -- TODO: fix the problem with plaintext having the wront content-type: text/plain type SearchWikipedia = "wikipedia" :> "search" :> Capture "query" Text :> Get '[PlainText] Text @@ -111,14 +115,19 @@ type GetWpSummary type GetInfobox = "wikipedia" :> "infobox" :> Capture "name" Text :> Get '[JSON] (HM.HashMap Text Text) +type YtSearch + = "ytdl" :> "search" :> Capture "query" Text :> Get '[PlainText] Text + server :: Server API server = - searchWikipedia + ( searchWikipedia :<|> getWikiFormat :<|> getOrgFormat :<|> getPandocFormat :<|> getWpSummary :<|> getInfobox + ) + :<|> searchYt searchWikipedia :: Text -> Handler Text searchWikipedia query = do @@ -161,87 +170,16 @@ getInfobox name = do HM.empty wtFields (find (\(WikiTemplate name _) -> name == "Infobox") - (fromRight [] (parseOnly wikiP wiki)) + (fromRight [] (parseWikiTemplates wiki)) ) -wikiP :: Parser [WikiTemplate] -wikiP = sepBy templateP (commentP <|> skipSpace) - -data WikiTemplate = WikiTemplate - { wtName :: Text - -- , wtSubName :: Maybe Text - , wtFields :: HM.HashMap Text Text - } - deriving (Show, Generic) - -instance ToJSON WikiTemplate -instance FromJSON WikiTemplate - -templateP :: Parser WikiTemplate -templateP = do - string "{{" - name <- tempHeaderP - fields <- many' (choice [tempFieldP, commentP >> return Nothing]) - string "}}" - return $ WikiTemplate name (HM.fromList (catMaybes fields)) - -tempHeaderP :: Parser Text -tempHeaderP = do - -- Not sure whether name can contain numbers - skipSpace - -- Does not consider multiple words like Short description - name <- takeWhile1 isAlpha - manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}")) - return name - -tempFieldP :: Parser (Maybe (Text, Text)) -tempFieldP = do - skipSpace >> char '|' >> skipSpace - choice [namedField, tempFieldValueP >> return Nothing] - where - namedField = do - key <- takeWhile1 (\c -> isAlphaNum c || c == '_') - skipSpace - char '=' - skipSpace - value <- tempFieldValueP - return $ if T.null value then Nothing else Just (key, value) - -tempFieldValueP :: Parser Text -tempFieldValueP = do - skipSpace - T.concat <$> many' - (choice [templateP >> return "", commentP >> return "", wikilinkP, simpleP]) - where - simpleP :: Parser Text - simpleP = do - t <- T.pack <$> manyTill - anyChar - (lookAhead - ( string "[[" - <|> string "{{" - <|> string "<!--" - <|> (char '|' >> return "") - <|> string "}}" - ) - ) - if T.null t then fail "simpleP" else return $ T.unwords $ T.words t - -commentP :: Parser () -commentP = do - skipSpace >> string "<!--" >> untilCommentEnd "" - where - untilCommentEnd :: Text -> Parser () - untilCommentEnd xs | T.isSuffixOf "--" xs = char '>' >> return () - untilCommentEnd _ = do - xs <- takeWhile1 (/= '>') - untilCommentEnd xs - -wikilinkP :: Parser Text -wikilinkP = do - beg <- string "[[" - content <- T.pack <$> manyTill anyChar (string "]]") - return $ beg <> content <> "]]" +searchYt :: Text -> Handler Text +searchYt query = do + (_, Just hout, _, _) <- liftIO $ createProcess + (proc "yt-dlp" ["-j", "ytsearch5:" <> T.unpack query]) + { std_out = CreatePipe + } + liftIO $ hGetContents hout app :: Application app = serve api server @@ -251,4 +189,26 @@ api = Proxy main :: IO () main = do - run 5555 app + config <- execParser opts + run (configPort config) app + where + opts = info + (optParser <**> helper) + (fullDesc <> progDesc "Servall everything server" <> header + "servall - an everything server." + ) + +optParser :: Parser ServerConfig +optParser = ServerConfig <$> option + auto + ( long "port" + <> short 'p' + <> metavar "PORT" + <> value 5555 + <> showDefault + <> help "Port to run the server at." + ) + +data ServerConfig = ServerConfig + { configPort :: Int + } diff --git a/servall.cabal b/servall.cabal index ec37900..05e1b2a 100644 --- a/servall.cabal +++ b/servall.cabal @@ -20,20 +20,26 @@ maintainer: hi@ypei.me -- A copyright notice. -- copyright: -- category: -extra-source-files: CHANGELOG.md - +-- extra-source-files: CHANGELOG.md + +library + default-language: Haskell2010 + exposed-modules: Servall.Types, Servall.WikiParser + hs-source-dirs: src + build-depends: base, attoparsec, text, unordered-containers + executable servall main-is: Main.hs - -- Modules included in this executable, other than Main. -- other-modules: -- LANGUAGE extensions used by modules in this package. -- other-extensions: build-depends: aeson, attoparsec, base, bytestring - , lens, lens-aeson - , pandoc, regex-tdfa + , lens, lens-aeson, optparse-applicative + , pandoc, process, regex-tdfa , servant, servant-server , text, unordered-containers, warp, wreq - hs-source-dirs: app + hs-source-dirs: app, src + other-modules: Servall.Types, Servall.WikiParser default-language: Haskell2010 diff --git a/src/Servall/Types.hs b/src/Servall/Types.hs new file mode 100644 index 0000000..e179e2c --- /dev/null +++ b/src/Servall/Types.hs @@ -0,0 +1,13 @@ +module Servall.Types + ( WikiTemplate(..) + ) where + +import qualified Data.HashMap.Lazy as HM +import Data.Text ( Text ) + +data WikiTemplate = WikiTemplate + { wtName :: Text + -- , wtSubName :: Maybe Text + , wtFields :: HM.HashMap Text Text + } + deriving Show diff --git a/src/Servall/WikiParser.hs b/src/Servall/WikiParser.hs new file mode 100644 index 0000000..ec15e1c --- /dev/null +++ b/src/Servall/WikiParser.hs @@ -0,0 +1,105 @@ +{-# LANGUAGE OverloadedStrings #-} +module Servall.WikiParser + ( parseWikiTemplates + ) where + +import Control.Applicative ( (<|>) ) +import Data.Attoparsec.Combinator ( lookAhead ) +import Data.Attoparsec.Text ( Parser + , anyChar + , char + , choice + , many' + , manyTill + , parse + , parseOnly + , sepBy + , skipMany + , skipSpace + , skipWhile + , space + , string + , takeTill + , takeWhile1 + ) +import Data.Char ( isAlpha + , isAlphaNum + ) +import qualified Data.HashMap.Lazy as HM +import Data.Maybe ( catMaybes ) +import qualified Data.Text as T +import Data.Text ( Text ) +import GHC.Generics ( Generic ) +import Servall.Types + +parseWikiTemplates :: Text -> Either String [WikiTemplate] +parseWikiTemplates = parseOnly wikiP + +wikiP :: Parser [WikiTemplate] +wikiP = sepBy templateP (commentP <|> skipSpace) + +templateP :: Parser WikiTemplate +templateP = do + string "{{" + name <- tempHeaderP + fields <- many' (choice [tempFieldP, commentP >> return Nothing]) + string "}}" + return $ WikiTemplate name (HM.fromList (catMaybes fields)) + +tempHeaderP :: Parser Text +tempHeaderP = do + -- Not sure whether name can contain numbers + skipSpace + -- Does not consider multiple words like Short description + name <- takeWhile1 isAlpha + manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}")) + return name + +tempFieldP :: Parser (Maybe (Text, Text)) +tempFieldP = do + skipSpace >> char '|' >> skipSpace + choice [namedField, tempFieldValueP >> return Nothing] + where + namedField = do + key <- takeWhile1 (\c -> isAlphaNum c || c == '_') + skipSpace + char '=' + skipSpace + value <- tempFieldValueP + return $ if T.null value then Nothing else Just (key, value) + +tempFieldValueP :: Parser Text +tempFieldValueP = do + skipSpace + T.concat <$> many' + (choice [templateP >> return "", commentP >> return "", wikilinkP, simpleP]) + where + simpleP :: Parser Text + simpleP = do + t <- T.pack <$> manyTill + anyChar + (lookAhead + ( string "[[" + <|> string "{{" + <|> string "<!--" + <|> (char '|' >> return "") + <|> string "}}" + ) + ) + if T.null t then fail "simpleP" else return $ T.unwords $ T.words t + +commentP :: Parser () +commentP = do + skipSpace >> string "<!--" >> untilCommentEnd "" + where + untilCommentEnd :: Text -> Parser () + untilCommentEnd xs | T.isSuffixOf "--" xs = char '>' >> return () + untilCommentEnd _ = do + xs <- takeWhile1 (/= '>') + untilCommentEnd xs + +wikilinkP :: Parser Text +wikilinkP = do + beg <- string "[[" + content <- T.pack <$> manyTill anyChar (string "]]") + return $ beg <> content <> "]]" |