summaryrefslogtreecommitdiff
path: root/app/Main.hs
blob: f4c7afca9fac038c94f11a4ec88ac1f4c4e083a1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}

module Main where

import           Control.Applicative            ( (<|>) )
import           Control.Lens                   ( (&)
                                                , (.~)
                                                , (^.)
                                                , (^?)
                                                )
import           Control.Monad.IO.Class         ( liftIO )
import           Data.Aeson                     ( FromJSON
                                                , ToJSON
                                                , Value
                                                , decode
                                                , encode
                                                )
import qualified Data.Aeson.KeyMap             as KM
                                                ( KeyMap
                                                , lookup
                                                )
import           Data.Aeson.Lens                ( AsNumber(..)
                                                , AsValue(..)
                                                , key
                                                )
import           Data.Attoparsec.Combinator     ( lookAhead )
import           Data.Attoparsec.Text           ( Parser
                                                , anyChar
                                                , char
                                                , choice
                                                , many'
                                                , manyTill
                                                , parse
                                                , parseOnly
                                                , sepBy
                                                , skipMany
                                                , skipSpace
                                                , skipWhile
                                                , space
                                                , string
                                                , takeTill
                                                , takeWhile1
                                                )
import qualified Data.ByteString.Lazy          as BSL
import           Data.Char                      ( isAlpha
                                                , isAlphaNum
                                                )
import           Data.Either                    ( fromRight )
import qualified Data.HashMap.Lazy             as HM
import           Data.List                      ( find
                                                , isSuffixOf
                                                )
import           Data.Maybe                     ( catMaybes
                                                , fromJust
                                                )
import           Data.Proxy                     ( Proxy(..) )
import qualified Data.Text                     as T
import           Data.Text                      ( Text )
import qualified Data.Text.Encoding            as TE
import           GHC.Generics                   ( Generic )
import           Network.Wai.Handler.Warp       ( run )
import           Network.Wreq                   ( Response
                                                , defaults
                                                , get
                                                , getWith
                                                , param
                                                , responseBody
                                                )
import           Prelude                 hiding ( takeWhile )
import           Servant                        ( (:<|>)(..)
                                                , (:>)
                                                , Application
                                                , Capture
                                                , FromHttpApiData(..)
                                                , Get
                                                , Handler
                                                , JSON
                                                , PlainText
                                                , Server
                                                , serve
                                                )
import           Text.Pandoc                    ( WrapOption(..)
                                                , WriterOptions(..)
                                                , def
                                                , readMediaWiki
                                                , runIOorExplode
                                                , writeNative
                                                , writeOrg
                                                )
import           Text.Regex.TDFA                ( (=~) )

-- | The complete HTTP API served by this application; at the moment it
-- consists solely of the 'Wikipedia' routes.
type API = Wikipedia

-- | All Wikipedia routes, combined as alternatives. The handlers in 'server'
-- are listed in this same order.
type Wikipedia
  = SearchWikipedia :<|> GetWikiFormat :<|> GetOrgFormat :<|> GetPandocFormat :<|> GetWpSummary :<|> GetInfobox

-- | Search route: captures the search query as a path segment and answers
-- with plain text.
--
-- TODO: fix the wrong content type: 'searchWikipedia' relays the JSON it
-- requested from Wikipedia, but this route serves it as text/plain.
type SearchWikipedia
  = "wikipedia" :> "search" :> Capture "query" Text :> Get '[PlainText] Text

-- | Raw wikitext route: captures the page name; the text is returned as a
-- JSON-encoded string.
type GetWikiFormat
  = "wikipedia" :> "wiki" :> Capture "name" Text :> Get '[JSON] Text

-- | Org-mode route: captures the page name; the converted text is returned
-- as a JSON-encoded string.
type GetOrgFormat
  = "wikipedia" :> "org" :> Capture "name" Text :> Get '[JSON] Text

-- | Pandoc-native route: captures the page name; the rendered document is
-- returned as a JSON-encoded string.
type GetPandocFormat
  = "wikipedia" :> "pandoc" :> Capture "name" Text :> Get '[JSON] Text

-- | Summary route: captures the page name and answers with plain text.
-- NOTE(review): the upstream summary body is presumably JSON as well, so the
-- content-type TODO on 'SearchWikipedia' likely applies here too; confirm.
type GetWpSummary
  = "wikipedia" :> "summary" :> Capture "name" Text :> Get '[PlainText] Text

-- | Infobox route: captures the page name and answers with a JSON object
-- mapping infobox field names to their values.
type GetInfobox
  = "wikipedia" :> "infobox" :> Capture "name" Text :> Get '[JSON] (HM.HashMap Text Text)

-- | Request handlers for every route of 'API'. The handlers are chained in
-- exactly the order in which the routes appear in the 'Wikipedia' type.
server :: Server API
server =
  searchWikipedia :<|>
  getWikiFormat :<|>
  getOrgFormat :<|>
  getPandocFormat :<|>
  getWpSummary :<|>
  getInfobox

-- | Run a full-text search against the English Wikipedia API and relay the
-- response body (requested as JSON) as text.
--
-- The query parameters are passed through wreq's 'param' lens rather than
-- spliced into the URL by hand, so the user-supplied @query@ is
-- percent-encoded. Previously a query containing spaces, @&@ or @#@ would
-- produce a malformed request or inject additional API parameters.
--
-- The body is decoded with 'TE.decodeUtf8', which throws on invalid UTF-8.
searchWikipedia :: Text -> Handler Text
searchWikipedia query = do
  r <- liftIO $ getWith opts "https://en.wikipedia.org/w/api.php"
  return $ TE.decodeUtf8 $ BSL.toStrict $ r ^. responseBody
 where
  -- Same parameters as the former hand-built URL; only the encoding differs.
  opts =
    defaults
      & param "action" .~ ["query"]
      & param "format" .~ ["json"]
      & param "list" .~ ["search"]
      & param "srsearch" .~ [query]

-- | Fetch the raw wikitext of the English Wikipedia page called @name@.
--
-- The page is requested through @index.php@ with @title@ and @action=raw@
-- passed as query parameters via wreq's 'param' lens, so @name@ is
-- percent-encoded. The earlier form concatenated @name@ into the URL path,
-- which broke for titles containing characters such as @?@, @#@ or @%@.
--
-- The body is decoded with 'TE.decodeUtf8', which throws on invalid UTF-8.
getWikiFormat :: Text -> Handler Text
getWikiFormat name = do
  r <- liftIO $ getWith opts "https://en.wikipedia.org/w/index.php"
  return $ TE.decodeUtf8 $ BSL.toStrict $ r ^. responseBody
 where
  opts = defaults & param "title" .~ [name] & param "action" .~ ["raw"]

-- | Fetch the wikitext of page @name@ and convert it to Org-mode markup with
-- pandoc, with line wrapping switched off in the writer.
getOrgFormat :: Text -> Handler Text
getOrgFormat name = do
  wikitext <- getWikiFormat name
  liftIO . runIOorExplode $ do
    document <- readMediaWiki def wikitext
    writeOrg orgOptions document
 where
  -- Default writer options, except that output lines are never re-wrapped.
  orgOptions = def { writerWrapText = WrapNone }

-- | Fetch the wikitext of page @name@ and render the parsed document in
-- pandoc's native format, with line wrapping switched off in the writer.
getPandocFormat :: Text -> Handler Text
getPandocFormat name = do
  wikitext <- getWikiFormat name
  liftIO . runIOorExplode $ do
    document <- readMediaWiki def wikitext
    writeNative nativeOptions document
 where
  -- Default writer options, except that output lines are never re-wrapped.
  nativeOptions = def { writerWrapText = WrapNone }

-- | Fetch the REST summary of page @name@ from the English Wikipedia and
-- relay the response body as text.
--
-- NOTE(review): @name@ is appended to the URL path without any escaping, so
-- titles containing characters such as @?@ or @#@ will likely misbehave.
getWpSummary :: Text -> Handler Text
getWpSummary name = bodyText <$> liftIO (get summaryUrl)
 where
  summaryUrl =
    "https://en.wikipedia.org/api/rest_v1/page/summary/" ++ T.unpack name
  -- Strict UTF-8 decoding of the lazy response body.
  bodyText response = TE.decodeUtf8 (BSL.toStrict (response ^. responseBody))

-- | Fetch the wikitext of page @name@ and return the fields of the first
-- parsed template whose name is exactly @Infobox@.
--
-- The result is an empty map when the wikitext cannot be parsed or when no
-- such template is found.
getInfobox :: Text -> Handler (HM.HashMap Text Text)
getInfobox name = do
  wikitext <- getWikiFormat name
  -- A failed parse is treated the same as a page without templates.
  let templates = either (const []) id (parseOnly wikiP wikitext)
  return $ case find ((== "Infobox") . wtName) templates of
    Just infobox -> wtFields infobox
    Nothing      -> HM.empty

-- | Parse the run of templates at the start of a page's wikitext.
--
-- Any mix of whitespace and HTML comments may precede the first template and
-- separate consecutive ones. Previously the separator accepted only a single
-- comment /or/ a single whitespace run, so a comment followed by a newline,
-- or leading whitespace before the first template, ended the parse early.
--
-- Parsing stops, without failing, at the first position that is not the
-- start of a template.
wikiP :: Parser [WikiTemplate]
wikiP = junkP *> sepBy templateP junkP
 where
  -- 'commentP' skips its own leading whitespace; the final 'skipSpace'
  -- consumes whatever whitespace follows the last comment.
  junkP :: Parser ()
  junkP = skipMany commentP *> skipSpace

-- | A template invocation as produced by 'templateP'.
--
-- 'wtName' holds the name returned by 'tempHeaderP' (the leading alphabetic
-- word of the template header); 'wtFields' holds the named fields that
-- 'tempFieldP' yielded, keyed by field name.
data WikiTemplate = WikiTemplate
  { wtName   :: Text
  -- , wtSubName :: Maybe Text
  , wtFields :: HM.HashMap Text Text
  }
  deriving (Show, Generic)

-- JSON instances use the class defaults, which are derived from 'Generic'.
instance ToJSON WikiTemplate
instance FromJSON WikiTemplate

-- | Parse one template invocation: an opening @{{@, the header, any number
-- of fields and comments, and the closing @}}@.
--
-- Only fields that 'tempFieldP' reports as @Just@ are kept; comments and
-- discarded fields contribute nothing to the resulting map.
templateP :: Parser WikiTemplate
templateP = do
  _       <- string "{{"
  header  <- tempHeaderP
  entries <- many' (tempFieldP <|> (Nothing <$ commentP))
  _       <- string "}}"
  pure $ WikiTemplate header (HM.fromList (catMaybes entries))

-- | Parse a template header and return its name.
--
-- The name is the first run of alphabetic characters after optional
-- whitespace; digits and multi-word names (e.g. @Short description@) are not
-- captured. Everything after that run, up to but not including the next @|@
-- or @}}@, is consumed and thrown away.
tempHeaderP :: Parser Text
tempHeaderP = skipSpace *> takeWhile1 isAlpha <* skipRestOfHeader
 where
  skipRestOfHeader = manyTill anyChar (lookAhead headerEnd)
  -- Either delimiter ends the header; it is only peeked at, not consumed.
  headerEnd        = ("" <$ char '|') <|> string "}}"

-- | Parse one @|@-introduced template field.
--
-- A named field (@key = value@, where the key consists of alphanumerics and
-- underscores) yields @Just (key, value)@, unless the value is empty, in
-- which case it yields @Nothing@. Anything else after the @|@ is parsed as a
-- positional value and discarded, also yielding @Nothing@.
tempFieldP :: Parser (Maybe (Text, Text))
tempFieldP = do
  skipSpace *> char '|' *> skipSpace
  namedField <|> (Nothing <$ tempFieldValueP)
 where
  isKeyChar c = isAlphaNum c || c == '_'

  namedField = do
    fieldName  <- takeWhile1 isKeyChar <* skipSpace
    fieldValue <- char '=' *> skipSpace *> tempFieldValueP
    pure $ if T.null fieldValue then Nothing else Just (fieldName, fieldValue)

-- | Parse the value of a template field, up to (not including) the next
-- top-level @|@ or @}}@.
--
-- Nested templates and HTML comments are dropped, wiki links are kept
-- verbatim, and everything else is kept as plain text. Whitespace is
-- normalised (runs collapsed to one space, ends trimmed) once, on the
-- assembled value. Normalising each plain-text chunk separately, as was done
-- before, removed the spaces adjacent to links and templates, turning
-- @in [[Paris]], France@ into @in[[Paris]], France@.
tempFieldValueP :: Parser Text
tempFieldValueP = do
  skipSpace
  pieces <- many'
    (choice [templateP >> return "", commentP >> return "", wikilinkP, simpleP])
  return $ T.unwords $ T.words $ T.concat pieces
 where
  -- A non-empty stretch of plain text ending just before the next construct.
  -- It must fail on empty input so that 'many'' terminates.
  simpleP :: Parser Text
  simpleP = do
    t <- T.pack <$> manyTill anyChar (lookAhead stopP)
    if T.null t then fail "simpleP" else return t

  -- Openers of the other value constructs, plus the field/template
  -- delimiters; only ever used under 'lookAhead'.
  stopP :: Parser Text
  stopP =
    string "[["
      <|> string "{{"
      <|> string "<!--"
      <|> (char '|' >> return "")
      <|> string "}}"

-- | Skip optional leading whitespace followed by one HTML comment
-- (@\<!-- ... --\>@).
--
-- The comment body is scanned up to the first @--\>@. The earlier
-- hand-rolled loop read up to each @>@ but never consumed a @>@ that was not
-- preceded by @--@, so any comment containing such a character (for example
-- @\<!-- a > b --\>@) failed to parse.
commentP :: Parser ()
commentP = do
  skipSpace
  _ <- string "<!--"
  _ <- manyTill anyChar (string "-->")
  return ()

-- | Parse a wiki link (@[[...]]@) and return it verbatim, brackets included.
-- The link ends at the first @]]@.
wikilinkP :: Parser Text
wikilinkP = do
  opening <- string "[["
  inner   <- manyTill anyChar (string "]]")
  pure $ T.concat [opening, T.pack inner, "]]"]

-- | The WAI application: the 'server' handlers served according to 'api'.
app :: Application
app = serve api server

-- | Type-level witness of 'API', passed to 'serve' to select the routes.
api :: Proxy API
api = Proxy

-- | Entry point: serve 'app' with Warp on port 5555.
main :: IO ()
main = run 5555 app