summaryrefslogtreecommitdiff
path: root/src/Servall/WikiParser.hs
blob: e284a3bd3eefe6f0f211236903a59f5d71e05360 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
{-# LANGUAGE OverloadedStrings #-}
module Servall.WikiParser
  ( parseWikiTemplates
  , wikiFilter
  ) where

import           Control.Applicative            ( (<|>) )
import           Data.Attoparsec.Combinator     ( lookAhead )
import           Data.Attoparsec.Text           ( Parser
                                                , anyChar
                                                , char
                                                , choice
                                                , many'
                                                , manyTill
                                                , parse
                                                , parseOnly
                                                , sepBy
                                                , skipMany
                                                , skipSpace
                                                , skipWhile
                                                , space
                                                , string
                                                , takeTill
                                                , takeWhile1
                                                )
import           Data.Char                      ( isAlpha
                                                , isAlphaNum
                                                )
import qualified Data.HashMap.Lazy             as HM
import           Data.Maybe                     ( catMaybes )
import qualified Data.Text                     as T
import           Data.Text                      ( Text )
import           GHC.Generics                   ( Generic )
import           Servall.Types
import           Text.Pandoc.Definition         ( Block(..)
                                                , Inline(..)
                                                , Pandoc(..)
                                                )
import           Text.Pandoc.Generic            ( topDown )

parseWikiTemplates :: Text -> Either String [WikiTemplate]
parseWikiTemplates = parseOnly wikiP

wikiP :: Parser [WikiTemplate]
wikiP = sepBy templateP (commentP <|> skipSpace)

templateP :: Parser WikiTemplate
templateP = do
  string "{{"
  name   <- tempHeaderP
  fields <- many' (choice [tempFieldP, commentP >> return Nothing])
  string "}}"
  return $ WikiTemplate name (HM.fromList (catMaybes fields))

tempHeaderP :: Parser Text
tempHeaderP = do
  -- Not sure whether name can contain numbers
  skipSpace
  -- Does not consider multiple words like Short description
  name <- takeWhile1 isAlpha
  manyTill anyChar (lookAhead ((char '|' >> return "") <|> string "}}"))
  return name

tempFieldP :: Parser (Maybe (Text, Text))
tempFieldP = do
  skipSpace >> char '|' >> skipSpace
  choice [namedField, tempFieldValueP >> return Nothing]
 where
  namedField = do
    key <- takeWhile1 (\c -> isAlphaNum c || c == '_')
    skipSpace
    char '='
    skipSpace
    value <- tempFieldValueP
    return $ if T.null value then Nothing else Just (key, value)

tempFieldValueP :: Parser Text
tempFieldValueP = do
  skipSpace
  T.concat <$> many'
    (choice [templateP >> return "", commentP >> return "", wikilinkP, simpleP])
 where
  simpleP :: Parser Text
  simpleP = do
    t <- T.pack <$> manyTill
      anyChar
      (lookAhead
        (   string "[["
        <|> string "{{"
        <|> string "<!--"
        <|> (char '|' >> return "")
        <|> string "}}"
        )
      )
    if T.null t then fail "simpleP" else return $ T.unwords $ T.words t

commentP :: Parser ()
commentP = do
  skipSpace >> string "<!--" >> untilCommentEnd ""
 where
  untilCommentEnd :: Text -> Parser ()
  untilCommentEnd xs | T.isSuffixOf "--" xs = char '>' >> return ()
  untilCommentEnd _                         = do
    xs <- takeWhile1 (/= '>')
    untilCommentEnd xs

wikilinkP :: Parser Text
wikilinkP = do
  beg     <- string "[["
  content <- T.pack <$> manyTill anyChar (string "]]")
  return $ beg <> content <> "]]"

wikiFilter :: Text -> Pandoc -> Pandoc
wikiFilter title = insertHeader title . topDown fixUrl

insertHeader :: Text -> Pandoc -> Pandoc
insertHeader title (Pandoc m bs) =
  Pandoc m (Header 1 ("", [], []) [Str title] : bs)

fixUrl :: Inline -> Inline
fixUrl (Link attr label (url, "wikilink")) =
  Link attr label ("wiki:" <> url, "")
fixUrl x = x