1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeSynonymInstances #-}
-- |
-- Module : Documentation.Haddock.Parser.Monad
-- Copyright : (c) Alec Theriault 2018-2019,
-- License : BSD-like
--
-- Maintainer : haddock@projects.haskell.org
-- Stability : experimental
-- Portability : portable
--
-- Defines the Parsec monad over which all parsing is done and also provides
-- more efficient versions of the usual parsec combinator functions (but
-- specialized to 'Text').
module Documentation.Haddock.Parser.Monad where
import qualified Text.Parsec.Char as Parsec
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos ( updatePosChar )
import Text.Parsec ( State(..)
, getParserState, setParserState )
import qualified Data.Text as T
import Data.Text ( Text )
import Control.Monad ( mfilter )
import Data.Functor ( ($>) )
import Data.String ( IsString(..) )
import Data.Bits ( Bits(..) )
import Data.Char ( ord )
import Data.List ( foldl' )
import Control.Applicative as App
import Documentation.Haddock.Types ( Version )
import Prelude hiding (takeWhile)
-- | The only bit of information we really care about truding along with us
-- through parsing is the version attached to a @\@since@ annotation - if
-- the doc even contained one.
newtype ParserState = ParserState {
parserStateSince :: Maybe Version
} deriving (Eq, Show)
initialParserState :: ParserState
initialParserState = ParserState Nothing
setSince :: Version -> Parser ()
setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since })
type Parser = Parsec.Parsec Text ParserState
instance (a ~ Text) => IsString (Parser a) where
fromString = fmap T.pack . Parsec.string
parseOnly :: Parser a -> Text -> Either String (ParserState, a)
parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
Left e -> Left (show e)
Right (x,s) -> Right (s,x)
where p' = (,) <$> p <*> Parsec.getState
-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
-- consume input.
--
-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
-- more efficient.
peekChar :: Parser (Maybe Char)
peekChar = headOpt . stateInput <$> getParserState
where headOpt t | T.null t = Nothing
| otherwise = Just (T.head t)
{-# INLINE peekChar #-}
-- | Fails if at the end of input. Does not consume input.
--
-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
peekChar' :: Parser Char
peekChar' = headFail . stateInput =<< getParserState
where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF"
| otherwise = App.pure (T.head t)
{-# INLINE peekChar' #-}
-- | Parses the given string. Returns the parsed string.
--
-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
string :: Text -> Parser Text
string t = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
case T.stripPrefix t inp of
Nothing -> Parsec.parserFail "string: Failed to match the input string"
Just inp' ->
let pos' = T.foldl updatePosChar pos t
s' = s{ stateInput = inp', statePos = pos' }
in setParserState s' $> t
-- | Keep matching characters as long as the predicate function holds (and
-- return them).
--
-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile f = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
let (t, inp') = T.span f inp
pos' = T.foldl updatePosChar pos t
s' = s{ stateInput = inp', statePos = pos' }
setParserState s' $> t
-- | Like 'takeWhile', but fails if no characters matched.
--
-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 = mfilter (not . T.null) . takeWhile
-- | Scan the input text, accumulating characters as long as the scanning
-- function returns true.
scan :: (s -> Char -> Maybe s) -- ^ scan function
-> s -- ^ initial state
-> Parser Text
scan f st = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
go inp st pos 0 $ \inp' pos' n ->
let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' }
in setParserState s' $> T.take n inp
where
go inp s !pos !n cont
= case T.uncons inp of
Nothing -> cont inp pos n -- ran out of input
Just (c, inp') ->
case f s c of
Nothing -> cont inp pos n -- scan function failed
Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont
-- | Parse a decimal number.
decimal :: Integral a => Parser a
decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit
where step a c = a * 10 + fromIntegral (ord c - 48)
-- | Parse a hexadecimal number.
hexadecimal :: (Integral a, Bits a) => Parser a
hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit
where
step a c | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48)
| w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87)
| otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55)
where w = ord c
|