1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies, BangPatterns #-}
module Documentation.Haddock.Parser.Monad (
module Documentation.Haddock.Parser.Monad
, Attoparsec.isDigit
, Attoparsec.isDigit_w8
, Attoparsec.isAlpha_iso8859_15
, Attoparsec.isAlpha_ascii
, Attoparsec.isSpace
, Attoparsec.isSpace_w8
, Attoparsec.inClass
, Attoparsec.notInClass
, Attoparsec.isEndOfLine
, Attoparsec.isHorizontalSpace
, Attoparsec.choice
, Attoparsec.count
, Attoparsec.option
, Attoparsec.many'
, Attoparsec.many1
, Attoparsec.many1'
, Attoparsec.manyTill
, Attoparsec.manyTill'
, Attoparsec.sepBy
, Attoparsec.sepBy'
, Attoparsec.sepBy1
, Attoparsec.sepBy1'
, Attoparsec.skipMany
, Attoparsec.skipMany1
, Attoparsec.eitherP
) where
import Control.Applicative
import Control.Monad
import Data.String
import Data.ByteString (ByteString, length)
import qualified Data.ByteString.Lazy as LB
import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec
import qualified Data.Attoparsec.Combinator as Attoparsec
import Control.Monad.Trans.State
import qualified Control.Monad.Trans.Class as Trans
import Data.Word
import Data.Bits
import Data.Tuple
import Documentation.Haddock.Types (Version)
import Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8)
newtype ParserState = ParserState {
parserStateSince :: Maybe Version
} deriving (Eq, Show)
initialParserState :: ParserState
initialParserState = ParserState Nothing
newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a)
deriving (Functor, Applicative, Alternative, Monad, MonadPlus)
instance (a ~ ByteString) => IsString (Parser a) where
fromString = lift . fromString
parseOnly :: Parser a -> ByteString -> Either String (ParserState, a)
parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState)
lift :: Attoparsec.Parser a -> Parser a
lift = Parser . Trans.lift
setParserState :: ParserState -> Parser ()
setParserState = Parser . put
setSince :: Version -> Parser ()
setSince since = Parser $ modify (\st -> st {parserStateSince = Just since})
char :: Char -> Parser Char
char = lift . Attoparsec.char
char8 :: Char -> Parser Word8
char8 = lift . Attoparsec.char8
-- | Peek a unicode character and return the number of bytes that it took up
peekUnicode :: Parser (Char, Int)
peekUnicode = lift $ Attoparsec.lookAhead $ do
-- attoparsec's take fails on shorter inputs rather than truncate
bs <- Attoparsec.choice (map Attoparsec.take [4,3,2,1])
let !c = head . decodeUtf8 $ bs
!n = Data.ByteString.length . encodeUtf8 $ [c]
pure (c, fromIntegral n)
-- | Like 'satisfy', but consuming a unicode character
satisfyUnicode :: (Char -> Bool) -> Parser Char
satisfyUnicode predicate = do
(c,n) <- peekUnicode
if predicate c
then Documentation.Haddock.Parser.Monad.take n *> pure c
else fail "satsifyUnicode"
anyChar :: Parser Char
anyChar = lift Attoparsec.anyChar
notChar :: Char -> Parser Char
notChar = lift . Attoparsec.notChar
satisfy :: (Char -> Bool) -> Parser Char
satisfy = lift . Attoparsec.satisfy
peekChar :: Parser (Maybe Char)
peekChar = lift Attoparsec.peekChar
peekChar' :: Parser Char
peekChar' = lift Attoparsec.peekChar'
digit :: Parser Char
digit = lift Attoparsec.digit
letter_iso8859_15 :: Parser Char
letter_iso8859_15 = lift Attoparsec.letter_iso8859_15
letter_ascii :: Parser Char
letter_ascii = lift Attoparsec.letter_ascii
space :: Parser Char
space = lift Attoparsec.space
string :: ByteString -> Parser ByteString
string = lift . Attoparsec.string
stringCI :: ByteString -> Parser ByteString
stringCI = lift . Attoparsec.stringCI
skipSpace :: Parser ()
skipSpace = lift Attoparsec.skipSpace
skipWhile :: (Char -> Bool) -> Parser ()
skipWhile = lift . Attoparsec.skipWhile
take :: Int -> Parser ByteString
take = lift . Attoparsec.take
scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString
scan s = lift . Attoparsec.scan s
takeWhile :: (Char -> Bool) -> Parser ByteString
takeWhile = lift . Attoparsec.takeWhile
takeWhile1 :: (Char -> Bool) -> Parser ByteString
takeWhile1 = lift . Attoparsec.takeWhile1
takeTill :: (Char -> Bool) -> Parser ByteString
takeTill = lift . Attoparsec.takeTill
takeByteString :: Parser ByteString
takeByteString = lift Attoparsec.takeByteString
takeLazyByteString :: Parser LB.ByteString
takeLazyByteString = lift Attoparsec.takeLazyByteString
endOfLine :: Parser ()
endOfLine = lift Attoparsec.endOfLine
decimal :: Integral a => Parser a
decimal = lift Attoparsec.decimal
hexadecimal :: (Integral a, Bits a) => Parser a
hexadecimal = lift Attoparsec.hexadecimal
endOfInput :: Parser ()
endOfInput = lift Attoparsec.endOfInput
atEnd :: Parser Bool
atEnd = lift Attoparsec.atEnd
|