aboutsummaryrefslogtreecommitdiff
path: root/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
blob: ff6101da7f08e16d160bb378b36ad32e087832db (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-}
module Documentation.Haddock.Parser.Monad (
  module Documentation.Haddock.Parser.Monad
, Attoparsec.isDigit
, Attoparsec.isDigit_w8
, Attoparsec.isAlpha_iso8859_15
, Attoparsec.isAlpha_ascii
, Attoparsec.isSpace
, Attoparsec.isSpace_w8
, Attoparsec.inClass
, Attoparsec.notInClass
, Attoparsec.isEndOfLine
, Attoparsec.isHorizontalSpace
, Attoparsec.choice
, Attoparsec.count
, Attoparsec.option
, Attoparsec.many'
, Attoparsec.many1
, Attoparsec.many1'
, Attoparsec.manyTill
, Attoparsec.manyTill'
, Attoparsec.sepBy
, Attoparsec.sepBy'
, Attoparsec.sepBy1
, Attoparsec.sepBy1'
, Attoparsec.skipMany
, Attoparsec.skipMany1
, Attoparsec.eitherP
) where

import           Control.Applicative
import           Control.Monad
import           Data.String
import           Data.ByteString (ByteString, length)
import qualified Data.ByteString.Lazy as LB
import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec
import qualified Data.Attoparsec.Combinator as Attoparsec
import           Control.Monad.Trans.State
import qualified Control.Monad.Trans.Class as Trans
import           Data.Word
import           Data.Bits
import           Data.Tuple

import           Documentation.Haddock.Types (Version)
import           Documentation.Haddock.Utf8  (encodeUtf8, decodeUtf8)

newtype ParserState = ParserState {
  parserStateSince :: Maybe Version
} deriving (Eq, Show)

initialParserState :: ParserState
initialParserState = ParserState Nothing

newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a)
  deriving (Functor, Applicative, Alternative, Monad, MonadPlus)

instance (a ~ ByteString) => IsString (Parser a) where
  fromString = lift . fromString

parseOnly :: Parser a -> ByteString -> Either String (ParserState, a)
parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState)

lift :: Attoparsec.Parser a -> Parser a
lift = Parser . Trans.lift

setParserState :: ParserState -> Parser ()
setParserState = Parser . put

setSince :: Version -> Parser ()
setSince since = Parser $ modify (\st -> st {parserStateSince = Just since})

char :: Char -> Parser Char
char = lift . Attoparsec.char

char8 :: Char -> Parser Word8
char8 = lift . Attoparsec.char8

-- | Peek a unicode character and return the number of bytes that it took up
peekUnicode :: Parser (Char, Int)
peekUnicode = lift $ Attoparsec.lookAhead $ do
  
  -- attoparsec's take fails on shorter inputs rather than truncate
  bs <- Attoparsec.choice (map Attoparsec.take [4,3,2,1])
  
  let c = head . decodeUtf8 $ bs
      n = Data.ByteString.length . encodeUtf8 $ [c]
  pure (c, fromIntegral n)

-- | Like 'satisfy', but consuming a unicode character
satisfyUnicode :: (Char -> Bool) -> Parser Char
satisfyUnicode predicate = do
  (c,n) <- peekUnicode
  if predicate c
    then Documentation.Haddock.Parser.Monad.take n *> pure c
    else fail "satsifyUnicode"

anyChar :: Parser Char
anyChar = lift Attoparsec.anyChar

notChar :: Char -> Parser Char
notChar = lift . Attoparsec.notChar

satisfy :: (Char -> Bool) -> Parser Char
satisfy = lift . Attoparsec.satisfy

peekChar :: Parser (Maybe Char)
peekChar = lift Attoparsec.peekChar

peekChar' :: Parser Char
peekChar' = lift Attoparsec.peekChar'

digit :: Parser Char
digit = lift Attoparsec.digit

letter_iso8859_15 :: Parser Char
letter_iso8859_15 = lift Attoparsec.letter_iso8859_15

letter_ascii :: Parser Char
letter_ascii = lift Attoparsec.letter_ascii

space :: Parser Char
space = lift Attoparsec.space

string :: ByteString -> Parser ByteString
string = lift . Attoparsec.string

stringCI :: ByteString -> Parser ByteString
stringCI = lift . Attoparsec.stringCI

skipSpace :: Parser ()
skipSpace = lift Attoparsec.skipSpace

skipWhile :: (Char -> Bool) -> Parser ()
skipWhile = lift . Attoparsec.skipWhile

take :: Int -> Parser ByteString
take = lift . Attoparsec.take

scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString
scan s = lift . Attoparsec.scan s

takeWhile :: (Char -> Bool) -> Parser ByteString
takeWhile = lift . Attoparsec.takeWhile

takeWhile1 :: (Char -> Bool) -> Parser ByteString
takeWhile1 = lift . Attoparsec.takeWhile1

takeTill :: (Char -> Bool) -> Parser ByteString
takeTill = lift . Attoparsec.takeTill

takeByteString :: Parser ByteString
takeByteString = lift Attoparsec.takeByteString

takeLazyByteString :: Parser LB.ByteString
takeLazyByteString = lift Attoparsec.takeLazyByteString

endOfLine :: Parser ()
endOfLine = lift Attoparsec.endOfLine

decimal :: Integral a => Parser a
decimal = lift Attoparsec.decimal

hexadecimal :: (Integral a, Bits a) => Parser a
hexadecimal = lift Attoparsec.hexadecimal

endOfInput :: Parser ()
endOfInput = lift Attoparsec.endOfInput

atEnd :: Parser Bool
atEnd = lift Attoparsec.atEnd