aboutsummaryrefslogtreecommitdiff
path: root/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
blob: 582a0776f6071187121a18faf7b0dc039d4c4874 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE BangPatterns #-}
-- |
-- Module      :  Documentation.Haddock.Parser.Monad
-- Copyright   :  (c) Alec Theriault 2018-2019,
-- License     :  BSD-like
--
-- Maintainer  :  haddock@projects.haskell.org
-- Stability   :  experimental
-- Portability :  portable
--
-- Defines the Parsec monad over which all parsing is done and also provides
-- more efficient versions of the usual parsec combinator functions (but
-- specialized to 'Text').

module Documentation.Haddock.Parser.Monad where

import qualified Text.Parsec.Char as Parsec
import qualified Text.Parsec as Parsec
import           Text.Parsec.Pos             ( updatePosChar )
import           Text.Parsec                 ( State(..)
                                             , getParserState, setParserState )

import qualified Data.Text as T
import           Data.Text                   ( Text )

import           Control.Monad               ( mfilter )
import           Data.String                 ( IsString(..) )
import           Data.Bits                   ( Bits(..) )
import           Data.Char                   ( ord )
import           Data.List                   ( foldl' )
import           Control.Applicative as App

import           Documentation.Haddock.Types ( Version )

import           Prelude hiding (takeWhile)
import           CompatPrelude

-- | The only bit of information we really care about trudging along with us
-- through parsing is the version attached to a @\@since@ annotation - if
-- the doc even contained one.
newtype ParserState = ParserState {
  parserStateSince :: Maybe Version
} deriving (Eq, Show)

initialParserState :: ParserState
initialParserState = ParserState Nothing

setSince :: Version -> Parser ()
setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since })

type Parser = Parsec.Parsec Text ParserState

instance (a ~ Text) => IsString (Parser a) where
  fromString = fmap T.pack . Parsec.string

parseOnly :: Parser a -> Text -> Either String (ParserState, a)
parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
                  Left e -> Left (show e)
                  Right (x,s) -> Right (s,x)
  where p' = (,) <$> p <*> Parsec.getState

-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
-- consume input.
--
-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
-- more efficient.
peekChar :: Parser (Maybe Char)
peekChar = headOpt . stateInput <$> getParserState
  where headOpt t | T.null t = Nothing
                  | otherwise = Just (T.head t)
{-# INLINE peekChar #-}

-- | Fails if at the end of input. Does not consume input.
--
-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
peekChar' :: Parser Char
peekChar' = headFail . stateInput =<< getParserState
  where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF"
                   | otherwise = App.pure (T.head t)
{-# INLINE peekChar' #-}

-- | Parses the given string. Returns the parsed string.
--
-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
string :: Text -> Parser Text
string t = do
  s@State{ stateInput = inp, statePos = pos } <- getParserState
  case T.stripPrefix t inp of
    Nothing -> Parsec.parserFail "string: Failed to match the input string"
    Just inp' ->
      let pos' = T.foldl updatePosChar pos t
          s' = s{ stateInput = inp', statePos = pos' }
      in setParserState s' $> t

-- | Keep matching characters as long as the predicate function holds (and
-- return them).
--
-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile f = do
  s@State{ stateInput = inp, statePos = pos } <- getParserState
  let (t, inp') = T.span f inp
      pos' = T.foldl updatePosChar pos t
      s' = s{ stateInput = inp', statePos = pos' }
  setParserState s' $> t

-- | Like 'takeWhile', but fails if no characters matched.
--
-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 = mfilter (not . T.null) . takeWhile

-- | Scan the input text, accumulating characters as long as the scanning
-- function returns true.
scan :: (s -> Char -> Maybe s) -- ^ scan function
     -> s                      -- ^ initial state
     -> Parser Text 
scan f st = do
  s@State{ stateInput = inp, statePos = pos } <- getParserState
  go inp st pos 0 $ \inp' pos' n ->
    let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' }
    in setParserState s' $> T.take n inp
  where
    go inp s !pos !n cont
      = case T.uncons inp of
          Nothing -> cont inp pos n        -- ran out of input
          Just (c, inp') ->
             case f s c of
               Nothing -> cont inp pos n   -- scan function failed
               Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont


-- | Parse a decimal number.
decimal :: Integral a => Parser a
decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit
  where step a c = a * 10 + fromIntegral (ord c - 48)

-- | Parse a hexadecimal number.
hexadecimal :: (Integral a, Bits a) => Parser a
hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit 
  where
  step a c | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48)
           | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87)
           | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55)
    where w = ord c