{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeSynonymInstances #-}
-- |
-- Module : Documentation.Haddock.Parser.Monad
-- Copyright : (c) Alec Theriault 2018-2019,
-- License : BSD-like
--
-- Maintainer : haddock@projects.haskell.org
-- Stability : experimental
-- Portability : portable
--
-- Defines the Parsec monad over which all parsing is done and also provides
-- more efficient versions of the usual parsec combinator functions (but
-- specialized to 'Text').
module Documentation.Haddock.Parser.Monad where
import qualified Text.Parsec.Char as Parsec
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos ( updatePosChar )
import Text.Parsec ( State(..)
, getParserState, setParserState )
import qualified Data.Text as T
import Data.Text ( Text )
import Control.Monad ( mfilter )
import Data.String ( IsString(..) )
import Data.Bits ( Bits(..) )
import Data.Char ( ord )
import Data.List ( foldl' )
import Control.Applicative as App
import Documentation.Haddock.Types ( Version )
import Prelude hiding (takeWhile)
import CompatPrelude
-- | The only bit of information we really care about truding along with us
-- through parsing is the version attached to a @\@since@ annotation - if
-- the doc even contained one.
newtype ParserState = ParserState {
parserStateSince :: Maybe Version
} deriving (Eq, Show)
initialParserState :: ParserState
initialParserState = ParserState Nothing
setSince :: Version -> Parser ()
setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since })
type Parser = Parsec.Parsec Text ParserState
instance (a ~ Text) => IsString (Parser a) where
fromString = fmap T.pack . Parsec.string
parseOnly :: Parser a -> Text -> Either String (ParserState, a)
parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
Left e -> Left (show e)
Right (x,s) -> Right (s,x)
where p' = (,) <$> p <*> Parsec.getState
-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
-- consume input.
--
-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
-- more efficient.
peekChar :: Parser (Maybe Char)
peekChar = headOpt . stateInput <$> getParserState
where headOpt t | T.null t = Nothing
| otherwise = Just (T.head t)
{-# INLINE peekChar #-}
-- | Fails if at the end of input. Does not consume input.
--
-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
peekChar' :: Parser Char
peekChar' = headFail . stateInput =<< getParserState
where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF"
| otherwise = App.pure (T.head t)
{-# INLINE peekChar' #-}
-- | Parses the given string. Returns the parsed string.
--
-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
string :: Text -> Parser Text
string t = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
case T.stripPrefix t inp of
Nothing -> Parsec.parserFail "string: Failed to match the input string"
Just inp' ->
let pos' = T.foldl updatePosChar pos t
s' = s{ stateInput = inp', statePos = pos' }
in setParserState s' $> t
-- | Keep matching characters as long as the predicate function holds (and
-- return them).
--
-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient.
takeWhile :: (Char -> Bool) -> Parser Text
takeWhile f = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
let (t, inp') = T.span f inp
pos' = T.foldl updatePosChar pos t
s' = s{ stateInput = inp', statePos = pos' }
setParserState s' $> t
-- | Like 'takeWhile', but fails if no characters matched.
--
-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient.
takeWhile1 :: (Char -> Bool) -> Parser Text
takeWhile1 = mfilter (not . T.null) . takeWhile
-- | Scan the input text, accumulating characters as long as the scanning
-- function returns true.
scan :: (s -> Char -> Maybe s) -- ^ scan function
-> s -- ^ initial state
-> Parser Text
scan f st = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
go inp st pos 0 $ \inp' pos' n ->
let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' }
in setParserState s' $> T.take n inp
where
go inp s !pos !n cont
= case T.uncons inp of
Nothing -> cont inp pos n -- ran out of input
Just (c, inp') ->
case f s c of
Nothing -> cont inp pos n -- scan function failed
Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont
-- | Parse a decimal number.
decimal :: Integral a => Parser a
decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit
where step a c = a * 10 + fromIntegral (ord c - 48)
-- | Parse a hexadecimal number.
hexadecimal :: (Integral a, Bits a) => Parser a
hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit
where
step a c | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48)
| w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87)
| otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55)
where w = ord c