{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeSynonymInstances #-} -- | -- Module : Documentation.Haddock.Parser.Monad -- Copyright : (c) Alec Theriault 2018-2019, -- License : BSD-like -- -- Maintainer : haddock@projects.haskell.org -- Stability : experimental -- Portability : portable -- -- Defines the Parsec monad over which all parsing is done and also provides -- more efficient versions of the usual parsec combinator functions (but -- specialized to 'Text'). module Documentation.Haddock.Parser.Monad where import qualified Text.Parsec.Char as Parsec import qualified Text.Parsec as Parsec import Text.Parsec.Pos ( updatePosChar ) import Text.Parsec ( State(..) , getParserState, setParserState ) import qualified Data.Text as T import Data.Text ( Text ) import Control.Monad ( mfilter ) import Data.String ( IsString(..) ) import Data.Bits ( Bits(..) ) import Data.Char ( ord ) import Data.List ( foldl' ) import Control.Applicative as App import Documentation.Haddock.Types ( Version ) import Prelude hiding (takeWhile) import CompatPrelude -- | The only bit of information we really care about truding along with us -- through parsing is the version attached to a @\@since@ annotation - if -- the doc even contained one. newtype ParserState = ParserState { parserStateSince :: Maybe Version } deriving (Eq, Show) initialParserState :: ParserState initialParserState = ParserState Nothing setSince :: Version -> Parser () setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since }) type Parser = Parsec.Parsec Text ParserState instance (a ~ Text) => IsString (Parser a) where fromString = fmap T.pack . Parsec.string parseOnly :: Parser a -> Text -> Either String (ParserState, a) parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of Left e -> Left (show e) Right (x,s) -> Right (s,x) where p' = (,) <$> p <*> Parsec.getState -- | Always succeeds, but returns 'Nothing' if at the end of input. Does not -- consume input. -- -- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but -- more efficient. peekChar :: Parser (Maybe Char) peekChar = headOpt . stateInput <$> getParserState where headOpt t | T.null t = Nothing | otherwise = Just (T.head t) {-# INLINE peekChar #-} -- | Fails if at the end of input. Does not consume input. -- -- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient. peekChar' :: Parser Char peekChar' = headFail . stateInput =<< getParserState where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF" | otherwise = App.pure (T.head t) {-# INLINE peekChar' #-} -- | Parses the given string. Returns the parsed string. -- -- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient. string :: Text -> Parser Text string t = do s@State{ stateInput = inp, statePos = pos } <- getParserState case T.stripPrefix t inp of Nothing -> Parsec.parserFail "string: Failed to match the input string" Just inp' -> let pos' = T.foldl updatePosChar pos t s' = s{ stateInput = inp', statePos = pos' } in setParserState s' $> t -- | Keep matching characters as long as the predicate function holds (and -- return them). -- -- Equivalent to @fmap T.pack . Parsec.many@, but more efficient. takeWhile :: (Char -> Bool) -> Parser Text takeWhile f = do s@State{ stateInput = inp, statePos = pos } <- getParserState let (t, inp') = T.span f inp pos' = T.foldl updatePosChar pos t s' = s{ stateInput = inp', statePos = pos' } setParserState s' $> t -- | Like 'takeWhile', but fails if no characters matched. -- -- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient. takeWhile1 :: (Char -> Bool) -> Parser Text takeWhile1 = mfilter (not . T.null) . takeWhile -- | Scan the input text, accumulating characters as long as the scanning -- function returns true. scan :: (s -> Char -> Maybe s) -- ^ scan function -> s -- ^ initial state -> Parser Text scan f st = do s@State{ stateInput = inp, statePos = pos } <- getParserState go inp st pos 0 $ \inp' pos' n -> let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' } in setParserState s' $> T.take n inp where go inp s !pos !n cont = case T.uncons inp of Nothing -> cont inp pos n -- ran out of input Just (c, inp') -> case f s c of Nothing -> cont inp pos n -- scan function failed Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont -- | Parse a decimal number. decimal :: Integral a => Parser a decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit where step a c = a * 10 + fromIntegral (ord c - 48) -- | Parse a hexadecimal number. hexadecimal :: (Integral a, Bits a) => Parser a hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit where step a c | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) where w = ord c