diff options
Diffstat (limited to 'haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs')
-rw-r--r-- | haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs new file mode 100644 index 00000000..f6ec3b32 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs @@ -0,0 +1,485 @@ +{-# LANGUAGE BangPatterns, GADTs, OverloadedStrings, RecordWildCards #-} +-- | +-- Module : Data.Attoparsec.ByteString.Internal +-- Copyright : Bryan O'Sullivan 2007-2014 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient parser combinators for 'ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Internal + ( + -- * Parser types + Parser + , Result + + -- * Running parsers + , parse + , parseOnly + + -- * Combinators + , module Data.Attoparsec.Combinator + + -- * Parsing individual bytes + , satisfy + , satisfyWith + , anyWord8 + , skip + , word8 + , notWord8 + + -- ** Lookahead + , peekWord8 + , peekWord8' + + -- ** Byte classes + , inClass + , notInClass + + -- * Parsing more complicated structures + , storable + + -- * Efficient string handling + , skipWhile + , string + , stringTransform + , take + , scan + , runScanner + , takeWhile + , takeWhile1 + , takeTill + + -- ** Consume all remaining input + , takeByteString + , takeLazyByteString + + -- * Utilities + , endOfLine + , endOfInput + , match + , atEnd + ) where + +import Control.Applicative ((<|>), (<$>)) +import Control.Monad (when) +import Data.Attoparsec.ByteString.Buffer (Buffer, buffer) +import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8) +import Data.Attoparsec.Combinator ((<?>)) +import Data.Attoparsec.Internal +import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) +import Data.Attoparsec.Internal.Types hiding (Parser, Failure, Success) +import Data.ByteString (ByteString) +import Data.Word (Word8) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr (castPtr, minusPtr, plusPtr) +import Foreign.Storable (Storable(peek, sizeOf)) +import Prelude hiding (getChar, succ, take, takeWhile) +import qualified Data.Attoparsec.ByteString.Buffer as Buf +import qualified Data.Attoparsec.Internal.Types as T +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Unsafe as B + +type Parser = T.Parser ByteString +type Result = IResult ByteString +type Failure r = T.Failure ByteString Buffer r +type Success a r = T.Success ByteString Buffer a r + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- > where isDigit w = w >= 48 && w <= 57 +satisfy :: (Word8 -> Bool) -> Parser Word8 +satisfy p = do + h <- peekWord8' + if p h + then advance 1 >> return h + else fail "satisfy" +{-# INLINE satisfy #-} + +-- | The parser @skip p@ succeeds for any byte for which the predicate +-- @p@ returns 'True'. +-- +-- >skipDigit = skip isDigit +-- > where isDigit w = w >= 48 && w <= 57 +skip :: (Word8 -> Bool) -> Parser () +skip p = do + h <- peekWord8' + if p h + then advance 1 + else fail "skip" + +-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if +-- the predicate @p@ returns 'True' on the transformed value. The +-- parser returns the transformed byte that was parsed. +satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a +satisfyWith f p = do + h <- peekWord8' + let c = f h + if p c + then advance 1 >> return c + else fail "satisfyWith" +{-# INLINE satisfyWith #-} + +storable :: Storable a => Parser a +storable = hack undefined + where + hack :: Storable b => b -> Parser b + hack dummy = do + (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy) + return . B.inlinePerformIO . withForeignPtr fp $ \p -> + peek (castPtr $ p `plusPtr` o) + +-- | Consume @n@ bytes of input, but succeed only if the predicate +-- returns 'True'. +takeWith :: Int -> (ByteString -> Bool) -> Parser ByteString +takeWith n0 p = do + let n = max n0 0 + s <- ensure n + if p s + then advance n >> return s + else fail "takeWith" + +-- | Consume exactly @n@ bytes of input. +take :: Int -> Parser ByteString +take n = takeWith n (const True) +{-# INLINE take #-} + +-- | @string s@ parses a sequence of bytes that identically match +-- @s@. Returns the parsed string (i.e. @s@). This parser consumes no +-- input if it fails (even if a partial match). +-- +-- /Note/: The behaviour of this parser is different to that of the +-- similarly-named parser in Parsec, as this one is all-or-nothing. +-- To illustrate the difference, the following parser will fail under +-- Parsec given an input of @\"for\"@: +-- +-- >string "foo" <|> string "for" +-- +-- The reason for its failure is that the first branch is a +-- partial match, and will consume the letters @\'f\'@ and @\'o\'@ +-- before failing. In attoparsec, the above parser will /succeed/ on +-- that input, because the failed first branch will consume nothing. +string :: ByteString -> Parser ByteString +string s = takeWith (B.length s) (==s) +{-# INLINE string #-} + +stringTransform :: (ByteString -> ByteString) -> ByteString + -> Parser ByteString +stringTransform f s = takeWith (B.length s) ((==f s) . f) +{-# INLINE stringTransform #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Word8 -> Bool) -> Parser () +skipWhile p = go + where + go = do + t <- B8.takeWhile p <$> get + continue <- inputSpansChunks (B.length t) + when continue go +{-# INLINE skipWhile #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs. Careless use will thus result +-- in an infinite loop. +takeTill :: (Word8 -> Bool) -> Parser ByteString +takeTill p = takeWhile (not . p) +{-# INLINE takeTill #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs. Careless use will thus result +-- in an infinite loop. +takeWhile :: (Word8 -> Bool) -> Parser ByteString +takeWhile p = (B.concat . reverse) `fmap` go [] + where + go acc = do + s <- B8.takeWhile p <$> get + continue <- inputSpansChunks (B.length s) + if continue + then go (s:acc) + else return (s:acc) +{-# INLINE takeWhile #-} + +takeRest :: Parser [ByteString] +takeRest = go [] + where + go acc = do + input <- wantInput + if input + then do + s <- get + advance (B.length s) + go (s:acc) + else return (reverse acc) + +-- | Consume all remaining input and return it as a single string. +takeByteString :: Parser ByteString +takeByteString = B.concat `fmap` takeRest + +-- | Consume all remaining input and return it as a single string. +takeLazyByteString :: Parser L.ByteString +takeLazyByteString = L.fromChunks `fmap` takeRest + +data T s = T {-# UNPACK #-} !Int s + +scan_ :: (s -> [ByteString] -> Parser r) -> s -> (s -> Word8 -> Maybe s) + -> Parser r +scan_ f s0 p = go [] s0 + where + go acc s1 = do + let scanner (B.PS fp off len) = + withForeignPtr fp $ \ptr0 -> do + let start = ptr0 `plusPtr` off + end = start `plusPtr` len + inner ptr !s + | ptr < end = do + w <- peek ptr + case p s w of + Just s' -> inner (ptr `plusPtr` 1) s' + _ -> done (ptr `minusPtr` start) s + | otherwise = done (ptr `minusPtr` start) s + done !i !s = return (T i s) + inner start s1 + bs <- get + let T i s' = inlinePerformIO $ scanner bs + !h = B.unsafeTake i bs + continue <- inputSpansChunks i + if continue + then go (h:acc) s' + else f s' (h:acc) +{-# INLINE scan_ #-} + +-- | A stateful scanner. The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs. Careless use will thus result +-- in an infinite loop. +scan :: s -> (s -> Word8 -> Maybe s) -> Parser ByteString +scan = scan_ $ \_ chunks -> + case chunks of + [x] -> return x + xs -> return $! B.concat $ reverse xs +{-# INLINE scan #-} + +-- | Like 'scan', but generalized to return the final state of the +-- scanner. +runScanner :: s -> (s -> Word8 -> Maybe s) -> Parser (ByteString, s) +runScanner = scan_ $ \s xs -> return (B.concat (reverse xs), s) +{-# INLINE runScanner #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Word8 -> Bool) -> Parser ByteString +takeWhile1 p = do + (`when` demandInput) =<< endOfChunk + s <- B8.takeWhile p <$> get + let len = B.length s + if len == 0 + then fail "takeWhile1" + else do + advance len + eoc <- endOfChunk + if eoc + then (s<>) `fmap` takeWhile p + else return s + +-- | Match any byte in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal @\'-\'@ to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Word8 -> Bool +inClass s = (`memberWord8` mySet) + where mySet = charClass s + {-# NOINLINE mySet #-} +{-# INLINE inClass #-} + +-- | Match any byte not in a set. +notInClass :: String -> Word8 -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Match any byte. +anyWord8 :: Parser Word8 +anyWord8 = satisfy $ const True +{-# INLINE anyWord8 #-} + +-- | Match a specific byte. +word8 :: Word8 -> Parser Word8 +word8 c = satisfy (== c) <?> show c +{-# INLINE word8 #-} + +-- | Match any byte except the given one. +notWord8 :: Word8 -> Parser Word8 +notWord8 c = satisfy (/= c) <?> "not " ++ show c +{-# INLINE notWord8 #-} + +-- | Match any byte, to perform lookahead. Returns 'Nothing' if end of +-- input has been reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs. Careless use will thus result +-- in an infinite loop. +peekWord8 :: Parser (Maybe Word8) +peekWord8 = T.Parser $ \t pos@(Pos pos_) more _lose succ -> + case () of + _| pos_ < Buf.length t -> + let !w = Buf.unsafeIndex t pos_ + in succ t pos more (Just w) + | more == Complete -> + succ t pos more Nothing + | otherwise -> + let succ' t' pos' more' = let !w = Buf.unsafeIndex t' pos_ + in succ t' pos' more' (Just w) + lose' t' pos' more' = succ t' pos' more' Nothing + in prompt t pos more lose' succ' +{-# INLINE peekWord8 #-} + +-- | Match any byte, to perform lookahead. Does not consume any +-- input, but will fail if end of input has been reached. +peekWord8' :: Parser Word8 +peekWord8' = T.Parser $ \t pos more lose succ -> + if lengthAtLeast pos 1 t + then succ t pos more (Buf.unsafeIndex t (fromPos pos)) + else let succ' t' pos' more' bs' = succ t' pos' more' $! B.unsafeHead bs' + in ensureSuspended 1 t pos more lose succ' +{-# INLINE peekWord8' #-} + +-- | Match either a single newline character @\'\\n\'@, or a carriage +-- return followed by a newline character @\"\\r\\n\"@. +endOfLine :: Parser () +endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ()) + +-- | Terminal failure continuation. +failK :: Failure a +failK t (Pos pos) _more stack msg = Fail (Buf.unsafeDrop pos t) stack msg +{-# INLINE failK #-} + +-- | Terminal success continuation. +successK :: Success a a +successK t (Pos pos) _more a = Done (Buf.unsafeDrop pos t) a +{-# INLINE successK #-} + +-- | Run a parser. +parse :: Parser a -> ByteString -> Result a +parse m s = T.runParser m (buffer s) (Pos 0) Incomplete failK successK +{-# INLINE parse #-} + +-- | Run a parser that cannot be resupplied via a 'Partial' result. +-- +-- This function does not force a parser to consume all of its input. +-- Instead, any residual input will be discarded. To force a parser +-- to consume all of its input, use something like this: +-- +-- @ +--'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput') +-- @ +parseOnly :: Parser a -> ByteString -> Either String a +parseOnly m s = case T.runParser m (buffer s) (Pos 0) Complete failK successK of + Fail _ _ err -> Left err + Done _ a -> Right a + _ -> error "parseOnly: impossible error!" +{-# INLINE parseOnly #-} + +get :: Parser ByteString +get = T.Parser $ \t pos more _lose succ -> + succ t pos more (Buf.unsafeDrop (fromPos pos) t) +{-# INLINE get #-} + +endOfChunk :: Parser Bool +endOfChunk = T.Parser $ \t pos more _lose succ -> + succ t pos more (fromPos pos == Buf.length t) +{-# INLINE endOfChunk #-} + +inputSpansChunks :: Int -> Parser Bool +inputSpansChunks i = T.Parser $ \t pos_ more _lose succ -> + let pos = pos_ + Pos i + in if fromPos pos < Buf.length t || more == Complete + then succ t pos more False + else let lose' t' pos' more' = succ t' pos' more' False + succ' t' pos' more' = succ t' pos' more' True + in prompt t pos more lose' succ' +{-# INLINE inputSpansChunks #-} + +advance :: Int -> Parser () +advance n = T.Parser $ \t pos more _lose succ -> + succ t (pos + Pos n) more () +{-# INLINE advance #-} + +ensureSuspended :: Int -> Buffer -> Pos -> More + -> Failure r + -> Success ByteString r + -> Result r +ensureSuspended n t pos more lose succ = + runParser (demandInput >> go) t pos more lose succ + where go = T.Parser $ \t' pos' more' lose' succ' -> + if lengthAtLeast pos' n t' + then succ' t' pos' more' (substring pos (Pos n) t') + else runParser (demandInput >> go) t' pos' more' lose' succ' + +-- | If at least @n@ elements of input are available, return the +-- current input, otherwise fail. +ensure :: Int -> Parser ByteString +ensure n = T.Parser $ \t pos more lose succ -> + if lengthAtLeast pos n t + then succ t pos more (substring pos (Pos n) t) + -- The uncommon case is kept out-of-line to reduce code size: + else ensureSuspended n t pos more lose succ +-- Non-recursive so the bounds check can be inlined: +{-# INLINE ensure #-} + +-- | Return both the result of a parse and the portion of the input +-- that was consumed while it was being parsed. +match :: Parser a -> Parser (ByteString, a) +match p = T.Parser $ \t pos more lose succ -> + let succ' t' pos' more' a = + succ t' pos' more' (substring pos (pos'-pos) t', a) + in runParser p t pos more lose succ' + +lengthAtLeast :: Pos -> Int -> Buffer -> Bool +lengthAtLeast (Pos pos) n bs = Buf.length bs >= pos + n +{-# INLINE lengthAtLeast #-} + +substring :: Pos -> Pos -> Buffer -> ByteString +substring (Pos pos) (Pos n) = Buf.substring pos n +{-# INLINE substring #-} |