diff options
Diffstat (limited to 'haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString')
4 files changed, 1220 insertions, 0 deletions
diff --git a/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Buffer.hs b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Buffer.hs new file mode 100644 index 00000000..5e32d022 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Buffer.hs @@ -0,0 +1,151 @@ +{-# LANGUAGE BangPatterns #-} +-- | +-- Module      :  Data.Attoparsec.ByteString.Buffer +-- Copyright   :  Bryan O'Sullivan 2007-2014 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  GHC +-- +-- An "immutable" buffer that supports cheap appends. +-- +-- A Buffer is divided into an immutable read-only zone, followed by a +-- mutable area that we've preallocated, but not yet written to. +-- +-- We overallocate at the end of a Buffer so that we can cheaply +-- append.  Since a user of an existing Buffer cannot see past the end +-- of its immutable zone into the data that will change during an +-- append, this is safe. +-- +-- Once we run out of space at the end of a Buffer, we do the usual +-- doubling of the buffer size. +-- +-- The fact of having a mutable buffer really helps with performance, +-- but it does have a consequence: if someone misuses the Partial API +-- that attoparsec uses by calling the same continuation repeatedly +-- (which never makes sense in practice), they could overwrite data. +-- +-- Since the API *looks* pure, it should *act* pure, too, so we use +-- two generation counters (one mutable, one immutable) to track the +-- number of appends to a mutable buffer. If the counters ever get out +-- of sync, someone is appending twice to a mutable buffer, so we +-- duplicate the entire buffer in order to preserve the immutability +-- of its older self. +-- +-- While we could go a step further and gain protection against API +-- abuse on a multicore system, by use of an atomic increment +-- instruction to bump the mutable generation counter, that would be +-- very expensive, and feels like it would also be in the realm of the +-- ridiculous.  Clients should never call a continuation more than +-- once; we lack a linear type system that could enforce this; and +-- there's only so far we should go to accommodate broken uses. + +module Data.Attoparsec.ByteString.Buffer +    ( +      Buffer +    , buffer +    , unbuffer +    , pappend +    , length +    , unsafeIndex +    , substring +    , unsafeDrop +    ) where + +import Control.Exception (assert) +import Data.ByteString.Internal (ByteString(..), memcpy, nullForeignPtr) +import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) +import Data.List (foldl1') +import Data.Monoid (Monoid(..)) +import Data.Word (Word8) +import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) +import Foreign.Ptr (castPtr, plusPtr) +import Foreign.Storable (peek, peekByteOff, poke, sizeOf) +import GHC.ForeignPtr (mallocPlainForeignPtrBytes) +import Prelude hiding (length) + +data Buffer = Buf { +      _fp  :: {-# UNPACK #-} !(ForeignPtr Word8) +    , _off :: {-# UNPACK #-} !Int +    , _len :: {-# UNPACK #-} !Int +    , _cap :: {-# UNPACK #-} !Int +    , _gen :: {-# UNPACK #-} !Int +    } + +instance Show Buffer where +    showsPrec p = showsPrec p . unbuffer + +-- | The initial 'Buffer' has no mutable zone, so we can avoid all +-- copies in the (hopefully) common case of no further input being fed +-- to us. +buffer :: ByteString -> Buffer +buffer (PS fp off len) = Buf fp off len len 0 + +unbuffer :: Buffer -> ByteString +unbuffer (Buf fp off len _ _) = PS fp off len + +instance Monoid Buffer where +    mempty = Buf nullForeignPtr 0 0 0 0 + +    mappend (Buf _ _ _ 0 _) b        = b +    mappend a (Buf _ _ _ 0 _)        = a +    mappend buf (Buf fp off len _ _) = append buf fp off len + +    mconcat [] = mempty +    mconcat xs = foldl1' mappend xs + +pappend :: Buffer -> ByteString -> Buffer +pappend (Buf _ _ _ 0 _) (PS fp off len) = Buf fp off len 0 0 +pappend buf (PS fp off len) = append buf fp off len + +append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer +append (Buf fp0 off0 len0 cap0 gen0) !fp1 !off1 !len1 = +  inlinePerformIO . withForeignPtr fp0 $ \ptr0 -> +    withForeignPtr fp1 $ \ptr1 -> do +      let genSize = sizeOf (0::Int) +          newlen  = len0 + len1 +      gen <- if gen0 == 0 +             then return 0 +             else peek (castPtr ptr0) +      if gen == gen0 && newlen <= cap0 +        then do +          let newgen = gen + 1 +          poke (castPtr ptr0) newgen +          memcpy (ptr0 `plusPtr` (off0+len0)) +                 (ptr1 `plusPtr` off1) +                 (fromIntegral len1) +          return (Buf fp0 off0 newlen cap0 newgen) +        else do +          let newcap = newlen * 2 +          fp <- mallocPlainForeignPtrBytes (newcap + genSize) +          withForeignPtr fp $ \ptr_ -> do +            let ptr    = ptr_ `plusPtr` genSize +                newgen = 1 +            poke (castPtr ptr_) newgen +            memcpy ptr (ptr0 `plusPtr` off0) (fromIntegral len0) +            memcpy (ptr `plusPtr` len0) (ptr1 `plusPtr` off1) +                   (fromIntegral len1) +            return (Buf fp genSize newlen newcap newgen) + +length :: Buffer -> Int +length (Buf _ _ len _ _) = len +{-# INLINE length #-} + +unsafeIndex :: Buffer -> Int -> Word8 +unsafeIndex (Buf fp off len _ _) i = assert (i >= 0 && i < len) . +    inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i) +{-# INLINE unsafeIndex #-} + +substring :: Int -> Int -> Buffer -> ByteString +substring s l (Buf fp off len _ _) = +  assert (s >= 0 && s <= len) . +  assert (l >= 0 && l <= len-s) $ +  PS fp (off+s) l +{-# INLINE substring #-} + +unsafeDrop :: Int -> Buffer -> ByteString +unsafeDrop s (Buf fp off len _ _) = +  assert (s >= 0 && s <= len) $ +  PS fp (off+s) (len-s) +{-# INLINE unsafeDrop #-} diff --git a/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Char8.hs new file mode 100644 index 00000000..eda8fd88 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Char8.hs @@ -0,0 +1,469 @@ +{-# LANGUAGE BangPatterns, FlexibleInstances, TypeFamilies, +    TypeSynonymInstances, GADTs #-} +{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-warnings-deprecations #-} + +-- | +-- Module      :  Data.Attoparsec.ByteString.Char8 +-- Copyright   :  Bryan O'Sullivan 2007-2014 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient, character-oriented combinator parsing for +-- 'B.ByteString' strings, loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Char8 +    ( +    -- * Character encodings +    -- $encodings + +    -- * Parser types +      Parser +    , A.Result +    , A.IResult(..) +    , I.compareResults + +    -- * Running parsers +    , A.parse +    , A.feed +    , A.parseOnly +    , A.parseWith +    , A.parseTest + +    -- ** Result conversion +    , A.maybeResult +    , A.eitherResult + +    -- * Parsing individual characters +    , char +    , char8 +    , anyChar +    , notChar +    , satisfy + +    -- ** Lookahead +    , peekChar +    , peekChar' + +    -- ** Special character parsers +    , digit +    , letter_iso8859_15 +    , letter_ascii +    , space + +    -- ** Fast predicates +    , isDigit +    , isDigit_w8 +    , isAlpha_iso8859_15 +    , isAlpha_ascii +    , isSpace +    , isSpace_w8 + +    -- *** Character classes +    , inClass +    , notInClass + +    -- * Efficient string handling +    , I.string +    , stringCI +    , skipSpace +    , skipWhile +    , I.take +    , scan +    , takeWhile +    , takeWhile1 +    , takeTill + +    -- ** String combinators +    -- $specalt +    , (.*>) +    , (<*.) + +    -- ** Consume all remaining input +    , I.takeByteString +    , I.takeLazyByteString + +    -- * Text parsing +    , I.endOfLine +    , isEndOfLine +    , isHorizontalSpace + +    -- * Numeric parsers +    , decimal +    , hexadecimal +    , signed +    , Number(..) + +    -- * Combinators +    , try +    , (<?>) +    , choice +    , count +    , option +    , many' +    , many1 +    , many1' +    , manyTill +    , manyTill' +    , sepBy +    , sepBy' +    , sepBy1 +    , sepBy1' +    , skipMany +    , skipMany1 +    , eitherP +    , I.match +    -- * State observation and manipulation functions +    , I.endOfInput +    , I.atEnd +    ) where + +import Control.Applicative ((*>), (<*), (<$>), (<|>)) +import Data.Attoparsec.ByteString.FastSet (charClass, memberChar) +import Data.Attoparsec.ByteString.Internal (Parser) +import Data.Attoparsec.Combinator +import Data.Attoparsec.Number (Number(..)) +import Data.Bits (Bits, (.|.), shiftL) +import Data.ByteString.Internal (c2w, w2c) +import Data.Int (Int8, Int16, Int32, Int64) +import Data.String (IsString(..)) +import Data.Word (Word8, Word16, Word32, Word64, Word) +import Prelude hiding (takeWhile) +import qualified Data.Attoparsec.ByteString as A +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B + +instance (a ~ B.ByteString) => IsString (Parser a) where +    fromString = I.string . B.pack + +-- $encodings +-- +-- This module is intended for parsing text that is +-- represented using an 8-bit character set, e.g. ASCII or +-- ISO-8859-15.  It /does not/ make any attempt to deal with character +-- encodings, multibyte characters, or wide characters.  In +-- particular, all attempts to use characters above code point U+00FF +-- will give wrong answers. +-- +-- Code points below U+0100 are simply translated to and from their +-- numeric values, so e.g. the code point U+00A4 becomes the byte +-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic +-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF +-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@. + +-- ASCII-specific but fast, oh yes. +toLower :: Word8 -> Word8 +toLower w | w >= 65 && w <= 90 = w + 32 +          | otherwise          = w + +-- | Satisfy a literal string, ignoring case. +stringCI :: B.ByteString -> Parser B.ByteString +stringCI = I.stringTransform (B8.map toLower) +{-# INLINE stringCI #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Char -> Bool) -> Parser B.ByteString +takeWhile1 p = I.takeWhile1 (p . w2c) +{-# INLINE takeWhile1 #-} + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- >    where isDigit c = c >= '0' && c <= '9' +satisfy :: (Char -> Bool) -> Parser Char +satisfy = I.satisfyWith w2c +{-# INLINE satisfy #-} + +-- | Match a letter, in the ISO-8859-15 encoding. +letter_iso8859_15 :: Parser Char +letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15" +{-# INLINE letter_iso8859_15 #-} + +-- | Match a letter, in the ASCII encoding. +letter_ascii :: Parser Char +letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii" +{-# INLINE letter_ascii #-} + +-- | A fast alphabetic predicate for the ISO-8859-15 encoding +-- +-- /Note/: For all character encodings other than ISO-8859-15, and +-- almost all Unicode code points above U+00A3, this predicate gives +-- /wrong answers/. +isAlpha_iso8859_15 :: Char -> Bool +isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || +                       (c >= '\166' && moby c) +  where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247" +        {-# NOINLINE moby #-} +{-# INLINE isAlpha_iso8859_15 #-} + +-- | A fast alphabetic predicate for the ASCII encoding +-- +-- /Note/: For all character encodings other than ASCII, and +-- almost all Unicode code points above U+007F, this predicate gives +-- /wrong answers/. +isAlpha_ascii :: Char -> Bool +isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +{-# INLINE isAlpha_ascii #-} + +-- | Parse a single digit. +digit :: Parser Char +digit = satisfy isDigit <?> "digit" +{-# INLINE digit #-} + +-- | A fast digit predicate. +isDigit :: Char -> Bool +isDigit c = c >= '0' && c <= '9' +{-# INLINE isDigit #-} + +-- | A fast digit predicate. +isDigit_w8 :: Word8 -> Bool +isDigit_w8 w = w >= 48 && w <= 57 +{-# INLINE isDigit_w8 #-} + +-- | Match any character. +anyChar :: Parser Char +anyChar = satisfy $ const True +{-# INLINE anyChar #-} + +-- | Match any character, to perform lookahead. Returns 'Nothing' if +-- end of input has been reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +peekChar :: Parser (Maybe Char) +peekChar = (fmap w2c) `fmap` I.peekWord8 +{-# INLINE peekChar #-} + +-- | Match any character, to perform lookahead.  Does not consume any +-- input, but will fail if end of input has been reached. +peekChar' :: Parser Char +peekChar' = w2c `fmap` I.peekWord8' +{-# INLINE peekChar' #-} + +-- | Fast predicate for matching ASCII space characters. +-- +-- /Note/: This predicate only gives correct answers for the ASCII +-- encoding.  For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. For a Unicode-aware and only slightly slower predicate, +-- use 'Data.Char.isSpace' +isSpace :: Char -> Bool +isSpace c = (c == ' ') || ('\t' <= c && c <= '\r') +{-# INLINE isSpace #-} + +-- | Fast 'Word8' predicate for matching ASCII space characters. +isSpace_w8 :: Word8 -> Bool +isSpace_w8 w = (w == 32) || (9 <= w && w <= 13) +{-# INLINE isSpace_w8 #-} + + +-- | Parse a space character. +-- +-- /Note/: This parser only gives correct answers for the ASCII +-- encoding.  For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. +space :: Parser Char +space = satisfy isSpace <?> "space" +{-# INLINE space #-} + +-- | Match a specific character. +char :: Char -> Parser Char +char c = satisfy (== c) <?> [c] +{-# INLINE char #-} + +-- | Match a specific character, but return its 'Word8' value. +char8 :: Char -> Parser Word8 +char8 c = I.satisfy (== c2w c) <?> [c] +{-# INLINE char8 #-} + +-- | Match any character except the given one. +notChar :: Char -> Parser Char +notChar c = satisfy (/= c) <?> "not " ++ [c] +{-# INLINE notChar #-} + +-- | Match any character in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal \'-\' to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Char -> Bool +inClass s = (`memberChar` mySet) +    where mySet = charClass s +{-# INLINE inClass #-} + +-- | Match any character not in a set. +notInClass :: String -> Char -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +takeWhile :: (Char -> Bool) -> Parser B.ByteString +takeWhile p = I.takeWhile (p . w2c) +{-# INLINE takeWhile #-} + +-- | A stateful scanner.  The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString +scan s0 p = I.scan s0 (\s -> p s . w2c) +{-# INLINE scan #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +takeTill :: (Char -> Bool) -> Parser B.ByteString +takeTill p = I.takeTill (p . w2c) +{-# INLINE takeTill #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Char -> Bool) -> Parser () +skipWhile p = I.skipWhile (p . w2c) +{-# INLINE skipWhile #-} + +-- | Skip over white space. +skipSpace :: Parser () +skipSpace = I.skipWhile isSpace_w8 +{-# INLINE skipSpace #-} + +-- $specalt +-- +-- If you enable the @OverloadedStrings@ language extension, you can +-- use the '*>' and '<*' combinators to simplify the common task of +-- matching a statically known string, then immediately parsing +-- something else. +-- +-- Instead of writing something like this: +-- +-- @ +--'I.string' \"foo\" '*>' wibble +-- @ +-- +-- Using @OverloadedStrings@, you can omit the explicit use of +-- 'I.string', and write a more compact version: +-- +-- @ +-- \"foo\" '*>' wibble +-- @ +-- +-- (Note: the '.*>' and '<*.' combinators that were originally +-- provided for this purpose are obsolete and unnecessary, and will be +-- removed in the next major version.) + +-- | /Obsolete/. A type-specialized version of '*>' for +-- 'B.ByteString'. Use '*>' instead. +(.*>) :: B.ByteString -> Parser a -> Parser a +s .*> f = I.string s *> f +{-# DEPRECATED (.*>) "This is no longer necessary, and will be removed. Use '*>' instead." #-} + +-- | /Obsolete/. A type-specialized version of '<*' for +-- 'B.ByteString'. Use '<*' instead. +(<*.) :: Parser a -> B.ByteString -> Parser a +f <*. s = f <* I.string s +{-# DEPRECATED (<*.) "This is no longer necessary, and will be removed. Use '<*' instead." #-} + +-- | A predicate that matches either a carriage return @\'\\r\'@ or +-- newline @\'\\n\'@ character. +isEndOfLine :: Word8 -> Bool +isEndOfLine w = w == 13 || w == 10 +{-# INLINE isEndOfLine #-} + +-- | A predicate that matches either a space @\' \'@ or horizontal tab +-- @\'\\t\'@ character. +isHorizontalSpace :: Word8 -> Bool +isHorizontalSpace w = w == 32 || w == 9 +{-# INLINE isHorizontalSpace #-} + +-- | Parse and decode an unsigned hexadecimal number.  The hex digits +-- @\'a\'@ through @\'f\'@ may be upper or lower case. +-- +-- This parser does not accept a leading @\"0x\"@ string. +hexadecimal :: (Integral a, Bits a) => Parser a +hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit +  where +    isHexDigit w = (w >= 48 && w <= 57) || +                   (w >= 97 && w <= 102) || +                   (w >= 65 && w <= 70) +    step a w | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48) +             | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87) +             | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55) +{-# SPECIALISE hexadecimal :: Parser Int #-} +{-# SPECIALISE hexadecimal :: Parser Int8 #-} +{-# SPECIALISE hexadecimal :: Parser Int16 #-} +{-# SPECIALISE hexadecimal :: Parser Int32 #-} +{-# SPECIALISE hexadecimal :: Parser Int64 #-} +{-# SPECIALISE hexadecimal :: Parser Integer #-} +{-# SPECIALISE hexadecimal :: Parser Word #-} +{-# SPECIALISE hexadecimal :: Parser Word8 #-} +{-# SPECIALISE hexadecimal :: Parser Word16 #-} +{-# SPECIALISE hexadecimal :: Parser Word32 #-} +{-# SPECIALISE hexadecimal :: Parser Word64 #-} + +-- | Parse and decode an unsigned decimal number. +decimal :: Integral a => Parser a +decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig +  where isDig w  = w >= 48 && w <= 57 +        step a w = a * 10 + fromIntegral (w - 48) +{-# SPECIALISE decimal :: Parser Int #-} +{-# SPECIALISE decimal :: Parser Int8 #-} +{-# SPECIALISE decimal :: Parser Int16 #-} +{-# SPECIALISE decimal :: Parser Int32 #-} +{-# SPECIALISE decimal :: Parser Int64 #-} +{-# SPECIALISE decimal :: Parser Integer #-} +{-# SPECIALISE decimal :: Parser Word #-} +{-# SPECIALISE decimal :: Parser Word8 #-} +{-# SPECIALISE decimal :: Parser Word16 #-} +{-# SPECIALISE decimal :: Parser Word32 #-} +{-# SPECIALISE decimal :: Parser Word64 #-} + +-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign +-- character. +signed :: Num a => Parser a -> Parser a +{-# SPECIALISE signed :: Parser Int -> Parser Int #-} +{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-} +{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-} +{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-} +{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-} +{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-} +signed p = (negate <$> (char8 '-' *> p)) +       <|> (char8 '+' *> p) +       <|> p diff --git a/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/FastSet.hs new file mode 100644 index 00000000..cb615167 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/FastSet.hs @@ -0,0 +1,115 @@ +{-# LANGUAGE BangPatterns, MagicHash #-} + +----------------------------------------------------------------------------- +-- | +-- Module      :  Data.Attoparsec.ByteString.FastSet +-- Copyright   :  Bryan O'Sullivan 2007-2014 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The +-- set representation is unboxed for efficiency.  For small sets, we +-- test for membership using a binary search.  For larger sets, we use +-- a lookup table. +-- +----------------------------------------------------------------------------- +module Data.Attoparsec.ByteString.FastSet +    ( +    -- * Data type +      FastSet +    -- * Construction +    , fromList +    , set +    -- * Lookup +    , memberChar +    , memberWord8 +    -- * Debugging +    , fromSet +    -- * Handy interface +    , charClass +    ) where + +import Data.Bits ((.&.), (.|.)) +import Foreign.Storable (peekByteOff, pokeByteOff) +import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#) +import GHC.Word (Word8(W8#)) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 +import qualified Data.ByteString.Internal as I +import qualified Data.ByteString.Unsafe as U + +data FastSet = Sorted { fromSet :: !B.ByteString } +             | Table  { fromSet :: !B.ByteString } +    deriving (Eq, Ord) + +instance Show FastSet where +    show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) +    show (Table _) = "FastSet Table" + +-- | The lower bound on the size of a lookup table.  We choose this to +-- balance table density against performance. +tableCutoff :: Int +tableCutoff = 8 + +-- | Create a set. +set :: B.ByteString -> FastSet +set s | B.length s < tableCutoff = Sorted . B.sort $ s +      | otherwise                = Table . mkTable $ s + +fromList :: [Word8] -> FastSet +fromList = set . B.pack + +data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 + +shiftR :: Int -> Int -> Int +shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) + +shiftL :: Word8 -> Int -> Word8 +shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#)) + +index :: Int -> I +index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7)) +{-# INLINE index #-} + +-- | Check the set for membership. +memberWord8 :: Word8 -> FastSet -> Bool +memberWord8 w (Table t)  = +    let I byte bit = index (fromIntegral w) +    in  U.unsafeIndex t byte .&. bit /= 0 +memberWord8 w (Sorted s) = search 0 (B.length s - 1) +    where search lo hi +              | hi < lo = False +              | otherwise = +                  let mid = (lo + hi) `quot` 2 +                  in case compare w (U.unsafeIndex s mid) of +                       GT -> search (mid + 1) hi +                       LT -> search lo (mid - 1) +                       _ -> True + +-- | Check the set for membership.  Only works with 8-bit characters: +-- characters above code point 255 will give wrong answers. +memberChar :: Char -> FastSet -> Bool +memberChar c = memberWord8 (I.c2w c) +{-# INLINE memberChar #-} + +mkTable :: B.ByteString -> B.ByteString +mkTable s = I.unsafeCreate 32 $ \t -> do +            _ <- I.memset t 0 32 +            U.unsafeUseAsCStringLen s $ \(p, l) -> +              let loop n | n == l = return () +                         | otherwise = do +                    c <- peekByteOff p n :: IO Word8 +                    let I byte bit = index (fromIntegral c) +                    prev <- peekByteOff t byte :: IO Word8 +                    pokeByteOff t byte (prev .|. bit) +                    loop (n + 1) +              in loop 0 + +charClass :: String -> FastSet +charClass = set . B8.pack . go +    where go (a:'-':b:xs) = [a..b] ++ go xs +          go (x:xs) = x : go xs +          go _ = "" diff --git a/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs new file mode 100644 index 00000000..f6ec3b32 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.12.1.1/Data/Attoparsec/ByteString/Internal.hs @@ -0,0 +1,485 @@ +{-# LANGUAGE BangPatterns, GADTs, OverloadedStrings, RecordWildCards #-} +-- | +-- Module      :  Data.Attoparsec.ByteString.Internal +-- Copyright   :  Bryan O'Sullivan 2007-2014 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient parser combinators for 'ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Internal +    ( +    -- * Parser types +      Parser +    , Result + +    -- * Running parsers +    , parse +    , parseOnly + +    -- * Combinators +    , module Data.Attoparsec.Combinator + +    -- * Parsing individual bytes +    , satisfy +    , satisfyWith +    , anyWord8 +    , skip +    , word8 +    , notWord8 + +    -- ** Lookahead +    , peekWord8 +    , peekWord8' + +    -- ** Byte classes +    , inClass +    , notInClass + +    -- * Parsing more complicated structures +    , storable + +    -- * Efficient string handling +    , skipWhile +    , string +    , stringTransform +    , take +    , scan +    , runScanner +    , takeWhile +    , takeWhile1 +    , takeTill + +    -- ** Consume all remaining input +    , takeByteString +    , takeLazyByteString + +    -- * Utilities +    , endOfLine +    , endOfInput +    , match +    , atEnd +    ) where + +import Control.Applicative ((<|>), (<$>)) +import Control.Monad (when) +import Data.Attoparsec.ByteString.Buffer (Buffer, buffer) +import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8) +import Data.Attoparsec.Combinator ((<?>)) +import Data.Attoparsec.Internal +import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) +import Data.Attoparsec.Internal.Types hiding (Parser, Failure, Success) +import Data.ByteString (ByteString) +import Data.Word (Word8) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr (castPtr, minusPtr, plusPtr) +import Foreign.Storable (Storable(peek, sizeOf)) +import Prelude hiding (getChar, succ, take, takeWhile) +import qualified Data.Attoparsec.ByteString.Buffer as Buf +import qualified Data.Attoparsec.Internal.Types as T +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Unsafe as B + +type Parser = T.Parser ByteString +type Result = IResult ByteString +type Failure r = T.Failure ByteString Buffer r +type Success a r = T.Success ByteString Buffer a r + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- >    where isDigit w = w >= 48 && w <= 57 +satisfy :: (Word8 -> Bool) -> Parser Word8 +satisfy p = do +  h <- peekWord8' +  if p h +    then advance 1 >> return h +    else fail "satisfy" +{-# INLINE satisfy #-} + +-- | The parser @skip p@ succeeds for any byte for which the predicate +-- @p@ returns 'True'. +-- +-- >skipDigit = skip isDigit +-- >    where isDigit w = w >= 48 && w <= 57 +skip :: (Word8 -> Bool) -> Parser () +skip p = do +  h <- peekWord8' +  if p h +    then advance 1 +    else fail "skip" + +-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if +-- the predicate @p@ returns 'True' on the transformed value. The +-- parser returns the transformed byte that was parsed. +satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a +satisfyWith f p = do +  h <- peekWord8' +  let c = f h +  if p c +    then advance 1 >> return c +    else fail "satisfyWith" +{-# INLINE satisfyWith #-} + +storable :: Storable a => Parser a +storable = hack undefined + where +  hack :: Storable b => b -> Parser b +  hack dummy = do +    (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy) +    return . B.inlinePerformIO . withForeignPtr fp $ \p -> +        peek (castPtr $ p `plusPtr` o) + +-- | Consume @n@ bytes of input, but succeed only if the predicate +-- returns 'True'. +takeWith :: Int -> (ByteString -> Bool) -> Parser ByteString +takeWith n0 p = do +  let n = max n0 0 +  s <- ensure n +  if p s +    then advance n >> return s +    else fail "takeWith" + +-- | Consume exactly @n@ bytes of input. +take :: Int -> Parser ByteString +take n = takeWith n (const True) +{-# INLINE take #-} + +-- | @string s@ parses a sequence of bytes that identically match +-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no +-- input if it fails (even if a partial match). +-- +-- /Note/: The behaviour of this parser is different to that of the +-- similarly-named parser in Parsec, as this one is all-or-nothing. +-- To illustrate the difference, the following parser will fail under +-- Parsec given an input of @\"for\"@: +-- +-- >string "foo" <|> string "for" +-- +-- The reason for its failure is that the first branch is a +-- partial match, and will consume the letters @\'f\'@ and @\'o\'@ +-- before failing.  In attoparsec, the above parser will /succeed/ on +-- that input, because the failed first branch will consume nothing. +string :: ByteString -> Parser ByteString +string s = takeWith (B.length s) (==s) +{-# INLINE string #-} + +stringTransform :: (ByteString -> ByteString) -> ByteString +                -> Parser ByteString +stringTransform f s = takeWith (B.length s) ((==f s) . f) +{-# INLINE stringTransform #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Word8 -> Bool) -> Parser () +skipWhile p = go + where +  go = do +    t <- B8.takeWhile p <$> get +    continue <- inputSpansChunks (B.length t) +    when continue go +{-# INLINE skipWhile #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs.  Careless use will thus result +-- in an infinite loop. +takeTill :: (Word8 -> Bool) -> Parser ByteString +takeTill p = takeWhile (not . p) +{-# INLINE takeTill #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs.  Careless use will thus result +-- in an infinite loop. +takeWhile :: (Word8 -> Bool) -> Parser ByteString +takeWhile p = (B.concat . reverse) `fmap` go [] + where +  go acc = do +    s <- B8.takeWhile p <$> get +    continue <- inputSpansChunks (B.length s) +    if continue +      then go (s:acc) +      else return (s:acc) +{-# INLINE takeWhile #-} + +takeRest :: Parser [ByteString] +takeRest = go [] + where +  go acc = do +    input <- wantInput +    if input +      then do +        s <- get +        advance (B.length s) +        go (s:acc) +      else return (reverse acc) + +-- | Consume all remaining input and return it as a single string. +takeByteString :: Parser ByteString +takeByteString = B.concat `fmap` takeRest + +-- | Consume all remaining input and return it as a single string. +takeLazyByteString :: Parser L.ByteString +takeLazyByteString = L.fromChunks `fmap` takeRest + +data T s = T {-# UNPACK #-} !Int s + +scan_ :: (s -> [ByteString] -> Parser r) -> s -> (s -> Word8 -> Maybe s) +         -> Parser r +scan_ f s0 p = go [] s0 + where +  go acc s1 = do +    let scanner (B.PS fp off len) = +          withForeignPtr fp $ \ptr0 -> do +            let start = ptr0 `plusPtr` off +                end   = start `plusPtr` len +                inner ptr !s +                  | ptr < end = do +                    w <- peek ptr +                    case p s w of +                      Just s' -> inner (ptr `plusPtr` 1) s' +                      _       -> done (ptr `minusPtr` start) s +                  | otherwise = done (ptr `minusPtr` start) s +                done !i !s = return (T i s) +            inner start s1 +    bs <- get +    let T i s' = inlinePerformIO $ scanner bs +        !h = B.unsafeTake i bs +    continue <- inputSpansChunks i +    if continue +      then go (h:acc) s' +      else f s' (h:acc) +{-# INLINE scan_ #-} + +-- | A stateful scanner.  The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs.  Careless use will thus result +-- in an infinite loop. +scan :: s -> (s -> Word8 -> Maybe s) -> Parser ByteString +scan = scan_ $ \_ chunks -> +  case chunks of +    [x] -> return x +    xs  -> return $! B.concat $ reverse xs +{-# INLINE scan #-} + +-- | Like 'scan', but generalized to return the final state of the +-- scanner. +runScanner :: s -> (s -> Word8 -> Maybe s) -> Parser (ByteString, s) +runScanner = scan_ $ \s xs -> return (B.concat (reverse xs), s) +{-# INLINE runScanner #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Word8 -> Bool) -> Parser ByteString +takeWhile1 p = do +  (`when` demandInput) =<< endOfChunk +  s <- B8.takeWhile p <$> get +  let len = B.length s +  if len == 0 +    then fail "takeWhile1" +    else do +      advance len +      eoc <- endOfChunk +      if eoc +        then (s<>) `fmap` takeWhile p +        else return s + +-- | Match any byte in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal @\'-\'@ to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Word8 -> Bool +inClass s = (`memberWord8` mySet) +    where mySet = charClass s +          {-# NOINLINE mySet #-} +{-# INLINE inClass #-} + +-- | Match any byte not in a set. +notInClass :: String -> Word8 -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Match any byte. +anyWord8 :: Parser Word8 +anyWord8 = satisfy $ const True +{-# INLINE anyWord8 #-} + +-- | Match a specific byte. +word8 :: Word8 -> Parser Word8 +word8 c = satisfy (== c) <?> show c +{-# INLINE word8 #-} + +-- | Match any byte except the given one. +notWord8 :: Word8 -> Parser Word8 +notWord8 c = satisfy (/= c) <?> "not " ++ show c +{-# INLINE notWord8 #-} + +-- | Match any byte, to perform lookahead. Returns 'Nothing' if end of +-- input has been reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'Control.Applicative.many', because such +-- parsers loop until a failure occurs.  Careless use will thus result +-- in an infinite loop. +peekWord8 :: Parser (Maybe Word8) +peekWord8 = T.Parser $ \t pos@(Pos pos_) more _lose succ -> +  case () of +    _| pos_ < Buf.length t -> +       let !w = Buf.unsafeIndex t pos_ +       in succ t pos more (Just w) +     | more == Complete -> +       succ t pos more Nothing +     | otherwise -> +       let succ' t' pos' more' = let !w = Buf.unsafeIndex t' pos_ +                                 in succ t' pos' more' (Just w) +           lose' t' pos' more' = succ t' pos' more' Nothing +       in prompt t pos more lose' succ' +{-# INLINE peekWord8 #-} + +-- | Match any byte, to perform lookahead.  Does not consume any +-- input, but will fail if end of input has been reached. +peekWord8' :: Parser Word8 +peekWord8' = T.Parser $ \t pos more lose succ -> +    if lengthAtLeast pos 1 t +    then succ t pos more (Buf.unsafeIndex t (fromPos pos)) +    else let succ' t' pos' more' bs' = succ t' pos' more' $! B.unsafeHead bs' +         in ensureSuspended 1 t pos more lose succ' +{-# INLINE peekWord8' #-} + +-- | Match either a single newline character @\'\\n\'@, or a carriage +-- return followed by a newline character @\"\\r\\n\"@. +endOfLine :: Parser () +endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ()) + +-- | Terminal failure continuation. +failK :: Failure a +failK t (Pos pos) _more stack msg = Fail (Buf.unsafeDrop pos t) stack msg +{-# INLINE failK #-} + +-- | Terminal success continuation. +successK :: Success a a +successK t (Pos pos) _more a = Done (Buf.unsafeDrop pos t) a +{-# INLINE successK #-} + +-- | Run a parser. +parse :: Parser a -> ByteString -> Result a +parse m s = T.runParser m (buffer s) (Pos 0) Incomplete failK successK +{-# INLINE parse #-} + +-- | Run a parser that cannot be resupplied via a 'Partial' result. +-- +-- This function does not force a parser to consume all of its input. +-- Instead, any residual input will be discarded.  To force a parser +-- to consume all of its input, use something like this: +-- +-- @ +--'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput') +-- @ +parseOnly :: Parser a -> ByteString -> Either String a +parseOnly m s = case T.runParser m (buffer s) (Pos 0) Complete failK successK of +                  Fail _ _ err -> Left err +                  Done _ a     -> Right a +                  _            -> error "parseOnly: impossible error!" +{-# INLINE parseOnly #-} + +get :: Parser ByteString +get = T.Parser $ \t pos more _lose succ -> +  succ t pos more (Buf.unsafeDrop (fromPos pos) t) +{-# INLINE get #-} + +endOfChunk :: Parser Bool +endOfChunk = T.Parser $ \t pos more _lose succ -> +  succ t pos more (fromPos pos == Buf.length t) +{-# INLINE endOfChunk #-} + +inputSpansChunks :: Int -> Parser Bool +inputSpansChunks i = T.Parser $ \t pos_ more _lose succ -> +  let pos = pos_ + Pos i +  in if fromPos pos < Buf.length t || more == Complete +     then succ t pos more False +     else let lose' t' pos' more' = succ t' pos' more' False +              succ' t' pos' more' = succ t' pos' more' True +          in prompt t pos more lose' succ' +{-# INLINE inputSpansChunks #-} + +advance :: Int -> Parser () +advance n = T.Parser $ \t pos more _lose succ -> +  succ t (pos + Pos n) more () +{-# INLINE advance #-} + +ensureSuspended :: Int -> Buffer -> Pos -> More +                -> Failure r +                -> Success ByteString r +                -> Result r +ensureSuspended n t pos more lose succ = +    runParser (demandInput >> go) t pos more lose succ +  where go = T.Parser $ \t' pos' more' lose' succ' -> +          if lengthAtLeast pos' n t' +          then succ' t' pos' more' (substring pos (Pos n) t') +          else runParser (demandInput >> go) t' pos' more' lose' succ' + +-- | If at least @n@ elements of input are available, return the +-- current input, otherwise fail. +ensure :: Int -> Parser ByteString +ensure n = T.Parser $ \t pos more lose succ -> +    if lengthAtLeast pos n t +    then succ t pos more (substring pos (Pos n) t) +    -- The uncommon case is kept out-of-line to reduce code size: +    else ensureSuspended n t pos more lose succ +-- Non-recursive so the bounds check can be inlined: +{-# INLINE ensure #-} + +-- | Return both the result of a parse and the portion of the input +-- that was consumed while it was being parsed. +match :: Parser a -> Parser (ByteString, a) +match p = T.Parser $ \t pos more lose succ -> +  let succ' t' pos' more' a = +        succ t' pos' more' (substring pos (pos'-pos) t', a) +  in runParser p t pos more lose succ' + +lengthAtLeast :: Pos -> Int -> Buffer -> Bool +lengthAtLeast (Pos pos) n bs = Buf.length bs >= pos + n +{-# INLINE lengthAtLeast #-} + +substring :: Pos -> Pos -> Buffer -> ByteString +substring (Pos pos) (Pos n) = Buf.substring pos n +{-# INLINE substring #-}  | 
