From 79c7159101c03bbbc7350e07963896ca2bb97c02 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Wed, 25 Apr 2018 11:24:07 -0700 Subject: Replace 'attoparsec' with 'parsec' (#799) * Remove attoparsec with parsec and start fixing failed parses * Make tests pass * Fix encoding issues The Haddock parser no longer needs to worry about bytestrings. All the internal parsing work in haddock-library happens over 'Text'. * Remove attoparsec vendor * Fix stuff broken in 'attoparsec' -> 'parsec' * hyperlinks * codeblocks * examples Pretty much all issues are due to attoparsec's backtracking failure behaviour vs. parsec's non-backtracking failure behaviour. * Fix small TODOs * Missing quote + Haddocks * Better handle spaces before/after paragraphs * Address review comments --- .../Data/Attoparsec/ByteString/Buffer.hs | 156 ------ .../Data/Attoparsec/ByteString/Char8.hs | 464 ------------------ .../Data/Attoparsec/ByteString/FastSet.hs | 115 ----- .../Data/Attoparsec/ByteString/Internal.hs | 536 --------------------- 4 files changed, 1271 deletions(-) delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs (limited to 'haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString') diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs deleted file mode 100644 index ac94dfcc..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs +++ /dev/null @@ -1,156 +0,0 @@ -{-# LANGUAGE BangPatterns #-} --- | --- Module : Data.Attoparsec.ByteString.Buffer --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : GHC --- --- An "immutable" buffer that supports cheap appends. --- --- A Buffer is divided into an immutable read-only zone, followed by a --- mutable area that we've preallocated, but not yet written to. --- --- We overallocate at the end of a Buffer so that we can cheaply --- append. Since a user of an existing Buffer cannot see past the end --- of its immutable zone into the data that will change during an --- append, this is safe. --- --- Once we run out of space at the end of a Buffer, we do the usual --- doubling of the buffer size. --- --- The fact of having a mutable buffer really helps with performance, --- but it does have a consequence: if someone misuses the Partial API --- that attoparsec uses by calling the same continuation repeatedly --- (which never makes sense in practice), they could overwrite data. --- --- Since the API *looks* pure, it should *act* pure, too, so we use --- two generation counters (one mutable, one immutable) to track the --- number of appends to a mutable buffer. If the counters ever get out --- of sync, someone is appending twice to a mutable buffer, so we --- duplicate the entire buffer in order to preserve the immutability --- of its older self. --- --- While we could go a step further and gain protection against API --- abuse on a multicore system, by use of an atomic increment --- instruction to bump the mutable generation counter, that would be --- very expensive, and feels like it would also be in the realm of the --- ridiculous. Clients should never call a continuation more than --- once; we lack a linear type system that could enforce this; and --- there's only so far we should go to accommodate broken uses. - -module Data.Attoparsec.ByteString.Buffer - ( - Buffer - , buffer - , unbuffer - , pappend - , length - , unsafeIndex - , substring - , unsafeDrop - ) where - -import Control.Exception (assert) -import Data.ByteString.Internal (ByteString(..), memcpy, nullForeignPtr) -import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) -import Data.List (foldl1') -import Data.Monoid as Mon (Monoid(..)) -import Data.Semigroup (Semigroup(..)) -import Data.Word (Word8) -import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) -import Foreign.Ptr (castPtr, plusPtr) -import Foreign.Storable (peek, peekByteOff, poke, sizeOf) -import GHC.ForeignPtr (mallocPlainForeignPtrBytes) -import Prelude hiding (length) - --- If _cap is zero, this buffer is empty. -data Buffer = Buf { - _fp :: {-# UNPACK #-} !(ForeignPtr Word8) - , _off :: {-# UNPACK #-} !Int - , _len :: {-# UNPACK #-} !Int - , _cap :: {-# UNPACK #-} !Int - , _gen :: {-# UNPACK #-} !Int - } - -instance Show Buffer where - showsPrec p = showsPrec p . unbuffer - --- | The initial 'Buffer' has no mutable zone, so we can avoid all --- copies in the (hopefully) common case of no further input being fed --- to us. -buffer :: ByteString -> Buffer -buffer (PS fp off len) = Buf fp off len len 0 - -unbuffer :: Buffer -> ByteString -unbuffer (Buf fp off len _ _) = PS fp off len - -instance Semigroup Buffer where - (Buf _ _ _ 0 _) <> b = b - a <> (Buf _ _ _ 0 _) = a - buf <> (Buf fp off len _ _) = append buf fp off len - -instance Monoid Buffer where - mempty = Buf nullForeignPtr 0 0 0 0 - - mappend = (<>) - - mconcat [] = Mon.mempty - mconcat xs = foldl1' mappend xs - -pappend :: Buffer -> ByteString -> Buffer -pappend (Buf _ _ _ 0 _) bs = buffer bs -pappend buf (PS fp off len) = append buf fp off len - -append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer -append (Buf fp0 off0 len0 cap0 gen0) !fp1 !off1 !len1 = - inlinePerformIO . withForeignPtr fp0 $ \ptr0 -> - withForeignPtr fp1 $ \ptr1 -> do - let genSize = sizeOf (0::Int) - newlen = len0 + len1 - gen <- if gen0 == 0 - then return 0 - else peek (castPtr ptr0) - if gen == gen0 && newlen <= cap0 - then do - let newgen = gen + 1 - poke (castPtr ptr0) newgen - memcpy (ptr0 `plusPtr` (off0+len0)) - (ptr1 `plusPtr` off1) - (fromIntegral len1) - return (Buf fp0 off0 newlen cap0 newgen) - else do - let newcap = newlen * 2 - fp <- mallocPlainForeignPtrBytes (newcap + genSize) - withForeignPtr fp $ \ptr_ -> do - let ptr = ptr_ `plusPtr` genSize - newgen = 1 - poke (castPtr ptr_) newgen - memcpy ptr (ptr0 `plusPtr` off0) (fromIntegral len0) - memcpy (ptr `plusPtr` len0) (ptr1 `plusPtr` off1) - (fromIntegral len1) - return (Buf fp genSize newlen newcap newgen) - -length :: Buffer -> Int -length (Buf _ _ len _ _) = len -{-# INLINE length #-} - -unsafeIndex :: Buffer -> Int -> Word8 -unsafeIndex (Buf fp off len _ _) i = assert (i >= 0 && i < len) . - inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i) -{-# INLINE unsafeIndex #-} - -substring :: Int -> Int -> Buffer -> ByteString -substring s l (Buf fp off len _ _) = - assert (s >= 0 && s <= len) . - assert (l >= 0 && l <= len-s) $ - PS fp (off+s) l -{-# INLINE substring #-} - -unsafeDrop :: Int -> Buffer -> ByteString -unsafeDrop s (Buf fp off len _ _) = - assert (s >= 0 && s <= len) $ - PS fp (off+s) (len-s) -{-# INLINE unsafeDrop #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs deleted file mode 100644 index 7fafba40..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs +++ /dev/null @@ -1,464 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, TypeFamilies, - TypeSynonymInstances, GADTs #-} -#if __GLASGOW_HASKELL__ >= 702 -{-# LANGUAGE Trustworthy #-} -- Imports internal modules -#endif -{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-warnings-deprecations #-} - --- | --- Module : Data.Attoparsec.ByteString.Char8 --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient, character-oriented combinator parsing for --- 'B.ByteString' strings, loosely based on the Parsec library. - -module Data.Attoparsec.ByteString.Char8 - ( - -- * Character encodings - -- $encodings - - -- * Parser types - Parser - , A.Result - , A.IResult(..) - , I.compareResults - - -- * Running parsers - , A.parse - , A.feed - , A.parseOnly - , A.parseWith - , A.parseTest - - -- ** Result conversion - , A.maybeResult - , A.eitherResult - - -- * Parsing individual characters - , char - , char8 - , anyChar - , notChar - , satisfy - - -- ** Lookahead - , peekChar - , peekChar' - - -- ** Special character parsers - , digit - , letter_iso8859_15 - , letter_ascii - , space - - -- ** Fast predicates - , isDigit - , isDigit_w8 - , isAlpha_iso8859_15 - , isAlpha_ascii - , isSpace - , isSpace_w8 - - -- *** Character classes - , inClass - , notInClass - - -- * Efficient string handling - , I.string - , I.stringCI - , skipSpace - , skipWhile - , I.take - , scan - , takeWhile - , takeWhile1 - , takeTill - - -- ** String combinators - -- $specalt - , (.*>) - , (<*.) - - -- ** Consume all remaining input - , I.takeByteString - , I.takeLazyByteString - - -- * Text parsing - , I.endOfLine - , isEndOfLine - , isHorizontalSpace - - -- * Numeric parsers - , decimal - , hexadecimal - , signed - - -- * Combinators - , try - , () - , choice - , count - , option - , many' - , many1 - , many1' - , manyTill - , manyTill' - , sepBy - , sepBy' - , sepBy1 - , sepBy1' - , skipMany - , skipMany1 - , eitherP - , I.match - -- * State observation and manipulation functions - , I.endOfInput - , I.atEnd - ) where - -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative (pure, (*>), (<*), (<$>)) -import Data.Word (Word) -#endif -import Control.Applicative ((<|>)) -import Data.Attoparsec.ByteString.FastSet (charClass, memberChar) -import Data.Attoparsec.ByteString.Internal (Parser) -import Data.Attoparsec.Combinator -import Data.Bits (Bits, (.|.), shiftL) -import Data.ByteString.Internal (c2w, w2c) -import Data.Int (Int8, Int16, Int32, Int64) -import Data.String (IsString(..)) -import Data.Word (Word8, Word16, Word32, Word64) -import Prelude hiding (takeWhile) -import qualified Data.Attoparsec.ByteString as A -import qualified Data.Attoparsec.ByteString.Internal as I -import qualified Data.Attoparsec.Internal as I -import qualified Data.ByteString as B8 -import qualified Data.ByteString.Char8 as B - -instance (a ~ B.ByteString) => IsString (Parser a) where - fromString = I.string . B.pack - --- $encodings --- --- This module is intended for parsing text that is --- represented using an 8-bit character set, e.g. ASCII or --- ISO-8859-15. It /does not/ make any attempt to deal with character --- encodings, multibyte characters, or wide characters. In --- particular, all attempts to use characters above code point U+00FF --- will give wrong answers. --- --- Code points below U+0100 are simply translated to and from their --- numeric values, so e.g. the code point U+00A4 becomes the byte --- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic --- currency sign in ISO-8859-1). Haskell 'Char' values above U+00FF --- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@. - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser requires the predicate to succeed on at least one byte --- of input: it will fail if the predicate never returns 'True' or if --- there is no input left. -takeWhile1 :: (Char -> Bool) -> Parser B.ByteString -takeWhile1 p = I.takeWhile1 (p . w2c) -{-# INLINE takeWhile1 #-} - --- | The parser @satisfy p@ succeeds for any byte for which the --- predicate @p@ returns 'True'. Returns the byte that is actually --- parsed. --- --- >digit = satisfy isDigit --- > where isDigit c = c >= '0' && c <= '9' -satisfy :: (Char -> Bool) -> Parser Char -satisfy = I.satisfyWith w2c -{-# INLINE satisfy #-} - --- | Match a letter, in the ISO-8859-15 encoding. -letter_iso8859_15 :: Parser Char -letter_iso8859_15 = satisfy isAlpha_iso8859_15 "letter_iso8859_15" -{-# INLINE letter_iso8859_15 #-} - --- | Match a letter, in the ASCII encoding. -letter_ascii :: Parser Char -letter_ascii = satisfy isAlpha_ascii "letter_ascii" -{-# INLINE letter_ascii #-} - --- | A fast alphabetic predicate for the ISO-8859-15 encoding --- --- /Note/: For all character encodings other than ISO-8859-15, and --- almost all Unicode code points above U+00A3, this predicate gives --- /wrong answers/. -isAlpha_iso8859_15 :: Char -> Bool -isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || - (c >= '\166' && moby c) - where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247" - {-# NOINLINE moby #-} -{-# INLINE isAlpha_iso8859_15 #-} - --- | A fast alphabetic predicate for the ASCII encoding --- --- /Note/: For all character encodings other than ASCII, and --- almost all Unicode code points above U+007F, this predicate gives --- /wrong answers/. -isAlpha_ascii :: Char -> Bool -isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') -{-# INLINE isAlpha_ascii #-} - --- | Parse a single digit. -digit :: Parser Char -digit = satisfy isDigit "digit" -{-# INLINE digit #-} - --- | A fast digit predicate. -isDigit :: Char -> Bool -isDigit c = c >= '0' && c <= '9' -{-# INLINE isDigit #-} - --- | A fast digit predicate. -isDigit_w8 :: Word8 -> Bool -isDigit_w8 w = w - 48 <= 9 -{-# INLINE isDigit_w8 #-} - --- | Match any character. -anyChar :: Parser Char -anyChar = satisfy $ const True -{-# INLINE anyChar #-} - --- | Match any character, to perform lookahead. Returns 'Nothing' if --- end of input has been reached. Does not consume any input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -peekChar :: Parser (Maybe Char) -peekChar = (fmap w2c) `fmap` I.peekWord8 -{-# INLINE peekChar #-} - --- | Match any character, to perform lookahead. Does not consume any --- input, but will fail if end of input has been reached. -peekChar' :: Parser Char -peekChar' = w2c `fmap` I.peekWord8' -{-# INLINE peekChar' #-} - --- | Fast predicate for matching ASCII space characters. --- --- /Note/: This predicate only gives correct answers for the ASCII --- encoding. For instance, it does not recognise U+00A0 (non-breaking --- space) as a space character, even though it is a valid ISO-8859-15 --- byte. For a Unicode-aware and only slightly slower predicate, --- use 'Data.Char.isSpace' -isSpace :: Char -> Bool -isSpace c = (c == ' ') || ('\t' <= c && c <= '\r') -{-# INLINE isSpace #-} - --- | Fast 'Word8' predicate for matching ASCII space characters. -isSpace_w8 :: Word8 -> Bool -isSpace_w8 w = w == 32 || w - 9 <= 4 -{-# INLINE isSpace_w8 #-} - - --- | Parse a space character. --- --- /Note/: This parser only gives correct answers for the ASCII --- encoding. For instance, it does not recognise U+00A0 (non-breaking --- space) as a space character, even though it is a valid ISO-8859-15 --- byte. -space :: Parser Char -space = satisfy isSpace "space" -{-# INLINE space #-} - --- | Match a specific character. -char :: Char -> Parser Char -char c = satisfy (== c) [c] -{-# INLINE char #-} - --- | Match a specific character, but return its 'Word8' value. -char8 :: Char -> Parser Word8 -char8 c = I.satisfy (== c2w c) [c] -{-# INLINE char8 #-} - --- | Match any character except the given one. -notChar :: Char -> Parser Char -notChar c = satisfy (/= c) "not " ++ [c] -{-# INLINE notChar #-} - --- | Match any character in a set. --- --- >vowel = inClass "aeiou" --- --- Range notation is supported. --- --- >halfAlphabet = inClass "a-nA-N" --- --- To add a literal \'-\' to a set, place it at the beginning or end --- of the string. -inClass :: String -> Char -> Bool -inClass s = (`memberChar` mySet) - where mySet = charClass s -{-# INLINE inClass #-} - --- | Match any character not in a set. -notInClass :: String -> Char -> Bool -notInClass s = not . inClass s -{-# INLINE notInClass #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'False' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -takeWhile :: (Char -> Bool) -> Parser B.ByteString -takeWhile p = I.takeWhile (p . w2c) -{-# INLINE takeWhile #-} - --- | A stateful scanner. The predicate consumes and transforms a --- state argument, and each transformed state is passed to successive --- invocations of the predicate on each byte of the input until one --- returns 'Nothing' or the input ends. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'Nothing' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString -scan s0 p = I.scan s0 (\s -> p s . w2c) -{-# INLINE scan #-} - --- | Consume input as long as the predicate returns 'False' --- (i.e. until it returns 'True'), and return the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'True' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -takeTill :: (Char -> Bool) -> Parser B.ByteString -takeTill p = I.takeTill (p . w2c) -{-# INLINE takeTill #-} - --- | Skip past input for as long as the predicate returns 'True'. -skipWhile :: (Char -> Bool) -> Parser () -skipWhile p = I.skipWhile (p . w2c) -{-# INLINE skipWhile #-} - --- | Skip over white space. -skipSpace :: Parser () -skipSpace = I.skipWhile isSpace_w8 -{-# INLINE skipSpace #-} - --- $specalt --- --- If you enable the @OverloadedStrings@ language extension, you can --- use the '*>' and '<*' combinators to simplify the common task of --- matching a statically known string, then immediately parsing --- something else. --- --- Instead of writing something like this: --- --- @ ---'I.string' \"foo\" '*>' wibble --- @ --- --- Using @OverloadedStrings@, you can omit the explicit use of --- 'I.string', and write a more compact version: --- --- @ --- \"foo\" '*>' wibble --- @ --- --- (Note: the '.*>' and '<*.' combinators that were originally --- provided for this purpose are obsolete and unnecessary, and will be --- removed in the next major version.) - --- | /Obsolete/. A type-specialized version of '*>' for --- 'B.ByteString'. Use '*>' instead. -(.*>) :: B.ByteString -> Parser a -> Parser a -s .*> f = I.string s *> f -{-# DEPRECATED (.*>) "This is no longer necessary, and will be removed. Use '*>' instead." #-} - --- | /Obsolete/. A type-specialized version of '<*' for --- 'B.ByteString'. Use '<*' instead. -(<*.) :: Parser a -> B.ByteString -> Parser a -f <*. s = f <* I.string s -{-# DEPRECATED (<*.) "This is no longer necessary, and will be removed. Use '<*' instead." #-} - --- | A predicate that matches either a carriage return @\'\\r\'@ or --- newline @\'\\n\'@ character. -isEndOfLine :: Word8 -> Bool -isEndOfLine w = w == 13 || w == 10 -{-# INLINE isEndOfLine #-} - --- | A predicate that matches either a space @\' \'@ or horizontal tab --- @\'\\t\'@ character. -isHorizontalSpace :: Word8 -> Bool -isHorizontalSpace w = w == 32 || w == 9 -{-# INLINE isHorizontalSpace #-} - --- | Parse and decode an unsigned hexadecimal number. The hex digits --- @\'a\'@ through @\'f\'@ may be upper or lower case. --- --- This parser does not accept a leading @\"0x\"@ string. -hexadecimal :: (Integral a, Bits a) => Parser a -hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit - where - isHexDigit w = (w >= 48 && w <= 57) || - (w >= 97 && w <= 102) || - (w >= 65 && w <= 70) - step a w | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) - | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) - | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) -{-# SPECIALISE hexadecimal :: Parser Int #-} -{-# SPECIALISE hexadecimal :: Parser Int8 #-} -{-# SPECIALISE hexadecimal :: Parser Int16 #-} -{-# SPECIALISE hexadecimal :: Parser Int32 #-} -{-# SPECIALISE hexadecimal :: Parser Int64 #-} -{-# SPECIALISE hexadecimal :: Parser Integer #-} -{-# SPECIALISE hexadecimal :: Parser Word #-} -{-# SPECIALISE hexadecimal :: Parser Word8 #-} -{-# SPECIALISE hexadecimal :: Parser Word16 #-} -{-# SPECIALISE hexadecimal :: Parser Word32 #-} -{-# SPECIALISE hexadecimal :: Parser Word64 #-} - --- | Parse and decode an unsigned decimal number. -decimal :: Integral a => Parser a -decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDigit_w8 - where step a w = a * 10 + fromIntegral (w - 48) -{-# SPECIALISE decimal :: Parser Int #-} -{-# SPECIALISE decimal :: Parser Int8 #-} -{-# SPECIALISE decimal :: Parser Int16 #-} -{-# SPECIALISE decimal :: Parser Int32 #-} -{-# SPECIALISE decimal :: Parser Int64 #-} -{-# SPECIALISE decimal :: Parser Integer #-} -{-# SPECIALISE decimal :: Parser Word #-} -{-# SPECIALISE decimal :: Parser Word8 #-} -{-# SPECIALISE decimal :: Parser Word16 #-} -{-# SPECIALISE decimal :: Parser Word32 #-} -{-# SPECIALISE decimal :: Parser Word64 #-} - --- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign --- character. -signed :: Num a => Parser a -> Parser a -{-# SPECIALISE signed :: Parser Int -> Parser Int #-} -{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-} -{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-} -{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-} -{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-} -{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-} -signed p = (negate <$> (char8 '-' *> p)) - <|> (char8 '+' *> p) - <|> p - diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs deleted file mode 100644 index d15854c4..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs +++ /dev/null @@ -1,115 +0,0 @@ -{-# LANGUAGE BangPatterns, MagicHash #-} - ------------------------------------------------------------------------------ --- | --- Module : Data.Attoparsec.ByteString.FastSet --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Fast set membership tests for 'Word8' and 8-bit 'Char' values. The --- set representation is unboxed for efficiency. For small sets, we --- test for membership using a binary search. For larger sets, we use --- a lookup table. --- ------------------------------------------------------------------------------ -module Data.Attoparsec.ByteString.FastSet - ( - -- * Data type - FastSet - -- * Construction - , fromList - , set - -- * Lookup - , memberChar - , memberWord8 - -- * Debugging - , fromSet - -- * Handy interface - , charClass - ) where - -import Data.Bits ((.&.), (.|.)) -import Foreign.Storable (peekByteOff, pokeByteOff) -import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#) -import GHC.Word (Word8(W8#)) -import qualified Data.ByteString as B -import qualified Data.ByteString.Char8 as B8 -import qualified Data.ByteString.Internal as I -import qualified Data.ByteString.Unsafe as U - -data FastSet = Sorted { fromSet :: !B.ByteString } - | Table { fromSet :: !B.ByteString } - deriving (Eq, Ord) - -instance Show FastSet where - show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) - show (Table _) = "FastSet Table" - --- | The lower bound on the size of a lookup table. We choose this to --- balance table density against performance. -tableCutoff :: Int -tableCutoff = 8 - --- | Create a set. -set :: B.ByteString -> FastSet -set s | B.length s < tableCutoff = Sorted . B.sort $ s - | otherwise = Table . mkTable $ s - -fromList :: [Word8] -> FastSet -fromList = set . B.pack - -data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 - -shiftR :: Int -> Int -> Int -shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) - -shiftL :: Word8 -> Int -> Word8 -shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#)) - -index :: Int -> I -index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7)) -{-# INLINE index #-} - --- | Check the set for membership. -memberWord8 :: Word8 -> FastSet -> Bool -memberWord8 w (Table t) = - let I byte bit = index (fromIntegral w) - in U.unsafeIndex t byte .&. bit /= 0 -memberWord8 w (Sorted s) = search 0 (B.length s - 1) - where search lo hi - | hi < lo = False - | otherwise = - let mid = (lo + hi) `quot` 2 - in case compare w (U.unsafeIndex s mid) of - GT -> search (mid + 1) hi - LT -> search lo (mid - 1) - _ -> True - --- | Check the set for membership. Only works with 8-bit characters: --- characters above code point 255 will give wrong answers. -memberChar :: Char -> FastSet -> Bool -memberChar c = memberWord8 (I.c2w c) -{-# INLINE memberChar #-} - -mkTable :: B.ByteString -> B.ByteString -mkTable s = I.unsafeCreate 32 $ \t -> do - _ <- I.memset t 0 32 - U.unsafeUseAsCStringLen s $ \(p, l) -> - let loop n | n == l = return () - | otherwise = do - c <- peekByteOff p n :: IO Word8 - let I byte bit = index (fromIntegral c) - prev <- peekByteOff t byte :: IO Word8 - pokeByteOff t byte (prev .|. bit) - loop (n + 1) - in loop 0 - -charClass :: String -> FastSet -charClass = set . B8.pack . go - where go (a:'-':b:xs) = [a..b] ++ go xs - go (x:xs) = x : go xs - go _ = "" diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs deleted file mode 100644 index 4938ea87..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs +++ /dev/null @@ -1,536 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP, GADTs, OverloadedStrings, RankNTypes, - RecordWildCards #-} --- | --- Module : Data.Attoparsec.ByteString.Internal --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient parser combinators for 'ByteString' strings, --- loosely based on the Parsec library. - -module Data.Attoparsec.ByteString.Internal - ( - -- * Parser types - Parser - , Result - - -- * Running parsers - , parse - , parseOnly - - -- * Combinators - , module Data.Attoparsec.Combinator - - -- * Parsing individual bytes - , satisfy - , satisfyWith - , anyWord8 - , skip - , word8 - , notWord8 - - -- ** Lookahead - , peekWord8 - , peekWord8' - - -- ** Byte classes - , inClass - , notInClass - - -- * Parsing more complicated structures - , storable - - -- * Efficient string handling - , skipWhile - , string - , stringCI - , take - , scan - , runScanner - , takeWhile - , takeWhile1 - , takeTill - - -- ** Consume all remaining input - , takeByteString - , takeLazyByteString - - -- * Utilities - , endOfLine - , endOfInput - , match - , atEnd - ) where - -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative ((<$>)) -#endif -import Control.Applicative ((<|>)) -import Control.Monad (when) -import Data.Attoparsec.ByteString.Buffer (Buffer, buffer) -import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8) -import Data.Attoparsec.Combinator (()) -import Data.Attoparsec.Internal -import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) -import Data.Attoparsec.Internal.Types hiding (Parser, Failure, Success) -import Data.ByteString (ByteString) -import Data.List (intercalate) -import Data.Word (Word8) -import Foreign.ForeignPtr (withForeignPtr) -import Foreign.Ptr (castPtr, minusPtr, plusPtr) -import Foreign.Storable (Storable(peek, sizeOf)) -import Prelude hiding (getChar, succ, take, takeWhile) -import qualified Data.Attoparsec.ByteString.Buffer as Buf -import qualified Data.Attoparsec.Internal.Types as T -import qualified Data.ByteString as B8 -import qualified Data.ByteString.Char8 as B -import qualified Data.ByteString.Internal as B -import qualified Data.ByteString.Lazy as L -import qualified Data.ByteString.Unsafe as B - -type Parser = T.Parser ByteString -type Result = IResult ByteString -type Failure r = T.Failure ByteString Buffer r -type Success a r = T.Success ByteString Buffer a r - --- | The parser @satisfy p@ succeeds for any byte for which the --- predicate @p@ returns 'True'. Returns the byte that is actually --- parsed. --- --- >digit = satisfy isDigit --- > where isDigit w = w >= 48 && w <= 57 -satisfy :: (Word8 -> Bool) -> Parser Word8 -satisfy p = do - h <- peekWord8' - if p h - then advance 1 >> return h - else fail "satisfy" -{-# INLINE satisfy #-} - --- | The parser @skip p@ succeeds for any byte for which the predicate --- @p@ returns 'True'. --- --- >skipDigit = skip isDigit --- > where isDigit w = w >= 48 && w <= 57 -skip :: (Word8 -> Bool) -> Parser () -skip p = do - h <- peekWord8' - if p h - then advance 1 - else fail "skip" - --- | The parser @satisfyWith f p@ transforms a byte, and succeeds if --- the predicate @p@ returns 'True' on the transformed value. The --- parser returns the transformed byte that was parsed. -satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a -satisfyWith f p = do - h <- peekWord8' - let c = f h - if p c - then advance 1 >> return c - else fail "satisfyWith" -{-# INLINE satisfyWith #-} - -storable :: Storable a => Parser a -storable = hack undefined - where - hack :: Storable b => b -> Parser b - hack dummy = do - (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy) - return . inlinePerformIO . withForeignPtr fp $ \p -> - peek (castPtr $ p `plusPtr` o) - --- | Consume exactly @n@ bytes of input. -take :: Int -> Parser ByteString -take n0 = do - let n = max n0 0 - s <- ensure n - advance n >> return s -{-# INLINE take #-} - --- | @string s@ parses a sequence of bytes that identically match --- @s@. Returns the parsed string (i.e. @s@). This parser consumes no --- input if it fails (even if a partial match). --- --- /Note/: The behaviour of this parser is different to that of the --- similarly-named parser in Parsec, as this one is all-or-nothing. --- To illustrate the difference, the following parser will fail under --- Parsec given an input of @\"for\"@: --- --- >string "foo" <|> string "for" --- --- The reason for its failure is that the first branch is a --- partial match, and will consume the letters @\'f\'@ and @\'o\'@ --- before failing. In attoparsec, the above parser will /succeed/ on --- that input, because the failed first branch will consume nothing. -string :: ByteString -> Parser ByteString -string s = string_ (stringSuspended id) id s -{-# INLINE string #-} - --- ASCII-specific but fast, oh yes. -toLower :: Word8 -> Word8 -toLower w | w >= 65 && w <= 90 = w + 32 - | otherwise = w - --- | Satisfy a literal string, ignoring case. -stringCI :: ByteString -> Parser ByteString -stringCI s = string_ (stringSuspended lower) lower s - where lower = B8.map toLower -{-# INLINE stringCI #-} - -string_ :: (forall r. ByteString -> ByteString -> Buffer -> Pos -> More - -> Failure r -> Success ByteString r -> Result r) - -> (ByteString -> ByteString) - -> ByteString -> Parser ByteString -string_ suspended f s0 = T.Parser $ \t pos more lose succ -> - let n = B.length s - s = f s0 - in if lengthAtLeast pos n t - then let t' = substring pos (Pos n) t - in if s == f t' - then succ t (pos + Pos n) more t' - else lose t pos more [] "string" - else let t' = Buf.unsafeDrop (fromPos pos) t - in if f t' `B.isPrefixOf` s - then suspended s (B.drop (B.length t') s) t pos more lose succ - else lose t pos more [] "string" -{-# INLINE string_ #-} - -stringSuspended :: (ByteString -> ByteString) - -> ByteString -> ByteString -> Buffer -> Pos -> More - -> Failure r - -> Success ByteString r - -> Result r -stringSuspended f s0 s t pos more lose succ = - runParser (demandInput_ >>= go) t pos more lose succ - where go s'0 = T.Parser $ \t' pos' more' lose' succ' -> - let m = B.length s - s' = f s'0 - n = B.length s' - in if n >= m - then if B.unsafeTake m s' == s - then let o = Pos (B.length s0) - in succ' t' (pos' + o) more' - (substring pos' o t') - else lose' t' pos' more' [] "string" - else if s' == B.unsafeTake n s - then stringSuspended f s0 (B.unsafeDrop n s) - t' pos' more' lose' succ' - else lose' t' pos' more' [] "string" - --- | Skip past input for as long as the predicate returns 'True'. -skipWhile :: (Word8 -> Bool) -> Parser () -skipWhile p = go - where - go = do - t <- B8.takeWhile p <$> get - continue <- inputSpansChunks (B.length t) - when continue go -{-# INLINE skipWhile #-} - --- | Consume input as long as the predicate returns 'False' --- (i.e. until it returns 'True'), and return the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'True' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -takeTill :: (Word8 -> Bool) -> Parser ByteString -takeTill p = takeWhile (not . p) -{-# INLINE takeTill #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'False' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -takeWhile :: (Word8 -> Bool) -> Parser ByteString -takeWhile p = do - s <- B8.takeWhile p <$> get - continue <- inputSpansChunks (B.length s) - if continue - then takeWhileAcc p [s] - else return s -{-# INLINE takeWhile #-} - -takeWhileAcc :: (Word8 -> Bool) -> [ByteString] -> Parser ByteString -takeWhileAcc p = go - where - go acc = do - s <- B8.takeWhile p <$> get - continue <- inputSpansChunks (B.length s) - if continue - then go (s:acc) - else return $ concatReverse (s:acc) -{-# INLINE takeWhileAcc #-} - -takeRest :: Parser [ByteString] -takeRest = go [] - where - go acc = do - input <- wantInput - if input - then do - s <- get - advance (B.length s) - go (s:acc) - else return (reverse acc) - --- | Consume all remaining input and return it as a single string. -takeByteString :: Parser ByteString -takeByteString = B.concat `fmap` takeRest - --- | Consume all remaining input and return it as a single string. -takeLazyByteString :: Parser L.ByteString -takeLazyByteString = L.fromChunks `fmap` takeRest - -data T s = T {-# UNPACK #-} !Int s - -scan_ :: (s -> [ByteString] -> Parser r) -> s -> (s -> Word8 -> Maybe s) - -> Parser r -scan_ f s0 p = go [] s0 - where - go acc s1 = do - let scanner (B.PS fp off len) = - withForeignPtr fp $ \ptr0 -> do - let start = ptr0 `plusPtr` off - end = start `plusPtr` len - inner ptr !s - | ptr < end = do - w <- peek ptr - case p s w of - Just s' -> inner (ptr `plusPtr` 1) s' - _ -> done (ptr `minusPtr` start) s - | otherwise = done (ptr `minusPtr` start) s - done !i !s = return (T i s) - inner start s1 - bs <- get - let T i s' = inlinePerformIO $ scanner bs - !h = B.unsafeTake i bs - continue <- inputSpansChunks i - if continue - then go (h:acc) s' - else f s' (h:acc) -{-# INLINE scan_ #-} - --- | A stateful scanner. The predicate consumes and transforms a --- state argument, and each transformed state is passed to successive --- invocations of the predicate on each byte of the input until one --- returns 'Nothing' or the input ends. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'Nothing' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -scan :: s -> (s -> Word8 -> Maybe s) -> Parser ByteString -scan = scan_ $ \_ chunks -> return $! concatReverse chunks -{-# INLINE scan #-} - --- | Like 'scan', but generalized to return the final state of the --- scanner. -runScanner :: s -> (s -> Word8 -> Maybe s) -> Parser (ByteString, s) -runScanner = scan_ $ \s xs -> let !sx = concatReverse xs in return (sx, s) -{-# INLINE runScanner #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser requires the predicate to succeed on at least one byte --- of input: it will fail if the predicate never returns 'True' or if --- there is no input left. -takeWhile1 :: (Word8 -> Bool) -> Parser ByteString -takeWhile1 p = do - (`when` demandInput) =<< endOfChunk - s <- B8.takeWhile p <$> get - let len = B.length s - if len == 0 - then fail "takeWhile1" - else do - advance len - eoc <- endOfChunk - if eoc - then takeWhileAcc p [s] - else return s -{-# INLINE takeWhile1 #-} - --- | Match any byte in a set. --- --- >vowel = inClass "aeiou" --- --- Range notation is supported. --- --- >halfAlphabet = inClass "a-nA-N" --- --- To add a literal @\'-\'@ to a set, place it at the beginning or end --- of the string. -inClass :: String -> Word8 -> Bool -inClass s = (`memberWord8` mySet) - where mySet = charClass s - {-# NOINLINE mySet #-} -{-# INLINE inClass #-} - --- | Match any byte not in a set. -notInClass :: String -> Word8 -> Bool -notInClass s = not . inClass s -{-# INLINE notInClass #-} - --- | Match any byte. -anyWord8 :: Parser Word8 -anyWord8 = satisfy $ const True -{-# INLINE anyWord8 #-} - --- | Match a specific byte. -word8 :: Word8 -> Parser Word8 -word8 c = satisfy (== c) show c -{-# INLINE word8 #-} - --- | Match any byte except the given one. -notWord8 :: Word8 -> Parser Word8 -notWord8 c = satisfy (/= c) "not " ++ show c -{-# INLINE notWord8 #-} - --- | Match any byte, to perform lookahead. Returns 'Nothing' if end of --- input has been reached. Does not consume any input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -peekWord8 :: Parser (Maybe Word8) -peekWord8 = T.Parser $ \t pos@(Pos pos_) more _lose succ -> - case () of - _| pos_ < Buf.length t -> - let !w = Buf.unsafeIndex t pos_ - in succ t pos more (Just w) - | more == Complete -> - succ t pos more Nothing - | otherwise -> - let succ' t' pos' more' = let !w = Buf.unsafeIndex t' pos_ - in succ t' pos' more' (Just w) - lose' t' pos' more' = succ t' pos' more' Nothing - in prompt t pos more lose' succ' -{-# INLINE peekWord8 #-} - --- | Match any byte, to perform lookahead. Does not consume any --- input, but will fail if end of input has been reached. -peekWord8' :: Parser Word8 -peekWord8' = T.Parser $ \t pos more lose succ -> - if lengthAtLeast pos 1 t - then succ t pos more (Buf.unsafeIndex t (fromPos pos)) - else let succ' t' pos' more' bs' = succ t' pos' more' $! B.unsafeHead bs' - in ensureSuspended 1 t pos more lose succ' -{-# INLINE peekWord8' #-} - --- | Match either a single newline character @\'\\n\'@, or a carriage --- return followed by a newline character @\"\\r\\n\"@. -endOfLine :: Parser () -endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ()) - --- | Terminal failure continuation. -failK :: Failure a -failK t (Pos pos) _more stack msg = Fail (Buf.unsafeDrop pos t) stack msg -{-# INLINE failK #-} - --- | Terminal success continuation. -successK :: Success a a -successK t (Pos pos) _more a = Done (Buf.unsafeDrop pos t) a -{-# INLINE successK #-} - --- | Run a parser. -parse :: Parser a -> ByteString -> Result a -parse m s = T.runParser m (buffer s) (Pos 0) Incomplete failK successK -{-# INLINE parse #-} - --- | Run a parser that cannot be resupplied via a 'Partial' result. --- --- This function does not force a parser to consume all of its input. --- Instead, any residual input will be discarded. To force a parser --- to consume all of its input, use something like this: --- --- @ ---'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput') --- @ -parseOnly :: Parser a -> ByteString -> Either String a -parseOnly m s = case T.runParser m (buffer s) (Pos 0) Complete failK successK of - Fail _ [] err -> Left err - Fail _ ctxs err -> Left (intercalate " > " ctxs ++ ": " ++ err) - Done _ a -> Right a - _ -> error "parseOnly: impossible error!" -{-# INLINE parseOnly #-} - -get :: Parser ByteString -get = T.Parser $ \t pos more _lose succ -> - succ t pos more (Buf.unsafeDrop (fromPos pos) t) -{-# INLINE get #-} - -endOfChunk :: Parser Bool -endOfChunk = T.Parser $ \t pos more _lose succ -> - succ t pos more (fromPos pos == Buf.length t) -{-# INLINE endOfChunk #-} - -inputSpansChunks :: Int -> Parser Bool -inputSpansChunks i = T.Parser $ \t pos_ more _lose succ -> - let pos = pos_ + Pos i - in if fromPos pos < Buf.length t || more == Complete - then succ t pos more False - else let lose' t' pos' more' = succ t' pos' more' False - succ' t' pos' more' = succ t' pos' more' True - in prompt t pos more lose' succ' -{-# INLINE inputSpansChunks #-} - -advance :: Int -> Parser () -advance n = T.Parser $ \t pos more _lose succ -> - succ t (pos + Pos n) more () -{-# INLINE advance #-} - -ensureSuspended :: Int -> Buffer -> Pos -> More - -> Failure r - -> Success ByteString r - -> Result r -ensureSuspended n t pos more lose succ = - runParser (demandInput >> go) t pos more lose succ - where go = T.Parser $ \t' pos' more' lose' succ' -> - if lengthAtLeast pos' n t' - then succ' t' pos' more' (substring pos (Pos n) t') - else runParser (demandInput >> go) t' pos' more' lose' succ' - --- | If at least @n@ elements of input are available, return the --- current input, otherwise fail. -ensure :: Int -> Parser ByteString -ensure n = T.Parser $ \t pos more lose succ -> - if lengthAtLeast pos n t - then succ t pos more (substring pos (Pos n) t) - -- The uncommon case is kept out-of-line to reduce code size: - else ensureSuspended n t pos more lose succ -{-# INLINE ensure #-} - --- | Return both the result of a parse and the portion of the input --- that was consumed while it was being parsed. -match :: Parser a -> Parser (ByteString, a) -match p = T.Parser $ \t pos more lose succ -> - let succ' t' pos' more' a = - succ t' pos' more' (substring pos (pos'-pos) t', a) - in runParser p t pos more lose succ' - -lengthAtLeast :: Pos -> Int -> Buffer -> Bool -lengthAtLeast (Pos pos) n bs = Buf.length bs >= pos + n -{-# INLINE lengthAtLeast #-} - -substring :: Pos -> Pos -> Buffer -> ByteString -substring (Pos pos) (Pos n) = Buf.substring pos n -{-# INLINE substring #-} -- cgit v1.2.3