diff options
Diffstat (limited to 'vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs')
-rw-r--r-- | vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs | 549 |
1 files changed, 0 insertions, 549 deletions
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs b/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs deleted file mode 100644 index 3bbe51f0..00000000 --- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs +++ /dev/null @@ -1,549 +0,0 @@ -{-# LANGUAGE BangPatterns, FlexibleInstances, TypeFamilies, - TypeSynonymInstances, GADTs #-} -{-# OPTIONS_GHC -fno-warn-orphans #-} - --- | --- Module : Data.Attoparsec.ByteString.Char8 --- Copyright : Bryan O'Sullivan 2007-2011 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient, character-oriented combinator parsing for --- 'B.ByteString' strings, loosely based on the Parsec library. - -module Data.Attoparsec.ByteString.Char8 - ( - -- * Character encodings - -- $encodings - - -- * Parser types - Parser - , A.Result - , A.IResult(..) - , I.compareResults - - -- * Running parsers - , A.parse - , A.feed - , A.parseOnly - , A.parseTest - , A.parseWith - - -- ** Result conversion - , A.maybeResult - , A.eitherResult - - -- * Combinators - , (I.<?>) - , I.try - , module Data.Attoparsec.Combinator - - -- * Parsing individual characters - , char - , char8 - , anyChar - , notChar - , peekChar - , satisfy - - -- ** Special character parsers - , digit - , letter_iso8859_15 - , letter_ascii - , space - - -- ** Fast predicates - , isDigit - , isDigit_w8 - , isAlpha_iso8859_15 - , isAlpha_ascii - , isSpace - , isSpace_w8 - - -- *** Character classes - , inClass - , notInClass - - -- * Efficient string handling - , I.string - , stringCI - , skipSpace - , skipWhile - , I.take - , scan - , takeWhile - , takeWhile1 - , takeTill - - -- ** String combinators - -- $specalt - , (.*>) - , (<*.) - - -- ** Consume all remaining input - , I.takeByteString - , I.takeLazyByteString - - -- * Text parsing - , I.endOfLine - , isEndOfLine - , isHorizontalSpace - - -- * Numeric parsers - , decimal - , hexadecimal - , signed - , double - , Number(..) - , number - , rational - - -- * State observation and manipulation functions - , I.endOfInput - , I.atEnd - ) where - -import Control.Applicative ((*>), (<*), (<$>), (<|>)) -import Data.Attoparsec.ByteString.FastSet (charClass, memberChar) -import Data.Attoparsec.ByteString.Internal (Parser, (<?>)) -import Data.Attoparsec.Combinator -import Data.Attoparsec.Number (Number(..)) -import Data.Bits (Bits, (.|.), shiftL) -import Data.ByteString.Internal (c2w, w2c) -import Data.Int (Int8, Int16, Int32, Int64) -import Data.Ratio ((%)) -import Data.String (IsString(..)) -import Data.Word (Word8, Word16, Word32, Word64, Word) -import Prelude hiding (takeWhile) -import qualified Data.Attoparsec.ByteString as A -import qualified Data.Attoparsec.ByteString.Internal as I -import qualified Data.Attoparsec.Internal as I -import qualified Data.ByteString as B8 -import qualified Data.ByteString.Char8 as B - -instance (a ~ B.ByteString) => IsString (Parser a) where - fromString = I.string . B.pack - --- $encodings --- --- This module is intended for parsing text that is --- represented using an 8-bit character set, e.g. ASCII or --- ISO-8859-15. It /does not/ make any attempt to deal with character --- encodings, multibyte characters, or wide characters. In --- particular, all attempts to use characters above code point U+00FF --- will give wrong answers. --- --- Code points below U+0100 are simply translated to and from their --- numeric values, so e.g. the code point U+00A4 becomes the byte --- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic --- currency sign in ISO-8859-1). Haskell 'Char' values above U+00FF --- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@. - --- ASCII-specific but fast, oh yes. -toLower :: Word8 -> Word8 -toLower w | w >= 65 && w <= 90 = w + 32 - | otherwise = w - --- | Satisfy a literal string, ignoring case. -stringCI :: B.ByteString -> Parser B.ByteString -stringCI = I.stringTransform (B8.map toLower) -{-# INLINE stringCI #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser requires the predicate to succeed on at least one byte --- of input: it will fail if the predicate never returns 'True' or if --- there is no input left. -takeWhile1 :: (Char -> Bool) -> Parser B.ByteString -takeWhile1 p = I.takeWhile1 (p . w2c) -{-# INLINE takeWhile1 #-} - --- | The parser @satisfy p@ succeeds for any byte for which the --- predicate @p@ returns 'True'. Returns the byte that is actually --- parsed. --- --- >digit = satisfy isDigit --- > where isDigit c = c >= '0' && c <= '9' -satisfy :: (Char -> Bool) -> Parser Char -satisfy = I.satisfyWith w2c -{-# INLINE satisfy #-} - --- | Match a letter, in the ISO-8859-15 encoding. -letter_iso8859_15 :: Parser Char -letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15" -{-# INLINE letter_iso8859_15 #-} - --- | Match a letter, in the ASCII encoding. -letter_ascii :: Parser Char -letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii" -{-# INLINE letter_ascii #-} - --- | A fast alphabetic predicate for the ISO-8859-15 encoding --- --- /Note/: For all character encodings other than ISO-8859-15, and --- almost all Unicode code points above U+00A3, this predicate gives --- /wrong answers/. -isAlpha_iso8859_15 :: Char -> Bool -isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || - (c >= '\166' && moby c) - where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247" - {-# NOINLINE moby #-} -{-# INLINE isAlpha_iso8859_15 #-} - --- | A fast alphabetic predicate for the ASCII encoding --- --- /Note/: For all character encodings other than ASCII, and --- almost all Unicode code points above U+007F, this predicate gives --- /wrong answers/. -isAlpha_ascii :: Char -> Bool -isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') -{-# INLINE isAlpha_ascii #-} - --- | Parse a single digit. -digit :: Parser Char -digit = satisfy isDigit <?> "digit" -{-# INLINE digit #-} - --- | A fast digit predicate. -isDigit :: Char -> Bool -isDigit c = c >= '0' && c <= '9' -{-# INLINE isDigit #-} - --- | A fast digit predicate. -isDigit_w8 :: Word8 -> Bool -isDigit_w8 w = w >= 48 && w <= 57 -{-# INLINE isDigit_w8 #-} - --- | Match any character. -anyChar :: Parser Char -anyChar = satisfy $ const True -{-# INLINE anyChar #-} - --- | Match any character. Returns 'Nothing' if end of input has been --- reached. Does not consume any input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -peekChar :: Parser (Maybe Char) -peekChar = (fmap w2c) `fmap` I.peekWord8 -{-# INLINE peekChar #-} - --- | Fast predicate for matching ASCII space characters. --- --- /Note/: This predicate only gives correct answers for the ASCII --- encoding. For instance, it does not recognise U+00A0 (non-breaking --- space) as a space character, even though it is a valid ISO-8859-15 --- byte. For a Unicode-aware and only slightly slower predicate, --- use 'Data.Char.isSpace' -isSpace :: Char -> Bool -isSpace c = (c == ' ') || ('\t' <= c && c <= '\r') -{-# INLINE isSpace #-} - --- | Fast 'Word8' predicate for matching ASCII space characters. -isSpace_w8 :: Word8 -> Bool -isSpace_w8 w = (w == 32) || (9 <= w && w <= 13) -{-# INLINE isSpace_w8 #-} - - --- | Parse a space character. --- --- /Note/: This parser only gives correct answers for the ASCII --- encoding. For instance, it does not recognise U+00A0 (non-breaking --- space) as a space character, even though it is a valid ISO-8859-15 --- byte. -space :: Parser Char -space = satisfy isSpace <?> "space" -{-# INLINE space #-} - --- | Match a specific character. -char :: Char -> Parser Char -char c = satisfy (== c) <?> [c] -{-# INLINE char #-} - --- | Match a specific character, but return its 'Word8' value. -char8 :: Char -> Parser Word8 -char8 c = I.satisfy (== c2w c) <?> [c] -{-# INLINE char8 #-} - --- | Match any character except the given one. -notChar :: Char -> Parser Char -notChar c = satisfy (/= c) <?> "not " ++ [c] -{-# INLINE notChar #-} - --- | Match any character in a set. --- --- >vowel = inClass "aeiou" --- --- Range notation is supported. --- --- >halfAlphabet = inClass "a-nA-N" --- --- To add a literal \'-\' to a set, place it at the beginning or end --- of the string. -inClass :: String -> Char -> Bool -inClass s = (`memberChar` mySet) - where mySet = charClass s -{-# INLINE inClass #-} - --- | Match any character not in a set. -notInClass :: String -> Char -> Bool -notInClass s = not . inClass s -{-# INLINE notInClass #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'False' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -takeWhile :: (Char -> Bool) -> Parser B.ByteString -takeWhile p = I.takeWhile (p . w2c) -{-# INLINE takeWhile #-} - --- | A stateful scanner. The predicate consumes and transforms a --- state argument, and each transformed state is passed to successive --- invocations of the predicate on each byte of the input until one --- returns 'Nothing' or the input ends. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'Nothing' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString -scan s0 p = I.scan s0 (\s -> p s . w2c) -{-# INLINE scan #-} - --- | Consume input as long as the predicate returns 'False' --- (i.e. until it returns 'True'), and return the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'True' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -takeTill :: (Char -> Bool) -> Parser B.ByteString -takeTill p = I.takeTill (p . w2c) -{-# INLINE takeTill #-} - --- | Skip past input for as long as the predicate returns 'True'. -skipWhile :: (Char -> Bool) -> Parser () -skipWhile p = I.skipWhile (p . w2c) -{-# INLINE skipWhile #-} - --- | Skip over white space. -skipSpace :: Parser () -skipSpace = I.skipWhile isSpace_w8 -{-# INLINE skipSpace #-} - --- $specalt --- --- The '.*>' and '<*.' combinators are intended for use with the --- @OverloadedStrings@ language extension. They simplify the common --- task of matching a statically known string, then immediately --- parsing something else. --- --- An example makes this easier to understand: --- --- @{-\# LANGUAGE OverloadedStrings #-} --- --- shoeSize = \"Shoe size: \" '.*>' 'decimal' --- @ --- --- If we were to try to use '*>' above instead, the type checker would --- not be able to tell which 'IsString' instance to use for the text --- in quotes. We would have to be explicit, using either a type --- signature or the 'I.string' parser. - --- | Type-specialized version of '*>' for 'B.ByteString'. -(.*>) :: B.ByteString -> Parser a -> Parser a -s .*> f = I.string s *> f - --- | Type-specialized version of '<*' for 'B.ByteString'. -(<*.) :: Parser a -> B.ByteString -> Parser a -f <*. s = f <* I.string s - --- | A predicate that matches either a carriage return @\'\\r\'@ or --- newline @\'\\n\'@ character. -isEndOfLine :: Word8 -> Bool -isEndOfLine w = w == 13 || w == 10 -{-# INLINE isEndOfLine #-} - --- | A predicate that matches either a space @\' \'@ or horizontal tab --- @\'\\t\'@ character. -isHorizontalSpace :: Word8 -> Bool -isHorizontalSpace w = w == 32 || w == 9 -{-# INLINE isHorizontalSpace #-} - --- | Parse and decode an unsigned hexadecimal number. The hex digits --- @\'a\'@ through @\'f\'@ may be upper or lower case. --- --- This parser does not accept a leading @\"0x\"@ string. -hexadecimal :: (Integral a, Bits a) => Parser a -hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit - where - isHexDigit w = (w >= 48 && w <= 57) || - (w >= 97 && w <= 102) || - (w >= 65 && w <= 70) - step a w | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) - | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) - | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) -{-# SPECIALISE hexadecimal :: Parser Int #-} -{-# SPECIALISE hexadecimal :: Parser Int8 #-} -{-# SPECIALISE hexadecimal :: Parser Int16 #-} -{-# SPECIALISE hexadecimal :: Parser Int32 #-} -{-# SPECIALISE hexadecimal :: Parser Int64 #-} -{-# SPECIALISE hexadecimal :: Parser Integer #-} -{-# SPECIALISE hexadecimal :: Parser Word #-} -{-# SPECIALISE hexadecimal :: Parser Word8 #-} -{-# SPECIALISE hexadecimal :: Parser Word16 #-} -{-# SPECIALISE hexadecimal :: Parser Word32 #-} -{-# SPECIALISE hexadecimal :: Parser Word64 #-} - --- | Parse and decode an unsigned decimal number. -decimal :: Integral a => Parser a -decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig - where isDig w = w >= 48 && w <= 57 - step a w = a * 10 + fromIntegral (w - 48) -{-# SPECIALISE decimal :: Parser Int #-} -{-# SPECIALISE decimal :: Parser Int8 #-} -{-# SPECIALISE decimal :: Parser Int16 #-} -{-# SPECIALISE decimal :: Parser Int32 #-} -{-# SPECIALISE decimal :: Parser Int64 #-} -{-# SPECIALISE decimal :: Parser Integer #-} -{-# SPECIALISE decimal :: Parser Word #-} -{-# SPECIALISE decimal :: Parser Word8 #-} -{-# SPECIALISE decimal :: Parser Word16 #-} -{-# SPECIALISE decimal :: Parser Word32 #-} -{-# SPECIALISE decimal :: Parser Word64 #-} - --- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign --- character. -signed :: Num a => Parser a -> Parser a -{-# SPECIALISE signed :: Parser Int -> Parser Int #-} -{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-} -{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-} -{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-} -{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-} -{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-} -signed p = (negate <$> (char8 '-' *> p)) - <|> (char8 '+' *> p) - <|> p - --- | Parse a rational number. --- --- This parser accepts an optional leading sign character, followed by --- at least one decimal digit. The syntax similar to that accepted by --- the 'read' function, with the exception that a trailing @\'.\'@ or --- @\'e\'@ /not/ followed by a number is not consumed. --- --- Examples with behaviour identical to 'read', if you feed an empty --- continuation to the first result: --- --- >rational "3" == Done 3.0 "" --- >rational "3.1" == Done 3.1 "" --- >rational "3e4" == Done 30000.0 "" --- >rational "3.1e4" == Done 31000.0, "" --- --- Examples with behaviour identical to 'read': --- --- >rational ".3" == Fail "input does not start with a digit" --- >rational "e3" == Fail "input does not start with a digit" --- --- Examples of differences from 'read': --- --- >rational "3.foo" == Done 3.0 ".foo" --- >rational "3e" == Done 3.0 "e" --- --- This function does not accept string representations of \"NaN\" or --- \"Infinity\". -rational :: Fractional a => Parser a -{-# SPECIALIZE rational :: Parser Double #-} -{-# SPECIALIZE rational :: Parser Float #-} -{-# SPECIALIZE rational :: Parser Rational #-} -rational = floaty $ \real frac fracDenom -> fromRational $ - real % 1 + frac % fracDenom - --- | Parse a rational number. --- --- The syntax accepted by this parser is the same as for 'rational'. --- --- /Note/: This function is almost ten times faster than 'rational', --- but is slightly less accurate. --- --- The 'Double' type supports about 16 decimal places of accuracy. --- For 94.2% of numbers, this function and 'rational' give identical --- results, but for the remaining 5.8%, this function loses precision --- around the 15th decimal place. For 0.001% of numbers, this --- function will lose precision at the 13th or 14th decimal place. --- --- This function does not accept string representations of \"NaN\" or --- \"Infinity\". -double :: Parser Double -double = floaty asDouble - -asDouble :: Integer -> Integer -> Integer -> Double -asDouble real frac fracDenom = - fromIntegral real + fromIntegral frac / fromIntegral fracDenom -{-# INLINE asDouble #-} - --- | Parse a number, attempting to preserve both speed and precision. --- --- The syntax accepted by this parser is the same as for 'rational'. --- --- /Note/: This function is almost ten times faster than 'rational'. --- On integral inputs, it gives perfectly accurate answers, and on --- floating point inputs, it is slightly less accurate than --- 'rational'. --- --- This function does not accept string representations of \"NaN\" or --- \"Infinity\". -number :: Parser Number -number = floaty $ \real frac fracDenom -> - if frac == 0 && fracDenom == 0 - then I real - else D (asDouble real frac fracDenom) -{-# INLINE number #-} - -data T = T !Integer !Int - -floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a -{-# INLINE floaty #-} -floaty f = do - let minus = 45 - plus = 43 - !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|> - return True - real <- decimal - let tryFraction = do - let dot = 46 - _ <- I.satisfy (==dot) - ds <- I.takeWhile isDigit_w8 - case I.parseOnly decimal ds of - Right n -> return $ T n (B.length ds) - _ -> fail "no digits after decimal" - T fraction fracDigits <- tryFraction <|> return (T 0 0) - let littleE = 101 - bigE = 69 - e w = w == littleE || w == bigE - power <- (I.satisfy e *> signed decimal) <|> return (0::Int) - let n = if fracDigits == 0 - then if power == 0 - then fromIntegral real - else fromIntegral real * (10 ^^ power) - else if power == 0 - then f real fraction (10 ^ fracDigits) - else f real fraction (10 ^ fracDigits) * (10 ^^ power) - return $ if positive - then n - else -n |