Add ByteString version of Attoparsec

author: Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> 2013-07-09 14:11:22 +0100
committer: Austin Seipp <austin@well-typed.com> 2014-01-12 14:48:35 -0600
commit: c1228df0339d041b455bb993786a9ed6322c5e01 (patch)
tree: 6d42c42934820868fa931919bcdd9f45b228c222 /vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
parent: a2f3551c276cc77d3c93f048b77cab96a5e648ed (diff)
1 files changed, 549 insertions, 0 deletions
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs b/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
new file mode 100644
index 00000000..3bbe51f0
--- /dev/null
+++ b/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
@@ -0,0 +1,549 @@
+{-# LANGUAGE BangPatterns, FlexibleInstances, TypeFamilies,
+    TypeSynonymInstances, GADTs #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+-- |
+-- Module      :  Data.Attoparsec.ByteString.Char8
+-- Copyright   :  Bryan O'Sullivan 2007-2011
+-- License     :  BSD3
+--
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient, character-oriented combinator parsing for
+-- 'B.ByteString' strings, loosely based on the Parsec library.
+
+module Data.Attoparsec.ByteString.Char8
+    (
+    -- * Character encodings
+    -- $encodings
+
+    -- * Parser types
+      Parser
+    , A.Result
+    , A.IResult(..)
+    , I.compareResults
+
+    -- * Running parsers
+    , A.parse
+    , A.feed
+    , A.parseOnly
+    , A.parseTest
+    , A.parseWith
+
+    -- ** Result conversion
+    , A.maybeResult
+    , A.eitherResult
+
+    -- * Combinators
+    , (I.<?>)
+    , I.try
+    , module Data.Attoparsec.Combinator
+
+    -- * Parsing individual characters
+    , char
+    , char8
+    , anyChar
+    , notChar
+    , peekChar
+    , satisfy
+
+    -- ** Special character parsers
+    , digit
+    , letter_iso8859_15
+    , letter_ascii
+    , space
+
+    -- ** Fast predicates
+    , isDigit
+    , isDigit_w8
+    , isAlpha_iso8859_15
+    , isAlpha_ascii
+    , isSpace
+    , isSpace_w8
+
+    -- *** Character classes
+    , inClass
+    , notInClass
+
+    -- * Efficient string handling
+    , I.string
+    , stringCI
+    , skipSpace
+    , skipWhile
+    , I.take
+    , scan
+    , takeWhile
+    , takeWhile1
+    , takeTill
+
+    -- ** String combinators
+    -- $specalt
+    , (.*>)
+    , (<*.)
+
+    -- ** Consume all remaining input
+    , I.takeByteString
+    , I.takeLazyByteString
+
+    -- * Text parsing
+    , I.endOfLine
+    , isEndOfLine
+    , isHorizontalSpace
+
+    -- * Numeric parsers
+    , decimal
+    , hexadecimal
+    , signed
+    , double
+    , Number(..)
+    , number
+    , rational
+
+    -- * State observation and manipulation functions
+    , I.endOfInput
+    , I.atEnd
+    ) where
+
+import Control.Applicative ((*>), (<*), (<$>), (<|>))
+import Data.Attoparsec.ByteString.FastSet (charClass, memberChar)
+import Data.Attoparsec.ByteString.Internal (Parser, (<?>))
+import Data.Attoparsec.Combinator
+import Data.Attoparsec.Number (Number(..))
+import Data.Bits (Bits, (.|.), shiftL)
+import Data.ByteString.Internal (c2w, w2c)
+import Data.Int (Int8, Int16, Int32, Int64)
+import Data.Ratio ((%))
+import Data.String (IsString(..))
+import Data.Word (Word8, Word16, Word32, Word64, Word)
+import Prelude hiding (takeWhile)
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.ByteString.Internal as I
+import qualified Data.Attoparsec.Internal as I
+import qualified Data.ByteString as B8
+import qualified Data.ByteString.Char8 as B
+
+instance (a ~ B.ByteString) => IsString (Parser a) where
+    fromString lit = I.string (B.pack lit)
+
+-- $encodings
+--
+-- This module is intended for parsing text that is
+-- represented using an 8-bit character set, e.g. ASCII or
+-- ISO-8859-15.  It /does not/ make any attempt to deal with character
+-- encodings, multibyte characters, or wide characters.  In
+-- particular, all attempts to use characters above code point U+00FF
+-- will give wrong answers.
+--
+-- Code points below U+0100 are simply translated to and from their
+-- numeric values, so e.g. the code point U+00A4 becomes the byte
+-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
+-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
+-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.
+
+-- Lower-cases the ASCII bytes 65..90 (A-Z); every other byte is unchanged.
+toLower :: Word8 -> Word8
+toLower byte = if isUpper then byte + 32 else byte
+  where isUpper = 65 <= byte && byte <= 90
+
+-- | Satisfy a literal string, ignoring case (folding is ASCII-only: A-Z).
+stringCI :: B.ByteString -> Parser B.ByteString
+stringCI = I.stringTransform asciiLower where asciiLower = B8.map toLower
+{-# INLINE stringCI #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser requires the predicate to succeed on at least one byte
+-- of input: it will fail if the predicate never returns 'True' or if
+-- there is no input left.
+takeWhile1 :: (Char -> Bool) -> Parser B.ByteString
+takeWhile1 p = I.takeWhile1 (\w -> p (w2c w))
+{-# INLINE takeWhile1 #-}
+
+-- | The parser @satisfy p@ succeeds for any byte for which the
+-- predicate @p@ returns 'True'; the predicate sees the byte as a
+-- 'Char' (converted with 'w2c'). Returns the parsed byte as a 'Char'.
+--
+-- >digit = satisfy isDigit
+-- >    where isDigit c = c >= '0' && c <= '9'
+satisfy :: (Char -> Bool) -> Parser Char
+satisfy = I.satisfyWith w2c
+{-# INLINE satisfy #-}
+
+-- | Parse one ISO-8859-15 letter, as classified by 'isAlpha_iso8859_15'.
+letter_iso8859_15 :: Parser Char
+letter_iso8859_15 = I.satisfyWith w2c isAlpha_iso8859_15 <?> "letter_iso8859_15"
+{-# INLINE letter_iso8859_15 #-}
+
+-- | Parse one ASCII letter, as classified by 'isAlpha_ascii'.
+letter_ascii :: Parser Char
+letter_ascii = I.satisfyWith w2c isAlpha_ascii <?> "letter_ascii"
+{-# INLINE letter_ascii #-}
+
+-- | A fast alphabetic predicate for the ISO-8859-15 encoding
+--
+-- /Note/: For all character encodings other than ISO-8859-15, and
+-- almost all Unicode code points above U+00A3, this predicate gives
+-- /wrong answers/.
+isAlpha_iso8859_15 :: Char -> Bool
+isAlpha_iso8859_15 c = asciiLetter || (c >= '\166' && moby c)
+  where asciiLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+        moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247"
+        {-# NOINLINE moby #-}
+{-# INLINE isAlpha_iso8859_15 #-}
+
+-- | A fast alphabetic predicate for ASCII: 'True' only for a-z and A-Z.
+--
+-- /Note/: For other encodings, and for almost all Unicode code points
+-- above U+007F, this predicate gives /wrong answers/.
+isAlpha_ascii :: Char -> Bool
+isAlpha_ascii c = within 'a' 'z' || within 'A' 'Z'
+  where within lo hi = lo <= c && c <= hi
+{-# INLINE isAlpha_ascii #-}
+
+-- | Parse one decimal digit, as classified by 'isDigit'.
+digit :: Parser Char
+digit = I.satisfyWith w2c isDigit <?> "digit"
+{-# INLINE digit #-}
+
+-- | A fast digit predicate: 'True' only for the characters @0@ to @9@.
+isDigit :: Char -> Bool
+isDigit c = '0' <= c && c <= '9'
+{-# INLINE isDigit #-}
+
+-- | A fast digit predicate on bytes: 'True' only for 48..57 (ASCII @0@-@9@).
+isDigit_w8 :: Word8 -> Bool
+isDigit_w8 w = w - 48 <= 9  -- Word8 subtraction wraps, so bytes below 48 become large
+{-# INLINE isDigit_w8 #-}
+
+-- | Match any character: the predicate used accepts every byte.
+anyChar :: Parser Char
+anyChar = satisfy (const True)
+{-# INLINE anyChar #-}
+
+-- | Match any character. Returns 'Nothing' if end of input has been
+-- reached. Does not consume any input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+peekChar :: Parser (Maybe Char)
+peekChar = fmap w2c <$> I.peekWord8
+{-# INLINE peekChar #-}
+
+-- | Fast predicate for matching ASCII space characters: the space
+-- itself and the control characters @\'\\t\'@ through @\'\\r\'@.
+--
+-- /Note/: This predicate is only correct for ASCII.  It rejects, for
+-- example, U+00A0 (non-breaking space), although that is a valid
+-- ISO-8859-15 byte.  'Data.Char.isSpace' is Unicode-aware and only slightly slower.
+isSpace :: Char -> Bool
+isSpace c | c == ' '  = True
+          | otherwise = '\t' <= c && c <= '\r'
+{-# INLINE isSpace #-}
+
+-- | Fast 'Word8' predicate for ASCII space characters: byte 32, or 9..13.
+isSpace_w8 :: Word8 -> Bool
+isSpace_w8 w = w == 32 || w - 9 <= 4  -- wrapping subtraction: only 9..13 land in 0..4
+{-# INLINE isSpace_w8 #-}
+
+
+-- | Parse a space character.
+--
+-- /Note/: This parser only gives correct answers for the ASCII
+-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
+-- space) as a space character, even though it is a valid ISO-8859-15
+-- byte.
+space :: Parser Char
+space = I.satisfyWith w2c isSpace <?> "space"
+{-# INLINE space #-}
+
+-- | Match exactly the given character; the parser is named after it.
+char :: Char -> Parser Char
+char c = satisfy (c ==) <?> [c]
+{-# INLINE char #-}
+
+-- | Match a specific character, returning the matched byte as a 'Word8'.
+char8 :: Char -> Parser Word8
+char8 c = let byte = c2w c in I.satisfy (== byte) <?> [c]
+{-# INLINE char8 #-}
+
+-- | Match any single character other than the given one.
+notChar :: Char -> Parser Char
+notChar c = satisfy (c /=) <?> ("not " ++ [c])
+{-# INLINE notChar #-}
+
+-- | Match any character in a set.
+--
+-- >vowel = inClass "aeiou"
+--
+-- Range notation is supported.
+--
+-- >halfAlphabet = inClass "a-nA-N"
+--
+-- To add a literal \'-\' to a set, place it at the beginning or end
+-- of the string.
+inClass :: String -> Char -> Bool
+inClass s = \c -> c `memberChar` classSet
+    where classSet = charClass s  -- shared by every call of one @inClass s@ closure
+{-# INLINE inClass #-}
+
+-- | The complement of 'inClass': match any character not in the set.
+notInClass :: String -> Char -> Bool
+notInClass s = let inSet = inClass s in \c -> not (inSet c)
+{-# INLINE notInClass #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'False' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeWhile :: (Char -> Bool) -> Parser B.ByteString
+takeWhile p = I.takeWhile (\w -> p (w2c w))
+{-# INLINE takeWhile #-}
+
+-- | A stateful scanner.  The predicate consumes and transforms a
+-- state argument, and each transformed state is passed to successive
+-- invocations of the predicate on each byte of the input until one
+-- returns 'Nothing' or the input ends.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'Nothing' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString
+scan s0 p = I.scan s0 (\st w -> p st (w2c w))
+{-# INLINE scan #-}
+
+-- | Consume input as long as the predicate returns 'False'
+-- (i.e. until it returns 'True'), and return the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'True' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeTill :: (Char -> Bool) -> Parser B.ByteString
+takeTill p = I.takeTill (\w -> p (w2c w))
+{-# INLINE takeTill #-}
+
+-- | Skip past input for as long as the predicate holds; nothing is returned.
+skipWhile :: (Char -> Bool) -> Parser ()
+skipWhile p = I.skipWhile (\w -> p (w2c w))
+{-# INLINE skipWhile #-}
+
+-- | Skip over white space, i.e. bytes accepted by 'isSpace_w8' (32 and 9..13).
+skipSpace :: Parser ()
+skipSpace = I.skipWhile isSpace_w8
+{-# INLINE skipSpace #-}
+
+-- $specalt
+--
+-- The '.*>' and '<*.' combinators are intended for use with the
+-- @OverloadedStrings@ language extension.  They simplify the common
+-- task of matching a statically known string, then immediately
+-- parsing something else.
+--
+-- An example makes this easier to understand:
+--
+-- @{-\# LANGUAGE OverloadedStrings #-}
+--
+-- shoeSize = \"Shoe size: \" '.*>' 'decimal'
+-- @
+--
+-- If we were to try to use '*>' above instead, the type checker would
+-- not be able to tell which 'IsString' instance to use for the text
+-- in quotes.  We would have to be explicit, using either a type
+-- signature or the 'I.string' parser.
+
+-- | Type-specialized '*>': match the literal @s@, then run @f@ and keep its result.
+(.*>) :: B.ByteString -> Parser a -> Parser a
+(.*>) s f = I.string s *> f
+
+-- | Type-specialized '<*': run @f@, then match the literal @s@, keeping @f@'s result.
+(<*.) :: Parser a -> B.ByteString -> Parser a
+(<*.) f s = f <* I.string s
+
+-- | A predicate that matches a newline @\'\\n\'@ (byte 10) or a
+-- carriage return @\'\\r\'@ (byte 13).
+isEndOfLine :: Word8 -> Bool
+isEndOfLine byte = byte == 10 || byte == 13
+{-# INLINE isEndOfLine #-}
+
+-- | A predicate that matches a horizontal tab @\'\\t\'@ (byte 9) or a
+-- space @\' \'@ (byte 32).
+isHorizontalSpace :: Word8 -> Bool
+isHorizontalSpace byte = byte == 9 || byte == 32
+{-# INLINE isHorizontalSpace #-}
+
+-- | Parse and decode an unsigned hexadecimal number.  The hex digits
+-- @\'a\'@ through @\'f\'@ may be upper or lower case.
+--
+-- This parser does not accept a leading @\"0x\"@ string.
+hexadecimal :: (Integral a, Bits a) => Parser a
+hexadecimal = B8.foldl' step 0 <$> I.takeWhile1 isHexDigit
+  where
+    isHexDigit w = isDigit_w8 w || (w >= 97 && w <= 102) || (w >= 65 && w <= 70)
+    -- Shift the accumulator one nibble left and OR in the digit's value.
+    step acc w = (acc `shiftL` 4) .|. fromIntegral (w - base)
+      where base | isDigit_w8 w = 48   -- '0'..'9' -> 0..9
+                 | w >= 97      = 87   -- 'a'..'f' -> 10..15
+                 | otherwise    = 55   -- 'A'..'F' -> 10..15
+{-# SPECIALISE hexadecimal :: Parser Int #-}
+{-# SPECIALISE hexadecimal :: Parser Int8 #-}
+{-# SPECIALISE hexadecimal :: Parser Int16 #-}
+{-# SPECIALISE hexadecimal :: Parser Int32 #-}
+{-# SPECIALISE hexadecimal :: Parser Int64 #-}
+{-# SPECIALISE hexadecimal :: Parser Integer #-}
+{-# SPECIALISE hexadecimal :: Parser Word #-}
+{-# SPECIALISE hexadecimal :: Parser Word8 #-}
+{-# SPECIALISE hexadecimal :: Parser Word16 #-}
+{-# SPECIALISE hexadecimal :: Parser Word32 #-}
+{-# SPECIALISE hexadecimal :: Parser Word64 #-}
+
+-- | Parse and decode an unsigned decimal number (one or more digits).
+-- No overflow check is made when the result type is bounded.
+decimal :: Integral a => Parser a
+decimal = B8.foldl' addDigit 0 <$> I.takeWhile1 isDigit_w8
+  where addDigit acc w = acc * 10 + fromIntegral (w - 48)
+{-# SPECIALISE decimal :: Parser Int #-}
+{-# SPECIALISE decimal :: Parser Int8 #-}
+{-# SPECIALISE decimal :: Parser Int16 #-}
+{-# SPECIALISE decimal :: Parser Int32 #-}
+{-# SPECIALISE decimal :: Parser Int64 #-}
+{-# SPECIALISE decimal :: Parser Integer #-}
+{-# SPECIALISE decimal :: Parser Word #-}
+{-# SPECIALISE decimal :: Parser Word8 #-}
+{-# SPECIALISE decimal :: Parser Word16 #-}
+{-# SPECIALISE decimal :: Parser Word32 #-}
+{-# SPECIALISE decimal :: Parser Word64 #-}
+
+-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
+-- character.
+signed :: Num a => Parser a -> Parser a
+{-# SPECIALISE signed :: Parser Int -> Parser Int #-}
+{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-}
+{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-}
+{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-}
+{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-}
+{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-}
+signed p = negative <|> positive <|> p
+  where negative = char8 '-' *> fmap negate p
+        positive = char8 '+' *> p
+
+-- | Parse a rational number.
+--
+-- This parser accepts an optional leading sign character, followed by
+-- at least one decimal digit.  The syntax is similar to that accepted by
+-- the 'read' function, with the exception that a trailing @\'.\'@ or
+-- @\'e\'@ /not/ followed by a number is not consumed.
+--
+-- Examples with behaviour identical to 'read', if you feed an empty
+-- continuation to the first result:
+--
+-- >rational "3"     == Done 3.0 ""
+-- >rational "3.1"   == Done 3.1 ""
+-- >rational "3e4"   == Done 30000.0 ""
+-- >rational "3.1e4" == Done 31000.0 ""
+--
+-- Examples with behaviour identical to 'read':
+--
+-- >rational ".3"    == Fail "input does not start with a digit"
+-- >rational "e3"    == Fail "input does not start with a digit"
+--
+-- Examples of differences from 'read':
+--
+-- >rational "3.foo" == Done 3.0 ".foo"
+-- >rational "3e"    == Done 3.0 "e"
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+rational :: Fractional a => Parser a
+{-# SPECIALIZE rational :: Parser Double #-}
+{-# SPECIALIZE rational :: Parser Float #-}
+{-# SPECIALIZE rational :: Parser Rational #-}
+rational = floaty combine
+  where combine whole frac fracDenom = fromRational (whole % 1 + frac % fracDenom)
+
+-- | Parse a rational number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational',
+-- but is slightly less accurate.
+--
+-- The 'Double' type supports about 16 decimal places of accuracy.
+-- For 94.2% of numbers, this function and 'rational' give identical
+-- results, but for the remaining 5.8%, this function loses precision
+-- around the 15th decimal place.  For 0.001% of numbers, this
+-- function will lose precision at the 13th or 14th decimal place.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+double :: Parser Double
+double = floaty asDouble  -- the fraction is folded in using 'Double' arithmetic ('asDouble')
+
+asDouble :: Integer -> Integer -> Integer -> Double
+asDouble whole frac fracDenom = toD whole + toD frac / toD fracDenom
+  where toD = fromInteger :: Integer -> Double
+{-# INLINE asDouble #-}
+
+-- | Parse a number, attempting to preserve both speed and precision.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational'.
+-- On integral inputs, it gives perfectly accurate answers, and on
+-- floating point inputs, it is slightly less accurate than
+-- 'rational'.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+number :: Parser Number
+number = floaty $ \real frac fracDenom ->
+         -- 'floaty' calls this only after parsing fraction digits, with
+         -- fracDenom = 10 ^ fracDigits (never 0); integral input never gets here.
+         D (asDouble real frac fracDenom)
+{-# INLINE number #-}
+
+data T = T !Integer !Int  -- fraction digits read as a number, and how many digits were read
+
+-- Shared engine behind 'rational', 'double' and 'number'.  Parses an
+-- optional sign, the integer digits, an optional @.digits@ fraction and an
+-- optional exponent.  @f@ is given the integer part, the fraction digits
+-- read as a number and @10 ^ (number of fraction digits)@; it is only
+-- called when a fraction was parsed, otherwise the integer part is
+-- converted with 'fromIntegral'.
+floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a
+{-# INLINE floaty #-}
+floaty f = do
+  !positive <- ((== plus) <$> I.satisfy isSign) <|> return True
+  whole <- decimal
+  T fraction fracDigits <- fractionPart <|> return (T 0 0)
+  power <- (I.satisfy isExpMark *> signed decimal) <|> return (0::Int)
+  let mantissa | fracDigits == 0 = fromIntegral whole
+               | otherwise       = f whole fraction (10 ^ fracDigits)
+      n | power == 0 = mantissa
+        | otherwise  = mantissa * (10 ^^ power)
+  return $ if positive then n else -n
+  where
+    plus = 43                          -- '+'
+    isSign w = w == 45 || w == plus    -- '-' or '+'
+    isExpMark w = w == 101 || w == 69  -- 'e' or 'E'
+    -- A '.' followed by at least one digit; with no digits this
+    -- alternative fails and the caller falls back to @T 0 0@.
+    fractionPart = do
+      _ <- I.satisfy (== 46)           -- '.'
+      ds <- I.takeWhile isDigit_w8
+      case I.parseOnly decimal ds of
+        Right n -> return $ T n (B.length ds)
+        _       -> fail "no digits after decimal"
author	Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>	2013-07-09 14:11:22 +0100
committer	Austin Seipp <austin@well-typed.com>	2014-01-12 14:48:35 -0600
commit	c1228df0339d041b455bb993786a9ed6322c5e01 (patch)
tree	6d42c42934820868fa931919bcdd9f45b228c222 /vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
parent	a2f3551c276cc77d3c93f048b77cab96a5e648ed (diff)