From cc269e6b0b615e9e237c35a985e4ace7b9ab0dd9 Mon Sep 17 00:00:00 2001 From: Mateusz Kowalczyk Date: Mon, 5 May 2014 09:01:03 +0200 Subject: Move parser + parser tests out to own package. We move some types out that are necessary as well and then re-export and specialise them in the core Haddock. Reason for moving out spec tests is that if we're working on the parser, we can simply work on that and we can ignore the rest of Haddock. The downside is that it's a little inconvenient if at the end of the day we want to see that everything passes. --- .../Data/Attoparsec/ByteString.hs | 205 +++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs (limited to 'haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs') diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs new file mode 100644 index 00000000..d2f3761c --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs @@ -0,0 +1,205 @@ +-- | +-- Module : Data.Attoparsec.ByteString +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient combinator parsing for 'B.ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString + ( + -- * Differences from Parsec + -- $parsec + + -- * Incremental input + -- $incremental + + -- * Performance considerations + -- $performance + + -- * Parser types + I.Parser + , Result + , T.IResult(..) + , I.compareResults + + -- * Running parsers + , parse + , feed + , I.parseOnly + , parseWith + , parseTest + + -- ** Result conversion + , maybeResult + , eitherResult + + -- * Combinators + , (I.) + , I.try + , module Data.Attoparsec.Combinator + + -- * Parsing individual bytes + , I.word8 + , I.anyWord8 + , I.notWord8 + , I.peekWord8 + , I.satisfy + , I.satisfyWith + , I.skip + + -- ** Byte classes + , I.inClass + , I.notInClass + + -- * Efficient string handling + , I.string + , I.skipWhile + , I.take + , I.scan + , I.takeWhile + , I.takeWhile1 + , I.takeTill + + -- ** Consume all remaining input + , I.takeByteString + , I.takeLazyByteString + + -- * State observation and manipulation functions + , I.endOfInput + , I.atEnd + ) where + +import Data.Attoparsec.Combinator +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B +import Data.Attoparsec.ByteString.Internal (Result, parse) +import qualified Data.Attoparsec.Internal.Types as T + +-- $parsec +-- +-- Compared to Parsec 3, Attoparsec makes several tradeoffs. It is +-- not intended for, or ideal for, all possible uses. +-- +-- * While Attoparsec can consume input incrementally, Parsec cannot. +-- Incremental input is a huge deal for efficient and secure network +-- and system programming, since it gives much more control to users +-- of the library over matters such as resource usage and the I/O +-- model to use. +-- +-- * Much of the performance advantage of Attoparsec is gained via +-- high-performance parsers such as 'I.takeWhile' and 'I.string'. +-- If you use complicated combinators that return lists of bytes or +-- characters, there is less performance difference between the two +-- libraries. +-- +-- * Unlike Parsec 3, Attoparsec does not support being used as a +-- monad transformer. +-- +-- * Attoparsec is specialised to deal only with strict 'B.ByteString' +-- input. Efficiency concerns rule out both lists and lazy +-- bytestrings. The usual use for lazy bytestrings would be to +-- allow consumption of very large input without a large footprint. +-- For this need, Attoparsec's incremental input provides an +-- excellent substitute, with much more control over when input +-- takes place. If you must use lazy bytestrings, see the 'Lazy' +-- module, which feeds lazy chunks to a regular parser. +-- +-- * Parsec parsers can produce more helpful error messages than +-- Attoparsec parsers. This is a matter of focus: Attoparsec avoids +-- the extra book-keeping in favour of higher performance. + +-- $incremental +-- +-- Attoparsec supports incremental input, meaning that you can feed it +-- a bytestring that represents only part of the expected total amount +-- of data to parse. If your parser reaches the end of a fragment of +-- input and could consume more input, it will suspend parsing and +-- return a 'T.Partial' continuation. +-- +-- Supplying the 'T.Partial' continuation with another bytestring will +-- resume parsing at the point where it was suspended. You must be +-- prepared for the result of the resumed parse to be another +-- 'T.Partial' continuation. +-- +-- To indicate that you have no more input, supply the 'T.Partial' +-- continuation with an empty bytestring. +-- +-- Remember that some parsing combinators will not return a result +-- until they reach the end of input. They may thus cause 'T.Partial' +-- results to be returned. +-- +-- If you do not need support for incremental input, consider using +-- the 'I.parseOnly' function to run your parser. It will never +-- prompt for more input. + +-- $performance +-- +-- If you write an Attoparsec-based parser carefully, it can be +-- realistic to expect it to perform within a factor of 2 of a +-- hand-rolled C parser (measuring megabytes parsed per second). +-- +-- To actually achieve high performance, there are a few guidelines +-- that it is useful to follow. +-- +-- Use the 'B.ByteString'-oriented parsers whenever possible, +-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'. There is +-- about a factor of 100 difference in performance between the two +-- kinds of parser. +-- +-- For very simple byte-testing predicates, write them by hand instead +-- of using 'I.inClass' or 'I.notInClass'. For instance, both of +-- these predicates test for an end-of-line byte, but the first is +-- much faster than the second: +-- +-- >endOfLine_fast w = w == 13 || w == 10 +-- >endOfLine_slow = inClass "\r\n" +-- +-- Make active use of benchmarking and profiling tools to measure, +-- find the problems with, and improve the performance of your parser. + +-- | If a parser has returned a 'T.Partial' result, supply it with more +-- input. +feed :: Result r -> B.ByteString -> Result r +feed f@(T.Fail _ _ _) _ = f +feed (T.Partial k) d = k d +feed (T.Done bs r) d = T.Done (B.append bs d) r +{-# INLINE feed #-} + +-- | Run a parser and print its result to standard output. +parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO () +parseTest p s = print (parse p s) + +-- | Run a parser with an initial input string, and a monadic action +-- that can supply more input if needed. +parseWith :: Monad m => + (m B.ByteString) + -- ^ An action that will be executed to provide the parser + -- with more input, if necessary. The action must return an + -- 'B.empty' string when there is no more input available. + -> I.Parser a + -> B.ByteString + -- ^ Initial input for the parser. + -> m (Result a) +parseWith refill p s = step $ parse p s + where step (T.Partial k) = (step . k) =<< refill + step r = return r +{-# INLINE parseWith #-} + +-- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result +-- is treated as failure. +maybeResult :: Result r -> Maybe r +maybeResult (T.Done _ r) = Just r +maybeResult _ = Nothing + +-- | Convert a 'Result' value to an 'Either' value. A 'T.Partial' +-- result is treated as failure. +eitherResult :: Result r -> Either String r +eitherResult (T.Done _ r) = Right r +eitherResult (T.Fail _ _ msg) = Left msg +eitherResult _ = Left "Result: incomplete input" -- cgit v1.2.3