From cc269e6b0b615e9e237c35a985e4ace7b9ab0dd9 Mon Sep 17 00:00:00 2001 From: Mateusz Kowalczyk Date: Mon, 5 May 2014 09:01:03 +0200 Subject: Move parser + parser tests out to own package. We move some types out that are necessary as well and then re-export and specialise them in the core Haddock. Reason for moving out spec tests is that if we're working on the parser, we can simply work on that and we can ignore the rest of Haddock. The downside is that it's a little inconvenient if at the end of the day we want to see that everything passes. --- .ghci | 2 +- .gitignore | 1 + haddock-library/.ghci | 1 + haddock-library/LICENSE | 23 + haddock-library/Setup.hs | 2 + haddock-library/haddock-library.cabal | 75 ++ haddock-library/src/Documentation/Haddock/Doc.hs | 60 ++ .../src/Documentation/Haddock/Parser.hs | 474 ++++++++++++ .../src/Documentation/Haddock/Parser/Util.hs | 61 ++ haddock-library/src/Documentation/Haddock/Types.hs | 70 ++ haddock-library/src/Documentation/Haddock/Utf8.hs | 74 ++ .../test/Documentation/Haddock/Parser/UtilSpec.hs | 22 + .../test/Documentation/Haddock/ParserSpec.hs | 816 ++++++++++++++++++++ .../test/Documentation/Haddock/Utf8Spec.hs | 14 + haddock-library/test/Spec.hs | 1 + .../vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs | 18 + .../Data/Attoparsec/ByteString.hs | 205 +++++ .../Data/Attoparsec/ByteString/Char8.hs | 549 ++++++++++++++ .../Data/Attoparsec/ByteString/FastSet.hs | 115 +++ .../Data/Attoparsec/ByteString/Internal.hs | 516 +++++++++++++ .../Data/Attoparsec/Combinator.hs | 205 +++++ .../Data/Attoparsec/Internal.hs | 31 + .../Data/Attoparsec/Internal/Types.hs | 227 ++++++ .../attoparsec-0.10.4.0/Data/Attoparsec/Number.hs | 127 ++++ haddock.cabal | 59 +- src/Documentation/Haddock.hs | 3 +- src/Haddock.hs | 2 +- src/Haddock/Doc.hs | 51 +- src/Haddock/Interface/LexParseRn.hs | 20 +- src/Haddock/Interface/ParseModuleHeader.hs | 2 +- src/Haddock/Parser.hs | 529 +------------ src/Haddock/Parser/Util.hs | 28 - src/Haddock/Types.hs | 58 +- src/Haddock/Utf8.hs | 74 -- test/Haddock/Parser/UtilSpec.hs | 23 - test/Haddock/ParserSpec.hs | 825 --------------------- test/Haddock/Utf8Spec.hs | 15 - test/Helper.hs | 186 ----- test/Spec.hs | 1 - vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs | 18 - .../Data/Attoparsec/ByteString.hs | 205 ----- .../Data/Attoparsec/ByteString/Char8.hs | 549 -------------- .../Data/Attoparsec/ByteString/FastSet.hs | 115 --- .../Data/Attoparsec/ByteString/Internal.hs | 516 ------------- .../Data/Attoparsec/Combinator.hs | 205 ----- .../Data/Attoparsec/Internal.hs | 31 - .../Data/Attoparsec/Internal/Types.hs | 227 ------ .../attoparsec-0.10.4.0/Data/Attoparsec/Number.hs | 127 ---- 48 files changed, 3746 insertions(+), 3812 deletions(-) create mode 100644 haddock-library/.ghci create mode 100644 haddock-library/LICENSE create mode 100644 haddock-library/Setup.hs create mode 100644 haddock-library/haddock-library.cabal create mode 100644 haddock-library/src/Documentation/Haddock/Doc.hs create mode 100644 haddock-library/src/Documentation/Haddock/Parser.hs create mode 100644 haddock-library/src/Documentation/Haddock/Parser/Util.hs create mode 100644 haddock-library/src/Documentation/Haddock/Types.hs create mode 100644 haddock-library/src/Documentation/Haddock/Utf8.hs create mode 100644 haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs create mode 100644 haddock-library/test/Documentation/Haddock/ParserSpec.hs create mode 100644 haddock-library/test/Documentation/Haddock/Utf8Spec.hs create mode 100644 haddock-library/test/Spec.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs create mode 100644 haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs delete mode 100644 src/Haddock/Parser/Util.hs delete mode 100644 src/Haddock/Utf8.hs delete mode 100644 test/Haddock/Parser/UtilSpec.hs delete mode 100644 test/Haddock/ParserSpec.hs delete mode 100644 test/Haddock/Utf8Spec.hs delete mode 100644 test/Helper.hs delete mode 100644 test/Spec.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs delete mode 100644 vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs diff --git a/.ghci b/.ghci index 8312f0ee..5407b2c3 100644 --- a/.ghci +++ b/.ghci @@ -1 +1 @@ -:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -packageghc -optP-include -optPdist/build/autogen/cabal_macros.h +:set -isrc -itest -idist/build -idist/build/autogen -packageghc -optP-include -optPdist/build/autogen/cabal_macros.h diff --git a/.gitignore b/.gitignore index bc554c00..4752003e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /dist/ +/haddock-library/dist /html-test/out/ /latex-test/out/ diff --git a/haddock-library/.ghci b/haddock-library/.ghci new file mode 100644 index 00000000..6a26395e --- /dev/null +++ b/haddock-library/.ghci @@ -0,0 +1 @@ +:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h diff --git a/haddock-library/LICENSE b/haddock-library/LICENSE new file mode 100644 index 00000000..1636bfcd --- /dev/null +++ b/haddock-library/LICENSE @@ -0,0 +1,23 @@ +Copyright 2002-2010, Simon Marlow. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/haddock-library/Setup.hs b/haddock-library/Setup.hs new file mode 100644 index 00000000..9a994af6 --- /dev/null +++ b/haddock-library/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal new file mode 100644 index 00000000..3a016c5a --- /dev/null +++ b/haddock-library/haddock-library.cabal @@ -0,0 +1,75 @@ +name: haddock-library +version: 2.15.0 +synopsis: Library exposing some functionality of Haddock. + +description: Haddock is a documentation-generation tool for Haskell + libraries. These modules expose some functionality of it + without pulling in the GHC dependency. +license: BSD3 +license-file: LICENSE +maintainer: Simon Hengel , Mateusz Kowalczyk +homepage: http://www.haskell.org/haddock/ +bug-reports: http://trac.haskell.org/haddock +category: Documentation +build-type: Simple +cabal-version: >= 1.10 +stability: experimental + +library + default-language: Haskell2010 + + build-depends: + base >= 4.3 && < 4.8, + bytestring, + deepseq + + hs-source-dirs: src, vendor/attoparsec-0.10.4.0 + ghc-options: -funbox-strict-fields -Wall -fwarn-tabs -O2 + + exposed-modules: + Documentation.Haddock.Parser + Documentation.Haddock.Types + Documentation.Haddock.Doc + + other-modules: + Data.Attoparsec + Data.Attoparsec.ByteString + Data.Attoparsec.ByteString.Char8 + Data.Attoparsec.Combinator + Data.Attoparsec.Number + Data.Attoparsec.ByteString.FastSet + Data.Attoparsec.ByteString.Internal + Data.Attoparsec.Internal + Data.Attoparsec.Internal.Types + Documentation.Haddock.Utf8 + Documentation.Haddock.Parser.Util + +test-suite spec + type: exitcode-stdio-1.0 + default-language: Haskell2010 + main-is: Spec.hs + hs-source-dirs: + test + , src + , vendor/attoparsec-0.10.4.0 + + cpp-options: + -DTEST + + other-modules: + Documentation.Haddock.ParserSpec + Documentation.Haddock.Utf8Spec + Documentation.Haddock.Parser.UtilSpec + + build-depends: + base + , hspec + , bytestring + , deepseq + , QuickCheck == 2.* + , haddock-library + + +source-repository head + type: git + location: http://git.haskell.org/haddock.git diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs new file mode 100644 index 00000000..4d6c10a4 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Doc.hs @@ -0,0 +1,60 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +module Documentation.Haddock.Doc (docParagraph) where + +import Data.Monoid +import Documentation.Haddock.Types +import Data.Char (isSpace) + +-- We put it here so that we can avoid a circular import +-- anything relevant imports this module anyway +instance Monoid (DocH mod id) where + mempty = DocEmpty + mappend = docAppend + +docAppend :: DocH mod id -> DocH mod id -> DocH mod id +docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) +docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d +docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2) +docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2) +docAppend DocEmpty d = d +docAppend d DocEmpty = d +docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2) +docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2)) +docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d +docAppend d1 d2 = DocAppend d1 d2 + +-- again to make parsing easier - we spot a paragraph whose only item +-- is a DocMonospaced and make it into a DocCodeBlock +docParagraph :: DocH mod id -> DocH mod id +docParagraph (DocMonospaced p) + = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocString s1) (DocMonospaced p)) + | all isSpace s1 + = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocString s1) + (DocAppend (DocMonospaced p) (DocString s2))) + | all isSpace s1 && all isSpace s2 + = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocMonospaced p) (DocString s2)) + | all isSpace s2 + = DocCodeBlock (docCodeBlock p) +docParagraph p + = DocParagraph p + + +-- Drop trailing whitespace from @..@ code blocks. Otherwise this: +-- +-- -- @ +-- -- foo +-- -- @ +-- +-- turns into (DocCodeBlock "\nfoo\n ") which when rendered in HTML +-- gives an extra vertical space after the code block. The single space +-- on the final line seems to trigger the extra vertical space. +-- +docCodeBlock :: DocH mod id -> DocH mod id +docCodeBlock (DocString s) + = DocString (reverse $ dropWhile (`elem` " \t") $ reverse s) +docCodeBlock (DocAppend l r) + = DocAppend l (docCodeBlock r) +docCodeBlock d = d diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs new file mode 100644 index 00000000..3d146d33 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -0,0 +1,474 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE LambdaCase #-} +-- | +-- Module : Documentation.Haddock.Parser +-- Copyright : (c) Mateusz Kowalczyk 2013-2014, +-- Simon Hengel 2013 +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable + +module Documentation.Haddock.Parser ( parseString, parseParas + , overIdentifier, toRegular, Identifier + ) where + +import Control.Applicative +import Control.Arrow (first) +import Control.Monad (void, mfilter) +import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import qualified Data.ByteString.Char8 as BS +import Data.Char (chr, isAsciiUpper) +import Data.List (stripPrefix, intercalate, unfoldr) +import Data.Maybe (fromMaybe) +import Data.Monoid +import Documentation.Haddock.Doc +import Documentation.Haddock.Parser.Util +import Documentation.Haddock.Types +import Documentation.Haddock.Utf8 +import Prelude hiding (takeWhile) + +-- | Identifier string surrounded with opening and closing quotes/backticks. +type Identifier = (Char, String, Char) + +-- | Drops the quotes/backticks around all identifiers, as if they +-- were valid but still 'String's. +toRegular :: DocH mod Identifier -> DocH mod String +toRegular = fmap (\(_, x, _) -> x) + +-- | Maps over 'DocIdentifier's over 'String' with potentially failing +-- conversion using user-supplied function. If the conversion fails, +-- the identifier is deemed to not be valid and is treated as a +-- regular string. +overIdentifier :: (String -> Maybe a) + -> DocH mod Identifier + -> DocH mod a +overIdentifier f d = g d + where + g (DocIdentifier (o, x, e)) = case f x of + Nothing -> DocString $ o : x ++ [e] + Just x' -> DocIdentifier x' + g DocEmpty = DocEmpty + g (DocAppend x x') = DocAppend (g x) (g x') + g (DocString x) = DocString x + g (DocParagraph x) = DocParagraph $ g x + g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x + g (DocModule x) = DocModule x + g (DocWarning x) = DocWarning $ g x + g (DocEmphasis x) = DocEmphasis $ g x + g (DocMonospaced x) = DocMonospaced $ g x + g (DocBold x) = DocBold $ g x + g (DocUnorderedList x) = DocUnorderedList $ fmap g x + g (DocOrderedList x) = DocOrderedList $ fmap g x + g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x + g (DocCodeBlock x) = DocCodeBlock $ g x + g (DocHyperlink x) = DocHyperlink x + g (DocPic x) = DocPic x + g (DocAName x) = DocAName x + g (DocProperty x) = DocProperty x + g (DocExamples x) = DocExamples x + g (DocHeader (Header l x)) = DocHeader . Header l $ g x + +parse :: Parser a -> BS.ByteString -> a +parse p = either err id . parseOnly (p <* endOfInput) + where + err = error . ("Haddock.Parser.parse: " ++) + +-- | Main entry point to the parser. Appends the newline character +-- to the input string. +parseParas :: String -- ^ String to parse + -> DocH mod Identifier +parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") + where + p :: Parser (DocH mod Identifier) + p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + +-- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which +-- drops leading whitespace and encodes the string to UTF8 first. +parseString :: String -> DocH mod Identifier +parseString = parseStringBS . encodeUtf8 . dropWhile isSpace + +parseStringBS :: BS.ByteString -> DocH mod Identifier +parseStringBS = parse p + where + p :: Parser (DocH mod Identifier) + p = mconcat <$> many (monospace <|> anchor <|> identifier + <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold + <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar) + +-- | Parses and processes +-- +-- +-- >>> parseOnly encodedChar "ABC" +-- Right (DocString "ABC") +encodedChar :: Parser (DocH mod a) +encodedChar = "&#" *> c <* ";" + where + c = DocString . return . chr <$> num + num = hex <|> decimal + hex = ("x" <|> "X") *> hexadecimal + +specialChar :: [Char] +specialChar = "_/<@\"&'`" + +-- | Plain, regular parser for text. Called as one of the last parsers +-- to ensure that we have already given a chance to more meaningful parsers +-- before capturing their characers. +string' :: Parser (DocH mod a) +string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar) + where + unescape "" = "" + unescape ('\\':x:xs) = x : unescape xs + unescape (x:xs) = x : unescape xs + +-- | Skips a single special character and treats it as a plain string. +-- This is done to skip over any special characters belonging to other +-- elements but which were not deemed meaningful at their positions. +skipSpecialChar :: Parser (DocH mod a) +skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) + +-- | Emphasis parser. +-- +-- >>> parseOnly emphasis "/Hello world/" +-- Right (DocEmphasis (DocString "Hello world")) +emphasis :: Parser (DocH mod Identifier) +emphasis = DocEmphasis . parseStringBS <$> + mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") + +-- | Bold parser. +-- +-- >>> parseOnly bold "__Hello world__" +-- Right (DocBold (DocString "Hello world")) +bold :: Parser (DocH mod Identifier) +bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") + +disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString +disallowNewline = mfilter ('\n' `BS.notElem`) + +-- | Like `takeWhile`, but unconditionally take escaped characters. +takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile_ p = scan False p_ + where + p_ escaped c + | escaped = Just False + | not $ p c = Nothing + | otherwise = Just (c == '\\') + +-- | Like `takeWhile1`, but unconditionally take escaped characters. +takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile1_ = mfilter (not . BS.null) . takeWhile_ + +-- | Text anchors to allow for jumping around the generated documentation. +-- +-- >>> parseOnly anchor "#Hello world#" +-- Right (DocAName "Hello world") +anchor :: Parser (DocH mod a) +anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") + +-- | Monospaced strings. +-- +-- >>> parseOnly monospace "@cruel@" +-- Right (DocMonospaced (DocString "cruel")) +monospace :: Parser (DocH mod Identifier) +monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@") + +moduleName :: Parser (DocH mod a) +moduleName = DocModule <$> (char '"' *> modid <* char '"') + where + modid = intercalate "." <$> conid `sepBy1` "." + conid = (:) + <$> satisfy isAsciiUpper + -- NOTE: According to Haskell 2010 we shouldd actually only + -- accept {small | large | digit | ' } here. But as we can't + -- match on unicode characters, this is currently not possible. + <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n")) + +-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify +-- a title for the picture. +-- +-- >>> parseOnly picture "<>" +-- Right (DocPic (Picture "hello.png" Nothing)) +-- >>> parseOnly picture "<>" +-- Right (DocPic (Picture "hello.png" (Just "world"))) +picture :: Parser (DocH mod a) +picture = DocPic . makeLabeled Picture . decodeUtf8 + <$> disallowNewline ("<<" *> takeUntil ">>") + +-- | Paragraph parser, called by 'parseParas'. +paragraph :: Parser (DocH mod Identifier) +paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock + <|> property <|> header + <|> textParagraph) + +header :: Parser (DocH mod Identifier) +header = do + let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1] + pser = foldl1 (<|>) psers + delim <- decodeUtf8 <$> pser + line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString + rest <- paragraph <|> return mempty + return $ DocParagraph (DocHeader (Header (length delim) line)) <> rest + +textParagraph :: Parser (DocH mod Identifier) +textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine + +-- | List parser, called by 'paragraph'. +list :: Parser (DocH mod Identifier) +list = DocUnorderedList <$> unorderedList + <|> DocOrderedList <$> orderedList + <|> DocDefList <$> definitionList + +-- | Parses unordered (bullet) lists. +unorderedList :: Parser [DocH mod Identifier] +unorderedList = ("*" <|> "-") *> innerList unorderedList + +-- | Parses ordered lists (numbered or dashed). +orderedList :: Parser [DocH mod Identifier] +orderedList = (paren <|> dot) *> innerList orderedList + where + dot = (decimal :: Parser Int) <* "." + paren = "(" *> decimal <* ")" + +-- | Generic function collecting any further lines belonging to the +-- list entry and recursively collecting any further lists in the +-- same paragraph. Usually used as +-- +-- > someListFunction = listBeginning *> innerList someListFunction +innerList :: Parser [DocH mod Identifier] -> Parser [DocH mod Identifier] +innerList item = do + c <- takeLine + (cs, items) <- more item + let contents = docParagraph . parseString . dropNLs . unlines $ c : cs + return $ case items of + Left p -> [contents `joinPara` p] + Right i -> contents : i + +-- | Parses definition lists. +definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)] +definitionList = do + label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]" + c <- takeLine + (cs, items) <- more definitionList + let contents = parseString . dropNLs . unlines $ c : cs + return $ case items of + Left p -> [(label, contents `joinPara` p)] + Right i -> (label, contents) : i + +-- | If possible, appends two 'Doc's under a 'DocParagraph' rather than +-- outside of it. This allows to get structures like +-- +-- @DocParagraph (DocAppend … …)@ +-- +-- rather than +-- +-- @DocAppend (DocParagraph …) …@ +joinPara :: DocH mod id -> DocH mod id -> DocH mod id +joinPara (DocParagraph p) c = docParagraph $ p <> c +joinPara d p = d <> p + +-- | Drops all trailing newlines. +dropNLs :: String -> String +dropNLs = reverse . dropWhile (== '\n') . reverse + +-- | Main worker for 'innerList' and 'definitionList'. +-- We need the 'Either' here to be able to tell in the respective functions +-- whether we're dealing with the next list or a nested paragraph. +more :: Monoid a => Parser a + -> Parser ([String], Either (DocH mod Identifier) a) +more item = innerParagraphs <|> moreListItems item + <|> moreContent item <|> pure ([], Right mempty) + +-- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs. +innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a) +innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs) + +-- | Attemps to fetch the next list if possibly. Used by 'innerList' and +-- 'definitionList' to recursivly grab lists that aren't separated by a whole +-- paragraph. +moreListItems :: Parser a + -> Parser ([String], Either (DocH mod Identifier) a) +moreListItems item = (,) [] . Right <$> (skipSpace *> item) + +-- | Helper for 'innerList' and 'definitionList' which simply takes +-- a line of text and attempts to parse more list content with 'more'. +moreContent :: Monoid a => Parser a + -> Parser ([String], Either (DocH mod Identifier) a) +moreContent item = first . (:) <$> nonEmptyLine <*> more item + +-- | Runs the 'parseParas' parser on an indented paragraph. +-- The indentation is 4 spaces. +indentedParagraphs :: Parser (DocH mod Identifier) +indentedParagraphs = parseParas . concat <$> dropFrontOfPara " " + +-- | Grab as many fully indented paragraphs as we can. +dropFrontOfPara :: Parser BS.ByteString -> Parser [String] +dropFrontOfPara sp = do + currentParagraph <- some (sp *> takeNonEmptyLine) + followingParagraphs <- + skipHorizontalSpace *> nextPar -- we have more paragraphs to take + <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline + <|> endOfInput *> return [] -- nothing more to take at all + return (currentParagraph ++ followingParagraphs) + where + nextPar = (++) <$> nlList <*> dropFrontOfPara sp + nlList = "\n" *> return ["\n"] + +nonSpace :: BS.ByteString -> Parser BS.ByteString +nonSpace xs + | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line" + | otherwise = return xs + +-- | Takes a non-empty, not fully whitespace line. +-- +-- Doesn't discard the trailing newline. +takeNonEmptyLine :: Parser String +takeNonEmptyLine = do + (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" + +birdtracks :: Parser (DocH mod a) +birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line + where + line = skipHorizontalSpace *> ">" *> takeLine + +stripSpace :: [String] -> [String] +stripSpace = fromMaybe <*> mapM strip' + where + strip' (' ':xs') = Just xs' + strip' "" = Just "" + strip' _ = Nothing + +-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). +-- Consecutive examples are accepted. +examples :: Parser (DocH mod a) +examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go) + where + go :: Parser [Example] + go = do + prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" + expr <- takeLine + (rs, es) <- resultAndMoreExamples + return (makeExample prefix expr rs : es) + where + resultAndMoreExamples :: Parser ([String], [Example]) + resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) + where + moreExamples :: Parser ([String], [Example]) + moreExamples = (,) [] <$> go + + result :: Parser ([String], [Example]) + result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples + + makeExample :: String -> String -> [String] -> Example + makeExample prefix expression res = + Example (strip expression) result + where + result = map (substituteBlankLine . tryStripPrefix) res + + tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) + + substituteBlankLine "" = "" + substituteBlankLine xs = xs + +nonEmptyLine :: Parser String +nonEmptyLine = mfilter (any (not . isSpace)) takeLine + +takeLine :: Parser String +takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine + +endOfLine :: Parser () +endOfLine = void "\n" <|> endOfInput + +-- | Property parser. +-- +-- >>> parseOnly property "prop> hello world" +-- Right (DocProperty "hello world") +property :: Parser (DocH mod a) +property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) + +-- | +-- Paragraph level codeblock. Anything between the two delimiting @ is parsed +-- for markup. +codeblock :: Parser (DocH mod Identifier) +codeblock = + DocCodeBlock . parseStringBS . dropSpaces + <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@") + where + dropSpaces xs = + let rs = decodeUtf8 xs + in case splitByNl rs of + [] -> xs + ys -> case last ys of + ' ':_ -> case mapM dropSpace ys of + Nothing -> xs + Just zs -> encodeUtf8 $ intercalate "\n" zs + _ -> xs + + -- This is necessary because ‘lines’ swallows up a trailing newline + -- and we lose information about whether the last line belongs to @ or to + -- text which we need to decide whether we actually want to be dropping + -- anything at all. + splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s) + _ -> Nothing) + . ('\n' :) + + dropSpace "" = Just "" + dropSpace (' ':xs) = Just xs + dropSpace _ = Nothing + + block' = scan False p + where + p isNewline c + | isNewline && c == '@' = Nothing + | isNewline && isSpace c = Just isNewline + | otherwise = Just $ c == '\n' + +hyperlink :: Parser (DocH mod a) +hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 + <$> disallowNewline ("<" *> takeUntil ">") + <|> autoUrl + +autoUrl :: Parser (DocH mod a) +autoUrl = mkLink <$> url + where + url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) + mkLink :: BS.ByteString -> DocH mod a + mkLink s = case BS.unsnoc s of + Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] + _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) + +-- | Parses strings between identifier delimiters. Consumes all input that it +-- deems to be valid in an identifier. Note that it simply blindly consumes +-- characters and does no actual validation itself. +parseValid :: Parser String +parseValid = do + vs' <- many' $ utf8String "⋆" <|> return <$> idChar + let vs = concat vs' + c <- peekChar + case c of + Just '`' -> return vs + Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) + <|> return vs + _ -> fail "outofvalid" + where + idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^") + <|> digit <|> letter_ascii + +-- | Parses UTF8 strings from ByteString streams. +utf8String :: String -> Parser String +utf8String x = decodeUtf8 <$> string (encodeUtf8 x) + +-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the +-- string it deems valid. +identifier :: Parser (DocH mod Identifier) +identifier = do + o <- idDelim + vid <- parseValid + e <- idDelim + return $ DocIdentifier (o, vid, e) + where + idDelim = char '\'' <|> char '`' diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs new file mode 100644 index 00000000..25dba2d5 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -0,0 +1,61 @@ +-- | +-- Module : Documentation.Haddock.Parser.Util +-- Copyright : (c) Mateusz Kowalczyk 2013-2014, +-- Simon Hengel 2013 +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Various utility functions used by the parser. +module Documentation.Haddock.Parser.Util where + +import Control.Applicative +import Control.Monad (mfilter) +import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import Data.ByteString.Char8 (ByteString) +import qualified Data.ByteString.Char8 as BS +import Prelude hiding (takeWhile) + +-- | Remove all leading and trailing whitespace +strip :: String -> String +strip = (\f -> f . f) $ dropWhile isSpace . reverse + +skipHorizontalSpace :: Parser () +skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r") + +takeHorizontalSpace :: Parser BS.ByteString +takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r") + +makeLabeled :: (String -> Maybe String -> a) -> String -> a +makeLabeled f input = case break isSpace $ removeEscapes $ strip input of + (uri, "") -> f uri Nothing + (uri, label) -> f uri (Just $ dropWhile isSpace label) + where + -- As we don't parse these any further, we don't do any processing to the + -- string so we at least remove escape character here. Perhaps we should + -- actually be parsing the label at the very least? + removeEscapes "" = "" + removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs + removeEscapes ('\\':xs) = removeEscapes xs + removeEscapes (x:xs) = x : removeEscapes xs + +takeUntil :: ByteString -> Parser ByteString +takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome + where + end = BS.unpack end_ + + p :: (Bool, String) -> Char -> Maybe (Bool, String) + p acc c = case acc of + (True, _) -> Just (False, end) + (_, []) -> Nothing + (_, x:xs) | x == c -> Just (False, xs) + _ -> Just (c == '\\', end) + + dropEnd = BS.reverse . BS.drop (length end) . BS.reverse + requireEnd = mfilter (BS.isSuffixOf end_) + + gotSome xs + | BS.null xs = fail "didn't get any content" + | otherwise = return xs diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs new file mode 100644 index 00000000..3e25d230 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Types.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable #-} +{-# LANGUAGE DeriveTraversable, StandaloneDeriving #-} + +-- | +-- Module : Documentation.Haddock.Types +-- Copyright : (c) Simon Marlow 2003-2006, +-- David Waern 2006-2009, +-- Mateusz Kowalczyk 2013 +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskellorg +-- Stability : experimental +-- Portability : portable +-- +-- Exposes documentation data types used for (some) of Haddock. +module Documentation.Haddock.Types where + +import Data.Foldable +import Data.Traversable + +instance Foldable Header where + foldMap f (Header _ a) = f a + +instance Traversable Header where + traverse f (Header l a) = Header l `fmap` f a + +data Hyperlink = Hyperlink + { hyperlinkUrl :: String + , hyperlinkLabel :: Maybe String + } deriving (Eq, Show) + + +data Picture = Picture + { pictureUri :: String + , pictureTitle :: Maybe String + } deriving (Eq, Show) + +data Header id = Header + { headerLevel :: Int + , headerTitle :: id + } deriving Functor + +data Example = Example + { exampleExpression :: String + , exampleResult :: [String] + } deriving (Eq, Show) + +data DocH mod id + = DocEmpty + | DocAppend (DocH mod id) (DocH mod id) + | DocString String + | DocParagraph (DocH mod id) + | DocIdentifier id + | DocIdentifierUnchecked mod + | DocModule String + | DocWarning (DocH mod id) + | DocEmphasis (DocH mod id) + | DocMonospaced (DocH mod id) + | DocBold (DocH mod id) + | DocUnorderedList [DocH mod id] + | DocOrderedList [DocH mod id] + | DocDefList [(DocH mod id, DocH mod id)] + | DocCodeBlock (DocH mod id) + | DocHyperlink Hyperlink + | DocPic Picture + | DocAName String + | DocProperty String + | DocExamples [Example] + | DocHeader (Header (DocH mod id)) + deriving (Functor, Foldable, Traversable) diff --git a/haddock-library/src/Documentation/Haddock/Utf8.hs b/haddock-library/src/Documentation/Haddock/Utf8.hs new file mode 100644 index 00000000..3f75e53b --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Utf8.hs @@ -0,0 +1,74 @@ +module Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) where +import Data.Bits ((.|.), (.&.), shiftL, shiftR) +import qualified Data.ByteString as BS +import Data.Char (chr, ord) +import Data.Word (Word8) + +-- | Helper that encodes and packs a 'String' into a 'BS.ByteString' +encodeUtf8 :: String -> BS.ByteString +encodeUtf8 = BS.pack . encode + +-- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String' +decodeUtf8 :: BS.ByteString -> String +decodeUtf8 = decode . BS.unpack + +-- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding +-- | Character to use when 'encode' or 'decode' fail for a byte. +replacementCharacter :: Char +replacementCharacter = '\xfffd' + +-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. +encode :: String -> [Word8] +encode = concatMap (map fromIntegral . go . ord) + where + go oc + | oc <= 0x7f = [oc] + + | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6) + , 0x80 + oc .&. 0x3f + ] + + | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12) + , 0x80 + ((oc `shiftR` 6) .&. 0x3f) + , 0x80 + oc .&. 0x3f + ] + | otherwise = [ 0xf0 + (oc `shiftR` 18) + , 0x80 + ((oc `shiftR` 12) .&. 0x3f) + , 0x80 + ((oc `shiftR` 6) .&. 0x3f) + , 0x80 + oc .&. 0x3f + ] + +-- | Decode a UTF8 string packed into a list of Word8 values, directly to String +decode :: [Word8] -> String +decode [ ] = "" +decode (c:cs) + | c < 0x80 = chr (fromEnum c) : decode cs + | c < 0xc0 = replacementCharacter : decode cs + | c < 0xe0 = multi1 + | c < 0xf0 = multi_byte 2 0xf 0x800 + | c < 0xf8 = multi_byte 3 0x7 0x10000 + | c < 0xfc = multi_byte 4 0x3 0x200000 + | c < 0xfe = multi_byte 5 0x1 0x4000000 + | otherwise = replacementCharacter : decode cs + where + multi1 = case cs of + c1 : ds | c1 .&. 0xc0 == 0x80 -> + let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f) + in if d >= 0x000080 then toEnum d : decode ds + else replacementCharacter : decode ds + _ -> replacementCharacter : decode cs + + multi_byte :: Int -> Word8 -> Int -> String + multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) + where + aux 0 rs acc + | overlong <= acc && acc <= 0x10ffff && + (acc < 0xd800 || 0xdfff < acc) && + (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs + | otherwise = replacementCharacter : decode rs + + aux n (r:rs) acc + | r .&. 0xc0 == 0x80 = aux (n-1) rs + $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) + + aux _ rs _ = replacementCharacter : decode rs diff --git a/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs new file mode 100644 index 00000000..265a4d49 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE OverloadedStrings #-} +module Documentation.Haddock.Parser.UtilSpec (main, spec) where + +import Data.Attoparsec.ByteString.Char8 +import Data.Either +import Documentation.Haddock.Parser.Util +import Test.Hspec + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + describe "takeUntil" $ do + it "takes everything until a specified byte sequence" $ do + parseOnly (takeUntil "end") "someend" `shouldBe` Right "some" + + it "requires the end sequence" $ do + parseOnly (takeUntil "end") "someen" `shouldSatisfy` isLeft + + it "takes escaped bytes unconditionally" $ do + parseOnly (takeUntil "end") "some\\endend" `shouldBe` Right "some\\end" diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs new file mode 100644 index 00000000..3889d555 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs @@ -0,0 +1,816 @@ +{-# LANGUAGE OverloadedStrings, StandaloneDeriving + , FlexibleInstances, UndecidableInstances + , IncoherentInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Documentation.Haddock.ParserSpec (main, spec) where + +import Data.Monoid +import Data.String +import qualified Documentation.Haddock.Parser as Parse +import Documentation.Haddock.Types +import Test.Hspec +import Test.QuickCheck + +type Doc id = DocH () id + +deriving instance Show a => Show (Header a) +deriving instance Show a => Show (Doc a) +deriving instance Eq a => Eq (Header a) +deriving instance Eq a => Eq (Doc a) + +instance IsString (Doc String) where + fromString = DocString + +instance IsString a => IsString (Maybe a) where + fromString = Just . fromString + +parseParas :: String -> Doc String +parseParas = Parse.toRegular . Parse.parseParas + +parseString :: String -> Doc String +parseString = Parse.toRegular . Parse.parseString + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + describe "parseString" $ do + let infix 1 `shouldParseTo` + shouldParseTo :: String -> Doc String -> Expectation + shouldParseTo input ast = parseString input `shouldBe` ast + + it "is total" $ do + property $ \xs -> + (length . show . parseString) xs `shouldSatisfy` (> 0) + + context "when parsing text" $ do + it "can handle unicode" $ do + "灼眼のシャナ" `shouldParseTo` "灼眼のシャナ" + + it "accepts numeric character references" $ do + "foo bar baz λ" `shouldParseTo` "foo bar baz λ" + + it "accepts hexadecimal character references" $ do + "e" `shouldParseTo` "e" + + it "allows to backslash-escape characters" $ do + property $ \x -> ['\\', x] `shouldParseTo` DocString [x] + + context "when parsing strings contaning numeric character references" $ do + it "will implicitly convert digits to characters" $ do + "AAAA" `shouldParseTo` "AAAA" + + "灼眼のシャナ" + `shouldParseTo` "灼眼のシャナ" + + it "will implicitly convert hex encoded characters" $ do + "eeee" `shouldParseTo` "eeee" + + context "when parsing identifiers" $ do + it "parses identifiers enclosed within single ticks" $ do + "'foo'" `shouldParseTo` DocIdentifier "foo" + + it "parses identifiers enclosed within backticks" $ do + "`foo`" `shouldParseTo` DocIdentifier "foo" + + it "parses a word with an one of the delimiters in it as DocString" $ do + "don't" `shouldParseTo` "don't" + + it "doesn't pass pairs of delimiters with spaces between them" $ do + "hel'lo w'orld" `shouldParseTo` "hel'lo w'orld" + + it "don't use apostrophe's in the wrong place's" $ do + " don't use apostrophe's in the wrong place's" `shouldParseTo` + "don't use apostrophe's in the wrong place's" + + context "when parsing URLs" $ do + let hyperlink :: String -> Maybe String -> Doc String + hyperlink url = DocHyperlink . Hyperlink url + + it "parses a URL" $ do + "" `shouldParseTo` hyperlink "http://example.com/" Nothing + + it "accepts an optional label" $ do + "" `shouldParseTo` hyperlink "http://example.com/" "some link" + + it "does not accept newlines in label" $ do + "" `shouldParseTo` "" + + -- new behaviour test, this will be now consistent with other markup + it "allows us to escape > inside the URL" $ do + "le.com>" `shouldParseTo` + hyperlink "http://examp>le.com" Nothing + + "mp\\>le.com>" `shouldParseTo` + hyperlink "http://exa>mp>le.com" Nothing + + -- Likewise in label + "oo>" `shouldParseTo` + hyperlink "http://example.com" "f>oo" + + it "parses inline URLs" $ do + "foo bar" `shouldParseTo` + "foo " <> hyperlink "http://example.com/" Nothing <> " bar" + + it "doesn't allow for multi-line link tags" $ do + "" `shouldParseTo` "" + + context "when autolinking URLs" $ do + it "autolinks HTTP URLs" $ do + "http://example.com/" `shouldParseTo` hyperlink "http://example.com/" Nothing + + it "autolinks HTTPS URLs" $ do + "https://www.example.com/" `shouldParseTo` hyperlink "https://www.example.com/" Nothing + + it "autolinks FTP URLs" $ do + "ftp://example.com/" `shouldParseTo` hyperlink "ftp://example.com/" Nothing + + it "does not include a trailing comma" $ do + "http://example.com/, Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> ", Some other sentence." + + it "does not include a trailing dot" $ do + "http://example.com/. Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> ". Some other sentence." + + it "does not include a trailing exclamation mark" $ do + "http://example.com/! Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "! Some other sentence." + + it "does not include a trailing question mark" $ do + "http://example.com/? Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "? Some other sentence." + + context "when parsing pictures" $ do + let picture :: String -> Maybe String -> Doc String + picture uri = DocPic . Picture uri + + it "parses a simple picture" $ do + "<>" `shouldParseTo` picture "baz" Nothing + + it "parses a picture with a title" $ do + "<>" `shouldParseTo` picture "b" (Just "a z") + + it "parses a picture with unicode" $ do + "<<灼眼のシャナ>>" `shouldParseTo` picture "灼眼のシャナ" Nothing + + it "allows for escaping of the closing tags" $ do + "<>z>>" `shouldParseTo` picture "ba>>z" Nothing + + it "doesn't allow for multi-line picture tags" $ do + "<>" `shouldParseTo` "<>" + + context "when parsing anchors" $ do + it "parses a single word anchor" $ do + "#foo#" `shouldParseTo` DocAName "foo" + + it "parses a multi word anchor" $ do + "#foo bar#" `shouldParseTo` DocAName "foo bar" + + it "parses a unicode anchor" $ do + "#灼眼のシャナ#" `shouldParseTo` DocAName "灼眼のシャナ" + + it "does not accept newlines in anchors" $ do + "#foo\nbar#" `shouldParseTo` "#foo\nbar#" + + context "when parsing emphasised text" $ do + it "emphasises a word on its own" $ do + "/foo/" `shouldParseTo` DocEmphasis "foo" + + it "emphasises inline correctly" $ do + "foo /bar/ baz" `shouldParseTo` "foo " <> DocEmphasis "bar" <> " baz" + + it "emphasises unicode" $ do + "/灼眼のシャナ/" `shouldParseTo` DocEmphasis "灼眼のシャナ" + + it "does not emphasise multi-line strings" $ do + " /foo\nbar/" `shouldParseTo` "/foo\nbar/" + + it "does not emphasise the empty string" $ do + "//" `shouldParseTo` "//" + + it "parses escaped slashes literally" $ do + "/foo\\/bar/" `shouldParseTo` DocEmphasis "foo/bar" + + it "recognizes other markup constructs within emphasised text" $ do + "/foo @bar@ baz/" `shouldParseTo` + DocEmphasis ("foo " <> DocMonospaced "bar" <> " baz") + + it "allows other markup inside of emphasis" $ do + "/__inner bold__/" `shouldParseTo` DocEmphasis (DocBold "inner bold") + + it "doesn't mangle inner markup unicode" $ do + "/__灼眼のシャナ A__/" `shouldParseTo` DocEmphasis (DocBold "灼眼のシャナ A") + + it "properly converts HTML escape sequences" $ do + "/AAAA/" `shouldParseTo` DocEmphasis "AAAA" + + it "allows to escape the emphasis delimiter inside of emphasis" $ do + "/empha\\/sis/" `shouldParseTo` DocEmphasis "empha/sis" + + context "when parsing monospaced text" $ do + it "parses simple monospaced text" $ do + "@foo@" `shouldParseTo` DocMonospaced "foo" + + it "parses inline monospaced text" $ do + "foo @bar@ baz" `shouldParseTo` "foo " <> DocMonospaced "bar" <> " baz" + + it "allows to escape @" $ do + "@foo \\@ bar@" `shouldParseTo` DocMonospaced "foo @ bar" + + it "accepts unicode" $ do + "@foo 灼眼のシャナ bar@" `shouldParseTo` DocMonospaced "foo 灼眼のシャナ bar" + + it "accepts other markup in monospaced text" $ do + "@/foo/@" `shouldParseTo` DocMonospaced (DocEmphasis "foo") + + it "requires the closing @" $ do + "@foo /bar/ baz" `shouldParseTo` "@foo " <> DocEmphasis "bar" <> " baz" + + context "when parsing bold strings" $ do + it "allows for a bold string on its own" $ do + "__bold string__" `shouldParseTo` + DocBold "bold string" + + it "bolds inline correctly" $ do + "hello __everyone__ there" `shouldParseTo` + "hello " + <> DocBold "everyone" <> " there" + + it "bolds unicode" $ do + "__灼眼のシャナ__" `shouldParseTo` + DocBold "灼眼のシャナ" + + it "does not do __multi-line\\n bold__" $ do + " __multi-line\n bold__" `shouldParseTo` "__multi-line\n bold__" + + it "allows other markup inside of bold" $ do + "__/inner emphasis/__" `shouldParseTo` + (DocBold $ DocEmphasis "inner emphasis") + + it "doesn't mangle inner markup unicode" $ do + "__/灼眼のシャナ A/__" `shouldParseTo` + (DocBold $ DocEmphasis "灼眼のシャナ A") + + it "properly converts HTML escape sequences" $ do + "__AAAA__" `shouldParseTo` + DocBold "AAAA" + + it "allows to escape the bold delimiter inside of bold" $ do + "__bo\\__ld__" `shouldParseTo` + DocBold "bo__ld" + + it "doesn't allow for empty bold" $ do + "____" `shouldParseTo` "____" + + context "when parsing module strings" $ do + it "should parse a module on its own" $ do + "\"Module\"" `shouldParseTo` + DocModule "Module" + + it "should parse a module inline" $ do + "This is a \"Module\"." `shouldParseTo` + "This is a " <> DocModule "Module" <> "." + + it "can accept a simple module name" $ do + "\"Hello\"" `shouldParseTo` DocModule "Hello" + + it "can accept a module name with dots" $ do + "\"Hello.World\"" `shouldParseTo` DocModule "Hello.World" + + it "can accept a module name with unicode" $ do + "\"Hello.Worldλ\"" `shouldParseTo` DocModule "Hello.Worldλ" + + it "parses a module name with a trailing dot as regular quoted string" $ do + "\"Hello.\"" `shouldParseTo` "\"Hello.\"" + + it "parses a module name with a space as regular quoted string" $ do + "\"Hello World\"" `shouldParseTo` "\"Hello World\"" + + it "parses a module name with invalid characters as regular quoted string" $ do + "\"Hello&[{}(=*)+]!\"" `shouldParseTo` "\"Hello&[{}(=*)+]!\"" + + it "accepts a module name with unicode" $ do + "\"Foo.Barλ\"" `shouldParseTo` DocModule "Foo.Barλ" + + it "treats empty module name as regular double quotes" $ do + "\"\"" `shouldParseTo` "\"\"" + + describe "parseParas" $ do + let infix 1 `shouldParseTo` + shouldParseTo :: String -> Doc String -> Expectation + shouldParseTo input ast = parseParas input `shouldBe` ast + + it "is total" $ do + property $ \xs -> + (length . show . parseParas) xs `shouldSatisfy` (> 0) + + context "when parsing text paragraphs" $ do + let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String)) + + it "parses an empty paragraph" $ do + "" `shouldParseTo` DocEmpty + + it "parses a simple text paragraph" $ do + "foo bar baz" `shouldParseTo` DocParagraph "foo bar baz" + + it "accepts markup in text paragraphs" $ do + "foo /bar/ baz" `shouldParseTo` DocParagraph ("foo " <> DocEmphasis "bar" <> " baz") + + it "preserve all regular characters" $ do + property $ \xs -> let input = filterSpecial xs in (not . null) input ==> + input `shouldParseTo` DocParagraph (DocString input) + + it "separates paragraphs by empty lines" $ do + unlines [ + "foo" + , " \t " + , "bar" + ] `shouldParseTo` DocParagraph "foo" <> DocParagraph "bar" + + context "when a pragraph only contains monospaced text" $ do + it "turns it into a code block" $ do + "@foo@" `shouldParseTo` DocCodeBlock "foo" + + context "when parsing birdtracks" $ do + it "parses them as a code block" $ do + unlines [ + ">foo" + , ">bar" + , ">baz" + ] `shouldParseTo` DocCodeBlock "foo\nbar\nbaz" + + it "ignores leading whitespace" $ do + unlines [ + " >foo" + , " \t >bar" + , " >baz" + ] + `shouldParseTo` DocCodeBlock "foo\nbar\nbaz" + + it "strips one leading space from each line of the block" $ do + unlines [ + "> foo" + , "> bar" + , "> baz" + ] `shouldParseTo` DocCodeBlock "foo\n bar\nbaz" + + it "ignores empty lines when stripping spaces" $ do + unlines [ + "> foo" + , ">" + , "> bar" + ] `shouldParseTo` DocCodeBlock "foo\n\nbar" + + context "when any non-empty line does not start with a space" $ do + it "does not strip any spaces" $ do + unlines [ + ">foo" + , "> bar" + ] `shouldParseTo` DocCodeBlock "foo\n bar" + + it "ignores nested markup" $ do + unlines [ + ">/foo/" + ] `shouldParseTo` DocCodeBlock "/foo/" + + it "treats them as regular text inside text paragraphs" $ do + unlines [ + "foo" + , ">bar" + ] `shouldParseTo` DocParagraph "foo\n>bar" + + context "when parsing code blocks" $ do + it "accepts a simple code block" $ do + unlines [ + "@" + , "foo" + , "bar" + , "baz" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\nbar\nbaz\n" + + it "ignores trailing whitespace after the opening @" $ do + unlines [ + "@ " + , "foo" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\n" + + it "rejects code blocks that are not closed" $ do + unlines [ + "@" + , "foo" + ] `shouldParseTo` DocParagraph "@\nfoo" + + it "accepts nested markup" $ do + unlines [ + "@" + , "/foo/" + , "@" + ] `shouldParseTo` DocCodeBlock (DocEmphasis "foo" <> "\n") + + it "allows to escape the @" $ do + unlines [ + "@" + , "foo" + , "\\@" + , "bar" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\n@\nbar\n" + + it "accepts horizontal space before the @" $ do + unlines [ " @" + , "foo" + , "" + , "bar" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\n\nbar\n" + + it "strips a leading space from a @ block if present" $ do + unlines [ " @" + , " hello" + , " world" + , " @" + ] `shouldParseTo` DocCodeBlock "hello\nworld\n" + + unlines [ " @" + , " hello" + , "" + , " world" + , " @" + ] `shouldParseTo` DocCodeBlock "hello\n\nworld\n" + + it "only drops whitespace if there's some before closing @" $ do + unlines [ "@" + , " Formatting" + , " matters." + , "@" + ] + `shouldParseTo` DocCodeBlock " Formatting\n matters.\n" + + it "accepts unicode" $ do + "@foo 灼眼のシャナ bar@" `shouldParseTo` DocCodeBlock "foo 灼眼のシャナ bar" + + it "requires the closing @" $ do + "@foo /bar/ baz" + `shouldParseTo` DocParagraph ("@foo " <> DocEmphasis "bar" <> " baz") + + + context "when parsing examples" $ do + it "parses a simple example" $ do + ">>> foo" `shouldParseTo` DocExamples [Example "foo" []] + + it "parses an example with result" $ do + unlines [ + ">>> foo" + , "bar" + , "baz" + ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] + + it "parses consecutive examples" $ do + unlines [ + ">>> fib 5" + , "5" + , ">>> fib 10" + , "55" + ] `shouldParseTo` DocExamples [ + Example "fib 5" ["5"] + , Example "fib 10" ["55"] + ] + + it ("requires an example to be separated" + ++ " from a previous paragraph by an empty line") $ do + "foobar\n\n>>> fib 10\n55" `shouldParseTo` + DocParagraph "foobar" + <> DocExamples [Example "fib 10" ["55"]] + + it "parses bird-tracks inside of paragraphs as plain strings" $ do + let xs = "foo\n>>> bar" + xs `shouldParseTo` DocParagraph (DocString xs) + + it "skips empty lines in front of an example" $ do + "\n \n\n>>> foo" `shouldParseTo` DocExamples [Example "foo" []] + + it "terminates example on empty line" $ do + unlines [ + ">>> foo" + , "bar" + , " " + , "baz" + ] + `shouldParseTo` + DocExamples [Example "foo" ["bar"]] <> DocParagraph "baz" + + it "parses a result as an empty result" $ do + unlines [ + ">>> foo" + , "bar" + , "" + , "baz" + ] + `shouldParseTo` DocExamples [Example "foo" ["bar", "", "baz"]] + + it "accepts unicode in examples" $ do + ">>> 灼眼\nシャナ" `shouldParseTo` DocExamples [Example "灼眼" ["シャナ"]] + + context "when prompt is prefixed by whitespace" $ do + it "strips the exact same amount of whitespace from result lines" $ do + unlines [ + " >>> foo" + , " bar" + , " baz" + ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] + + it "preserves additional whitespace" $ do + unlines [ + " >>> foo" + , " bar" + ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + + it "keeps original if stripping is not possible" $ do + unlines [ + " >>> foo" + , " bar" + ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + + + context "when parsing paragraphs nested in lists" $ do + it "can nest the same type of list" $ do + "* foo\n\n * bar" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [DocParagraph "bar"]] + + it "can nest another type of list inside" $ do + "* foo\n\n 1. bar" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocOrderedList [DocParagraph "bar"]] + + it "can nest a code block inside" $ do + "* foo\n\n @foo bar baz@" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocCodeBlock "foo bar baz"] + + "* foo\n\n @\n foo bar baz\n @" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocCodeBlock "foo bar baz\n"] + + it "can nest more than one level" $ do + "* foo\n\n * bar\n\n * baz\n qux" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [ DocParagraph $ "bar" + <> DocUnorderedList [DocParagraph "baz\nqux"] + ] + ] + + it "won't fail on not fully indented paragraph" $ do + "* foo\n\n * bar\n\n * qux\nquux" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [ DocParagraph "bar" ] + , DocParagraph "qux\nquux"] + + + it "can nest definition lists" $ do + "[a] foo\n\n [b] bar\n\n [c] baz\n qux" `shouldParseTo` + DocDefList [ ("a", "foo" + <> DocDefList [ ("b", "bar" + <> DocDefList [("c", "baz\nqux")]) + ]) + ] + + it "can come back to top level with a different list" $ do + "* foo\n\n * bar\n\n1. baz" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [ DocParagraph "bar" ] + ] + <> DocOrderedList [ DocParagraph "baz" ] + + it "definition lists can come back to top level with a different list" $ do + "[foo] foov\n\n [bar] barv\n\n1. baz" `shouldParseTo` + DocDefList [ ("foo", "foov" + <> DocDefList [ ("bar", "barv") ]) + ] + <> DocOrderedList [ DocParagraph "baz" ] + + context "when parsing properties" $ do + it "can parse a single property" $ do + "prop> 23 == 23" `shouldParseTo` DocProperty "23 == 23" + + it "can parse multiple subsequent properties" $ do + unlines [ + "prop> 23 == 23" + , "prop> 42 == 42" + ] + `shouldParseTo` + DocProperty "23 == 23" <> DocProperty "42 == 42" + + it "accepts unicode in properties" $ do + "prop> 灼眼のシャナ ≡ 愛" `shouldParseTo` + DocProperty "灼眼のシャナ ≡ 愛" + + it "can deal with whitespace before and after the prop> prompt" $ do + " prop> xs == (reverse $ reverse xs) " `shouldParseTo` + DocProperty "xs == (reverse $ reverse xs)" + + context "when parsing unordered lists" $ do + it "parses a simple list" $ do + unlines [ + " * one" + , " * two" + , " * three" + ] + `shouldParseTo` DocUnorderedList [ + DocParagraph "one" + , DocParagraph "two" + , DocParagraph "three" + ] + + it "ignores empty lines between list items" $ do + unlines [ + "* one" + , "" + , "* two" + ] + `shouldParseTo` DocUnorderedList [ + DocParagraph "one" + , DocParagraph "two" + ] + + it "accepts an empty list item" $ do + "*" `shouldParseTo` DocUnorderedList [DocParagraph DocEmpty] + + it "accepts multi-line list items" $ do + unlines [ + "* point one" + , " more one" + , "* point two" + , "more two" + ] + `shouldParseTo` DocUnorderedList [ + DocParagraph "point one\n more one" + , DocParagraph "point two\nmore two" + ] + + it "accepts markup in list items" $ do + "* /foo/" `shouldParseTo` DocUnorderedList [DocParagraph (DocEmphasis "foo")] + + it "requires empty lines between list and other paragraphs" $ do + unlines [ + "foo" + , "" + , "* bar" + , "" + , "baz" + ] + `shouldParseTo` DocParagraph "foo" <> DocUnorderedList [DocParagraph "bar"] <> DocParagraph "baz" + + context "when parsing ordered lists" $ do + it "parses a simple list" $ do + unlines [ + " 1. one" + , " (1) two" + , " 3. three" + ] + `shouldParseTo` DocOrderedList [ + DocParagraph "one" + , DocParagraph "two" + , DocParagraph "three" + ] + + it "ignores empty lines between list items" $ do + unlines [ + "1. one" + , "" + , "2. two" + ] + `shouldParseTo` DocOrderedList [ + DocParagraph "one" + , DocParagraph "two" + ] + + it "accepts an empty list item" $ do + "1." `shouldParseTo` DocOrderedList [DocParagraph DocEmpty] + + it "accepts multi-line list items" $ do + unlines [ + "1. point one" + , " more one" + , "1. point two" + , "more two" + ] + `shouldParseTo` DocOrderedList [ + DocParagraph "point one\n more one" + , DocParagraph "point two\nmore two" + ] + + it "accepts markup in list items" $ do + "1. /foo/" `shouldParseTo` DocOrderedList [DocParagraph (DocEmphasis "foo")] + + it "requires empty lines between list and other paragraphs" $ do + unlines [ + "foo" + , "" + , "1. bar" + , "" + , "baz" + ] + `shouldParseTo` DocParagraph "foo" <> DocOrderedList [DocParagraph "bar"] <> DocParagraph "baz" + + context "when parsing definition lists" $ do + it "parses a simple list" $ do + unlines [ + " [foo] one" + , " [bar] two" + , " [baz] three" + ] + `shouldParseTo` DocDefList [ + ("foo", "one") + , ("bar", "two") + , ("baz", "three") + ] + + it "ignores empty lines between list items" $ do + unlines [ + "[foo] one" + , "" + , "[bar] two" + ] + `shouldParseTo` DocDefList [ + ("foo", "one") + , ("bar", "two") + ] + + it "accepts an empty list item" $ do + "[foo]" `shouldParseTo` DocDefList [("foo", DocEmpty)] + + it "accepts multi-line list items" $ do + unlines [ + "[foo] point one" + , " more one" + , "[bar] point two" + , "more two" + ] + `shouldParseTo` DocDefList [ + ("foo", "point one\n more one") + , ("bar", "point two\nmore two") + ] + + it "accepts markup in list items" $ do + "[foo] /foo/" `shouldParseTo` DocDefList [("foo", DocEmphasis "foo")] + + it "accepts markup for the label" $ do + "[/foo/] bar" `shouldParseTo` DocDefList [(DocEmphasis "foo", "bar")] + + it "requires empty lines between list and other paragraphs" $ do + unlines [ + "foo" + , "" + , "[foo] bar" + , "" + , "baz" + ] + `shouldParseTo` DocParagraph "foo" <> DocDefList [("foo", "bar")] <> DocParagraph "baz" + + context "when parsing consecutive paragraphs" $ do + it "will not capture irrelevant consecutive lists" $ do + unlines [ " * bullet" + , "" + , "" + , " - different bullet" + , "" + , "" + , " (1) ordered" + , " " + , " 2. different bullet" + , " " + , " [cat] kitten" + , " " + , " [pineapple] fruit" + ] `shouldParseTo` + DocUnorderedList [ DocParagraph "bullet" + , DocParagraph "different bullet"] + <> DocOrderedList [ DocParagraph "ordered" + , DocParagraph "different bullet" + ] + <> DocDefList [ ("cat", "kitten") + , ("pineapple", "fruit") + ] + + context "when parsing function documentation headers" $ do + it "can parse a simple header" $ do + "= Header 1\nHello." `shouldParseTo` + DocParagraph (DocHeader (Header 1 "Header 1")) + <> DocParagraph "Hello." + + it "allow consecutive headers" $ do + "= Header 1\n== Header 2" `shouldParseTo` + DocParagraph (DocHeader (Header 1 "Header 1")) + <> DocParagraph (DocHeader (Header 2 "Header 2")) + + it "accepts markup in the header" $ do + "= /Header/ __1__\nFoo" `shouldParseTo` + DocParagraph (DocHeader + (Header 1 (DocEmphasis "Header" <> " " <> DocBold "1"))) + <> DocParagraph "Foo" diff --git a/haddock-library/test/Documentation/Haddock/Utf8Spec.hs b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs new file mode 100644 index 00000000..47e12704 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs @@ -0,0 +1,14 @@ +module Documentation.Haddock.Utf8Spec (main, spec) where + +import Test.Hspec +import Test.QuickCheck +import Documentation.Haddock.Utf8 + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + describe "decodeUtf8" $ do + it "is inverse to encodeUtf8" $ do + property $ \xs -> (decodeUtf8 . encodeUtf8) xs `shouldBe` xs diff --git a/haddock-library/test/Spec.hs b/haddock-library/test/Spec.hs new file mode 100644 index 00000000..a824f8c3 --- /dev/null +++ b/haddock-library/test/Spec.hs @@ -0,0 +1 @@ +{-# OPTIONS_GHC -F -pgmF hspec-discover #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs new file mode 100644 index 00000000..41b4ed30 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs @@ -0,0 +1,18 @@ +-- | +-- Module : Data.Attoparsec +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient combinator parsing for 'ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec + ( + module Data.Attoparsec.ByteString + ) where + +import Data.Attoparsec.ByteString diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs new file mode 100644 index 00000000..d2f3761c --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs @@ -0,0 +1,205 @@ +-- | +-- Module : Data.Attoparsec.ByteString +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient combinator parsing for 'B.ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString + ( + -- * Differences from Parsec + -- $parsec + + -- * Incremental input + -- $incremental + + -- * Performance considerations + -- $performance + + -- * Parser types + I.Parser + , Result + , T.IResult(..) + , I.compareResults + + -- * Running parsers + , parse + , feed + , I.parseOnly + , parseWith + , parseTest + + -- ** Result conversion + , maybeResult + , eitherResult + + -- * Combinators + , (I.) + , I.try + , module Data.Attoparsec.Combinator + + -- * Parsing individual bytes + , I.word8 + , I.anyWord8 + , I.notWord8 + , I.peekWord8 + , I.satisfy + , I.satisfyWith + , I.skip + + -- ** Byte classes + , I.inClass + , I.notInClass + + -- * Efficient string handling + , I.string + , I.skipWhile + , I.take + , I.scan + , I.takeWhile + , I.takeWhile1 + , I.takeTill + + -- ** Consume all remaining input + , I.takeByteString + , I.takeLazyByteString + + -- * State observation and manipulation functions + , I.endOfInput + , I.atEnd + ) where + +import Data.Attoparsec.Combinator +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B +import Data.Attoparsec.ByteString.Internal (Result, parse) +import qualified Data.Attoparsec.Internal.Types as T + +-- $parsec +-- +-- Compared to Parsec 3, Attoparsec makes several tradeoffs. It is +-- not intended for, or ideal for, all possible uses. +-- +-- * While Attoparsec can consume input incrementally, Parsec cannot. +-- Incremental input is a huge deal for efficient and secure network +-- and system programming, since it gives much more control to users +-- of the library over matters such as resource usage and the I/O +-- model to use. +-- +-- * Much of the performance advantage of Attoparsec is gained via +-- high-performance parsers such as 'I.takeWhile' and 'I.string'. +-- If you use complicated combinators that return lists of bytes or +-- characters, there is less performance difference between the two +-- libraries. +-- +-- * Unlike Parsec 3, Attoparsec does not support being used as a +-- monad transformer. +-- +-- * Attoparsec is specialised to deal only with strict 'B.ByteString' +-- input. Efficiency concerns rule out both lists and lazy +-- bytestrings. The usual use for lazy bytestrings would be to +-- allow consumption of very large input without a large footprint. +-- For this need, Attoparsec's incremental input provides an +-- excellent substitute, with much more control over when input +-- takes place. If you must use lazy bytestrings, see the 'Lazy' +-- module, which feeds lazy chunks to a regular parser. +-- +-- * Parsec parsers can produce more helpful error messages than +-- Attoparsec parsers. This is a matter of focus: Attoparsec avoids +-- the extra book-keeping in favour of higher performance. + +-- $incremental +-- +-- Attoparsec supports incremental input, meaning that you can feed it +-- a bytestring that represents only part of the expected total amount +-- of data to parse. If your parser reaches the end of a fragment of +-- input and could consume more input, it will suspend parsing and +-- return a 'T.Partial' continuation. +-- +-- Supplying the 'T.Partial' continuation with another bytestring will +-- resume parsing at the point where it was suspended. You must be +-- prepared for the result of the resumed parse to be another +-- 'T.Partial' continuation. +-- +-- To indicate that you have no more input, supply the 'T.Partial' +-- continuation with an empty bytestring. +-- +-- Remember that some parsing combinators will not return a result +-- until they reach the end of input. They may thus cause 'T.Partial' +-- results to be returned. +-- +-- If you do not need support for incremental input, consider using +-- the 'I.parseOnly' function to run your parser. It will never +-- prompt for more input. + +-- $performance +-- +-- If you write an Attoparsec-based parser carefully, it can be +-- realistic to expect it to perform within a factor of 2 of a +-- hand-rolled C parser (measuring megabytes parsed per second). +-- +-- To actually achieve high performance, there are a few guidelines +-- that it is useful to follow. +-- +-- Use the 'B.ByteString'-oriented parsers whenever possible, +-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'. There is +-- about a factor of 100 difference in performance between the two +-- kinds of parser. +-- +-- For very simple byte-testing predicates, write them by hand instead +-- of using 'I.inClass' or 'I.notInClass'. For instance, both of +-- these predicates test for an end-of-line byte, but the first is +-- much faster than the second: +-- +-- >endOfLine_fast w = w == 13 || w == 10 +-- >endOfLine_slow = inClass "\r\n" +-- +-- Make active use of benchmarking and profiling tools to measure, +-- find the problems with, and improve the performance of your parser. + +-- | If a parser has returned a 'T.Partial' result, supply it with more +-- input. +feed :: Result r -> B.ByteString -> Result r +feed f@(T.Fail _ _ _) _ = f +feed (T.Partial k) d = k d +feed (T.Done bs r) d = T.Done (B.append bs d) r +{-# INLINE feed #-} + +-- | Run a parser and print its result to standard output. +parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO () +parseTest p s = print (parse p s) + +-- | Run a parser with an initial input string, and a monadic action +-- that can supply more input if needed. +parseWith :: Monad m => + (m B.ByteString) + -- ^ An action that will be executed to provide the parser + -- with more input, if necessary. The action must return an + -- 'B.empty' string when there is no more input available. + -> I.Parser a + -> B.ByteString + -- ^ Initial input for the parser. + -> m (Result a) +parseWith refill p s = step $ parse p s + where step (T.Partial k) = (step . k) =<< refill + step r = return r +{-# INLINE parseWith #-} + +-- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result +-- is treated as failure. +maybeResult :: Result r -> Maybe r +maybeResult (T.Done _ r) = Just r +maybeResult _ = Nothing + +-- | Convert a 'Result' value to an 'Either' value. A 'T.Partial' +-- result is treated as failure. +eitherResult :: Result r -> Either String r +eitherResult (T.Done _ r) = Right r +eitherResult (T.Fail _ _ msg) = Left msg +eitherResult _ = Left "Result: incomplete input" diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs new file mode 100644 index 00000000..3bbe51f0 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs @@ -0,0 +1,549 @@ +{-# LANGUAGE BangPatterns, FlexibleInstances, TypeFamilies, + TypeSynonymInstances, GADTs #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | +-- Module : Data.Attoparsec.ByteString.Char8 +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient, character-oriented combinator parsing for +-- 'B.ByteString' strings, loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Char8 + ( + -- * Character encodings + -- $encodings + + -- * Parser types + Parser + , A.Result + , A.IResult(..) + , I.compareResults + + -- * Running parsers + , A.parse + , A.feed + , A.parseOnly + , A.parseTest + , A.parseWith + + -- ** Result conversion + , A.maybeResult + , A.eitherResult + + -- * Combinators + , (I.) + , I.try + , module Data.Attoparsec.Combinator + + -- * Parsing individual characters + , char + , char8 + , anyChar + , notChar + , peekChar + , satisfy + + -- ** Special character parsers + , digit + , letter_iso8859_15 + , letter_ascii + , space + + -- ** Fast predicates + , isDigit + , isDigit_w8 + , isAlpha_iso8859_15 + , isAlpha_ascii + , isSpace + , isSpace_w8 + + -- *** Character classes + , inClass + , notInClass + + -- * Efficient string handling + , I.string + , stringCI + , skipSpace + , skipWhile + , I.take + , scan + , takeWhile + , takeWhile1 + , takeTill + + -- ** String combinators + -- $specalt + , (.*>) + , (<*.) + + -- ** Consume all remaining input + , I.takeByteString + , I.takeLazyByteString + + -- * Text parsing + , I.endOfLine + , isEndOfLine + , isHorizontalSpace + + -- * Numeric parsers + , decimal + , hexadecimal + , signed + , double + , Number(..) + , number + , rational + + -- * State observation and manipulation functions + , I.endOfInput + , I.atEnd + ) where + +import Control.Applicative ((*>), (<*), (<$>), (<|>)) +import Data.Attoparsec.ByteString.FastSet (charClass, memberChar) +import Data.Attoparsec.ByteString.Internal (Parser, ()) +import Data.Attoparsec.Combinator +import Data.Attoparsec.Number (Number(..)) +import Data.Bits (Bits, (.|.), shiftL) +import Data.ByteString.Internal (c2w, w2c) +import Data.Int (Int8, Int16, Int32, Int64) +import Data.Ratio ((%)) +import Data.String (IsString(..)) +import Data.Word (Word8, Word16, Word32, Word64, Word) +import Prelude hiding (takeWhile) +import qualified Data.Attoparsec.ByteString as A +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B + +instance (a ~ B.ByteString) => IsString (Parser a) where + fromString = I.string . B.pack + +-- $encodings +-- +-- This module is intended for parsing text that is +-- represented using an 8-bit character set, e.g. ASCII or +-- ISO-8859-15. It /does not/ make any attempt to deal with character +-- encodings, multibyte characters, or wide characters. In +-- particular, all attempts to use characters above code point U+00FF +-- will give wrong answers. +-- +-- Code points below U+0100 are simply translated to and from their +-- numeric values, so e.g. the code point U+00A4 becomes the byte +-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic +-- currency sign in ISO-8859-1). Haskell 'Char' values above U+00FF +-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@. + +-- ASCII-specific but fast, oh yes. +toLower :: Word8 -> Word8 +toLower w | w >= 65 && w <= 90 = w + 32 + | otherwise = w + +-- | Satisfy a literal string, ignoring case. +stringCI :: B.ByteString -> Parser B.ByteString +stringCI = I.stringTransform (B8.map toLower) +{-# INLINE stringCI #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Char -> Bool) -> Parser B.ByteString +takeWhile1 p = I.takeWhile1 (p . w2c) +{-# INLINE takeWhile1 #-} + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- > where isDigit c = c >= '0' && c <= '9' +satisfy :: (Char -> Bool) -> Parser Char +satisfy = I.satisfyWith w2c +{-# INLINE satisfy #-} + +-- | Match a letter, in the ISO-8859-15 encoding. +letter_iso8859_15 :: Parser Char +letter_iso8859_15 = satisfy isAlpha_iso8859_15 "letter_iso8859_15" +{-# INLINE letter_iso8859_15 #-} + +-- | Match a letter, in the ASCII encoding. +letter_ascii :: Parser Char +letter_ascii = satisfy isAlpha_ascii "letter_ascii" +{-# INLINE letter_ascii #-} + +-- | A fast alphabetic predicate for the ISO-8859-15 encoding +-- +-- /Note/: For all character encodings other than ISO-8859-15, and +-- almost all Unicode code points above U+00A3, this predicate gives +-- /wrong answers/. +isAlpha_iso8859_15 :: Char -> Bool +isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '\166' && moby c) + where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247" + {-# NOINLINE moby #-} +{-# INLINE isAlpha_iso8859_15 #-} + +-- | A fast alphabetic predicate for the ASCII encoding +-- +-- /Note/: For all character encodings other than ASCII, and +-- almost all Unicode code points above U+007F, this predicate gives +-- /wrong answers/. +isAlpha_ascii :: Char -> Bool +isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +{-# INLINE isAlpha_ascii #-} + +-- | Parse a single digit. +digit :: Parser Char +digit = satisfy isDigit "digit" +{-# INLINE digit #-} + +-- | A fast digit predicate. +isDigit :: Char -> Bool +isDigit c = c >= '0' && c <= '9' +{-# INLINE isDigit #-} + +-- | A fast digit predicate. +isDigit_w8 :: Word8 -> Bool +isDigit_w8 w = w >= 48 && w <= 57 +{-# INLINE isDigit_w8 #-} + +-- | Match any character. +anyChar :: Parser Char +anyChar = satisfy $ const True +{-# INLINE anyChar #-} + +-- | Match any character. Returns 'Nothing' if end of input has been +-- reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +peekChar :: Parser (Maybe Char) +peekChar = (fmap w2c) `fmap` I.peekWord8 +{-# INLINE peekChar #-} + +-- | Fast predicate for matching ASCII space characters. +-- +-- /Note/: This predicate only gives correct answers for the ASCII +-- encoding. For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. For a Unicode-aware and only slightly slower predicate, +-- use 'Data.Char.isSpace' +isSpace :: Char -> Bool +isSpace c = (c == ' ') || ('\t' <= c && c <= '\r') +{-# INLINE isSpace #-} + +-- | Fast 'Word8' predicate for matching ASCII space characters. +isSpace_w8 :: Word8 -> Bool +isSpace_w8 w = (w == 32) || (9 <= w && w <= 13) +{-# INLINE isSpace_w8 #-} + + +-- | Parse a space character. +-- +-- /Note/: This parser only gives correct answers for the ASCII +-- encoding. For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. +space :: Parser Char +space = satisfy isSpace "space" +{-# INLINE space #-} + +-- | Match a specific character. +char :: Char -> Parser Char +char c = satisfy (== c) [c] +{-# INLINE char #-} + +-- | Match a specific character, but return its 'Word8' value. +char8 :: Char -> Parser Word8 +char8 c = I.satisfy (== c2w c) [c] +{-# INLINE char8 #-} + +-- | Match any character except the given one. +notChar :: Char -> Parser Char +notChar c = satisfy (/= c) "not " ++ [c] +{-# INLINE notChar #-} + +-- | Match any character in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal \'-\' to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Char -> Bool +inClass s = (`memberChar` mySet) + where mySet = charClass s +{-# INLINE inClass #-} + +-- | Match any character not in a set. +notInClass :: String -> Char -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeWhile :: (Char -> Bool) -> Parser B.ByteString +takeWhile p = I.takeWhile (p . w2c) +{-# INLINE takeWhile #-} + +-- | A stateful scanner. The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString +scan s0 p = I.scan s0 (\s -> p s . w2c) +{-# INLINE scan #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeTill :: (Char -> Bool) -> Parser B.ByteString +takeTill p = I.takeTill (p . w2c) +{-# INLINE takeTill #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Char -> Bool) -> Parser () +skipWhile p = I.skipWhile (p . w2c) +{-# INLINE skipWhile #-} + +-- | Skip over white space. +skipSpace :: Parser () +skipSpace = I.skipWhile isSpace_w8 +{-# INLINE skipSpace #-} + +-- $specalt +-- +-- The '.*>' and '<*.' combinators are intended for use with the +-- @OverloadedStrings@ language extension. They simplify the common +-- task of matching a statically known string, then immediately +-- parsing something else. +-- +-- An example makes this easier to understand: +-- +-- @{-\# LANGUAGE OverloadedStrings #-} +-- +-- shoeSize = \"Shoe size: \" '.*>' 'decimal' +-- @ +-- +-- If we were to try to use '*>' above instead, the type checker would +-- not be able to tell which 'IsString' instance to use for the text +-- in quotes. We would have to be explicit, using either a type +-- signature or the 'I.string' parser. + +-- | Type-specialized version of '*>' for 'B.ByteString'. +(.*>) :: B.ByteString -> Parser a -> Parser a +s .*> f = I.string s *> f + +-- | Type-specialized version of '<*' for 'B.ByteString'. +(<*.) :: Parser a -> B.ByteString -> Parser a +f <*. s = f <* I.string s + +-- | A predicate that matches either a carriage return @\'\\r\'@ or +-- newline @\'\\n\'@ character. +isEndOfLine :: Word8 -> Bool +isEndOfLine w = w == 13 || w == 10 +{-# INLINE isEndOfLine #-} + +-- | A predicate that matches either a space @\' \'@ or horizontal tab +-- @\'\\t\'@ character. +isHorizontalSpace :: Word8 -> Bool +isHorizontalSpace w = w == 32 || w == 9 +{-# INLINE isHorizontalSpace #-} + +-- | Parse and decode an unsigned hexadecimal number. The hex digits +-- @\'a\'@ through @\'f\'@ may be upper or lower case. +-- +-- This parser does not accept a leading @\"0x\"@ string. +hexadecimal :: (Integral a, Bits a) => Parser a +hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit + where + isHexDigit w = (w >= 48 && w <= 57) || + (w >= 97 && w <= 102) || + (w >= 65 && w <= 70) + step a w | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) + | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) + | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) +{-# SPECIALISE hexadecimal :: Parser Int #-} +{-# SPECIALISE hexadecimal :: Parser Int8 #-} +{-# SPECIALISE hexadecimal :: Parser Int16 #-} +{-# SPECIALISE hexadecimal :: Parser Int32 #-} +{-# SPECIALISE hexadecimal :: Parser Int64 #-} +{-# SPECIALISE hexadecimal :: Parser Integer #-} +{-# SPECIALISE hexadecimal :: Parser Word #-} +{-# SPECIALISE hexadecimal :: Parser Word8 #-} +{-# SPECIALISE hexadecimal :: Parser Word16 #-} +{-# SPECIALISE hexadecimal :: Parser Word32 #-} +{-# SPECIALISE hexadecimal :: Parser Word64 #-} + +-- | Parse and decode an unsigned decimal number. +decimal :: Integral a => Parser a +decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig + where isDig w = w >= 48 && w <= 57 + step a w = a * 10 + fromIntegral (w - 48) +{-# SPECIALISE decimal :: Parser Int #-} +{-# SPECIALISE decimal :: Parser Int8 #-} +{-# SPECIALISE decimal :: Parser Int16 #-} +{-# SPECIALISE decimal :: Parser Int32 #-} +{-# SPECIALISE decimal :: Parser Int64 #-} +{-# SPECIALISE decimal :: Parser Integer #-} +{-# SPECIALISE decimal :: Parser Word #-} +{-# SPECIALISE decimal :: Parser Word8 #-} +{-# SPECIALISE decimal :: Parser Word16 #-} +{-# SPECIALISE decimal :: Parser Word32 #-} +{-# SPECIALISE decimal :: Parser Word64 #-} + +-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign +-- character. +signed :: Num a => Parser a -> Parser a +{-# SPECIALISE signed :: Parser Int -> Parser Int #-} +{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-} +{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-} +{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-} +{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-} +{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-} +signed p = (negate <$> (char8 '-' *> p)) + <|> (char8 '+' *> p) + <|> p + +-- | Parse a rational number. +-- +-- This parser accepts an optional leading sign character, followed by +-- at least one decimal digit. The syntax similar to that accepted by +-- the 'read' function, with the exception that a trailing @\'.\'@ or +-- @\'e\'@ /not/ followed by a number is not consumed. +-- +-- Examples with behaviour identical to 'read', if you feed an empty +-- continuation to the first result: +-- +-- >rational "3" == Done 3.0 "" +-- >rational "3.1" == Done 3.1 "" +-- >rational "3e4" == Done 30000.0 "" +-- >rational "3.1e4" == Done 31000.0, "" +-- +-- Examples with behaviour identical to 'read': +-- +-- >rational ".3" == Fail "input does not start with a digit" +-- >rational "e3" == Fail "input does not start with a digit" +-- +-- Examples of differences from 'read': +-- +-- >rational "3.foo" == Done 3.0 ".foo" +-- >rational "3e" == Done 3.0 "e" +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +rational :: Fractional a => Parser a +{-# SPECIALIZE rational :: Parser Double #-} +{-# SPECIALIZE rational :: Parser Float #-} +{-# SPECIALIZE rational :: Parser Rational #-} +rational = floaty $ \real frac fracDenom -> fromRational $ + real % 1 + frac % fracDenom + +-- | Parse a rational number. +-- +-- The syntax accepted by this parser is the same as for 'rational'. +-- +-- /Note/: This function is almost ten times faster than 'rational', +-- but is slightly less accurate. +-- +-- The 'Double' type supports about 16 decimal places of accuracy. +-- For 94.2% of numbers, this function and 'rational' give identical +-- results, but for the remaining 5.8%, this function loses precision +-- around the 15th decimal place. For 0.001% of numbers, this +-- function will lose precision at the 13th or 14th decimal place. +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +double :: Parser Double +double = floaty asDouble + +asDouble :: Integer -> Integer -> Integer -> Double +asDouble real frac fracDenom = + fromIntegral real + fromIntegral frac / fromIntegral fracDenom +{-# INLINE asDouble #-} + +-- | Parse a number, attempting to preserve both speed and precision. +-- +-- The syntax accepted by this parser is the same as for 'rational'. +-- +-- /Note/: This function is almost ten times faster than 'rational'. +-- On integral inputs, it gives perfectly accurate answers, and on +-- floating point inputs, it is slightly less accurate than +-- 'rational'. +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +number :: Parser Number +number = floaty $ \real frac fracDenom -> + if frac == 0 && fracDenom == 0 + then I real + else D (asDouble real frac fracDenom) +{-# INLINE number #-} + +data T = T !Integer !Int + +floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a +{-# INLINE floaty #-} +floaty f = do + let minus = 45 + plus = 43 + !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|> + return True + real <- decimal + let tryFraction = do + let dot = 46 + _ <- I.satisfy (==dot) + ds <- I.takeWhile isDigit_w8 + case I.parseOnly decimal ds of + Right n -> return $ T n (B.length ds) + _ -> fail "no digits after decimal" + T fraction fracDigits <- tryFraction <|> return (T 0 0) + let littleE = 101 + bigE = 69 + e w = w == littleE || w == bigE + power <- (I.satisfy e *> signed decimal) <|> return (0::Int) + let n = if fracDigits == 0 + then if power == 0 + then fromIntegral real + else fromIntegral real * (10 ^^ power) + else if power == 0 + then f real fraction (10 ^ fracDigits) + else f real fraction (10 ^ fracDigits) * (10 ^^ power) + return $ if positive + then n + else -n diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs new file mode 100644 index 00000000..73d02056 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs @@ -0,0 +1,115 @@ +{-# LANGUAGE BangPatterns, MagicHash #-} + +----------------------------------------------------------------------------- +-- | +-- Module : Data.Attoparsec.ByteString.FastSet +-- Copyright : Bryan O'Sullivan 2008 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Fast set membership tests for 'Word8' and 8-bit 'Char' values. The +-- set representation is unboxed for efficiency. For small sets, we +-- test for membership using a binary search. For larger sets, we use +-- a lookup table. +-- +----------------------------------------------------------------------------- +module Data.Attoparsec.ByteString.FastSet + ( + -- * Data type + FastSet + -- * Construction + , fromList + , set + -- * Lookup + , memberChar + , memberWord8 + -- * Debugging + , fromSet + -- * Handy interface + , charClass + ) where + +import Data.Bits ((.&.), (.|.)) +import Foreign.Storable (peekByteOff, pokeByteOff) +import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#) +import GHC.Word (Word8(W8#)) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 +import qualified Data.ByteString.Internal as I +import qualified Data.ByteString.Unsafe as U + +data FastSet = Sorted { fromSet :: !B.ByteString } + | Table { fromSet :: !B.ByteString } + deriving (Eq, Ord) + +instance Show FastSet where + show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) + show (Table _) = "FastSet Table" + +-- | The lower bound on the size of a lookup table. We choose this to +-- balance table density against performance. +tableCutoff :: Int +tableCutoff = 8 + +-- | Create a set. +set :: B.ByteString -> FastSet +set s | B.length s < tableCutoff = Sorted . B.sort $ s + | otherwise = Table . mkTable $ s + +fromList :: [Word8] -> FastSet +fromList = set . B.pack + +data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 + +shiftR :: Int -> Int -> Int +shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) + +shiftL :: Word8 -> Int -> Word8 +shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#)) + +index :: Int -> I +index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7)) +{-# INLINE index #-} + +-- | Check the set for membership. +memberWord8 :: Word8 -> FastSet -> Bool +memberWord8 w (Table t) = + let I byte bit = index (fromIntegral w) + in U.unsafeIndex t byte .&. bit /= 0 +memberWord8 w (Sorted s) = search 0 (B.length s - 1) + where search lo hi + | hi < lo = False + | otherwise = + let mid = (lo + hi) `div` 2 + in case compare w (U.unsafeIndex s mid) of + GT -> search (mid + 1) hi + LT -> search lo (mid - 1) + _ -> True + +-- | Check the set for membership. Only works with 8-bit characters: +-- characters above code point 255 will give wrong answers. +memberChar :: Char -> FastSet -> Bool +memberChar c = memberWord8 (I.c2w c) +{-# INLINE memberChar #-} + +mkTable :: B.ByteString -> B.ByteString +mkTable s = I.unsafeCreate 32 $ \t -> do + _ <- I.memset t 0 32 + U.unsafeUseAsCStringLen s $ \(p, l) -> + let loop n | n == l = return () + | otherwise = do + c <- peekByteOff p n :: IO Word8 + let I byte bit = index (fromIntegral c) + prev <- peekByteOff t byte :: IO Word8 + pokeByteOff t byte (prev .|. bit) + loop (n + 1) + in loop 0 + +charClass :: String -> FastSet +charClass = set . B8.pack . go + where go (a:'-':b:xs) = [a..b] ++ go xs + go (x:xs) = x : go xs + go _ = "" diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs new file mode 100644 index 00000000..b3699728 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs @@ -0,0 +1,516 @@ +{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings, + RecordWildCards, MagicHash, UnboxedTuples #-} +-- | +-- Module : Data.Attoparsec.ByteString.Internal +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient parser combinators for 'B.ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Internal + ( + -- * Parser types + Parser + , Result + + -- * Running parsers + , parse + , parseOnly + + -- * Combinators + , () + , try + , module Data.Attoparsec.Combinator + + -- * Parsing individual bytes + , satisfy + , satisfyWith + , anyWord8 + , skip + , word8 + , notWord8 + , peekWord8 + + -- ** Byte classes + , inClass + , notInClass + + -- * Parsing more complicated structures + , storable + + -- * Efficient string handling + , skipWhile + , string + , stringTransform + , take + , scan + , takeWhile + , takeWhile1 + , takeTill + + -- ** Consume all remaining input + , takeByteString + , takeLazyByteString + + -- * State observation and manipulation functions + , endOfInput + , atEnd + + -- * Utilities + , endOfLine + ) where + +import Control.Applicative ((<|>), (<$>)) +import Control.Monad (when) +import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8) +import Data.Attoparsec.Combinator +import Data.Attoparsec.Internal.Types + hiding (Parser, Input, Added, Failure, Success) +import Data.Monoid (Monoid(..)) +import Data.Word (Word8) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr (castPtr, minusPtr, plusPtr) +import Foreign.Storable (Storable(peek, sizeOf)) +import Prelude hiding (getChar, take, takeWhile) +import qualified Data.Attoparsec.Internal.Types as T +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Unsafe as B + +#if defined(__GLASGOW_HASKELL__) +import GHC.Base (realWorld#) +import GHC.IO (IO(IO)) +#else +import System.IO.Unsafe (unsafePerformIO) +#endif + +type Parser = T.Parser B.ByteString +type Result = IResult B.ByteString +type Input = T.Input B.ByteString +type Added = T.Added B.ByteString +type Failure r = T.Failure B.ByteString r +type Success a r = T.Success B.ByteString a r + +ensure' :: Int -> Input -> Added -> More -> Failure r -> Success B.ByteString r + -> IResult B.ByteString r +ensure' !n0 i0 a0 m0 kf0 ks0 = + T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0 + where + go !n = T.Parser $ \i a m kf ks -> + if B.length (unI i) >= n + then ks i a m (unI i) + else T.runParser (demandInput >> go n) i a m kf ks + +-- | If at least @n@ bytes of input are available, return the current +-- input, otherwise fail. +ensure :: Int -> Parser B.ByteString +ensure !n = T.Parser $ \i0 a0 m0 kf ks -> + if B.length (unI i0) >= n + then ks i0 a0 m0 (unI i0) + -- The uncommon case is kept out-of-line to reduce code size: + else ensure' n i0 a0 m0 kf ks +-- Non-recursive so the bounds check can be inlined: +{-# INLINE ensure #-} + +-- | Ask for input. If we receive any, pass it to a success +-- continuation, otherwise to a failure continuation. +prompt :: Input -> Added -> More + -> (Input -> Added -> More -> Result r) + -> (Input -> Added -> More -> Result r) + -> Result r +prompt i0 a0 _m0 kf ks = Partial $ \s -> + if B.null s + then kf i0 a0 Complete + else ks (i0 <> I s) (a0 <> A s) Incomplete + +-- | Immediately demand more input via a 'Partial' continuation +-- result. +demandInput :: Parser () +demandInput = T.Parser $ \i0 a0 m0 kf ks -> + if m0 == Complete + then kf i0 a0 m0 ["demandInput"] "not enough bytes" + else let kf' i a m = kf i a m ["demandInput"] "not enough bytes" + ks' i a m = ks i a m () + in prompt i0 a0 m0 kf' ks' + +-- | This parser always succeeds. It returns 'True' if any input is +-- available either immediately or on demand, and 'False' if the end +-- of all input has been reached. +wantInput :: Parser Bool +wantInput = T.Parser $ \i0 a0 m0 _kf ks -> + case () of + _ | not (B.null (unI i0)) -> ks i0 a0 m0 True + | m0 == Complete -> ks i0 a0 m0 False + | otherwise -> let kf' i a m = ks i a m False + ks' i a m = ks i a m True + in prompt i0 a0 m0 kf' ks' + +get :: Parser B.ByteString +get = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0) + +put :: B.ByteString -> Parser () +put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 () + +-- | Attempt a parse, and if it fails, rewind the input so that no +-- input appears to have been consumed. +-- +-- This combinator is provided for compatibility with Parsec. +-- Attoparsec parsers always backtrack on failure. +try :: Parser a -> Parser a +try p = p +{-# INLINE try #-} + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- > where isDigit w = w >= 48 && w <= 57 +satisfy :: (Word8 -> Bool) -> Parser Word8 +satisfy p = do + s <- ensure 1 + let !w = B.unsafeHead s + if p w + then put (B.unsafeTail s) >> return w + else fail "satisfy" +{-# INLINE satisfy #-} + +-- | The parser @skip p@ succeeds for any byte for which the predicate +-- @p@ returns 'True'. +-- +-- >skipDigit = skip isDigit +-- > where isDigit w = w >= 48 && w <= 57 +skip :: (Word8 -> Bool) -> Parser () +skip p = do + s <- ensure 1 + if p (B.unsafeHead s) + then put (B.unsafeTail s) + else fail "skip" + +-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if +-- the predicate @p@ returns 'True' on the transformed value. The +-- parser returns the transformed byte that was parsed. +satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a +satisfyWith f p = do + s <- ensure 1 + let c = f $! B.unsafeHead s + if p c + then let !t = B.unsafeTail s + in put t >> return c + else fail "satisfyWith" +{-# INLINE satisfyWith #-} + +storable :: Storable a => Parser a +storable = hack undefined + where + hack :: Storable b => b -> Parser b + hack dummy = do + (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy) + return . B.inlinePerformIO . withForeignPtr fp $ \p -> + peek (castPtr $ p `plusPtr` o) + +-- | Consume @n@ bytes of input, but succeed only if the predicate +-- returns 'True'. +takeWith :: Int -> (B.ByteString -> Bool) -> Parser B.ByteString +takeWith n0 p = do + let n = max n0 0 + s <- ensure n + let h = B.unsafeTake n s + t = B.unsafeDrop n s + if p h + then put t >> return h + else fail "takeWith" + +-- | Consume exactly @n@ bytes of input. +take :: Int -> Parser B.ByteString +take n = takeWith n (const True) +{-# INLINE take #-} + +-- | @string s@ parses a sequence of bytes that identically match +-- @s@. Returns the parsed string (i.e. @s@). This parser consumes no +-- input if it fails (even if a partial match). +-- +-- /Note/: The behaviour of this parser is different to that of the +-- similarly-named parser in Parsec, as this one is all-or-nothing. +-- To illustrate the difference, the following parser will fail under +-- Parsec given an input of @\"for\"@: +-- +-- >string "foo" <|> string "for" +-- +-- The reason for its failure is that the first branch is a +-- partial match, and will consume the letters @\'f\'@ and @\'o\'@ +-- before failing. In Attoparsec, the above parser will /succeed/ on +-- that input, because the failed first branch will consume nothing. +string :: B.ByteString -> Parser B.ByteString +string s = takeWith (B.length s) (==s) +{-# INLINE string #-} + +stringTransform :: (B.ByteString -> B.ByteString) -> B.ByteString + -> Parser B.ByteString +stringTransform f s = takeWith (B.length s) ((==f s) . f) +{-# INLINE stringTransform #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Word8 -> Bool) -> Parser () +skipWhile p = go + where + go = do + t <- B8.dropWhile p <$> get + put t + when (B.null t) $ do + input <- wantInput + when input go +{-# INLINE skipWhile #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeTill :: (Word8 -> Bool) -> Parser B.ByteString +takeTill p = takeWhile (not . p) +{-# INLINE takeTill #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeWhile :: (Word8 -> Bool) -> Parser B.ByteString +takeWhile p = (B.concat . reverse) `fmap` go [] + where + go acc = do + (h,t) <- B8.span p <$> get + put t + if B.null t + then do + input <- wantInput + if input + then go (h:acc) + else return (h:acc) + else return (h:acc) +{-# INLINE takeWhile #-} + +takeRest :: Parser [B.ByteString] +takeRest = go [] + where + go acc = do + input <- wantInput + if input + then do + s <- get + put B.empty + go (s:acc) + else return (reverse acc) + +-- | Consume all remaining input and return it as a single string. +takeByteString :: Parser B.ByteString +takeByteString = B.concat `fmap` takeRest + +-- | Consume all remaining input and return it as a single string. +takeLazyByteString :: Parser L.ByteString +takeLazyByteString = L.fromChunks `fmap` takeRest + +data T s = T {-# UNPACK #-} !Int s + +-- | A stateful scanner. The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +scan :: s -> (s -> Word8 -> Maybe s) -> Parser B.ByteString +scan s0 p = do + chunks <- go [] s0 + case chunks of + [x] -> return x + xs -> return $! B.concat $ reverse xs + where + go acc s1 = do + let scanner (B.PS fp off len) = + withForeignPtr fp $ \ptr0 -> do + let start = ptr0 `plusPtr` off + end = start `plusPtr` len + inner ptr !s + | ptr < end = do + w <- peek ptr + case p s w of + Just s' -> inner (ptr `plusPtr` 1) s' + _ -> done (ptr `minusPtr` start) s + | otherwise = done (ptr `minusPtr` start) s + done !i !s = return (T i s) + inner start s1 + bs <- get + let T i s' = inlinePerformIO $ scanner bs + !h = B.unsafeTake i bs + !t = B.unsafeDrop i bs + put t + if B.null t + then do + input <- wantInput + if input + then go (h:acc) s' + else return (h:acc) + else return (h:acc) +{-# INLINE scan #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Word8 -> Bool) -> Parser B.ByteString +takeWhile1 p = do + (`when` demandInput) =<< B.null <$> get + (h,t) <- B8.span p <$> get + when (B.null h) $ fail "takeWhile1" + put t + if B.null t + then (h<>) `fmap` takeWhile p + else return h + +-- | Match any byte in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal @\'-\'@ to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Word8 -> Bool +inClass s = (`memberWord8` mySet) + where mySet = charClass s + {-# NOINLINE mySet #-} +{-# INLINE inClass #-} + +-- | Match any byte not in a set. +notInClass :: String -> Word8 -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Match any byte. +anyWord8 :: Parser Word8 +anyWord8 = satisfy $ const True +{-# INLINE anyWord8 #-} + +-- | Match a specific byte. +word8 :: Word8 -> Parser Word8 +word8 c = satisfy (== c) show c +{-# INLINE word8 #-} + +-- | Match any byte except the given one. +notWord8 :: Word8 -> Parser Word8 +notWord8 c = satisfy (/= c) "not " ++ show c +{-# INLINE notWord8 #-} + +-- | Match any byte. Returns 'Nothing' if end of input has been +-- reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +peekWord8 :: Parser (Maybe Word8) +peekWord8 = T.Parser $ \i0 a0 m0 _kf ks -> + if B.null (unI i0) + then if m0 == Complete + then ks i0 a0 m0 Nothing + else let ks' i a m = let !w = B.unsafeHead (unI i) + in ks i a m (Just w) + kf' i a m = ks i a m Nothing + in prompt i0 a0 m0 kf' ks' + else let !w = B.unsafeHead (unI i0) + in ks i0 a0 m0 (Just w) +{-# INLINE peekWord8 #-} + +-- | Match only if all input has been consumed. +endOfInput :: Parser () +endOfInput = T.Parser $ \i0 a0 m0 kf ks -> + if B.null (unI i0) + then if m0 == Complete + then ks i0 a0 m0 () + else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $ + \ i2 a2 m2 -> ks i2 a2 m2 () + ks' i1 a1 m1 _ = addS i0 a0 m0 i1 a1 m1 $ + \ i2 a2 m2 -> kf i2 a2 m2 [] + "endOfInput" + in T.runParser demandInput i0 a0 m0 kf' ks' + else kf i0 a0 m0 [] "endOfInput" + +-- | Return an indication of whether the end of input has been +-- reached. +atEnd :: Parser Bool +atEnd = not <$> wantInput +{-# INLINE atEnd #-} + +-- | Match either a single newline character @\'\\n\'@, or a carriage +-- return followed by a newline character @\"\\r\\n\"@. +endOfLine :: Parser () +endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ()) + +-- | Name the parser, in case failure occurs. +() :: Parser a + -> String -- ^ the name to use if parsing fails + -> Parser a +p msg0 = T.Parser $ \i0 a0 m0 kf ks -> + let kf' i a m strs msg = kf i a m (msg0:strs) msg + in T.runParser p i0 a0 m0 kf' ks +{-# INLINE () #-} +infix 0 + +-- | Terminal failure continuation. +failK :: Failure a +failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg +{-# INLINE failK #-} + +-- | Terminal success continuation. +successK :: Success a a +successK i0 _a0 _m0 a = Done (unI i0) a +{-# INLINE successK #-} + +-- | Run a parser. +parse :: Parser a -> B.ByteString -> Result a +parse m s = T.runParser m (I s) mempty Incomplete failK successK +{-# INLINE parse #-} + +-- | Run a parser that cannot be resupplied via a 'Partial' result. +parseOnly :: Parser a -> B.ByteString -> Either String a +parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of + Fail _ _ err -> Left err + Done _ a -> Right a + _ -> error "parseOnly: impossible error!" +{-# INLINE parseOnly #-} + +-- | Just like unsafePerformIO, but we inline it. Big performance gains as +-- it exposes lots of things to further inlining. /Very unsafe/. In +-- particular, you should do no memory allocation inside an +-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@. +inlinePerformIO :: IO a -> a +#if defined(__GLASGOW_HASKELL__) +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r +#else +inlinePerformIO = unsafePerformIO +#endif +{-# INLINE inlinePerformIO #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs new file mode 100644 index 00000000..cb9cee83 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs @@ -0,0 +1,205 @@ +{-# LANGUAGE BangPatterns, CPP #-} +-- | +-- Module : Data.Attoparsec.Combinator +-- Copyright : Daan Leijen 1999-2001, Bryan O'Sullivan 2009-2010 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : portable +-- +-- Useful parser combinators, similar to those provided by Parsec. +module Data.Attoparsec.Combinator + ( + choice + , count + , option + , many' + , many1 + , many1' + , manyTill + , manyTill' + , sepBy + , sepBy' + , sepBy1 + , sepBy1' + , skipMany + , skipMany1 + , eitherP + ) where + +import Control.Applicative (Alternative(..), Applicative(..), empty, liftA2, + (<|>), (*>), (<$>)) +import Control.Monad (MonadPlus(..)) +#if !MIN_VERSION_base(4,2,0) +import Control.Applicative (many) +#endif + +#if __GLASGOW_HASKELL__ >= 700 +import Data.Attoparsec.Internal.Types (Parser) +import Data.ByteString (ByteString) +#endif + +-- | @choice ps@ tries to apply the actions in the list @ps@ in order, +-- until one of them succeeds. Returns the value of the succeeding +-- action. +choice :: Alternative f => [f a] -> f a +choice = foldr (<|>) empty +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE choice :: [Parser ByteString a] -> Parser ByteString a #-} +#endif + +-- | @option x p@ tries to apply action @p@. If @p@ fails without +-- consuming input, it returns the value @x@, otherwise the value +-- returned by @p@. +-- +-- > priority = option 0 (digitToInt <$> digit) +option :: Alternative f => a -> f a -> f a +option x p = p <|> pure x +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-} +#endif + +-- | A version of 'liftM2' that is strict in the result of its first +-- action. +liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c +liftM2' f a b = do + !x <- a + y <- b + return (f x y) +{-# INLINE liftM2' #-} + +-- | @many' p@ applies the action @p@ /zero/ or more times. Returns a +-- list of the returned values of @p@. The value returned by @p@ is +-- forced to WHNF. +-- +-- > word = many' letter +many' :: (MonadPlus m) => m a -> m [a] +many' p = many_p + where many_p = some_p `mplus` return [] + some_p = liftM2' (:) p many_p +{-# INLINE many' #-} + +-- | @many1 p@ applies the action @p@ /one/ or more times. Returns a +-- list of the returned values of @p@. +-- +-- > word = many1 letter +many1 :: Alternative f => f a -> f [a] +many1 p = liftA2 (:) p (many p) +{-# INLINE many1 #-} + +-- | @many1' p@ applies the action @p@ /one/ or more times. Returns a +-- list of the returned values of @p@. The value returned by @p@ is +-- forced to WHNF. +-- +-- > word = many1' letter +many1' :: (MonadPlus m) => m a -> m [a] +many1' p = liftM2' (:) p (many' p) +{-# INLINE many1' #-} + +-- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. +-- +-- > commaSep p = p `sepBy` (symbol ",") +sepBy :: Alternative f => f a -> f s -> f [a] +sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure [] +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. The value +-- returned by @p@ is forced to WHNF. +-- +-- > commaSep p = p `sepBy'` (symbol ",") +sepBy' :: (MonadPlus m) => m a -> m s -> m [a] +sepBy' p s = scan `mplus` return [] + where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. +-- +-- > commaSep p = p `sepBy1` (symbol ",") +sepBy1 :: Alternative f => f a -> f s -> f [a] +sepBy1 p s = scan + where scan = liftA2 (:) p ((s *> scan) <|> pure []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. The value +-- returned by @p@ is forced to WHNF. +-- +-- > commaSep p = p `sepBy1'` (symbol ",") +sepBy1' :: (MonadPlus m) => m a -> m s -> m [a] +sepBy1' p s = scan + where scan = liftM2' (:) p ((s >> scan) `mplus` return []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @manyTill p end@ applies action @p@ /zero/ or more times until +-- action @end@ succeeds, and returns the list of values returned by +-- @p@. This can be used to scan comments: +-- +-- > simpleComment = string "")) +-- +-- Note the overlapping parsers @anyChar@ and @string \"")) +-- +-- Note the overlapping parsers @anyChar@ and @string \"")) --- --- Note the overlapping parsers @anyChar@ and @string \"")) --- --- Note the overlapping parsers @anyChar@ and @string \"