diff options
Diffstat (limited to 'haddock-library')
22 files changed, 3686 insertions, 0 deletions
diff --git a/haddock-library/.ghci b/haddock-library/.ghci new file mode 100644 index 00000000..6a26395e --- /dev/null +++ b/haddock-library/.ghci @@ -0,0 +1 @@ +:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h diff --git a/haddock-library/LICENSE b/haddock-library/LICENSE new file mode 100644 index 00000000..1636bfcd --- /dev/null +++ b/haddock-library/LICENSE @@ -0,0 +1,23 @@ +Copyright 2002-2010, Simon Marlow. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/haddock-library/Setup.hs b/haddock-library/Setup.hs new file mode 100644 index 00000000..9a994af6 --- /dev/null +++ b/haddock-library/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal new file mode 100644 index 00000000..3a016c5a --- /dev/null +++ b/haddock-library/haddock-library.cabal @@ -0,0 +1,75 @@ +name: haddock-library +version: 2.15.0 +synopsis: Library exposing some functionality of Haddock. + +description: Haddock is a documentation-generation tool for Haskell + libraries. These modules expose some functionality of it + without pulling in the GHC dependency. +license: BSD3 +license-file: LICENSE +maintainer: Simon Hengel <sol@typeful.net>, Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> +homepage: http://www.haskell.org/haddock/ +bug-reports: http://trac.haskell.org/haddock +category: Documentation +build-type: Simple +cabal-version: >= 1.10 +stability: experimental + +library + default-language: Haskell2010 + + build-depends: + base >= 4.3 && < 4.8, + bytestring, + deepseq + + hs-source-dirs: src, vendor/attoparsec-0.10.4.0 + ghc-options: -funbox-strict-fields -Wall -fwarn-tabs -O2 + + exposed-modules: + Documentation.Haddock.Parser + Documentation.Haddock.Types + Documentation.Haddock.Doc + + other-modules: + Data.Attoparsec + Data.Attoparsec.ByteString + Data.Attoparsec.ByteString.Char8 + Data.Attoparsec.Combinator + Data.Attoparsec.Number + Data.Attoparsec.ByteString.FastSet + Data.Attoparsec.ByteString.Internal + Data.Attoparsec.Internal + Data.Attoparsec.Internal.Types + Documentation.Haddock.Utf8 + Documentation.Haddock.Parser.Util + +test-suite spec + type: exitcode-stdio-1.0 + default-language: Haskell2010 + main-is: Spec.hs + hs-source-dirs: + test + , src + , vendor/attoparsec-0.10.4.0 + + cpp-options: + -DTEST + + other-modules: + Documentation.Haddock.ParserSpec + Documentation.Haddock.Utf8Spec + Documentation.Haddock.Parser.UtilSpec + + build-depends: + base + , hspec + , bytestring + , deepseq + , QuickCheck == 2.* + , haddock-library + + +source-repository head + type: git + location: http://git.haskell.org/haddock.git diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs new file mode 100644 index 00000000..4d6c10a4 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Doc.hs @@ -0,0 +1,60 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +module Documentation.Haddock.Doc (docParagraph) where + +import Data.Monoid +import Documentation.Haddock.Types +import Data.Char (isSpace) + +-- We put it here so that we can avoid a circular import +-- anything relevant imports this module anyway +instance Monoid (DocH mod id) where + mempty = DocEmpty + mappend = docAppend + +docAppend :: DocH mod id -> DocH mod id -> DocH mod id +docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) +docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d +docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2) +docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2) +docAppend DocEmpty d = d +docAppend d DocEmpty = d +docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2) +docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2)) +docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d +docAppend d1 d2 = DocAppend d1 d2 + +-- again to make parsing easier - we spot a paragraph whose only item +-- is a DocMonospaced and make it into a DocCodeBlock +docParagraph :: DocH mod id -> DocH mod id +docParagraph (DocMonospaced p) + = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocString s1) (DocMonospaced p)) + | all isSpace s1 + = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocString s1) + (DocAppend (DocMonospaced p) (DocString s2))) + | all isSpace s1 && all isSpace s2 + = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocMonospaced p) (DocString s2)) + | all isSpace s2 + = DocCodeBlock (docCodeBlock p) +docParagraph p + = DocParagraph p + + +-- Drop trailing whitespace from @..@ code blocks. Otherwise this: +-- +-- -- @ +-- -- foo +-- -- @ +-- +-- turns into (DocCodeBlock "\nfoo\n ") which when rendered in HTML +-- gives an extra vertical space after the code block. The single space +-- on the final line seems to trigger the extra vertical space. +-- +docCodeBlock :: DocH mod id -> DocH mod id +docCodeBlock (DocString s) + = DocString (reverse $ dropWhile (`elem` " \t") $ reverse s) +docCodeBlock (DocAppend l r) + = DocAppend l (docCodeBlock r) +docCodeBlock d = d diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs new file mode 100644 index 00000000..3d146d33 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -0,0 +1,474 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE LambdaCase #-} +-- | +-- Module : Documentation.Haddock.Parser +-- Copyright : (c) Mateusz Kowalczyk 2013-2014, +-- Simon Hengel 2013 +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable + +module Documentation.Haddock.Parser ( parseString, parseParas + , overIdentifier, toRegular, Identifier + ) where + +import Control.Applicative +import Control.Arrow (first) +import Control.Monad (void, mfilter) +import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import qualified Data.ByteString.Char8 as BS +import Data.Char (chr, isAsciiUpper) +import Data.List (stripPrefix, intercalate, unfoldr) +import Data.Maybe (fromMaybe) +import Data.Monoid +import Documentation.Haddock.Doc +import Documentation.Haddock.Parser.Util +import Documentation.Haddock.Types +import Documentation.Haddock.Utf8 +import Prelude hiding (takeWhile) + +-- | Identifier string surrounded with opening and closing quotes/backticks. +type Identifier = (Char, String, Char) + +-- | Drops the quotes/backticks around all identifiers, as if they +-- were valid but still 'String's. +toRegular :: DocH mod Identifier -> DocH mod String +toRegular = fmap (\(_, x, _) -> x) + +-- | Maps over 'DocIdentifier's over 'String' with potentially failing +-- conversion using user-supplied function. If the conversion fails, +-- the identifier is deemed to not be valid and is treated as a +-- regular string. +overIdentifier :: (String -> Maybe a) + -> DocH mod Identifier + -> DocH mod a +overIdentifier f d = g d + where + g (DocIdentifier (o, x, e)) = case f x of + Nothing -> DocString $ o : x ++ [e] + Just x' -> DocIdentifier x' + g DocEmpty = DocEmpty + g (DocAppend x x') = DocAppend (g x) (g x') + g (DocString x) = DocString x + g (DocParagraph x) = DocParagraph $ g x + g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x + g (DocModule x) = DocModule x + g (DocWarning x) = DocWarning $ g x + g (DocEmphasis x) = DocEmphasis $ g x + g (DocMonospaced x) = DocMonospaced $ g x + g (DocBold x) = DocBold $ g x + g (DocUnorderedList x) = DocUnorderedList $ fmap g x + g (DocOrderedList x) = DocOrderedList $ fmap g x + g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x + g (DocCodeBlock x) = DocCodeBlock $ g x + g (DocHyperlink x) = DocHyperlink x + g (DocPic x) = DocPic x + g (DocAName x) = DocAName x + g (DocProperty x) = DocProperty x + g (DocExamples x) = DocExamples x + g (DocHeader (Header l x)) = DocHeader . Header l $ g x + +parse :: Parser a -> BS.ByteString -> a +parse p = either err id . parseOnly (p <* endOfInput) + where + err = error . ("Haddock.Parser.parse: " ++) + +-- | Main entry point to the parser. Appends the newline character +-- to the input string. +parseParas :: String -- ^ String to parse + -> DocH mod Identifier +parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") + where + p :: Parser (DocH mod Identifier) + p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + +-- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which +-- drops leading whitespace and encodes the string to UTF8 first. +parseString :: String -> DocH mod Identifier +parseString = parseStringBS . encodeUtf8 . dropWhile isSpace + +parseStringBS :: BS.ByteString -> DocH mod Identifier +parseStringBS = parse p + where + p :: Parser (DocH mod Identifier) + p = mconcat <$> many (monospace <|> anchor <|> identifier + <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold + <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar) + +-- | Parses and processes +-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> +-- +-- >>> parseOnly encodedChar "ABC" +-- Right (DocString "ABC") +encodedChar :: Parser (DocH mod a) +encodedChar = "&#" *> c <* ";" + where + c = DocString . return . chr <$> num + num = hex <|> decimal + hex = ("x" <|> "X") *> hexadecimal + +specialChar :: [Char] +specialChar = "_/<@\"&'`" + +-- | Plain, regular parser for text. Called as one of the last parsers +-- to ensure that we have already given a chance to more meaningful parsers +-- before capturing their characers. +string' :: Parser (DocH mod a) +string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar) + where + unescape "" = "" + unescape ('\\':x:xs) = x : unescape xs + unescape (x:xs) = x : unescape xs + +-- | Skips a single special character and treats it as a plain string. +-- This is done to skip over any special characters belonging to other +-- elements but which were not deemed meaningful at their positions. +skipSpecialChar :: Parser (DocH mod a) +skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) + +-- | Emphasis parser. +-- +-- >>> parseOnly emphasis "/Hello world/" +-- Right (DocEmphasis (DocString "Hello world")) +emphasis :: Parser (DocH mod Identifier) +emphasis = DocEmphasis . parseStringBS <$> + mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") + +-- | Bold parser. +-- +-- >>> parseOnly bold "__Hello world__" +-- Right (DocBold (DocString "Hello world")) +bold :: Parser (DocH mod Identifier) +bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") + +disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString +disallowNewline = mfilter ('\n' `BS.notElem`) + +-- | Like `takeWhile`, but unconditionally take escaped characters. +takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile_ p = scan False p_ + where + p_ escaped c + | escaped = Just False + | not $ p c = Nothing + | otherwise = Just (c == '\\') + +-- | Like `takeWhile1`, but unconditionally take escaped characters. +takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile1_ = mfilter (not . BS.null) . takeWhile_ + +-- | Text anchors to allow for jumping around the generated documentation. +-- +-- >>> parseOnly anchor "#Hello world#" +-- Right (DocAName "Hello world") +anchor :: Parser (DocH mod a) +anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") + +-- | Monospaced strings. +-- +-- >>> parseOnly monospace "@cruel@" +-- Right (DocMonospaced (DocString "cruel")) +monospace :: Parser (DocH mod Identifier) +monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@") + +moduleName :: Parser (DocH mod a) +moduleName = DocModule <$> (char '"' *> modid <* char '"') + where + modid = intercalate "." <$> conid `sepBy1` "." + conid = (:) + <$> satisfy isAsciiUpper + -- NOTE: According to Haskell 2010 we shouldd actually only + -- accept {small | large | digit | ' } here. But as we can't + -- match on unicode characters, this is currently not possible. + <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n")) + +-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify +-- a title for the picture. +-- +-- >>> parseOnly picture "<<hello.png>>" +-- Right (DocPic (Picture "hello.png" Nothing)) +-- >>> parseOnly picture "<<hello.png world>>" +-- Right (DocPic (Picture "hello.png" (Just "world"))) +picture :: Parser (DocH mod a) +picture = DocPic . makeLabeled Picture . decodeUtf8 + <$> disallowNewline ("<<" *> takeUntil ">>") + +-- | Paragraph parser, called by 'parseParas'. +paragraph :: Parser (DocH mod Identifier) +paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock + <|> property <|> header + <|> textParagraph) + +header :: Parser (DocH mod Identifier) +header = do + let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1] + pser = foldl1 (<|>) psers + delim <- decodeUtf8 <$> pser + line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString + rest <- paragraph <|> return mempty + return $ DocParagraph (DocHeader (Header (length delim) line)) <> rest + +textParagraph :: Parser (DocH mod Identifier) +textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine + +-- | List parser, called by 'paragraph'. +list :: Parser (DocH mod Identifier) +list = DocUnorderedList <$> unorderedList + <|> DocOrderedList <$> orderedList + <|> DocDefList <$> definitionList + +-- | Parses unordered (bullet) lists. +unorderedList :: Parser [DocH mod Identifier] +unorderedList = ("*" <|> "-") *> innerList unorderedList + +-- | Parses ordered lists (numbered or dashed). +orderedList :: Parser [DocH mod Identifier] +orderedList = (paren <|> dot) *> innerList orderedList + where + dot = (decimal :: Parser Int) <* "." + paren = "(" *> decimal <* ")" + +-- | Generic function collecting any further lines belonging to the +-- list entry and recursively collecting any further lists in the +-- same paragraph. Usually used as +-- +-- > someListFunction = listBeginning *> innerList someListFunction +innerList :: Parser [DocH mod Identifier] -> Parser [DocH mod Identifier] +innerList item = do + c <- takeLine + (cs, items) <- more item + let contents = docParagraph . parseString . dropNLs . unlines $ c : cs + return $ case items of + Left p -> [contents `joinPara` p] + Right i -> contents : i + +-- | Parses definition lists. +definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)] +definitionList = do + label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]" + c <- takeLine + (cs, items) <- more definitionList + let contents = parseString . dropNLs . unlines $ c : cs + return $ case items of + Left p -> [(label, contents `joinPara` p)] + Right i -> (label, contents) : i + +-- | If possible, appends two 'Doc's under a 'DocParagraph' rather than +-- outside of it. This allows to get structures like +-- +-- @DocParagraph (DocAppend … …)@ +-- +-- rather than +-- +-- @DocAppend (DocParagraph …) …@ +joinPara :: DocH mod id -> DocH mod id -> DocH mod id +joinPara (DocParagraph p) c = docParagraph $ p <> c +joinPara d p = d <> p + +-- | Drops all trailing newlines. +dropNLs :: String -> String +dropNLs = reverse . dropWhile (== '\n') . reverse + +-- | Main worker for 'innerList' and 'definitionList'. +-- We need the 'Either' here to be able to tell in the respective functions +-- whether we're dealing with the next list or a nested paragraph. +more :: Monoid a => Parser a + -> Parser ([String], Either (DocH mod Identifier) a) +more item = innerParagraphs <|> moreListItems item + <|> moreContent item <|> pure ([], Right mempty) + +-- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs. +innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a) +innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs) + +-- | Attemps to fetch the next list if possibly. Used by 'innerList' and +-- 'definitionList' to recursivly grab lists that aren't separated by a whole +-- paragraph. +moreListItems :: Parser a + -> Parser ([String], Either (DocH mod Identifier) a) +moreListItems item = (,) [] . Right <$> (skipSpace *> item) + +-- | Helper for 'innerList' and 'definitionList' which simply takes +-- a line of text and attempts to parse more list content with 'more'. +moreContent :: Monoid a => Parser a + -> Parser ([String], Either (DocH mod Identifier) a) +moreContent item = first . (:) <$> nonEmptyLine <*> more item + +-- | Runs the 'parseParas' parser on an indented paragraph. +-- The indentation is 4 spaces. +indentedParagraphs :: Parser (DocH mod Identifier) +indentedParagraphs = parseParas . concat <$> dropFrontOfPara " " + +-- | Grab as many fully indented paragraphs as we can. +dropFrontOfPara :: Parser BS.ByteString -> Parser [String] +dropFrontOfPara sp = do + currentParagraph <- some (sp *> takeNonEmptyLine) + followingParagraphs <- + skipHorizontalSpace *> nextPar -- we have more paragraphs to take + <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline + <|> endOfInput *> return [] -- nothing more to take at all + return (currentParagraph ++ followingParagraphs) + where + nextPar = (++) <$> nlList <*> dropFrontOfPara sp + nlList = "\n" *> return ["\n"] + +nonSpace :: BS.ByteString -> Parser BS.ByteString +nonSpace xs + | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line" + | otherwise = return xs + +-- | Takes a non-empty, not fully whitespace line. +-- +-- Doesn't discard the trailing newline. +takeNonEmptyLine :: Parser String +takeNonEmptyLine = do + (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" + +birdtracks :: Parser (DocH mod a) +birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line + where + line = skipHorizontalSpace *> ">" *> takeLine + +stripSpace :: [String] -> [String] +stripSpace = fromMaybe <*> mapM strip' + where + strip' (' ':xs') = Just xs' + strip' "" = Just "" + strip' _ = Nothing + +-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). +-- Consecutive examples are accepted. +examples :: Parser (DocH mod a) +examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go) + where + go :: Parser [Example] + go = do + prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" + expr <- takeLine + (rs, es) <- resultAndMoreExamples + return (makeExample prefix expr rs : es) + where + resultAndMoreExamples :: Parser ([String], [Example]) + resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) + where + moreExamples :: Parser ([String], [Example]) + moreExamples = (,) [] <$> go + + result :: Parser ([String], [Example]) + result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples + + makeExample :: String -> String -> [String] -> Example + makeExample prefix expression res = + Example (strip expression) result + where + result = map (substituteBlankLine . tryStripPrefix) res + + tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) + + substituteBlankLine "<BLANKLINE>" = "" + substituteBlankLine xs = xs + +nonEmptyLine :: Parser String +nonEmptyLine = mfilter (any (not . isSpace)) takeLine + +takeLine :: Parser String +takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine + +endOfLine :: Parser () +endOfLine = void "\n" <|> endOfInput + +-- | Property parser. +-- +-- >>> parseOnly property "prop> hello world" +-- Right (DocProperty "hello world") +property :: Parser (DocH mod a) +property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) + +-- | +-- Paragraph level codeblock. Anything between the two delimiting @ is parsed +-- for markup. +codeblock :: Parser (DocH mod Identifier) +codeblock = + DocCodeBlock . parseStringBS . dropSpaces + <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@") + where + dropSpaces xs = + let rs = decodeUtf8 xs + in case splitByNl rs of + [] -> xs + ys -> case last ys of + ' ':_ -> case mapM dropSpace ys of + Nothing -> xs + Just zs -> encodeUtf8 $ intercalate "\n" zs + _ -> xs + + -- This is necessary because ‘lines’ swallows up a trailing newline + -- and we lose information about whether the last line belongs to @ or to + -- text which we need to decide whether we actually want to be dropping + -- anything at all. + splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s) + _ -> Nothing) + . ('\n' :) + + dropSpace "" = Just "" + dropSpace (' ':xs) = Just xs + dropSpace _ = Nothing + + block' = scan False p + where + p isNewline c + | isNewline && c == '@' = Nothing + | isNewline && isSpace c = Just isNewline + | otherwise = Just $ c == '\n' + +hyperlink :: Parser (DocH mod a) +hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 + <$> disallowNewline ("<" *> takeUntil ">") + <|> autoUrl + +autoUrl :: Parser (DocH mod a) +autoUrl = mkLink <$> url + where + url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) + mkLink :: BS.ByteString -> DocH mod a + mkLink s = case BS.unsnoc s of + Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] + _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) + +-- | Parses strings between identifier delimiters. Consumes all input that it +-- deems to be valid in an identifier. Note that it simply blindly consumes +-- characters and does no actual validation itself. +parseValid :: Parser String +parseValid = do + vs' <- many' $ utf8String "⋆" <|> return <$> idChar + let vs = concat vs' + c <- peekChar + case c of + Just '`' -> return vs + Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) + <|> return vs + _ -> fail "outofvalid" + where + idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^") + <|> digit <|> letter_ascii + +-- | Parses UTF8 strings from ByteString streams. +utf8String :: String -> Parser String +utf8String x = decodeUtf8 <$> string (encodeUtf8 x) + +-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the +-- string it deems valid. +identifier :: Parser (DocH mod Identifier) +identifier = do + o <- idDelim + vid <- parseValid + e <- idDelim + return $ DocIdentifier (o, vid, e) + where + idDelim = char '\'' <|> char '`' diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs new file mode 100644 index 00000000..25dba2d5 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -0,0 +1,61 @@ +-- | +-- Module : Documentation.Haddock.Parser.Util +-- Copyright : (c) Mateusz Kowalczyk 2013-2014, +-- Simon Hengel 2013 +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Various utility functions used by the parser. +module Documentation.Haddock.Parser.Util where + +import Control.Applicative +import Control.Monad (mfilter) +import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import Data.ByteString.Char8 (ByteString) +import qualified Data.ByteString.Char8 as BS +import Prelude hiding (takeWhile) + +-- | Remove all leading and trailing whitespace +strip :: String -> String +strip = (\f -> f . f) $ dropWhile isSpace . reverse + +skipHorizontalSpace :: Parser () +skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r") + +takeHorizontalSpace :: Parser BS.ByteString +takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r") + +makeLabeled :: (String -> Maybe String -> a) -> String -> a +makeLabeled f input = case break isSpace $ removeEscapes $ strip input of + (uri, "") -> f uri Nothing + (uri, label) -> f uri (Just $ dropWhile isSpace label) + where + -- As we don't parse these any further, we don't do any processing to the + -- string so we at least remove escape character here. Perhaps we should + -- actually be parsing the label at the very least? + removeEscapes "" = "" + removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs + removeEscapes ('\\':xs) = removeEscapes xs + removeEscapes (x:xs) = x : removeEscapes xs + +takeUntil :: ByteString -> Parser ByteString +takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome + where + end = BS.unpack end_ + + p :: (Bool, String) -> Char -> Maybe (Bool, String) + p acc c = case acc of + (True, _) -> Just (False, end) + (_, []) -> Nothing + (_, x:xs) | x == c -> Just (False, xs) + _ -> Just (c == '\\', end) + + dropEnd = BS.reverse . BS.drop (length end) . BS.reverse + requireEnd = mfilter (BS.isSuffixOf end_) + + gotSome xs + | BS.null xs = fail "didn't get any content" + | otherwise = return xs diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs new file mode 100644 index 00000000..3e25d230 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Types.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable #-} +{-# LANGUAGE DeriveTraversable, StandaloneDeriving #-} + +-- | +-- Module : Documentation.Haddock.Types +-- Copyright : (c) Simon Marlow 2003-2006, +-- David Waern 2006-2009, +-- Mateusz Kowalczyk 2013 +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskellorg +-- Stability : experimental +-- Portability : portable +-- +-- Exposes documentation data types used for (some) of Haddock. +module Documentation.Haddock.Types where + +import Data.Foldable +import Data.Traversable + +instance Foldable Header where + foldMap f (Header _ a) = f a + +instance Traversable Header where + traverse f (Header l a) = Header l `fmap` f a + +data Hyperlink = Hyperlink + { hyperlinkUrl :: String + , hyperlinkLabel :: Maybe String + } deriving (Eq, Show) + + +data Picture = Picture + { pictureUri :: String + , pictureTitle :: Maybe String + } deriving (Eq, Show) + +data Header id = Header + { headerLevel :: Int + , headerTitle :: id + } deriving Functor + +data Example = Example + { exampleExpression :: String + , exampleResult :: [String] + } deriving (Eq, Show) + +data DocH mod id + = DocEmpty + | DocAppend (DocH mod id) (DocH mod id) + | DocString String + | DocParagraph (DocH mod id) + | DocIdentifier id + | DocIdentifierUnchecked mod + | DocModule String + | DocWarning (DocH mod id) + | DocEmphasis (DocH mod id) + | DocMonospaced (DocH mod id) + | DocBold (DocH mod id) + | DocUnorderedList [DocH mod id] + | DocOrderedList [DocH mod id] + | DocDefList [(DocH mod id, DocH mod id)] + | DocCodeBlock (DocH mod id) + | DocHyperlink Hyperlink + | DocPic Picture + | DocAName String + | DocProperty String + | DocExamples [Example] + | DocHeader (Header (DocH mod id)) + deriving (Functor, Foldable, Traversable) diff --git a/haddock-library/src/Documentation/Haddock/Utf8.hs b/haddock-library/src/Documentation/Haddock/Utf8.hs new file mode 100644 index 00000000..3f75e53b --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Utf8.hs @@ -0,0 +1,74 @@ +module Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) where +import Data.Bits ((.|.), (.&.), shiftL, shiftR) +import qualified Data.ByteString as BS +import Data.Char (chr, ord) +import Data.Word (Word8) + +-- | Helper that encodes and packs a 'String' into a 'BS.ByteString' +encodeUtf8 :: String -> BS.ByteString +encodeUtf8 = BS.pack . encode + +-- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String' +decodeUtf8 :: BS.ByteString -> String +decodeUtf8 = decode . BS.unpack + +-- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding +-- | Character to use when 'encode' or 'decode' fail for a byte. +replacementCharacter :: Char +replacementCharacter = '\xfffd' + +-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. +encode :: String -> [Word8] +encode = concatMap (map fromIntegral . go . ord) + where + go oc + | oc <= 0x7f = [oc] + + | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6) + , 0x80 + oc .&. 0x3f + ] + + | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12) + , 0x80 + ((oc `shiftR` 6) .&. 0x3f) + , 0x80 + oc .&. 0x3f + ] + | otherwise = [ 0xf0 + (oc `shiftR` 18) + , 0x80 + ((oc `shiftR` 12) .&. 0x3f) + , 0x80 + ((oc `shiftR` 6) .&. 0x3f) + , 0x80 + oc .&. 0x3f + ] + +-- | Decode a UTF8 string packed into a list of Word8 values, directly to String +decode :: [Word8] -> String +decode [ ] = "" +decode (c:cs) + | c < 0x80 = chr (fromEnum c) : decode cs + | c < 0xc0 = replacementCharacter : decode cs + | c < 0xe0 = multi1 + | c < 0xf0 = multi_byte 2 0xf 0x800 + | c < 0xf8 = multi_byte 3 0x7 0x10000 + | c < 0xfc = multi_byte 4 0x3 0x200000 + | c < 0xfe = multi_byte 5 0x1 0x4000000 + | otherwise = replacementCharacter : decode cs + where + multi1 = case cs of + c1 : ds | c1 .&. 0xc0 == 0x80 -> + let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f) + in if d >= 0x000080 then toEnum d : decode ds + else replacementCharacter : decode ds + _ -> replacementCharacter : decode cs + + multi_byte :: Int -> Word8 -> Int -> String + multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) + where + aux 0 rs acc + | overlong <= acc && acc <= 0x10ffff && + (acc < 0xd800 || 0xdfff < acc) && + (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs + | otherwise = replacementCharacter : decode rs + + aux n (r:rs) acc + | r .&. 0xc0 == 0x80 = aux (n-1) rs + $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) + + aux _ rs _ = replacementCharacter : decode rs diff --git a/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs new file mode 100644 index 00000000..265a4d49 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE OverloadedStrings #-} +module Documentation.Haddock.Parser.UtilSpec (main, spec) where + +import Data.Attoparsec.ByteString.Char8 +import Data.Either +import Documentation.Haddock.Parser.Util +import Test.Hspec + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + describe "takeUntil" $ do + it "takes everything until a specified byte sequence" $ do + parseOnly (takeUntil "end") "someend" `shouldBe` Right "some" + + it "requires the end sequence" $ do + parseOnly (takeUntil "end") "someen" `shouldSatisfy` isLeft + + it "takes escaped bytes unconditionally" $ do + parseOnly (takeUntil "end") "some\\endend" `shouldBe` Right "some\\end" diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs new file mode 100644 index 00000000..3889d555 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs @@ -0,0 +1,816 @@ +{-# LANGUAGE OverloadedStrings, StandaloneDeriving + , FlexibleInstances, UndecidableInstances + , IncoherentInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Documentation.Haddock.ParserSpec (main, spec) where + +import Data.Monoid +import Data.String +import qualified Documentation.Haddock.Parser as Parse +import Documentation.Haddock.Types +import Test.Hspec +import Test.QuickCheck + +type Doc id = DocH () id + +deriving instance Show a => Show (Header a) +deriving instance Show a => Show (Doc a) +deriving instance Eq a => Eq (Header a) +deriving instance Eq a => Eq (Doc a) + +instance IsString (Doc String) where + fromString = DocString + +instance IsString a => IsString (Maybe a) where + fromString = Just . fromString + +parseParas :: String -> Doc String +parseParas = Parse.toRegular . Parse.parseParas + +parseString :: String -> Doc String +parseString = Parse.toRegular . Parse.parseString + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + describe "parseString" $ do + let infix 1 `shouldParseTo` + shouldParseTo :: String -> Doc String -> Expectation + shouldParseTo input ast = parseString input `shouldBe` ast + + it "is total" $ do + property $ \xs -> + (length . show . parseString) xs `shouldSatisfy` (> 0) + + context "when parsing text" $ do + it "can handle unicode" $ do + "灼眼のシャナ" `shouldParseTo` "灼眼のシャナ" + + it "accepts numeric character references" $ do + "foo bar baz λ" `shouldParseTo` "foo bar baz λ" + + it "accepts hexadecimal character references" $ do + "e" `shouldParseTo` "e" + + it "allows to backslash-escape characters" $ do + property $ \x -> ['\\', x] `shouldParseTo` DocString [x] + + context "when parsing strings contaning numeric character references" $ do + it "will implicitly convert digits to characters" $ do + "AAAA" `shouldParseTo` "AAAA" + + "灼眼のシャナ" + `shouldParseTo` "灼眼のシャナ" + + it "will implicitly convert hex encoded characters" $ do + "eeee" `shouldParseTo` "eeee" + + context "when parsing identifiers" $ do + it "parses identifiers enclosed within single ticks" $ do + "'foo'" `shouldParseTo` DocIdentifier "foo" + + it "parses identifiers enclosed within backticks" $ do + "`foo`" `shouldParseTo` DocIdentifier "foo" + + it "parses a word with an one of the delimiters in it as DocString" $ do + "don't" `shouldParseTo` "don't" + + it "doesn't pass pairs of delimiters with spaces between them" $ do + "hel'lo w'orld" `shouldParseTo` "hel'lo w'orld" + + it "don't use apostrophe's in the wrong place's" $ do + " don't use apostrophe's in the wrong place's" `shouldParseTo` + "don't use apostrophe's in the wrong place's" + + context "when parsing URLs" $ do + let hyperlink :: String -> Maybe String -> Doc String + hyperlink url = DocHyperlink . Hyperlink url + + it "parses a URL" $ do + "<http://example.com/>" `shouldParseTo` hyperlink "http://example.com/" Nothing + + it "accepts an optional label" $ do + "<http://example.com/ some link>" `shouldParseTo` hyperlink "http://example.com/" "some link" + + it "does not accept newlines in label" $ do + "<foo bar\nbaz>" `shouldParseTo` "<foo bar\nbaz>" + + -- new behaviour test, this will be now consistent with other markup + it "allows us to escape > inside the URL" $ do + "<http://examp\\>le.com>" `shouldParseTo` + hyperlink "http://examp>le.com" Nothing + + "<http://exa\\>mp\\>le.com>" `shouldParseTo` + hyperlink "http://exa>mp>le.com" Nothing + + -- Likewise in label + "<http://example.com f\\>oo>" `shouldParseTo` + hyperlink "http://example.com" "f>oo" + + it "parses inline URLs" $ do + "foo <http://example.com/> bar" `shouldParseTo` + "foo " <> hyperlink "http://example.com/" Nothing <> " bar" + + it "doesn't allow for multi-line link tags" $ do + "<ba\nz aar>" `shouldParseTo` "<ba\nz aar>" + + context "when autolinking URLs" $ do + it "autolinks HTTP URLs" $ do + "http://example.com/" `shouldParseTo` hyperlink "http://example.com/" Nothing + + it "autolinks HTTPS URLs" $ do + "https://www.example.com/" `shouldParseTo` hyperlink "https://www.example.com/" Nothing + + it "autolinks FTP URLs" $ do + "ftp://example.com/" `shouldParseTo` hyperlink "ftp://example.com/" Nothing + + it "does not include a trailing comma" $ do + "http://example.com/, Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> ", Some other sentence." + + it "does not include a trailing dot" $ do + "http://example.com/. Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> ". Some other sentence." + + it "does not include a trailing exclamation mark" $ do + "http://example.com/! Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "! Some other sentence." + + it "does not include a trailing question mark" $ do + "http://example.com/? Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "? Some other sentence." + + context "when parsing pictures" $ do + let picture :: String -> Maybe String -> Doc String + picture uri = DocPic . Picture uri + + it "parses a simple picture" $ do + "<<baz>>" `shouldParseTo` picture "baz" Nothing + + it "parses a picture with a title" $ do + "<<b a z>>" `shouldParseTo` picture "b" (Just "a z") + + it "parses a picture with unicode" $ do + "<<灼眼のシャナ>>" `shouldParseTo` picture "灼眼のシャナ" Nothing + + it "allows for escaping of the closing tags" $ do + "<<ba\\>>z>>" `shouldParseTo` picture "ba>>z" Nothing + + it "doesn't allow for multi-line picture tags" $ do + "<<ba\nz aar>>" `shouldParseTo` "<<ba\nz aar>>" + + context "when parsing anchors" $ do + it "parses a single word anchor" $ do + "#foo#" `shouldParseTo` DocAName "foo" + + it "parses a multi word anchor" $ do + "#foo bar#" `shouldParseTo` DocAName "foo bar" + + it "parses a unicode anchor" $ do + "#灼眼のシャナ#" `shouldParseTo` DocAName "灼眼のシャナ" + + it "does not accept newlines in anchors" $ do + "#foo\nbar#" `shouldParseTo` "#foo\nbar#" + + context "when parsing emphasised text" $ do + it "emphasises a word on its own" $ do + "/foo/" `shouldParseTo` DocEmphasis "foo" + + it "emphasises inline correctly" $ do + "foo /bar/ baz" `shouldParseTo` "foo " <> DocEmphasis "bar" <> " baz" + + it "emphasises unicode" $ do + "/灼眼のシャナ/" `shouldParseTo` DocEmphasis "灼眼のシャナ" + + it "does not emphasise multi-line strings" $ do + " /foo\nbar/" `shouldParseTo` "/foo\nbar/" + + it "does not emphasise the empty string" $ do + "//" `shouldParseTo` "//" + + it "parses escaped slashes literally" $ do + "/foo\\/bar/" `shouldParseTo` DocEmphasis "foo/bar" + + it "recognizes other markup constructs within emphasised text" $ do + "/foo @bar@ baz/" `shouldParseTo` + DocEmphasis ("foo " <> DocMonospaced "bar" <> " baz") + + it "allows other markup inside of emphasis" $ do + "/__inner bold__/" `shouldParseTo` DocEmphasis (DocBold "inner bold") + + it "doesn't mangle inner markup unicode" $ do + "/__灼眼のシャナ A__/" `shouldParseTo` DocEmphasis (DocBold "灼眼のシャナ A") + + it "properly converts HTML escape sequences" $ do + "/AAAA/" `shouldParseTo` DocEmphasis "AAAA" + + it "allows to escape the emphasis delimiter inside of emphasis" $ do + "/empha\\/sis/" `shouldParseTo` DocEmphasis "empha/sis" + + context "when parsing monospaced text" $ do + it "parses simple monospaced text" $ do + "@foo@" `shouldParseTo` DocMonospaced "foo" + + it "parses inline monospaced text" $ do + "foo @bar@ baz" `shouldParseTo` "foo " <> DocMonospaced "bar" <> " baz" + + it "allows to escape @" $ do + "@foo \\@ bar@" `shouldParseTo` DocMonospaced "foo @ bar" + + it "accepts unicode" $ do + "@foo 灼眼のシャナ bar@" `shouldParseTo` DocMonospaced "foo 灼眼のシャナ bar" + + it "accepts other markup in monospaced text" $ do + "@/foo/@" `shouldParseTo` DocMonospaced (DocEmphasis "foo") + + it "requires the closing @" $ do + "@foo /bar/ baz" `shouldParseTo` "@foo " <> DocEmphasis "bar" <> " baz" + + context "when parsing bold strings" $ do + it "allows for a bold string on its own" $ do + "__bold string__" `shouldParseTo` + DocBold "bold string" + + it "bolds inline correctly" $ do + "hello __everyone__ there" `shouldParseTo` + "hello " + <> DocBold "everyone" <> " there" + + it "bolds unicode" $ do + "__灼眼のシャナ__" `shouldParseTo` + DocBold "灼眼のシャナ" + + it "does not do __multi-line\\n bold__" $ do + " __multi-line\n bold__" `shouldParseTo` "__multi-line\n bold__" + + it "allows other markup inside of bold" $ do + "__/inner emphasis/__" `shouldParseTo` + (DocBold $ DocEmphasis "inner emphasis") + + it "doesn't mangle inner markup unicode" $ do + "__/灼眼のシャナ A/__" `shouldParseTo` + (DocBold $ DocEmphasis "灼眼のシャナ A") + + it "properly converts HTML escape sequences" $ do + "__AAAA__" `shouldParseTo` + DocBold "AAAA" + + it "allows to escape the bold delimiter inside of bold" $ do + "__bo\\__ld__" `shouldParseTo` + DocBold "bo__ld" + + it "doesn't allow for empty bold" $ do + "____" `shouldParseTo` "____" + + context "when parsing module strings" $ do + it "should parse a module on its own" $ do + "\"Module\"" `shouldParseTo` + DocModule "Module" + + it "should parse a module inline" $ do + "This is a \"Module\"." `shouldParseTo` + "This is a " <> DocModule "Module" <> "." + + it "can accept a simple module name" $ do + "\"Hello\"" `shouldParseTo` DocModule "Hello" + + it "can accept a module name with dots" $ do + "\"Hello.World\"" `shouldParseTo` DocModule "Hello.World" + + it "can accept a module name with unicode" $ do + "\"Hello.Worldλ\"" `shouldParseTo` DocModule "Hello.Worldλ" + + it "parses a module name with a trailing dot as regular quoted string" $ do + "\"Hello.\"" `shouldParseTo` "\"Hello.\"" + + it "parses a module name with a space as regular quoted string" $ do + "\"Hello World\"" `shouldParseTo` "\"Hello World\"" + + it "parses a module name with invalid characters as regular quoted string" $ do + "\"Hello&[{}(=*)+]!\"" `shouldParseTo` "\"Hello&[{}(=*)+]!\"" + + it "accepts a module name with unicode" $ do + "\"Foo.Barλ\"" `shouldParseTo` DocModule "Foo.Barλ" + + it "treats empty module name as regular double quotes" $ do + "\"\"" `shouldParseTo` "\"\"" + + describe "parseParas" $ do + let infix 1 `shouldParseTo` + shouldParseTo :: String -> Doc String -> Expectation + shouldParseTo input ast = parseParas input `shouldBe` ast + + it "is total" $ do + property $ \xs -> + (length . show . parseParas) xs `shouldSatisfy` (> 0) + + context "when parsing text paragraphs" $ do + let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String)) + + it "parses an empty paragraph" $ do + "" `shouldParseTo` DocEmpty + + it "parses a simple text paragraph" $ do + "foo bar baz" `shouldParseTo` DocParagraph "foo bar baz" + + it "accepts markup in text paragraphs" $ do + "foo /bar/ baz" `shouldParseTo` DocParagraph ("foo " <> DocEmphasis "bar" <> " baz") + + it "preserve all regular characters" $ do + property $ \xs -> let input = filterSpecial xs in (not . null) input ==> + input `shouldParseTo` DocParagraph (DocString input) + + it "separates paragraphs by empty lines" $ do + unlines [ + "foo" + , " \t " + , "bar" + ] `shouldParseTo` DocParagraph "foo" <> DocParagraph "bar" + + context "when a pragraph only contains monospaced text" $ do + it "turns it into a code block" $ do + "@foo@" `shouldParseTo` DocCodeBlock "foo" + + context "when parsing birdtracks" $ do + it "parses them as a code block" $ do + unlines [ + ">foo" + , ">bar" + , ">baz" + ] `shouldParseTo` DocCodeBlock "foo\nbar\nbaz" + + it "ignores leading whitespace" $ do + unlines [ + " >foo" + , " \t >bar" + , " >baz" + ] + `shouldParseTo` DocCodeBlock "foo\nbar\nbaz" + + it "strips one leading space from each line of the block" $ do + unlines [ + "> foo" + , "> bar" + , "> baz" + ] `shouldParseTo` DocCodeBlock "foo\n bar\nbaz" + + it "ignores empty lines when stripping spaces" $ do + unlines [ + "> foo" + , ">" + , "> bar" + ] `shouldParseTo` DocCodeBlock "foo\n\nbar" + + context "when any non-empty line does not start with a space" $ do + it "does not strip any spaces" $ do + unlines [ + ">foo" + , "> bar" + ] `shouldParseTo` DocCodeBlock "foo\n bar" + + it "ignores nested markup" $ do + unlines [ + ">/foo/" + ] `shouldParseTo` DocCodeBlock "/foo/" + + it "treats them as regular text inside text paragraphs" $ do + unlines [ + "foo" + , ">bar" + ] `shouldParseTo` DocParagraph "foo\n>bar" + + context "when parsing code blocks" $ do + it "accepts a simple code block" $ do + unlines [ + "@" + , "foo" + , "bar" + , "baz" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\nbar\nbaz\n" + + it "ignores trailing whitespace after the opening @" $ do + unlines [ + "@ " + , "foo" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\n" + + it "rejects code blocks that are not closed" $ do + unlines [ + "@" + , "foo" + ] `shouldParseTo` DocParagraph "@\nfoo" + + it "accepts nested markup" $ do + unlines [ + "@" + , "/foo/" + , "@" + ] `shouldParseTo` DocCodeBlock (DocEmphasis "foo" <> "\n") + + it "allows to escape the @" $ do + unlines [ + "@" + , "foo" + , "\\@" + , "bar" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\n@\nbar\n" + + it "accepts horizontal space before the @" $ do + unlines [ " @" + , "foo" + , "" + , "bar" + , "@" + ] `shouldParseTo` DocCodeBlock "foo\n\nbar\n" + + it "strips a leading space from a @ block if present" $ do + unlines [ " @" + , " hello" + , " world" + , " @" + ] `shouldParseTo` DocCodeBlock "hello\nworld\n" + + unlines [ " @" + , " hello" + , "" + , " world" + , " @" + ] `shouldParseTo` DocCodeBlock "hello\n\nworld\n" + + it "only drops whitespace if there's some before closing @" $ do + unlines [ "@" + , " Formatting" + , " matters." + , "@" + ] + `shouldParseTo` DocCodeBlock " Formatting\n matters.\n" + + it "accepts unicode" $ do + "@foo 灼眼のシャナ bar@" `shouldParseTo` DocCodeBlock "foo 灼眼のシャナ bar" + + it "requires the closing @" $ do + "@foo /bar/ baz" + `shouldParseTo` DocParagraph ("@foo " <> DocEmphasis "bar" <> " baz") + + + context "when parsing examples" $ do + it "parses a simple example" $ do + ">>> foo" `shouldParseTo` DocExamples [Example "foo" []] + + it "parses an example with result" $ do + unlines [ + ">>> foo" + , "bar" + , "baz" + ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] + + it "parses consecutive examples" $ do + unlines [ + ">>> fib 5" + , "5" + , ">>> fib 10" + , "55" + ] `shouldParseTo` DocExamples [ + Example "fib 5" ["5"] + , Example "fib 10" ["55"] + ] + + it ("requires an example to be separated" + ++ " from a previous paragraph by an empty line") $ do + "foobar\n\n>>> fib 10\n55" `shouldParseTo` + DocParagraph "foobar" + <> DocExamples [Example "fib 10" ["55"]] + + it "parses bird-tracks inside of paragraphs as plain strings" $ do + let xs = "foo\n>>> bar" + xs `shouldParseTo` DocParagraph (DocString xs) + + it "skips empty lines in front of an example" $ do + "\n \n\n>>> foo" `shouldParseTo` DocExamples [Example "foo" []] + + it "terminates example on empty line" $ do + unlines [ + ">>> foo" + , "bar" + , " " + , "baz" + ] + `shouldParseTo` + DocExamples [Example "foo" ["bar"]] <> DocParagraph "baz" + + it "parses a <BLANKLINE> result as an empty result" $ do + unlines [ + ">>> foo" + , "bar" + , "<BLANKLINE>" + , "baz" + ] + `shouldParseTo` DocExamples [Example "foo" ["bar", "", "baz"]] + + it "accepts unicode in examples" $ do + ">>> 灼眼\nシャナ" `shouldParseTo` DocExamples [Example "灼眼" ["シャナ"]] + + context "when prompt is prefixed by whitespace" $ do + it "strips the exact same amount of whitespace from result lines" $ do + unlines [ + " >>> foo" + , " bar" + , " baz" + ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] + + it "preserves additional whitespace" $ do + unlines [ + " >>> foo" + , " bar" + ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + + it "keeps original if stripping is not possible" $ do + unlines [ + " >>> foo" + , " bar" + ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + + + context "when parsing paragraphs nested in lists" $ do + it "can nest the same type of list" $ do + "* foo\n\n * bar" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [DocParagraph "bar"]] + + it "can nest another type of list inside" $ do + "* foo\n\n 1. bar" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocOrderedList [DocParagraph "bar"]] + + it "can nest a code block inside" $ do + "* foo\n\n @foo bar baz@" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocCodeBlock "foo bar baz"] + + "* foo\n\n @\n foo bar baz\n @" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocCodeBlock "foo bar baz\n"] + + it "can nest more than one level" $ do + "* foo\n\n * bar\n\n * baz\n qux" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [ DocParagraph $ "bar" + <> DocUnorderedList [DocParagraph "baz\nqux"] + ] + ] + + it "won't fail on not fully indented paragraph" $ do + "* foo\n\n * bar\n\n * qux\nquux" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [ DocParagraph "bar" ] + , DocParagraph "qux\nquux"] + + + it "can nest definition lists" $ do + "[a] foo\n\n [b] bar\n\n [c] baz\n qux" `shouldParseTo` + DocDefList [ ("a", "foo" + <> DocDefList [ ("b", "bar" + <> DocDefList [("c", "baz\nqux")]) + ]) + ] + + it "can come back to top level with a different list" $ do + "* foo\n\n * bar\n\n1. baz" `shouldParseTo` + DocUnorderedList [ DocParagraph $ "foo" + <> DocUnorderedList [ DocParagraph "bar" ] + ] + <> DocOrderedList [ DocParagraph "baz" ] + + it "definition lists can come back to top level with a different list" $ do + "[foo] foov\n\n [bar] barv\n\n1. baz" `shouldParseTo` + DocDefList [ ("foo", "foov" + <> DocDefList [ ("bar", "barv") ]) + ] + <> DocOrderedList [ DocParagraph "baz" ] + + context "when parsing properties" $ do + it "can parse a single property" $ do + "prop> 23 == 23" `shouldParseTo` DocProperty "23 == 23" + + it "can parse multiple subsequent properties" $ do + unlines [ + "prop> 23 == 23" + , "prop> 42 == 42" + ] + `shouldParseTo` + DocProperty "23 == 23" <> DocProperty "42 == 42" + + it "accepts unicode in properties" $ do + "prop> 灼眼のシャナ ≡ 愛" `shouldParseTo` + DocProperty "灼眼のシャナ ≡ 愛" + + it "can deal with whitespace before and after the prop> prompt" $ do + " prop> xs == (reverse $ reverse xs) " `shouldParseTo` + DocProperty "xs == (reverse $ reverse xs)" + + context "when parsing unordered lists" $ do + it "parses a simple list" $ do + unlines [ + " * one" + , " * two" + , " * three" + ] + `shouldParseTo` DocUnorderedList [ + DocParagraph "one" + , DocParagraph "two" + , DocParagraph "three" + ] + + it "ignores empty lines between list items" $ do + unlines [ + "* one" + , "" + , "* two" + ] + `shouldParseTo` DocUnorderedList [ + DocParagraph "one" + , DocParagraph "two" + ] + + it "accepts an empty list item" $ do + "*" `shouldParseTo` DocUnorderedList [DocParagraph DocEmpty] + + it "accepts multi-line list items" $ do + unlines [ + "* point one" + , " more one" + , "* point two" + , "more two" + ] + `shouldParseTo` DocUnorderedList [ + DocParagraph "point one\n more one" + , DocParagraph "point two\nmore two" + ] + + it "accepts markup in list items" $ do + "* /foo/" `shouldParseTo` DocUnorderedList [DocParagraph (DocEmphasis "foo")] + + it "requires empty lines between list and other paragraphs" $ do + unlines [ + "foo" + , "" + , "* bar" + , "" + , "baz" + ] + `shouldParseTo` DocParagraph "foo" <> DocUnorderedList [DocParagraph "bar"] <> DocParagraph "baz" + + context "when parsing ordered lists" $ do + it "parses a simple list" $ do + unlines [ + " 1. one" + , " (1) two" + , " 3. three" + ] + `shouldParseTo` DocOrderedList [ + DocParagraph "one" + , DocParagraph "two" + , DocParagraph "three" + ] + + it "ignores empty lines between list items" $ do + unlines [ + "1. one" + , "" + , "2. two" + ] + `shouldParseTo` DocOrderedList [ + DocParagraph "one" + , DocParagraph "two" + ] + + it "accepts an empty list item" $ do + "1." `shouldParseTo` DocOrderedList [DocParagraph DocEmpty] + + it "accepts multi-line list items" $ do + unlines [ + "1. point one" + , " more one" + , "1. point two" + , "more two" + ] + `shouldParseTo` DocOrderedList [ + DocParagraph "point one\n more one" + , DocParagraph "point two\nmore two" + ] + + it "accepts markup in list items" $ do + "1. /foo/" `shouldParseTo` DocOrderedList [DocParagraph (DocEmphasis "foo")] + + it "requires empty lines between list and other paragraphs" $ do + unlines [ + "foo" + , "" + , "1. bar" + , "" + , "baz" + ] + `shouldParseTo` DocParagraph "foo" <> DocOrderedList [DocParagraph "bar"] <> DocParagraph "baz" + + context "when parsing definition lists" $ do + it "parses a simple list" $ do + unlines [ + " [foo] one" + , " [bar] two" + , " [baz] three" + ] + `shouldParseTo` DocDefList [ + ("foo", "one") + , ("bar", "two") + , ("baz", "three") + ] + + it "ignores empty lines between list items" $ do + unlines [ + "[foo] one" + , "" + , "[bar] two" + ] + `shouldParseTo` DocDefList [ + ("foo", "one") + , ("bar", "two") + ] + + it "accepts an empty list item" $ do + "[foo]" `shouldParseTo` DocDefList [("foo", DocEmpty)] + + it "accepts multi-line list items" $ do + unlines [ + "[foo] point one" + , " more one" + , "[bar] point two" + , "more two" + ] + `shouldParseTo` DocDefList [ + ("foo", "point one\n more one") + , ("bar", "point two\nmore two") + ] + + it "accepts markup in list items" $ do + "[foo] /foo/" `shouldParseTo` DocDefList [("foo", DocEmphasis "foo")] + + it "accepts markup for the label" $ do + "[/foo/] bar" `shouldParseTo` DocDefList [(DocEmphasis "foo", "bar")] + + it "requires empty lines between list and other paragraphs" $ do + unlines [ + "foo" + , "" + , "[foo] bar" + , "" + , "baz" + ] + `shouldParseTo` DocParagraph "foo" <> DocDefList [("foo", "bar")] <> DocParagraph "baz" + + context "when parsing consecutive paragraphs" $ do + it "will not capture irrelevant consecutive lists" $ do + unlines [ " * bullet" + , "" + , "" + , " - different bullet" + , "" + , "" + , " (1) ordered" + , " " + , " 2. different bullet" + , " " + , " [cat] kitten" + , " " + , " [pineapple] fruit" + ] `shouldParseTo` + DocUnorderedList [ DocParagraph "bullet" + , DocParagraph "different bullet"] + <> DocOrderedList [ DocParagraph "ordered" + , DocParagraph "different bullet" + ] + <> DocDefList [ ("cat", "kitten") + , ("pineapple", "fruit") + ] + + context "when parsing function documentation headers" $ do + it "can parse a simple header" $ do + "= Header 1\nHello." `shouldParseTo` + DocParagraph (DocHeader (Header 1 "Header 1")) + <> DocParagraph "Hello." + + it "allow consecutive headers" $ do + "= Header 1\n== Header 2" `shouldParseTo` + DocParagraph (DocHeader (Header 1 "Header 1")) + <> DocParagraph (DocHeader (Header 2 "Header 2")) + + it "accepts markup in the header" $ do + "= /Header/ __1__\nFoo" `shouldParseTo` + DocParagraph (DocHeader + (Header 1 (DocEmphasis "Header" <> " " <> DocBold "1"))) + <> DocParagraph "Foo" diff --git a/haddock-library/test/Documentation/Haddock/Utf8Spec.hs b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs new file mode 100644 index 00000000..47e12704 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs @@ -0,0 +1,14 @@ +module Documentation.Haddock.Utf8Spec (main, spec) where + +import Test.Hspec +import Test.QuickCheck +import Documentation.Haddock.Utf8 + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + describe "decodeUtf8" $ do + it "is inverse to encodeUtf8" $ do + property $ \xs -> (decodeUtf8 . encodeUtf8) xs `shouldBe` xs diff --git a/haddock-library/test/Spec.hs b/haddock-library/test/Spec.hs new file mode 100644 index 00000000..a824f8c3 --- /dev/null +++ b/haddock-library/test/Spec.hs @@ -0,0 +1 @@ +{-# OPTIONS_GHC -F -pgmF hspec-discover #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs new file mode 100644 index 00000000..41b4ed30 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs @@ -0,0 +1,18 @@ +-- | +-- Module : Data.Attoparsec +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient combinator parsing for 'ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec + ( + module Data.Attoparsec.ByteString + ) where + +import Data.Attoparsec.ByteString diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs new file mode 100644 index 00000000..d2f3761c --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs @@ -0,0 +1,205 @@ +-- | +-- Module : Data.Attoparsec.ByteString +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient combinator parsing for 'B.ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString + ( + -- * Differences from Parsec + -- $parsec + + -- * Incremental input + -- $incremental + + -- * Performance considerations + -- $performance + + -- * Parser types + I.Parser + , Result + , T.IResult(..) + , I.compareResults + + -- * Running parsers + , parse + , feed + , I.parseOnly + , parseWith + , parseTest + + -- ** Result conversion + , maybeResult + , eitherResult + + -- * Combinators + , (I.<?>) + , I.try + , module Data.Attoparsec.Combinator + + -- * Parsing individual bytes + , I.word8 + , I.anyWord8 + , I.notWord8 + , I.peekWord8 + , I.satisfy + , I.satisfyWith + , I.skip + + -- ** Byte classes + , I.inClass + , I.notInClass + + -- * Efficient string handling + , I.string + , I.skipWhile + , I.take + , I.scan + , I.takeWhile + , I.takeWhile1 + , I.takeTill + + -- ** Consume all remaining input + , I.takeByteString + , I.takeLazyByteString + + -- * State observation and manipulation functions + , I.endOfInput + , I.atEnd + ) where + +import Data.Attoparsec.Combinator +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B +import Data.Attoparsec.ByteString.Internal (Result, parse) +import qualified Data.Attoparsec.Internal.Types as T + +-- $parsec +-- +-- Compared to Parsec 3, Attoparsec makes several tradeoffs. It is +-- not intended for, or ideal for, all possible uses. +-- +-- * While Attoparsec can consume input incrementally, Parsec cannot. +-- Incremental input is a huge deal for efficient and secure network +-- and system programming, since it gives much more control to users +-- of the library over matters such as resource usage and the I/O +-- model to use. +-- +-- * Much of the performance advantage of Attoparsec is gained via +-- high-performance parsers such as 'I.takeWhile' and 'I.string'. +-- If you use complicated combinators that return lists of bytes or +-- characters, there is less performance difference between the two +-- libraries. +-- +-- * Unlike Parsec 3, Attoparsec does not support being used as a +-- monad transformer. +-- +-- * Attoparsec is specialised to deal only with strict 'B.ByteString' +-- input. Efficiency concerns rule out both lists and lazy +-- bytestrings. The usual use for lazy bytestrings would be to +-- allow consumption of very large input without a large footprint. +-- For this need, Attoparsec's incremental input provides an +-- excellent substitute, with much more control over when input +-- takes place. If you must use lazy bytestrings, see the 'Lazy' +-- module, which feeds lazy chunks to a regular parser. +-- +-- * Parsec parsers can produce more helpful error messages than +-- Attoparsec parsers. This is a matter of focus: Attoparsec avoids +-- the extra book-keeping in favour of higher performance. + +-- $incremental +-- +-- Attoparsec supports incremental input, meaning that you can feed it +-- a bytestring that represents only part of the expected total amount +-- of data to parse. If your parser reaches the end of a fragment of +-- input and could consume more input, it will suspend parsing and +-- return a 'T.Partial' continuation. +-- +-- Supplying the 'T.Partial' continuation with another bytestring will +-- resume parsing at the point where it was suspended. You must be +-- prepared for the result of the resumed parse to be another +-- 'T.Partial' continuation. +-- +-- To indicate that you have no more input, supply the 'T.Partial' +-- continuation with an empty bytestring. +-- +-- Remember that some parsing combinators will not return a result +-- until they reach the end of input. They may thus cause 'T.Partial' +-- results to be returned. +-- +-- If you do not need support for incremental input, consider using +-- the 'I.parseOnly' function to run your parser. It will never +-- prompt for more input. + +-- $performance +-- +-- If you write an Attoparsec-based parser carefully, it can be +-- realistic to expect it to perform within a factor of 2 of a +-- hand-rolled C parser (measuring megabytes parsed per second). +-- +-- To actually achieve high performance, there are a few guidelines +-- that it is useful to follow. +-- +-- Use the 'B.ByteString'-oriented parsers whenever possible, +-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'. There is +-- about a factor of 100 difference in performance between the two +-- kinds of parser. +-- +-- For very simple byte-testing predicates, write them by hand instead +-- of using 'I.inClass' or 'I.notInClass'. For instance, both of +-- these predicates test for an end-of-line byte, but the first is +-- much faster than the second: +-- +-- >endOfLine_fast w = w == 13 || w == 10 +-- >endOfLine_slow = inClass "\r\n" +-- +-- Make active use of benchmarking and profiling tools to measure, +-- find the problems with, and improve the performance of your parser. + +-- | If a parser has returned a 'T.Partial' result, supply it with more +-- input. +feed :: Result r -> B.ByteString -> Result r +feed f@(T.Fail _ _ _) _ = f +feed (T.Partial k) d = k d +feed (T.Done bs r) d = T.Done (B.append bs d) r +{-# INLINE feed #-} + +-- | Run a parser and print its result to standard output. +parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO () +parseTest p s = print (parse p s) + +-- | Run a parser with an initial input string, and a monadic action +-- that can supply more input if needed. +parseWith :: Monad m => + (m B.ByteString) + -- ^ An action that will be executed to provide the parser + -- with more input, if necessary. The action must return an + -- 'B.empty' string when there is no more input available. + -> I.Parser a + -> B.ByteString + -- ^ Initial input for the parser. + -> m (Result a) +parseWith refill p s = step $ parse p s + where step (T.Partial k) = (step . k) =<< refill + step r = return r +{-# INLINE parseWith #-} + +-- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result +-- is treated as failure. +maybeResult :: Result r -> Maybe r +maybeResult (T.Done _ r) = Just r +maybeResult _ = Nothing + +-- | Convert a 'Result' value to an 'Either' value. A 'T.Partial' +-- result is treated as failure. +eitherResult :: Result r -> Either String r +eitherResult (T.Done _ r) = Right r +eitherResult (T.Fail _ _ msg) = Left msg +eitherResult _ = Left "Result: incomplete input" diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs new file mode 100644 index 00000000..3bbe51f0 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs @@ -0,0 +1,549 @@ +{-# LANGUAGE BangPatterns, FlexibleInstances, TypeFamilies, + TypeSynonymInstances, GADTs #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | +-- Module : Data.Attoparsec.ByteString.Char8 +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient, character-oriented combinator parsing for +-- 'B.ByteString' strings, loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Char8 + ( + -- * Character encodings + -- $encodings + + -- * Parser types + Parser + , A.Result + , A.IResult(..) + , I.compareResults + + -- * Running parsers + , A.parse + , A.feed + , A.parseOnly + , A.parseTest + , A.parseWith + + -- ** Result conversion + , A.maybeResult + , A.eitherResult + + -- * Combinators + , (I.<?>) + , I.try + , module Data.Attoparsec.Combinator + + -- * Parsing individual characters + , char + , char8 + , anyChar + , notChar + , peekChar + , satisfy + + -- ** Special character parsers + , digit + , letter_iso8859_15 + , letter_ascii + , space + + -- ** Fast predicates + , isDigit + , isDigit_w8 + , isAlpha_iso8859_15 + , isAlpha_ascii + , isSpace + , isSpace_w8 + + -- *** Character classes + , inClass + , notInClass + + -- * Efficient string handling + , I.string + , stringCI + , skipSpace + , skipWhile + , I.take + , scan + , takeWhile + , takeWhile1 + , takeTill + + -- ** String combinators + -- $specalt + , (.*>) + , (<*.) + + -- ** Consume all remaining input + , I.takeByteString + , I.takeLazyByteString + + -- * Text parsing + , I.endOfLine + , isEndOfLine + , isHorizontalSpace + + -- * Numeric parsers + , decimal + , hexadecimal + , signed + , double + , Number(..) + , number + , rational + + -- * State observation and manipulation functions + , I.endOfInput + , I.atEnd + ) where + +import Control.Applicative ((*>), (<*), (<$>), (<|>)) +import Data.Attoparsec.ByteString.FastSet (charClass, memberChar) +import Data.Attoparsec.ByteString.Internal (Parser, (<?>)) +import Data.Attoparsec.Combinator +import Data.Attoparsec.Number (Number(..)) +import Data.Bits (Bits, (.|.), shiftL) +import Data.ByteString.Internal (c2w, w2c) +import Data.Int (Int8, Int16, Int32, Int64) +import Data.Ratio ((%)) +import Data.String (IsString(..)) +import Data.Word (Word8, Word16, Word32, Word64, Word) +import Prelude hiding (takeWhile) +import qualified Data.Attoparsec.ByteString as A +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B + +instance (a ~ B.ByteString) => IsString (Parser a) where + fromString = I.string . B.pack + +-- $encodings +-- +-- This module is intended for parsing text that is +-- represented using an 8-bit character set, e.g. ASCII or +-- ISO-8859-15. It /does not/ make any attempt to deal with character +-- encodings, multibyte characters, or wide characters. In +-- particular, all attempts to use characters above code point U+00FF +-- will give wrong answers. +-- +-- Code points below U+0100 are simply translated to and from their +-- numeric values, so e.g. the code point U+00A4 becomes the byte +-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic +-- currency sign in ISO-8859-1). Haskell 'Char' values above U+00FF +-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@. + +-- ASCII-specific but fast, oh yes. +toLower :: Word8 -> Word8 +toLower w | w >= 65 && w <= 90 = w + 32 + | otherwise = w + +-- | Satisfy a literal string, ignoring case. +stringCI :: B.ByteString -> Parser B.ByteString +stringCI = I.stringTransform (B8.map toLower) +{-# INLINE stringCI #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Char -> Bool) -> Parser B.ByteString +takeWhile1 p = I.takeWhile1 (p . w2c) +{-# INLINE takeWhile1 #-} + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- > where isDigit c = c >= '0' && c <= '9' +satisfy :: (Char -> Bool) -> Parser Char +satisfy = I.satisfyWith w2c +{-# INLINE satisfy #-} + +-- | Match a letter, in the ISO-8859-15 encoding. +letter_iso8859_15 :: Parser Char +letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15" +{-# INLINE letter_iso8859_15 #-} + +-- | Match a letter, in the ASCII encoding. +letter_ascii :: Parser Char +letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii" +{-# INLINE letter_ascii #-} + +-- | A fast alphabetic predicate for the ISO-8859-15 encoding +-- +-- /Note/: For all character encodings other than ISO-8859-15, and +-- almost all Unicode code points above U+00A3, this predicate gives +-- /wrong answers/. +isAlpha_iso8859_15 :: Char -> Bool +isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '\166' && moby c) + where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247" + {-# NOINLINE moby #-} +{-# INLINE isAlpha_iso8859_15 #-} + +-- | A fast alphabetic predicate for the ASCII encoding +-- +-- /Note/: For all character encodings other than ASCII, and +-- almost all Unicode code points above U+007F, this predicate gives +-- /wrong answers/. +isAlpha_ascii :: Char -> Bool +isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +{-# INLINE isAlpha_ascii #-} + +-- | Parse a single digit. +digit :: Parser Char +digit = satisfy isDigit <?> "digit" +{-# INLINE digit #-} + +-- | A fast digit predicate. +isDigit :: Char -> Bool +isDigit c = c >= '0' && c <= '9' +{-# INLINE isDigit #-} + +-- | A fast digit predicate. +isDigit_w8 :: Word8 -> Bool +isDigit_w8 w = w >= 48 && w <= 57 +{-# INLINE isDigit_w8 #-} + +-- | Match any character. +anyChar :: Parser Char +anyChar = satisfy $ const True +{-# INLINE anyChar #-} + +-- | Match any character. Returns 'Nothing' if end of input has been +-- reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +peekChar :: Parser (Maybe Char) +peekChar = (fmap w2c) `fmap` I.peekWord8 +{-# INLINE peekChar #-} + +-- | Fast predicate for matching ASCII space characters. +-- +-- /Note/: This predicate only gives correct answers for the ASCII +-- encoding. For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. For a Unicode-aware and only slightly slower predicate, +-- use 'Data.Char.isSpace' +isSpace :: Char -> Bool +isSpace c = (c == ' ') || ('\t' <= c && c <= '\r') +{-# INLINE isSpace #-} + +-- | Fast 'Word8' predicate for matching ASCII space characters. +isSpace_w8 :: Word8 -> Bool +isSpace_w8 w = (w == 32) || (9 <= w && w <= 13) +{-# INLINE isSpace_w8 #-} + + +-- | Parse a space character. +-- +-- /Note/: This parser only gives correct answers for the ASCII +-- encoding. For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. +space :: Parser Char +space = satisfy isSpace <?> "space" +{-# INLINE space #-} + +-- | Match a specific character. +char :: Char -> Parser Char +char c = satisfy (== c) <?> [c] +{-# INLINE char #-} + +-- | Match a specific character, but return its 'Word8' value. +char8 :: Char -> Parser Word8 +char8 c = I.satisfy (== c2w c) <?> [c] +{-# INLINE char8 #-} + +-- | Match any character except the given one. +notChar :: Char -> Parser Char +notChar c = satisfy (/= c) <?> "not " ++ [c] +{-# INLINE notChar #-} + +-- | Match any character in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal \'-\' to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Char -> Bool +inClass s = (`memberChar` mySet) + where mySet = charClass s +{-# INLINE inClass #-} + +-- | Match any character not in a set. +notInClass :: String -> Char -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeWhile :: (Char -> Bool) -> Parser B.ByteString +takeWhile p = I.takeWhile (p . w2c) +{-# INLINE takeWhile #-} + +-- | A stateful scanner. The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString +scan s0 p = I.scan s0 (\s -> p s . w2c) +{-# INLINE scan #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeTill :: (Char -> Bool) -> Parser B.ByteString +takeTill p = I.takeTill (p . w2c) +{-# INLINE takeTill #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Char -> Bool) -> Parser () +skipWhile p = I.skipWhile (p . w2c) +{-# INLINE skipWhile #-} + +-- | Skip over white space. +skipSpace :: Parser () +skipSpace = I.skipWhile isSpace_w8 +{-# INLINE skipSpace #-} + +-- $specalt +-- +-- The '.*>' and '<*.' combinators are intended for use with the +-- @OverloadedStrings@ language extension. They simplify the common +-- task of matching a statically known string, then immediately +-- parsing something else. +-- +-- An example makes this easier to understand: +-- +-- @{-\# LANGUAGE OverloadedStrings #-} +-- +-- shoeSize = \"Shoe size: \" '.*>' 'decimal' +-- @ +-- +-- If we were to try to use '*>' above instead, the type checker would +-- not be able to tell which 'IsString' instance to use for the text +-- in quotes. We would have to be explicit, using either a type +-- signature or the 'I.string' parser. + +-- | Type-specialized version of '*>' for 'B.ByteString'. +(.*>) :: B.ByteString -> Parser a -> Parser a +s .*> f = I.string s *> f + +-- | Type-specialized version of '<*' for 'B.ByteString'. +(<*.) :: Parser a -> B.ByteString -> Parser a +f <*. s = f <* I.string s + +-- | A predicate that matches either a carriage return @\'\\r\'@ or +-- newline @\'\\n\'@ character. +isEndOfLine :: Word8 -> Bool +isEndOfLine w = w == 13 || w == 10 +{-# INLINE isEndOfLine #-} + +-- | A predicate that matches either a space @\' \'@ or horizontal tab +-- @\'\\t\'@ character. +isHorizontalSpace :: Word8 -> Bool +isHorizontalSpace w = w == 32 || w == 9 +{-# INLINE isHorizontalSpace #-} + +-- | Parse and decode an unsigned hexadecimal number. The hex digits +-- @\'a\'@ through @\'f\'@ may be upper or lower case. +-- +-- This parser does not accept a leading @\"0x\"@ string. +hexadecimal :: (Integral a, Bits a) => Parser a +hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit + where + isHexDigit w = (w >= 48 && w <= 57) || + (w >= 97 && w <= 102) || + (w >= 65 && w <= 70) + step a w | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) + | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) + | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) +{-# SPECIALISE hexadecimal :: Parser Int #-} +{-# SPECIALISE hexadecimal :: Parser Int8 #-} +{-# SPECIALISE hexadecimal :: Parser Int16 #-} +{-# SPECIALISE hexadecimal :: Parser Int32 #-} +{-# SPECIALISE hexadecimal :: Parser Int64 #-} +{-# SPECIALISE hexadecimal :: Parser Integer #-} +{-# SPECIALISE hexadecimal :: Parser Word #-} +{-# SPECIALISE hexadecimal :: Parser Word8 #-} +{-# SPECIALISE hexadecimal :: Parser Word16 #-} +{-# SPECIALISE hexadecimal :: Parser Word32 #-} +{-# SPECIALISE hexadecimal :: Parser Word64 #-} + +-- | Parse and decode an unsigned decimal number. +decimal :: Integral a => Parser a +decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig + where isDig w = w >= 48 && w <= 57 + step a w = a * 10 + fromIntegral (w - 48) +{-# SPECIALISE decimal :: Parser Int #-} +{-# SPECIALISE decimal :: Parser Int8 #-} +{-# SPECIALISE decimal :: Parser Int16 #-} +{-# SPECIALISE decimal :: Parser Int32 #-} +{-# SPECIALISE decimal :: Parser Int64 #-} +{-# SPECIALISE decimal :: Parser Integer #-} +{-# SPECIALISE decimal :: Parser Word #-} +{-# SPECIALISE decimal :: Parser Word8 #-} +{-# SPECIALISE decimal :: Parser Word16 #-} +{-# SPECIALISE decimal :: Parser Word32 #-} +{-# SPECIALISE decimal :: Parser Word64 #-} + +-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign +-- character. +signed :: Num a => Parser a -> Parser a +{-# SPECIALISE signed :: Parser Int -> Parser Int #-} +{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-} +{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-} +{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-} +{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-} +{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-} +signed p = (negate <$> (char8 '-' *> p)) + <|> (char8 '+' *> p) + <|> p + +-- | Parse a rational number. +-- +-- This parser accepts an optional leading sign character, followed by +-- at least one decimal digit. The syntax similar to that accepted by +-- the 'read' function, with the exception that a trailing @\'.\'@ or +-- @\'e\'@ /not/ followed by a number is not consumed. +-- +-- Examples with behaviour identical to 'read', if you feed an empty +-- continuation to the first result: +-- +-- >rational "3" == Done 3.0 "" +-- >rational "3.1" == Done 3.1 "" +-- >rational "3e4" == Done 30000.0 "" +-- >rational "3.1e4" == Done 31000.0, "" +-- +-- Examples with behaviour identical to 'read': +-- +-- >rational ".3" == Fail "input does not start with a digit" +-- >rational "e3" == Fail "input does not start with a digit" +-- +-- Examples of differences from 'read': +-- +-- >rational "3.foo" == Done 3.0 ".foo" +-- >rational "3e" == Done 3.0 "e" +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +rational :: Fractional a => Parser a +{-# SPECIALIZE rational :: Parser Double #-} +{-# SPECIALIZE rational :: Parser Float #-} +{-# SPECIALIZE rational :: Parser Rational #-} +rational = floaty $ \real frac fracDenom -> fromRational $ + real % 1 + frac % fracDenom + +-- | Parse a rational number. +-- +-- The syntax accepted by this parser is the same as for 'rational'. +-- +-- /Note/: This function is almost ten times faster than 'rational', +-- but is slightly less accurate. +-- +-- The 'Double' type supports about 16 decimal places of accuracy. +-- For 94.2% of numbers, this function and 'rational' give identical +-- results, but for the remaining 5.8%, this function loses precision +-- around the 15th decimal place. For 0.001% of numbers, this +-- function will lose precision at the 13th or 14th decimal place. +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +double :: Parser Double +double = floaty asDouble + +asDouble :: Integer -> Integer -> Integer -> Double +asDouble real frac fracDenom = + fromIntegral real + fromIntegral frac / fromIntegral fracDenom +{-# INLINE asDouble #-} + +-- | Parse a number, attempting to preserve both speed and precision. +-- +-- The syntax accepted by this parser is the same as for 'rational'. +-- +-- /Note/: This function is almost ten times faster than 'rational'. +-- On integral inputs, it gives perfectly accurate answers, and on +-- floating point inputs, it is slightly less accurate than +-- 'rational'. +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +number :: Parser Number +number = floaty $ \real frac fracDenom -> + if frac == 0 && fracDenom == 0 + then I real + else D (asDouble real frac fracDenom) +{-# INLINE number #-} + +data T = T !Integer !Int + +floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a +{-# INLINE floaty #-} +floaty f = do + let minus = 45 + plus = 43 + !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|> + return True + real <- decimal + let tryFraction = do + let dot = 46 + _ <- I.satisfy (==dot) + ds <- I.takeWhile isDigit_w8 + case I.parseOnly decimal ds of + Right n -> return $ T n (B.length ds) + _ -> fail "no digits after decimal" + T fraction fracDigits <- tryFraction <|> return (T 0 0) + let littleE = 101 + bigE = 69 + e w = w == littleE || w == bigE + power <- (I.satisfy e *> signed decimal) <|> return (0::Int) + let n = if fracDigits == 0 + then if power == 0 + then fromIntegral real + else fromIntegral real * (10 ^^ power) + else if power == 0 + then f real fraction (10 ^ fracDigits) + else f real fraction (10 ^ fracDigits) * (10 ^^ power) + return $ if positive + then n + else -n diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs new file mode 100644 index 00000000..73d02056 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs @@ -0,0 +1,115 @@ +{-# LANGUAGE BangPatterns, MagicHash #-} + +----------------------------------------------------------------------------- +-- | +-- Module : Data.Attoparsec.ByteString.FastSet +-- Copyright : Bryan O'Sullivan 2008 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Fast set membership tests for 'Word8' and 8-bit 'Char' values. The +-- set representation is unboxed for efficiency. For small sets, we +-- test for membership using a binary search. For larger sets, we use +-- a lookup table. +-- +----------------------------------------------------------------------------- +module Data.Attoparsec.ByteString.FastSet + ( + -- * Data type + FastSet + -- * Construction + , fromList + , set + -- * Lookup + , memberChar + , memberWord8 + -- * Debugging + , fromSet + -- * Handy interface + , charClass + ) where + +import Data.Bits ((.&.), (.|.)) +import Foreign.Storable (peekByteOff, pokeByteOff) +import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#) +import GHC.Word (Word8(W8#)) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 +import qualified Data.ByteString.Internal as I +import qualified Data.ByteString.Unsafe as U + +data FastSet = Sorted { fromSet :: !B.ByteString } + | Table { fromSet :: !B.ByteString } + deriving (Eq, Ord) + +instance Show FastSet where + show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) + show (Table _) = "FastSet Table" + +-- | The lower bound on the size of a lookup table. We choose this to +-- balance table density against performance. +tableCutoff :: Int +tableCutoff = 8 + +-- | Create a set. +set :: B.ByteString -> FastSet +set s | B.length s < tableCutoff = Sorted . B.sort $ s + | otherwise = Table . mkTable $ s + +fromList :: [Word8] -> FastSet +fromList = set . B.pack + +data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 + +shiftR :: Int -> Int -> Int +shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) + +shiftL :: Word8 -> Int -> Word8 +shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#)) + +index :: Int -> I +index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7)) +{-# INLINE index #-} + +-- | Check the set for membership. +memberWord8 :: Word8 -> FastSet -> Bool +memberWord8 w (Table t) = + let I byte bit = index (fromIntegral w) + in U.unsafeIndex t byte .&. bit /= 0 +memberWord8 w (Sorted s) = search 0 (B.length s - 1) + where search lo hi + | hi < lo = False + | otherwise = + let mid = (lo + hi) `div` 2 + in case compare w (U.unsafeIndex s mid) of + GT -> search (mid + 1) hi + LT -> search lo (mid - 1) + _ -> True + +-- | Check the set for membership. Only works with 8-bit characters: +-- characters above code point 255 will give wrong answers. +memberChar :: Char -> FastSet -> Bool +memberChar c = memberWord8 (I.c2w c) +{-# INLINE memberChar #-} + +mkTable :: B.ByteString -> B.ByteString +mkTable s = I.unsafeCreate 32 $ \t -> do + _ <- I.memset t 0 32 + U.unsafeUseAsCStringLen s $ \(p, l) -> + let loop n | n == l = return () + | otherwise = do + c <- peekByteOff p n :: IO Word8 + let I byte bit = index (fromIntegral c) + prev <- peekByteOff t byte :: IO Word8 + pokeByteOff t byte (prev .|. bit) + loop (n + 1) + in loop 0 + +charClass :: String -> FastSet +charClass = set . B8.pack . go + where go (a:'-':b:xs) = [a..b] ++ go xs + go (x:xs) = x : go xs + go _ = "" diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs new file mode 100644 index 00000000..b3699728 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs @@ -0,0 +1,516 @@ +{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings, + RecordWildCards, MagicHash, UnboxedTuples #-} +-- | +-- Module : Data.Attoparsec.ByteString.Internal +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient parser combinators for 'B.ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Internal + ( + -- * Parser types + Parser + , Result + + -- * Running parsers + , parse + , parseOnly + + -- * Combinators + , (<?>) + , try + , module Data.Attoparsec.Combinator + + -- * Parsing individual bytes + , satisfy + , satisfyWith + , anyWord8 + , skip + , word8 + , notWord8 + , peekWord8 + + -- ** Byte classes + , inClass + , notInClass + + -- * Parsing more complicated structures + , storable + + -- * Efficient string handling + , skipWhile + , string + , stringTransform + , take + , scan + , takeWhile + , takeWhile1 + , takeTill + + -- ** Consume all remaining input + , takeByteString + , takeLazyByteString + + -- * State observation and manipulation functions + , endOfInput + , atEnd + + -- * Utilities + , endOfLine + ) where + +import Control.Applicative ((<|>), (<$>)) +import Control.Monad (when) +import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8) +import Data.Attoparsec.Combinator +import Data.Attoparsec.Internal.Types + hiding (Parser, Input, Added, Failure, Success) +import Data.Monoid (Monoid(..)) +import Data.Word (Word8) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr (castPtr, minusPtr, plusPtr) +import Foreign.Storable (Storable(peek, sizeOf)) +import Prelude hiding (getChar, take, takeWhile) +import qualified Data.Attoparsec.Internal.Types as T +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Unsafe as B + +#if defined(__GLASGOW_HASKELL__) +import GHC.Base (realWorld#) +import GHC.IO (IO(IO)) +#else +import System.IO.Unsafe (unsafePerformIO) +#endif + +type Parser = T.Parser B.ByteString +type Result = IResult B.ByteString +type Input = T.Input B.ByteString +type Added = T.Added B.ByteString +type Failure r = T.Failure B.ByteString r +type Success a r = T.Success B.ByteString a r + +ensure' :: Int -> Input -> Added -> More -> Failure r -> Success B.ByteString r + -> IResult B.ByteString r +ensure' !n0 i0 a0 m0 kf0 ks0 = + T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0 + where + go !n = T.Parser $ \i a m kf ks -> + if B.length (unI i) >= n + then ks i a m (unI i) + else T.runParser (demandInput >> go n) i a m kf ks + +-- | If at least @n@ bytes of input are available, return the current +-- input, otherwise fail. +ensure :: Int -> Parser B.ByteString +ensure !n = T.Parser $ \i0 a0 m0 kf ks -> + if B.length (unI i0) >= n + then ks i0 a0 m0 (unI i0) + -- The uncommon case is kept out-of-line to reduce code size: + else ensure' n i0 a0 m0 kf ks +-- Non-recursive so the bounds check can be inlined: +{-# INLINE ensure #-} + +-- | Ask for input. If we receive any, pass it to a success +-- continuation, otherwise to a failure continuation. +prompt :: Input -> Added -> More + -> (Input -> Added -> More -> Result r) + -> (Input -> Added -> More -> Result r) + -> Result r +prompt i0 a0 _m0 kf ks = Partial $ \s -> + if B.null s + then kf i0 a0 Complete + else ks (i0 <> I s) (a0 <> A s) Incomplete + +-- | Immediately demand more input via a 'Partial' continuation +-- result. +demandInput :: Parser () +demandInput = T.Parser $ \i0 a0 m0 kf ks -> + if m0 == Complete + then kf i0 a0 m0 ["demandInput"] "not enough bytes" + else let kf' i a m = kf i a m ["demandInput"] "not enough bytes" + ks' i a m = ks i a m () + in prompt i0 a0 m0 kf' ks' + +-- | This parser always succeeds. It returns 'True' if any input is +-- available either immediately or on demand, and 'False' if the end +-- of all input has been reached. +wantInput :: Parser Bool +wantInput = T.Parser $ \i0 a0 m0 _kf ks -> + case () of + _ | not (B.null (unI i0)) -> ks i0 a0 m0 True + | m0 == Complete -> ks i0 a0 m0 False + | otherwise -> let kf' i a m = ks i a m False + ks' i a m = ks i a m True + in prompt i0 a0 m0 kf' ks' + +get :: Parser B.ByteString +get = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0) + +put :: B.ByteString -> Parser () +put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 () + +-- | Attempt a parse, and if it fails, rewind the input so that no +-- input appears to have been consumed. +-- +-- This combinator is provided for compatibility with Parsec. +-- Attoparsec parsers always backtrack on failure. +try :: Parser a -> Parser a +try p = p +{-# INLINE try #-} + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- > where isDigit w = w >= 48 && w <= 57 +satisfy :: (Word8 -> Bool) -> Parser Word8 +satisfy p = do + s <- ensure 1 + let !w = B.unsafeHead s + if p w + then put (B.unsafeTail s) >> return w + else fail "satisfy" +{-# INLINE satisfy #-} + +-- | The parser @skip p@ succeeds for any byte for which the predicate +-- @p@ returns 'True'. +-- +-- >skipDigit = skip isDigit +-- > where isDigit w = w >= 48 && w <= 57 +skip :: (Word8 -> Bool) -> Parser () +skip p = do + s <- ensure 1 + if p (B.unsafeHead s) + then put (B.unsafeTail s) + else fail "skip" + +-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if +-- the predicate @p@ returns 'True' on the transformed value. The +-- parser returns the transformed byte that was parsed. +satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a +satisfyWith f p = do + s <- ensure 1 + let c = f $! B.unsafeHead s + if p c + then let !t = B.unsafeTail s + in put t >> return c + else fail "satisfyWith" +{-# INLINE satisfyWith #-} + +storable :: Storable a => Parser a +storable = hack undefined + where + hack :: Storable b => b -> Parser b + hack dummy = do + (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy) + return . B.inlinePerformIO . withForeignPtr fp $ \p -> + peek (castPtr $ p `plusPtr` o) + +-- | Consume @n@ bytes of input, but succeed only if the predicate +-- returns 'True'. +takeWith :: Int -> (B.ByteString -> Bool) -> Parser B.ByteString +takeWith n0 p = do + let n = max n0 0 + s <- ensure n + let h = B.unsafeTake n s + t = B.unsafeDrop n s + if p h + then put t >> return h + else fail "takeWith" + +-- | Consume exactly @n@ bytes of input. +take :: Int -> Parser B.ByteString +take n = takeWith n (const True) +{-# INLINE take #-} + +-- | @string s@ parses a sequence of bytes that identically match +-- @s@. Returns the parsed string (i.e. @s@). This parser consumes no +-- input if it fails (even if a partial match). +-- +-- /Note/: The behaviour of this parser is different to that of the +-- similarly-named parser in Parsec, as this one is all-or-nothing. +-- To illustrate the difference, the following parser will fail under +-- Parsec given an input of @\"for\"@: +-- +-- >string "foo" <|> string "for" +-- +-- The reason for its failure is that the first branch is a +-- partial match, and will consume the letters @\'f\'@ and @\'o\'@ +-- before failing. In Attoparsec, the above parser will /succeed/ on +-- that input, because the failed first branch will consume nothing. +string :: B.ByteString -> Parser B.ByteString +string s = takeWith (B.length s) (==s) +{-# INLINE string #-} + +stringTransform :: (B.ByteString -> B.ByteString) -> B.ByteString + -> Parser B.ByteString +stringTransform f s = takeWith (B.length s) ((==f s) . f) +{-# INLINE stringTransform #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Word8 -> Bool) -> Parser () +skipWhile p = go + where + go = do + t <- B8.dropWhile p <$> get + put t + when (B.null t) $ do + input <- wantInput + when input go +{-# INLINE skipWhile #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeTill :: (Word8 -> Bool) -> Parser B.ByteString +takeTill p = takeWhile (not . p) +{-# INLINE takeTill #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +takeWhile :: (Word8 -> Bool) -> Parser B.ByteString +takeWhile p = (B.concat . reverse) `fmap` go [] + where + go acc = do + (h,t) <- B8.span p <$> get + put t + if B.null t + then do + input <- wantInput + if input + then go (h:acc) + else return (h:acc) + else return (h:acc) +{-# INLINE takeWhile #-} + +takeRest :: Parser [B.ByteString] +takeRest = go [] + where + go acc = do + input <- wantInput + if input + then do + s <- get + put B.empty + go (s:acc) + else return (reverse acc) + +-- | Consume all remaining input and return it as a single string. +takeByteString :: Parser B.ByteString +takeByteString = B.concat `fmap` takeRest + +-- | Consume all remaining input and return it as a single string. +takeLazyByteString :: Parser L.ByteString +takeLazyByteString = L.fromChunks `fmap` takeRest + +data T s = T {-# UNPACK #-} !Int s + +-- | A stateful scanner. The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail. It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +scan :: s -> (s -> Word8 -> Maybe s) -> Parser B.ByteString +scan s0 p = do + chunks <- go [] s0 + case chunks of + [x] -> return x + xs -> return $! B.concat $ reverse xs + where + go acc s1 = do + let scanner (B.PS fp off len) = + withForeignPtr fp $ \ptr0 -> do + let start = ptr0 `plusPtr` off + end = start `plusPtr` len + inner ptr !s + | ptr < end = do + w <- peek ptr + case p s w of + Just s' -> inner (ptr `plusPtr` 1) s' + _ -> done (ptr `minusPtr` start) s + | otherwise = done (ptr `minusPtr` start) s + done !i !s = return (T i s) + inner start s1 + bs <- get + let T i s' = inlinePerformIO $ scanner bs + !h = B.unsafeTake i bs + !t = B.unsafeDrop i bs + put t + if B.null t + then do + input <- wantInput + if input + then go (h:acc) s' + else return (h:acc) + else return (h:acc) +{-# INLINE scan #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Word8 -> Bool) -> Parser B.ByteString +takeWhile1 p = do + (`when` demandInput) =<< B.null <$> get + (h,t) <- B8.span p <$> get + when (B.null h) $ fail "takeWhile1" + put t + if B.null t + then (h<>) `fmap` takeWhile p + else return h + +-- | Match any byte in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal @\'-\'@ to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Word8 -> Bool +inClass s = (`memberWord8` mySet) + where mySet = charClass s + {-# NOINLINE mySet #-} +{-# INLINE inClass #-} + +-- | Match any byte not in a set. +notInClass :: String -> Word8 -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Match any byte. +anyWord8 :: Parser Word8 +anyWord8 = satisfy $ const True +{-# INLINE anyWord8 #-} + +-- | Match a specific byte. +word8 :: Word8 -> Parser Word8 +word8 c = satisfy (== c) <?> show c +{-# INLINE word8 #-} + +-- | Match any byte except the given one. +notWord8 :: Word8 -> Parser Word8 +notWord8 c = satisfy (/= c) <?> "not " ++ show c +{-# INLINE notWord8 #-} + +-- | Match any byte. Returns 'Nothing' if end of input has been +-- reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs. Careless use will thus result in an infinite loop. +peekWord8 :: Parser (Maybe Word8) +peekWord8 = T.Parser $ \i0 a0 m0 _kf ks -> + if B.null (unI i0) + then if m0 == Complete + then ks i0 a0 m0 Nothing + else let ks' i a m = let !w = B.unsafeHead (unI i) + in ks i a m (Just w) + kf' i a m = ks i a m Nothing + in prompt i0 a0 m0 kf' ks' + else let !w = B.unsafeHead (unI i0) + in ks i0 a0 m0 (Just w) +{-# INLINE peekWord8 #-} + +-- | Match only if all input has been consumed. +endOfInput :: Parser () +endOfInput = T.Parser $ \i0 a0 m0 kf ks -> + if B.null (unI i0) + then if m0 == Complete + then ks i0 a0 m0 () + else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $ + \ i2 a2 m2 -> ks i2 a2 m2 () + ks' i1 a1 m1 _ = addS i0 a0 m0 i1 a1 m1 $ + \ i2 a2 m2 -> kf i2 a2 m2 [] + "endOfInput" + in T.runParser demandInput i0 a0 m0 kf' ks' + else kf i0 a0 m0 [] "endOfInput" + +-- | Return an indication of whether the end of input has been +-- reached. +atEnd :: Parser Bool +atEnd = not <$> wantInput +{-# INLINE atEnd #-} + +-- | Match either a single newline character @\'\\n\'@, or a carriage +-- return followed by a newline character @\"\\r\\n\"@. +endOfLine :: Parser () +endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ()) + +-- | Name the parser, in case failure occurs. +(<?>) :: Parser a + -> String -- ^ the name to use if parsing fails + -> Parser a +p <?> msg0 = T.Parser $ \i0 a0 m0 kf ks -> + let kf' i a m strs msg = kf i a m (msg0:strs) msg + in T.runParser p i0 a0 m0 kf' ks +{-# INLINE (<?>) #-} +infix 0 <?> + +-- | Terminal failure continuation. +failK :: Failure a +failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg +{-# INLINE failK #-} + +-- | Terminal success continuation. +successK :: Success a a +successK i0 _a0 _m0 a = Done (unI i0) a +{-# INLINE successK #-} + +-- | Run a parser. +parse :: Parser a -> B.ByteString -> Result a +parse m s = T.runParser m (I s) mempty Incomplete failK successK +{-# INLINE parse #-} + +-- | Run a parser that cannot be resupplied via a 'Partial' result. +parseOnly :: Parser a -> B.ByteString -> Either String a +parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of + Fail _ _ err -> Left err + Done _ a -> Right a + _ -> error "parseOnly: impossible error!" +{-# INLINE parseOnly #-} + +-- | Just like unsafePerformIO, but we inline it. Big performance gains as +-- it exposes lots of things to further inlining. /Very unsafe/. In +-- particular, you should do no memory allocation inside an +-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@. +inlinePerformIO :: IO a -> a +#if defined(__GLASGOW_HASKELL__) +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r +#else +inlinePerformIO = unsafePerformIO +#endif +{-# INLINE inlinePerformIO #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs new file mode 100644 index 00000000..cb9cee83 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs @@ -0,0 +1,205 @@ +{-# LANGUAGE BangPatterns, CPP #-} +-- | +-- Module : Data.Attoparsec.Combinator +-- Copyright : Daan Leijen 1999-2001, Bryan O'Sullivan 2009-2010 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : portable +-- +-- Useful parser combinators, similar to those provided by Parsec. +module Data.Attoparsec.Combinator + ( + choice + , count + , option + , many' + , many1 + , many1' + , manyTill + , manyTill' + , sepBy + , sepBy' + , sepBy1 + , sepBy1' + , skipMany + , skipMany1 + , eitherP + ) where + +import Control.Applicative (Alternative(..), Applicative(..), empty, liftA2, + (<|>), (*>), (<$>)) +import Control.Monad (MonadPlus(..)) +#if !MIN_VERSION_base(4,2,0) +import Control.Applicative (many) +#endif + +#if __GLASGOW_HASKELL__ >= 700 +import Data.Attoparsec.Internal.Types (Parser) +import Data.ByteString (ByteString) +#endif + +-- | @choice ps@ tries to apply the actions in the list @ps@ in order, +-- until one of them succeeds. Returns the value of the succeeding +-- action. +choice :: Alternative f => [f a] -> f a +choice = foldr (<|>) empty +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE choice :: [Parser ByteString a] -> Parser ByteString a #-} +#endif + +-- | @option x p@ tries to apply action @p@. If @p@ fails without +-- consuming input, it returns the value @x@, otherwise the value +-- returned by @p@. +-- +-- > priority = option 0 (digitToInt <$> digit) +option :: Alternative f => a -> f a -> f a +option x p = p <|> pure x +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-} +#endif + +-- | A version of 'liftM2' that is strict in the result of its first +-- action. +liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c +liftM2' f a b = do + !x <- a + y <- b + return (f x y) +{-# INLINE liftM2' #-} + +-- | @many' p@ applies the action @p@ /zero/ or more times. Returns a +-- list of the returned values of @p@. The value returned by @p@ is +-- forced to WHNF. +-- +-- > word = many' letter +many' :: (MonadPlus m) => m a -> m [a] +many' p = many_p + where many_p = some_p `mplus` return [] + some_p = liftM2' (:) p many_p +{-# INLINE many' #-} + +-- | @many1 p@ applies the action @p@ /one/ or more times. Returns a +-- list of the returned values of @p@. +-- +-- > word = many1 letter +many1 :: Alternative f => f a -> f [a] +many1 p = liftA2 (:) p (many p) +{-# INLINE many1 #-} + +-- | @many1' p@ applies the action @p@ /one/ or more times. Returns a +-- list of the returned values of @p@. The value returned by @p@ is +-- forced to WHNF. +-- +-- > word = many1' letter +many1' :: (MonadPlus m) => m a -> m [a] +many1' p = liftM2' (:) p (many' p) +{-# INLINE many1' #-} + +-- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. +-- +-- > commaSep p = p `sepBy` (symbol ",") +sepBy :: Alternative f => f a -> f s -> f [a] +sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure [] +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. The value +-- returned by @p@ is forced to WHNF. +-- +-- > commaSep p = p `sepBy'` (symbol ",") +sepBy' :: (MonadPlus m) => m a -> m s -> m [a] +sepBy' p s = scan `mplus` return [] + where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. +-- +-- > commaSep p = p `sepBy1` (symbol ",") +sepBy1 :: Alternative f => f a -> f s -> f [a] +sepBy1 p s = scan + where scan = liftA2 (:) p ((s *> scan) <|> pure []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. The value +-- returned by @p@ is forced to WHNF. +-- +-- > commaSep p = p `sepBy1'` (symbol ",") +sepBy1' :: (MonadPlus m) => m a -> m s -> m [a] +sepBy1' p s = scan + where scan = liftM2' (:) p ((s >> scan) `mplus` return []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s + -> Parser ByteString [a] #-} +#endif + +-- | @manyTill p end@ applies action @p@ /zero/ or more times until +-- action @end@ succeeds, and returns the list of values returned by +-- @p@. This can be used to scan comments: +-- +-- > simpleComment = string "<!--" *> manyTill anyChar (try (string "-->")) +-- +-- Note the overlapping parsers @anyChar@ and @string \"<!--\"@, and +-- therefore the use of the 'try' combinator. +manyTill :: Alternative f => f a -> f b -> f [a] +manyTill p end = scan + where scan = (end *> pure []) <|> liftA2 (:) p scan +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE manyTill :: Parser ByteString a -> Parser ByteString b + -> Parser ByteString [a] #-} +#endif + +-- | @manyTill' p end@ applies action @p@ /zero/ or more times until +-- action @end@ succeeds, and returns the list of values returned by +-- @p@. This can be used to scan comments: +-- +-- > simpleComment = string "<!--" *> manyTill' anyChar (try (string "-->")) +-- +-- Note the overlapping parsers @anyChar@ and @string \"<!--\"@, and +-- therefore the use of the 'try' combinator. The value returned by @p@ +-- is forced to WHNF. +manyTill' :: (MonadPlus m) => m a -> m b -> m [a] +manyTill' p end = scan + where scan = (end >> return []) `mplus` liftM2' (:) p scan +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE manyTill' :: Parser ByteString a -> Parser ByteString b + -> Parser ByteString [a] #-} +#endif + +-- | Skip zero or more instances of an action. +skipMany :: Alternative f => f a -> f () +skipMany p = scan + where scan = (p *> scan) <|> pure () +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE skipMany :: Parser ByteString a -> Parser ByteString () #-} +#endif + +-- | Skip one or more instances of an action. +skipMany1 :: Alternative f => f a -> f () +skipMany1 p = p *> skipMany p +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE skipMany1 :: Parser ByteString a -> Parser ByteString () #-} +#endif + +-- | Apply the given action repeatedly, returning every result. +count :: Monad m => Int -> m a -> m [a] +count n p = sequence (replicate n p) +{-# INLINE count #-} + +-- | Combine two alternatives. +eitherP :: (Alternative f) => f a -> f b -> f (Either a b) +eitherP a b = (Left <$> a) <|> (Right <$> b) +{-# INLINE eitherP #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs new file mode 100644 index 00000000..0572d682 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs @@ -0,0 +1,31 @@ +-- | +-- Module : Data.Attoparsec.Internal +-- Copyright : Bryan O'Sullivan 2012 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient parser combinators, loosely based on the Parsec +-- library. + +module Data.Attoparsec.Internal + ( + compareResults + ) where + +import Data.Attoparsec.Internal.Types (IResult(..)) + +-- | Compare two 'IResult' values for equality. +-- +-- If both 'IResult's are 'Partial', the result will be 'Nothing', as +-- they are incomplete and hence their equality cannot be known. +-- (This is why there is no 'Eq' instance for 'IResult'.) +compareResults :: (Eq t, Eq r) => IResult t r -> IResult t r -> Maybe Bool +compareResults (Fail i0 ctxs0 msg0) (Fail i1 ctxs1 msg1) = + Just (i0 == i1 && ctxs0 == ctxs1 && msg0 == msg1) +compareResults (Done i0 r0) (Done i1 r1) = + Just (i0 == i1 && r0 == r1) +compareResults (Partial _) (Partial _) = Nothing +compareResults _ _ = Just False diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs new file mode 100644 index 00000000..e47e5c9e --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs @@ -0,0 +1,227 @@ +{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, OverloadedStrings, + Rank2Types, RecordWildCards #-} +-- | +-- Module : Data.Attoparsec.Internal.Types +-- Copyright : Bryan O'Sullivan 2007-2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- Simple, efficient parser combinators, loosely based on the Parsec +-- library. + +module Data.Attoparsec.Internal.Types + ( + Parser(..) + , Failure + , Success + , IResult(..) + , Input(..) + , Added(..) + , More(..) + , addS + , (<>) + ) where + +import Control.Applicative (Alternative(..), Applicative(..), (<$>)) +import Control.DeepSeq (NFData(rnf)) +import Control.Monad (MonadPlus(..)) +import Data.Monoid (Monoid(..)) +import Prelude hiding (getChar, take, takeWhile) + +-- | The result of a parse. This is parameterised over the type @t@ +-- of string that was processed. +-- +-- This type is an instance of 'Functor', where 'fmap' transforms the +-- value in a 'Done' result. +data IResult t r = Fail t [String] String + -- ^ The parse failed. The 't' parameter is the + -- input that had not yet been consumed when the + -- failure occurred. The @[@'String'@]@ is a list of + -- contexts in which the error occurred. The + -- 'String' is the message describing the error, if + -- any. + | Partial (t -> IResult t r) + -- ^ Supply this continuation with more input so that + -- the parser can resume. To indicate that no more + -- input is available, use an empty string. + | Done t r + -- ^ The parse succeeded. The 't' parameter is the + -- input that had not yet been consumed (if any) when + -- the parse succeeded. + +instance (Show t, Show r) => Show (IResult t r) where + show (Fail t stk msg) = + "Fail " ++ show t ++ " " ++ show stk ++ " " ++ show msg + show (Partial _) = "Partial _" + show (Done t r) = "Done " ++ show t ++ " " ++ show r + +instance (NFData t, NFData r) => NFData (IResult t r) where + rnf (Fail t stk msg) = rnf t `seq` rnf stk `seq` rnf msg + rnf (Partial _) = () + rnf (Done t r) = rnf t `seq` rnf r + {-# INLINE rnf #-} + +fmapR :: (a -> b) -> IResult t a -> IResult t b +fmapR _ (Fail t stk msg) = Fail t stk msg +fmapR f (Partial k) = Partial (fmapR f . k) +fmapR f (Done t r) = Done t (f r) + +instance Functor (IResult t) where + fmap = fmapR + {-# INLINE fmap #-} + +newtype Input t = I {unI :: t} deriving (Monoid) +newtype Added t = A {unA :: t} deriving (Monoid) + +-- | The core parser type. This is parameterised over the type @t@ of +-- string being processed. +-- +-- This type is an instance of the following classes: +-- +-- * 'Monad', where 'fail' throws an exception (i.e. fails) with an +-- error message. +-- +-- * 'Functor' and 'Applicative', which follow the usual definitions. +-- +-- * 'MonadPlus', where 'mzero' fails (with no error message) and +-- 'mplus' executes the right-hand parser if the left-hand one +-- fails. When the parser on the right executes, the input is reset +-- to the same state as the parser on the left started with. (In +-- other words, Attoparsec is a backtracking parser that supports +-- arbitrary lookahead.) +-- +-- * 'Alternative', which follows 'MonadPlus'. +newtype Parser t a = Parser { + runParser :: forall r. Input t -> Added t -> More + -> Failure t r + -> Success t a r + -> IResult t r + } + +type Failure t r = Input t -> Added t -> More -> [String] -> String + -> IResult t r +type Success t a r = Input t -> Added t -> More -> a -> IResult t r + +-- | Have we read all available input? +data More = Complete | Incomplete + deriving (Eq, Show) + +instance Monoid More where + mappend c@Complete _ = c + mappend _ m = m + mempty = Incomplete + +addS :: (Monoid t) => + Input t -> Added t -> More + -> Input t -> Added t -> More + -> (Input t -> Added t -> More -> r) -> r +addS i0 a0 m0 _i1 a1 m1 f = + let !i = i0 <> I (unA a1) + a = a0 <> a1 + !m = m0 <> m1 + in f i a m +{-# INLINE addS #-} + +bindP :: Parser t a -> (a -> Parser t b) -> Parser t b +bindP m g = + Parser $ \i0 a0 m0 kf ks -> runParser m i0 a0 m0 kf $ + \i1 a1 m1 a -> runParser (g a) i1 a1 m1 kf ks +{-# INLINE bindP #-} + +returnP :: a -> Parser t a +returnP a = Parser (\i0 a0 m0 _kf ks -> ks i0 a0 m0 a) +{-# INLINE returnP #-} + +instance Monad (Parser t) where + return = returnP + (>>=) = bindP + fail = failDesc + +noAdds :: (Monoid t) => + Input t -> Added t -> More + -> (Input t -> Added t -> More -> r) -> r +noAdds i0 _a0 m0 f = f i0 mempty m0 +{-# INLINE noAdds #-} + +plus :: (Monoid t) => Parser t a -> Parser t a -> Parser t a +plus a b = Parser $ \i0 a0 m0 kf ks -> + let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $ + \ i2 a2 m2 -> runParser b i2 a2 m2 kf ks + ks' i1 a1 m1 = ks i1 (a0 <> a1) m1 + in noAdds i0 a0 m0 $ \i2 a2 m2 -> runParser a i2 a2 m2 kf' ks' +{-# INLINE plus #-} + +instance (Monoid t) => MonadPlus (Parser t) where + mzero = failDesc "mzero" + {-# INLINE mzero #-} + mplus = plus + +fmapP :: (a -> b) -> Parser t a -> Parser t b +fmapP p m = Parser $ \i0 a0 m0 f k -> + runParser m i0 a0 m0 f $ \i1 a1 s1 a -> k i1 a1 s1 (p a) +{-# INLINE fmapP #-} + +instance Functor (Parser t) where + fmap = fmapP + {-# INLINE fmap #-} + +apP :: Parser t (a -> b) -> Parser t a -> Parser t b +apP d e = do + b <- d + a <- e + return (b a) +{-# INLINE apP #-} + +instance Applicative (Parser t) where + pure = returnP + {-# INLINE pure #-} + (<*>) = apP + {-# INLINE (<*>) #-} + +#if MIN_VERSION_base(4,2,0) + -- These definitions are equal to the defaults, but this + -- way the optimizer doesn't have to work so hard to figure + -- that out. + (*>) = (>>) + {-# INLINE (*>) #-} + x <* y = x >>= \a -> y >> return a + {-# INLINE (<*) #-} +#endif + +instance (Monoid t) => Monoid (Parser t a) where + mempty = failDesc "mempty" + {-# INLINE mempty #-} + mappend = plus + {-# INLINE mappend #-} + +instance (Monoid t) => Alternative (Parser t) where + empty = failDesc "empty" + {-# INLINE empty #-} + + (<|>) = plus + {-# INLINE (<|>) #-} + +#if MIN_VERSION_base(4,2,0) + many v = many_v + where many_v = some_v <|> pure [] + some_v = (:) <$> v <*> many_v + {-# INLINE many #-} + + some v = some_v + where + many_v = some_v <|> pure [] + some_v = (:) <$> v <*> many_v + {-# INLINE some #-} +#endif + +failDesc :: String -> Parser t a +failDesc err = Parser (\i0 a0 m0 kf _ks -> kf i0 a0 m0 [] msg) + where msg = "Failed reading: " ++ err +{-# INLINE failDesc #-} + +(<>) :: (Monoid m) => m -> m -> m +(<>) = mappend +{-# INLINE (<>) #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs new file mode 100644 index 00000000..bf175f4b --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs @@ -0,0 +1,127 @@ +{-# LANGUAGE DeriveDataTypeable #-} +-- | +-- Module : Data.Attoparsec.Number +-- Copyright : Bryan O'Sullivan 2011 +-- License : BSD3 +-- +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : unknown +-- +-- A simple number type, useful for parsing both exact and inexact +-- quantities without losing much precision. +module Data.Attoparsec.Number + ( + Number(..) + ) where + +import Control.DeepSeq (NFData(rnf)) +import Data.Data (Data) +import Data.Function (on) +import Data.Typeable (Typeable) + +-- | A numeric type that can represent integers accurately, and +-- floating point numbers to the precision of a 'Double'. +data Number = I !Integer + | D {-# UNPACK #-} !Double + deriving (Typeable, Data) + +instance Show Number where + show (I a) = show a + show (D a) = show a + +instance NFData Number where + rnf (I _) = () + rnf (D _) = () + {-# INLINE rnf #-} + +binop :: (Integer -> Integer -> a) -> (Double -> Double -> a) + -> Number -> Number -> a +binop _ d (D a) (D b) = d a b +binop i _ (I a) (I b) = i a b +binop _ d (D a) (I b) = d a (fromIntegral b) +binop _ d (I a) (D b) = d (fromIntegral a) b +{-# INLINE binop #-} + +instance Eq Number where + (==) = binop (==) (==) + {-# INLINE (==) #-} + + (/=) = binop (/=) (/=) + {-# INLINE (/=) #-} + +instance Ord Number where + (<) = binop (<) (<) + {-# INLINE (<) #-} + + (<=) = binop (<=) (<=) + {-# INLINE (<=) #-} + + (>) = binop (>) (>) + {-# INLINE (>) #-} + + (>=) = binop (>=) (>=) + {-# INLINE (>=) #-} + + compare = binop compare compare + {-# INLINE compare #-} + +instance Num Number where + (+) = binop (((I$!).) . (+)) (((D$!).) . (+)) + {-# INLINE (+) #-} + + (-) = binop (((I$!).) . (-)) (((D$!).) . (-)) + {-# INLINE (-) #-} + + (*) = binop (((I$!).) . (*)) (((D$!).) . (*)) + {-# INLINE (*) #-} + + abs (I a) = I $! abs a + abs (D a) = D $! abs a + {-# INLINE abs #-} + + negate (I a) = I $! negate a + negate (D a) = D $! negate a + {-# INLINE negate #-} + + signum (I a) = I $! signum a + signum (D a) = D $! signum a + {-# INLINE signum #-} + + fromInteger = (I$!) . fromInteger + {-# INLINE fromInteger #-} + +instance Real Number where + toRational (I a) = fromIntegral a + toRational (D a) = toRational a + {-# INLINE toRational #-} + +instance Fractional Number where + fromRational = (D$!) . fromRational + {-# INLINE fromRational #-} + + (/) = binop (((D$!).) . (/) `on` fromIntegral) + (((D$!).) . (/)) + {-# INLINE (/) #-} + + recip (I a) = D $! recip (fromIntegral a) + recip (D a) = D $! recip a + {-# INLINE recip #-} + +instance RealFrac Number where + properFraction (I a) = (fromIntegral a,0) + properFraction (D a) = case properFraction a of + (i,d) -> (i,D d) + {-# INLINE properFraction #-} + truncate (I a) = fromIntegral a + truncate (D a) = truncate a + {-# INLINE truncate #-} + round (I a) = fromIntegral a + round (D a) = round a + {-# INLINE round #-} + ceiling (I a) = fromIntegral a + ceiling (D a) = ceiling a + {-# INLINE ceiling #-} + floor (I a) = fromIntegral a + floor (D a) = floor a + {-# INLINE floor #-} |