diff options
Diffstat (limited to 'haddock-library')
22 files changed, 3686 insertions, 0 deletions
| diff --git a/haddock-library/.ghci b/haddock-library/.ghci new file mode 100644 index 00000000..6a26395e --- /dev/null +++ b/haddock-library/.ghci @@ -0,0 +1 @@ +:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h diff --git a/haddock-library/LICENSE b/haddock-library/LICENSE new file mode 100644 index 00000000..1636bfcd --- /dev/null +++ b/haddock-library/LICENSE @@ -0,0 +1,23 @@ +Copyright 2002-2010, Simon Marlow.  All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +  +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +  +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/haddock-library/Setup.hs b/haddock-library/Setup.hs new file mode 100644 index 00000000..9a994af6 --- /dev/null +++ b/haddock-library/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal new file mode 100644 index 00000000..3a016c5a --- /dev/null +++ b/haddock-library/haddock-library.cabal @@ -0,0 +1,75 @@ +name:                 haddock-library +version:              2.15.0 +synopsis:             Library exposing some functionality of Haddock. + +description:          Haddock is a documentation-generation tool for Haskell +                      libraries. These modules expose some functionality of it +                      without pulling in the GHC dependency. +license:              BSD3 +license-file:         LICENSE +maintainer:           Simon Hengel <sol@typeful.net>, Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> +homepage:             http://www.haskell.org/haddock/ +bug-reports:          http://trac.haskell.org/haddock +category:             Documentation +build-type:           Simple +cabal-version:        >= 1.10 +stability:            experimental + +library +  default-language:     Haskell2010 + +  build-depends: +    base >= 4.3 && < 4.8, +    bytestring, +    deepseq + +  hs-source-dirs:       src, vendor/attoparsec-0.10.4.0 +  ghc-options:          -funbox-strict-fields -Wall -fwarn-tabs -O2 + +  exposed-modules: +    Documentation.Haddock.Parser +    Documentation.Haddock.Types +    Documentation.Haddock.Doc + +  other-modules: +    Data.Attoparsec +    Data.Attoparsec.ByteString +    Data.Attoparsec.ByteString.Char8 +    Data.Attoparsec.Combinator +    Data.Attoparsec.Number +    Data.Attoparsec.ByteString.FastSet +    Data.Attoparsec.ByteString.Internal +    Data.Attoparsec.Internal +    Data.Attoparsec.Internal.Types +    Documentation.Haddock.Utf8 +    Documentation.Haddock.Parser.Util + +test-suite spec +  type:             exitcode-stdio-1.0 +  default-language: Haskell2010 +  main-is:          Spec.hs +  hs-source-dirs: +      test +    , src +    , vendor/attoparsec-0.10.4.0 + +  cpp-options: +      -DTEST + +  other-modules: +      Documentation.Haddock.ParserSpec +      Documentation.Haddock.Utf8Spec +      Documentation.Haddock.Parser.UtilSpec + +  build-depends: +      base +    , hspec +    , bytestring +    , deepseq +    , QuickCheck == 2.* +    , haddock-library + + +source-repository head +  type:     git +  location: http://git.haskell.org/haddock.git diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs new file mode 100644 index 00000000..4d6c10a4 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Doc.hs @@ -0,0 +1,60 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +module Documentation.Haddock.Doc (docParagraph) where + +import Data.Monoid +import Documentation.Haddock.Types +import Data.Char (isSpace) + +-- We put it here so that we can avoid a circular import +-- anything relevant imports this module anyway +instance Monoid (DocH mod id) where +  mempty  = DocEmpty +  mappend = docAppend + +docAppend :: DocH mod id -> DocH mod id -> DocH mod id +docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) +docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d +docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2) +docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2) +docAppend DocEmpty d = d +docAppend d DocEmpty = d +docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2) +docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2)) +docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d +docAppend d1 d2 = DocAppend d1 d2 + +-- again to make parsing easier - we spot a paragraph whose only item +-- is a DocMonospaced and make it into a DocCodeBlock +docParagraph :: DocH mod id -> DocH mod id +docParagraph (DocMonospaced p) +  = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocString s1) (DocMonospaced p)) +  | all isSpace s1 +  = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocString s1) +    (DocAppend (DocMonospaced p) (DocString s2))) +  | all isSpace s1 && all isSpace s2 +  = DocCodeBlock (docCodeBlock p) +docParagraph (DocAppend (DocMonospaced p) (DocString s2)) +  | all isSpace s2 +  = DocCodeBlock (docCodeBlock p) +docParagraph p +  = DocParagraph p + + +-- Drop trailing whitespace from @..@ code blocks.  Otherwise this: +-- +--    -- @ +--    -- foo +--    -- @ +-- +-- turns into (DocCodeBlock "\nfoo\n ") which when rendered in HTML +-- gives an extra vertical space after the code block.  The single space +-- on the final line seems to trigger the extra vertical space. +-- +docCodeBlock :: DocH mod id -> DocH mod id +docCodeBlock (DocString s) +  = DocString (reverse $ dropWhile (`elem` " \t") $ reverse s) +docCodeBlock (DocAppend l r) +  = DocAppend l (docCodeBlock r) +docCodeBlock d = d diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs new file mode 100644 index 00000000..3d146d33 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -0,0 +1,474 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE LambdaCase #-} +-- | +-- Module      :  Documentation.Haddock.Parser +-- Copyright   :  (c) Mateusz Kowalczyk 2013-2014, +--                    Simon Hengel      2013 +-- License     :  BSD-like +-- +-- Maintainer  :  haddock@projects.haskell.org +-- Stability   :  experimental +-- Portability :  portable + +module Documentation.Haddock.Parser ( parseString, parseParas +                                    , overIdentifier, toRegular, Identifier +                                    ) where + +import           Control.Applicative +import           Control.Arrow (first) +import           Control.Monad (void, mfilter) +import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import qualified Data.ByteString.Char8 as BS +import           Data.Char (chr, isAsciiUpper) +import           Data.List (stripPrefix, intercalate, unfoldr) +import           Data.Maybe (fromMaybe) +import           Data.Monoid +import           Documentation.Haddock.Doc +import           Documentation.Haddock.Parser.Util +import           Documentation.Haddock.Types +import           Documentation.Haddock.Utf8 +import           Prelude hiding (takeWhile) + +-- | Identifier string surrounded with opening and closing quotes/backticks. +type Identifier = (Char, String, Char) + +-- | Drops the quotes/backticks around all identifiers, as if they +-- were valid but still 'String's. +toRegular :: DocH mod Identifier -> DocH mod String +toRegular = fmap (\(_, x, _) -> x) + +-- | Maps over 'DocIdentifier's over 'String' with potentially failing +-- conversion using user-supplied function. If the conversion fails, +-- the identifier is deemed to not be valid and is treated as a +-- regular string. +overIdentifier :: (String -> Maybe a) +               -> DocH mod Identifier +               -> DocH mod a +overIdentifier f d = g d +  where +    g (DocIdentifier (o, x, e)) = case f x of +      Nothing -> DocString $ o : x ++ [e] +      Just x' -> DocIdentifier x' +    g DocEmpty = DocEmpty +    g (DocAppend x x') = DocAppend (g x) (g x') +    g (DocString x) = DocString x +    g (DocParagraph x) = DocParagraph $ g x +    g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x +    g (DocModule x) = DocModule x +    g (DocWarning x) = DocWarning $ g x +    g (DocEmphasis x) = DocEmphasis $ g x +    g (DocMonospaced x) = DocMonospaced $ g x +    g (DocBold x) = DocBold $ g x +    g (DocUnorderedList x) = DocUnorderedList $ fmap g x +    g (DocOrderedList x) = DocOrderedList $ fmap g x +    g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x +    g (DocCodeBlock x) = DocCodeBlock $ g x +    g (DocHyperlink x) = DocHyperlink x +    g (DocPic x) = DocPic x +    g (DocAName x) = DocAName x +    g (DocProperty x) = DocProperty x +    g (DocExamples x) = DocExamples x +    g (DocHeader (Header l x)) = DocHeader . Header l $ g x + +parse :: Parser a -> BS.ByteString -> a +parse p = either err id . parseOnly (p <* endOfInput) +  where +    err = error . ("Haddock.Parser.parse: " ++) + +-- | Main entry point to the parser. Appends the newline character +-- to the input string. +parseParas :: String -- ^ String to parse +           -> DocH mod Identifier +parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") +  where +    p :: Parser (DocH mod Identifier) +    p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + +-- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which +-- drops leading whitespace and encodes the string to UTF8 first. +parseString :: String -> DocH mod Identifier +parseString = parseStringBS . encodeUtf8 . dropWhile isSpace + +parseStringBS :: BS.ByteString -> DocH mod Identifier +parseStringBS = parse p +  where +    p :: Parser (DocH mod Identifier) +    p = mconcat <$> many (monospace <|> anchor <|> identifier +                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold +                          <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar) + +-- | Parses and processes +-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> +-- +-- >>> parseOnly encodedChar "ABC" +-- Right (DocString "ABC") +encodedChar :: Parser (DocH mod a) +encodedChar = "&#" *> c <* ";" +  where +    c = DocString . return . chr <$> num +    num = hex <|> decimal +    hex = ("x" <|> "X") *> hexadecimal + +specialChar :: [Char] +specialChar = "_/<@\"&'`" + +-- | Plain, regular parser for text. Called as one of the last parsers +-- to ensure that we have already given a chance to more meaningful parsers +-- before capturing their characers. +string' :: Parser (DocH mod a) +string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar) +  where +    unescape "" = "" +    unescape ('\\':x:xs) = x : unescape xs +    unescape (x:xs) = x : unescape xs + +-- | Skips a single special character and treats it as a plain string. +-- This is done to skip over any special characters belonging to other +-- elements but which were not deemed meaningful at their positions. +skipSpecialChar :: Parser (DocH mod a) +skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) + +-- | Emphasis parser. +-- +-- >>> parseOnly emphasis "/Hello world/" +-- Right (DocEmphasis (DocString "Hello world")) +emphasis :: Parser (DocH mod Identifier) +emphasis = DocEmphasis . parseStringBS <$> +  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") + +-- | Bold parser. +-- +-- >>> parseOnly bold "__Hello world__" +-- Right (DocBold (DocString "Hello world")) +bold :: Parser (DocH mod Identifier) +bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") + +disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString +disallowNewline = mfilter ('\n' `BS.notElem`) + +-- | Like `takeWhile`, but unconditionally take escaped characters. +takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile_ p = scan False p_ +  where +    p_ escaped c +      | escaped = Just False +      | not $ p c = Nothing +      | otherwise = Just (c == '\\') + +-- | Like `takeWhile1`, but unconditionally take escaped characters. +takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile1_ = mfilter (not . BS.null) . takeWhile_ + +-- | Text anchors to allow for jumping around the generated documentation. +-- +-- >>> parseOnly anchor "#Hello world#" +-- Right (DocAName "Hello world") +anchor :: Parser (DocH mod a) +anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") + +-- | Monospaced strings. +-- +-- >>> parseOnly monospace "@cruel@" +-- Right (DocMonospaced (DocString "cruel")) +monospace :: Parser (DocH mod Identifier) +monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@") + +moduleName :: Parser (DocH mod a) +moduleName = DocModule <$> (char '"' *> modid <* char '"') +  where +    modid = intercalate "." <$> conid `sepBy1` "." +    conid = (:) +      <$> satisfy isAsciiUpper +      -- NOTE: According to Haskell 2010 we shouldd actually only +      -- accept {small | large | digit | ' } here.  But as we can't +      -- match on unicode characters, this is currently not possible. +      <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n")) + +-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify +-- a title for the picture. +-- +-- >>> parseOnly picture "<<hello.png>>" +-- Right (DocPic (Picture "hello.png" Nothing)) +-- >>> parseOnly picture "<<hello.png world>>" +-- Right (DocPic (Picture "hello.png" (Just "world"))) +picture :: Parser (DocH mod a) +picture = DocPic . makeLabeled Picture . decodeUtf8 +          <$> disallowNewline ("<<" *> takeUntil ">>") + +-- | Paragraph parser, called by 'parseParas'. +paragraph :: Parser (DocH mod Identifier) +paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock +                                       <|> property <|> header +                                       <|> textParagraph) + +header :: Parser (DocH mod Identifier) +header = do +  let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1] +      pser = foldl1 (<|>) psers +  delim <- decodeUtf8 <$> pser +  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString +  rest <- paragraph <|> return mempty +  return $ DocParagraph (DocHeader (Header (length delim) line)) <> rest + +textParagraph :: Parser (DocH mod Identifier) +textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine + +-- | List parser, called by 'paragraph'. +list :: Parser (DocH mod Identifier) +list = DocUnorderedList <$> unorderedList +       <|> DocOrderedList <$> orderedList +       <|> DocDefList <$> definitionList + +-- | Parses unordered (bullet) lists. +unorderedList :: Parser [DocH mod Identifier] +unorderedList = ("*" <|> "-") *> innerList unorderedList + +-- | Parses ordered lists (numbered or dashed). +orderedList :: Parser [DocH mod Identifier] +orderedList = (paren <|> dot) *> innerList orderedList +  where +    dot = (decimal :: Parser Int) <* "." +    paren = "(" *> decimal <* ")" + +-- | Generic function collecting any further lines belonging to the +-- list entry and recursively collecting any further lists in the +-- same paragraph. Usually used as +-- +-- > someListFunction = listBeginning *> innerList someListFunction +innerList :: Parser [DocH mod Identifier] -> Parser [DocH mod Identifier] +innerList item = do +  c <- takeLine +  (cs, items) <- more item +  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs +  return $ case items of +    Left p -> [contents `joinPara` p] +    Right i -> contents : i + +-- | Parses definition lists. +definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)] +definitionList = do +  label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]" +  c <- takeLine +  (cs, items) <- more definitionList +  let contents = parseString . dropNLs . unlines $ c : cs +  return $ case items of +    Left p -> [(label, contents `joinPara` p)] +    Right i -> (label, contents) : i + +-- | If possible, appends two 'Doc's under a 'DocParagraph' rather than +-- outside of it. This allows to get structures like +-- +-- @DocParagraph (DocAppend … …)@ +-- +-- rather than +-- +-- @DocAppend (DocParagraph …) …@ +joinPara :: DocH mod id -> DocH mod id -> DocH mod id +joinPara (DocParagraph p) c = docParagraph $ p <> c +joinPara d p = d <> p + +-- | Drops all trailing newlines. +dropNLs :: String -> String +dropNLs = reverse . dropWhile (== '\n') . reverse + +-- | Main worker for 'innerList' and 'definitionList'. +-- We need the 'Either' here to be able to tell in the respective functions +-- whether we're dealing with the next list or a nested paragraph. +more :: Monoid a => Parser a +     -> Parser ([String], Either (DocH mod Identifier) a) +more item = innerParagraphs <|> moreListItems item +            <|> moreContent item <|> pure ([], Right mempty) + +-- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs. +innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a) +innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs) + +-- | Attemps to fetch the next list if possibly. Used by 'innerList' and +-- 'definitionList' to recursivly grab lists that aren't separated by a whole +-- paragraph. +moreListItems :: Parser a +              -> Parser ([String], Either (DocH mod Identifier) a) +moreListItems item = (,) [] . Right <$> (skipSpace *> item) + +-- | Helper for 'innerList' and 'definitionList' which simply takes +-- a line of text and attempts to parse more list content with 'more'. +moreContent :: Monoid a => Parser a +            -> Parser ([String], Either (DocH mod Identifier) a) +moreContent item = first . (:) <$> nonEmptyLine <*> more item + +-- | Runs the 'parseParas' parser on an indented paragraph. +-- The indentation is 4 spaces. +indentedParagraphs :: Parser (DocH mod Identifier) +indentedParagraphs = parseParas . concat <$> dropFrontOfPara "    " + +-- | Grab as many fully indented paragraphs as we can. +dropFrontOfPara :: Parser BS.ByteString -> Parser [String] +dropFrontOfPara sp = do +  currentParagraph <- some (sp *> takeNonEmptyLine) +  followingParagraphs <- +    skipHorizontalSpace *> nextPar -- we have more paragraphs to take +    <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline +    <|> endOfInput *> return [] -- nothing more to take at all +  return (currentParagraph ++ followingParagraphs) +  where +    nextPar = (++) <$> nlList <*> dropFrontOfPara sp +    nlList = "\n" *> return ["\n"] + +nonSpace :: BS.ByteString -> Parser BS.ByteString +nonSpace xs +  | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line" +  | otherwise = return xs + +-- | Takes a non-empty, not fully whitespace line. +-- +--  Doesn't discard the trailing newline. +takeNonEmptyLine :: Parser String +takeNonEmptyLine = do +    (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" + +birdtracks :: Parser (DocH mod a) +birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line +  where +    line = skipHorizontalSpace *> ">" *> takeLine + +stripSpace :: [String] -> [String] +stripSpace = fromMaybe <*> mapM strip' +  where +    strip' (' ':xs') = Just xs' +    strip' "" = Just "" +    strip' _  = Nothing + +-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). +-- Consecutive examples are accepted. +examples :: Parser (DocH mod a) +examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go) +  where +    go :: Parser [Example] +    go = do +      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" +      expr <- takeLine +      (rs, es) <- resultAndMoreExamples +      return (makeExample prefix expr rs : es) +      where +        resultAndMoreExamples :: Parser ([String], [Example]) +        resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) +          where +            moreExamples :: Parser ([String], [Example]) +            moreExamples = (,) [] <$> go + +            result :: Parser ([String], [Example]) +            result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples + +    makeExample :: String -> String -> [String] -> Example +    makeExample prefix expression res = +      Example (strip expression) result +      where +        result = map (substituteBlankLine . tryStripPrefix) res + +        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) + +        substituteBlankLine "<BLANKLINE>" = "" +        substituteBlankLine xs = xs + +nonEmptyLine :: Parser String +nonEmptyLine = mfilter (any (not . isSpace)) takeLine + +takeLine :: Parser String +takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine + +endOfLine :: Parser () +endOfLine = void "\n" <|> endOfInput + +-- | Property parser. +-- +-- >>> parseOnly property "prop> hello world" +-- Right (DocProperty "hello world") +property :: Parser (DocH mod a) +property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) + +-- | +-- Paragraph level codeblock. Anything between the two delimiting @ is parsed +-- for markup. +codeblock :: Parser (DocH mod Identifier) +codeblock = +  DocCodeBlock . parseStringBS . dropSpaces +  <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@") +  where +    dropSpaces xs = +      let rs = decodeUtf8 xs +      in case splitByNl rs of +        [] -> xs +        ys -> case last ys of +          ' ':_ -> case mapM dropSpace ys of +            Nothing -> xs +            Just zs -> encodeUtf8 $ intercalate "\n" zs +          _ -> xs + +    -- This is necessary because ‘lines’ swallows up a trailing newline +    -- and we lose information about whether the last line belongs to @ or to +    -- text which we need to decide whether we actually want to be dropping +    -- anything at all. +    splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s) +                               _      -> Nothing) +                . ('\n' :) + +    dropSpace "" = Just "" +    dropSpace (' ':xs) = Just xs +    dropSpace _ = Nothing + +    block' = scan False p +      where +        p isNewline c +          | isNewline && c == '@' = Nothing +          | isNewline && isSpace c = Just isNewline +          | otherwise = Just $ c == '\n' + +hyperlink :: Parser (DocH mod a) +hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 +              <$> disallowNewline ("<" *> takeUntil ">") +            <|> autoUrl + +autoUrl :: Parser (DocH mod a) +autoUrl = mkLink <$> url +  where +    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) +    mkLink :: BS.ByteString -> DocH mod a +    mkLink s = case BS.unsnoc s of +      Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] +      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) + +-- | Parses strings between identifier delimiters. Consumes all input that it +-- deems to be valid in an identifier. Note that it simply blindly consumes +-- characters and does no actual validation itself. +parseValid :: Parser String +parseValid = do +  vs' <- many' $ utf8String "⋆" <|> return <$> idChar +  let vs = concat vs' +  c <- peekChar +  case c of +    Just '`' -> return vs +    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) +                 <|> return vs +    _ -> fail "outofvalid" +  where +    idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^") +             <|> digit <|> letter_ascii + +-- | Parses UTF8 strings from ByteString streams. +utf8String :: String -> Parser String +utf8String x = decodeUtf8 <$> string (encodeUtf8 x) + +-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the +-- string it deems valid. +identifier :: Parser (DocH mod Identifier) +identifier = do +  o <- idDelim +  vid <- parseValid +  e <- idDelim +  return $ DocIdentifier (o, vid, e) +  where +    idDelim = char '\'' <|> char '`' diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs new file mode 100644 index 00000000..25dba2d5 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -0,0 +1,61 @@ +-- | +-- Module      :  Documentation.Haddock.Parser.Util +-- Copyright   :  (c) Mateusz Kowalczyk 2013-2014, +--                    Simon Hengel      2013 +-- License     :  BSD-like +-- +-- Maintainer  :  haddock@projects.haskell.org +-- Stability   :  experimental +-- Portability :  portable +-- +-- Various utility functions used by the parser. +module Documentation.Haddock.Parser.Util where + +import           Control.Applicative +import           Control.Monad (mfilter) +import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import           Data.ByteString.Char8 (ByteString) +import qualified Data.ByteString.Char8 as BS +import           Prelude hiding (takeWhile) + +-- | Remove all leading and trailing whitespace +strip :: String -> String +strip = (\f -> f . f) $ dropWhile isSpace . reverse + +skipHorizontalSpace :: Parser () +skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r") + +takeHorizontalSpace :: Parser BS.ByteString +takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r") + +makeLabeled :: (String -> Maybe String -> a) -> String -> a +makeLabeled f input = case break isSpace $ removeEscapes $ strip input of +  (uri, "")    -> f uri Nothing +  (uri, label) -> f uri (Just $ dropWhile isSpace label) +  where +    -- As we don't parse these any further, we don't do any processing to the +    -- string so we at least remove escape character here. Perhaps we should +    -- actually be parsing the label at the very least? +    removeEscapes "" = "" +    removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs +    removeEscapes ('\\':xs) = removeEscapes xs +    removeEscapes (x:xs) = x : removeEscapes xs + +takeUntil :: ByteString -> Parser ByteString +takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome +  where +    end = BS.unpack end_ + +    p :: (Bool, String) -> Char -> Maybe (Bool, String) +    p acc c = case acc of +      (True, _) -> Just (False, end) +      (_, []) -> Nothing +      (_, x:xs) | x == c -> Just (False, xs) +      _ -> Just (c == '\\', end) + +    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse +    requireEnd = mfilter (BS.isSuffixOf end_) + +    gotSome xs +      | BS.null xs = fail "didn't get any content" +      | otherwise = return xs diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs new file mode 100644 index 00000000..3e25d230 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Types.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable #-} +{-# LANGUAGE DeriveTraversable, StandaloneDeriving #-} + +-- | +-- Module      :  Documentation.Haddock.Types +-- Copyright   :  (c) Simon Marlow      2003-2006, +--                    David Waern       2006-2009, +--                    Mateusz Kowalczyk 2013 +-- License     :  BSD-like +-- +-- Maintainer  :  haddock@projects.haskellorg +-- Stability   :  experimental +-- Portability :  portable +-- +-- Exposes documentation data types used for (some) of Haddock. +module Documentation.Haddock.Types where + +import Data.Foldable +import Data.Traversable + +instance Foldable Header where +  foldMap f (Header _ a) = f a + +instance Traversable Header where +  traverse f (Header l a) = Header l `fmap` f a + +data Hyperlink = Hyperlink +  { hyperlinkUrl   :: String +  , hyperlinkLabel :: Maybe String +  } deriving (Eq, Show) + + +data Picture = Picture +  { pictureUri   :: String +  , pictureTitle :: Maybe String +  } deriving (Eq, Show) + +data Header id = Header +  { headerLevel :: Int +  , headerTitle :: id +  } deriving Functor + +data Example = Example +  { exampleExpression :: String +  , exampleResult     :: [String] +  } deriving (Eq, Show) + +data DocH mod id +  = DocEmpty +  | DocAppend (DocH mod id) (DocH mod id) +  | DocString String +  | DocParagraph (DocH mod id) +  | DocIdentifier id +  | DocIdentifierUnchecked mod +  | DocModule String +  | DocWarning (DocH mod id) +  | DocEmphasis (DocH mod id) +  | DocMonospaced (DocH mod id) +  | DocBold (DocH mod id) +  | DocUnorderedList [DocH mod id] +  | DocOrderedList [DocH mod id] +  | DocDefList [(DocH mod id, DocH mod id)] +  | DocCodeBlock (DocH mod id) +  | DocHyperlink Hyperlink +  | DocPic Picture +  | DocAName String +  | DocProperty String +  | DocExamples [Example] +  | DocHeader (Header (DocH mod id)) +  deriving (Functor, Foldable, Traversable) diff --git a/haddock-library/src/Documentation/Haddock/Utf8.hs b/haddock-library/src/Documentation/Haddock/Utf8.hs new file mode 100644 index 00000000..3f75e53b --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Utf8.hs @@ -0,0 +1,74 @@ +module Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) where +import           Data.Bits ((.|.), (.&.), shiftL, shiftR) +import qualified Data.ByteString as BS +import           Data.Char (chr, ord) +import           Data.Word (Word8) + +-- | Helper that encodes and packs a 'String' into a 'BS.ByteString' +encodeUtf8 :: String -> BS.ByteString +encodeUtf8 = BS.pack . encode + +-- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String' +decodeUtf8 :: BS.ByteString -> String +decodeUtf8 = decode . BS.unpack + +-- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding +-- | Character to use when 'encode' or 'decode' fail for a byte. +replacementCharacter :: Char +replacementCharacter = '\xfffd' + +-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. +encode :: String -> [Word8] +encode = concatMap (map fromIntegral . go . ord) + where +  go oc +   | oc <= 0x7f       = [oc] + +   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6) +                        , 0x80 + oc .&. 0x3f +                        ] + +   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12) +                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f) +                        , 0x80 + oc .&. 0x3f +                        ] +   | otherwise        = [ 0xf0 + (oc `shiftR` 18) +                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f) +                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f) +                        , 0x80 + oc .&. 0x3f +                        ] + +-- | Decode a UTF8 string packed into a list of Word8 values, directly to String +decode :: [Word8] -> String +decode [    ] = "" +decode (c:cs) +  | c < 0x80  = chr (fromEnum c) : decode cs +  | c < 0xc0  = replacementCharacter : decode cs +  | c < 0xe0  = multi1 +  | c < 0xf0  = multi_byte 2 0xf  0x800 +  | c < 0xf8  = multi_byte 3 0x7  0x10000 +  | c < 0xfc  = multi_byte 4 0x3  0x200000 +  | c < 0xfe  = multi_byte 5 0x1  0x4000000 +  | otherwise = replacementCharacter : decode cs +  where +    multi1 = case cs of +      c1 : ds | c1 .&. 0xc0 == 0x80 -> +        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f) +        in if d >= 0x000080 then toEnum d : decode ds +                            else replacementCharacter : decode ds +      _ -> replacementCharacter : decode cs + +    multi_byte :: Int -> Word8 -> Int -> String +    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) +      where +        aux 0 rs acc +          | overlong <= acc && acc <= 0x10ffff && +            (acc < 0xd800 || 0xdfff < acc)     && +            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs +          | otherwise = replacementCharacter : decode rs + +        aux n (r:rs) acc +          | r .&. 0xc0 == 0x80 = aux (n-1) rs +                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) + +        aux _ rs     _ = replacementCharacter : decode rs diff --git a/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs new file mode 100644 index 00000000..265a4d49 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE OverloadedStrings #-} +module Documentation.Haddock.Parser.UtilSpec (main, spec) where + +import Data.Attoparsec.ByteString.Char8 +import Data.Either +import Documentation.Haddock.Parser.Util +import Test.Hspec + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do +  describe "takeUntil" $ do +    it "takes everything until a specified byte sequence" $ do +      parseOnly (takeUntil "end") "someend" `shouldBe` Right "some" + +    it "requires the end sequence" $ do +      parseOnly (takeUntil "end") "someen" `shouldSatisfy` isLeft + +    it "takes escaped bytes unconditionally" $ do +      parseOnly (takeUntil "end") "some\\endend" `shouldBe` Right "some\\end" diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs new file mode 100644 index 00000000..3889d555 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs @@ -0,0 +1,816 @@ +{-# LANGUAGE OverloadedStrings, StandaloneDeriving +             , FlexibleInstances, UndecidableInstances +             , IncoherentInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Documentation.Haddock.ParserSpec (main, spec) where + +import           Data.Monoid +import           Data.String +import qualified Documentation.Haddock.Parser as Parse +import           Documentation.Haddock.Types +import           Test.Hspec +import           Test.QuickCheck + +type Doc id = DocH () id + +deriving instance Show a => Show (Header a) +deriving instance Show a => Show (Doc a) +deriving instance Eq a => Eq (Header a) +deriving instance Eq a => Eq (Doc a) + +instance IsString (Doc String) where +  fromString = DocString + +instance IsString a => IsString (Maybe a) where +  fromString = Just . fromString + +parseParas :: String -> Doc String +parseParas = Parse.toRegular . Parse.parseParas + +parseString :: String -> Doc String +parseString = Parse.toRegular . Parse.parseString + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do +  describe "parseString" $ do +    let infix 1 `shouldParseTo` +        shouldParseTo :: String -> Doc String -> Expectation +        shouldParseTo input ast = parseString input `shouldBe` ast + +    it "is total" $ do +      property $ \xs -> +        (length . show . parseString) xs `shouldSatisfy` (> 0) + +    context "when parsing text" $ do +      it "can handle unicode" $ do +        "灼眼のシャナ" `shouldParseTo` "灼眼のシャナ" + +      it "accepts numeric character references" $ do +        "foo bar baz λ" `shouldParseTo` "foo bar baz λ" + +      it "accepts hexadecimal character references" $ do +        "e" `shouldParseTo` "e" + +      it "allows to backslash-escape characters" $ do +        property $ \x -> ['\\', x] `shouldParseTo` DocString [x] + +      context "when parsing strings contaning numeric character references" $ do +        it "will implicitly convert digits to characters" $ do +          "AAAA" `shouldParseTo` "AAAA" + +          "灼眼のシャナ" +            `shouldParseTo` "灼眼のシャナ" + +        it "will implicitly convert hex encoded characters" $ do +          "eeee" `shouldParseTo` "eeee" + +    context "when parsing identifiers" $ do +      it "parses identifiers enclosed within single ticks" $ do +        "'foo'" `shouldParseTo` DocIdentifier "foo" + +      it "parses identifiers enclosed within backticks" $ do +        "`foo`" `shouldParseTo` DocIdentifier "foo" + +      it "parses a word with an one of the delimiters in it as DocString" $ do +          "don't" `shouldParseTo` "don't" + +      it "doesn't pass pairs of delimiters with spaces between them" $ do +        "hel'lo w'orld" `shouldParseTo` "hel'lo w'orld" + +      it "don't use apostrophe's in the wrong place's" $ do +        " don't use apostrophe's in the wrong place's" `shouldParseTo` +          "don't use apostrophe's in the wrong place's" + +    context "when parsing URLs" $ do +      let hyperlink :: String -> Maybe String -> Doc String +          hyperlink url = DocHyperlink . Hyperlink url + +      it "parses a URL" $ do +        "<http://example.com/>" `shouldParseTo` hyperlink "http://example.com/" Nothing + +      it "accepts an optional label" $ do +        "<http://example.com/ some link>" `shouldParseTo` hyperlink "http://example.com/" "some link" + +      it "does not accept newlines in label" $ do +        "<foo bar\nbaz>" `shouldParseTo` "<foo bar\nbaz>" + +      -- new behaviour test, this will be now consistent with other markup +      it "allows us to escape > inside the URL" $ do +        "<http://examp\\>le.com>" `shouldParseTo` +          hyperlink "http://examp>le.com" Nothing + +        "<http://exa\\>mp\\>le.com>" `shouldParseTo` +          hyperlink "http://exa>mp>le.com" Nothing + +        -- Likewise in label +        "<http://example.com f\\>oo>" `shouldParseTo` +          hyperlink "http://example.com" "f>oo" + +      it "parses inline URLs" $ do +        "foo <http://example.com/> bar" `shouldParseTo` +          "foo " <> hyperlink "http://example.com/" Nothing <> " bar" + +      it "doesn't allow for multi-line link tags" $ do +        "<ba\nz aar>" `shouldParseTo` "<ba\nz aar>" + +      context "when autolinking URLs" $ do +        it "autolinks HTTP URLs" $ do +          "http://example.com/" `shouldParseTo` hyperlink "http://example.com/" Nothing + +        it "autolinks HTTPS URLs" $ do +          "https://www.example.com/" `shouldParseTo` hyperlink "https://www.example.com/" Nothing + +        it "autolinks FTP URLs" $ do +          "ftp://example.com/" `shouldParseTo` hyperlink "ftp://example.com/" Nothing + +        it "does not include a trailing comma" $ do +          "http://example.com/, Some other sentence." `shouldParseTo` +            hyperlink "http://example.com/" Nothing <> ", Some other sentence." + +        it "does not include a trailing dot" $ do +          "http://example.com/. Some other sentence." `shouldParseTo` +            hyperlink "http://example.com/" Nothing <> ". Some other sentence." + +        it "does not include a trailing exclamation mark" $ do +          "http://example.com/! Some other sentence." `shouldParseTo` +            hyperlink "http://example.com/" Nothing <> "! Some other sentence." + +        it "does not include a trailing question mark" $ do +          "http://example.com/? Some other sentence." `shouldParseTo` +            hyperlink "http://example.com/" Nothing <> "? Some other sentence." + +    context "when parsing pictures" $ do +      let picture :: String -> Maybe String -> Doc String +          picture uri = DocPic . Picture uri + +      it "parses a simple picture" $ do +        "<<baz>>" `shouldParseTo` picture "baz" Nothing + +      it "parses a picture with a title" $ do +        "<<b a z>>" `shouldParseTo` picture "b" (Just "a z") + +      it "parses a picture with unicode" $ do +        "<<灼眼のシャナ>>" `shouldParseTo` picture "灼眼のシャナ" Nothing + +      it "allows for escaping of the closing tags" $ do +        "<<ba\\>>z>>" `shouldParseTo` picture "ba>>z" Nothing + +      it "doesn't allow for multi-line picture tags" $ do +        "<<ba\nz aar>>" `shouldParseTo` "<<ba\nz aar>>" + +    context "when parsing anchors" $ do +      it "parses a single word anchor" $ do +        "#foo#" `shouldParseTo` DocAName "foo" + +      it "parses a multi word anchor" $ do +        "#foo bar#" `shouldParseTo` DocAName "foo bar" + +      it "parses a unicode anchor" $ do +        "#灼眼のシャナ#" `shouldParseTo` DocAName "灼眼のシャナ" + +      it "does not accept newlines in anchors" $ do +        "#foo\nbar#" `shouldParseTo` "#foo\nbar#" + +    context "when parsing emphasised text" $ do +      it "emphasises a word on its own" $ do +        "/foo/" `shouldParseTo` DocEmphasis "foo" + +      it "emphasises inline correctly" $ do +        "foo /bar/ baz" `shouldParseTo` "foo " <> DocEmphasis "bar" <> " baz" + +      it "emphasises unicode" $ do +        "/灼眼のシャナ/" `shouldParseTo` DocEmphasis "灼眼のシャナ" + +      it "does not emphasise multi-line strings" $ do +        " /foo\nbar/" `shouldParseTo` "/foo\nbar/" + +      it "does not emphasise the empty string" $ do +        "//" `shouldParseTo` "//" + +      it "parses escaped slashes literally" $ do +        "/foo\\/bar/" `shouldParseTo` DocEmphasis "foo/bar" + +      it "recognizes other markup constructs within emphasised text" $ do +        "/foo @bar@ baz/" `shouldParseTo` +          DocEmphasis ("foo " <> DocMonospaced "bar" <> " baz") + +      it "allows other markup inside of emphasis" $ do +        "/__inner bold__/" `shouldParseTo` DocEmphasis (DocBold "inner bold") + +      it "doesn't mangle inner markup unicode" $ do +        "/__灼眼のシャナ A__/" `shouldParseTo` DocEmphasis (DocBold "灼眼のシャナ A") + +      it "properly converts HTML escape sequences" $ do +        "/AAAA/" `shouldParseTo` DocEmphasis "AAAA" + +      it "allows to escape the emphasis delimiter inside of emphasis" $ do +        "/empha\\/sis/" `shouldParseTo` DocEmphasis "empha/sis" + +    context "when parsing monospaced text" $ do +      it "parses simple monospaced text" $ do +        "@foo@" `shouldParseTo` DocMonospaced "foo" + +      it "parses inline monospaced text" $ do +        "foo @bar@ baz" `shouldParseTo` "foo " <> DocMonospaced "bar" <> " baz" + +      it "allows to escape @" $ do +        "@foo \\@ bar@" `shouldParseTo` DocMonospaced "foo @ bar" + +      it "accepts unicode" $ do +        "@foo 灼眼のシャナ bar@" `shouldParseTo` DocMonospaced "foo 灼眼のシャナ bar" + +      it "accepts other markup in monospaced text" $ do +        "@/foo/@" `shouldParseTo` DocMonospaced (DocEmphasis "foo") + +      it "requires the closing @" $ do +        "@foo /bar/ baz" `shouldParseTo` "@foo " <> DocEmphasis "bar" <> " baz" + +    context "when parsing bold strings" $ do +      it "allows for a bold string on its own" $ do +        "__bold string__" `shouldParseTo` +          DocBold "bold string" + +      it "bolds inline correctly" $ do +        "hello __everyone__ there" `shouldParseTo` +          "hello " +           <> DocBold "everyone" <> " there" + +      it "bolds unicode" $ do +        "__灼眼のシャナ__" `shouldParseTo` +          DocBold "灼眼のシャナ" + +      it "does not do __multi-line\\n bold__" $ do +        " __multi-line\n bold__" `shouldParseTo` "__multi-line\n bold__" + +      it "allows other markup inside of bold" $ do +        "__/inner emphasis/__" `shouldParseTo` +          (DocBold $ DocEmphasis "inner emphasis") + +      it "doesn't mangle inner markup unicode" $ do +        "__/灼眼のシャナ A/__" `shouldParseTo` +          (DocBold $ DocEmphasis "灼眼のシャナ A") + +      it "properly converts HTML escape sequences" $ do +        "__AAAA__" `shouldParseTo` +          DocBold "AAAA" + +      it "allows to escape the bold delimiter inside of bold" $ do +        "__bo\\__ld__" `shouldParseTo` +          DocBold "bo__ld" + +      it "doesn't allow for empty bold" $ do +        "____" `shouldParseTo` "____" + +    context "when parsing module strings" $ do +      it "should parse a module on its own" $ do +        "\"Module\"" `shouldParseTo` +          DocModule "Module" + +      it "should parse a module inline" $ do +        "This is a \"Module\"." `shouldParseTo` +          "This is a " <> DocModule "Module" <> "." + +      it "can accept a simple module name" $ do +        "\"Hello\"" `shouldParseTo` DocModule "Hello" + +      it "can accept a module name with dots" $ do +        "\"Hello.World\"" `shouldParseTo` DocModule "Hello.World" + +      it "can accept a module name with unicode" $ do +        "\"Hello.Worldλ\"" `shouldParseTo` DocModule "Hello.Worldλ" + +      it "parses a module name with a trailing dot as regular quoted string" $ do +        "\"Hello.\"" `shouldParseTo` "\"Hello.\"" + +      it "parses a module name with a space as regular quoted string" $ do +        "\"Hello World\"" `shouldParseTo` "\"Hello World\"" + +      it "parses a module name with invalid characters as regular quoted string" $ do +        "\"Hello&[{}(=*)+]!\"" `shouldParseTo` "\"Hello&[{}(=*)+]!\"" + +      it "accepts a module name with unicode" $ do +        "\"Foo.Barλ\"" `shouldParseTo` DocModule "Foo.Barλ" + +      it "treats empty module name as regular double quotes" $ do +        "\"\"" `shouldParseTo` "\"\"" + +  describe "parseParas" $ do +    let infix 1 `shouldParseTo` +        shouldParseTo :: String -> Doc String -> Expectation +        shouldParseTo input ast = parseParas input `shouldBe` ast + +    it "is total" $ do +      property $ \xs -> +        (length . show . parseParas) xs `shouldSatisfy` (> 0) + +    context "when parsing text paragraphs" $ do +      let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String)) + +      it "parses an empty paragraph" $ do +        "" `shouldParseTo` DocEmpty + +      it "parses a simple text paragraph" $ do +        "foo bar baz" `shouldParseTo` DocParagraph "foo bar baz" + +      it "accepts markup in text paragraphs" $ do +        "foo /bar/ baz" `shouldParseTo` DocParagraph ("foo " <> DocEmphasis "bar" <> " baz") + +      it "preserve all regular characters" $ do +        property $ \xs -> let input = filterSpecial xs in (not . null) input ==> +          input `shouldParseTo` DocParagraph (DocString input) + +      it "separates paragraphs by empty lines" $ do +        unlines [ +            "foo" +          , " \t " +          , "bar" +          ] `shouldParseTo` DocParagraph "foo" <> DocParagraph "bar" + +      context "when a pragraph only contains monospaced text" $ do +        it "turns it into a code block" $ do +          "@foo@" `shouldParseTo` DocCodeBlock "foo" + +    context "when parsing birdtracks" $ do +      it "parses them as a code block" $ do +        unlines [ +            ">foo" +          , ">bar" +          , ">baz" +          ] `shouldParseTo` DocCodeBlock "foo\nbar\nbaz" + +      it "ignores leading whitespace" $ do +        unlines [ +            " >foo" +          , " \t >bar" +          , " >baz" +          ] +        `shouldParseTo` DocCodeBlock "foo\nbar\nbaz" + +      it "strips one leading space from each line of the block" $ do +        unlines [ +            "> foo" +          , ">  bar" +          , "> baz" +          ] `shouldParseTo` DocCodeBlock "foo\n bar\nbaz" + +      it "ignores empty lines when stripping spaces" $ do +        unlines [ +            "> foo" +          , ">" +          , "> bar" +          ] `shouldParseTo` DocCodeBlock "foo\n\nbar" + +      context "when any non-empty line does not start with a space" $ do +        it "does not strip any spaces" $ do +          unlines [ +              ">foo" +            , ">  bar" +            ] `shouldParseTo` DocCodeBlock "foo\n  bar" + +      it "ignores nested markup" $ do +        unlines [ +            ">/foo/" +          ] `shouldParseTo` DocCodeBlock "/foo/" + +      it "treats them as regular text inside text paragraphs" $ do +        unlines [ +            "foo" +          , ">bar" +          ] `shouldParseTo` DocParagraph "foo\n>bar" + +    context "when parsing code blocks" $ do +      it "accepts a simple code block" $ do +        unlines [ +            "@" +          , "foo" +          , "bar" +          , "baz" +          , "@" +          ] `shouldParseTo` DocCodeBlock "foo\nbar\nbaz\n" + +      it "ignores trailing whitespace after the opening @" $ do +        unlines [ +            "@   " +          , "foo" +          , "@" +          ] `shouldParseTo` DocCodeBlock "foo\n" + +      it "rejects code blocks that are not closed" $ do +        unlines [ +            "@" +          , "foo" +          ] `shouldParseTo` DocParagraph "@\nfoo" + +      it "accepts nested markup" $ do +        unlines [ +            "@" +          , "/foo/" +          , "@" +          ] `shouldParseTo` DocCodeBlock (DocEmphasis "foo" <> "\n") + +      it "allows to escape the @" $ do +        unlines [ +            "@" +          , "foo" +          , "\\@" +          , "bar" +          , "@" +          ] `shouldParseTo` DocCodeBlock "foo\n@\nbar\n" + +      it "accepts horizontal space before the @" $ do +        unlines [ "     @" +                , "foo" +                , "" +                , "bar" +                , "@" +                ] `shouldParseTo` DocCodeBlock "foo\n\nbar\n" + +      it "strips a leading space from a @ block if present" $ do +        unlines [ " @" +                , " hello" +                , " world" +                , " @" +                ] `shouldParseTo` DocCodeBlock "hello\nworld\n" + +        unlines [ " @" +                , " hello" +                , "" +                , " world" +                , " @" +                ] `shouldParseTo` DocCodeBlock "hello\n\nworld\n" + +      it "only drops whitespace if there's some before closing @" $ do +        unlines [ "@" +                , "    Formatting" +                , "        matters." +                , "@" +                ] +          `shouldParseTo` DocCodeBlock "    Formatting\n        matters.\n" + +      it "accepts unicode" $ do +        "@foo 灼眼のシャナ bar@" `shouldParseTo` DocCodeBlock "foo 灼眼のシャナ bar" + +      it "requires the closing @" $ do +        "@foo /bar/ baz" +          `shouldParseTo` DocParagraph ("@foo " <> DocEmphasis "bar" <> " baz") + + +    context "when parsing examples" $ do +      it "parses a simple example" $ do +        ">>> foo" `shouldParseTo` DocExamples [Example "foo" []] + +      it "parses an example with result" $ do +        unlines [ +            ">>> foo" +          , "bar" +          , "baz" +          ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] + +      it "parses consecutive examples" $ do +        unlines [ +            ">>> fib 5" +          , "5" +          , ">>> fib 10" +          , "55" +          ] `shouldParseTo` DocExamples [ +            Example "fib 5" ["5"] +          , Example "fib 10" ["55"] +          ] + +      it ("requires an example to be separated" +          ++ " from a previous paragraph by an empty line") $ do +        "foobar\n\n>>> fib 10\n55" `shouldParseTo` +          DocParagraph "foobar" +                <> DocExamples [Example "fib 10" ["55"]] + +      it "parses bird-tracks inside of paragraphs as plain strings" $ do +        let xs = "foo\n>>> bar" +        xs `shouldParseTo` DocParagraph (DocString xs) + +      it "skips empty lines in front of an example" $ do +        "\n   \n\n>>> foo" `shouldParseTo` DocExamples [Example "foo" []] + +      it "terminates example on empty line" $ do +        unlines [ +            ">>> foo" +          , "bar" +          , "    " +          , "baz" +          ] +        `shouldParseTo` +          DocExamples [Example "foo" ["bar"]] <> DocParagraph "baz" + +      it "parses a <BLANKLINE> result as an empty result" $ do +        unlines [ +            ">>> foo" +          , "bar" +          , "<BLANKLINE>" +          , "baz" +          ] +        `shouldParseTo` DocExamples [Example "foo" ["bar", "", "baz"]] + +      it "accepts unicode in examples" $ do +        ">>> 灼眼\nシャナ" `shouldParseTo` DocExamples [Example "灼眼" ["シャナ"]] + +      context "when prompt is prefixed by whitespace" $ do +        it "strips the exact same amount of whitespace from result lines" $ do +          unlines [ +              "   >>> foo" +            , "   bar" +            , "   baz" +            ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] + +        it "preserves additional whitespace" $ do +          unlines [ +              "   >>> foo" +            , "    bar" +            ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + +        it "keeps original if stripping is not possible" $ do +          unlines [ +              "   >>> foo" +            , " bar" +            ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + + +    context "when parsing paragraphs nested in lists" $ do +      it "can nest the same type of list" $ do +        "* foo\n\n    * bar" `shouldParseTo` +          DocUnorderedList [ DocParagraph $ "foo" +                             <> DocUnorderedList [DocParagraph "bar"]] + +      it "can nest another type of list inside" $ do +        "* foo\n\n    1. bar" `shouldParseTo` +          DocUnorderedList [ DocParagraph $ "foo" +                             <> DocOrderedList [DocParagraph "bar"]] + +      it "can nest a code block inside" $ do +        "* foo\n\n    @foo bar baz@" `shouldParseTo` +          DocUnorderedList [ DocParagraph $ "foo" +                             <> DocCodeBlock "foo bar baz"] + +        "* foo\n\n    @\n    foo bar baz\n    @" `shouldParseTo` +          DocUnorderedList [ DocParagraph $ "foo" +                             <> DocCodeBlock "foo bar baz\n"] + +      it "can nest more than one level" $ do +        "* foo\n\n    * bar\n\n        * baz\n        qux" `shouldParseTo` +          DocUnorderedList [ DocParagraph $ "foo" +                             <> DocUnorderedList [ DocParagraph $ "bar" +                                                   <> DocUnorderedList [DocParagraph "baz\nqux"] +                                                 ] +                           ] + +      it "won't fail on not fully indented paragraph" $ do +        "* foo\n\n    * bar\n\n        * qux\nquux" `shouldParseTo` +          DocUnorderedList [ DocParagraph $ "foo" +                             <> DocUnorderedList [ DocParagraph "bar" ] +                           , DocParagraph "qux\nquux"] + + +      it "can nest definition lists" $ do +        "[a] foo\n\n    [b] bar\n\n        [c] baz\n        qux" `shouldParseTo` +          DocDefList [ ("a", "foo" +                             <> DocDefList [ ("b", "bar" +                                                   <> DocDefList [("c", "baz\nqux")]) +                                           ]) +                     ] + +      it "can come back to top level with a different list" $ do +        "* foo\n\n    * bar\n\n1. baz" `shouldParseTo` +          DocUnorderedList [ DocParagraph $ "foo" +                             <> DocUnorderedList [ DocParagraph "bar" ] +                           ] +          <> DocOrderedList [ DocParagraph "baz" ] + +      it "definition lists can come back to top level with a different list" $ do +        "[foo] foov\n\n    [bar] barv\n\n1. baz" `shouldParseTo` +          DocDefList [ ("foo", "foov" +                               <> DocDefList [ ("bar", "barv") ]) +                     ] +          <> DocOrderedList [ DocParagraph "baz" ] + +    context "when parsing properties" $ do +      it "can parse a single property" $ do +        "prop> 23 == 23" `shouldParseTo` DocProperty "23 == 23" + +      it "can parse multiple subsequent properties" $ do +        unlines [ +              "prop> 23 == 23" +            , "prop> 42 == 42" +            ] +        `shouldParseTo` +          DocProperty "23 == 23" <> DocProperty "42 == 42" + +      it "accepts unicode in properties" $ do +        "prop> 灼眼のシャナ ≡ 愛" `shouldParseTo` +          DocProperty "灼眼のシャナ ≡ 愛" + +      it "can deal with whitespace before and after the prop> prompt" $ do +        "  prop>     xs == (reverse $ reverse xs)  " `shouldParseTo` +          DocProperty "xs == (reverse $ reverse xs)" + +    context "when parsing unordered lists" $ do +      it "parses a simple list" $ do +        unlines [ +            " * one" +          , " * two" +          , " * three" +          ] +        `shouldParseTo` DocUnorderedList [ +            DocParagraph "one" +          , DocParagraph "two" +          , DocParagraph "three" +          ] + +      it "ignores empty lines between list items" $ do +        unlines [ +            "* one" +          , "" +          , "* two" +          ] +        `shouldParseTo` DocUnorderedList [ +            DocParagraph "one" +          , DocParagraph "two" +          ] + +      it "accepts an empty list item" $ do +        "*" `shouldParseTo` DocUnorderedList [DocParagraph DocEmpty] + +      it "accepts multi-line list items" $ do +        unlines [ +            "* point one" +          , "  more one" +          , "* point two" +          , "more two" +          ] +        `shouldParseTo` DocUnorderedList [ +            DocParagraph "point one\n  more one" +          , DocParagraph "point two\nmore two" +          ] + +      it "accepts markup in list items" $ do +        "* /foo/" `shouldParseTo` DocUnorderedList [DocParagraph (DocEmphasis "foo")] + +      it "requires empty lines between list and other paragraphs" $ do +        unlines [ +            "foo" +          , "" +          , "* bar" +          , "" +          , "baz" +          ] +        `shouldParseTo` DocParagraph "foo" <> DocUnorderedList [DocParagraph "bar"] <> DocParagraph "baz" + +    context "when parsing ordered lists" $ do +      it "parses a simple list" $ do +        unlines [ +            " 1. one" +          , " (1) two" +          , " 3. three" +          ] +        `shouldParseTo` DocOrderedList [ +            DocParagraph "one" +          , DocParagraph "two" +          , DocParagraph "three" +          ] + +      it "ignores empty lines between list items" $ do +        unlines [ +            "1. one" +          , "" +          , "2. two" +          ] +        `shouldParseTo` DocOrderedList [ +            DocParagraph "one" +          , DocParagraph "two" +          ] + +      it "accepts an empty list item" $ do +        "1." `shouldParseTo` DocOrderedList [DocParagraph DocEmpty] + +      it "accepts multi-line list items" $ do +        unlines [ +            "1. point one" +          , "  more one" +          , "1. point two" +          , "more two" +          ] +        `shouldParseTo` DocOrderedList [ +            DocParagraph "point one\n  more one" +          , DocParagraph "point two\nmore two" +          ] + +      it "accepts markup in list items" $ do +        "1. /foo/" `shouldParseTo` DocOrderedList [DocParagraph (DocEmphasis "foo")] + +      it "requires empty lines between list and other paragraphs" $ do +        unlines [ +            "foo" +          , "" +          , "1. bar" +          , "" +          , "baz" +          ] +        `shouldParseTo` DocParagraph "foo" <> DocOrderedList [DocParagraph "bar"] <> DocParagraph "baz" + +    context "when parsing definition lists" $ do +      it "parses a simple list" $ do +        unlines [ +            " [foo] one" +          , " [bar] two" +          , " [baz] three" +          ] +        `shouldParseTo` DocDefList [ +            ("foo", "one") +          , ("bar", "two") +          , ("baz", "three") +          ] + +      it "ignores empty lines between list items" $ do +        unlines [ +            "[foo] one" +          , "" +          , "[bar] two" +          ] +        `shouldParseTo` DocDefList [ +            ("foo", "one") +          , ("bar", "two") +          ] + +      it "accepts an empty list item" $ do +        "[foo]" `shouldParseTo` DocDefList [("foo", DocEmpty)] + +      it "accepts multi-line list items" $ do +        unlines [ +            "[foo] point one" +          , "  more one" +          , "[bar] point two" +          , "more two" +          ] +        `shouldParseTo` DocDefList [ +            ("foo", "point one\n  more one") +          , ("bar", "point two\nmore two") +          ] + +      it "accepts markup in list items" $ do +        "[foo] /foo/" `shouldParseTo` DocDefList [("foo", DocEmphasis "foo")] + +      it "accepts markup for the label" $ do +        "[/foo/] bar" `shouldParseTo` DocDefList [(DocEmphasis "foo", "bar")] + +      it "requires empty lines between list and other paragraphs" $ do +        unlines [ +            "foo" +          , "" +          , "[foo] bar" +          , "" +          , "baz" +          ] +        `shouldParseTo` DocParagraph "foo" <> DocDefList [("foo", "bar")] <> DocParagraph "baz" + +    context "when parsing consecutive paragraphs" $ do +      it "will not capture irrelevant consecutive lists" $ do +        unlines [ "   * bullet" +                , "" +                , "" +                , "   - different bullet" +                , "" +                , "" +                , "   (1) ordered" +                , " " +                , "   2. different bullet" +                , "   " +                , "   [cat] kitten" +                , "   " +                , "   [pineapple] fruit" +                ] `shouldParseTo` +          DocUnorderedList [ DocParagraph "bullet" +                           , DocParagraph "different bullet"] +          <> DocOrderedList [ DocParagraph "ordered" +                            , DocParagraph "different bullet" +                            ] +          <> DocDefList [ ("cat", "kitten") +                        , ("pineapple", "fruit") +                        ] + +    context "when parsing function documentation headers" $ do +      it "can parse a simple header" $ do +        "= Header 1\nHello." `shouldParseTo` +          DocParagraph (DocHeader (Header 1 "Header 1")) +          <> DocParagraph "Hello." + +      it "allow consecutive headers" $ do +        "= Header 1\n== Header 2" `shouldParseTo` +          DocParagraph (DocHeader (Header 1 "Header 1")) +          <> DocParagraph (DocHeader (Header 2 "Header 2")) + +      it "accepts markup in the header" $ do +        "= /Header/ __1__\nFoo" `shouldParseTo` +          DocParagraph (DocHeader +                        (Header 1 (DocEmphasis "Header" <> " " <> DocBold "1"))) +          <> DocParagraph "Foo" diff --git a/haddock-library/test/Documentation/Haddock/Utf8Spec.hs b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs new file mode 100644 index 00000000..47e12704 --- /dev/null +++ b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs @@ -0,0 +1,14 @@ +module Documentation.Haddock.Utf8Spec (main, spec) where + +import Test.Hspec +import Test.QuickCheck +import Documentation.Haddock.Utf8 + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do +  describe "decodeUtf8" $ do +    it "is inverse to encodeUtf8" $ do +      property $ \xs -> (decodeUtf8 . encodeUtf8) xs `shouldBe` xs diff --git a/haddock-library/test/Spec.hs b/haddock-library/test/Spec.hs new file mode 100644 index 00000000..a824f8c3 --- /dev/null +++ b/haddock-library/test/Spec.hs @@ -0,0 +1 @@ +{-# OPTIONS_GHC -F -pgmF hspec-discover #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs new file mode 100644 index 00000000..41b4ed30 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs @@ -0,0 +1,18 @@ +-- | +-- Module      :  Data.Attoparsec +-- Copyright   :  Bryan O'Sullivan 2007-2011 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient combinator parsing for 'ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec +    ( +      module Data.Attoparsec.ByteString +    ) where + +import Data.Attoparsec.ByteString diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs new file mode 100644 index 00000000..d2f3761c --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs @@ -0,0 +1,205 @@ +-- | +-- Module      :  Data.Attoparsec.ByteString +-- Copyright   :  Bryan O'Sullivan 2007-2011 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient combinator parsing for 'B.ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString +    ( +    -- * Differences from Parsec +    -- $parsec + +    -- * Incremental input +    -- $incremental + +    -- * Performance considerations +    -- $performance + +    -- * Parser types +      I.Parser +    , Result +    , T.IResult(..) +    , I.compareResults + +    -- * Running parsers +    , parse +    , feed +    , I.parseOnly +    , parseWith +    , parseTest + +    -- ** Result conversion +    , maybeResult +    , eitherResult + +    -- * Combinators +    , (I.<?>) +    , I.try +    , module Data.Attoparsec.Combinator + +    -- * Parsing individual bytes +    , I.word8 +    , I.anyWord8 +    , I.notWord8 +    , I.peekWord8 +    , I.satisfy +    , I.satisfyWith +    , I.skip + +    -- ** Byte classes +    , I.inClass +    , I.notInClass + +    -- * Efficient string handling +    , I.string +    , I.skipWhile +    , I.take +    , I.scan +    , I.takeWhile +    , I.takeWhile1 +    , I.takeTill + +    -- ** Consume all remaining input +    , I.takeByteString +    , I.takeLazyByteString + +    -- * State observation and manipulation functions +    , I.endOfInput +    , I.atEnd +    ) where + +import Data.Attoparsec.Combinator +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B +import Data.Attoparsec.ByteString.Internal (Result, parse) +import qualified Data.Attoparsec.Internal.Types as T + +-- $parsec +-- +-- Compared to Parsec 3, Attoparsec makes several tradeoffs.  It is +-- not intended for, or ideal for, all possible uses. +-- +-- * While Attoparsec can consume input incrementally, Parsec cannot. +--   Incremental input is a huge deal for efficient and secure network +--   and system programming, since it gives much more control to users +--   of the library over matters such as resource usage and the I/O +--   model to use. +-- +-- * Much of the performance advantage of Attoparsec is gained via +--   high-performance parsers such as 'I.takeWhile' and 'I.string'. +--   If you use complicated combinators that return lists of bytes or +--   characters, there is less performance difference between the two +--   libraries. +-- +-- * Unlike Parsec 3, Attoparsec does not support being used as a +--   monad transformer. +-- +-- * Attoparsec is specialised to deal only with strict 'B.ByteString' +--   input.  Efficiency concerns rule out both lists and lazy +--   bytestrings.  The usual use for lazy bytestrings would be to +--   allow consumption of very large input without a large footprint. +--   For this need, Attoparsec's incremental input provides an +--   excellent substitute, with much more control over when input +--   takes place.  If you must use lazy bytestrings, see the 'Lazy' +--   module, which feeds lazy chunks to a regular parser. +-- +-- * Parsec parsers can produce more helpful error messages than +--   Attoparsec parsers.  This is a matter of focus: Attoparsec avoids +--   the extra book-keeping in favour of higher performance. + +-- $incremental +-- +-- Attoparsec supports incremental input, meaning that you can feed it +-- a bytestring that represents only part of the expected total amount +-- of data to parse. If your parser reaches the end of a fragment of +-- input and could consume more input, it will suspend parsing and +-- return a 'T.Partial' continuation. +-- +-- Supplying the 'T.Partial' continuation with another bytestring will +-- resume parsing at the point where it was suspended. You must be +-- prepared for the result of the resumed parse to be another +-- 'T.Partial' continuation. +-- +-- To indicate that you have no more input, supply the 'T.Partial' +-- continuation with an empty bytestring. +-- +-- Remember that some parsing combinators will not return a result +-- until they reach the end of input.  They may thus cause 'T.Partial' +-- results to be returned. +-- +-- If you do not need support for incremental input, consider using +-- the 'I.parseOnly' function to run your parser.  It will never +-- prompt for more input. + +-- $performance +-- +-- If you write an Attoparsec-based parser carefully, it can be +-- realistic to expect it to perform within a factor of 2 of a +-- hand-rolled C parser (measuring megabytes parsed per second). +-- +-- To actually achieve high performance, there are a few guidelines +-- that it is useful to follow. +-- +-- Use the 'B.ByteString'-oriented parsers whenever possible, +-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'.  There is +-- about a factor of 100 difference in performance between the two +-- kinds of parser. +-- +-- For very simple byte-testing predicates, write them by hand instead +-- of using 'I.inClass' or 'I.notInClass'.  For instance, both of +-- these predicates test for an end-of-line byte, but the first is +-- much faster than the second: +-- +-- >endOfLine_fast w = w == 13 || w == 10 +-- >endOfLine_slow   = inClass "\r\n" +-- +-- Make active use of benchmarking and profiling tools to measure, +-- find the problems with, and improve the performance of your parser. + +-- | If a parser has returned a 'T.Partial' result, supply it with more +-- input. +feed :: Result r -> B.ByteString -> Result r +feed f@(T.Fail _ _ _) _ = f +feed (T.Partial k) d    = k d +feed (T.Done bs r) d    = T.Done (B.append bs d) r +{-# INLINE feed #-} + +-- | Run a parser and print its result to standard output. +parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO () +parseTest p s = print (parse p s) + +-- | Run a parser with an initial input string, and a monadic action +-- that can supply more input if needed. +parseWith :: Monad m => +             (m B.ByteString) +          -- ^ An action that will be executed to provide the parser +          -- with more input, if necessary.  The action must return an +          -- 'B.empty' string when there is no more input available. +          -> I.Parser a +          -> B.ByteString +          -- ^ Initial input for the parser. +          -> m (Result a) +parseWith refill p s = step $ parse p s +  where step (T.Partial k) = (step . k) =<< refill +        step r             = return r +{-# INLINE parseWith #-} + +-- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result +-- is treated as failure. +maybeResult :: Result r -> Maybe r +maybeResult (T.Done _ r) = Just r +maybeResult _            = Nothing + +-- | Convert a 'Result' value to an 'Either' value. A 'T.Partial' +-- result is treated as failure. +eitherResult :: Result r -> Either String r +eitherResult (T.Done _ r)     = Right r +eitherResult (T.Fail _ _ msg) = Left msg +eitherResult _                = Left "Result: incomplete input" diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs new file mode 100644 index 00000000..3bbe51f0 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs @@ -0,0 +1,549 @@ +{-# LANGUAGE BangPatterns, FlexibleInstances, TypeFamilies, +    TypeSynonymInstances, GADTs #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | +-- Module      :  Data.Attoparsec.ByteString.Char8 +-- Copyright   :  Bryan O'Sullivan 2007-2011 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient, character-oriented combinator parsing for +-- 'B.ByteString' strings, loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Char8 +    ( +    -- * Character encodings +    -- $encodings + +    -- * Parser types +      Parser +    , A.Result +    , A.IResult(..) +    , I.compareResults + +    -- * Running parsers +    , A.parse +    , A.feed +    , A.parseOnly +    , A.parseTest +    , A.parseWith + +    -- ** Result conversion +    , A.maybeResult +    , A.eitherResult + +    -- * Combinators +    , (I.<?>) +    , I.try +    , module Data.Attoparsec.Combinator + +    -- * Parsing individual characters +    , char +    , char8 +    , anyChar +    , notChar +    , peekChar +    , satisfy + +    -- ** Special character parsers +    , digit +    , letter_iso8859_15 +    , letter_ascii +    , space + +    -- ** Fast predicates +    , isDigit +    , isDigit_w8 +    , isAlpha_iso8859_15 +    , isAlpha_ascii +    , isSpace +    , isSpace_w8 + +    -- *** Character classes +    , inClass +    , notInClass + +    -- * Efficient string handling +    , I.string +    , stringCI +    , skipSpace +    , skipWhile +    , I.take +    , scan +    , takeWhile +    , takeWhile1 +    , takeTill + +    -- ** String combinators +    -- $specalt +    , (.*>) +    , (<*.) + +    -- ** Consume all remaining input +    , I.takeByteString +    , I.takeLazyByteString + +    -- * Text parsing +    , I.endOfLine +    , isEndOfLine +    , isHorizontalSpace + +    -- * Numeric parsers +    , decimal +    , hexadecimal +    , signed +    , double +    , Number(..) +    , number +    , rational + +    -- * State observation and manipulation functions +    , I.endOfInput +    , I.atEnd +    ) where + +import Control.Applicative ((*>), (<*), (<$>), (<|>)) +import Data.Attoparsec.ByteString.FastSet (charClass, memberChar) +import Data.Attoparsec.ByteString.Internal (Parser, (<?>)) +import Data.Attoparsec.Combinator +import Data.Attoparsec.Number (Number(..)) +import Data.Bits (Bits, (.|.), shiftL) +import Data.ByteString.Internal (c2w, w2c) +import Data.Int (Int8, Int16, Int32, Int64) +import Data.Ratio ((%)) +import Data.String (IsString(..)) +import Data.Word (Word8, Word16, Word32, Word64, Word) +import Prelude hiding (takeWhile) +import qualified Data.Attoparsec.ByteString as A +import qualified Data.Attoparsec.ByteString.Internal as I +import qualified Data.Attoparsec.Internal as I +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B + +instance (a ~ B.ByteString) => IsString (Parser a) where +    fromString = I.string . B.pack + +-- $encodings +-- +-- This module is intended for parsing text that is +-- represented using an 8-bit character set, e.g. ASCII or +-- ISO-8859-15.  It /does not/ make any attempt to deal with character +-- encodings, multibyte characters, or wide characters.  In +-- particular, all attempts to use characters above code point U+00FF +-- will give wrong answers. +-- +-- Code points below U+0100 are simply translated to and from their +-- numeric values, so e.g. the code point U+00A4 becomes the byte +-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic +-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF +-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@. + +-- ASCII-specific but fast, oh yes. +toLower :: Word8 -> Word8 +toLower w | w >= 65 && w <= 90 = w + 32 +          | otherwise          = w + +-- | Satisfy a literal string, ignoring case. +stringCI :: B.ByteString -> Parser B.ByteString +stringCI = I.stringTransform (B8.map toLower) +{-# INLINE stringCI #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Char -> Bool) -> Parser B.ByteString +takeWhile1 p = I.takeWhile1 (p . w2c) +{-# INLINE takeWhile1 #-} + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- >    where isDigit c = c >= '0' && c <= '9' +satisfy :: (Char -> Bool) -> Parser Char +satisfy = I.satisfyWith w2c +{-# INLINE satisfy #-} + +-- | Match a letter, in the ISO-8859-15 encoding. +letter_iso8859_15 :: Parser Char +letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15" +{-# INLINE letter_iso8859_15 #-} + +-- | Match a letter, in the ASCII encoding. +letter_ascii :: Parser Char +letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii" +{-# INLINE letter_ascii #-} + +-- | A fast alphabetic predicate for the ISO-8859-15 encoding +-- +-- /Note/: For all character encodings other than ISO-8859-15, and +-- almost all Unicode code points above U+00A3, this predicate gives +-- /wrong answers/. +isAlpha_iso8859_15 :: Char -> Bool +isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || +                       (c >= '\166' && moby c) +  where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247" +        {-# NOINLINE moby #-} +{-# INLINE isAlpha_iso8859_15 #-} + +-- | A fast alphabetic predicate for the ASCII encoding +-- +-- /Note/: For all character encodings other than ASCII, and +-- almost all Unicode code points above U+007F, this predicate gives +-- /wrong answers/. +isAlpha_ascii :: Char -> Bool +isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') +{-# INLINE isAlpha_ascii #-} + +-- | Parse a single digit. +digit :: Parser Char +digit = satisfy isDigit <?> "digit" +{-# INLINE digit #-} + +-- | A fast digit predicate. +isDigit :: Char -> Bool +isDigit c = c >= '0' && c <= '9' +{-# INLINE isDigit #-} + +-- | A fast digit predicate. +isDigit_w8 :: Word8 -> Bool +isDigit_w8 w = w >= 48 && w <= 57 +{-# INLINE isDigit_w8 #-} + +-- | Match any character. +anyChar :: Parser Char +anyChar = satisfy $ const True +{-# INLINE anyChar #-} + +-- | Match any character. Returns 'Nothing' if end of input has been +-- reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +peekChar :: Parser (Maybe Char) +peekChar = (fmap w2c) `fmap` I.peekWord8 +{-# INLINE peekChar #-} + +-- | Fast predicate for matching ASCII space characters. +-- +-- /Note/: This predicate only gives correct answers for the ASCII +-- encoding.  For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. For a Unicode-aware and only slightly slower predicate, +-- use 'Data.Char.isSpace' +isSpace :: Char -> Bool +isSpace c = (c == ' ') || ('\t' <= c && c <= '\r') +{-# INLINE isSpace #-} + +-- | Fast 'Word8' predicate for matching ASCII space characters. +isSpace_w8 :: Word8 -> Bool +isSpace_w8 w = (w == 32) || (9 <= w && w <= 13) +{-# INLINE isSpace_w8 #-} + + +-- | Parse a space character. +-- +-- /Note/: This parser only gives correct answers for the ASCII +-- encoding.  For instance, it does not recognise U+00A0 (non-breaking +-- space) as a space character, even though it is a valid ISO-8859-15 +-- byte. +space :: Parser Char +space = satisfy isSpace <?> "space" +{-# INLINE space #-} + +-- | Match a specific character. +char :: Char -> Parser Char +char c = satisfy (== c) <?> [c] +{-# INLINE char #-} + +-- | Match a specific character, but return its 'Word8' value. +char8 :: Char -> Parser Word8 +char8 c = I.satisfy (== c2w c) <?> [c] +{-# INLINE char8 #-} + +-- | Match any character except the given one. +notChar :: Char -> Parser Char +notChar c = satisfy (/= c) <?> "not " ++ [c] +{-# INLINE notChar #-} + +-- | Match any character in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal \'-\' to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Char -> Bool +inClass s = (`memberChar` mySet) +    where mySet = charClass s +{-# INLINE inClass #-} + +-- | Match any character not in a set. +notInClass :: String -> Char -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +takeWhile :: (Char -> Bool) -> Parser B.ByteString +takeWhile p = I.takeWhile (p . w2c) +{-# INLINE takeWhile #-} + +-- | A stateful scanner.  The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString +scan s0 p = I.scan s0 (\s -> p s . w2c) +{-# INLINE scan #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +takeTill :: (Char -> Bool) -> Parser B.ByteString +takeTill p = I.takeTill (p . w2c) +{-# INLINE takeTill #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Char -> Bool) -> Parser () +skipWhile p = I.skipWhile (p . w2c) +{-# INLINE skipWhile #-} + +-- | Skip over white space. +skipSpace :: Parser () +skipSpace = I.skipWhile isSpace_w8 +{-# INLINE skipSpace #-} + +-- $specalt +-- +-- The '.*>' and '<*.' combinators are intended for use with the +-- @OverloadedStrings@ language extension.  They simplify the common +-- task of matching a statically known string, then immediately +-- parsing something else. +-- +-- An example makes this easier to understand: +-- +-- @{-\# LANGUAGE OverloadedStrings #-} +-- +-- shoeSize = \"Shoe size: \" '.*>' 'decimal' +-- @ +-- +-- If we were to try to use '*>' above instead, the type checker would +-- not be able to tell which 'IsString' instance to use for the text +-- in quotes.  We would have to be explicit, using either a type +-- signature or the 'I.string' parser. + +-- | Type-specialized version of '*>' for 'B.ByteString'. +(.*>) :: B.ByteString -> Parser a -> Parser a +s .*> f = I.string s *> f + +-- | Type-specialized version of '<*' for 'B.ByteString'. +(<*.) :: Parser a -> B.ByteString -> Parser a +f <*. s = f <* I.string s + +-- | A predicate that matches either a carriage return @\'\\r\'@ or +-- newline @\'\\n\'@ character. +isEndOfLine :: Word8 -> Bool +isEndOfLine w = w == 13 || w == 10 +{-# INLINE isEndOfLine #-} + +-- | A predicate that matches either a space @\' \'@ or horizontal tab +-- @\'\\t\'@ character. +isHorizontalSpace :: Word8 -> Bool +isHorizontalSpace w = w == 32 || w == 9 +{-# INLINE isHorizontalSpace #-} + +-- | Parse and decode an unsigned hexadecimal number.  The hex digits +-- @\'a\'@ through @\'f\'@ may be upper or lower case. +-- +-- This parser does not accept a leading @\"0x\"@ string. +hexadecimal :: (Integral a, Bits a) => Parser a +hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit +  where +    isHexDigit w = (w >= 48 && w <= 57) || +                   (w >= 97 && w <= 102) || +                   (w >= 65 && w <= 70) +    step a w | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48) +             | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87) +             | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55) +{-# SPECIALISE hexadecimal :: Parser Int #-} +{-# SPECIALISE hexadecimal :: Parser Int8 #-} +{-# SPECIALISE hexadecimal :: Parser Int16 #-} +{-# SPECIALISE hexadecimal :: Parser Int32 #-} +{-# SPECIALISE hexadecimal :: Parser Int64 #-} +{-# SPECIALISE hexadecimal :: Parser Integer #-} +{-# SPECIALISE hexadecimal :: Parser Word #-} +{-# SPECIALISE hexadecimal :: Parser Word8 #-} +{-# SPECIALISE hexadecimal :: Parser Word16 #-} +{-# SPECIALISE hexadecimal :: Parser Word32 #-} +{-# SPECIALISE hexadecimal :: Parser Word64 #-} + +-- | Parse and decode an unsigned decimal number. +decimal :: Integral a => Parser a +decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig +  where isDig w  = w >= 48 && w <= 57 +        step a w = a * 10 + fromIntegral (w - 48) +{-# SPECIALISE decimal :: Parser Int #-} +{-# SPECIALISE decimal :: Parser Int8 #-} +{-# SPECIALISE decimal :: Parser Int16 #-} +{-# SPECIALISE decimal :: Parser Int32 #-} +{-# SPECIALISE decimal :: Parser Int64 #-} +{-# SPECIALISE decimal :: Parser Integer #-} +{-# SPECIALISE decimal :: Parser Word #-} +{-# SPECIALISE decimal :: Parser Word8 #-} +{-# SPECIALISE decimal :: Parser Word16 #-} +{-# SPECIALISE decimal :: Parser Word32 #-} +{-# SPECIALISE decimal :: Parser Word64 #-} + +-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign +-- character. +signed :: Num a => Parser a -> Parser a +{-# SPECIALISE signed :: Parser Int -> Parser Int #-} +{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-} +{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-} +{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-} +{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-} +{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-} +signed p = (negate <$> (char8 '-' *> p)) +       <|> (char8 '+' *> p) +       <|> p + +-- | Parse a rational number. +-- +-- This parser accepts an optional leading sign character, followed by +-- at least one decimal digit.  The syntax similar to that accepted by +-- the 'read' function, with the exception that a trailing @\'.\'@ or +-- @\'e\'@ /not/ followed by a number is not consumed. +-- +-- Examples with behaviour identical to 'read', if you feed an empty +-- continuation to the first result: +-- +-- >rational "3"     == Done 3.0 "" +-- >rational "3.1"   == Done 3.1 "" +-- >rational "3e4"   == Done 30000.0 "" +-- >rational "3.1e4" == Done 31000.0, "" +-- +-- Examples with behaviour identical to 'read': +-- +-- >rational ".3"    == Fail "input does not start with a digit" +-- >rational "e3"    == Fail "input does not start with a digit" +-- +-- Examples of differences from 'read': +-- +-- >rational "3.foo" == Done 3.0 ".foo" +-- >rational "3e"    == Done 3.0 "e" +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +rational :: Fractional a => Parser a +{-# SPECIALIZE rational :: Parser Double #-} +{-# SPECIALIZE rational :: Parser Float #-} +{-# SPECIALIZE rational :: Parser Rational #-} +rational = floaty $ \real frac fracDenom -> fromRational $ +                     real % 1 + frac % fracDenom + +-- | Parse a rational number. +-- +-- The syntax accepted by this parser is the same as for 'rational'. +-- +-- /Note/: This function is almost ten times faster than 'rational', +-- but is slightly less accurate. +-- +-- The 'Double' type supports about 16 decimal places of accuracy. +-- For 94.2% of numbers, this function and 'rational' give identical +-- results, but for the remaining 5.8%, this function loses precision +-- around the 15th decimal place.  For 0.001% of numbers, this +-- function will lose precision at the 13th or 14th decimal place. +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +double :: Parser Double +double = floaty asDouble + +asDouble :: Integer -> Integer -> Integer -> Double +asDouble real frac fracDenom = +    fromIntegral real + fromIntegral frac / fromIntegral fracDenom +{-# INLINE asDouble #-} + +-- | Parse a number, attempting to preserve both speed and precision. +-- +-- The syntax accepted by this parser is the same as for 'rational'. +-- +-- /Note/: This function is almost ten times faster than 'rational'. +-- On integral inputs, it gives perfectly accurate answers, and on +-- floating point inputs, it is slightly less accurate than +-- 'rational'. +-- +-- This function does not accept string representations of \"NaN\" or +-- \"Infinity\". +number :: Parser Number +number = floaty $ \real frac fracDenom -> +         if frac == 0 && fracDenom == 0 +         then I real +         else D (asDouble real frac fracDenom) +{-# INLINE number #-} + +data T = T !Integer !Int + +floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a +{-# INLINE floaty #-} +floaty f = do +  let minus = 45 +      plus  = 43 +  !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|> +               return True +  real <- decimal +  let tryFraction = do +        let dot = 46 +        _ <- I.satisfy (==dot) +        ds <- I.takeWhile isDigit_w8 +        case I.parseOnly decimal ds of +                Right n -> return $ T n (B.length ds) +                _       -> fail "no digits after decimal" +  T fraction fracDigits <- tryFraction <|> return (T 0 0) +  let littleE = 101 +      bigE    = 69 +      e w = w == littleE || w == bigE +  power <- (I.satisfy e *> signed decimal) <|> return (0::Int) +  let n = if fracDigits == 0 +          then if power == 0 +               then fromIntegral real +               else fromIntegral real * (10 ^^ power) +          else if power == 0 +               then f real fraction (10 ^ fracDigits) +               else f real fraction (10 ^ fracDigits) * (10 ^^ power) +  return $ if positive +           then n +           else -n diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs new file mode 100644 index 00000000..73d02056 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs @@ -0,0 +1,115 @@ +{-# LANGUAGE BangPatterns, MagicHash #-} + +----------------------------------------------------------------------------- +-- | +-- Module      :  Data.Attoparsec.ByteString.FastSet +-- Copyright   :  Bryan O'Sullivan 2008 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The +-- set representation is unboxed for efficiency.  For small sets, we +-- test for membership using a binary search.  For larger sets, we use +-- a lookup table. +-- +----------------------------------------------------------------------------- +module Data.Attoparsec.ByteString.FastSet +    ( +    -- * Data type +      FastSet +    -- * Construction +    , fromList +    , set +    -- * Lookup +    , memberChar +    , memberWord8 +    -- * Debugging +    , fromSet +    -- * Handy interface +    , charClass +    ) where + +import Data.Bits ((.&.), (.|.)) +import Foreign.Storable (peekByteOff, pokeByteOff) +import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#) +import GHC.Word (Word8(W8#)) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as B8 +import qualified Data.ByteString.Internal as I +import qualified Data.ByteString.Unsafe as U + +data FastSet = Sorted { fromSet :: !B.ByteString } +             | Table  { fromSet :: !B.ByteString } +    deriving (Eq, Ord) + +instance Show FastSet where +    show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) +    show (Table _) = "FastSet Table" + +-- | The lower bound on the size of a lookup table.  We choose this to +-- balance table density against performance. +tableCutoff :: Int +tableCutoff = 8 + +-- | Create a set. +set :: B.ByteString -> FastSet +set s | B.length s < tableCutoff = Sorted . B.sort $ s +      | otherwise                = Table . mkTable $ s + +fromList :: [Word8] -> FastSet +fromList = set . B.pack + +data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 + +shiftR :: Int -> Int -> Int +shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) + +shiftL :: Word8 -> Int -> Word8 +shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#)) + +index :: Int -> I +index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7)) +{-# INLINE index #-} + +-- | Check the set for membership. +memberWord8 :: Word8 -> FastSet -> Bool +memberWord8 w (Table t)  = +    let I byte bit = index (fromIntegral w) +    in  U.unsafeIndex t byte .&. bit /= 0 +memberWord8 w (Sorted s) = search 0 (B.length s - 1) +    where search lo hi +              | hi < lo = False +              | otherwise = +                  let mid = (lo + hi) `div` 2 +                  in case compare w (U.unsafeIndex s mid) of +                       GT -> search (mid + 1) hi +                       LT -> search lo (mid - 1) +                       _ -> True + +-- | Check the set for membership.  Only works with 8-bit characters: +-- characters above code point 255 will give wrong answers. +memberChar :: Char -> FastSet -> Bool +memberChar c = memberWord8 (I.c2w c) +{-# INLINE memberChar #-} + +mkTable :: B.ByteString -> B.ByteString +mkTable s = I.unsafeCreate 32 $ \t -> do +            _ <- I.memset t 0 32 +            U.unsafeUseAsCStringLen s $ \(p, l) -> +              let loop n | n == l = return () +                         | otherwise = do +                    c <- peekByteOff p n :: IO Word8 +                    let I byte bit = index (fromIntegral c) +                    prev <- peekByteOff t byte :: IO Word8 +                    pokeByteOff t byte (prev .|. bit) +                    loop (n + 1) +              in loop 0 + +charClass :: String -> FastSet +charClass = set . B8.pack . go +    where go (a:'-':b:xs) = [a..b] ++ go xs +          go (x:xs) = x : go xs +          go _ = "" diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs new file mode 100644 index 00000000..b3699728 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs @@ -0,0 +1,516 @@ +{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings, +    RecordWildCards, MagicHash, UnboxedTuples #-} +-- | +-- Module      :  Data.Attoparsec.ByteString.Internal +-- Copyright   :  Bryan O'Sullivan 2007-2011 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient parser combinators for 'B.ByteString' strings, +-- loosely based on the Parsec library. + +module Data.Attoparsec.ByteString.Internal +    ( +    -- * Parser types +      Parser +    , Result + +    -- * Running parsers +    , parse +    , parseOnly + +    -- * Combinators +    , (<?>) +    , try +    , module Data.Attoparsec.Combinator + +    -- * Parsing individual bytes +    , satisfy +    , satisfyWith +    , anyWord8 +    , skip +    , word8 +    , notWord8 +    , peekWord8 + +    -- ** Byte classes +    , inClass +    , notInClass + +    -- * Parsing more complicated structures +    , storable + +    -- * Efficient string handling +    , skipWhile +    , string +    , stringTransform +    , take +    , scan +    , takeWhile +    , takeWhile1 +    , takeTill + +    -- ** Consume all remaining input +    , takeByteString +    , takeLazyByteString + +    -- * State observation and manipulation functions +    , endOfInput +    , atEnd + +    -- * Utilities +    , endOfLine +    ) where + +import Control.Applicative ((<|>), (<$>)) +import Control.Monad (when) +import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8) +import Data.Attoparsec.Combinator +import Data.Attoparsec.Internal.Types +    hiding (Parser, Input, Added, Failure, Success) +import Data.Monoid (Monoid(..)) +import Data.Word (Word8) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr (castPtr, minusPtr, plusPtr) +import Foreign.Storable (Storable(peek, sizeOf)) +import Prelude hiding (getChar, take, takeWhile) +import qualified Data.Attoparsec.Internal.Types as T +import qualified Data.ByteString as B8 +import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString.Internal as B +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Unsafe as B + +#if defined(__GLASGOW_HASKELL__) +import GHC.Base (realWorld#) +import GHC.IO (IO(IO)) +#else +import System.IO.Unsafe (unsafePerformIO) +#endif + +type Parser = T.Parser B.ByteString +type Result = IResult B.ByteString +type Input = T.Input B.ByteString +type Added = T.Added B.ByteString +type Failure r = T.Failure B.ByteString r +type Success a r = T.Success B.ByteString a r + +ensure' :: Int -> Input -> Added -> More -> Failure r -> Success B.ByteString r +        -> IResult B.ByteString r +ensure' !n0 i0 a0 m0 kf0 ks0 = +    T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0 +  where +    go !n = T.Parser $ \i a m kf ks -> +        if B.length (unI i) >= n +        then ks i a m (unI i) +        else T.runParser (demandInput >> go n) i a m kf ks + +-- | If at least @n@ bytes of input are available, return the current +-- input, otherwise fail. +ensure :: Int -> Parser B.ByteString +ensure !n = T.Parser $ \i0 a0 m0 kf ks -> +    if B.length (unI i0) >= n +    then ks i0 a0 m0 (unI i0) +    -- The uncommon case is kept out-of-line to reduce code size: +    else ensure' n i0 a0 m0 kf ks +-- Non-recursive so the bounds check can be inlined: +{-# INLINE ensure #-} + +-- | Ask for input.  If we receive any, pass it to a success +-- continuation, otherwise to a failure continuation. +prompt :: Input -> Added -> More +       -> (Input -> Added -> More -> Result r) +       -> (Input -> Added -> More -> Result r) +       -> Result r +prompt i0 a0 _m0 kf ks = Partial $ \s -> +    if B.null s +    then kf i0 a0 Complete +    else ks (i0 <> I s) (a0 <> A s) Incomplete + +-- | Immediately demand more input via a 'Partial' continuation +-- result. +demandInput :: Parser () +demandInput = T.Parser $ \i0 a0 m0 kf ks -> +    if m0 == Complete +    then kf i0 a0 m0 ["demandInput"] "not enough bytes" +    else let kf' i a m = kf i a m ["demandInput"] "not enough bytes" +             ks' i a m = ks i a m () +         in prompt i0 a0 m0 kf' ks' + +-- | This parser always succeeds.  It returns 'True' if any input is +-- available either immediately or on demand, and 'False' if the end +-- of all input has been reached. +wantInput :: Parser Bool +wantInput = T.Parser $ \i0 a0 m0 _kf ks -> +  case () of +    _ | not (B.null (unI i0)) -> ks i0 a0 m0 True +      | m0 == Complete  -> ks i0 a0 m0 False +      | otherwise       -> let kf' i a m = ks i a m False +                               ks' i a m = ks i a m True +                           in prompt i0 a0 m0 kf' ks' + +get :: Parser B.ByteString +get  = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0) + +put :: B.ByteString -> Parser () +put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 () + +-- | Attempt a parse, and if it fails, rewind the input so that no +-- input appears to have been consumed. +-- +-- This combinator is provided for compatibility with Parsec. +-- Attoparsec parsers always backtrack on failure. +try :: Parser a -> Parser a +try p = p +{-# INLINE try #-} + +-- | The parser @satisfy p@ succeeds for any byte for which the +-- predicate @p@ returns 'True'. Returns the byte that is actually +-- parsed. +-- +-- >digit = satisfy isDigit +-- >    where isDigit w = w >= 48 && w <= 57 +satisfy :: (Word8 -> Bool) -> Parser Word8 +satisfy p = do +  s <- ensure 1 +  let !w = B.unsafeHead s +  if p w +    then put (B.unsafeTail s) >> return w +    else fail "satisfy" +{-# INLINE satisfy #-} + +-- | The parser @skip p@ succeeds for any byte for which the predicate +-- @p@ returns 'True'. +-- +-- >skipDigit = skip isDigit +-- >    where isDigit w = w >= 48 && w <= 57 +skip :: (Word8 -> Bool) -> Parser () +skip p = do +  s <- ensure 1 +  if p (B.unsafeHead s) +    then put (B.unsafeTail s) +    else fail "skip" + +-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if +-- the predicate @p@ returns 'True' on the transformed value. The +-- parser returns the transformed byte that was parsed. +satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a +satisfyWith f p = do +  s <- ensure 1 +  let c = f $! B.unsafeHead s +  if p c +    then let !t = B.unsafeTail s +         in put t >> return c +    else fail "satisfyWith" +{-# INLINE satisfyWith #-} + +storable :: Storable a => Parser a +storable = hack undefined + where +  hack :: Storable b => b -> Parser b +  hack dummy = do +    (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy) +    return . B.inlinePerformIO . withForeignPtr fp $ \p -> +        peek (castPtr $ p `plusPtr` o) + +-- | Consume @n@ bytes of input, but succeed only if the predicate +-- returns 'True'. +takeWith :: Int -> (B.ByteString -> Bool) -> Parser B.ByteString +takeWith n0 p = do +  let n = max n0 0 +  s <- ensure n +  let h = B.unsafeTake n s +      t = B.unsafeDrop n s +  if p h +    then put t >> return h +    else fail "takeWith" + +-- | Consume exactly @n@ bytes of input. +take :: Int -> Parser B.ByteString +take n = takeWith n (const True) +{-# INLINE take #-} + +-- | @string s@ parses a sequence of bytes that identically match +-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no +-- input if it fails (even if a partial match). +-- +-- /Note/: The behaviour of this parser is different to that of the +-- similarly-named parser in Parsec, as this one is all-or-nothing. +-- To illustrate the difference, the following parser will fail under +-- Parsec given an input of @\"for\"@: +-- +-- >string "foo" <|> string "for" +-- +-- The reason for its failure is that the first branch is a +-- partial match, and will consume the letters @\'f\'@ and @\'o\'@ +-- before failing.  In Attoparsec, the above parser will /succeed/ on +-- that input, because the failed first branch will consume nothing. +string :: B.ByteString -> Parser B.ByteString +string s = takeWith (B.length s) (==s) +{-# INLINE string #-} + +stringTransform :: (B.ByteString -> B.ByteString) -> B.ByteString +                -> Parser B.ByteString +stringTransform f s = takeWith (B.length s) ((==f s) . f) +{-# INLINE stringTransform #-} + +-- | Skip past input for as long as the predicate returns 'True'. +skipWhile :: (Word8 -> Bool) -> Parser () +skipWhile p = go + where +  go = do +    t <- B8.dropWhile p <$> get +    put t +    when (B.null t) $ do +      input <- wantInput +      when input go +{-# INLINE skipWhile #-} + +-- | Consume input as long as the predicate returns 'False' +-- (i.e. until it returns 'True'), and return the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'True' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +takeTill :: (Word8 -> Bool) -> Parser B.ByteString +takeTill p = takeWhile (not . p) +{-# INLINE takeTill #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'False' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +takeWhile :: (Word8 -> Bool) -> Parser B.ByteString +takeWhile p = (B.concat . reverse) `fmap` go [] + where +  go acc = do +    (h,t) <- B8.span p <$> get +    put t +    if B.null t +      then do +        input <- wantInput +        if input +          then go (h:acc) +          else return (h:acc) +      else return (h:acc) +{-# INLINE takeWhile #-} + +takeRest :: Parser [B.ByteString] +takeRest = go [] + where +  go acc = do +    input <- wantInput +    if input +      then do +        s <- get +        put B.empty +        go (s:acc) +      else return (reverse acc) + +-- | Consume all remaining input and return it as a single string. +takeByteString :: Parser B.ByteString +takeByteString = B.concat `fmap` takeRest + +-- | Consume all remaining input and return it as a single string. +takeLazyByteString :: Parser L.ByteString +takeLazyByteString = L.fromChunks `fmap` takeRest + +data T s = T {-# UNPACK #-} !Int s + +-- | A stateful scanner.  The predicate consumes and transforms a +-- state argument, and each transformed state is passed to successive +-- invocations of the predicate on each byte of the input until one +-- returns 'Nothing' or the input ends. +-- +-- This parser does not fail.  It will return an empty string if the +-- predicate returns 'Nothing' on the first byte of input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +scan :: s -> (s -> Word8 -> Maybe s) -> Parser B.ByteString +scan s0 p = do +  chunks <- go [] s0 +  case chunks of +    [x] -> return x +    xs  -> return $! B.concat $ reverse xs + where +  go acc s1 = do +    let scanner (B.PS fp off len) = +          withForeignPtr fp $ \ptr0 -> do +            let start = ptr0 `plusPtr` off +                end   = start `plusPtr` len +                inner ptr !s +                  | ptr < end = do +                    w <- peek ptr +                    case p s w of +                      Just s' -> inner (ptr `plusPtr` 1) s' +                      _       -> done (ptr `minusPtr` start) s +                  | otherwise = done (ptr `minusPtr` start) s +                done !i !s = return (T i s) +            inner start s1 +    bs <- get +    let T i s' = inlinePerformIO $ scanner bs +        !h = B.unsafeTake i bs +        !t = B.unsafeDrop i bs +    put t +    if B.null t +      then do +        input <- wantInput +        if input +          then go (h:acc) s' +          else return (h:acc) +      else return (h:acc) +{-# INLINE scan #-} + +-- | Consume input as long as the predicate returns 'True', and return +-- the consumed input. +-- +-- This parser requires the predicate to succeed on at least one byte +-- of input: it will fail if the predicate never returns 'True' or if +-- there is no input left. +takeWhile1 :: (Word8 -> Bool) -> Parser B.ByteString +takeWhile1 p = do +  (`when` demandInput) =<< B.null <$> get +  (h,t) <- B8.span p <$> get +  when (B.null h) $ fail "takeWhile1" +  put t +  if B.null t +    then (h<>) `fmap` takeWhile p +    else return h + +-- | Match any byte in a set. +-- +-- >vowel = inClass "aeiou" +-- +-- Range notation is supported. +-- +-- >halfAlphabet = inClass "a-nA-N" +-- +-- To add a literal @\'-\'@ to a set, place it at the beginning or end +-- of the string. +inClass :: String -> Word8 -> Bool +inClass s = (`memberWord8` mySet) +    where mySet = charClass s +          {-# NOINLINE mySet #-} +{-# INLINE inClass #-} + +-- | Match any byte not in a set. +notInClass :: String -> Word8 -> Bool +notInClass s = not . inClass s +{-# INLINE notInClass #-} + +-- | Match any byte. +anyWord8 :: Parser Word8 +anyWord8 = satisfy $ const True +{-# INLINE anyWord8 #-} + +-- | Match a specific byte. +word8 :: Word8 -> Parser Word8 +word8 c = satisfy (== c) <?> show c +{-# INLINE word8 #-} + +-- | Match any byte except the given one. +notWord8 :: Word8 -> Parser Word8 +notWord8 c = satisfy (/= c) <?> "not " ++ show c +{-# INLINE notWord8 #-} + +-- | Match any byte. Returns 'Nothing' if end of input has been +-- reached. Does not consume any input. +-- +-- /Note/: Because this parser does not fail, do not use it with +-- combinators such as 'many', because such parsers loop until a +-- failure occurs.  Careless use will thus result in an infinite loop. +peekWord8 :: Parser (Maybe Word8) +peekWord8 = T.Parser $ \i0 a0 m0 _kf ks -> +            if B.null (unI i0) +            then if m0 == Complete +                 then ks i0 a0 m0 Nothing +                 else let ks' i a m = let !w = B.unsafeHead (unI i) +                                      in ks i a m (Just w) +                          kf' i a m = ks i a m Nothing +                      in prompt i0 a0 m0 kf' ks' +            else let !w = B.unsafeHead (unI i0) +                 in ks i0 a0 m0 (Just w) +{-# INLINE peekWord8 #-} + +-- | Match only if all input has been consumed. +endOfInput :: Parser () +endOfInput = T.Parser $ \i0 a0 m0 kf ks -> +             if B.null (unI i0) +             then if m0 == Complete +                  then ks i0 a0 m0 () +                  else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $ +                                              \ i2 a2 m2 -> ks i2 a2 m2 () +                           ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $ +                                              \ i2 a2 m2 -> kf i2 a2 m2 [] +                                                            "endOfInput" +                       in  T.runParser demandInput i0 a0 m0 kf' ks' +             else kf i0 a0 m0 [] "endOfInput" + +-- | Return an indication of whether the end of input has been +-- reached. +atEnd :: Parser Bool +atEnd = not <$> wantInput +{-# INLINE atEnd #-} + +-- | Match either a single newline character @\'\\n\'@, or a carriage +-- return followed by a newline character @\"\\r\\n\"@. +endOfLine :: Parser () +endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ()) + +-- | Name the parser, in case failure occurs. +(<?>) :: Parser a +      -> String                 -- ^ the name to use if parsing fails +      -> Parser a +p <?> msg0 = T.Parser $ \i0 a0 m0 kf ks -> +             let kf' i a m strs msg = kf i a m (msg0:strs) msg +             in T.runParser p i0 a0 m0 kf' ks +{-# INLINE (<?>) #-} +infix 0 <?> + +-- | Terminal failure continuation. +failK :: Failure a +failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg +{-# INLINE failK #-} + +-- | Terminal success continuation. +successK :: Success a a +successK i0 _a0 _m0 a = Done (unI i0) a +{-# INLINE successK #-} + +-- | Run a parser. +parse :: Parser a -> B.ByteString -> Result a +parse m s = T.runParser m (I s) mempty Incomplete failK successK +{-# INLINE parse #-} + +-- | Run a parser that cannot be resupplied via a 'Partial' result. +parseOnly :: Parser a -> B.ByteString -> Either String a +parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of +                  Fail _ _ err -> Left err +                  Done _ a     -> Right a +                  _            -> error "parseOnly: impossible error!" +{-# INLINE parseOnly #-} + +-- | Just like unsafePerformIO, but we inline it. Big performance gains as +-- it exposes lots of things to further inlining. /Very unsafe/. In +-- particular, you should do no memory allocation inside an +-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@. +inlinePerformIO :: IO a -> a +#if defined(__GLASGOW_HASKELL__) +inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r +#else +inlinePerformIO = unsafePerformIO +#endif +{-# INLINE inlinePerformIO #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs new file mode 100644 index 00000000..cb9cee83 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs @@ -0,0 +1,205 @@ +{-# LANGUAGE BangPatterns, CPP #-} +-- | +-- Module      :  Data.Attoparsec.Combinator +-- Copyright   :  Daan Leijen 1999-2001, Bryan O'Sullivan 2009-2010 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  portable +-- +-- Useful parser combinators, similar to those provided by Parsec. +module Data.Attoparsec.Combinator +    ( +      choice +    , count +    , option +    , many' +    , many1 +    , many1' +    , manyTill +    , manyTill' +    , sepBy +    , sepBy' +    , sepBy1 +    , sepBy1' +    , skipMany +    , skipMany1 +    , eitherP +    ) where + +import Control.Applicative (Alternative(..), Applicative(..), empty, liftA2, +                            (<|>), (*>), (<$>)) +import Control.Monad (MonadPlus(..)) +#if !MIN_VERSION_base(4,2,0) +import Control.Applicative (many) +#endif + +#if __GLASGOW_HASKELL__ >= 700 +import Data.Attoparsec.Internal.Types (Parser) +import Data.ByteString (ByteString) +#endif + +-- | @choice ps@ tries to apply the actions in the list @ps@ in order, +-- until one of them succeeds. Returns the value of the succeeding +-- action. +choice :: Alternative f => [f a] -> f a +choice = foldr (<|>) empty +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE choice :: [Parser ByteString a] -> Parser ByteString a #-} +#endif + +-- | @option x p@ tries to apply action @p@. If @p@ fails without +-- consuming input, it returns the value @x@, otherwise the value +-- returned by @p@. +-- +-- > priority  = option 0 (digitToInt <$> digit) +option :: Alternative f => a -> f a -> f a +option x p = p <|> pure x +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-} +#endif + +-- | A version of 'liftM2' that is strict in the result of its first +-- action. +liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c +liftM2' f a b = do +  !x <- a +  y <- b +  return (f x y) +{-# INLINE liftM2' #-} + +-- | @many' p@ applies the action @p@ /zero/ or more times. Returns a +-- list of the returned values of @p@. The value returned by @p@ is +-- forced to WHNF. +-- +-- >  word  = many' letter +many' :: (MonadPlus m) => m a -> m [a] +many' p = many_p +  where many_p = some_p `mplus` return [] +        some_p = liftM2' (:) p many_p +{-# INLINE many' #-} + +-- | @many1 p@ applies the action @p@ /one/ or more times. Returns a +-- list of the returned values of @p@. +-- +-- >  word  = many1 letter +many1 :: Alternative f => f a -> f [a] +many1 p = liftA2 (:) p (many p) +{-# INLINE many1 #-} + +-- | @many1' p@ applies the action @p@ /one/ or more times. Returns a +-- list of the returned values of @p@. The value returned by @p@ is +-- forced to WHNF. +-- +-- >  word  = many1' letter +many1' :: (MonadPlus m) => m a -> m [a] +many1' p = liftM2' (:) p (many' p) +{-# INLINE many1' #-} + +-- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. +-- +-- > commaSep p  = p `sepBy` (symbol ",") +sepBy :: Alternative f => f a -> f s -> f [a] +sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure [] +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s +                     -> Parser ByteString [a] #-} +#endif + +-- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. The value +-- returned by @p@ is forced to WHNF. +-- +-- > commaSep p  = p `sepBy'` (symbol ",") +sepBy' :: (MonadPlus m) => m a -> m s -> m [a] +sepBy' p s = scan `mplus` return [] +  where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s +                      -> Parser ByteString [a] #-} +#endif + +-- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. +-- +-- > commaSep p  = p `sepBy1` (symbol ",") +sepBy1 :: Alternative f => f a -> f s -> f [a] +sepBy1 p s = scan +    where scan = liftA2 (:) p ((s *> scan) <|> pure []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s +                      -> Parser ByteString [a] #-} +#endif + +-- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated +-- by @sep@. Returns a list of the values returned by @p@. The value +-- returned by @p@ is forced to WHNF. +-- +-- > commaSep p  = p `sepBy1'` (symbol ",") +sepBy1' :: (MonadPlus m) => m a -> m s -> m [a] +sepBy1' p s = scan +    where scan = liftM2' (:) p ((s >> scan) `mplus` return []) +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s +                       -> Parser ByteString [a] #-} +#endif + +-- | @manyTill p end@ applies action @p@ /zero/ or more times until +-- action @end@ succeeds, and returns the list of values returned by +-- @p@.  This can be used to scan comments: +-- +-- >  simpleComment   = string "<!--" *> manyTill anyChar (try (string "-->")) +-- +-- Note the overlapping parsers @anyChar@ and @string \"<!--\"@, and +-- therefore the use of the 'try' combinator. +manyTill :: Alternative f => f a -> f b -> f [a] +manyTill p end = scan +    where scan = (end *> pure []) <|> liftA2 (:) p scan +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE manyTill :: Parser ByteString a -> Parser ByteString b +                        -> Parser ByteString [a] #-} +#endif + +-- | @manyTill' p end@ applies action @p@ /zero/ or more times until +-- action @end@ succeeds, and returns the list of values returned by +-- @p@.  This can be used to scan comments: +-- +-- >  simpleComment   = string "<!--" *> manyTill' anyChar (try (string "-->")) +-- +-- Note the overlapping parsers @anyChar@ and @string \"<!--\"@, and +-- therefore the use of the 'try' combinator. The value returned by @p@ +-- is forced to WHNF. +manyTill' :: (MonadPlus m) => m a -> m b -> m [a] +manyTill' p end = scan +    where scan = (end >> return []) `mplus` liftM2' (:) p scan +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE manyTill' :: Parser ByteString a -> Parser ByteString b +                         -> Parser ByteString [a] #-} +#endif + +-- | Skip zero or more instances of an action. +skipMany :: Alternative f => f a -> f () +skipMany p = scan +    where scan = (p *> scan) <|> pure () +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE skipMany :: Parser ByteString a -> Parser ByteString () #-} +#endif + +-- | Skip one or more instances of an action. +skipMany1 :: Alternative f => f a -> f () +skipMany1 p = p *> skipMany p +#if __GLASGOW_HASKELL__ >= 700 +{-# SPECIALIZE skipMany1 :: Parser ByteString a -> Parser ByteString () #-} +#endif + +-- | Apply the given action repeatedly, returning every result. +count :: Monad m => Int -> m a -> m [a] +count n p = sequence (replicate n p) +{-# INLINE count #-} + +-- | Combine two alternatives. +eitherP :: (Alternative f) => f a -> f b -> f (Either a b) +eitherP a b = (Left <$> a) <|> (Right <$> b) +{-# INLINE eitherP #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs new file mode 100644 index 00000000..0572d682 --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs @@ -0,0 +1,31 @@ +-- | +-- Module      :  Data.Attoparsec.Internal +-- Copyright   :  Bryan O'Sullivan 2012 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient parser combinators, loosely based on the Parsec +-- library. + +module Data.Attoparsec.Internal +    ( +      compareResults +    ) where + +import Data.Attoparsec.Internal.Types (IResult(..)) + +-- | Compare two 'IResult' values for equality. +-- +-- If both 'IResult's are 'Partial', the result will be 'Nothing', as +-- they are incomplete and hence their equality cannot be known. +-- (This is why there is no 'Eq' instance for 'IResult'.) +compareResults :: (Eq t, Eq r) => IResult t r -> IResult t r -> Maybe Bool +compareResults (Fail i0 ctxs0 msg0) (Fail i1 ctxs1 msg1) = +    Just (i0 == i1 && ctxs0 == ctxs1 && msg0 == msg1) +compareResults (Done i0 r0) (Done i1 r1) = +    Just (i0 == i1 && r0 == r1) +compareResults (Partial _) (Partial _) = Nothing +compareResults _ _ = Just False diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs new file mode 100644 index 00000000..e47e5c9e --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs @@ -0,0 +1,227 @@ +{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, OverloadedStrings, +    Rank2Types, RecordWildCards #-} +-- | +-- Module      :  Data.Attoparsec.Internal.Types +-- Copyright   :  Bryan O'Sullivan 2007-2011 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- Simple, efficient parser combinators, loosely based on the Parsec +-- library. + +module Data.Attoparsec.Internal.Types +    ( +      Parser(..) +    , Failure +    , Success +    , IResult(..) +    , Input(..) +    , Added(..) +    , More(..) +    , addS +    , (<>) +    ) where + +import Control.Applicative (Alternative(..), Applicative(..), (<$>)) +import Control.DeepSeq (NFData(rnf)) +import Control.Monad (MonadPlus(..)) +import Data.Monoid (Monoid(..)) +import Prelude hiding (getChar, take, takeWhile) + +-- | The result of a parse.  This is parameterised over the type @t@ +-- of string that was processed. +-- +-- This type is an instance of 'Functor', where 'fmap' transforms the +-- value in a 'Done' result. +data IResult t r = Fail t [String] String +                 -- ^ The parse failed.  The 't' parameter is the +                 -- input that had not yet been consumed when the +                 -- failure occurred.  The @[@'String'@]@ is a list of +                 -- contexts in which the error occurred.  The +                 -- 'String' is the message describing the error, if +                 -- any. +                 | Partial (t -> IResult t r) +                 -- ^ Supply this continuation with more input so that +                 -- the parser can resume.  To indicate that no more +                 -- input is available, use an empty string. +                 | Done t r +                 -- ^ The parse succeeded.  The 't' parameter is the +                 -- input that had not yet been consumed (if any) when +                 -- the parse succeeded. + +instance (Show t, Show r) => Show (IResult t r) where +    show (Fail t stk msg) = +        "Fail " ++ show t ++ " " ++ show stk ++ " " ++ show msg +    show (Partial _)      = "Partial _" +    show (Done t r)       = "Done " ++ show t ++ " " ++ show r + +instance (NFData t, NFData r) => NFData (IResult t r) where +    rnf (Fail t stk msg) = rnf t `seq` rnf stk `seq` rnf msg +    rnf (Partial _)  = () +    rnf (Done t r)   = rnf t `seq` rnf r +    {-# INLINE rnf #-} + +fmapR :: (a -> b) -> IResult t a -> IResult t b +fmapR _ (Fail t stk msg) = Fail t stk msg +fmapR f (Partial k)       = Partial (fmapR f . k) +fmapR f (Done t r)       = Done t (f r) + +instance Functor (IResult t) where +    fmap = fmapR +    {-# INLINE fmap #-} + +newtype Input t = I {unI :: t} deriving (Monoid) +newtype Added t = A {unA :: t} deriving (Monoid) + +-- | The core parser type.  This is parameterised over the type @t@ of +-- string being processed. +-- +-- This type is an instance of the following classes: +-- +-- * 'Monad', where 'fail' throws an exception (i.e. fails) with an +--   error message. +-- +-- * 'Functor' and 'Applicative', which follow the usual definitions. +-- +-- * 'MonadPlus', where 'mzero' fails (with no error message) and +--   'mplus' executes the right-hand parser if the left-hand one +--   fails.  When the parser on the right executes, the input is reset +--   to the same state as the parser on the left started with. (In +--   other words, Attoparsec is a backtracking parser that supports +--   arbitrary lookahead.) +-- +-- * 'Alternative', which follows 'MonadPlus'. +newtype Parser t a = Parser { +      runParser :: forall r. Input t -> Added t -> More +                -> Failure t   r +                -> Success t a r +                -> IResult t r +    } + +type Failure t   r = Input t -> Added t -> More -> [String] -> String +                   -> IResult t r +type Success t a r = Input t -> Added t -> More -> a -> IResult t r + +-- | Have we read all available input? +data More = Complete | Incomplete +            deriving (Eq, Show) + +instance Monoid More where +    mappend c@Complete _ = c +    mappend _ m          = m +    mempty               = Incomplete + +addS :: (Monoid t) => +        Input t -> Added t -> More +     -> Input t -> Added t -> More +     -> (Input t -> Added t -> More -> r) -> r +addS i0 a0 m0 _i1 a1 m1 f = +    let !i = i0 <> I (unA a1) +        a  = a0 <> a1 +        !m = m0 <> m1 +    in f i a m +{-# INLINE addS #-} + +bindP :: Parser t a -> (a -> Parser t b) -> Parser t b +bindP m g = +    Parser $ \i0 a0 m0 kf ks -> runParser m i0 a0 m0 kf $ +                                \i1 a1 m1 a -> runParser (g a) i1 a1 m1 kf ks +{-# INLINE bindP #-} + +returnP :: a -> Parser t a +returnP a = Parser (\i0 a0 m0 _kf ks -> ks i0 a0 m0 a) +{-# INLINE returnP #-} + +instance Monad (Parser t) where +    return = returnP +    (>>=)  = bindP +    fail   = failDesc + +noAdds :: (Monoid t) => +          Input t -> Added t -> More +       -> (Input t -> Added t -> More -> r) -> r +noAdds i0 _a0 m0 f = f i0 mempty m0 +{-# INLINE noAdds #-} + +plus :: (Monoid t) => Parser t a -> Parser t a -> Parser t a +plus a b = Parser $ \i0 a0 m0 kf ks -> +           let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $ +                                  \ i2 a2 m2 -> runParser b i2 a2 m2 kf ks +               ks' i1 a1 m1 = ks i1 (a0 <> a1) m1 +           in  noAdds i0 a0 m0 $ \i2 a2 m2 -> runParser a i2 a2 m2 kf' ks' +{-# INLINE plus #-} + +instance (Monoid t) => MonadPlus (Parser t) where +    mzero = failDesc "mzero" +    {-# INLINE mzero #-} +    mplus = plus + +fmapP :: (a -> b) -> Parser t a -> Parser t b +fmapP p m = Parser $ \i0 a0 m0 f k -> +            runParser m i0 a0 m0 f $ \i1 a1 s1 a -> k i1 a1 s1 (p a) +{-# INLINE fmapP #-} + +instance Functor (Parser t) where +    fmap = fmapP +    {-# INLINE fmap #-} + +apP :: Parser t (a -> b) -> Parser t a -> Parser t b +apP d e = do +  b <- d +  a <- e +  return (b a) +{-# INLINE apP #-} + +instance Applicative (Parser t) where +    pure   = returnP +    {-# INLINE pure #-} +    (<*>)  = apP +    {-# INLINE (<*>) #-} + +#if MIN_VERSION_base(4,2,0) +    -- These definitions are equal to the defaults, but this +    -- way the optimizer doesn't have to work so hard to figure +    -- that out. +    (*>)   = (>>) +    {-# INLINE (*>) #-} +    x <* y = x >>= \a -> y >> return a +    {-# INLINE (<*) #-} +#endif + +instance (Monoid t) => Monoid (Parser t a) where +    mempty  = failDesc "mempty" +    {-# INLINE mempty #-} +    mappend = plus +    {-# INLINE mappend #-} + +instance (Monoid t) => Alternative (Parser t) where +    empty = failDesc "empty" +    {-# INLINE empty #-} + +    (<|>) = plus +    {-# INLINE (<|>) #-} + +#if MIN_VERSION_base(4,2,0) +    many v = many_v +        where many_v = some_v <|> pure [] +              some_v = (:) <$> v <*> many_v +    {-# INLINE many #-} + +    some v = some_v +      where +        many_v = some_v <|> pure [] +        some_v = (:) <$> v <*> many_v +    {-# INLINE some #-} +#endif + +failDesc :: String -> Parser t a +failDesc err = Parser (\i0 a0 m0 kf _ks -> kf i0 a0 m0 [] msg) +    where msg = "Failed reading: " ++ err +{-# INLINE failDesc #-} + +(<>) :: (Monoid m) => m -> m -> m +(<>) = mappend +{-# INLINE (<>) #-} diff --git a/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs new file mode 100644 index 00000000..bf175f4b --- /dev/null +++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs @@ -0,0 +1,127 @@ +{-# LANGUAGE DeriveDataTypeable #-} +-- | +-- Module      :  Data.Attoparsec.Number +-- Copyright   :  Bryan O'Sullivan 2011 +-- License     :  BSD3 +-- +-- Maintainer  :  bos@serpentine.com +-- Stability   :  experimental +-- Portability :  unknown +-- +-- A simple number type, useful for parsing both exact and inexact +-- quantities without losing much precision. +module Data.Attoparsec.Number +    ( +      Number(..) +    ) where + +import Control.DeepSeq (NFData(rnf)) +import Data.Data (Data) +import Data.Function (on) +import Data.Typeable (Typeable) + +-- | A numeric type that can represent integers accurately, and +-- floating point numbers to the precision of a 'Double'. +data Number = I !Integer +            | D {-# UNPACK #-} !Double +              deriving (Typeable, Data) + +instance Show Number where +    show (I a) = show a +    show (D a) = show a + +instance NFData Number where +    rnf (I _) = () +    rnf (D _) = () +    {-# INLINE rnf #-} + +binop :: (Integer -> Integer -> a) -> (Double -> Double -> a) +      -> Number -> Number -> a +binop _ d (D a) (D b) = d a b +binop i _ (I a) (I b) = i a b +binop _ d (D a) (I b) = d a (fromIntegral b) +binop _ d (I a) (D b) = d (fromIntegral a) b +{-# INLINE binop #-} + +instance Eq Number where +    (==) = binop (==) (==) +    {-# INLINE (==) #-} + +    (/=) = binop (/=) (/=) +    {-# INLINE (/=) #-} + +instance Ord Number where +    (<) = binop (<) (<) +    {-# INLINE (<) #-} + +    (<=) = binop (<=) (<=) +    {-# INLINE (<=) #-} + +    (>) = binop (>) (>) +    {-# INLINE (>) #-} + +    (>=) = binop (>=) (>=) +    {-# INLINE (>=) #-} + +    compare = binop compare compare +    {-# INLINE compare #-} + +instance Num Number where +    (+) = binop (((I$!).) . (+)) (((D$!).) . (+)) +    {-# INLINE (+) #-} + +    (-) = binop (((I$!).) . (-)) (((D$!).) . (-)) +    {-# INLINE (-) #-} + +    (*) = binop (((I$!).) . (*)) (((D$!).) . (*)) +    {-# INLINE (*) #-} + +    abs (I a) = I $! abs a +    abs (D a) = D $! abs a +    {-# INLINE abs #-} + +    negate (I a) = I $! negate a +    negate (D a) = D $! negate a +    {-# INLINE negate #-} + +    signum (I a) = I $! signum a +    signum (D a) = D $! signum a +    {-# INLINE signum #-} + +    fromInteger = (I$!) . fromInteger +    {-# INLINE fromInteger #-} + +instance Real Number where +    toRational (I a) = fromIntegral a +    toRational (D a) = toRational a +    {-# INLINE toRational #-} + +instance Fractional Number where +    fromRational = (D$!) . fromRational +    {-# INLINE fromRational #-} + +    (/) = binop (((D$!).) . (/) `on` fromIntegral) +                (((D$!).) . (/)) +    {-# INLINE (/) #-} + +    recip (I a) = D $! recip (fromIntegral a) +    recip (D a) = D $! recip a +    {-# INLINE recip #-} + +instance RealFrac Number where +    properFraction (I a) = (fromIntegral a,0) +    properFraction (D a) = case properFraction a of +                             (i,d) -> (i,D d) +    {-# INLINE properFraction #-} +    truncate (I a) = fromIntegral a +    truncate (D a) = truncate a +    {-# INLINE truncate #-} +    round (I a) = fromIntegral a +    round (D a) = round a +    {-# INLINE round #-} +    ceiling (I a) = fromIntegral a +    ceiling (D a) = ceiling a +    {-# INLINE ceiling #-} +    floor (I a) = fromIntegral a +    floor (D a) = floor a +    {-# INLINE floor #-} | 
