From 79c7159101c03bbbc7350e07963896ca2bb97c02 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Wed, 25 Apr 2018 11:24:07 -0700 Subject: Replace 'attoparsec' with 'parsec' (#799) * Remove attoparsec with parsec and start fixing failed parses * Make tests pass * Fix encoding issues The Haddock parser no longer needs to worry about bytestrings. All the internal parsing work in haddock-library happens over 'Text'. * Remove attoparsec vendor * Fix stuff broken in 'attoparsec' -> 'parsec' * hyperlinks * codeblocks * examples Pretty much all issues are due to attoparsec's backtracking failure behaviour vs. parsec's non-backtracking failure behaviour. * Fix small TODOs * Missing quote + Haddocks * Better handle spaces before/after paragraphs * Address review comments --- haddock-library/haddock-library.cabal | 64 +-- .../src/Documentation/Haddock/Parser.hs | 428 ++++++++-------- .../src/Documentation/Haddock/Parser/Monad.hs | 225 +++------ .../src/Documentation/Haddock/Parser/Util.hs | 82 ++-- .../vendor/attoparsec-0.13.1.0/Data/Attoparsec.hs | 23 - .../Data/Attoparsec/ByteString.hs | 230 --------- .../Data/Attoparsec/ByteString/Buffer.hs | 156 ------ .../Data/Attoparsec/ByteString/Char8.hs | 464 ------------------ .../Data/Attoparsec/ByteString/FastSet.hs | 115 ----- .../Data/Attoparsec/ByteString/Internal.hs | 536 --------------------- .../Data/Attoparsec/Combinator.hs | 233 --------- .../Data/Attoparsec/Internal.hs | 157 ------ .../Data/Attoparsec/Internal/Fhthagn.hs | 18 - .../Data/Attoparsec/Internal/Types.hs | 243 ---------- .../attoparsec-0.13.1.0/Data/Attoparsec/Number.hs | 137 ------ haddock-library/vendor/attoparsec-0.13.1.0/LICENSE | 30 -- 16 files changed, 358 insertions(+), 2783 deletions(-) delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Combinator.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Fhthagn.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Types.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Number.hs delete mode 100644 haddock-library/vendor/attoparsec-0.13.1.0/LICENSE (limited to 'haddock-library') diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal index deeaa98d..49ec826c 100644 --- a/haddock-library/haddock-library.cabal +++ b/haddock-library/haddock-library.cabal @@ -10,7 +10,6 @@ description: Haddock is a documentation-generation tool for Haskell itself, see the ‘haddock’ package. license: BSD3 license-files: LICENSE - vendor/attoparsec-0.13.1.0/LICENSE maintainer: Alex Biehl , Simon Hengel , Mateusz Kowalczyk homepage: http://www.haskell.org/haddock/ bug-reports: https://github.com/haskell/haddock/issues @@ -27,9 +26,8 @@ library , bytestring >= 0.9.2.1 && < 0.11 , containers >= 0.4.2.1 && < 0.6 , transformers >= 0.3.0 && < 0.6 - - -- internal sub-lib - build-depends: attoparsec + , text >= 1.2.3.0 && < 1.3 + , parsec >= 3.1.13.0 && < 3.2 hs-source-dirs: src @@ -48,39 +46,6 @@ library if impl(ghc >= 8.0) ghc-options: -Wcompat -Wnoncanonical-monad-instances -Wnoncanonical-monadfail-instances -library attoparsec - default-language: Haskell2010 - - build-depends: - base >= 4.5 && < 4.12 - , bytestring >= 0.9.2.1 && < 0.11 - , deepseq >= 1.3 && < 1.5 - - hs-source-dirs: vendor/attoparsec-0.13.1.0 - - -- NB: haddock-library needs only small part of lib:attoparsec - -- internally, so we only bundle that subset here - exposed-modules: - Data.Attoparsec.ByteString - Data.Attoparsec.ByteString.Char8 - Data.Attoparsec.Combinator - - other-modules: - Data.Attoparsec - Data.Attoparsec.ByteString.Buffer - Data.Attoparsec.ByteString.FastSet - Data.Attoparsec.ByteString.Internal - Data.Attoparsec.Internal - Data.Attoparsec.Internal.Fhthagn - Data.Attoparsec.Internal.Types - Data.Attoparsec.Number - - ghc-options: -funbox-strict-fields -Wall -fwarn-tabs -O2 - if impl(ghc >= 8.0) - ghc-options: -Wcompat -Wnoncanonical-monad-instances -Wnoncanonical-monadfail-instances - else - build-depends: semigroups ^>= 0.18.3, fail ^>= 4.9.0.0 - test-suite spec type: exitcode-stdio-1.0 @@ -106,21 +71,16 @@ test-suite spec Documentation.Haddock.Utf8Spec build-depends: - base-compat ^>= 0.9.3 + base >= 4.5 && < 4.12 + , base-compat ^>= 0.9.3 + , bytestring >= 0.9.2.1 && < 0.11 , containers >= 0.4.2.1 && < 0.6 , transformers >= 0.3.0 && < 0.6 , hspec ^>= 2.4.4 , QuickCheck ^>= 2.11 - - -- internal sub-lib - build-depends: attoparsec - - -- Versions for the dependencies below are transitively pinned by - -- dependency on haddock-library:lib:attoparsec - build-depends: - base - , bytestring - , deepseq + , text >= 1.2.3.0 && < 1.3 + , parsec >= 3.1.13.0 && < 3.2 + , deepseq >= 1.3 && < 1.5 build-tool-depends: hspec-discover:hspec-discover ^>= 2.4.4 @@ -132,7 +92,8 @@ test-suite fixtures ghc-options: -Wall -O0 hs-source-dirs: fixtures build-depends: - base-compat ^>= 0.9.3 + base >= 4.5 && < 4.12 + , base-compat ^>= 0.9.3 , directory ^>= 1.3.0.2 , filepath ^>= 1.4.1.2 , optparse-applicative ^>= 0.14.0.0 @@ -142,11 +103,6 @@ test-suite fixtures build-depends: haddock-library - -- Versions for the dependencies below are transitively pinned by - -- dependency on haddock-library:lib:attoparsec - build-depends: - base - source-repository head type: git subdir: haddock-library diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index 4921b3a7..d79da40b 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -16,27 +16,35 @@ -- to be -- -- @'toRegular' . '_doc' . 'parseParas'@ -module Documentation.Haddock.Parser ( parseString, parseParas - , overIdentifier, toRegular, Identifier - ) where +module Documentation.Haddock.Parser ( + parseString, + parseParas, + overIdentifier, + toRegular, + Identifier +) where import Control.Applicative import Control.Arrow (first) import Control.Monad -import qualified Data.ByteString.Char8 as BS -import Data.Char (chr, isUpper, isAlpha, isAlphaNum) -import Data.List (stripPrefix, intercalate, unfoldr, elemIndex) +import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) +import Data.List (intercalate, unfoldr, elemIndex, notElem) import Data.Maybe (fromMaybe, mapMaybe) import Data.Monoid import qualified Data.Set as Set import Documentation.Haddock.Doc -import Documentation.Haddock.Parser.Monad hiding (take, endOfLine) +import Documentation.Haddock.Parser.Monad import Documentation.Haddock.Parser.Util import Documentation.Haddock.Types -import Documentation.Haddock.Utf8 import Prelude hiding (takeWhile) import qualified Prelude as P +import qualified Text.Parsec as Parsec +import Text.Parsec (try) + +import qualified Data.Text as T +import Data.Text (Text) + #if MIN_VERSION_base(4,9,0) import Text.Read.Lex (isSymbolChar) #else @@ -56,7 +64,7 @@ isSymbolChar c = not (isPuncChar c) && case generalCategory c of ModifierSymbol -> True OtherSymbol -> True DashPunctuation -> True - OtherPunctuation -> not (c `elem` ("'\"" :: String)) + OtherPunctuation -> c `notElem` ("'\"" :: String) ConnectorPunctuation -> c /= '_' _ -> False where @@ -109,8 +117,14 @@ overIdentifier f d = g d g (DocHeader (Header l x)) = DocHeader . Header l $ g x g (DocTable (Table h b)) = DocTable (Table (map (fmap g) h) (map (fmap g) b)) -parse :: Parser a -> BS.ByteString -> (ParserState, a) -parse p = either err id . parseOnly (p <* endOfInput) + +choice' :: [Parser a] -> Parser a +choice' [] = empty +choice' [p] = p +choice' (p : ps) = try p <|> choice' ps + +parse :: Parser a -> Text -> (ParserState, a) +parse p = either err id . parseOnly (p <* Parsec.eof) where err = error . ("Haddock.Parser.parse: " ++) @@ -127,31 +141,46 @@ parseParas pkg input = case parseParasState input of } parseParasState :: String -> (ParserState, DocH mod Identifier) -parseParasState = - parse (p <* skipSpace) . encodeUtf8 . (++ "\n") . filter (/= '\r') +parseParasState = parse (emptyLines *> p) . T.pack . (++ "\n") . filter (/= '\r') where p :: Parser (DocH mod Identifier) - p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + p = docConcat <$> many (paragraph <* emptyLines) + + emptyLines :: Parser () + emptyLines = void $ many (try (skipHorizontalSpace *> "\n")) parseParagraphs :: String -> Parser (DocH mod Identifier) parseParagraphs input = case parseParasState input of - (state, a) -> setParserState state >> return a + (state, a) -> Parsec.putState state *> pure a --- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which --- drops leading whitespace and encodes the string to UTF8 first. +-- | Variant of 'parseText' for 'String' instead of 'Text' parseString :: String -> DocH mod Identifier -parseString = parseStringBS . encodeUtf8 . dropWhile isSpace . filter (/= '\r') +parseString = parseText . T.pack -parseStringBS :: BS.ByteString -> DocH mod Identifier -parseStringBS = snd . parse p +-- | Parse a text paragraph. Actually just a wrapper over 'parseParagraph' which +-- drops leading whitespace. +parseText :: Text -> DocH mod Identifier +parseText = parseParagraph . T.dropWhile isSpace . T.filter (/= '\r') + +parseParagraph :: Text -> DocH mod Identifier +parseParagraph = snd . parse p where p :: Parser (DocH mod Identifier) - p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName - <|> picture <|> mathDisplay <|> mathInline - <|> markdownImage - <|> hyperlink <|> bold - <|> emphasis <|> encodedChar <|> string' - <|> skipSpecialChar) + p = docConcat <$> many (choice' [ monospace + , anchor + , identifier + , moduleName + , picture + , mathDisplay + , mathInline + , markdownImage + , hyperlink + , bold + , emphasis + , encodedChar + , string' + , skipSpecialChar + ]) -- | Parses and processes -- @@ -175,7 +204,7 @@ specialChar = "_/<@\"&'`# " -- to ensure that we have already given a chance to more meaningful parsers -- before capturing their characers. string' :: Parser (DocH mod a) -string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialChar) +string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar) where unescape "" = "" unescape ('\\':x:xs) = x : unescape xs @@ -185,45 +214,45 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialC -- This is done to skip over any special characters belonging to other -- elements but which were not deemed meaningful at their positions. skipSpecialChar :: Parser (DocH mod a) -skipSpecialChar = DocString . return <$> satisfy (inClass specialChar) +skipSpecialChar = DocString . return <$> Parsec.oneOf specialChar -- | Emphasis parser. -- -- >>> parseString "/Hello world/" -- DocEmphasis (DocString "Hello world") emphasis :: Parser (DocH mod Identifier) -emphasis = DocEmphasis . parseStringBS <$> - mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") +emphasis = DocEmphasis . parseParagraph <$> + disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/") -- | Bold parser. -- -- >>> parseString "__Hello world__" -- DocBold (DocString "Hello world") bold :: Parser (DocH mod Identifier) -bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") +bold = DocBold . parseParagraph <$> disallowNewline ("__" *> takeUntil "__") -disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString -disallowNewline = mfilter ('\n' `BS.notElem`) +disallowNewline :: Parser Text -> Parser Text +disallowNewline = mfilter (T.all (/= '\n')) -- | Like `takeWhile`, but unconditionally take escaped characters. -takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString -takeWhile_ p = scan False p_ +takeWhile_ :: (Char -> Bool) -> Parser Text +takeWhile_ p = scan p_ False where p_ escaped c | escaped = Just False | not $ p c = Nothing | otherwise = Just (c == '\\') --- | Like `takeWhile1`, but unconditionally take escaped characters. -takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString -takeWhile1_ = mfilter (not . BS.null) . takeWhile_ +-- | Like 'takeWhile1', but unconditionally take escaped characters. +takeWhile1_ :: (Char -> Bool) -> Parser Text +takeWhile1_ = mfilter (not . T.null) . takeWhile_ -- | Text anchors to allow for jumping around the generated documentation. -- -- >>> parseString "#Hello world#" -- DocAName "Hello world" anchor :: Parser (DocH mod a) -anchor = DocAName . decodeUtf8 <$> +anchor = DocAName . T.unpack <$> disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#") -- | Monospaced strings. @@ -231,7 +260,7 @@ anchor = DocAName . decodeUtf8 <$> -- >>> parseString "@cruel@" -- DocMonospaced (DocString "cruel") monospace :: Parser (DocH mod Identifier) -monospace = DocMonospaced . parseStringBS +monospace = DocMonospaced . parseParagraph <$> ("@" *> takeWhile1_ (/= '@') <* "@") -- | Module names. @@ -239,14 +268,14 @@ monospace = DocMonospaced . parseStringBS -- Note that we allow '#' and '\' to support anchors (old style anchors are of -- the form "SomeModule\#anchor"). moduleName :: Parser (DocH mod a) -moduleName = DocModule <$> (char '"' *> modid <* char '"') +moduleName = DocModule <$> ("\"" *> modid <* "\"") where - modid = intercalate "." <$> conid `sepBy1` "." + modid = intercalate "." <$> conid `Parsec.sepBy1` "." conid = (:) - <$> satisfyUnicode (\c -> isAlpha c && isUpper c) - <*> many (satisfyUnicode conChar <|> char '\\' <|> char '#') + <$> Parsec.satisfy (\c -> isAlpha c && isUpper c) + <*> many (conChar <|> Parsec.oneOf "\\#") - conChar c = isAlphaNum c || c == '_' + conChar = Parsec.alphaNum <|> Parsec.char '_' -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify -- a title for the picture. @@ -256,7 +285,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"') -- >>> parseString "<>" -- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"}) picture :: Parser (DocH mod a) -picture = DocPic . makeLabeled Picture . decodeUtf8 +picture = DocPic . makeLabeled Picture <$> disallowNewline ("<<" *> takeUntil ">>") -- | Inline math parser, surrounded by \\( and \\). @@ -264,7 +293,7 @@ picture = DocPic . makeLabeled Picture . decodeUtf8 -- >>> parseString "\\(\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\)" -- DocMathInline "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}" mathInline :: Parser (DocH mod a) -mathInline = DocMathInline . decodeUtf8 +mathInline = DocMathInline . T.unpack <$> disallowNewline ("\\(" *> takeUntil "\\)") -- | Display math parser, surrounded by \\[ and \\]. @@ -272,7 +301,7 @@ mathInline = DocMathInline . decodeUtf8 -- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]" -- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}" mathDisplay :: Parser (DocH mod a) -mathDisplay = DocMathDisplay . decodeUtf8 +mathDisplay = DocMathDisplay . T.unpack <$> ("\\[" *> takeUntil "\\]") markdownImage :: Parser (DocH mod a) @@ -282,20 +311,21 @@ markdownImage = fromHyperlink <$> ("!" *> linkParser) -- | Paragraph parser, called by 'parseParas'. paragraph :: Parser (DocH mod Identifier) -paragraph = examples <|> table <|> do - indent <- takeIndent - choice - [ since - , unorderedList indent - , orderedList indent - , birdtracks - , codeblock - , property - , header - , textParagraphThatStartsWithMarkdownLink - , definitionList indent - , docParagraph <$> textParagraph - ] +paragraph = choice' [ examples + , table + , do indent <- takeIndent + choice' [ since + , unorderedList indent + , orderedList indent + , birdtracks + , codeblock + , property + , header + , textParagraphThatStartsWithMarkdownLink + , definitionList indent + , docParagraph <$> textParagraph + ] + ] -- | Provides support for grid tables. -- @@ -317,45 +347,45 @@ table :: Parser (DocH mod Identifier) table = do -- first we parse the first row, which determines the width of the table firstRow <- parseFirstRow - let len = BS.length firstRow + let len = T.length firstRow -- then we parse all consequtive rows starting and ending with + or |, -- of the width `len`. - restRows <- many (parseRestRows len) + restRows <- many (try (parseRestRows len)) -- Now we gathered the table block, the next step is to split the block -- into cells. DocTable <$> tableStepTwo len (firstRow : restRows) where - parseFirstRow :: Parser BS.ByteString + parseFirstRow :: Parser Text parseFirstRow = do skipHorizontalSpace -- upper-left corner is + - c <- char '+' - cs <- many1 (char '-' <|> char '+') + c <- Parsec.char '+' + cs <- some (Parsec.char '-' <|> Parsec.char '+') -- upper right corner is + too guard (last cs == '+') -- trailing space skipHorizontalSpace - _ <- char '\n' + _ <- Parsec.newline - return (BS.cons c $ BS.pack cs) + return (T.cons c $ T.pack cs) - parseRestRows :: Int -> Parser BS.ByteString + parseRestRows :: Int -> Parser Text parseRestRows l = do skipHorizontalSpace - c <- char '|' <|> char '+' - bs <- scan (l - 2) predicate - c2 <- char '|' <|> char '+' + c <- Parsec.char '|' <|> Parsec.char '+' + bs <- scan predicate (l - 2) + c2 <- Parsec.char '|' <|> Parsec.char '+' -- trailing space skipHorizontalSpace - _ <- char '\n' + _ <- Parsec.newline - return (BS.cons c (BS.snoc bs c2)) + return (T.cons c (T.snoc bs c2)) where predicate n c | n <= 0 = Nothing @@ -366,22 +396,22 @@ table = do -- and changes to '=' to '-'. tableStepTwo :: Int -- ^ width - -> [BS.ByteString] -- ^ rows + -> [Text] -- ^ rows -> Parser (Table (DocH mod Identifier)) tableStepTwo width = go 0 [] where go _ left [] = tableStepThree width (reverse left) Nothing go n left (r : rs) - | BS.all (`elem` ['+', '=']) r = + | T.all (`elem` ['+', '=']) r = tableStepThree width (reverse left ++ r' : rs) (Just n) | otherwise = go (n + 1) (r : left) rs where - r' = BS.map (\c -> if c == '=' then '-' else c) r + r' = T.map (\c -> if c == '=' then '-' else c) r -- Third step recognises cells in the table area, returning a list of TC, cells. tableStepThree :: Int -- ^ width - -> [BS.ByteString] -- ^ rows + -> [Text] -- ^ rows -> Maybe Int -- ^ index of header separator -> Parser (Table (DocH mod Identifier)) tableStepThree width rs hdrIndex = do @@ -408,32 +438,32 @@ tableStepThree width rs hdrIndex = do scanRight :: Int -> Int -> Maybe (Int, Int) scanRight x y = go (x + 1) where bs = rs !! y - go x' | x' >= width = fail "overflow right " - | BS.index bs x' == '+' = scanDown x y x' <|> go (x' + 1) - | BS.index bs x' == '-' = go (x' + 1) - | otherwise = fail $ "not a border (right) " ++ show (x,y,x') + go x' | x' >= width = fail "overflow right " + | T.index bs x' == '+' = scanDown x y x' <|> go (x' + 1) + | T.index bs x' == '-' = go (x' + 1) + | otherwise = fail $ "not a border (right) " ++ show (x,y,x') -- scan down looking for + scanDown :: Int -> Int -> Int -> Maybe (Int, Int) scanDown x y x2 = go (y + 1) where - go y' | y' >= height = fail "overflow down" - | BS.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1) - | BS.index (rs !! y') x2 == '|' = go (y' + 1) - | otherwise = fail $ "not a border (down) " ++ show (x,y,x2,y') + go y' | y' >= height = fail "overflow down" + | T.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1) + | T.index (rs !! y') x2 == '|' = go (y' + 1) + | otherwise = fail $ "not a border (down) " ++ show (x,y,x2,y') -- check that at y2 x..x2 characters are '+' or '-' scanLeft :: Int -> Int -> Int -> Int -> Maybe (Int, Int) scanLeft x y x2 y2 - | all (\x' -> BS.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2 - | otherwise = fail $ "not a border (left) " ++ show (x,y,x2,y2) + | all (\x' -> T.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2 + | otherwise = fail $ "not a border (left) " ++ show (x,y,x2,y2) where bs = rs !! y2 -- check that at y2 x..x2 characters are '+' or '-' scanUp :: Int -> Int -> Int -> Int -> Maybe (Int, Int) scanUp x y x2 y2 - | all (\y' -> BS.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2) - | otherwise = fail $ "not a border (up) " ++ show (x,y,x2,y2) + | all (\y' -> T.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2) + | otherwise = fail $ "not a border (up) " ++ show (x,y,x2,y2) -- | table cell: top left bottom right data TC = TC !Int !Int !Int !Int @@ -446,7 +476,7 @@ tcYS :: TC -> [Int] tcYS (TC y _ y2 _) = [y, y2] -- | Fourth step. Given the locations of cells, forms 'Table' structure. -tableStepFour :: [BS.ByteString] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier)) +tableStepFour :: [Text] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier)) tableStepFour rs hdrIndex cells = case hdrIndex of Nothing -> return $ Table [] rowsDoc Just i -> case elemIndex i yTabStops of @@ -464,7 +494,7 @@ tableStepFour rs hdrIndex cells = case hdrIndex of init' [_] = [] init' (x : xs) = x : init' xs - rowsDoc = (fmap . fmap) parseStringBS rows + rowsDoc = (fmap . fmap) parseParagraph rows rows = map makeRow (init' yTabStops) where @@ -477,9 +507,9 @@ tableStepFour rs hdrIndex cells = case hdrIndex of yts = length $ P.takeWhile (< y2) $ dropWhile (< y) yTabStops -- extract cell contents given boundaries - extract :: Int -> Int -> Int -> Int -> BS.ByteString - extract x y x2 y2 = BS.intercalate "\n" - [ BS.take (x2 - x + 1) $ BS.drop x $ rs !! y' + extract :: Int -> Int -> Int -> Int -> Text + extract x y x2 y2 = T.intercalate "\n" + [ T.take (x2 - x + 1) $ T.drop x $ rs !! y' | y' <- [y .. y2] ] @@ -487,7 +517,7 @@ tableStepFour rs hdrIndex cells = case hdrIndex of since :: Parser (DocH mod a) since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty where - version = decimal `sepBy1'` "." + version = decimal `Parsec.sepBy1` "." -- | Headers inside the comment denoted with @=@ signs, up to 6 levels -- deep. @@ -498,38 +528,39 @@ since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince -- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"})) header :: Parser (DocH mod Identifier) header = do - let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1] - pser = foldl1 (<|>) psers - delim <- decodeUtf8 <$> pser - line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString - rest <- paragraph <|> return DocEmpty + let psers = map (string . flip T.replicate "=") [6, 5 .. 1] + pser = choice' psers + delim <- T.unpack <$> pser + line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseText + rest <- try paragraph <|> return DocEmpty return $ DocHeader (Header (length delim) line) `docAppend` rest textParagraph :: Parser (DocH mod Identifier) -textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine +textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier) textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph) where optionalTextParagraph :: Parser (DocH mod Identifier) - optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty + optionalTextParagraph = choice' [ docAppend <$> whitespace <*> textParagraph + , pure DocEmpty ] whitespace :: Parser (DocH mod a) whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n") where - f :: BS.ByteString -> Maybe BS.ByteString -> String + f :: Text -> Maybe Text -> String f xs (fromMaybe "" -> x) - | BS.null (xs <> x) = "" + | T.null (xs <> x) = "" | otherwise = " " -- | Parses unordered (bullet) lists. -unorderedList :: BS.ByteString -> Parser (DocH mod Identifier) +unorderedList :: Text -> Parser (DocH mod Identifier) unorderedList indent = DocUnorderedList <$> p where p = ("*" <|> "-") *> innerList indent p -- | Parses ordered lists (numbered or dashed). -orderedList :: BS.ByteString -> Parser (DocH mod Identifier) +orderedList :: Text -> Parser (DocH mod Identifier) orderedList indent = DocOrderedList <$> p where p = (paren <|> dot) *> innerList indent p @@ -541,104 +572,110 @@ orderedList indent = DocOrderedList <$> p -- same paragraph. Usually used as -- -- > someListFunction = listBeginning *> innerList someListFunction -innerList :: BS.ByteString -> Parser [DocH mod Identifier] +innerList :: Text -> Parser [DocH mod Identifier] -> Parser [DocH mod Identifier] innerList indent item = do c <- takeLine (cs, items) <- more indent item - let contents = docParagraph . parseString . dropNLs . unlines $ c : cs + let contents = docParagraph . parseText . dropNLs . T.unlines $ c : cs return $ case items of Left p -> [contents `docAppend` p] Right i -> contents : i -- | Parses definition lists. -definitionList :: BS.ByteString -> Parser (DocH mod Identifier) +definitionList :: Text -> Parser (DocH mod Identifier) definitionList indent = DocDefList <$> p where p = do - label <- "[" *> (parseStringBS <$> takeWhile1_ (notInClass "]\n")) <* ("]" <* optional ":") + label <- "[" *> (parseParagraph <$> takeWhile1_ (`notElem` ("]\n" :: String))) <* ("]" <* optional ":") c <- takeLine (cs, items) <- more indent p - let contents = parseString . dropNLs . unlines $ c : cs + let contents = parseText . dropNLs . T.unlines $ c : cs return $ case items of Left x -> [(label, contents `docAppend` x)] Right i -> (label, contents) : i -- | Drops all trailing newlines. -dropNLs :: String -> String -dropNLs = reverse . dropWhile (== '\n') . reverse +dropNLs :: Text -> Text +dropNLs = T.dropWhileEnd (== '\n') -- | Main worker for 'innerList' and 'definitionList'. -- We need the 'Either' here to be able to tell in the respective functions -- whether we're dealing with the next list or a nested paragraph. -more :: Monoid a => BS.ByteString -> Parser a - -> Parser ([String], Either (DocH mod Identifier) a) -more indent item = innerParagraphs indent - <|> moreListItems indent item - <|> moreContent indent item - <|> pure ([], Right mempty) +more :: Monoid a => Text -> Parser a + -> Parser ([Text], Either (DocH mod Identifier) a) +more indent item = choice' [ innerParagraphs indent + , moreListItems indent item + , moreContent indent item + , pure ([], Right mempty) + ] -- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs. -innerParagraphs :: BS.ByteString - -> Parser ([String], Either (DocH mod Identifier) a) +innerParagraphs :: Text + -> Parser ([Text], Either (DocH mod Identifier) a) innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent) -- | Attempts to fetch the next list if possibly. Used by 'innerList' and -- 'definitionList' to recursively grab lists that aren't separated by a whole -- paragraph. -moreListItems :: BS.ByteString -> Parser a - -> Parser ([String], Either (DocH mod Identifier) a) +moreListItems :: Text -> Parser a + -> Parser ([Text], Either (DocH mod Identifier) a) moreListItems indent item = (,) [] . Right <$> indentedItem where - indentedItem = string indent *> skipSpace *> item + indentedItem = string indent *> Parsec.spaces *> item -- | Helper for 'innerList' and 'definitionList' which simply takes -- a line of text and attempts to parse more list content with 'more'. -moreContent :: Monoid a => BS.ByteString -> Parser a - -> Parser ([String], Either (DocH mod Identifier) a) +moreContent :: Monoid a => Text -> Parser a + -> Parser ([Text], Either (DocH mod Identifier) a) moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item -- | Parses an indented paragraph. -- The indentation is 4 spaces. -indentedParagraphs :: BS.ByteString -> Parser (DocH mod Identifier) +indentedParagraphs :: Text -> Parser (DocH mod Identifier) indentedParagraphs indent = - (concat <$> dropFrontOfPara indent') >>= parseParagraphs + (T.unpack . T.concat <$> dropFrontOfPara indent') >>= parseParagraphs where - indent' = string $ BS.append indent " " + indent' = string $ indent <> " " -- | Grab as many fully indented paragraphs as we can. -dropFrontOfPara :: Parser BS.ByteString -> Parser [String] +dropFrontOfPara :: Parser Text -> Parser [Text] dropFrontOfPara sp = do - currentParagraph <- some (sp *> takeNonEmptyLine) + currentParagraph <- some (try (sp *> takeNonEmptyLine)) followingParagraphs <- - skipHorizontalSpace *> nextPar -- we have more paragraphs to take - <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline - <|> endOfInput *> return [] -- nothing more to take at all + choice' [ skipHorizontalSpace *> nextPar -- we have more paragraphs to take + , skipHorizontalSpace *> nlList -- end of the ride, remember the newline + , Parsec.eof *> return [] -- nothing more to take at all + ] return (currentParagraph ++ followingParagraphs) where nextPar = (++) <$> nlList <*> dropFrontOfPara sp nlList = "\n" *> return ["\n"] -nonSpace :: BS.ByteString -> Parser BS.ByteString +nonSpace :: Text -> Parser Text nonSpace xs - | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line" + | T.all isSpace xs = fail "empty line" | otherwise = return xs -- | Takes a non-empty, not fully whitespace line. -- -- Doesn't discard the trailing newline. -takeNonEmptyLine :: Parser String +takeNonEmptyLine :: Parser Text takeNonEmptyLine = do - (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" + l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace + _ <- "\n" + pure (l <> "\n") -- | Takes indentation of first non-empty line. -- -- More precisely: skips all whitespace-only lines and returns indentation -- (horizontal space, might be empty) of that non-empty line. -takeIndent :: Parser BS.ByteString +takeIndent :: Parser Text takeIndent = do indent <- takeHorizontalSpace - "\n" *> takeIndent <|> return indent + choice' [ "\n" *> takeIndent + , return indent + ] -- | Blocks of text of the form: -- @@ -647,97 +684,98 @@ takeIndent = do -- >> baz -- birdtracks :: Parser (DocH mod a) -birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line +birdtracks = DocCodeBlock . DocString . T.unpack . T.intercalate "\n" . stripSpace <$> some line where - line = skipHorizontalSpace *> ">" *> takeLine + line = try (skipHorizontalSpace *> ">" *> takeLine) -stripSpace :: [String] -> [String] +stripSpace :: [Text] -> [Text] stripSpace = fromMaybe <*> mapM strip' where - strip' (' ':xs') = Just xs' - strip' "" = Just "" - strip' _ = Nothing + strip' t = case T.uncons t of + Nothing -> Just "" + Just (' ',t') -> Just t' + _ -> Nothing -- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). -- Consecutive examples are accepted. examples :: Parser (DocH mod a) -examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go) +examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go) where go :: Parser [Example] go = do - prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" + prefix <- takeHorizontalSpace <* ">>>" expr <- takeLine (rs, es) <- resultAndMoreExamples return (makeExample prefix expr rs : es) where - resultAndMoreExamples :: Parser ([String], [Example]) - resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) + resultAndMoreExamples :: Parser ([Text], [Example]) + resultAndMoreExamples = choice' [ moreExamples, result, pure ([], []) ] where - moreExamples :: Parser ([String], [Example]) + moreExamples :: Parser ([Text], [Example]) moreExamples = (,) [] <$> go - result :: Parser ([String], [Example]) + result :: Parser ([Text], [Example]) result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples - makeExample :: String -> String -> [String] -> Example + makeExample :: Text -> Text -> [Text] -> Example makeExample prefix expression res = - Example (strip expression) result + Example (T.unpack (T.strip expression)) result where - result = map (substituteBlankLine . tryStripPrefix) res + result = map (T.unpack . substituteBlankLine . tryStripPrefix) res - tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) + tryStripPrefix xs = fromMaybe xs (T.stripPrefix prefix xs) substituteBlankLine "" = "" substituteBlankLine xs = xs -nonEmptyLine :: Parser String -nonEmptyLine = mfilter (any (not . isSpace)) takeLine +nonEmptyLine :: Parser Text +nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine) -takeLine :: Parser String -takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine +takeLine :: Parser Text +takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine) endOfLine :: Parser () -endOfLine = void "\n" <|> endOfInput +endOfLine = void "\n" <|> Parsec.eof -- | Property parser. -- -- >>> snd <$> parseOnly property "prop> hello world" -- Right (DocProperty "hello world") property :: Parser (DocH mod a) -property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) +property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n")) -- | -- Paragraph level codeblock. Anything between the two delimiting \@ is parsed -- for markup. codeblock :: Parser (DocH mod Identifier) codeblock = - DocCodeBlock . parseStringBS . dropSpaces + DocCodeBlock . parseParagraph . dropSpaces <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@") where dropSpaces xs = - let rs = decodeUtf8 xs - in case splitByNl rs of + case splitByNl xs of [] -> xs - ys -> case last ys of - ' ':_ -> case mapM dropSpace ys of - Nothing -> xs - Just zs -> encodeUtf8 $ intercalate "\n" zs + ys -> case T.uncons (last ys) of + Just (' ',_) -> case mapM dropSpace ys of + Nothing -> xs + Just zs -> T.intercalate "\n" zs _ -> xs -- This is necessary because ‘lines’ swallows up a trailing newline -- and we lose information about whether the last line belongs to @ or to -- text which we need to decide whether we actually want to be dropping -- anything at all. - splitByNl = unfoldr (\x -> case x of - '\n':s -> Just (span (/= '\n') s) - _ -> Nothing) - . ('\n' :) + splitByNl = unfoldr (\x -> case T.uncons x of + Just ('\n',x') -> Just (T.span (/= '\n') x') + _ -> Nothing) + . ("\n" <>) - dropSpace "" = Just "" - dropSpace (' ':xs) = Just xs - dropSpace _ = Nothing + dropSpace t = case T.uncons t of + Nothing -> Just "" + Just (' ',t') -> Just t' + _ -> Nothing - block' = scan False p + block' = scan p False where p isNewline c | isNewline && c == '@' = Nothing @@ -745,11 +783,11 @@ codeblock = | otherwise = Just $ c == '\n' hyperlink :: Parser (DocH mod a) -hyperlink = angleBracketLink <|> markdownLink <|> autoUrl +hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ] angleBracketLink :: Parser (DocH mod a) angleBracketLink = - DocHyperlink . makeLabeled Hyperlink . decodeUtf8 + DocHyperlink . makeLabeled Hyperlink <$> disallowNewline ("<" *> takeUntil ">") markdownLink :: Parser (DocH mod a) @@ -759,7 +797,7 @@ linkParser :: Parser Hyperlink linkParser = flip Hyperlink <$> label <*> (whitespace *> url) where label :: Parser (Maybe String) - label = Just . strip . decode <$> ("[" *> takeUntil "]") + label = Just . decode . T.strip <$> ("[" *> takeUntil "]") whitespace :: Parser () whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace) @@ -770,19 +808,25 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url) rejectWhitespace :: MonadPlus m => m String -> m String rejectWhitespace = mfilter (all (not . isSpace)) - decode :: BS.ByteString -> String - decode = removeEscapes . decodeUtf8 + decode :: Text -> String + decode = T.unpack . removeEscapes -- | Looks for URL-like things to automatically hyperlink even if they -- weren't marked as links. autoUrl :: Parser (DocH mod a) autoUrl = mkLink <$> url where - url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) - mkLink :: BS.ByteString -> DocH mod a - mkLink s = case unsnoc s of - Just (xs, x) | inClass ",.!?" x -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x] - _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) + url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace)) + + mkLink :: Text -> DocH mod a + mkLink s = case T.unsnoc s of + Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x] + _ -> DocHyperlink (mkHyperlink s) + + mkHyperlink :: Text -> Hyperlink + mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing + + -- | Parses strings between identifier delimiters. Consumes all input that it -- deems to be valid in an identifier. Note that it simply blindly consumes @@ -790,14 +834,14 @@ autoUrl = mkLink <$> url parseValid :: Parser String parseValid = p some where - idChar = satisfyUnicode (\c -> isAlphaNum c || isSymbolChar c || c == '_') + idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_') p p' = do vs <- p' idChar c <- peekChar' case c of '`' -> return vs - '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs + '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ] _ -> fail "outofvalid" -- | Parses identifiers with help of 'parseValid'. Asks GHC for @@ -809,4 +853,4 @@ identifier = do e <- idDelim return $ DocIdentifier (o, vid, e) where - idDelim = satisfy (\c -> c == '\'' || c == '`') + idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`') diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index ff6101da..585c76bb 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -1,48 +1,24 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-} -module Documentation.Haddock.Parser.Monad ( - module Documentation.Haddock.Parser.Monad -, Attoparsec.isDigit -, Attoparsec.isDigit_w8 -, Attoparsec.isAlpha_iso8859_15 -, Attoparsec.isAlpha_ascii -, Attoparsec.isSpace -, Attoparsec.isSpace_w8 -, Attoparsec.inClass -, Attoparsec.notInClass -, Attoparsec.isEndOfLine -, Attoparsec.isHorizontalSpace -, Attoparsec.choice -, Attoparsec.count -, Attoparsec.option -, Attoparsec.many' -, Attoparsec.many1 -, Attoparsec.many1' -, Attoparsec.manyTill -, Attoparsec.manyTill' -, Attoparsec.sepBy -, Attoparsec.sepBy' -, Attoparsec.sepBy1 -, Attoparsec.sepBy1' -, Attoparsec.skipMany -, Attoparsec.skipMany1 -, Attoparsec.eitherP -) where - -import Control.Applicative -import Control.Monad -import Data.String -import Data.ByteString (ByteString, length) -import qualified Data.ByteString.Lazy as LB -import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec -import qualified Data.Attoparsec.Combinator as Attoparsec -import Control.Monad.Trans.State -import qualified Control.Monad.Trans.Class as Trans -import Data.Word -import Data.Bits -import Data.Tuple - -import Documentation.Haddock.Types (Version) -import Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TypeSynonymInstances #-} + +module Documentation.Haddock.Parser.Monad where + +import qualified Text.Parsec.Char as Parsec +import qualified Text.Parsec as Parsec + +import qualified Data.Text as T +import Data.Text ( Text ) + +import Data.String ( IsString(..) ) +import Data.Bits ( Bits(..) ) +import Data.Char ( ord ) +import Data.List ( foldl' ) + +import Documentation.Haddock.Types ( Version ) newtype ParserState = ParserState { parserStateSince :: Maybe Version @@ -51,120 +27,65 @@ newtype ParserState = ParserState { initialParserState :: ParserState initialParserState = ParserState Nothing -newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a) - deriving (Functor, Applicative, Alternative, Monad, MonadPlus) - -instance (a ~ ByteString) => IsString (Parser a) where - fromString = lift . fromString - -parseOnly :: Parser a -> ByteString -> Either String (ParserState, a) -parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState) +setSince :: Version -> Parser () +setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since}) -lift :: Attoparsec.Parser a -> Parser a -lift = Parser . Trans.lift +type Parser = Parsec.Parsec Text ParserState -setParserState :: ParserState -> Parser () -setParserState = Parser . put +instance (a ~ Text) => IsString (Parser a) where + fromString = fmap T.pack . Parsec.string -setSince :: Version -> Parser () -setSince since = Parser $ modify (\st -> st {parserStateSince = Just since}) - -char :: Char -> Parser Char -char = lift . Attoparsec.char - -char8 :: Char -> Parser Word8 -char8 = lift . Attoparsec.char8 - --- | Peek a unicode character and return the number of bytes that it took up -peekUnicode :: Parser (Char, Int) -peekUnicode = lift $ Attoparsec.lookAhead $ do - - -- attoparsec's take fails on shorter inputs rather than truncate - bs <- Attoparsec.choice (map Attoparsec.take [4,3,2,1]) - - let c = head . decodeUtf8 $ bs - n = Data.ByteString.length . encodeUtf8 $ [c] - pure (c, fromIntegral n) - --- | Like 'satisfy', but consuming a unicode character -satisfyUnicode :: (Char -> Bool) -> Parser Char -satisfyUnicode predicate = do - (c,n) <- peekUnicode - if predicate c - then Documentation.Haddock.Parser.Monad.take n *> pure c - else fail "satsifyUnicode" - -anyChar :: Parser Char -anyChar = lift Attoparsec.anyChar - -notChar :: Char -> Parser Char -notChar = lift . Attoparsec.notChar - -satisfy :: (Char -> Bool) -> Parser Char -satisfy = lift . Attoparsec.satisfy +parseOnly :: Parser a -> Text -> Either String (ParserState, a) +parseOnly p t = case Parsec.runParser p' initialParserState "" t of + Left e -> Left (show e) + Right (x,s) -> Right (s,x) + where p' = (,) <$> p <*> Parsec.getState +-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not +-- consume input. peekChar :: Parser (Maybe Char) -peekChar = lift Attoparsec.peekChar +peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar +-- | Fails if at the end of input. Does not consume input. peekChar' :: Parser Char -peekChar' = lift Attoparsec.peekChar' - -digit :: Parser Char -digit = lift Attoparsec.digit - -letter_iso8859_15 :: Parser Char -letter_iso8859_15 = lift Attoparsec.letter_iso8859_15 - -letter_ascii :: Parser Char -letter_ascii = lift Attoparsec.letter_ascii - -space :: Parser Char -space = lift Attoparsec.space - -string :: ByteString -> Parser ByteString -string = lift . Attoparsec.string - -stringCI :: ByteString -> Parser ByteString -stringCI = lift . Attoparsec.stringCI - -skipSpace :: Parser () -skipSpace = lift Attoparsec.skipSpace - -skipWhile :: (Char -> Bool) -> Parser () -skipWhile = lift . Attoparsec.skipWhile - -take :: Int -> Parser ByteString -take = lift . Attoparsec.take - -scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString -scan s = lift . Attoparsec.scan s - -takeWhile :: (Char -> Bool) -> Parser ByteString -takeWhile = lift . Attoparsec.takeWhile - -takeWhile1 :: (Char -> Bool) -> Parser ByteString -takeWhile1 = lift . Attoparsec.takeWhile1 - -takeTill :: (Char -> Bool) -> Parser ByteString -takeTill = lift . Attoparsec.takeTill - -takeByteString :: Parser ByteString -takeByteString = lift Attoparsec.takeByteString - -takeLazyByteString :: Parser LB.ByteString -takeLazyByteString = lift Attoparsec.takeLazyByteString - -endOfLine :: Parser () -endOfLine = lift Attoparsec.endOfLine - +peekChar' = Parsec.lookAhead Parsec.anyChar + +-- | Parses the given string. Returns the parsed string. +string :: Text -> Parser Text +string t = Parsec.string (T.unpack t) *> pure t + +-- | Scan the input text, accumulating characters as long as the scanning +-- function returns true. +scan :: (s -> Char -> Maybe s) -- ^ scan function + -> s -- ^ initial state + -> Parser Text +scan f = fmap T.pack . go + where go s1 = do { cOpt <- peekChar + ; case cOpt >>= f s1 of + Nothing -> pure "" + Just s2 -> (:) <$> Parsec.anyChar <*> go s2 + } + +-- | Apply a parser for a character zero or more times and collect the result in +-- a string. +takeWhile :: Parser Char -> Parser Text +takeWhile = fmap T.pack . Parsec.many + +-- | Apply a parser for a character one or more times and collect the result in +-- a string. +takeWhile1 :: Parser Char -> Parser Text +takeWhile1 = fmap T.pack . Parsec.many1 + +-- | Parse a decimal number. decimal :: Integral a => Parser a -decimal = lift Attoparsec.decimal +decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit + where step a c = a * 10 + fromIntegral (ord c - 48) +-- | Parse a hexadecimal number. hexadecimal :: (Integral a, Bits a) => Parser a -hexadecimal = lift Attoparsec.hexadecimal - -endOfInput :: Parser () -endOfInput = lift Attoparsec.endOfInput - -atEnd :: Parser Bool -atEnd = lift Attoparsec.atEnd +hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit + where + step a c | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) + | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) + | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) + where w = ord c diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index ab5e5e9e..ffa91b09 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} -- | -- Module : Documentation.Haddock.Parser.Util -- Copyright : (c) Mateusz Kowalczyk 2013-2014, @@ -11,62 +11,59 @@ -- -- Various utility functions used by the parser. module Documentation.Haddock.Parser.Util ( - unsnoc -, strip -, takeUntil -, removeEscapes -, makeLabeled -, takeHorizontalSpace -, skipHorizontalSpace + takeUntil, + removeEscapes, + makeLabeled, + takeHorizontalSpace, + skipHorizontalSpace, ) where +import qualified Text.Parsec as Parsec + +import qualified Data.Text as T +import Data.Text (Text) + import Control.Applicative import Control.Monad (mfilter) -import Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace) -import Data.ByteString.Char8 (ByteString) -import qualified Data.ByteString.Char8 as BS +import Documentation.Haddock.Parser.Monad import Prelude hiding (takeWhile) -#if MIN_VERSION_bytestring(0,10,2) -import Data.ByteString.Char8 (unsnoc) -#else -unsnoc :: ByteString -> Maybe (ByteString, Char) -unsnoc bs - | BS.null bs = Nothing - | otherwise = Just (BS.init bs, BS.last bs) -#endif +import Data.Char (isSpace) --- | Remove all leading and trailing whitespace -strip :: String -> String -strip = (\f -> f . f) $ dropWhile isSpace . reverse - -isHorizontalSpace :: Char -> Bool -isHorizontalSpace = inClass " \t\f\v\r" +-- | Characters that count as horizontal space +horizontalSpace :: [Char] +horizontalSpace = " \t\f\v\r" +-- | Skip and ignore leading horizontal space skipHorizontalSpace :: Parser () -skipHorizontalSpace = skipWhile isHorizontalSpace +skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace) -takeHorizontalSpace :: Parser BS.ByteString -takeHorizontalSpace = takeWhile isHorizontalSpace +-- | Take leading horizontal space +takeHorizontalSpace :: Parser Text +takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace) -makeLabeled :: (String -> Maybe String -> a) -> String -> a -makeLabeled f input = case break isSpace $ removeEscapes $ strip input of - (uri, "") -> f uri Nothing - (uri, label) -> f uri (Just $ dropWhile isSpace label) +makeLabeled :: (String -> Maybe String -> a) -> Text -> a +makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of + (uri, "") -> f (T.unpack uri) Nothing + (uri, label) -> f (T.unpack uri) (Just . T.unpack $ T.stripStart label) -- | Remove escapes from given string. -- -- Only do this if you do not process (read: parse) the input any further. -removeEscapes :: String -> String -removeEscapes "" = "" -removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs -removeEscapes ('\\':xs) = removeEscapes xs -removeEscapes (x:xs) = x : removeEscapes xs +removeEscapes :: Text -> Text +removeEscapes = T.unfoldr go + where + go :: Text -> Maybe (Char, Text) + go xs = case T.uncons xs of + Just ('\\',ys) -> T.uncons ys + unconsed -> unconsed -takeUntil :: ByteString -> Parser ByteString -takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome +-- | Consume characters from the input up to and including the given pattern. +-- Return everything consumed except for the end pattern itself. +takeUntil :: Text -> Parser Text +takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome where - end = BS.unpack end_ + end = T.unpack end_ p :: (Bool, String) -> Char -> Maybe (Bool, String) p acc c = case acc of @@ -75,9 +72,8 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome (_, x:xs) | x == c -> Just (False, xs) _ -> Just (c == '\\', end) - dropEnd = BS.reverse . BS.drop (length end) . BS.reverse - requireEnd = mfilter (BS.isSuffixOf end_) + requireEnd = mfilter (T.isSuffixOf end_) gotSome xs - | BS.null xs = fail "didn't get any content" + | T.null xs = fail "didn't get any content" | otherwise = return xs diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec.hs deleted file mode 100644 index bd3c5592..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec.hs +++ /dev/null @@ -1,23 +0,0 @@ --- | --- Module : Data.Attoparsec --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient combinator parsing for --- 'Data.ByteString.ByteString' strings, loosely based on the Parsec --- library. --- --- This module is deprecated. Use "Data.Attoparsec.ByteString" --- instead. - -module Data.Attoparsec - {-# DEPRECATED "This module will be removed in the next major release." #-} - ( - module Data.Attoparsec.ByteString - ) where - -import Data.Attoparsec.ByteString diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString.hs deleted file mode 100644 index 84e567d9..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString.hs +++ /dev/null @@ -1,230 +0,0 @@ -{-# LANGUAGE CPP #-} -#if __GLASGOW_HASKELL__ >= 702 -{-# LANGUAGE Trustworthy #-} -#endif --- | --- Module : Data.Attoparsec.ByteString --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient combinator parsing for 'B.ByteString' strings, --- loosely based on the Parsec library. - -module Data.Attoparsec.ByteString - ( - -- * Differences from Parsec - -- $parsec - - -- * Incremental input - -- $incremental - - -- * Performance considerations - -- $performance - - -- * Parser types - I.Parser - , Result - , T.IResult(..) - , I.compareResults - - -- * Running parsers - , parse - , feed - , I.parseOnly - , parseWith - , parseTest - - -- ** Result conversion - , maybeResult - , eitherResult - - -- * Parsing individual bytes - , I.word8 - , I.anyWord8 - , I.notWord8 - , I.satisfy - , I.satisfyWith - , I.skip - - -- ** Lookahead - , I.peekWord8 - , I.peekWord8' - - -- ** Byte classes - , I.inClass - , I.notInClass - - -- * Efficient string handling - , I.string - , I.skipWhile - , I.take - , I.scan - , I.runScanner - , I.takeWhile - , I.takeWhile1 - , I.takeTill - - -- ** Consume all remaining input - , I.takeByteString - , I.takeLazyByteString - - -- * Combinators - , try - , () - , choice - , count - , option - , many' - , many1 - , many1' - , manyTill - , manyTill' - , sepBy - , sepBy' - , sepBy1 - , sepBy1' - , skipMany - , skipMany1 - , eitherP - , I.match - -- * State observation and manipulation functions - , I.endOfInput - , I.atEnd - ) where - -import Data.Attoparsec.Combinator -import Data.List (intercalate) -import qualified Data.Attoparsec.ByteString.Internal as I -import qualified Data.Attoparsec.Internal as I -import qualified Data.ByteString as B -import Data.Attoparsec.ByteString.Internal (Result, parse) -import qualified Data.Attoparsec.Internal.Types as T - --- $parsec --- --- Compared to Parsec 3, attoparsec makes several tradeoffs. It is --- not intended for, or ideal for, all possible uses. --- --- * While attoparsec can consume input incrementally, Parsec cannot. --- Incremental input is a huge deal for efficient and secure network --- and system programming, since it gives much more control to users --- of the library over matters such as resource usage and the I/O --- model to use. --- --- * Much of the performance advantage of attoparsec is gained via --- high-performance parsers such as 'I.takeWhile' and 'I.string'. --- If you use complicated combinators that return lists of bytes or --- characters, there is less performance difference between the two --- libraries. --- --- * Unlike Parsec 3, attoparsec does not support being used as a --- monad transformer. --- --- * attoparsec is specialised to deal only with strict 'B.ByteString' --- input. Efficiency concerns rule out both lists and lazy --- bytestrings. The usual use for lazy bytestrings would be to --- allow consumption of very large input without a large footprint. --- For this need, attoparsec's incremental input provides an --- excellent substitute, with much more control over when input --- takes place. If you must use lazy bytestrings, see the --- "Data.Attoparsec.ByteString.Lazy" module, which feeds lazy chunks --- to a regular parser. --- --- * Parsec parsers can produce more helpful error messages than --- attoparsec parsers. This is a matter of focus: attoparsec avoids --- the extra book-keeping in favour of higher performance. - --- $incremental --- --- attoparsec supports incremental input, meaning that you can feed it --- a bytestring that represents only part of the expected total amount --- of data to parse. If your parser reaches the end of a fragment of --- input and could consume more input, it will suspend parsing and --- return a 'T.Partial' continuation. --- --- Supplying the 'T.Partial' continuation with a bytestring will --- resume parsing at the point where it was suspended, with the --- bytestring you supplied used as new input at the end of the --- existing input. You must be prepared for the result of the resumed --- parse to be another 'T.Partial' continuation. --- --- To indicate that you have no more input, supply the 'T.Partial' --- continuation with an empty bytestring. --- --- Remember that some parsing combinators will not return a result --- until they reach the end of input. They may thus cause 'T.Partial' --- results to be returned. --- --- If you do not need support for incremental input, consider using --- the 'I.parseOnly' function to run your parser. It will never --- prompt for more input. --- --- /Note/: incremental input does /not/ imply that attoparsec will --- release portions of its internal state for garbage collection as it --- proceeds. Its internal representation is equivalent to a single --- 'ByteString': if you feed incremental input to a parser, it will --- require memory proportional to the amount of input you supply. --- (This is necessary to support arbitrary backtracking.) - --- $performance --- --- If you write an attoparsec-based parser carefully, it can be --- realistic to expect it to perform similarly to a hand-rolled C --- parser (measuring megabytes parsed per second). --- --- To actually achieve high performance, there are a few guidelines --- that it is useful to follow. --- --- Use the 'B.ByteString'-oriented parsers whenever possible, --- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'. There is --- about a factor of 100 difference in performance between the two --- kinds of parser. --- --- For very simple byte-testing predicates, write them by hand instead --- of using 'I.inClass' or 'I.notInClass'. For instance, both of --- these predicates test for an end-of-line byte, but the first is --- much faster than the second: --- --- >endOfLine_fast w = w == 13 || w == 10 --- >endOfLine_slow = inClass "\r\n" --- --- Make active use of benchmarking and profiling tools to measure, --- find the problems with, and improve the performance of your parser. - --- | Run a parser and print its result to standard output. -parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO () -parseTest p s = print (parse p s) - --- | Run a parser with an initial input string, and a monadic action --- that can supply more input if needed. -parseWith :: Monad m => - (m B.ByteString) - -- ^ An action that will be executed to provide the parser - -- with more input, if necessary. The action must return an - -- 'B.empty' string when there is no more input available. - -> I.Parser a - -> B.ByteString - -- ^ Initial input for the parser. - -> m (Result a) -parseWith refill p s = step $ parse p s - where step (T.Partial k) = (step . k) =<< refill - step r = return r -{-# INLINE parseWith #-} - --- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result --- is treated as failure. -maybeResult :: Result r -> Maybe r -maybeResult (T.Done _ r) = Just r -maybeResult _ = Nothing - --- | Convert a 'Result' value to an 'Either' value. A 'T.Partial' --- result is treated as failure. -eitherResult :: Result r -> Either String r -eitherResult (T.Done _ r) = Right r -eitherResult (T.Fail _ [] msg) = Left msg -eitherResult (T.Fail _ ctxs msg) = Left (intercalate " > " ctxs ++ ": " ++ msg) -eitherResult _ = Left "Result: incomplete input" diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs deleted file mode 100644 index ac94dfcc..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Buffer.hs +++ /dev/null @@ -1,156 +0,0 @@ -{-# LANGUAGE BangPatterns #-} --- | --- Module : Data.Attoparsec.ByteString.Buffer --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : GHC --- --- An "immutable" buffer that supports cheap appends. --- --- A Buffer is divided into an immutable read-only zone, followed by a --- mutable area that we've preallocated, but not yet written to. --- --- We overallocate at the end of a Buffer so that we can cheaply --- append. Since a user of an existing Buffer cannot see past the end --- of its immutable zone into the data that will change during an --- append, this is safe. --- --- Once we run out of space at the end of a Buffer, we do the usual --- doubling of the buffer size. --- --- The fact of having a mutable buffer really helps with performance, --- but it does have a consequence: if someone misuses the Partial API --- that attoparsec uses by calling the same continuation repeatedly --- (which never makes sense in practice), they could overwrite data. --- --- Since the API *looks* pure, it should *act* pure, too, so we use --- two generation counters (one mutable, one immutable) to track the --- number of appends to a mutable buffer. If the counters ever get out --- of sync, someone is appending twice to a mutable buffer, so we --- duplicate the entire buffer in order to preserve the immutability --- of its older self. --- --- While we could go a step further and gain protection against API --- abuse on a multicore system, by use of an atomic increment --- instruction to bump the mutable generation counter, that would be --- very expensive, and feels like it would also be in the realm of the --- ridiculous. Clients should never call a continuation more than --- once; we lack a linear type system that could enforce this; and --- there's only so far we should go to accommodate broken uses. - -module Data.Attoparsec.ByteString.Buffer - ( - Buffer - , buffer - , unbuffer - , pappend - , length - , unsafeIndex - , substring - , unsafeDrop - ) where - -import Control.Exception (assert) -import Data.ByteString.Internal (ByteString(..), memcpy, nullForeignPtr) -import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) -import Data.List (foldl1') -import Data.Monoid as Mon (Monoid(..)) -import Data.Semigroup (Semigroup(..)) -import Data.Word (Word8) -import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) -import Foreign.Ptr (castPtr, plusPtr) -import Foreign.Storable (peek, peekByteOff, poke, sizeOf) -import GHC.ForeignPtr (mallocPlainForeignPtrBytes) -import Prelude hiding (length) - --- If _cap is zero, this buffer is empty. -data Buffer = Buf { - _fp :: {-# UNPACK #-} !(ForeignPtr Word8) - , _off :: {-# UNPACK #-} !Int - , _len :: {-# UNPACK #-} !Int - , _cap :: {-# UNPACK #-} !Int - , _gen :: {-# UNPACK #-} !Int - } - -instance Show Buffer where - showsPrec p = showsPrec p . unbuffer - --- | The initial 'Buffer' has no mutable zone, so we can avoid all --- copies in the (hopefully) common case of no further input being fed --- to us. -buffer :: ByteString -> Buffer -buffer (PS fp off len) = Buf fp off len len 0 - -unbuffer :: Buffer -> ByteString -unbuffer (Buf fp off len _ _) = PS fp off len - -instance Semigroup Buffer where - (Buf _ _ _ 0 _) <> b = b - a <> (Buf _ _ _ 0 _) = a - buf <> (Buf fp off len _ _) = append buf fp off len - -instance Monoid Buffer where - mempty = Buf nullForeignPtr 0 0 0 0 - - mappend = (<>) - - mconcat [] = Mon.mempty - mconcat xs = foldl1' mappend xs - -pappend :: Buffer -> ByteString -> Buffer -pappend (Buf _ _ _ 0 _) bs = buffer bs -pappend buf (PS fp off len) = append buf fp off len - -append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer -append (Buf fp0 off0 len0 cap0 gen0) !fp1 !off1 !len1 = - inlinePerformIO . withForeignPtr fp0 $ \ptr0 -> - withForeignPtr fp1 $ \ptr1 -> do - let genSize = sizeOf (0::Int) - newlen = len0 + len1 - gen <- if gen0 == 0 - then return 0 - else peek (castPtr ptr0) - if gen == gen0 && newlen <= cap0 - then do - let newgen = gen + 1 - poke (castPtr ptr0) newgen - memcpy (ptr0 `plusPtr` (off0+len0)) - (ptr1 `plusPtr` off1) - (fromIntegral len1) - return (Buf fp0 off0 newlen cap0 newgen) - else do - let newcap = newlen * 2 - fp <- mallocPlainForeignPtrBytes (newcap + genSize) - withForeignPtr fp $ \ptr_ -> do - let ptr = ptr_ `plusPtr` genSize - newgen = 1 - poke (castPtr ptr_) newgen - memcpy ptr (ptr0 `plusPtr` off0) (fromIntegral len0) - memcpy (ptr `plusPtr` len0) (ptr1 `plusPtr` off1) - (fromIntegral len1) - return (Buf fp genSize newlen newcap newgen) - -length :: Buffer -> Int -length (Buf _ _ len _ _) = len -{-# INLINE length #-} - -unsafeIndex :: Buffer -> Int -> Word8 -unsafeIndex (Buf fp off len _ _) i = assert (i >= 0 && i < len) . - inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i) -{-# INLINE unsafeIndex #-} - -substring :: Int -> Int -> Buffer -> ByteString -substring s l (Buf fp off len _ _) = - assert (s >= 0 && s <= len) . - assert (l >= 0 && l <= len-s) $ - PS fp (off+s) l -{-# INLINE substring #-} - -unsafeDrop :: Int -> Buffer -> ByteString -unsafeDrop s (Buf fp off len _ _) = - assert (s >= 0 && s <= len) $ - PS fp (off+s) (len-s) -{-# INLINE unsafeDrop #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs deleted file mode 100644 index 7fafba40..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Char8.hs +++ /dev/null @@ -1,464 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, TypeFamilies, - TypeSynonymInstances, GADTs #-} -#if __GLASGOW_HASKELL__ >= 702 -{-# LANGUAGE Trustworthy #-} -- Imports internal modules -#endif -{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-warnings-deprecations #-} - --- | --- Module : Data.Attoparsec.ByteString.Char8 --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient, character-oriented combinator parsing for --- 'B.ByteString' strings, loosely based on the Parsec library. - -module Data.Attoparsec.ByteString.Char8 - ( - -- * Character encodings - -- $encodings - - -- * Parser types - Parser - , A.Result - , A.IResult(..) - , I.compareResults - - -- * Running parsers - , A.parse - , A.feed - , A.parseOnly - , A.parseWith - , A.parseTest - - -- ** Result conversion - , A.maybeResult - , A.eitherResult - - -- * Parsing individual characters - , char - , char8 - , anyChar - , notChar - , satisfy - - -- ** Lookahead - , peekChar - , peekChar' - - -- ** Special character parsers - , digit - , letter_iso8859_15 - , letter_ascii - , space - - -- ** Fast predicates - , isDigit - , isDigit_w8 - , isAlpha_iso8859_15 - , isAlpha_ascii - , isSpace - , isSpace_w8 - - -- *** Character classes - , inClass - , notInClass - - -- * Efficient string handling - , I.string - , I.stringCI - , skipSpace - , skipWhile - , I.take - , scan - , takeWhile - , takeWhile1 - , takeTill - - -- ** String combinators - -- $specalt - , (.*>) - , (<*.) - - -- ** Consume all remaining input - , I.takeByteString - , I.takeLazyByteString - - -- * Text parsing - , I.endOfLine - , isEndOfLine - , isHorizontalSpace - - -- * Numeric parsers - , decimal - , hexadecimal - , signed - - -- * Combinators - , try - , () - , choice - , count - , option - , many' - , many1 - , many1' - , manyTill - , manyTill' - , sepBy - , sepBy' - , sepBy1 - , sepBy1' - , skipMany - , skipMany1 - , eitherP - , I.match - -- * State observation and manipulation functions - , I.endOfInput - , I.atEnd - ) where - -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative (pure, (*>), (<*), (<$>)) -import Data.Word (Word) -#endif -import Control.Applicative ((<|>)) -import Data.Attoparsec.ByteString.FastSet (charClass, memberChar) -import Data.Attoparsec.ByteString.Internal (Parser) -import Data.Attoparsec.Combinator -import Data.Bits (Bits, (.|.), shiftL) -import Data.ByteString.Internal (c2w, w2c) -import Data.Int (Int8, Int16, Int32, Int64) -import Data.String (IsString(..)) -import Data.Word (Word8, Word16, Word32, Word64) -import Prelude hiding (takeWhile) -import qualified Data.Attoparsec.ByteString as A -import qualified Data.Attoparsec.ByteString.Internal as I -import qualified Data.Attoparsec.Internal as I -import qualified Data.ByteString as B8 -import qualified Data.ByteString.Char8 as B - -instance (a ~ B.ByteString) => IsString (Parser a) where - fromString = I.string . B.pack - --- $encodings --- --- This module is intended for parsing text that is --- represented using an 8-bit character set, e.g. ASCII or --- ISO-8859-15. It /does not/ make any attempt to deal with character --- encodings, multibyte characters, or wide characters. In --- particular, all attempts to use characters above code point U+00FF --- will give wrong answers. --- --- Code points below U+0100 are simply translated to and from their --- numeric values, so e.g. the code point U+00A4 becomes the byte --- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic --- currency sign in ISO-8859-1). Haskell 'Char' values above U+00FF --- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@. - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser requires the predicate to succeed on at least one byte --- of input: it will fail if the predicate never returns 'True' or if --- there is no input left. -takeWhile1 :: (Char -> Bool) -> Parser B.ByteString -takeWhile1 p = I.takeWhile1 (p . w2c) -{-# INLINE takeWhile1 #-} - --- | The parser @satisfy p@ succeeds for any byte for which the --- predicate @p@ returns 'True'. Returns the byte that is actually --- parsed. --- --- >digit = satisfy isDigit --- > where isDigit c = c >= '0' && c <= '9' -satisfy :: (Char -> Bool) -> Parser Char -satisfy = I.satisfyWith w2c -{-# INLINE satisfy #-} - --- | Match a letter, in the ISO-8859-15 encoding. -letter_iso8859_15 :: Parser Char -letter_iso8859_15 = satisfy isAlpha_iso8859_15 "letter_iso8859_15" -{-# INLINE letter_iso8859_15 #-} - --- | Match a letter, in the ASCII encoding. -letter_ascii :: Parser Char -letter_ascii = satisfy isAlpha_ascii "letter_ascii" -{-# INLINE letter_ascii #-} - --- | A fast alphabetic predicate for the ISO-8859-15 encoding --- --- /Note/: For all character encodings other than ISO-8859-15, and --- almost all Unicode code points above U+00A3, this predicate gives --- /wrong answers/. -isAlpha_iso8859_15 :: Char -> Bool -isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || - (c >= '\166' && moby c) - where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247" - {-# NOINLINE moby #-} -{-# INLINE isAlpha_iso8859_15 #-} - --- | A fast alphabetic predicate for the ASCII encoding --- --- /Note/: For all character encodings other than ASCII, and --- almost all Unicode code points above U+007F, this predicate gives --- /wrong answers/. -isAlpha_ascii :: Char -> Bool -isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') -{-# INLINE isAlpha_ascii #-} - --- | Parse a single digit. -digit :: Parser Char -digit = satisfy isDigit "digit" -{-# INLINE digit #-} - --- | A fast digit predicate. -isDigit :: Char -> Bool -isDigit c = c >= '0' && c <= '9' -{-# INLINE isDigit #-} - --- | A fast digit predicate. -isDigit_w8 :: Word8 -> Bool -isDigit_w8 w = w - 48 <= 9 -{-# INLINE isDigit_w8 #-} - --- | Match any character. -anyChar :: Parser Char -anyChar = satisfy $ const True -{-# INLINE anyChar #-} - --- | Match any character, to perform lookahead. Returns 'Nothing' if --- end of input has been reached. Does not consume any input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -peekChar :: Parser (Maybe Char) -peekChar = (fmap w2c) `fmap` I.peekWord8 -{-# INLINE peekChar #-} - --- | Match any character, to perform lookahead. Does not consume any --- input, but will fail if end of input has been reached. -peekChar' :: Parser Char -peekChar' = w2c `fmap` I.peekWord8' -{-# INLINE peekChar' #-} - --- | Fast predicate for matching ASCII space characters. --- --- /Note/: This predicate only gives correct answers for the ASCII --- encoding. For instance, it does not recognise U+00A0 (non-breaking --- space) as a space character, even though it is a valid ISO-8859-15 --- byte. For a Unicode-aware and only slightly slower predicate, --- use 'Data.Char.isSpace' -isSpace :: Char -> Bool -isSpace c = (c == ' ') || ('\t' <= c && c <= '\r') -{-# INLINE isSpace #-} - --- | Fast 'Word8' predicate for matching ASCII space characters. -isSpace_w8 :: Word8 -> Bool -isSpace_w8 w = w == 32 || w - 9 <= 4 -{-# INLINE isSpace_w8 #-} - - --- | Parse a space character. --- --- /Note/: This parser only gives correct answers for the ASCII --- encoding. For instance, it does not recognise U+00A0 (non-breaking --- space) as a space character, even though it is a valid ISO-8859-15 --- byte. -space :: Parser Char -space = satisfy isSpace "space" -{-# INLINE space #-} - --- | Match a specific character. -char :: Char -> Parser Char -char c = satisfy (== c) [c] -{-# INLINE char #-} - --- | Match a specific character, but return its 'Word8' value. -char8 :: Char -> Parser Word8 -char8 c = I.satisfy (== c2w c) [c] -{-# INLINE char8 #-} - --- | Match any character except the given one. -notChar :: Char -> Parser Char -notChar c = satisfy (/= c) "not " ++ [c] -{-# INLINE notChar #-} - --- | Match any character in a set. --- --- >vowel = inClass "aeiou" --- --- Range notation is supported. --- --- >halfAlphabet = inClass "a-nA-N" --- --- To add a literal \'-\' to a set, place it at the beginning or end --- of the string. -inClass :: String -> Char -> Bool -inClass s = (`memberChar` mySet) - where mySet = charClass s -{-# INLINE inClass #-} - --- | Match any character not in a set. -notInClass :: String -> Char -> Bool -notInClass s = not . inClass s -{-# INLINE notInClass #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'False' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -takeWhile :: (Char -> Bool) -> Parser B.ByteString -takeWhile p = I.takeWhile (p . w2c) -{-# INLINE takeWhile #-} - --- | A stateful scanner. The predicate consumes and transforms a --- state argument, and each transformed state is passed to successive --- invocations of the predicate on each byte of the input until one --- returns 'Nothing' or the input ends. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'Nothing' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString -scan s0 p = I.scan s0 (\s -> p s . w2c) -{-# INLINE scan #-} - --- | Consume input as long as the predicate returns 'False' --- (i.e. until it returns 'True'), and return the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'True' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'many', because such parsers loop until a --- failure occurs. Careless use will thus result in an infinite loop. -takeTill :: (Char -> Bool) -> Parser B.ByteString -takeTill p = I.takeTill (p . w2c) -{-# INLINE takeTill #-} - --- | Skip past input for as long as the predicate returns 'True'. -skipWhile :: (Char -> Bool) -> Parser () -skipWhile p = I.skipWhile (p . w2c) -{-# INLINE skipWhile #-} - --- | Skip over white space. -skipSpace :: Parser () -skipSpace = I.skipWhile isSpace_w8 -{-# INLINE skipSpace #-} - --- $specalt --- --- If you enable the @OverloadedStrings@ language extension, you can --- use the '*>' and '<*' combinators to simplify the common task of --- matching a statically known string, then immediately parsing --- something else. --- --- Instead of writing something like this: --- --- @ ---'I.string' \"foo\" '*>' wibble --- @ --- --- Using @OverloadedStrings@, you can omit the explicit use of --- 'I.string', and write a more compact version: --- --- @ --- \"foo\" '*>' wibble --- @ --- --- (Note: the '.*>' and '<*.' combinators that were originally --- provided for this purpose are obsolete and unnecessary, and will be --- removed in the next major version.) - --- | /Obsolete/. A type-specialized version of '*>' for --- 'B.ByteString'. Use '*>' instead. -(.*>) :: B.ByteString -> Parser a -> Parser a -s .*> f = I.string s *> f -{-# DEPRECATED (.*>) "This is no longer necessary, and will be removed. Use '*>' instead." #-} - --- | /Obsolete/. A type-specialized version of '<*' for --- 'B.ByteString'. Use '<*' instead. -(<*.) :: Parser a -> B.ByteString -> Parser a -f <*. s = f <* I.string s -{-# DEPRECATED (<*.) "This is no longer necessary, and will be removed. Use '<*' instead." #-} - --- | A predicate that matches either a carriage return @\'\\r\'@ or --- newline @\'\\n\'@ character. -isEndOfLine :: Word8 -> Bool -isEndOfLine w = w == 13 || w == 10 -{-# INLINE isEndOfLine #-} - --- | A predicate that matches either a space @\' \'@ or horizontal tab --- @\'\\t\'@ character. -isHorizontalSpace :: Word8 -> Bool -isHorizontalSpace w = w == 32 || w == 9 -{-# INLINE isHorizontalSpace #-} - --- | Parse and decode an unsigned hexadecimal number. The hex digits --- @\'a\'@ through @\'f\'@ may be upper or lower case. --- --- This parser does not accept a leading @\"0x\"@ string. -hexadecimal :: (Integral a, Bits a) => Parser a -hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit - where - isHexDigit w = (w >= 48 && w <= 57) || - (w >= 97 && w <= 102) || - (w >= 65 && w <= 70) - step a w | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48) - | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) - | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) -{-# SPECIALISE hexadecimal :: Parser Int #-} -{-# SPECIALISE hexadecimal :: Parser Int8 #-} -{-# SPECIALISE hexadecimal :: Parser Int16 #-} -{-# SPECIALISE hexadecimal :: Parser Int32 #-} -{-# SPECIALISE hexadecimal :: Parser Int64 #-} -{-# SPECIALISE hexadecimal :: Parser Integer #-} -{-# SPECIALISE hexadecimal :: Parser Word #-} -{-# SPECIALISE hexadecimal :: Parser Word8 #-} -{-# SPECIALISE hexadecimal :: Parser Word16 #-} -{-# SPECIALISE hexadecimal :: Parser Word32 #-} -{-# SPECIALISE hexadecimal :: Parser Word64 #-} - --- | Parse and decode an unsigned decimal number. -decimal :: Integral a => Parser a -decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDigit_w8 - where step a w = a * 10 + fromIntegral (w - 48) -{-# SPECIALISE decimal :: Parser Int #-} -{-# SPECIALISE decimal :: Parser Int8 #-} -{-# SPECIALISE decimal :: Parser Int16 #-} -{-# SPECIALISE decimal :: Parser Int32 #-} -{-# SPECIALISE decimal :: Parser Int64 #-} -{-# SPECIALISE decimal :: Parser Integer #-} -{-# SPECIALISE decimal :: Parser Word #-} -{-# SPECIALISE decimal :: Parser Word8 #-} -{-# SPECIALISE decimal :: Parser Word16 #-} -{-# SPECIALISE decimal :: Parser Word32 #-} -{-# SPECIALISE decimal :: Parser Word64 #-} - --- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign --- character. -signed :: Num a => Parser a -> Parser a -{-# SPECIALISE signed :: Parser Int -> Parser Int #-} -{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-} -{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-} -{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-} -{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-} -{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-} -signed p = (negate <$> (char8 '-' *> p)) - <|> (char8 '+' *> p) - <|> p - diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs deleted file mode 100644 index d15854c4..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/FastSet.hs +++ /dev/null @@ -1,115 +0,0 @@ -{-# LANGUAGE BangPatterns, MagicHash #-} - ------------------------------------------------------------------------------ --- | --- Module : Data.Attoparsec.ByteString.FastSet --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Fast set membership tests for 'Word8' and 8-bit 'Char' values. The --- set representation is unboxed for efficiency. For small sets, we --- test for membership using a binary search. For larger sets, we use --- a lookup table. --- ------------------------------------------------------------------------------ -module Data.Attoparsec.ByteString.FastSet - ( - -- * Data type - FastSet - -- * Construction - , fromList - , set - -- * Lookup - , memberChar - , memberWord8 - -- * Debugging - , fromSet - -- * Handy interface - , charClass - ) where - -import Data.Bits ((.&.), (.|.)) -import Foreign.Storable (peekByteOff, pokeByteOff) -import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#) -import GHC.Word (Word8(W8#)) -import qualified Data.ByteString as B -import qualified Data.ByteString.Char8 as B8 -import qualified Data.ByteString.Internal as I -import qualified Data.ByteString.Unsafe as U - -data FastSet = Sorted { fromSet :: !B.ByteString } - | Table { fromSet :: !B.ByteString } - deriving (Eq, Ord) - -instance Show FastSet where - show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s) - show (Table _) = "FastSet Table" - --- | The lower bound on the size of a lookup table. We choose this to --- balance table density against performance. -tableCutoff :: Int -tableCutoff = 8 - --- | Create a set. -set :: B.ByteString -> FastSet -set s | B.length s < tableCutoff = Sorted . B.sort $ s - | otherwise = Table . mkTable $ s - -fromList :: [Word8] -> FastSet -fromList = set . B.pack - -data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8 - -shiftR :: Int -> Int -> Int -shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#) - -shiftL :: Word8 -> Int -> Word8 -shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#)) - -index :: Int -> I -index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7)) -{-# INLINE index #-} - --- | Check the set for membership. -memberWord8 :: Word8 -> FastSet -> Bool -memberWord8 w (Table t) = - let I byte bit = index (fromIntegral w) - in U.unsafeIndex t byte .&. bit /= 0 -memberWord8 w (Sorted s) = search 0 (B.length s - 1) - where search lo hi - | hi < lo = False - | otherwise = - let mid = (lo + hi) `quot` 2 - in case compare w (U.unsafeIndex s mid) of - GT -> search (mid + 1) hi - LT -> search lo (mid - 1) - _ -> True - --- | Check the set for membership. Only works with 8-bit characters: --- characters above code point 255 will give wrong answers. -memberChar :: Char -> FastSet -> Bool -memberChar c = memberWord8 (I.c2w c) -{-# INLINE memberChar #-} - -mkTable :: B.ByteString -> B.ByteString -mkTable s = I.unsafeCreate 32 $ \t -> do - _ <- I.memset t 0 32 - U.unsafeUseAsCStringLen s $ \(p, l) -> - let loop n | n == l = return () - | otherwise = do - c <- peekByteOff p n :: IO Word8 - let I byte bit = index (fromIntegral c) - prev <- peekByteOff t byte :: IO Word8 - pokeByteOff t byte (prev .|. bit) - loop (n + 1) - in loop 0 - -charClass :: String -> FastSet -charClass = set . B8.pack . go - where go (a:'-':b:xs) = [a..b] ++ go xs - go (x:xs) = x : go xs - go _ = "" diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs deleted file mode 100644 index 4938ea87..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/ByteString/Internal.hs +++ /dev/null @@ -1,536 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP, GADTs, OverloadedStrings, RankNTypes, - RecordWildCards #-} --- | --- Module : Data.Attoparsec.ByteString.Internal --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient parser combinators for 'ByteString' strings, --- loosely based on the Parsec library. - -module Data.Attoparsec.ByteString.Internal - ( - -- * Parser types - Parser - , Result - - -- * Running parsers - , parse - , parseOnly - - -- * Combinators - , module Data.Attoparsec.Combinator - - -- * Parsing individual bytes - , satisfy - , satisfyWith - , anyWord8 - , skip - , word8 - , notWord8 - - -- ** Lookahead - , peekWord8 - , peekWord8' - - -- ** Byte classes - , inClass - , notInClass - - -- * Parsing more complicated structures - , storable - - -- * Efficient string handling - , skipWhile - , string - , stringCI - , take - , scan - , runScanner - , takeWhile - , takeWhile1 - , takeTill - - -- ** Consume all remaining input - , takeByteString - , takeLazyByteString - - -- * Utilities - , endOfLine - , endOfInput - , match - , atEnd - ) where - -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative ((<$>)) -#endif -import Control.Applicative ((<|>)) -import Control.Monad (when) -import Data.Attoparsec.ByteString.Buffer (Buffer, buffer) -import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8) -import Data.Attoparsec.Combinator (()) -import Data.Attoparsec.Internal -import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO) -import Data.Attoparsec.Internal.Types hiding (Parser, Failure, Success) -import Data.ByteString (ByteString) -import Data.List (intercalate) -import Data.Word (Word8) -import Foreign.ForeignPtr (withForeignPtr) -import Foreign.Ptr (castPtr, minusPtr, plusPtr) -import Foreign.Storable (Storable(peek, sizeOf)) -import Prelude hiding (getChar, succ, take, takeWhile) -import qualified Data.Attoparsec.ByteString.Buffer as Buf -import qualified Data.Attoparsec.Internal.Types as T -import qualified Data.ByteString as B8 -import qualified Data.ByteString.Char8 as B -import qualified Data.ByteString.Internal as B -import qualified Data.ByteString.Lazy as L -import qualified Data.ByteString.Unsafe as B - -type Parser = T.Parser ByteString -type Result = IResult ByteString -type Failure r = T.Failure ByteString Buffer r -type Success a r = T.Success ByteString Buffer a r - --- | The parser @satisfy p@ succeeds for any byte for which the --- predicate @p@ returns 'True'. Returns the byte that is actually --- parsed. --- --- >digit = satisfy isDigit --- > where isDigit w = w >= 48 && w <= 57 -satisfy :: (Word8 -> Bool) -> Parser Word8 -satisfy p = do - h <- peekWord8' - if p h - then advance 1 >> return h - else fail "satisfy" -{-# INLINE satisfy #-} - --- | The parser @skip p@ succeeds for any byte for which the predicate --- @p@ returns 'True'. --- --- >skipDigit = skip isDigit --- > where isDigit w = w >= 48 && w <= 57 -skip :: (Word8 -> Bool) -> Parser () -skip p = do - h <- peekWord8' - if p h - then advance 1 - else fail "skip" - --- | The parser @satisfyWith f p@ transforms a byte, and succeeds if --- the predicate @p@ returns 'True' on the transformed value. The --- parser returns the transformed byte that was parsed. -satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a -satisfyWith f p = do - h <- peekWord8' - let c = f h - if p c - then advance 1 >> return c - else fail "satisfyWith" -{-# INLINE satisfyWith #-} - -storable :: Storable a => Parser a -storable = hack undefined - where - hack :: Storable b => b -> Parser b - hack dummy = do - (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy) - return . inlinePerformIO . withForeignPtr fp $ \p -> - peek (castPtr $ p `plusPtr` o) - --- | Consume exactly @n@ bytes of input. -take :: Int -> Parser ByteString -take n0 = do - let n = max n0 0 - s <- ensure n - advance n >> return s -{-# INLINE take #-} - --- | @string s@ parses a sequence of bytes that identically match --- @s@. Returns the parsed string (i.e. @s@). This parser consumes no --- input if it fails (even if a partial match). --- --- /Note/: The behaviour of this parser is different to that of the --- similarly-named parser in Parsec, as this one is all-or-nothing. --- To illustrate the difference, the following parser will fail under --- Parsec given an input of @\"for\"@: --- --- >string "foo" <|> string "for" --- --- The reason for its failure is that the first branch is a --- partial match, and will consume the letters @\'f\'@ and @\'o\'@ --- before failing. In attoparsec, the above parser will /succeed/ on --- that input, because the failed first branch will consume nothing. -string :: ByteString -> Parser ByteString -string s = string_ (stringSuspended id) id s -{-# INLINE string #-} - --- ASCII-specific but fast, oh yes. -toLower :: Word8 -> Word8 -toLower w | w >= 65 && w <= 90 = w + 32 - | otherwise = w - --- | Satisfy a literal string, ignoring case. -stringCI :: ByteString -> Parser ByteString -stringCI s = string_ (stringSuspended lower) lower s - where lower = B8.map toLower -{-# INLINE stringCI #-} - -string_ :: (forall r. ByteString -> ByteString -> Buffer -> Pos -> More - -> Failure r -> Success ByteString r -> Result r) - -> (ByteString -> ByteString) - -> ByteString -> Parser ByteString -string_ suspended f s0 = T.Parser $ \t pos more lose succ -> - let n = B.length s - s = f s0 - in if lengthAtLeast pos n t - then let t' = substring pos (Pos n) t - in if s == f t' - then succ t (pos + Pos n) more t' - else lose t pos more [] "string" - else let t' = Buf.unsafeDrop (fromPos pos) t - in if f t' `B.isPrefixOf` s - then suspended s (B.drop (B.length t') s) t pos more lose succ - else lose t pos more [] "string" -{-# INLINE string_ #-} - -stringSuspended :: (ByteString -> ByteString) - -> ByteString -> ByteString -> Buffer -> Pos -> More - -> Failure r - -> Success ByteString r - -> Result r -stringSuspended f s0 s t pos more lose succ = - runParser (demandInput_ >>= go) t pos more lose succ - where go s'0 = T.Parser $ \t' pos' more' lose' succ' -> - let m = B.length s - s' = f s'0 - n = B.length s' - in if n >= m - then if B.unsafeTake m s' == s - then let o = Pos (B.length s0) - in succ' t' (pos' + o) more' - (substring pos' o t') - else lose' t' pos' more' [] "string" - else if s' == B.unsafeTake n s - then stringSuspended f s0 (B.unsafeDrop n s) - t' pos' more' lose' succ' - else lose' t' pos' more' [] "string" - --- | Skip past input for as long as the predicate returns 'True'. -skipWhile :: (Word8 -> Bool) -> Parser () -skipWhile p = go - where - go = do - t <- B8.takeWhile p <$> get - continue <- inputSpansChunks (B.length t) - when continue go -{-# INLINE skipWhile #-} - --- | Consume input as long as the predicate returns 'False' --- (i.e. until it returns 'True'), and return the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'True' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -takeTill :: (Word8 -> Bool) -> Parser ByteString -takeTill p = takeWhile (not . p) -{-# INLINE takeTill #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'False' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -takeWhile :: (Word8 -> Bool) -> Parser ByteString -takeWhile p = do - s <- B8.takeWhile p <$> get - continue <- inputSpansChunks (B.length s) - if continue - then takeWhileAcc p [s] - else return s -{-# INLINE takeWhile #-} - -takeWhileAcc :: (Word8 -> Bool) -> [ByteString] -> Parser ByteString -takeWhileAcc p = go - where - go acc = do - s <- B8.takeWhile p <$> get - continue <- inputSpansChunks (B.length s) - if continue - then go (s:acc) - else return $ concatReverse (s:acc) -{-# INLINE takeWhileAcc #-} - -takeRest :: Parser [ByteString] -takeRest = go [] - where - go acc = do - input <- wantInput - if input - then do - s <- get - advance (B.length s) - go (s:acc) - else return (reverse acc) - --- | Consume all remaining input and return it as a single string. -takeByteString :: Parser ByteString -takeByteString = B.concat `fmap` takeRest - --- | Consume all remaining input and return it as a single string. -takeLazyByteString :: Parser L.ByteString -takeLazyByteString = L.fromChunks `fmap` takeRest - -data T s = T {-# UNPACK #-} !Int s - -scan_ :: (s -> [ByteString] -> Parser r) -> s -> (s -> Word8 -> Maybe s) - -> Parser r -scan_ f s0 p = go [] s0 - where - go acc s1 = do - let scanner (B.PS fp off len) = - withForeignPtr fp $ \ptr0 -> do - let start = ptr0 `plusPtr` off - end = start `plusPtr` len - inner ptr !s - | ptr < end = do - w <- peek ptr - case p s w of - Just s' -> inner (ptr `plusPtr` 1) s' - _ -> done (ptr `minusPtr` start) s - | otherwise = done (ptr `minusPtr` start) s - done !i !s = return (T i s) - inner start s1 - bs <- get - let T i s' = inlinePerformIO $ scanner bs - !h = B.unsafeTake i bs - continue <- inputSpansChunks i - if continue - then go (h:acc) s' - else f s' (h:acc) -{-# INLINE scan_ #-} - --- | A stateful scanner. The predicate consumes and transforms a --- state argument, and each transformed state is passed to successive --- invocations of the predicate on each byte of the input until one --- returns 'Nothing' or the input ends. --- --- This parser does not fail. It will return an empty string if the --- predicate returns 'Nothing' on the first byte of input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -scan :: s -> (s -> Word8 -> Maybe s) -> Parser ByteString -scan = scan_ $ \_ chunks -> return $! concatReverse chunks -{-# INLINE scan #-} - --- | Like 'scan', but generalized to return the final state of the --- scanner. -runScanner :: s -> (s -> Word8 -> Maybe s) -> Parser (ByteString, s) -runScanner = scan_ $ \s xs -> let !sx = concatReverse xs in return (sx, s) -{-# INLINE runScanner #-} - --- | Consume input as long as the predicate returns 'True', and return --- the consumed input. --- --- This parser requires the predicate to succeed on at least one byte --- of input: it will fail if the predicate never returns 'True' or if --- there is no input left. -takeWhile1 :: (Word8 -> Bool) -> Parser ByteString -takeWhile1 p = do - (`when` demandInput) =<< endOfChunk - s <- B8.takeWhile p <$> get - let len = B.length s - if len == 0 - then fail "takeWhile1" - else do - advance len - eoc <- endOfChunk - if eoc - then takeWhileAcc p [s] - else return s -{-# INLINE takeWhile1 #-} - --- | Match any byte in a set. --- --- >vowel = inClass "aeiou" --- --- Range notation is supported. --- --- >halfAlphabet = inClass "a-nA-N" --- --- To add a literal @\'-\'@ to a set, place it at the beginning or end --- of the string. -inClass :: String -> Word8 -> Bool -inClass s = (`memberWord8` mySet) - where mySet = charClass s - {-# NOINLINE mySet #-} -{-# INLINE inClass #-} - --- | Match any byte not in a set. -notInClass :: String -> Word8 -> Bool -notInClass s = not . inClass s -{-# INLINE notInClass #-} - --- | Match any byte. -anyWord8 :: Parser Word8 -anyWord8 = satisfy $ const True -{-# INLINE anyWord8 #-} - --- | Match a specific byte. -word8 :: Word8 -> Parser Word8 -word8 c = satisfy (== c) show c -{-# INLINE word8 #-} - --- | Match any byte except the given one. -notWord8 :: Word8 -> Parser Word8 -notWord8 c = satisfy (/= c) "not " ++ show c -{-# INLINE notWord8 #-} - --- | Match any byte, to perform lookahead. Returns 'Nothing' if end of --- input has been reached. Does not consume any input. --- --- /Note/: Because this parser does not fail, do not use it with --- combinators such as 'Control.Applicative.many', because such --- parsers loop until a failure occurs. Careless use will thus result --- in an infinite loop. -peekWord8 :: Parser (Maybe Word8) -peekWord8 = T.Parser $ \t pos@(Pos pos_) more _lose succ -> - case () of - _| pos_ < Buf.length t -> - let !w = Buf.unsafeIndex t pos_ - in succ t pos more (Just w) - | more == Complete -> - succ t pos more Nothing - | otherwise -> - let succ' t' pos' more' = let !w = Buf.unsafeIndex t' pos_ - in succ t' pos' more' (Just w) - lose' t' pos' more' = succ t' pos' more' Nothing - in prompt t pos more lose' succ' -{-# INLINE peekWord8 #-} - --- | Match any byte, to perform lookahead. Does not consume any --- input, but will fail if end of input has been reached. -peekWord8' :: Parser Word8 -peekWord8' = T.Parser $ \t pos more lose succ -> - if lengthAtLeast pos 1 t - then succ t pos more (Buf.unsafeIndex t (fromPos pos)) - else let succ' t' pos' more' bs' = succ t' pos' more' $! B.unsafeHead bs' - in ensureSuspended 1 t pos more lose succ' -{-# INLINE peekWord8' #-} - --- | Match either a single newline character @\'\\n\'@, or a carriage --- return followed by a newline character @\"\\r\\n\"@. -endOfLine :: Parser () -endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ()) - --- | Terminal failure continuation. -failK :: Failure a -failK t (Pos pos) _more stack msg = Fail (Buf.unsafeDrop pos t) stack msg -{-# INLINE failK #-} - --- | Terminal success continuation. -successK :: Success a a -successK t (Pos pos) _more a = Done (Buf.unsafeDrop pos t) a -{-# INLINE successK #-} - --- | Run a parser. -parse :: Parser a -> ByteString -> Result a -parse m s = T.runParser m (buffer s) (Pos 0) Incomplete failK successK -{-# INLINE parse #-} - --- | Run a parser that cannot be resupplied via a 'Partial' result. --- --- This function does not force a parser to consume all of its input. --- Instead, any residual input will be discarded. To force a parser --- to consume all of its input, use something like this: --- --- @ ---'parseOnly' (myParser 'Control.Applicative.<*' 'endOfInput') --- @ -parseOnly :: Parser a -> ByteString -> Either String a -parseOnly m s = case T.runParser m (buffer s) (Pos 0) Complete failK successK of - Fail _ [] err -> Left err - Fail _ ctxs err -> Left (intercalate " > " ctxs ++ ": " ++ err) - Done _ a -> Right a - _ -> error "parseOnly: impossible error!" -{-# INLINE parseOnly #-} - -get :: Parser ByteString -get = T.Parser $ \t pos more _lose succ -> - succ t pos more (Buf.unsafeDrop (fromPos pos) t) -{-# INLINE get #-} - -endOfChunk :: Parser Bool -endOfChunk = T.Parser $ \t pos more _lose succ -> - succ t pos more (fromPos pos == Buf.length t) -{-# INLINE endOfChunk #-} - -inputSpansChunks :: Int -> Parser Bool -inputSpansChunks i = T.Parser $ \t pos_ more _lose succ -> - let pos = pos_ + Pos i - in if fromPos pos < Buf.length t || more == Complete - then succ t pos more False - else let lose' t' pos' more' = succ t' pos' more' False - succ' t' pos' more' = succ t' pos' more' True - in prompt t pos more lose' succ' -{-# INLINE inputSpansChunks #-} - -advance :: Int -> Parser () -advance n = T.Parser $ \t pos more _lose succ -> - succ t (pos + Pos n) more () -{-# INLINE advance #-} - -ensureSuspended :: Int -> Buffer -> Pos -> More - -> Failure r - -> Success ByteString r - -> Result r -ensureSuspended n t pos more lose succ = - runParser (demandInput >> go) t pos more lose succ - where go = T.Parser $ \t' pos' more' lose' succ' -> - if lengthAtLeast pos' n t' - then succ' t' pos' more' (substring pos (Pos n) t') - else runParser (demandInput >> go) t' pos' more' lose' succ' - --- | If at least @n@ elements of input are available, return the --- current input, otherwise fail. -ensure :: Int -> Parser ByteString -ensure n = T.Parser $ \t pos more lose succ -> - if lengthAtLeast pos n t - then succ t pos more (substring pos (Pos n) t) - -- The uncommon case is kept out-of-line to reduce code size: - else ensureSuspended n t pos more lose succ -{-# INLINE ensure #-} - --- | Return both the result of a parse and the portion of the input --- that was consumed while it was being parsed. -match :: Parser a -> Parser (ByteString, a) -match p = T.Parser $ \t pos more lose succ -> - let succ' t' pos' more' a = - succ t' pos' more' (substring pos (pos'-pos) t', a) - in runParser p t pos more lose succ' - -lengthAtLeast :: Pos -> Int -> Buffer -> Bool -lengthAtLeast (Pos pos) n bs = Buf.length bs >= pos + n -{-# INLINE lengthAtLeast #-} - -substring :: Pos -> Pos -> Buffer -> ByteString -substring (Pos pos) (Pos n) = Buf.substring pos n -{-# INLINE substring #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Combinator.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Combinator.hs deleted file mode 100644 index dde0c27a..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Combinator.hs +++ /dev/null @@ -1,233 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP #-} -#if __GLASGOW_HASKELL__ >= 702 -{-# LANGUAGE Trustworthy #-} -- Imports internal modules -#endif --- | --- Module : Data.Attoparsec.Combinator --- Copyright : Daan Leijen 1999-2001, Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : portable --- --- Useful parser combinators, similar to those provided by Parsec. -module Data.Attoparsec.Combinator - ( - -- * Combinators - try - , () - , choice - , count - , option - , many' - , many1 - , many1' - , manyTill - , manyTill' - , sepBy - , sepBy' - , sepBy1 - , sepBy1' - , skipMany - , skipMany1 - , eitherP - , feed - , satisfyElem - , endOfInput - , atEnd - , lookAhead - ) where - -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative (Applicative(..), (<$>)) -import Data.Monoid (Monoid(mappend)) -#endif -import Control.Applicative (Alternative(..), empty, liftA2, many, (<|>)) -import Control.Monad (MonadPlus(..)) -import Data.Attoparsec.Internal.Types (Parser(..), IResult(..)) -import Data.Attoparsec.Internal (endOfInput, atEnd, satisfyElem) -import Data.ByteString (ByteString) -import Prelude hiding (succ) - --- | Attempt a parse, and if it fails, rewind the input so that no --- input appears to have been consumed. --- --- This combinator is provided for compatibility with Parsec. --- attoparsec parsers always backtrack on failure. -try :: Parser i a -> Parser i a -try p = p -{-# INLINE try #-} - --- | Name the parser, in case failure occurs. -() :: Parser i a - -> String -- ^ the name to use if parsing fails - -> Parser i a -p msg0 = Parser $ \t pos more lose succ -> - let lose' t' pos' more' strs msg = lose t' pos' more' (msg0:strs) msg - in runParser p t pos more lose' succ -{-# INLINE () #-} -infix 0 - --- | @choice ps@ tries to apply the actions in the list @ps@ in order, --- until one of them succeeds. Returns the value of the succeeding --- action. -choice :: Alternative f => [f a] -> f a -choice = foldr (<|>) empty -{-# SPECIALIZE choice :: [Parser ByteString a] - -> Parser ByteString a #-} - --- | @option x p@ tries to apply action @p@. If @p@ fails without --- consuming input, it returns the value @x@, otherwise the value --- returned by @p@. --- --- > priority = option 0 (digitToInt <$> digit) -option :: Alternative f => a -> f a -> f a -option x p = p <|> pure x -{-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-} - --- | A version of 'liftM2' that is strict in the result of its first --- action. -liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c -liftM2' f a b = do - !x <- a - y <- b - return (f x y) -{-# INLINE liftM2' #-} - --- | @many' p@ applies the action @p@ /zero/ or more times. Returns a --- list of the returned values of @p@. The value returned by @p@ is --- forced to WHNF. --- --- > word = many' letter -many' :: (MonadPlus m) => m a -> m [a] -many' p = many_p - where many_p = some_p `mplus` return [] - some_p = liftM2' (:) p many_p -{-# INLINE many' #-} - --- | @many1 p@ applies the action @p@ /one/ or more times. Returns a --- list of the returned values of @p@. --- --- > word = many1 letter -many1 :: Alternative f => f a -> f [a] -many1 p = liftA2 (:) p (many p) -{-# INLINE many1 #-} - --- | @many1' p@ applies the action @p@ /one/ or more times. Returns a --- list of the returned values of @p@. The value returned by @p@ is --- forced to WHNF. --- --- > word = many1' letter -many1' :: (MonadPlus m) => m a -> m [a] -many1' p = liftM2' (:) p (many' p) -{-# INLINE many1' #-} - --- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated --- by @sep@. Returns a list of the values returned by @p@. --- --- > commaSep p = p `sepBy` (char ',') -sepBy :: Alternative f => f a -> f s -> f [a] -sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure [] -{-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s - -> Parser ByteString [a] #-} - --- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated --- by @sep@. Returns a list of the values returned by @p@. The value --- returned by @p@ is forced to WHNF. --- --- > commaSep p = p `sepBy'` (char ',') -sepBy' :: (MonadPlus m) => m a -> m s -> m [a] -sepBy' p s = scan `mplus` return [] - where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return []) -{-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s - -> Parser ByteString [a] #-} - --- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated --- by @sep@. Returns a list of the values returned by @p@. --- --- > commaSep p = p `sepBy1` (char ',') -sepBy1 :: Alternative f => f a -> f s -> f [a] -sepBy1 p s = scan - where scan = liftA2 (:) p ((s *> scan) <|> pure []) -{-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s - -> Parser ByteString [a] #-} - --- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated --- by @sep@. Returns a list of the values returned by @p@. The value --- returned by @p@ is forced to WHNF. --- --- > commaSep p = p `sepBy1'` (char ',') -sepBy1' :: (MonadPlus m) => m a -> m s -> m [a] -sepBy1' p s = scan - where scan = liftM2' (:) p ((s >> scan) `mplus` return []) -{-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s - -> Parser ByteString [a] #-} - --- | @manyTill p end@ applies action @p@ /zero/ or more times until --- action @end@ succeeds, and returns the list of values returned by --- @p@. This can be used to scan comments: --- --- > simpleComment = string "") --- --- (Note the overlapping parsers @anyChar@ and @string \"-->\"@. --- While this will work, it is not very efficient, as it will cause a --- lot of backtracking.) -manyTill :: Alternative f => f a -> f b -> f [a] -manyTill p end = scan - where scan = (end *> pure []) <|> liftA2 (:) p scan -{-# SPECIALIZE manyTill :: Parser ByteString a -> Parser ByteString b - -> Parser ByteString [a] #-} - --- | @manyTill' p end@ applies action @p@ /zero/ or more times until --- action @end@ succeeds, and returns the list of values returned by --- @p@. This can be used to scan comments: --- --- > simpleComment = string "") --- --- (Note the overlapping parsers @anyChar@ and @string \"-->\"@. --- While this will work, it is not very efficient, as it will cause a --- lot of backtracking.) --- --- The value returned by @p@ is forced to WHNF. -manyTill' :: (MonadPlus m) => m a -> m b -> m [a] -manyTill' p end = scan - where scan = (end >> return []) `mplus` liftM2' (:) p scan -{-# SPECIALIZE manyTill' :: Parser ByteString a -> Parser ByteString b - -> Parser ByteString [a] #-} - --- | Skip zero or more instances of an action. -skipMany :: Alternative f => f a -> f () -skipMany p = scan - where scan = (p *> scan) <|> pure () -{-# SPECIALIZE skipMany :: Parser ByteString a -> Parser ByteString () #-} - --- | Skip one or more instances of an action. -skipMany1 :: Alternative f => f a -> f () -skipMany1 p = p *> skipMany p -{-# SPECIALIZE skipMany1 :: Parser ByteString a -> Parser ByteString () #-} - --- | Apply the given action repeatedly, returning every result. -count :: Monad m => Int -> m a -> m [a] -count n p = sequence (replicate n p) -{-# INLINE count #-} - --- | Combine two alternatives. -eitherP :: (Alternative f) => f a -> f b -> f (Either a b) -eitherP a b = (Left <$> a) <|> (Right <$> b) -{-# INLINE eitherP #-} - --- | If a parser has returned a 'T.Partial' result, supply it with more --- input. -feed :: Monoid i => IResult i r -> i -> IResult i r -feed (Fail t ctxs msg) d = Fail (mappend t d) ctxs msg -feed (Partial k) d = k d -feed (Done t r) d = Done (mappend t d) r -{-# INLINE feed #-} - --- | Apply a parser without consuming any input. -lookAhead :: Parser i a -> Parser i a -lookAhead p = Parser $ \t pos more lose succ -> - let succ' t' _pos' more' = succ t' pos more' - in runParser p t pos more lose succ' -{-# INLINE lookAhead #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal.hs deleted file mode 100644 index ee758b26..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal.hs +++ /dev/null @@ -1,157 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} --- | --- Module : Data.Attoparsec.Internal --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient parser combinators, loosely based on the Parsec --- library. - -module Data.Attoparsec.Internal - ( compareResults - , prompt - , demandInput - , demandInput_ - , wantInput - , endOfInput - , atEnd - , satisfyElem - , concatReverse - ) where - -#if !MIN_VERSION_base(4,8,0) -import Control.Applicative ((<$>)) -import Data.Monoid (Monoid, mconcat) -#endif -import Data.Attoparsec.Internal.Types -import Data.ByteString (ByteString) -import Prelude hiding (succ) - --- | Compare two 'IResult' values for equality. --- --- If both 'IResult's are 'Partial', the result will be 'Nothing', as --- they are incomplete and hence their equality cannot be known. --- (This is why there is no 'Eq' instance for 'IResult'.) -compareResults :: (Eq i, Eq r) => IResult i r -> IResult i r -> Maybe Bool -compareResults (Fail t0 ctxs0 msg0) (Fail t1 ctxs1 msg1) = - Just (t0 == t1 && ctxs0 == ctxs1 && msg0 == msg1) -compareResults (Done t0 r0) (Done t1 r1) = - Just (t0 == t1 && r0 == r1) -compareResults (Partial _) (Partial _) = Nothing -compareResults _ _ = Just False - --- | Ask for input. If we receive any, pass the augmented input to a --- success continuation, otherwise to a failure continuation. -prompt :: Chunk t - => State t -> Pos -> More - -> (State t -> Pos -> More -> IResult t r) - -> (State t -> Pos -> More -> IResult t r) - -> IResult t r -prompt t pos _more lose succ = Partial $ \s -> - if nullChunk s - then lose t pos Complete - else succ (pappendChunk t s) pos Incomplete -{-# SPECIALIZE prompt :: State ByteString -> Pos -> More - -> (State ByteString -> Pos -> More - -> IResult ByteString r) - -> (State ByteString -> Pos -> More - -> IResult ByteString r) - -> IResult ByteString r #-} - --- | Immediately demand more input via a 'Partial' continuation --- result. -demandInput :: Chunk t => Parser t () -demandInput = Parser $ \t pos more lose succ -> - case more of - Complete -> lose t pos more [] "not enough input" - _ -> let lose' _ pos' more' = lose t pos' more' [] "not enough input" - succ' t' pos' more' = succ t' pos' more' () - in prompt t pos more lose' succ' -{-# SPECIALIZE demandInput :: Parser ByteString () #-} - --- | Immediately demand more input via a 'Partial' continuation --- result. Return the new input. -demandInput_ :: Chunk t => Parser t t -demandInput_ = Parser $ \t pos more lose succ -> - case more of - Complete -> lose t pos more [] "not enough input" - _ -> Partial $ \s -> - if nullChunk s - then lose t pos Complete [] "not enough input" - else succ (pappendChunk t s) pos more s -{-# SPECIALIZE demandInput_ :: Parser ByteString ByteString #-} - --- | This parser always succeeds. It returns 'True' if any input is --- available either immediately or on demand, and 'False' if the end --- of all input has been reached. -wantInput :: forall t . Chunk t => Parser t Bool -wantInput = Parser $ \t pos more _lose succ -> - case () of - _ | pos < atBufferEnd (undefined :: t) t -> succ t pos more True - | more == Complete -> succ t pos more False - | otherwise -> let lose' t' pos' more' = succ t' pos' more' False - succ' t' pos' more' = succ t' pos' more' True - in prompt t pos more lose' succ' -{-# INLINE wantInput #-} - --- | Match only if all input has been consumed. -endOfInput :: forall t . Chunk t => Parser t () -endOfInput = Parser $ \t pos more lose succ -> - case () of - _| pos < atBufferEnd (undefined :: t) t -> lose t pos more [] "endOfInput" - | more == Complete -> succ t pos more () - | otherwise -> - let lose' t' pos' more' _ctx _msg = succ t' pos' more' () - succ' t' pos' more' _a = lose t' pos' more' [] "endOfInput" - in runParser demandInput t pos more lose' succ' -{-# SPECIALIZE endOfInput :: Parser ByteString () #-} - --- | Return an indication of whether the end of input has been --- reached. -atEnd :: Chunk t => Parser t Bool -atEnd = not <$> wantInput -{-# INLINE atEnd #-} - -satisfySuspended :: forall t r . Chunk t - => (ChunkElem t -> Bool) - -> State t -> Pos -> More - -> Failure t (State t) r - -> Success t (State t) (ChunkElem t) r - -> IResult t r -satisfySuspended p t pos more lose succ = - runParser (demandInput >> go) t pos more lose succ - where go = Parser $ \t' pos' more' lose' succ' -> - case bufferElemAt (undefined :: t) pos' t' of - Just (e, l) | p e -> succ' t' (pos' + Pos l) more' e - | otherwise -> lose' t' pos' more' [] "satisfyElem" - Nothing -> runParser (demandInput >> go) t' pos' more' lose' succ' -{-# SPECIALIZE satisfySuspended :: (ChunkElem ByteString -> Bool) - -> State ByteString -> Pos -> More - -> Failure ByteString (State ByteString) r - -> Success ByteString (State ByteString) - (ChunkElem ByteString) r - -> IResult ByteString r #-} - --- | The parser @satisfyElem p@ succeeds for any chunk element for which the --- predicate @p@ returns 'True'. Returns the element that is --- actually parsed. -satisfyElem :: forall t . Chunk t - => (ChunkElem t -> Bool) -> Parser t (ChunkElem t) -satisfyElem p = Parser $ \t pos more lose succ -> - case bufferElemAt (undefined :: t) pos t of - Just (e, l) | p e -> succ t (pos + Pos l) more e - | otherwise -> lose t pos more [] "satisfyElem" - Nothing -> satisfySuspended p t pos more lose succ -{-# INLINE satisfyElem #-} - --- | Concatenate a monoid after reversing its elements. Used to --- glue together a series of textual chunks that have been accumulated --- \"backwards\". -concatReverse :: Monoid m => [m] -> m -concatReverse [x] = x -concatReverse xs = mconcat (reverse xs) -{-# INLINE concatReverse #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Fhthagn.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Fhthagn.hs deleted file mode 100644 index 0e00ed2c..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Fhthagn.hs +++ /dev/null @@ -1,18 +0,0 @@ -{-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings, - RecordWildCards, MagicHash, UnboxedTuples #-} - -module Data.Attoparsec.Internal.Fhthagn - ( - inlinePerformIO - ) where - -import GHC.Base (realWorld#) -import GHC.IO (IO(IO)) - --- | Just like unsafePerformIO, but we inline it. Big performance gains as --- it exposes lots of things to further inlining. /Very unsafe/. In --- particular, you should do no memory allocation inside an --- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@. -inlinePerformIO :: IO a -> a -inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r -{-# INLINE inlinePerformIO #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Types.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Types.hs deleted file mode 100644 index 96bc319e..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Internal/Types.hs +++ /dev/null @@ -1,243 +0,0 @@ -{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, OverloadedStrings, - Rank2Types, RecordWildCards, TypeFamilies #-} --- | --- Module : Data.Attoparsec.Internal.Types --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- Simple, efficient parser combinators, loosely based on the Parsec --- library. - -module Data.Attoparsec.Internal.Types - ( - Parser(..) - , State - , Failure - , Success - , Pos(..) - , IResult(..) - , More(..) - , (<>) - , Chunk(..) - ) where - -import Control.Applicative as App (Applicative(..), (<$>)) -import Control.Applicative (Alternative(..)) -import Control.DeepSeq (NFData(rnf)) -import Control.Monad (MonadPlus(..)) -import qualified Control.Monad.Fail as Fail (MonadFail(..)) -import Data.Monoid as Mon (Monoid(..)) -import Data.Semigroup (Semigroup(..)) -import Data.Word (Word8) -import Data.ByteString (ByteString) -import qualified Data.ByteString as BS -import Data.ByteString.Internal (w2c) -import Prelude hiding (getChar, succ) -import qualified Data.Attoparsec.ByteString.Buffer as B - -newtype Pos = Pos { fromPos :: Int } - deriving (Eq, Ord, Show, Num) - --- | The result of a parse. This is parameterised over the type @i@ --- of string that was processed. --- --- This type is an instance of 'Functor', where 'fmap' transforms the --- value in a 'Done' result. -data IResult i r = - Fail i [String] String - -- ^ The parse failed. The @i@ parameter is the input that had - -- not yet been consumed when the failure occurred. The - -- @[@'String'@]@ is a list of contexts in which the error - -- occurred. The 'String' is the message describing the error, if - -- any. - | Partial (i -> IResult i r) - -- ^ Supply this continuation with more input so that the parser - -- can resume. To indicate that no more input is available, pass - -- an empty string to the continuation. - -- - -- __Note__: if you get a 'Partial' result, do not call its - -- continuation more than once. - | Done i r - -- ^ The parse succeeded. The @i@ parameter is the input that had - -- not yet been consumed (if any) when the parse succeeded. - -instance (Show i, Show r) => Show (IResult i r) where - showsPrec d ir = showParen (d > 10) $ - case ir of - (Fail t stk msg) -> showString "Fail" . f t . f stk . f msg - (Partial _) -> showString "Partial _" - (Done t r) -> showString "Done" . f t . f r - where f :: Show a => a -> ShowS - f x = showChar ' ' . showsPrec 11 x - -instance (NFData i, NFData r) => NFData (IResult i r) where - rnf (Fail t stk msg) = rnf t `seq` rnf stk `seq` rnf msg - rnf (Partial _) = () - rnf (Done t r) = rnf t `seq` rnf r - {-# INLINE rnf #-} - -instance Functor (IResult i) where - fmap _ (Fail t stk msg) = Fail t stk msg - fmap f (Partial k) = Partial (fmap f . k) - fmap f (Done t r) = Done t (f r) - --- | The core parser type. This is parameterised over the type @i@ --- of string being processed. --- --- This type is an instance of the following classes: --- --- * 'Monad', where 'fail' throws an exception (i.e. fails) with an --- error message. --- --- * 'Functor' and 'Applicative', which follow the usual definitions. --- --- * 'MonadPlus', where 'mzero' fails (with no error message) and --- 'mplus' executes the right-hand parser if the left-hand one --- fails. When the parser on the right executes, the input is reset --- to the same state as the parser on the left started with. (In --- other words, attoparsec is a backtracking parser that supports --- arbitrary lookahead.) --- --- * 'Alternative', which follows 'MonadPlus'. -newtype Parser i a = Parser { - runParser :: forall r. - State i -> Pos -> More - -> Failure i (State i) r - -> Success i (State i) a r - -> IResult i r - } - -type family State i -type instance State ByteString = B.Buffer - -type Failure i t r = t -> Pos -> More -> [String] -> String - -> IResult i r -type Success i t a r = t -> Pos -> More -> a -> IResult i r - --- | Have we read all available input? -data More = Complete | Incomplete - deriving (Eq, Show) - -instance Semigroup More where - c@Complete <> _ = c - _ <> m = m - -instance Mon.Monoid More where - mappend = (<>) - mempty = Incomplete - -instance Monad (Parser i) where - fail = Fail.fail - {-# INLINE fail #-} - - return = App.pure - {-# INLINE return #-} - - m >>= k = Parser $ \t !pos more lose succ -> - let succ' t' !pos' more' a = runParser (k a) t' pos' more' lose succ - in runParser m t pos more lose succ' - {-# INLINE (>>=) #-} - - (>>) = (*>) - {-# INLINE (>>) #-} - - -instance Fail.MonadFail (Parser i) where - fail err = Parser $ \t pos more lose _succ -> lose t pos more [] msg - where msg = "Failed reading: " ++ err - {-# INLINE fail #-} - -plus :: Parser i a -> Parser i a -> Parser i a -plus f g = Parser $ \t pos more lose succ -> - let lose' t' _pos' more' _ctx _msg = runParser g t' pos more' lose succ - in runParser f t pos more lose' succ - -instance MonadPlus (Parser i) where - mzero = fail "mzero" - {-# INLINE mzero #-} - mplus = plus - -instance Functor (Parser i) where - fmap f p = Parser $ \t pos more lose succ -> - let succ' t' pos' more' a = succ t' pos' more' (f a) - in runParser p t pos more lose succ' - {-# INLINE fmap #-} - -apP :: Parser i (a -> b) -> Parser i a -> Parser i b -apP d e = do - b <- d - a <- e - return (b a) -{-# INLINE apP #-} - -instance Applicative (Parser i) where - pure v = Parser $ \t pos more _lose succ -> succ t pos more v - {-# INLINE pure #-} - (<*>) = apP - {-# INLINE (<*>) #-} - m *> k = m >>= \_ -> k - {-# INLINE (*>) #-} - x <* y = x >>= \a -> y >> pure a - {-# INLINE (<*) #-} - -instance Semigroup (Parser i a) where - (<>) = plus - {-# INLINE (<>) #-} - -instance Monoid (Parser i a) where - mempty = fail "mempty" - {-# INLINE mempty #-} - mappend = (<>) - {-# INLINE mappend #-} - -instance Alternative (Parser i) where - empty = fail "empty" - {-# INLINE empty #-} - - (<|>) = plus - {-# INLINE (<|>) #-} - - many v = many_v - where many_v = some_v <|> pure [] - some_v = (:) App.<$> v <*> many_v - {-# INLINE many #-} - - some v = some_v - where - many_v = some_v <|> pure [] - some_v = (:) <$> v <*> many_v - {-# INLINE some #-} - --- | A common interface for input chunks. -class Monoid c => Chunk c where - type ChunkElem c - -- | Test if the chunk is empty. - nullChunk :: c -> Bool - -- | Append chunk to a buffer. - pappendChunk :: State c -> c -> State c - -- | Position at the end of a buffer. The first argument is ignored. - atBufferEnd :: c -> State c -> Pos - -- | Return the buffer element at the given position along with its length. - bufferElemAt :: c -> Pos -> State c -> Maybe (ChunkElem c, Int) - -- | Map an element to the corresponding character. - -- The first argument is ignored. - chunkElemToChar :: c -> ChunkElem c -> Char - -instance Chunk ByteString where - type ChunkElem ByteString = Word8 - nullChunk = BS.null - {-# INLINE nullChunk #-} - pappendChunk = B.pappend - {-# INLINE pappendChunk #-} - atBufferEnd _ = Pos . B.length - {-# INLINE atBufferEnd #-} - bufferElemAt _ (Pos i) buf - | i < B.length buf = Just (B.unsafeIndex buf i, 1) - | otherwise = Nothing - {-# INLINE bufferElemAt #-} - chunkElemToChar _ = w2c - {-# INLINE chunkElemToChar #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Number.hs b/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Number.hs deleted file mode 100644 index d0970d90..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/Data/Attoparsec/Number.hs +++ /dev/null @@ -1,137 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable #-} --- | --- Module : Data.Attoparsec.Number --- Copyright : Bryan O'Sullivan 2007-2015 --- License : BSD3 --- --- Maintainer : bos@serpentine.com --- Stability : experimental --- Portability : unknown --- --- This module is deprecated, and both the module and 'Number' type --- will be removed in the next major release. Use the --- package --- and the 'Data.Scientific.Scientific' type instead. --- --- A simple number type, useful for parsing both exact and inexact --- quantities without losing much precision. -module Data.Attoparsec.Number - {-# DEPRECATED "This module will be removed in the next major release." #-} - ( - Number(..) - ) where - -import Control.DeepSeq (NFData(rnf)) -import Data.Data (Data) -import Data.Function (on) -import Data.Typeable (Typeable) - --- | A numeric type that can represent integers accurately, and --- floating point numbers to the precision of a 'Double'. --- --- /Note/: this type is deprecated, and will be removed in the next --- major release. Use the 'Data.Scientific.Scientific' type instead. -data Number = I !Integer - | D {-# UNPACK #-} !Double - deriving (Typeable, Data) -{-# DEPRECATED Number "Use Scientific instead." #-} - -instance Show Number where - show (I a) = show a - show (D a) = show a - -instance NFData Number where - rnf (I _) = () - rnf (D _) = () - {-# INLINE rnf #-} - -binop :: (Integer -> Integer -> a) -> (Double -> Double -> a) - -> Number -> Number -> a -binop _ d (D a) (D b) = d a b -binop i _ (I a) (I b) = i a b -binop _ d (D a) (I b) = d a (fromIntegral b) -binop _ d (I a) (D b) = d (fromIntegral a) b -{-# INLINE binop #-} - -instance Eq Number where - (==) = binop (==) (==) - {-# INLINE (==) #-} - - (/=) = binop (/=) (/=) - {-# INLINE (/=) #-} - -instance Ord Number where - (<) = binop (<) (<) - {-# INLINE (<) #-} - - (<=) = binop (<=) (<=) - {-# INLINE (<=) #-} - - (>) = binop (>) (>) - {-# INLINE (>) #-} - - (>=) = binop (>=) (>=) - {-# INLINE (>=) #-} - - compare = binop compare compare - {-# INLINE compare #-} - -instance Num Number where - (+) = binop (((I$!).) . (+)) (((D$!).) . (+)) - {-# INLINE (+) #-} - - (-) = binop (((I$!).) . (-)) (((D$!).) . (-)) - {-# INLINE (-) #-} - - (*) = binop (((I$!).) . (*)) (((D$!).) . (*)) - {-# INLINE (*) #-} - - abs (I a) = I $! abs a - abs (D a) = D $! abs a - {-# INLINE abs #-} - - negate (I a) = I $! negate a - negate (D a) = D $! negate a - {-# INLINE negate #-} - - signum (I a) = I $! signum a - signum (D a) = D $! signum a - {-# INLINE signum #-} - - fromInteger = (I$!) . fromInteger - {-# INLINE fromInteger #-} - -instance Real Number where - toRational (I a) = fromIntegral a - toRational (D a) = toRational a - {-# INLINE toRational #-} - -instance Fractional Number where - fromRational = (D$!) . fromRational - {-# INLINE fromRational #-} - - (/) = binop (((D$!).) . (/) `on` fromIntegral) - (((D$!).) . (/)) - {-# INLINE (/) #-} - - recip (I a) = D $! recip (fromIntegral a) - recip (D a) = D $! recip a - {-# INLINE recip #-} - -instance RealFrac Number where - properFraction (I a) = (fromIntegral a,0) - properFraction (D a) = case properFraction a of - (i,d) -> (i,D d) - {-# INLINE properFraction #-} - truncate (I a) = fromIntegral a - truncate (D a) = truncate a - {-# INLINE truncate #-} - round (I a) = fromIntegral a - round (D a) = round a - {-# INLINE round #-} - ceiling (I a) = fromIntegral a - ceiling (D a) = ceiling a - {-# INLINE ceiling #-} - floor (I a) = fromIntegral a - floor (D a) = floor a - {-# INLINE floor #-} diff --git a/haddock-library/vendor/attoparsec-0.13.1.0/LICENSE b/haddock-library/vendor/attoparsec-0.13.1.0/LICENSE deleted file mode 100644 index 97392a62..00000000 --- a/haddock-library/vendor/attoparsec-0.13.1.0/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) Lennart Kolmodin - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the author nor the names of his contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -- cgit v1.2.3