From 2448bd71609688be7b8bfe362a8534959531cd79 Mon Sep 17 00:00:00 2001 From: Simon Hengel Date: Sun, 8 Sep 2013 10:33:38 +0200 Subject: Fix totality, unicode, examples, paragraph parsing Also simplify specs and parsers while we're at it. Some parsers were made more generic. This commit is a part of GHC pre-merge squash, email fuuzetsu@fuuzetsu.co.uk if you need the full commit history. --- src/Haddock/Doc.hs | 57 +--- src/Haddock/Interface/LexParseRn.hs | 4 +- src/Haddock/Interface/ParseModuleHeader.hs | 4 +- src/Haddock/Parser.hs | 482 ++++++++++++----------------- 4 files changed, 216 insertions(+), 331 deletions(-) (limited to 'src/Haddock') diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs index 4d68c554..69b2dd6f 100644 --- a/src/Haddock/Doc.hs +++ b/src/Haddock/Doc.hs @@ -1,16 +1,14 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} module Haddock.Doc ( - docAppend, - docParagraph, - combineStringNodes, - combineDocumentation - ) where + docAppend +, docParagraph +, combineDocumentation +) where import Data.Maybe import Data.Monoid import Haddock.Types import Data.Char (isSpace) -import Control.Arrow ((***)) -- We put it here so that we can avoid a circular import -- anything relevant imports this module anyway @@ -22,25 +20,15 @@ combineDocumentation :: Documentation name -> Maybe (Doc name) combineDocumentation (Documentation Nothing Nothing) = Nothing combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc) --- used to make parsing easier; we group the list items later docAppend :: Doc id -> Doc id -> Doc id -docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) - = DocUnorderedList (ds1++ds2) -docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) - = DocAppend (DocUnorderedList (ds1++ds2)) d -docAppend (DocOrderedList ds1) (DocOrderedList ds2) - = DocOrderedList (ds1++ds2) -docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) - = DocAppend (DocOrderedList (ds1++ds2)) d -docAppend (DocDefList ds1) (DocDefList ds2) - = DocDefList (ds1++ds2) -docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) - = DocAppend (DocDefList (ds1++ds2)) d +docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) +docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d docAppend DocEmpty d = d docAppend d DocEmpty = d -docAppend d1 d2 - = DocAppend d1 d2 - +docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2) +docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2)) +docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d +docAppend d1 d2 = DocAppend d1 d2 -- again to make parsing easier - we spot a paragraph whose only item -- is a DocMonospaced and make it into a DocCodeBlock @@ -77,28 +65,3 @@ docCodeBlock (DocString s) docCodeBlock (DocAppend l r) = DocAppend l (docCodeBlock r) docCodeBlock d = d - --- | This is a hack that joins neighbouring 'DocString's into a single one. --- This is done to ease up the testing and doesn't change the final result --- as this would be done later anyway. -combineStringNodes :: Doc id -> Doc id -combineStringNodes (DocAppend (DocString x) (DocString y)) = DocString (x ++ y) -combineStringNodes (DocAppend (DocString x) (DocAppend (DocString y) z)) = - tryjoin (DocAppend (DocString (x ++ y)) (combineStringNodes z)) -combineStringNodes (DocAppend x y) = tryjoin (DocAppend (combineStringNodes x) (combineStringNodes y)) -combineStringNodes (DocParagraph x) = DocParagraph (combineStringNodes x) -combineStringNodes (DocWarning x) = DocWarning (combineStringNodes x) -combineStringNodes (DocEmphasis x) = DocEmphasis (combineStringNodes x) -combineStringNodes (DocMonospaced x) = DocMonospaced (combineStringNodes x) -combineStringNodes (DocUnorderedList xs) = DocUnorderedList (map combineStringNodes xs) -combineStringNodes (DocOrderedList x) = DocOrderedList (map combineStringNodes x) -combineStringNodes (DocDefList xs) = DocDefList (map (combineStringNodes *** combineStringNodes) xs) -combineStringNodes (DocCodeBlock x) = DocCodeBlock (combineStringNodes x) -combineStringNodes x = x - -tryjoin :: Doc id -> Doc id -tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y) -tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z -tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z)) - = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z)) -tryjoin x = x diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs index 13563532..8c33ade6 100644 --- a/src/Haddock/Interface/LexParseRn.hs +++ b/src/Haddock/Interface/LexParseRn.hs @@ -43,11 +43,11 @@ processDocStrings dflags gre strs = do processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name)) -processDocStringParas = process parseParas +processDocStringParas = process parseParasMaybe processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name)) -processDocString = process parseString +processDocString = process parseStringMaybe process :: (DynFlags -> String -> Maybe (Doc RdrName)) -> DynFlags diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs index 2e4fe73b..ade28728 100644 --- a/src/Haddock/Interface/ParseModuleHeader.hs +++ b/src/Haddock/Interface/ParseModuleHeader.hs @@ -46,13 +46,13 @@ parseModuleHeader dflags str0 = description1 :: Either String (Maybe (Doc RdrName)) description1 = case descriptionOpt of Nothing -> Right Nothing - Just description -> case parseString dflags description of + Just description -> case parseStringMaybe dflags description of Nothing -> Left ("Cannot parse Description: " ++ description) Just doc -> Right (Just doc) in case description1 of Left mess -> Left mess - Right docOpt -> case parseParas dflags str8 of + Right docOpt -> case parseParasMaybe dflags str8 of Nothing -> Left "Cannot parse header documentation paragraphs" Just doc -> Right (HaddockModInfo { hmi_description = docOpt, diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs index 43a2b169..fe8904d4 100644 --- a/src/Haddock/Parser.hs +++ b/src/Haddock/Parser.hs @@ -9,15 +9,15 @@ -- Stability : experimental -- Portability : portable -module Haddock.Parser (parseString, parseParas) where +module Haddock.Parser (parseString, parseParas, parseStringMaybe, parseParasMaybe) where +import Prelude hiding (takeWhile) +import Control.Monad (void, mfilter) import Control.Applicative -import Data.Attoparsec.ByteString hiding (parse, takeWhile1, take, inClass) -import qualified Data.Attoparsec.ByteString.Char8 as A8 -import Data.Attoparsec.ByteString.Char8 hiding (parse, take, string) -import qualified Data.ByteString as BS -import Data.Char (chr) -import Data.List (stripPrefix) +import Data.Attoparsec.ByteString.Char8 hiding (parse, take, string, endOfLine) +import qualified Data.ByteString.Char8 as BS +import Data.Char (chr, isAsciiUpper) +import Data.List (stripPrefix, intercalate) import Data.Maybe (fromMaybe) import Data.Monoid import DynFlags @@ -31,157 +31,117 @@ import SrcLoc (mkRealSrcLoc, unLoc) import StringBuffer (stringToStringBuffer) import Haddock.Utf8 -parse :: Parser a -> String -> Maybe a -parse p = either (const Nothing) Just . parseOnly (p <* endOfInput) . encodeUtf8 +{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-} +parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName) +parseParasMaybe d = Just . parseParas d + +{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-} +parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName) +parseStringMaybe d = Just . parseString d + +parse :: Parser a -> BS.ByteString -> a +parse p = either err id . parseOnly (p <* endOfInput) + where + err = error . ("Haddock.Parser.parse: " ++) -- | Main entry point to the parser. Appends the newline character -- to the input string. parseParas :: DynFlags -> String -- ^ String to parse - -> Maybe (Doc RdrName) -parseParas d = fmap combineStringNodes . parse (p <* skipSpace) . (++ "\n") + -> Doc RdrName +parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") where p :: Parser (Doc RdrName) - -- make sure that we don't swallow up whitespace belonging to next paragraph - p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n") - --- | A parser that parsers separate lines of the comments. Eventually --- called by 'parseParas'. Appends a newline character to the input string. --- Drops any whitespace in front of the input string. It's dropped for the sake of --- section headings. -parseString :: DynFlags -> String -> Maybe (Doc RdrName) -parseString d = parseString' d . dropWhile isSpace - --- | A parser that parsers separate lines of the comments. Eventually --- called by 'parseParas'. Appends a newline character to the input string. --- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use. -parseString'' :: DynFlags -> String -> Maybe (Doc RdrName) -parseString'' d = parseString' d . (++ "\n") - --- | An internal use function. Split from the 'parseString' is useful --- as we can specify separately when we want the newline to be appended. -parseString' :: DynFlags -> String -> Maybe (Doc RdrName) -parseString' d = fmap combineStringNodes . parse p + p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n") + +-- | Parse a text paragraph. +parseString :: DynFlags -> String -> Doc RdrName +parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace + +parseStringBS :: DynFlags -> BS.ByteString -> Doc RdrName +parseStringBS d = parse p where p :: Parser (Doc RdrName) - p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d - <|> moduleName <|> picture <|> url - <|> emphasis d <|> encodedChar <|> string' <|> skipChar) + p = mconcat <$> many (monospace d <|> anchor <|> identifier d + <|> moduleName <|> picture <|> hyperlink <|> autoUrl + <|> emphasis d <|> encodedChar <|> string' <|> skipSpecialChar) -- | Parses and processes -- -- -- >>> parseOnly encodedChar "ABC" -- Right (DocString "ABC") -encodedChar :: Parser (Doc RdrName) +encodedChar :: Parser (Doc a) encodedChar = "&#" *> c <* ";" where c = DocString . return . chr <$> num num = hex <|> decimal hex = ("x" <|> "X") *> hexadecimal +specialChar :: [Char] +specialChar = "/<@\"&'`" + -- | Plain, regular parser for text. Called as one of the last parsers -- to ensure that we have already given a chance to more meaningful parsers -- before capturing their characers. -string' :: Parser (Doc RdrName) -string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\") +string' :: Parser (Doc a) +string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar) + where + unescape "" = "" + unescape ('\\':x:xs) = x : unescape xs + unescape (x:xs) = x : unescape xs + +-- | Skips a single special character and treats it as a plain string. +-- This is done to skip over any special characters belonging to other +-- elements but which were not deemed meaningful at their positions. +skipSpecialChar :: Parser (Doc a) +skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) -- | Emphasis parser. -- -- >>> parseOnly emphasis "/Hello world/" -- Right (DocEmphasis (DocString "Hello world")) emphasis :: DynFlags -> Parser (Doc RdrName) -emphasis d = DocEmphasis <$> stringBlock d "/" "/" "\n" +emphasis d = DocEmphasis . parseStringBS d <$> + mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") --- | Skips a single character and treats it as a plain string. --- This is done to skip over any special characters belonging to other --- elements but which were not deemed meaningful at their positions. --- Note that this can only be used in places where we're absolutely certain --- no unicode is present, such as to skip a 100% certain ASCII delimeter. -skipChar :: Parser (Doc RdrName) -skipChar = DocString . return <$> anyChar +-- | Like `takeWhile`, but unconditionally take escaped characters. +takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile_ p = scan False p_ + where + p_ escaped c + | escaped = Just False + | not $ p c = Nothing + | otherwise = Just (c == '\\') --- | Treats the next character as a regular string, even if it's normally --- used for markup. -charEscape :: Parser (Doc RdrName) -charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n')) +-- | Like `takeWhile1`, but unconditionally take escaped characters. +takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile1_ = mfilter (not . BS.null) . takeWhile_ -- | Text anchors to allow for jumping around the generated documentation. -- -- >>> parseOnly anchor "#Hello world#" -- Right (DocAName "Hello world") -anchor :: Parser (Doc RdrName) +anchor :: Parser (Doc a) anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") --- | Helper for markup structures surrounded with delimiters. -stringBlock - :: DynFlags - -> String -- ^ Opening delimiter - -> String -- ^ Closing delimiter - -> String -- ^ Additional characters to terminate parsing on - -> Parser (Doc RdrName) -stringBlock d op ed n = do - inner <- block op ed n - case parseString' d inner of - Just r -> return r - _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’" - --- | Returns sections of text delimited by specified text. -block :: String -> String -> String -> Parser String -block op ed n = reverse . drop (length ed) . reverse <$> block' op ed - where - block' op' ed' = string (encodeUtf8 op') *> mid - where - mid :: Parser String - mid = decodeUtf8 <$> string (encodeUtf8 ed') - <|> do - inner <- takeWithSkip (head ed') n - more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed') - <|> block' "" ed' -- not full ending, take more - return $ inner ++ more - - --- | Takes all characters until the specified one. Unconditionally --- takes a character if it's escaped. Fails if it doesn't find the character or --- when the input string is empty. -takeWithSkip :: Char -> String -> Parser String -takeWithSkip s n = do - content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome - if or (map (`elem` content) n) || last content /= s - then fail "failed in takeWithSkip" - else return content - where - gotSome [] = fail "EOF in takeWithSkip" - gotSome xs = return xs - -- Apparently ‘scan’ is so magical that it doesn't mangle unicode. - p (escaped, terminate) c - | terminate = Nothing -- swallows up that extra character - | escaped = Just (False, False) - | c == s = Just (False, True) - | otherwise = Just (c == '\\', False) - -- | Monospaced strings. -- -- >>> parseOnly (monospace dynflags) "@cruel@" -- Right (DocMonospaced (DocString "cruel")) monospace :: DynFlags -> Parser (Doc RdrName) -monospace d = DocMonospaced <$> stringBlock d "@" "@" "" - --- | Module name parser, surrounded by double quotes. This does a very primitive and --- purely syntactic checking so that obviously invalid names are not treated as valid --- and blindly hyperlinked (not starting with a capital letter or including spaces). -moduleName :: Parser (Doc RdrName) -moduleName = DocModule <$> ("\"" *> legalModule <* "\"") - where legalModule = do - n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z']) - <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n")) - - if any (`elem` n) " &[{}(=*)+]!#|@/;,^?" - then fail "invalid characters in module name" - else case n of - [] -> return [] - _ -> if last n == '.' then fail "trailing dot in module name" else return n +monospace d = DocMonospaced . parseStringBS d <$> ("@" *> takeWhile1_ (/= '@') <* "@") +moduleName :: Parser (Doc a) +moduleName = DocModule <$> (char '"' *> modid <* char '"') + where + modid = intercalate "." <$> conid `sepBy1` "." + conid = (:) + <$> satisfy isAsciiUpper + -- NOTE: According to Haskell 2010 we shouldd actually only + -- accept {small | large | digit | ' } here. But as we can't + -- match on unicode characters, this is currently not possible. + <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n")) -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify -- a title for the picture. @@ -190,181 +150,166 @@ moduleName = DocModule <$> ("\"" *> legalModule <* "\"") -- Right (DocPic (Picture "hello.png" Nothing)) -- >>> parseOnly picture "<>" -- Right (DocPic (Picture "hello.png" (Just "world"))) -picture :: Parser (Doc RdrName) -picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>") +picture :: Parser (Doc a) +picture = DocPic . makeLabeled Picture . decodeUtf8 + <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>") -- | Paragraph parser, called by 'parseParas'. paragraph :: DynFlags -> Parser (Doc RdrName) paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d <|> property <|> textParagraph d) +textParagraph :: DynFlags -> Parser (Doc RdrName) +textParagraph d = docParagraph . parseString d . intercalate "\n" <$> many1 nonEmptyLine + -- | List parser, called by 'paragraph'. list :: DynFlags -> Parser (Doc RdrName) list d = DocUnorderedList <$> unorderedList d <|> DocOrderedList <$> orderedList d <|> DocDefList <$> definitionList d --- | Parse given text with a provided parser, casting --- Nothing to a failure -parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use - -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result - -> BS.ByteString -- ^ Text to parse - -> Parser a -parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str) - -- | Parses unordered (bullet) lists. unorderedList :: DynFlags -> Parser [Doc RdrName] -unorderedList d = ("*" <|> "-") *> innerList unorderedList d +unorderedList d = ("*" <|> "-") *> innerList (unorderedList d) d -- | Parses ordered lists (numbered or dashed). orderedList :: DynFlags -> Parser [Doc RdrName] -orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d +orderedList d = (paren <|> dot) *> innerList (orderedList d) d where - dot = decimal <* "." - paren = "(" *> (decimal :: Parser Int) <* ")" + dot = (decimal :: Parser Int) <* "." + paren = "(" *> decimal <* ")" -- | Generic function collecting any further lines belonging to the -- list entry and recursively collecting any further lists in the -- same paragraph. Usually used as -- -- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags -innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function - -> DynFlags - -> Parser [Doc RdrName] -innerList p d = do - cl <- do - content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty - parseLine (parseString'' d) id content - ulcs <- many ulc - let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs] - unLists = mconcat [x | Left x <- ulcs] - return $ contents : unLists +innerList :: Parser [Doc RdrName] -> DynFlags -> Parser [Doc RdrName] +innerList item d = do + c <- takeLine + (cs, items) <- more + let contents = (docParagraph . parseString d . unlines) (c : cs) + return (contents : items) where - ulc :: Parser (Either [Doc RdrName] (Doc RdrName)) - ulc = Left <$> (optWs *> p d) - <|> Right <$> nonEmptyLine d - --- | Takes the remained of the line until the newline character --- and calls 'parseLine' using 'parseString'. Fails if it's made --- up strictly of whitespace. -nonEmptyLine :: DynFlags -> Parser (Doc RdrName) -nonEmptyLine d = do - s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" - parseLine (parseString'' d) id s - where - nonSpace xs - | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line" - | otherwise = return xs + more :: Parser ([String], [Doc RdrName]) + more = moreListItems <|> moreContent <|> pure ([], []) + + moreListItems :: Parser ([String], [Doc RdrName]) + moreListItems = (,) [] <$> (skipSpace *> item) + + moreContent :: Parser ([String], [Doc RdrName]) + moreContent = mapFst . (:) <$> nonEmptyLine <*> more -- | Parses definition lists. definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)] definitionList d = do - _ <- "[" - inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n") - _ <- "]" - outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n") - ulcs <- many ulc - let contents = mconcat $ outer : [x | Right x <- ulcs] - unLists = map mconcat [x | Left x <- ulcs] - return $ (inner, contents) : unLists + label <- parseStringBS d <$> ("[" *> takeWhile1 (`notElem` "]\n") <* "]") + c <- takeLine + (cs, items) <- more + let contents = (parseString d . unlines) (c : cs) + return ((label, contents) : items) where - ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName)) - ulc = Left <$> (optWs *> definitionList d) - <|> Right <$> nonEmptyLine d - --- | Parses birdtracks. No further markup is parsed after the birdtrack. --- Consecutive birdtracks are allowed. -birdtracks :: Parser (Doc RdrName) -birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" + more :: Parser ([String], [(Doc RdrName, Doc RdrName)]) + more = moreListItems <|> moreContent <|> pure ([], []) + + moreListItems :: Parser ([String], [(Doc RdrName, Doc RdrName)]) + moreListItems = (,) [] <$> (skipSpace *> definitionList d) + + moreContent :: Parser ([String], [(Doc RdrName, Doc RdrName)]) + moreContent = mapFst . (:) <$> nonEmptyLine <*> more + +birdtracks :: Parser (Doc a) +birdtracks = DocCodeBlock . DocString . intercalate "\n" <$> many1 line where - line = optWs *> ">" *> A8.takeWhile (/= '\n') + line = skipHorizontalSpace *> ">" *> takeLine -- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). -- Consecutive examples are accepted. -examples :: Parser (Doc RdrName) -examples = DocExamples <$> example - --- | Collects consecutive examples and their results. -example :: Parser [Example] -example = do - ws <- optWs - prompt <- decodeUtf8 <$> string ">>>" - expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n") - results <- many result - let exs = concat [ e | Left e <- results ] - res = filter (not . null) [ r | Right r <- results ] - return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs +examples :: Parser (Doc a) +examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go) where - result = Left <$> example - <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n" + go :: Parser [Example] + go = do + prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" + expr <- takeLine + (rs, es) <- resultAndMoreExamples + return (makeExample prefix expr rs : es) + where + resultAndMoreExamples :: Parser ([String], [Example]) + resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) + where + moreExamples :: Parser ([String], [Example]) + moreExamples = (,) [] <$> go + + result :: Parser ([String], [Example]) + result = mapFst . (:) <$> nonEmptyLine <*> resultAndMoreExamples + + makeExample :: String -> String -> [String] -> Example + makeExample prefix expression res = + Example (strip expression) result + where + result = map (substituteBlankLine . tryStripPrefix) res + + tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) + + substituteBlankLine "" = "" + substituteBlankLine xs = xs + +nonEmptyLine :: Parser String +nonEmptyLine = mfilter (any (not . isSpace)) takeLine --- | Propery parser. +takeLine :: Parser String +takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine + +endOfLine :: Parser () +endOfLine = void "\n" <|> endOfInput + +mapFst :: (a -> b) -> (a, c) -> (b, c) +mapFst f (a, b) = (f a, b) + +-- | Property parser. -- -- >>> parseOnly property "prop> hello world" -- Right (DocProperty "hello world") -property :: Parser (Doc RdrName) -property = do - _ <- skipSpace - s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n')) - return $ makeProperty ("prop>" ++ s) - --- | Paragraph level codeblock. Anything between the two delimiting @ --- is parsed for markup. +property :: Parser (Doc a) +property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) + +-- | +-- Paragraph level codeblock. Anything between the two delimiting @ is parsed +-- for markup. codeblock :: DynFlags -> Parser (Doc RdrName) -codeblock d = do - -- Note that we don't need to use optWs here because in cases where - -- we don't see a \n immediatelly after the opening @, this parser - -- fails but we still have a chance to get a codeblock by getting - -- a monospaced doc on its own in the paragraph. With that, the cases - -- are covered. This should be updated if the implementation ever changes. - s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@") - maybe (fail "codeblock") (return . DocCodeBlock) s +codeblock d = + DocCodeBlock . parseStringBS d <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@") where - block' = A8.scan False p + block' = scan False p where p isNewline c | isNewline && c == '@' = Nothing | otherwise = Just $ c == '\n' --- | Calls 'parseString'' on each line of a paragraph -textParagraph :: DynFlags -> Parser (Doc RdrName) -textParagraph d = do - s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" - maybe (fail "textParagraph") (return . docParagraph) s - where - line = takeWhile1 (/= '\n') - --- | See 'picture' for adding a page title. -url :: Parser (Doc RdrName) -url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">") - <|> autoUrl - --- | Naive implementation of auto-linking. Will link everything after --- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space. --- Single trailing punctuation character (.!?,) is split off. -autoUrl :: Parser (Doc RdrName) -autoUrl = do - link <- decodeUtf8 <$> urlLone - return $ formatLink link +hyperlink :: Parser (Doc a) +hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">") + +autoUrl :: Parser (Doc a) +autoUrl = mkLink <$> url where - urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace) - prefixes = [ "http://", "https://", "ftp://" - , "ssh://", "gopher://" ] - formatLink :: String -> Doc RdrName - formatLink s = if last s `elem` ".!?," - then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s]) - else DocHyperlink $ Hyperlink s Nothing + url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) + mkLink :: BS.ByteString -> Doc a + mkLink s = case BS.unsnoc s of + Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] + _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) -- | Parses strings between identifier delimiters. Consumes all input that it -- deems to be valid in an identifier. Note that it simply blindly consumes -- characters and does no actual validation itself. parseValid :: Parser String parseValid = do - vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii) + vs <- many' $ satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:") <|> digit <|> letter_ascii c <- peekChar case c of Just '`' -> return vs - Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs + Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) + <|> return vs _ -> fail "outofvalid" -- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the @@ -374,56 +319,33 @@ identifier dflags = do o <- idDelim vid <- parseValid e <- idDelim - return $ validIdentifier $ o : (vid ++ [e]) - where idDelim = char '\'' <|> char '`' - validIdentifier str = case parseIdent (tail $ init str) of - Just identName -> DocIdentifier identName - Nothing -> DocString str - parseIdent :: String -> Maybe RdrName - parseIdent str0 = - let buffer = stringToStringBuffer str0 - realSrcLc = mkRealSrcLoc (mkFastString "") 0 0 - pstate = mkPState dflags buffer realSrcLc - in case unP parseIdentifier pstate of - POk _ name -> Just (unLoc name) - _ -> Nothing + return $ validIdentifier o vid e + where + idDelim = char '\'' <|> char '`' + validIdentifier o ident e = case parseIdent ident of + Just identName -> DocIdentifier identName + Nothing -> DocString $ o : ident ++ [e] + + parseIdent :: String -> Maybe RdrName + parseIdent str0 = + let buffer = stringToStringBuffer str0 + realSrcLc = mkRealSrcLoc (mkFastString "") 0 0 + pstate = mkPState dflags buffer realSrcLc + in case unP parseIdentifier pstate of + POk _ name -> Just (unLoc name) + _ -> Nothing -- | Remove all leading and trailing whitespace strip :: String -> String strip = (\f -> f . f) $ dropWhile isSpace . reverse --- | Consumes whitespace, excluding a newline. -optWs :: Parser BS.ByteString -optWs = A8.takeWhile (`elem` " \t\f\v\r") - --- | Create an 'Example', stripping superfluous characters as appropriate. --- Remembers the amount of indentation used for the prompt. -makeExample :: String -> String -> [String] -> Example -makeExample prompt expression res = - Example (strip expression) result' -- drop whitespace in expressions - where (prefix, _) = span isSpace prompt - result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res - where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys - substituteBlankLine "" = "" - substituteBlankLine line = line - --- | Creates a 'Picture' with an optional title. Called by 'picture'. -makePicture :: String -> Picture -makePicture input = case break isSpace $ strip input of - (uri, "") -> Picture uri Nothing - (uri, label) -> Picture uri (Just $ dropWhile isSpace label) - --- | Creates a 'Hyperlink' with an optional title. Called by 'example'. -makeHyperlink :: String -> Hyperlink -makeHyperlink input = case break isSpace $ strip input of - (u, "") -> Hyperlink u Nothing - (u, label) -> Hyperlink u (Just $ dropWhile isSpace label) - --- | Makes a property that can be used by other programs for assertions. --- Drops whitespace around the property. Called by 'property' -makeProperty :: String -> Doc RdrName -makeProperty s = case strip s of - 'p':'r':'o':'p':'>':xs -> - DocProperty (dropWhile isSpace xs) - xs -> - error $ "makeProperty: invalid input " ++ show xs +skipHorizontalSpace :: Parser () +skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r") + +takeHorizontalSpace :: Parser BS.ByteString +takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r") + +makeLabeled :: (String -> Maybe String -> a) -> String -> a +makeLabeled f input = case break isSpace $ strip input of + (uri, "") -> f uri Nothing + (uri, label) -> f uri (Just $ dropWhile isSpace label) -- cgit v1.2.3