From deb106d8250b7582e01f78e88c88ca7465fc0bc1 Mon Sep 17 00:00:00 2001
From: Mateusz Kowalczyk
Date: Mon, 16 Sep 2013 03:01:29 +0100
Subject: Allow escaping in URLs and pictures.

Some tests were moved under parseString as they weren't about paragraph
level markup.

Conflicts:
	src/Haddock/Parser.hs
	test/Haddock/ParserSpec.hs
---
 src/Haddock/Parser.hs      | 19 +++++++++++++++----
 src/Haddock/Parser/Util.hs |  6 +++++-
 2 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'src/Haddock')

diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index b8aa9cb4..6370eecb 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -167,7 +167,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
 -- Right (DocPic (Picture "hello.png" (Just "world")))
 picture :: Parser (Doc a)
 picture = DocPic . makeLabeled Picture . decodeUtf8
-          <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
+          <$> disallowNewline ("<<" *> takeUntil ">>")
 
 -- | Paragraph parser, called by 'parseParas'.
 paragraph :: DynFlags -> Parser (Doc RdrName)
@@ -271,7 +271,8 @@ moreContent :: Monoid a => Parser a -> DynFlags
             -> Parser ([String], Either (Doc RdrName) a)
 moreContent item d = first . (:) <$> nonEmptyLine <*> more item d
 
--- | Collects and parses the result of 'dropFrontOfPara'
+-- | Runs the 'parseParas' parser on an indented paragraph.
+-- The indentation is 4 spaces.
 indentedParagraphs :: DynFlags -> Parser (Doc RdrName)
 indentedParagraphs d = parseParas d . concat <$> dropFrontOfPara "    "
 
@@ -367,7 +368,9 @@ codeblock d =
       | otherwise = Just $ c == '\n'
 
 hyperlink :: Parser (Doc a)
-hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
+hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
+              <$> disallowNewline ("<" *> takeUntil ">")
+            <|> autoUrl
 
 autoUrl :: Parser (Doc a)
 autoUrl = mkLink <$> url
@@ -425,6 +428,14 @@ takeHorizontalSpace :: Parser BS.ByteString
 takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
 
 makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ strip input of
+makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
   (uri, "") -> f uri Nothing
   (uri, label) -> f uri (Just $ dropWhile isSpace label)
+  where
+    -- As we don't parse these any further, we don't do any processing to the
+    -- string so we at least remove escape character here. Perhaps we should
+    -- actually be parsing the label at the very least?
+    removeEscapes "" = ""
+    removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
+    removeEscapes ('\\':xs) = removeEscapes xs
+    removeEscapes (x:xs) = x : removeEscapes xs
diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs
index ea682601..92fa7448 100644
--- a/src/Haddock/Parser/Util.hs
+++ b/src/Haddock/Parser/Util.hs
@@ -7,7 +7,7 @@ import Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS
 
 takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p)
+takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
   where
     end = BS.unpack end_
 
@@ -20,3 +20,7 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p)
 
     dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
     requireEnd = mfilter (BS.isSuffixOf end_)
+
+    gotSome xs
+      | BS.null xs = fail "didn't get any content"
+      | otherwise = return xs
-- 
cgit v1.2.3