Allow escaping in URLs and pictures.

Some tests were moved under parseString as they weren't about paragraph-level markup. Conflicts: src/Haddock/Parser.hs, test/Haddock/ParserSpec.hs
author: Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> 2013-09-16 03:01:29 +0100
committer: Austin Seipp <austin@well-typed.com> 2014-01-12 14:48:36 -0600
commit: deb106d8250b7582e01f78e88c88ca7465fc0bc1 (patch)
tree: fef76ea6a8c491359becf8ee5d620b34558ffb85 /src
parent: a03c93524ba2ca4143c10770a2fa0dd134b57a83 (diff)
2 files changed, 20 insertions, 5 deletions
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index b8aa9cb4..6370eecb 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -167,7 +167,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
 -- Right (DocPic (Picture "hello.png" (Just "world")))
 picture :: Parser (Doc a)
 picture = DocPic . makeLabeled Picture . decodeUtf8
-          <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
+          <$> disallowNewline ("<<" *> takeUntil ">>")
 
 -- | Paragraph parser, called by 'parseParas'.
 paragraph :: DynFlags -> Parser (Doc RdrName)
@@ -271,7 +271,8 @@ moreContent :: Monoid a => Parser a -> DynFlags
             -> Parser ([String], Either (Doc RdrName) a)
 moreContent item d = first . (:) <$> nonEmptyLine <*> more item d
 
--- | Collects and parses the result of 'dropFrontOfPara'
+-- | Runs the 'parseParas' parser on an indented paragraph.
+-- The indentation is 4 spaces.
 indentedParagraphs :: DynFlags -> Parser (Doc RdrName)
 indentedParagraphs d = parseParas d . concat <$> dropFrontOfPara "    "
 
@@ -367,7 +368,9 @@ codeblock d =
           | otherwise = Just $ c == '\n'
 
 hyperlink :: Parser (Doc a)
-hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
+hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
+              <$> disallowNewline ("<" *> takeUntil ">")
+            <|> autoUrl
 
 autoUrl :: Parser (Doc a)
 autoUrl = mkLink <$> url
@@ -425,6 +428,14 @@ takeHorizontalSpace :: Parser BS.ByteString
 takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
 
 makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ strip input of
+makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
   (uri, "")    -> f uri Nothing
   (uri, label) -> f uri (Just $ dropWhile isSpace label)
+  where
+    -- As we don't parse these any further, no other processing is applied to
+    -- the string, so we at least remove escape characters here. Perhaps we
+    -- should actually be parsing the label at the very least?
+    removeEscapes "" = ""
+    removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
+    removeEscapes ('\\':xs) = removeEscapes xs
+    removeEscapes (x:xs) = x : removeEscapes xs
diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs
index ea682601..92fa7448 100644
--- a/src/Haddock/Parser/Util.hs
+++ b/src/Haddock/Parser/Util.hs
@@ -7,7 +7,7 @@ import           Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS
 
 takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p)
+takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
   where
     end = BS.unpack end_
 
@@ -20,3 +20,7 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p)
 
     dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
     requireEnd = mfilter (BS.isSuffixOf end_)
+
+    gotSome xs
+      | BS.null xs = fail "didn't get any content"
+      | otherwise = return xs
author	Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>	2013-09-16 03:01:29 +0100
committer	Austin Seipp <austin@well-typed.com>	2014-01-12 14:48:36 -0600
commit	deb106d8250b7582e01f78e88c88ca7465fc0bc1 (patch)
tree	fef76ea6a8c491359becf8ee5d620b34558ffb85 /src
parent	a03c93524ba2ca4143c10770a2fa0dd134b57a83 (diff)