Fix totality, unicode, examples, paragraph parsing

Also simplify specs and parsers while we're at it. Some parsers were made more generic. This commit is a part of GHC pre-merge squash, email fuuzetsu@fuuzetsu.co.uk if you need the full commit history.
author: Simon Hengel <sol@typeful.net> 2013-09-08 10:33:38 +0200
committer: Austin Seipp <austin@well-typed.com> 2014-01-12 14:48:35 -0600
commit: 2448bd71609688be7b8bfe362a8534959531cd79 (patch)
tree: 66f23e3cc5fd6c97da832e8704f8f633e508b64b /src/Haddock
parent: 27876dc77ff259e27a71ea6f30662a668adfd134 (diff)
4 files changed, 216 insertions, 331 deletions
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index 4d68c554..69b2dd6f 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,16 +1,14 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 module Haddock.Doc (
-  docAppend,
-  docParagraph,
-  combineStringNodes,
-  combineDocumentation
-  ) where
+  docAppend
+, docParagraph
+, combineDocumentation
+) where
 
 import Data.Maybe
 import Data.Monoid
 import Haddock.Types
 import Data.Char (isSpace)
-import Control.Arrow ((***))
 
 -- We put it here so that we can avoid a circular import
 -- anything relevant imports this module anyway
@@ -22,25 +20,15 @@ combineDocumentation :: Documentation name -> Maybe (Doc name)
 combineDocumentation (Documentation Nothing Nothing) = Nothing
 combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
 
--- used to make parsing easier; we group the list items later
 docAppend :: Doc id -> Doc id -> Doc id
-docAppend (DocUnorderedList ds1) (DocUnorderedList ds2)
-  = DocUnorderedList (ds1++ds2)
-docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d)
-  = DocAppend (DocUnorderedList (ds1++ds2)) d
-docAppend (DocOrderedList ds1) (DocOrderedList ds2)
-  = DocOrderedList (ds1++ds2)
-docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d)
-  = DocAppend (DocOrderedList (ds1++ds2)) d
-docAppend (DocDefList ds1) (DocDefList ds2)
-  = DocDefList (ds1++ds2)
-docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d)
-  = DocAppend (DocDefList (ds1++ds2)) d
+docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
+docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
 docAppend DocEmpty d = d
 docAppend d DocEmpty = d
-docAppend d1 d2
-  = DocAppend d1 d2
-
+docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
+docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
+docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
+docAppend d1 d2 = DocAppend d1 d2
 
 -- again to make parsing easier - we spot a paragraph whose only item
 -- is a DocMonospaced and make it into a DocCodeBlock
@@ -77,28 +65,3 @@ docCodeBlock (DocString s)
 docCodeBlock (DocAppend l r)
   = DocAppend l (docCodeBlock r)
 docCodeBlock d = d
-
--- | This is a hack that joins neighbouring 'DocString's into a single one.
--- This is done to ease up the testing and doesn't change the final result
--- as this would be done later anyway.
-combineStringNodes :: Doc id -> Doc id
-combineStringNodes (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
-combineStringNodes (DocAppend (DocString x) (DocAppend (DocString y) z)) =
-  tryjoin (DocAppend (DocString (x ++ y)) (combineStringNodes z))
-combineStringNodes (DocAppend x y) = tryjoin (DocAppend (combineStringNodes x) (combineStringNodes y))
-combineStringNodes (DocParagraph x) = DocParagraph (combineStringNodes x)
-combineStringNodes (DocWarning x) = DocWarning (combineStringNodes x)
-combineStringNodes (DocEmphasis x) = DocEmphasis (combineStringNodes x)
-combineStringNodes (DocMonospaced x) = DocMonospaced (combineStringNodes x)
-combineStringNodes (DocUnorderedList xs) = DocUnorderedList (map combineStringNodes xs)
-combineStringNodes (DocOrderedList x) = DocOrderedList (map combineStringNodes x)
-combineStringNodes (DocDefList xs) = DocDefList (map (combineStringNodes *** combineStringNodes) xs)
-combineStringNodes (DocCodeBlock x) = DocCodeBlock (combineStringNodes x)
-combineStringNodes x = x
-
-tryjoin :: Doc id -> Doc id
-tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
-tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z
-tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z))
-  = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z))
-tryjoin x = x
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 13563532..8c33ade6 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -43,11 +43,11 @@ processDocStrings dflags gre strs = do
 
 
 processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
-processDocStringParas = process parseParas
+processDocStringParas = process parseParasMaybe
 
 
 processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
-processDocString = process parseString
+processDocString = process parseStringMaybe
 
 process :: (DynFlags -> String -> Maybe (Doc RdrName))
         -> DynFlags
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index 2e4fe73b..ade28728 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -46,13 +46,13 @@ parseModuleHeader dflags str0 =
       description1 :: Either String (Maybe (Doc RdrName))
       description1 = case descriptionOpt of
          Nothing -> Right Nothing
-         Just description -> case parseString dflags description of
+         Just description -> case parseStringMaybe dflags description of
             Nothing -> Left ("Cannot parse Description: " ++ description)
             Just doc -> Right (Just doc)
    in
       case description1 of
          Left mess -> Left mess
-         Right docOpt -> case parseParas dflags str8 of
+         Right docOpt -> case parseParasMaybe dflags str8 of
            Nothing -> Left "Cannot parse header documentation paragraphs"
            Just doc -> Right (HaddockModInfo {
             hmi_description = docOpt,
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index 43a2b169..fe8904d4 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -9,15 +9,15 @@
 -- Stability   :  experimental
 -- Portability :  portable
 
-module Haddock.Parser (parseString, parseParas) where
+module Haddock.Parser (parseString, parseParas, parseStringMaybe, parseParasMaybe) where
 
+import           Prelude hiding (takeWhile)
+import           Control.Monad (void, mfilter)
 import           Control.Applicative
-import           Data.Attoparsec.ByteString hiding (parse, takeWhile1, take, inClass)
-import qualified Data.Attoparsec.ByteString.Char8 as A8
-import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, string)
-import qualified Data.ByteString as BS
-import           Data.Char (chr)
-import           Data.List (stripPrefix)
+import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, string, endOfLine)
+import qualified Data.ByteString.Char8 as BS
+import           Data.Char (chr, isAsciiUpper)
+import           Data.List (stripPrefix, intercalate)
 import           Data.Maybe (fromMaybe)
 import           Data.Monoid
 import           DynFlags
@@ -31,157 +31,117 @@ import           SrcLoc (mkRealSrcLoc, unLoc)
 import           StringBuffer (stringToStringBuffer)
 import           Haddock.Utf8
 
-parse :: Parser a -> String -> Maybe a
-parse p = either (const Nothing) Just . parseOnly (p <* endOfInput) . encodeUtf8
+{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
+parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
+parseParasMaybe d = Just . parseParas d
+
+{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
+parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
+parseStringMaybe d = Just . parseString d
+
+parse :: Parser a -> BS.ByteString -> a
+parse p = either err id . parseOnly (p <* endOfInput)
+  where
+    err = error . ("Haddock.Parser.parse: " ++)
 
 -- | Main entry point to the parser. Appends the newline character
 -- to the input string.
 parseParas :: DynFlags
               -> String -- ^ String to parse
-              -> Maybe (Doc RdrName)
-parseParas d = fmap combineStringNodes . parse (p <* skipSpace) . (++ "\n")
+              -> Doc RdrName
+parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
   where
     p :: Parser (Doc RdrName)
-    -- make sure that we don't swallow up whitespace belonging to next paragraph
-    p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n")
-
--- | A parser that parsers separate lines of the comments. Eventually
--- called by 'parseParas'. Appends a newline character to the input string.
--- Drops any whitespace in front of the input string. It's dropped for the sake of
--- section headings.
-parseString :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString d = parseString' d . dropWhile isSpace
-
--- | A parser that parsers separate lines of the comments. Eventually
--- called by 'parseParas'. Appends a newline character to the input string.
--- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use.
-parseString'' :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString'' d = parseString' d . (++ "\n")
-
--- | An internal use function. Split from the 'parseString' is useful
--- as we can specify separately when we want the newline to be appended.
-parseString' :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString' d = fmap combineStringNodes . parse p
+    p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n")
+
+-- | Parse a text paragraph.
+parseString :: DynFlags -> String -> Doc RdrName
+parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace
+
+parseStringBS :: DynFlags -> BS.ByteString -> Doc RdrName
+parseStringBS d = parse p
   where
     p :: Parser (Doc RdrName)
-    p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d
-                          <|> moduleName <|> picture <|> url
-                          <|> emphasis d <|> encodedChar <|> string' <|> skipChar)
+    p = mconcat <$> many (monospace d <|> anchor <|> identifier d
+                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl
+                          <|> emphasis d <|> encodedChar <|> string' <|> skipSpecialChar)
 
 -- | Parses and processes
 -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
 --
 -- >>> parseOnly encodedChar "&#65;&#66;&#67;"
 -- Right (DocString "ABC")
-encodedChar :: Parser (Doc RdrName)
+encodedChar :: Parser (Doc a)
 encodedChar = "&#" *> c <* ";"
   where
     c = DocString . return . chr <$> num
     num = hex <|> decimal
     hex = ("x" <|> "X") *> hexadecimal
 
+specialChar :: [Char]
+specialChar = "/<@\"&'`"
+
 -- | Plain, regular parser for text. Called as one of the last parsers
 -- to ensure that we have already given a chance to more meaningful parsers
 -- before capturing their characers.
-string' :: Parser (Doc RdrName)
-string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\")
+string' :: Parser (Doc a)
+string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
+  where
+    unescape "" = ""
+    unescape ('\\':x:xs) = x : unescape xs
+    unescape (x:xs) = x : unescape xs
+
+-- | Skips a single special character and treats it as a plain string.
+-- This is done to skip over any special characters belonging to other
+-- elements but which were not deemed meaningful at their positions.
+skipSpecialChar :: Parser (Doc a)
+skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
 
 -- | Emphasis parser.
 --
 -- >>> parseOnly emphasis "/Hello world/"
 -- Right (DocEmphasis (DocString "Hello world"))
 emphasis :: DynFlags -> Parser (Doc RdrName)
-emphasis d = DocEmphasis <$> stringBlock d "/" "/" "\n"
+emphasis d = DocEmphasis . parseStringBS d <$>
+  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
 
--- | Skips a single character and treats it as a plain string.
--- This is done to skip over any special characters belonging to other
--- elements but which were not deemed meaningful at their positions.
--- Note that this can only be used in places where we're absolutely certain
--- no unicode is present, such as to skip a 100% certain ASCII delimeter.
-skipChar :: Parser (Doc RdrName)
-skipChar = DocString . return <$> anyChar
+-- | Like `takeWhile`, but unconditionally take escaped characters.
+takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile_ p = scan False p_
+  where
+    p_ escaped c
+      | escaped = Just False
+      | not $ p c = Nothing
+      | otherwise = Just (c == '\\')
 
--- | Treats the next character as a regular string, even if it's normally
--- used for markup.
-charEscape :: Parser (Doc RdrName)
-charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n'))
+-- | Like `takeWhile1`, but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile1_ = mfilter (not . BS.null) . takeWhile_
 
 -- | Text anchors to allow for jumping around the generated documentation.
 --
 -- >>> parseOnly anchor "#Hello world#"
 -- Right (DocAName "Hello world")
-anchor :: Parser (Doc RdrName)
+anchor :: Parser (Doc a)
 anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
 
--- | Helper for markup structures surrounded with delimiters.
-stringBlock
-  :: DynFlags
-     -> String -- ^ Opening delimiter
-     -> String -- ^ Closing delimiter
-     -> String -- ^ Additional characters to terminate parsing on
-     -> Parser (Doc RdrName)
-stringBlock d op ed n = do
-  inner <- block op ed n
-  case parseString' d inner of
-    Just r -> return r
-    _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’"
-
--- | Returns sections of text delimited by specified text.
-block :: String -> String -> String -> Parser String
-block op ed n = reverse . drop (length ed) . reverse <$> block' op ed
-  where
-    block' op' ed' = string (encodeUtf8 op') *> mid
-      where
-        mid :: Parser String
-        mid = decodeUtf8 <$> string (encodeUtf8 ed')
-              <|> do
-                inner <- takeWithSkip (head ed') n
-                more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed')
-                        <|> block' "" ed'  -- not full ending, take more
-                return $ inner ++ more
-
-
--- | Takes all characters until the specified one. Unconditionally
--- takes a character if it's escaped. Fails if it doesn't find the character or
--- when the input string is empty.
-takeWithSkip :: Char -> String -> Parser String
-takeWithSkip s n = do
-  content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome
-  if or (map (`elem` content) n) || last content /= s
-    then fail "failed in takeWithSkip"
-    else return content
-  where
-    gotSome [] = fail "EOF in takeWithSkip"
-    gotSome xs = return xs
-    -- Apparently ‘scan’ is so magical that it doesn't mangle unicode.
-    p (escaped, terminate) c
-      | terminate = Nothing -- swallows up that extra character
-      | escaped = Just (False, False)
-      | c == s = Just (False, True)
-      | otherwise = Just (c == '\\', False)
-
 -- | Monospaced strings.
 --
 -- >>> parseOnly (monospace dynflags) "@cruel@"
 -- Right (DocMonospaced (DocString "cruel"))
 monospace :: DynFlags -> Parser (Doc RdrName)
-monospace d = DocMonospaced <$> stringBlock d "@" "@" ""
-
--- | Module name parser, surrounded by double quotes. This does a very primitive and
--- purely syntactic checking so that obviously invalid names are not treated as valid
--- and blindly hyperlinked (not starting with a capital letter or including spaces).
-moduleName :: Parser (Doc RdrName)
-moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
-  where legalModule = do
-          n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z'])
-               <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n"))
-
-          if any (`elem` n) " &[{}(=*)+]!#|@/;,^?"
-            then fail "invalid characters in module name"
-            else case n of
-            [] -> return []
-            _ -> if last n == '.' then fail "trailing dot in module name" else return n
+monospace d = DocMonospaced . parseStringBS d <$> ("@" *> takeWhile1_ (/= '@') <* "@")
 
+moduleName :: Parser (Doc a)
+moduleName = DocModule <$> (char '"' *> modid <* char '"')
+  where
+    modid = intercalate "." <$> conid `sepBy1` "."
+    conid = (:)
+      <$> satisfy isAsciiUpper
+      -- NOTE: According to Haskell 2010 we shouldd actually only
+      -- accept {small | large | digit | ' } here.  But as we can't
+      -- match on unicode characters, this is currently not possible.
+      <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
 
 -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
 -- a title for the picture.
@@ -190,181 +150,166 @@ moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
 -- Right (DocPic (Picture "hello.png" Nothing))
 -- >>> parseOnly picture "<<hello.png world>>"
 -- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (Doc RdrName)
-picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
+picture :: Parser (Doc a)
+picture = DocPic . makeLabeled Picture . decodeUtf8
+          <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
 
 -- | Paragraph parser, called by 'parseParas'.
 paragraph :: DynFlags -> Parser (Doc RdrName)
 paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d
                                          <|> property <|> textParagraph d)
 
+textParagraph :: DynFlags -> Parser (Doc RdrName)
+textParagraph d = docParagraph . parseString d . intercalate "\n" <$> many1 nonEmptyLine
+
 -- | List parser, called by 'paragraph'.
 list :: DynFlags -> Parser (Doc RdrName)
 list d = DocUnorderedList <$> unorderedList d
          <|> DocOrderedList <$> orderedList d
          <|> DocDefList <$> definitionList d
 
--- | Parse given text with a provided parser, casting
--- Nothing to a failure
-parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use
-             -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result
-             -> BS.ByteString -- ^ Text to parse
-             -> Parser a
-parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str)
-
 -- | Parses unordered (bullet) lists.
 unorderedList :: DynFlags -> Parser [Doc RdrName]
-unorderedList d = ("*" <|> "-") *> innerList unorderedList d
+unorderedList d = ("*" <|> "-") *> innerList (unorderedList d) d
 
 -- | Parses ordered lists (numbered or dashed).
 orderedList :: DynFlags -> Parser [Doc RdrName]
-orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d
+orderedList d = (paren <|> dot) *> innerList (orderedList d) d
   where
-    dot = decimal <* "."
-    paren = "(" *> (decimal :: Parser Int) <* ")"
+    dot = (decimal :: Parser Int) <* "."
+    paren = "(" *> decimal <* ")"
 
 -- | Generic function collecting any further lines belonging to the
 -- list entry and recursively collecting any further lists in the
 -- same paragraph. Usually used as
 --
 -- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags
-innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function
-             -> DynFlags
-             -> Parser [Doc RdrName]
-innerList p d = do
-  cl <- do
-    content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty
-    parseLine (parseString'' d) id content
-  ulcs <- many ulc
-  let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs]
-      unLists = mconcat [x | Left x <- ulcs]
-  return $ contents : unLists
+innerList :: Parser [Doc RdrName] -> DynFlags -> Parser [Doc RdrName]
+innerList item d = do
+  c <- takeLine
+  (cs, items) <- more
+  let contents = (docParagraph . parseString d . unlines) (c : cs)
+  return (contents : items)
   where
-    ulc :: Parser (Either [Doc RdrName] (Doc RdrName))
-    ulc = Left <$> (optWs *> p d)
-          <|> Right <$> nonEmptyLine d
-
--- | Takes the remained of the line until the newline character
--- and calls 'parseLine' using 'parseString'. Fails if it's made
--- up strictly of whitespace.
-nonEmptyLine :: DynFlags -> Parser (Doc RdrName)
-nonEmptyLine d = do
-  s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
-  parseLine (parseString'' d) id s
-  where
-    nonSpace xs
-      | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line"
-      | otherwise = return xs
+    more :: Parser ([String], [Doc RdrName])
+    more = moreListItems <|> moreContent <|> pure ([], [])
+
+    moreListItems :: Parser ([String], [Doc RdrName])
+    moreListItems = (,) [] <$> (skipSpace *> item)
+
+    moreContent :: Parser ([String], [Doc RdrName])
+    moreContent = mapFst . (:) <$> nonEmptyLine <*> more
 
 -- | Parses definition lists.
 definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]
 definitionList d = do
-  _ <- "["
-  inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n")
-  _ <- "]"
-  outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n")
-  ulcs <- many ulc
-  let contents = mconcat $ outer : [x | Right x <- ulcs]
-      unLists = map mconcat [x | Left x <- ulcs]
-  return $ (inner, contents) : unLists
+  label <- parseStringBS d <$> ("[" *> takeWhile1 (`notElem` "]\n") <* "]")
+  c <- takeLine
+  (cs, items) <- more
+  let contents = (parseString d . unlines) (c : cs)
+  return ((label, contents) : items)
   where
-    ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName))
-    ulc = Left <$> (optWs *> definitionList d)
-          <|> Right <$> nonEmptyLine d
-
--- | Parses birdtracks. No further markup is parsed after the birdtrack.
--- Consecutive birdtracks are allowed.
-birdtracks :: Parser (Doc RdrName)
-birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
+    more :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+    more = moreListItems <|> moreContent <|> pure ([], [])
+
+    moreListItems :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+    moreListItems = (,) [] <$> (skipSpace *> definitionList d)
+
+    moreContent :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+    moreContent = mapFst . (:) <$> nonEmptyLine <*> more
+
+birdtracks :: Parser (Doc a)
+birdtracks = DocCodeBlock . DocString . intercalate "\n" <$> many1 line
   where
-    line = optWs *> ">" *> A8.takeWhile (/= '\n')
+    line = skipHorizontalSpace *> ">" *> takeLine
 
 -- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
 -- Consecutive examples are accepted.
-examples :: Parser (Doc RdrName)
-examples = DocExamples <$> example
-
--- | Collects consecutive examples and their results.
-example :: Parser [Example]
-example = do
-  ws <- optWs
-  prompt <- decodeUtf8 <$> string ">>>"
-  expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n")
-  results <- many result
-  let exs = concat [ e | Left e <- results ]
-      res = filter (not . null) [ r | Right r <- results ]
-  return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs
+examples :: Parser (Doc a)
+examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
   where
-    result = Left <$> example
-             <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n"
+    go :: Parser [Example]
+    go = do
+      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
+      expr <- takeLine
+      (rs, es) <- resultAndMoreExamples
+      return (makeExample prefix expr rs : es)
+      where
+        resultAndMoreExamples :: Parser ([String], [Example])
+        resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
+          where
+            moreExamples :: Parser ([String], [Example])
+            moreExamples = (,) [] <$> go
+
+            result :: Parser ([String], [Example])
+            result = mapFst . (:) <$> nonEmptyLine <*> resultAndMoreExamples
+
+    makeExample :: String -> String -> [String] -> Example
+    makeExample prefix expression res =
+      Example (strip expression) result
+      where
+        result = map (substituteBlankLine . tryStripPrefix) res
+
+        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
+
+        substituteBlankLine "<BLANKLINE>" = ""
+        substituteBlankLine xs = xs
+
+nonEmptyLine :: Parser String
+nonEmptyLine = mfilter (any (not . isSpace)) takeLine
 
--- | Propery parser.
+takeLine :: Parser String
+takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
+
+endOfLine :: Parser ()
+endOfLine = void "\n" <|> endOfInput
+
+mapFst :: (a -> b) -> (a, c) -> (b, c)
+mapFst f (a, b) = (f a, b)
+
+-- | Property parser.
 --
 -- >>> parseOnly property "prop> hello world"
 -- Right (DocProperty "hello world")
-property :: Parser (Doc RdrName)
-property = do
-    _ <- skipSpace
-    s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n'))
-    return $ makeProperty ("prop>" ++ s)
-
--- | Paragraph level codeblock. Anything between the two delimiting @
--- is parsed for markup.
+property :: Parser (Doc a)
+property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
+
+-- |
+-- Paragraph level codeblock. Anything between the two delimiting @ is parsed
+-- for markup.
 codeblock :: DynFlags -> Parser (Doc RdrName)
-codeblock d = do
-  -- Note that we don't need to use optWs here because in cases where
-  -- we don't see a \n immediatelly after the opening @, this parser
-  -- fails but we still have a chance to get a codeblock by getting
-  -- a monospaced doc on its own in the paragraph. With that, the cases
-  -- are covered. This should be updated if the implementation ever changes.
-  s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@")
-  maybe (fail "codeblock") (return . DocCodeBlock) s
+codeblock d =
+  DocCodeBlock . parseStringBS d <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
   where
-    block' = A8.scan False p
+    block' = scan False p
       where
         p isNewline c
           | isNewline && c == '@' = Nothing
           | otherwise = Just $ c == '\n'
 
--- | Calls 'parseString'' on each line of a paragraph
-textParagraph :: DynFlags -> Parser (Doc RdrName)
-textParagraph d = do
-  s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
-  maybe (fail "textParagraph") (return . docParagraph) s
-  where
-    line = takeWhile1 (/= '\n')
-
--- | See 'picture' for adding a page title.
-url :: Parser (Doc RdrName)
-url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
-      <|> autoUrl
-
--- | Naive implementation of auto-linking. Will link everything after
--- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space.
--- Single trailing punctuation character (.!?,) is split off.
-autoUrl :: Parser (Doc RdrName)
-autoUrl = do
-  link <- decodeUtf8 <$> urlLone
-  return $ formatLink link
+hyperlink :: Parser (Doc a)
+hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
+
+autoUrl :: Parser (Doc a)
+autoUrl = mkLink <$> url
   where
-    urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace)
-    prefixes = [ "http://", "https://", "ftp://"
-               , "ssh://", "gopher://" ]
-    formatLink :: String -> Doc RdrName
-    formatLink s = if last s `elem` ".!?,"
-                   then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s])
-                   else DocHyperlink $ Hyperlink s Nothing
+    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
+    mkLink :: BS.ByteString -> Doc a
+    mkLink s = case BS.unsnoc s of
+      Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
+      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
 
 -- | Parses strings between identifier delimiters. Consumes all input that it
 -- deems to be valid in an identifier. Note that it simply blindly consumes
 -- characters and does no actual validation itself.
 parseValid :: Parser String
 parseValid = do
-  vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii)
+  vs <- many' $ satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:") <|> digit <|> letter_ascii
   c <- peekChar
   case c of
     Just '`' -> return vs
-    Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs
+    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
+                 <|> return vs
     _ -> fail "outofvalid"
 
 -- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the
@@ -374,56 +319,33 @@ identifier dflags = do
   o <- idDelim
   vid <- parseValid
   e <- idDelim
-  return $ validIdentifier $ o : (vid ++ [e])
-  where idDelim = char '\'' <|> char '`'
-        validIdentifier str = case parseIdent (tail $ init str) of
-          Just identName -> DocIdentifier identName
-          Nothing -> DocString str
-        parseIdent :: String -> Maybe RdrName
-        parseIdent str0 =
-          let buffer = stringToStringBuffer str0
-              realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
-              pstate = mkPState dflags buffer realSrcLc
-          in case unP parseIdentifier pstate of
-            POk _ name -> Just (unLoc name)
-            _ -> Nothing
+  return $ validIdentifier o vid e
+  where
+    idDelim = char '\'' <|> char '`'
+    validIdentifier o ident e = case parseIdent ident of
+      Just identName -> DocIdentifier identName
+      Nothing -> DocString $ o : ident ++ [e]
+
+    parseIdent :: String -> Maybe RdrName
+    parseIdent str0 =
+      let buffer = stringToStringBuffer str0
+          realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+          pstate = mkPState dflags buffer realSrcLc
+      in case unP parseIdentifier pstate of
+        POk _ name -> Just (unLoc name)
+        _ -> Nothing
 
 -- | Remove all leading and trailing whitespace
 strip :: String -> String
 strip = (\f -> f . f) $ dropWhile isSpace . reverse
 
--- | Consumes whitespace, excluding a newline.
-optWs :: Parser BS.ByteString
-optWs = A8.takeWhile (`elem` " \t\f\v\r")
-
--- | Create an 'Example', stripping superfluous characters as appropriate.
--- Remembers the amount of indentation used for the prompt.
-makeExample :: String -> String -> [String] -> Example
-makeExample prompt expression res =
-  Example (strip expression) result'       -- drop whitespace in expressions
-  where (prefix, _) = span isSpace prompt
-        result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res
-          where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
-                substituteBlankLine "<BLANKLINE>" = ""
-                substituteBlankLine line          = line
-
--- | Creates a 'Picture' with an optional title. Called by 'picture'.
-makePicture :: String -> Picture
-makePicture input = case break isSpace $ strip input of
-  (uri, "")    -> Picture uri Nothing
-  (uri, label) -> Picture uri (Just $ dropWhile isSpace label)
-
--- | Creates a 'Hyperlink' with an optional title. Called by 'example'.
-makeHyperlink :: String -> Hyperlink
-makeHyperlink input = case break isSpace $ strip input of
-  (u, "")    -> Hyperlink u Nothing
-  (u, label) -> Hyperlink u (Just $ dropWhile isSpace label)
-
--- | Makes a property that can be used by other programs for assertions.
--- Drops whitespace around the property. Called by 'property'
-makeProperty :: String -> Doc RdrName
-makeProperty s = case strip s of
-  'p':'r':'o':'p':'>':xs ->
-    DocProperty (dropWhile isSpace xs)
-  xs ->
-    error $ "makeProperty: invalid input " ++ show xs
+skipHorizontalSpace :: Parser ()
+skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
+
+takeHorizontalSpace :: Parser BS.ByteString
+takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
+
+makeLabeled :: (String -> Maybe String -> a) -> String -> a
+makeLabeled f input = case break isSpace $ strip input of
+  (uri, "")    -> f uri Nothing
+  (uri, label) -> f uri (Just $ dropWhile isSpace label)
author	Simon Hengel <sol@typeful.net>	2013-09-08 10:33:38 +0200
committer	Austin Seipp <austin@well-typed.com>	2014-01-12 14:48:35 -0600
commit	2448bd71609688be7b8bfe362a8534959531cd79 (patch)
tree	66f23e3cc5fd6c97da832e8704f8f633e508b64b /src/Haddock
parent	27876dc77ff259e27a71ea6f30662a668adfd134 (diff)