1 files changed, 147 insertions, 113 deletions
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index ece9291f..1489ae84 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -13,7 +13,10 @@
 -- Stability   :  experimental
 -- Portability :  portable
 
-module Haddock.Parser (parseString, parseParas, parseStringMaybe, parseParasMaybe) where
+module Haddock.Parser ( parseString, parseParas
+                      , overIdentifier, toRegular
+                      , takeUntil
+                      ) where
 
 import           Prelude hiding (takeWhile)
 import           Control.Arrow (first)
@@ -25,25 +28,51 @@ import           Data.Char (chr, isAsciiUpper)
 import           Data.List (stripPrefix, intercalate, unfoldr)
 import           Data.Maybe (fromMaybe)
 import           Data.Monoid
-import           DynFlags
-import           FastString (mkFastString)
 import           Haddock.Doc
-import           Haddock.Types
-import           Lexer (mkPState, unP, ParseResult(POk))
-import           Parser (parseIdentifier)
-import           RdrName
-import           SrcLoc (mkRealSrcLoc, unLoc)
-import           StringBuffer (stringToStringBuffer)
+import           Haddock.Types hiding (Doc)
 import           Haddock.Utf8
-import           Haddock.Parser.Util
-
-{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
-parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseParasMaybe d = Just . parseParas d
-
-{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
-parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseStringMaybe d = Just . parseString d
+import           Data.ByteString.Char8 (ByteString)
+
+-- | Identifier string surrounded with opening and closing quotes/backticks.
+type IString = (Char, String, Char)
+
+-- | Drops the quotes/backticks around all identifiers, as if they
+-- were valid but still 'String's.
+toRegular :: DocH mod IString -> DocH mod String
+toRegular = fmap (\(_, x, _) -> x)
+
+-- | Maps over 'DocIdentifier's over 'String' with potentially failing
+-- conversion using user-supplied function. If the conversion fails,
+-- the identifier is deemed to not be valid and is treated as a
+-- regular string.
+overIdentifier :: (String -> Maybe a)
+               -> DocH mod IString
+               -> DocH mod a
+overIdentifier f d = g d
+  where
+    g (DocIdentifier (o, x, e)) = case f x of
+      Nothing -> DocString $ o : x ++ [e]
+      Just x' -> DocIdentifier x'
+    g DocEmpty = DocEmpty
+    g (DocAppend x x') = DocAppend (g x) (g x')
+    g (DocString x) = DocString x
+    g (DocParagraph x) = DocParagraph $ g x
+    g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
+    g (DocModule x) = DocModule x
+    g (DocWarning x) = DocWarning $ g x
+    g (DocEmphasis x) = DocEmphasis $ g x
+    g (DocMonospaced x) = DocMonospaced $ g x
+    g (DocBold x) = DocBold $ g x
+    g (DocUnorderedList x) = DocUnorderedList $ fmap g x
+    g (DocOrderedList x) = DocOrderedList $ fmap g x
+    g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
+    g (DocCodeBlock x) = DocCodeBlock $ g x
+    g (DocHyperlink x) = DocHyperlink x
+    g (DocPic x) = DocPic x
+    g (DocAName x) = DocAName x
+    g (DocProperty x) = DocProperty x
+    g (DocExamples x) = DocExamples x
+    g (DocHeader (Header l x)) = DocHeader . Header l $ g x
 
 parse :: Parser a -> BS.ByteString -> a
 parse p = either err id . parseOnly (p <* endOfInput)
@@ -52,33 +81,32 @@ parse p = either err id . parseOnly (p <* endOfInput)
 
 -- | Main entry point to the parser. Appends the newline character
 -- to the input string.
-parseParas :: DynFlags
-              -> String -- ^ String to parse
-              -> Doc RdrName
-parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
+parseParas :: String -- ^ String to parse
+           -> DocH mod IString
+parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
   where
-    p :: Parser (Doc RdrName)
-    p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n")
+    p :: Parser (DocH mod IString)
+    p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
 
 -- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
 -- drops leading whitespace and encodes the string to UTF8 first.
-parseString :: DynFlags -> String -> Doc RdrName
-parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace
+parseString :: String -> DocH mod IString
+parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
 
-parseStringBS :: DynFlags -> BS.ByteString -> Doc RdrName
-parseStringBS d = parse p
+parseStringBS :: BS.ByteString -> DocH mod IString
+parseStringBS = parse p
   where
-    p :: Parser (Doc RdrName)
-    p = mconcat <$> many (monospace d <|> anchor <|> identifier d
-                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold d
-                          <|> emphasis d <|> encodedChar <|> string' <|> skipSpecialChar)
+    p :: Parser (DocH mod IString)
+    p = mconcat <$> many (monospace <|> anchor <|> identifier
+                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
+                          <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
 
 -- | Parses and processes
 -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
 --
 -- >>> parseOnly encodedChar "&#65;&#66;&#67;"
 -- Right (DocString "ABC")
-encodedChar :: Parser (Doc a)
+encodedChar :: Parser (DocH mod a)
 encodedChar = "&#" *> c <* ";"
   where
     c = DocString . return . chr <$> num
@@ -91,7 +119,7 @@ specialChar = "_/<@\"&'`"
 -- | Plain, regular parser for text. Called as one of the last parsers
 -- to ensure that we have already given a chance to more meaningful parsers
 -- before capturing their characers.
-string' :: Parser (Doc a)
+string' :: Parser (DocH mod a)
 string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
   where
     unescape "" = ""
@@ -101,23 +129,23 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialCh
 -- | Skips a single special character and treats it as a plain string.
 -- This is done to skip over any special characters belonging to other
 -- elements but which were not deemed meaningful at their positions.
-skipSpecialChar :: Parser (Doc a)
+skipSpecialChar :: Parser (DocH mod a)
 skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
 
 -- | Emphasis parser.
 --
 -- >>> parseOnly emphasis "/Hello world/"
 -- Right (DocEmphasis (DocString "Hello world"))
-emphasis :: DynFlags -> Parser (Doc RdrName)
-emphasis d = DocEmphasis . parseStringBS d <$>
+emphasis :: Parser (DocH mod IString)
+emphasis = DocEmphasis . parseStringBS <$>
   mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
 
 -- | Bold parser.
 --
 -- >>> parseOnly bold "__Hello world__"
 -- Right (DocBold (DocString "Hello world"))
-bold :: DynFlags -> Parser (Doc RdrName)
-bold d = DocBold . parseStringBS d <$> disallowNewline ("__" *> takeUntil "__")
+bold :: Parser (DocH mod IString)
+bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
 
 disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
 disallowNewline = mfilter ('\n' `BS.notElem`)
@@ -139,17 +167,17 @@ takeWhile1_ = mfilter (not . BS.null) . takeWhile_
 --
 -- >>> parseOnly anchor "#Hello world#"
 -- Right (DocAName "Hello world")
-anchor :: Parser (Doc a)
+anchor :: Parser (DocH mod a)
 anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
 
 -- | Monospaced strings.
 --
--- >>> parseOnly (monospace dynflags) "@cruel@"
+-- >>> parseOnly monospace "@cruel@"
 -- Right (DocMonospaced (DocString "cruel"))
-monospace :: DynFlags -> Parser (Doc RdrName)
-monospace d = DocMonospaced . parseStringBS d <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+monospace :: Parser (DocH mod IString)
+monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
 
-moduleName :: Parser (Doc a)
+moduleName :: Parser (DocH mod a)
 moduleName = DocModule <$> (char '"' *> modid <* char '"')
   where
     modid = intercalate "." <$> conid `sepBy1` "."
@@ -167,41 +195,41 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
 -- Right (DocPic (Picture "hello.png" Nothing))
 -- >>> parseOnly picture "<<hello.png world>>"
 -- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (Doc a)
+picture :: Parser (DocH mod a)
 picture = DocPic . makeLabeled Picture . decodeUtf8
           <$> disallowNewline ("<<" *> takeUntil ">>")
 
 -- | Paragraph parser, called by 'parseParas'.
-paragraph :: DynFlags -> Parser (Doc RdrName)
-paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d
-                                         <|> property <|> header d
-                                         <|> textParagraph d)
+paragraph :: Parser (DocH mod IString)
+paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
+                                       <|> property <|> header
+                                       <|> textParagraph)
 
-header :: DynFlags -> Parser (Doc RdrName)
-header d = do
+header :: Parser (DocH mod IString)
+header = do
   let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
       pser = foldl1 (<|>) psers
   delim <- decodeUtf8 <$> pser
-  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString d
-  rest <- paragraph d <|> return mempty
+  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
+  rest <- paragraph <|> return mempty
   return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest
 
-textParagraph :: DynFlags -> Parser (Doc RdrName)
-textParagraph d = docParagraph . parseString d . intercalate "\n" <$> many1 nonEmptyLine
+textParagraph :: Parser (DocH mod IString)
+textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
 
 -- | List parser, called by 'paragraph'.
-list :: DynFlags -> Parser (Doc RdrName)
-list d = DocUnorderedList <$> unorderedList d
-         <|> DocOrderedList <$> orderedList d
-         <|> DocDefList <$> definitionList d
+list :: Parser (DocH mod IString)
+list = DocUnorderedList <$> unorderedList
+       <|> DocOrderedList <$> orderedList
+       <|> DocDefList <$> definitionList
 
 -- | Parses unordered (bullet) lists.
-unorderedList :: DynFlags -> Parser [Doc RdrName]
-unorderedList d = ("*" <|> "-") *> innerList (unorderedList d) d
+unorderedList :: Parser [DocH mod IString]
+unorderedList = ("*" <|> "-") *> innerList unorderedList
 
 -- | Parses ordered lists (numbered or dashed).
-orderedList :: DynFlags -> Parser [Doc RdrName]
-orderedList d = (paren <|> dot) *> innerList (orderedList d) d
+orderedList :: Parser [DocH mod IString]
+orderedList = (paren <|> dot) *> innerList orderedList
   where
     dot = (decimal :: Parser Int) <* "."
     paren = "(" *> decimal <* ")"
@@ -210,23 +238,23 @@ orderedList d = (paren <|> dot) *> innerList (orderedList d) d
 -- list entry and recursively collecting any further lists in the
 -- same paragraph. Usually used as
 --
--- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags
-innerList :: Parser [Doc RdrName] -> DynFlags -> Parser [Doc RdrName]
-innerList item d = do
+-- > someListFunction = listBeginning *> innerList someListFunction
+innerList :: Parser [DocH mod IString] -> Parser [DocH mod IString]
+innerList item = do
   c <- takeLine
-  (cs, items) <- more item d
-  let contents = docParagraph . parseString d . dropNLs . unlines $ c : cs
+  (cs, items) <- more item
+  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
   return $ case items of
     Left p -> [contents `joinPara` p]
     Right i -> contents : i
 
 -- | Parses definition lists.
-definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]
-definitionList d = do
-  label <- "[" *> (parseStringBS d <$> takeWhile1 (`notElem` "]\n")) <* "]"
+definitionList :: Parser [(DocH mod IString, DocH mod IString)]
+definitionList = do
+  label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
   c <- takeLine
-  (cs, items) <- more (definitionList d) d
-  let contents = parseString d . dropNLs . unlines $ c : cs
+  (cs, items) <- more definitionList
+  let contents = parseString . dropNLs . unlines $ c : cs
   return $ case items of
     Left p -> [(label, contents `joinPara` p)]
     Right i -> (label, contents) : i
@@ -239,7 +267,7 @@ definitionList d = do
 -- rather than
 --
 -- @DocAppend (DocParagraph …) …@
-joinPara :: Doc id -> Doc id -> Doc id
+joinPara :: DocH mod id -> DocH mod id -> DocH mod id
 joinPara (DocParagraph p) c = docParagraph $ docAppend p c
 joinPara d p = docAppend d p
 
@@ -250,33 +278,32 @@ dropNLs = reverse . dropWhile (== '\n') . reverse
 -- | Main worker for 'innerList' and 'definitionList'.
 -- We need the 'Either' here to be able to tell in the respective functions
 -- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => Parser a -> DynFlags
-     -> Parser ([String], Either (Doc RdrName) a)
-more item d = innerParagraphs d <|> moreListItems item
-              <|> moreContent item d <|> pure ([], Right mempty)
+more :: Monoid a => Parser a
+     -> Parser ([String], Either (DocH mod IString) a)
+more item = innerParagraphs <|> moreListItems item
+            <|> moreContent item <|> pure ([], Right mempty)
 
 -- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: DynFlags
-                -> Parser ([String], Either (Doc RdrName) a)
-innerParagraphs d = (,) [] . Left <$> ("\n" *> indentedParagraphs d)
+innerParagraphs :: Parser ([String], Either (DocH mod IString) a)
+innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
 
 -- | Attemps to fetch the next list if possibly. Used by 'innerList' and
 -- 'definitionList' to recursivly grab lists that aren't separated by a whole
 -- paragraph.
 moreListItems :: Parser a
-              -> Parser ([String], Either (Doc RdrName) a)
+              -> Parser ([String], Either (DocH mod IString) a)
 moreListItems item = (,) [] . Right <$> (skipSpace *> item)
 
 -- | Helper for 'innerList' and 'definitionList' which simply takes
 -- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => Parser a -> DynFlags
-            -> Parser ([String], Either (Doc RdrName) a)
-moreContent item d = first . (:) <$> nonEmptyLine <*> more item d
+moreContent :: Monoid a => Parser a
+            -> Parser ([String], Either (DocH mod IString) a)
+moreContent item = first . (:) <$> nonEmptyLine <*> more item
 
 -- | Runs the 'parseParas' parser on an indented paragraph.
 -- The indentation is 4 spaces.
-indentedParagraphs :: DynFlags -> Parser (Doc RdrName)
-indentedParagraphs d = parseParas d . concat <$> dropFrontOfPara "    "
+indentedParagraphs :: Parser (DocH mod IString)
+indentedParagraphs = parseParas . concat <$> dropFrontOfPara "    "
 
 -- | Grab as many fully indented paragraphs as we can.
 dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
@@ -303,7 +330,7 @@ takeNonEmptyLine :: Parser String
 takeNonEmptyLine = do
     (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
 
-birdtracks :: Parser (Doc a)
+birdtracks :: Parser (DocH mod a)
 birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
   where
     line = skipHorizontalSpace *> ">" *> takeLine
@@ -317,7 +344,7 @@ stripSpace = fromMaybe <*> mapM strip'
 
 -- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
 -- Consecutive examples are accepted.
-examples :: Parser (Doc a)
+examples :: Parser (DocH mod a)
 examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
   where
     go :: Parser [Example]
@@ -360,15 +387,15 @@ endOfLine = void "\n" <|> endOfInput
 --
 -- >>> parseOnly property "prop> hello world"
 -- Right (DocProperty "hello world")
-property :: Parser (Doc a)
+property :: Parser (DocH mod a)
 property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
 
 -- |
 -- Paragraph level codeblock. Anything between the two delimiting @ is parsed
 -- for markup.
-codeblock :: DynFlags -> Parser (Doc RdrName)
-codeblock d =
-  DocCodeBlock . parseStringBS d . dropSpaces
+codeblock :: Parser (DocH mod IString)
+codeblock =
+  DocCodeBlock . parseStringBS . dropSpaces
   <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
   where
     dropSpaces xs =
@@ -386,7 +413,7 @@ codeblock d =
     -- text which we need to decide whether we actually want to be dropping
     -- anything at all.
     splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
-                               _ -> Nothing)
+                               _      -> Nothing)
                 . ('\n' :)
 
     dropSpace "" = Just ""
@@ -400,16 +427,16 @@ codeblock d =
           | isNewline && isSpace c = Just isNewline
           | otherwise = Just $ c == '\n'
 
-hyperlink :: Parser (Doc a)
+hyperlink :: Parser (DocH mod a)
 hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
               <$> disallowNewline ("<" *> takeUntil ">")
             <|> autoUrl
 
-autoUrl :: Parser (Doc a)
+autoUrl :: Parser (DocH mod a)
 autoUrl = mkLink <$> url
   where
     url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
-    mkLink :: BS.ByteString -> Doc a
+    mkLink :: BS.ByteString -> DocH mod a
     mkLink s = case BS.unsnoc s of
       Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
       _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
@@ -435,28 +462,16 @@ parseValid = do
 utf8String :: String -> Parser String
 utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
 
--- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the
+-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
 -- string it deems valid.
-identifier :: DynFlags -> Parser (Doc RdrName)
-identifier dflags = do
+identifier :: Parser (DocH mod IString)
+identifier = do
   o <- idDelim
   vid <- parseValid
   e <- idDelim
-  return $ validIdentifier o vid e
+  return $ DocIdentifier (o, vid, e)
   where
     idDelim = char '\'' <|> char '`'
-    validIdentifier o ident e = case parseIdent ident of
-      Just identName -> DocIdentifier identName
-      Nothing -> DocString $ o : ident ++ [e]
-
-    parseIdent :: String -> Maybe RdrName
-    parseIdent str0 =
-      let buffer = stringToStringBuffer str0
-          realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
-          pstate = mkPState dflags buffer realSrcLc
-      in case unP parseIdentifier pstate of
-        POk _ name -> Just (unLoc name)
-        _ -> Nothing
 
 -- | Remove all leading and trailing whitespace
 strip :: String -> String
@@ -480,3 +495,22 @@ makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
     removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
     removeEscapes ('\\':xs) = removeEscapes xs
     removeEscapes (x:xs) = x : removeEscapes xs
+
+takeUntil :: ByteString -> Parser ByteString
+takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
+  where
+    end = BS.unpack end_
+
+    p :: (Bool, String) -> Char -> Maybe (Bool, String)
+    p acc c = case acc of
+      (True, _) -> Just (False, end)
+      (_, []) -> Nothing
+      (_, x:xs) | x == c -> Just (False, xs)
+      _ -> Just (c == '\\', end)
+
+    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
+    requireEnd = mfilter (BS.isSuffixOf end_)
+
+    gotSome xs
+      | BS.null xs = fail "didn't get any content"
+      | otherwise = return xs