7 files changed, 384 insertions, 211 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Markup.hs b/haddock-library/src/Documentation/Haddock/Markup.hs
index da8edcd4..365041ee 100644
--- a/haddock-library/src/Documentation/Haddock/Markup.hs
+++ b/haddock-library/src/Documentation/Haddock/Markup.hs
@@ -2,35 +2,38 @@
 module Documentation.Haddock.Markup (
     markup
   , idMarkup
+  , plainMarkup
   ) where
 
 import Documentation.Haddock.Types
 
+import Data.Maybe ( fromMaybe )
+
 markup :: DocMarkupH mod id a -> DocH mod id -> a
-markup m DocEmpty                    = markupEmpty m
-markup m (DocAppend d1 d2)           = markupAppend m (markup m d1) (markup m d2)
-markup m (DocString s)               = markupString m s
-markup m (DocParagraph d)            = markupParagraph m (markup m d)
-markup m (DocIdentifier x)           = markupIdentifier m x
-markup m (DocIdentifierUnchecked x)  = markupIdentifierUnchecked m x
-markup m (DocModule mod0)            = markupModule m mod0
-markup m (DocWarning d)              = markupWarning m (markup m d)
-markup m (DocEmphasis d)             = markupEmphasis m (markup m d)
-markup m (DocBold d)                 = markupBold m (markup m d)
-markup m (DocMonospaced d)           = markupMonospaced m (markup m d)
-markup m (DocUnorderedList ds)       = markupUnorderedList m (map (markup m) ds)
-markup m (DocOrderedList ds)         = markupOrderedList m (map (markup m) ds)
-markup m (DocDefList ds)             = markupDefList m (map (markupPair m) ds)
-markup m (DocCodeBlock d)            = markupCodeBlock m (markup m d)
-markup m (DocHyperlink l)            = markupHyperlink m l
-markup m (DocAName ref)              = markupAName m ref
-markup m (DocPic img)                = markupPic m img
-markup m (DocMathInline mathjax)     = markupMathInline m mathjax
-markup m (DocMathDisplay mathjax)    = markupMathDisplay m mathjax
-markup m (DocProperty p)             = markupProperty m p
-markup m (DocExamples e)             = markupExample m e
-markup m (DocHeader (Header l t))    = markupHeader m (Header l (markup m t))
-markup m (DocTable (Table h b))      = markupTable m (Table (map (fmap (markup m)) h) (map (fmap (markup m)) b))
+markup m DocEmpty                       = markupEmpty m
+markup m (DocAppend d1 d2)              = markupAppend m (markup m d1) (markup m d2)
+markup m (DocString s)                  = markupString m s
+markup m (DocParagraph d)               = markupParagraph m (markup m d)
+markup m (DocIdentifier x)              = markupIdentifier m x
+markup m (DocIdentifierUnchecked x)     = markupIdentifierUnchecked m x
+markup m (DocModule mod0)               = markupModule m mod0
+markup m (DocWarning d)                 = markupWarning m (markup m d)
+markup m (DocEmphasis d)                = markupEmphasis m (markup m d)
+markup m (DocBold d)                    = markupBold m (markup m d)
+markup m (DocMonospaced d)              = markupMonospaced m (markup m d)
+markup m (DocUnorderedList ds)          = markupUnorderedList m (map (markup m) ds)
+markup m (DocOrderedList ds)            = markupOrderedList m (map (markup m) ds)
+markup m (DocDefList ds)                = markupDefList m (map (markupPair m) ds)
+markup m (DocCodeBlock d)               = markupCodeBlock m (markup m d)
+markup m (DocHyperlink (Hyperlink u l)) = markupHyperlink m (Hyperlink u (fmap (markup m) l))
+markup m (DocAName ref)                 = markupAName m ref
+markup m (DocPic img)                   = markupPic m img
+markup m (DocMathInline mathjax)        = markupMathInline m mathjax
+markup m (DocMathDisplay mathjax)       = markupMathDisplay m mathjax
+markup m (DocProperty p)                = markupProperty m p
+markup m (DocExamples e)                = markupExample m e
+markup m (DocHeader (Header l t))       = markupHeader m (Header l (markup m t))
+markup m (DocTable (Table h b))         = markupTable m (Table (map (fmap (markup m)) h) (map (fmap (markup m)) b))
 
 markupPair :: DocMarkupH mod id a -> (DocH mod id, DocH mod id) -> (a, a)
 markupPair m (a,b) = (markup m a, markup m b)
@@ -63,3 +66,34 @@ idMarkup = Markup {
   markupHeader               = DocHeader,
   markupTable                = DocTable
   }
+
+-- | Map a 'DocH' into a best estimate of an alternate plain string: inline
+-- formatting is stripped and its text kept, while lists, examples and tables
+-- are dropped entirely (they are rendered as the empty string).
+plainMarkup :: (mod -> String) -> (id -> String) -> DocMarkupH mod id String
+plainMarkup plainMod plainIdent = Markup {
+  markupEmpty                = "",
+  markupString               = id,
+  markupParagraph            = id,
+  markupAppend               = (++),
+  markupIdentifier           = plainIdent,
+  markupIdentifierUnchecked  = plainMod,
+  markupModule               = id,
+  markupWarning              = id,
+  markupEmphasis             = id,
+  markupBold                 = id,
+  markupMonospaced           = id,
+  markupUnorderedList        = const "",  -- list items are discarded
+  markupOrderedList          = const "",  -- list items are discarded
+  markupDefList              = const "",  -- definitions are discarded
+  markupCodeBlock            = id,
+  markupHyperlink            = \(Hyperlink url lbl) -> fromMaybe url lbl,  -- label, else the URL
+  markupAName                = id,
+  markupPic                  = \(Picture uri title) -> fromMaybe uri title,  -- title, else the URI
+  markupMathInline           = id,
+  markupMathDisplay          = id,
+  markupProperty             = id,
+  markupExample              = const "",  -- examples are discarded
+  markupHeader               = \(Header _ title) -> title,  -- the header level is ignored
+  markupTable                = const ""   -- tables are discarded
+  }
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
index d79da40b..36c8bb5b 100644
--- a/haddock-library/src/Documentation/Haddock/Parser.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -27,14 +27,16 @@ module Documentation.Haddock.Parser (
 import           Control.Applicative
 import           Control.Arrow (first)
 import           Control.Monad
-import           Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace)
-import           Data.List (intercalate, unfoldr, elemIndex, notElem)
+import           Data.Char (chr, isUpper, isAlpha, isSpace)
+import           Data.List (intercalate, unfoldr, elemIndex)
 import           Data.Maybe (fromMaybe, mapMaybe)
 import           Data.Monoid
 import qualified Data.Set as Set
 import           Documentation.Haddock.Doc
+import           Documentation.Haddock.Markup ( markup, plainMarkup )
 import           Documentation.Haddock.Parser.Monad
 import           Documentation.Haddock.Parser.Util
+import           Documentation.Haddock.Parser.Identifier
 import           Documentation.Haddock.Types
 import           Prelude hiding (takeWhile)
 import qualified Prelude as P
@@ -45,53 +47,26 @@ import           Text.Parsec (try)
 import qualified Data.Text as T
 import           Data.Text (Text)
 
-#if MIN_VERSION_base(4,9,0)
-import           Text.Read.Lex                      (isSymbolChar)
-#else
-import           Data.Char                          (GeneralCategory (..),
-                                                     generalCategory)
-#endif
 
 -- $setup
 -- >>> :set -XOverloadedStrings
 
-#if !MIN_VERSION_base(4,9,0)
--- inlined from base-4.10.0.0
-isSymbolChar :: Char -> Bool
-isSymbolChar c = not (isPuncChar c) && case generalCategory c of
-    MathSymbol           -> True
-    CurrencySymbol       -> True
-    ModifierSymbol       -> True
-    OtherSymbol          -> True
-    DashPunctuation      -> True
-    OtherPunctuation     -> c `notElem` ("'\"" :: String)
-    ConnectorPunctuation -> c /= '_'
-    _                    -> False
-  where
-    -- | The @special@ character class as defined in the Haskell Report.
-    isPuncChar :: Char -> Bool
-    isPuncChar = (`elem` (",;()[]{}`" :: String))
-#endif
-
--- | Identifier string surrounded with opening and closing quotes/backticks.
-type Identifier = (Char, String, Char)
-
 -- | Drops the quotes/backticks around all identifiers, as if they
 -- were valid but still 'String's.
 toRegular :: DocH mod Identifier -> DocH mod String
-toRegular = fmap (\(_, x, _) -> x)
+toRegular = fmap (\(Identifier _ _ x _) -> x)
 
 -- | Maps over 'DocIdentifier's over 'String' with potentially failing
 -- conversion using user-supplied function. If the conversion fails,
 -- the identifier is deemed to not be valid and is treated as a
 -- regular string.
-overIdentifier :: (String -> Maybe a)
+overIdentifier :: (Namespace -> String -> Maybe a)
                -> DocH mod Identifier
                -> DocH mod a
 overIdentifier f d = g d
   where
-    g (DocIdentifier (o, x, e)) = case f x of
-      Nothing -> DocString $ o : x ++ [e]
+    g (DocIdentifier (Identifier ns o x e)) = case f ns x of
+      Nothing -> DocString $ renderNs ns ++ [o] ++ x ++ [e]
       Just x' -> DocIdentifier x'
     g DocEmpty = DocEmpty
     g (DocAppend x x') = DocAppend (g x) (g x')
@@ -107,7 +82,7 @@ overIdentifier f d = g d
     g (DocOrderedList x) = DocOrderedList $ fmap g x
     g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
     g (DocCodeBlock x) = DocCodeBlock $ g x
-    g (DocHyperlink x) = DocHyperlink x
+    g (DocHyperlink (Hyperlink u x)) = DocHyperlink (Hyperlink u (fmap g x))
     g (DocPic x) = DocPic x
     g (DocMathInline x) = DocMathInline x
     g (DocMathDisplay x) = DocMathDisplay x
@@ -301,13 +276,20 @@ mathInline = DocMathInline . T.unpack
 -- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]"
 -- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"
 mathDisplay :: Parser (DocH mod a)
-mathDisplay = DocMathDisplay . T.unpack 
+mathDisplay = DocMathDisplay . T.unpack
               <$> ("\\[" *> takeUntil "\\]")
 
-markdownImage :: Parser (DocH mod a)
-markdownImage = fromHyperlink <$> ("!" *> linkParser)
+-- | Markdown image parser. As per the commonmark reference recommendation, the
+-- description text for an image is converted to its plain string representation.
+--
+-- >>> parseString "![some /emphasis/ in a description](www.site.com)"
+-- DocPic (Picture "www.site.com" (Just "some emphasis in a description"))
+markdownImage :: Parser (DocH mod Identifier)
+markdownImage = DocPic . fromHyperlink <$> ("!" *> linkParser)
   where
-    fromHyperlink (Hyperlink url label) = DocPic (Picture url label)
+    fromHyperlink (Hyperlink u l) = Picture u (fmap (markup stringMarkup) l)
+    stringMarkup = plainMarkup (const "") renderIdent
+    renderIdent (Identifier ns l c r) = renderNs ns <> [l] <> c <> [r]
 
 -- | Paragraph parser, called by 'parseParas'.
 paragraph :: Parser (DocH mod Identifier)
@@ -360,32 +342,34 @@ table = do
     parseFirstRow :: Parser Text
     parseFirstRow = do
         skipHorizontalSpace
-        -- upper-left corner is +
-        c <- Parsec.char '+'
-        cs <- some (Parsec.char '-' <|> Parsec.char '+')
+        cs <- takeWhile (\c -> c == '-' || c == '+')
 
-        -- upper right corner is + too
-        guard (last cs == '+')
+        -- upper-left and upper-right corners are `+`
+        guard (T.length cs >= 2 &&
+               T.head cs == '+' &&
+               T.last cs == '+')
 
         -- trailing space
         skipHorizontalSpace
         _ <- Parsec.newline
 
-        return (T.cons c $ T.pack cs)
+        return cs
 
     parseRestRows :: Int -> Parser Text
     parseRestRows l = do
         skipHorizontalSpace
+        bs <- scan predicate l
 
-        c <- Parsec.char '|' <|> Parsec.char '+'
-        bs <- scan predicate (l - 2)
-        c2 <- Parsec.char '|' <|> Parsec.char '+'
+        -- Left and right edges are `|` or `+`
+        guard (T.length bs >= 2 &&
+               (T.head bs == '|' || T.head bs == '+') &&
+               (T.last bs == '|' || T.last bs == '+'))
 
         -- trailing space
         skipHorizontalSpace
         _ <- Parsec.newline
 
-        return (T.cons c (T.snoc bs c2))
+        return bs
       where
         predicate n c
             | n <= 0    = Nothing
@@ -662,7 +646,7 @@ nonSpace xs
 --  Doesn't discard the trailing newline.
 takeNonEmptyLine :: Parser Text
 takeNonEmptyLine = do
-    l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace
+    l <- takeWhile1 (/= '\n') >>= nonSpace
     _ <- "\n"
     pure (l <> "\n")
 
@@ -732,7 +716,7 @@ nonEmptyLine :: Parser Text
 nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)
 
 takeLine :: Parser Text
-takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine)
+takeLine = try (takeWhile (/= '\n') <* endOfLine)
 
 endOfLine :: Parser ()
 endOfLine = void "\n" <|> Parsec.eof 
@@ -742,7 +726,7 @@ endOfLine = void "\n" <|> Parsec.eof
 -- >>> snd <$> parseOnly property "prop> hello world"
 -- Right (DocProperty "hello world")
 property :: Parser (DocH mod a)
-property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n"))
+property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (/= '\n'))
 
 -- |
 -- Paragraph level codeblock. Anything between the two delimiting \@ is parsed
@@ -782,22 +766,22 @@ codeblock =
           | isNewline && isSpace c = Just isNewline
           | otherwise = Just $ c == '\n'
 
-hyperlink :: Parser (DocH mod a)
+hyperlink :: Parser (DocH mod Identifier)
 hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ]
 
 angleBracketLink :: Parser (DocH mod a)
 angleBracketLink =
-    DocHyperlink . makeLabeled Hyperlink 
+    DocHyperlink . makeLabeled (\s -> Hyperlink s . fmap DocString)
     <$> disallowNewline ("<" *> takeUntil ">")
 
-markdownLink :: Parser (DocH mod a)
+markdownLink :: Parser (DocH mod Identifier)
 markdownLink = DocHyperlink <$> linkParser
 
-linkParser :: Parser Hyperlink
+linkParser :: Parser (Hyperlink (DocH mod Identifier))
 linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
   where
-    label :: Parser (Maybe String)
-    label = Just . decode . T.strip <$> ("[" *> takeUntil "]")
+    label :: Parser (Maybe (DocH mod Identifier))
+    label = Just . parseParagraph . T.strip <$> ("[" *> takeUntil "]")
 
     whitespace :: Parser ()
     whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
@@ -816,41 +800,17 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
 autoUrl :: Parser (DocH mod a)
 autoUrl = mkLink <$> url
   where
-    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace))
+    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace)
     
     mkLink :: Text -> DocH mod a
     mkLink s = case T.unsnoc s of
       Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x]
       _ -> DocHyperlink (mkHyperlink s)
 
-    mkHyperlink :: Text -> Hyperlink
+    mkHyperlink :: Text -> Hyperlink (DocH mod a)
     mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing
 
 
-
--- | Parses strings between identifier delimiters. Consumes all input that it
--- deems to be valid in an identifier. Note that it simply blindly consumes
--- characters and does no actual validation itself.
-parseValid :: Parser String
-parseValid = p some
-  where
-    idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_')
-
-    p p' = do
-      vs <- p' idChar
-      c <- peekChar'
-      case c of
-        '`' -> return vs
-        '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ]
-        _ -> fail "outofvalid"
-
--- | Parses identifiers with help of 'parseValid'. Asks GHC for
--- 'String' from the string it deems valid.
+-- | Parses identifiers with help of 'parseValid'.
 identifier :: Parser (DocH mod Identifier)
-identifier = do
-  o <- idDelim
-  vid <- parseValid
-  e <- idDelim
-  return $ DocIdentifier (o, vid, e)
-  where
-    idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`')
+identifier = DocIdentifier <$> parseValid
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs
new file mode 100644
index 00000000..7bc98b62
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs
@@ -0,0 +1,186 @@
+{-# LANGUAGE CPP          #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ViewPatterns #-}
+-- |
+-- Module      :  Documentation.Haddock.Parser.Identifier
+-- Copyright   :  (c) Alec Theriault 2019,
+-- License     :  BSD-like
+--
+-- Maintainer  :  haddock@projects.haskell.org
+-- Stability   :  experimental
+-- Portability :  portable
+--
+-- Functionality for parsing identifiers and operators
+
+module Documentation.Haddock.Parser.Identifier (
+  Identifier(..),
+  parseValid,
+) where
+
+import Documentation.Haddock.Types           ( Namespace(..) )
+import Documentation.Haddock.Parser.Monad
+import qualified Text.Parsec as Parsec
+import           Text.Parsec.Pos             ( updatePosChar )
+import           Text.Parsec                 ( State(..)
+                                             , getParserState, setParserState )
+
+import Data.Text (Text)
+import qualified Data.Text as T
+
+import           Data.Char (isAlpha, isAlphaNum)
+import Control.Monad (guard)
+import Data.Functor (($>))
+#if MIN_VERSION_base(4,9,0)
+import           Text.Read.Lex                      (isSymbolChar)
+#else
+import           Data.Char                          (GeneralCategory (..),
+                                                     generalCategory)
+#endif
+
+import Data.Maybe
+
+-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
+data Identifier = Identifier !Namespace !Char String !Char
+  deriving (Show, Eq)
+
+-- | Parse a delimited identifier (see 'takeIdentifier') at the current input
+-- position. Fails, leaving the parser state untouched, when there is none.
+parseValid :: Parser Identifier
+parseValid = do
+  s@State{ stateInput = inp, statePos = pos } <- getParserState
+  case takeIdentifier inp of
+    Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
+    Just (ns, op, ident, cl, inp') ->
+      -- Count the optional namespace character when advancing the position.
+      let nsLen = if ns == None then 0 else 1
+          posCl = T.foldl updatePosChar pos (T.take (nsLen + T.length ident + 2) inp)
+          s' = s{ stateInput = inp', statePos = posCl }
+      in setParserState s' $> Identifier ns op (T.unpack ident) cl
+
+#if !MIN_VERSION_base(4,9,0)
+-- inlined from base-4.10.0.0
+isSymbolChar :: Char -> Bool
+isSymbolChar c = not (isPuncChar c) && case generalCategory c of
+    MathSymbol           -> True
+    CurrencySymbol       -> True
+    ModifierSymbol       -> True
+    OtherSymbol          -> True
+    DashPunctuation      -> True
+    OtherPunctuation     -> c `notElem` "'\""
+    ConnectorPunctuation -> c /= '_'
+    _                    -> False
+  where
+    -- | The @special@ character class as defined in the Haskell Report.
+    isPuncChar :: Char -> Bool
+    isPuncChar = (`elem` (",;()[]{}`" :: String))
+#endif
+
+-- | Try to parse a delimited identifier off the front of the given input.
+--
+-- This tries to match as many valid Haskell identifiers/operators as possible,
+-- to the point of sometimes accepting invalid things (ex: keywords). Some
+-- considerations:
+--
+--   - operators and identifiers can have module qualifications
+--   - operators can be wrapped in parens (for prefix)
+--   - identifiers can be wrapped in backticks (for infix)
+--   - delimiters are backticks or regular ticks
+--   - since regular ticks are also valid in identifiers, we opt for the
+--     longest successful parse
+--
+-- This function should make /O(1)/ allocations
+takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
+takeIdentifier input = listToMaybe $ do
+
+    -- Optional namespace
+    let (ns, input') = case T.uncons input of
+                         Just ('v', i) -> (Value, i)
+                         Just ('t', i) -> (Type, i)
+                         _             -> (None, input)
+
+    -- Opening tick
+    (op, input'') <- maybeToList (T.uncons input')
+    guard (op == '\'' || op == '`')
+
+    -- Identifier/operator
+    (ident, input''') <- wrapped input''
+
+    -- Closing tick
+    (cl, input'''') <- maybeToList (T.uncons input''')
+    guard (cl == '\'' || cl == '`')
+
+    pure (ns, op, ident, cl, input'''')
+
+  where
+
+    -- | Parse out a wrapped, possibly qualified, operator or identifier
+    wrapped t = do
+      (c, t'  ) <- maybeToList (T.uncons t)
+      -- Tuples
+      case c of
+        '(' | Just (c', _) <- T.uncons t'
+            , c' == ',' || c' == ')'
+            -> do let (commas, t'') = T.span (== ',') t'
+                  (')', t''') <- maybeToList (T.uncons t'')
+                  pure (T.take (T.length commas + 2) t, t''')
+
+        -- Parenthesized
+        '(' -> do (n,   t'' ) <- general False 0 [] t'
+                  (')', t''') <- maybeToList (T.uncons t'')
+                  pure (T.take (n + 2) t, t''')
+
+        -- Backticked
+        '`' -> do (n,   t'' ) <- general False 0 [] t'
+                  ('`', t''') <- maybeToList (T.uncons t'')
+                  pure (T.take (n + 2) t, t''')
+
+        -- Unadorned
+        _   -> do (n,   t'' ) <- general False 0 [] t
+                  pure (T.take n t, t'')
+
+    -- | Parse out a possibly qualified operator or identifier
+    general :: Bool           -- ^ refuse inputs starting with operators
+            -> Int            -- ^ total characters \"consumed\" so far
+            -> [(Int, Text)]  -- ^ accumulated results
+            -> Text           -- ^ current input
+            -> [(Int, Text)]  -- ^ total characters parsed & what remains
+    general !identOnly !i acc t
+      -- Starts with an identifier (either just an identifier, or a module qual)
+      | Just (n, rest) <- identLike t
+      = if T.null rest
+          then acc
+          else case T.head rest of
+                 '`' -> (n + i, rest) : acc
+                 ')' -> (n + i, rest) : acc
+                 '.' -> general False (n + i + 1) acc (T.tail rest)
+                 '\'' -> let (m, rest') = quotes rest
+                         in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (T.tail rest')
+                 _ -> acc
+
+      -- An operator
+      | Just (n, rest) <- optr t
+      , not identOnly
+      = (n + i, rest) : acc
+
+      -- Anything else
+      | otherwise
+      = acc
+
+    -- | Parse an identifier off the front of the input
+    identLike t
+      | T.null t = Nothing
+      | isAlpha (T.head t) || '_' == T.head t
+      = let !(idt, rest) = T.span (\c -> isAlphaNum c || c == '_') t
+            !(octos, rest') = T.span (== '#') rest
+      in Just (T.length idt + T.length octos, rest')
+      | otherwise = Nothing
+
+    -- | Parse all but the last quote off the front of the input
+    -- PRECONDITION: T.head t == '\''
+    quotes :: Text -> (Int, Text)
+    quotes t = let !n = T.length (T.takeWhile (== '\'') t) - 1
+               in (n, T.drop n t)
+
+    -- | Parse an operator off the front of the input
+    optr t = let !(op, rest) = T.span isSymbolChar t
+             in if T.null op then Nothing else Just (T.length op, rest)
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
index a5664aa8..fa46f536 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
@@ -4,15 +4,32 @@
 {-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE TypeSynonymInstances #-}
+-- |
+-- Module      :  Documentation.Haddock.Parser.Monad
+-- Copyright   :  (c) Alec Theriault 2018-2019,
+-- License     :  BSD-like
+--
+-- Maintainer  :  haddock@projects.haskell.org
+-- Stability   :  experimental
+-- Portability :  portable
+--
+-- Defines the Parsec monad over which all parsing is done and also provides
+-- more efficient versions of the usual parsec combinator functions (but
+-- specialized to 'Text').
 
 module Documentation.Haddock.Parser.Monad where
 
 import qualified Text.Parsec.Char as Parsec
 import qualified Text.Parsec as Parsec
+import           Text.Parsec.Pos             ( updatePosChar )
+import           Text.Parsec                 ( State(..)
+                                             , getParserState, setParserState )
 
 import qualified Data.Text as T
 import           Data.Text                   ( Text )
 
+import           Control.Monad               ( mfilter )
+import           Data.Functor                ( ($>) )
 import           Data.String                 ( IsString(..) )
 import           Data.Bits                   ( Bits(..) )
 import           Data.Char                   ( ord )
@@ -20,7 +37,11 @@ import           Data.List                   ( foldl' )
 import           Control.Applicative as App
 
 import           Documentation.Haddock.Types ( Version )
+import           Prelude hiding (takeWhile)
 
+-- | The only bit of information we really care about trudging along with us
+-- through parsing is the version attached to a @\@since@ annotation - if
+-- the doc even contained one.
 newtype ParserState = ParserState {
   parserStateSince :: Maybe Version
 } deriving (Eq, Show)
@@ -29,7 +50,7 @@ initialParserState :: ParserState
 initialParserState = ParserState Nothing
 
 setSince :: Version -> Parser ()
-setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since})
+setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since })
 
 type Parser = Parsec.Parsec Text ParserState
 
@@ -44,38 +65,74 @@ parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
 
 -- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
 -- consume input.
+--
+-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but
+-- more efficient.
 peekChar :: Parser (Maybe Char)
-peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar
+peekChar = headOpt . stateInput <$> getParserState
+  where headOpt t | T.null t = Nothing
+                  | otherwise = Just (T.head t)
+{-# INLINE peekChar #-}
 
 -- | Fails if at the end of input. Does not consume input.
+--
+-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.
 peekChar' :: Parser Char
-peekChar' = Parsec.lookAhead Parsec.anyChar 
+peekChar' = headFail . stateInput =<< getParserState
+  where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF"
+                   | otherwise = App.pure (T.head t)
+{-# INLINE peekChar' #-}
 
 -- | Parses the given string. Returns the parsed string.
+--
+-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.
 string :: Text -> Parser Text
-string t = Parsec.string (T.unpack t) *> App.pure t
+string t = do
+  s@State{ stateInput = inp, statePos = pos } <- getParserState
+  case T.stripPrefix t inp of
+    Nothing -> Parsec.parserFail "string: Failed to match the input string"
+    Just inp' ->
+      let pos' = T.foldl updatePosChar pos t
+          s' = s{ stateInput = inp', statePos = pos' }
+      in setParserState s' $> t
+
+-- | Keep matching characters as long as the predicate function holds (and
+-- return them).
+--
+-- Equivalent to @fmap T.pack . Parsec.many . Parsec.satisfy@, but more efficient.
+takeWhile :: (Char -> Bool) -> Parser Text
+takeWhile f = do
+  s@State{ stateInput = inp, statePos = pos } <- getParserState
+  let (t, inp') = T.span f inp
+      pos' = T.foldl updatePosChar pos t
+      s' = s{ stateInput = inp', statePos = pos' }
+  setParserState s' $> t
+
+-- | Like 'takeWhile', but fails if no characters matched.
+--
+-- Equivalent to @fmap T.pack . Parsec.many1 . Parsec.satisfy@, but more efficient.
+takeWhile1 :: (Char -> Bool) -> Parser Text
+takeWhile1 = mfilter (not . T.null) . takeWhile
 
 -- | Scan the input text, accumulating characters as long as the scanning
 -- function returns true.
 scan :: (s -> Char -> Maybe s) -- ^ scan function
      -> s                      -- ^ initial state
      -> Parser Text 
-scan f = fmap T.pack . go
-  where go s1 = do { cOpt <- peekChar
-                   ; case cOpt >>= f s1 of
-                       Nothing -> pure ""
-                       Just s2 -> (:) <$> Parsec.anyChar <*> go s2
-                   }
-
--- | Apply a parser for a character zero or more times and collect the result in
--- a string.
-takeWhile :: Parser Char -> Parser Text
-takeWhile = fmap T.pack . Parsec.many
-
--- | Apply a parser for a character one or more times and collect the result in
--- a string.
-takeWhile1 :: Parser Char -> Parser Text
-takeWhile1 =  fmap T.pack . Parsec.many1
+scan f st = do
+  s@State{ stateInput = inp, statePos = pos } <- getParserState
+  go inp st pos 0 $ \inp' pos' n ->
+    let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' }
+    in setParserState s' $> T.take n inp
+  where
+    go inp s !pos !n cont
+      = case T.uncons inp of
+          Nothing -> cont inp pos n        -- ran out of input
+          Just (c, inp') ->
+             case f s c of
+               Nothing -> cont inp pos n   -- scan function failed
+               Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont
+
 
 -- | Parse a decimal number.
 decimal :: Integral a => Parser a
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
index ffa91b09..98570c22 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -40,7 +40,7 @@ skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace)
 
 -- | Take leading horizontal space
 takeHorizontalSpace :: Parser Text 
-takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace)
+takeHorizontalSpace = takeWhile (`elem` horizontalSpace)
 
 makeLabeled :: (String -> Maybe String -> a) -> Text -> a
 makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of
diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs
index b5dea3d4..ba2f873c 100644
--- a/haddock-library/src/Documentation/Haddock/Types.hs
+++ b/haddock-library/src/Documentation/Haddock/Types.hs
@@ -65,10 +65,10 @@ overDocF f d = (\x -> d { _doc = x }) <$> f (_doc d)
 type Version = [Int]
 type Package = String
 
-data Hyperlink = Hyperlink
+data Hyperlink id = Hyperlink
   { hyperlinkUrl   :: String
-  , hyperlinkLabel :: Maybe String
-  } deriving (Eq, Show)
+  , hyperlinkLabel :: Maybe id
+  } deriving (Eq, Show, Functor, Foldable, Traversable)
 
 data Picture = Picture
   { pictureUri   :: String
@@ -118,7 +118,7 @@ data DocH mod id
   | DocOrderedList [DocH mod id]
   | DocDefList [(DocH mod id, DocH mod id)]
   | DocCodeBlock (DocH mod id)
-  | DocHyperlink Hyperlink
+  | DocHyperlink (Hyperlink (DocH mod id))
   | DocPic Picture
   | DocMathInline String
   | DocMathDisplay String
@@ -147,7 +147,7 @@ instance Bifunctor DocH where
   bimap f g (DocOrderedList docs) = DocOrderedList (map (bimap f g) docs)
   bimap f g (DocDefList docs) = DocDefList (map (bimap f g *** bimap f g) docs)
   bimap f g (DocCodeBlock doc) = DocCodeBlock (bimap f g doc)
-  bimap _ _ (DocHyperlink hyperlink) = DocHyperlink hyperlink
+  bimap f g (DocHyperlink (Hyperlink url lbl)) = DocHyperlink (Hyperlink url (fmap (bimap f g) lbl))
   bimap _ _ (DocPic picture) = DocPic picture
   bimap _ _ (DocMathInline s) = DocMathInline s
   bimap _ _ (DocMathDisplay s) = DocMathDisplay s
@@ -192,7 +192,7 @@ instance Bitraversable DocH where
   bitraverse f g (DocOrderedList docs) = DocOrderedList <$> traverse (bitraverse f g) docs
   bitraverse f g (DocDefList docs) = DocDefList <$> traverse (bitraverse (bitraverse f g) (bitraverse f g)) docs
   bitraverse f g (DocCodeBlock doc) = DocCodeBlock <$> bitraverse f g doc
-  bitraverse _ _ (DocHyperlink hyperlink) = pure (DocHyperlink hyperlink)
+  bitraverse f g (DocHyperlink (Hyperlink url lbl)) = DocHyperlink <$> (Hyperlink url <$> traverse (bitraverse f g) lbl)
   bitraverse _ _ (DocPic picture) = pure (DocPic picture)
   bitraverse _ _ (DocMathInline s) = pure (DocMathInline s)
   bitraverse _ _ (DocMathDisplay s) = pure (DocMathDisplay s)
@@ -203,6 +203,16 @@ instance Bitraversable DocH where
   bitraverse f g (DocTable (Table header body)) = (\h b -> DocTable (Table h b)) <$> traverse (traverse (bitraverse f g)) header <*> traverse (traverse (bitraverse f g)) body
 #endif
 
+-- | The namespace qualification for an identifier.
+data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
+
+-- | Render a namespace in the same format in which it was initially parsed.
+renderNs :: Namespace -> String
+renderNs Value = "v"
+renderNs Type = "t"
+renderNs None = ""
+
+
 -- | 'DocMarkupH' is a set of instructions for marking up documentation.
 -- In fact, it's really just a mapping from 'Doc' to some other
 -- type [a], where [a] is usually the type of the output (HTML, say).
@@ -227,7 +237,7 @@ data DocMarkupH mod id a = Markup
   , markupOrderedList          :: [a] -> a
   , markupDefList              :: [(a,a)] -> a
   , markupCodeBlock            :: a -> a
-  , markupHyperlink            :: Hyperlink -> a
+  , markupHyperlink            :: Hyperlink a -> a
   , markupAName                :: String -> a
   , markupPic                  :: Picture -> a
   , markupMathInline           :: String -> a
diff --git a/haddock-library/src/Documentation/Haddock/Utf8.hs b/haddock-library/src/Documentation/Haddock/Utf8.hs
deleted file mode 100644
index 3f75e53b..00000000
--- a/haddock-library/src/Documentation/Haddock/Utf8.hs
+++ /dev/null
@@ -1,74 +0,0 @@
-module Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) where
-import           Data.Bits ((.|.), (.&.), shiftL, shiftR)
-import qualified Data.ByteString as BS
-import           Data.Char (chr, ord)
-import           Data.Word (Word8)
-
--- | Helper that encodes and packs a 'String' into a 'BS.ByteString'
-encodeUtf8 :: String -> BS.ByteString
-encodeUtf8 = BS.pack . encode
-
--- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String'
-decodeUtf8 :: BS.ByteString -> String
-decodeUtf8 = decode . BS.unpack
-
--- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding
--- | Character to use when 'encode' or 'decode' fail for a byte.
-replacementCharacter :: Char
-replacementCharacter = '\xfffd'
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-encode :: String -> [Word8]
-encode = concatMap (map fromIntegral . go . ord)
- where
-  go oc
-   | oc <= 0x7f       = [oc]
-
-   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
-   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
-                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
--- | Decode a UTF8 string packed into a list of Word8 values, directly to String
-decode :: [Word8] -> String
-decode [    ] = ""
-decode (c:cs)
-  | c < 0x80  = chr (fromEnum c) : decode cs
-  | c < 0xc0  = replacementCharacter : decode cs
-  | c < 0xe0  = multi1
-  | c < 0xf0  = multi_byte 2 0xf  0x800
-  | c < 0xf8  = multi_byte 3 0x7  0x10000
-  | c < 0xfc  = multi_byte 4 0x3  0x200000
-  | c < 0xfe  = multi_byte 5 0x1  0x4000000
-  | otherwise = replacementCharacter : decode cs
-  where
-    multi1 = case cs of
-      c1 : ds | c1 .&. 0xc0 == 0x80 ->
-        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
-        in if d >= 0x000080 then toEnum d : decode ds
-                            else replacementCharacter : decode ds
-      _ -> replacementCharacter : decode cs
-
-    multi_byte :: Int -> Word8 -> Int -> String
-    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
-      where
-        aux 0 rs acc
-          | overlong <= acc && acc <= 0x10ffff &&
-            (acc < 0xd800 || 0xdfff < acc)     &&
-            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs
-          | otherwise = replacementCharacter : decode rs
-
-        aux n (r:rs) acc
-          | r .&. 0xc0 == 0x80 = aux (n-1) rs
-                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
-
-        aux _ rs     _ = replacementCharacter : decode rs