9 files changed, 52 insertions, 715 deletions
diff --git a/src/Documentation/Haddock.hs b/src/Documentation/Haddock.hs
index 36115a2a..655a9723 100644
--- a/src/Documentation/Haddock.hs
+++ b/src/Documentation/Haddock.hs
@@ -33,7 +33,8 @@ module Documentation.Haddock (
   InstHead,
 
   -- * Documentation comments
-  Doc(..),
+  Doc,
+  DocH(..),
   Example(..),
   Hyperlink(..),
   DocMarkup(..),
diff --git a/src/Haddock.hs b/src/Haddock.hs
index 66dfb168..78844c96 100644
--- a/src/Haddock.hs
+++ b/src/Haddock.hs
@@ -25,7 +25,7 @@ import Haddock.Backends.Xhtml.Themes (getThemes)
 import Haddock.Backends.LaTeX
 import Haddock.Backends.Hoogle
 import Haddock.Interface
-import Haddock.Parser.Util
+import Haddock.Parser
 import Haddock.Types
 import Haddock.Version
 import Haddock.InterfaceFile
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index d812aee2..79a59ac2 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,55 +1,18 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-module Haddock.Doc (
-  docAppend
-, docParagraph
-, combineDocumentation
-) where
+module Haddock.Doc ( module Documentation.Haddock.Doc
+                   , docCodeBlock
+                   , combineDocumentation
+                   ) where
 
 import Data.Maybe
 import Data.Monoid
+import Documentation.Haddock.Doc
 import Haddock.Types
-import Data.Char (isSpace)
-
--- We put it here so that we can avoid a circular import
--- anything relevant imports this module anyway
-instance Monoid (DocH mod id) where
-  mempty  = DocEmpty
-  mappend = docAppend
 
 combineDocumentation :: Documentation name -> Maybe (Doc name)
 combineDocumentation (Documentation Nothing Nothing) = Nothing
-combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
-
-docAppend :: DocH mod id -> DocH mod id -> DocH mod id
-docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
-docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
-docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2)
-docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2)
-docAppend DocEmpty d = d
-docAppend d DocEmpty = d
-docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
-docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
-docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
-docAppend d1 d2 = DocAppend d1 d2
-
--- again to make parsing easier - we spot a paragraph whose only item
--- is a DocMonospaced and make it into a DocCodeBlock
-docParagraph :: DocH mod id -> DocH mod id
-docParagraph (DocMonospaced p)
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1) (DocMonospaced p))
-  | all isSpace s1
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1)
-    (DocAppend (DocMonospaced p) (DocString s2)))
-  | all isSpace s1 && all isSpace s2
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocMonospaced p) (DocString s2))
-  | all isSpace s2
-  = DocCodeBlock (docCodeBlock p)
-docParagraph p
-  = DocParagraph p
-
+combineDocumentation (Documentation mDoc mWarning)   =
+  Just (fromMaybe mempty mWarning <> fromMaybe mempty mDoc)
 
 -- Drop trailing whitespace from @..@ code blocks.  Otherwise this:
 --
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 01276310..d9d4ae58 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -18,31 +18,29 @@ module Haddock.Interface.LexParseRn
   , processModuleHeader
   ) where
 
-import qualified Data.IntSet as IS
-import Haddock.Types
-import Haddock.Parser.Util
-import Haddock.Interface.ParseModuleHeader
-import Haddock.Doc
-
 import Control.Applicative
+import Data.IntSet (toList)
 import Data.List
 import Data.Maybe
+import Data.Monoid ((<>))
+import DynFlags (ExtensionFlag(..), languageExtensions)
 import FastString
 import GHC
-import DynFlags (ExtensionFlag(..), languageExtensions)
+import Haddock.Interface.ParseModuleHeader
+import Haddock.Parser
+import Haddock.Types
 import Name
-import Outputable
+import Outputable (showPpr)
 import RdrName
 
 processDocStrings :: DynFlags -> GlobalRdrEnv -> [HsDocString] -> ErrMsgM (Maybe (Doc Name))
 processDocStrings dflags gre strs = do
   docs <- catMaybes <$> mapM (processDocStringParas dflags gre) strs
-  let doc = foldl' docAppend DocEmpty docs
+  let doc = foldl' (<>) DocEmpty docs
   case doc of
     DocEmpty -> return Nothing
     _ -> return (Just doc)
 
-
 processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
 processDocStringParas = process parseParasMaybe
 
@@ -86,7 +84,7 @@ processModuleHeader dflags gre safety mayStr = do
 
   let flags :: [ExtensionFlag]
       -- We remove the flags implied by the language setting and we display the language instead
-      flags = map toEnum (IS.toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
+      flags = map toEnum (toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
   return (hmi { hmi_safety = Just $ showPpr dflags safety
               , hmi_language = language dflags
               , hmi_extensions = flags
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index c155a83b..0be2511f 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -12,7 +12,7 @@
 module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where
 
 import Haddock.Types
-import Haddock.Parser.Util
+import Haddock.Parser
 
 import RdrName
 import DynFlags
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index 1489ae84..720f442b 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -13,504 +13,35 @@
 -- Stability   :  experimental
 -- Portability :  portable
 
-module Haddock.Parser ( parseString, parseParas
-                      , overIdentifier, toRegular
-                      , takeUntil
+module Haddock.Parser ( module Documentation.Haddock.Parser
+                      , parseParasMaybe
+                      , parseStringMaybe
+                      , parseIdent
                       ) where
 
-import           Prelude hiding (takeWhile)
-import           Control.Arrow (first)
-import           Control.Monad (void, mfilter)
-import           Control.Applicative
-import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
-import qualified Data.ByteString.Char8 as BS
-import           Data.Char (chr, isAsciiUpper)
-import           Data.List (stripPrefix, intercalate, unfoldr)
-import           Data.Maybe (fromMaybe)
-import           Data.Monoid
-import           Haddock.Doc
-import           Haddock.Types hiding (Doc)
-import           Haddock.Utf8
-import           Data.ByteString.Char8 (ByteString)
-
--- | Identifier string surrounded with opening and closing quotes/backticks.
-type IString = (Char, String, Char)
-
--- | Drops the quotes/backticks around all identifiers, as if they
--- were valid but still 'String's.
-toRegular :: DocH mod IString -> DocH mod String
-toRegular = fmap (\(_, x, _) -> x)
-
--- | Maps over 'DocIdentifier's over 'String' with potentially failing
--- conversion using user-supplied function. If the conversion fails,
--- the identifier is deemed to not be valid and is treated as a
--- regular string.
-overIdentifier :: (String -> Maybe a)
-               -> DocH mod IString
-               -> DocH mod a
-overIdentifier f d = g d
-  where
-    g (DocIdentifier (o, x, e)) = case f x of
-      Nothing -> DocString $ o : x ++ [e]
-      Just x' -> DocIdentifier x'
-    g DocEmpty = DocEmpty
-    g (DocAppend x x') = DocAppend (g x) (g x')
-    g (DocString x) = DocString x
-    g (DocParagraph x) = DocParagraph $ g x
-    g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
-    g (DocModule x) = DocModule x
-    g (DocWarning x) = DocWarning $ g x
-    g (DocEmphasis x) = DocEmphasis $ g x
-    g (DocMonospaced x) = DocMonospaced $ g x
-    g (DocBold x) = DocBold $ g x
-    g (DocUnorderedList x) = DocUnorderedList $ fmap g x
-    g (DocOrderedList x) = DocOrderedList $ fmap g x
-    g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
-    g (DocCodeBlock x) = DocCodeBlock $ g x
-    g (DocHyperlink x) = DocHyperlink x
-    g (DocPic x) = DocPic x
-    g (DocAName x) = DocAName x
-    g (DocProperty x) = DocProperty x
-    g (DocExamples x) = DocExamples x
-    g (DocHeader (Header l x)) = DocHeader . Header l $ g x
-
-parse :: Parser a -> BS.ByteString -> a
-parse p = either err id . parseOnly (p <* endOfInput)
-  where
-    err = error . ("Haddock.Parser.parse: " ++)
-
--- | Main entry point to the parser. Appends the newline character
--- to the input string.
-parseParas :: String -- ^ String to parse
-           -> DocH mod IString
-parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
-  where
-    p :: Parser (DocH mod IString)
-    p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
-
--- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
--- drops leading whitespace and encodes the string to UTF8 first.
-parseString :: String -> DocH mod IString
-parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
-
-parseStringBS :: BS.ByteString -> DocH mod IString
-parseStringBS = parse p
-  where
-    p :: Parser (DocH mod IString)
-    p = mconcat <$> many (monospace <|> anchor <|> identifier
-                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
-                          <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
-
--- | Parses and processes
--- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
---
--- >>> parseOnly encodedChar "&#65;&#66;&#67;"
--- Right (DocString "ABC")
-encodedChar :: Parser (DocH mod a)
-encodedChar = "&#" *> c <* ";"
-  where
-    c = DocString . return . chr <$> num
-    num = hex <|> decimal
-    hex = ("x" <|> "X") *> hexadecimal
-
-specialChar :: [Char]
-specialChar = "_/<@\"&'`"
-
--- | Plain, regular parser for text. Called as one of the last parsers
--- to ensure that we have already given a chance to more meaningful parsers
--- before capturing their characers.
-string' :: Parser (DocH mod a)
-string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
-  where
-    unescape "" = ""
-    unescape ('\\':x:xs) = x : unescape xs
-    unescape (x:xs) = x : unescape xs
-
--- | Skips a single special character and treats it as a plain string.
--- This is done to skip over any special characters belonging to other
--- elements but which were not deemed meaningful at their positions.
-skipSpecialChar :: Parser (DocH mod a)
-skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
-
--- | Emphasis parser.
---
--- >>> parseOnly emphasis "/Hello world/"
--- Right (DocEmphasis (DocString "Hello world"))
-emphasis :: Parser (DocH mod IString)
-emphasis = DocEmphasis . parseStringBS <$>
-  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
-
--- | Bold parser.
---
--- >>> parseOnly bold "__Hello world__"
--- Right (DocBold (DocString "Hello world"))
-bold :: Parser (DocH mod IString)
-bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
-
-disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
-disallowNewline = mfilter ('\n' `BS.notElem`)
-
--- | Like `takeWhile`, but unconditionally take escaped characters.
-takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile_ p = scan False p_
-  where
-    p_ escaped c
-      | escaped = Just False
-      | not $ p c = Nothing
-      | otherwise = Just (c == '\\')
-
--- | Like `takeWhile1`, but unconditionally take escaped characters.
-takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile1_ = mfilter (not . BS.null) . takeWhile_
-
--- | Text anchors to allow for jumping around the generated documentation.
---
--- >>> parseOnly anchor "#Hello world#"
--- Right (DocAName "Hello world")
-anchor :: Parser (DocH mod a)
-anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
-
--- | Monospaced strings.
---
--- >>> parseOnly monospace "@cruel@"
--- Right (DocMonospaced (DocString "cruel"))
-monospace :: Parser (DocH mod IString)
-monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
-
-moduleName :: Parser (DocH mod a)
-moduleName = DocModule <$> (char '"' *> modid <* char '"')
-  where
-    modid = intercalate "." <$> conid `sepBy1` "."
-    conid = (:)
-      <$> satisfy isAsciiUpper
-      -- NOTE: According to Haskell 2010 we shouldd actually only
-      -- accept {small | large | digit | ' } here.  But as we can't
-      -- match on unicode characters, this is currently not possible.
-      <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
-
--- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
--- a title for the picture.
---
--- >>> parseOnly picture "<<hello.png>>"
--- Right (DocPic (Picture "hello.png" Nothing))
--- >>> parseOnly picture "<<hello.png world>>"
--- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (DocH mod a)
-picture = DocPic . makeLabeled Picture . decodeUtf8
-          <$> disallowNewline ("<<" *> takeUntil ">>")
-
--- | Paragraph parser, called by 'parseParas'.
-paragraph :: Parser (DocH mod IString)
-paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
-                                       <|> property <|> header
-                                       <|> textParagraph)
-
-header :: Parser (DocH mod IString)
-header = do
-  let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
-      pser = foldl1 (<|>) psers
-  delim <- decodeUtf8 <$> pser
-  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
-  rest <- paragraph <|> return mempty
-  return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest
-
-textParagraph :: Parser (DocH mod IString)
-textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
-
--- | List parser, called by 'paragraph'.
-list :: Parser (DocH mod IString)
-list = DocUnorderedList <$> unorderedList
-       <|> DocOrderedList <$> orderedList
-       <|> DocDefList <$> definitionList
-
--- | Parses unordered (bullet) lists.
-unorderedList :: Parser [DocH mod IString]
-unorderedList = ("*" <|> "-") *> innerList unorderedList
-
--- | Parses ordered lists (numbered or dashed).
-orderedList :: Parser [DocH mod IString]
-orderedList = (paren <|> dot) *> innerList orderedList
-  where
-    dot = (decimal :: Parser Int) <* "."
-    paren = "(" *> decimal <* ")"
-
--- | Generic function collecting any further lines belonging to the
--- list entry and recursively collecting any further lists in the
--- same paragraph. Usually used as
---
--- > someListFunction = listBeginning *> innerList someListFunction
-innerList :: Parser [DocH mod IString] -> Parser [DocH mod IString]
-innerList item = do
-  c <- takeLine
-  (cs, items) <- more item
-  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
-  return $ case items of
-    Left p -> [contents `joinPara` p]
-    Right i -> contents : i
-
--- | Parses definition lists.
-definitionList :: Parser [(DocH mod IString, DocH mod IString)]
-definitionList = do
-  label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
-  c <- takeLine
-  (cs, items) <- more definitionList
-  let contents = parseString . dropNLs . unlines $ c : cs
-  return $ case items of
-    Left p -> [(label, contents `joinPara` p)]
-    Right i -> (label, contents) : i
-
--- | If possible, appends two 'Doc's under a 'DocParagraph' rather than
--- outside of it. This allows to get structures like
---
--- @DocParagraph (DocAppend … …)@
---
--- rather than
---
--- @DocAppend (DocParagraph …) …@
-joinPara :: DocH mod id -> DocH mod id -> DocH mod id
-joinPara (DocParagraph p) c = docParagraph $ docAppend p c
-joinPara d p = docAppend d p
-
--- | Drops all trailing newlines.
-dropNLs :: String -> String
-dropNLs = reverse . dropWhile (== '\n') . reverse
-
--- | Main worker for 'innerList' and 'definitionList'.
--- We need the 'Either' here to be able to tell in the respective functions
--- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => Parser a
-     -> Parser ([String], Either (DocH mod IString) a)
-more item = innerParagraphs <|> moreListItems item
-            <|> moreContent item <|> pure ([], Right mempty)
-
--- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: Parser ([String], Either (DocH mod IString) a)
-innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
-
--- | Attemps to fetch the next list if possibly. Used by 'innerList' and
--- 'definitionList' to recursivly grab lists that aren't separated by a whole
--- paragraph.
-moreListItems :: Parser a
-              -> Parser ([String], Either (DocH mod IString) a)
-moreListItems item = (,) [] . Right <$> (skipSpace *> item)
-
--- | Helper for 'innerList' and 'definitionList' which simply takes
--- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => Parser a
-            -> Parser ([String], Either (DocH mod IString) a)
-moreContent item = first . (:) <$> nonEmptyLine <*> more item
-
--- | Runs the 'parseParas' parser on an indented paragraph.
--- The indentation is 4 spaces.
-indentedParagraphs :: Parser (DocH mod IString)
-indentedParagraphs = parseParas . concat <$> dropFrontOfPara "    "
-
--- | Grab as many fully indented paragraphs as we can.
-dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
-dropFrontOfPara sp = do
-  currentParagraph <- some (sp *> takeNonEmptyLine)
-  followingParagraphs <-
-    skipHorizontalSpace *> nextPar -- we have more paragraphs to take
-    <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
-    <|> endOfInput *> return [] -- nothing more to take at all
-  return (currentParagraph ++ followingParagraphs)
-  where
-    nextPar = (++) <$> nlList <*> dropFrontOfPara sp
-    nlList = "\n" *> return ["\n"]
-
-nonSpace :: BS.ByteString -> Parser BS.ByteString
-nonSpace xs
-  | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
-  | otherwise = return xs
-
--- | Takes a non-empty, not fully whitespace line.
---
---  Doesn't discard the trailing newline.
-takeNonEmptyLine :: Parser String
-takeNonEmptyLine = do
-    (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
-
-birdtracks :: Parser (DocH mod a)
-birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
-  where
-    line = skipHorizontalSpace *> ">" *> takeLine
-
-stripSpace :: [String] -> [String]
-stripSpace = fromMaybe <*> mapM strip'
-  where
-    strip' (' ':xs') = Just xs'
-    strip' "" = Just ""
-    strip' _  = Nothing
-
--- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
--- Consecutive examples are accepted.
-examples :: Parser (DocH mod a)
-examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
-  where
-    go :: Parser [Example]
-    go = do
-      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
-      expr <- takeLine
-      (rs, es) <- resultAndMoreExamples
-      return (makeExample prefix expr rs : es)
-      where
-        resultAndMoreExamples :: Parser ([String], [Example])
-        resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
-          where
-            moreExamples :: Parser ([String], [Example])
-            moreExamples = (,) [] <$> go
-
-            result :: Parser ([String], [Example])
-            result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
-
-    makeExample :: String -> String -> [String] -> Example
-    makeExample prefix expression res =
-      Example (strip expression) result
-      where
-        result = map (substituteBlankLine . tryStripPrefix) res
-
-        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
-
-        substituteBlankLine "<BLANKLINE>" = ""
-        substituteBlankLine xs = xs
-
-nonEmptyLine :: Parser String
-nonEmptyLine = mfilter (any (not . isSpace)) takeLine
-
-takeLine :: Parser String
-takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
-
-endOfLine :: Parser ()
-endOfLine = void "\n" <|> endOfInput
-
--- | Property parser.
---
--- >>> parseOnly property "prop> hello world"
--- Right (DocProperty "hello world")
-property :: Parser (DocH mod a)
-property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
-
--- |
--- Paragraph level codeblock. Anything between the two delimiting @ is parsed
--- for markup.
-codeblock :: Parser (DocH mod IString)
-codeblock =
-  DocCodeBlock . parseStringBS . dropSpaces
-  <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
-  where
-    dropSpaces xs =
-      let rs = decodeUtf8 xs
-      in case splitByNl rs of
-        [] -> xs
-        ys -> case last ys of
-          ' ':_ -> case mapM dropSpace ys of
-            Nothing -> xs
-            Just zs -> encodeUtf8 $ intercalate "\n" zs
-          _ -> xs
-
-    -- This is necessary because ‘lines’ swallows up a trailing newline
-    -- and we lose information about whether the last line belongs to @ or to
-    -- text which we need to decide whether we actually want to be dropping
-    -- anything at all.
-    splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
-                               _      -> Nothing)
-                . ('\n' :)
-
-    dropSpace "" = Just ""
-    dropSpace (' ':xs) = Just xs
-    dropSpace _ = Nothing
-
-    block' = scan False p
-      where
-        p isNewline c
-          | isNewline && c == '@' = Nothing
-          | isNewline && isSpace c = Just isNewline
-          | otherwise = Just $ c == '\n'
-
-hyperlink :: Parser (DocH mod a)
-hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
-              <$> disallowNewline ("<" *> takeUntil ">")
-            <|> autoUrl
-
-autoUrl :: Parser (DocH mod a)
-autoUrl = mkLink <$> url
-  where
-    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
-    mkLink :: BS.ByteString -> DocH mod a
-    mkLink s = case BS.unsnoc s of
-      Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
-      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-
--- | Parses strings between identifier delimiters. Consumes all input that it
--- deems to be valid in an identifier. Note that it simply blindly consumes
--- characters and does no actual validation itself.
-parseValid :: Parser String
-parseValid = do
-  vs' <- many' $ utf8String "⋆" <|> return <$> idChar
-  let vs = concat vs'
-  c <- peekChar
-  case c of
-    Just '`' -> return vs
-    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
-                 <|> return vs
-    _ -> fail "outofvalid"
-  where
-    idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
-             <|> digit <|> letter_ascii
-
--- | Parses UTF8 strings from ByteString streams.
-utf8String :: String -> Parser String
-utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
-
--- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
--- string it deems valid.
-identifier :: Parser (DocH mod IString)
-identifier = do
-  o <- idDelim
-  vid <- parseValid
-  e <- idDelim
-  return $ DocIdentifier (o, vid, e)
-  where
-    idDelim = char '\'' <|> char '`'
-
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = (\f -> f . f) $ dropWhile isSpace . reverse
-
-skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
-
-takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
-
-makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
-  (uri, "")    -> f uri Nothing
-  (uri, label) -> f uri (Just $ dropWhile isSpace label)
-  where
-    -- As we don't parse these any further, we don't do any processing to the
-    -- string so we at least remove escape character here. Perhaps we should
-    -- actually be parsing the label at the very least?
-    removeEscapes "" = ""
-    removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
-    removeEscapes ('\\':xs) = removeEscapes xs
-    removeEscapes (x:xs) = x : removeEscapes xs
-
-takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
-  where
-    end = BS.unpack end_
-
-    p :: (Bool, String) -> Char -> Maybe (Bool, String)
-    p acc c = case acc of
-      (True, _) -> Just (False, end)
-      (_, []) -> Nothing
-      (_, x:xs) | x == c -> Just (False, xs)
-      _ -> Just (c == '\\', end)
-
-    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
-    requireEnd = mfilter (BS.isSuffixOf end_)
-
-    gotSome xs
-      | BS.null xs = fail "didn't get any content"
-      | otherwise = return xs
+import Documentation.Haddock.Parser
+import DynFlags (DynFlags)
+import FastString (mkFastString)
+import Documentation.Haddock.Types
+import Lexer (mkPState, unP, ParseResult(POk))
+import Parser (parseIdentifier)
+import RdrName (RdrName)
+import SrcLoc (mkRealSrcLoc, unLoc)
+import StringBuffer (stringToStringBuffer)
+
+{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
+parseParasMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
+
+{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
+parseStringMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
+
+parseIdent :: DynFlags -> String -> Maybe RdrName
+parseIdent dflags str0 =
+  let buffer = stringToStringBuffer str0
+      realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+      pstate = mkPState dflags buffer realSrcLc
+  in case unP parseIdentifier pstate of
+    POk _ name -> Just (unLoc name)
+    _ -> Nothing
diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs
deleted file mode 100644
index 29da91c0..00000000
--- a/src/Haddock/Parser/Util.hs
+++ /dev/null
@@ -1,28 +0,0 @@
-module Haddock.Parser.Util where
-
-import DynFlags (DynFlags)
-import FastString (mkFastString)
-import Haddock.Types
-import Haddock.Parser
-import Lexer (mkPState, unP, ParseResult(POk))
-import Parser (parseIdentifier)
-import RdrName (RdrName)
-import SrcLoc (mkRealSrcLoc, unLoc)
-import StringBuffer (stringToStringBuffer)
-
-{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
-parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
-
-{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
-parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
-
-parseIdent :: DynFlags -> String -> Maybe RdrName
-parseIdent dflags str0 =
-  let buffer = stringToStringBuffer str0
-      realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
-      pstate = mkPState dflags buffer realSrcLc
-  in case unP parseIdentifier pstate of
-    POk _ name -> Just (unLoc name)
-    _ -> Nothing
diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs
index cd615bf4..85b3a592 100644
--- a/src/Haddock/Types.hs
+++ b/src/Haddock/Types.hs
@@ -19,16 +19,16 @@ module Haddock.Types (
   module Haddock.Types
   , HsDocString, LHsDocString
   , Fixity(..)
+  , module Documentation.Haddock.Types
  ) where
 
-import Data.Foldable
-import Data.Traversable
 import Control.Exception
 import Control.Arrow hiding ((<+>))
 import Control.DeepSeq
 import Data.Typeable
 import Data.Map (Map)
 import qualified Data.Map as Map
+import Documentation.Haddock.Types
 import BasicTypes (Fixity(..))
 import GHC hiding (NoLink)
 import DynFlags (ExtensionFlag, Language)
@@ -316,36 +316,6 @@ type LDoc id = Located (Doc id)
 
 type Doc id = DocH (ModuleName, OccName) id
 
-data DocH mod id
-  = DocEmpty
-  | DocAppend (DocH mod id) (DocH mod id)
-  | DocString String
-  | DocParagraph (DocH mod id)
-  | DocIdentifier id
-  | DocIdentifierUnchecked mod
-  | DocModule String
-  | DocWarning (DocH mod id)
-  | DocEmphasis (DocH mod id)
-  | DocMonospaced (DocH mod id)
-  | DocBold (DocH mod id)
-  | DocUnorderedList [DocH mod id]
-  | DocOrderedList [DocH mod id]
-  | DocDefList [(DocH mod id, DocH mod id)]
-  | DocCodeBlock (DocH mod id)
-  | DocHyperlink Hyperlink
-  | DocPic Picture
-  | DocAName String
-  | DocProperty String
-  | DocExamples [Example]
-  | DocHeader (Header (DocH mod id))
-  deriving (Functor, Foldable, Traversable)
-
-instance Foldable Header where
-  foldMap f (Header _ a) = f a
-
-instance Traversable Header where
-  traverse f (Header l a) = Header l `fmap` f a
-
 instance (NFData a, NFData mod)
          => NFData (DocH mod a) where
   rnf doc = case doc of
@@ -376,23 +346,6 @@ instance NFData Name
 instance NFData OccName
 instance NFData ModuleName
 
-
-data Hyperlink = Hyperlink
-  { hyperlinkUrl   :: String
-  , hyperlinkLabel :: Maybe String
-  } deriving (Eq, Show)
-
-
-data Picture = Picture
-  { pictureUri   :: String
-  , pictureTitle :: Maybe String
-  } deriving (Eq, Show)
-
-data Header id = Header
-  { headerLevel :: Int
-  , headerTitle :: id
-  } deriving Functor
-
 instance NFData id => NFData (Header id) where
   rnf (Header a b) = a `deepseq` b `deepseq` ()
 
@@ -402,13 +355,6 @@ instance NFData Hyperlink where
 instance NFData Picture where
   rnf (Picture a b) = a `deepseq` b `deepseq` ()
 
-
-data Example = Example
-  { exampleExpression :: String
-  , exampleResult     :: [String]
-  } deriving (Eq, Show)
-
-
 instance NFData Example where
   rnf (Example a b) = a `deepseq` b `deepseq` ()
 
diff --git a/src/Haddock/Utf8.hs b/src/Haddock/Utf8.hs
deleted file mode 100644
index 1fb0e818..00000000
--- a/src/Haddock/Utf8.hs
+++ /dev/null
@@ -1,74 +0,0 @@
-module Haddock.Utf8 (encodeUtf8, decodeUtf8) where
-import           Data.Bits ((.|.), (.&.), shiftL, shiftR)
-import qualified Data.ByteString as BS
-import           Data.Char (chr, ord)
-import           Data.Word (Word8)
-
--- | Helper that encodes and packs a 'String' into a 'BS.ByteString'
-encodeUtf8 :: String -> BS.ByteString
-encodeUtf8 = BS.pack . encode
-
--- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String'
-decodeUtf8 :: BS.ByteString -> String
-decodeUtf8 = decode . BS.unpack
-
--- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding
--- | Character to use when 'encode' or 'decode' fail for a byte.
-replacementCharacter :: Char
-replacementCharacter = '\xfffd'
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-encode :: String -> [Word8]
-encode = concatMap (map fromIntegral . go . ord)
- where
-  go oc
-   | oc <= 0x7f       = [oc]
-
-   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
-   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
-                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
--- | Decode a UTF8 string packed into a list of Word8 values, directly to String
-decode :: [Word8] -> String
-decode [    ] = ""
-decode (c:cs)
-  | c < 0x80  = chr (fromEnum c) : decode cs
-  | c < 0xc0  = replacementCharacter : decode cs
-  | c < 0xe0  = multi1
-  | c < 0xf0  = multi_byte 2 0xf  0x800
-  | c < 0xf8  = multi_byte 3 0x7  0x10000
-  | c < 0xfc  = multi_byte 4 0x3  0x200000
-  | c < 0xfe  = multi_byte 5 0x1  0x4000000
-  | otherwise = replacementCharacter : decode cs
-  where
-    multi1 = case cs of
-      c1 : ds | c1 .&. 0xc0 == 0x80 ->
-        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
-        in if d >= 0x000080 then toEnum d : decode ds
-                            else replacementCharacter : decode ds
-      _ -> replacementCharacter : decode cs
-
-    multi_byte :: Int -> Word8 -> Int -> String
-    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
-      where
-        aux 0 rs acc
-          | overlong <= acc && acc <= 0x10ffff &&
-            (acc < 0xd800 || 0xdfff < acc)     &&
-            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs
-          | otherwise = replacementCharacter : decode rs
-
-        aux n (r:rs) acc
-          | r .&. 0xc0 == 0x80 = aux (n-1) rs
-                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
-
-        aux _ rs     _ = replacementCharacter : decode rs