Move parser + parser tests out to own package.

We move some types out that are necessary as well and then re-export and specialise them in the core Haddock. Reason for moving out spec tests is that if we're working on the parser, we can simply work on that and we can ignore the rest of Haddock. The downside is that it's a little inconvenient if at the end of the day we want to see that everything passes.
author: Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> 2014-05-05 09:01:03 +0200
committer: Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> 2014-05-05 11:00:41 +0200
commit: cc269e6b0b615e9e237c35a985e4ace7b9ab0dd9 (patch)
tree: f0264138c81909151f9724c1f02f7bf8b30803cb
parent: 7ac2d0f2d31c2e1c7ede09828f3d5ba5626bd0d4 (diff)
34 files changed, 841 insertions, 907 deletions
diff --git a/.ghci b/.ghci
index 8312f0ee..5407b2c3 100644
--- a/.ghci
+++ b/.ghci
@@ -1 +1 @@
-:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -packageghc -optP-include -optPdist/build/autogen/cabal_macros.h
+:set -isrc -itest -idist/build -idist/build/autogen -packageghc -optP-include -optPdist/build/autogen/cabal_macros.h
diff --git a/.gitignore b/.gitignore
index bc554c00..4752003e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 /dist/
+/haddock-library/dist
 /html-test/out/
 /latex-test/out/
 
diff --git a/haddock-library/.ghci b/haddock-library/.ghci
new file mode 100644
index 00000000..6a26395e
--- /dev/null
+++ b/haddock-library/.ghci
@@ -0,0 +1 @@
+:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h
diff --git a/haddock-library/LICENSE b/haddock-library/LICENSE
new file mode 100644
index 00000000..1636bfcd
--- /dev/null
+++ b/haddock-library/LICENSE
@@ -0,0 +1,23 @@
+Copyright 2002-2010, Simon Marlow.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+ 
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+ 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/haddock-library/Setup.hs b/haddock-library/Setup.hs
new file mode 100644
index 00000000..9a994af6
--- /dev/null
+++ b/haddock-library/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal
new file mode 100644
index 00000000..3a016c5a
--- /dev/null
+++ b/haddock-library/haddock-library.cabal
@@ -0,0 +1,75 @@
+name:                 haddock-library
+version:              2.15.0
+synopsis:             Library exposing some functionality of Haddock.
+
+description:          Haddock is a documentation-generation tool for Haskell
+                      libraries. These modules expose some functionality of it
+                      without pulling in the GHC dependency.
+license:              BSD3
+license-file:         LICENSE
+maintainer:           Simon Hengel <sol@typeful.net>, Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>
+homepage:             http://www.haskell.org/haddock/
+bug-reports:          http://trac.haskell.org/haddock
+category:             Documentation
+build-type:           Simple
+cabal-version:        >= 1.10
+stability:            experimental
+
+library
+  default-language:     Haskell2010
+
+  build-depends:
+    base >= 4.3 && < 4.8,
+    bytestring,
+    deepseq
+
+  hs-source-dirs:       src, vendor/attoparsec-0.10.4.0
+  ghc-options:          -funbox-strict-fields -Wall -fwarn-tabs -O2
+
+  exposed-modules:
+    Documentation.Haddock.Parser
+    Documentation.Haddock.Types
+    Documentation.Haddock.Doc
+
+  other-modules:
+    Data.Attoparsec
+    Data.Attoparsec.ByteString
+    Data.Attoparsec.ByteString.Char8
+    Data.Attoparsec.Combinator
+    Data.Attoparsec.Number
+    Data.Attoparsec.ByteString.FastSet
+    Data.Attoparsec.ByteString.Internal
+    Data.Attoparsec.Internal
+    Data.Attoparsec.Internal.Types
+    Documentation.Haddock.Utf8
+    Documentation.Haddock.Parser.Util
+
+test-suite spec
+  type:             exitcode-stdio-1.0
+  default-language: Haskell2010
+  main-is:          Spec.hs
+  hs-source-dirs:
+      test
+    , src
+    , vendor/attoparsec-0.10.4.0
+
+  cpp-options:
+      -DTEST
+
+  other-modules:
+      Documentation.Haddock.ParserSpec
+      Documentation.Haddock.Utf8Spec
+      Documentation.Haddock.Parser.UtilSpec
+
+  build-depends:
+      base
+    , hspec
+    , bytestring
+    , deepseq
+    , QuickCheck == 2.*
+    , haddock-library
+
+
+source-repository head
+  type:     git
+  location: http://git.haskell.org/haddock.git
diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs
new file mode 100644
index 00000000..4d6c10a4
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Doc.hs
@@ -0,0 +1,60 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+module Documentation.Haddock.Doc (docParagraph) where
+
+import Data.Monoid
+import Documentation.Haddock.Types
+import Data.Char (isSpace)
+
+-- We put it here so that we can avoid a circular import
+-- anything relevant imports this module anyway
+instance Monoid (DocH mod id) where
+  mempty  = DocEmpty
+  mappend = docAppend
+
+docAppend :: DocH mod id -> DocH mod id -> DocH mod id
+docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
+docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
+docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2)
+docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2)
+docAppend DocEmpty d = d
+docAppend d DocEmpty = d
+docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
+docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
+docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
+docAppend d1 d2 = DocAppend d1 d2
+
+-- again to make parsing easier - we spot a paragraph whose only item
+-- is a DocMonospaced and make it into a DocCodeBlock
+docParagraph :: DocH mod id -> DocH mod id
+docParagraph (DocMonospaced p)
+  = DocCodeBlock (docCodeBlock p)
+docParagraph (DocAppend (DocString s1) (DocMonospaced p))
+  | all isSpace s1
+  = DocCodeBlock (docCodeBlock p)
+docParagraph (DocAppend (DocString s1)
+    (DocAppend (DocMonospaced p) (DocString s2)))
+  | all isSpace s1 && all isSpace s2
+  = DocCodeBlock (docCodeBlock p)
+docParagraph (DocAppend (DocMonospaced p) (DocString s2))
+  | all isSpace s2
+  = DocCodeBlock (docCodeBlock p)
+docParagraph p
+  = DocParagraph p
+
+
+-- Drop trailing whitespace from @..@ code blocks.  Otherwise this:
+--
+--    -- @
+--    -- foo
+--    -- @
+--
+-- turns into (DocCodeBlock "\nfoo\n ") which when rendered in HTML
+-- gives an extra vertical space after the code block.  The single space
+-- on the final line seems to trigger the extra vertical space.
+--
+docCodeBlock :: DocH mod id -> DocH mod id
+docCodeBlock (DocString s)
+  = DocString (reverse $ dropWhile (`elem` " \t") $ reverse s)
+docCodeBlock (DocAppend l r)
+  = DocAppend l (docCodeBlock r)
+docCodeBlock d = d
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
new file mode 100644
index 00000000..3d146d33
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -0,0 +1,474 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE IncoherentInstances #-}
+{-# LANGUAGE LambdaCase #-}
+-- |
+-- Module      :  Documentation.Haddock.Parser
+-- Copyright   :  (c) Mateusz Kowalczyk 2013-2014,
+--                    Simon Hengel      2013
+-- License     :  BSD-like
+--
+-- Maintainer  :  haddock@projects.haskell.org
+-- Stability   :  experimental
+-- Portability :  portable
+
+module Documentation.Haddock.Parser ( parseString, parseParas
+                                    , overIdentifier, toRegular, Identifier
+                                    ) where
+
+import           Control.Applicative
+import           Control.Arrow (first)
+import           Control.Monad (void, mfilter)
+import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
+import qualified Data.ByteString.Char8 as BS
+import           Data.Char (chr, isAsciiUpper)
+import           Data.List (stripPrefix, intercalate, unfoldr)
+import           Data.Maybe (fromMaybe)
+import           Data.Monoid
+import           Documentation.Haddock.Doc
+import           Documentation.Haddock.Parser.Util
+import           Documentation.Haddock.Types
+import           Documentation.Haddock.Utf8
+import           Prelude hiding (takeWhile)
+
+-- | Identifier string surrounded with opening and closing quotes/backticks.
+type Identifier = (Char, String, Char)
+
+-- | Drops the quotes/backticks around all identifiers, as if they
+-- were valid but still 'String's.
+toRegular :: DocH mod Identifier -> DocH mod String
+toRegular = fmap (\(_, x, _) -> x)
+
+-- | Maps over 'DocIdentifier's over 'String' with potentially failing
+-- conversion using user-supplied function. If the conversion fails,
+-- the identifier is deemed to not be valid and is treated as a
+-- regular string.
+overIdentifier :: (String -> Maybe a)
+               -> DocH mod Identifier
+               -> DocH mod a
+overIdentifier f d = g d
+  where
+    g (DocIdentifier (o, x, e)) = case f x of
+      Nothing -> DocString $ o : x ++ [e]
+      Just x' -> DocIdentifier x'
+    g DocEmpty = DocEmpty
+    g (DocAppend x x') = DocAppend (g x) (g x')
+    g (DocString x) = DocString x
+    g (DocParagraph x) = DocParagraph $ g x
+    g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
+    g (DocModule x) = DocModule x
+    g (DocWarning x) = DocWarning $ g x
+    g (DocEmphasis x) = DocEmphasis $ g x
+    g (DocMonospaced x) = DocMonospaced $ g x
+    g (DocBold x) = DocBold $ g x
+    g (DocUnorderedList x) = DocUnorderedList $ fmap g x
+    g (DocOrderedList x) = DocOrderedList $ fmap g x
+    g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
+    g (DocCodeBlock x) = DocCodeBlock $ g x
+    g (DocHyperlink x) = DocHyperlink x
+    g (DocPic x) = DocPic x
+    g (DocAName x) = DocAName x
+    g (DocProperty x) = DocProperty x
+    g (DocExamples x) = DocExamples x
+    g (DocHeader (Header l x)) = DocHeader . Header l $ g x
+
+parse :: Parser a -> BS.ByteString -> a
+parse p = either err id . parseOnly (p <* endOfInput)
+  where
+    err = error . ("Haddock.Parser.parse: " ++)
+
+-- | Main entry point to the parser. Appends the newline character
+-- to the input string.
+parseParas :: String -- ^ String to parse
+           -> DocH mod Identifier
+parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
+  where
+    p :: Parser (DocH mod Identifier)
+    p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+
+-- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
+-- drops leading whitespace and encodes the string to UTF8 first.
+parseString :: String -> DocH mod Identifier
+parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
+
+parseStringBS :: BS.ByteString -> DocH mod Identifier
+parseStringBS = parse p
+  where
+    p :: Parser (DocH mod Identifier)
+    p = mconcat <$> many (monospace <|> anchor <|> identifier
+                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
+                          <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
+
+-- | Parses and processes
+-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
+--
+-- >>> parseOnly encodedChar "&#65;&#66;&#67;"
+-- Right (DocString "ABC")
+encodedChar :: Parser (DocH mod a)
+encodedChar = "&#" *> c <* ";"
+  where
+    c = DocString . return . chr <$> num
+    num = hex <|> decimal
+    hex = ("x" <|> "X") *> hexadecimal
+
+specialChar :: [Char]
+specialChar = "_/<@\"&'`"
+
+-- | Plain, regular parser for text. Called as one of the last parsers
+-- to ensure that we have already given a chance to more meaningful parsers
+-- before capturing their characers.
+string' :: Parser (DocH mod a)
+string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
+  where
+    unescape "" = ""
+    unescape ('\\':x:xs) = x : unescape xs
+    unescape (x:xs) = x : unescape xs
+
+-- | Skips a single special character and treats it as a plain string.
+-- This is done to skip over any special characters belonging to other
+-- elements but which were not deemed meaningful at their positions.
+skipSpecialChar :: Parser (DocH mod a)
+skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
+
+-- | Emphasis parser.
+--
+-- >>> parseOnly emphasis "/Hello world/"
+-- Right (DocEmphasis (DocString "Hello world"))
+emphasis :: Parser (DocH mod Identifier)
+emphasis = DocEmphasis . parseStringBS <$>
+  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
+
+-- | Bold parser.
+--
+-- >>> parseOnly bold "__Hello world__"
+-- Right (DocBold (DocString "Hello world"))
+bold :: Parser (DocH mod Identifier)
+bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
+
+disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
+disallowNewline = mfilter ('\n' `BS.notElem`)
+
+-- | Like `takeWhile`, but unconditionally take escaped characters.
+takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile_ p = scan False p_
+  where
+    p_ escaped c
+      | escaped = Just False
+      | not $ p c = Nothing
+      | otherwise = Just (c == '\\')
+
+-- | Like `takeWhile1`, but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile1_ = mfilter (not . BS.null) . takeWhile_
+
+-- | Text anchors to allow for jumping around the generated documentation.
+--
+-- >>> parseOnly anchor "#Hello world#"
+-- Right (DocAName "Hello world")
+anchor :: Parser (DocH mod a)
+anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
+
+-- | Monospaced strings.
+--
+-- >>> parseOnly monospace "@cruel@"
+-- Right (DocMonospaced (DocString "cruel"))
+monospace :: Parser (DocH mod Identifier)
+monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+
+moduleName :: Parser (DocH mod a)
+moduleName = DocModule <$> (char '"' *> modid <* char '"')
+  where
+    modid = intercalate "." <$> conid `sepBy1` "."
+    conid = (:)
+      <$> satisfy isAsciiUpper
+      -- NOTE: According to Haskell 2010 we shouldd actually only
+      -- accept {small | large | digit | ' } here.  But as we can't
+      -- match on unicode characters, this is currently not possible.
+      <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
+
+-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
+-- a title for the picture.
+--
+-- >>> parseOnly picture "<<hello.png>>"
+-- Right (DocPic (Picture "hello.png" Nothing))
+-- >>> parseOnly picture "<<hello.png world>>"
+-- Right (DocPic (Picture "hello.png" (Just "world")))
+picture :: Parser (DocH mod a)
+picture = DocPic . makeLabeled Picture . decodeUtf8
+          <$> disallowNewline ("<<" *> takeUntil ">>")
+
+-- | Paragraph parser, called by 'parseParas'.
+paragraph :: Parser (DocH mod Identifier)
+paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
+                                       <|> property <|> header
+                                       <|> textParagraph)
+
+header :: Parser (DocH mod Identifier)
+header = do
+  let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
+      pser = foldl1 (<|>) psers
+  delim <- decodeUtf8 <$> pser
+  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
+  rest <- paragraph <|> return mempty
+  return $ DocParagraph (DocHeader (Header (length delim) line)) <> rest
+
+textParagraph :: Parser (DocH mod Identifier)
+textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
+
+-- | List parser, called by 'paragraph'.
+list :: Parser (DocH mod Identifier)
+list = DocUnorderedList <$> unorderedList
+       <|> DocOrderedList <$> orderedList
+       <|> DocDefList <$> definitionList
+
+-- | Parses unordered (bullet) lists.
+unorderedList :: Parser [DocH mod Identifier]
+unorderedList = ("*" <|> "-") *> innerList unorderedList
+
+-- | Parses ordered lists (numbered or dashed).
+orderedList :: Parser [DocH mod Identifier]
+orderedList = (paren <|> dot) *> innerList orderedList
+  where
+    dot = (decimal :: Parser Int) <* "."
+    paren = "(" *> decimal <* ")"
+
+-- | Generic function collecting any further lines belonging to the
+-- list entry and recursively collecting any further lists in the
+-- same paragraph. Usually used as
+--
+-- > someListFunction = listBeginning *> innerList someListFunction
+innerList :: Parser [DocH mod Identifier] -> Parser [DocH mod Identifier]
+innerList item = do
+  c <- takeLine
+  (cs, items) <- more item
+  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
+  return $ case items of
+    Left p -> [contents `joinPara` p]
+    Right i -> contents : i
+
+-- | Parses definition lists.
+definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)]
+definitionList = do
+  label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
+  c <- takeLine
+  (cs, items) <- more definitionList
+  let contents = parseString . dropNLs . unlines $ c : cs
+  return $ case items of
+    Left p -> [(label, contents `joinPara` p)]
+    Right i -> (label, contents) : i
+
+-- | If possible, appends two 'Doc's under a 'DocParagraph' rather than
+-- outside of it. This allows to get structures like
+--
+-- @DocParagraph (DocAppend … …)@
+--
+-- rather than
+--
+-- @DocAppend (DocParagraph …) …@
+joinPara :: DocH mod id -> DocH mod id -> DocH mod id
+joinPara (DocParagraph p) c = docParagraph $ p <> c
+joinPara d p = d <> p
+
+-- | Drops all trailing newlines.
+dropNLs :: String -> String
+dropNLs = reverse . dropWhile (== '\n') . reverse
+
+-- | Main worker for 'innerList' and 'definitionList'.
+-- We need the 'Either' here to be able to tell in the respective functions
+-- whether we're dealing with the next list or a nested paragraph.
+more :: Monoid a => Parser a
+     -> Parser ([String], Either (DocH mod Identifier) a)
+more item = innerParagraphs <|> moreListItems item
+            <|> moreContent item <|> pure ([], Right mempty)
+
+-- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
+innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a)
+innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
+
+-- | Attemps to fetch the next list if possibly. Used by 'innerList' and
+-- 'definitionList' to recursivly grab lists that aren't separated by a whole
+-- paragraph.
+moreListItems :: Parser a
+              -> Parser ([String], Either (DocH mod Identifier) a)
+moreListItems item = (,) [] . Right <$> (skipSpace *> item)
+
+-- | Helper for 'innerList' and 'definitionList' which simply takes
+-- a line of text and attempts to parse more list content with 'more'.
+moreContent :: Monoid a => Parser a
+            -> Parser ([String], Either (DocH mod Identifier) a)
+moreContent item = first . (:) <$> nonEmptyLine <*> more item
+
+-- | Runs the 'parseParas' parser on an indented paragraph.
+-- The indentation is 4 spaces.
+indentedParagraphs :: Parser (DocH mod Identifier)
+indentedParagraphs = parseParas . concat <$> dropFrontOfPara "    "
+
+-- | Grab as many fully indented paragraphs as we can.
+dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
+dropFrontOfPara sp = do
+  currentParagraph <- some (sp *> takeNonEmptyLine)
+  followingParagraphs <-
+    skipHorizontalSpace *> nextPar -- we have more paragraphs to take
+    <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
+    <|> endOfInput *> return [] -- nothing more to take at all
+  return (currentParagraph ++ followingParagraphs)
+  where
+    nextPar = (++) <$> nlList <*> dropFrontOfPara sp
+    nlList = "\n" *> return ["\n"]
+
+nonSpace :: BS.ByteString -> Parser BS.ByteString
+nonSpace xs
+  | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
+  | otherwise = return xs
+
+-- | Takes a non-empty, not fully whitespace line.
+--
+--  Doesn't discard the trailing newline.
+takeNonEmptyLine :: Parser String
+takeNonEmptyLine = do
+    (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
+
+birdtracks :: Parser (DocH mod a)
+birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
+  where
+    line = skipHorizontalSpace *> ">" *> takeLine
+
+stripSpace :: [String] -> [String]
+stripSpace = fromMaybe <*> mapM strip'
+  where
+    strip' (' ':xs') = Just xs'
+    strip' "" = Just ""
+    strip' _  = Nothing
+
+-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
+-- Consecutive examples are accepted.
+examples :: Parser (DocH mod a)
+examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
+  where
+    go :: Parser [Example]
+    go = do
+      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
+      expr <- takeLine
+      (rs, es) <- resultAndMoreExamples
+      return (makeExample prefix expr rs : es)
+      where
+        resultAndMoreExamples :: Parser ([String], [Example])
+        resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
+          where
+            moreExamples :: Parser ([String], [Example])
+            moreExamples = (,) [] <$> go
+
+            result :: Parser ([String], [Example])
+            result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
+
+    makeExample :: String -> String -> [String] -> Example
+    makeExample prefix expression res =
+      Example (strip expression) result
+      where
+        result = map (substituteBlankLine . tryStripPrefix) res
+
+        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
+
+        substituteBlankLine "<BLANKLINE>" = ""
+        substituteBlankLine xs = xs
+
+nonEmptyLine :: Parser String
+nonEmptyLine = mfilter (any (not . isSpace)) takeLine
+
+takeLine :: Parser String
+takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
+
+endOfLine :: Parser ()
+endOfLine = void "\n" <|> endOfInput
+
+-- | Property parser.
+--
+-- >>> parseOnly property "prop> hello world"
+-- Right (DocProperty "hello world")
+property :: Parser (DocH mod a)
+property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
+
+-- |
+-- Paragraph level codeblock. Anything between the two delimiting @ is parsed
+-- for markup.
+codeblock :: Parser (DocH mod Identifier)
+codeblock =
+  DocCodeBlock . parseStringBS . dropSpaces
+  <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
+  where
+    dropSpaces xs =
+      let rs = decodeUtf8 xs
+      in case splitByNl rs of
+        [] -> xs
+        ys -> case last ys of
+          ' ':_ -> case mapM dropSpace ys of
+            Nothing -> xs
+            Just zs -> encodeUtf8 $ intercalate "\n" zs
+          _ -> xs
+
+    -- This is necessary because ‘lines’ swallows up a trailing newline
+    -- and we lose information about whether the last line belongs to @ or to
+    -- text which we need to decide whether we actually want to be dropping
+    -- anything at all.
+    splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
+                               _      -> Nothing)
+                . ('\n' :)
+
+    dropSpace "" = Just ""
+    dropSpace (' ':xs) = Just xs
+    dropSpace _ = Nothing
+
+    block' = scan False p
+      where
+        p isNewline c
+          | isNewline && c == '@' = Nothing
+          | isNewline && isSpace c = Just isNewline
+          | otherwise = Just $ c == '\n'
+
+hyperlink :: Parser (DocH mod a)
+hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
+              <$> disallowNewline ("<" *> takeUntil ">")
+            <|> autoUrl
+
+autoUrl :: Parser (DocH mod a)
+autoUrl = mkLink <$> url
+  where
+    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
+    mkLink :: BS.ByteString -> DocH mod a
+    mkLink s = case BS.unsnoc s of
+      Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
+      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
+
+-- | Parses strings between identifier delimiters. Consumes all input that it
+-- deems to be valid in an identifier. Note that it simply blindly consumes
+-- characters and does no actual validation itself.
+parseValid :: Parser String
+parseValid = do
+  vs' <- many' $ utf8String "⋆" <|> return <$> idChar
+  let vs = concat vs'
+  c <- peekChar
+  case c of
+    Just '`' -> return vs
+    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
+                 <|> return vs
+    _ -> fail "outofvalid"
+  where
+    idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
+             <|> digit <|> letter_ascii
+
+-- | Parses UTF8 strings from ByteString streams.
+utf8String :: String -> Parser String
+utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
+
+-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
+-- string it deems valid.
+identifier :: Parser (DocH mod Identifier)
+identifier = do
+  o <- idDelim
+  vid <- parseValid
+  e <- idDelim
+  return $ DocIdentifier (o, vid, e)
+  where
+    idDelim = char '\'' <|> char '`'
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
new file mode 100644
index 00000000..25dba2d5
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -0,0 +1,61 @@
+-- |
+-- Module      :  Documentation.Haddock.Parser.Util
+-- Copyright   :  (c) Mateusz Kowalczyk 2013-2014,
+--                    Simon Hengel      2013
+-- License     :  BSD-like
+--
+-- Maintainer  :  haddock@projects.haskell.org
+-- Stability   :  experimental
+-- Portability :  portable
+--
+-- Various utility functions used by the parser.
+module Documentation.Haddock.Parser.Util where
+
+import           Control.Applicative
+import           Control.Monad (mfilter)
+import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
+import           Data.ByteString.Char8 (ByteString)
+import qualified Data.ByteString.Char8 as BS
+import           Prelude hiding (takeWhile)
+
+-- | Remove all leading and trailing whitespace
+strip :: String -> String
+strip = (\f -> f . f) $ dropWhile isSpace . reverse
+
+skipHorizontalSpace :: Parser ()
+skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
+
+takeHorizontalSpace :: Parser BS.ByteString
+takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
+
+makeLabeled :: (String -> Maybe String -> a) -> String -> a
+makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
+  (uri, "")    -> f uri Nothing
+  (uri, label) -> f uri (Just $ dropWhile isSpace label)
+  where
+    -- As we don't parse these any further, we don't do any processing to the
+    -- string so we at least remove escape character here. Perhaps we should
+    -- actually be parsing the label at the very least?
+    removeEscapes "" = ""
+    removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
+    removeEscapes ('\\':xs) = removeEscapes xs
+    removeEscapes (x:xs) = x : removeEscapes xs
+
+takeUntil :: ByteString -> Parser ByteString
+takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
+  where
+    end = BS.unpack end_
+
+    p :: (Bool, String) -> Char -> Maybe (Bool, String)
+    p acc c = case acc of
+      (True, _) -> Just (False, end)
+      (_, []) -> Nothing
+      (_, x:xs) | x == c -> Just (False, xs)
+      _ -> Just (c == '\\', end)
+
+    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
+    requireEnd = mfilter (BS.isSuffixOf end_)
+
+    gotSome xs
+      | BS.null xs = fail "didn't get any content"
+      | otherwise = return xs
diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs
new file mode 100644
index 00000000..3e25d230
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Types.hs
@@ -0,0 +1,70 @@
+{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable #-}
+{-# LANGUAGE DeriveTraversable, StandaloneDeriving #-}
+
+-- |
+-- Module      :  Documentation.Haddock.Types
+-- Copyright   :  (c) Simon Marlow      2003-2006,
+--                    David Waern       2006-2009,
+--                    Mateusz Kowalczyk 2013
+-- License     :  BSD-like
+--
+-- Maintainer  :  haddock@projects.haskellorg
+-- Stability   :  experimental
+-- Portability :  portable
+--
+-- Exposes documentation data types used for (some) of Haddock.
+module Documentation.Haddock.Types where
+
+import Data.Foldable
+import Data.Traversable
+
+instance Foldable Header where
+  foldMap f (Header _ a) = f a
+
+instance Traversable Header where
+  traverse f (Header l a) = Header l `fmap` f a
+
+data Hyperlink = Hyperlink
+  { hyperlinkUrl   :: String
+  , hyperlinkLabel :: Maybe String
+  } deriving (Eq, Show)
+
+
+data Picture = Picture
+  { pictureUri   :: String
+  , pictureTitle :: Maybe String
+  } deriving (Eq, Show)
+
+data Header id = Header
+  { headerLevel :: Int
+  , headerTitle :: id
+  } deriving Functor
+
+data Example = Example
+  { exampleExpression :: String
+  , exampleResult     :: [String]
+  } deriving (Eq, Show)
+
+data DocH mod id
+  = DocEmpty
+  | DocAppend (DocH mod id) (DocH mod id)
+  | DocString String
+  | DocParagraph (DocH mod id)
+  | DocIdentifier id
+  | DocIdentifierUnchecked mod
+  | DocModule String
+  | DocWarning (DocH mod id)
+  | DocEmphasis (DocH mod id)
+  | DocMonospaced (DocH mod id)
+  | DocBold (DocH mod id)
+  | DocUnorderedList [DocH mod id]
+  | DocOrderedList [DocH mod id]
+  | DocDefList [(DocH mod id, DocH mod id)]
+  | DocCodeBlock (DocH mod id)
+  | DocHyperlink Hyperlink
+  | DocPic Picture
+  | DocAName String
+  | DocProperty String
+  | DocExamples [Example]
+  | DocHeader (Header (DocH mod id))
+  deriving (Functor, Foldable, Traversable)
diff --git a/src/Haddock/Utf8.hs b/haddock-library/src/Documentation/Haddock/Utf8.hs
index 1fb0e818..3f75e53b 100644
--- a/src/Haddock/Utf8.hs
+++ b/haddock-library/src/Documentation/Haddock/Utf8.hs
@@ -1,4 +1,4 @@
-module Haddock.Utf8 (encodeUtf8, decodeUtf8) where
+module Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) where
 import           Data.Bits ((.|.), (.&.), shiftL, shiftR)
 import qualified Data.ByteString as BS
 import           Data.Char (chr, ord)
diff --git a/test/Haddock/Parser/UtilSpec.hs b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs
index 9e1e8de1..265a4d49 100644
--- a/test/Haddock/Parser/UtilSpec.hs
+++ b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs
@@ -1,11 +1,10 @@
 {-# LANGUAGE OverloadedStrings #-}
-module Haddock.Parser.UtilSpec (main, spec) where
+module Documentation.Haddock.Parser.UtilSpec (main, spec) where
 
-import           Test.Hspec
-import           Data.Either
-
-import           Data.Attoparsec.ByteString.Char8
-import           Haddock.Parser
+import Data.Attoparsec.ByteString.Char8
+import Data.Either
+import Documentation.Haddock.Parser.Util
+import Test.Hspec
 
 main :: IO ()
 main = hspec spec
diff --git a/test/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs
index 53fef943..3889d555 100644
--- a/test/Haddock/ParserSpec.hs
+++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs
@@ -3,23 +3,16 @@
              , IncoherentInstances #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 
-module Haddock.ParserSpec (main, spec) where
+module Documentation.Haddock.ParserSpec (main, spec) where
 
 import           Data.Monoid
 import           Data.String
-import qualified Haddock.Parser as Parse
-import           Haddock.Types
-import           Outputable (Outputable, showSDoc, ppr)
-import           RdrName (RdrName, mkVarUnqual)
-import           FastString (fsLit)
-import           StaticFlags (initStaticOpts)
+import qualified Documentation.Haddock.Parser as Parse
+import           Documentation.Haddock.Types
 import           Test.Hspec
 import           Test.QuickCheck
 
-import           Helper
-
-instance Outputable a => Show a where
-  show = showSDoc dynFlags . ppr
+type Doc id = DocH () id
 
 deriving instance Show a => Show (Header a)
 deriving instance Show a => Show (Doc a)
@@ -38,13 +31,11 @@ parseParas = Parse.toRegular . Parse.parseParas
 parseString :: String -> Doc String
 parseString = Parse.toRegular . Parse.parseString
 
-
-
 main :: IO ()
 main = hspec spec
 
 spec :: Spec
-spec = before initStaticOpts $ do
+spec = do
   describe "parseString" $ do
     let infix 1 `shouldParseTo`
         shouldParseTo :: String -> Doc String -> Expectation
diff --git a/test/Haddock/Utf8Spec.hs b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs
index a352bf61..47e12704 100644
--- a/test/Haddock/Utf8Spec.hs
+++ b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs
@@ -1,9 +1,8 @@
-module Haddock.Utf8Spec (main, spec) where
+module Documentation.Haddock.Utf8Spec (main, spec) where
 
-import           Test.Hspec
-import           Test.QuickCheck
-
-import           Haddock.Utf8
+import Test.Hspec
+import Test.QuickCheck
+import Documentation.Haddock.Utf8
 
 main :: IO ()
 main = hspec spec
diff --git a/test/Spec.hs b/haddock-library/test/Spec.hs
index a824f8c3..a824f8c3 100644
--- a/test/Spec.hs
+++ b/haddock-library/test/Spec.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs
index 41b4ed30..41b4ed30 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs
index d2f3761c..d2f3761c 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
index 3bbe51f0..3bbe51f0 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs
index 73d02056..73d02056 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs
index b3699728..b3699728 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs
index cb9cee83..cb9cee83 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs
index 0572d682..0572d682 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs
index e47e5c9e..e47e5c9e 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs
index bf175f4b..bf175f4b 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs
diff --git a/haddock.cabal b/haddock.cabal
index 0e26949a..b308a022 100644
--- a/haddock.cabal
+++ b/haddock.cabal
@@ -84,19 +84,11 @@ executable haddock
       xhtml >= 3000.2 && < 3000.3,
       Cabal >= 1.10,
       ghc == 7.9.*,
-      bytestring
+      bytestring,
+      haddock-library
 
     other-modules:
       Documentation.Haddock
-      Data.Attoparsec
-      Data.Attoparsec.ByteString
-      Data.Attoparsec.ByteString.Char8
-      Data.Attoparsec.Combinator
-      Data.Attoparsec.Number
-      Data.Attoparsec.ByteString.FastSet
-      Data.Attoparsec.ByteString.Internal
-      Data.Attoparsec.Internal
-      Data.Attoparsec.Internal.Types
       Haddock
       Haddock.Interface
       Haddock.Interface.Rename
@@ -105,7 +97,6 @@ executable haddock
       Haddock.Interface.LexParseRn
       Haddock.Interface.ParseModuleHeader
       Haddock.Parser
-      Haddock.Parser.Util
       Haddock.Utf8
       Haddock.Utils
       Haddock.Backends.Xhtml
@@ -128,7 +119,7 @@ executable haddock
       Haddock.GhcUtils
       Haddock.Convert
   else
-    build-depends:  haddock
+    build-depends:  haddock, haddock-library
 
 library
   default-language:     Haskell2010
@@ -143,14 +134,15 @@ library
     array,
     xhtml >= 3000.2 && < 3000.3,
     Cabal >= 1.10,
-    ghc == 7.9.*
+    ghc == 7.9.*,
+    haddock-library
 
   if flag(in-ghc-tree)
     cpp-options: -DIN_GHC_TREE
   else
     build-depends: ghc-paths
 
-  hs-source-dirs:       src, vendor/attoparsec-0.10.4.0
+  hs-source-dirs:       src
   if flag(dev)
     ghc-options:          -funbox-strict-fields -Wall -fwarn-tabs
   else
@@ -160,15 +152,6 @@ library
     Documentation.Haddock
 
   other-modules:
-    Data.Attoparsec
-    Data.Attoparsec.ByteString
-    Data.Attoparsec.ByteString.Char8
-    Data.Attoparsec.Combinator
-    Data.Attoparsec.Number
-    Data.Attoparsec.ByteString.FastSet
-    Data.Attoparsec.ByteString.Internal
-    Data.Attoparsec.Internal
-    Data.Attoparsec.Internal.Types
     Haddock
     Haddock.Interface
     Haddock.Interface.Rename
@@ -177,8 +160,6 @@ library
     Haddock.Interface.LexParseRn
     Haddock.Interface.ParseModuleHeader
     Haddock.Parser
-    Haddock.Parser.Util
-    Haddock.Utf8
     Haddock.Utils
     Haddock.Backends.Xhtml
     Haddock.Backends.Xhtml.Decl
@@ -218,34 +199,6 @@ test-suite latex-test
   hs-source-dirs:   latex-test
   build-depends:    base, directory, process, filepath, Cabal
 
-test-suite spec
-  type:             exitcode-stdio-1.0
-  default-language: Haskell2010
-  main-is:          Spec.hs
-  hs-source-dirs:
-      test
-    , src
-    , vendor/attoparsec-0.10.4.0
-
-  other-modules:
-      Helper
-      Haddock.ParserSpec
-      Haddock.Utf8Spec
-      Haddock.Parser.UtilSpec
-
-  build-depends:
-      base
-    , bytestring
-    , ghc
-    , containers
-    , deepseq
-    , array
-    , hspec
-    , QuickCheck == 2.*
-
-  build-depends:
-      haddock
-
 source-repository head
   type:     git
   location: http://git.haskell.org/haddock.git
diff --git a/src/Documentation/Haddock.hs b/src/Documentation/Haddock.hs
index 36115a2a..655a9723 100644
--- a/src/Documentation/Haddock.hs
+++ b/src/Documentation/Haddock.hs
@@ -33,7 +33,8 @@ module Documentation.Haddock (
   InstHead,
 
   -- * Documentation comments
-  Doc(..),
+  Doc,
+  DocH(..),
   Example(..),
   Hyperlink(..),
   DocMarkup(..),
diff --git a/src/Haddock.hs b/src/Haddock.hs
index 66dfb168..78844c96 100644
--- a/src/Haddock.hs
+++ b/src/Haddock.hs
@@ -25,7 +25,7 @@ import Haddock.Backends.Xhtml.Themes (getThemes)
 import Haddock.Backends.LaTeX
 import Haddock.Backends.Hoogle
 import Haddock.Interface
-import Haddock.Parser.Util
+import Haddock.Parser
 import Haddock.Types
 import Haddock.Version
 import Haddock.InterfaceFile
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index d812aee2..79a59ac2 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,55 +1,18 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-module Haddock.Doc (
-  docAppend
-, docParagraph
-, combineDocumentation
-) where
+module Haddock.Doc ( module Documentation.Haddock.Doc
+                   , docCodeBlock
+                   , combineDocumentation
+                   ) where
 
 import Data.Maybe
 import Data.Monoid
+import Documentation.Haddock.Doc
 import Haddock.Types
-import Data.Char (isSpace)
-
--- We put it here so that we can avoid a circular import
--- anything relevant imports this module anyway
-instance Monoid (DocH mod id) where
-  mempty  = DocEmpty
-  mappend = docAppend
 
 combineDocumentation :: Documentation name -> Maybe (Doc name)
 combineDocumentation (Documentation Nothing Nothing) = Nothing
-combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
-
-docAppend :: DocH mod id -> DocH mod id -> DocH mod id
-docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
-docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
-docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2)
-docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2)
-docAppend DocEmpty d = d
-docAppend d DocEmpty = d
-docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
-docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
-docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
-docAppend d1 d2 = DocAppend d1 d2
-
--- again to make parsing easier - we spot a paragraph whose only item
--- is a DocMonospaced and make it into a DocCodeBlock
-docParagraph :: DocH mod id -> DocH mod id
-docParagraph (DocMonospaced p)
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1) (DocMonospaced p))
-  | all isSpace s1
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1)
-    (DocAppend (DocMonospaced p) (DocString s2)))
-  | all isSpace s1 && all isSpace s2
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocMonospaced p) (DocString s2))
-  | all isSpace s2
-  = DocCodeBlock (docCodeBlock p)
-docParagraph p
-  = DocParagraph p
-
+combineDocumentation (Documentation mDoc mWarning)   =
+  Just (fromMaybe mempty mWarning <> fromMaybe mempty mDoc)
 
 -- Drop trailing whitespace from @..@ code blocks.  Otherwise this:
 --
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 01276310..d9d4ae58 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -18,31 +18,29 @@ module Haddock.Interface.LexParseRn
   , processModuleHeader
   ) where
 
-import qualified Data.IntSet as IS
-import Haddock.Types
-import Haddock.Parser.Util
-import Haddock.Interface.ParseModuleHeader
-import Haddock.Doc
-
 import Control.Applicative
+import Data.IntSet (toList)
 import Data.List
 import Data.Maybe
+import Data.Monoid ((<>))
+import DynFlags (ExtensionFlag(..), languageExtensions)
 import FastString
 import GHC
-import DynFlags (ExtensionFlag(..), languageExtensions)
+import Haddock.Interface.ParseModuleHeader
+import Haddock.Parser
+import Haddock.Types
 import Name
-import Outputable
+import Outputable (showPpr)
 import RdrName
 
 processDocStrings :: DynFlags -> GlobalRdrEnv -> [HsDocString] -> ErrMsgM (Maybe (Doc Name))
 processDocStrings dflags gre strs = do
   docs <- catMaybes <$> mapM (processDocStringParas dflags gre) strs
-  let doc = foldl' docAppend DocEmpty docs
+  let doc = foldl' (<>) DocEmpty docs
   case doc of
     DocEmpty -> return Nothing
     _ -> return (Just doc)
 
-
 processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
 processDocStringParas = process parseParasMaybe
 
@@ -86,7 +84,7 @@ processModuleHeader dflags gre safety mayStr = do
 
   let flags :: [ExtensionFlag]
       -- We remove the flags implied by the language setting and we display the language instead
-      flags = map toEnum (IS.toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
+      flags = map toEnum (toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
   return (hmi { hmi_safety = Just $ showPpr dflags safety
               , hmi_language = language dflags
               , hmi_extensions = flags
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index c155a83b..0be2511f 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -12,7 +12,7 @@
 module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where
 
 import Haddock.Types
-import Haddock.Parser.Util
+import Haddock.Parser
 
 import RdrName
 import DynFlags
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index 1489ae84..720f442b 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -13,504 +13,35 @@
 -- Stability   :  experimental
 -- Portability :  portable
 
-module Haddock.Parser ( parseString, parseParas
-                      , overIdentifier, toRegular
-                      , takeUntil
+module Haddock.Parser ( module Documentation.Haddock.Parser
+                      , parseParasMaybe
+                      , parseStringMaybe
+                      , parseIdent
                       ) where
 
-import           Prelude hiding (takeWhile)
-import           Control.Arrow (first)
-import           Control.Monad (void, mfilter)
-import           Control.Applicative
-import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
-import qualified Data.ByteString.Char8 as BS
-import           Data.Char (chr, isAsciiUpper)
-import           Data.List (stripPrefix, intercalate, unfoldr)
-import           Data.Maybe (fromMaybe)
-import           Data.Monoid
-import           Haddock.Doc
-import           Haddock.Types hiding (Doc)
-import           Haddock.Utf8
-import           Data.ByteString.Char8 (ByteString)
-
--- | Identifier string surrounded with opening and closing quotes/backticks.
-type IString = (Char, String, Char)
-
--- | Drops the quotes/backticks around all identifiers, as if they
--- were valid but still 'String's.
-toRegular :: DocH mod IString -> DocH mod String
-toRegular = fmap (\(_, x, _) -> x)
-
--- | Maps over 'DocIdentifier's over 'String' with potentially failing
--- conversion using user-supplied function. If the conversion fails,
--- the identifier is deemed to not be valid and is treated as a
--- regular string.
-overIdentifier :: (String -> Maybe a)
-               -> DocH mod IString
-               -> DocH mod a
-overIdentifier f d = g d
-  where
-    g (DocIdentifier (o, x, e)) = case f x of
-      Nothing -> DocString $ o : x ++ [e]
-      Just x' -> DocIdentifier x'
-    g DocEmpty = DocEmpty
-    g (DocAppend x x') = DocAppend (g x) (g x')
-    g (DocString x) = DocString x
-    g (DocParagraph x) = DocParagraph $ g x
-    g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
-    g (DocModule x) = DocModule x
-    g (DocWarning x) = DocWarning $ g x
-    g (DocEmphasis x) = DocEmphasis $ g x
-    g (DocMonospaced x) = DocMonospaced $ g x
-    g (DocBold x) = DocBold $ g x
-    g (DocUnorderedList x) = DocUnorderedList $ fmap g x
-    g (DocOrderedList x) = DocOrderedList $ fmap g x
-    g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
-    g (DocCodeBlock x) = DocCodeBlock $ g x
-    g (DocHyperlink x) = DocHyperlink x
-    g (DocPic x) = DocPic x
-    g (DocAName x) = DocAName x
-    g (DocProperty x) = DocProperty x
-    g (DocExamples x) = DocExamples x
-    g (DocHeader (Header l x)) = DocHeader . Header l $ g x
-
-parse :: Parser a -> BS.ByteString -> a
-parse p = either err id . parseOnly (p <* endOfInput)
-  where
-    err = error . ("Haddock.Parser.parse: " ++)
-
--- | Main entry point to the parser. Appends the newline character
--- to the input string.
-parseParas :: String -- ^ String to parse
-           -> DocH mod IString
-parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
-  where
-    p :: Parser (DocH mod IString)
-    p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
-
--- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
--- drops leading whitespace and encodes the string to UTF8 first.
-parseString :: String -> DocH mod IString
-parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
-
-parseStringBS :: BS.ByteString -> DocH mod IString
-parseStringBS = parse p
-  where
-    p :: Parser (DocH mod IString)
-    p = mconcat <$> many (monospace <|> anchor <|> identifier
-                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
-                          <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
-
--- | Parses and processes
--- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
---
--- >>> parseOnly encodedChar "&#65;&#66;&#67;"
--- Right (DocString "ABC")
-encodedChar :: Parser (DocH mod a)
-encodedChar = "&#" *> c <* ";"
-  where
-    c = DocString . return . chr <$> num
-    num = hex <|> decimal
-    hex = ("x" <|> "X") *> hexadecimal
-
-specialChar :: [Char]
-specialChar = "_/<@\"&'`"
-
--- | Plain, regular parser for text. Called as one of the last parsers
--- to ensure that we have already given a chance to more meaningful parsers
--- before capturing their characers.
-string' :: Parser (DocH mod a)
-string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
-  where
-    unescape "" = ""
-    unescape ('\\':x:xs) = x : unescape xs
-    unescape (x:xs) = x : unescape xs
-
--- | Skips a single special character and treats it as a plain string.
--- This is done to skip over any special characters belonging to other
--- elements but which were not deemed meaningful at their positions.
-skipSpecialChar :: Parser (DocH mod a)
-skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
-
--- | Emphasis parser.
---
--- >>> parseOnly emphasis "/Hello world/"
--- Right (DocEmphasis (DocString "Hello world"))
-emphasis :: Parser (DocH mod IString)
-emphasis = DocEmphasis . parseStringBS <$>
-  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
-
--- | Bold parser.
---
--- >>> parseOnly bold "__Hello world__"
--- Right (DocBold (DocString "Hello world"))
-bold :: Parser (DocH mod IString)
-bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
-
-disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
-disallowNewline = mfilter ('\n' `BS.notElem`)
-
--- | Like `takeWhile`, but unconditionally take escaped characters.
-takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile_ p = scan False p_
-  where
-    p_ escaped c
-      | escaped = Just False
-      | not $ p c = Nothing
-      | otherwise = Just (c == '\\')
-
--- | Like `takeWhile1`, but unconditionally take escaped characters.
-takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile1_ = mfilter (not . BS.null) . takeWhile_
-
--- | Text anchors to allow for jumping around the generated documentation.
---
--- >>> parseOnly anchor "#Hello world#"
--- Right (DocAName "Hello world")
-anchor :: Parser (DocH mod a)
-anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
-
--- | Monospaced strings.
---
--- >>> parseOnly monospace "@cruel@"
--- Right (DocMonospaced (DocString "cruel"))
-monospace :: Parser (DocH mod IString)
-monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
-
-moduleName :: Parser (DocH mod a)
-moduleName = DocModule <$> (char '"' *> modid <* char '"')
-  where
-    modid = intercalate "." <$> conid `sepBy1` "."
-    conid = (:)
-      <$> satisfy isAsciiUpper
-      -- NOTE: According to Haskell 2010 we shouldd actually only
-      -- accept {small | large | digit | ' } here.  But as we can't
-      -- match on unicode characters, this is currently not possible.
-      <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
-
--- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
--- a title for the picture.
---
--- >>> parseOnly picture "<<hello.png>>"
--- Right (DocPic (Picture "hello.png" Nothing))
--- >>> parseOnly picture "<<hello.png world>>"
--- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (DocH mod a)
-picture = DocPic . makeLabeled Picture . decodeUtf8
-          <$> disallowNewline ("<<" *> takeUntil ">>")
-
--- | Paragraph parser, called by 'parseParas'.
-paragraph :: Parser (DocH mod IString)
-paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
-                                       <|> property <|> header
-                                       <|> textParagraph)
-
-header :: Parser (DocH mod IString)
-header = do
-  let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
-      pser = foldl1 (<|>) psers
-  delim <- decodeUtf8 <$> pser
-  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
-  rest <- paragraph <|> return mempty
-  return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest
-
-textParagraph :: Parser (DocH mod IString)
-textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
-
--- | List parser, called by 'paragraph'.
-list :: Parser (DocH mod IString)
-list = DocUnorderedList <$> unorderedList
-       <|> DocOrderedList <$> orderedList
-       <|> DocDefList <$> definitionList
-
--- | Parses unordered (bullet) lists.
-unorderedList :: Parser [DocH mod IString]
-unorderedList = ("*" <|> "-") *> innerList unorderedList
-
--- | Parses ordered lists (numbered or dashed).
-orderedList :: Parser [DocH mod IString]
-orderedList = (paren <|> dot) *> innerList orderedList
-  where
-    dot = (decimal :: Parser Int) <* "."
-    paren = "(" *> decimal <* ")"
-
--- | Generic function collecting any further lines belonging to the
--- list entry and recursively collecting any further lists in the
--- same paragraph. Usually used as
---
--- > someListFunction = listBeginning *> innerList someListFunction
-innerList :: Parser [DocH mod IString] -> Parser [DocH mod IString]
-innerList item = do
-  c <- takeLine
-  (cs, items) <- more item
-  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
-  return $ case items of
-    Left p -> [contents `joinPara` p]
-    Right i -> contents : i
-
--- | Parses definition lists.
-definitionList :: Parser [(DocH mod IString, DocH mod IString)]
-definitionList = do
-  label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
-  c <- takeLine
-  (cs, items) <- more definitionList
-  let contents = parseString . dropNLs . unlines $ c : cs
-  return $ case items of
-    Left p -> [(label, contents `joinPara` p)]
-    Right i -> (label, contents) : i
-
--- | If possible, appends two 'Doc's under a 'DocParagraph' rather than
--- outside of it. This allows to get structures like
---
--- @DocParagraph (DocAppend … …)@
---
--- rather than
---
--- @DocAppend (DocParagraph …) …@
-joinPara :: DocH mod id -> DocH mod id -> DocH mod id
-joinPara (DocParagraph p) c = docParagraph $ docAppend p c
-joinPara d p = docAppend d p
-
--- | Drops all trailing newlines.
-dropNLs :: String -> String
-dropNLs = reverse . dropWhile (== '\n') . reverse
-
--- | Main worker for 'innerList' and 'definitionList'.
--- We need the 'Either' here to be able to tell in the respective functions
--- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => Parser a
-     -> Parser ([String], Either (DocH mod IString) a)
-more item = innerParagraphs <|> moreListItems item
-            <|> moreContent item <|> pure ([], Right mempty)
-
--- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: Parser ([String], Either (DocH mod IString) a)
-innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
-
--- | Attemps to fetch the next list if possibly. Used by 'innerList' and
--- 'definitionList' to recursivly grab lists that aren't separated by a whole
--- paragraph.
-moreListItems :: Parser a
-              -> Parser ([String], Either (DocH mod IString) a)
-moreListItems item = (,) [] . Right <$> (skipSpace *> item)
-
--- | Helper for 'innerList' and 'definitionList' which simply takes
--- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => Parser a
-            -> Parser ([String], Either (DocH mod IString) a)
-moreContent item = first . (:) <$> nonEmptyLine <*> more item
-
--- | Runs the 'parseParas' parser on an indented paragraph.
--- The indentation is 4 spaces.
-indentedParagraphs :: Parser (DocH mod IString)
-indentedParagraphs = parseParas . concat <$> dropFrontOfPara "    "
-
--- | Grab as many fully indented paragraphs as we can.
-dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
-dropFrontOfPara sp = do
-  currentParagraph <- some (sp *> takeNonEmptyLine)
-  followingParagraphs <-
-    skipHorizontalSpace *> nextPar -- we have more paragraphs to take
-    <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
-    <|> endOfInput *> return [] -- nothing more to take at all
-  return (currentParagraph ++ followingParagraphs)
-  where
-    nextPar = (++) <$> nlList <*> dropFrontOfPara sp
-    nlList = "\n" *> return ["\n"]
-
-nonSpace :: BS.ByteString -> Parser BS.ByteString
-nonSpace xs
-  | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
-  | otherwise = return xs
-
--- | Takes a non-empty, not fully whitespace line.
---
---  Doesn't discard the trailing newline.
-takeNonEmptyLine :: Parser String
-takeNonEmptyLine = do
-    (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
-
-birdtracks :: Parser (DocH mod a)
-birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
-  where
-    line = skipHorizontalSpace *> ">" *> takeLine
-
-stripSpace :: [String] -> [String]
-stripSpace = fromMaybe <*> mapM strip'
-  where
-    strip' (' ':xs') = Just xs'
-    strip' "" = Just ""
-    strip' _  = Nothing
-
--- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
--- Consecutive examples are accepted.
-examples :: Parser (DocH mod a)
-examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
-  where
-    go :: Parser [Example]
-    go = do
-      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
-      expr <- takeLine
-      (rs, es) <- resultAndMoreExamples
-      return (makeExample prefix expr rs : es)
-      where
-        resultAndMoreExamples :: Parser ([String], [Example])
-        resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
-          where
-            moreExamples :: Parser ([String], [Example])
-            moreExamples = (,) [] <$> go
-
-            result :: Parser ([String], [Example])
-            result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
-
-    makeExample :: String -> String -> [String] -> Example
-    makeExample prefix expression res =
-      Example (strip expression) result
-      where
-        result = map (substituteBlankLine . tryStripPrefix) res
-
-        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
-
-        substituteBlankLine "<BLANKLINE>" = ""
-        substituteBlankLine xs = xs
-
-nonEmptyLine :: Parser String
-nonEmptyLine = mfilter (any (not . isSpace)) takeLine
-
-takeLine :: Parser String
-takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
-
-endOfLine :: Parser ()
-endOfLine = void "\n" <|> endOfInput
-
--- | Property parser.
---
--- >>> parseOnly property "prop> hello world"
--- Right (DocProperty "hello world")
-property :: Parser (DocH mod a)
-property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
-
--- |
--- Paragraph level codeblock. Anything between the two delimiting @ is parsed
--- for markup.
-codeblock :: Parser (DocH mod IString)
-codeblock =
-  DocCodeBlock . parseStringBS . dropSpaces
-  <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
-  where
-    dropSpaces xs =
-      let rs = decodeUtf8 xs
-      in case splitByNl rs of
-        [] -> xs
-        ys -> case last ys of
-          ' ':_ -> case mapM dropSpace ys of
-            Nothing -> xs
-            Just zs -> encodeUtf8 $ intercalate "\n" zs
-          _ -> xs
-
-    -- This is necessary because ‘lines’ swallows up a trailing newline
-    -- and we lose information about whether the last line belongs to @ or to
-    -- text which we need to decide whether we actually want to be dropping
-    -- anything at all.
-    splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
-                               _      -> Nothing)
-                . ('\n' :)
-
-    dropSpace "" = Just ""
-    dropSpace (' ':xs) = Just xs
-    dropSpace _ = Nothing
-
-    block' = scan False p
-      where
-        p isNewline c
-          | isNewline && c == '@' = Nothing
-          | isNewline && isSpace c = Just isNewline
-          | otherwise = Just $ c == '\n'
-
-hyperlink :: Parser (DocH mod a)
-hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
-              <$> disallowNewline ("<" *> takeUntil ">")
-            <|> autoUrl
-
-autoUrl :: Parser (DocH mod a)
-autoUrl = mkLink <$> url
-  where
-    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
-    mkLink :: BS.ByteString -> DocH mod a
-    mkLink s = case BS.unsnoc s of
-      Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
-      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-
--- | Parses strings between identifier delimiters. Consumes all input that it
--- deems to be valid in an identifier. Note that it simply blindly consumes
--- characters and does no actual validation itself.
-parseValid :: Parser String
-parseValid = do
-  vs' <- many' $ utf8String "⋆" <|> return <$> idChar
-  let vs = concat vs'
-  c <- peekChar
-  case c of
-    Just '`' -> return vs
-    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
-                 <|> return vs
-    _ -> fail "outofvalid"
-  where
-    idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
-             <|> digit <|> letter_ascii
-
--- | Parses UTF8 strings from ByteString streams.
-utf8String :: String -> Parser String
-utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
-
--- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
--- string it deems valid.
-identifier :: Parser (DocH mod IString)
-identifier = do
-  o <- idDelim
-  vid <- parseValid
-  e <- idDelim
-  return $ DocIdentifier (o, vid, e)
-  where
-    idDelim = char '\'' <|> char '`'
-
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = (\f -> f . f) $ dropWhile isSpace . reverse
-
-skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
-
-takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
-
-makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
-  (uri, "")    -> f uri Nothing
-  (uri, label) -> f uri (Just $ dropWhile isSpace label)
-  where
-    -- As we don't parse these any further, we don't do any processing to the
-    -- string so we at least remove escape character here. Perhaps we should
-    -- actually be parsing the label at the very least?
-    removeEscapes "" = ""
-    removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
-    removeEscapes ('\\':xs) = removeEscapes xs
-    removeEscapes (x:xs) = x : removeEscapes xs
-
-takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
-  where
-    end = BS.unpack end_
-
-    p :: (Bool, String) -> Char -> Maybe (Bool, String)
-    p acc c = case acc of
-      (True, _) -> Just (False, end)
-      (_, []) -> Nothing
-      (_, x:xs) | x == c -> Just (False, xs)
-      _ -> Just (c == '\\', end)
-
-    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
-    requireEnd = mfilter (BS.isSuffixOf end_)
-
-    gotSome xs
-      | BS.null xs = fail "didn't get any content"
-      | otherwise = return xs
+import Documentation.Haddock.Parser
+import DynFlags (DynFlags)
+import FastString (mkFastString)
+import Documentation.Haddock.Types
+import Lexer (mkPState, unP, ParseResult(POk))
+import Parser (parseIdentifier)
+import RdrName (RdrName)
+import SrcLoc (mkRealSrcLoc, unLoc)
+import StringBuffer (stringToStringBuffer)
+
+{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
+parseParasMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
+
+{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
+parseStringMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
+
+parseIdent :: DynFlags -> String -> Maybe RdrName
+parseIdent dflags str0 =
+  let buffer = stringToStringBuffer str0
+      realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+      pstate = mkPState dflags buffer realSrcLc
+  in case unP parseIdentifier pstate of
+    POk _ name -> Just (unLoc name)
+    _ -> Nothing
diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs
deleted file mode 100644
index 29da91c0..00000000
--- a/src/Haddock/Parser/Util.hs
+++ /dev/null
@@ -1,28 +0,0 @@
-module Haddock.Parser.Util where
-
-import DynFlags (DynFlags)
-import FastString (mkFastString)
-import Haddock.Types
-import Haddock.Parser
-import Lexer (mkPState, unP, ParseResult(POk))
-import Parser (parseIdentifier)
-import RdrName (RdrName)
-import SrcLoc (mkRealSrcLoc, unLoc)
-import StringBuffer (stringToStringBuffer)
-
-{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
-parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
-
-{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
-parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
-
-parseIdent :: DynFlags -> String -> Maybe RdrName
-parseIdent dflags str0 =
-  let buffer = stringToStringBuffer str0
-      realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
-      pstate = mkPState dflags buffer realSrcLc
-  in case unP parseIdentifier pstate of
-    POk _ name -> Just (unLoc name)
-    _ -> Nothing
diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs
index cd615bf4..85b3a592 100644
--- a/src/Haddock/Types.hs
+++ b/src/Haddock/Types.hs
@@ -19,16 +19,16 @@ module Haddock.Types (
   module Haddock.Types
   , HsDocString, LHsDocString
   , Fixity(..)
+  , module Documentation.Haddock.Types
  ) where
 
-import Data.Foldable
-import Data.Traversable
 import Control.Exception
 import Control.Arrow hiding ((<+>))
 import Control.DeepSeq
 import Data.Typeable
 import Data.Map (Map)
 import qualified Data.Map as Map
+import Documentation.Haddock.Types
 import BasicTypes (Fixity(..))
 import GHC hiding (NoLink)
 import DynFlags (ExtensionFlag, Language)
@@ -316,36 +316,6 @@ type LDoc id = Located (Doc id)
 
 type Doc id = DocH (ModuleName, OccName) id
 
-data DocH mod id
-  = DocEmpty
-  | DocAppend (DocH mod id) (DocH mod id)
-  | DocString String
-  | DocParagraph (DocH mod id)
-  | DocIdentifier id
-  | DocIdentifierUnchecked mod
-  | DocModule String
-  | DocWarning (DocH mod id)
-  | DocEmphasis (DocH mod id)
-  | DocMonospaced (DocH mod id)
-  | DocBold (DocH mod id)
-  | DocUnorderedList [DocH mod id]
-  | DocOrderedList [DocH mod id]
-  | DocDefList [(DocH mod id, DocH mod id)]
-  | DocCodeBlock (DocH mod id)
-  | DocHyperlink Hyperlink
-  | DocPic Picture
-  | DocAName String
-  | DocProperty String
-  | DocExamples [Example]
-  | DocHeader (Header (DocH mod id))
-  deriving (Functor, Foldable, Traversable)
-
-instance Foldable Header where
-  foldMap f (Header _ a) = f a
-
-instance Traversable Header where
-  traverse f (Header l a) = Header l `fmap` f a
-
 instance (NFData a, NFData mod)
          => NFData (DocH mod a) where
   rnf doc = case doc of
@@ -376,23 +346,6 @@ instance NFData Name
 instance NFData OccName
 instance NFData ModuleName
 
-
-data Hyperlink = Hyperlink
-  { hyperlinkUrl   :: String
-  , hyperlinkLabel :: Maybe String
-  } deriving (Eq, Show)
-
-
-data Picture = Picture
-  { pictureUri   :: String
-  , pictureTitle :: Maybe String
-  } deriving (Eq, Show)
-
-data Header id = Header
-  { headerLevel :: Int
-  , headerTitle :: id
-  } deriving Functor
-
 instance NFData id => NFData (Header id) where
   rnf (Header a b) = a `deepseq` b `deepseq` ()
 
@@ -402,13 +355,6 @@ instance NFData Hyperlink where
 instance NFData Picture where
   rnf (Picture a b) = a `deepseq` b `deepseq` ()
 
-
-data Example = Example
-  { exampleExpression :: String
-  , exampleResult     :: [String]
-  } deriving (Eq, Show)
-
-
 instance NFData Example where
   rnf (Example a b) = a `deepseq` b `deepseq` ()
 
diff --git a/test/Helper.hs b/test/Helper.hs
deleted file mode 100644
index f0a3c05f..00000000
--- a/test/Helper.hs
+++ /dev/null
@@ -1,186 +0,0 @@
-module Helper where
-import           DynFlags (Settings(..), DynFlags, defaultDynFlags)
-import           Platform
-import           PlatformConstants
-
-dynFlags :: DynFlags
-dynFlags = defaultDynFlags settings
-  where
-    settings = Settings {
-        sTargetPlatform = platform
-      , sGhcUsagePath = error "Haddock.ParserSpec.sGhcUsagePath"
-      , sGhciUsagePath = error "Haddock.ParserSpec.sGhciUsagePath"
-      , sTopDir = error "Haddock.ParserSpec.sTopDir"
-      , sTmpDir = error "Haddock.ParserSpec.sTmpDir"
-      , sRawSettings = []
-      , sExtraGccViaCFlags = error "Haddock.ParserSpec.sExtraGccViaCFlags"
-      , sSystemPackageConfig = error "Haddock.ParserSpec.sSystemPackageConfig"
-      , sPgm_L = error "Haddock.ParserSpec.sPgm_L"
-      , sPgm_P = error "Haddock.ParserSpec.sPgm_P"
-      , sPgm_F = error "Haddock.ParserSpec.sPgm_F"
-      , sPgm_c = error "Haddock.ParserSpec.sPgm_c"
-      , sPgm_s = error "Haddock.ParserSpec.sPgm_s"
-      , sPgm_a = error "Haddock.ParserSpec.sPgm_a"
-      , sPgm_l = error "Haddock.ParserSpec.sPgm_l"
-      , sPgm_dll = error "Haddock.ParserSpec.sPgm_dll"
-      , sPgm_T = error "Haddock.ParserSpec.sPgm_T"
-      , sPgm_sysman = error "Haddock.ParserSpec.sPgm_sysman"
-      , sPgm_windres = error "Haddock.ParserSpec.sPgm_windres"
-      , sPgm_libtool = error "Haddock.ParserSpec.sPgm_libtool"
-      , sPgm_lo = error "Haddock.ParserSpec.sPgm_lo"
-      , sPgm_lc = error "Haddock.ParserSpec.sPgm_lc"
-      , sOpt_L = []
-      , sOpt_P = []
-      , sOpt_F = []
-      , sOpt_c = []
-      , sOpt_a = []
-      , sOpt_l = []
-      , sOpt_windres = []
-      , sOpt_lo = []
-      , sOpt_lc = []
-      , sLdSupportsCompactUnwind = error "Haddock.ParserSpec.sLdSupportsCompactUnwind"
-      , sLdSupportsBuildId  = error "Haddock.ParserSpec.sLdSupportsBuildId "
-      , sLdSupportsFilelist  = error "Haddock.ParserSpec.sLdSupportsFilelist "
-      , sLdIsGnuLd = error "Haddock.ParserSpec.sLdIsGnuLd"
-      , sPlatformConstants = platformConstants
-      }
-    platform = Platform {
-        platformArch = ArchUnknown
-      , platformOS = OSUnknown
-      , platformWordSize = 64
-      , platformHasGnuNonexecStack = False
-      , platformHasIdentDirective = False
-      , platformHasSubsectionsViaSymbols = False
-      , platformUnregisterised = error "Haddock.ParserSpec.platformUnregisterised"
-      }
-
-    platformConstants = PlatformConstants {
-        pc_platformConstants = ()
-      , pc_STD_HDR_SIZE = 0
-      , pc_PROF_HDR_SIZE = 0
-      , pc_BLOCK_SIZE = 0
-      , pc_BLOCKS_PER_MBLOCK = 0
-      , pc_OFFSET_StgRegTable_rR1 = 0
-      , pc_OFFSET_StgRegTable_rR2 = 0
-      , pc_OFFSET_StgRegTable_rR3 = 0
-      , pc_OFFSET_StgRegTable_rR4 = 0
-      , pc_OFFSET_StgRegTable_rR5 = 0
-      , pc_OFFSET_StgRegTable_rR6 = 0
-      , pc_OFFSET_StgRegTable_rR7 = 0
-      , pc_OFFSET_StgRegTable_rR8 = 0
-      , pc_OFFSET_StgRegTable_rR9 = 0
-      , pc_OFFSET_StgRegTable_rR10 = 0
-      , pc_OFFSET_StgRegTable_rF1 = 0
-      , pc_OFFSET_StgRegTable_rF2 = 0
-      , pc_OFFSET_StgRegTable_rF3 = 0
-      , pc_OFFSET_StgRegTable_rF4 = 0
-      , pc_OFFSET_StgRegTable_rF5 = 0
-      , pc_OFFSET_StgRegTable_rF6 = 0
-      , pc_OFFSET_StgRegTable_rD1 = 0
-      , pc_OFFSET_StgRegTable_rD2 = 0
-      , pc_OFFSET_StgRegTable_rD3 = 0
-      , pc_OFFSET_StgRegTable_rD4 = 0
-      , pc_OFFSET_StgRegTable_rD5 = 0
-      , pc_OFFSET_StgRegTable_rD6 = 0
-      , pc_OFFSET_StgRegTable_rXMM1 = 0
-      , pc_OFFSET_StgRegTable_rXMM2 = 0
-      , pc_OFFSET_StgRegTable_rXMM3 = 0
-      , pc_OFFSET_StgRegTable_rXMM4 = 0
-      , pc_OFFSET_StgRegTable_rXMM5 = 0
-      , pc_OFFSET_StgRegTable_rXMM6 = 0
-      , pc_OFFSET_StgRegTable_rL1 = 0
-      , pc_OFFSET_StgRegTable_rSp = 0
-      , pc_OFFSET_StgRegTable_rSpLim = 0
-      , pc_OFFSET_StgRegTable_rHp = 0
-      , pc_OFFSET_StgRegTable_rHpLim = 0
-      , pc_OFFSET_StgRegTable_rCCCS = 0
-      , pc_OFFSET_StgRegTable_rCurrentTSO = 0
-      , pc_OFFSET_StgRegTable_rCurrentNursery = 0
-      , pc_OFFSET_StgRegTable_rHpAlloc = 0
-      , pc_OFFSET_stgEagerBlackholeInfo = 0
-      , pc_OFFSET_stgGCEnter1 = 0
-      , pc_OFFSET_stgGCFun = 0
-      , pc_OFFSET_Capability_r = 0
-      , pc_OFFSET_bdescr_start = 0
-      , pc_OFFSET_bdescr_free = 0
-      , pc_OFFSET_bdescr_blocks = 0
-      , pc_SIZEOF_CostCentreStack = 0
-      , pc_OFFSET_CostCentreStack_mem_alloc = 0
-      , pc_REP_CostCentreStack_mem_alloc = 0
-      , pc_OFFSET_CostCentreStack_scc_count = 0
-      , pc_REP_CostCentreStack_scc_count = 0
-      , pc_OFFSET_StgHeader_ccs = 0
-      , pc_OFFSET_StgHeader_ldvw = 0
-      , pc_SIZEOF_StgSMPThunkHeader = 0
-      , pc_OFFSET_StgEntCounter_allocs = 0
-      , pc_REP_StgEntCounter_allocs = 0
-      , pc_OFFSET_StgEntCounter_allocd = 0
-      , pc_REP_StgEntCounter_allocd = 0
-      , pc_OFFSET_StgEntCounter_registeredp = 0
-      , pc_OFFSET_StgEntCounter_link = 0
-      , pc_OFFSET_StgEntCounter_entry_count = 0
-      , pc_SIZEOF_StgUpdateFrame_NoHdr = 0
-      , pc_SIZEOF_StgMutArrPtrs_NoHdr = 0
-      , pc_OFFSET_StgMutArrPtrs_ptrs = 0
-      , pc_OFFSET_StgMutArrPtrs_size = 0
-      , pc_SIZEOF_StgArrWords_NoHdr = 0
-      , pc_OFFSET_StgTSO_cccs = 0
-      , pc_OFFSET_StgTSO_stackobj = 0
-      , pc_OFFSET_StgStack_sp = 0
-      , pc_OFFSET_StgStack_stack = 0
-      , pc_OFFSET_StgUpdateFrame_updatee = 0
-      , pc_SIZEOF_StgFunInfoExtraRev = 0
-      , pc_MAX_SPEC_SELECTEE_SIZE = 0
-      , pc_MAX_SPEC_AP_SIZE = 0
-      , pc_MIN_PAYLOAD_SIZE = 0
-      , pc_MIN_INTLIKE = 0
-      , pc_MAX_INTLIKE = 0
-      , pc_MIN_CHARLIKE = 0
-      , pc_MAX_CHARLIKE = 0
-      , pc_MUT_ARR_PTRS_CARD_BITS = 0
-      , pc_MAX_Vanilla_REG = 0
-      , pc_MAX_Float_REG = 0
-      , pc_MAX_Double_REG = 0
-      , pc_MAX_Long_REG = 0
-      , pc_MAX_XMM_REG = 0
-      , pc_MAX_Real_Vanilla_REG = 0
-      , pc_MAX_Real_Float_REG = 0
-      , pc_MAX_Real_Double_REG = 0
-      , pc_MAX_Real_XMM_REG = 0
-      , pc_MAX_Real_Long_REG = 0
-      , pc_RESERVED_C_STACK_BYTES = 0
-      , pc_RESERVED_STACK_WORDS = 0
-      , pc_AP_STACK_SPLIM = 0
-      , pc_WORD_SIZE = 0
-      , pc_DOUBLE_SIZE = 0
-      , pc_CINT_SIZE = 0
-      , pc_CLONG_SIZE = 0
-      , pc_CLONG_LONG_SIZE = 0
-      , pc_BITMAP_BITS_SHIFT = 0
-      , pc_TAG_BITS = 0
-      , pc_WORDS_BIGENDIAN = False
-      , pc_DYNAMIC_BY_DEFAULT = False
-      , pc_LDV_SHIFT = 0
-      , pc_ILDV_CREATE_MASK = 0
-      , pc_ILDV_STATE_CREATE = 0
-      , pc_ILDV_STATE_USE = 0
-      , pc_OFFSET_StgRegTable_rYMM1 = 0
-      , pc_OFFSET_StgRegTable_rYMM2 = 0
-      , pc_OFFSET_StgRegTable_rYMM3 = 0
-      , pc_OFFSET_StgRegTable_rYMM4 = 0
-      , pc_OFFSET_StgRegTable_rYMM5 = 0
-      , pc_OFFSET_StgRegTable_rYMM6 = 0
-      , pc_OFFSET_StgRegTable_rZMM1 = 0
-      , pc_OFFSET_StgRegTable_rZMM2 = 0
-      , pc_OFFSET_StgRegTable_rZMM3 = 0
-      , pc_OFFSET_StgRegTable_rZMM4 = 0
-      , pc_OFFSET_StgRegTable_rZMM5 = 0
-      , pc_OFFSET_StgRegTable_rZMM6 = 0
-      , pc_OFFSET_StgFunInfoExtraFwd_arity = 0
-      , pc_REP_StgFunInfoExtraFwd_arity = 0
-      , pc_OFFSET_StgFunInfoExtraRev_arity = 0
-      , pc_REP_StgFunInfoExtraRev_arity = 0
-      , pc_OFFSET_StgArrWords_bytes = 0
-      , pc_SIZEOF_StgSmallMutArrPtrs_NoHdr = 0
-      , pc_OFFSET_StgSmallMutArrPtrs_ptrs = 0
-      }
author	Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>	2014-05-05 09:01:03 +0200
committer	Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>	2014-05-05 11:00:41 +0200
commit	cc269e6b0b615e9e237c35a985e4ace7b9ab0dd9 (patch)
tree	f0264138c81909151f9724c1f02f7bf8b30803cb
parent	7ac2d0f2d31c2e1c7ede09828f3d5ba5626bd0d4 (diff)