20 files changed, 986 insertions, 662 deletions
diff --git a/haddock.cabal b/haddock.cabal
index d61c78c5..8067db7c 100644
--- a/haddock.cabal
+++ b/haddock.cabal
@@ -69,8 +69,7 @@ executable haddock
     ghc-options:          -funbox-strict-fields -Wall -fwarn-tabs
   else
     ghc-options:          -funbox-strict-fields -Wall -fwarn-tabs -O2
-  if !flag(in-ghc-tree)
-    build-tools: alex >= 3, happy >= 1.18
+
   build-depends:
     base >= 4.3 && < 4.8
   if flag(in-ghc-tree)
@@ -105,8 +104,8 @@ executable haddock
       Haddock.Interface.AttachInstances
       Haddock.Interface.LexParseRn
       Haddock.Interface.ParseModuleHeader
-      Haddock.Lex
-      Haddock.Parse
+      Haddock.Parser
+      Haddock.Utf8
       Haddock.Utils
       Haddock.Backends.Xhtml
       Haddock.Backends.Xhtml.Decl
@@ -129,10 +128,7 @@ executable haddock
       Haddock.Convert
   else
     build-depends:  haddock
-  -- In a GHC tree - in particular, in a source tarball - we don't
-  -- require alex or happy
-  if !flag(in-ghc-tree)
-    build-tools: alex >= 3, happy >= 1.18
+
 library
   default-language:     Haskell2010
 
@@ -179,8 +175,8 @@ library
     Haddock.Interface.AttachInstances
     Haddock.Interface.LexParseRn
     Haddock.Interface.ParseModuleHeader
-    Haddock.Lex
-    Haddock.Parse
+    Haddock.Parser
+    Haddock.Utf8
     Haddock.Utils
     Haddock.Backends.Xhtml
     Haddock.Backends.Xhtml.Decl
@@ -221,16 +217,6 @@ test-suite latex-test
   build-depends:    base, directory, process, filepath, Cabal
 
 test-suite spec
-  -- NOTE: As of this writing, Cabal does not properly handle alex/happy for
-  -- test suites.  We work around this by adding dist/build to hs-source-dirs,
-  -- so that the the generated lexer/parser from the library is used.  In
-  -- addition we depend on 'haddock', so that the library is compiled before
-  -- the test suite.
-  --
-  -- The corresponding cabal ticket is here:
-  -- https://github.com/haskell/cabal/issues/943
-  hs-source-dirs:
-      dist/build
   type:             exitcode-stdio-1.0
   default-language: Haskell2010
   main-is:          Spec.hs
@@ -250,6 +236,7 @@ test-suite spec
     , deepseq
     , array
     , hspec
+    , QuickCheck == 2.*
 
   build-depends:
       haddock
diff --git a/html-test/ref/DeprecationMessageParseError.html b/html-test/ref/TitledPicture.html
index c71ab1a7..34788690 100644
--- a/html-test/ref/DeprecationMessageParseError.html
+++ b/html-test/ref/TitledPicture.html
@@ -3,13 +3,13 @@
 ><head
   ><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"
      /><title
-    >DeprecationMessageParseError</title
+    >TitledPicture</title
     ><link href="ocean.css" rel="stylesheet" type="text/css" title="Ocean"
      /><script src="haddock-util.js" type="text/javascript"
     ></script
     ><script type="text/javascript"
     >//<![CDATA[
-window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessageParseError.html");};
+window.onload = function () {pageLoad();setSynopsis("mini_TitledPicture.html");};
 //]]>
 </script
     ></head
@@ -39,22 +39,7 @@ window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessagePars
 	    ></tr
 	  ></table
 	><p class="caption"
-	>DeprecationMessageParseError</p
-	></div
-      ><div id="description"
-      ><p class="caption"
-	>Description</p
-	><div class="doc"
-	><p
-	  >What is tested here:
-</p
-	  ><ul
-	  ><li
-	    > If parsing of a deprecation message fails, the message is included
-   verbatim.
-</li
-	    ></ul
-	  ></div
+	>TitledPicture</p
 	></div
       ><div id="synopsis"
       ><p id="control.syn" class="caption expander" onclick="toggleSection('syn')"
@@ -64,7 +49,13 @@ window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessagePars
 	  ><a href=""
 	    >foo</a
 	    > :: <a href=""
-	    >Int</a
+	    >Integer</a
+	    ></li
+	  ><li class="src short"
+	  ><a href=""
+	    >bar</a
+	    > :: <a href=""
+	    >Integer</a
 	    ></li
 	  ></ul
 	></div
@@ -76,15 +67,34 @@ window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessagePars
 	  ><a name="v:foo" class="def"
 	    >foo</a
 	    > :: <a href=""
-	    >Int</a
+	    >Integer</a
+	    ></p
+	  ><div class="doc"
+	  ><p
+	    >Picture for <code
+	      ><a href=""
+		>foo</a
+		></code
+	      > without a title <img src="bar"
+	       />
+</p
+	    ></div
+	  ></div
+	><div class="top"
+	><p class="src"
+	  ><a name="v:bar" class="def"
+	    >bar</a
+	    > :: <a href=""
+	    >Integer</a
 	    ></p
 	  ><div class="doc"
-	  ><div class="warning"
-	    ><p
-	      >Deprecated: use @bar instead</p
-	      ></div
-	    ><p
-	    >some documentation for foo
+	  ><p
+	    >Picture for <code
+	      ><a href=""
+		>bar</a
+		></code
+	      > with title <img src="un&#8739;&#8705;&#8728;" title="&#948;&#8712;"
+	       />
 </p
 	    ></div
 	  ></div
diff --git a/html-test/src/DeprecationMessageParseError.hs b/html-test/src/DeprecationMessageParseError.hs
deleted file mode 100644
index 2f8fb492..00000000
--- a/html-test/src/DeprecationMessageParseError.hs
+++ /dev/null
@@ -1,12 +0,0 @@
--- |
--- What is tested here:
---
--- * If parsing of a deprecation message fails, the message is included
---   verbatim.
---
-module DeprecationMessageParseError where
-
--- | some documentation for foo
-foo :: Int
-foo = 23
-{-# DEPRECATED foo "use @bar instead" #-}
diff --git a/html-test/src/TitledPicture.hs b/html-test/src/TitledPicture.hs
new file mode 100644
index 00000000..7029d98a
--- /dev/null
+++ b/html-test/src/TitledPicture.hs
@@ -0,0 +1,7 @@
+module TitledPicture where
+
+-- | Picture for 'foo' without a title <<bar>>
+foo = 5
+
+-- | Picture for 'bar' with title <<un∣∁∘ δ∈>>
+bar = 6
diff --git a/html-test/src/Unicode.hs.disabled b/html-test/src/Unicode.hs
index d5bbf445..d5bbf445 100644
--- a/html-test/src/Unicode.hs.disabled
+++ b/html-test/src/Unicode.hs
diff --git a/src/Haddock.hs b/src/Haddock.hs
index 6d16d601..b741f5f1 100644
--- a/src/Haddock.hs
+++ b/src/Haddock.hs
@@ -23,8 +23,7 @@ import Haddock.Backends.Xhtml.Themes (getThemes)
 import Haddock.Backends.LaTeX
 import Haddock.Backends.Hoogle
 import Haddock.Interface
-import Haddock.Lex
-import Haddock.Parse
+import Haddock.Parser
 import Haddock.Types
 import Haddock.Version
 import Haddock.InterfaceFile
@@ -447,8 +446,7 @@ getPrologue dflags flags =
     [] -> return Nothing
     [filename] -> do
       str <- readFile filename
-      case parseParas (tokenise dflags str
-                      (1,0) {- TODO: real position -}) of
+      case parseParas dflags str of
         Nothing -> throwE $ "failed to parse haddock prologue from file: " ++ filename
         Just doc -> return (Just doc)
     _otherwise -> throwE "multiple -p/--prologue options"
diff --git a/src/Haddock/Backends/LaTeX.hs b/src/Haddock/Backends/LaTeX.hs
index 0bc95719..64966ac2 100644
--- a/src/Haddock/Backends/LaTeX.hs
+++ b/src/Haddock/Backends/LaTeX.hs
@@ -33,6 +33,9 @@ import Data.Char
 import Control.Monad
 import Data.Maybe
 import Data.List
+
+import Haddock.Doc (combineDocumentation)
+
 -- import Debug.Trace
 
 {- SAMPLE OUTPUT
@@ -278,11 +281,11 @@ ppDecl :: LHsDecl DocName
 
 ppDecl (L loc decl) (doc, fnArgsDoc) instances subdocs = case decl of
   TyClD d@(FamDecl {})          -> ppTyFam False loc doc d unicode
-  TyClD d@(DataDecl {})   
+  TyClD d@(DataDecl {})
                                 -> ppDataDecl instances subdocs loc doc d unicode
   TyClD d@(SynDecl {})          -> ppTySyn loc (doc, fnArgsDoc) d unicode
 -- Family instances happen via FamInst now
---  TyClD d@(TySynonym {})         
+--  TyClD d@(TySynonym {})
 --    | Just _  <- tcdTyPats d    -> ppTyInst False loc doc d unicode
 -- Family instances happen via FamInst now
   TyClD d@(ClassDecl {})         -> ppClassDecl instances loc doc subdocs d unicode
@@ -465,7 +468,7 @@ ppClassDecl :: [DocInstance DocName] -> SrcSpan
             -> Documentation DocName -> [(DocName, DocForDecl DocName)]
             -> TyClDecl DocName -> Bool -> LaTeX
 ppClassDecl instances loc doc subdocs
-  (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds 
+  (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds
              , tcdSigs = lsigs, tcdATs = ats, tcdATDefs = at_defs }) unicode
   = declWithDoc classheader (if null body then Nothing else Just (vcat body)) $$
     instancesBit
@@ -504,7 +507,7 @@ ppDocInstances unicode (i : rest)
   | Just ihead <- isUndocdInstance i
   = declWithDoc (vcat (map (ppInstDecl unicode) (ihead:is))) Nothing $$
     ppDocInstances unicode rest'
-  | otherwise 
+  | otherwise
   = ppDocInstance unicode i $$ ppDocInstances unicode rest
   where
     (is, rest') = spanWith isUndocdInstance rest
@@ -650,9 +653,9 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) =
 
 -- {-
 -- ppHsFullConstr :: HsConDecl -> LaTeX
--- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) = 
+-- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) =
 --      declWithDoc False doc (
--- 	hsep ((ppHsConstrHdr tvs ctxt +++ 
+-- 	hsep ((ppHsConstrHdr tvs ctxt +++
 -- 		ppHsBinder False nm) : map ppHsBangType typeList)
 --       )
 -- ppHsFullConstr (HsRecDecl _ nm tvs ctxt fields doc) =
@@ -661,35 +664,35 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) =
 --        Nothing -> aboves [hdr, fields_html]
 --        Just _  -> aboves [hdr, constr_doc, fields_html]
 --    )
--- 
+--
 --   where hdr = declBox (ppHsConstrHdr tvs ctxt +++ ppHsBinder False nm)
--- 
--- 	constr_doc	
+--
+-- 	constr_doc
 -- 	  | isJust doc = docBox (docToLaTeX (fromJust doc))
 -- 	  | otherwise  = LaTeX.emptyTable
--- 
--- 	fields_html = 
--- 	   td << 
+--
+-- 	fields_html =
+-- 	   td <<
 -- 	      table ! [width "100%", cellpadding 0, cellspacing 8] << (
 -- 		   aboves (map ppFullField (concat (map expandField fields)))
 -- 		)
 -- -}
--- 
+--
 -- ppShortField :: Bool -> Bool -> ConDeclField DocName -> LaTeX
 -- ppShortField summary unicode (ConDeclField (L _ name) ltype _)
 --   = tda [theclass "recfield"] << (
 --       ppBinder summary (docNameOcc name)
 --       <+> dcolon unicode <+> ppLType unicode ltype
 --     )
--- 
+--
 -- {-
 -- ppFullField :: HsFieldDecl -> LaTeX
--- ppFullField (HsFieldDecl [n] ty doc) 
+-- ppFullField (HsFieldDecl [n] ty doc)
 --   = declWithDoc False doc (
 -- 	ppHsBinder False n <+> dcolon <+> ppHsBangType ty
 --     )
 -- ppFullField _ = error "ppFullField"
--- 
+--
 -- expandField :: HsFieldDecl -> [HsFieldDecl]
 -- expandField (HsFieldDecl ns ty doc) = [ HsFieldDecl [n] ty doc | n <- ns ]
 -- -}
@@ -718,7 +721,7 @@ ppAppNameTypes :: DocName -> [HsType DocName] -> Bool -> LaTeX
 ppAppNameTypes n ts unicode = ppTypeApp n ts ppDocName (ppParendType unicode)
 
 
--- | Print an application of a DocName and a list of Names 
+-- | Print an application of a DocName and a list of Names
 ppAppDocNameNames :: Bool -> DocName -> [Name] -> LaTeX
 ppAppDocNameNames _summ n ns =
   ppTypeApp n ns (ppBinder . nameOccName . getName) ppSymName
@@ -1002,7 +1005,7 @@ parLatexMarkup ppId = Markup {
   markupEmphasis             = \p v -> emph (p v),
   markupMonospaced           = \p _ -> tt (p Mono),
   markupUnorderedList        = \p v -> itemizedList (map ($v) p) $$ text "",
-  markupPic                  = \path _ -> parens (text "image: " <> text path),
+  markupPic                  = \p _ -> markupPic p,
   markupOrderedList          = \p v -> enumeratedList (map ($v) p) $$ text "",
   markupDefList              = \l v -> descriptionList (map (\(a,b) -> (a v, b v)) l),
   markupCodeBlock            = \p _ -> quote (verb (p Verb)) $$ text "",
@@ -1020,6 +1023,14 @@ parLatexMarkup ppId = Markup {
       Just label -> text "\\href" <> braces (text url) <> braces (text label)
       Nothing    -> text "\\url"  <> braces (text url)
 
+    -- Is there a better way of doing this? Just a space is an aribtrary choice.
+    markupPic (Picture uri title) = parens (imageText title)
+      where
+        imageText Nothing = beg
+        imageText (Just t) = beg <> text " " <> text t
+
+        beg = text "image: " <> text uri
+
     markupId ppId_ id v =
       case v of
         Verb  -> theid
diff --git a/src/Haddock/Backends/Xhtml/Decl.hs b/src/Haddock/Backends/Xhtml/Decl.hs
index 2ecc6464..91e6871d 100644
--- a/src/Haddock/Backends/Xhtml/Decl.hs
+++ b/src/Haddock/Backends/Xhtml/Decl.hs
@@ -25,6 +25,7 @@ import Haddock.Backends.Xhtml.Types
 import Haddock.Backends.Xhtml.Utils
 import Haddock.GhcUtils
 import Haddock.Types
+import Haddock.Doc (combineDocumentation)
 
 import           Data.List             ( intersperse )
 import qualified Data.Map as Map
@@ -115,7 +116,7 @@ ppFor _ _ _ _ _ _ _ = error "ppFor"
 ppTySyn :: Bool -> LinksInfo -> SrcSpan -> DocForDecl DocName -> TyClDecl DocName -> Bool
         -> Qualification -> Html
 ppTySyn summary links loc doc (SynDecl { tcdLName = L _ name, tcdTyVars = ltyvars
-                                       , tcdRhs = ltype }) 
+                                       , tcdRhs = ltype })
         unicode qual
   = ppTypeOrFunSig summary links loc [name] (unLoc ltype) doc
                    (full, hdr, spaceHtml +++ equals) unicode qual
@@ -204,12 +205,12 @@ ppAssocType summ links doc (L loc decl) unicode qual =
 -- * TyClDecl helpers
 --------------------------------------------------------------------------------
 
--- | Print a type family and its variables 
+-- | Print a type family and its variables
 ppFamDeclBinderWithVars :: Bool -> FamilyDecl DocName -> Html
 ppFamDeclBinderWithVars summ (FamilyDecl { fdLName = lname, fdTyVars = tvs }) =
   ppAppDocNameNames summ (unLoc lname) (tyvarNames tvs)
 
--- | Print a newtype / data binder and its variables 
+-- | Print a newtype / data binder and its variables
 ppDataBinderWithVars :: Bool -> TyClDecl DocName -> Html
 ppDataBinderWithVars summ decl =
   ppAppDocNameNames summ (tcdName decl) (tyvarNames $ tcdTyVars decl)
@@ -226,7 +227,7 @@ ppAppNameTypes n ts unicode qual =
     ppTypeApp n ts (ppDocName qual) (ppParendType unicode qual)
 
 
--- | Print an application of a DocName and a list of Names 
+-- | Print an application of a DocName and a list of Names
 ppAppDocNameNames :: Bool -> DocName -> [Name] -> Html
 ppAppDocNameNames summ n ns =
   ppTypeApp n ns (ppBinder summ . nameOccName . getName) ppTyName
@@ -305,7 +306,7 @@ ppShortClassDecl :: Bool -> LinksInfo -> TyClDecl DocName -> SrcSpan
                  -> Html
 ppShortClassDecl summary links (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = tvs
                                           , tcdFDs = fds, tcdSigs = sigs, tcdATs = ats }) loc
-    subdocs unicode qual = 
+    subdocs unicode qual =
   if null sigs && null ats
     then (if summary then id else topDeclElem links loc [nm]) hdr
     else (if summary then id else topDeclElem links loc [nm]) (hdr <+> keyword "where")
@@ -580,7 +581,7 @@ ppShortField summary unicode qual (ConDeclField (L _ name) ltype _)
 -- Currently doesn't handle 'data instance' decls or kind signatures
 ppDataHeader :: Bool -> TyClDecl DocName -> Bool -> Qualification -> Html
 ppDataHeader summary decl@(DataDecl { tcdDataDefn = HsDataDefn { dd_ND = nd
-                                                               , dd_ctxt = ctxt } }) 
+                                                               , dd_ctxt = ctxt } })
              unicode qual
   = -- newtype or data
     (case nd of { NewType -> keyword "newtype"; DataType -> keyword "data" }) <+>
@@ -723,5 +724,3 @@ ppr_fun_ty ctxt_prec ty1 ty2 unicode qual
     in
     maybeParen ctxt_prec pREC_FUN $
     hsep [p1, arrow unicode <+> p2]
-
-
diff --git a/src/Haddock/Backends/Xhtml/DocMarkup.hs b/src/Haddock/Backends/Xhtml/DocMarkup.hs
index aa4ba377..31d653bc 100644
--- a/src/Haddock/Backends/Xhtml/DocMarkup.hs
+++ b/src/Haddock/Backends/Xhtml/DocMarkup.hs
@@ -18,18 +18,19 @@ module Haddock.Backends.Xhtml.DocMarkup (
   docElement, docSection, docSection_,
 ) where
 
+import Control.Applicative ((<$>))
 
 import Haddock.Backends.Xhtml.Names
 import Haddock.Backends.Xhtml.Utils
 import Haddock.Types
 import Haddock.Utils
+import Haddock.Doc (combineDocumentation)
 
-import Text.XHtml hiding ( name, title, p, quote )
+import Text.XHtml hiding ( name, p, quote )
 import Data.Maybe (fromMaybe)
 
 import GHC
 
-
 parHtmlMarkup :: Qualification -> (a -> Html) -> DocMarkup a Html
 parHtmlMarkup qual ppId = Markup {
   markupEmpty                = noHtml,
@@ -49,7 +50,7 @@ parHtmlMarkup qual ppId = Markup {
   markupCodeBlock            = pre,
   markupHyperlink            = \(Hyperlink url mLabel) -> anchor ! [href url] << fromMaybe url mLabel,
   markupAName                = \aname -> namedAnchor aname << "",
-  markupPic                  = \path -> image ! [src path],
+  markupPic                  = \(Picture uri t) -> image ! ([src uri] ++ fromMaybe [] (return . title <$> t)),
   markupProperty             = pre . toHtml,
   markupExample              = examplesToHtml
   }
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index 18555cfb..4d68c554 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,14 +1,27 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
 module Haddock.Doc (
   docAppend,
   docParagraph,
-  combineStringNodes
+  combineStringNodes,
+  combineDocumentation
   ) where
 
-
+import Data.Maybe
+import Data.Monoid
 import Haddock.Types
 import Data.Char (isSpace)
 import Control.Arrow ((***))
 
+-- We put it here so that we can avoid a circular import
+-- anything relevant imports this module anyway
+instance Monoid (Doc id) where
+  mempty  = DocEmpty
+  mappend = docAppend
+
+combineDocumentation :: Documentation name -> Maybe (Doc name)
+combineDocumentation (Documentation Nothing Nothing) = Nothing
+combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
+
 -- used to make parsing easier; we group the list items later
 docAppend :: Doc id -> Doc id -> Doc id
 docAppend (DocUnorderedList ds1) (DocUnorderedList ds2)
@@ -85,4 +98,7 @@ combineStringNodes x = x
 
 tryjoin :: Doc id -> Doc id
 tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
+tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z
+tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z))
+  = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z))
 tryjoin x = x
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 9d775bcb..13563532 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -19,8 +19,7 @@ module Haddock.Interface.LexParseRn
 
 
 import Haddock.Types
-import Haddock.Lex
-import Haddock.Parse
+import Haddock.Parser
 import Haddock.Interface.ParseModuleHeader
 import Haddock.Doc
 
@@ -50,19 +49,19 @@ processDocStringParas = process parseParas
 processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
 processDocString = process parseString
 
-process :: ([LToken] -> Maybe (Doc RdrName))
+process :: (DynFlags -> String -> Maybe (Doc RdrName))
         -> DynFlags
         -> GlobalRdrEnv
         -> HsDocString
         -> ErrMsgM (Maybe (Doc Name))
 process parse dflags gre (HsDocString fs) = do
    let str = unpackFS fs
-   let toks = tokenise dflags str (0,0)  -- TODO: real position
-   case parse toks of
+   case parse dflags str of
      Nothing -> do
        tell [ "doc comment parse failed: " ++ str ]
        return Nothing
-     Just doc -> return (Just (rename dflags gre doc))
+     Just doc -> do
+       return (Just (rename dflags gre doc))
 
 
 processModuleHeader :: DynFlags -> GlobalRdrEnv -> SafeHaskellMode -> Maybe LHsDocString
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index 5087affe..2e4fe73b 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -12,8 +12,7 @@
 module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where
 
 import Haddock.Types
-import Haddock.Lex
-import Haddock.Parse
+import Haddock.Parser
 
 import RdrName
 import DynFlags
@@ -47,15 +46,13 @@ parseModuleHeader dflags str0 =
       description1 :: Either String (Maybe (Doc RdrName))
       description1 = case descriptionOpt of
          Nothing -> Right Nothing
-         -- TODO: pass real file position
-         Just description -> case parseString $ tokenise dflags description (0,0) of
+         Just description -> case parseString dflags description of
             Nothing -> Left ("Cannot parse Description: " ++ description)
             Just doc -> Right (Just doc)
    in
       case description1 of
          Left mess -> Left mess
-         -- TODO: pass real file position
-         Right docOpt -> case parseParas $ tokenise dflags str8 (0,0) of
+         Right docOpt -> case parseParas dflags str8 of
            Nothing -> Left "Cannot parse header documentation paragraphs"
            Just doc -> Right (HaddockModInfo {
             hmi_description = docOpt,
diff --git a/src/Haddock/InterfaceFile.hs b/src/Haddock/InterfaceFile.hs
index b993dd67..189550b8 100644
--- a/src/Haddock/InterfaceFile.hs
+++ b/src/Haddock/InterfaceFile.hs
@@ -428,6 +428,16 @@ instance Binary Hyperlink where
         label <- get bh
         return (Hyperlink url label)
 
+instance Binary Picture where
+    put_ bh (Picture uri title) = do
+        put_ bh uri
+        put_ bh title
+    get bh = do
+        uri <- get bh
+        title <- get bh
+        return (Picture uri title)
+
+
 
 {-* Generated by DrIFT : Look, but Don't Touch. *-}
 instance (Binary id) => Binary (Doc id) where
diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x
deleted file mode 100644
index 9e59fa4c..00000000
--- a/src/Haddock/Lex.x
+++ /dev/null
@@ -1,255 +0,0 @@
---
--- Haddock - A Haskell Documentation Tool
---
--- (c) Simon Marlow 2002
---
--- This file was modified and integrated into GHC by David Waern 2006.
--- Then moved back into Haddock by Isaac Dupree in 2009 :-)
---
-
-{
-{-# LANGUAGE BangPatterns #-}   -- Generated by Alex
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Haddock.Lex (
-	Token(..),
-	LToken,
-	tokenise
- ) where
-
-import Lexer hiding (Token)
-import Parser ( parseIdentifier )
-import StringBuffer
-import RdrName
-import SrcLoc
-import DynFlags
-import FastString
-
-import qualified Data.Bits
-import Data.Char
-import Data.Word (Word8)
-import Numeric
-import System.IO.Unsafe
-import Debug.Trace
-}
-
-$ws    = $white # \n
-$digit = [0-9]
-$hexdigit = [0-9a-fA-F]
-$special =  [\"\@]
-$alphanum = [A-Za-z0-9]
-$ident    = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
-
-:-
-
--- beginning of a paragraph
-<0,para> {
- $ws* \n		;
- $ws* \>		{ begin birdtrack }
- $ws* prop \> .* \n	{ strtoken TokProperty `andBegin` property}
- $ws* \>\>\>            { strtoken TokExamplePrompt `andBegin` exampleexpr }
- $ws* [\*\-]		{ token TokBullet `andBegin` string }
- $ws* \[		{ token TokDefStart `andBegin` def }
- $ws* \( $digit+ \) 	{ token TokNumber `andBegin` string }
- $ws* $digit+ \. 	{ token TokNumber `andBegin` string }
- $ws*			{ begin string }		
-}
-
--- beginning of a line
-<line> {
-  $ws* \>		{ begin birdtrack }
-  $ws* \>\>\>		{ strtoken TokExamplePrompt `andBegin` exampleexpr }
-  $ws* \n		{ token TokPara `andBegin` para }
-  -- Here, we really want to be able to say
-  -- $ws* (\n | <eof>) 	{ token TokPara `andBegin` para}
-  -- because otherwise a trailing line of whitespace will result in 
-  -- a spurious TokString at the end of a docstring.  We don't have <eof>,
-  -- though (NOW I realise what it was for :-).  To get around this, we always
-  -- append \n to the end of a docstring.
-  () 			{ begin string }
-}
-
-<birdtrack> .*	\n?	{ strtokenNL TokBirdTrack `andBegin` line }
-
-<property> ()           { token TokPara `andBegin` para }
-
-<example> {
-  $ws*	\n		{ token TokPara `andBegin` para }
-  $ws* \>\>\>	        { strtoken TokExamplePrompt `andBegin` exampleexpr }
-  ()			{ begin exampleresult }
-}
-
-<exampleexpr> .* \n	{ strtokenNL TokExampleExpression `andBegin` example }
-
-<exampleresult> .* \n	{ strtokenNL TokExampleResult `andBegin` example }
-
-<string,def> {
-  $special			{ strtoken $ \s -> TokSpecial (head s) }
-  \<\< [^\>]* \>\>              { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
-  \< [^\>]* \>			{ strtoken $ \s -> TokURL (init (tail s)) }
-  \# [^\#]* \#			{ strtoken $ \s -> TokAName (init (tail s)) }
-  \/ [^\/]* \/                  { strtoken $ \s -> TokEmphasis (init (tail s)) }
-  [\'\`] $ident+ [\'\`]		{ ident }
-  \\ .				{ strtoken (TokString . tail) }
-  "&#" $digit+ \;		{ strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
-  "&#" [xX] $hexdigit+ \;	{ strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
-  -- allow special characters through if they don't fit one of the previous
-  -- patterns.
-  [\/\'\`\<\#\&\\]			{ strtoken TokString }
-  [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
-  [^ $special \/ \< \# \n \'\` \& \\ \]]+    { strtoken TokString }
-}
-
-<def> {
-  \]				{ token TokDefEnd `andBegin` string }
-}
-
--- ']' doesn't have any special meaning outside of the [...] at the beginning
--- of a definition paragraph.
-<string> {
-  \]				{ strtoken TokString }
-}
-
-{
--- | A located token
-type LToken = (Token, AlexPosn)
-
-data Token
-  = TokPara
-  | TokNumber
-  | TokBullet
-  | TokDefStart
-  | TokDefEnd
-  | TokSpecial Char
-  | TokIdent RdrName
-  | TokString String
-  | TokURL String
-  | TokPic String
-  | TokEmphasis String
-  | TokAName String
-  | TokBirdTrack String
-  | TokProperty String
-  | TokExamplePrompt String
-  | TokExampleExpression String
-  | TokExampleResult String
---  deriving Show
-
-tokenPos :: LToken -> (Int, Int)
-tokenPos t = let AlexPn _ line col = snd t in (line, col)
-
--- -----------------------------------------------------------------------------
--- Alex support stuff
-
--- XXX: copied the posn wrapper code from Alex to make this lexer work
--- with both Alex 2.x and Alex 3.x.  However, we are not using the
--- Unicode/UTF-8 support in Alex 3.x, and Unicode documentation will
--- probably get mangled.
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-utf8Encode :: Char -> [Word8]
-utf8Encode = map fromIntegral . go . ord
- where
-  go oc
-   | oc <= 0x7f       = [oc]
-
-   | oc <= 0x7ff      = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
-                        , 0x80 + oc Data.Bits..&. 0x3f
-                        ]
-
-   | oc <= 0xffff     = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
-                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
-                        , 0x80 + oc Data.Bits..&. 0x3f
-                        ]
-   | otherwise        = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
-                        , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
-                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
-                        , 0x80 + oc Data.Bits..&. 0x3f
-                        ]
-
-type Byte = Word8
-
-type AlexInput = (AlexPosn,     -- current position,
-                  Char,         -- previous char
-                  [Byte],       -- pending bytes on current char
-                  String)       -- current input string
-
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (p,c,bs,s) = c
-
-alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
-alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
-alexGetByte (p,c,[],[]) = Nothing
-alexGetByte (p,_,[],(c:s))  = let p' = alexMove p c
-                                  (b:bs) = utf8Encode c
-                              in p' `seq`  Just (b, (p', c, bs, s))
-
-data AlexPosn = AlexPn !Int !Int !Int
-        deriving (Eq,Show)
-
-alexMove :: AlexPosn -> Char -> AlexPosn
-alexMove (AlexPn a l c) '\t' = AlexPn (a+1)  l     (((c+7) `div` 8)*8+1)
-alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1)   1
-alexMove (AlexPn a l c) _    = AlexPn (a+1)  l     (c+1)
-
-type StartCode = Int
-type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> DynFlags -> [LToken]
-
-tokenise :: DynFlags -> String -> (Int, Int) -> [LToken]
-tokenise dflags str (line, col) = go (posn,'\n',[],eofHack str) para
-  where posn = AlexPn 0 line col
-        go inp@(pos,_,_,str) sc =
-          case alexScan inp sc of
-                AlexEOF -> []
-                AlexError _ -> []
-                AlexSkip  inp' len     -> go inp' sc
-                AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags
-
--- NB. we add a final \n to the string, (see comment in the beginning of line
--- production above).
-eofHack str = str++"\n"
-
-andBegin  :: Action -> StartCode -> Action
-andBegin act new_sc = \pos str _ cont dflags -> act pos str new_sc cont dflags
-
-token :: Token -> Action
-token t = \pos _ sc cont _ -> (t, pos) : cont sc
-
-strtoken, strtokenNL :: (String -> Token) -> Action
-strtoken t = \pos str sc cont _ -> (t str, pos) : cont sc
-strtokenNL t = \pos str sc cont _ -> (t (filter (/= '\r') str), pos) : cont sc
--- ^ We only want LF line endings in our internal doc string format, so we
--- filter out all CRs.
-
-begin :: StartCode -> Action
-begin sc = \_ _ _ cont _ -> cont sc
-
--- -----------------------------------------------------------------------------
--- Lex a string as a Haskell identifier
-
-ident :: Action
-ident pos str sc cont dflags = 
-  case parseIdent dflags loc id of
-	Just names -> (TokIdent names, pos) : cont sc
-	Nothing -> (TokString str, pos) : cont sc
- where id = init (tail str)
-       -- TODO: Get the real filename here. Maybe we should just be
-       --       using GHC SrcLoc's ourself?
-       filename = mkFastString "<unknown file>"
-       loc = case pos of
-             AlexPn _ line col ->
-                 mkRealSrcLoc filename line col
-
-parseIdent :: DynFlags -> RealSrcLoc -> String -> Maybe RdrName
-parseIdent dflags loc str0 = 
-  let buffer = stringToStringBuffer str0
-      pstate = mkPState dflags buffer loc
-      result = unP parseIdentifier pstate 
-  in case result of 
-       POk _ name -> Just (unLoc name)
-       _ -> Nothing
-}
diff --git a/src/Haddock/Parse.y b/src/Haddock/Parse.y
deleted file mode 100644
index f40ff521..00000000
--- a/src/Haddock/Parse.y
+++ /dev/null
@@ -1,171 +0,0 @@
-{
-{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Haddock.Parse (parseString, parseParas) where
-
-import Haddock.Lex
-import Haddock.Types (Doc(..), Example(Example), Hyperlink(..))
-import Haddock.Doc
-import HsSyn
-import RdrName
-import Data.Char  (isSpace)
-import Data.Maybe (fromMaybe)
-import Data.List  (stripPrefix)
-}
-
-%expect 0
-
-%tokentype { LToken }
-
-%token	'/'	{ (TokSpecial '/',_) }
-	'@'	{ (TokSpecial '@',_) }
-	'['     { (TokDefStart,_) }
-	']'     { (TokDefEnd,_) }
-	DQUO 	{ (TokSpecial '\"',_) }
-	URL	{ (TokURL $$,_) }
-	PIC     { (TokPic $$,_) }
-	ANAME	{ (TokAName $$,_) }
-	'/../'  { (TokEmphasis $$,_) }
-	'-'	{ (TokBullet,_) }
-	'(n)'	{ (TokNumber,_) }
-	'>..'	{ (TokBirdTrack $$,_) }
-	PROP	{ (TokProperty $$,_) }
-	PROMPT	{ (TokExamplePrompt $$,_) }
-	RESULT	{ (TokExampleResult $$,_) }
-	EXP	{ (TokExampleExpression $$,_) }
-	IDENT   { (TokIdent $$,_) }
-	PARA    { (TokPara,_) }
-	STRING	{ (TokString $$,_) }
-
-%monad { Maybe }
-
-%name parseParas doc
-%name parseString seq
-
-%%
-
-doc	:: { Doc RdrName }
-	: apara PARA doc	{ docAppend $1 $3 }
-	| PARA doc 		{ $2 }
-	| apara			{ $1 }
-	| {- empty -}		{ DocEmpty }
-
-apara	:: { Doc RdrName }
-	: ulpara		{ DocUnorderedList [$1] }
-	| olpara		{ DocOrderedList [$1] }
-        | defpara               { DocDefList [$1] }
-	| para			{ $1 }
-
-ulpara  :: { Doc RdrName }
-	: '-' para		{ $2 }
-
-olpara  :: { Doc RdrName } 
-	: '(n)' para		{ $2 }
-
-defpara :: { (Doc RdrName, Doc RdrName) }
-	: '[' seq ']' seq	{ ($2, $4) }
-
-para    :: { Doc RdrName }
-	: seq			{ docParagraph $1 }
-	| codepara		{ DocCodeBlock $1 }
-	| property		{ $1 }
-	| examples		{ DocExamples $1 }
-
-codepara :: { Doc RdrName }
-	: '>..' codepara	{ docAppend (DocString $1) $2 }
-	| '>..'			{ DocString $1 }
-
-property :: { Doc RdrName }
-	: PROP			{ makeProperty $1 }
-
-examples :: { [Example] }
-	: example examples	{ $1 : $2 }
-	| example		{ [$1] }
-
-example :: { Example }
-	: PROMPT EXP result	{ makeExample $1 $2 (lines $3) }
-	| PROMPT EXP		{ makeExample $1 $2 [] }
-
-result :: { String }
-	: RESULT result		{ $1 ++ $2 }
-	| RESULT		{ $1 }
-
-seq	:: { Doc RdrName }
-	: elem seq		{ docAppend $1 $2 }
-	| elem			{ $1 }
-
-elem	:: { Doc RdrName }
-	: elem1			{ $1 }
-	| '@' seq1 '@'		{ DocMonospaced $2 }
-
-seq1	:: { Doc RdrName }
-	: PARA seq1             { docAppend (DocString "\n") $2 }
-	| elem1 seq1            { docAppend $1 $2 }
-	| elem1			{ $1 }
-
-elem1	:: { Doc RdrName }
-	: STRING		{ DocString $1 }
-	| '/../'                { DocEmphasis (DocString $1) }
-	| URL			{ DocHyperlink (makeHyperlink $1) }
-	| PIC                   { DocPic $1 }
-	| ANAME			{ DocAName $1 }
-	| IDENT			{ DocIdentifier $1 }
-	| DQUO strings DQUO	{ DocModule $2 }
-
-strings  :: { String }
-	: STRING		{ $1 }
-	| STRING strings	{ $1 ++ $2 }
-
-{
-happyError :: [LToken] -> Maybe a
-happyError toks = Nothing
-
--- | Create a `Hyperlink` from given string.
---
--- A hyperlink consists of a URL and an optional label.  The label is separated
--- from the url by one or more whitespace characters.
-makeHyperlink :: String -> Hyperlink
-makeHyperlink input = case break isSpace $ strip input of
-  (url, "")    -> Hyperlink url Nothing
-  (url, label) -> Hyperlink url (Just . dropWhile isSpace $ label)
-
-makeProperty :: String -> Doc RdrName
-makeProperty s = case strip s of
-  'p':'r':'o':'p':'>':xs ->
-	DocProperty (dropWhile isSpace xs)
-  xs ->
-	error $ "makeProperty: invalid input " ++ show xs
-
--- | Create an 'Example', stripping superfluous characters as appropriate
-makeExample :: String -> String -> [String] -> Example
-makeExample prompt expression result =
-  Example
-	(strip expression)	-- we do not care about leading and trailing
-				-- whitespace in expressions, so drop them
-	result'
-  where
-	-- 1. drop trailing whitespace from the prompt, remember the prefix
-	(prefix, _) = span isSpace prompt
-
-	-- 2. drop, if possible, the exact same sequence of whitespace
-	-- characters from each result line
-	--
-	-- 3. interpret lines that only contain the string "<BLANKLINE>" as an
-	-- empty line
-	result' = map (substituteBlankLine . tryStripPrefix prefix) result
-	  where
-		tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
-
-		substituteBlankLine "<BLANKLINE>" = ""
-		substituteBlankLine line          = line
-
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = dropWhile isSpace . reverse . dropWhile isSpace . reverse
-}
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
new file mode 100644
index 00000000..81e274ed
--- /dev/null
+++ b/src/Haddock/Parser.hs
@@ -0,0 +1,435 @@
+{-# LANGUAGE OverloadedStrings #-}
+-- |
+-- Module      :  Haddock.Parser
+-- Copyright   :  (c) Mateusz Kowalczyk 2013,
+--                    Simon Hengel      2013
+-- License     :  BSD-like
+--
+-- Maintainer  :  haddock@projects.haskell.org
+-- Stability   :  experimental
+-- Portability :  portable
+
+module Haddock.Parser (parseString, parseParas) where
+
+import           Control.Applicative
+import           Data.Attoparsec.ByteString   hiding (takeWhile1, take, inClass)
+import qualified Data.Attoparsec.ByteString.Char8 as A8
+import           Data.Attoparsec.ByteString.Char8 hiding (take, string)
+import qualified Data.ByteString as BS
+import           Data.Char (chr)
+import           Data.List (stripPrefix)
+import           Data.Maybe (fromMaybe)
+import           Data.Monoid
+import           DynFlags
+import           FastString (mkFastString)
+import           Haddock.Doc
+import           Haddock.Types
+import           Lexer (mkPState, unP, ParseResult(POk))
+import           Parser (parseIdentifier)
+import           RdrName
+import           SrcLoc (mkRealSrcLoc, unLoc)
+import           StringBuffer (stringToStringBuffer)
+import           Haddock.Utf8
+
+default (Int)
+
+-- | Main entry point to the parser. Appends the newline character
+-- to the input string.
+parseParas :: DynFlags
+              -> String -- ^ String to parse
+              -> Maybe (Doc RdrName)
+parseParas d s = case parseOnly (p <* skipSpace) (encodeUtf8 $ s ++ "\n") of
+  Right r -> Just $ combineStringNodes r
+  _ -> Nothing
+  where
+    p :: Parser (Doc RdrName)
+    -- make sure that we don't swallow up whitespace belonging to next paragraph
+    p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n")
+
+-- | A parser that parsers separate lines of the comments. Eventually
+-- called by 'parseParas'. Appends a newline character to the input string.
+-- Drops any whitespace in front of the input string. It's dropped for the sake of
+-- section headings.
+parseString :: DynFlags -> String -> Maybe (Doc RdrName)
+parseString d = parseString'' d . dropWhile isSpace
+
+-- | A parser that parsers separate lines of the comments. Eventually
+-- called by 'parseParas'. Appends a newline character to the input string.
+-- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use.
+parseString'' :: DynFlags -> String -> Maybe (Doc RdrName)
+parseString'' d = parseString' d . (++ "\n")
+
+-- | An internal use function. Split from the 'parseString' is useful
+-- as we can specify separately when we want the newline to be appended.
+parseString' :: DynFlags -> String -> Maybe (Doc RdrName)
+parseString' d s = case parseOnly p (encodeUtf8 s) of
+  Right r -> Just $ combineStringNodes r
+  _ -> Nothing
+  where
+    p :: Parser (Doc RdrName)
+    p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d
+                          <|> moduleName <|> picture <|> url
+                          <|> emphasis d <|> encodedChar <|> string' <|> skipChar)
+
+-- | Parses and processes
+-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
+--
+-- >>> parseOnly encodedChar "&#65;&#66;&#67;"
+-- Right (DocString "ABC")
+encodedChar :: Parser (Doc RdrName)
+encodedChar = "&#" *> c <* ";"
+  where
+    c = DocString . return . chr <$> num
+    num = hex <|> decimal
+    hex = ("x" <|> "X") *> hexadecimal
+
+-- | Plain, regular parser for text. Called as one of the last parsers
+-- to ensure that we have already given a chance to more meaningful parsers
+-- before capturing their characers.
+string' :: Parser (Doc RdrName)
+string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\")
+
+-- | Emphasis parser.
+--
+-- >>> parseOnly emphasis "/Hello world/"
+-- Right (DocEmphasis (DocString "Hello world"))
+emphasis :: DynFlags -> Parser (Doc RdrName)
+emphasis d = stringBlock d id DocEmphasis "/" "/" "\n"
+
+-- | Skips a single character and treats it as a plain string.
+-- This is done to skip over any special characters belonging to other
+-- elements but which were not deemed meaningful at their positions.
+-- Note that this can only be used in places where we're absolutely certain
+-- no unicode is present, such as to skip a 100% certain ASCII delimeter.
+skipChar :: Parser (Doc RdrName)
+skipChar = DocString . return <$> anyChar
+
+-- | Treats the next character as a regular string, even if it's normally
+-- used for markup.
+charEscape :: Parser (Doc RdrName)
+charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n'))
+
+-- | Text anchors to allow for jumping around the generated documentation.
+--
+-- >>> parseOnly anchor "#Hello world#"
+-- Right (DocAName "Hello world")
+anchor :: Parser (Doc RdrName)
+anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
+
+-- | Helper for markup structures surrounded with delimiters.
+stringBlock
+  :: DynFlags
+     -> (String -> String) -- ^ Function used to transform parsed out text
+                           -- before we send it to 'parseString''
+     -> (Doc RdrName -> Doc RdrName) -- ^ 'Doc' to wrap around the result
+     -> String -- ^ Opening delimiter
+     -> String -- ^ Closing delimiter
+     -> String -- ^ Additional characters to terminate parsing on
+     -> Parser (Doc RdrName)
+stringBlock d f doc op ed n = do
+  inner <- block op ed n
+  case parseString' d (f inner) of
+    Just r -> return $ doc r
+    _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’"
+
+-- | Returns sections of text delimited by specified text.
+block :: String -> String -> String -> Parser String
+block op ed n = reverse . drop (length ed) . reverse <$> block' op ed
+  where
+    block' op' ed' = string (encodeUtf8 op') *> mid
+      where
+        mid :: Parser String
+        mid = decodeUtf8 <$> string (encodeUtf8 ed')
+              <|> do
+                inner <- takeWithSkip (head ed') n
+                more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed')
+                        <|> block' "" ed'  -- not full ending, take more
+                return $ inner ++ more
+
+
+-- | Takes all characters until the specified one. Unconditionally
+-- takes a character if it's escaped. Fails if it doesn't find the character or
+-- when the input string is empty.
+takeWithSkip :: Char -> String -> Parser String
+takeWithSkip s n = do
+  content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome
+  if or (map (`elem` content) n) || last content /= s
+    then fail "failed in takeWithSkip"
+    else return content
+  where
+    gotSome [] = fail "EOF in takeWithSkip"
+    gotSome xs = return xs
+    -- Apparently ‘scan’ is so magical that it doesn't mangle unicode.
+    p (escaped, terminate) c
+      | terminate = Nothing -- swallows up that extra character
+      | escaped = Just (False, False)
+      | c == s = Just (False, True)
+      | otherwise = Just (c == '\\', False)
+
+-- | Monospaced strings.
+--
+-- >>> parseOnly (monospace dynflags) "@cruel@"
+-- Right (DocMonospaced (DocString "cruel"))
+monospace :: DynFlags -> Parser (Doc RdrName)
+monospace d = stringBlock d id DocMonospaced "@" "@" ""
+
+-- | Module name parser, surrounded by double quotes. This does a very primitive and
+-- purely syntactic checking so that obviously invalid names are not treated as valid
+-- and blindly hyperlinked (not starting with a capital letter or including spaces).
+moduleName :: Parser (Doc RdrName)
+moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
+  where legalModule = do
+          n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z'])
+               <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n"))
+
+          if any (`elem` n) " &[{}(=*)+]!#|@/;,^?"
+            then fail "invalid characters in module name"
+            else case n of
+            [] -> return []
+            _ -> if last n == '.' then fail "trailing dot in module name" else return n
+
+
+-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
+-- a title for the picture.
+--
+-- >>> parseOnly picture "<<hello.png>>"
+-- Right (DocPic (Picture "hello.png" Nothing))
+-- >>> parseOnly picture "<<hello.png world>>"
+-- Right (DocPic (Picture "hello.png" (Just "world")))
+picture :: Parser (Doc RdrName)
+picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
+
+-- | Paragraph parser, called by 'parseParas'.
+paragraph :: DynFlags -> Parser (Doc RdrName)
+paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d
+                                         <|> property <|> textParagraph d)
+
+-- | List parser, called by 'paragraph'.
+list :: DynFlags -> Parser (Doc RdrName)
+list d = DocUnorderedList <$> unorderedList d
+         <|> DocOrderedList <$> orderedList d
+         <|> DocDefList <$> definitionList d
+
+-- | Parse given text with a provided parser, casting
+-- Nothing to a failure
+parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use
+             -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result
+             -> BS.ByteString -- ^ Text to parse
+             -> Parser a
+parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str)
+
+-- | Parses unordered (bullet) lists.
+unorderedList :: DynFlags -> Parser [Doc RdrName]
+unorderedList d = ("*" <|> "-") *> innerList unorderedList d
+
+-- | Parses ordered lists (numbered or dashed).
+orderedList :: DynFlags -> Parser [Doc RdrName]
+orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d
+  where
+    dot = decimal <* "."
+    paren = "(" *> (decimal :: Parser Int) <* ")"
+
+-- | Generic function collecting any further lines belonging to the
+-- list entry and recursively collecting any further lists in the
+-- same paragraph. Usually used as
+--
+-- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags
+innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function
+             -> DynFlags
+             -> Parser [Doc RdrName]
+innerList p d = do
+  cl <- do
+    content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty
+    parseLine (parseString'' d) id content
+  ulcs <- many ulc
+  let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs]
+      unLists = mconcat [x | Left x <- ulcs]
+  return $ contents : unLists
+  where
+    ulc :: Parser (Either [Doc RdrName] (Doc RdrName))
+    ulc = Left <$> (optWs *> p d)
+          <|> Right <$> nonEmptyLine d
+
+-- | Takes the remained of the line until the newline character
+-- and calls 'parseLine' using 'parseString'. Fails if it's made
+-- up strictly of whitespace.
+nonEmptyLine :: DynFlags -> Parser (Doc RdrName)
+nonEmptyLine d = do
+  s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
+  parseLine (parseString'' d) id s
+  where
+    nonSpace xs
+      | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line"
+      | otherwise = return xs
+
+-- | Parses definition lists.
+definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]
+definitionList d = do
+  _ <- "["
+  inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n")
+  _ <- "]"
+  outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n")
+  ulcs <- many ulc
+  let contents = mconcat $ outer : [x | Right x <- ulcs]
+      unLists = map mconcat [x | Left x <- ulcs]
+  return $ (inner, contents) : unLists
+  where
+    ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName))
+    ulc = Left <$> (optWs *> definitionList d)
+          <|> Right <$> nonEmptyLine d
+
+-- | Parses birdtracks. No further markup is parsed after the birdtrack.
+-- Consecutive birdtracks are allowed.
+birdtracks :: Parser (Doc RdrName)
+birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
+  where
+    line = optWs *> ">" *> A8.takeWhile (/= '\n')
+
+-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
+-- Consecutive examples are accepted.
+examples :: Parser (Doc RdrName)
+examples = DocExamples <$> example
+
+-- | Collects consecutive examples and their results.
+example :: Parser [Example]
+example = do
+  ws <- optWs
+  prompt <- decodeUtf8 <$> string ">>>"
+  expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n")
+  results <- many result
+  let exs = concat [ e | Left e <- results ]
+      res = filter (not . null) [ r | Right r <- results ]
+  return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs
+  where
+    result = Left <$> example
+             <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n"
+
+-- | Propery parser.
+--
+-- >>> parseOnly property "prop> hello world"
+-- Right (DocProperty "hello world")
+property :: Parser (Doc RdrName)
+property = do
+    _ <- skipSpace
+    s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n'))
+    return $ makeProperty ("prop>" ++ s)
+
+-- | Paragraph level codeblock. Anything between the two delimiting @
+-- is parsed for markup.
+codeblock :: DynFlags -> Parser (Doc RdrName)
+codeblock d = do
+  -- Note that we don't need to use optWs here because in cases where
+  -- we don't see a \n immediatelly after the opening @, this parser
+  -- fails but we still have a chance to get a codeblock by getting
+  -- a monospaced doc on its own in the paragraph. With that, the cases
+  -- are covered. This should be updated if the implementation ever changes.
+  s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@")
+  maybe (fail "codeblock") (return . DocCodeBlock) s
+  where
+    block' = A8.scan False p
+      where
+        p isNewline c
+          | isNewline && c == '@' = Nothing
+          | otherwise = Just $ c == '\n'
+
+-- | Calls 'parseString'' on each line of a paragraph
+textParagraph :: DynFlags -> Parser (Doc RdrName)
+textParagraph d = do
+  s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
+  maybe (fail "textParagraph") (return . docParagraph) s
+  where
+    line = takeWhile1 (/= '\n')
+
+-- | See 'picture' for adding a page title.
+url :: Parser (Doc RdrName)
+url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
+      <|> autoUrl
+
+-- | Naive implementation of auto-linking. Will link everything after
+-- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space.
+-- Single trailing punctuation character (.!?,) is split off.
+autoUrl :: Parser (Doc RdrName)
+autoUrl = do
+  link <- decodeUtf8 <$> urlLone
+  return $ formatLink link
+  where
+    urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace)
+    prefixes = [ "http://", "https://", "ftp://"
+               , "ssh://", "gopher://" ]
+    formatLink :: String -> Doc RdrName
+    formatLink s = if last s `elem` ".!?,"
+                   then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s])
+                   else DocHyperlink $ Hyperlink s Nothing
+
+-- | Parses strings between identifier delimiters. Consumes all input that it
+-- deems to be valid in an identifier. Note that it simply blindly consumes
+-- characters and does no actual validation itself.
+parseValid :: Parser String
+parseValid = do
+  vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii)
+  c <- peekChar
+  case c of
+    Just '`' -> return vs
+    Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs
+    _ -> fail "outofvalid"
+
+-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the
+-- string it deems valid.
+identifier :: DynFlags -> Parser (Doc RdrName)
+identifier dflags = do
+  o <- idDelim
+  vid <- parseValid
+  e <- idDelim
+  return $ validIdentifier $ o : (vid ++ [e])
+  where idDelim = char '\'' <|> char '`'
+        validIdentifier str = case parseIdent (tail $ init str) of
+          Just identName -> DocIdentifier identName
+          Nothing -> DocString str
+        parseIdent :: String -> Maybe RdrName
+        parseIdent str0 =
+          let buffer = stringToStringBuffer str0
+              realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+              pstate = mkPState dflags buffer realSrcLc
+          in case unP parseIdentifier pstate of
+            POk _ name -> Just (unLoc name)
+            _ -> Nothing
+
+-- | Remove all leading and trailing whitespace
+strip :: String -> String
+strip = (\f -> f . f) $ dropWhile isSpace . reverse
+
+-- | Consumes whitespace, excluding a newline.
+optWs :: Parser BS.ByteString
+optWs = A8.takeWhile (`elem` " \t\f\v\r")
+
+-- | Create an 'Example', stripping superfluous characters as appropriate.
+-- Remembers the amount of indentation used for the prompt.
+makeExample :: String -> String -> [String] -> Example
+makeExample prompt expression res =
+  Example (strip expression) result'       -- drop whitespace in expressions
+  where (prefix, _) = span isSpace prompt
+        result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res
+          where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
+                substituteBlankLine "<BLANKLINE>" = ""
+                substituteBlankLine line          = line
+
+-- | Creates a 'Picture' with an optional title. Called by 'picture'.
+makePicture :: String -> Picture
+makePicture input = case break isSpace $ strip input of
+  (uri, "")    -> Picture uri Nothing
+  (uri, label) -> Picture uri (Just $ dropWhile isSpace label)
+
+-- | Creates a 'Hyperlink' with an optional title. Called by 'example'.
+makeHyperlink :: String -> Hyperlink
+makeHyperlink input = case break isSpace $ strip input of
+  (u, "")    -> Hyperlink u Nothing
+  (u, label) -> Hyperlink u (Just $ dropWhile isSpace label)
+
+-- | Makes a property that can be used by other programs for assertions.
+-- Drops whitespace around the property. Called by 'property'
+makeProperty :: String -> Doc RdrName
+makeProperty s = case strip s of
+  'p':'r':'o':'p':'>':xs ->
+    DocProperty (dropWhile isSpace xs)
+  xs ->
+    error $ "makeProperty: invalid input " ++ show xs
diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs
index 73fafd6b..00a8b68f 100644
--- a/src/Haddock/Types.hs
+++ b/src/Haddock/Types.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable #-}
+{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable, GADTs #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 -----------------------------------------------------------------------------
 -- |
@@ -19,7 +19,6 @@ module Haddock.Types (
   , HsDocString, LHsDocString
  ) where
 
-
 import Data.Foldable
 import Data.Traversable
 import Control.Exception
@@ -27,15 +26,12 @@ import Control.Arrow
 import Control.DeepSeq
 import Data.Typeable
 import Data.Map (Map)
-import Data.Maybe
 import qualified Data.Map as Map
-import Data.Monoid
 import GHC hiding (NoLink)
 import OccName
 import Control.Applicative (Applicative(..))
 import Control.Monad (ap)
 
-
 -----------------------------------------------------------------------------
 -- * Convenient synonyms
 -----------------------------------------------------------------------------
@@ -205,7 +201,7 @@ data ExportItem name
       , expItemSubs :: ![name]
       }
 
-  -- | A section heading. 
+  -- | A section heading.
   | ExportGroup
       {
         -- | Section level (1, 2, 3, ...).
@@ -230,11 +226,6 @@ data Documentation name = Documentation
   } deriving Functor
 
 
-combineDocumentation :: Documentation name -> Maybe (Doc name)
-combineDocumentation (Documentation Nothing Nothing) = Nothing
-combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
-
-
 -- | Arguments and result are indexed by Int, zero-based from the left,
 -- because that's the easiest to use when recursing over types.
 type FnArgsDoc name = Map Int (Doc name)
@@ -289,7 +280,6 @@ type DocInstance name = (InstHead name, Maybe (Doc name))
 -- of instance types.
 type InstHead name = ([HsType name], name, [HsType name])
 
-
 -----------------------------------------------------------------------------
 -- * Documentation comments
 -----------------------------------------------------------------------------
@@ -314,18 +304,12 @@ data Doc id
   | DocDefList [(Doc id, Doc id)]
   | DocCodeBlock (Doc id)
   | DocHyperlink Hyperlink
-  | DocPic String
+  | DocPic Picture
   | DocAName String
   | DocProperty String
   | DocExamples [Example]
   deriving (Functor, Foldable, Traversable)
 
-
-instance Monoid (Doc id) where
-  mempty  = DocEmpty
-  mappend = DocAppend
-
-
 instance NFData a => NFData (Doc a) where
   rnf doc = case doc of
     DocEmpty                  -> ()
@@ -360,9 +344,18 @@ data Hyperlink = Hyperlink
   } deriving (Eq, Show)
 
 
+data Picture = Picture
+  { pictureUri   :: String
+  , pictureTitle :: Maybe String
+  } deriving (Eq, Show)
+
+
 instance NFData Hyperlink where
   rnf (Hyperlink a b) = a `deepseq` b `deepseq` ()
 
+instance NFData Picture where
+  rnf (Picture a b) = a `deepseq` b `deepseq` ()
+
 
 data Example = Example
   { exampleExpression :: String
@@ -396,7 +389,7 @@ data DocMarkup id a = Markup
   , markupCodeBlock            :: a -> a
   , markupHyperlink            :: Hyperlink -> a
   , markupAName                :: String -> a
-  , markupPic                  :: String -> a
+  , markupPic                  :: Picture -> a
   , markupProperty             :: String -> a
   , markupExample              :: [Example] -> a
   }
diff --git a/src/Haddock/Utf8.hs b/src/Haddock/Utf8.hs
new file mode 100644
index 00000000..1fb0e818
--- /dev/null
+++ b/src/Haddock/Utf8.hs
@@ -0,0 +1,74 @@
+module Haddock.Utf8 (encodeUtf8, decodeUtf8) where
+import           Data.Bits ((.|.), (.&.), shiftL, shiftR)
+import qualified Data.ByteString as BS
+import           Data.Char (chr, ord)
+import           Data.Word (Word8)
+
+-- | Helper that encodes and packs a 'String' into a 'BS.ByteString'
+encodeUtf8 :: String -> BS.ByteString
+encodeUtf8 = BS.pack . encode
+
+-- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String'
+decodeUtf8 :: BS.ByteString -> String
+decodeUtf8 = decode . BS.unpack
+
+-- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding
+-- | Character to use when 'encode' or 'decode' fail for a byte.
+replacementCharacter :: Char
+replacementCharacter = '\xfffd'
+
+-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
+encode :: String -> [Word8]
+encode = concatMap (map fromIntegral . go . ord)
+ where
+  go oc
+   | oc <= 0x7f       = [oc]
+
+   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
+                        , 0x80 + oc .&. 0x3f
+                        ]
+
+   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
+                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
+                        , 0x80 + oc .&. 0x3f
+                        ]
+   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
+                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
+                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
+                        , 0x80 + oc .&. 0x3f
+                        ]
+
+-- | Decode a UTF8 string packed into a list of Word8 values, directly to String
+decode :: [Word8] -> String
+decode [    ] = ""
+decode (c:cs)
+  | c < 0x80  = chr (fromEnum c) : decode cs
+  | c < 0xc0  = replacementCharacter : decode cs
+  | c < 0xe0  = multi1
+  | c < 0xf0  = multi_byte 2 0xf  0x800
+  | c < 0xf8  = multi_byte 3 0x7  0x10000
+  | c < 0xfc  = multi_byte 4 0x3  0x200000
+  | c < 0xfe  = multi_byte 5 0x1  0x4000000
+  | otherwise = replacementCharacter : decode cs
+  where
+    multi1 = case cs of
+      c1 : ds | c1 .&. 0xc0 == 0x80 ->
+        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
+        in if d >= 0x000080 then toEnum d : decode ds
+                            else replacementCharacter : decode ds
+      _ -> replacementCharacter : decode cs
+
+    multi_byte :: Int -> Word8 -> Int -> String
+    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
+      where
+        aux 0 rs acc
+          | overlong <= acc && acc <= 0x10ffff &&
+            (acc < 0xd800 || 0xdfff < acc)     &&
+            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs
+          | otherwise = replacementCharacter : decode rs
+
+        aux n (r:rs) acc
+          | r .&. 0xc0 == 0x80 = aux (n-1) rs
+                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
+
+        aux _ rs     _ = replacementCharacter : decode rs
diff --git a/test/Haddock/ParseSpec.hs b/test/Haddock/ParseSpec.hs
index 799330c2..b649d901 100644
--- a/test/Haddock/ParseSpec.hs
+++ b/test/Haddock/ParseSpec.hs
@@ -6,15 +6,16 @@
 module Haddock.ParseSpec (main, spec) where
 
 import           Control.Applicative
+import           Data.Maybe (isJust)
 import           Data.Monoid
 import           Data.String
 import           Haddock.Doc (combineStringNodes)
-import           Haddock.Lex (tokenise)
-import qualified Haddock.Parse as Parse
+import qualified Haddock.Parser as Parse
 import           Haddock.Types
 import           Outputable (Outputable, showSDoc, ppr)
 import           RdrName (RdrName)
 import           Test.Hspec
+import           Test.QuickCheck (property)
 
 import           Helper
 
@@ -32,25 +33,111 @@ instance IsString a => IsString (Maybe a) where
   fromString = Just . fromString
 
 parseParas :: String -> Maybe (Doc RdrName)
-parseParas s = Parse.parseParas $ tokenise dynFlags s (0,0)
+parseParas = Parse.parseParas dynFlags
 
 parseString :: String -> Maybe (Doc RdrName)
-parseString s = Parse.parseString $ tokenise dynFlags s (0,0)
+parseString = Parse.parseString dynFlags
 
 main :: IO ()
 main = hspec spec
 
-infix 1 `shouldParseTo`
-shouldParseTo :: String -> Doc RdrName -> Expectation
-shouldParseTo input ast = (combineStringNodes <$> parseParas input)
-                          `shouldBe` Just ast
-
 spec :: Spec
 spec = do
+
+  let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String))
+
+  describe "parseString" $ do
+    let infix 1 `shouldParseTo`
+        shouldParseTo :: String -> Doc RdrName -> Expectation
+        shouldParseTo input ast = parseString input `shouldBe` Just ast
+
+    it "is total" $ do
+      property $ \xs ->
+        -- filter out primes as we might end up with an identifier
+        -- which will fail due to undefined DynFlags
+        parseString (filter (/= '\'') xs) `shouldSatisfy` isJust
+
+    context "when parsing URLs" $ do
+      it "parses a URL" $ do
+        "<http://example.com/>" `shouldParseTo`
+          hyperlink "http://example.com/" Nothing <> "\n"
+
+      it "accepts an optional label" $ do
+        "<http://example.com/ some link>" `shouldParseTo`
+          hyperlink "http://example.com/" "some link" <> "\n"
+
+      it "finishes URL parsing as soon as it sees >, even if it's escaped" $ do
+        "<http://examp\\>le.com" `shouldParseTo`
+          hyperlink "http://examp\\" Nothing <> "le.com\n"
+
+        "<http://exa\\>mp\\>le.com>" `shouldParseTo`
+          hyperlink "http://exa\\" Nothing <> "mp>le.com>\n"
+
+        -- Likewise in label
+        "<http://example.com f\\>oo>" `shouldParseTo`
+          hyperlink "http://example.com" "f\\" <> "oo>\n"
+
+      it "parses inline URLs" $ do
+        "Not yet working, see <http://trac.haskell.org/haddock/ticket/223>\n , isEmptyChan" `shouldParseTo`
+             "Not yet working, see "
+          <> hyperlink "http://trac.haskell.org/haddock/ticket/223" Nothing
+          <> "\n , isEmptyChan\n"
+
+      context "when autolinking URLs" $ do
+        it "autolinks HTTP URLs" $ do
+          "http://example.com/" `shouldParseTo`
+            hyperlink "http://example.com/" Nothing <> "\n"
+
+        it "autolinks HTTPS URLs" $ do
+          "https://www.example.com/" `shouldParseTo`
+            hyperlink "https://www.example.com/" Nothing <> "\n"
+
+        it "autolinks FTP URLs" $ do
+          "ftp://example.com/" `shouldParseTo`
+            hyperlink "ftp://example.com/" Nothing <> "\n"
+
+        it "does not include a trailing exclamation mark" $ do
+          "http://example.com/! Some other sentence." `shouldParseTo`
+            hyperlink "http://example.com/" Nothing <> "! Some other sentence.\n"
+
+        it "does not include a trailing comma" $ do
+          "http://example.com/, Some other sentence." `shouldParseTo`
+            hyperlink "http://example.com/" Nothing <> ", Some other sentence.\n"
+
+        it "does not include a trailing dot" $ do
+          "http://example.com/. Some other sentence." `shouldParseTo`
+            hyperlink "http://example.com/" Nothing <> ". Some other sentence.\n"
+
+        it "does not include a trailing question mark" $ do
+          "http://example.com/? Some other sentence." `shouldParseTo`
+            hyperlink "http://example.com/" Nothing <> "? Some other sentence.\n"
+
+
   describe "parseParas" $ do
+    let infix 1 `shouldParseTo`
+        shouldParseTo :: String -> Doc RdrName -> Expectation
+        shouldParseTo input ast = (combineStringNodes <$> parseParas input)
+                                  `shouldBe` Just ast
+
+    it "is total" $ do
+      property $ \xs ->
+        -- filter out primes as we might end up with an identifier
+        -- which will fail due to undefined DynFlags
+        parseParas (filter (/= '\'') xs) `shouldSatisfy` isJust
+
     it "parses a paragraph" $ do
       "foobar" `shouldParseTo` DocParagraph "foobar\n"
 
+    it "empty input produces DocEmpty" $ do
+      "" `shouldParseTo` DocEmpty
+
+    it "should preserve all regular characters" $ do
+      property $ \xs ->
+        let input = filterSpecial xs
+        in case input of
+          [] -> input `shouldParseTo` DocEmpty
+          _ -> input `shouldParseTo` DocParagraph (DocString $ input ++ "\n")
+
     context "when parsing a simple string" $ do
       it "] should be made into a DocString" $ do
         "hell]o" `shouldParseTo` DocParagraph "hell]o\n"
@@ -65,7 +152,7 @@ spec = do
 
       it "should parse a module inline" $ do
         "This is a \"Module\"." `shouldParseTo`
-          DocParagraph ("This is a " <> ((DocModule "Module") <> ".\n"))
+          DocParagraph ("This is a " <> (DocModule "Module" <> ".\n"))
 
     context "when parsing emphasised strings" $ do
       it "emphasises a word on its own" $ do
@@ -80,7 +167,7 @@ spec = do
         "/灼眼のシャナ/" `shouldParseTo`
           (DocParagraph $ DocEmphasis "灼眼のシャナ" <> "\n")
 
-      it "does /multi-line\\n codeblocks/" $ do
+      it "does not do /multi-line\\n emphasis/" $ do
         " /multi-line\n emphasis/" `shouldParseTo`
           DocParagraph "/multi-line\n emphasis/\n"
 
@@ -99,7 +186,7 @@ spec = do
       it "accepts other elements in a codeblock" $ do
         "@/emphasis/ \"Module\" <<picture>>@" `shouldParseTo`
           (DocCodeBlock $ DocEmphasis "emphasis" <> " "
-                <> DocModule "Module" <> " " <> DocPic "picture")
+                <> DocModule "Module" <> " " <> pic "picture" Nothing)
 
     context "when parsing monospaced strings" $ do
       it "monospaces inline strings" $ do
@@ -107,6 +194,10 @@ spec = do
           (DocParagraph $ "This comment applies to the "
                 <> DocMonospaced "following" <> " declaration\n")
 
+      it "should allow us to escape the @" $ do
+        "foo @hey \\@ world@ bar" `shouldParseTo`
+          DocParagraph ("foo " <> DocMonospaced "hey @ world" <> " bar\n")
+
       it "monospaces inline unicode" $ do
         "hello @灼眼のシャナ@ unicode" `shouldParseTo`
           (DocParagraph $ "hello "
@@ -115,17 +206,43 @@ spec = do
       it "accepts other elements in a monospaced section" $ do
         "hey @/emphasis/ \"Module\" <<picture>>@ world" `shouldParseTo`
           (DocParagraph $
-               "hey "
-            <> DocMonospaced (DocEmphasis "emphasis" <> " "
-                              <> DocModule "Module" <> " " <> DocPic "picture")
-            <> " world\n")
+           "hey "
+           <> DocMonospaced (DocEmphasis "emphasis" <> " "
+                             <> DocModule "Module" <> " " <> pic "picture" Nothing)
+           <> " world\n")
 
 
     context "when parsing unordered lists" $ do
       it "parses a simple unordered list" $ do
         "* point one\n\n* point two" `shouldParseTo`
           DocUnorderedList [ DocParagraph " point one\n"
-                                 , DocParagraph " point two\n"]
+                           , DocParagraph " point two\n"]
+
+        "* 1.parameter re :  the derived regular expression"
+          ++ "\n\n- returns : empty String" `shouldParseTo`
+          (DocUnorderedList
+           [DocParagraph " 1.parameter re :  the derived regular expression\n",
+            DocParagraph " returns : empty String\n"])
+
+      it "doesn't accept a list where unexpected" $ do
+        " expression?\n -> matches\n\n    * 1.parameter \n\n"
+          `shouldParseTo`
+           DocParagraph "expression?\n -> matches\n" <> DocUnorderedList [DocParagraph " 1.parameter \n"]
+
+
+      it "parses a simple unordered list without the empty line separator" $ do
+        "* point one\n* point two" `shouldParseTo`
+          DocUnorderedList [ DocParagraph " point one\n"
+                           , DocParagraph " point two\n"]
+
+        "* point one\nmore one\n* point two\nmore two" `shouldParseTo`
+          DocUnorderedList [ DocParagraph " point one\nmore one\n"
+                           , DocParagraph " point two\nmore two\n"]
+
+        " * point one\nmore one\n * point two\nmore two" `shouldParseTo`
+          DocUnorderedList [ DocParagraph " point one\nmore one\n"
+                           , DocParagraph " point two\nmore two\n"
+                           ]
 
       it "parses an empty unordered list" $ do
         "*" `shouldParseTo` DocUnorderedList [DocParagraph "\n"]
@@ -134,6 +251,9 @@ spec = do
         "* 灼眼のシャナ" `shouldParseTo`
           DocUnorderedList [DocParagraph " 灼眼のシャナ\n"]
 
+      it "preserves whitespace on the front of additional lines" $ do
+        "* foo\n    bar" `shouldParseTo` DocUnorderedList [DocParagraph " foo\n    bar\n"]
+
       it "accepts other elements in an unordered list" $ do
         ("* \"Module\"\n\n* /emphasis/"
                     ++ "\n\n* @code@\n\n* a@mono@b \n\n*") `shouldParseTo`
@@ -141,17 +261,43 @@ spec = do
               DocParagraph (" " <> DocModule "Module" <> "\n")
             , DocParagraph (" " <> DocEmphasis "emphasis" <> "\n")
             , DocCodeBlock "code"
-            , DocParagraph (" a" <> (DocMonospaced "mono") <> "b \n")
+            , DocParagraph (" a" <> DocMonospaced "mono" <> "b \n")
+            , DocParagraph "\n"
+            ]
+
+        ("* \"Module\"\n* /emphasis/"
+                    ++ "\n* @code@\n* a@mono@b \n*") `shouldParseTo`
+          DocUnorderedList [
+              DocParagraph (" " <> DocModule "Module" <> "\n")
+            , DocParagraph (" " <> DocEmphasis "emphasis" <> "\n")
+            , DocCodeBlock "code"
+            , DocParagraph (" a" <> DocMonospaced "mono" <> "b \n")
             , DocParagraph "\n"
             ]
 
     context "when parsing ordered lists" $ do
       it "parses a simple ordered list" $ do
         "1. point one\n\n2. point two" `shouldParseTo`
-          DocOrderedList [
-              DocParagraph " point one\n"
-            , DocParagraph " point two\n"
-            ]
+          DocOrderedList [ DocParagraph " point one\n"
+                         , DocParagraph " point two\n"
+                         ]
+
+      it "parses a simple ordered list without the newline separator" $ do
+        "1. point one\n2. point two" `shouldParseTo`
+          DocOrderedList [ DocParagraph " point one\n"
+                         , DocParagraph " point two\n"
+                         ]
+
+        "1. point one\nmore\n2. point two\nmore" `shouldParseTo`
+          DocOrderedList [ DocParagraph " point one\nmore\n"
+                         , DocParagraph " point two\nmore\n"
+                         ]
+
+        -- space before list
+        " 1. point one\nmore\n 2. point two\nmore" `shouldParseTo`
+          DocOrderedList [ DocParagraph " point one\nmore\n"
+                         , DocParagraph " point two\nmore\n"
+                         ]
 
       it "parses an empty list" $ do
         "1." `shouldParseTo` DocOrderedList [DocParagraph "\n"]
@@ -165,6 +311,9 @@ spec = do
         "(1) 灼眼のシャナ" `shouldParseTo`
           DocOrderedList [DocParagraph " 灼眼のシャナ\n"]
 
+      it "preserves whitespace on the front of additional lines" $ do
+        "1. foo\n    bar" `shouldParseTo` DocOrderedList [DocParagraph " foo\n    bar\n"]
+
       it "accepts other elements" $ do
         ("1. \"Module\"\n\n2. /emphasis/"
                     ++ "\n\n3. @code@\n\n4. a@mono@b \n\n5.") `shouldParseTo`
@@ -181,6 +330,16 @@ spec = do
         "[foo] bar\n\n[baz] quux" `shouldParseTo`
           DocDefList [("foo", " bar\n"), ("baz", " quux\n")]
 
+      it "parses a simple list without the newline separator" $ do
+        "[foo] bar\n[baz] quux" `shouldParseTo`
+          DocDefList [("foo", " bar\n"), ("baz", " quux\n")]
+
+        "[foo] bar\nmore\n[baz] quux\nmore" `shouldParseTo`
+          DocDefList [("foo", " bar\nmore\n"), ("baz", " quux\nmore\n")]
+
+        " [foo] bar\nmore\n [baz] quux\nmore" `shouldParseTo`
+          DocDefList [("foo", " bar\nmore\n"), ("baz", " quux\nmore\n")]
+
       it "parses a list with unicode in it" $ do
         "[灼眼] シャナ" `shouldParseTo`
           DocDefList [("灼眼", " シャナ\n")]
@@ -197,6 +356,26 @@ spec = do
         "[[world]] bar" `shouldParseTo`
           DocDefList [("[world", "] bar\n")]
 
+      it "treats broken up definition list as regular string" $ do
+        "[qu\nx] hey" `shouldParseTo` DocParagraph "[qu\nx] hey\n"
+
+      it "preserves whitespace on the front of additional lines" $ do
+        "[foo] bar\n    baz" `shouldParseTo` DocDefList [("foo", " bar\n    baz\n")]
+
+    context "when parsing consecutive paragraphs" $ do
+      it "will not capture irrelevant consecutive lists" $ do
+        "   * bullet\n\n   - different bullet\n\n   (1) ordered\n \n   "
+          ++ "2. different bullet\n   \n   [cat] kitten\n   \n   [pineapple] fruit"
+          `shouldParseTo`
+          DocUnorderedList [ DocParagraph " bullet\n"
+                           , DocParagraph " different bullet\n"]
+          <> DocOrderedList [ DocParagraph " ordered\n"
+                            , DocParagraph " different bullet\n"
+                            ]
+          <> DocDefList [ ("cat", " kitten\n")
+                        , ("pineapple", " fruit\n")
+                        ]
+
     context "when parsing an example" $ do
       it ("requires an example to be separated"
           ++ " from a previous paragraph by an empty line") $ do
@@ -205,27 +384,50 @@ spec = do
                 <> DocExamples [Example "fib 10" ["55"]]
 
         -- parse error
-        parseParas "foobar\n>>> fib 10\n55" `shouldBe` Nothing
+      it "parses bird-tracks inside of paragraphs as plain strings" $ do
+        "foobar\n>>> fib 10\n55" `shouldParseTo` DocParagraph "foobar\n>>> fib 10\n55\n"
 
       it "parses a prompt with no example results" $ do
         " >>> import Data.Char\n " `shouldParseTo`
           DocExamples [ Example { exampleExpression = "import Data.Char"
-                                      , exampleResult = []
-                                      }
-                            ]
+                                , exampleResult = []
+                                }
+                      ]
 
       it "is able to parse example sections with unicode" $ do
         " >>> 灼眼\n の\n >>> シャナ\n 封絶" `shouldParseTo`
           DocExamples [ Example { exampleExpression = "灼眼"
-                                      , exampleResult = ["の"]
-                                      }
-                            , Example { exampleExpression = "シャナ"
-                                      , exampleResult = ["封絶"]
-                                      }
-                            ]
+                                , exampleResult = ["の"]
+                                }
+                      , Example { exampleExpression = "シャナ"
+                                , exampleResult = ["封絶"]
+                                }
+                      ]
+      it "preserves whitespace before the prompt with consecutive paragraphs" $ do
+        " Examples:\n\n >>> fib 5\n 5\n >>> fib 10\n 55\n\n >>> fib 10\n 55"
+        `shouldParseTo`
+        DocParagraph "Examples:\n"
+          <> DocExamples [ Example { exampleExpression = "fib 5"
+                                   , exampleResult = ["5"]}
+                         , Example {exampleExpression = "fib 10"
+                                   , exampleResult = ["55"]}]
+          <> DocExamples [ Example { exampleExpression = "fib 10"
+                                   , exampleResult = ["55"]}]
+
+      it "can parse consecutive prompts with results" $ do
+        " >>> fib 5\n 5\n >>> fib 10\n 55" `shouldParseTo`
+          DocExamples [ Example { exampleExpression = "fib 5"
+                                , exampleResult = ["5"] }
+                      , Example { exampleExpression = "fib 10"
+                                , exampleResult = ["55"] }]
 
-      it ("parses a result line that only "
-          ++ "contains <BLANKLINE> as an empty line") $ do
+      it "can parse results if they don't have the same whitespace prefix" $ do
+        " >>> hey\n5\n 5\n  5" `shouldParseTo`
+          DocExamples [ Example { exampleExpression = "hey"
+                                , exampleResult = ["5", "5", " 5"] }]
+
+
+      it "parses a <BLANKLINE> result as an empty result" $ do
         ">>> putFooBar\nfoo\n<BLANKLINE>\nbar" `shouldParseTo`
           DocExamples [Example "putFooBar" ["foo","","bar"]]
 
@@ -235,11 +437,18 @@ spec = do
         "foobar\n\n> some code" `shouldParseTo`
           DocParagraph "foobar\n" <> DocCodeBlock " some code\n"
 
-        -- parse error
-        parseParas "foobar\n> some code" `shouldBe` Nothing
+      it "parses birdtracks inside of paragraphs as plain strings" $ do
+        "foobar\n> some code" `shouldParseTo` DocParagraph "foobar\n> some code\n"
+
+      it "long birdtrack block without spaces in front" $ do
+        "beginning\n\n> foo\n> bar\n> baz" `shouldParseTo`
+          DocParagraph "beginning\n"
+          <> DocCodeBlock " foo\n bar\n baz\n"
 
-      it "consecutive birdtracks " $ do
-        ">test3\n>test4\n\n" `shouldParseTo` DocCodeBlock "test3\ntest4\n"
+      it "single DocCodeBlock even if there's space before birdtracks" $ do
+        "beginning\n\n > foo\n > bar\n > baz" `shouldParseTo`
+          DocParagraph "beginning\n"
+          <> DocCodeBlock " foo\n bar\n baz\n"
 
       it "consecutive birdtracks with spaces " $ do
         " > foo\n \n > bar\n \n" `shouldParseTo`
@@ -247,72 +456,39 @@ spec = do
 
       it "code block + birdtracks" $ do
         "@\ntest1\ntest2\n@\n\n>test3\n>test4\n\n" `shouldParseTo`
-          DocCodeBlock "\ntest1\ntest2\n" <> DocCodeBlock "test3\ntest4\n"
+          DocCodeBlock "\ntest1\ntest2\n"
+          <> DocCodeBlock "test3\ntest4\n"
 
-      it "birdtracks + code block" $ do
-        ">test3\n>test4\n\n@\ntest1\ntest2\n@\n\n" `shouldParseTo`
-          DocCodeBlock "test3\ntest4\n" <> DocCodeBlock "\ntest1\ntest2\n"
+      it "requires the code block to be closed" $ do
+        "@hello" `shouldParseTo` DocParagraph "@hello\n"
 
+      it "preserves the first trailing whitespace after the opening @ in a code block" $ do
+        "@\ntest1\ntest2\n@" `shouldParseTo` DocCodeBlock "\ntest1\ntest2\n"
 
+        "@   \ntest1\ntest2\n@" `shouldParseTo` DocCodeBlock "   \ntest1\ntest2\n"
 
-      it "can parse consecutive prompts with results" $ do
-        " >>> fib 5\n 5\n >>> fib 10\n 55" `shouldParseTo`
-          DocExamples [ Example { exampleExpression = "fib 5"
-                                      , exampleResult = ["5"]
-                                      }
-                            , Example { exampleExpression = "fib 10"
-                                      , exampleResult = ["55"]
-                                      }
-                            ]
+      it "markup in a @ code block" $ do
+        "@hello <world> \"Foo.Bar\" <<how is>> it /going/?@" `shouldParseTo`
+          DocCodeBlock
+          ("hello " <>
+           (DocHyperlink (Hyperlink {hyperlinkUrl = "world", hyperlinkLabel = Nothing}))
+           <> " "
+           <> DocModule "Foo.Bar"
+           <> " "
+           <> (DocPic (Picture {pictureUri = "how", pictureTitle = Just "is"}))
+           <> " it " <> (DocEmphasis "going")
+           <> "?")
 
-    context "when parsing a URL" $ do
-      it "parses a URL" $ do
-        "<http://example.com/>" `shouldParseTo`
-          (DocParagraph $ hyperlink "http://example.com/" Nothing <> "\n")
+      it "should allow us to escape the @ in a paragraph level @ code block" $ do
+        "@hello \\@ world@" `shouldParseTo` DocCodeBlock "hello @ world"
 
-      it "accepts an optional label" $ do
-        "<http://example.com/ some link>" `shouldParseTo`
-          (DocParagraph $ hyperlink "http://example.com/" "some link" <> "\n")
-
-      it "consecutive URL and URL + label" $ do
-        (" \nA plain URL: <http://example.com/>\n\n A URL with a "
-                    ++ "label: <http://example.com/ some link>") `shouldParseTo`
-          DocParagraph (
-            "A plain URL: " <>
-              DocHyperlink (Hyperlink "http://example.com/" Nothing) <> "\n"
-          ) <>
-          DocParagraph (
-            "A URL with a label: " <>
-              DocHyperlink (Hyperlink "http://example.com/" "some link") <> "\n"
-          )
-
-      it "finishes URL parsing as soon as it sees >, even if it's escaped" $ do
-        "<http://examp\\>le.com" `shouldParseTo`
-          DocParagraph (
-            DocHyperlink (Hyperlink "http://examp\\" Nothing) <> "le.com\n"
-          )
+      it "should swallow up trailing spaces in code blocks" $ do
+        "@ foo @" `shouldParseTo` DocCodeBlock " foo"
 
-        "<http://exa\\>mp\\>le.com>" `shouldParseTo`
-          DocParagraph (
-            DocHyperlink (Hyperlink "http://exa\\" Nothing) <> "mp>le.com>\n"
-          )
-
-        -- Likewise in label
-        "<http://example.com f\\>oo>" `shouldParseTo`
-          DocParagraph (
-            DocHyperlink (Hyperlink "http://example.com" "f\\") <> "oo>\n"
-          )
-
-      it "parses inline URLs" $ do
-        (" Not yet working, see <http://trac.haskell.org"
-                    ++ "/haddock/ticket/223>\n , isEmptyChan") `shouldParseTo`
-          DocParagraph
-                ("Not yet working, see "
-                 <> ((DocHyperlink
-                      (Hyperlink { hyperlinkUrl = "http://trac.haskell.org"
-                                                  ++ "/haddock/ticket/223"
-                                 , hyperlinkLabel = Nothing
-                                 })) <> "\n , isEmptyChan\n"))
+      it "birdtracks + code block" $ do
+        ">test3\n>test4\n\n@\ntest1\ntest2\n@\n\n" `shouldParseTo`
+          DocCodeBlock "test3\ntest4\n"
+          <> DocCodeBlock "\ntest1\ntest2\n"
 
     context "when parsing properties" $ do
       it "can parse a single property" $ do
@@ -347,28 +523,28 @@ spec = do
     context "when parsing pictures" $ do
       it "parses a simple picture" $ do
         "<<baz>>" `shouldParseTo`
-          DocParagraph ((DocPic "baz") <> "\n")
+          DocParagraph (pic "baz" Nothing <> "\n")
 
-      it "parses a picture with spaces" $ do
+      it "parses a picture with a title" $ do
         "<<b a z>>" `shouldParseTo`
-          DocParagraph ((DocPic "b a z") <> "\n")
+          DocParagraph (pic "b" (Just "a z") <> "\n")
 
       it "parses a picture with unicode" $ do
         "<<灼眼のシャナ>>" `shouldParseTo`
-          DocParagraph ((DocPic "灼眼のシャナ") <> "\n")
+          DocParagraph ((pic "灼眼のシャナ" Nothing) <> "\n")
 
       it "doesn't allow for escaping of the closing tags" $ do -- bug?
         "<<ba\\>>z>>" `shouldParseTo`
-          (DocParagraph $ DocPic "ba\\" <> "z>>\n")
+          (DocParagraph $ pic "ba\\" Nothing <> "z>>\n")
 
     context "when parsing anchors" $ do
       it "should parse a single word anchor" $ do
         "#foo#" `shouldParseTo`
-          DocParagraph ((DocAName "foo") <> "\n")
+          DocParagraph (DocAName "foo" <> "\n")
 
       it "should parse a multi word anchor" $ do
         "#foo bar#" `shouldParseTo`
-          DocParagraph ((DocAName "foo bar") <> "\n")
+          DocParagraph (DocAName "foo bar" <> "\n")
 
       it "should parse a unicode anchor" $ do
         "#灼眼のシャナ#" `shouldParseTo`
@@ -387,17 +563,18 @@ spec = do
       it "[@q/uu/x@] h\\ney" $ do
         "[@q/uu/x@] h\ney" `shouldParseTo`
           DocDefList
-                [(DocMonospaced
-                  ((DocString "q")
-                   <> ((DocEmphasis (DocString "uu"))
-                       <> "x")), " h\ney\n")]
-
-      it "[qu\\nx] hey" $ do
-        parseParas "[qu\nx] hey" `shouldBe` Nothing
+                [(DocMonospaced ("q" <> DocEmphasis "uu" <> "x"), " h\ney\n")]
 
       it "/qu\\nux/" $ do
         "/qu\nux/" `shouldParseTo` DocParagraph "/qu\nux/\n"
 
+      -- regression test
+      it "requires markup to be fully closed, even if nested" $ do
+        "@hel/lo" `shouldParseTo` DocParagraph "@hel/lo\n"
+
+      it "will be total even if only the first delimiter is present" $ do
+        "/" `shouldParseTo` DocParagraph "/\n"
+
     context "when parsing strings with apostrophes" $ do
       it "parses a word with an one of the delimiters in it as DocString" $ do
         "don't" `shouldParseTo` DocParagraph "don't\n"
@@ -409,6 +586,39 @@ spec = do
         " don't use apostrophe's in the wrong place's" `shouldParseTo`
           DocParagraph "don't use apostrophe's in the wrong place's\n"
 
+    context "when parsing strings contaning numeric character references" $ do
+      it "will implicitly convert digits to characters" $ do
+        "&#65;&#65;&#65;&#65;" `shouldParseTo` DocParagraph "AAAA\n"
+
+        "&#28796;&#30524;&#12398;&#12471;&#12515;&#12490;" `shouldParseTo`
+          DocParagraph "灼眼のシャナ\n"
+
+      it "will implicitly convert hex encoded characters" $ do
+        "&#x65;&#x65;&#x65;&#x65;" `shouldParseTo` DocParagraph "eeee\n"
+
+    context "when parsing module names" $ do
+      it "can accept a simple module name" $ do
+        "\"Hello\"" `shouldParseTo` DocParagraph (DocModule "Hello" <> "\n")
+
+      it "can accept a module name with dots" $ do
+        "\"Hello.World\"" `shouldParseTo` DocParagraph (DocModule "Hello.World" <> "\n")
+
+      it "can accept a module name with unicode" $ do
+        "\"Hello.Worldλ\"" `shouldParseTo` DocParagraph ((DocModule "Hello.Worldλ") <> "\n")
+
+      it "parses a module name with a trailing dot as regular quoted string" $ do
+        "\"Hello.\"" `shouldParseTo` DocParagraph "\"Hello.\"\n"
+
+      it "parses a module name with a space as regular quoted string" $ do
+        "\"Hello World\"" `shouldParseTo` DocParagraph "\"Hello World\"\n"
+
+      it "parses a module name with invalid characters as regular quoted string" $ do
+        "\"Hello&[{}(=*)+]!\"" `shouldParseTo` DocParagraph "\"Hello&[{}(=*)+]!\"\n"
+
+
   where
     hyperlink :: String -> Maybe String -> Doc RdrName
     hyperlink url = DocHyperlink . Hyperlink url
+
+    pic :: String -> Maybe String -> Doc RdrName
+    pic uri = DocPic . Picture uri
diff --git a/test/Haddock/Utf8Spec.hs b/test/Haddock/Utf8Spec.hs
new file mode 100644
index 00000000..a352bf61
--- /dev/null
+++ b/test/Haddock/Utf8Spec.hs
@@ -0,0 +1,15 @@
+module Haddock.Utf8Spec (main, spec) where
+
+import           Test.Hspec
+import           Test.QuickCheck
+
+import           Haddock.Utf8
+
+main :: IO ()
+main = hspec spec
+
+spec :: Spec
+spec = do
+  describe "decodeUtf8" $ do
+    it "is inverse to encodeUtf8" $ do
+      property $ \xs -> (decodeUtf8 . encodeUtf8) xs `shouldBe` xs