From 1b26460fb3b5df5215cc1e6715661cbc7c950085 Mon Sep 17 00:00:00 2001
From: Alec Theriault <alec.theriault@gmail.com>
Date: Thu, 31 Jan 2019 01:37:25 -0800
Subject: Use `.hie` files for the Hyperlinker backend (#977)

# Summary

This is a large architectural change to the Hyperlinker.

  * extract link (and now also type) information from `.hie` instead
    of doing ad-hoc SYB traversals of the `RenamedSource`. Also
    adds a superb type-on-hover feature (#715).

 * re-engineer the lexer to avoid needless string conversions. By going
    directly through GHC's `P` monad and taking bytestring slices, we
    avoid a ton of allocation and have better handling of position
    pragmas and CPP.

In terms of performance, the Haddock side of things has gotten _much_
more efficient. Unfortunately, much of this is cancelled out by the
increased GHC workload for generating `.hie` files. For the full set of
boot libs (including `ghc`-the-library)

  * the sum of total time went down by 9-10% overall
  * the sum of total allocations went down by 6-7%

# Motivation

Haddock is moving towards working entirely over `.hi` and `.hie` files.
This change means we no longer need the `RenamedSource` from
`TypecheckedModule` (something which is _not_ in `.hi` files).

# Details

Along the way a bunch of things were fixed:

 * Cross package (and other) links are now more reliable (#496)
 * The lexer tries to recover from errors on every line (instead of at CPP
    boundaries)
 * `LINE`/`COLUMN` pragmas are taken into account
 * filter out zero length tokens before rendering
 * avoid recomputing the `ModuleName`-based `SrcMap`
 * remove the last use of `Documentation.Haddock.Utf8` (see  #998)
 * restructure temporary folder logic for `.hi`/`.hie` model
---
 haddock-api/haddock-api.cabal                      |   4 +-
 haddock-api/resources/html/solarized.css           |  42 +++
 haddock-api/src/Haddock.hs                         |  52 ++-
 haddock-api/src/Haddock/Backends/Hyperlinker.hs    |  55 ++-
 .../src/Haddock/Backends/Hyperlinker/Ast.hs        | 237 -------------
 .../src/Haddock/Backends/Hyperlinker/Parser.hs     | 386 ++++++++++-----------
 .../src/Haddock/Backends/Hyperlinker/Renderer.hs   | 276 +++++++++++----
 .../src/Haddock/Backends/Hyperlinker/Types.hs      |  36 +-
 .../src/Haddock/Backends/Hyperlinker/Utils.hs      |  98 +++++-
 haddock-api/src/Haddock/GhcUtils.hs                | 134 ++++++-
 haddock-api/src/Haddock/Interface.hs               |  55 +--
 haddock-api/src/Haddock/Interface/Create.hs        |  39 +--
 haddock-api/src/Haddock/Types.hs                   |  14 +-
 haddock-api/src/Haddock/Utils.hs                   |   8 +-
 14 files changed, 771 insertions(+), 665 deletions(-)
 delete mode 100644 haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs

(limited to 'haddock-api')

diff --git a/haddock-api/haddock-api.cabal b/haddock-api/haddock-api.cabal
index 2a94c5f5..a4dea01f 100644
--- a/haddock-api/haddock-api.cabal
+++ b/haddock-api/haddock-api.cabal
@@ -59,6 +59,7 @@ library
                , directory
                , filepath
                , ghc-boot
+               , ghc-boot-th
                , transformers
 
   hs-source-dirs: src
@@ -97,7 +98,6 @@ library
     Haddock.Backends.HaddockDB
     Haddock.Backends.Hoogle
     Haddock.Backends.Hyperlinker
-    Haddock.Backends.Hyperlinker.Ast
     Haddock.Backends.Hyperlinker.Parser
     Haddock.Backends.Hyperlinker.Renderer
     Haddock.Backends.Hyperlinker.Types
@@ -130,7 +130,6 @@ test-suite spec
     Haddock
     Haddock.Backends.Hoogle
     Haddock.Backends.Hyperlinker
-    Haddock.Backends.Hyperlinker.Ast
     Haddock.Backends.Hyperlinker.Renderer
     Haddock.Backends.Hyperlinker.Utils
     Haddock.Backends.LaTeX
@@ -187,6 +186,7 @@ test-suite spec
                , directory
                , filepath
                , ghc-boot
+               , ghc-boot-th
                , transformers
 
   build-tool-depends:
diff --git a/haddock-api/resources/html/solarized.css b/haddock-api/resources/html/solarized.css
index e83dc5ec..0146eedd 100644
--- a/haddock-api/resources/html/solarized.css
+++ b/haddock-api/resources/html/solarized.css
@@ -53,3 +53,45 @@ a:link, a:visited {
 a:hover, a.hover-highlight {
 	background-color: #eee8d5;
 }
+
+span.annot{
+    position:relative;
+    color:#000;
+    text-decoration:none
+  }
+
+span.annot:hover{z-index:25; background-color:#ff0}
+
+span.annot span.annottext{
+  display: none;
+  border-radius: 5px 5px;
+
+  -moz-border-radius: 5px;
+  -webkit-border-radius: 5px;
+
+  box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.1);
+  -webkit-box-shadow: 5px 5px rgba(0, 0, 0, 0.1);
+  -moz-box-shadow: 5px 5px rgba(0, 0, 0, 0.1);
+
+  position: absolute;
+  left: 1em; top: 2em;
+  z-index: 99;
+  margin-left: 5;
+  background: #FFFFAA;
+  border: 2px solid #FFAD33;
+  padding: 0.8em 1em;
+}
+
+span.annot:hover span.annottext{
+  display:block;
+}
+
+/* This bridges the gap so you can mouse into the tooltip without it disappearing */
+span.annot span.annottext:before{
+  content: "";
+  position: absolute;
+  left: -1em; top: -1em;
+  background: #FFFFFF00;
+  z-index:-1;
+  padding: 2em 2em;
+}
diff --git a/haddock-api/src/Haddock.hs b/haddock-api/src/Haddock.hs
index 2bae60e7..358e5c3a 100644
--- a/haddock-api/src/Haddock.hs
+++ b/haddock-api/src/Haddock.hs
@@ -39,6 +39,7 @@ import Haddock.Version
 import Haddock.InterfaceFile
 import Haddock.Options
 import Haddock.Utils
+import Haddock.GhcUtils (modifySessionDynFlags, setOutputDir)
 
 import Control.Monad hiding (forM_)
 import Data.Foldable (forM_, foldl')
@@ -66,6 +67,8 @@ import qualified GHC.Paths as GhcPaths
 import Paths_haddock_api (getDataDir)
 import System.Directory (doesDirectoryExist)
 #endif
+import System.Directory (getTemporaryDirectory)
+import System.FilePath ((</>))
 
 import Text.ParserCombinators.ReadP (readP_to_S)
 import GHC hiding (verbosity)
@@ -164,6 +167,15 @@ haddockWithGhc ghc args = handleTopExceptions $ do
   -- bypass the interface version check
   let noChecks = Flag_BypassInterfaceVersonCheck `elem` flags
 
+  -- Create a temporary directory and redirect GHC output there (unless user
+  -- requested otherwise).
+  --
+  -- Output dir needs to be set before calling 'depanal' since 'depanal' uses it
+  -- to compute output file names that are stored in the 'DynFlags' of the
+  -- resulting 'ModSummary's.
+  let withDir | Flag_NoTmpCompDir `elem` flags = id
+              | otherwise = withTempOutputDir
+
   unless (Flag_NoWarnings `elem` flags) $ do
     hypSrcWarnings flags
     forM_ (warnings args) $ \warning -> do
@@ -171,7 +183,7 @@ haddockWithGhc ghc args = handleTopExceptions $ do
     when noChecks $
       hPutStrLn stderr noCheckWarning
 
-  ghc flags' $ do
+  ghc flags' $ withDir $ do
     dflags <- getDynFlags
 
     forM_ (optShowInterfaceFile flags) $ \path -> liftIO $ do
@@ -202,6 +214,15 @@ haddockWithGhc ghc args = handleTopExceptions $ do
       -- Render even though there are no input files (usually contents/index).
       liftIO $ renderStep dflags flags sinceQual qual packages []
 
+-- | Run the GHC action using a temporary output directory
+withTempOutputDir :: Ghc a -> Ghc a
+withTempOutputDir action = do
+  tmp <- liftIO getTemporaryDirectory
+  x   <- liftIO getProcessID
+  let dir = tmp </> ".haddock-" ++ show x
+  modifySessionDynFlags (setOutputDir dir)
+  withTempDir dir action
+
 -- | Create warnings about potential misuse of -optghc
 warnings :: [String] -> [String]
 warnings = map format . filter (isPrefixOf "-optghc")
@@ -221,8 +242,9 @@ withGhc flags action = do
   let handleSrcErrors action' = flip handleSourceError action' $ \err -> do
         printException err
         liftIO exitFailure
+      needHieFiles = Flag_HyperlinkedSource `elem` flags
 
-  withGhc' libDir (ghcFlags flags) (\_ -> handleSrcErrors action)
+  withGhc' libDir needHieFiles (ghcFlags flags) (\_ -> handleSrcErrors action)
 
 
 readPackagesAndProcessModules :: [Flag] -> [String]
@@ -444,14 +466,10 @@ readInterfaceFiles name_cache_accessor pairs bypass_version_check = do
 
 -- | Start a GHC session with the -haddock flag set. Also turn off
 -- compilation and linking. Then run the given 'Ghc' action.
-withGhc' :: String -> [String] -> (DynFlags -> Ghc a) -> IO a
-withGhc' libDir flags ghcActs = runGhc (Just libDir) $ do
-  dynflags  <- getSessionDynFlags
-  dynflags' <- parseGhcFlags (gopt_set dynflags Opt_Haddock) {
-    hscTarget = HscNothing,
-    ghcMode   = CompManager,
-    ghcLink   = NoLink
-    }
+withGhc' :: String -> Bool -> [String] -> (DynFlags -> Ghc a) -> IO a
+withGhc' libDir needHieFiles flags ghcActs = runGhc (Just libDir) $ do
+  dynflags' <- parseGhcFlags =<< getSessionDynFlags
+
   -- We disable pattern match warnings because than can be very
   -- expensive to check
   let dynflags'' = unsetPatternMatchWarnings $
@@ -482,11 +500,19 @@ withGhc' libDir flags ghcActs = runGhc (Just libDir) $ do
     parseGhcFlags dynflags = do
       -- TODO: handle warnings?
 
-      let flags' = filterRtsFlags flags
-      (dynflags', rest, _) <- parseDynamicFlags dynflags (map noLoc flags')
+      let extra_opts | needHieFiles = [Opt_WriteHie, Opt_Haddock]
+                     | otherwise = [Opt_Haddock]
+          dynflags' = (foldl' gopt_set dynflags extra_opts)
+                        { hscTarget = HscNothing
+                        , ghcMode   = CompManager
+                        , ghcLink   = NoLink
+                        }
+          flags' = filterRtsFlags flags
+
+      (dynflags'', rest, _) <- parseDynamicFlags dynflags' (map noLoc flags')
       if not (null rest)
         then throwE ("Couldn't parse GHC options: " ++ unwords flags')
-        else return dynflags'
+        else return dynflags''
 
 unsetPatternMatchWarnings :: DynFlags -> DynFlags
 unsetPatternMatchWarnings dflags =
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker.hs b/haddock-api/src/Haddock/Backends/Hyperlinker.hs
index 8f0c4b67..5ef7d9bb 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 module Haddock.Backends.Hyperlinker
     ( ppHyperlinkedSource
     , module Haddock.Backends.Hyperlinker.Types
@@ -8,15 +9,24 @@ module Haddock.Backends.Hyperlinker
 import Haddock.Types
 import Haddock.Utils (writeUtf8File)
 import Haddock.Backends.Hyperlinker.Renderer
+import Haddock.Backends.Hyperlinker.Parser
 import Haddock.Backends.Hyperlinker.Types
 import Haddock.Backends.Hyperlinker.Utils
-
-import Text.XHtml hiding ((</>))
+import Haddock.Backends.Xhtml.Utils ( renderToString )
 
 import Data.Maybe
 import System.Directory
 import System.FilePath
 
+import HieTypes       ( HieFile(..), HieASTs(..) )
+import HieBin         ( readHieFile )
+import Data.Map as M
+import FastString     ( mkFastString )
+import Module         ( Module, moduleName )
+import NameCache      ( initNameCache )
+import UniqSupply     ( mkSplitUniqSupply )
+import SysTools.Info  ( getCompilerInfo' )
+
 
 -- | Generate hyperlinked source for given interfaces.
 --
@@ -27,10 +37,10 @@ ppHyperlinkedSource :: FilePath -- ^ Output directory
                     -> FilePath -- ^ Resource directory
                     -> Maybe FilePath -- ^ Custom CSS file path
                     -> Bool -- ^ Flag indicating whether to pretty-print HTML
-                    -> SrcMap -- ^ Paths to sources
+                    -> M.Map Module SrcPath -- ^ Paths to sources
                     -> [Interface] -- ^ Interfaces for which we create source
                     -> IO ()
-ppHyperlinkedSource outdir libdir mstyle pretty srcs ifaces = do
+ppHyperlinkedSource outdir libdir mstyle pretty srcs' ifaces = do
     createDirectoryIfMissing True srcdir
     let cssFile = fromMaybe (defaultCssFile libdir) mstyle
     copyFile cssFile $ srcdir </> srcCssFile
@@ -39,17 +49,39 @@ ppHyperlinkedSource outdir libdir mstyle pretty srcs ifaces = do
     mapM_ (ppHyperlinkedModuleSource srcdir pretty srcs) ifaces
   where
     srcdir = outdir </> hypSrcDir
+    srcs = (srcs', M.mapKeys moduleName srcs')
 
 -- | Generate hyperlinked source for particular interface.
-ppHyperlinkedModuleSource :: FilePath -> Bool -> SrcMap -> Interface
-                          -> IO ()
-ppHyperlinkedModuleSource srcdir pretty srcs iface =
-    case ifaceTokenizedSrc iface of
-        Just tokens -> writeUtf8File path . html . render' $ tokens
-        Nothing -> return ()
+ppHyperlinkedModuleSource :: FilePath -> Bool -> SrcMaps -> Interface -> IO ()
+ppHyperlinkedModuleSource srcdir pretty srcs iface = case ifaceHieFile iface of
+    Just hfp -> do
+        -- Parse the GHC-produced HIE file
+        u <- mkSplitUniqSupply 'a'
+        HieFile { hie_hs_file = file
+                , hie_asts = HieASTs asts
+                , hie_types = types
+                , hie_hs_src = rawSrc
+                } <- fmap fst (readHieFile (initNameCache u []) hfp)
+        comp <- getCompilerInfo' df
+
+        -- Get the AST and tokens corresponding to the source file we want
+        let mast | M.size asts == 1 = snd <$> M.lookupMin asts
+                 | otherwise        = M.lookup (mkFastString file) asts
+            tokens = parse comp df file rawSrc
+
+        -- Produce and write out the hyperlinked sources
+        case mast of
+          Just ast ->
+              let fullAst = recoverFullIfaceTypes df types ast
+              in writeUtf8File path . renderToString pretty . render' fullAst $ tokens
+          Nothing
+            | M.size asts == 0 -> return ()
+            | otherwise -> error $ unwords [ "couldn't find ast for"
+                                           , file, show (M.keys asts) ]
+    Nothing -> return ()
   where
+    df = ifaceDynFlags iface
     render' = render (Just srcCssFile) (Just highlightScript) srcs
-    html = if pretty then renderHtml else showHtml
     path = srcdir </> hypSrcModuleFile (ifaceMod iface)
 
 -- | Name of CSS file in output directory.
@@ -63,3 +95,4 @@ highlightScript = "highlight.js"
 -- | Path to default CSS file.
 defaultCssFile :: FilePath -> FilePath
 defaultCssFile libdir = libdir </> "html" </> "solarized.css"
+
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
deleted file mode 100644
index a9ffc36e..00000000
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
+++ /dev/null
@@ -1,237 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE RankNTypes #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE TypeApplications #-}
-{-# LANGUAGE ViewPatterns #-}
-
-module Haddock.Backends.Hyperlinker.Ast (enrich) where
-
-
-import qualified Haddock.Syb as Syb
-import Haddock.Backends.Hyperlinker.Types
-
-import qualified GHC
-import qualified SrcLoc
-import qualified Outputable as GHC
-
-import Control.Applicative
-import Control.Monad (guard)
-import Data.Data
-import qualified Data.Map.Strict as Map
-import Data.Maybe
-
-import Prelude hiding (span)
-
-everythingInRenamedSource :: (Alternative f, Data x)
-  => (forall a. Data a => a -> f r) -> x -> f r
-everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f
-
--- | Add more detailed information to token stream using GHC API.
-enrich :: GHC.RenamedSource -> [Token] -> [RichToken]
-enrich src =
-    map $ \token -> RichToken
-        { rtkToken = token
-        , rtkDetails = enrichToken token detailsMap
-        }
-  where
-    detailsMap =
-      mkDetailsMap (concatMap ($ src)
-                     [ variables
-                     , types
-                     , decls
-                     , binds
-                     , imports
-                     ])
-
-type LTokenDetails = [(GHC.SrcSpan, TokenDetails)]
-
--- | A map containing association between source locations and "details" of
--- this location.
---
-type DetailsMap = Map.Map Position (Span, TokenDetails)
-
-mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
-mkDetailsMap xs =
-  Map.fromListWith select_details [ (start, (span, token_details))
-                                  | (ghc_span, token_details) <- xs
-                                  , GHC.RealSrcSpan span <- [ghc_span]
-                                  , let start = SrcLoc.realSrcSpanStart span
-                                  ]
-  where
-    -- favour token details which appear earlier in the list
-    select_details _new old = old
-
-lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
-lookupBySpan span details = do
-  let pos = SrcLoc.realSrcSpanStart span
-  (_, (tok_span, tok_details)) <- Map.lookupLE pos details
-  guard (tok_span `SrcLoc.containsSpan` span)
-  return tok_details
-
-enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
-enrichToken (Token typ _ spn) dm
-    | typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
-enrichToken _ _ = Nothing
-
--- | Obtain details map for variables ("normally" used identifiers).
-variables :: GHC.RenamedSource -> LTokenDetails
-variables =
-    everythingInRenamedSource (var `Syb.combine` rec)
-  where
-    var term = case cast term of
-        (Just ((GHC.dL->GHC.L sspan (GHC.HsVar _ name))
-               :: GHC.LHsExpr GHC.GhcRn)) ->
-            pure (sspan, RtkVar (GHC.unLoc name))
-        (Just (GHC.dL->GHC.L _ (GHC.RecordCon _
-                                (GHC.dL->GHC.L sspan name) _))) ->
-            pure (sspan, RtkVar name)
-        _ -> empty
-    rec term = case cast term of
-        Just (GHC.HsRecField (GHC.dL->GHC.L sspan name)
-              (_ :: GHC.LHsExpr GHC.GhcRn) _) ->
-            pure (sspan, RtkVar name)
-        _ -> empty
-
--- | Obtain details map for types.
-types :: GHC.RenamedSource -> LTokenDetails
-types = everythingInRenamedSource ty
-  where
-    ty :: forall a. Data a => a -> [(GHC.SrcSpan, TokenDetails)]
-    ty term = case cast term of
-        (Just ((GHC.dL->GHC.L sspan (GHC.HsTyVar _ _ name))
-               :: GHC.LHsType GHC.GhcRn)) ->
-            pure (sspan, RtkType (GHC.unLoc name))
-        (Just ((GHC.dL->GHC.L sspan (GHC.HsOpTy _ l name r))
-               :: GHC.LHsType GHC.GhcRn)) ->
-            (sspan, RtkType (GHC.unLoc name)):(ty l ++ ty r)
-        _ -> empty
-
--- | Obtain details map for identifier bindings.
---
--- That includes both identifiers bound by pattern matching or declared using
--- ordinary assignment (in top-level declarations, let-expressions and where
--- clauses).
-
-binds :: GHC.RenamedSource -> LTokenDetails
-binds = everythingInRenamedSource
-      (fun `Syb.combine` pat `Syb.combine` tvar)
-  where
-    fun term = case cast term of
-        (Just (GHC.FunBind _ (GHC.dL->GHC.L sspan name) _ _ _
-               :: GHC.HsBind GHC.GhcRn)) ->
-            pure (sspan, RtkBind name)
-        (Just (GHC.PatSynBind _
-               (GHC.PSB _ (GHC.dL->GHC.L sspan name) args _ _))) ->
-            pure (sspan, RtkBind name)
-            ++ everythingInRenamedSource patsyn_binds args
-        _ -> empty
-    patsyn_binds term = case cast term of
-        (Just (GHC.L sspan (name :: GHC.Name))) ->
-          pure (sspan, RtkVar name)
-        _ -> empty
-    pat term = case cast term of
-        (Just ((GHC.dL->GHC.L sspan (GHC.VarPat _ name))
-               :: GHC.LPat GHC.GhcRn)) ->
-            pure (sspan, RtkBind (GHC.unLoc name))
-        (Just (GHC.dL->GHC.L _
-                (GHC.ConPatIn (GHC.dL->GHC.L sspan name) recs))) ->
-            [(sspan, RtkVar name)] ++ everythingInRenamedSource rec recs
-        (Just (GHC.dL->GHC.L _ (GHC.AsPat _ (GHC.dL->GHC.L sspan name) _))) ->
-            pure (sspan, RtkBind name)
-        _ -> empty
-    rec term = case cast term of
-        (Just (GHC.HsRecField (GHC.dL->GHC.L sspan name)
-               (_ :: GHC.LPat GHC.GhcRn) _)) ->
-            pure (sspan, RtkVar name)
-        _ -> empty
-    tvar term = case cast term of
-        (Just ((GHC.dL->GHC.L sspan (GHC.UserTyVar _ name))
-               :: GHC.LHsTyVarBndr GHC.GhcRn)) ->
-            pure (sspan, RtkBind (GHC.unLoc name))
-        (Just (GHC.dL->GHC.L _ (GHC.KindedTyVar _ (GHC.dL->GHC.L sspan name) _))) ->
-            pure (sspan, RtkBind name)
-        _ -> empty
-
--- | Obtain details map for top-level declarations.
-decls :: GHC.RenamedSource -> LTokenDetails
-decls (group, _, _, _) = concatMap ($ group)
-    [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
-    , everythingInRenamedSource fun . GHC.hs_valds
-    , everythingInRenamedSource fix . GHC.hs_fixds
-    , everythingInRenamedSource (con `Syb.combine` ins)
-    ]
-  where
-    typ (GHC.dL->GHC.L _ t) = case t of
-        GHC.DataDecl { tcdLName = name } -> pure . decl $ name
-        GHC.SynDecl _ name _ _ _ -> pure . decl $ name
-        GHC.FamDecl _ fam -> pure . decl $ GHC.fdLName fam
-        GHC.ClassDecl{..} ->
-          [decl tcdLName]
-            ++ concatMap sig tcdSigs
-            ++ concatMap tyfam tcdATs
-        GHC.XTyClDecl {} -> GHC.panic "haddock:decls"
-    fun term = case cast term of
-        (Just (GHC.FunBind _ (GHC.dL->GHC.L sspan name) _ _ _
-               :: GHC.HsBind GHC.GhcRn))
-            | GHC.isExternalName name -> pure (sspan, RtkDecl name)
-        (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.dL->GHC.L sspan name) _ _ _)))
-            | GHC.isExternalName name -> pure (sspan, RtkDecl name)
-        _ -> empty
-    con term = case cast term of
-        (Just (cdcl :: GHC.ConDecl GHC.GhcRn)) ->
-            map decl (GHC.getConNames cdcl)
-              ++ everythingInRenamedSource fld cdcl
-        Nothing -> empty
-    ins term = case cast term of
-        (Just ((GHC.DataFamInstD _ (GHC.DataFamInstDecl eqn))
-                :: GHC.InstDecl GHC.GhcRn))
-          -> pure . tyref $ GHC.feqn_tycon $ GHC.hsib_body eqn
-        (Just (GHC.TyFamInstD _ (GHC.TyFamInstDecl eqn))) ->
-            pure . tyref $ GHC.feqn_tycon $ GHC.hsib_body eqn
-        _ -> empty
-    fld term = case cast term of
-        Just (field :: GHC.ConDeclField GHC.GhcRn)
-          -> map (decl . fmap GHC.extFieldOcc) $ GHC.cd_fld_names field
-        Nothing -> empty
-    fix term = case cast term of
-        Just ((GHC.FixitySig _ names _) :: GHC.FixitySig GHC.GhcRn)
-          -> map (\(GHC.dL->GHC.L sspan x) -> (sspan, RtkVar x)) names
-        Just ((GHC.XFixitySig {}) :: GHC.FixitySig GHC.GhcRn)
-          -> GHC.panic "haddock:decls"
-        Nothing -> empty
-    tyfam (GHC.dL->GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName]
-    tyfam (GHC.dL->GHC.L _ (GHC.XFamilyDecl {})) = GHC.panic "haddock:dels"
-    tyfam _ = GHC.panic "tyfam: Impossible Match"
-
-    sig (GHC.dL->GHC.L _ (GHC.TypeSig _ names _)) = map decl names
-    sig (GHC.dL->GHC.L _ (GHC.PatSynSig _ names _)) = map decl names
-    sig (GHC.dL->GHC.L _ (GHC.ClassOpSig _ _ names _)) = map decl names
-    sig _ = []
-    decl (GHC.dL->GHC.L sspan name) = (sspan, RtkDecl name)
-    tyref (GHC.dL->GHC.L sspan name) = (sspan, RtkType name)
-
--- | Obtain details map for import declarations.
---
--- This map also includes type and variable details for items in export and
--- import lists.
-imports :: GHC.RenamedSource -> LTokenDetails
-imports src@(_, imps, _, _) =
-    everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps
-  where
-    ie term = case cast term of
-        (Just ((GHC.IEVar _ v) :: GHC.IE GHC.GhcRn)) -> pure $ var
-          $ GHC.ieLWrappedName v
-        (Just (GHC.IEThingAbs _ t)) -> pure $ typ $ GHC.ieLWrappedName t
-        (Just (GHC.IEThingAll _ t)) -> pure $ typ $ GHC.ieLWrappedName t
-        (Just (GHC.IEThingWith _ t _ vs _fls)) ->
-          [typ $ GHC.ieLWrappedName t] ++ map (var . GHC.ieLWrappedName) vs
-        (Just (GHC.IEModuleContents _ m)) -> pure $ modu m
-        _ -> empty
-    typ (GHC.dL->GHC.L sspan name) = (sspan, RtkType name)
-    var (GHC.dL->GHC.L sspan name) = (sspan, RtkVar name)
-    modu (GHC.dL->GHC.L sspan name) = (sspan, RtkModule name)
-    imp idecl
-      | not . GHC.ideclImplicit $ idecl = Just (modu (GHC.ideclName idecl))
-      | otherwise = Nothing
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index f8494242..1d5576cc 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -1,213 +1,212 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
 module Haddock.Backends.Hyperlinker.Parser (parse) where
 
-import Data.Either         ( isRight, isLeft )
-import Data.List           ( foldl', isPrefixOf, isSuffixOf )
-import Data.Maybe          ( maybeToList )
-import Data.Char           ( isSpace )
-import qualified Text.Read as R
+import Control.Applicative ( Alternative(..) )
+import Data.List           ( isPrefixOf, isSuffixOf )
 
-import GHC                 ( DynFlags, addSourceToTokens )
-import SrcLoc
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as BSC
+
+import GHC.LanguageExtensions.Type
+
+import BasicTypes          ( IntegralLit(..) )
+import DynFlags
+import qualified EnumSet as E
+import ErrUtils            ( emptyMessages )
 import FastString          ( mkFastString )
-import StringBuffer        ( stringToStringBuffer )
-import Lexer               ( Token(..) )
-import qualified Lexer as L
+import Lexer               ( P(..), ParseResult(..), PState(..), Token(..)
+                           , mkPStatePure, lexer, mkParserFlags' )
+import Outputable          ( showSDoc, panic )
+import SrcLoc
+import StringBuffer        ( StringBuffer, atEnd )
 
 import Haddock.Backends.Hyperlinker.Types as T
-
+import Haddock.GhcUtils
 
 -- | Turn source code string into a stream of more descriptive tokens.
 --
--- Result should retain original file layout (including comments, whitespace,
--- etc.), i.e. the following "law" should hold:
---
--- prop> concat . map tkValue . parse = id
---
--- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v',
--- characters, since GHC transforms those into ' ' and '\n')
-parse :: DynFlags -> FilePath -> String -> [T.Token]
-parse dflags fp = ghcToks . processCPP dflags fp . filterCRLF
+-- Result should retain original file layout (including comments,
+-- whitespace, and CPP).
+parse
+  :: CompilerInfo  -- ^ Underlying CC compiler (whatever expanded CPP)
+  -> DynFlags      -- ^ Flags for this module
+  -> FilePath      -- ^ Path to the source of this module
+  -> BS.ByteString -- ^ Raw UTF-8 encoded source of this module
+  -> [T.Token]
+parse comp dflags fpath bs = case unP (go False []) initState of
+    POk _ toks -> reverse toks
+    PFailed _ ss errMsg -> panic $ "Hyperlinker parse error at " ++ show ss ++
+                                   ": " ++ showSDoc dflags errMsg
   where
-    -- Remove CRLFs from source
-    filterCRLF :: String -> String
-    filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs
-    filterCRLF (c:cs) = c : filterCRLF cs
-    filterCRLF [] = []
 
--- | Parse the source into tokens using the GHC lexer.
+    initState = mkPStatePure pflags buf start
+    buf = stringBufferFromByteString bs
+    start = mkRealSrcLoc (mkFastString fpath) 1 1
+    needPragHack' = needPragHack comp dflags
+    pflags = mkParserFlags' (warningFlags dflags)
+                            (extensionFlags dflags)
+                            (thisPackage dflags)
+                            (safeImportsOn dflags)
+                            False -- lex Haddocks as comment tokens
+                            True  -- produce comment tokens
+                            False -- produce position pragmas tokens
+
+    go :: Bool        -- ^ are we currently in a pragma?
+       -> [T.Token]   -- ^ tokens accumulated so far (in reverse)
+       -> P [T.Token]
+    go inPrag toks = do
+      (b, _) <- getInput
+      if not (atEnd b)
+        then do
+          (newToks, inPrag') <- parseCppLine <|> parsePlainTok inPrag <|> unknownLine
+          go inPrag' (newToks ++ toks)
+        else
+          pure toks
+
+    -- | Like 'Lexer.lexer', but slower, with a better API, and filtering out empty tokens
+    wrappedLexer :: P (RealLocated Lexer.Token)
+    wrappedLexer = Lexer.lexer False andThen
+      where andThen (L (RealSrcSpan s) t)
+              | srcSpanStartLine s /= srcSpanEndLine s ||
+                srcSpanStartCol s /= srcSpanEndCol s
+              = pure (L s t)
+            andThen (L (RealSrcSpan s) ITeof) = pure (L s ITeof)
+            andThen _ = wrappedLexer
+
+    -- | Try to parse a CPP line (can fail)
+    parseCppLine :: P ([T.Token], Bool)
+    parseCppLine = do
+      (b, l) <- getInput
+      case tryCppLine l b of
+        Just (cppBStr, l', b')
+             -> let cppTok = T.Token { tkType = TkCpp
+                                     , tkValue = cppBStr
+                                     , tkSpan = mkRealSrcSpan l l' }
+                in setInput (b', l') *> pure ([cppTok], False)
+        _    -> empty
+
+    -- | Try to parse a regular old token (can fail)
+    parsePlainTok :: Bool -> P ([T.Token], Bool)  -- return list is only ever 0-2 elements
+    parsePlainTok inPrag = do
+      (bInit, lInit) <- getInput
+      L sp tok <- Lexer.lexer False return
+      (bEnd, _) <- getInput
+      case sp of
+        UnhelpfulSpan _ -> pure ([], False) -- pretend the token never existed
+        RealSrcSpan rsp -> do
+          let typ = if inPrag then TkPragma else classify tok
+              RealSrcLoc lStart = srcSpanStart sp -- safe since @sp@ is real
+              (spaceBStr, bStart) = spanPosition lInit lStart bInit
+              inPragDef = inPragma inPrag tok
+
+          (bEnd', inPrag') <- case tok of
+
+            -- Update internal line + file position if this is a LINE pragma
+            ITline_prag _ -> tryOrElse (bEnd, inPragDef) $ do
+              L _ (ITinteger (IL { il_value = line })) <- wrappedLexer
+              L _ (ITstring _ file)                    <- wrappedLexer
+              L spF ITclose_prag                       <- wrappedLexer
+
+              let newLoc = mkRealSrcLoc file (fromIntegral line - 1) (srcSpanEndCol spF)
+              (bEnd'', _) <- getInput
+              setInput (bEnd'', newLoc)
+
+              pure (bEnd'', False)
+
+            -- Update internal column position if this is a COLUMN pragma
+            ITcolumn_prag _ -> tryOrElse (bEnd, inPragDef) $ do
+              L _ (ITinteger (IL { il_value = col }))  <- wrappedLexer
+              L spF ITclose_prag                       <- wrappedLexer
+
+              let newLoc = mkRealSrcLoc (srcSpanFile spF) (srcSpanEndLine spF) (fromIntegral col)
+              (bEnd'', _) <- getInput
+              setInput (bEnd'', newLoc)
+
+              pure (bEnd'', False)
+
+            -- See 'needPragHack'
+            ITclose_prag{}
+              | needPragHack'
+              , '\n' `BSC.elem` spaceBStr
+              -> getInput >>= \(b,p) -> setInput (b,advanceSrcLoc p '\n') >> pure (bEnd, False)
+
+            _ -> pure (bEnd, inPragDef)
+
+          let tokBStr = splitStringBuffer bStart bEnd'
+              plainTok = T.Token { tkType = typ
+                                 , tkValue = tokBStr
+                                 , tkSpan = rsp }
+              spaceTok = T.Token { tkType = TkSpace
+                                 , tkValue = spaceBStr
+                                 , tkSpan = mkRealSrcSpan lInit lStart }
+
+          pure (plainTok : [ spaceTok | not (BS.null spaceBStr) ], inPrag')
+
+    -- | Parse whatever remains of the line as an unknown token (can't fail)
+    unknownLine :: P ([T.Token], Bool)
+    unknownLine = do
+      (b, l) <- getInput
+      let (unkBStr, l', b') = spanLine l b
+          unkTok = T.Token { tkType = TkUnknown
+                           , tkValue = unkBStr
+                           , tkSpan = mkRealSrcSpan l l' }
+      setInput (b', l')
+      pure ([unkTok], False)
+
+
+-- | This is really, really, /really/ gross. Problem: consider a Haskell
+-- file that looks like:
 --
---   * CPP lines are removed and reinserted as line-comments
---   * top-level file pragmas are parsed as block comments (see the
---     'ITblockComment' case of 'classify' for more details)
+-- @
+-- {-# LANGUAGE CPP #-}
+-- module SomeMod where
 --
-processCPP :: DynFlags    -- ^ GHC's flags
-           -> FilePath    -- ^ source file name (for position information)
-           -> String      -- ^ source file contents
-           -> [(Located L.Token, String)]
-processCPP dflags fpath s = addSrc . go start . splitCPP $ s
-  where
-    start = mkRealSrcLoc (mkFastString fpath) 1 1
-    addSrc = addSourceToTokens start (stringToStringBuffer s)
-
-    -- Transform a list of Haskell/CPP lines into a list of tokens
-    go :: RealSrcLoc -> [Either String String] -> [Located L.Token]
-    go _   [] = []
-    go pos ls =
-      let (hLinesRight,  ls')  = span isRight ls
-          (cppLinesLeft, rest) = span isLeft ls'
-
-          hSrc   = concat [ hLine   | Right hLine  <- hLinesRight  ]
-          cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ]
-
-      in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of
-
-           -- Stuff that fails to lex gets turned into comments
-           L.PFailed _ _ss _msg ->
-             let (src_pos, failed) = mkToken ITunknown pos hSrc
-                 (new_pos, cpp)    = mkToken ITlineComment src_pos cppSrc
-             in failed : cpp : go new_pos rest
-
-           -- Successfully lexed
-           L.POk ss toks ->
-             let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc
-             in toks ++ [cpp] ++ go new_pos rest
-
-    -- Manually make a token from a 'String', advancing the cursor position
-    mkToken tok start' str =
-      let end = foldl' advanceSrcLoc start' str
-      in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str))
-
-
--- | Split apart the initial file into Haskell source lines ('Left' entries) and
--- CPP lines ('Right' entries).
+-- #define SIX 6
+--
+-- {-# INLINE foo
+--   #-}
+-- foo = 1
+-- @
 --
--- All characters in the input are present in the output:
+-- Clang's CPP replaces the @#define SIX 6@ line with an empty line (as it
+-- should), but get confused about @#-}@. I'm guessing it /starts/ by
+-- parsing that as a pre-processor directive and, when it fails to, it just
+-- leaves the line alone. HOWEVER, it still adds an extra newline. =.=
 --
--- prop> concat . map (either id id) . splitCPP = id
-splitCPP :: String -> [Either String String]
-splitCPP "" = []
-splitCPP s | isCPPline s = Left l : splitCPP rest
-           | otherwise =  Right l : splitCPP rest
+-- This function makes sure that the Hyperlinker backend also adds that
+-- extra newline (or else our spans won't line up with GHC's anymore).
+needPragHack :: CompilerInfo -> DynFlags -> Bool
+needPragHack comp dflags = isCcClang && E.member Cpp (extensionFlags dflags)
   where
-    ~(l, rest) = spanToNewline 0 s
+    isCcClang = case comp of
+      GCC -> False
+      Clang -> True
+      AppleClang -> True
+      AppleClang51 -> True
+      UnknownCC -> False
 
+-- | Get the input
+getInput :: P (StringBuffer, RealSrcLoc)
+getInput = P $ \p @ PState { buffer = buf, loc = srcLoc } -> POk p (buf, srcLoc)
 
--- | Heuristic to decide if a line is going to be a CPP line. This should be a
--- cheap operation since it is going to be run on every line being processed.
---
--- Right now it just checks if the first non-whitespace character in the first
--- five characters of the line is a '#':
---
--- >>> isCPPline "#define FOO 1"
--- True
---
--- >>> isCPPline "\t\t  #ifdef GHC"
--- True
---
--- >>> isCPPline "       #endif"
--- False
---
-isCPPline :: String -> Bool
-isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
+-- | Set the input
+setInput :: (StringBuffer, RealSrcLoc) -> P ()
+setInput (buf, srcLoc) = P $ \p -> POk (p { buffer = buf, loc = srcLoc }) ()
 
 
--- | Split a "line" off the front of a string, hopefully without cutting tokens
--- in half. I say "hopefully" because knowing what a token is requires lexing,
--- yet lexing depends on this function.
---
--- All characters in the input are present in the output:
---
--- prop> curry (++) . spanToNewLine 0 = id
-spanToNewline :: Int                 -- ^ open '{-'
-              -> String              -- ^ input
-              -> (String, String)
-
--- Base case and space characters
-spanToNewline _ "" = ("", "")
-spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
-spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
-spanToNewline n ('\\':'\n':str) =
-    let (str', rest) = spanToNewline n str
-    in ('\\':'\n':str', rest)
-
--- Block comments
-spanToNewline n ('{':'-':str) =
-    let (str', rest) = spanToNewline (n+1) str
-    in ('{':'-':str', rest)
-spanToNewline n ('-':'}':str) =
-    let (str', rest) = spanToNewline (n-1) str
-    in ('-':'}':str', rest)
-
--- When not in a block comment, try to lex a Haskell token
-spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) =
-    if all (== '-') lexed && length lexed >= 2
-      -- A Haskell line comment
-      then case span (/= '\n') str' of
-             (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest)
-             (_, _) -> (str, "")
-
-      -- An actual Haskell token
-      else let (str'', rest) = spanToNewline 0 str'
-           in (lexed ++ str'', rest)
-
--- In all other cases, advance one character at a time
-spanToNewline n (c:str) =
-    let (str', rest) = spanToNewline n str
-    in (c:str', rest)
-
-
--- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of
--- Haddock's 'T.Token'.
-ghcToks :: [(Located L.Token, String)] -> [T.Token]
-ghcToks = reverse . (\(_,ts,_) -> ts) . foldl' go (start, [], False)
-  where
-    start = mkRealSrcLoc (mkFastString "lexing") 1 1
-
-    go :: (RealSrcLoc, [T.Token], Bool)
-       -- ^ current position, tokens accumulated, currently in pragma (or not)
-
-       -> (Located L.Token, String)
-       -- ^ next token, its content
-
-       -> (RealSrcLoc, [T.Token], Bool)
-       -- ^ new position, new tokens accumulated, currently in pragma (or not)
-
-    go (pos, toks, in_prag) (L l tok, raw) =
-        ( next_pos
-        , classifiedTok ++ maybeToList white ++ toks
-        , inPragma in_prag tok
-        )
-       where
-         (next_pos, white) = mkWhitespace pos l
-
-         classifiedTok = [ Token (classify' tok) raw rss
-                         | RealSrcSpan rss <- [l]
-                         , not (null raw)
-                         ]
-
-         classify' | in_prag = const TkPragma
-                   | otherwise = classify
-
-
--- | Find the correct amount of whitespace between tokens.
-mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token)
-mkWhitespace prev spn =
-  case spn of
-    UnhelpfulSpan _ -> (prev,Nothing)
-    RealSrcSpan s | null wsstring -> (end, Nothing)
-                  | otherwise -> (end, Just (Token TkSpace wsstring wsspan))
-      where
-        start = realSrcSpanStart s
-        end = realSrcSpanEnd s
-        wsspan = mkRealSrcSpan prev start
-        nls = srcLocLine start - srcLocLine prev
-        spaces = if nls == 0 then srcLocCol start - srcLocCol prev
-                             else srcLocCol start - 1
-        wsstring = replicate nls '\n' ++ replicate spaces ' '
+-- | Orphan instance that adds backtracking to 'P'
+instance Alternative P where
+  empty = P $ \_ -> PFailed (const emptyMessages) noSrcSpan "Alterative.empty"
+  P x <|> P y = P $ \s -> case x s of { p@POk{} -> p
+                                      ; _ -> y s }
 
+-- | Try a parser. If it fails, backtrack and return the pure value.
+tryOrElse :: a -> P a -> P a
+tryOrElse x p = p <|> pure x
 
 -- | Classify given tokens as appropriate Haskell token type.
-classify :: L.Token -> TokenType
+classify :: Lexer.Token -> TokenType
 classify tok =
   case tok of
     ITas                   -> TkKeyword
@@ -382,12 +381,7 @@ classify tok =
     ITunknown           {} -> TkUnknown
     ITeof                  -> TkUnknown
 
-    -- Line comments are only supposed to start with '--'. Starting with '#'
-    -- means that this was probably a CPP.
-    ITlineComment s
-      | isCPPline s        -> TkCpp
-      | otherwise          -> TkComment
-
+    ITlineComment       {} -> TkComment
     ITdocCommentNext    {} -> TkComment
     ITdocCommentPrev    {} -> TkComment
     ITdocCommentNamed   {} -> TkComment
@@ -404,9 +398,9 @@ classify tok =
       | otherwise          -> TkComment
 
 -- | Classify given tokens as beginning pragmas (or not).
-inPragma :: Bool     -- ^ currently in pragma
-         -> L.Token  -- ^ current token
-         -> Bool     -- ^ new information about whether we are in a pragma
+inPragma :: Bool           -- ^ currently in pragma
+         -> Lexer.Token    -- ^ current token
+         -> Bool           -- ^ new information about whether we are in a pragma
 inPragma _ ITclose_prag = False
 inPragma True _ = True
 inPragma False tok =
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
index d7ea70a6..a4dcb77b 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
@@ -1,4 +1,8 @@
 {-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE BangPatterns #-}
 
 module Haddock.Backends.Hyperlinker.Renderer (render) where
 
@@ -6,15 +10,19 @@ module Haddock.Backends.Hyperlinker.Renderer (render) where
 import Haddock.Backends.Hyperlinker.Types
 import Haddock.Backends.Hyperlinker.Utils
 
-import qualified GHC
-import qualified Name as GHC
-import qualified Unique as GHC
+import qualified Data.ByteString as BS
+
+import HieTypes
+import Module   ( ModuleName, moduleNameString )
+import Name     ( getOccString, isInternalName, Name, nameModule, nameUnique )
+import SrcLoc
+import Unique   ( getKey )
+import Encoding ( utf8DecodeByteString )
 
 import System.FilePath.Posix ((</>))
 
-import Data.List
-import Data.Maybe
 import qualified Data.Map as Map
+import qualified Data.Set as Set
 
 import Text.XHtml (Html, HtmlAttr, (!))
 import qualified Text.XHtml as Html
@@ -22,22 +30,24 @@ import qualified Text.XHtml as Html
 
 type StyleClass = String
 
+-- | Produce the HTML corresponding to a hyperlinked Haskell source
+render
+  :: Maybe FilePath    -- ^ path to the CSS file
+  -> Maybe FilePath    -- ^ path to the JS file
+  -> SrcMaps            -- ^ Paths to sources
+  -> HieAST PrintedType  -- ^ ASTs from @.hie@ files
+  -> [Token]       -- ^ tokens to render
+  -> Html
+render mcss mjs srcs ast tokens = header mcss mjs <> body srcs ast tokens
 
-render :: Maybe FilePath -> Maybe FilePath -> SrcMap -> [RichToken]
-       -> Html
-render mcss mjs srcs tokens = header mcss mjs <> body srcs tokens
-
-body :: SrcMap -> [RichToken] -> Html
-body srcs tokens = Html.body . Html.pre $ hypsrc
+body :: SrcMaps -> HieAST PrintedType -> [Token] -> Html
+body srcs ast tokens = Html.body . Html.pre $ hypsrc
   where
-    hypsrc = mconcat . map (richToken srcs) $ tokens
-
+    hypsrc = renderWithAst srcs ast tokens
 
 header :: Maybe FilePath -> Maybe FilePath -> Html
-header mcss mjs
-    | isNothing mcss && isNothing mjs = Html.noHtml
-header mcss mjs =
-    Html.header $ css mcss <> js mjs
+header Nothing Nothing = Html.noHtml
+header mcss mjs = Html.header $ css mcss <> js mjs
   where
     css Nothing = Html.noHtml
     css (Just cssFile) = Html.thelink Html.noHtml !
@@ -51,25 +61,132 @@ header mcss mjs =
         , Html.src scriptFile
         ]
 
+
+splitTokens :: HieAST PrintedType -> [Token] -> ([Token],[Token],[Token])
+splitTokens ast toks = (before,during,after)
+  where
+    (before,rest) = span leftOf toks
+    (during,after) = span inAst rest
+    leftOf t = realSrcSpanEnd (tkSpan t) <= realSrcSpanStart nodeSp
+    inAst t = nodeSp `containsSpan` tkSpan t
+    nodeSp = nodeSpan ast
+
+-- | Turn a list of tokens into hyperlinked sources, threading in relevant link
+-- information from the 'HieAST'.
+renderWithAst :: SrcMaps -> HieAST PrintedType -> [Token] -> Html
+renderWithAst srcs Node{..} toks = anchored $ case toks of
+
+    [tok] | nodeSpan == tkSpan tok -> richToken srcs nodeInfo tok
+
+    -- NB: the GHC lexer lexes backquoted identifiers and parenthesized operators
+    -- as multiple tokens.
+    --
+    --  * @a `elem` b@ turns into @[a, `, elem, `, b]@ (excluding space tokens)
+    --  * @(+) 1 2@    turns into @[(, +, ), 1, 2]@    (excluding space tokens)
+    --
+    -- However, the HIE ast considers @`elem`@ and @(+)@ to be single nodes. In
+    -- order to make sure these get hyperlinked properly, we intercept these
+    -- special sequences of tokens and merge them into just one identifier or
+    -- operator token.
+    [BacktickTok s1, tok @ Token{ tkType = TkIdentifier }, BacktickTok s2]
+          | realSrcSpanStart s1 == realSrcSpanStart nodeSpan
+          , realSrcSpanEnd s2   == realSrcSpanEnd nodeSpan
+          -> richToken srcs nodeInfo
+                       (Token{ tkValue = "`" <> tkValue tok <> "`"
+                             , tkType = TkOperator
+                             , tkSpan = nodeSpan })
+    [OpenParenTok s1, tok @ Token{ tkType = TkOperator }, CloseParenTok s2]
+          | realSrcSpanStart s1 == realSrcSpanStart nodeSpan
+          , realSrcSpanEnd s2   == realSrcSpanEnd nodeSpan
+          -> richToken srcs nodeInfo
+                       (Token{ tkValue = "(" <> tkValue tok <> ")"
+                             , tkType = TkOperator
+                             , tkSpan = nodeSpan })
+
+    _ -> go nodeChildren toks
+  where
+    go _ [] = mempty
+    go [] xs = foldMap renderToken xs
+    go (cur:rest) xs =
+        foldMap renderToken before <> renderWithAst srcs cur during <> go rest after
+      where
+        (before,during,after) = splitTokens cur xs
+    anchored c = Map.foldrWithKey anchorOne c (nodeIdentifiers nodeInfo)
+    anchorOne n dets c = externalAnchor n d $ internalAnchor n d c
+      where d = identInfo dets
+
+renderToken :: Token -> Html
+renderToken Token{..}
+    | BS.null tkValue = mempty
+    | tkType == TkSpace = renderSpace (srcSpanStartLine tkSpan) tkValue'
+    | otherwise = tokenSpan ! [ multiclass style ]
+  where
+    tkValue' = filterCRLF $ utf8DecodeByteString tkValue
+    style = tokenStyle tkType
+    tokenSpan = Html.thespan (Html.toHtml tkValue')
+
+
 -- | Given information about the source position of definitions, render a token
-richToken :: SrcMap -> RichToken -> Html
-richToken srcs (RichToken Token{..} details)
-    | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue
-    | otherwise = linked content
+richToken :: SrcMaps -> NodeInfo PrintedType -> Token -> Html
+richToken srcs details Token{..}
+    | tkType == TkSpace = renderSpace (srcSpanStartLine tkSpan) tkValue'
+    | otherwise = annotate details $ linked content
   where
+    tkValue' = filterCRLF $ utf8DecodeByteString tkValue
     content = tokenSpan ! [ multiclass style ]
-    tokenSpan = Html.thespan (Html.toHtml tkValue)
-    style = tokenStyle tkType ++ maybe [] richTokenStyle details
+    tokenSpan = Html.thespan (Html.toHtml tkValue')
+    style = tokenStyle tkType ++ concatMap (richTokenStyle (null (nodeType details))) contexts
+
+    contexts = concatMap (Set.elems . identInfo) . Map.elems . nodeIdentifiers $ details
+
+    -- pick an arbitary identifier to hyperlink with
+    identDet = Map.lookupMin . nodeIdentifiers $ details
 
     -- If we have name information, we can make links
-    linked = case details of
-      Just d -> externalAnchor d . internalAnchor d . hyperlink srcs d
+    linked = case identDet of
+      Just (n,_) -> hyperlink srcs n
       Nothing -> id
 
-richTokenStyle :: TokenDetails -> [StyleClass]
-richTokenStyle (RtkVar _) = ["hs-var"]
-richTokenStyle (RtkType _) = ["hs-type"]
-richTokenStyle _ = []
+-- | Remove CRLFs from source
+filterCRLF :: String -> String
+filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs
+filterCRLF (c:cs) = c : filterCRLF cs
+filterCRLF [] = []
+
+annotate :: NodeInfo PrintedType -> Html -> Html
+annotate  ni content =
+    Html.thespan (annot <> content) ! [ Html.theclass "annot" ]
+  where
+    annot
+      | not (null annotation) =
+          Html.thespan (Html.toHtml annotation) ! [ Html.theclass "annottext" ]
+      | otherwise = mempty
+    annotation = typ ++ identTyps
+    typ = unlines (nodeType ni)
+    typedIdents = [ (n,t) | (n, identType -> Just t) <- Map.toList $ nodeIdentifiers ni ]
+    identTyps
+      | length typedIdents > 1 || null (nodeType ni)
+          = concatMap (\(n,t) -> printName n ++ " :: " ++ t ++ "\n") typedIdents
+      | otherwise = ""
+
+    printName :: Either ModuleName Name -> String
+    printName = either moduleNameString getOccString
+
+richTokenStyle
+  :: Bool         -- ^ are we lacking a type annotation?
+  -> ContextInfo  -- ^ in what context did this token show up?
+  -> [StyleClass]
+richTokenStyle True  Use           = ["hs-type"]
+richTokenStyle False Use           = ["hs-var"]
+richTokenStyle  _    RecField{}    = ["hs-var"]
+richTokenStyle  _    PatternBind{} = ["hs-var"]
+richTokenStyle  _    MatchBind{}   = ["hs-var"]
+richTokenStyle  _    TyVarBind{}   = ["hs-type"]
+richTokenStyle  _    ValBind{}     = ["hs-var"]
+richTokenStyle  _    TyDecl        = ["hs-type"]
+richTokenStyle  _    ClassTyDecl{} = ["hs-type"]
+richTokenStyle  _    Decl{}        = ["hs-var"]
+richTokenStyle  _    IEThing{}     = []  -- could be either a value or type
 
 tokenStyle :: TokenType -> [StyleClass]
 tokenStyle TkIdentifier = ["hs-identifier"]
@@ -87,61 +204,70 @@ tokenStyle TkPragma = ["hs-pragma"]
 tokenStyle TkUnknown = []
 
 multiclass :: [StyleClass] -> HtmlAttr
-multiclass = Html.theclass . intercalate " "
+multiclass = Html.theclass . unwords
+
+externalAnchor :: Identifier -> Set.Set ContextInfo -> Html -> Html
+externalAnchor (Right name) contexts content
+  | not (isInternalName name)
+  , any isBinding contexts
+  = Html.thespan content ! [ Html.identifier $ externalAnchorIdent name ]
+externalAnchor _ _ content = content
 
-externalAnchor :: TokenDetails -> Html -> Html
-externalAnchor (RtkDecl name) content =
-    Html.anchor content ! [ Html.name $ externalAnchorIdent name ]
-externalAnchor _ content = content
+isBinding :: ContextInfo -> Bool
+isBinding (ValBind RegularBind _ _) = True
+isBinding PatternBind{} = True
+isBinding Decl{} = True
+isBinding (RecField RecFieldDecl _) = True
+isBinding TyVarBind{} = True
+isBinding ClassTyDecl{} = True
+isBinding _ = False
 
-internalAnchor :: TokenDetails -> Html -> Html
-internalAnchor (RtkBind name) content =
-    Html.anchor content ! [ Html.name $ internalAnchorIdent name ]
-internalAnchor _ content = content
+internalAnchor :: Identifier -> Set.Set ContextInfo -> Html -> Html
+internalAnchor (Right name) contexts content
+  | isInternalName name
+  , any isBinding contexts
+  = Html.thespan content ! [ Html.identifier $ internalAnchorIdent name ]
+internalAnchor _ _ content = content
 
-externalAnchorIdent :: GHC.Name -> String
+externalAnchorIdent :: Name -> String
 externalAnchorIdent = hypSrcNameUrl
 
-internalAnchorIdent :: GHC.Name -> String
-internalAnchorIdent = ("local-" ++) . show . GHC.getKey . GHC.nameUnique
-
-hyperlink :: SrcMap -> TokenDetails -> Html -> Html
-hyperlink srcs details = case rtkName details of
-    Left name ->
-        if GHC.isInternalName name
-        then internalHyperlink name
-        else externalNameHyperlink srcs name
-    Right name -> externalModHyperlink srcs name
-
-internalHyperlink :: GHC.Name -> Html -> Html
-internalHyperlink name content =
-    Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
-
-externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html
-externalNameHyperlink srcs name content = case Map.lookup mdl srcs of
-    Just SrcLocal -> Html.anchor content !
-        [ Html.href $ hypSrcModuleNameUrl mdl name ]
-    Just (SrcExternal path) -> Html.anchor content !
-        [ Html.href $ path </> hypSrcModuleNameUrl mdl name ]
-    Nothing -> content
+internalAnchorIdent :: Name -> String
+internalAnchorIdent = ("local-" ++) . show . getKey . nameUnique
+
+-- | Generate the HTML hyperlink for an identifier
+hyperlink :: SrcMaps -> Identifier -> Html -> Html
+hyperlink (srcs, srcs') ident = case ident of
+    Right name | isInternalName name -> internalHyperlink name
+               | otherwise -> externalNameHyperlink name
+    Left name -> externalModHyperlink name
+
   where
-    mdl = GHC.nameModule name
+    internalHyperlink name content =
+        Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
+
+    externalNameHyperlink name content = case Map.lookup mdl srcs of
+        Just SrcLocal -> Html.anchor content !
+            [ Html.href $ hypSrcModuleNameUrl mdl name ]
+        Just (SrcExternal path) -> Html.anchor content !
+            [ Html.href $ spliceURL Nothing (Just mdl) (Just name) Nothing (".." </> path) ]
+        Nothing -> content
+      where
+        mdl = nameModule name
 
-externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html
-externalModHyperlink srcs name content =
-    let srcs' = Map.mapKeys GHC.moduleName srcs in
-    case Map.lookup name srcs' of
-      Just SrcLocal -> Html.anchor content !
-        [ Html.href $ hypSrcModuleUrl' name ]
-      Just (SrcExternal path) -> Html.anchor content !
-        [ Html.href $ path </> hypSrcModuleUrl' name ]
-      Nothing -> content
+    externalModHyperlink moduleName content =
+        case Map.lookup moduleName srcs' of
+          Just SrcLocal -> Html.anchor content !
+            [ Html.href $ hypSrcModuleUrl' moduleName ]
+          Just (SrcExternal path) -> Html.anchor content !
+            [ Html.href $ spliceURL' Nothing (Just moduleName) Nothing Nothing (".." </> path) ]
+          Nothing -> content
 
 
 renderSpace :: Int -> String -> Html
-renderSpace _ [] = Html.noHtml
-renderSpace line ('\n':rest) = mconcat
-    [ Html.thespan . Html.toHtml $ "\n"
+renderSpace !_ "" = Html.noHtml
+renderSpace !line ('\n':rest) = mconcat
+    [ Html.thespan (Html.toHtml '\n')
     , lineAnchor (line + 1)
     , renderSpace (line + 1) rest
     ]
@@ -151,4 +277,4 @@ renderSpace line space =
 
 
 lineAnchor :: Int -> Html
-lineAnchor line = Html.anchor Html.noHtml ! [ Html.name $ hypSrcLineUrl line ]
+lineAnchor line = Html.thespan Html.noHtml ! [ Html.identifier $ hypSrcLineUrl line ]
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
index e377471e..50916937 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -1,17 +1,24 @@
+{-# LANGUAGE PatternSynonyms, OverloadedStrings #-}
 module Haddock.Backends.Hyperlinker.Types where
 
-
 import qualified GHC
 
+import Data.ByteString  ( ByteString )
+
 import Data.Map (Map)
 
 data Token = Token
     { tkType :: TokenType
-    , tkValue :: String
+    , tkValue :: ByteString -- ^ UTF-8 encoded
     , tkSpan :: {-# UNPACK #-} !Span
     }
     deriving (Show)
 
+pattern BacktickTok, OpenParenTok, CloseParenTok :: Span -> Token
+pattern BacktickTok   sp = Token TkSpecial "`" sp
+pattern OpenParenTok  sp = Token TkSpecial "(" sp
+pattern CloseParenTok sp = Token TkSpecial ")" sp
+
 type Position = GHC.RealSrcLoc
 type Span = GHC.RealSrcSpan
 
@@ -31,29 +38,6 @@ data TokenType
     | TkUnknown
     deriving (Show, Eq)
 
-
-data RichToken = RichToken
-    { rtkToken :: Token
-    , rtkDetails :: Maybe TokenDetails
-    }
-
-data TokenDetails
-    = RtkVar GHC.Name
-    | RtkType GHC.Name
-    | RtkBind GHC.Name
-    | RtkDecl GHC.Name
-    | RtkModule GHC.ModuleName
-    deriving (Eq)
-
-
-rtkName :: TokenDetails -> Either GHC.Name GHC.ModuleName
-rtkName (RtkVar name) = Left name
-rtkName (RtkType name) = Left name
-rtkName (RtkBind name) = Left name
-rtkName (RtkDecl name) = Left name
-rtkName (RtkModule name) = Right name
-
-
 -- | Path for making cross-package hyperlinks in generated sources.
 --
 -- Used in 'SrcMap' to determine whether module originates in current package
@@ -63,5 +47,5 @@ data SrcPath
     | SrcLocal
 
 -- | Mapping from modules to cross-package source paths.
-type SrcMap = Map GHC.Module SrcPath
+type SrcMaps = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath)
 
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
index 9de4a03d..4e8b88d2 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 module Haddock.Backends.Hyperlinker.Utils
     ( hypSrcDir, hypSrcModuleFile, hypSrcModuleFile'
     , hypSrcModuleUrl, hypSrcModuleUrl'
@@ -6,21 +7,35 @@ module Haddock.Backends.Hyperlinker.Utils
     , hypSrcModuleNameUrl, hypSrcModuleLineUrl
     , hypSrcModuleUrlFormat
     , hypSrcModuleNameUrlFormat, hypSrcModuleLineUrlFormat
-    ) where
+    , spliceURL, spliceURL'
 
+    -- * HIE file processing
+    , PrintedType
+    , recoverFullIfaceTypes
+    ) where
 
+import Haddock.Utils
 import Haddock.Backends.Xhtml.Utils
 
 import GHC
-import FastString
-import System.FilePath.Posix ((</>))
+import HieTypes     ( HieAST(..), HieType(..), HieArgs(..), TypeIndex, HieTypeFlat )
+import IfaceType
+import Name         ( getOccFS, getOccString )
+import Outputable   ( showSDoc )
+import Var          ( VarBndr(..) )
+
+import System.FilePath.Posix ((</>), (<.>))
 
+import qualified Data.Array as A
 
+
+{-# INLINE hypSrcDir #-}
 hypSrcDir :: FilePath
 hypSrcDir = "src"
 
+{-# INLINE hypSrcModuleFile #-}
 hypSrcModuleFile :: Module -> FilePath
-hypSrcModuleFile = hypSrcModuleFile' . moduleName
+hypSrcModuleFile m = moduleNameString (moduleName m) <.> "html"
 
 hypSrcModuleFile' :: ModuleName -> FilePath
 hypSrcModuleFile' mdl = spliceURL'
@@ -32,20 +47,19 @@ hypSrcModuleUrl = hypSrcModuleFile
 hypSrcModuleUrl' :: ModuleName -> String
 hypSrcModuleUrl' = hypSrcModuleFile'
 
+{-# INLINE hypSrcNameUrl #-}
 hypSrcNameUrl :: Name -> String
-hypSrcNameUrl name = spliceURL
-    Nothing Nothing (Just name) Nothing nameFormat
+hypSrcNameUrl = escapeStr . getOccString
 
+{-# INLINE hypSrcLineUrl #-}
 hypSrcLineUrl :: Int -> String
-hypSrcLineUrl line = spliceURL
-    Nothing Nothing Nothing (Just spn) lineFormat
-  where
-    loc = mkSrcLoc nilFS line 1
-    spn = mkSrcSpan loc loc
+hypSrcLineUrl line = "line-" ++ show line
 
+{-# INLINE hypSrcModuleNameUrl #-}
 hypSrcModuleNameUrl :: Module -> Name -> String
 hypSrcModuleNameUrl mdl name = hypSrcModuleUrl mdl ++ "#" ++ hypSrcNameUrl name
 
+{-# INLINE hypSrcModuleLineUrl #-}
 hypSrcModuleLineUrl :: Module -> Int -> String
 hypSrcModuleLineUrl mdl line = hypSrcModuleUrl mdl ++ "#" ++ hypSrcLineUrl line
 
@@ -66,3 +80,65 @@ nameFormat = "%{NAME}"
 
 lineFormat :: String
 lineFormat = "line-%{LINE}"
+
+
+-- * HIE file procesddsing
+
+-- This belongs in GHC's HieUtils...
+
+-- | Pretty-printed type, ready to be turned into HTML by @xhtml@
+type PrintedType = String
+
+-- | Expand the flattened HIE AST into one where the types printed out and
+-- ready for end-users to look at.
+--
+-- Using just primitives found in GHC's HIE utilities, we could write this as
+-- follows:
+--
+-- > 'recoverFullIfaceTypes' dflags hieTypes hieAst
+-- >     = 'fmap' (\ti -> 'showSDoc' df .
+-- >                      'pprIfaceType' $
+-- >                      'recoverFullType' ti hieTypes)
+-- >       hieAst
+--
+-- However, this is very inefficient (both in time and space) because the
+-- mutliple calls to 'recoverFullType' don't share intermediate results. This
+-- function fixes that.
+recoverFullIfaceTypes
+  :: DynFlags
+  -> A.Array TypeIndex HieTypeFlat -- ^ flat types
+  -> HieAST TypeIndex              -- ^ flattened AST
+  -> HieAST PrintedType       -- ^ full AST
+recoverFullIfaceTypes df flattened ast = fmap (printed A.!) ast
+    where
+
+    -- Splitting this out into its own array is also important: we don't want
+    -- to pretty print the same type many times
+    printed :: A.Array TypeIndex PrintedType
+    printed = fmap (showSDoc df . pprIfaceType) unflattened
+
+    -- The recursion in 'unflattened' is crucial - it's what gives us sharing
+    -- between the IfaceType's produced
+    unflattened :: A.Array TypeIndex IfaceType
+    unflattened = fmap (\flatTy -> go (fmap (unflattened A.!) flatTy)) flattened
+
+    -- Unfold an 'HieType' whose subterms have already been unfolded
+    go :: HieType IfaceType -> IfaceType
+    go (HTyVarTy n) = IfaceTyVar (getOccFS n)
+    go (HAppTy a b) = IfaceAppTy a (hieToIfaceArgs b)
+    go (HLitTy l) = IfaceLitTy l
+    go (HForAllTy ((n,k),af) t) = let b = (getOccFS n, k)
+                                  in IfaceForAllTy (Bndr (IfaceTvBndr b) af) t
+    go (HFunTy a b) = IfaceFunTy a b
+    go (HQualTy con b) = IfaceDFunTy con b
+    go (HCastTy a) = a
+    go HCoercionTy = IfaceTyVar "<coercion type>"
+    go (HTyConApp a xs) = IfaceTyConApp a (hieToIfaceArgs xs)
+
+    -- This isn't fully faithful - we can't produce the 'Inferred' case
+    hieToIfaceArgs :: HieArgs IfaceType -> IfaceAppArgs
+    hieToIfaceArgs (HieArgs args) = go' args
+      where
+        go' [] = IA_Nil
+        go' ((True ,x):xs) = IA_Arg x Required $ go' xs
+        go' ((False,x):xs) = IA_Arg x Specified $ go' xs
diff --git a/haddock-api/src/Haddock/GhcUtils.hs b/haddock-api/src/Haddock/GhcUtils.hs
index cdaf6ae4..a342de00 100644
--- a/haddock-api/src/Haddock/GhcUtils.hs
+++ b/haddock-api/src/Haddock/GhcUtils.hs
@@ -19,10 +19,12 @@ module Haddock.GhcUtils where
 
 
 import Control.Arrow
+import Data.Char ( isSpace )
+
 import Haddock.Types( DocNameI )
 
 import Exception
-import Outputable
+import Outputable ( Outputable, panic, showPpr )
 import Name
 import NameSet
 import Module
@@ -30,6 +32,14 @@ import HscTypes
 import GHC
 import Class
 import DynFlags
+import SrcLoc    ( advanceSrcLoc )
+
+import           StringBuffer ( StringBuffer )
+import qualified StringBuffer             as S
+
+import           Data.ByteString ( ByteString )
+import qualified Data.ByteString          as BS
+import qualified Data.ByteString.Internal as BS
 
 
 moduleString :: Module -> String
@@ -413,11 +423,129 @@ minimalDef n = do
 -------------------------------------------------------------------------------
 
 
-setObjectDir, setHiDir, setStubDir, setOutputDir :: String -> DynFlags -> DynFlags
+setObjectDir, setHiDir, setHieDir, setStubDir, setOutputDir :: String -> DynFlags -> DynFlags
 setObjectDir  f d = d{ objectDir  = Just f}
 setHiDir      f d = d{ hiDir      = Just f}
+setHieDir     f d = d{ hieDir     = Just f}
 setStubDir    f d = d{ stubDir    = Just f
                      , includePaths = addGlobalInclude (includePaths d) [f] }
   -- -stubdir D adds an implicit -I D, so that gcc can find the _stub.h file
   -- \#included from the .hc file when compiling with -fvia-C.
-setOutputDir  f = setObjectDir f . setHiDir f . setStubDir f
+setOutputDir  f = setObjectDir f . setHiDir f . setHieDir f . setStubDir f
+
+
+-------------------------------------------------------------------------------
+-- * 'StringBuffer' and 'ByteString'
+-------------------------------------------------------------------------------
+-- We get away with a bunch of these functions because 'StringBuffer' and
+-- 'ByteString' have almost exactly the same structure.
+
+-- | Convert a UTF-8 encoded 'ByteString' into a 'StringBuffer. This really
+-- relies on the internals of both 'ByteString' and 'StringBuffer'.
+--
+-- /O(n)/ (but optimized into a @memcpy@ by @bytestring@ under the hood)
+stringBufferFromByteString :: ByteString -> StringBuffer
+stringBufferFromByteString bs =
+  let BS.PS fp off len = bs <> BS.pack [0,0,0]
+  in S.StringBuffer { S.buf = fp, S.len = len - 3, S.cur = off }
+
+-- | Take the first @n@ /bytes/ of the 'StringBuffer' and put them in a
+-- 'ByteString'.
+--
+-- /O(1)/
+takeStringBuffer :: Int -> StringBuffer -> ByteString
+takeStringBuffer !n !(S.StringBuffer fp _ cur) = BS.PS fp cur n
+
+-- | Return the prefix of the first 'StringBuffer' that /isn't/ in the second
+-- 'StringBuffer'. **The behavior is undefined if the 'StringBuffers' use
+-- separate buffers.**
+--
+-- /O(1)/
+splitStringBuffer :: StringBuffer -> StringBuffer -> ByteString
+splitStringBuffer buf1 buf2 = takeStringBuffer n buf1
+  where n = S.byteDiff buf1 buf2
+
+-- | Split the 'StringBuffer' at the next newline (or the end of the buffer).
+-- Also: initial position is passed in and the updated position is returned.
+--
+-- /O(n)/ (but /O(1)/ space)
+spanLine :: RealSrcLoc -> StringBuffer -> (ByteString, RealSrcLoc, StringBuffer)
+spanLine !loc !buf = go loc buf
+  where
+
+  go !l !b
+    | not (S.atEnd b)
+    = case S.nextChar b of
+        ('\n', b') -> (splitStringBuffer buf b', advanceSrcLoc l '\n', b')
+        (c,    b') -> go (advanceSrcLoc l c) b'
+    | otherwise
+    = (splitStringBuffer buf b, advanceSrcLoc l '\n', b)
+
+-- | Given a start position and a buffer with that start position, split the
+-- buffer at an end position.
+--
+-- /O(n)/ (but /O(1)/ space)
+spanPosition :: RealSrcLoc   -- ^ start of buffeer
+             -> RealSrcLoc   -- ^ position until which to take
+             -> StringBuffer -- ^ buffer from which to take
+             -> (ByteString, StringBuffer)
+spanPosition !start !end !buf = go start buf
+  where
+
+  go !l !b
+    | l < end
+    , not (S.atEnd b)
+    , (c, b') <- S.nextChar b
+    = go (advanceSrcLoc l c) b'
+    | otherwise
+    = (splitStringBuffer buf b, b)
+
+-- | Try to parse a line of CPP from the from of the buffer. A \"line\" of CPP
+-- consists of
+--
+--   * at most 10 whitespace characters, including at least one newline
+--   * a @#@ character
+--   * keep parsing lines until you find a line not ending in @\\@.
+--
+-- This is chock full of heuristics about what a line of CPP is.
+--
+-- /O(n)/ (but /O(1)/ space)
+tryCppLine :: RealSrcLoc -> StringBuffer -> Maybe (ByteString, RealSrcLoc, StringBuffer)
+tryCppLine !loc !buf = spanSpace (S.prevChar buf '\n' == '\n') loc buf
+  where
+
+  -- Keep consuming space characters until we hit either a @#@ or something
+  -- else. If we hit a @#@, start parsing CPP
+  spanSpace !seenNl !l !b
+    | S.atEnd b
+    = Nothing
+    | otherwise
+    = case S.nextChar b of
+        ('#' , b') | not (S.atEnd b')
+                   , ('-', b'') <- S.nextChar b'
+                   , ('}', _) <- S.nextChar b''
+                   -> Nothing -- Edge case exception for @#-}@
+                   | seenNl
+                   -> Just (spanCppLine (advanceSrcLoc l '#') b') -- parse CPP
+                   | otherwise
+                   -> Nothing -- We didn't see a newline, so this can't be CPP!
+
+        (c   , b') | isSpace c -> spanSpace (seenNl || c == '\n')
+                                            (advanceSrcLoc l c) b'
+                   | otherwise -> Nothing
+
+  -- Consume a CPP line to its "end" (basically the first line that ends not
+  -- with a @\@ character)
+  spanCppLine !l !b
+    | S.atEnd b
+    = (splitStringBuffer buf b, l, b)
+    | otherwise
+    = case S.nextChar b of
+        ('\\', b') | not (S.atEnd b')
+                   , ('\n', b'') <- S.nextChar b'
+                   -> spanCppLine (advanceSrcLoc (advanceSrcLoc l '\\') '\n') b''
+
+        ('\n', b') -> (splitStringBuffer buf b', advanceSrcLoc l '\n', b')
+
+        (c   , b') -> spanCppLine (advanceSrcLoc l c) b'
+
diff --git a/haddock-api/src/Haddock/Interface.hs b/haddock-api/src/Haddock/Interface.hs
index 8bfc249c..f1b2d45e 100644
--- a/haddock-api/src/Haddock/Interface.hs
+++ b/haddock-api/src/Haddock/Interface.hs
@@ -43,18 +43,16 @@ import Haddock.Types
 import Haddock.Utils
 
 import Control.Monad
+import Control.Exception (evaluate)
 import Data.List
 import qualified Data.Map as Map
 import qualified Data.Set as Set
 import Distribution.Verbosity
-import System.Directory
-import System.FilePath
 import Text.Printf
 
 import Module (mkModuleSet, emptyModuleSet, unionModuleSet, ModuleSet)
 import Digraph
 import DynFlags hiding (verbosity)
-import Exception
 import GHC hiding (verbosity)
 import HscTypes
 import FastString (unpackFS)
@@ -90,7 +88,7 @@ processModules verbosity modules flags extIfaces = do
   out verbosity verbose "Creating interfaces..."
   let instIfaceMap =  Map.fromList [ (instMod iface, iface) | ext <- extIfaces
                                    , iface <- ifInstalledIfaces ext ]
-  (interfaces, ms) <- createIfaces0 verbosity modules flags instIfaceMap
+  (interfaces, ms) <- createIfaces verbosity modules flags instIfaceMap
 
   let exportedNames =
         Set.unions $ map (Set.fromList . ifaceExports) $
@@ -123,39 +121,15 @@ processModules verbosity modules flags extIfaces = do
 --------------------------------------------------------------------------------
 
 
-createIfaces0 :: Verbosity -> [String] -> [Flag] -> InstIfaceMap -> Ghc ([Interface], ModuleSet)
-createIfaces0 verbosity modules flags instIfaceMap =
-  -- Output dir needs to be set before calling depanal since depanal uses it to
-  -- compute output file names that are stored in the DynFlags of the
-  -- resulting ModSummaries.
-  (if useTempDir then withTempOutputDir else id) $ do
-    modGraph <- depAnalysis
-    createIfaces verbosity flags instIfaceMap modGraph
+createIfaces :: Verbosity -> [String] -> [Flag] -> InstIfaceMap -> Ghc ([Interface], ModuleSet)
+createIfaces verbosity modules flags instIfaceMap = do
+  -- Ask GHC to tell us what the module graph is
+  targets <- mapM (\filePath -> guessTarget filePath Nothing) modules
+  setTargets targets
+  modGraph <- depanal [] False
 
-  where
-    useTempDir :: Bool
-    useTempDir = Flag_NoTmpCompDir `notElem` flags
-
-
-    withTempOutputDir :: Ghc a -> Ghc a
-    withTempOutputDir action = do
-      tmp <- liftIO getTemporaryDirectory
-      x   <- liftIO getProcessID
-      let dir = tmp </> ".haddock-" ++ show x
-      modifySessionDynFlags (setOutputDir dir)
-      withTempDir dir action
-
-
-    depAnalysis :: Ghc ModuleGraph
-    depAnalysis = do
-      targets <- mapM (\f -> guessTarget f Nothing) modules
-      setTargets targets
-      depanal [] False
-
-
-createIfaces :: Verbosity -> [Flag] -> InstIfaceMap -> ModuleGraph -> Ghc ([Interface], ModuleSet)
-createIfaces verbosity flags instIfaceMap mods = do
-  let sortedMods = flattenSCCs $ topSortModuleGraph False mods Nothing
+  -- Visit modules in that order
+  let sortedMods = flattenSCCs $ topSortModuleGraph False modGraph Nothing
   out verbosity normal "Haddock coverage:"
   (ifaces, _, !ms) <- foldM f ([], Map.empty, emptyModuleSet) sortedMods
   return (reverse ifaces, ms)
@@ -263,12 +237,3 @@ buildHomeLinks ifaces = foldl upd Map.empty (reverse ifaces)
         keep_old env n = Map.insertWith (\_ old -> old) n mdl env
         keep_new env n = Map.insert n mdl env
 
-
---------------------------------------------------------------------------------
--- * Utils
---------------------------------------------------------------------------------
-
-
-withTempDir :: (ExceptionMonad m) => FilePath -> m a -> m a
-withTempDir dir = gbracket_ (liftIO $ createDirectory dir)
-                            (liftIO $ removeDirectoryRecursive dir)
diff --git a/haddock-api/src/Haddock/Interface/Create.hs b/haddock-api/src/Haddock/Interface/Create.hs
index c9290ed0..36cfeaca 100644
--- a/haddock-api/src/Haddock/Interface/Create.hs
+++ b/haddock-api/src/Haddock/Interface/Create.hs
@@ -20,27 +20,21 @@
 module Haddock.Interface.Create (createInterface) where
 
 import Documentation.Haddock.Doc (metaDocAppend)
-import Documentation.Haddock.Utf8 as Utf8
 import Haddock.Types
 import Haddock.Options
 import Haddock.GhcUtils
 import Haddock.Utils
 import Haddock.Convert
 import Haddock.Interface.LexParseRn
-import Haddock.Backends.Hyperlinker.Types
-import Haddock.Backends.Hyperlinker.Ast as Hyperlinker
-import Haddock.Backends.Hyperlinker.Parser as Hyperlinker
 
 import Data.Bifunctor
 import Data.Bitraversable
-import qualified Data.ByteString as BS
 import qualified Data.Map as M
 import Data.Map (Map)
 import Data.List
 import Data.Maybe
 import Data.Ord
 import Control.Applicative
-import Control.Exception (evaluate)
 import Control.Monad
 import Data.Traversable
 
@@ -169,8 +163,6 @@ createInterface tm flags modMap instIfaceMap = do
 
   modWarn <- liftErrMsg (moduleWarning dflags gre warnings)
 
-  tokenizedSrc <- mkMaybeTokenizedSrc dflags flags tm
-
   return $! Interface {
     ifaceMod               = mdl
   , ifaceIsSig             = is_sig
@@ -196,7 +188,8 @@ createInterface tm flags modMap instIfaceMap = do
   , ifaceRnOrphanInstances = [] -- Filled in `renameInterface`
   , ifaceHaddockCoverage   = coverage
   , ifaceWarningMap        = warningMap
-  , ifaceTokenizedSrc      = tokenizedSrc
+  , ifaceHieFile           = Just $ ml_hie_file $ ms_location ms
+  , ifaceDynFlags          = dflags
   }
 
 
@@ -1200,34 +1193,6 @@ seqList :: [a] -> ()
 seqList [] = ()
 seqList (x : xs) = x `seq` seqList xs
 
-mkMaybeTokenizedSrc :: DynFlags -> [Flag] -> TypecheckedModule
-                    -> ErrMsgGhc (Maybe [RichToken])
-mkMaybeTokenizedSrc dflags flags tm
-    | Flag_HyperlinkedSource `elem` flags = case renamedSource tm of
-        Just src -> do
-            tokens <- liftGhcToErrMsgGhc (liftIO (mkTokenizedSrc dflags summary src))
-            return $ Just tokens
-        Nothing -> do
-            liftErrMsg . tell . pure $ concat
-                [ "Warning: Cannot hyperlink module \""
-                , moduleNameString . ms_mod_name $ summary
-                , "\" because renamed source is not available"
-                ]
-            return Nothing
-    | otherwise = return Nothing
-  where
-    summary = pm_mod_summary . tm_parsed_module $ tm
-
-mkTokenizedSrc :: DynFlags -> ModSummary -> RenamedSource -> IO [RichToken]
-mkTokenizedSrc dflags ms src = do
-  -- make sure to read the whole file at once otherwise
-  -- we run out of file descriptors (see #495)
-  rawSrc <- BS.readFile (msHsFilePath ms) >>= evaluate
-  let tokens = Hyperlinker.parse dflags filepath (Utf8.decodeUtf8 rawSrc)
-  return $ Hyperlinker.enrich src tokens
-  where
-    filepath = msHsFilePath ms
-
 -- | Find a stand-alone documentation comment by its name.
 findNamedDoc :: String -> [HsDecl GhcRn] -> ErrMsgM (Maybe HsDocString)
 findNamedDoc name = search
diff --git a/haddock-api/src/Haddock/Types.hs b/haddock-api/src/Haddock/Types.hs
index 2f5d0a9a..a4ef5f82 100644
--- a/haddock-api/src/Haddock/Types.hs
+++ b/haddock-api/src/Haddock/Types.hs
@@ -30,22 +30,19 @@ module Haddock.Types (
 import Control.Exception
 import Control.Arrow hiding ((<+>))
 import Control.DeepSeq
+import Control.Monad (ap)
 import Control.Monad.IO.Class (MonadIO(..))
-import Data.Typeable
+import Data.Typeable (Typeable)
 import Data.Map (Map)
 import Data.Data (Data)
-import qualified Data.Map as Map
 import Documentation.Haddock.Types
 import BasicTypes (Fixity(..), PromotionFlag(..))
 
-import GHC hiding (NoLink)
+import GHC
 import DynFlags (Language)
 import qualified GHC.LanguageExtensions as LangExt
 import OccName
 import Outputable
-import Control.Monad (ap)
-
-import Haddock.Backends.Hyperlinker.Types
 
 -----------------------------------------------------------------------------
 -- * Convenient synonyms
@@ -144,7 +141,8 @@ data Interface = Interface
 
     -- | Tokenized source code of module (avaliable if Haddock is invoked with
     -- source generation flag).
-  , ifaceTokenizedSrc :: !(Maybe [RichToken])
+  , ifaceHieFile :: !(Maybe FilePath)
+  , ifaceDynFlags :: !DynFlags
   }
 
 type WarningMap = Map Name (Doc Name)
@@ -275,7 +273,7 @@ type DocForDecl name = (Documentation name, FnArgsDoc name)
 
 
 noDocForDecl :: DocForDecl name
-noDocForDecl = (Documentation Nothing Nothing, Map.empty)
+noDocForDecl = (Documentation Nothing Nothing, mempty)
 
 
 -----------------------------------------------------------------------------
diff --git a/haddock-api/src/Haddock/Utils.hs b/haddock-api/src/Haddock/Utils.hs
index 49a82717..dda42cea 100644
--- a/haddock-api/src/Haddock/Utils.hs
+++ b/haddock-api/src/Haddock/Utils.hs
@@ -33,7 +33,7 @@ module Haddock.Utils (
 
   -- * Miscellaneous utilities
   getProgramName, bye, die, dieMsg, noDieMsg, mapSnd, mapMaybeM, escapeStr,
-  writeUtf8File,
+  writeUtf8File, withTempDir,
 
   -- * HTML cross reference mapping
   html_xrefs_ref, html_xrefs_ref',
@@ -62,6 +62,7 @@ import Haddock.Types
 import Haddock.GhcUtils
 
 import BasicTypes ( PromotionFlag(..) )
+import Exception (ExceptionMonad)
 import GHC
 import Name
 import Outputable ( panic )
@@ -76,6 +77,7 @@ import Data.List ( isSuffixOf )
 import Data.Maybe ( mapMaybe )
 import System.Environment ( getProgName )
 import System.Exit
+import System.Directory ( createDirectory, removeDirectoryRecursive )
 import System.IO ( hPutStr, hSetEncoding, IOMode(..), stderr, utf8, withFile )
 import System.IO.Unsafe ( unsafePerformIO )
 import qualified System.FilePath.Posix as HtmlPath
@@ -406,6 +408,10 @@ writeUtf8File filepath contents = withFile filepath WriteMode $ \h -> do
     hSetEncoding h utf8
     hPutStr h contents
 
+withTempDir :: (ExceptionMonad m) => FilePath -> m a -> m a
+withTempDir dir = gbracket_ (liftIO $ createDirectory dir)
+                            (liftIO $ removeDirectoryRecursive dir)
+
 -----------------------------------------------------------------------------
 -- * HTML cross references
 --
-- 
cgit v1.2.3