aboutsummaryrefslogtreecommitdiff
path: root/haddock-api/src/Haddock/Backends/Hyperlinker
diff options
context:
space:
mode:
Diffstat (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker')
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs185
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs204
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs189
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs85
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs68
5 files changed, 731 insertions, 0 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
new file mode 100644
index 00000000..e8baae88
--- /dev/null
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
@@ -0,0 +1,185 @@
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TypeFamilies #-}
+
+
+module Haddock.Backends.Hyperlinker.Ast (enrich) where
+
+
+import Haddock.Syb
+import Haddock.Backends.Hyperlinker.Types
+
+import qualified GHC
+
+import Control.Applicative
+import Data.Data
+import Data.Maybe
+
+
+-- | Add more detailed information to token stream using GHC API.
+enrich :: GHC.RenamedSource -> [Token] -> [RichToken]
+enrich src =
+ map $ \token -> RichToken
+ { rtkToken = token
+ , rtkDetails = enrichToken token detailsMap
+ }
+ where
+ detailsMap = concatMap ($ src)
+ [ variables
+ , types
+ , decls
+ , binds
+ , imports
+ ]
+
+-- | A map containing association between source locations and "details" of
+-- this location.
+--
+-- For the time being, it is just a list of pairs. However, looking up things
+-- in such structure has linear complexity. We cannot use any hashmap-like
+-- stuff because source locations are not ordered. In the future, this should
+-- be replaced with interval tree data structure.
+type DetailsMap = [(GHC.SrcSpan, TokenDetails)]
+
+lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
+lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst)
+
+enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
+enrichToken (Token typ _ spn) dm
+ | typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
+enrichToken _ _ = Nothing
+
+-- | Obtain details map for variables ("normally" used identifiers).
+variables :: GHC.RenamedSource -> DetailsMap
+variables =
+ everything (<|>) (var `combine` rec)
+ where
+ var term = case cast term of
+ (Just (GHC.L sspan (GHC.HsVar name))) ->
+ pure (sspan, RtkVar (GHC.unLoc name))
+ (Just (GHC.L _ (GHC.RecordCon (GHC.L sspan name) _ _ _))) ->
+ pure (sspan, RtkVar name)
+ _ -> empty
+ rec term = case cast term of
+ Just (GHC.HsRecField (GHC.L sspan name) (_ :: GHC.LHsExpr GHC.Name) _) ->
+ pure (sspan, RtkVar name)
+ _ -> empty
+
+-- | Obtain details map for types.
+types :: GHC.RenamedSource -> DetailsMap
+types =
+ everything (<|>) ty
+ where
+ ty term = case cast term of
+ (Just (GHC.L sspan (GHC.HsTyVar name))) ->
+ pure (sspan, RtkType (GHC.unLoc name))
+ _ -> empty
+
+-- | Obtain details map for identifier bindings.
+--
+-- That includes both identifiers bound by pattern matching or declared using
+-- ordinary assignment (in top-level declarations, let-expressions and where
+-- clauses).
+binds :: GHC.RenamedSource -> DetailsMap
+binds =
+ everything (<|>) (fun `combine` pat `combine` tvar)
+ where
+ fun term = case cast term of
+ (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.Name)) ->
+ pure (sspan, RtkBind name)
+ _ -> empty
+ pat term = case cast term of
+ (Just (GHC.L sspan (GHC.VarPat name))) ->
+ pure (sspan, RtkBind (GHC.unLoc name))
+ (Just (GHC.L _ (GHC.ConPatIn (GHC.L sspan name) recs))) ->
+ [(sspan, RtkVar name)] ++ everything (<|>) rec recs
+ (Just (GHC.L _ (GHC.AsPat (GHC.L sspan name) _))) ->
+ pure (sspan, RtkBind name)
+ _ -> empty
+ rec term = case cast term of
+ (Just (GHC.HsRecField (GHC.L sspan name) (_ :: GHC.LPat GHC.Name) _)) ->
+ pure (sspan, RtkVar name)
+ _ -> empty
+ tvar term = case cast term of
+ (Just (GHC.L sspan (GHC.UserTyVar name))) ->
+ pure (sspan, RtkBind (GHC.unLoc name))
+ (Just (GHC.L _ (GHC.KindedTyVar (GHC.L sspan name) _))) ->
+ pure (sspan, RtkBind name)
+ _ -> empty
+
+-- | Obtain details map for top-level declarations.
+decls :: GHC.RenamedSource -> DetailsMap
+decls (group, _, _, _) = concatMap ($ group)
+ [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
+ , everything (<|>) fun . GHC.hs_valds
+ , everything (<|>) (con `combine` ins)
+ ]
+ where
+ typ (GHC.L _ t) = case t of
+ GHC.DataDecl name _ _ _ -> pure . decl $ name
+ GHC.SynDecl name _ _ _ -> pure . decl $ name
+ GHC.FamDecl fam -> pure . decl $ GHC.fdLName fam
+ GHC.ClassDecl{..} -> [decl tcdLName] ++ concatMap sig tcdSigs
+ fun term = case cast term of
+ (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.Name))
+ | GHC.isExternalName name -> pure (sspan, RtkDecl name)
+ _ -> empty
+ con term = case cast term of
+ (Just cdcl) ->
+ map decl (GHC.getConNames cdcl) ++ everything (<|>) fld cdcl
+ Nothing -> empty
+ ins term = case cast term of
+ (Just (GHC.DataFamInstD inst)) -> pure . tyref $ GHC.dfid_tycon inst
+ (Just (GHC.TyFamInstD (GHC.TyFamInstDecl (GHC.L _ eqn) _))) ->
+ pure . tyref $ GHC.tfe_tycon eqn
+ _ -> empty
+ fld term = case cast term of
+ Just (field :: GHC.ConDeclField GHC.Name)
+ -> map (decl . fmap GHC.selectorFieldOcc) $ GHC.cd_fld_names field
+ Nothing -> empty
+ sig (GHC.L _ (GHC.TypeSig names _)) = map decl names
+ sig _ = []
+ decl (GHC.L sspan name) = (sspan, RtkDecl name)
+ tyref (GHC.L sspan name) = (sspan, RtkType name)
+
+-- | Obtain details map for import declarations.
+--
+-- This map also includes type and variable details for items in export and
+-- import lists.
+imports :: GHC.RenamedSource -> DetailsMap
+imports src@(_, imps, _, _) =
+ everything (<|>) ie src ++ mapMaybe (imp . GHC.unLoc) imps
+ where
+ ie term = case cast term of
+ (Just (GHC.IEVar v)) -> pure $ var v
+ (Just (GHC.IEThingAbs t)) -> pure $ typ t
+ (Just (GHC.IEThingAll t)) -> pure $ typ t
+ (Just (GHC.IEThingWith t _ vs _fls)) ->
+ [typ t] ++ map var vs
+ _ -> empty
+ typ (GHC.L sspan name) = (sspan, RtkType name)
+ var (GHC.L sspan name) = (sspan, RtkVar name)
+ imp idecl | not . GHC.ideclImplicit $ idecl =
+ let (GHC.L sspan name) = GHC.ideclName idecl
+ in Just (sspan, RtkModule name)
+ imp _ = Nothing
+
+-- | Check whether token stream span matches GHC source span.
+--
+-- Currently, it is implemented as checking whether "our" span is contained
+-- in GHC span. The reason for that is because GHC span are generally wider
+-- and may spread across couple tokens. For example, @(>>=)@ consists of three
+-- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable
+-- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@
+-- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span
+-- associated with @quux@ contains all five elements.
+matches :: Span -> GHC.SrcSpan -> Bool
+matches tspan (GHC.RealSrcSpan aspan)
+ | saspan <= stspan && etspan <= easpan = True
+ where
+ stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan)
+ etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan)
+ saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan)
+ easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan)
+matches _ _ = False
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
new file mode 100644
index 00000000..e206413e
--- /dev/null
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -0,0 +1,204 @@
+module Haddock.Backends.Hyperlinker.Parser (parse) where
+
+
+import Data.Char
+import Data.List
+import Data.Maybe
+
+import Haddock.Backends.Hyperlinker.Types
+
+
+-- | Turn source code string into a stream of more descriptive tokens.
+--
+-- Result should retain original file layout (including comments, whitespace,
+-- etc.), i.e. the following "law" should hold:
+--
+-- @concat . map 'tkValue' . 'parse' = id@
+parse :: String -> [Token]
+parse = tokenize . tag . chunk
+
+-- | Split raw source string to more meaningful chunks.
+--
+-- This is the initial stage of tokenization process. Each chunk is either
+-- a comment (including comment delimiters), a whitespace string, preprocessor
+-- macro (and all its content until the end of a line) or valid Haskell lexeme.
+chunk :: String -> [String]
+chunk [] = []
+chunk str@(c:_)
+ | isSpace c =
+ let (space, mcpp, rest) = spanSpaceOrCpp str
+ in [space] ++ maybeToList mcpp ++ chunk rest
+chunk str
+ | "--" `isPrefixOf` str = chunk' $ spanToNewline str
+ | "{-" `isPrefixOf` str = chunk' $ chunkComment 0 str
+ | otherwise = case lex str of
+ (tok:_) -> chunk' tok
+ [] -> [str]
+ where
+ chunk' (c, rest) = c:(chunk rest)
+
+-- | Split input to "first line" string and the rest of it.
+--
+-- Ideally, this should be done simply with @'break' (== '\n')@. However,
+-- Haskell also allows line-unbreaking (or whatever it is called) so things
+-- are not as simple and this function deals with that.
+spanToNewline :: String -> (String, String)
+spanToNewline [] = ([], [])
+spanToNewline ('\\':'\n':str) =
+ let (str', rest) = spanToNewline str
+ in ('\\':'\n':str', rest)
+spanToNewline str@('\n':_) = ("", str)
+spanToNewline (c:str) =
+ let (str', rest) = spanToNewline str
+ in (c:str', rest)
+
+-- | Split input to whitespace string, (optional) preprocessor directive and
+-- the rest of it.
+--
+-- Again, using something like @'span' 'isSpace'@ would be nice to chunk input
+-- to whitespace. The problem is with /#/ symbol - if it is placed at the very
+-- beginning of a line, it should be recognized as preprocessor macro. In any
+-- other case, it is ordinary Haskell symbol and can be used to declare
+-- operators. Hence, while dealing with whitespace we also check whether there
+-- happens to be /#/ symbol just after a newline character - if that is the
+-- case, we begin treating the whole line as preprocessor macro.
+spanSpaceOrCpp :: String -> (String, Maybe String, String)
+spanSpaceOrCpp ('\n':'#':str) =
+ let (str', rest) = spanToNewline str
+ in ("\n", Just $ '#':str', rest)
+spanSpaceOrCpp (c:str')
+ | isSpace c =
+ let (space, mcpp, rest) = spanSpaceOrCpp str'
+ in (c:space, mcpp, rest)
+spanSpaceOrCpp str = ("", Nothing, str)
+
+-- | Split input to comment content (including delimiters) and the rest.
+--
+-- Again, some more logic than simple 'span' is required because of Haskell
+-- comment nesting policy.
+chunkComment :: Int -> String -> (String, String)
+chunkComment _ [] = ("", "")
+chunkComment depth ('{':'-':str) =
+ let (c, rest) = chunkComment (depth + 1) str
+ in ("{-" ++ c, rest)
+chunkComment depth ('-':'}':str)
+ | depth == 1 = ("-}", str)
+ | otherwise =
+ let (c, rest) = chunkComment (depth - 1) str
+ in ("-}" ++ c, rest)
+chunkComment depth (e:str) =
+ let (c, rest) = chunkComment depth str
+ in (e:c, rest)
+
+-- | Assign source location for each chunk in given stream.
+tag :: [String] -> [(Span, String)]
+tag =
+ reverse . snd . foldl aux (Position 1 1, [])
+ where
+ aux (pos, cs) str =
+ let pos' = foldl move pos str
+ in (pos', (Span pos pos', str):cs)
+ move pos '\n' = pos { posRow = posRow pos + 1, posCol = 1 }
+ move pos _ = pos { posCol = posCol pos + 1 }
+
+-- | Turn unrecognised chunk stream to more descriptive token stream.
+tokenize :: [(Span, String)] -> [Token]
+tokenize =
+ map aux
+ where
+ aux (sp, str) = Token
+ { tkType = classify str
+ , tkValue = str
+ , tkSpan = sp
+ }
+
+-- | Classify given string as appropriate Haskell token.
+--
+-- This method is based on Haskell 98 Report lexical structure description:
+-- https://www.haskell.org/onlinereport/lexemes.html
+--
+-- However, this is probably far from being perfect and most probably does not
+-- handle correctly all corner cases.
+classify :: String -> TokenType
+classify str
+ | "--" `isPrefixOf` str = TkComment
+ | "{-#" `isPrefixOf` str = TkPragma
+ | "{-" `isPrefixOf` str = TkComment
+classify str@(c:_)
+ | isSpace c = TkSpace
+ | isDigit c = TkNumber
+ | c `elem` special = TkSpecial
+ | str `elem` glyphs = TkGlyph
+ | all (`elem` symbols) str = TkOperator
+ | c == '#' = TkCpp
+ | c == '"' = TkString
+ | c == '\'' = TkChar
+classify str
+ | str `elem` keywords = TkKeyword
+ | isIdentifier str = TkIdentifier
+ | otherwise = TkUnknown
+
+keywords :: [String]
+keywords =
+ [ "as"
+ , "case"
+ , "class"
+ , "data"
+ , "default"
+ , "deriving"
+ , "do"
+ , "else"
+ , "hiding"
+ , "if"
+ , "import"
+ , "in"
+ , "infix"
+ , "infixl"
+ , "infixr"
+ , "instance"
+ , "let"
+ , "module"
+ , "newtype"
+ , "of"
+ , "qualified"
+ , "then"
+ , "type"
+ , "where"
+ , "forall"
+ , "family"
+ , "mdo"
+ ]
+
+glyphs :: [String]
+glyphs =
+ [ ".."
+ , ":"
+ , "::"
+ , "="
+ , "\\"
+ , "|"
+ , "<-"
+ , "->"
+ , "@"
+ , "~"
+ , "~#"
+ , "=>"
+ , "-"
+ , "!"
+ ]
+
+special :: [Char]
+special = "()[]{},;`"
+
+-- TODO: Add support for any Unicode symbol or punctuation.
+-- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators
+symbols :: [Char]
+symbols = "!#$%&*+./<=>?@\\^|-~:"
+
+isIdentifier :: String -> Bool
+isIdentifier (s:str)
+ | (isLower' s || isUpper s) && all isAlphaNum' str = True
+ where
+ isLower' c = isLower c || c == '_'
+ isAlphaNum' c = isAlphaNum c || c == '_' || c == '\''
+isIdentifier _ = False
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
new file mode 100644
index 00000000..15793f0c
--- /dev/null
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
@@ -0,0 +1,189 @@
+{-# LANGUAGE RecordWildCards #-}
+
+
+module Haddock.Backends.Hyperlinker.Renderer (render) where
+
+
+import Haddock.Backends.Hyperlinker.Types
+import Haddock.Backends.Hyperlinker.Utils
+
+import qualified GHC
+import qualified Name as GHC
+import qualified Unique as GHC
+
+import System.FilePath.Posix ((</>))
+
+import Data.List
+import Data.Maybe
+import Data.Monoid
+import qualified Data.Map as Map
+
+import Text.XHtml (Html, HtmlAttr, (!))
+import qualified Text.XHtml as Html
+
+
+type StyleClass = String
+
+
+render :: Maybe FilePath -> Maybe FilePath -> SrcMap -> [RichToken]
+ -> Html
+render mcss mjs srcs tokens = header mcss mjs <> body srcs tokens
+
+
+data TokenGroup
+ = GrpNormal Token
+ | GrpRich TokenDetails [Token]
+
+
+-- | Group consecutive tokens pointing to the same element.
+--
+-- We want to render qualified identifiers as one entity. For example,
+-- @Bar.Baz.foo@ consists of 5 tokens (@Bar@, @.@, @Baz@, @.@, @foo@) but for
+-- better user experience when highlighting and clicking links, these tokens
+-- should be regarded as one identifier. Therefore, before rendering we must
+-- group consecutive elements pointing to the same 'GHC.Name' (note that even
+-- dot token has it if it is part of qualified name).
+groupTokens :: [RichToken] -> [TokenGroup]
+groupTokens [] = []
+groupTokens ((RichToken tok Nothing):rest) = (GrpNormal tok):(groupTokens rest)
+groupTokens ((RichToken tok (Just det)):rest) =
+ let (grp, rest') = span same rest
+ in (GrpRich det (tok:(map rtkToken grp))):(groupTokens rest')
+ where
+ same (RichToken _ (Just det')) = det == det'
+ same _ = False
+
+
+body :: SrcMap -> [RichToken] -> Html
+body srcs tokens =
+ Html.body . Html.pre $ hypsrc
+ where
+ hypsrc = mconcat . map (tokenGroup srcs) . groupTokens $ tokens
+
+
+header :: Maybe FilePath -> Maybe FilePath -> Html
+header mcss mjs
+ | isNothing mcss && isNothing mjs = Html.noHtml
+header mcss mjs =
+ Html.header $ css mcss <> js mjs
+ where
+ css Nothing = Html.noHtml
+ css (Just cssFile) = Html.thelink Html.noHtml !
+ [ Html.rel "stylesheet"
+ , Html.thetype "text/css"
+ , Html.href cssFile
+ ]
+ js Nothing = Html.noHtml
+ js (Just scriptFile) = Html.script Html.noHtml !
+ [ Html.thetype "text/javascript"
+ , Html.src scriptFile
+ ]
+
+
+tokenGroup :: SrcMap -> TokenGroup -> Html
+tokenGroup _ (GrpNormal tok@(Token { .. }))
+ | tkType == TkSpace = renderSpace (posRow . spStart $ tkSpan) tkValue
+ | otherwise = tokenSpan tok ! attrs
+ where
+ attrs = [ multiclass . tokenStyle $ tkType ]
+tokenGroup srcs (GrpRich det tokens) =
+ externalAnchor det . internalAnchor det . hyperlink srcs det $ content
+ where
+ content = mconcat . map (richToken det) $ tokens
+
+
+richToken :: TokenDetails -> Token -> Html
+richToken det tok =
+ tokenSpan tok ! [ multiclass style ]
+ where
+ style = (tokenStyle . tkType) tok ++ richTokenStyle det
+
+
+tokenSpan :: Token -> Html
+tokenSpan = Html.thespan . Html.toHtml . tkValue
+
+
+richTokenStyle :: TokenDetails -> [StyleClass]
+richTokenStyle (RtkVar _) = ["hs-var"]
+richTokenStyle (RtkType _) = ["hs-type"]
+richTokenStyle _ = []
+
+tokenStyle :: TokenType -> [StyleClass]
+tokenStyle TkIdentifier = ["hs-identifier"]
+tokenStyle TkKeyword = ["hs-keyword"]
+tokenStyle TkString = ["hs-string"]
+tokenStyle TkChar = ["hs-char"]
+tokenStyle TkNumber = ["hs-number"]
+tokenStyle TkOperator = ["hs-operator"]
+tokenStyle TkGlyph = ["hs-glyph"]
+tokenStyle TkSpecial = ["hs-special"]
+tokenStyle TkSpace = []
+tokenStyle TkComment = ["hs-comment"]
+tokenStyle TkCpp = ["hs-cpp"]
+tokenStyle TkPragma = ["hs-pragma"]
+tokenStyle TkUnknown = []
+
+multiclass :: [StyleClass] -> HtmlAttr
+multiclass = Html.theclass . intercalate " "
+
+externalAnchor :: TokenDetails -> Html -> Html
+externalAnchor (RtkDecl name) content =
+ Html.anchor content ! [ Html.name $ externalAnchorIdent name ]
+externalAnchor _ content = content
+
+internalAnchor :: TokenDetails -> Html -> Html
+internalAnchor (RtkBind name) content =
+ Html.anchor content ! [ Html.name $ internalAnchorIdent name ]
+internalAnchor _ content = content
+
+externalAnchorIdent :: GHC.Name -> String
+externalAnchorIdent = hypSrcNameUrl
+
+internalAnchorIdent :: GHC.Name -> String
+internalAnchorIdent = ("local-" ++) . show . GHC.getKey . GHC.nameUnique
+
+hyperlink :: SrcMap -> TokenDetails -> Html -> Html
+hyperlink srcs details = case rtkName details of
+ Left name ->
+ if GHC.isInternalName name
+ then internalHyperlink name
+ else externalNameHyperlink srcs name
+ Right name -> externalModHyperlink srcs name
+
+internalHyperlink :: GHC.Name -> Html -> Html
+internalHyperlink name content =
+ Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
+
+externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html
+externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of
+ Just SrcLocal -> Html.anchor content !
+ [ Html.href $ hypSrcModuleNameUrl mdl name ]
+ Just (SrcExternal path) -> Html.anchor content !
+ [ Html.href $ path </> hypSrcModuleNameUrl mdl name ]
+ Nothing -> content
+ where
+ mdl = GHC.nameModule name
+
+externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html
+externalModHyperlink (_, srcs) name content = case Map.lookup name srcs of
+ Just SrcLocal -> Html.anchor content !
+ [ Html.href $ hypSrcModuleUrl' name ]
+ Just (SrcExternal path) -> Html.anchor content !
+ [ Html.href $ path </> hypSrcModuleUrl' name ]
+ Nothing -> content
+
+
+renderSpace :: Int -> String -> Html
+renderSpace _ [] = Html.noHtml
+renderSpace line ('\n':rest) = mconcat
+ [ Html.thespan . Html.toHtml $ "\n"
+ , lineAnchor (line + 1)
+ , renderSpace (line + 1) rest
+ ]
+renderSpace line space =
+ let (hspace, rest) = span (/= '\n') space
+ in (Html.thespan . Html.toHtml) hspace <> renderSpace line rest
+
+
+lineAnchor :: Int -> Html
+lineAnchor line = Html.anchor Html.noHtml ! [ Html.name $ hypSrcLineUrl line ]
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
new file mode 100644
index 00000000..5f4dbc8c
--- /dev/null
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -0,0 +1,85 @@
+module Haddock.Backends.Hyperlinker.Types where
+
+
+import qualified GHC
+
+import Data.Map (Map)
+import qualified Data.Map as Map
+
+
+data Token = Token
+ { tkType :: TokenType
+ , tkValue :: String
+ , tkSpan :: Span
+ }
+
+data Position = Position
+ { posRow :: !Int
+ , posCol :: !Int
+ }
+
+data Span = Span
+ { spStart :: Position
+ , spEnd :: Position
+ }
+
+data TokenType
+ = TkIdentifier
+ | TkKeyword
+ | TkString
+ | TkChar
+ | TkNumber
+ | TkOperator
+ | TkGlyph
+ | TkSpecial
+ | TkSpace
+ | TkComment
+ | TkCpp
+ | TkPragma
+ | TkUnknown
+ deriving (Show, Eq)
+
+
+data RichToken = RichToken
+ { rtkToken :: Token
+ , rtkDetails :: Maybe TokenDetails
+ }
+
+data TokenDetails
+ = RtkVar GHC.Name
+ | RtkType GHC.Name
+ | RtkBind GHC.Name
+ | RtkDecl GHC.Name
+ | RtkModule GHC.ModuleName
+ deriving (Eq)
+
+
+rtkName :: TokenDetails -> Either GHC.Name GHC.ModuleName
+rtkName (RtkVar name) = Left name
+rtkName (RtkType name) = Left name
+rtkName (RtkBind name) = Left name
+rtkName (RtkDecl name) = Left name
+rtkName (RtkModule name) = Right name
+
+
+-- | Path for making cross-package hyperlinks in generated sources.
+--
+-- Used in 'SrcMap' to determine whether module originates in current package
+-- or in an external package.
+data SrcPath
+ = SrcExternal FilePath
+ | SrcLocal
+
+-- | Mapping from modules to cross-package source paths.
+--
+-- This mapping is actually a pair of maps instead of just one map. The reason
+-- for this is because when hyperlinking modules in import lists we have no
+-- 'GHC.Module' available. On the other hand, we can't just use map with
+-- 'GHC.ModuleName' as indices because certain modules may have common name
+-- but originate in different packages. Hence, we use both /rich/ and /poor/
+-- versions, where the /poor/ is just projection of /rich/ one cached in pair
+-- for better performance.
+type SrcMap = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath)
+
+mkSrcMap :: Map GHC.Module SrcPath -> SrcMap
+mkSrcMap srcs = (srcs, Map.mapKeys GHC.moduleName srcs)
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
new file mode 100644
index 00000000..9de4a03d
--- /dev/null
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
@@ -0,0 +1,68 @@
+module Haddock.Backends.Hyperlinker.Utils
+ ( hypSrcDir, hypSrcModuleFile, hypSrcModuleFile'
+ , hypSrcModuleUrl, hypSrcModuleUrl'
+ , hypSrcNameUrl
+ , hypSrcLineUrl
+ , hypSrcModuleNameUrl, hypSrcModuleLineUrl
+ , hypSrcModuleUrlFormat
+ , hypSrcModuleNameUrlFormat, hypSrcModuleLineUrlFormat
+ ) where
+
+
+import Haddock.Backends.Xhtml.Utils
+
+import GHC
+import FastString
+import System.FilePath.Posix ((</>))
+
+
+hypSrcDir :: FilePath
+hypSrcDir = "src"
+
+hypSrcModuleFile :: Module -> FilePath
+hypSrcModuleFile = hypSrcModuleFile' . moduleName
+
+hypSrcModuleFile' :: ModuleName -> FilePath
+hypSrcModuleFile' mdl = spliceURL'
+ Nothing (Just mdl) Nothing Nothing moduleFormat
+
+hypSrcModuleUrl :: Module -> String
+hypSrcModuleUrl = hypSrcModuleFile
+
+hypSrcModuleUrl' :: ModuleName -> String
+hypSrcModuleUrl' = hypSrcModuleFile'
+
+hypSrcNameUrl :: Name -> String
+hypSrcNameUrl name = spliceURL
+ Nothing Nothing (Just name) Nothing nameFormat
+
+hypSrcLineUrl :: Int -> String
+hypSrcLineUrl line = spliceURL
+ Nothing Nothing Nothing (Just spn) lineFormat
+ where
+ loc = mkSrcLoc nilFS line 1
+ spn = mkSrcSpan loc loc
+
+hypSrcModuleNameUrl :: Module -> Name -> String
+hypSrcModuleNameUrl mdl name = hypSrcModuleUrl mdl ++ "#" ++ hypSrcNameUrl name
+
+hypSrcModuleLineUrl :: Module -> Int -> String
+hypSrcModuleLineUrl mdl line = hypSrcModuleUrl mdl ++ "#" ++ hypSrcLineUrl line
+
+hypSrcModuleUrlFormat :: String
+hypSrcModuleUrlFormat = hypSrcDir </> moduleFormat
+
+hypSrcModuleNameUrlFormat :: String
+hypSrcModuleNameUrlFormat = hypSrcModuleUrlFormat ++ "#" ++ nameFormat
+
+hypSrcModuleLineUrlFormat :: String
+hypSrcModuleLineUrlFormat = hypSrcModuleUrlFormat ++ "#" ++ lineFormat
+
+moduleFormat :: String
+moduleFormat = "%{MODULE}.html"
+
+nameFormat :: String
+nameFormat = "%{NAME}"
+
+lineFormat :: String
+lineFormat = "line-%{LINE}"