aboutsummaryrefslogtreecommitdiff
path: root/haddock-api/src/Haddock/Backends/Hyperlinker
diff options
context:
space:
mode:
authorAlec Theriault <alec.theriault@gmail.com>2017-12-10 12:22:21 -0800
committerAlexander Biehl <alexbiehl@gmail.com>2018-02-01 14:58:18 +0100
commit4f75be94f45a0e92553eccefe56230c554333ce7 (patch)
treeb88a2dd52d4bcd001f423c490c14b4c3cbaaee0e /haddock-api/src/Haddock/Backends/Hyperlinker
parent60e10eb876899165e9644013508361bf72048bdb (diff)
Use the GHC lexer for the Hyperlinker backend (#714)
* Start changing to use GHC lexer * better cpp * Change SrcSpan to RealSrcSpan * Remove error * Try to stop too many open files * wip * wip * Revert "wip" This reverts commit b605510a195f26315e3d8ca90e6d95a6737553e1. Conflicts: haddock-api/haddock-api.cabal haddock-api/src/Haddock/Interface.hs * Remove pointless 'caching' * Use dlist rather than lists when finding vars * Use a map rather than list * Delete bogus comment * Rebase followup Things now run using the GHC lexer. There are still - stray debug statements - unnecessary changes w.r.t. master * Cleaned up differences w.r.t. current Haddock HEAD Things are looking good. quasiquotes in particular look beautiful: the TH ones (with Haskell source inside) colour/link their contents too! Haven't yet begun to check for possible performance problems. * Support CPP and top-level pragmas The support for these is hackier - but no more hacky than the existing support. * Tests pass, CPP is better recognized The tests were in some cases altered: I consider the new output to be more correct than the old one.... * Fix shrinking of source without tabs in test * Replace 'Position'/'Span' with GHC counterparts Replaces 'Position' -> 'GHC.RealSrcLoc' and 'Span' -> 'GHC.RealSrcSpan'. * Nits * Forgot entry in .cabal * Update changelog
Diffstat (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker')
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs19
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs534
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs14
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs33
4 files changed, 362 insertions, 238 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
index 57ff72ff..361bc15d 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
@@ -12,6 +12,7 @@ import qualified Haddock.Syb as Syb
import Haddock.Backends.Hyperlinker.Types
import qualified GHC
+import qualified SrcLoc
import Control.Applicative
import Control.Monad (guard)
@@ -51,10 +52,10 @@ type DetailsMap = Map.Map Position (Span, TokenDetails)
mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
mkDetailsMap xs =
- Map.fromListWith select_details [ (start, (token_span, token_details))
+ Map.fromListWith select_details [ (start, (span, token_details))
| (ghc_span, token_details) <- xs
- , Just !token_span <- [ghcSrcSpanToSpan ghc_span]
- , let start = spStart token_span
+ , GHC.RealSrcSpan span <- [ghc_span]
+ , let start = SrcLoc.realSrcSpanStart span
]
where
-- favour token details which appear earlier in the list
@@ -62,17 +63,11 @@ mkDetailsMap xs =
lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
lookupBySpan span details = do
- (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details
- guard (tok_span `containsSpan` span )
+ let pos = SrcLoc.realSrcSpanStart span
+ (_, (tok_span, tok_details)) <- Map.lookupLE pos details
+ guard (tok_span `SrcLoc.containsSpan` span)
return tok_details
-ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span
-ghcSrcSpanToSpan (GHC.RealSrcSpan span) =
- Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span)
- , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span)
- })
-ghcSrcSpanToSpan _ = Nothing
-
enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
enrichToken (Token typ _ spn) dm
| typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index e4345602..da1555b8 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -1,11 +1,17 @@
module Haddock.Backends.Hyperlinker.Parser (parse) where
+import Data.Either ( isRight, isLeft )
+import Data.List ( foldl', isPrefixOf, isSuffixOf )
+import Data.Maybe ( maybeToList )
-import Data.Char
-import Data.List
-import Data.Maybe
+import GHC ( DynFlags, addSourceToTokens )
+import SrcLoc
+import FastString ( mkFastString )
+import StringBuffer ( stringToStringBuffer )
+import Lexer ( Token(..) )
+import qualified Lexer as L
-import Haddock.Backends.Hyperlinker.Types
+import Haddock.Backends.Hyperlinker.Types as T
-- | Turn source code string into a stream of more descriptive tokens.
@@ -13,202 +19,350 @@ import Haddock.Backends.Hyperlinker.Types
-- Result should retain original file layout (including comments, whitespace,
-- etc.), i.e. the following "law" should hold:
--
--- @concat . map 'tkValue' . 'parse' = id@
-parse :: String -> [Token]
-parse = tokenize . tag . chunk
+-- prop> concat . map tkValue . parse = id
+--
+-- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v',
+-- characters, since GHC transforms those into ' ' and '\n')
+parse :: DynFlags -> FilePath -> String -> [T.Token]
+parse dflags fp s = ghcToks (processCPP dflags fp s)
+
--- | Split raw source string to more meaningful chunks.
+-- | Parse the source into tokens using the GHC lexer.
+--
+-- * CPP lines are removed and reinserted as line-comments
+-- * top-level file pragmas are parsed as block comments (see the
+-- 'ITblockComment' case of 'classify' for more details)
--
--- This is the initial stage of tokenization process. Each chunk is either
--- a comment (including comment delimiters), a whitespace string, preprocessor
--- macro (and all its content until the end of a line) or valid Haskell lexeme.
-chunk :: String -> [String]
-chunk [] = []
-chunk str@(c:_)
- | isSpace c =
- let (space, mcpp, rest) = spanSpaceOrCpp str
- in [space] ++ maybeToList mcpp ++ chunk rest
-chunk str
- | "--" `isPrefixOf` str = chunk' $ spanToNewline str
- | "{-" `isPrefixOf` str = chunk' $ chunkComment 0 str
- | otherwise = case lex' str of
- (tok:_) -> chunk' tok
- [] -> [str]
+processCPP :: DynFlags -- ^ GHC's flags
+ -> FilePath -- ^ source file name (for position information)
+ -> String -- ^ source file contents
+ -> [(Located L.Token, String)]
+processCPP dflags fpath s = addSrc . go start . splitCPP $ s
where
- chunk' (c, rest) = c:(chunk rest)
+ start = mkRealSrcLoc (mkFastString fpath) 1 1
+ addSrc = addSourceToTokens start (stringToStringBuffer s)
--- | A bit better lexer then the default, i.e. handles DataKinds quotes
-lex' :: ReadS String
-lex' ('\'' : '\'' : rest) = [("''", rest)]
-lex' str@('\'' : '\\' : _ : '\'' : _) = lex str
-lex' str@('\'' : _ : '\'' : _) = lex str
-lex' ('\'' : rest) = [("'", rest)]
-lex' str = lex str
+ -- Transform a list of Haskell/CPP lines into a list of tokens
+ go :: RealSrcLoc -> [Either String String] -> [Located L.Token]
+ go _ [] = []
+ go pos ls =
+ let (hLinesRight, ls') = span isRight ls
+ (cppLinesLeft, rest) = span isLeft ls'
+
+ hSrc = concat [ hLine | Right hLine <- hLinesRight ]
+ cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ]
+
+ in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of
--- | Split input to "first line" string and the rest of it.
---
--- Ideally, this should be done simply with @'break' (== '\n')@. However,
--- Haskell also allows line-unbreaking (or whatever it is called) so things
--- are not as simple and this function deals with that.
-spanToNewline :: String -> (String, String)
-spanToNewline [] = ([], [])
-spanToNewline ('\\':'\n':str) =
- let (str', rest) = spanToNewline str
- in ('\\':'\n':str', rest)
-spanToNewline str@('\n':_) = ("", str)
-spanToNewline (c:str) =
- let (str', rest) = spanToNewline str
- in (c:str', rest)
+ -- Stuff that fails to lex gets turned into comments
+ L.PFailed _ss _msg ->
+ let (src_pos, failed) = mkToken ITunknown pos hSrc
+ (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc
+ in failed : cpp : go new_pos rest
--- | Split input to whitespace string, (optional) preprocessor directive and
--- the rest of it.
+ -- Successfully lexed
+ L.POk ss toks ->
+ let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc
+ in toks ++ [cpp] ++ go new_pos rest
+
+ -- Manually make a token from a 'String', advancing the cursor position
+ mkToken tok start' str =
+ let end = foldl' advanceSrcLoc start' str
+ in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str))
+
+
+-- | Split apart the initial file into Haskell source lines ('Left' entries) and
+-- CPP lines ('Right' entries).
--
--- Again, using something like @'span' 'isSpace'@ would be nice to chunk input
--- to whitespace. The problem is with /#/ symbol - if it is placed at the very
--- beginning of a line, it should be recognized as preprocessor macro. In any
--- other case, it is ordinary Haskell symbol and can be used to declare
--- operators. Hence, while dealing with whitespace we also check whether there
--- happens to be /#/ symbol just after a newline character - if that is the
--- case, we begin treating the whole line as preprocessor macro.
-spanSpaceOrCpp :: String -> (String, Maybe String, String)
-spanSpaceOrCpp ('\n':'#':str) =
- let (str', rest) = spanToNewline str
- in ("\n", Just $ '#':str', rest)
-spanSpaceOrCpp (c:str')
- | isSpace c =
- let (space, mcpp, rest) = spanSpaceOrCpp str'
- in (c:space, mcpp, rest)
-spanSpaceOrCpp str = ("", Nothing, str)
-
--- | Split input to comment content (including delimiters) and the rest.
+-- All characters in the input are present in the output:
--
--- Again, some more logic than simple 'span' is required because of Haskell
--- comment nesting policy.
-chunkComment :: Int -> String -> (String, String)
-chunkComment _ [] = ("", "")
-chunkComment depth ('{':'-':str) =
- let (c, rest) = chunkComment (depth + 1) str
- in ("{-" ++ c, rest)
-chunkComment depth ('-':'}':str)
- | depth == 1 = ("-}", str)
- | otherwise =
- let (c, rest) = chunkComment (depth - 1) str
- in ("-}" ++ c, rest)
-chunkComment depth (e:str) =
- let (c, rest) = chunkComment depth str
- in (e:c, rest)
-
--- | Assign source location for each chunk in given stream.
-tag :: [String] -> [(Span, String)]
-tag =
- reverse . snd . foldl aux (Position 1 1, [])
- where
- aux (pos, cs) str =
- let pos' = foldl move pos str
- in (pos', (Span pos pos', str):cs)
- move pos '\n' = pos { posRow = posRow pos + 1, posCol = 1 }
- move pos _ = pos { posCol = posCol pos + 1 }
-
--- | Turn unrecognised chunk stream to more descriptive token stream.
-tokenize :: [(Span, String)] -> [Token]
-tokenize =
- map aux
+-- prop> concat . map (either id id) . splitCPP = id
+splitCPP :: String -> [Either String String]
+splitCPP "" = []
+splitCPP s | isCPPline s = Left l : splitCPP rest
+ | otherwise = Right l : splitCPP rest
where
- aux (sp, str) = Token
- { tkType = classify str
- , tkValue = str
- , tkSpan = sp
- }
+ ~(l, rest) = spanToNewline 0 s
--- | Classify given string as appropriate Haskell token.
+
+-- | Heuristic to decide if a line is going to be a CPP line. This should be a
+-- cheap operation since it is going to be run on every line being processed.
--
--- This method is based on Haskell 98 Report lexical structure description:
--- https://www.haskell.org/onlinereport/lexemes.html
+-- Right now it just checks if the first non-whitespace character in the first
+-- five characters of the line is a '#':
--
--- However, this is probably far from being perfect and most probably does not
--- handle correctly all corner cases.
-classify :: String -> TokenType
-classify str
- | "--" `isPrefixOf` str = TkComment
- | "{-#" `isPrefixOf` str = TkPragma
- | "{-" `isPrefixOf` str = TkComment
-classify "''" = TkSpecial
-classify "'" = TkSpecial
-classify str@(c:_)
- | isSpace c = TkSpace
- | isDigit c = TkNumber
- | c `elem` special = TkSpecial
- | str `elem` glyphs = TkGlyph
- | all (`elem` symbols) str = TkOperator
- | c == '#' = TkCpp
- | c == '"' = TkString
- | c == '\'' = TkChar
-classify str
- | str `elem` keywords = TkKeyword
- | isIdentifier str = TkIdentifier
- | otherwise = TkUnknown
-
-keywords :: [String]
-keywords =
- [ "as"
- , "case"
- , "class"
- , "data"
- , "default"
- , "deriving"
- , "do"
- , "else"
- , "hiding"
- , "if"
- , "import"
- , "in"
- , "infix"
- , "infixl"
- , "infixr"
- , "instance"
- , "let"
- , "module"
- , "newtype"
- , "of"
- , "qualified"
- , "then"
- , "type"
- , "where"
- , "forall"
- , "family"
- , "mdo"
- ]
-
-glyphs :: [String]
-glyphs =
- [ ".."
- , ":"
- , "::"
- , "="
- , "\\"
- , "|"
- , "<-"
- , "->"
- , "@"
- , "~"
- , "~#"
- , "=>"
- , "-"
- , "!"
- ]
-
-special :: [Char]
-special = "()[]{},;`"
-
--- TODO: Add support for any Unicode symbol or punctuation.
--- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators
-symbols :: [Char]
-symbols = "!#$%&*+./<=>?@\\^|-~:"
-
-isIdentifier :: String -> Bool
-isIdentifier (s:str)
- | (isLower' s || isUpper s) && all isAlphaNum' str = True
+-- >>> isCPPline "#define FOO 1"
+-- True
+--
+-- >>> isCPPline "\t\t #ifdef GHC"
+-- True
+--
+-- >>> isCPPline " #endif"
+-- False
+--
+isCPPline :: String -> Bool
+isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
+
+
+-- | Split a "line" off the front of a string, supporting newline escapes.
+--
+-- By "line", we understand: the shortest substring ending in a '\n' that is not
+--
+-- 1. immediately preceded by a '\\'
+-- 2. not inside some (possibly nested) block comment
+--
+-- All characters in the input are present in the output:
+--
+-- prop> curry (++) . spanToNewLine 0 = id
+spanToNewline :: Int -- ^ open '{-'
+ -> String -- ^ input
+ -> (String, String)
+spanToNewline _ [] = ([], [])
+spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
+spanToNewline n ('\\':'\n':str) =
+ let (str', rest) = spanToNewline n str
+ in ('\\':'\n':str', rest)
+spanToNewline n ('{':'-':str) =
+ let (str', rest) = spanToNewline (n+1) str
+ in ('{':'-':str', rest)
+spanToNewline n ('-':'}':str) =
+ let (str', rest) = spanToNewline (n-1) str
+ in ('-':'}':str', rest)
+spanToNewline n (c:str) =
+ let (str', rest) = spanToNewline n str
+ in (c:str', rest)
+
+
+-- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of
+-- Haddock's 'T.Token'.
+ghcToks :: [(Located L.Token, String)] -> [T.Token]
+ghcToks = reverse . snd . foldl' go (start, [])
where
- isLower' c = isLower c || c == '_'
- isAlphaNum' c = isAlphaNum c || c == '_' || c == '\''
-isIdentifier _ = False
+ start = mkRealSrcLoc (mkFastString "lexing") 1 1
+
+ go :: (RealSrcLoc, [T.Token]) -- ^ current position, tokens accumulated
+ -> (Located L.Token, String) -- ^ next token, its content
+ -> (RealSrcLoc, [T.Token]) -- ^ new position, new tokens accumulated
+ go (pos, toks) (L l tok, raw) = ( next_pos
+ , classifiedTok ++ maybeToList white ++ toks
+ )
+ where
+ (next_pos, white) = mkWhitespace pos l
+ classifiedTok = [ Token (classify tok) raw rss
+ | RealSrcSpan rss <- [l]
+ , not (null raw)
+ ]
+
+-- | Find the correct amount of whitespace between tokens.
+mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token)
+mkWhitespace prev spn =
+ case spn of
+ UnhelpfulSpan _ -> (prev,Nothing)
+ RealSrcSpan s | null wsstring -> (end, Nothing)
+ | otherwise -> (end, Just (Token TkSpace wsstring wsspan))
+ where
+ start = realSrcSpanStart s
+ end = realSrcSpanEnd s
+ wsspan = mkRealSrcSpan prev start
+ nls = srcLocLine start - srcLocLine prev
+ spaces = if nls == 0 then srcLocCol start - srcLocCol prev
+ else srcLocCol start - 1
+ wsstring = replicate nls '\n' ++ replicate spaces ' '
+
+
+-- | Classify given string as appropriate Haskell token.
+classify :: L.Token -> TokenType
+classify tok =
+ case tok of
+ ITas -> TkKeyword
+ ITcase -> TkKeyword
+ ITclass -> TkKeyword
+ ITdata -> TkKeyword
+ ITdefault -> TkKeyword
+ ITderiving -> TkKeyword
+ ITdo -> TkKeyword
+ ITelse -> TkKeyword
+ IThiding -> TkKeyword
+ ITforeign -> TkKeyword
+ ITif -> TkKeyword
+ ITimport -> TkKeyword
+ ITin -> TkKeyword
+ ITinfix -> TkKeyword
+ ITinfixl -> TkKeyword
+ ITinfixr -> TkKeyword
+ ITinstance -> TkKeyword
+ ITlet -> TkKeyword
+ ITmodule -> TkKeyword
+ ITnewtype -> TkKeyword
+ ITof -> TkKeyword
+ ITqualified -> TkKeyword
+ ITthen -> TkKeyword
+ ITtype -> TkKeyword
+ ITwhere -> TkKeyword
+
+ ITforall {} -> TkKeyword
+ ITexport -> TkKeyword
+ ITlabel -> TkKeyword
+ ITdynamic -> TkKeyword
+ ITsafe -> TkKeyword
+ ITinterruptible -> TkKeyword
+ ITunsafe -> TkKeyword
+ ITstdcallconv -> TkKeyword
+ ITccallconv -> TkKeyword
+ ITcapiconv -> TkKeyword
+ ITprimcallconv -> TkKeyword
+ ITjavascriptcallconv -> TkKeyword
+ ITmdo -> TkKeyword
+ ITfamily -> TkKeyword
+ ITrole -> TkKeyword
+ ITgroup -> TkKeyword
+ ITby -> TkKeyword
+ ITusing -> TkKeyword
+ ITpattern -> TkKeyword
+ ITstatic -> TkKeyword
+ ITstock -> TkKeyword
+ ITanyclass -> TkKeyword
+
+ ITunit -> TkKeyword
+ ITsignature -> TkKeyword
+ ITdependency -> TkKeyword
+ ITrequires -> TkKeyword
+
+ ITinline_prag {} -> TkPragma
+ ITspec_prag {} -> TkPragma
+ ITspec_inline_prag {} -> TkPragma
+ ITsource_prag {} -> TkPragma
+ ITrules_prag {} -> TkPragma
+ ITwarning_prag {} -> TkPragma
+ ITdeprecated_prag {} -> TkPragma
+ ITline_prag -> TkPragma
+ ITscc_prag {} -> TkPragma
+ ITgenerated_prag {} -> TkPragma
+ ITcore_prag {} -> TkPragma
+ ITunpack_prag {} -> TkPragma
+ ITnounpack_prag {} -> TkPragma
+ ITann_prag {} -> TkPragma
+ ITcomplete_prag {} -> TkPragma
+ ITclose_prag -> TkPragma
+ IToptions_prag {} -> TkPragma
+ ITinclude_prag {} -> TkPragma
+ ITlanguage_prag -> TkPragma
+ ITvect_prag {} -> TkPragma
+ ITvect_scalar_prag {} -> TkPragma
+ ITnovect_prag {} -> TkPragma
+ ITminimal_prag {} -> TkPragma
+ IToverlappable_prag {} -> TkPragma
+ IToverlapping_prag {} -> TkPragma
+ IToverlaps_prag {} -> TkPragma
+ ITincoherent_prag {} -> TkPragma
+ ITctype {} -> TkPragma
+
+ ITdotdot -> TkGlyph
+ ITcolon -> TkGlyph
+ ITdcolon {} -> TkGlyph
+ ITequal -> TkGlyph
+ ITlam -> TkGlyph
+ ITlcase -> TkGlyph
+ ITvbar -> TkGlyph
+ ITlarrow {} -> TkGlyph
+ ITrarrow {} -> TkGlyph
+ ITat -> TkGlyph
+ ITtilde -> TkGlyph
+ ITtildehsh -> TkGlyph
+ ITdarrow {} -> TkGlyph
+ ITminus -> TkGlyph
+ ITbang -> TkGlyph
+ ITdot -> TkOperator
+ ITtypeApp -> TkGlyph
+
+ ITbiglam -> TkGlyph
+
+ ITocurly -> TkSpecial
+ ITccurly -> TkSpecial
+ ITvocurly -> TkSpecial
+ ITvccurly -> TkSpecial
+ ITobrack -> TkSpecial
+ ITopabrack -> TkSpecial
+ ITcpabrack -> TkSpecial
+ ITcbrack -> TkSpecial
+ IToparen -> TkSpecial
+ ITcparen -> TkSpecial
+ IToubxparen -> TkSpecial
+ ITcubxparen -> TkSpecial
+ ITsemi -> TkSpecial
+ ITcomma -> TkSpecial
+ ITunderscore -> TkIdentifier
+ ITbackquote -> TkSpecial
+ ITsimpleQuote -> TkSpecial
+
+ ITvarid {} -> TkIdentifier
+ ITconid {} -> TkIdentifier
+ ITvarsym {} -> TkOperator
+ ITconsym {} -> TkOperator
+ ITqvarid {} -> TkIdentifier
+ ITqconid {} -> TkIdentifier
+ ITqvarsym {} -> TkOperator
+ ITqconsym {} -> TkOperator
+
+ ITdupipvarid {} -> TkUnknown
+ ITlabelvarid {} -> TkUnknown
+
+ ITchar {} -> TkChar
+ ITstring {} -> TkString
+ ITinteger {} -> TkNumber
+ ITrational {} -> TkNumber
+
+ ITprimchar {} -> TkChar
+ ITprimstring {} -> TkString
+ ITprimint {} -> TkNumber
+ ITprimword {} -> TkNumber
+ ITprimfloat {} -> TkNumber
+ ITprimdouble {} -> TkNumber
+
+ ITopenExpQuote {} -> TkSpecial
+ ITopenPatQuote -> TkSpecial
+ ITopenDecQuote -> TkSpecial
+ ITopenTypQuote -> TkSpecial
+ ITcloseQuote {} -> TkSpecial
+ ITopenTExpQuote {} -> TkSpecial
+ ITcloseTExpQuote -> TkSpecial
+ ITidEscape {} -> TkUnknown
+ ITparenEscape -> TkSpecial
+ ITidTyEscape {} -> TkUnknown
+ ITparenTyEscape -> TkSpecial
+ ITtyQuote -> TkSpecial
+ ITquasiQuote {} -> TkUnknown
+ ITqQuasiQuote {} -> TkUnknown
+
+ ITproc -> TkKeyword
+ ITrec -> TkKeyword
+ IToparenbar {} -> TkGlyph
+ ITcparenbar {} -> TkGlyph
+ ITlarrowtail {} -> TkGlyph
+ ITrarrowtail {} -> TkGlyph
+ ITLarrowtail {} -> TkGlyph
+ ITRarrowtail {} -> TkGlyph
+
+ ITunknown {} -> TkUnknown
+ ITeof -> TkUnknown
+
+ -- Line comments are only supposed to start with '--'. Starting with '#'
+ -- means that this was probably a CPP.
+ ITlineComment s
+ | isCPPline s -> TkCpp
+ | otherwise -> TkComment
+
+ ITdocCommentNext {} -> TkComment
+ ITdocCommentPrev {} -> TkComment
+ ITdocCommentNamed {} -> TkComment
+ ITdocSection {} -> TkComment
+ ITdocOptions {} -> TkComment
+
+ -- The lexer considers top-level pragmas as comments (see `pragState` in
+ -- the GHC lexer for more), so we have to manually reverse this. The
+ -- following is a hammer: it smashes _all_ pragma-like block comments into
+ -- pragmas.
+ ITblockComment c
+ | isPrefixOf "{-#" c
+ , isSuffixOf "#-}" c -> TkPragma
+ | otherwise -> TkComment
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
index 15793f0c..27bf7605 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
@@ -82,7 +82,7 @@ header mcss mjs =
tokenGroup :: SrcMap -> TokenGroup -> Html
tokenGroup _ (GrpNormal tok@(Token { .. }))
- | tkType == TkSpace = renderSpace (posRow . spStart $ tkSpan) tkValue
+ | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue
| otherwise = tokenSpan tok ! attrs
where
attrs = [ multiclass . tokenStyle $ tkType ]
@@ -155,7 +155,7 @@ internalHyperlink name content =
Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html
-externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of
+externalNameHyperlink srcs name content = case Map.lookup mdl srcs of
Just SrcLocal -> Html.anchor content !
[ Html.href $ hypSrcModuleNameUrl mdl name ]
Just (SrcExternal path) -> Html.anchor content !
@@ -165,12 +165,14 @@ externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of
mdl = GHC.nameModule name
externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html
-externalModHyperlink (_, srcs) name content = case Map.lookup name srcs of
- Just SrcLocal -> Html.anchor content !
+externalModHyperlink srcs name content =
+ let srcs' = Map.mapKeys GHC.moduleName srcs in
+ case Map.lookup name srcs' of
+ Just SrcLocal -> Html.anchor content !
[ Html.href $ hypSrcModuleUrl' name ]
- Just (SrcExternal path) -> Html.anchor content !
+ Just (SrcExternal path) -> Html.anchor content !
[ Html.href $ path </> hypSrcModuleUrl' name ]
- Nothing -> content
+ Nothing -> content
renderSpace :: Int -> String -> Html
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
index d8ae89e4..e377471e 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -4,8 +4,6 @@ module Haddock.Backends.Hyperlinker.Types where
import qualified GHC
import Data.Map (Map)
-import qualified Data.Map as Map
-
data Token = Token
{ tkType :: TokenType
@@ -14,23 +12,8 @@ data Token = Token
}
deriving (Show)
-data Position = Position
- { posRow :: !Int
- , posCol :: !Int
- }
- deriving (Eq, Ord, Show)
-
-data Span = Span
- { spStart :: !Position
- , spEnd :: !Position
- }
- deriving (Show)
-
--- | Tests whether the first span "contains" the other span, meaning
--- that it covers at least as much source code. True where spans are equal.
-containsSpan :: Span -> Span -> Bool
-containsSpan s1 s2 =
- spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2
+type Position = GHC.RealSrcLoc
+type Span = GHC.RealSrcSpan
data TokenType
= TkIdentifier
@@ -80,15 +63,5 @@ data SrcPath
| SrcLocal
-- | Mapping from modules to cross-package source paths.
---
--- This mapping is actually a pair of maps instead of just one map. The reason
--- for this is because when hyperlinking modules in import lists we have no
--- 'GHC.Module' available. On the other hand, we can't just use map with
--- 'GHC.ModuleName' as indices because certain modules may have common name
--- but originate in different packages. Hence, we use both /rich/ and /poor/
--- versions, where the /poor/ is just projection of /rich/ one cached in pair
--- for better performance.
-type SrcMap = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath)
+type SrcMap = Map GHC.Module SrcPath
-mkSrcMap :: Map GHC.Module SrcPath -> SrcMap
-mkSrcMap srcs = (srcs, Map.mapKeys GHC.moduleName srcs)