aboutsummaryrefslogtreecommitdiff
path: root/haddock-api/src/Haddock/Backends/Hyperlinker
diff options
context:
space:
mode:
authorAlexander Biehl <alexbiehl@gmail.com>2018-06-14 15:28:52 +0200
committerGitHub <noreply@github.com>2018-06-14 15:28:52 +0200
commit6247ec8b5a5bc8145ce851dce11eb617a380381c (patch)
tree7856c0dd1ddd0c1f3eef0422b0cd8e8a5a6b71cb /haddock-api/src/Haddock/Backends/Hyperlinker
parent9a7f539d0c20654ff394f2ff99836412a6844df1 (diff)
parent095fa970b32c818ed4c06cefc00ba98aaff756fa (diff)
Merge pull request #857 from sjakobi/ghc-head-update-3
Update ghc-head
Diffstat (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker')
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs54
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs605
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs77
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs33
4 files changed, 476 insertions, 293 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
index 56137f51..0ecf7109 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
@@ -12,6 +12,7 @@ import qualified Haddock.Syb as Syb
import Haddock.Backends.Hyperlinker.Types
import qualified GHC
+import qualified SrcLoc
import qualified Outputable as GHC
import Control.Applicative
@@ -52,10 +53,10 @@ type DetailsMap = Map.Map Position (Span, TokenDetails)
mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
mkDetailsMap xs =
- Map.fromListWith select_details [ (start, (token_span, token_details))
+ Map.fromListWith select_details [ (start, (span, token_details))
| (ghc_span, token_details) <- xs
- , Just !token_span <- [ghcSrcSpanToSpan ghc_span]
- , let start = spStart token_span
+ , GHC.RealSrcSpan span <- [ghc_span]
+ , let start = SrcLoc.realSrcSpanStart span
]
where
-- favour token details which appear earlier in the list
@@ -63,17 +64,11 @@ mkDetailsMap xs =
lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
lookupBySpan span details = do
- (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details
- guard (tok_span `containsSpan` span )
+ let pos = SrcLoc.realSrcSpanStart span
+ (_, (tok_span, tok_details)) <- Map.lookupLE pos details
+ guard (tok_span `SrcLoc.containsSpan` span)
return tok_details
-ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span
-ghcSrcSpanToSpan (GHC.RealSrcSpan span) =
- Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span)
- , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span)
- })
-ghcSrcSpanToSpan _ = Nothing
-
enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
enrichToken (Token typ _ spn) dm
| typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
@@ -99,9 +94,12 @@ variables =
types :: GHC.RenamedSource -> LTokenDetails
types = everythingInRenamedSource ty
where
+ ty :: forall a. Data a => a -> [(GHC.SrcSpan, TokenDetails)]
ty term = case cast term of
(Just ((GHC.L sspan (GHC.HsTyVar _ _ name)) :: GHC.LHsType GHC.GhcRn)) ->
pure (sspan, RtkType (GHC.unLoc name))
+ (Just ((GHC.L sspan (GHC.HsOpTy _ l name r)) :: GHC.LHsType GHC.GhcRn)) ->
+ (sspan, RtkType (GHC.unLoc name)):(ty l ++ ty r)
_ -> empty
-- | Obtain details map for identifier bindings.
@@ -117,6 +115,11 @@ binds = everythingInRenamedSource
fun term = case cast term of
(Just (GHC.FunBind _ (GHC.L sspan name) _ _ _ :: GHC.HsBind GHC.GhcRn)) ->
pure (sspan, RtkBind name)
+ (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.L sspan name) args _ _))) ->
+ pure (sspan, RtkBind name) ++ everythingInRenamedSource patsyn_binds args
+ _ -> empty
+ patsyn_binds term = case cast term of
+ (Just (GHC.L sspan (name :: GHC.Name))) -> pure (sspan, RtkVar name)
_ -> empty
pat term = case cast term of
(Just ((GHC.L sspan (GHC.VarPat _ name)) :: GHC.LPat GHC.GhcRn)) ->
@@ -142,6 +145,7 @@ decls :: GHC.RenamedSource -> LTokenDetails
decls (group, _, _, _) = concatMap ($ group)
[ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
, everythingInRenamedSource fun . GHC.hs_valds
+ , everythingInRenamedSource fix . GHC.hs_fixds
, everythingInRenamedSource (con `Syb.combine` ins)
]
where
@@ -149,11 +153,16 @@ decls (group, _, _, _) = concatMap ($ group)
GHC.DataDecl { tcdLName = name } -> pure . decl $ name
GHC.SynDecl _ name _ _ _ -> pure . decl $ name
GHC.FamDecl _ fam -> pure . decl $ GHC.fdLName fam
- GHC.ClassDecl{..} -> [decl tcdLName] ++ concatMap sig tcdSigs
+ GHC.ClassDecl{..} ->
+ [decl tcdLName]
+ ++ concatMap sig tcdSigs
+ ++ concatMap tyfam tcdATs
GHC.XTyClDecl {} -> GHC.panic "haddock:decls"
fun term = case cast term of
(Just (GHC.FunBind _ (GHC.L sspan name) _ _ _ :: GHC.HsBind GHC.GhcRn))
| GHC.isExternalName name -> pure (sspan, RtkDecl name)
+ (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.L sspan name) _ _ _)))
+ | GHC.isExternalName name -> pure (sspan, RtkDecl name)
_ -> empty
con term = case cast term of
(Just (cdcl :: GHC.ConDecl GHC.GhcRn)) ->
@@ -171,7 +180,17 @@ decls (group, _, _, _) = concatMap ($ group)
Just (field :: GHC.ConDeclField GHC.GhcRn)
-> map (decl . fmap GHC.extFieldOcc) $ GHC.cd_fld_names field
Nothing -> empty
+ fix term = case cast term of
+ Just ((GHC.FixitySig _ names _) :: GHC.FixitySig GHC.GhcRn)
+ -> map (\(GHC.L sspan x) -> (sspan, RtkVar x)) names
+ Just ((GHC.XFixitySig {}) :: GHC.FixitySig GHC.GhcRn)
+ -> GHC.panic "haddock:decls"
+ Nothing -> empty
+ tyfam (GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName]
+ tyfam (GHC.L _ (GHC.XFamilyDecl {})) = GHC.panic "haddock:dels"
sig (GHC.L _ (GHC.TypeSig _ names _)) = map decl names
+ sig (GHC.L _ (GHC.PatSynSig _ names _)) = map decl names
+ sig (GHC.L _ (GHC.ClassOpSig _ _ names _)) = map decl names
sig _ = []
decl (GHC.L sspan name) = (sspan, RtkDecl name)
tyref (GHC.L sspan name) = (sspan, RtkType name)
@@ -190,10 +209,11 @@ imports src@(_, imps, _, _) =
(Just (GHC.IEThingAll _ t)) -> pure $ typ $ GHC.ieLWrappedName t
(Just (GHC.IEThingWith _ t _ vs _fls)) ->
[typ $ GHC.ieLWrappedName t] ++ map (var . GHC.ieLWrappedName) vs
+ (Just (GHC.IEModuleContents _ m)) -> pure $ modu m
_ -> empty
typ (GHC.L sspan name) = (sspan, RtkType name)
var (GHC.L sspan name) = (sspan, RtkVar name)
- imp idecl | not . GHC.ideclImplicit $ idecl =
- let (GHC.L sspan name) = GHC.ideclName idecl
- in Just (sspan, RtkModule name)
- imp _ = Nothing
+ modu (GHC.L sspan name) = (sspan, RtkModule name)
+ imp idecl
+ | not . GHC.ideclImplicit $ idecl = Just (modu (GHC.ideclName idecl))
+ | otherwise = Nothing
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index e4345602..e7ecac73 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -1,11 +1,19 @@
module Haddock.Backends.Hyperlinker.Parser (parse) where
+import Data.Either ( isRight, isLeft )
+import Data.List ( foldl', isPrefixOf, isSuffixOf )
+import Data.Maybe ( maybeToList )
+import Data.Char ( isSpace )
+import qualified Text.Read as R
-import Data.Char
-import Data.List
-import Data.Maybe
+import GHC ( DynFlags, addSourceToTokens )
+import SrcLoc
+import FastString ( mkFastString )
+import StringBuffer ( stringToStringBuffer )
+import Lexer ( Token(..) )
+import qualified Lexer as L
-import Haddock.Backends.Hyperlinker.Types
+import Haddock.Backends.Hyperlinker.Types as T
-- | Turn source code string into a stream of more descriptive tokens.
@@ -13,202 +21,419 @@ import Haddock.Backends.Hyperlinker.Types
-- Result should retain original file layout (including comments, whitespace,
-- etc.), i.e. the following "law" should hold:
--
--- @concat . map 'tkValue' . 'parse' = id@
-parse :: String -> [Token]
-parse = tokenize . tag . chunk
+-- prop> concat . map tkValue . parse = id
+--
+-- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v',
+-- characters, since GHC transforms those into ' ' and '\n')
+parse :: DynFlags -> FilePath -> String -> [T.Token]
+parse dflags fp = ghcToks . processCPP dflags fp . filterCRLF
+ where
+ -- Remove CRLFs from source
+ filterCRLF :: String -> String
+ filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs
+ filterCRLF (c:cs) = c : filterCRLF cs
+ filterCRLF [] = []
--- | Split raw source string to more meaningful chunks.
+-- | Parse the source into tokens using the GHC lexer.
--
--- This is the initial stage of tokenization process. Each chunk is either
--- a comment (including comment delimiters), a whitespace string, preprocessor
--- macro (and all its content until the end of a line) or valid Haskell lexeme.
-chunk :: String -> [String]
-chunk [] = []
-chunk str@(c:_)
- | isSpace c =
- let (space, mcpp, rest) = spanSpaceOrCpp str
- in [space] ++ maybeToList mcpp ++ chunk rest
-chunk str
- | "--" `isPrefixOf` str = chunk' $ spanToNewline str
- | "{-" `isPrefixOf` str = chunk' $ chunkComment 0 str
- | otherwise = case lex' str of
- (tok:_) -> chunk' tok
- [] -> [str]
+-- * CPP lines are removed and reinserted as line-comments
+-- * top-level file pragmas are parsed as block comments (see the
+-- 'ITblockComment' case of 'classify' for more details)
+--
+processCPP :: DynFlags -- ^ GHC's flags
+ -> FilePath -- ^ source file name (for position information)
+ -> String -- ^ source file contents
+ -> [(Located L.Token, String)]
+processCPP dflags fpath s = addSrc . go start . splitCPP $ s
where
- chunk' (c, rest) = c:(chunk rest)
+ start = mkRealSrcLoc (mkFastString fpath) 1 1
+ addSrc = addSourceToTokens start (stringToStringBuffer s)
--- | A bit better lexer then the default, i.e. handles DataKinds quotes
-lex' :: ReadS String
-lex' ('\'' : '\'' : rest) = [("''", rest)]
-lex' str@('\'' : '\\' : _ : '\'' : _) = lex str
-lex' str@('\'' : _ : '\'' : _) = lex str
-lex' ('\'' : rest) = [("'", rest)]
-lex' str = lex str
+ -- Transform a list of Haskell/CPP lines into a list of tokens
+ go :: RealSrcLoc -> [Either String String] -> [Located L.Token]
+ go _ [] = []
+ go pos ls =
+ let (hLinesRight, ls') = span isRight ls
+ (cppLinesLeft, rest) = span isLeft ls'
--- | Split input to "first line" string and the rest of it.
---
--- Ideally, this should be done simply with @'break' (== '\n')@. However,
--- Haskell also allows line-unbreaking (or whatever it is called) so things
--- are not as simple and this function deals with that.
-spanToNewline :: String -> (String, String)
-spanToNewline [] = ([], [])
-spanToNewline ('\\':'\n':str) =
- let (str', rest) = spanToNewline str
- in ('\\':'\n':str', rest)
-spanToNewline str@('\n':_) = ("", str)
-spanToNewline (c:str) =
- let (str', rest) = spanToNewline str
- in (c:str', rest)
+ hSrc = concat [ hLine | Right hLine <- hLinesRight ]
+ cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ]
+
+ in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of
--- | Split input to whitespace string, (optional) preprocessor directive and
--- the rest of it.
+ -- Stuff that fails to lex gets turned into comments
+ L.PFailed _ _ss _msg ->
+ let (src_pos, failed) = mkToken ITunknown pos hSrc
+ (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc
+ in failed : cpp : go new_pos rest
+
+ -- Successfully lexed
+ L.POk ss toks ->
+ let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc
+ in toks ++ [cpp] ++ go new_pos rest
+
+ -- Manually make a token from a 'String', advancing the cursor position
+ mkToken tok start' str =
+ let end = foldl' advanceSrcLoc start' str
+ in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str))
+
+
+-- | Split apart the initial file into Haskell source lines ('Left' entries) and
+-- CPP lines ('Right' entries).
--
--- Again, using something like @'span' 'isSpace'@ would be nice to chunk input
--- to whitespace. The problem is with /#/ symbol - if it is placed at the very
--- beginning of a line, it should be recognized as preprocessor macro. In any
--- other case, it is ordinary Haskell symbol and can be used to declare
--- operators. Hence, while dealing with whitespace we also check whether there
--- happens to be /#/ symbol just after a newline character - if that is the
--- case, we begin treating the whole line as preprocessor macro.
-spanSpaceOrCpp :: String -> (String, Maybe String, String)
-spanSpaceOrCpp ('\n':'#':str) =
- let (str', rest) = spanToNewline str
- in ("\n", Just $ '#':str', rest)
-spanSpaceOrCpp (c:str')
- | isSpace c =
- let (space, mcpp, rest) = spanSpaceOrCpp str'
- in (c:space, mcpp, rest)
-spanSpaceOrCpp str = ("", Nothing, str)
-
--- | Split input to comment content (including delimiters) and the rest.
+-- All characters in the input are present in the output:
--
--- Again, some more logic than simple 'span' is required because of Haskell
--- comment nesting policy.
-chunkComment :: Int -> String -> (String, String)
-chunkComment _ [] = ("", "")
-chunkComment depth ('{':'-':str) =
- let (c, rest) = chunkComment (depth + 1) str
- in ("{-" ++ c, rest)
-chunkComment depth ('-':'}':str)
- | depth == 1 = ("-}", str)
- | otherwise =
- let (c, rest) = chunkComment (depth - 1) str
- in ("-}" ++ c, rest)
-chunkComment depth (e:str) =
- let (c, rest) = chunkComment depth str
- in (e:c, rest)
-
--- | Assign source location for each chunk in given stream.
-tag :: [String] -> [(Span, String)]
-tag =
- reverse . snd . foldl aux (Position 1 1, [])
+-- prop> concat . map (either id id) . splitCPP = id
+splitCPP :: String -> [Either String String]
+splitCPP "" = []
+splitCPP s | isCPPline s = Left l : splitCPP rest
+ | otherwise = Right l : splitCPP rest
where
- aux (pos, cs) str =
- let pos' = foldl move pos str
- in (pos', (Span pos pos', str):cs)
- move pos '\n' = pos { posRow = posRow pos + 1, posCol = 1 }
- move pos _ = pos { posCol = posCol pos + 1 }
-
--- | Turn unrecognised chunk stream to more descriptive token stream.
-tokenize :: [(Span, String)] -> [Token]
-tokenize =
- map aux
- where
- aux (sp, str) = Token
- { tkType = classify str
- , tkValue = str
- , tkSpan = sp
- }
+ ~(l, rest) = spanToNewline 0 s
+
--- | Classify given string as appropriate Haskell token.
+-- | Heuristic to decide if a line is going to be a CPP line. This should be a
+-- cheap operation since it is going to be run on every line being processed.
+--
+-- Right now it just checks if the first non-whitespace character in the first
+-- five characters of the line is a '#':
--
--- This method is based on Haskell 98 Report lexical structure description:
--- https://www.haskell.org/onlinereport/lexemes.html
+-- >>> isCPPline "#define FOO 1"
+-- True
--
--- However, this is probably far from being perfect and most probably does not
--- handle correctly all corner cases.
-classify :: String -> TokenType
-classify str
- | "--" `isPrefixOf` str = TkComment
- | "{-#" `isPrefixOf` str = TkPragma
- | "{-" `isPrefixOf` str = TkComment
-classify "''" = TkSpecial
-classify "'" = TkSpecial
-classify str@(c:_)
- | isSpace c = TkSpace
- | isDigit c = TkNumber
- | c `elem` special = TkSpecial
- | str `elem` glyphs = TkGlyph
- | all (`elem` symbols) str = TkOperator
- | c == '#' = TkCpp
- | c == '"' = TkString
- | c == '\'' = TkChar
-classify str
- | str `elem` keywords = TkKeyword
- | isIdentifier str = TkIdentifier
- | otherwise = TkUnknown
-
-keywords :: [String]
-keywords =
- [ "as"
- , "case"
- , "class"
- , "data"
- , "default"
- , "deriving"
- , "do"
- , "else"
- , "hiding"
- , "if"
- , "import"
- , "in"
- , "infix"
- , "infixl"
- , "infixr"
- , "instance"
- , "let"
- , "module"
- , "newtype"
- , "of"
- , "qualified"
- , "then"
- , "type"
- , "where"
- , "forall"
- , "family"
- , "mdo"
- ]
-
-glyphs :: [String]
-glyphs =
- [ ".."
- , ":"
- , "::"
- , "="
- , "\\"
- , "|"
- , "<-"
- , "->"
- , "@"
- , "~"
- , "~#"
- , "=>"
- , "-"
- , "!"
- ]
-
-special :: [Char]
-special = "()[]{},;`"
-
--- TODO: Add support for any Unicode symbol or punctuation.
--- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators
-symbols :: [Char]
-symbols = "!#$%&*+./<=>?@\\^|-~:"
-
-isIdentifier :: String -> Bool
-isIdentifier (s:str)
- | (isLower' s || isUpper s) && all isAlphaNum' str = True
+-- >>> isCPPline "\t\t #ifdef GHC"
+-- True
+--
+-- >>> isCPPline " #endif"
+-- False
+--
+isCPPline :: String -> Bool
+isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
+
+
+-- | Split a "line" off the front of a string, hopefully without cutting tokens
+-- in half. I say "hopefully" because knowing what a token is requires lexing,
+-- yet lexing depends on this function.
+--
+-- All characters in the input are present in the output:
+--
+-- prop> curry (++) . spanToNewLine 0 = id
+spanToNewline :: Int -- ^ open '{-'
+ -> String -- ^ input
+ -> (String, String)
+
+-- Base case and space characters
+spanToNewline _ "" = ("", "")
+spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
+spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
+spanToNewline n ('\\':'\n':str) =
+ let (str', rest) = spanToNewline n str
+ in ('\\':'\n':str', rest)
+
+-- Block comments
+spanToNewline n ('{':'-':str) =
+ let (str', rest) = spanToNewline (n+1) str
+ in ('{':'-':str', rest)
+spanToNewline n ('-':'}':str) =
+ let (str', rest) = spanToNewline (n-1) str
+ in ('-':'}':str', rest)
+
+-- When not in a block comment, try to lex a Haskell token
+spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) =
+ if all (== '-') lexed && length lexed >= 2
+ -- A Haskell line comment
+ then case span (/= '\n') str' of
+ (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest)
+ (_, _) -> (str, "")
+
+ -- An actual Haskell token
+ else let (str'', rest) = spanToNewline 0 str'
+ in (lexed ++ str'', rest)
+
+-- In all other cases, advance one character at a time
+spanToNewline n (c:str) =
+ let (str', rest) = spanToNewline n str
+ in (c:str', rest)
+
+
+-- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of
+-- Haddock's 'T.Token'.
+ghcToks :: [(Located L.Token, String)] -> [T.Token]
+ghcToks = reverse . (\(_,ts,_) -> ts) . foldl' go (start, [], False)
where
- isLower' c = isLower c || c == '_'
- isAlphaNum' c = isAlphaNum c || c == '_' || c == '\''
-isIdentifier _ = False
+ start = mkRealSrcLoc (mkFastString "lexing") 1 1
+
+ go :: (RealSrcLoc, [T.Token], Bool)
+ -- ^ current position, tokens accumulated, currently in pragma (or not)
+
+ -> (Located L.Token, String)
+ -- ^ next token, its content
+
+ -> (RealSrcLoc, [T.Token], Bool)
+ -- ^ new position, new tokens accumulated, currently in pragma (or not)
+
+ go (pos, toks, in_prag) (L l tok, raw) =
+ ( next_pos
+ , classifiedTok ++ maybeToList white ++ toks
+ , inPragma in_prag tok
+ )
+ where
+ (next_pos, white) = mkWhitespace pos l
+
+ classifiedTok = [ Token (classify' tok) raw rss
+ | RealSrcSpan rss <- [l]
+ , not (null raw)
+ ]
+
+ classify' | in_prag = const TkPragma
+ | otherwise = classify
+
+
+-- | Find the correct amount of whitespace between tokens.
+mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token)
+mkWhitespace prev spn =
+ case spn of
+ UnhelpfulSpan _ -> (prev,Nothing)
+ RealSrcSpan s | null wsstring -> (end, Nothing)
+ | otherwise -> (end, Just (Token TkSpace wsstring wsspan))
+ where
+ start = realSrcSpanStart s
+ end = realSrcSpanEnd s
+ wsspan = mkRealSrcSpan prev start
+ nls = srcLocLine start - srcLocLine prev
+ spaces = if nls == 0 then srcLocCol start - srcLocCol prev
+ else srcLocCol start - 1
+ wsstring = replicate nls '\n' ++ replicate spaces ' '
+
+
+-- | Classify given tokens as appropriate Haskell token type.
+classify :: L.Token -> TokenType
+classify tok =
+ case tok of
+ ITas -> TkKeyword
+ ITcase -> TkKeyword
+ ITclass -> TkKeyword
+ ITdata -> TkKeyword
+ ITdefault -> TkKeyword
+ ITderiving -> TkKeyword
+ ITdo -> TkKeyword
+ ITelse -> TkKeyword
+ IThiding -> TkKeyword
+ ITforeign -> TkKeyword
+ ITif -> TkKeyword
+ ITimport -> TkKeyword
+ ITin -> TkKeyword
+ ITinfix -> TkKeyword
+ ITinfixl -> TkKeyword
+ ITinfixr -> TkKeyword
+ ITinstance -> TkKeyword
+ ITlet -> TkKeyword
+ ITmodule -> TkKeyword
+ ITnewtype -> TkKeyword
+ ITof -> TkKeyword
+ ITqualified -> TkKeyword
+ ITthen -> TkKeyword
+ ITtype -> TkKeyword
+ ITvia -> TkKeyword
+ ITwhere -> TkKeyword
+
+ ITforall {} -> TkKeyword
+ ITexport -> TkKeyword
+ ITlabel -> TkKeyword
+ ITdynamic -> TkKeyword
+ ITsafe -> TkKeyword
+ ITinterruptible -> TkKeyword
+ ITunsafe -> TkKeyword
+ ITstdcallconv -> TkKeyword
+ ITccallconv -> TkKeyword
+ ITcapiconv -> TkKeyword
+ ITprimcallconv -> TkKeyword
+ ITjavascriptcallconv -> TkKeyword
+ ITmdo -> TkKeyword
+ ITfamily -> TkKeyword
+ ITrole -> TkKeyword
+ ITgroup -> TkKeyword
+ ITby -> TkKeyword
+ ITusing -> TkKeyword
+ ITpattern -> TkKeyword
+ ITstatic -> TkKeyword
+ ITstock -> TkKeyword
+ ITanyclass -> TkKeyword
+
+ ITunit -> TkKeyword
+ ITsignature -> TkKeyword
+ ITdependency -> TkKeyword
+ ITrequires -> TkKeyword
+
+ ITinline_prag {} -> TkPragma
+ ITspec_prag {} -> TkPragma
+ ITspec_inline_prag {} -> TkPragma
+ ITsource_prag {} -> TkPragma
+ ITrules_prag {} -> TkPragma
+ ITwarning_prag {} -> TkPragma
+ ITdeprecated_prag {} -> TkPragma
+ ITline_prag {} -> TkPragma
+ ITcolumn_prag {} -> TkPragma
+ ITscc_prag {} -> TkPragma
+ ITgenerated_prag {} -> TkPragma
+ ITcore_prag {} -> TkPragma
+ ITunpack_prag {} -> TkPragma
+ ITnounpack_prag {} -> TkPragma
+ ITann_prag {} -> TkPragma
+ ITcomplete_prag {} -> TkPragma
+ ITclose_prag -> TkPragma
+ IToptions_prag {} -> TkPragma
+ ITinclude_prag {} -> TkPragma
+ ITlanguage_prag -> TkPragma
+ ITminimal_prag {} -> TkPragma
+ IToverlappable_prag {} -> TkPragma
+ IToverlapping_prag {} -> TkPragma
+ IToverlaps_prag {} -> TkPragma
+ ITincoherent_prag {} -> TkPragma
+ ITctype {} -> TkPragma
+
+ ITdotdot -> TkGlyph
+ ITcolon -> TkGlyph
+ ITdcolon {} -> TkGlyph
+ ITequal -> TkGlyph
+ ITlam -> TkGlyph
+ ITlcase -> TkGlyph
+ ITvbar -> TkGlyph
+ ITlarrow {} -> TkGlyph
+ ITrarrow {} -> TkGlyph
+ ITat -> TkGlyph
+ ITtilde -> TkGlyph
+ ITdarrow {} -> TkGlyph
+ ITminus -> TkGlyph
+ ITbang -> TkGlyph
+ ITdot -> TkOperator
+ ITtypeApp -> TkGlyph
+
+ ITbiglam -> TkGlyph
+
+ ITocurly -> TkSpecial
+ ITccurly -> TkSpecial
+ ITvocurly -> TkSpecial
+ ITvccurly -> TkSpecial
+ ITobrack -> TkSpecial
+ ITopabrack -> TkSpecial
+ ITcpabrack -> TkSpecial
+ ITcbrack -> TkSpecial
+ IToparen -> TkSpecial
+ ITcparen -> TkSpecial
+ IToubxparen -> TkSpecial
+ ITcubxparen -> TkSpecial
+ ITsemi -> TkSpecial
+ ITcomma -> TkSpecial
+ ITunderscore -> TkIdentifier
+ ITbackquote -> TkSpecial
+ ITsimpleQuote -> TkSpecial
+
+ ITvarid {} -> TkIdentifier
+ ITconid {} -> TkIdentifier
+ ITvarsym {} -> TkOperator
+ ITconsym {} -> TkOperator
+ ITqvarid {} -> TkIdentifier
+ ITqconid {} -> TkIdentifier
+ ITqvarsym {} -> TkOperator
+ ITqconsym {} -> TkOperator
+
+ ITdupipvarid {} -> TkUnknown
+ ITlabelvarid {} -> TkUnknown
+
+ ITchar {} -> TkChar
+ ITstring {} -> TkString
+ ITinteger {} -> TkNumber
+ ITrational {} -> TkNumber
+
+ ITprimchar {} -> TkChar
+ ITprimstring {} -> TkString
+ ITprimint {} -> TkNumber
+ ITprimword {} -> TkNumber
+ ITprimfloat {} -> TkNumber
+ ITprimdouble {} -> TkNumber
+
+ ITopenExpQuote {} -> TkSpecial
+ ITopenPatQuote -> TkSpecial
+ ITopenDecQuote -> TkSpecial
+ ITopenTypQuote -> TkSpecial
+ ITcloseQuote {} -> TkSpecial
+ ITopenTExpQuote {} -> TkSpecial
+ ITcloseTExpQuote -> TkSpecial
+ ITidEscape {} -> TkUnknown
+ ITparenEscape -> TkSpecial
+ ITidTyEscape {} -> TkUnknown
+ ITparenTyEscape -> TkSpecial
+ ITtyQuote -> TkSpecial
+ ITquasiQuote {} -> TkUnknown
+ ITqQuasiQuote {} -> TkUnknown
+
+ ITproc -> TkKeyword
+ ITrec -> TkKeyword
+ IToparenbar {} -> TkGlyph
+ ITcparenbar {} -> TkGlyph
+ ITlarrowtail {} -> TkGlyph
+ ITrarrowtail {} -> TkGlyph
+ ITLarrowtail {} -> TkGlyph
+ ITRarrowtail {} -> TkGlyph
+
+ ITunknown {} -> TkUnknown
+ ITeof -> TkUnknown
+
+ -- Line comments are only supposed to start with '--'. Starting with '#'
+ -- means that this was probably a CPP.
+ ITlineComment s
+ | isCPPline s -> TkCpp
+ | otherwise -> TkComment
+
+ ITdocCommentNext {} -> TkComment
+ ITdocCommentPrev {} -> TkComment
+ ITdocCommentNamed {} -> TkComment
+ ITdocSection {} -> TkComment
+ ITdocOptions {} -> TkComment
+
+ -- The lexer considers top-level pragmas as comments (see `pragState` in
+ -- the GHC lexer for more), so we have to manually reverse this. The
+ -- following is a hammer: it smashes _all_ pragma-like block comments into
+ -- pragmas.
+ ITblockComment c
+ | isPrefixOf "{-#" c
+ , isSuffixOf "#-}" c -> TkPragma
+ | otherwise -> TkComment
+
+-- | Classify given tokens as beginning pragmas (or not).
+inPragma :: Bool -- ^ currently in pragma
+ -> L.Token -- ^ current token
+ -> Bool -- ^ new information about whether we are in a pragma
+inPragma _ ITclose_prag = False
+inPragma True _ = True
+inPragma False tok =
+ case tok of
+ ITinline_prag {} -> True
+ ITspec_prag {} -> True
+ ITspec_inline_prag {} -> True
+ ITsource_prag {} -> True
+ ITrules_prag {} -> True
+ ITwarning_prag {} -> True
+ ITdeprecated_prag {} -> True
+ ITline_prag {} -> True
+ ITcolumn_prag {} -> True
+ ITscc_prag {} -> True
+ ITgenerated_prag {} -> True
+ ITcore_prag {} -> True
+ ITunpack_prag {} -> True
+ ITnounpack_prag {} -> True
+ ITann_prag {} -> True
+ ITcomplete_prag {} -> True
+ IToptions_prag {} -> True
+ ITinclude_prag {} -> True
+ ITlanguage_prag -> True
+ ITminimal_prag {} -> True
+ IToverlappable_prag {} -> True
+ IToverlapping_prag {} -> True
+ IToverlaps_prag {} -> True
+ ITincoherent_prag {} -> True
+ ITctype {} -> True
+
+ _ -> False
+
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
index 15793f0c..d7ea70a6 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
@@ -1,6 +1,5 @@
{-# LANGUAGE RecordWildCards #-}
-
module Haddock.Backends.Hyperlinker.Renderer (render) where
@@ -15,7 +14,6 @@ import System.FilePath.Posix ((</>))
import Data.List
import Data.Maybe
-import Data.Monoid
import qualified Data.Map as Map
import Text.XHtml (Html, HtmlAttr, (!))
@@ -29,36 +27,10 @@ render :: Maybe FilePath -> Maybe FilePath -> SrcMap -> [RichToken]
-> Html
render mcss mjs srcs tokens = header mcss mjs <> body srcs tokens
-
-data TokenGroup
- = GrpNormal Token
- | GrpRich TokenDetails [Token]
-
-
--- | Group consecutive tokens pointing to the same element.
---
--- We want to render qualified identifiers as one entity. For example,
--- @Bar.Baz.foo@ consists of 5 tokens (@Bar@, @.@, @Baz@, @.@, @foo@) but for
--- better user experience when highlighting and clicking links, these tokens
--- should be regarded as one identifier. Therefore, before rendering we must
--- group consecutive elements pointing to the same 'GHC.Name' (note that even
--- dot token has it if it is part of qualified name).
-groupTokens :: [RichToken] -> [TokenGroup]
-groupTokens [] = []
-groupTokens ((RichToken tok Nothing):rest) = (GrpNormal tok):(groupTokens rest)
-groupTokens ((RichToken tok (Just det)):rest) =
- let (grp, rest') = span same rest
- in (GrpRich det (tok:(map rtkToken grp))):(groupTokens rest')
- where
- same (RichToken _ (Just det')) = det == det'
- same _ = False
-
-
body :: SrcMap -> [RichToken] -> Html
-body srcs tokens =
- Html.body . Html.pre $ hypsrc
+body srcs tokens = Html.body . Html.pre $ hypsrc
where
- hypsrc = mconcat . map (tokenGroup srcs) . groupTokens $ tokens
+ hypsrc = mconcat . map (richToken srcs) $ tokens
header :: Maybe FilePath -> Maybe FilePath -> Html
@@ -79,29 +51,20 @@ header mcss mjs =
, Html.src scriptFile
]
-
-tokenGroup :: SrcMap -> TokenGroup -> Html
-tokenGroup _ (GrpNormal tok@(Token { .. }))
- | tkType == TkSpace = renderSpace (posRow . spStart $ tkSpan) tkValue
- | otherwise = tokenSpan tok ! attrs
+-- | Given information about the source position of definitions, render a token
+richToken :: SrcMap -> RichToken -> Html
+richToken srcs (RichToken Token{..} details)
+ | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue
+ | otherwise = linked content
where
- attrs = [ multiclass . tokenStyle $ tkType ]
-tokenGroup srcs (GrpRich det tokens) =
- externalAnchor det . internalAnchor det . hyperlink srcs det $ content
- where
- content = mconcat . map (richToken det) $ tokens
-
-
-richToken :: TokenDetails -> Token -> Html
-richToken det tok =
- tokenSpan tok ! [ multiclass style ]
- where
- style = (tokenStyle . tkType) tok ++ richTokenStyle det
-
-
-tokenSpan :: Token -> Html
-tokenSpan = Html.thespan . Html.toHtml . tkValue
+ content = tokenSpan ! [ multiclass style ]
+ tokenSpan = Html.thespan (Html.toHtml tkValue)
+ style = tokenStyle tkType ++ maybe [] richTokenStyle details
+ -- If we have name information, we can make links
+ linked = case details of
+ Just d -> externalAnchor d . internalAnchor d . hyperlink srcs d
+ Nothing -> id
richTokenStyle :: TokenDetails -> [StyleClass]
richTokenStyle (RtkVar _) = ["hs-var"]
@@ -155,7 +118,7 @@ internalHyperlink name content =
Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html
-externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of
+externalNameHyperlink srcs name content = case Map.lookup mdl srcs of
Just SrcLocal -> Html.anchor content !
[ Html.href $ hypSrcModuleNameUrl mdl name ]
Just (SrcExternal path) -> Html.anchor content !
@@ -165,12 +128,14 @@ externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of
mdl = GHC.nameModule name
externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html
-externalModHyperlink (_, srcs) name content = case Map.lookup name srcs of
- Just SrcLocal -> Html.anchor content !
+externalModHyperlink srcs name content =
+ let srcs' = Map.mapKeys GHC.moduleName srcs in
+ case Map.lookup name srcs' of
+ Just SrcLocal -> Html.anchor content !
[ Html.href $ hypSrcModuleUrl' name ]
- Just (SrcExternal path) -> Html.anchor content !
+ Just (SrcExternal path) -> Html.anchor content !
[ Html.href $ path </> hypSrcModuleUrl' name ]
- Nothing -> content
+ Nothing -> content
renderSpace :: Int -> String -> Html
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
index d8ae89e4..e377471e 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -4,8 +4,6 @@ module Haddock.Backends.Hyperlinker.Types where
import qualified GHC
import Data.Map (Map)
-import qualified Data.Map as Map
-
data Token = Token
{ tkType :: TokenType
@@ -14,23 +12,8 @@ data Token = Token
}
deriving (Show)
-data Position = Position
- { posRow :: !Int
- , posCol :: !Int
- }
- deriving (Eq, Ord, Show)
-
-data Span = Span
- { spStart :: !Position
- , spEnd :: !Position
- }
- deriving (Show)
-
--- | Tests whether the first span "contains" the other span, meaning
--- that it covers at least as much source code. True where spans are equal.
-containsSpan :: Span -> Span -> Bool
-containsSpan s1 s2 =
- spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2
+type Position = GHC.RealSrcLoc
+type Span = GHC.RealSrcSpan
data TokenType
= TkIdentifier
@@ -80,15 +63,5 @@ data SrcPath
| SrcLocal
-- | Mapping from modules to cross-package source paths.
---
--- This mapping is actually a pair of maps instead of just one map. The reason
--- for this is because when hyperlinking modules in import lists we have no
--- 'GHC.Module' available. On the other hand, we can't just use map with
--- 'GHC.ModuleName' as indices because certain modules may have common name
--- but originate in different packages. Hence, we use both /rich/ and /poor/
--- versions, where the /poor/ is just projection of /rich/ one cached in pair
--- for better performance.
-type SrcMap = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath)
+type SrcMap = Map GHC.Module SrcPath
-mkSrcMap :: Map GHC.Module SrcPath -> SrcMap
-mkSrcMap srcs = (srcs, Map.mapKeys GHC.moduleName srcs)