diff options
Diffstat (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker')
4 files changed, 476 insertions, 293 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 56137f51..0ecf7109 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -12,6 +12,7 @@ import qualified Haddock.Syb as Syb import Haddock.Backends.Hyperlinker.Types import qualified GHC +import qualified SrcLoc import qualified Outputable as GHC import Control.Applicative @@ -52,10 +53,10 @@ type DetailsMap = Map.Map Position (Span, TokenDetails) mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap mkDetailsMap xs = - Map.fromListWith select_details [ (start, (token_span, token_details)) + Map.fromListWith select_details [ (start, (span, token_details)) | (ghc_span, token_details) <- xs - , Just !token_span <- [ghcSrcSpanToSpan ghc_span] - , let start = spStart token_span + , GHC.RealSrcSpan span <- [ghc_span] + , let start = SrcLoc.realSrcSpanStart span ] where -- favour token details which appear earlier in the list @@ -63,17 +64,11 @@ mkDetailsMap xs = lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails lookupBySpan span details = do - (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details - guard (tok_span `containsSpan` span ) + let pos = SrcLoc.realSrcSpanStart span + (_, (tok_span, tok_details)) <- Map.lookupLE pos details + guard (tok_span `SrcLoc.containsSpan` span) return tok_details -ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span -ghcSrcSpanToSpan (GHC.RealSrcSpan span) = - Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span) - , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span) - }) -ghcSrcSpanToSpan _ = Nothing - enrichToken :: Token -> DetailsMap -> Maybe TokenDetails enrichToken (Token typ _ spn) dm | typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm @@ -99,9 +94,12 @@ variables = types :: GHC.RenamedSource -> LTokenDetails types = everythingInRenamedSource ty where + ty :: forall a. Data a => a -> [(GHC.SrcSpan, TokenDetails)] ty term = case cast term of (Just ((GHC.L sspan (GHC.HsTyVar _ _ name)) :: GHC.LHsType GHC.GhcRn)) -> pure (sspan, RtkType (GHC.unLoc name)) + (Just ((GHC.L sspan (GHC.HsOpTy _ l name r)) :: GHC.LHsType GHC.GhcRn)) -> + (sspan, RtkType (GHC.unLoc name)):(ty l ++ ty r) _ -> empty -- | Obtain details map for identifier bindings. @@ -117,6 +115,11 @@ binds = everythingInRenamedSource fun term = case cast term of (Just (GHC.FunBind _ (GHC.L sspan name) _ _ _ :: GHC.HsBind GHC.GhcRn)) -> pure (sspan, RtkBind name) + (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.L sspan name) args _ _))) -> + pure (sspan, RtkBind name) ++ everythingInRenamedSource patsyn_binds args + _ -> empty + patsyn_binds term = case cast term of + (Just (GHC.L sspan (name :: GHC.Name))) -> pure (sspan, RtkVar name) _ -> empty pat term = case cast term of (Just ((GHC.L sspan (GHC.VarPat _ name)) :: GHC.LPat GHC.GhcRn)) -> @@ -142,6 +145,7 @@ decls :: GHC.RenamedSource -> LTokenDetails decls (group, _, _, _) = concatMap ($ group) [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds , everythingInRenamedSource fun . GHC.hs_valds + , everythingInRenamedSource fix . GHC.hs_fixds , everythingInRenamedSource (con `Syb.combine` ins) ] where @@ -149,11 +153,16 @@ decls (group, _, _, _) = concatMap ($ group) GHC.DataDecl { tcdLName = name } -> pure . decl $ name GHC.SynDecl _ name _ _ _ -> pure . decl $ name GHC.FamDecl _ fam -> pure . decl $ GHC.fdLName fam - GHC.ClassDecl{..} -> [decl tcdLName] ++ concatMap sig tcdSigs + GHC.ClassDecl{..} -> + [decl tcdLName] + ++ concatMap sig tcdSigs + ++ concatMap tyfam tcdATs GHC.XTyClDecl {} -> GHC.panic "haddock:decls" fun term = case cast term of (Just (GHC.FunBind _ (GHC.L sspan name) _ _ _ :: GHC.HsBind GHC.GhcRn)) | GHC.isExternalName name -> pure (sspan, RtkDecl name) + (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.L sspan name) _ _ _))) + | GHC.isExternalName name -> pure (sspan, RtkDecl name) _ -> empty con term = case cast term of (Just (cdcl :: GHC.ConDecl GHC.GhcRn)) -> @@ -171,7 +180,17 @@ decls (group, _, _, _) = concatMap ($ group) Just (field :: GHC.ConDeclField GHC.GhcRn) -> map (decl . fmap GHC.extFieldOcc) $ GHC.cd_fld_names field Nothing -> empty + fix term = case cast term of + Just ((GHC.FixitySig _ names _) :: GHC.FixitySig GHC.GhcRn) + -> map (\(GHC.L sspan x) -> (sspan, RtkVar x)) names + Just ((GHC.XFixitySig {}) :: GHC.FixitySig GHC.GhcRn) + -> GHC.panic "haddock:decls" + Nothing -> empty + tyfam (GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName] + tyfam (GHC.L _ (GHC.XFamilyDecl {})) = GHC.panic "haddock:dels" sig (GHC.L _ (GHC.TypeSig _ names _)) = map decl names + sig (GHC.L _ (GHC.PatSynSig _ names _)) = map decl names + sig (GHC.L _ (GHC.ClassOpSig _ _ names _)) = map decl names sig _ = [] decl (GHC.L sspan name) = (sspan, RtkDecl name) tyref (GHC.L sspan name) = (sspan, RtkType name) @@ -190,10 +209,11 @@ imports src@(_, imps, _, _) = (Just (GHC.IEThingAll _ t)) -> pure $ typ $ GHC.ieLWrappedName t (Just (GHC.IEThingWith _ t _ vs _fls)) -> [typ $ GHC.ieLWrappedName t] ++ map (var . GHC.ieLWrappedName) vs + (Just (GHC.IEModuleContents _ m)) -> pure $ modu m _ -> empty typ (GHC.L sspan name) = (sspan, RtkType name) var (GHC.L sspan name) = (sspan, RtkVar name) - imp idecl | not . GHC.ideclImplicit $ idecl = - let (GHC.L sspan name) = GHC.ideclName idecl - in Just (sspan, RtkModule name) - imp _ = Nothing + modu (GHC.L sspan name) = (sspan, RtkModule name) + imp idecl + | not . GHC.ideclImplicit $ idecl = Just (modu (GHC.ideclName idecl)) + | otherwise = Nothing diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index e4345602..e7ecac73 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -1,11 +1,19 @@ module Haddock.Backends.Hyperlinker.Parser (parse) where +import Data.Either ( isRight, isLeft ) +import Data.List ( foldl', isPrefixOf, isSuffixOf ) +import Data.Maybe ( maybeToList ) +import Data.Char ( isSpace ) +import qualified Text.Read as R -import Data.Char -import Data.List -import Data.Maybe +import GHC ( DynFlags, addSourceToTokens ) +import SrcLoc +import FastString ( mkFastString ) +import StringBuffer ( stringToStringBuffer ) +import Lexer ( Token(..) ) +import qualified Lexer as L -import Haddock.Backends.Hyperlinker.Types +import Haddock.Backends.Hyperlinker.Types as T -- | Turn source code string into a stream of more descriptive tokens. @@ -13,202 +21,419 @@ import Haddock.Backends.Hyperlinker.Types -- Result should retain original file layout (including comments, whitespace, -- etc.), i.e. the following "law" should hold: -- --- @concat . map 'tkValue' . 'parse' = id@ -parse :: String -> [Token] -parse = tokenize . tag . chunk +-- prop> concat . map tkValue . parse = id +-- +-- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v', +-- characters, since GHC transforms those into ' ' and '\n') +parse :: DynFlags -> FilePath -> String -> [T.Token] +parse dflags fp = ghcToks . processCPP dflags fp . filterCRLF + where + -- Remove CRLFs from source + filterCRLF :: String -> String + filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs + filterCRLF (c:cs) = c : filterCRLF cs + filterCRLF [] = [] --- | Split raw source string to more meaningful chunks. +-- | Parse the source into tokens using the GHC lexer. -- --- This is the initial stage of tokenization process. Each chunk is either --- a comment (including comment delimiters), a whitespace string, preprocessor --- macro (and all its content until the end of a line) or valid Haskell lexeme. -chunk :: String -> [String] -chunk [] = [] -chunk str@(c:_) - | isSpace c = - let (space, mcpp, rest) = spanSpaceOrCpp str - in [space] ++ maybeToList mcpp ++ chunk rest -chunk str - | "--" `isPrefixOf` str = chunk' $ spanToNewline str - | "{-" `isPrefixOf` str = chunk' $ chunkComment 0 str - | otherwise = case lex' str of - (tok:_) -> chunk' tok - [] -> [str] +-- * CPP lines are removed and reinserted as line-comments +-- * top-level file pragmas are parsed as block comments (see the +-- 'ITblockComment' case of 'classify' for more details) +-- +processCPP :: DynFlags -- ^ GHC's flags + -> FilePath -- ^ source file name (for position information) + -> String -- ^ source file contents + -> [(Located L.Token, String)] +processCPP dflags fpath s = addSrc . go start . splitCPP $ s where - chunk' (c, rest) = c:(chunk rest) + start = mkRealSrcLoc (mkFastString fpath) 1 1 + addSrc = addSourceToTokens start (stringToStringBuffer s) --- | A bit better lexer then the default, i.e. handles DataKinds quotes -lex' :: ReadS String -lex' ('\'' : '\'' : rest) = [("''", rest)] -lex' str@('\'' : '\\' : _ : '\'' : _) = lex str -lex' str@('\'' : _ : '\'' : _) = lex str -lex' ('\'' : rest) = [("'", rest)] -lex' str = lex str + -- Transform a list of Haskell/CPP lines into a list of tokens + go :: RealSrcLoc -> [Either String String] -> [Located L.Token] + go _ [] = [] + go pos ls = + let (hLinesRight, ls') = span isRight ls + (cppLinesLeft, rest) = span isLeft ls' --- | Split input to "first line" string and the rest of it. --- --- Ideally, this should be done simply with @'break' (== '\n')@. However, --- Haskell also allows line-unbreaking (or whatever it is called) so things --- are not as simple and this function deals with that. -spanToNewline :: String -> (String, String) -spanToNewline [] = ([], []) -spanToNewline ('\\':'\n':str) = - let (str', rest) = spanToNewline str - in ('\\':'\n':str', rest) -spanToNewline str@('\n':_) = ("", str) -spanToNewline (c:str) = - let (str', rest) = spanToNewline str - in (c:str', rest) + hSrc = concat [ hLine | Right hLine <- hLinesRight ] + cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ] + + in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of --- | Split input to whitespace string, (optional) preprocessor directive and --- the rest of it. + -- Stuff that fails to lex gets turned into comments + L.PFailed _ _ss _msg -> + let (src_pos, failed) = mkToken ITunknown pos hSrc + (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc + in failed : cpp : go new_pos rest + + -- Successfully lexed + L.POk ss toks -> + let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc + in toks ++ [cpp] ++ go new_pos rest + + -- Manually make a token from a 'String', advancing the cursor position + mkToken tok start' str = + let end = foldl' advanceSrcLoc start' str + in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str)) + + +-- | Split apart the initial file into Haskell source lines ('Left' entries) and +-- CPP lines ('Right' entries). -- --- Again, using something like @'span' 'isSpace'@ would be nice to chunk input --- to whitespace. The problem is with /#/ symbol - if it is placed at the very --- beginning of a line, it should be recognized as preprocessor macro. In any --- other case, it is ordinary Haskell symbol and can be used to declare --- operators. Hence, while dealing with whitespace we also check whether there --- happens to be /#/ symbol just after a newline character - if that is the --- case, we begin treating the whole line as preprocessor macro. -spanSpaceOrCpp :: String -> (String, Maybe String, String) -spanSpaceOrCpp ('\n':'#':str) = - let (str', rest) = spanToNewline str - in ("\n", Just $ '#':str', rest) -spanSpaceOrCpp (c:str') - | isSpace c = - let (space, mcpp, rest) = spanSpaceOrCpp str' - in (c:space, mcpp, rest) -spanSpaceOrCpp str = ("", Nothing, str) - --- | Split input to comment content (including delimiters) and the rest. +-- All characters in the input are present in the output: -- --- Again, some more logic than simple 'span' is required because of Haskell --- comment nesting policy. -chunkComment :: Int -> String -> (String, String) -chunkComment _ [] = ("", "") -chunkComment depth ('{':'-':str) = - let (c, rest) = chunkComment (depth + 1) str - in ("{-" ++ c, rest) -chunkComment depth ('-':'}':str) - | depth == 1 = ("-}", str) - | otherwise = - let (c, rest) = chunkComment (depth - 1) str - in ("-}" ++ c, rest) -chunkComment depth (e:str) = - let (c, rest) = chunkComment depth str - in (e:c, rest) - --- | Assign source location for each chunk in given stream. -tag :: [String] -> [(Span, String)] -tag = - reverse . snd . foldl aux (Position 1 1, []) +-- prop> concat . map (either id id) . splitCPP = id +splitCPP :: String -> [Either String String] +splitCPP "" = [] +splitCPP s | isCPPline s = Left l : splitCPP rest + | otherwise = Right l : splitCPP rest where - aux (pos, cs) str = - let pos' = foldl move pos str - in (pos', (Span pos pos', str):cs) - move pos '\n' = pos { posRow = posRow pos + 1, posCol = 1 } - move pos _ = pos { posCol = posCol pos + 1 } - --- | Turn unrecognised chunk stream to more descriptive token stream. -tokenize :: [(Span, String)] -> [Token] -tokenize = - map aux - where - aux (sp, str) = Token - { tkType = classify str - , tkValue = str - , tkSpan = sp - } + ~(l, rest) = spanToNewline 0 s + --- | Classify given string as appropriate Haskell token. +-- | Heuristic to decide if a line is going to be a CPP line. This should be a +-- cheap operation since it is going to be run on every line being processed. +-- +-- Right now it just checks if the first non-whitespace character in the first +-- five characters of the line is a '#': -- --- This method is based on Haskell 98 Report lexical structure description: --- https://www.haskell.org/onlinereport/lexemes.html +-- >>> isCPPline "#define FOO 1" +-- True -- --- However, this is probably far from being perfect and most probably does not --- handle correctly all corner cases. -classify :: String -> TokenType -classify str - | "--" `isPrefixOf` str = TkComment - | "{-#" `isPrefixOf` str = TkPragma - | "{-" `isPrefixOf` str = TkComment -classify "''" = TkSpecial -classify "'" = TkSpecial -classify str@(c:_) - | isSpace c = TkSpace - | isDigit c = TkNumber - | c `elem` special = TkSpecial - | str `elem` glyphs = TkGlyph - | all (`elem` symbols) str = TkOperator - | c == '#' = TkCpp - | c == '"' = TkString - | c == '\'' = TkChar -classify str - | str `elem` keywords = TkKeyword - | isIdentifier str = TkIdentifier - | otherwise = TkUnknown - -keywords :: [String] -keywords = - [ "as" - , "case" - , "class" - , "data" - , "default" - , "deriving" - , "do" - , "else" - , "hiding" - , "if" - , "import" - , "in" - , "infix" - , "infixl" - , "infixr" - , "instance" - , "let" - , "module" - , "newtype" - , "of" - , "qualified" - , "then" - , "type" - , "where" - , "forall" - , "family" - , "mdo" - ] - -glyphs :: [String] -glyphs = - [ ".." - , ":" - , "::" - , "=" - , "\\" - , "|" - , "<-" - , "->" - , "@" - , "~" - , "~#" - , "=>" - , "-" - , "!" - ] - -special :: [Char] -special = "()[]{},;`" - --- TODO: Add support for any Unicode symbol or punctuation. --- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators -symbols :: [Char] -symbols = "!#$%&*+./<=>?@\\^|-~:" - -isIdentifier :: String -> Bool -isIdentifier (s:str) - | (isLower' s || isUpper s) && all isAlphaNum' str = True +-- >>> isCPPline "\t\t #ifdef GHC" +-- True +-- +-- >>> isCPPline " #endif" +-- False +-- +isCPPline :: String -> Bool +isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 + + +-- | Split a "line" off the front of a string, hopefully without cutting tokens +-- in half. I say "hopefully" because knowing what a token is requires lexing, +-- yet lexing depends on this function. +-- +-- All characters in the input are present in the output: +-- +-- prop> curry (++) . spanToNewLine 0 = id +spanToNewline :: Int -- ^ open '{-' + -> String -- ^ input + -> (String, String) + +-- Base case and space characters +spanToNewline _ "" = ("", "") +spanToNewline n ('\n':str) | n <= 0 = ("\n", str) +spanToNewline n ('\n':str) | n <= 0 = ("\n", str) +spanToNewline n ('\\':'\n':str) = + let (str', rest) = spanToNewline n str + in ('\\':'\n':str', rest) + +-- Block comments +spanToNewline n ('{':'-':str) = + let (str', rest) = spanToNewline (n+1) str + in ('{':'-':str', rest) +spanToNewline n ('-':'}':str) = + let (str', rest) = spanToNewline (n-1) str + in ('-':'}':str', rest) + +-- When not in a block comment, try to lex a Haskell token +spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) = + if all (== '-') lexed && length lexed >= 2 + -- A Haskell line comment + then case span (/= '\n') str' of + (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest) + (_, _) -> (str, "") + + -- An actual Haskell token + else let (str'', rest) = spanToNewline 0 str' + in (lexed ++ str'', rest) + +-- In all other cases, advance one character at a time +spanToNewline n (c:str) = + let (str', rest) = spanToNewline n str + in (c:str', rest) + + +-- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of +-- Haddock's 'T.Token'. +ghcToks :: [(Located L.Token, String)] -> [T.Token] +ghcToks = reverse . (\(_,ts,_) -> ts) . foldl' go (start, [], False) where - isLower' c = isLower c || c == '_' - isAlphaNum' c = isAlphaNum c || c == '_' || c == '\'' -isIdentifier _ = False + start = mkRealSrcLoc (mkFastString "lexing") 1 1 + + go :: (RealSrcLoc, [T.Token], Bool) + -- ^ current position, tokens accumulated, currently in pragma (or not) + + -> (Located L.Token, String) + -- ^ next token, its content + + -> (RealSrcLoc, [T.Token], Bool) + -- ^ new position, new tokens accumulated, currently in pragma (or not) + + go (pos, toks, in_prag) (L l tok, raw) = + ( next_pos + , classifiedTok ++ maybeToList white ++ toks + , inPragma in_prag tok + ) + where + (next_pos, white) = mkWhitespace pos l + + classifiedTok = [ Token (classify' tok) raw rss + | RealSrcSpan rss <- [l] + , not (null raw) + ] + + classify' | in_prag = const TkPragma + | otherwise = classify + + +-- | Find the correct amount of whitespace between tokens. +mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token) +mkWhitespace prev spn = + case spn of + UnhelpfulSpan _ -> (prev,Nothing) + RealSrcSpan s | null wsstring -> (end, Nothing) + | otherwise -> (end, Just (Token TkSpace wsstring wsspan)) + where + start = realSrcSpanStart s + end = realSrcSpanEnd s + wsspan = mkRealSrcSpan prev start + nls = srcLocLine start - srcLocLine prev + spaces = if nls == 0 then srcLocCol start - srcLocCol prev + else srcLocCol start - 1 + wsstring = replicate nls '\n' ++ replicate spaces ' ' + + +-- | Classify given tokens as appropriate Haskell token type. +classify :: L.Token -> TokenType +classify tok = + case tok of + ITas -> TkKeyword + ITcase -> TkKeyword + ITclass -> TkKeyword + ITdata -> TkKeyword + ITdefault -> TkKeyword + ITderiving -> TkKeyword + ITdo -> TkKeyword + ITelse -> TkKeyword + IThiding -> TkKeyword + ITforeign -> TkKeyword + ITif -> TkKeyword + ITimport -> TkKeyword + ITin -> TkKeyword + ITinfix -> TkKeyword + ITinfixl -> TkKeyword + ITinfixr -> TkKeyword + ITinstance -> TkKeyword + ITlet -> TkKeyword + ITmodule -> TkKeyword + ITnewtype -> TkKeyword + ITof -> TkKeyword + ITqualified -> TkKeyword + ITthen -> TkKeyword + ITtype -> TkKeyword + ITvia -> TkKeyword + ITwhere -> TkKeyword + + ITforall {} -> TkKeyword + ITexport -> TkKeyword + ITlabel -> TkKeyword + ITdynamic -> TkKeyword + ITsafe -> TkKeyword + ITinterruptible -> TkKeyword + ITunsafe -> TkKeyword + ITstdcallconv -> TkKeyword + ITccallconv -> TkKeyword + ITcapiconv -> TkKeyword + ITprimcallconv -> TkKeyword + ITjavascriptcallconv -> TkKeyword + ITmdo -> TkKeyword + ITfamily -> TkKeyword + ITrole -> TkKeyword + ITgroup -> TkKeyword + ITby -> TkKeyword + ITusing -> TkKeyword + ITpattern -> TkKeyword + ITstatic -> TkKeyword + ITstock -> TkKeyword + ITanyclass -> TkKeyword + + ITunit -> TkKeyword + ITsignature -> TkKeyword + ITdependency -> TkKeyword + ITrequires -> TkKeyword + + ITinline_prag {} -> TkPragma + ITspec_prag {} -> TkPragma + ITspec_inline_prag {} -> TkPragma + ITsource_prag {} -> TkPragma + ITrules_prag {} -> TkPragma + ITwarning_prag {} -> TkPragma + ITdeprecated_prag {} -> TkPragma + ITline_prag {} -> TkPragma + ITcolumn_prag {} -> TkPragma + ITscc_prag {} -> TkPragma + ITgenerated_prag {} -> TkPragma + ITcore_prag {} -> TkPragma + ITunpack_prag {} -> TkPragma + ITnounpack_prag {} -> TkPragma + ITann_prag {} -> TkPragma + ITcomplete_prag {} -> TkPragma + ITclose_prag -> TkPragma + IToptions_prag {} -> TkPragma + ITinclude_prag {} -> TkPragma + ITlanguage_prag -> TkPragma + ITminimal_prag {} -> TkPragma + IToverlappable_prag {} -> TkPragma + IToverlapping_prag {} -> TkPragma + IToverlaps_prag {} -> TkPragma + ITincoherent_prag {} -> TkPragma + ITctype {} -> TkPragma + + ITdotdot -> TkGlyph + ITcolon -> TkGlyph + ITdcolon {} -> TkGlyph + ITequal -> TkGlyph + ITlam -> TkGlyph + ITlcase -> TkGlyph + ITvbar -> TkGlyph + ITlarrow {} -> TkGlyph + ITrarrow {} -> TkGlyph + ITat -> TkGlyph + ITtilde -> TkGlyph + ITdarrow {} -> TkGlyph + ITminus -> TkGlyph + ITbang -> TkGlyph + ITdot -> TkOperator + ITtypeApp -> TkGlyph + + ITbiglam -> TkGlyph + + ITocurly -> TkSpecial + ITccurly -> TkSpecial + ITvocurly -> TkSpecial + ITvccurly -> TkSpecial + ITobrack -> TkSpecial + ITopabrack -> TkSpecial + ITcpabrack -> TkSpecial + ITcbrack -> TkSpecial + IToparen -> TkSpecial + ITcparen -> TkSpecial + IToubxparen -> TkSpecial + ITcubxparen -> TkSpecial + ITsemi -> TkSpecial + ITcomma -> TkSpecial + ITunderscore -> TkIdentifier + ITbackquote -> TkSpecial + ITsimpleQuote -> TkSpecial + + ITvarid {} -> TkIdentifier + ITconid {} -> TkIdentifier + ITvarsym {} -> TkOperator + ITconsym {} -> TkOperator + ITqvarid {} -> TkIdentifier + ITqconid {} -> TkIdentifier + ITqvarsym {} -> TkOperator + ITqconsym {} -> TkOperator + + ITdupipvarid {} -> TkUnknown + ITlabelvarid {} -> TkUnknown + + ITchar {} -> TkChar + ITstring {} -> TkString + ITinteger {} -> TkNumber + ITrational {} -> TkNumber + + ITprimchar {} -> TkChar + ITprimstring {} -> TkString + ITprimint {} -> TkNumber + ITprimword {} -> TkNumber + ITprimfloat {} -> TkNumber + ITprimdouble {} -> TkNumber + + ITopenExpQuote {} -> TkSpecial + ITopenPatQuote -> TkSpecial + ITopenDecQuote -> TkSpecial + ITopenTypQuote -> TkSpecial + ITcloseQuote {} -> TkSpecial + ITopenTExpQuote {} -> TkSpecial + ITcloseTExpQuote -> TkSpecial + ITidEscape {} -> TkUnknown + ITparenEscape -> TkSpecial + ITidTyEscape {} -> TkUnknown + ITparenTyEscape -> TkSpecial + ITtyQuote -> TkSpecial + ITquasiQuote {} -> TkUnknown + ITqQuasiQuote {} -> TkUnknown + + ITproc -> TkKeyword + ITrec -> TkKeyword + IToparenbar {} -> TkGlyph + ITcparenbar {} -> TkGlyph + ITlarrowtail {} -> TkGlyph + ITrarrowtail {} -> TkGlyph + ITLarrowtail {} -> TkGlyph + ITRarrowtail {} -> TkGlyph + + ITunknown {} -> TkUnknown + ITeof -> TkUnknown + + -- Line comments are only supposed to start with '--'. Starting with '#' + -- means that this was probably a CPP. + ITlineComment s + | isCPPline s -> TkCpp + | otherwise -> TkComment + + ITdocCommentNext {} -> TkComment + ITdocCommentPrev {} -> TkComment + ITdocCommentNamed {} -> TkComment + ITdocSection {} -> TkComment + ITdocOptions {} -> TkComment + + -- The lexer considers top-level pragmas as comments (see `pragState` in + -- the GHC lexer for more), so we have to manually reverse this. The + -- following is a hammer: it smashes _all_ pragma-like block comments into + -- pragmas. + ITblockComment c + | isPrefixOf "{-#" c + , isSuffixOf "#-}" c -> TkPragma + | otherwise -> TkComment + +-- | Classify given tokens as beginning pragmas (or not). +inPragma :: Bool -- ^ currently in pragma + -> L.Token -- ^ current token + -> Bool -- ^ new information about whether we are in a pragma +inPragma _ ITclose_prag = False +inPragma True _ = True +inPragma False tok = + case tok of + ITinline_prag {} -> True + ITspec_prag {} -> True + ITspec_inline_prag {} -> True + ITsource_prag {} -> True + ITrules_prag {} -> True + ITwarning_prag {} -> True + ITdeprecated_prag {} -> True + ITline_prag {} -> True + ITcolumn_prag {} -> True + ITscc_prag {} -> True + ITgenerated_prag {} -> True + ITcore_prag {} -> True + ITunpack_prag {} -> True + ITnounpack_prag {} -> True + ITann_prag {} -> True + ITcomplete_prag {} -> True + IToptions_prag {} -> True + ITinclude_prag {} -> True + ITlanguage_prag -> True + ITminimal_prag {} -> True + IToverlappable_prag {} -> True + IToverlapping_prag {} -> True + IToverlaps_prag {} -> True + ITincoherent_prag {} -> True + ITctype {} -> True + + _ -> False + diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs index 15793f0c..d7ea70a6 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs @@ -1,6 +1,5 @@ {-# LANGUAGE RecordWildCards #-} - module Haddock.Backends.Hyperlinker.Renderer (render) where @@ -15,7 +14,6 @@ import System.FilePath.Posix ((</>)) import Data.List import Data.Maybe -import Data.Monoid import qualified Data.Map as Map import Text.XHtml (Html, HtmlAttr, (!)) @@ -29,36 +27,10 @@ render :: Maybe FilePath -> Maybe FilePath -> SrcMap -> [RichToken] -> Html render mcss mjs srcs tokens = header mcss mjs <> body srcs tokens - -data TokenGroup - = GrpNormal Token - | GrpRich TokenDetails [Token] - - --- | Group consecutive tokens pointing to the same element. --- --- We want to render qualified identifiers as one entity. For example, --- @Bar.Baz.foo@ consists of 5 tokens (@Bar@, @.@, @Baz@, @.@, @foo@) but for --- better user experience when highlighting and clicking links, these tokens --- should be regarded as one identifier. Therefore, before rendering we must --- group consecutive elements pointing to the same 'GHC.Name' (note that even --- dot token has it if it is part of qualified name). -groupTokens :: [RichToken] -> [TokenGroup] -groupTokens [] = [] -groupTokens ((RichToken tok Nothing):rest) = (GrpNormal tok):(groupTokens rest) -groupTokens ((RichToken tok (Just det)):rest) = - let (grp, rest') = span same rest - in (GrpRich det (tok:(map rtkToken grp))):(groupTokens rest') - where - same (RichToken _ (Just det')) = det == det' - same _ = False - - body :: SrcMap -> [RichToken] -> Html -body srcs tokens = - Html.body . Html.pre $ hypsrc +body srcs tokens = Html.body . Html.pre $ hypsrc where - hypsrc = mconcat . map (tokenGroup srcs) . groupTokens $ tokens + hypsrc = mconcat . map (richToken srcs) $ tokens header :: Maybe FilePath -> Maybe FilePath -> Html @@ -79,29 +51,20 @@ header mcss mjs = , Html.src scriptFile ] - -tokenGroup :: SrcMap -> TokenGroup -> Html -tokenGroup _ (GrpNormal tok@(Token { .. })) - | tkType == TkSpace = renderSpace (posRow . spStart $ tkSpan) tkValue - | otherwise = tokenSpan tok ! attrs +-- | Given information about the source position of definitions, render a token +richToken :: SrcMap -> RichToken -> Html +richToken srcs (RichToken Token{..} details) + | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue + | otherwise = linked content where - attrs = [ multiclass . tokenStyle $ tkType ] -tokenGroup srcs (GrpRich det tokens) = - externalAnchor det . internalAnchor det . hyperlink srcs det $ content - where - content = mconcat . map (richToken det) $ tokens - - -richToken :: TokenDetails -> Token -> Html -richToken det tok = - tokenSpan tok ! [ multiclass style ] - where - style = (tokenStyle . tkType) tok ++ richTokenStyle det - - -tokenSpan :: Token -> Html -tokenSpan = Html.thespan . Html.toHtml . tkValue + content = tokenSpan ! [ multiclass style ] + tokenSpan = Html.thespan (Html.toHtml tkValue) + style = tokenStyle tkType ++ maybe [] richTokenStyle details + -- If we have name information, we can make links + linked = case details of + Just d -> externalAnchor d . internalAnchor d . hyperlink srcs d + Nothing -> id richTokenStyle :: TokenDetails -> [StyleClass] richTokenStyle (RtkVar _) = ["hs-var"] @@ -155,7 +118,7 @@ internalHyperlink name content = Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ] externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html -externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of +externalNameHyperlink srcs name content = case Map.lookup mdl srcs of Just SrcLocal -> Html.anchor content ! [ Html.href $ hypSrcModuleNameUrl mdl name ] Just (SrcExternal path) -> Html.anchor content ! @@ -165,12 +128,14 @@ externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of mdl = GHC.nameModule name externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html -externalModHyperlink (_, srcs) name content = case Map.lookup name srcs of - Just SrcLocal -> Html.anchor content ! +externalModHyperlink srcs name content = + let srcs' = Map.mapKeys GHC.moduleName srcs in + case Map.lookup name srcs' of + Just SrcLocal -> Html.anchor content ! [ Html.href $ hypSrcModuleUrl' name ] - Just (SrcExternal path) -> Html.anchor content ! + Just (SrcExternal path) -> Html.anchor content ! [ Html.href $ path </> hypSrcModuleUrl' name ] - Nothing -> content + Nothing -> content renderSpace :: Int -> String -> Html diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs index d8ae89e4..e377471e 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs @@ -4,8 +4,6 @@ module Haddock.Backends.Hyperlinker.Types where import qualified GHC import Data.Map (Map) -import qualified Data.Map as Map - data Token = Token { tkType :: TokenType @@ -14,23 +12,8 @@ data Token = Token } deriving (Show) -data Position = Position - { posRow :: !Int - , posCol :: !Int - } - deriving (Eq, Ord, Show) - -data Span = Span - { spStart :: !Position - , spEnd :: !Position - } - deriving (Show) - --- | Tests whether the first span "contains" the other span, meaning --- that it covers at least as much source code. True where spans are equal. -containsSpan :: Span -> Span -> Bool -containsSpan s1 s2 = - spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2 +type Position = GHC.RealSrcLoc +type Span = GHC.RealSrcSpan data TokenType = TkIdentifier @@ -80,15 +63,5 @@ data SrcPath | SrcLocal -- | Mapping from modules to cross-package source paths. --- --- This mapping is actually a pair of maps instead of just one map. The reason --- for this is because when hyperlinking modules in import lists we have no --- 'GHC.Module' available. On the other hand, we can't just use map with --- 'GHC.ModuleName' as indices because certain modules may have common name --- but originate in different packages. Hence, we use both /rich/ and /poor/ --- versions, where the /poor/ is just projection of /rich/ one cached in pair --- for better performance. -type SrcMap = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath) +type SrcMap = Map GHC.Module SrcPath -mkSrcMap :: Map GHC.Module SrcPath -> SrcMap -mkSrcMap srcs = (srcs, Map.mapKeys GHC.moduleName srcs) |