From 4f75be94f45a0e92553eccefe56230c554333ce7 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Sun, 10 Dec 2017 12:22:21 -0800 Subject: Use the GHC lexer for the Hyperlinker backend (#714) * Start changing to use GHC lexer * better cpp * Change SrcSpan to RealSrcSpan * Remove error * Try to stop too many open files * wip * wip * Revert "wip" This reverts commit b605510a195f26315e3d8ca90e6d95a6737553e1. Conflicts: haddock-api/haddock-api.cabal haddock-api/src/Haddock/Interface.hs * Remove pointless 'caching' * Use dlist rather than lists when finding vars * Use a map rather than list * Delete bogus comment * Rebase followup Things now run using the GHC lexer. There are still - stray debug statements - unnecessary changes w.r.t. master * Cleaned up differences w.r.t. current Haddock HEAD Things are looking good. quasiquotes in particular look beautiful: the TH ones (with Haskell source inside) colour/link their contents too! Haven't yet begun to check for possible performance problems. * Support CPP and top-level pragmas The support for these is hackier - but no more hacky than the existing support. * Tests pass, CPP is better recognized The tests were in some cases altered: I consider the new output to be more correct than the old one.... * Fix shrinking of source without tabs in test * Replace 'Position'/'Span' with GHC counterparts Replaces 'Position' -> 'GHC.RealSrcLoc' and 'Span' -> 'GHC.RealSrcSpan'. * Nits * Forgot entry in .cabal * Update changelog --- .../src/Haddock/Backends/Hyperlinker/Ast.hs | 19 +- .../src/Haddock/Backends/Hyperlinker/Parser.hs | 534 +++++++++++++-------- .../src/Haddock/Backends/Hyperlinker/Renderer.hs | 14 +- .../src/Haddock/Backends/Hyperlinker/Types.hs | 33 +- 4 files changed, 362 insertions(+), 238 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 57ff72ff..361bc15d 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -12,6 +12,7 @@ import qualified Haddock.Syb as Syb import Haddock.Backends.Hyperlinker.Types import qualified GHC +import qualified SrcLoc import Control.Applicative import Control.Monad (guard) @@ -51,10 +52,10 @@ type DetailsMap = Map.Map Position (Span, TokenDetails) mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap mkDetailsMap xs = - Map.fromListWith select_details [ (start, (token_span, token_details)) + Map.fromListWith select_details [ (start, (span, token_details)) | (ghc_span, token_details) <- xs - , Just !token_span <- [ghcSrcSpanToSpan ghc_span] - , let start = spStart token_span + , GHC.RealSrcSpan span <- [ghc_span] + , let start = SrcLoc.realSrcSpanStart span ] where -- favour token details which appear earlier in the list @@ -62,17 +63,11 @@ mkDetailsMap xs = lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails lookupBySpan span details = do - (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details - guard (tok_span `containsSpan` span ) + let pos = SrcLoc.realSrcSpanStart span + (_, (tok_span, tok_details)) <- Map.lookupLE pos details + guard (tok_span `SrcLoc.containsSpan` span) return tok_details -ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span -ghcSrcSpanToSpan (GHC.RealSrcSpan span) = - Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span) - , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span) - }) -ghcSrcSpanToSpan _ = Nothing - enrichToken :: Token -> DetailsMap -> Maybe TokenDetails enrichToken (Token typ _ spn) dm | typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index e4345602..da1555b8 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -1,11 +1,17 @@ module Haddock.Backends.Hyperlinker.Parser (parse) where +import Data.Either ( isRight, isLeft ) +import Data.List ( foldl', isPrefixOf, isSuffixOf ) +import Data.Maybe ( maybeToList ) -import Data.Char -import Data.List -import Data.Maybe +import GHC ( DynFlags, addSourceToTokens ) +import SrcLoc +import FastString ( mkFastString ) +import StringBuffer ( stringToStringBuffer ) +import Lexer ( Token(..) ) +import qualified Lexer as L -import Haddock.Backends.Hyperlinker.Types +import Haddock.Backends.Hyperlinker.Types as T -- | Turn source code string into a stream of more descriptive tokens. @@ -13,202 +19,350 @@ import Haddock.Backends.Hyperlinker.Types -- Result should retain original file layout (including comments, whitespace, -- etc.), i.e. the following "law" should hold: -- --- @concat . map 'tkValue' . 'parse' = id@ -parse :: String -> [Token] -parse = tokenize . tag . chunk +-- prop> concat . map tkValue . parse = id +-- +-- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v', +-- characters, since GHC transforms those into ' ' and '\n') +parse :: DynFlags -> FilePath -> String -> [T.Token] +parse dflags fp s = ghcToks (processCPP dflags fp s) + --- | Split raw source string to more meaningful chunks. +-- | Parse the source into tokens using the GHC lexer. +-- +-- * CPP lines are removed and reinserted as line-comments +-- * top-level file pragmas are parsed as block comments (see the +-- 'ITblockComment' case of 'classify' for more details) -- --- This is the initial stage of tokenization process. Each chunk is either --- a comment (including comment delimiters), a whitespace string, preprocessor --- macro (and all its content until the end of a line) or valid Haskell lexeme. -chunk :: String -> [String] -chunk [] = [] -chunk str@(c:_) - | isSpace c = - let (space, mcpp, rest) = spanSpaceOrCpp str - in [space] ++ maybeToList mcpp ++ chunk rest -chunk str - | "--" `isPrefixOf` str = chunk' $ spanToNewline str - | "{-" `isPrefixOf` str = chunk' $ chunkComment 0 str - | otherwise = case lex' str of - (tok:_) -> chunk' tok - [] -> [str] +processCPP :: DynFlags -- ^ GHC's flags + -> FilePath -- ^ source file name (for position information) + -> String -- ^ source file contents + -> [(Located L.Token, String)] +processCPP dflags fpath s = addSrc . go start . splitCPP $ s where - chunk' (c, rest) = c:(chunk rest) + start = mkRealSrcLoc (mkFastString fpath) 1 1 + addSrc = addSourceToTokens start (stringToStringBuffer s) --- | A bit better lexer then the default, i.e. handles DataKinds quotes -lex' :: ReadS String -lex' ('\'' : '\'' : rest) = [("''", rest)] -lex' str@('\'' : '\\' : _ : '\'' : _) = lex str -lex' str@('\'' : _ : '\'' : _) = lex str -lex' ('\'' : rest) = [("'", rest)] -lex' str = lex str + -- Transform a list of Haskell/CPP lines into a list of tokens + go :: RealSrcLoc -> [Either String String] -> [Located L.Token] + go _ [] = [] + go pos ls = + let (hLinesRight, ls') = span isRight ls + (cppLinesLeft, rest) = span isLeft ls' + + hSrc = concat [ hLine | Right hLine <- hLinesRight ] + cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ] + + in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of --- | Split input to "first line" string and the rest of it. --- --- Ideally, this should be done simply with @'break' (== '\n')@. However, --- Haskell also allows line-unbreaking (or whatever it is called) so things --- are not as simple and this function deals with that. -spanToNewline :: String -> (String, String) -spanToNewline [] = ([], []) -spanToNewline ('\\':'\n':str) = - let (str', rest) = spanToNewline str - in ('\\':'\n':str', rest) -spanToNewline str@('\n':_) = ("", str) -spanToNewline (c:str) = - let (str', rest) = spanToNewline str - in (c:str', rest) + -- Stuff that fails to lex gets turned into comments + L.PFailed _ss _msg -> + let (src_pos, failed) = mkToken ITunknown pos hSrc + (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc + in failed : cpp : go new_pos rest --- | Split input to whitespace string, (optional) preprocessor directive and --- the rest of it. + -- Successfully lexed + L.POk ss toks -> + let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc + in toks ++ [cpp] ++ go new_pos rest + + -- Manually make a token from a 'String', advancing the cursor position + mkToken tok start' str = + let end = foldl' advanceSrcLoc start' str + in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str)) + + +-- | Split apart the initial file into Haskell source lines ('Left' entries) and +-- CPP lines ('Right' entries). -- --- Again, using something like @'span' 'isSpace'@ would be nice to chunk input --- to whitespace. The problem is with /#/ symbol - if it is placed at the very --- beginning of a line, it should be recognized as preprocessor macro. In any --- other case, it is ordinary Haskell symbol and can be used to declare --- operators. Hence, while dealing with whitespace we also check whether there --- happens to be /#/ symbol just after a newline character - if that is the --- case, we begin treating the whole line as preprocessor macro. -spanSpaceOrCpp :: String -> (String, Maybe String, String) -spanSpaceOrCpp ('\n':'#':str) = - let (str', rest) = spanToNewline str - in ("\n", Just $ '#':str', rest) -spanSpaceOrCpp (c:str') - | isSpace c = - let (space, mcpp, rest) = spanSpaceOrCpp str' - in (c:space, mcpp, rest) -spanSpaceOrCpp str = ("", Nothing, str) - --- | Split input to comment content (including delimiters) and the rest. +-- All characters in the input are present in the output: -- --- Again, some more logic than simple 'span' is required because of Haskell --- comment nesting policy. -chunkComment :: Int -> String -> (String, String) -chunkComment _ [] = ("", "") -chunkComment depth ('{':'-':str) = - let (c, rest) = chunkComment (depth + 1) str - in ("{-" ++ c, rest) -chunkComment depth ('-':'}':str) - | depth == 1 = ("-}", str) - | otherwise = - let (c, rest) = chunkComment (depth - 1) str - in ("-}" ++ c, rest) -chunkComment depth (e:str) = - let (c, rest) = chunkComment depth str - in (e:c, rest) - --- | Assign source location for each chunk in given stream. -tag :: [String] -> [(Span, String)] -tag = - reverse . snd . foldl aux (Position 1 1, []) - where - aux (pos, cs) str = - let pos' = foldl move pos str - in (pos', (Span pos pos', str):cs) - move pos '\n' = pos { posRow = posRow pos + 1, posCol = 1 } - move pos _ = pos { posCol = posCol pos + 1 } - --- | Turn unrecognised chunk stream to more descriptive token stream. -tokenize :: [(Span, String)] -> [Token] -tokenize = - map aux +-- prop> concat . map (either id id) . splitCPP = id +splitCPP :: String -> [Either String String] +splitCPP "" = [] +splitCPP s | isCPPline s = Left l : splitCPP rest + | otherwise = Right l : splitCPP rest where - aux (sp, str) = Token - { tkType = classify str - , tkValue = str - , tkSpan = sp - } + ~(l, rest) = spanToNewline 0 s --- | Classify given string as appropriate Haskell token. + +-- | Heuristic to decide if a line is going to be a CPP line. This should be a +-- cheap operation since it is going to be run on every line being processed. -- --- This method is based on Haskell 98 Report lexical structure description: --- https://www.haskell.org/onlinereport/lexemes.html +-- Right now it just checks if the first non-whitespace character in the first +-- five characters of the line is a '#': -- --- However, this is probably far from being perfect and most probably does not --- handle correctly all corner cases. -classify :: String -> TokenType -classify str - | "--" `isPrefixOf` str = TkComment - | "{-#" `isPrefixOf` str = TkPragma - | "{-" `isPrefixOf` str = TkComment -classify "''" = TkSpecial -classify "'" = TkSpecial -classify str@(c:_) - | isSpace c = TkSpace - | isDigit c = TkNumber - | c `elem` special = TkSpecial - | str `elem` glyphs = TkGlyph - | all (`elem` symbols) str = TkOperator - | c == '#' = TkCpp - | c == '"' = TkString - | c == '\'' = TkChar -classify str - | str `elem` keywords = TkKeyword - | isIdentifier str = TkIdentifier - | otherwise = TkUnknown - -keywords :: [String] -keywords = - [ "as" - , "case" - , "class" - , "data" - , "default" - , "deriving" - , "do" - , "else" - , "hiding" - , "if" - , "import" - , "in" - , "infix" - , "infixl" - , "infixr" - , "instance" - , "let" - , "module" - , "newtype" - , "of" - , "qualified" - , "then" - , "type" - , "where" - , "forall" - , "family" - , "mdo" - ] - -glyphs :: [String] -glyphs = - [ ".." - , ":" - , "::" - , "=" - , "\\" - , "|" - , "<-" - , "->" - , "@" - , "~" - , "~#" - , "=>" - , "-" - , "!" - ] - -special :: [Char] -special = "()[]{},;`" - --- TODO: Add support for any Unicode symbol or punctuation. --- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators -symbols :: [Char] -symbols = "!#$%&*+./<=>?@\\^|-~:" - -isIdentifier :: String -> Bool -isIdentifier (s:str) - | (isLower' s || isUpper s) && all isAlphaNum' str = True +-- >>> isCPPline "#define FOO 1" +-- True +-- +-- >>> isCPPline "\t\t #ifdef GHC" +-- True +-- +-- >>> isCPPline " #endif" +-- False +-- +isCPPline :: String -> Bool +isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 + + +-- | Split a "line" off the front of a string, supporting newline escapes. +-- +-- By "line", we understand: the shortest substring ending in a '\n' that is not +-- +-- 1. immediately preceded by a '\\' +-- 2. not inside some (possibly nested) block comment +-- +-- All characters in the input are present in the output: +-- +-- prop> curry (++) . spanToNewLine 0 = id +spanToNewline :: Int -- ^ open '{-' + -> String -- ^ input + -> (String, String) +spanToNewline _ [] = ([], []) +spanToNewline n ('\n':str) | n <= 0 = ("\n", str) +spanToNewline n ('\\':'\n':str) = + let (str', rest) = spanToNewline n str + in ('\\':'\n':str', rest) +spanToNewline n ('{':'-':str) = + let (str', rest) = spanToNewline (n+1) str + in ('{':'-':str', rest) +spanToNewline n ('-':'}':str) = + let (str', rest) = spanToNewline (n-1) str + in ('-':'}':str', rest) +spanToNewline n (c:str) = + let (str', rest) = spanToNewline n str + in (c:str', rest) + + +-- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of +-- Haddock's 'T.Token'. +ghcToks :: [(Located L.Token, String)] -> [T.Token] +ghcToks = reverse . snd . foldl' go (start, []) where - isLower' c = isLower c || c == '_' - isAlphaNum' c = isAlphaNum c || c == '_' || c == '\'' -isIdentifier _ = False + start = mkRealSrcLoc (mkFastString "lexing") 1 1 + + go :: (RealSrcLoc, [T.Token]) -- ^ current position, tokens accumulated + -> (Located L.Token, String) -- ^ next token, its content + -> (RealSrcLoc, [T.Token]) -- ^ new position, new tokens accumulated + go (pos, toks) (L l tok, raw) = ( next_pos + , classifiedTok ++ maybeToList white ++ toks + ) + where + (next_pos, white) = mkWhitespace pos l + classifiedTok = [ Token (classify tok) raw rss + | RealSrcSpan rss <- [l] + , not (null raw) + ] + +-- | Find the correct amount of whitespace between tokens. +mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token) +mkWhitespace prev spn = + case spn of + UnhelpfulSpan _ -> (prev,Nothing) + RealSrcSpan s | null wsstring -> (end, Nothing) + | otherwise -> (end, Just (Token TkSpace wsstring wsspan)) + where + start = realSrcSpanStart s + end = realSrcSpanEnd s + wsspan = mkRealSrcSpan prev start + nls = srcLocLine start - srcLocLine prev + spaces = if nls == 0 then srcLocCol start - srcLocCol prev + else srcLocCol start - 1 + wsstring = replicate nls '\n' ++ replicate spaces ' ' + + +-- | Classify given string as appropriate Haskell token. +classify :: L.Token -> TokenType +classify tok = + case tok of + ITas -> TkKeyword + ITcase -> TkKeyword + ITclass -> TkKeyword + ITdata -> TkKeyword + ITdefault -> TkKeyword + ITderiving -> TkKeyword + ITdo -> TkKeyword + ITelse -> TkKeyword + IThiding -> TkKeyword + ITforeign -> TkKeyword + ITif -> TkKeyword + ITimport -> TkKeyword + ITin -> TkKeyword + ITinfix -> TkKeyword + ITinfixl -> TkKeyword + ITinfixr -> TkKeyword + ITinstance -> TkKeyword + ITlet -> TkKeyword + ITmodule -> TkKeyword + ITnewtype -> TkKeyword + ITof -> TkKeyword + ITqualified -> TkKeyword + ITthen -> TkKeyword + ITtype -> TkKeyword + ITwhere -> TkKeyword + + ITforall {} -> TkKeyword + ITexport -> TkKeyword + ITlabel -> TkKeyword + ITdynamic -> TkKeyword + ITsafe -> TkKeyword + ITinterruptible -> TkKeyword + ITunsafe -> TkKeyword + ITstdcallconv -> TkKeyword + ITccallconv -> TkKeyword + ITcapiconv -> TkKeyword + ITprimcallconv -> TkKeyword + ITjavascriptcallconv -> TkKeyword + ITmdo -> TkKeyword + ITfamily -> TkKeyword + ITrole -> TkKeyword + ITgroup -> TkKeyword + ITby -> TkKeyword + ITusing -> TkKeyword + ITpattern -> TkKeyword + ITstatic -> TkKeyword + ITstock -> TkKeyword + ITanyclass -> TkKeyword + + ITunit -> TkKeyword + ITsignature -> TkKeyword + ITdependency -> TkKeyword + ITrequires -> TkKeyword + + ITinline_prag {} -> TkPragma + ITspec_prag {} -> TkPragma + ITspec_inline_prag {} -> TkPragma + ITsource_prag {} -> TkPragma + ITrules_prag {} -> TkPragma + ITwarning_prag {} -> TkPragma + ITdeprecated_prag {} -> TkPragma + ITline_prag -> TkPragma + ITscc_prag {} -> TkPragma + ITgenerated_prag {} -> TkPragma + ITcore_prag {} -> TkPragma + ITunpack_prag {} -> TkPragma + ITnounpack_prag {} -> TkPragma + ITann_prag {} -> TkPragma + ITcomplete_prag {} -> TkPragma + ITclose_prag -> TkPragma + IToptions_prag {} -> TkPragma + ITinclude_prag {} -> TkPragma + ITlanguage_prag -> TkPragma + ITvect_prag {} -> TkPragma + ITvect_scalar_prag {} -> TkPragma + ITnovect_prag {} -> TkPragma + ITminimal_prag {} -> TkPragma + IToverlappable_prag {} -> TkPragma + IToverlapping_prag {} -> TkPragma + IToverlaps_prag {} -> TkPragma + ITincoherent_prag {} -> TkPragma + ITctype {} -> TkPragma + + ITdotdot -> TkGlyph + ITcolon -> TkGlyph + ITdcolon {} -> TkGlyph + ITequal -> TkGlyph + ITlam -> TkGlyph + ITlcase -> TkGlyph + ITvbar -> TkGlyph + ITlarrow {} -> TkGlyph + ITrarrow {} -> TkGlyph + ITat -> TkGlyph + ITtilde -> TkGlyph + ITtildehsh -> TkGlyph + ITdarrow {} -> TkGlyph + ITminus -> TkGlyph + ITbang -> TkGlyph + ITdot -> TkOperator + ITtypeApp -> TkGlyph + + ITbiglam -> TkGlyph + + ITocurly -> TkSpecial + ITccurly -> TkSpecial + ITvocurly -> TkSpecial + ITvccurly -> TkSpecial + ITobrack -> TkSpecial + ITopabrack -> TkSpecial + ITcpabrack -> TkSpecial + ITcbrack -> TkSpecial + IToparen -> TkSpecial + ITcparen -> TkSpecial + IToubxparen -> TkSpecial + ITcubxparen -> TkSpecial + ITsemi -> TkSpecial + ITcomma -> TkSpecial + ITunderscore -> TkIdentifier + ITbackquote -> TkSpecial + ITsimpleQuote -> TkSpecial + + ITvarid {} -> TkIdentifier + ITconid {} -> TkIdentifier + ITvarsym {} -> TkOperator + ITconsym {} -> TkOperator + ITqvarid {} -> TkIdentifier + ITqconid {} -> TkIdentifier + ITqvarsym {} -> TkOperator + ITqconsym {} -> TkOperator + + ITdupipvarid {} -> TkUnknown + ITlabelvarid {} -> TkUnknown + + ITchar {} -> TkChar + ITstring {} -> TkString + ITinteger {} -> TkNumber + ITrational {} -> TkNumber + + ITprimchar {} -> TkChar + ITprimstring {} -> TkString + ITprimint {} -> TkNumber + ITprimword {} -> TkNumber + ITprimfloat {} -> TkNumber + ITprimdouble {} -> TkNumber + + ITopenExpQuote {} -> TkSpecial + ITopenPatQuote -> TkSpecial + ITopenDecQuote -> TkSpecial + ITopenTypQuote -> TkSpecial + ITcloseQuote {} -> TkSpecial + ITopenTExpQuote {} -> TkSpecial + ITcloseTExpQuote -> TkSpecial + ITidEscape {} -> TkUnknown + ITparenEscape -> TkSpecial + ITidTyEscape {} -> TkUnknown + ITparenTyEscape -> TkSpecial + ITtyQuote -> TkSpecial + ITquasiQuote {} -> TkUnknown + ITqQuasiQuote {} -> TkUnknown + + ITproc -> TkKeyword + ITrec -> TkKeyword + IToparenbar {} -> TkGlyph + ITcparenbar {} -> TkGlyph + ITlarrowtail {} -> TkGlyph + ITrarrowtail {} -> TkGlyph + ITLarrowtail {} -> TkGlyph + ITRarrowtail {} -> TkGlyph + + ITunknown {} -> TkUnknown + ITeof -> TkUnknown + + -- Line comments are only supposed to start with '--'. Starting with '#' + -- means that this was probably a CPP. + ITlineComment s + | isCPPline s -> TkCpp + | otherwise -> TkComment + + ITdocCommentNext {} -> TkComment + ITdocCommentPrev {} -> TkComment + ITdocCommentNamed {} -> TkComment + ITdocSection {} -> TkComment + ITdocOptions {} -> TkComment + + -- The lexer considers top-level pragmas as comments (see `pragState` in + -- the GHC lexer for more), so we have to manually reverse this. The + -- following is a hammer: it smashes _all_ pragma-like block comments into + -- pragmas. + ITblockComment c + | isPrefixOf "{-#" c + , isSuffixOf "#-}" c -> TkPragma + | otherwise -> TkComment diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs index 15793f0c..27bf7605 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs @@ -82,7 +82,7 @@ header mcss mjs = tokenGroup :: SrcMap -> TokenGroup -> Html tokenGroup _ (GrpNormal tok@(Token { .. })) - | tkType == TkSpace = renderSpace (posRow . spStart $ tkSpan) tkValue + | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue | otherwise = tokenSpan tok ! attrs where attrs = [ multiclass . tokenStyle $ tkType ] @@ -155,7 +155,7 @@ internalHyperlink name content = Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ] externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html -externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of +externalNameHyperlink srcs name content = case Map.lookup mdl srcs of Just SrcLocal -> Html.anchor content ! [ Html.href $ hypSrcModuleNameUrl mdl name ] Just (SrcExternal path) -> Html.anchor content ! @@ -165,12 +165,14 @@ externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of mdl = GHC.nameModule name externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html -externalModHyperlink (_, srcs) name content = case Map.lookup name srcs of - Just SrcLocal -> Html.anchor content ! +externalModHyperlink srcs name content = + let srcs' = Map.mapKeys GHC.moduleName srcs in + case Map.lookup name srcs' of + Just SrcLocal -> Html.anchor content ! [ Html.href $ hypSrcModuleUrl' name ] - Just (SrcExternal path) -> Html.anchor content ! + Just (SrcExternal path) -> Html.anchor content ! [ Html.href $ path hypSrcModuleUrl' name ] - Nothing -> content + Nothing -> content renderSpace :: Int -> String -> Html diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs index d8ae89e4..e377471e 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs @@ -4,8 +4,6 @@ module Haddock.Backends.Hyperlinker.Types where import qualified GHC import Data.Map (Map) -import qualified Data.Map as Map - data Token = Token { tkType :: TokenType @@ -14,23 +12,8 @@ data Token = Token } deriving (Show) -data Position = Position - { posRow :: !Int - , posCol :: !Int - } - deriving (Eq, Ord, Show) - -data Span = Span - { spStart :: !Position - , spEnd :: !Position - } - deriving (Show) - --- | Tests whether the first span "contains" the other span, meaning --- that it covers at least as much source code. True where spans are equal. -containsSpan :: Span -> Span -> Bool -containsSpan s1 s2 = - spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2 +type Position = GHC.RealSrcLoc +type Span = GHC.RealSrcSpan data TokenType = TkIdentifier @@ -80,15 +63,5 @@ data SrcPath | SrcLocal -- | Mapping from modules to cross-package source paths. --- --- This mapping is actually a pair of maps instead of just one map. The reason --- for this is because when hyperlinking modules in import lists we have no --- 'GHC.Module' available. On the other hand, we can't just use map with --- 'GHC.ModuleName' as indices because certain modules may have common name --- but originate in different packages. Hence, we use both /rich/ and /poor/ --- versions, where the /poor/ is just projection of /rich/ one cached in pair --- for better performance. -type SrcMap = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath) +type SrcMap = Map GHC.Module SrcPath -mkSrcMap :: Map GHC.Module SrcPath -> SrcMap -mkSrcMap srcs = (srcs, Map.mapKeys GHC.moduleName srcs) -- cgit v1.2.3 From 150c8fc2415ab6cd254c9391d1a00b3d2931927e Mon Sep 17 00:00:00 2001 From: Alexander Biehl Date: Thu, 1 Feb 2018 11:04:53 +0100 Subject: Hyperlinker: Adjust parser to new PFailed constructor --- haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index da1555b8..0ea3eba2 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -48,23 +48,23 @@ processCPP dflags fpath s = addSrc . go start . splitCPP $ s go pos ls = let (hLinesRight, ls') = span isRight ls (cppLinesLeft, rest) = span isLeft ls' - + hSrc = concat [ hLine | Right hLine <- hLinesRight ] cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ] - + in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of -- Stuff that fails to lex gets turned into comments - L.PFailed _ss _msg -> + L.PFailed _ _ss _msg -> let (src_pos, failed) = mkToken ITunknown pos hSrc (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc in failed : cpp : go new_pos rest - -- Successfully lexed + -- Successfully lexed L.POk ss toks -> let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc in toks ++ [cpp] ++ go new_pos rest - + -- Manually make a token from a 'String', advancing the cursor position mkToken tok start' str = let end = foldl' advanceSrcLoc start' str @@ -107,7 +107,7 @@ isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 -- | Split a "line" off the front of a string, supporting newline escapes. -- -- By "line", we understand: the shortest substring ending in a '\n' that is not --- +-- -- 1. immediately preceded by a '\\' -- 2. not inside some (possibly nested) block comment -- @@ -124,10 +124,10 @@ spanToNewline n ('\\':'\n':str) = in ('\\':'\n':str', rest) spanToNewline n ('{':'-':str) = let (str', rest) = spanToNewline (n+1) str - in ('{':'-':str', rest) + in ('{':'-':str', rest) spanToNewline n ('-':'}':str) = let (str', rest) = spanToNewline (n-1) str - in ('-':'}':str', rest) + in ('-':'}':str', rest) spanToNewline n (c:str) = let (str', rest) = spanToNewline n str in (c:str', rest) -- cgit v1.2.3 From bfd1b156e9bd4c3b2b70b03cb59f778a1061daed Mon Sep 17 00:00:00 2001 From: Alexander Biehl Date: Thu, 1 Feb 2018 11:12:57 +0100 Subject: Warning free compilation --- haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs | 1 - haddock-api/src/Haddock/Convert.hs | 2 +- haddock-api/src/Haddock/Interface/AttachInstances.hs | 1 - haddock-api/src/Haddock/Interface/LexParseRn.hs | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs index 27bf7605..5291220a 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs @@ -15,7 +15,6 @@ import System.FilePath.Posix (()) import Data.List import Data.Maybe -import Data.Monoid import qualified Data.Map as Map import Text.XHtml (Html, HtmlAttr, (!)) diff --git a/haddock-api/src/Haddock/Convert.hs b/haddock-api/src/Haddock/Convert.hs index 68f39cf2..8b227c50 100644 --- a/haddock-api/src/Haddock/Convert.hs +++ b/haddock-api/src/Haddock/Convert.hs @@ -37,7 +37,7 @@ import Type import TyCoRep import TysPrim ( alphaTyVars ) import TysWiredIn ( listTyConName, starKindTyConName, unitTy ) -import PrelNames ( hasKey, eqTyConKey, funTyConKey, ipClassKey +import PrelNames ( hasKey, eqTyConKey, ipClassKey , tYPETyConKey, liftedRepDataConKey ) import Unique ( getUnique ) import Util ( chkAppend, compareLength, dropList, filterByList, filterOut diff --git a/haddock-api/src/Haddock/Interface/AttachInstances.hs b/haddock-api/src/Haddock/Interface/AttachInstances.hs index 4fd9d264..ec8b98c8 100644 --- a/haddock-api/src/Haddock/Interface/AttachInstances.hs +++ b/haddock-api/src/Haddock/Interface/AttachInstances.hs @@ -19,7 +19,6 @@ import Haddock.Types import Haddock.Convert import Haddock.GhcUtils -import Control.Applicative import Control.Arrow hiding ((<+>)) import Data.List import Data.Ord (comparing) diff --git a/haddock-api/src/Haddock/Interface/LexParseRn.hs b/haddock-api/src/Haddock/Interface/LexParseRn.hs index 311301ee..1269df3f 100644 --- a/haddock-api/src/Haddock/Interface/LexParseRn.hs +++ b/haddock-api/src/Haddock/Interface/LexParseRn.hs @@ -18,7 +18,6 @@ module Haddock.Interface.LexParseRn , processModuleHeader ) where -import Data.IntSet (toList) import Data.List import Documentation.Haddock.Doc (metaDocConcat) import DynFlags (languageExtensions) -- cgit v1.2.3 From 97b0189927924b82ee26f762c88ccd965eee8d80 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Mon, 22 Jan 2018 11:44:04 -0800 Subject: Properly color pragma contents in hyperlinker The hyperlinker backend now classifies the content of pragmas as 'TkPragma'. That means that in something like '{-# INLINE foo #-}', 'foo' still gets classified as a pragma token. --- .../src/Haddock/Backends/Hyperlinker/Parser.hs | 78 ++++++++++++++++++---- 1 file changed, 65 insertions(+), 13 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index 0ea3eba2..cd2237e9 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -136,22 +136,35 @@ spanToNewline n (c:str) = -- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of -- Haddock's 'T.Token'. ghcToks :: [(Located L.Token, String)] -> [T.Token] -ghcToks = reverse . snd . foldl' go (start, []) +ghcToks = reverse . (\(_,ts,_) -> ts) . foldl' go (start, [], False) where start = mkRealSrcLoc (mkFastString "lexing") 1 1 - go :: (RealSrcLoc, [T.Token]) -- ^ current position, tokens accumulated - -> (Located L.Token, String) -- ^ next token, its content - -> (RealSrcLoc, [T.Token]) -- ^ new position, new tokens accumulated - go (pos, toks) (L l tok, raw) = ( next_pos - , classifiedTok ++ maybeToList white ++ toks - ) + go :: (RealSrcLoc, [T.Token], Bool) + -- ^ current position, tokens accumulated, currently in pragma (or not) + + -> (Located L.Token, String) + -- ^ next token, its content + + -> (RealSrcLoc, [T.Token], Bool) + -- ^ new position, new tokens accumulated, currently in pragma (or not) + + go (pos, toks, in_prag) (L l tok, raw) = + ( next_pos + , classifiedTok ++ maybeToList white ++ toks + , inPragma in_prag tok + ) where (next_pos, white) = mkWhitespace pos l - classifiedTok = [ Token (classify tok) raw rss + + classifiedTok = [ Token (classify' tok) raw rss | RealSrcSpan rss <- [l] , not (null raw) ] + + classify' | in_prag = const TkPragma + | otherwise = classify + -- | Find the correct amount of whitespace between tokens. mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token) @@ -170,7 +183,7 @@ mkWhitespace prev spn = wsstring = replicate nls '\n' ++ replicate spaces ' ' --- | Classify given string as appropriate Haskell token. +-- | Classify given tokens as appropriate Haskell token type. classify :: L.Token -> TokenType classify tok = case tok of @@ -200,7 +213,7 @@ classify tok = ITtype -> TkKeyword ITwhere -> TkKeyword - ITforall {} -> TkKeyword + ITforall {} -> TkKeyword ITexport -> TkKeyword ITlabel -> TkKeyword ITdynamic -> TkKeyword @@ -235,7 +248,7 @@ classify tok = ITrules_prag {} -> TkPragma ITwarning_prag {} -> TkPragma ITdeprecated_prag {} -> TkPragma - ITline_prag -> TkPragma + ITline_prag {} -> TkPragma ITscc_prag {} -> TkPragma ITgenerated_prag {} -> TkPragma ITcore_prag {} -> TkPragma @@ -331,8 +344,8 @@ classify tok = ITidTyEscape {} -> TkUnknown ITparenTyEscape -> TkSpecial ITtyQuote -> TkSpecial - ITquasiQuote {} -> TkUnknown - ITqQuasiQuote {} -> TkUnknown + ITquasiQuote {} -> TkUnknown + ITqQuasiQuote {} -> TkUnknown ITproc -> TkKeyword ITrec -> TkKeyword @@ -366,3 +379,42 @@ classify tok = | isPrefixOf "{-#" c , isSuffixOf "#-}" c -> TkPragma | otherwise -> TkComment + +-- | Classify given tokens as beginning pragmas (or not). +inPragma :: Bool -- ^ currently in pragma + -> L.Token -- ^ current token + -> Bool -- ^ new information about whether we are in a pragma +inPragma _ ITclose_prag = False +inPragma True _ = True +inPragma False tok = + case tok of + ITinline_prag {} -> True + ITspec_prag {} -> True + ITspec_inline_prag {} -> True + ITsource_prag {} -> True + ITrules_prag {} -> True + ITwarning_prag {} -> True + ITdeprecated_prag {} -> True + ITline_prag {} -> True + ITscc_prag {} -> True + ITgenerated_prag {} -> True + ITcore_prag {} -> True + ITunpack_prag {} -> True + ITnounpack_prag {} -> True + ITann_prag {} -> True + ITcomplete_prag {} -> True + IToptions_prag {} -> True + ITinclude_prag {} -> True + ITlanguage_prag -> True + ITvect_prag {} -> True + ITvect_scalar_prag {} -> True + ITnovect_prag {} -> True + ITminimal_prag {} -> True + IToverlappable_prag {} -> True + IToverlapping_prag {} -> True + IToverlaps_prag {} -> True + ITincoherent_prag {} -> True + ITctype {} -> True + + _ -> False + -- cgit v1.2.3 From ac33472e834d381f95fd56586e57e6653263055c Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Sat, 27 Jan 2018 00:42:02 -0800 Subject: Support the new 'ITcolumn_prag' token --- haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index cd2237e9..34512de8 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -249,6 +249,7 @@ classify tok = ITwarning_prag {} -> TkPragma ITdeprecated_prag {} -> TkPragma ITline_prag {} -> TkPragma + ITcolumn_prag {} -> TkPragma ITscc_prag {} -> TkPragma ITgenerated_prag {} -> TkPragma ITcore_prag {} -> TkPragma @@ -396,6 +397,7 @@ inPragma False tok = ITwarning_prag {} -> True ITdeprecated_prag {} -> True ITline_prag {} -> True + ITcolumn_prag {} -> True ITscc_prag {} -> True ITgenerated_prag {} -> True ITcore_prag {} -> True -- cgit v1.2.3 From 1e335fc0828f6f1927c6d2a125919c59f04c0bc0 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Mon, 5 Feb 2018 10:15:39 -0800 Subject: Hyperlink pattern synonyms and 'module' imports (#744) Links to pattern synonyms are now generated, as well as links from modules in import lists. Fixes #731. --- haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 361bc15d..841dff76 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -111,6 +111,8 @@ binds = everythingInRenamedSource fun term = case cast term of (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.GhcRn)) -> pure (sspan, RtkBind name) + (Just (GHC.PatSynBind (GHC.PSB (GHC.L sspan name) _ _ _ _))) -> + pure (sspan, RtkBind name) _ -> empty pat term = case cast term of (Just ((GHC.L sspan (GHC.VarPat name)) :: GHC.LPat GHC.GhcRn)) -> @@ -147,6 +149,8 @@ decls (group, _, _, _) = concatMap ($ group) fun term = case cast term of (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.GhcRn)) | GHC.isExternalName name -> pure (sspan, RtkDecl name) + (Just (GHC.PatSynBind (GHC.PSB (GHC.L sspan name) _ _ _ _))) + | GHC.isExternalName name -> pure (sspan, RtkDecl name) _ -> empty con term = case cast term of (Just (cdcl :: GHC.ConDecl GHC.GhcRn)) -> @@ -165,6 +169,7 @@ decls (group, _, _, _) = concatMap ($ group) -> map (decl . fmap GHC.selectorFieldOcc) $ GHC.cd_fld_names field Nothing -> empty sig (GHC.L _ (GHC.TypeSig names _)) = map decl names + sig (GHC.L _ (GHC.PatSynSig names _)) = map decl names sig _ = [] decl (GHC.L sspan name) = (sspan, RtkDecl name) tyref (GHC.L sspan name) = (sspan, RtkType name) @@ -183,10 +188,11 @@ imports src@(_, imps, _, _) = (Just (GHC.IEThingAll t)) -> pure $ typ $ GHC.ieLWrappedName t (Just (GHC.IEThingWith t _ vs _fls)) -> [typ $ GHC.ieLWrappedName t] ++ map (var . GHC.ieLWrappedName) vs + (Just (GHC.IEModuleContents m)) -> pure $ modu m _ -> empty typ (GHC.L sspan name) = (sspan, RtkType name) var (GHC.L sspan name) = (sspan, RtkVar name) - imp idecl | not . GHC.ideclImplicit $ idecl = - let (GHC.L sspan name) = GHC.ideclName idecl - in Just (sspan, RtkModule name) - imp _ = Nothing + modu (GHC.L sspan name) = (sspan, RtkModule name) + imp idecl + | not . GHC.ideclImplicit $ idecl = Just (modu (GHC.ideclName idecl)) + | otherwise = Nothing -- cgit v1.2.3 From e870c70cdbe739693c1eacddc42e64106c8ecfdf Mon Sep 17 00:00:00 2001 From: alexbiehl Date: Tue, 6 Feb 2018 13:56:17 +0100 Subject: Hyperlinker: Also link pattern synonym arguments --- haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 841dff76..02c4ca0b 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -111,8 +111,11 @@ binds = everythingInRenamedSource fun term = case cast term of (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.GhcRn)) -> pure (sspan, RtkBind name) - (Just (GHC.PatSynBind (GHC.PSB (GHC.L sspan name) _ _ _ _))) -> - pure (sspan, RtkBind name) + (Just (GHC.PatSynBind (GHC.PSB (GHC.L sspan name) _ args _ _))) -> + pure (sspan, RtkBind name) ++ everythingInRenamedSource patsyn_binds args + _ -> empty + patsyn_binds term = case cast term of + (Just (GHC.L sspan (name :: GHC.Name))) -> pure (sspan, RtkVar name) _ -> empty pat term = case cast term of (Just ((GHC.L sspan (GHC.VarPat name)) :: GHC.LPat GHC.GhcRn)) -> -- cgit v1.2.3 From 3077a12b57e1b93a738082aa73fab72e9c3e3f83 Mon Sep 17 00:00:00 2001 From: alexbiehl Date: Mon, 5 Mar 2018 18:10:07 +0100 Subject: Hyperlinker: Links for TyOps, class methods and associated types --- .../src/Haddock/Backends/Hyperlinker/Ast.hs | 15 +++++++- hypsrc-test/ref/src/Classes.html | 40 ++++++++++++++++------ 2 files changed, 44 insertions(+), 11 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 02c4ca0b..3c96db98 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -93,9 +93,12 @@ variables = types :: GHC.RenamedSource -> LTokenDetails types = everythingInRenamedSource ty where + ty :: forall a. Data a => a -> [(GHC.SrcSpan, TokenDetails)] ty term = case cast term of (Just ((GHC.L sspan (GHC.HsTyVar _ name)) :: GHC.LHsType GHC.GhcRn)) -> pure (sspan, RtkType (GHC.unLoc name)) + (Just ((GHC.L sspan (GHC.HsOpTy l name r)) :: GHC.LHsType GHC.GhcRn)) -> + (sspan, RtkType (GHC.unLoc name)):(ty l ++ ty r) _ -> empty -- | Obtain details map for identifier bindings. @@ -141,6 +144,7 @@ decls :: GHC.RenamedSource -> LTokenDetails decls (group, _, _, _) = concatMap ($ group) [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds , everythingInRenamedSource fun . GHC.hs_valds + , everythingInRenamedSource fix . GHC.hs_fixds , everythingInRenamedSource (con `Syb.combine` ins) ] where @@ -148,7 +152,10 @@ decls (group, _, _, _) = concatMap ($ group) GHC.DataDecl { tcdLName = name } -> pure . decl $ name GHC.SynDecl name _ _ _ _ -> pure . decl $ name GHC.FamDecl fam -> pure . decl $ GHC.fdLName fam - GHC.ClassDecl{..} -> [decl tcdLName] ++ concatMap sig tcdSigs + GHC.ClassDecl{..} -> + [decl tcdLName] + ++ concatMap sig tcdSigs + ++ concatMap tyfam tcdATs fun term = case cast term of (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.GhcRn)) | GHC.isExternalName name -> pure (sspan, RtkDecl name) @@ -171,8 +178,14 @@ decls (group, _, _, _) = concatMap ($ group) Just (field :: GHC.ConDeclField GHC.GhcRn) -> map (decl . fmap GHC.selectorFieldOcc) $ GHC.cd_fld_names field Nothing -> empty + fix term = case cast term of + Just ((GHC.FixitySig names _) :: GHC.FixitySig GHC.GhcRn) + -> map decl names + Nothing -> empty + tyfam (GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName] sig (GHC.L _ (GHC.TypeSig names _)) = map decl names sig (GHC.L _ (GHC.PatSynSig names _)) = map decl names + sig (GHC.L _ (GHC.ClassOpSig _ names _)) = map decl names sig _ = [] decl (GHC.L sspan name) = (sspan, RtkDecl name) tyref (GHC.L sspan name) = (sspan, RtkType name) diff --git a/hypsrc-test/ref/src/Classes.html b/hypsrc-test/ref/src/Classes.html index abff8877..d2604e82 100644 --- a/hypsrc-test/ref/src/Classes.html +++ b/hypsrc-test/ref/src/Classes.html @@ -60,8 +60,12 @@ > barbar bazbaz quuxquux norfnorf plughplugh Date: Fri, 20 Apr 2018 07:31:44 +0200 Subject: Don't treat fixity signatures like declarations --- haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 3c96db98..c4a9091f 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -180,7 +180,7 @@ decls (group, _, _, _) = concatMap ($ group) Nothing -> empty fix term = case cast term of Just ((GHC.FixitySig names _) :: GHC.FixitySig GHC.GhcRn) - -> map decl names + -> map (\(GHC.L sspan x) -> (sspan, RtkVar x)) names Nothing -> empty tyfam (GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName] sig (GHC.L _ (GHC.TypeSig names _)) = map decl names -- cgit v1.2.3 From 3b028ce3d1996f82cad8a273bcf95445238f5c6e Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Wed, 9 May 2018 10:42:03 -0400 Subject: DerivingVia changes --- haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs | 7 +------ haddock-api/src/Haddock/Interface/Rename.hs | 11 +++++++++-- haddock-api/src/Haddock/Types.hs | 1 + 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index 34512de8..7abb01a3 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -211,6 +211,7 @@ classify tok = ITqualified -> TkKeyword ITthen -> TkKeyword ITtype -> TkKeyword + ITvia -> TkKeyword ITwhere -> TkKeyword ITforall {} -> TkKeyword @@ -261,9 +262,6 @@ classify tok = IToptions_prag {} -> TkPragma ITinclude_prag {} -> TkPragma ITlanguage_prag -> TkPragma - ITvect_prag {} -> TkPragma - ITvect_scalar_prag {} -> TkPragma - ITnovect_prag {} -> TkPragma ITminimal_prag {} -> TkPragma IToverlappable_prag {} -> TkPragma IToverlapping_prag {} -> TkPragma @@ -408,9 +406,6 @@ inPragma False tok = IToptions_prag {} -> True ITinclude_prag {} -> True ITlanguage_prag -> True - ITvect_prag {} -> True - ITvect_scalar_prag {} -> True - ITnovect_prag {} -> True ITminimal_prag {} -> True IToverlappable_prag {} -> True IToverlapping_prag {} -> True diff --git a/haddock-api/src/Haddock/Interface/Rename.hs b/haddock-api/src/Haddock/Interface/Rename.hs index 5684e70a..c07f8300 100644 --- a/haddock-api/src/Haddock/Interface/Rename.hs +++ b/haddock-api/src/Haddock/Interface/Rename.hs @@ -560,13 +560,20 @@ renameDerivD :: DerivDecl GhcRn -> RnM (DerivDecl DocNameI) renameDerivD (DerivDecl { deriv_type = ty , deriv_strategy = strat , deriv_overlap_mode = omode }) = do - ty' <- renameLSigWcType ty + ty' <- renameLSigWcType ty + strat' <- mapM (mapM renameDerivStrategy) strat return (DerivDecl { deriv_ext = noExt , deriv_type = ty' - , deriv_strategy = strat + , deriv_strategy = strat' , deriv_overlap_mode = omode }) renameDerivD (XDerivDecl _) = panic "haddock:renameDerivD" +renameDerivStrategy :: DerivStrategy GhcRn -> RnM (DerivStrategy DocNameI) +renameDerivStrategy StockStrategy = pure StockStrategy +renameDerivStrategy AnyclassStrategy = pure AnyclassStrategy +renameDerivStrategy NewtypeStrategy = pure NewtypeStrategy +renameDerivStrategy (ViaStrategy ty) = ViaStrategy <$> renameLSigType ty + renameClsInstD :: ClsInstDecl GhcRn -> RnM (ClsInstDecl DocNameI) renameClsInstD (ClsInstDecl { cid_overlap_mode = omode , cid_poly_ty =ltype, cid_tyfam_insts = lATs diff --git a/haddock-api/src/Haddock/Types.hs b/haddock-api/src/Haddock/Types.hs index e4d79760..1f96abc0 100644 --- a/haddock-api/src/Haddock/Types.hs +++ b/haddock-api/src/Haddock/Types.hs @@ -725,6 +725,7 @@ type instance XCFamEqn DocNameI _ _ = NoExt type instance XCClsInstDecl DocNameI = NoExt type instance XCDerivDecl DocNameI = NoExt +type instance XViaStrategy DocNameI = LHsSigType DocNameI type instance XDataFamInstD DocNameI = NoExt type instance XTyFamInstD DocNameI = NoExt type instance XClsInstD DocNameI = NoExt -- cgit v1.2.3 From 524566646723ced878d49c903358e7b5f25442b9 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 13 Jun 2018 23:10:43 +0200 Subject: Remove `ITtildehsh` token --- haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index 7abb01a3..8f77b8f5 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -280,7 +280,6 @@ classify tok = ITrarrow {} -> TkGlyph ITat -> TkGlyph ITtilde -> TkGlyph - ITtildehsh -> TkGlyph ITdarrow {} -> TkGlyph ITminus -> TkGlyph ITbang -> TkGlyph -- cgit v1.2.3 From 52405584f978c8d1afd6439b3ac0e3fd0f9b825e Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Mon, 7 May 2018 18:53:15 -0700 Subject: Filter out CRLFs in hyperlinker backend (#813) This prevents spurious lines from appearing in the final output. --- haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index 8f77b8f5..92443bff 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -24,8 +24,13 @@ import Haddock.Backends.Hyperlinker.Types as T -- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v', -- characters, since GHC transforms those into ' ' and '\n') parse :: DynFlags -> FilePath -> String -> [T.Token] -parse dflags fp s = ghcToks (processCPP dflags fp s) - +parse dflags fp = ghcToks . processCPP dflags fp . filterCRLF + where + -- Remove CRLFs from source + filterCRLF :: String -> String + filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs + filterCRLF (c:cs) = c : filterCRLF cs + filterCRLF [] = [] -- | Parse the source into tokens using the GHC lexer. -- -- cgit v1.2.3 From 276c352b5dd3dd52b333e0d04ea71f7686ecd7b9 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Tue, 8 May 2018 02:15:45 -0700 Subject: Remove 'TokenGroup' from Hyperlinker (#818) Since the hyperlinker backend now relies on the GHC tokenizer, something like 'Bar.Baz.foo' already gets bunched together into one token (as opposed to being spread across 'Bar', '.', 'Baz', '.', and 'foo'). --- .../src/Haddock/Backends/Hyperlinker/Renderer.hs | 64 +++++----------------- 1 file changed, 14 insertions(+), 50 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs index 5291220a..d7ea70a6 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs @@ -1,6 +1,5 @@ {-# LANGUAGE RecordWildCards #-} - module Haddock.Backends.Hyperlinker.Renderer (render) where @@ -28,36 +27,10 @@ render :: Maybe FilePath -> Maybe FilePath -> SrcMap -> [RichToken] -> Html render mcss mjs srcs tokens = header mcss mjs <> body srcs tokens - -data TokenGroup - = GrpNormal Token - | GrpRich TokenDetails [Token] - - --- | Group consecutive tokens pointing to the same element. --- --- We want to render qualified identifiers as one entity. For example, --- @Bar.Baz.foo@ consists of 5 tokens (@Bar@, @.@, @Baz@, @.@, @foo@) but for --- better user experience when highlighting and clicking links, these tokens --- should be regarded as one identifier. Therefore, before rendering we must --- group consecutive elements pointing to the same 'GHC.Name' (note that even --- dot token has it if it is part of qualified name). -groupTokens :: [RichToken] -> [TokenGroup] -groupTokens [] = [] -groupTokens ((RichToken tok Nothing):rest) = (GrpNormal tok):(groupTokens rest) -groupTokens ((RichToken tok (Just det)):rest) = - let (grp, rest') = span same rest - in (GrpRich det (tok:(map rtkToken grp))):(groupTokens rest') - where - same (RichToken _ (Just det')) = det == det' - same _ = False - - body :: SrcMap -> [RichToken] -> Html -body srcs tokens = - Html.body . Html.pre $ hypsrc +body srcs tokens = Html.body . Html.pre $ hypsrc where - hypsrc = mconcat . map (tokenGroup srcs) . groupTokens $ tokens + hypsrc = mconcat . map (richToken srcs) $ tokens header :: Maybe FilePath -> Maybe FilePath -> Html @@ -78,29 +51,20 @@ header mcss mjs = , Html.src scriptFile ] - -tokenGroup :: SrcMap -> TokenGroup -> Html -tokenGroup _ (GrpNormal tok@(Token { .. })) +-- | Given information about the source position of definitions, render a token +richToken :: SrcMap -> RichToken -> Html +richToken srcs (RichToken Token{..} details) | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue - | otherwise = tokenSpan tok ! attrs - where - attrs = [ multiclass . tokenStyle $ tkType ] -tokenGroup srcs (GrpRich det tokens) = - externalAnchor det . internalAnchor det . hyperlink srcs det $ content + | otherwise = linked content where - content = mconcat . map (richToken det) $ tokens - - -richToken :: TokenDetails -> Token -> Html -richToken det tok = - tokenSpan tok ! [ multiclass style ] - where - style = (tokenStyle . tkType) tok ++ richTokenStyle det - - -tokenSpan :: Token -> Html -tokenSpan = Html.thespan . Html.toHtml . tkValue - + content = tokenSpan ! [ multiclass style ] + tokenSpan = Html.thespan (Html.toHtml tkValue) + style = tokenStyle tkType ++ maybe [] richTokenStyle details + + -- If we have name information, we can make links + linked = case details of + Just d -> externalAnchor d . internalAnchor d . hyperlink srcs d + Nothing -> id richTokenStyle :: TokenDetails -> [StyleClass] richTokenStyle (RtkVar _) = ["hs-var"] -- cgit v1.2.3 From 254de3010dddb06bc1dacf4c029a9e8f30ff1600 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Tue, 5 Jun 2018 10:47:16 -0700 Subject: Improve hyperlinker's 'spanToNewline' (#846) 'spanToNewline' is used to help break apart the source into lines which can then be partioned into CPP and non-CPP chunks. It is important that 'spanToNewline' not break apart tokens, so it needs to properly handle things like * block comments, possibly nested * string literals, possibly multi-line * CPP macros, possibly multi-line String literals in particular were not being properly handled. The fix is to to fall back in 'Text.Read.lex' to help lex things that are not comments. Fixes #837. --- .../src/Haddock/Backends/Hyperlinker/Parser.hs | 32 ++- hypsrc-test/ref/src/CPP.html | 216 +++++++++++++++++++++ hypsrc-test/src/CPP.hs | 26 +++ 3 files changed, 267 insertions(+), 7 deletions(-) create mode 100644 hypsrc-test/ref/src/CPP.html create mode 100644 hypsrc-test/src/CPP.hs (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index 92443bff..e7ecac73 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -3,6 +3,8 @@ module Haddock.Backends.Hyperlinker.Parser (parse) where import Data.Either ( isRight, isLeft ) import Data.List ( foldl', isPrefixOf, isSuffixOf ) import Data.Maybe ( maybeToList ) +import Data.Char ( isSpace ) +import qualified Text.Read as R import GHC ( DynFlags, addSourceToTokens ) import SrcLoc @@ -109,12 +111,9 @@ isCPPline :: String -> Bool isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 --- | Split a "line" off the front of a string, supporting newline escapes. --- --- By "line", we understand: the shortest substring ending in a '\n' that is not --- --- 1. immediately preceded by a '\\' --- 2. not inside some (possibly nested) block comment +-- | Split a "line" off the front of a string, hopefully without cutting tokens +-- in half. I say "hopefully" because knowing what a token is requires lexing, +-- yet lexing depends on this function. -- -- All characters in the input are present in the output: -- @@ -122,17 +121,36 @@ isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 spanToNewline :: Int -- ^ open '{-' -> String -- ^ input -> (String, String) -spanToNewline _ [] = ([], []) + +-- Base case and space characters +spanToNewline _ "" = ("", "") +spanToNewline n ('\n':str) | n <= 0 = ("\n", str) spanToNewline n ('\n':str) | n <= 0 = ("\n", str) spanToNewline n ('\\':'\n':str) = let (str', rest) = spanToNewline n str in ('\\':'\n':str', rest) + +-- Block comments spanToNewline n ('{':'-':str) = let (str', rest) = spanToNewline (n+1) str in ('{':'-':str', rest) spanToNewline n ('-':'}':str) = let (str', rest) = spanToNewline (n-1) str in ('-':'}':str', rest) + +-- When not in a block comment, try to lex a Haskell token +spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) = + if all (== '-') lexed && length lexed >= 2 + -- A Haskell line comment + then case span (/= '\n') str' of + (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest) + (_, _) -> (str, "") + + -- An actual Haskell token + else let (str'', rest) = spanToNewline 0 str' + in (lexed ++ str'', rest) + +-- In all other cases, advance one character at a time spanToNewline n (c:str) = let (str', rest) = spanToNewline n str in (c:str', rest) diff --git a/hypsrc-test/ref/src/CPP.html b/hypsrc-test/ref/src/CPP.html new file mode 100644 index 00000000..fb85bd2f --- /dev/null +++ b/hypsrc-test/ref/src/CPP.html @@ -0,0 +1,216 @@ +
{-# LANGUAGE CPP #-}
+module CPP where
+
+#define SOMETHING1
+
+foo :: String
+foo = {-  " single quotes are fine in block comments
+          {- nested block comments are fine -}
+       -} "foo"
+
+#define SOMETHING2
+
+bar :: String
+bar = "block comment in a string is not a comment {- "
+
+#define SOMETHING3
+
+-- " single quotes are fine in line comments
+-- {- unclosed block comments are fine in line comments
+
+-- Multiline CPP is also fine
+#define FOO\
+  1
+
+baz :: String
+baz = "line comment in a string is not a comment --"
+
\ No newline at end of file diff --git a/hypsrc-test/src/CPP.hs b/hypsrc-test/src/CPP.hs new file mode 100644 index 00000000..f00ce031 --- /dev/null +++ b/hypsrc-test/src/CPP.hs @@ -0,0 +1,26 @@ +{-# LANGUAGE CPP #-} +module CPP where + +#define SOMETHING1 + +foo :: String +foo = {- " single quotes are fine in block comments + {- nested block comments are fine -} + -} "foo" + +#define SOMETHING2 + +bar :: String +bar = "block comment in a string is not a comment {- " + +#define SOMETHING3 + +-- " single quotes are fine in line comments +-- {- unclosed block comments are fine in line comments + +-- Multiline CPP is also fine +#define FOO\ + 1 + +baz :: String +baz = "line comment in a string is not a comment --" -- cgit v1.2.3 From 083faee1cec2df01d7ab8a4ef2dc0a28c0c120cf Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 13 Jun 2018 23:28:47 +0200 Subject: Complete FixitySig and FamilyDecl pattern matches --- haddock-api/src/Haddock/Backends/Hoogle.hs | 1 + haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs | 3 +++ 2 files changed, 4 insertions(+) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker') diff --git a/haddock-api/src/Haddock/Backends/Hoogle.hs b/haddock-api/src/Haddock/Backends/Hoogle.hs index 6e8148f7..257a8d6d 100644 --- a/haddock-api/src/Haddock/Backends/Hoogle.hs +++ b/haddock-api/src/Haddock/Backends/Hoogle.hs @@ -201,6 +201,7 @@ ppFam dflags decl@(FamilyDecl { fdInfo = info }) -- for Hoogle, so pretend it doesn't have any. ClosedTypeFamily{} -> decl { fdInfo = OpenTypeFamily } _ -> decl +ppFam _ XFamilyDecl {} = panic "ppFam" ppInstance :: DynFlags -> ClsInst -> [String] ppInstance dflags x = diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 220a59fe..0ecf7109 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -183,8 +183,11 @@ decls (group, _, _, _) = concatMap ($ group) fix term = case cast term of Just ((GHC.FixitySig _ names _) :: GHC.FixitySig GHC.GhcRn) -> map (\(GHC.L sspan x) -> (sspan, RtkVar x)) names + Just ((GHC.XFixitySig {}) :: GHC.FixitySig GHC.GhcRn) + -> GHC.panic "haddock:decls" Nothing -> empty tyfam (GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName] + tyfam (GHC.L _ (GHC.XFamilyDecl {})) = GHC.panic "haddock:dels" sig (GHC.L _ (GHC.TypeSig _ names _)) = map decl names sig (GHC.L _ (GHC.PatSynSig _ names _)) = map decl names sig (GHC.L _ (GHC.ClassOpSig _ _ names _)) = map decl names -- cgit v1.2.3