From 4f75be94f45a0e92553eccefe56230c554333ce7 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Sun, 10 Dec 2017 12:22:21 -0800 Subject: Use the GHC lexer for the Hyperlinker backend (#714) * Start changing to use GHC lexer * better cpp * Change SrcSpan to RealSrcSpan * Remove error * Try to stop too many open files * wip * wip * Revert "wip" This reverts commit b605510a195f26315e3d8ca90e6d95a6737553e1. Conflicts: haddock-api/haddock-api.cabal haddock-api/src/Haddock/Interface.hs * Remove pointless 'caching' * Use dlist rather than lists when finding vars * Use a map rather than list * Delete bogus comment * Rebase followup Things now run using the GHC lexer. There are still - stray debug statements - unnecessary changes w.r.t. master * Cleaned up differences w.r.t. current Haddock HEAD Things are looking good. quasiquotes in particular look beautiful: the TH ones (with Haskell source inside) colour/link their contents too! Haven't yet begun to check for possible performance problems. * Support CPP and top-level pragmas The support for these is hackier - but no more hacky than the existing support. * Tests pass, CPP is better recognized The tests were in some cases altered: I consider the new output to be more correct than the old one.... * Fix shrinking of source without tabs in test * Replace 'Position'/'Span' with GHC counterparts Replaces 'Position' -> 'GHC.RealSrcLoc' and 'Span' -> 'GHC.RealSrcSpan'. * Nits * Forgot entry in .cabal * Update changelog --- hypsrc-test/ref/src/Identifiers.html | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'hypsrc-test/ref/src') diff --git a/hypsrc-test/ref/src/Identifiers.html b/hypsrc-test/ref/src/Identifiers.html index f52db4ab..ce69ad37 100644 --- a/hypsrc-test/ref/src/Identifiers.html +++ b/hypsrc-test/ref/src/Identifiers.html @@ -827,11 +827,7 @@ > Identifiers.norfIdentifiers.norf \ No newline at end of file +> -- cgit v1.2.3 From 3077a12b57e1b93a738082aa73fab72e9c3e3f83 Mon Sep 17 00:00:00 2001 From: alexbiehl Date: Mon, 5 Mar 2018 18:10:07 +0100 Subject: Hyperlinker: Links for TyOps, class methods and associated types --- .../src/Haddock/Backends/Hyperlinker/Ast.hs | 15 +++++++- hypsrc-test/ref/src/Classes.html | 40 ++++++++++++++++------ 2 files changed, 44 insertions(+), 11 deletions(-) (limited to 'hypsrc-test/ref/src') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 02c4ca0b..3c96db98 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -93,9 +93,12 @@ variables = types :: GHC.RenamedSource -> LTokenDetails types = everythingInRenamedSource ty where + ty :: forall a. Data a => a -> [(GHC.SrcSpan, TokenDetails)] ty term = case cast term of (Just ((GHC.L sspan (GHC.HsTyVar _ name)) :: GHC.LHsType GHC.GhcRn)) -> pure (sspan, RtkType (GHC.unLoc name)) + (Just ((GHC.L sspan (GHC.HsOpTy l name r)) :: GHC.LHsType GHC.GhcRn)) -> + (sspan, RtkType (GHC.unLoc name)):(ty l ++ ty r) _ -> empty -- | Obtain details map for identifier bindings. @@ -141,6 +144,7 @@ decls :: GHC.RenamedSource -> LTokenDetails decls (group, _, _, _) = concatMap ($ group) [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds , everythingInRenamedSource fun . GHC.hs_valds + , everythingInRenamedSource fix . GHC.hs_fixds , everythingInRenamedSource (con `Syb.combine` ins) ] where @@ -148,7 +152,10 @@ decls (group, _, _, _) = concatMap ($ group) GHC.DataDecl { tcdLName = name } -> pure . decl $ name GHC.SynDecl name _ _ _ _ -> pure . decl $ name GHC.FamDecl fam -> pure . decl $ GHC.fdLName fam - GHC.ClassDecl{..} -> [decl tcdLName] ++ concatMap sig tcdSigs + GHC.ClassDecl{..} -> + [decl tcdLName] + ++ concatMap sig tcdSigs + ++ concatMap tyfam tcdATs fun term = case cast term of (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.GhcRn)) | GHC.isExternalName name -> pure (sspan, RtkDecl name) @@ -171,8 +178,14 @@ decls (group, _, _, _) = concatMap ($ group) Just (field :: GHC.ConDeclField GHC.GhcRn) -> map (decl . fmap GHC.selectorFieldOcc) $ GHC.cd_fld_names field Nothing -> empty + fix term = case cast term of + Just ((GHC.FixitySig names _) :: GHC.FixitySig GHC.GhcRn) + -> map decl names + Nothing -> empty + tyfam (GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName] sig (GHC.L _ (GHC.TypeSig names _)) = map decl names sig (GHC.L _ (GHC.PatSynSig names _)) = map decl names + sig (GHC.L _ (GHC.ClassOpSig _ names _)) = map decl names sig _ = [] decl (GHC.L sspan name) = (sspan, RtkDecl name) tyref (GHC.L sspan name) = (sspan, RtkType name) diff --git a/hypsrc-test/ref/src/Classes.html b/hypsrc-test/ref/src/Classes.html index abff8877..d2604e82 100644 --- a/hypsrc-test/ref/src/Classes.html +++ b/hypsrc-test/ref/src/Classes.html @@ -60,8 +60,12 @@ > barbar bazbaz quuxquux norfnorf plughplugh Date: Tue, 5 Jun 2018 10:47:16 -0700 Subject: Improve hyperlinker's 'spanToNewline' (#846) 'spanToNewline' is used to help break apart the source into lines which can then be partioned into CPP and non-CPP chunks. It is important that 'spanToNewline' not break apart tokens, so it needs to properly handle things like * block comments, possibly nested * string literals, possibly multi-line * CPP macros, possibly multi-line String literals in particular were not being properly handled. The fix is to to fall back in 'Text.Read.lex' to help lex things that are not comments. Fixes #837. --- .../src/Haddock/Backends/Hyperlinker/Parser.hs | 32 ++- hypsrc-test/ref/src/CPP.html | 216 +++++++++++++++++++++ hypsrc-test/src/CPP.hs | 26 +++ 3 files changed, 267 insertions(+), 7 deletions(-) create mode 100644 hypsrc-test/ref/src/CPP.html create mode 100644 hypsrc-test/src/CPP.hs (limited to 'hypsrc-test/ref/src') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index 92443bff..e7ecac73 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -3,6 +3,8 @@ module Haddock.Backends.Hyperlinker.Parser (parse) where import Data.Either ( isRight, isLeft ) import Data.List ( foldl', isPrefixOf, isSuffixOf ) import Data.Maybe ( maybeToList ) +import Data.Char ( isSpace ) +import qualified Text.Read as R import GHC ( DynFlags, addSourceToTokens ) import SrcLoc @@ -109,12 +111,9 @@ isCPPline :: String -> Bool isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 --- | Split a "line" off the front of a string, supporting newline escapes. --- --- By "line", we understand: the shortest substring ending in a '\n' that is not --- --- 1. immediately preceded by a '\\' --- 2. not inside some (possibly nested) block comment +-- | Split a "line" off the front of a string, hopefully without cutting tokens +-- in half. I say "hopefully" because knowing what a token is requires lexing, +-- yet lexing depends on this function. -- -- All characters in the input are present in the output: -- @@ -122,17 +121,36 @@ isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 spanToNewline :: Int -- ^ open '{-' -> String -- ^ input -> (String, String) -spanToNewline _ [] = ([], []) + +-- Base case and space characters +spanToNewline _ "" = ("", "") +spanToNewline n ('\n':str) | n <= 0 = ("\n", str) spanToNewline n ('\n':str) | n <= 0 = ("\n", str) spanToNewline n ('\\':'\n':str) = let (str', rest) = spanToNewline n str in ('\\':'\n':str', rest) + +-- Block comments spanToNewline n ('{':'-':str) = let (str', rest) = spanToNewline (n+1) str in ('{':'-':str', rest) spanToNewline n ('-':'}':str) = let (str', rest) = spanToNewline (n-1) str in ('-':'}':str', rest) + +-- When not in a block comment, try to lex a Haskell token +spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) = + if all (== '-') lexed && length lexed >= 2 + -- A Haskell line comment + then case span (/= '\n') str' of + (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest) + (_, _) -> (str, "") + + -- An actual Haskell token + else let (str'', rest) = spanToNewline 0 str' + in (lexed ++ str'', rest) + +-- In all other cases, advance one character at a time spanToNewline n (c:str) = let (str', rest) = spanToNewline n str in (c:str', rest) diff --git a/hypsrc-test/ref/src/CPP.html b/hypsrc-test/ref/src/CPP.html new file mode 100644 index 00000000..fb85bd2f --- /dev/null +++ b/hypsrc-test/ref/src/CPP.html @@ -0,0 +1,216 @@ +
{-# LANGUAGE CPP #-}
+module CPP where
+
+#define SOMETHING1
+
+foo :: String
+foo = {-  " single quotes are fine in block comments
+          {- nested block comments are fine -}
+       -} "foo"
+
+#define SOMETHING2
+
+bar :: String
+bar = "block comment in a string is not a comment {- "
+
+#define SOMETHING3
+
+-- " single quotes are fine in line comments
+-- {- unclosed block comments are fine in line comments
+
+-- Multiline CPP is also fine
+#define FOO\
+  1
+
+baz :: String
+baz = "line comment in a string is not a comment --"
+
\ No newline at end of file diff --git a/hypsrc-test/src/CPP.hs b/hypsrc-test/src/CPP.hs new file mode 100644 index 00000000..f00ce031 --- /dev/null +++ b/hypsrc-test/src/CPP.hs @@ -0,0 +1,26 @@ +{-# LANGUAGE CPP #-} +module CPP where + +#define SOMETHING1 + +foo :: String +foo = {- " single quotes are fine in block comments + {- nested block comments are fine -} + -} "foo" + +#define SOMETHING2 + +bar :: String +bar = "block comment in a string is not a comment {- " + +#define SOMETHING3 + +-- " single quotes are fine in line comments +-- {- unclosed block comments are fine in line comments + +-- Multiline CPP is also fine +#define FOO\ + 1 + +baz :: String +baz = "line comment in a string is not a comment --" -- cgit v1.2.3