From 1b26460fb3b5df5215cc1e6715661cbc7c950085 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Thu, 31 Jan 2019 01:37:25 -0800 Subject: Use `.hie` files for the Hyperlinker backend (#977) # Summary This is a large architectural change to the Hyperlinker. * extract link (and now also type) information from `.hie` instead of doing ad-hoc SYB traversals of the `RenamedSource`. Also adds a superb type-on-hover feature (#715). * re-engineer the lexer to avoid needless string conversions. By going directly through GHC's `P` monad and taking bytestring slices, we avoid a ton of allocation and have better handling of position pragmas and CPP. In terms of performance, the Haddock side of things has gotten _much_ more efficient. Unfortunately, much of this is cancelled out by the increased GHC workload for generating `.hie` files. For the full set of boot libs (including `ghc`-the-library) * the sum of total time went down by 9-10% overall * the sum of total allocations went down by 6-7% # Motivation Haddock is moving towards working entirely over `.hi` and `.hie` files. This change means we no longer need the `RenamedSource` from `TypecheckedModule` (something which is _not_ in `.hi` files). # Details Along the way a bunch of things were fixed: * Cross package (and other) links are now more reliable (#496) * The lexer tries to recover from errors on every line (instead of at CPP boundaries) * `LINE`/`COLUMN` pragmas are taken into account * filter out zero length tokens before rendering * avoid recomputing the `ModuleName`-based `SrcMap` * remove the last use of `Documentation.Haddock.Utf8` (see #998) * restructure temporary folder logic for `.hi`/`.hie` model --- haddock-api/src/Haddock/Backends/Hyperlinker.hs | 55 ++- .../src/Haddock/Backends/Hyperlinker/Ast.hs | 237 ------------- .../src/Haddock/Backends/Hyperlinker/Parser.hs | 386 ++++++++++----------- .../src/Haddock/Backends/Hyperlinker/Renderer.hs | 276 +++++++++++---- .../src/Haddock/Backends/Hyperlinker/Types.hs | 36 +- .../src/Haddock/Backends/Hyperlinker/Utils.hs | 98 +++++- 6 files changed, 532 insertions(+), 556 deletions(-) delete mode 100644 haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs (limited to 'haddock-api/src/Haddock/Backends') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker.hs b/haddock-api/src/Haddock/Backends/Hyperlinker.hs index 8f0c4b67..5ef7d9bb 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} module Haddock.Backends.Hyperlinker ( ppHyperlinkedSource , module Haddock.Backends.Hyperlinker.Types @@ -8,15 +9,24 @@ module Haddock.Backends.Hyperlinker import Haddock.Types import Haddock.Utils (writeUtf8File) import Haddock.Backends.Hyperlinker.Renderer +import Haddock.Backends.Hyperlinker.Parser import Haddock.Backends.Hyperlinker.Types import Haddock.Backends.Hyperlinker.Utils - -import Text.XHtml hiding (()) +import Haddock.Backends.Xhtml.Utils ( renderToString ) import Data.Maybe import System.Directory import System.FilePath +import HieTypes ( HieFile(..), HieASTs(..) ) +import HieBin ( readHieFile ) +import Data.Map as M +import FastString ( mkFastString ) +import Module ( Module, moduleName ) +import NameCache ( initNameCache ) +import UniqSupply ( mkSplitUniqSupply ) +import SysTools.Info ( getCompilerInfo' ) + -- | Generate hyperlinked source for given interfaces. -- @@ -27,10 +37,10 @@ ppHyperlinkedSource :: FilePath -- ^ Output directory -> FilePath -- ^ Resource directory -> Maybe FilePath -- ^ Custom CSS file path -> Bool -- ^ Flag indicating whether to pretty-print HTML - -> SrcMap -- ^ Paths to sources + -> M.Map Module SrcPath -- ^ Paths to sources -> [Interface] -- ^ Interfaces for which we create source -> IO () -ppHyperlinkedSource outdir libdir mstyle pretty srcs ifaces = do +ppHyperlinkedSource outdir libdir mstyle pretty srcs' ifaces = do createDirectoryIfMissing True srcdir let cssFile = fromMaybe (defaultCssFile libdir) mstyle copyFile cssFile $ srcdir srcCssFile @@ -39,17 +49,39 @@ ppHyperlinkedSource outdir libdir mstyle pretty srcs ifaces = do mapM_ (ppHyperlinkedModuleSource srcdir pretty srcs) ifaces where srcdir = outdir hypSrcDir + srcs = (srcs', M.mapKeys moduleName srcs') -- | Generate hyperlinked source for particular interface. -ppHyperlinkedModuleSource :: FilePath -> Bool -> SrcMap -> Interface - -> IO () -ppHyperlinkedModuleSource srcdir pretty srcs iface = - case ifaceTokenizedSrc iface of - Just tokens -> writeUtf8File path . html . render' $ tokens - Nothing -> return () +ppHyperlinkedModuleSource :: FilePath -> Bool -> SrcMaps -> Interface -> IO () +ppHyperlinkedModuleSource srcdir pretty srcs iface = case ifaceHieFile iface of + Just hfp -> do + -- Parse the GHC-produced HIE file + u <- mkSplitUniqSupply 'a' + HieFile { hie_hs_file = file + , hie_asts = HieASTs asts + , hie_types = types + , hie_hs_src = rawSrc + } <- fmap fst (readHieFile (initNameCache u []) hfp) + comp <- getCompilerInfo' df + + -- Get the AST and tokens corresponding to the source file we want + let mast | M.size asts == 1 = snd <$> M.lookupMin asts + | otherwise = M.lookup (mkFastString file) asts + tokens = parse comp df file rawSrc + + -- Produce and write out the hyperlinked sources + case mast of + Just ast -> + let fullAst = recoverFullIfaceTypes df types ast + in writeUtf8File path . renderToString pretty . render' fullAst $ tokens + Nothing + | M.size asts == 0 -> return () + | otherwise -> error $ unwords [ "couldn't find ast for" + , file, show (M.keys asts) ] + Nothing -> return () where + df = ifaceDynFlags iface render' = render (Just srcCssFile) (Just highlightScript) srcs - html = if pretty then renderHtml else showHtml path = srcdir hypSrcModuleFile (ifaceMod iface) -- | Name of CSS file in output directory. @@ -63,3 +95,4 @@ highlightScript = "highlight.js" -- | Path to default CSS file. defaultCssFile :: FilePath -> FilePath defaultCssFile libdir = libdir "html" "solarized.css" + diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs deleted file mode 100644 index a9ffc36e..00000000 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ /dev/null @@ -1,237 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE ViewPatterns #-} - -module Haddock.Backends.Hyperlinker.Ast (enrich) where - - -import qualified Haddock.Syb as Syb -import Haddock.Backends.Hyperlinker.Types - -import qualified GHC -import qualified SrcLoc -import qualified Outputable as GHC - -import Control.Applicative -import Control.Monad (guard) -import Data.Data -import qualified Data.Map.Strict as Map -import Data.Maybe - -import Prelude hiding (span) - -everythingInRenamedSource :: (Alternative f, Data x) - => (forall a. Data a => a -> f r) -> x -> f r -everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f - --- | Add more detailed information to token stream using GHC API. -enrich :: GHC.RenamedSource -> [Token] -> [RichToken] -enrich src = - map $ \token -> RichToken - { rtkToken = token - , rtkDetails = enrichToken token detailsMap - } - where - detailsMap = - mkDetailsMap (concatMap ($ src) - [ variables - , types - , decls - , binds - , imports - ]) - -type LTokenDetails = [(GHC.SrcSpan, TokenDetails)] - --- | A map containing association between source locations and "details" of --- this location. --- -type DetailsMap = Map.Map Position (Span, TokenDetails) - -mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap -mkDetailsMap xs = - Map.fromListWith select_details [ (start, (span, token_details)) - | (ghc_span, token_details) <- xs - , GHC.RealSrcSpan span <- [ghc_span] - , let start = SrcLoc.realSrcSpanStart span - ] - where - -- favour token details which appear earlier in the list - select_details _new old = old - -lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails -lookupBySpan span details = do - let pos = SrcLoc.realSrcSpanStart span - (_, (tok_span, tok_details)) <- Map.lookupLE pos details - guard (tok_span `SrcLoc.containsSpan` span) - return tok_details - -enrichToken :: Token -> DetailsMap -> Maybe TokenDetails -enrichToken (Token typ _ spn) dm - | typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm -enrichToken _ _ = Nothing - --- | Obtain details map for variables ("normally" used identifiers). -variables :: GHC.RenamedSource -> LTokenDetails -variables = - everythingInRenamedSource (var `Syb.combine` rec) - where - var term = case cast term of - (Just ((GHC.dL->GHC.L sspan (GHC.HsVar _ name)) - :: GHC.LHsExpr GHC.GhcRn)) -> - pure (sspan, RtkVar (GHC.unLoc name)) - (Just (GHC.dL->GHC.L _ (GHC.RecordCon _ - (GHC.dL->GHC.L sspan name) _))) -> - pure (sspan, RtkVar name) - _ -> empty - rec term = case cast term of - Just (GHC.HsRecField (GHC.dL->GHC.L sspan name) - (_ :: GHC.LHsExpr GHC.GhcRn) _) -> - pure (sspan, RtkVar name) - _ -> empty - --- | Obtain details map for types. -types :: GHC.RenamedSource -> LTokenDetails -types = everythingInRenamedSource ty - where - ty :: forall a. Data a => a -> [(GHC.SrcSpan, TokenDetails)] - ty term = case cast term of - (Just ((GHC.dL->GHC.L sspan (GHC.HsTyVar _ _ name)) - :: GHC.LHsType GHC.GhcRn)) -> - pure (sspan, RtkType (GHC.unLoc name)) - (Just ((GHC.dL->GHC.L sspan (GHC.HsOpTy _ l name r)) - :: GHC.LHsType GHC.GhcRn)) -> - (sspan, RtkType (GHC.unLoc name)):(ty l ++ ty r) - _ -> empty - --- | Obtain details map for identifier bindings. --- --- That includes both identifiers bound by pattern matching or declared using --- ordinary assignment (in top-level declarations, let-expressions and where --- clauses). - -binds :: GHC.RenamedSource -> LTokenDetails -binds = everythingInRenamedSource - (fun `Syb.combine` pat `Syb.combine` tvar) - where - fun term = case cast term of - (Just (GHC.FunBind _ (GHC.dL->GHC.L sspan name) _ _ _ - :: GHC.HsBind GHC.GhcRn)) -> - pure (sspan, RtkBind name) - (Just (GHC.PatSynBind _ - (GHC.PSB _ (GHC.dL->GHC.L sspan name) args _ _))) -> - pure (sspan, RtkBind name) - ++ everythingInRenamedSource patsyn_binds args - _ -> empty - patsyn_binds term = case cast term of - (Just (GHC.L sspan (name :: GHC.Name))) -> - pure (sspan, RtkVar name) - _ -> empty - pat term = case cast term of - (Just ((GHC.dL->GHC.L sspan (GHC.VarPat _ name)) - :: GHC.LPat GHC.GhcRn)) -> - pure (sspan, RtkBind (GHC.unLoc name)) - (Just (GHC.dL->GHC.L _ - (GHC.ConPatIn (GHC.dL->GHC.L sspan name) recs))) -> - [(sspan, RtkVar name)] ++ everythingInRenamedSource rec recs - (Just (GHC.dL->GHC.L _ (GHC.AsPat _ (GHC.dL->GHC.L sspan name) _))) -> - pure (sspan, RtkBind name) - _ -> empty - rec term = case cast term of - (Just (GHC.HsRecField (GHC.dL->GHC.L sspan name) - (_ :: GHC.LPat GHC.GhcRn) _)) -> - pure (sspan, RtkVar name) - _ -> empty - tvar term = case cast term of - (Just ((GHC.dL->GHC.L sspan (GHC.UserTyVar _ name)) - :: GHC.LHsTyVarBndr GHC.GhcRn)) -> - pure (sspan, RtkBind (GHC.unLoc name)) - (Just (GHC.dL->GHC.L _ (GHC.KindedTyVar _ (GHC.dL->GHC.L sspan name) _))) -> - pure (sspan, RtkBind name) - _ -> empty - --- | Obtain details map for top-level declarations. -decls :: GHC.RenamedSource -> LTokenDetails -decls (group, _, _, _) = concatMap ($ group) - [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds - , everythingInRenamedSource fun . GHC.hs_valds - , everythingInRenamedSource fix . GHC.hs_fixds - , everythingInRenamedSource (con `Syb.combine` ins) - ] - where - typ (GHC.dL->GHC.L _ t) = case t of - GHC.DataDecl { tcdLName = name } -> pure . decl $ name - GHC.SynDecl _ name _ _ _ -> pure . decl $ name - GHC.FamDecl _ fam -> pure . decl $ GHC.fdLName fam - GHC.ClassDecl{..} -> - [decl tcdLName] - ++ concatMap sig tcdSigs - ++ concatMap tyfam tcdATs - GHC.XTyClDecl {} -> GHC.panic "haddock:decls" - fun term = case cast term of - (Just (GHC.FunBind _ (GHC.dL->GHC.L sspan name) _ _ _ - :: GHC.HsBind GHC.GhcRn)) - | GHC.isExternalName name -> pure (sspan, RtkDecl name) - (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.dL->GHC.L sspan name) _ _ _))) - | GHC.isExternalName name -> pure (sspan, RtkDecl name) - _ -> empty - con term = case cast term of - (Just (cdcl :: GHC.ConDecl GHC.GhcRn)) -> - map decl (GHC.getConNames cdcl) - ++ everythingInRenamedSource fld cdcl - Nothing -> empty - ins term = case cast term of - (Just ((GHC.DataFamInstD _ (GHC.DataFamInstDecl eqn)) - :: GHC.InstDecl GHC.GhcRn)) - -> pure . tyref $ GHC.feqn_tycon $ GHC.hsib_body eqn - (Just (GHC.TyFamInstD _ (GHC.TyFamInstDecl eqn))) -> - pure . tyref $ GHC.feqn_tycon $ GHC.hsib_body eqn - _ -> empty - fld term = case cast term of - Just (field :: GHC.ConDeclField GHC.GhcRn) - -> map (decl . fmap GHC.extFieldOcc) $ GHC.cd_fld_names field - Nothing -> empty - fix term = case cast term of - Just ((GHC.FixitySig _ names _) :: GHC.FixitySig GHC.GhcRn) - -> map (\(GHC.dL->GHC.L sspan x) -> (sspan, RtkVar x)) names - Just ((GHC.XFixitySig {}) :: GHC.FixitySig GHC.GhcRn) - -> GHC.panic "haddock:decls" - Nothing -> empty - tyfam (GHC.dL->GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName] - tyfam (GHC.dL->GHC.L _ (GHC.XFamilyDecl {})) = GHC.panic "haddock:dels" - tyfam _ = GHC.panic "tyfam: Impossible Match" - - sig (GHC.dL->GHC.L _ (GHC.TypeSig _ names _)) = map decl names - sig (GHC.dL->GHC.L _ (GHC.PatSynSig _ names _)) = map decl names - sig (GHC.dL->GHC.L _ (GHC.ClassOpSig _ _ names _)) = map decl names - sig _ = [] - decl (GHC.dL->GHC.L sspan name) = (sspan, RtkDecl name) - tyref (GHC.dL->GHC.L sspan name) = (sspan, RtkType name) - --- | Obtain details map for import declarations. --- --- This map also includes type and variable details for items in export and --- import lists. -imports :: GHC.RenamedSource -> LTokenDetails -imports src@(_, imps, _, _) = - everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps - where - ie term = case cast term of - (Just ((GHC.IEVar _ v) :: GHC.IE GHC.GhcRn)) -> pure $ var - $ GHC.ieLWrappedName v - (Just (GHC.IEThingAbs _ t)) -> pure $ typ $ GHC.ieLWrappedName t - (Just (GHC.IEThingAll _ t)) -> pure $ typ $ GHC.ieLWrappedName t - (Just (GHC.IEThingWith _ t _ vs _fls)) -> - [typ $ GHC.ieLWrappedName t] ++ map (var . GHC.ieLWrappedName) vs - (Just (GHC.IEModuleContents _ m)) -> pure $ modu m - _ -> empty - typ (GHC.dL->GHC.L sspan name) = (sspan, RtkType name) - var (GHC.dL->GHC.L sspan name) = (sspan, RtkVar name) - modu (GHC.dL->GHC.L sspan name) = (sspan, RtkModule name) - imp idecl - | not . GHC.ideclImplicit $ idecl = Just (modu (GHC.ideclName idecl)) - | otherwise = Nothing diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs index f8494242..1d5576cc 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs @@ -1,213 +1,212 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} module Haddock.Backends.Hyperlinker.Parser (parse) where -import Data.Either ( isRight, isLeft ) -import Data.List ( foldl', isPrefixOf, isSuffixOf ) -import Data.Maybe ( maybeToList ) -import Data.Char ( isSpace ) -import qualified Text.Read as R +import Control.Applicative ( Alternative(..) ) +import Data.List ( isPrefixOf, isSuffixOf ) -import GHC ( DynFlags, addSourceToTokens ) -import SrcLoc +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC + +import GHC.LanguageExtensions.Type + +import BasicTypes ( IntegralLit(..) ) +import DynFlags +import qualified EnumSet as E +import ErrUtils ( emptyMessages ) import FastString ( mkFastString ) -import StringBuffer ( stringToStringBuffer ) -import Lexer ( Token(..) ) -import qualified Lexer as L +import Lexer ( P(..), ParseResult(..), PState(..), Token(..) + , mkPStatePure, lexer, mkParserFlags' ) +import Outputable ( showSDoc, panic ) +import SrcLoc +import StringBuffer ( StringBuffer, atEnd ) import Haddock.Backends.Hyperlinker.Types as T - +import Haddock.GhcUtils -- | Turn source code string into a stream of more descriptive tokens. -- --- Result should retain original file layout (including comments, whitespace, --- etc.), i.e. the following "law" should hold: --- --- prop> concat . map tkValue . parse = id --- --- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v', --- characters, since GHC transforms those into ' ' and '\n') -parse :: DynFlags -> FilePath -> String -> [T.Token] -parse dflags fp = ghcToks . processCPP dflags fp . filterCRLF +-- Result should retain original file layout (including comments, +-- whitespace, and CPP). +parse + :: CompilerInfo -- ^ Underlying CC compiler (whatever expanded CPP) + -> DynFlags -- ^ Flags for this module + -> FilePath -- ^ Path to the source of this module + -> BS.ByteString -- ^ Raw UTF-8 encoded source of this module + -> [T.Token] +parse comp dflags fpath bs = case unP (go False []) initState of + POk _ toks -> reverse toks + PFailed _ ss errMsg -> panic $ "Hyperlinker parse error at " ++ show ss ++ + ": " ++ showSDoc dflags errMsg where - -- Remove CRLFs from source - filterCRLF :: String -> String - filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs - filterCRLF (c:cs) = c : filterCRLF cs - filterCRLF [] = [] --- | Parse the source into tokens using the GHC lexer. + initState = mkPStatePure pflags buf start + buf = stringBufferFromByteString bs + start = mkRealSrcLoc (mkFastString fpath) 1 1 + needPragHack' = needPragHack comp dflags + pflags = mkParserFlags' (warningFlags dflags) + (extensionFlags dflags) + (thisPackage dflags) + (safeImportsOn dflags) + False -- lex Haddocks as comment tokens + True -- produce comment tokens + False -- produce position pragmas tokens + + go :: Bool -- ^ are we currently in a pragma? + -> [T.Token] -- ^ tokens accumulated so far (in reverse) + -> P [T.Token] + go inPrag toks = do + (b, _) <- getInput + if not (atEnd b) + then do + (newToks, inPrag') <- parseCppLine <|> parsePlainTok inPrag <|> unknownLine + go inPrag' (newToks ++ toks) + else + pure toks + + -- | Like 'Lexer.lexer', but slower, with a better API, and filtering out empty tokens + wrappedLexer :: P (RealLocated Lexer.Token) + wrappedLexer = Lexer.lexer False andThen + where andThen (L (RealSrcSpan s) t) + | srcSpanStartLine s /= srcSpanEndLine s || + srcSpanStartCol s /= srcSpanEndCol s + = pure (L s t) + andThen (L (RealSrcSpan s) ITeof) = pure (L s ITeof) + andThen _ = wrappedLexer + + -- | Try to parse a CPP line (can fail) + parseCppLine :: P ([T.Token], Bool) + parseCppLine = do + (b, l) <- getInput + case tryCppLine l b of + Just (cppBStr, l', b') + -> let cppTok = T.Token { tkType = TkCpp + , tkValue = cppBStr + , tkSpan = mkRealSrcSpan l l' } + in setInput (b', l') *> pure ([cppTok], False) + _ -> empty + + -- | Try to parse a regular old token (can fail) + parsePlainTok :: Bool -> P ([T.Token], Bool) -- return list is only ever 0-2 elements + parsePlainTok inPrag = do + (bInit, lInit) <- getInput + L sp tok <- Lexer.lexer False return + (bEnd, _) <- getInput + case sp of + UnhelpfulSpan _ -> pure ([], False) -- pretend the token never existed + RealSrcSpan rsp -> do + let typ = if inPrag then TkPragma else classify tok + RealSrcLoc lStart = srcSpanStart sp -- safe since @sp@ is real + (spaceBStr, bStart) = spanPosition lInit lStart bInit + inPragDef = inPragma inPrag tok + + (bEnd', inPrag') <- case tok of + + -- Update internal line + file position if this is a LINE pragma + ITline_prag _ -> tryOrElse (bEnd, inPragDef) $ do + L _ (ITinteger (IL { il_value = line })) <- wrappedLexer + L _ (ITstring _ file) <- wrappedLexer + L spF ITclose_prag <- wrappedLexer + + let newLoc = mkRealSrcLoc file (fromIntegral line - 1) (srcSpanEndCol spF) + (bEnd'', _) <- getInput + setInput (bEnd'', newLoc) + + pure (bEnd'', False) + + -- Update internal column position if this is a COLUMN pragma + ITcolumn_prag _ -> tryOrElse (bEnd, inPragDef) $ do + L _ (ITinteger (IL { il_value = col })) <- wrappedLexer + L spF ITclose_prag <- wrappedLexer + + let newLoc = mkRealSrcLoc (srcSpanFile spF) (srcSpanEndLine spF) (fromIntegral col) + (bEnd'', _) <- getInput + setInput (bEnd'', newLoc) + + pure (bEnd'', False) + + -- See 'needPragHack' + ITclose_prag{} + | needPragHack' + , '\n' `BSC.elem` spaceBStr + -> getInput >>= \(b,p) -> setInput (b,advanceSrcLoc p '\n') >> pure (bEnd, False) + + _ -> pure (bEnd, inPragDef) + + let tokBStr = splitStringBuffer bStart bEnd' + plainTok = T.Token { tkType = typ + , tkValue = tokBStr + , tkSpan = rsp } + spaceTok = T.Token { tkType = TkSpace + , tkValue = spaceBStr + , tkSpan = mkRealSrcSpan lInit lStart } + + pure (plainTok : [ spaceTok | not (BS.null spaceBStr) ], inPrag') + + -- | Parse whatever remains of the line as an unknown token (can't fail) + unknownLine :: P ([T.Token], Bool) + unknownLine = do + (b, l) <- getInput + let (unkBStr, l', b') = spanLine l b + unkTok = T.Token { tkType = TkUnknown + , tkValue = unkBStr + , tkSpan = mkRealSrcSpan l l' } + setInput (b', l') + pure ([unkTok], False) + + +-- | This is really, really, /really/ gross. Problem: consider a Haskell +-- file that looks like: -- --- * CPP lines are removed and reinserted as line-comments --- * top-level file pragmas are parsed as block comments (see the --- 'ITblockComment' case of 'classify' for more details) +-- @ +-- {-# LANGUAGE CPP #-} +-- module SomeMod where -- -processCPP :: DynFlags -- ^ GHC's flags - -> FilePath -- ^ source file name (for position information) - -> String -- ^ source file contents - -> [(Located L.Token, String)] -processCPP dflags fpath s = addSrc . go start . splitCPP $ s - where - start = mkRealSrcLoc (mkFastString fpath) 1 1 - addSrc = addSourceToTokens start (stringToStringBuffer s) - - -- Transform a list of Haskell/CPP lines into a list of tokens - go :: RealSrcLoc -> [Either String String] -> [Located L.Token] - go _ [] = [] - go pos ls = - let (hLinesRight, ls') = span isRight ls - (cppLinesLeft, rest) = span isLeft ls' - - hSrc = concat [ hLine | Right hLine <- hLinesRight ] - cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ] - - in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of - - -- Stuff that fails to lex gets turned into comments - L.PFailed _ _ss _msg -> - let (src_pos, failed) = mkToken ITunknown pos hSrc - (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc - in failed : cpp : go new_pos rest - - -- Successfully lexed - L.POk ss toks -> - let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc - in toks ++ [cpp] ++ go new_pos rest - - -- Manually make a token from a 'String', advancing the cursor position - mkToken tok start' str = - let end = foldl' advanceSrcLoc start' str - in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str)) - - --- | Split apart the initial file into Haskell source lines ('Left' entries) and --- CPP lines ('Right' entries). +-- #define SIX 6 +-- +-- {-# INLINE foo +-- #-} +-- foo = 1 +-- @ -- --- All characters in the input are present in the output: +-- Clang's CPP replaces the @#define SIX 6@ line with an empty line (as it +-- should), but get confused about @#-}@. I'm guessing it /starts/ by +-- parsing that as a pre-processor directive and, when it fails to, it just +-- leaves the line alone. HOWEVER, it still adds an extra newline. =.= -- --- prop> concat . map (either id id) . splitCPP = id -splitCPP :: String -> [Either String String] -splitCPP "" = [] -splitCPP s | isCPPline s = Left l : splitCPP rest - | otherwise = Right l : splitCPP rest +-- This function makes sure that the Hyperlinker backend also adds that +-- extra newline (or else our spans won't line up with GHC's anymore). +needPragHack :: CompilerInfo -> DynFlags -> Bool +needPragHack comp dflags = isCcClang && E.member Cpp (extensionFlags dflags) where - ~(l, rest) = spanToNewline 0 s + isCcClang = case comp of + GCC -> False + Clang -> True + AppleClang -> True + AppleClang51 -> True + UnknownCC -> False +-- | Get the input +getInput :: P (StringBuffer, RealSrcLoc) +getInput = P $ \p @ PState { buffer = buf, loc = srcLoc } -> POk p (buf, srcLoc) --- | Heuristic to decide if a line is going to be a CPP line. This should be a --- cheap operation since it is going to be run on every line being processed. --- --- Right now it just checks if the first non-whitespace character in the first --- five characters of the line is a '#': --- --- >>> isCPPline "#define FOO 1" --- True --- --- >>> isCPPline "\t\t #ifdef GHC" --- True --- --- >>> isCPPline " #endif" --- False --- -isCPPline :: String -> Bool -isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5 +-- | Set the input +setInput :: (StringBuffer, RealSrcLoc) -> P () +setInput (buf, srcLoc) = P $ \p -> POk (p { buffer = buf, loc = srcLoc }) () --- | Split a "line" off the front of a string, hopefully without cutting tokens --- in half. I say "hopefully" because knowing what a token is requires lexing, --- yet lexing depends on this function. --- --- All characters in the input are present in the output: --- --- prop> curry (++) . spanToNewLine 0 = id -spanToNewline :: Int -- ^ open '{-' - -> String -- ^ input - -> (String, String) - --- Base case and space characters -spanToNewline _ "" = ("", "") -spanToNewline n ('\n':str) | n <= 0 = ("\n", str) -spanToNewline n ('\n':str) | n <= 0 = ("\n", str) -spanToNewline n ('\\':'\n':str) = - let (str', rest) = spanToNewline n str - in ('\\':'\n':str', rest) - --- Block comments -spanToNewline n ('{':'-':str) = - let (str', rest) = spanToNewline (n+1) str - in ('{':'-':str', rest) -spanToNewline n ('-':'}':str) = - let (str', rest) = spanToNewline (n-1) str - in ('-':'}':str', rest) - --- When not in a block comment, try to lex a Haskell token -spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) = - if all (== '-') lexed && length lexed >= 2 - -- A Haskell line comment - then case span (/= '\n') str' of - (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest) - (_, _) -> (str, "") - - -- An actual Haskell token - else let (str'', rest) = spanToNewline 0 str' - in (lexed ++ str'', rest) - --- In all other cases, advance one character at a time -spanToNewline n (c:str) = - let (str', rest) = spanToNewline n str - in (c:str', rest) - - --- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of --- Haddock's 'T.Token'. -ghcToks :: [(Located L.Token, String)] -> [T.Token] -ghcToks = reverse . (\(_,ts,_) -> ts) . foldl' go (start, [], False) - where - start = mkRealSrcLoc (mkFastString "lexing") 1 1 - - go :: (RealSrcLoc, [T.Token], Bool) - -- ^ current position, tokens accumulated, currently in pragma (or not) - - -> (Located L.Token, String) - -- ^ next token, its content - - -> (RealSrcLoc, [T.Token], Bool) - -- ^ new position, new tokens accumulated, currently in pragma (or not) - - go (pos, toks, in_prag) (L l tok, raw) = - ( next_pos - , classifiedTok ++ maybeToList white ++ toks - , inPragma in_prag tok - ) - where - (next_pos, white) = mkWhitespace pos l - - classifiedTok = [ Token (classify' tok) raw rss - | RealSrcSpan rss <- [l] - , not (null raw) - ] - - classify' | in_prag = const TkPragma - | otherwise = classify - - --- | Find the correct amount of whitespace between tokens. -mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token) -mkWhitespace prev spn = - case spn of - UnhelpfulSpan _ -> (prev,Nothing) - RealSrcSpan s | null wsstring -> (end, Nothing) - | otherwise -> (end, Just (Token TkSpace wsstring wsspan)) - where - start = realSrcSpanStart s - end = realSrcSpanEnd s - wsspan = mkRealSrcSpan prev start - nls = srcLocLine start - srcLocLine prev - spaces = if nls == 0 then srcLocCol start - srcLocCol prev - else srcLocCol start - 1 - wsstring = replicate nls '\n' ++ replicate spaces ' ' +-- | Orphan instance that adds backtracking to 'P' +instance Alternative P where + empty = P $ \_ -> PFailed (const emptyMessages) noSrcSpan "Alterative.empty" + P x <|> P y = P $ \s -> case x s of { p@POk{} -> p + ; _ -> y s } +-- | Try a parser. If it fails, backtrack and return the pure value. +tryOrElse :: a -> P a -> P a +tryOrElse x p = p <|> pure x -- | Classify given tokens as appropriate Haskell token type. -classify :: L.Token -> TokenType +classify :: Lexer.Token -> TokenType classify tok = case tok of ITas -> TkKeyword @@ -382,12 +381,7 @@ classify tok = ITunknown {} -> TkUnknown ITeof -> TkUnknown - -- Line comments are only supposed to start with '--'. Starting with '#' - -- means that this was probably a CPP. - ITlineComment s - | isCPPline s -> TkCpp - | otherwise -> TkComment - + ITlineComment {} -> TkComment ITdocCommentNext {} -> TkComment ITdocCommentPrev {} -> TkComment ITdocCommentNamed {} -> TkComment @@ -404,9 +398,9 @@ classify tok = | otherwise -> TkComment -- | Classify given tokens as beginning pragmas (or not). -inPragma :: Bool -- ^ currently in pragma - -> L.Token -- ^ current token - -> Bool -- ^ new information about whether we are in a pragma +inPragma :: Bool -- ^ currently in pragma + -> Lexer.Token -- ^ current token + -> Bool -- ^ new information about whether we are in a pragma inPragma _ ITclose_prag = False inPragma True _ = True inPragma False tok = diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs index d7ea70a6..a4dcb77b 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs @@ -1,4 +1,8 @@ {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE BangPatterns #-} module Haddock.Backends.Hyperlinker.Renderer (render) where @@ -6,15 +10,19 @@ module Haddock.Backends.Hyperlinker.Renderer (render) where import Haddock.Backends.Hyperlinker.Types import Haddock.Backends.Hyperlinker.Utils -import qualified GHC -import qualified Name as GHC -import qualified Unique as GHC +import qualified Data.ByteString as BS + +import HieTypes +import Module ( ModuleName, moduleNameString ) +import Name ( getOccString, isInternalName, Name, nameModule, nameUnique ) +import SrcLoc +import Unique ( getKey ) +import Encoding ( utf8DecodeByteString ) import System.FilePath.Posix (()) -import Data.List -import Data.Maybe import qualified Data.Map as Map +import qualified Data.Set as Set import Text.XHtml (Html, HtmlAttr, (!)) import qualified Text.XHtml as Html @@ -22,22 +30,24 @@ import qualified Text.XHtml as Html type StyleClass = String +-- | Produce the HTML corresponding to a hyperlinked Haskell source +render + :: Maybe FilePath -- ^ path to the CSS file + -> Maybe FilePath -- ^ path to the JS file + -> SrcMaps -- ^ Paths to sources + -> HieAST PrintedType -- ^ ASTs from @.hie@ files + -> [Token] -- ^ tokens to render + -> Html +render mcss mjs srcs ast tokens = header mcss mjs <> body srcs ast tokens -render :: Maybe FilePath -> Maybe FilePath -> SrcMap -> [RichToken] - -> Html -render mcss mjs srcs tokens = header mcss mjs <> body srcs tokens - -body :: SrcMap -> [RichToken] -> Html -body srcs tokens = Html.body . Html.pre $ hypsrc +body :: SrcMaps -> HieAST PrintedType -> [Token] -> Html +body srcs ast tokens = Html.body . Html.pre $ hypsrc where - hypsrc = mconcat . map (richToken srcs) $ tokens - + hypsrc = renderWithAst srcs ast tokens header :: Maybe FilePath -> Maybe FilePath -> Html -header mcss mjs - | isNothing mcss && isNothing mjs = Html.noHtml -header mcss mjs = - Html.header $ css mcss <> js mjs +header Nothing Nothing = Html.noHtml +header mcss mjs = Html.header $ css mcss <> js mjs where css Nothing = Html.noHtml css (Just cssFile) = Html.thelink Html.noHtml ! @@ -51,25 +61,132 @@ header mcss mjs = , Html.src scriptFile ] + +splitTokens :: HieAST PrintedType -> [Token] -> ([Token],[Token],[Token]) +splitTokens ast toks = (before,during,after) + where + (before,rest) = span leftOf toks + (during,after) = span inAst rest + leftOf t = realSrcSpanEnd (tkSpan t) <= realSrcSpanStart nodeSp + inAst t = nodeSp `containsSpan` tkSpan t + nodeSp = nodeSpan ast + +-- | Turn a list of tokens into hyperlinked sources, threading in relevant link +-- information from the 'HieAST'. +renderWithAst :: SrcMaps -> HieAST PrintedType -> [Token] -> Html +renderWithAst srcs Node{..} toks = anchored $ case toks of + + [tok] | nodeSpan == tkSpan tok -> richToken srcs nodeInfo tok + + -- NB: the GHC lexer lexes backquoted identifiers and parenthesized operators + -- as multiple tokens. + -- + -- * @a `elem` b@ turns into @[a, `, elem, `, b]@ (excluding space tokens) + -- * @(+) 1 2@ turns into @[(, +, ), 1, 2]@ (excluding space tokens) + -- + -- However, the HIE ast considers @`elem`@ and @(+)@ to be single nodes. In + -- order to make sure these get hyperlinked properly, we intercept these + -- special sequences of tokens and merge them into just one identifier or + -- operator token. + [BacktickTok s1, tok @ Token{ tkType = TkIdentifier }, BacktickTok s2] + | realSrcSpanStart s1 == realSrcSpanStart nodeSpan + , realSrcSpanEnd s2 == realSrcSpanEnd nodeSpan + -> richToken srcs nodeInfo + (Token{ tkValue = "`" <> tkValue tok <> "`" + , tkType = TkOperator + , tkSpan = nodeSpan }) + [OpenParenTok s1, tok @ Token{ tkType = TkOperator }, CloseParenTok s2] + | realSrcSpanStart s1 == realSrcSpanStart nodeSpan + , realSrcSpanEnd s2 == realSrcSpanEnd nodeSpan + -> richToken srcs nodeInfo + (Token{ tkValue = "(" <> tkValue tok <> ")" + , tkType = TkOperator + , tkSpan = nodeSpan }) + + _ -> go nodeChildren toks + where + go _ [] = mempty + go [] xs = foldMap renderToken xs + go (cur:rest) xs = + foldMap renderToken before <> renderWithAst srcs cur during <> go rest after + where + (before,during,after) = splitTokens cur xs + anchored c = Map.foldrWithKey anchorOne c (nodeIdentifiers nodeInfo) + anchorOne n dets c = externalAnchor n d $ internalAnchor n d c + where d = identInfo dets + +renderToken :: Token -> Html +renderToken Token{..} + | BS.null tkValue = mempty + | tkType == TkSpace = renderSpace (srcSpanStartLine tkSpan) tkValue' + | otherwise = tokenSpan ! [ multiclass style ] + where + tkValue' = filterCRLF $ utf8DecodeByteString tkValue + style = tokenStyle tkType + tokenSpan = Html.thespan (Html.toHtml tkValue') + + -- | Given information about the source position of definitions, render a token -richToken :: SrcMap -> RichToken -> Html -richToken srcs (RichToken Token{..} details) - | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue - | otherwise = linked content +richToken :: SrcMaps -> NodeInfo PrintedType -> Token -> Html +richToken srcs details Token{..} + | tkType == TkSpace = renderSpace (srcSpanStartLine tkSpan) tkValue' + | otherwise = annotate details $ linked content where + tkValue' = filterCRLF $ utf8DecodeByteString tkValue content = tokenSpan ! [ multiclass style ] - tokenSpan = Html.thespan (Html.toHtml tkValue) - style = tokenStyle tkType ++ maybe [] richTokenStyle details + tokenSpan = Html.thespan (Html.toHtml tkValue') + style = tokenStyle tkType ++ concatMap (richTokenStyle (null (nodeType details))) contexts + + contexts = concatMap (Set.elems . identInfo) . Map.elems . nodeIdentifiers $ details + + -- pick an arbitary identifier to hyperlink with + identDet = Map.lookupMin . nodeIdentifiers $ details -- If we have name information, we can make links - linked = case details of - Just d -> externalAnchor d . internalAnchor d . hyperlink srcs d + linked = case identDet of + Just (n,_) -> hyperlink srcs n Nothing -> id -richTokenStyle :: TokenDetails -> [StyleClass] -richTokenStyle (RtkVar _) = ["hs-var"] -richTokenStyle (RtkType _) = ["hs-type"] -richTokenStyle _ = [] +-- | Remove CRLFs from source +filterCRLF :: String -> String +filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs +filterCRLF (c:cs) = c : filterCRLF cs +filterCRLF [] = [] + +annotate :: NodeInfo PrintedType -> Html -> Html +annotate ni content = + Html.thespan (annot <> content) ! [ Html.theclass "annot" ] + where + annot + | not (null annotation) = + Html.thespan (Html.toHtml annotation) ! [ Html.theclass "annottext" ] + | otherwise = mempty + annotation = typ ++ identTyps + typ = unlines (nodeType ni) + typedIdents = [ (n,t) | (n, identType -> Just t) <- Map.toList $ nodeIdentifiers ni ] + identTyps + | length typedIdents > 1 || null (nodeType ni) + = concatMap (\(n,t) -> printName n ++ " :: " ++ t ++ "\n") typedIdents + | otherwise = "" + + printName :: Either ModuleName Name -> String + printName = either moduleNameString getOccString + +richTokenStyle + :: Bool -- ^ are we lacking a type annotation? + -> ContextInfo -- ^ in what context did this token show up? + -> [StyleClass] +richTokenStyle True Use = ["hs-type"] +richTokenStyle False Use = ["hs-var"] +richTokenStyle _ RecField{} = ["hs-var"] +richTokenStyle _ PatternBind{} = ["hs-var"] +richTokenStyle _ MatchBind{} = ["hs-var"] +richTokenStyle _ TyVarBind{} = ["hs-type"] +richTokenStyle _ ValBind{} = ["hs-var"] +richTokenStyle _ TyDecl = ["hs-type"] +richTokenStyle _ ClassTyDecl{} = ["hs-type"] +richTokenStyle _ Decl{} = ["hs-var"] +richTokenStyle _ IEThing{} = [] -- could be either a value or type tokenStyle :: TokenType -> [StyleClass] tokenStyle TkIdentifier = ["hs-identifier"] @@ -87,61 +204,70 @@ tokenStyle TkPragma = ["hs-pragma"] tokenStyle TkUnknown = [] multiclass :: [StyleClass] -> HtmlAttr -multiclass = Html.theclass . intercalate " " +multiclass = Html.theclass . unwords + +externalAnchor :: Identifier -> Set.Set ContextInfo -> Html -> Html +externalAnchor (Right name) contexts content + | not (isInternalName name) + , any isBinding contexts + = Html.thespan content ! [ Html.identifier $ externalAnchorIdent name ] +externalAnchor _ _ content = content -externalAnchor :: TokenDetails -> Html -> Html -externalAnchor (RtkDecl name) content = - Html.anchor content ! [ Html.name $ externalAnchorIdent name ] -externalAnchor _ content = content +isBinding :: ContextInfo -> Bool +isBinding (ValBind RegularBind _ _) = True +isBinding PatternBind{} = True +isBinding Decl{} = True +isBinding (RecField RecFieldDecl _) = True +isBinding TyVarBind{} = True +isBinding ClassTyDecl{} = True +isBinding _ = False -internalAnchor :: TokenDetails -> Html -> Html -internalAnchor (RtkBind name) content = - Html.anchor content ! [ Html.name $ internalAnchorIdent name ] -internalAnchor _ content = content +internalAnchor :: Identifier -> Set.Set ContextInfo -> Html -> Html +internalAnchor (Right name) contexts content + | isInternalName name + , any isBinding contexts + = Html.thespan content ! [ Html.identifier $ internalAnchorIdent name ] +internalAnchor _ _ content = content -externalAnchorIdent :: GHC.Name -> String +externalAnchorIdent :: Name -> String externalAnchorIdent = hypSrcNameUrl -internalAnchorIdent :: GHC.Name -> String -internalAnchorIdent = ("local-" ++) . show . GHC.getKey . GHC.nameUnique - -hyperlink :: SrcMap -> TokenDetails -> Html -> Html -hyperlink srcs details = case rtkName details of - Left name -> - if GHC.isInternalName name - then internalHyperlink name - else externalNameHyperlink srcs name - Right name -> externalModHyperlink srcs name - -internalHyperlink :: GHC.Name -> Html -> Html -internalHyperlink name content = - Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ] - -externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html -externalNameHyperlink srcs name content = case Map.lookup mdl srcs of - Just SrcLocal -> Html.anchor content ! - [ Html.href $ hypSrcModuleNameUrl mdl name ] - Just (SrcExternal path) -> Html.anchor content ! - [ Html.href $ path hypSrcModuleNameUrl mdl name ] - Nothing -> content +internalAnchorIdent :: Name -> String +internalAnchorIdent = ("local-" ++) . show . getKey . nameUnique + +-- | Generate the HTML hyperlink for an identifier +hyperlink :: SrcMaps -> Identifier -> Html -> Html +hyperlink (srcs, srcs') ident = case ident of + Right name | isInternalName name -> internalHyperlink name + | otherwise -> externalNameHyperlink name + Left name -> externalModHyperlink name + where - mdl = GHC.nameModule name + internalHyperlink name content = + Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ] + + externalNameHyperlink name content = case Map.lookup mdl srcs of + Just SrcLocal -> Html.anchor content ! + [ Html.href $ hypSrcModuleNameUrl mdl name ] + Just (SrcExternal path) -> Html.anchor content ! + [ Html.href $ spliceURL Nothing (Just mdl) (Just name) Nothing (".." path) ] + Nothing -> content + where + mdl = nameModule name -externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html -externalModHyperlink srcs name content = - let srcs' = Map.mapKeys GHC.moduleName srcs in - case Map.lookup name srcs' of - Just SrcLocal -> Html.anchor content ! - [ Html.href $ hypSrcModuleUrl' name ] - Just (SrcExternal path) -> Html.anchor content ! - [ Html.href $ path hypSrcModuleUrl' name ] - Nothing -> content + externalModHyperlink moduleName content = + case Map.lookup moduleName srcs' of + Just SrcLocal -> Html.anchor content ! + [ Html.href $ hypSrcModuleUrl' moduleName ] + Just (SrcExternal path) -> Html.anchor content ! + [ Html.href $ spliceURL' Nothing (Just moduleName) Nothing Nothing (".." path) ] + Nothing -> content renderSpace :: Int -> String -> Html -renderSpace _ [] = Html.noHtml -renderSpace line ('\n':rest) = mconcat - [ Html.thespan . Html.toHtml $ "\n" +renderSpace !_ "" = Html.noHtml +renderSpace !line ('\n':rest) = mconcat + [ Html.thespan (Html.toHtml '\n') , lineAnchor (line + 1) , renderSpace (line + 1) rest ] @@ -151,4 +277,4 @@ renderSpace line space = lineAnchor :: Int -> Html -lineAnchor line = Html.anchor Html.noHtml ! [ Html.name $ hypSrcLineUrl line ] +lineAnchor line = Html.thespan Html.noHtml ! [ Html.identifier $ hypSrcLineUrl line ] diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs index e377471e..50916937 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs @@ -1,17 +1,24 @@ +{-# LANGUAGE PatternSynonyms, OverloadedStrings #-} module Haddock.Backends.Hyperlinker.Types where - import qualified GHC +import Data.ByteString ( ByteString ) + import Data.Map (Map) data Token = Token { tkType :: TokenType - , tkValue :: String + , tkValue :: ByteString -- ^ UTF-8 encoded , tkSpan :: {-# UNPACK #-} !Span } deriving (Show) +pattern BacktickTok, OpenParenTok, CloseParenTok :: Span -> Token +pattern BacktickTok sp = Token TkSpecial "`" sp +pattern OpenParenTok sp = Token TkSpecial "(" sp +pattern CloseParenTok sp = Token TkSpecial ")" sp + type Position = GHC.RealSrcLoc type Span = GHC.RealSrcSpan @@ -31,29 +38,6 @@ data TokenType | TkUnknown deriving (Show, Eq) - -data RichToken = RichToken - { rtkToken :: Token - , rtkDetails :: Maybe TokenDetails - } - -data TokenDetails - = RtkVar GHC.Name - | RtkType GHC.Name - | RtkBind GHC.Name - | RtkDecl GHC.Name - | RtkModule GHC.ModuleName - deriving (Eq) - - -rtkName :: TokenDetails -> Either GHC.Name GHC.ModuleName -rtkName (RtkVar name) = Left name -rtkName (RtkType name) = Left name -rtkName (RtkBind name) = Left name -rtkName (RtkDecl name) = Left name -rtkName (RtkModule name) = Right name - - -- | Path for making cross-package hyperlinks in generated sources. -- -- Used in 'SrcMap' to determine whether module originates in current package @@ -63,5 +47,5 @@ data SrcPath | SrcLocal -- | Mapping from modules to cross-package source paths. -type SrcMap = Map GHC.Module SrcPath +type SrcMaps = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath) diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs index 9de4a03d..4e8b88d2 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} module Haddock.Backends.Hyperlinker.Utils ( hypSrcDir, hypSrcModuleFile, hypSrcModuleFile' , hypSrcModuleUrl, hypSrcModuleUrl' @@ -6,21 +7,35 @@ module Haddock.Backends.Hyperlinker.Utils , hypSrcModuleNameUrl, hypSrcModuleLineUrl , hypSrcModuleUrlFormat , hypSrcModuleNameUrlFormat, hypSrcModuleLineUrlFormat - ) where + , spliceURL, spliceURL' + -- * HIE file processing + , PrintedType + , recoverFullIfaceTypes + ) where +import Haddock.Utils import Haddock.Backends.Xhtml.Utils import GHC -import FastString -import System.FilePath.Posix (()) +import HieTypes ( HieAST(..), HieType(..), HieArgs(..), TypeIndex, HieTypeFlat ) +import IfaceType +import Name ( getOccFS, getOccString ) +import Outputable ( showSDoc ) +import Var ( VarBndr(..) ) + +import System.FilePath.Posix ((), (<.>)) +import qualified Data.Array as A + +{-# INLINE hypSrcDir #-} hypSrcDir :: FilePath hypSrcDir = "src" +{-# INLINE hypSrcModuleFile #-} hypSrcModuleFile :: Module -> FilePath -hypSrcModuleFile = hypSrcModuleFile' . moduleName +hypSrcModuleFile m = moduleNameString (moduleName m) <.> "html" hypSrcModuleFile' :: ModuleName -> FilePath hypSrcModuleFile' mdl = spliceURL' @@ -32,20 +47,19 @@ hypSrcModuleUrl = hypSrcModuleFile hypSrcModuleUrl' :: ModuleName -> String hypSrcModuleUrl' = hypSrcModuleFile' +{-# INLINE hypSrcNameUrl #-} hypSrcNameUrl :: Name -> String -hypSrcNameUrl name = spliceURL - Nothing Nothing (Just name) Nothing nameFormat +hypSrcNameUrl = escapeStr . getOccString +{-# INLINE hypSrcLineUrl #-} hypSrcLineUrl :: Int -> String -hypSrcLineUrl line = spliceURL - Nothing Nothing Nothing (Just spn) lineFormat - where - loc = mkSrcLoc nilFS line 1 - spn = mkSrcSpan loc loc +hypSrcLineUrl line = "line-" ++ show line +{-# INLINE hypSrcModuleNameUrl #-} hypSrcModuleNameUrl :: Module -> Name -> String hypSrcModuleNameUrl mdl name = hypSrcModuleUrl mdl ++ "#" ++ hypSrcNameUrl name +{-# INLINE hypSrcModuleLineUrl #-} hypSrcModuleLineUrl :: Module -> Int -> String hypSrcModuleLineUrl mdl line = hypSrcModuleUrl mdl ++ "#" ++ hypSrcLineUrl line @@ -66,3 +80,65 @@ nameFormat = "%{NAME}" lineFormat :: String lineFormat = "line-%{LINE}" + + +-- * HIE file procesddsing + +-- This belongs in GHC's HieUtils... + +-- | Pretty-printed type, ready to be turned into HTML by @xhtml@ +type PrintedType = String + +-- | Expand the flattened HIE AST into one where the types printed out and +-- ready for end-users to look at. +-- +-- Using just primitives found in GHC's HIE utilities, we could write this as +-- follows: +-- +-- > 'recoverFullIfaceTypes' dflags hieTypes hieAst +-- > = 'fmap' (\ti -> 'showSDoc' df . +-- > 'pprIfaceType' $ +-- > 'recoverFullType' ti hieTypes) +-- > hieAst +-- +-- However, this is very inefficient (both in time and space) because the +-- mutliple calls to 'recoverFullType' don't share intermediate results. This +-- function fixes that. +recoverFullIfaceTypes + :: DynFlags + -> A.Array TypeIndex HieTypeFlat -- ^ flat types + -> HieAST TypeIndex -- ^ flattened AST + -> HieAST PrintedType -- ^ full AST +recoverFullIfaceTypes df flattened ast = fmap (printed A.!) ast + where + + -- Splitting this out into its own array is also important: we don't want + -- to pretty print the same type many times + printed :: A.Array TypeIndex PrintedType + printed = fmap (showSDoc df . pprIfaceType) unflattened + + -- The recursion in 'unflattened' is crucial - it's what gives us sharing + -- between the IfaceType's produced + unflattened :: A.Array TypeIndex IfaceType + unflattened = fmap (\flatTy -> go (fmap (unflattened A.!) flatTy)) flattened + + -- Unfold an 'HieType' whose subterms have already been unfolded + go :: HieType IfaceType -> IfaceType + go (HTyVarTy n) = IfaceTyVar (getOccFS n) + go (HAppTy a b) = IfaceAppTy a (hieToIfaceArgs b) + go (HLitTy l) = IfaceLitTy l + go (HForAllTy ((n,k),af) t) = let b = (getOccFS n, k) + in IfaceForAllTy (Bndr (IfaceTvBndr b) af) t + go (HFunTy a b) = IfaceFunTy a b + go (HQualTy con b) = IfaceDFunTy con b + go (HCastTy a) = a + go HCoercionTy = IfaceTyVar "" + go (HTyConApp a xs) = IfaceTyConApp a (hieToIfaceArgs xs) + + -- This isn't fully faithful - we can't produce the 'Inferred' case + hieToIfaceArgs :: HieArgs IfaceType -> IfaceAppArgs + hieToIfaceArgs (HieArgs args) = go' args + where + go' [] = IA_Nil + go' ((True ,x):xs) = IA_Arg x Required $ go' xs + go' ((False,x):xs) = IA_Arg x Specified $ go' xs -- cgit v1.2.3