aboutsummaryrefslogtreecommitdiff
path: root/haddock-api/src/Haddock/Backends/Hyperlinker
diff options
context:
space:
mode:
Diffstat (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker')
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs219
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs387
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs276
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs36
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs98
5 files changed, 489 insertions, 527 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
deleted file mode 100644
index 0ecf7109..00000000
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
+++ /dev/null
@@ -1,219 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE RankNTypes #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE TypeApplications #-}
-
-module Haddock.Backends.Hyperlinker.Ast (enrich) where
-
-
-import qualified Haddock.Syb as Syb
-import Haddock.Backends.Hyperlinker.Types
-
-import qualified GHC
-import qualified SrcLoc
-import qualified Outputable as GHC
-
-import Control.Applicative
-import Control.Monad (guard)
-import Data.Data
-import qualified Data.Map.Strict as Map
-import Data.Maybe
-
-import Prelude hiding (span)
-
-everythingInRenamedSource :: (Alternative f, Data x)
- => (forall a. Data a => a -> f r) -> x -> f r
-everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f
-
--- | Add more detailed information to token stream using GHC API.
-enrich :: GHC.RenamedSource -> [Token] -> [RichToken]
-enrich src =
- map $ \token -> RichToken
- { rtkToken = token
- , rtkDetails = enrichToken token detailsMap
- }
- where
- detailsMap =
- mkDetailsMap (concatMap ($ src)
- [ variables
- , types
- , decls
- , binds
- , imports
- ])
-
-type LTokenDetails = [(GHC.SrcSpan, TokenDetails)]
-
--- | A map containing association between source locations and "details" of
--- this location.
---
-type DetailsMap = Map.Map Position (Span, TokenDetails)
-
-mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
-mkDetailsMap xs =
- Map.fromListWith select_details [ (start, (span, token_details))
- | (ghc_span, token_details) <- xs
- , GHC.RealSrcSpan span <- [ghc_span]
- , let start = SrcLoc.realSrcSpanStart span
- ]
- where
- -- favour token details which appear earlier in the list
- select_details _new old = old
-
-lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
-lookupBySpan span details = do
- let pos = SrcLoc.realSrcSpanStart span
- (_, (tok_span, tok_details)) <- Map.lookupLE pos details
- guard (tok_span `SrcLoc.containsSpan` span)
- return tok_details
-
-enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
-enrichToken (Token typ _ spn) dm
- | typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
-enrichToken _ _ = Nothing
-
--- | Obtain details map for variables ("normally" used identifiers).
-variables :: GHC.RenamedSource -> LTokenDetails
-variables =
- everythingInRenamedSource (var `Syb.combine` rec)
- where
- var term = case cast term of
- (Just ((GHC.L sspan (GHC.HsVar _ name)) :: GHC.LHsExpr GHC.GhcRn)) ->
- pure (sspan, RtkVar (GHC.unLoc name))
- (Just (GHC.L _ (GHC.RecordCon _ (GHC.L sspan name) _))) ->
- pure (sspan, RtkVar name)
- _ -> empty
- rec term = case cast term of
- Just (GHC.HsRecField (GHC.L sspan name) (_ :: GHC.LHsExpr GHC.GhcRn) _) ->
- pure (sspan, RtkVar name)
- _ -> empty
-
--- | Obtain details map for types.
-types :: GHC.RenamedSource -> LTokenDetails
-types = everythingInRenamedSource ty
- where
- ty :: forall a. Data a => a -> [(GHC.SrcSpan, TokenDetails)]
- ty term = case cast term of
- (Just ((GHC.L sspan (GHC.HsTyVar _ _ name)) :: GHC.LHsType GHC.GhcRn)) ->
- pure (sspan, RtkType (GHC.unLoc name))
- (Just ((GHC.L sspan (GHC.HsOpTy _ l name r)) :: GHC.LHsType GHC.GhcRn)) ->
- (sspan, RtkType (GHC.unLoc name)):(ty l ++ ty r)
- _ -> empty
-
--- | Obtain details map for identifier bindings.
---
--- That includes both identifiers bound by pattern matching or declared using
--- ordinary assignment (in top-level declarations, let-expressions and where
--- clauses).
-
-binds :: GHC.RenamedSource -> LTokenDetails
-binds = everythingInRenamedSource
- (fun `Syb.combine` pat `Syb.combine` tvar)
- where
- fun term = case cast term of
- (Just (GHC.FunBind _ (GHC.L sspan name) _ _ _ :: GHC.HsBind GHC.GhcRn)) ->
- pure (sspan, RtkBind name)
- (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.L sspan name) args _ _))) ->
- pure (sspan, RtkBind name) ++ everythingInRenamedSource patsyn_binds args
- _ -> empty
- patsyn_binds term = case cast term of
- (Just (GHC.L sspan (name :: GHC.Name))) -> pure (sspan, RtkVar name)
- _ -> empty
- pat term = case cast term of
- (Just ((GHC.L sspan (GHC.VarPat _ name)) :: GHC.LPat GHC.GhcRn)) ->
- pure (sspan, RtkBind (GHC.unLoc name))
- (Just (GHC.L _ (GHC.ConPatIn (GHC.L sspan name) recs))) ->
- [(sspan, RtkVar name)] ++ everythingInRenamedSource rec recs
- (Just (GHC.L _ (GHC.AsPat _ (GHC.L sspan name) _))) ->
- pure (sspan, RtkBind name)
- _ -> empty
- rec term = case cast term of
- (Just (GHC.HsRecField (GHC.L sspan name) (_ :: GHC.LPat GHC.GhcRn) _)) ->
- pure (sspan, RtkVar name)
- _ -> empty
- tvar term = case cast term of
- (Just ((GHC.L sspan (GHC.UserTyVar _ name)) :: GHC.LHsTyVarBndr GHC.GhcRn)) ->
- pure (sspan, RtkBind (GHC.unLoc name))
- (Just (GHC.L _ (GHC.KindedTyVar _ (GHC.L sspan name) _))) ->
- pure (sspan, RtkBind name)
- _ -> empty
-
--- | Obtain details map for top-level declarations.
-decls :: GHC.RenamedSource -> LTokenDetails
-decls (group, _, _, _) = concatMap ($ group)
- [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
- , everythingInRenamedSource fun . GHC.hs_valds
- , everythingInRenamedSource fix . GHC.hs_fixds
- , everythingInRenamedSource (con `Syb.combine` ins)
- ]
- where
- typ (GHC.L _ t) = case t of
- GHC.DataDecl { tcdLName = name } -> pure . decl $ name
- GHC.SynDecl _ name _ _ _ -> pure . decl $ name
- GHC.FamDecl _ fam -> pure . decl $ GHC.fdLName fam
- GHC.ClassDecl{..} ->
- [decl tcdLName]
- ++ concatMap sig tcdSigs
- ++ concatMap tyfam tcdATs
- GHC.XTyClDecl {} -> GHC.panic "haddock:decls"
- fun term = case cast term of
- (Just (GHC.FunBind _ (GHC.L sspan name) _ _ _ :: GHC.HsBind GHC.GhcRn))
- | GHC.isExternalName name -> pure (sspan, RtkDecl name)
- (Just (GHC.PatSynBind _ (GHC.PSB _ (GHC.L sspan name) _ _ _)))
- | GHC.isExternalName name -> pure (sspan, RtkDecl name)
- _ -> empty
- con term = case cast term of
- (Just (cdcl :: GHC.ConDecl GHC.GhcRn)) ->
- map decl (GHC.getConNames cdcl)
- ++ everythingInRenamedSource fld cdcl
- Nothing -> empty
- ins term = case cast term of
- (Just ((GHC.DataFamInstD _ (GHC.DataFamInstDecl eqn))
- :: GHC.InstDecl GHC.GhcRn))
- -> pure . tyref $ GHC.feqn_tycon $ GHC.hsib_body eqn
- (Just (GHC.TyFamInstD _ (GHC.TyFamInstDecl eqn))) ->
- pure . tyref $ GHC.feqn_tycon $ GHC.hsib_body eqn
- _ -> empty
- fld term = case cast term of
- Just (field :: GHC.ConDeclField GHC.GhcRn)
- -> map (decl . fmap GHC.extFieldOcc) $ GHC.cd_fld_names field
- Nothing -> empty
- fix term = case cast term of
- Just ((GHC.FixitySig _ names _) :: GHC.FixitySig GHC.GhcRn)
- -> map (\(GHC.L sspan x) -> (sspan, RtkVar x)) names
- Just ((GHC.XFixitySig {}) :: GHC.FixitySig GHC.GhcRn)
- -> GHC.panic "haddock:decls"
- Nothing -> empty
- tyfam (GHC.L _ (GHC.FamilyDecl{..})) = [decl fdLName]
- tyfam (GHC.L _ (GHC.XFamilyDecl {})) = GHC.panic "haddock:dels"
- sig (GHC.L _ (GHC.TypeSig _ names _)) = map decl names
- sig (GHC.L _ (GHC.PatSynSig _ names _)) = map decl names
- sig (GHC.L _ (GHC.ClassOpSig _ _ names _)) = map decl names
- sig _ = []
- decl (GHC.L sspan name) = (sspan, RtkDecl name)
- tyref (GHC.L sspan name) = (sspan, RtkType name)
-
--- | Obtain details map for import declarations.
---
--- This map also includes type and variable details for items in export and
--- import lists.
-imports :: GHC.RenamedSource -> LTokenDetails
-imports src@(_, imps, _, _) =
- everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps
- where
- ie term = case cast term of
- (Just ((GHC.IEVar _ v) :: GHC.IE GHC.GhcRn)) -> pure $ var $ GHC.ieLWrappedName v
- (Just (GHC.IEThingAbs _ t)) -> pure $ typ $ GHC.ieLWrappedName t
- (Just (GHC.IEThingAll _ t)) -> pure $ typ $ GHC.ieLWrappedName t
- (Just (GHC.IEThingWith _ t _ vs _fls)) ->
- [typ $ GHC.ieLWrappedName t] ++ map (var . GHC.ieLWrappedName) vs
- (Just (GHC.IEModuleContents _ m)) -> pure $ modu m
- _ -> empty
- typ (GHC.L sspan name) = (sspan, RtkType name)
- var (GHC.L sspan name) = (sspan, RtkVar name)
- modu (GHC.L sspan name) = (sspan, RtkModule name)
- imp idecl
- | not . GHC.ideclImplicit $ idecl = Just (modu (GHC.ideclName idecl))
- | otherwise = Nothing
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index acb2c892..1d5576cc 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -1,213 +1,212 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
module Haddock.Backends.Hyperlinker.Parser (parse) where
-import Data.Either ( isRight, isLeft )
-import Data.List ( foldl', isPrefixOf, isSuffixOf )
-import Data.Maybe ( maybeToList )
-import Data.Char ( isSpace )
-import qualified Text.Read as R
+import Control.Applicative ( Alternative(..) )
+import Data.List ( isPrefixOf, isSuffixOf )
-import GHC ( DynFlags, addSourceToTokens )
-import SrcLoc
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as BSC
+
+import GHC.LanguageExtensions.Type
+
+import BasicTypes ( IntegralLit(..) )
+import DynFlags
+import qualified EnumSet as E
+import ErrUtils ( emptyMessages )
import FastString ( mkFastString )
-import StringBuffer ( stringToStringBuffer )
-import Lexer ( Token(..) )
-import qualified Lexer as L
+import Lexer ( P(..), ParseResult(..), PState(..), Token(..)
+ , mkPStatePure, lexer, mkParserFlags' )
+import Outputable ( showSDoc, panic )
+import SrcLoc
+import StringBuffer ( StringBuffer, atEnd )
import Haddock.Backends.Hyperlinker.Types as T
-
+import Haddock.GhcUtils
-- | Turn source code string into a stream of more descriptive tokens.
--
--- Result should retain original file layout (including comments, whitespace,
--- etc.), i.e. the following "law" should hold:
---
--- prop> concat . map tkValue . parse = id
---
--- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v',
--- characters, since GHC transforms those into ' ' and '\n')
-parse :: DynFlags -> FilePath -> String -> [T.Token]
-parse dflags fp = ghcToks . processCPP dflags fp . filterCRLF
+-- Result should retain original file layout (including comments,
+-- whitespace, and CPP).
+parse
+ :: CompilerInfo -- ^ Underlying CC compiler (whatever expanded CPP)
+ -> DynFlags -- ^ Flags for this module
+ -> FilePath -- ^ Path to the source of this module
+ -> BS.ByteString -- ^ Raw UTF-8 encoded source of this module
+ -> [T.Token]
+parse comp dflags fpath bs = case unP (go False []) initState of
+ POk _ toks -> reverse toks
+ PFailed _ ss errMsg -> panic $ "Hyperlinker parse error at " ++ show ss ++
+ ": " ++ showSDoc dflags errMsg
where
- -- Remove CRLFs from source
- filterCRLF :: String -> String
- filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs
- filterCRLF (c:cs) = c : filterCRLF cs
- filterCRLF [] = []
--- | Parse the source into tokens using the GHC lexer.
+ initState = mkPStatePure pflags buf start
+ buf = stringBufferFromByteString bs
+ start = mkRealSrcLoc (mkFastString fpath) 1 1
+ needPragHack' = needPragHack comp dflags
+ pflags = mkParserFlags' (warningFlags dflags)
+ (extensionFlags dflags)
+ (thisPackage dflags)
+ (safeImportsOn dflags)
+ False -- lex Haddocks as comment tokens
+ True -- produce comment tokens
+ False -- produce position pragmas tokens
+
+ go :: Bool -- ^ are we currently in a pragma?
+ -> [T.Token] -- ^ tokens accumulated so far (in reverse)
+ -> P [T.Token]
+ go inPrag toks = do
+ (b, _) <- getInput
+ if not (atEnd b)
+ then do
+ (newToks, inPrag') <- parseCppLine <|> parsePlainTok inPrag <|> unknownLine
+ go inPrag' (newToks ++ toks)
+ else
+ pure toks
+
+ -- | Like 'Lexer.lexer', but slower, with a better API, and filtering out empty tokens
+ wrappedLexer :: P (RealLocated Lexer.Token)
+ wrappedLexer = Lexer.lexer False andThen
+ where andThen (L (RealSrcSpan s) t)
+ | srcSpanStartLine s /= srcSpanEndLine s ||
+ srcSpanStartCol s /= srcSpanEndCol s
+ = pure (L s t)
+ andThen (L (RealSrcSpan s) ITeof) = pure (L s ITeof)
+ andThen _ = wrappedLexer
+
+ -- | Try to parse a CPP line (can fail)
+ parseCppLine :: P ([T.Token], Bool)
+ parseCppLine = do
+ (b, l) <- getInput
+ case tryCppLine l b of
+ Just (cppBStr, l', b')
+ -> let cppTok = T.Token { tkType = TkCpp
+ , tkValue = cppBStr
+ , tkSpan = mkRealSrcSpan l l' }
+ in setInput (b', l') *> pure ([cppTok], False)
+ _ -> empty
+
+ -- | Try to parse a regular old token (can fail)
+ parsePlainTok :: Bool -> P ([T.Token], Bool) -- return list is only ever 0-2 elements
+ parsePlainTok inPrag = do
+ (bInit, lInit) <- getInput
+ L sp tok <- Lexer.lexer False return
+ (bEnd, _) <- getInput
+ case sp of
+ UnhelpfulSpan _ -> pure ([], False) -- pretend the token never existed
+ RealSrcSpan rsp -> do
+ let typ = if inPrag then TkPragma else classify tok
+ RealSrcLoc lStart = srcSpanStart sp -- safe since @sp@ is real
+ (spaceBStr, bStart) = spanPosition lInit lStart bInit
+ inPragDef = inPragma inPrag tok
+
+ (bEnd', inPrag') <- case tok of
+
+ -- Update internal line + file position if this is a LINE pragma
+ ITline_prag _ -> tryOrElse (bEnd, inPragDef) $ do
+ L _ (ITinteger (IL { il_value = line })) <- wrappedLexer
+ L _ (ITstring _ file) <- wrappedLexer
+ L spF ITclose_prag <- wrappedLexer
+
+ let newLoc = mkRealSrcLoc file (fromIntegral line - 1) (srcSpanEndCol spF)
+ (bEnd'', _) <- getInput
+ setInput (bEnd'', newLoc)
+
+ pure (bEnd'', False)
+
+ -- Update internal column position if this is a COLUMN pragma
+ ITcolumn_prag _ -> tryOrElse (bEnd, inPragDef) $ do
+ L _ (ITinteger (IL { il_value = col })) <- wrappedLexer
+ L spF ITclose_prag <- wrappedLexer
+
+ let newLoc = mkRealSrcLoc (srcSpanFile spF) (srcSpanEndLine spF) (fromIntegral col)
+ (bEnd'', _) <- getInput
+ setInput (bEnd'', newLoc)
+
+ pure (bEnd'', False)
+
+ -- See 'needPragHack'
+ ITclose_prag{}
+ | needPragHack'
+ , '\n' `BSC.elem` spaceBStr
+ -> getInput >>= \(b,p) -> setInput (b,advanceSrcLoc p '\n') >> pure (bEnd, False)
+
+ _ -> pure (bEnd, inPragDef)
+
+ let tokBStr = splitStringBuffer bStart bEnd'
+ plainTok = T.Token { tkType = typ
+ , tkValue = tokBStr
+ , tkSpan = rsp }
+ spaceTok = T.Token { tkType = TkSpace
+ , tkValue = spaceBStr
+ , tkSpan = mkRealSrcSpan lInit lStart }
+
+ pure (plainTok : [ spaceTok | not (BS.null spaceBStr) ], inPrag')
+
+ -- | Parse whatever remains of the line as an unknown token (can't fail)
+ unknownLine :: P ([T.Token], Bool)
+ unknownLine = do
+ (b, l) <- getInput
+ let (unkBStr, l', b') = spanLine l b
+ unkTok = T.Token { tkType = TkUnknown
+ , tkValue = unkBStr
+ , tkSpan = mkRealSrcSpan l l' }
+ setInput (b', l')
+ pure ([unkTok], False)
+
+
+-- | This is really, really, /really/ gross. Problem: consider a Haskell
+-- file that looks like:
--
--- * CPP lines are removed and reinserted as line-comments
--- * top-level file pragmas are parsed as block comments (see the
--- 'ITblockComment' case of 'classify' for more details)
+-- @
+-- {-# LANGUAGE CPP #-}
+-- module SomeMod where
--
-processCPP :: DynFlags -- ^ GHC's flags
- -> FilePath -- ^ source file name (for position information)
- -> String -- ^ source file contents
- -> [(Located L.Token, String)]
-processCPP dflags fpath s = addSrc . go start . splitCPP $ s
- where
- start = mkRealSrcLoc (mkFastString fpath) 1 1
- addSrc = addSourceToTokens start (stringToStringBuffer s)
-
- -- Transform a list of Haskell/CPP lines into a list of tokens
- go :: RealSrcLoc -> [Either String String] -> [Located L.Token]
- go _ [] = []
- go pos ls =
- let (hLinesRight, ls') = span isRight ls
- (cppLinesLeft, rest) = span isLeft ls'
-
- hSrc = concat [ hLine | Right hLine <- hLinesRight ]
- cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ]
-
- in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of
-
- -- Stuff that fails to lex gets turned into comments
- L.PFailed _ _ss _msg ->
- let (src_pos, failed) = mkToken ITunknown pos hSrc
- (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc
- in failed : cpp : go new_pos rest
-
- -- Successfully lexed
- L.POk ss toks ->
- let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc
- in toks ++ [cpp] ++ go new_pos rest
-
- -- Manually make a token from a 'String', advancing the cursor position
- mkToken tok start' str =
- let end = foldl' advanceSrcLoc start' str
- in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str))
-
-
--- | Split apart the initial file into Haskell source lines ('Left' entries) and
--- CPP lines ('Right' entries).
+-- #define SIX 6
+--
+-- {-# INLINE foo
+-- #-}
+-- foo = 1
+-- @
--
--- All characters in the input are present in the output:
+-- Clang's CPP replaces the @#define SIX 6@ line with an empty line (as it
+-- should), but get confused about @#-}@. I'm guessing it /starts/ by
+-- parsing that as a pre-processor directive and, when it fails to, it just
+-- leaves the line alone. HOWEVER, it still adds an extra newline. =.=
--
--- prop> concat . map (either id id) . splitCPP = id
-splitCPP :: String -> [Either String String]
-splitCPP "" = []
-splitCPP s | isCPPline s = Left l : splitCPP rest
- | otherwise = Right l : splitCPP rest
+-- This function makes sure that the Hyperlinker backend also adds that
+-- extra newline (or else our spans won't line up with GHC's anymore).
+needPragHack :: CompilerInfo -> DynFlags -> Bool
+needPragHack comp dflags = isCcClang && E.member Cpp (extensionFlags dflags)
where
- ~(l, rest) = spanToNewline 0 s
+ isCcClang = case comp of
+ GCC -> False
+ Clang -> True
+ AppleClang -> True
+ AppleClang51 -> True
+ UnknownCC -> False
+-- | Get the input
+getInput :: P (StringBuffer, RealSrcLoc)
+getInput = P $ \p @ PState { buffer = buf, loc = srcLoc } -> POk p (buf, srcLoc)
--- | Heuristic to decide if a line is going to be a CPP line. This should be a
--- cheap operation since it is going to be run on every line being processed.
---
--- Right now it just checks if the first non-whitespace character in the first
--- five characters of the line is a '#':
---
--- >>> isCPPline "#define FOO 1"
--- True
---
--- >>> isCPPline "\t\t #ifdef GHC"
--- True
---
--- >>> isCPPline " #endif"
--- False
---
-isCPPline :: String -> Bool
-isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
+-- | Set the input
+setInput :: (StringBuffer, RealSrcLoc) -> P ()
+setInput (buf, srcLoc) = P $ \p -> POk (p { buffer = buf, loc = srcLoc }) ()
--- | Split a "line" off the front of a string, hopefully without cutting tokens
--- in half. I say "hopefully" because knowing what a token is requires lexing,
--- yet lexing depends on this function.
---
--- All characters in the input are present in the output:
---
--- prop> curry (++) . spanToNewLine 0 = id
-spanToNewline :: Int -- ^ open '{-'
- -> String -- ^ input
- -> (String, String)
-
--- Base case and space characters
-spanToNewline _ "" = ("", "")
-spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
-spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
-spanToNewline n ('\\':'\n':str) =
- let (str', rest) = spanToNewline n str
- in ('\\':'\n':str', rest)
-
--- Block comments
-spanToNewline n ('{':'-':str) =
- let (str', rest) = spanToNewline (n+1) str
- in ('{':'-':str', rest)
-spanToNewline n ('-':'}':str) =
- let (str', rest) = spanToNewline (n-1) str
- in ('-':'}':str', rest)
-
--- When not in a block comment, try to lex a Haskell token
-spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) =
- if all (== '-') lexed && length lexed >= 2
- -- A Haskell line comment
- then case span (/= '\n') str' of
- (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest)
- (_, _) -> (str, "")
-
- -- An actual Haskell token
- else let (str'', rest) = spanToNewline 0 str'
- in (lexed ++ str'', rest)
-
--- In all other cases, advance one character at a time
-spanToNewline n (c:str) =
- let (str', rest) = spanToNewline n str
- in (c:str', rest)
-
-
--- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of
--- Haddock's 'T.Token'.
-ghcToks :: [(Located L.Token, String)] -> [T.Token]
-ghcToks = reverse . (\(_,ts,_) -> ts) . foldl' go (start, [], False)
- where
- start = mkRealSrcLoc (mkFastString "lexing") 1 1
-
- go :: (RealSrcLoc, [T.Token], Bool)
- -- ^ current position, tokens accumulated, currently in pragma (or not)
-
- -> (Located L.Token, String)
- -- ^ next token, its content
-
- -> (RealSrcLoc, [T.Token], Bool)
- -- ^ new position, new tokens accumulated, currently in pragma (or not)
-
- go (pos, toks, in_prag) (L l tok, raw) =
- ( next_pos
- , classifiedTok ++ maybeToList white ++ toks
- , inPragma in_prag tok
- )
- where
- (next_pos, white) = mkWhitespace pos l
-
- classifiedTok = [ Token (classify' tok) raw rss
- | RealSrcSpan rss <- [l]
- , not (null raw)
- ]
-
- classify' | in_prag = const TkPragma
- | otherwise = classify
-
-
--- | Find the correct amount of whitespace between tokens.
-mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token)
-mkWhitespace prev spn =
- case spn of
- UnhelpfulSpan _ -> (prev,Nothing)
- RealSrcSpan s | null wsstring -> (end, Nothing)
- | otherwise -> (end, Just (Token TkSpace wsstring wsspan))
- where
- start = realSrcSpanStart s
- end = realSrcSpanEnd s
- wsspan = mkRealSrcSpan prev start
- nls = srcLocLine start - srcLocLine prev
- spaces = if nls == 0 then srcLocCol start - srcLocCol prev
- else srcLocCol start - 1
- wsstring = replicate nls '\n' ++ replicate spaces ' '
+-- | Orphan instance that adds backtracking to 'P'
+instance Alternative P where
+ empty = P $ \_ -> PFailed (const emptyMessages) noSrcSpan "Alterative.empty"
+ P x <|> P y = P $ \s -> case x s of { p@POk{} -> p
+ ; _ -> y s }
+-- | Try a parser. If it fails, backtrack and return the pure value.
+tryOrElse :: a -> P a -> P a
+tryOrElse x p = p <|> pure x
-- | Classify given tokens as appropriate Haskell token type.
-classify :: L.Token -> TokenType
+classify :: Lexer.Token -> TokenType
classify tok =
case tok of
ITas -> TkKeyword
@@ -378,15 +377,11 @@ classify tok =
ITLarrowtail {} -> TkGlyph
ITRarrowtail {} -> TkGlyph
+ ITcomment_line_prag -> TkUnknown
ITunknown {} -> TkUnknown
ITeof -> TkUnknown
- -- Line comments are only supposed to start with '--'. Starting with '#'
- -- means that this was probably a CPP.
- ITlineComment s
- | isCPPline s -> TkCpp
- | otherwise -> TkComment
-
+ ITlineComment {} -> TkComment
ITdocCommentNext {} -> TkComment
ITdocCommentPrev {} -> TkComment
ITdocCommentNamed {} -> TkComment
@@ -403,9 +398,9 @@ classify tok =
| otherwise -> TkComment
-- | Classify given tokens as beginning pragmas (or not).
-inPragma :: Bool -- ^ currently in pragma
- -> L.Token -- ^ current token
- -> Bool -- ^ new information about whether we are in a pragma
+inPragma :: Bool -- ^ currently in pragma
+ -> Lexer.Token -- ^ current token
+ -> Bool -- ^ new information about whether we are in a pragma
inPragma _ ITclose_prag = False
inPragma True _ = True
inPragma False tok =
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
index d7ea70a6..a4dcb77b 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
@@ -1,4 +1,8 @@
{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE BangPatterns #-}
module Haddock.Backends.Hyperlinker.Renderer (render) where
@@ -6,15 +10,19 @@ module Haddock.Backends.Hyperlinker.Renderer (render) where
import Haddock.Backends.Hyperlinker.Types
import Haddock.Backends.Hyperlinker.Utils
-import qualified GHC
-import qualified Name as GHC
-import qualified Unique as GHC
+import qualified Data.ByteString as BS
+
+import HieTypes
+import Module ( ModuleName, moduleNameString )
+import Name ( getOccString, isInternalName, Name, nameModule, nameUnique )
+import SrcLoc
+import Unique ( getKey )
+import Encoding ( utf8DecodeByteString )
import System.FilePath.Posix ((</>))
-import Data.List
-import Data.Maybe
import qualified Data.Map as Map
+import qualified Data.Set as Set
import Text.XHtml (Html, HtmlAttr, (!))
import qualified Text.XHtml as Html
@@ -22,22 +30,24 @@ import qualified Text.XHtml as Html
type StyleClass = String
+-- | Produce the HTML corresponding to a hyperlinked Haskell source
+render
+ :: Maybe FilePath -- ^ path to the CSS file
+ -> Maybe FilePath -- ^ path to the JS file
+ -> SrcMaps -- ^ Paths to sources
+ -> HieAST PrintedType -- ^ ASTs from @.hie@ files
+ -> [Token] -- ^ tokens to render
+ -> Html
+render mcss mjs srcs ast tokens = header mcss mjs <> body srcs ast tokens
-render :: Maybe FilePath -> Maybe FilePath -> SrcMap -> [RichToken]
- -> Html
-render mcss mjs srcs tokens = header mcss mjs <> body srcs tokens
-
-body :: SrcMap -> [RichToken] -> Html
-body srcs tokens = Html.body . Html.pre $ hypsrc
+body :: SrcMaps -> HieAST PrintedType -> [Token] -> Html
+body srcs ast tokens = Html.body . Html.pre $ hypsrc
where
- hypsrc = mconcat . map (richToken srcs) $ tokens
-
+ hypsrc = renderWithAst srcs ast tokens
header :: Maybe FilePath -> Maybe FilePath -> Html
-header mcss mjs
- | isNothing mcss && isNothing mjs = Html.noHtml
-header mcss mjs =
- Html.header $ css mcss <> js mjs
+header Nothing Nothing = Html.noHtml
+header mcss mjs = Html.header $ css mcss <> js mjs
where
css Nothing = Html.noHtml
css (Just cssFile) = Html.thelink Html.noHtml !
@@ -51,25 +61,132 @@ header mcss mjs =
, Html.src scriptFile
]
+
+splitTokens :: HieAST PrintedType -> [Token] -> ([Token],[Token],[Token])
+splitTokens ast toks = (before,during,after)
+ where
+ (before,rest) = span leftOf toks
+ (during,after) = span inAst rest
+ leftOf t = realSrcSpanEnd (tkSpan t) <= realSrcSpanStart nodeSp
+ inAst t = nodeSp `containsSpan` tkSpan t
+ nodeSp = nodeSpan ast
+
+-- | Turn a list of tokens into hyperlinked sources, threading in relevant link
+-- information from the 'HieAST'.
+renderWithAst :: SrcMaps -> HieAST PrintedType -> [Token] -> Html
+renderWithAst srcs Node{..} toks = anchored $ case toks of
+
+ [tok] | nodeSpan == tkSpan tok -> richToken srcs nodeInfo tok
+
+ -- NB: the GHC lexer lexes backquoted identifiers and parenthesized operators
+ -- as multiple tokens.
+ --
+ -- * @a `elem` b@ turns into @[a, `, elem, `, b]@ (excluding space tokens)
+ -- * @(+) 1 2@ turns into @[(, +, ), 1, 2]@ (excluding space tokens)
+ --
+ -- However, the HIE ast considers @`elem`@ and @(+)@ to be single nodes. In
+ -- order to make sure these get hyperlinked properly, we intercept these
+ -- special sequences of tokens and merge them into just one identifier or
+ -- operator token.
+ [BacktickTok s1, tok @ Token{ tkType = TkIdentifier }, BacktickTok s2]
+ | realSrcSpanStart s1 == realSrcSpanStart nodeSpan
+ , realSrcSpanEnd s2 == realSrcSpanEnd nodeSpan
+ -> richToken srcs nodeInfo
+ (Token{ tkValue = "`" <> tkValue tok <> "`"
+ , tkType = TkOperator
+ , tkSpan = nodeSpan })
+ [OpenParenTok s1, tok @ Token{ tkType = TkOperator }, CloseParenTok s2]
+ | realSrcSpanStart s1 == realSrcSpanStart nodeSpan
+ , realSrcSpanEnd s2 == realSrcSpanEnd nodeSpan
+ -> richToken srcs nodeInfo
+ (Token{ tkValue = "(" <> tkValue tok <> ")"
+ , tkType = TkOperator
+ , tkSpan = nodeSpan })
+
+ _ -> go nodeChildren toks
+ where
+ go _ [] = mempty
+ go [] xs = foldMap renderToken xs
+ go (cur:rest) xs =
+ foldMap renderToken before <> renderWithAst srcs cur during <> go rest after
+ where
+ (before,during,after) = splitTokens cur xs
+ anchored c = Map.foldrWithKey anchorOne c (nodeIdentifiers nodeInfo)
+ anchorOne n dets c = externalAnchor n d $ internalAnchor n d c
+ where d = identInfo dets
+
+renderToken :: Token -> Html
+renderToken Token{..}
+ | BS.null tkValue = mempty
+ | tkType == TkSpace = renderSpace (srcSpanStartLine tkSpan) tkValue'
+ | otherwise = tokenSpan ! [ multiclass style ]
+ where
+ tkValue' = filterCRLF $ utf8DecodeByteString tkValue
+ style = tokenStyle tkType
+ tokenSpan = Html.thespan (Html.toHtml tkValue')
+
+
-- | Given information about the source position of definitions, render a token
-richToken :: SrcMap -> RichToken -> Html
-richToken srcs (RichToken Token{..} details)
- | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue
- | otherwise = linked content
+richToken :: SrcMaps -> NodeInfo PrintedType -> Token -> Html
+richToken srcs details Token{..}
+ | tkType == TkSpace = renderSpace (srcSpanStartLine tkSpan) tkValue'
+ | otherwise = annotate details $ linked content
where
+ tkValue' = filterCRLF $ utf8DecodeByteString tkValue
content = tokenSpan ! [ multiclass style ]
- tokenSpan = Html.thespan (Html.toHtml tkValue)
- style = tokenStyle tkType ++ maybe [] richTokenStyle details
+ tokenSpan = Html.thespan (Html.toHtml tkValue')
+ style = tokenStyle tkType ++ concatMap (richTokenStyle (null (nodeType details))) contexts
+
+ contexts = concatMap (Set.elems . identInfo) . Map.elems . nodeIdentifiers $ details
+
+ -- pick an arbitary identifier to hyperlink with
+ identDet = Map.lookupMin . nodeIdentifiers $ details
-- If we have name information, we can make links
- linked = case details of
- Just d -> externalAnchor d . internalAnchor d . hyperlink srcs d
+ linked = case identDet of
+ Just (n,_) -> hyperlink srcs n
Nothing -> id
-richTokenStyle :: TokenDetails -> [StyleClass]
-richTokenStyle (RtkVar _) = ["hs-var"]
-richTokenStyle (RtkType _) = ["hs-type"]
-richTokenStyle _ = []
+-- | Remove CRLFs from source
+filterCRLF :: String -> String
+filterCRLF ('\r':'\n':cs) = '\n' : filterCRLF cs
+filterCRLF (c:cs) = c : filterCRLF cs
+filterCRLF [] = []
+
+annotate :: NodeInfo PrintedType -> Html -> Html
+annotate ni content =
+ Html.thespan (annot <> content) ! [ Html.theclass "annot" ]
+ where
+ annot
+ | not (null annotation) =
+ Html.thespan (Html.toHtml annotation) ! [ Html.theclass "annottext" ]
+ | otherwise = mempty
+ annotation = typ ++ identTyps
+ typ = unlines (nodeType ni)
+ typedIdents = [ (n,t) | (n, identType -> Just t) <- Map.toList $ nodeIdentifiers ni ]
+ identTyps
+ | length typedIdents > 1 || null (nodeType ni)
+ = concatMap (\(n,t) -> printName n ++ " :: " ++ t ++ "\n") typedIdents
+ | otherwise = ""
+
+ printName :: Either ModuleName Name -> String
+ printName = either moduleNameString getOccString
+
+richTokenStyle
+ :: Bool -- ^ are we lacking a type annotation?
+ -> ContextInfo -- ^ in what context did this token show up?
+ -> [StyleClass]
+richTokenStyle True Use = ["hs-type"]
+richTokenStyle False Use = ["hs-var"]
+richTokenStyle _ RecField{} = ["hs-var"]
+richTokenStyle _ PatternBind{} = ["hs-var"]
+richTokenStyle _ MatchBind{} = ["hs-var"]
+richTokenStyle _ TyVarBind{} = ["hs-type"]
+richTokenStyle _ ValBind{} = ["hs-var"]
+richTokenStyle _ TyDecl = ["hs-type"]
+richTokenStyle _ ClassTyDecl{} = ["hs-type"]
+richTokenStyle _ Decl{} = ["hs-var"]
+richTokenStyle _ IEThing{} = [] -- could be either a value or type
tokenStyle :: TokenType -> [StyleClass]
tokenStyle TkIdentifier = ["hs-identifier"]
@@ -87,61 +204,70 @@ tokenStyle TkPragma = ["hs-pragma"]
tokenStyle TkUnknown = []
multiclass :: [StyleClass] -> HtmlAttr
-multiclass = Html.theclass . intercalate " "
+multiclass = Html.theclass . unwords
+
+externalAnchor :: Identifier -> Set.Set ContextInfo -> Html -> Html
+externalAnchor (Right name) contexts content
+ | not (isInternalName name)
+ , any isBinding contexts
+ = Html.thespan content ! [ Html.identifier $ externalAnchorIdent name ]
+externalAnchor _ _ content = content
-externalAnchor :: TokenDetails -> Html -> Html
-externalAnchor (RtkDecl name) content =
- Html.anchor content ! [ Html.name $ externalAnchorIdent name ]
-externalAnchor _ content = content
+isBinding :: ContextInfo -> Bool
+isBinding (ValBind RegularBind _ _) = True
+isBinding PatternBind{} = True
+isBinding Decl{} = True
+isBinding (RecField RecFieldDecl _) = True
+isBinding TyVarBind{} = True
+isBinding ClassTyDecl{} = True
+isBinding _ = False
-internalAnchor :: TokenDetails -> Html -> Html
-internalAnchor (RtkBind name) content =
- Html.anchor content ! [ Html.name $ internalAnchorIdent name ]
-internalAnchor _ content = content
+internalAnchor :: Identifier -> Set.Set ContextInfo -> Html -> Html
+internalAnchor (Right name) contexts content
+ | isInternalName name
+ , any isBinding contexts
+ = Html.thespan content ! [ Html.identifier $ internalAnchorIdent name ]
+internalAnchor _ _ content = content
-externalAnchorIdent :: GHC.Name -> String
+externalAnchorIdent :: Name -> String
externalAnchorIdent = hypSrcNameUrl
-internalAnchorIdent :: GHC.Name -> String
-internalAnchorIdent = ("local-" ++) . show . GHC.getKey . GHC.nameUnique
-
-hyperlink :: SrcMap -> TokenDetails -> Html -> Html
-hyperlink srcs details = case rtkName details of
- Left name ->
- if GHC.isInternalName name
- then internalHyperlink name
- else externalNameHyperlink srcs name
- Right name -> externalModHyperlink srcs name
-
-internalHyperlink :: GHC.Name -> Html -> Html
-internalHyperlink name content =
- Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
-
-externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html
-externalNameHyperlink srcs name content = case Map.lookup mdl srcs of
- Just SrcLocal -> Html.anchor content !
- [ Html.href $ hypSrcModuleNameUrl mdl name ]
- Just (SrcExternal path) -> Html.anchor content !
- [ Html.href $ path </> hypSrcModuleNameUrl mdl name ]
- Nothing -> content
+internalAnchorIdent :: Name -> String
+internalAnchorIdent = ("local-" ++) . show . getKey . nameUnique
+
+-- | Generate the HTML hyperlink for an identifier
+hyperlink :: SrcMaps -> Identifier -> Html -> Html
+hyperlink (srcs, srcs') ident = case ident of
+ Right name | isInternalName name -> internalHyperlink name
+ | otherwise -> externalNameHyperlink name
+ Left name -> externalModHyperlink name
+
where
- mdl = GHC.nameModule name
+ internalHyperlink name content =
+ Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
+
+ externalNameHyperlink name content = case Map.lookup mdl srcs of
+ Just SrcLocal -> Html.anchor content !
+ [ Html.href $ hypSrcModuleNameUrl mdl name ]
+ Just (SrcExternal path) -> Html.anchor content !
+ [ Html.href $ spliceURL Nothing (Just mdl) (Just name) Nothing (".." </> path) ]
+ Nothing -> content
+ where
+ mdl = nameModule name
-externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html
-externalModHyperlink srcs name content =
- let srcs' = Map.mapKeys GHC.moduleName srcs in
- case Map.lookup name srcs' of
- Just SrcLocal -> Html.anchor content !
- [ Html.href $ hypSrcModuleUrl' name ]
- Just (SrcExternal path) -> Html.anchor content !
- [ Html.href $ path </> hypSrcModuleUrl' name ]
- Nothing -> content
+ externalModHyperlink moduleName content =
+ case Map.lookup moduleName srcs' of
+ Just SrcLocal -> Html.anchor content !
+ [ Html.href $ hypSrcModuleUrl' moduleName ]
+ Just (SrcExternal path) -> Html.anchor content !
+ [ Html.href $ spliceURL' Nothing (Just moduleName) Nothing Nothing (".." </> path) ]
+ Nothing -> content
renderSpace :: Int -> String -> Html
-renderSpace _ [] = Html.noHtml
-renderSpace line ('\n':rest) = mconcat
- [ Html.thespan . Html.toHtml $ "\n"
+renderSpace !_ "" = Html.noHtml
+renderSpace !line ('\n':rest) = mconcat
+ [ Html.thespan (Html.toHtml '\n')
, lineAnchor (line + 1)
, renderSpace (line + 1) rest
]
@@ -151,4 +277,4 @@ renderSpace line space =
lineAnchor :: Int -> Html
-lineAnchor line = Html.anchor Html.noHtml ! [ Html.name $ hypSrcLineUrl line ]
+lineAnchor line = Html.thespan Html.noHtml ! [ Html.identifier $ hypSrcLineUrl line ]
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
index e377471e..50916937 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -1,17 +1,24 @@
+{-# LANGUAGE PatternSynonyms, OverloadedStrings #-}
module Haddock.Backends.Hyperlinker.Types where
-
import qualified GHC
+import Data.ByteString ( ByteString )
+
import Data.Map (Map)
data Token = Token
{ tkType :: TokenType
- , tkValue :: String
+ , tkValue :: ByteString -- ^ UTF-8 encoded
, tkSpan :: {-# UNPACK #-} !Span
}
deriving (Show)
+pattern BacktickTok, OpenParenTok, CloseParenTok :: Span -> Token
+pattern BacktickTok sp = Token TkSpecial "`" sp
+pattern OpenParenTok sp = Token TkSpecial "(" sp
+pattern CloseParenTok sp = Token TkSpecial ")" sp
+
type Position = GHC.RealSrcLoc
type Span = GHC.RealSrcSpan
@@ -31,29 +38,6 @@ data TokenType
| TkUnknown
deriving (Show, Eq)
-
-data RichToken = RichToken
- { rtkToken :: Token
- , rtkDetails :: Maybe TokenDetails
- }
-
-data TokenDetails
- = RtkVar GHC.Name
- | RtkType GHC.Name
- | RtkBind GHC.Name
- | RtkDecl GHC.Name
- | RtkModule GHC.ModuleName
- deriving (Eq)
-
-
-rtkName :: TokenDetails -> Either GHC.Name GHC.ModuleName
-rtkName (RtkVar name) = Left name
-rtkName (RtkType name) = Left name
-rtkName (RtkBind name) = Left name
-rtkName (RtkDecl name) = Left name
-rtkName (RtkModule name) = Right name
-
-
-- | Path for making cross-package hyperlinks in generated sources.
--
-- Used in 'SrcMap' to determine whether module originates in current package
@@ -63,5 +47,5 @@ data SrcPath
| SrcLocal
-- | Mapping from modules to cross-package source paths.
-type SrcMap = Map GHC.Module SrcPath
+type SrcMaps = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath)
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
index 9de4a03d..4e8b88d2 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Utils.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
module Haddock.Backends.Hyperlinker.Utils
( hypSrcDir, hypSrcModuleFile, hypSrcModuleFile'
, hypSrcModuleUrl, hypSrcModuleUrl'
@@ -6,21 +7,35 @@ module Haddock.Backends.Hyperlinker.Utils
, hypSrcModuleNameUrl, hypSrcModuleLineUrl
, hypSrcModuleUrlFormat
, hypSrcModuleNameUrlFormat, hypSrcModuleLineUrlFormat
- ) where
+ , spliceURL, spliceURL'
+ -- * HIE file processing
+ , PrintedType
+ , recoverFullIfaceTypes
+ ) where
+import Haddock.Utils
import Haddock.Backends.Xhtml.Utils
import GHC
-import FastString
-import System.FilePath.Posix ((</>))
+import HieTypes ( HieAST(..), HieType(..), HieArgs(..), TypeIndex, HieTypeFlat )
+import IfaceType
+import Name ( getOccFS, getOccString )
+import Outputable ( showSDoc )
+import Var ( VarBndr(..) )
+
+import System.FilePath.Posix ((</>), (<.>))
+import qualified Data.Array as A
+
+{-# INLINE hypSrcDir #-}
hypSrcDir :: FilePath
hypSrcDir = "src"
+{-# INLINE hypSrcModuleFile #-}
hypSrcModuleFile :: Module -> FilePath
-hypSrcModuleFile = hypSrcModuleFile' . moduleName
+hypSrcModuleFile m = moduleNameString (moduleName m) <.> "html"
hypSrcModuleFile' :: ModuleName -> FilePath
hypSrcModuleFile' mdl = spliceURL'
@@ -32,20 +47,19 @@ hypSrcModuleUrl = hypSrcModuleFile
hypSrcModuleUrl' :: ModuleName -> String
hypSrcModuleUrl' = hypSrcModuleFile'
+{-# INLINE hypSrcNameUrl #-}
hypSrcNameUrl :: Name -> String
-hypSrcNameUrl name = spliceURL
- Nothing Nothing (Just name) Nothing nameFormat
+hypSrcNameUrl = escapeStr . getOccString
+{-# INLINE hypSrcLineUrl #-}
hypSrcLineUrl :: Int -> String
-hypSrcLineUrl line = spliceURL
- Nothing Nothing Nothing (Just spn) lineFormat
- where
- loc = mkSrcLoc nilFS line 1
- spn = mkSrcSpan loc loc
+hypSrcLineUrl line = "line-" ++ show line
+{-# INLINE hypSrcModuleNameUrl #-}
hypSrcModuleNameUrl :: Module -> Name -> String
hypSrcModuleNameUrl mdl name = hypSrcModuleUrl mdl ++ "#" ++ hypSrcNameUrl name
+{-# INLINE hypSrcModuleLineUrl #-}
hypSrcModuleLineUrl :: Module -> Int -> String
hypSrcModuleLineUrl mdl line = hypSrcModuleUrl mdl ++ "#" ++ hypSrcLineUrl line
@@ -66,3 +80,65 @@ nameFormat = "%{NAME}"
lineFormat :: String
lineFormat = "line-%{LINE}"
+
+
+-- * HIE file procesddsing
+
+-- This belongs in GHC's HieUtils...
+
+-- | Pretty-printed type, ready to be turned into HTML by @xhtml@
+type PrintedType = String
+
+-- | Expand the flattened HIE AST into one where the types printed out and
+-- ready for end-users to look at.
+--
+-- Using just primitives found in GHC's HIE utilities, we could write this as
+-- follows:
+--
+-- > 'recoverFullIfaceTypes' dflags hieTypes hieAst
+-- > = 'fmap' (\ti -> 'showSDoc' df .
+-- > 'pprIfaceType' $
+-- > 'recoverFullType' ti hieTypes)
+-- > hieAst
+--
+-- However, this is very inefficient (both in time and space) because the
+-- mutliple calls to 'recoverFullType' don't share intermediate results. This
+-- function fixes that.
+recoverFullIfaceTypes
+ :: DynFlags
+ -> A.Array TypeIndex HieTypeFlat -- ^ flat types
+ -> HieAST TypeIndex -- ^ flattened AST
+ -> HieAST PrintedType -- ^ full AST
+recoverFullIfaceTypes df flattened ast = fmap (printed A.!) ast
+ where
+
+ -- Splitting this out into its own array is also important: we don't want
+ -- to pretty print the same type many times
+ printed :: A.Array TypeIndex PrintedType
+ printed = fmap (showSDoc df . pprIfaceType) unflattened
+
+ -- The recursion in 'unflattened' is crucial - it's what gives us sharing
+ -- between the IfaceType's produced
+ unflattened :: A.Array TypeIndex IfaceType
+ unflattened = fmap (\flatTy -> go (fmap (unflattened A.!) flatTy)) flattened
+
+ -- Unfold an 'HieType' whose subterms have already been unfolded
+ go :: HieType IfaceType -> IfaceType
+ go (HTyVarTy n) = IfaceTyVar (getOccFS n)
+ go (HAppTy a b) = IfaceAppTy a (hieToIfaceArgs b)
+ go (HLitTy l) = IfaceLitTy l
+ go (HForAllTy ((n,k),af) t) = let b = (getOccFS n, k)
+ in IfaceForAllTy (Bndr (IfaceTvBndr b) af) t
+ go (HFunTy a b) = IfaceFunTy a b
+ go (HQualTy con b) = IfaceDFunTy con b
+ go (HCastTy a) = a
+ go HCoercionTy = IfaceTyVar "<coercion type>"
+ go (HTyConApp a xs) = IfaceTyConApp a (hieToIfaceArgs xs)
+
+ -- This isn't fully faithful - we can't produce the 'Inferred' case
+ hieToIfaceArgs :: HieArgs IfaceType -> IfaceAppArgs
+ hieToIfaceArgs (HieArgs args) = go' args
+ where
+ go' [] = IA_Nil
+ go' ((True ,x):xs) = IA_Arg x Required $ go' xs
+ go' ((False,x):xs) = IA_Arg x Specified $ go' xs