From 740458ac4d2acf197f2ef8dc94a66f9b160b9c3c Mon Sep 17 00:00:00 2001 From: Alexander Biehl Date: Sat, 19 Aug 2017 20:35:27 +0200 Subject: Hyperlinker: Avoid linear lookup in enrichToken (#669) * Make Span strict in Position * Hyperlinker: Use a proper map to enrich tokens --- .../src/Haddock/Backends/Hyperlinker/Ast.hs | 80 ++++++++++++---------- .../src/Haddock/Backends/Hyperlinker/Types.hs | 14 ++-- 2 files changed, 53 insertions(+), 41 deletions(-) (limited to 'haddock-api/src/Haddock/Backends') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 78beacf2..9d273417 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE RecordWildCards #-} @@ -13,9 +14,13 @@ import Haddock.Backends.Hyperlinker.Types import qualified GHC import Control.Applicative +import Control.Monad (guard) import Data.Data +import qualified Data.Map.Strict as Map import Data.Maybe +import Prelude hiding (span) + everythingInRenamedSource :: (Alternative f, Data x) => (forall a. Data a => a -> f r) -> x -> f r everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f @@ -28,25 +33,45 @@ enrich src = , rtkDetails = enrichToken token detailsMap } where - detailsMap = concatMap ($ src) - [ variables - , types - , decls - , binds - , imports - ] + detailsMap = + mkDetailsMap (concatMap ($ src) + [ variables + , types + , decls + , binds + , imports + ]) + +type LTokenDetails = [(GHC.SrcSpan, TokenDetails)] -- | A map containing association between source locations and "details" of -- this location. -- --- For the time being, it is just a list of pairs. However, looking up things --- in such structure has linear complexity. We cannot use any hashmap-like --- stuff because source locations are not ordered. In the future, this should --- be replaced with interval tree data structure. -type DetailsMap = [(GHC.SrcSpan, TokenDetails)] +type DetailsMap = Map.Map Position (Span, TokenDetails) + +mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap +mkDetailsMap xs = + Map.fromListWith select_details [ (start, (token_span, token_details)) + | (ghc_span, token_details) <- xs + , Just !token_span <- [ghcSrcSpanToSpan ghc_span] + , let start = spStart token_span + ] + where + -- favour token details which appear earlier in the list + select_details _new old = old lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails -lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst) +lookupBySpan span details = do + (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details + guard (tok_span `containsSpan` span ) + return tok_details + +ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span +ghcSrcSpanToSpan (GHC.RealSrcSpan span) = + Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span) + , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span) + }) +ghcSrcSpanToSpan _ = Nothing enrichToken :: Token -> DetailsMap -> Maybe TokenDetails enrichToken (Token typ _ spn) dm @@ -54,7 +79,7 @@ enrichToken (Token typ _ spn) dm enrichToken _ _ = Nothing -- | Obtain details map for variables ("normally" used identifiers). -variables :: GHC.RenamedSource -> DetailsMap +variables :: GHC.RenamedSource -> LTokenDetails variables = everythingInRenamedSource (var `Syb.combine` rec) where @@ -70,7 +95,7 @@ variables = _ -> empty -- | Obtain details map for types. -types :: GHC.RenamedSource -> DetailsMap +types :: GHC.RenamedSource -> LTokenDetails types = everythingInRenamedSource ty where ty term = case cast term of @@ -84,7 +109,7 @@ types = everythingInRenamedSource ty -- ordinary assignment (in top-level declarations, let-expressions and where -- clauses). -binds :: GHC.RenamedSource -> DetailsMap +binds :: GHC.RenamedSource -> LTokenDetails binds = everythingInRenamedSource (fun `Syb.combine` pat `Syb.combine` tvar) where @@ -112,7 +137,7 @@ binds = everythingInRenamedSource _ -> empty -- | Obtain details map for top-level declarations. -decls :: GHC.RenamedSource -> DetailsMap +decls :: GHC.RenamedSource -> LTokenDetails decls (group, _, _, _) = concatMap ($ group) [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds , everythingInRenamedSource fun . GHC.hs_valds @@ -151,7 +176,7 @@ decls (group, _, _, _) = concatMap ($ group) -- -- This map also includes type and variable details for items in export and -- import lists. -imports :: GHC.RenamedSource -> DetailsMap +imports :: GHC.RenamedSource -> LTokenDetails imports src@(_, imps, _, _) = everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps where @@ -168,22 +193,3 @@ imports src@(_, imps, _, _) = let (GHC.L sspan name) = GHC.ideclName idecl in Just (sspan, RtkModule name) imp _ = Nothing - --- | Check whether token stream span matches GHC source span. --- --- Currently, it is implemented as checking whether "our" span is contained --- in GHC span. The reason for that is because GHC span are generally wider --- and may spread across couple tokens. For example, @(>>=)@ consists of three --- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable --- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@ --- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span --- associated with @quux@ contains all five elements. -matches :: Span -> GHC.SrcSpan -> Bool -matches tspan (GHC.RealSrcSpan aspan) - | saspan <= stspan && etspan <= easpan = True - where - stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan) - etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan) - saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan) - easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan) -matches _ _ = False diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs index b27ec4d8..d8ae89e4 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs @@ -10,7 +10,7 @@ import qualified Data.Map as Map data Token = Token { tkType :: TokenType , tkValue :: String - , tkSpan :: Span + , tkSpan :: {-# UNPACK #-} !Span } deriving (Show) @@ -18,14 +18,20 @@ data Position = Position { posRow :: !Int , posCol :: !Int } - deriving (Show) + deriving (Eq, Ord, Show) data Span = Span - { spStart :: Position - , spEnd :: Position + { spStart :: !Position + , spEnd :: !Position } deriving (Show) +-- | Tests whether the first span "contains" the other span, meaning +-- that it covers at least as much source code. True where spans are equal. +containsSpan :: Span -> Span -> Bool +containsSpan s1 s2 = + spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2 + data TokenType = TkIdentifier | TkKeyword -- cgit v1.2.3