From 506f614402192bd7b6a9a608e925a01b373b2bdc Mon Sep 17 00:00:00 2001 From: Doug Wilson Date: Sun, 28 May 2017 05:54:53 +1200 Subject: Improve Syb code (#621) Specialize.hs and Ast.hs are modified to have their Syb code not recurse into Name or Id in HsSyn types. Specialize.hs is refactored to have fewer calls to Syb functions. Syb.hs has some foldl calls replaced with foldl' calls. There is still a lot of performance on the floor of Ast.hs. The RenamedSource is traversed many times, and lookupBySpan is very inefficient. everywhereBut and lookupBySpan dominate the runtime whenever --hyperlinked-source is passed. --- .../src/Haddock/Backends/Hyperlinker/Ast.hs | 28 ++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index b97f0ead..78beacf2 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -2,12 +2,12 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TypeFamilies #-} - +{-# LANGUAGE TypeApplications #-} module Haddock.Backends.Hyperlinker.Ast (enrich) where -import Haddock.Syb +import qualified Haddock.Syb as Syb import Haddock.Backends.Hyperlinker.Types import qualified GHC @@ -16,6 +16,9 @@ import Control.Applicative import Data.Data import Data.Maybe +everythingInRenamedSource :: (Alternative f, Data x) + => (forall a. Data a => a -> f r) -> x -> f r +everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f -- | Add more detailed information to token stream using GHC API. enrich :: GHC.RenamedSource -> [Token] -> [RichToken] @@ -53,7 +56,7 @@ enrichToken _ _ = Nothing -- | Obtain details map for variables ("normally" used identifiers). variables :: GHC.RenamedSource -> DetailsMap variables = - everything (<|>) (var `combine` rec) + everythingInRenamedSource (var `Syb.combine` rec) where var term = case cast term of (Just (GHC.L sspan (GHC.HsVar name))) -> @@ -68,8 +71,7 @@ variables = -- | Obtain details map for types. types :: GHC.RenamedSource -> DetailsMap -types = - everything (<|>) ty +types = everythingInRenamedSource ty where ty term = case cast term of (Just (GHC.L sspan (GHC.HsTyVar _ name))) -> @@ -81,9 +83,10 @@ types = -- That includes both identifiers bound by pattern matching or declared using -- ordinary assignment (in top-level declarations, let-expressions and where -- clauses). + binds :: GHC.RenamedSource -> DetailsMap -binds = - everything (<|>) (fun `combine` pat `combine` tvar) +binds = everythingInRenamedSource + (fun `Syb.combine` pat `Syb.combine` tvar) where fun term = case cast term of (Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ :: GHC.HsBind GHC.Name)) -> @@ -93,7 +96,7 @@ binds = (Just (GHC.L sspan (GHC.VarPat name))) -> pure (sspan, RtkBind (GHC.unLoc name)) (Just (GHC.L _ (GHC.ConPatIn (GHC.L sspan name) recs))) -> - [(sspan, RtkVar name)] ++ everything (<|>) rec recs + [(sspan, RtkVar name)] ++ everythingInRenamedSource rec recs (Just (GHC.L _ (GHC.AsPat (GHC.L sspan name) _))) -> pure (sspan, RtkBind name) _ -> empty @@ -112,8 +115,8 @@ binds = decls :: GHC.RenamedSource -> DetailsMap decls (group, _, _, _) = concatMap ($ group) [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds - , everything (<|>) fun . GHC.hs_valds - , everything (<|>) (con `combine` ins) + , everythingInRenamedSource fun . GHC.hs_valds + , everythingInRenamedSource (con `Syb.combine` ins) ] where typ (GHC.L _ t) = case t of @@ -127,7 +130,8 @@ decls (group, _, _, _) = concatMap ($ group) _ -> empty con term = case cast term of (Just cdcl) -> - map decl (GHC.getConNames cdcl) ++ everything (<|>) fld cdcl + map decl (GHC.getConNames cdcl) + ++ everythingInRenamedSource fld cdcl Nothing -> empty ins term = case cast term of (Just (GHC.DataFamInstD inst)) -> pure . tyref $ GHC.dfid_tycon inst @@ -149,7 +153,7 @@ decls (group, _, _, _) = concatMap ($ group) -- import lists. imports :: GHC.RenamedSource -> DetailsMap imports src@(_, imps, _, _) = - everything (<|>) ie src ++ mapMaybe (imp . GHC.unLoc) imps + everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps where ie term = case cast term of (Just (GHC.IEVar v)) -> pure $ var $ GHC.ieLWrappedName v -- cgit v1.2.3 From 740458ac4d2acf197f2ef8dc94a66f9b160b9c3c Mon Sep 17 00:00:00 2001 From: Alexander Biehl Date: Sat, 19 Aug 2017 20:35:27 +0200 Subject: Hyperlinker: Avoid linear lookup in enrichToken (#669) * Make Span strict in Position * Hyperlinker: Use a proper map to enrich tokens --- .../src/Haddock/Backends/Hyperlinker/Ast.hs | 80 ++++++++++++---------- .../src/Haddock/Backends/Hyperlinker/Types.hs | 14 ++-- 2 files changed, 53 insertions(+), 41 deletions(-) (limited to 'haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs') diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 78beacf2..9d273417 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE RecordWildCards #-} @@ -13,9 +14,13 @@ import Haddock.Backends.Hyperlinker.Types import qualified GHC import Control.Applicative +import Control.Monad (guard) import Data.Data +import qualified Data.Map.Strict as Map import Data.Maybe +import Prelude hiding (span) + everythingInRenamedSource :: (Alternative f, Data x) => (forall a. Data a => a -> f r) -> x -> f r everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f @@ -28,25 +33,45 @@ enrich src = , rtkDetails = enrichToken token detailsMap } where - detailsMap = concatMap ($ src) - [ variables - , types - , decls - , binds - , imports - ] + detailsMap = + mkDetailsMap (concatMap ($ src) + [ variables + , types + , decls + , binds + , imports + ]) + +type LTokenDetails = [(GHC.SrcSpan, TokenDetails)] -- | A map containing association between source locations and "details" of -- this location. -- --- For the time being, it is just a list of pairs. However, looking up things --- in such structure has linear complexity. We cannot use any hashmap-like --- stuff because source locations are not ordered. In the future, this should --- be replaced with interval tree data structure. -type DetailsMap = [(GHC.SrcSpan, TokenDetails)] +type DetailsMap = Map.Map Position (Span, TokenDetails) + +mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap +mkDetailsMap xs = + Map.fromListWith select_details [ (start, (token_span, token_details)) + | (ghc_span, token_details) <- xs + , Just !token_span <- [ghcSrcSpanToSpan ghc_span] + , let start = spStart token_span + ] + where + -- favour token details which appear earlier in the list + select_details _new old = old lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails -lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst) +lookupBySpan span details = do + (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details + guard (tok_span `containsSpan` span ) + return tok_details + +ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span +ghcSrcSpanToSpan (GHC.RealSrcSpan span) = + Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span) + , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span) + }) +ghcSrcSpanToSpan _ = Nothing enrichToken :: Token -> DetailsMap -> Maybe TokenDetails enrichToken (Token typ _ spn) dm @@ -54,7 +79,7 @@ enrichToken (Token typ _ spn) dm enrichToken _ _ = Nothing -- | Obtain details map for variables ("normally" used identifiers). -variables :: GHC.RenamedSource -> DetailsMap +variables :: GHC.RenamedSource -> LTokenDetails variables = everythingInRenamedSource (var `Syb.combine` rec) where @@ -70,7 +95,7 @@ variables = _ -> empty -- | Obtain details map for types. -types :: GHC.RenamedSource -> DetailsMap +types :: GHC.RenamedSource -> LTokenDetails types = everythingInRenamedSource ty where ty term = case cast term of @@ -84,7 +109,7 @@ types = everythingInRenamedSource ty -- ordinary assignment (in top-level declarations, let-expressions and where -- clauses). -binds :: GHC.RenamedSource -> DetailsMap +binds :: GHC.RenamedSource -> LTokenDetails binds = everythingInRenamedSource (fun `Syb.combine` pat `Syb.combine` tvar) where @@ -112,7 +137,7 @@ binds = everythingInRenamedSource _ -> empty -- | Obtain details map for top-level declarations. -decls :: GHC.RenamedSource -> DetailsMap +decls :: GHC.RenamedSource -> LTokenDetails decls (group, _, _, _) = concatMap ($ group) [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds , everythingInRenamedSource fun . GHC.hs_valds @@ -151,7 +176,7 @@ decls (group, _, _, _) = concatMap ($ group) -- -- This map also includes type and variable details for items in export and -- import lists. -imports :: GHC.RenamedSource -> DetailsMap +imports :: GHC.RenamedSource -> LTokenDetails imports src@(_, imps, _, _) = everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps where @@ -168,22 +193,3 @@ imports src@(_, imps, _, _) = let (GHC.L sspan name) = GHC.ideclName idecl in Just (sspan, RtkModule name) imp _ = Nothing - --- | Check whether token stream span matches GHC source span. --- --- Currently, it is implemented as checking whether "our" span is contained --- in GHC span. The reason for that is because GHC span are generally wider --- and may spread across couple tokens. For example, @(>>=)@ consists of three --- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable --- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@ --- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span --- associated with @quux@ contains all five elements. -matches :: Span -> GHC.SrcSpan -> Bool -matches tspan (GHC.RealSrcSpan aspan) - | saspan <= stspan && etspan <= easpan = True - where - stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan) - etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan) - saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan) - easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan) -matches _ _ = False diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs index b27ec4d8..d8ae89e4 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs @@ -10,7 +10,7 @@ import qualified Data.Map as Map data Token = Token { tkType :: TokenType , tkValue :: String - , tkSpan :: Span + , tkSpan :: {-# UNPACK #-} !Span } deriving (Show) @@ -18,14 +18,20 @@ data Position = Position { posRow :: !Int , posCol :: !Int } - deriving (Show) + deriving (Eq, Ord, Show) data Span = Span - { spStart :: Position - , spEnd :: Position + { spStart :: !Position + , spEnd :: !Position } deriving (Show) +-- | Tests whether the first span "contains" the other span, meaning +-- that it covers at least as much source code. True where spans are equal. +containsSpan :: Span -> Span -> Bool +containsSpan s1 s2 = + spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2 + data TokenType = TkIdentifier | TkKeyword -- cgit v1.2.3