diff options
| author | Alexander Biehl <alexbiehl@gmail.com> | 2017-08-19 20:35:27 +0200 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-08-19 20:35:27 +0200 | 
| commit | 740458ac4d2acf197f2ef8dc94a66f9b160b9c3c (patch) | |
| tree | 292d147cedefc0a4153e02a9377d2a36cc845b07 /haddock-api/src/Haddock | |
| parent | f7032e5e48c7a6635e1dca607a37a16c8893e94b (diff) | |
Hyperlinker: Avoid linear lookup in enrichToken (#669)
* Make Span strict in Position
* Hyperlinker: Use a proper map to enrich tokens
Diffstat (limited to 'haddock-api/src/Haddock')
| -rw-r--r-- | haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs | 80 | ||||
| -rw-r--r-- | haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs | 14 | 
2 files changed, 53 insertions, 41 deletions
| diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs index 78beacf2..9d273417 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-}  {-# LANGUAGE RankNTypes #-}  {-# LANGUAGE ScopedTypeVariables #-}  {-# LANGUAGE RecordWildCards #-} @@ -13,9 +14,13 @@ import Haddock.Backends.Hyperlinker.Types  import qualified GHC  import Control.Applicative +import Control.Monad (guard)  import Data.Data +import qualified Data.Map.Strict as Map  import Data.Maybe +import Prelude hiding (span) +  everythingInRenamedSource :: (Alternative f, Data x)    => (forall a. Data a => a -> f r) -> x -> f r  everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f @@ -28,25 +33,45 @@ enrich src =          , rtkDetails = enrichToken token detailsMap          }    where -    detailsMap = concatMap ($ src) -        [ variables -        , types -        , decls -        , binds -        , imports -        ] +    detailsMap = +      mkDetailsMap (concatMap ($ src) +                     [ variables +                     , types +                     , decls +                     , binds +                     , imports +                     ]) + +type LTokenDetails = [(GHC.SrcSpan, TokenDetails)]  -- | A map containing association between source locations and "details" of  -- this location.  -- --- For the time being, it is just a list of pairs. However, looking up things --- in such structure has linear complexity. We cannot use any hashmap-like --- stuff because source locations are not ordered. In the future, this should --- be replaced with interval tree data structure. -type DetailsMap = [(GHC.SrcSpan, TokenDetails)] +type DetailsMap = Map.Map Position (Span, TokenDetails) + +mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap +mkDetailsMap xs = +  Map.fromListWith select_details [ (start, (token_span, token_details)) +                                  | (ghc_span, token_details) <- xs +                                  , Just !token_span <- [ghcSrcSpanToSpan ghc_span] +                                  , let start = spStart token_span +                                  ] +  where +    -- favour token details which appear earlier in the list +    select_details _new old = old  lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails -lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst) +lookupBySpan span details = do +  (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details +  guard (tok_span `containsSpan` span ) +  return tok_details + +ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span +ghcSrcSpanToSpan (GHC.RealSrcSpan span) = +  Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span) +             , spEnd   = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span) +             }) +ghcSrcSpanToSpan _ = Nothing  enrichToken :: Token -> DetailsMap -> Maybe TokenDetails  enrichToken (Token typ _ spn) dm @@ -54,7 +79,7 @@ enrichToken (Token typ _ spn) dm  enrichToken _ _ = Nothing  -- | Obtain details map for variables ("normally" used identifiers). -variables :: GHC.RenamedSource -> DetailsMap +variables :: GHC.RenamedSource -> LTokenDetails  variables =      everythingInRenamedSource (var `Syb.combine` rec)    where @@ -70,7 +95,7 @@ variables =          _ -> empty  -- | Obtain details map for types. -types :: GHC.RenamedSource -> DetailsMap +types :: GHC.RenamedSource -> LTokenDetails  types = everythingInRenamedSource ty    where      ty term = case cast term of @@ -84,7 +109,7 @@ types = everythingInRenamedSource ty  -- ordinary assignment (in top-level declarations, let-expressions and where  -- clauses). -binds :: GHC.RenamedSource -> DetailsMap +binds :: GHC.RenamedSource -> LTokenDetails  binds = everythingInRenamedSource        (fun `Syb.combine` pat `Syb.combine` tvar)    where @@ -112,7 +137,7 @@ binds = everythingInRenamedSource          _ -> empty  -- | Obtain details map for top-level declarations. -decls :: GHC.RenamedSource -> DetailsMap +decls :: GHC.RenamedSource -> LTokenDetails  decls (group, _, _, _) = concatMap ($ group)      [ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds      , everythingInRenamedSource fun . GHC.hs_valds @@ -151,7 +176,7 @@ decls (group, _, _, _) = concatMap ($ group)  --  -- This map also includes type and variable details for items in export and  -- import lists. -imports :: GHC.RenamedSource -> DetailsMap +imports :: GHC.RenamedSource -> LTokenDetails  imports src@(_, imps, _, _) =      everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps    where @@ -168,22 +193,3 @@ imports src@(_, imps, _, _) =          let (GHC.L sspan name) = GHC.ideclName idecl          in Just (sspan, RtkModule name)      imp _ = Nothing - --- | Check whether token stream span matches GHC source span. --- --- Currently, it is implemented as checking whether "our" span is contained --- in GHC span. The reason for that is because GHC span are generally wider --- and may spread across couple tokens. For example, @(>>=)@ consists of three --- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable --- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@ --- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span --- associated with @quux@ contains all five elements. -matches :: Span -> GHC.SrcSpan -> Bool -matches tspan (GHC.RealSrcSpan aspan) -    | saspan <= stspan && etspan <= easpan = True -  where -    stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan) -    etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan) -    saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan) -    easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan) -matches _ _ = False diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs index b27ec4d8..d8ae89e4 100644 --- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs +++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs @@ -10,7 +10,7 @@ import qualified Data.Map as Map  data Token = Token      { tkType :: TokenType      , tkValue :: String -    , tkSpan :: Span +    , tkSpan :: {-# UNPACK #-} !Span      }      deriving (Show) @@ -18,14 +18,20 @@ data Position = Position      { posRow :: !Int      , posCol :: !Int      } -    deriving (Show) +    deriving (Eq, Ord, Show)  data Span = Span -    { spStart :: Position -    , spEnd :: Position +    { spStart :: !Position +    , spEnd   :: !Position      }      deriving (Show) +-- | Tests whether the first span "contains" the other span, meaning +-- that it covers at least as much source code. True where spans are equal. +containsSpan :: Span -> Span -> Bool +containsSpan s1 s2 = +  spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2 +  data TokenType      = TkIdentifier      | TkKeyword | 
