aboutsummaryrefslogtreecommitdiff
path: root/haddock-api/src/Haddock
diff options
context:
space:
mode:
authorAlexander Biehl <alexbiehl@gmail.com>2017-08-19 20:35:27 +0200
committerGitHub <noreply@github.com>2017-08-19 20:35:27 +0200
commit740458ac4d2acf197f2ef8dc94a66f9b160b9c3c (patch)
tree292d147cedefc0a4153e02a9377d2a36cc845b07 /haddock-api/src/Haddock
parentf7032e5e48c7a6635e1dca607a37a16c8893e94b (diff)
Hyperlinker: Avoid linear lookup in enrichToken (#669)
* Make Span strict in Position * Hyperlinker: Use a proper map to enrich tokens
Diffstat (limited to 'haddock-api/src/Haddock')
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs80
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs14
2 files changed, 53 insertions, 41 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
index 78beacf2..9d273417 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE RecordWildCards #-}
@@ -13,9 +14,13 @@ import Haddock.Backends.Hyperlinker.Types
import qualified GHC
import Control.Applicative
+import Control.Monad (guard)
import Data.Data
+import qualified Data.Map.Strict as Map
import Data.Maybe
+import Prelude hiding (span)
+
everythingInRenamedSource :: (Alternative f, Data x)
=> (forall a. Data a => a -> f r) -> x -> f r
everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f
@@ -28,25 +33,45 @@ enrich src =
, rtkDetails = enrichToken token detailsMap
}
where
- detailsMap = concatMap ($ src)
- [ variables
- , types
- , decls
- , binds
- , imports
- ]
+ detailsMap =
+ mkDetailsMap (concatMap ($ src)
+ [ variables
+ , types
+ , decls
+ , binds
+ , imports
+ ])
+
+type LTokenDetails = [(GHC.SrcSpan, TokenDetails)]
-- | A map containing association between source locations and "details" of
-- this location.
--
--- For the time being, it is just a list of pairs. However, looking up things
--- in such structure has linear complexity. We cannot use any hashmap-like
--- stuff because source locations are not ordered. In the future, this should
--- be replaced with interval tree data structure.
-type DetailsMap = [(GHC.SrcSpan, TokenDetails)]
+type DetailsMap = Map.Map Position (Span, TokenDetails)
+
+mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
+mkDetailsMap xs =
+ Map.fromListWith select_details [ (start, (token_span, token_details))
+ | (ghc_span, token_details) <- xs
+ , Just !token_span <- [ghcSrcSpanToSpan ghc_span]
+ , let start = spStart token_span
+ ]
+ where
+ -- favour token details which appear earlier in the list
+ select_details _new old = old
lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
-lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst)
+lookupBySpan span details = do
+ (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details
+ guard (tok_span `containsSpan` span )
+ return tok_details
+
+ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span
+ghcSrcSpanToSpan (GHC.RealSrcSpan span) =
+ Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span)
+ , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span)
+ })
+ghcSrcSpanToSpan _ = Nothing
enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
enrichToken (Token typ _ spn) dm
@@ -54,7 +79,7 @@ enrichToken (Token typ _ spn) dm
enrichToken _ _ = Nothing
-- | Obtain details map for variables ("normally" used identifiers).
-variables :: GHC.RenamedSource -> DetailsMap
+variables :: GHC.RenamedSource -> LTokenDetails
variables =
everythingInRenamedSource (var `Syb.combine` rec)
where
@@ -70,7 +95,7 @@ variables =
_ -> empty
-- | Obtain details map for types.
-types :: GHC.RenamedSource -> DetailsMap
+types :: GHC.RenamedSource -> LTokenDetails
types = everythingInRenamedSource ty
where
ty term = case cast term of
@@ -84,7 +109,7 @@ types = everythingInRenamedSource ty
-- ordinary assignment (in top-level declarations, let-expressions and where
-- clauses).
-binds :: GHC.RenamedSource -> DetailsMap
+binds :: GHC.RenamedSource -> LTokenDetails
binds = everythingInRenamedSource
(fun `Syb.combine` pat `Syb.combine` tvar)
where
@@ -112,7 +137,7 @@ binds = everythingInRenamedSource
_ -> empty
-- | Obtain details map for top-level declarations.
-decls :: GHC.RenamedSource -> DetailsMap
+decls :: GHC.RenamedSource -> LTokenDetails
decls (group, _, _, _) = concatMap ($ group)
[ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
, everythingInRenamedSource fun . GHC.hs_valds
@@ -151,7 +176,7 @@ decls (group, _, _, _) = concatMap ($ group)
--
-- This map also includes type and variable details for items in export and
-- import lists.
-imports :: GHC.RenamedSource -> DetailsMap
+imports :: GHC.RenamedSource -> LTokenDetails
imports src@(_, imps, _, _) =
everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps
where
@@ -168,22 +193,3 @@ imports src@(_, imps, _, _) =
let (GHC.L sspan name) = GHC.ideclName idecl
in Just (sspan, RtkModule name)
imp _ = Nothing
-
--- | Check whether token stream span matches GHC source span.
---
--- Currently, it is implemented as checking whether "our" span is contained
--- in GHC span. The reason for that is because GHC span are generally wider
--- and may spread across couple tokens. For example, @(>>=)@ consists of three
--- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable
--- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@
--- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span
--- associated with @quux@ contains all five elements.
-matches :: Span -> GHC.SrcSpan -> Bool
-matches tspan (GHC.RealSrcSpan aspan)
- | saspan <= stspan && etspan <= easpan = True
- where
- stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan)
- etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan)
- saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan)
- easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan)
-matches _ _ = False
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
index b27ec4d8..d8ae89e4 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -10,7 +10,7 @@ import qualified Data.Map as Map
data Token = Token
{ tkType :: TokenType
, tkValue :: String
- , tkSpan :: Span
+ , tkSpan :: {-# UNPACK #-} !Span
}
deriving (Show)
@@ -18,14 +18,20 @@ data Position = Position
{ posRow :: !Int
, posCol :: !Int
}
- deriving (Show)
+ deriving (Eq, Ord, Show)
data Span = Span
- { spStart :: Position
- , spEnd :: Position
+ { spStart :: !Position
+ , spEnd :: !Position
}
deriving (Show)
+-- | Tests whether the first span "contains" the other span, meaning
+-- that it covers at least as much source code. True where spans are equal.
+containsSpan :: Span -> Span -> Bool
+containsSpan s1 s2 =
+ spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2
+
data TokenType
= TkIdentifier
| TkKeyword