1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Haddock.Backends.Hyperlinker.Ast
( enrich
, RichToken(..), TokenDetails(..), rtkName
) where
import Haddock.Backends.Hyperlinker.Parser
import qualified GHC
import Control.Applicative
import Data.Data
import Data.Maybe
data RichToken = RichToken
{ rtkToken :: Token
, rtkDetails :: Maybe TokenDetails
}
data TokenDetails
= RtkVar GHC.Name
| RtkType GHC.Name
| RtkBind GHC.Name
| RtkDecl GHC.Name
| RtkModule GHC.ModuleName
rtkName :: TokenDetails -> Either GHC.Name GHC.ModuleName
rtkName (RtkVar name) = Left name
rtkName (RtkType name) = Left name
rtkName (RtkBind name) = Left name
rtkName (RtkDecl name) = Left name
rtkName (RtkModule name) = Right name
-- | Add more detailed information to token stream using GHC API.
enrich :: GHC.RenamedSource -> [Token] -> [RichToken]
enrich src =
map $ \token -> RichToken
{ rtkToken = token
, rtkDetails = enrichToken token detailsMap
}
where
detailsMap = concatMap ($ src)
[ variables
, types
, decls
, binds
, imports
]
-- | A map containing association between source locations and "details" of
-- this location.
--
-- For the time being, it is just a list of pairs. However, looking up things
-- in such structure has linear complexity. We cannot use any hashmap-like
-- stuff because source locations are not ordered. In the future, this should
-- be replaced with interval tree data structure.
type DetailsMap = [(GHC.SrcSpan, TokenDetails)]
lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst)
enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
enrichToken (Token typ _ spn) dm
| typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
enrichToken _ _ = Nothing
-- | Obtain details map for variables ("normally" used identifiers).
variables :: GHC.RenamedSource -> DetailsMap
variables =
everything (<|>) (var `combine` rec)
where
var term = case cast term of
(Just (GHC.L sspan (GHC.HsVar name))) ->
pure (sspan, RtkVar name)
(Just (GHC.L _ (GHC.RecordCon (GHC.L sspan name) _ _))) ->
pure (sspan, RtkVar name)
_ -> empty
rec term = case cast term of
Just (GHC.HsRecField (GHC.L sspan name) (_ :: GHC.LHsExpr GHC.Name) _) ->
pure (sspan, RtkVar name)
_ -> empty
-- | Obtain details map for types.
types :: GHC.RenamedSource -> DetailsMap
types =
everything (<|>) ty
where
ty term = case cast term of
(Just (GHC.L sspan (GHC.HsTyVar name))) ->
pure (sspan, RtkType name)
_ -> empty
-- | Obtain details map for identifier bindings.
--
-- That includes both identifiers bound by pattern matching or declared using
-- ordinary assignment (in top-level declarations, let-expressions and where
-- clauses).
binds :: GHC.RenamedSource -> DetailsMap
binds =
everything (<|>) (fun `combine` pat `combine` tvar)
where
fun term = case cast term of
(Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ _ :: GHC.HsBind GHC.Name)) ->
pure (sspan, RtkBind name)
_ -> empty
pat term = case cast term of
(Just (GHC.L sspan (GHC.VarPat name))) ->
pure (sspan, RtkBind name)
(Just (GHC.L _ (GHC.ConPatIn (GHC.L sspan name) recs))) ->
[(sspan, RtkVar name)] ++ everything (<|>) rec recs
(Just (GHC.L _ (GHC.AsPat (GHC.L sspan name) _))) ->
pure (sspan, RtkBind name)
_ -> empty
rec term = case cast term of
(Just (GHC.HsRecField (GHC.L sspan name) (_ :: GHC.LPat GHC.Name) _)) ->
pure (sspan, RtkVar name)
_ -> empty
tvar term = case cast term of
(Just (GHC.L sspan (GHC.UserTyVar name))) ->
pure (sspan, RtkBind name)
(Just (GHC.L _ (GHC.KindedTyVar (GHC.L sspan name) _))) ->
pure (sspan, RtkBind name)
_ -> empty
-- | Obtain details map for top-level declarations.
decls :: GHC.RenamedSource -> DetailsMap
decls (group, _, _, _) = concatMap ($ group)
[ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
, everything (<|>) fun
]
where
typ (GHC.L _ t) = case t of
GHC.DataDecl name _ defn _ ->
[decl name] ++ concatMap con (GHC.dd_cons defn)
_ -> pure . decl $ GHC.tcdLName t
fun term = case cast term of
(Just (GHC.FunBind (GHC.L sspan name) _ _ _ _ _ :: GHC.HsBind GHC.Name))
| GHC.isExternalName name -> pure (sspan, RtkDecl name)
_ -> empty
con (GHC.L _ t) =
map decl (GHC.con_names t) ++ everything (<|>) fld t
fld term = case cast term of
Just field -> map decl $ GHC.cd_fld_names field
Nothing -> empty
decl (GHC.L sspan name) = (sspan, RtkDecl name)
-- | Obtain details map for import declarations.
--
-- This map also includes type and variable details for items in export and
-- import lists.
imports :: GHC.RenamedSource -> DetailsMap
imports src@(_, imps, _, _) =
everything (<|>) ie src ++ map (imp . GHC.unLoc) imps
where
ie term = case cast term of
(Just (GHC.IEVar v)) -> pure $ var v
(Just (GHC.IEThingAbs t)) -> pure $ typ t
(Just (GHC.IEThingAll t)) -> pure $ typ t
(Just (GHC.IEThingWith t vs)) -> [typ t] ++ map var vs
_ -> empty
typ (GHC.L sspan name) = (sspan, RtkType name)
var (GHC.L sspan name) = (sspan, RtkVar name)
imp idecl =
let (GHC.L sspan name) = GHC.ideclName idecl
in (sspan, RtkModule name)
-- | Check whether token stream span matches GHC source span.
--
-- Currently, it is implemented as checking whether "our" span is contained
-- in GHC span. The reason for that is because GHC span are generally wider
-- and may spread across couple tokens. For example, @(>>=)@ consists of three
-- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable
-- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@
-- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span
-- associated with @quux@ contains all five elements.
matches :: Span -> GHC.SrcSpan -> Bool
matches tspan (GHC.RealSrcSpan aspan)
| saspan <= stspan && etspan <= easpan = True
where
stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan)
etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan)
saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan)
easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan)
matches _ _ = False
-- | Perform a query on each level of a tree.
--
-- This is stolen directly from SYB package and copied here to not introduce
-- additional dependencies.
everything :: (r -> r -> r) -> (forall a. Data a => a -> r)
-> (forall a. Data a => a -> r)
everything k f x = foldl k (f x) (gmapQ (everything k f) x)
-- | Combine two queries into one using alternative combinator.
combine :: Alternative f => (forall a. Data a => a -> f r)
-> (forall a. Data a => a -> f r)
-> (forall a. Data a => a -> f r)
combine f g x = f x <|> g x
|