1 | // Copyright 2013 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
// Package rta provides Rapid Type Analysis (RTA) for Go, a fast
// algorithm for call graph construction and discovery of reachable code
// (and hence dead code) and runtime types. The algorithm was first
// described in:
9 | // |
10 | // David F. Bacon and Peter F. Sweeney. 1996. |
11 | // Fast static analysis of C++ virtual function calls. (OOPSLA '96) |
12 | // http://doi.acm.org/10.1145/236337.236371 |
13 | // |
// The algorithm uses dynamic programming to tabulate the cross-product
// of the set of known "address taken" functions with the set of known
// dynamic calls of the same type. As each new address-taken function
// is discovered, call graph edges are added to it from each known call
// site of that type, and as each new call site is discovered, call
// graph edges are added from it to each known address-taken function.
20 | // |
21 | // A similar approach is used for dynamic calls via interfaces: it |
22 | // tabulates the cross-product of the set of known "runtime types", |
23 | // i.e. types that may appear in an interface value, or be derived from |
24 | // one via reflection, with the set of known "invoke"-mode dynamic |
25 | // calls. As each new "runtime type" is discovered, call edges are |
26 | // added from the known call sites, and as each new call site is |
27 | // discovered, call graph edges are added to each compatible |
28 | // method. |
29 | // |
30 | // In addition, we must consider all exported methods of any runtime type |
31 | // as reachable, since they may be called via reflection. |
32 | // |
33 | // Each time a newly added call edge causes a new function to become |
34 | // reachable, the code of that function is analyzed for more call sites, |
35 | // address-taken functions, and runtime types. The process continues |
36 | // until a fixed point is achieved. |
37 | // |
38 | // The resulting call graph is less precise than one produced by pointer |
39 | // analysis, but the algorithm is much faster. For example, running the |
40 | // cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s |
41 | // for points-to analysis. |
42 | package rta // import "golang.org/x/tools/go/callgraph/rta" |
43 | |
44 | // TODO(adonovan): test it by connecting it to the interpreter and |
45 | // replacing all "unreachable" functions by a special intrinsic, and |
46 | // ensure that that intrinsic is never called. |
47 | |
48 | // TODO(zpavlinovic): decide if the clients must use ssa.InstantiateGenerics |
49 | // mode when building programs with generics. It might be possible to |
50 | // extend rta to accurately support generics with just ssa.BuilderMode(0). |
51 | |
52 | import ( |
53 | "fmt" |
54 | "go/types" |
55 | |
56 | "golang.org/x/tools/go/callgraph" |
57 | "golang.org/x/tools/go/ssa" |
58 | "golang.org/x/tools/go/types/typeutil" |
59 | ) |
60 | |
// A Result holds the results of Rapid Type Analysis, which includes the
// set of reachable functions/methods, runtime types, and the call graph.
type Result struct {
	// CallGraph is the discovered callgraph.
	// It does not include edges for calls made via reflection.
	// It is nil when Analyze is called with buildCallGraph false.
	CallGraph *callgraph.Graph

	// Reachable contains the set of reachable functions and methods.
	// This includes exported methods of runtime types, since
	// they may be accessed via reflection.
	// The value indicates whether the function is address-taken.
	//
	// (We wrap the bool in a struct to avoid inadvertent use of
	// "if Reachable[f] {" to test for set membership.)
	Reachable map[*ssa.Function]struct{ AddrTaken bool }

	// RuntimeTypes contains the set of types that are needed at
	// runtime, for interfaces or reflection.
	// Keys are types.Type; values are bool.
	//
	// The value indicates whether the type is inaccessible to reflection.
	// Consider:
	//	type A struct{B}
	//	fmt.Println(new(A))
	// Types *A, A and B are accessible to reflection, but the unnamed
	// type struct{B} is not.
	RuntimeTypes typeutil.Map
}
88 | |
// rta is the working state of the RTA algorithm for a single call to
// Analyze.
type rta struct {
	// result accumulates the output that Analyze eventually returns.
	result *Result

	// prog is the program under analysis; Analyze takes it from the
	// first root function.
	prog *ssa.Program

	worklist []*ssa.Function // functions discovered but not yet visited

	// addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
	// Keys are *types.Signature, values are map[*ssa.Function]bool sets.
	addrTakenFuncsBySig typeutil.Map

	// dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
	// Keys are *types.Signature, values are unordered []ssa.CallInstruction.
	dynCallSites typeutil.Map

	// invokeSites contains all "invoke"-mode call sites, grouped by interface.
	// Keys are *types.Interface (never *types.Named),
	// Values are unordered []ssa.CallInstruction sets.
	invokeSites typeutil.Map

	// The following two maps together define the subset of the
	// m:n "implements" relation needed by the algorithm.

	// concreteTypes maps each concrete type to the set of interfaces that it implements.
	// Keys are types.Type, values are unordered []*types.Interface.
	// An entry is recorded for every non-interface type passed to
	// addRuntimeType, i.e. MakeInterface operands and the types
	// reached from them by recursion.
	concreteTypes typeutil.Map

	// interfaceTypes maps each interface type to
	// the set of concrete types that implement it.
	// Keys are *types.Interface, values are unordered []types.Type.
	// Only interfaces used in "invoke"-mode CallInstructions are included.
	interfaceTypes typeutil.Map
}
124 | |
125 | // addReachable marks a function as potentially callable at run-time, |
126 | // and ensures that it gets processed. |
127 | func (r *rta) addReachable(f *ssa.Function, addrTaken bool) { |
128 | reachable := r.result.Reachable |
129 | n := len(reachable) |
130 | v := reachable[f] |
131 | if addrTaken { |
132 | v.AddrTaken = true |
133 | } |
134 | reachable[f] = v |
135 | if len(reachable) > n { |
136 | // First time seeing f. Add it to the worklist. |
137 | r.worklist = append(r.worklist, f) |
138 | } |
139 | } |
140 | |
141 | // addEdge adds the specified call graph edge, and marks it reachable. |
142 | // addrTaken indicates whether to mark the callee as "address-taken". |
143 | func (r *rta) addEdge(site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) { |
144 | r.addReachable(callee, addrTaken) |
145 | |
146 | if g := r.result.CallGraph; g != nil { |
147 | if site.Parent() == nil { |
148 | panic(site) |
149 | } |
150 | from := g.CreateNode(site.Parent()) |
151 | to := g.CreateNode(callee) |
152 | callgraph.AddEdge(from, site, to) |
153 | } |
154 | } |
155 | |
156 | // ---------- addrTakenFuncs × dynCallSites ---------- |
157 | |
158 | // visitAddrTakenFunc is called each time we encounter an address-taken function f. |
159 | func (r *rta) visitAddrTakenFunc(f *ssa.Function) { |
160 | // Create two-level map (Signature -> Function -> bool). |
161 | S := f.Signature |
162 | funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool) |
163 | if funcs == nil { |
164 | funcs = make(map[*ssa.Function]bool) |
165 | r.addrTakenFuncsBySig.Set(S, funcs) |
166 | } |
167 | if !funcs[f] { |
168 | // First time seeing f. |
169 | funcs[f] = true |
170 | |
171 | // If we've seen any dyncalls of this type, mark it reachable, |
172 | // and add call graph edges. |
173 | sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction) |
174 | for _, site := range sites { |
175 | r.addEdge(site, f, true) |
176 | } |
177 | } |
178 | } |
179 | |
180 | // visitDynCall is called each time we encounter a dynamic "call"-mode call. |
181 | func (r *rta) visitDynCall(site ssa.CallInstruction) { |
182 | S := site.Common().Signature() |
183 | |
184 | // Record the call site. |
185 | sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction) |
186 | r.dynCallSites.Set(S, append(sites, site)) |
187 | |
188 | // For each function of signature S that we know is address-taken, |
189 | // add an edge and mark it reachable. |
190 | funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool) |
191 | for g := range funcs { |
192 | r.addEdge(site, g, true) |
193 | } |
194 | } |
195 | |
196 | // ---------- concrete types × invoke sites ---------- |
197 | |
198 | // addInvokeEdge is called for each new pair (site, C) in the matrix. |
199 | func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) { |
200 | // Ascertain the concrete method of C to be called. |
201 | imethod := site.Common().Method |
202 | cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name())) |
203 | r.addEdge(site, cmethod, true) |
204 | } |
205 | |
206 | // visitInvoke is called each time the algorithm encounters an "invoke"-mode call. |
207 | func (r *rta) visitInvoke(site ssa.CallInstruction) { |
208 | I := site.Common().Value.Type().Underlying().(*types.Interface) |
209 | |
210 | // Record the invoke site. |
211 | sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction) |
212 | r.invokeSites.Set(I, append(sites, site)) |
213 | |
214 | // Add callgraph edge for each existing |
215 | // address-taken concrete type implementing I. |
216 | for _, C := range r.implementations(I) { |
217 | r.addInvokeEdge(site, C) |
218 | } |
219 | } |
220 | |
221 | // ---------- main algorithm ---------- |
222 | |
223 | // visitFunc processes function f. |
224 | func (r *rta) visitFunc(f *ssa.Function) { |
225 | var space [32]*ssa.Value // preallocate space for common case |
226 | |
227 | for _, b := range f.Blocks { |
228 | for _, instr := range b.Instrs { |
229 | rands := instr.Operands(space[:0]) |
230 | |
231 | switch instr := instr.(type) { |
232 | case ssa.CallInstruction: |
233 | call := instr.Common() |
234 | if call.IsInvoke() { |
235 | r.visitInvoke(instr) |
236 | } else if g := call.StaticCallee(); g != nil { |
237 | r.addEdge(instr, g, false) |
238 | } else if _, ok := call.Value.(*ssa.Builtin); !ok { |
239 | r.visitDynCall(instr) |
240 | } |
241 | |
242 | // Ignore the call-position operand when |
243 | // looking for address-taken Functions. |
244 | // Hack: assume this is rands[0]. |
245 | rands = rands[1:] |
246 | |
247 | case *ssa.MakeInterface: |
248 | r.addRuntimeType(instr.X.Type(), false) |
249 | } |
250 | |
251 | // Process all address-taken functions. |
252 | for _, op := range rands { |
253 | if g, ok := (*op).(*ssa.Function); ok { |
254 | r.visitAddrTakenFunc(g) |
255 | } |
256 | } |
257 | } |
258 | } |
259 | } |
260 | |
261 | // Analyze performs Rapid Type Analysis, starting at the specified root |
262 | // functions. It returns nil if no roots were specified. |
263 | // |
264 | // If buildCallGraph is true, Result.CallGraph will contain a call |
265 | // graph; otherwise, only the other fields (reachable functions) are |
266 | // populated. |
267 | func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result { |
268 | if len(roots) == 0 { |
269 | return nil |
270 | } |
271 | |
272 | r := &rta{ |
273 | result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })}, |
274 | prog: roots[0].Prog, |
275 | } |
276 | |
277 | if buildCallGraph { |
278 | // TODO(adonovan): change callgraph API to eliminate the |
279 | // notion of a distinguished root node. Some callgraphs |
280 | // have many roots, or none. |
281 | r.result.CallGraph = callgraph.New(roots[0]) |
282 | } |
283 | |
284 | hasher := typeutil.MakeHasher() |
285 | r.result.RuntimeTypes.SetHasher(hasher) |
286 | r.addrTakenFuncsBySig.SetHasher(hasher) |
287 | r.dynCallSites.SetHasher(hasher) |
288 | r.invokeSites.SetHasher(hasher) |
289 | r.concreteTypes.SetHasher(hasher) |
290 | r.interfaceTypes.SetHasher(hasher) |
291 | |
292 | // Visit functions, processing their instructions, and adding |
293 | // new functions to the worklist, until a fixed point is |
294 | // reached. |
295 | var shadow []*ssa.Function // for efficiency, we double-buffer the worklist |
296 | r.worklist = append(r.worklist, roots...) |
297 | for len(r.worklist) > 0 { |
298 | shadow, r.worklist = r.worklist, shadow[:0] |
299 | for _, f := range shadow { |
300 | r.visitFunc(f) |
301 | } |
302 | } |
303 | return r.result |
304 | } |
305 | |
306 | // interfaces(C) returns all currently known interfaces implemented by C. |
307 | func (r *rta) interfaces(C types.Type) []*types.Interface { |
308 | // Ascertain set of interfaces C implements |
309 | // and update 'implements' relation. |
310 | var ifaces []*types.Interface |
311 | r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) { |
312 | if I := I.(*types.Interface); types.Implements(C, I) { |
313 | concs, _ := concs.([]types.Type) |
314 | r.interfaceTypes.Set(I, append(concs, C)) |
315 | ifaces = append(ifaces, I) |
316 | } |
317 | }) |
318 | r.concreteTypes.Set(C, ifaces) |
319 | return ifaces |
320 | } |
321 | |
322 | // implementations(I) returns all currently known concrete types that implement I. |
323 | func (r *rta) implementations(I *types.Interface) []types.Type { |
324 | var concs []types.Type |
325 | if v := r.interfaceTypes.At(I); v != nil { |
326 | concs = v.([]types.Type) |
327 | } else { |
328 | // First time seeing this interface. |
329 | // Update the 'implements' relation. |
330 | r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) { |
331 | if types.Implements(C, I) { |
332 | ifaces, _ := ifaces.([]*types.Interface) |
333 | r.concreteTypes.Set(C, append(ifaces, I)) |
334 | concs = append(concs, C) |
335 | } |
336 | }) |
337 | r.interfaceTypes.Set(I, concs) |
338 | } |
339 | return concs |
340 | } |
341 | |
342 | // addRuntimeType is called for each concrete type that can be the |
343 | // dynamic type of some interface or reflect.Value. |
344 | // Adapted from needMethods in go/ssa/builder.go |
345 | func (r *rta) addRuntimeType(T types.Type, skip bool) { |
346 | if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok { |
347 | if skip && !prev { |
348 | r.result.RuntimeTypes.Set(T, skip) |
349 | } |
350 | return |
351 | } |
352 | r.result.RuntimeTypes.Set(T, skip) |
353 | |
354 | mset := r.prog.MethodSets.MethodSet(T) |
355 | |
356 | if _, ok := T.Underlying().(*types.Interface); !ok { |
357 | // T is a new concrete type. |
358 | for i, n := 0, mset.Len(); i < n; i++ { |
359 | sel := mset.At(i) |
360 | m := sel.Obj() |
361 | |
362 | if m.Exported() { |
363 | // Exported methods are always potentially callable via reflection. |
364 | r.addReachable(r.prog.MethodValue(sel), true) |
365 | } |
366 | } |
367 | |
368 | // Add callgraph edge for each existing dynamic |
369 | // "invoke"-mode call via that interface. |
370 | for _, I := range r.interfaces(T) { |
371 | sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction) |
372 | for _, site := range sites { |
373 | r.addInvokeEdge(site, T) |
374 | } |
375 | } |
376 | } |
377 | |
378 | // Precondition: T is not a method signature (*Signature with Recv()!=nil). |
379 | // Recursive case: skip => don't call makeMethods(T). |
380 | // Each package maintains its own set of types it has visited. |
381 | |
382 | var n *types.Named |
383 | switch T := T.(type) { |
384 | case *types.Named: |
385 | n = T |
386 | case *types.Pointer: |
387 | n, _ = T.Elem().(*types.Named) |
388 | } |
389 | if n != nil { |
390 | owner := n.Obj().Pkg() |
391 | if owner == nil { |
392 | return // built-in error type |
393 | } |
394 | } |
395 | |
396 | // Recursion over signatures of each exported method. |
397 | for i := 0; i < mset.Len(); i++ { |
398 | if mset.At(i).Obj().Exported() { |
399 | sig := mset.At(i).Type().(*types.Signature) |
400 | r.addRuntimeType(sig.Params(), true) // skip the Tuple itself |
401 | r.addRuntimeType(sig.Results(), true) // skip the Tuple itself |
402 | } |
403 | } |
404 | |
405 | switch t := T.(type) { |
406 | case *types.Basic: |
407 | // nop |
408 | |
409 | case *types.Interface: |
410 | // nop---handled by recursion over method set. |
411 | |
412 | case *types.Pointer: |
413 | r.addRuntimeType(t.Elem(), false) |
414 | |
415 | case *types.Slice: |
416 | r.addRuntimeType(t.Elem(), false) |
417 | |
418 | case *types.Chan: |
419 | r.addRuntimeType(t.Elem(), false) |
420 | |
421 | case *types.Map: |
422 | r.addRuntimeType(t.Key(), false) |
423 | r.addRuntimeType(t.Elem(), false) |
424 | |
425 | case *types.Signature: |
426 | if t.Recv() != nil { |
427 | panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv())) |
428 | } |
429 | r.addRuntimeType(t.Params(), true) // skip the Tuple itself |
430 | r.addRuntimeType(t.Results(), true) // skip the Tuple itself |
431 | |
432 | case *types.Named: |
433 | // A pointer-to-named type can be derived from a named |
434 | // type via reflection. It may have methods too. |
435 | r.addRuntimeType(types.NewPointer(T), false) |
436 | |
437 | // Consider 'type T struct{S}' where S has methods. |
438 | // Reflection provides no way to get from T to struct{S}, |
439 | // only to S, so the method set of struct{S} is unwanted, |
440 | // so set 'skip' flag during recursion. |
441 | r.addRuntimeType(t.Underlying(), true) |
442 | |
443 | case *types.Array: |
444 | r.addRuntimeType(t.Elem(), false) |
445 | |
446 | case *types.Struct: |
447 | for i, n := 0, t.NumFields(); i < n; i++ { |
448 | r.addRuntimeType(t.Field(i).Type(), false) |
449 | } |
450 | |
451 | case *types.Tuple: |
452 | for i, n := 0, t.Len(); i < n; i++ { |
453 | r.addRuntimeType(t.At(i).Type(), false) |
454 | } |
455 | |
456 | default: |
457 | panic(T) |
458 | } |
459 | } |
460 |
Members