gopls/go/callgraph/rta/rta.go

1	// Copyright 2013 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	// This package provides Rapid Type Analysis (RTA) for Go, a fast
6	// algorithm for call graph construction and discovery of reachable code
7	// (and hence dead code) and runtime types. The algorithm was first
8	// described in:
9	//
10	// David F. Bacon and Peter F. Sweeney. 1996.
11	// Fast static analysis of C++ virtual function calls. (OOPSLA '96)
12	// http://doi.acm.org/10.1145/236337.236371
13	//
14	// The algorithm uses dynamic programming to tabulate the cross-product
15	// of the set of known "address taken" functions with the set of known
16	// dynamic calls of the same type. As each new address-taken function
17	// is discovered, call graph edges are added from each known callsite,
18	// and as each new call site is discovered, call graph edges are added
19	// from it to each known address-taken function.
20	//
21	// A similar approach is used for dynamic calls via interfaces: it
22	// tabulates the cross-product of the set of known "runtime types",
23	// i.e. types that may appear in an interface value, or be derived from
24	// one via reflection, with the set of known "invoke"-mode dynamic
25	// calls. As each new "runtime type" is discovered, call edges are
26	// added from the known call sites, and as each new call site is
27	// discovered, call graph edges are added to each compatible
28	// method.
29	//
30	// In addition, we must consider all exported methods of any runtime type
31	// as reachable, since they may be called via reflection.
32	//
33	// Each time a newly added call edge causes a new function to become
34	// reachable, the code of that function is analyzed for more call sites,
35	// address-taken functions, and runtime types. The process continues
36	// until a fixed point is achieved.
37	//
38	// The resulting call graph is less precise than one produced by pointer
39	// analysis, but the algorithm is much faster. For example, running the
40	// cmd/callgraph tool on its own source takes ~2.1s for RTA and ~5.4s
41	// for points-to analysis.
42	package rta // import "golang.org/x/tools/go/callgraph/rta"
43
44	// TODO(adonovan): test it by connecting it to the interpreter and
45	// replacing all "unreachable" functions by a special intrinsic, and
46	// ensure that that intrinsic is never called.
47
48	// TODO(zpavlinovic): decide if the clients must use ssa.InstantiateGenerics
49	// mode when building programs with generics. It might be possible to
50	// extend rta to accurately support generics with just ssa.BuilderMode(0).
51
52	import (
53	"fmt"
54	"go/types"
55
56	"golang.org/x/tools/go/callgraph"
57	"golang.org/x/tools/go/ssa"
58	"golang.org/x/tools/go/types/typeutil"
59	)
60
61	// A Result holds the results of Rapid Type Analysis, which includes the
62	// set of reachable functions/methods, runtime types, and the call graph.
63	type Result struct {
64	// CallGraph is the discovered callgraph.
65	// It does not include edges for calls made via reflection.
66	CallGraph *callgraph.Graph
67
68	// Reachable contains the set of reachable functions and methods.
69	// This includes exported methods of runtime types, since
70	// they may be accessed via reflection.
71	// The value indicates whether the function is address-taken.
72	//
73	// (We wrap the bool in a struct to avoid inadvertent use of
74	// "if Reachable[f] {" to test for set membership.)
75	Reachable map[*ssa.Function]struct{ AddrTaken bool }
76
77	// RuntimeTypes contains the set of types that are needed at
78	// runtime, for interfaces or reflection.
79	//
80	// The value indicates whether the type is inaccessible to reflection.
81	// Consider:
82	// type A struct{B}
83	// fmt.Println(new(A))
84	// Types *A, A and B are accessible to reflection, but the unnamed
85	// type struct{B} is not.
86	RuntimeTypes typeutil.Map
87	}
88
89	// Working state of the RTA algorithm.
90	type rta struct {
91	result *Result
92
93	prog *ssa.Program
94
95	worklist []*ssa.Function // list of functions to visit
96
97	// addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature.
98	// Keys are types.Signature, values are map[ssa.Function]bool sets.
99	addrTakenFuncsBySig typeutil.Map
100
101	// dynCallSites contains all dynamic "call"-mode call sites, grouped by signature.
102	// Keys are *types.Signature, values are unordered []ssa.CallInstruction.
103	dynCallSites typeutil.Map
104
105	// invokeSites contains all "invoke"-mode call sites, grouped by interface.
106	// Keys are types.Interface (never types.Named),
107	// Values are unordered []ssa.CallInstruction sets.
108	invokeSites typeutil.Map
109
110	// The following two maps together define the subset of the
111	// m:n "implements" relation needed by the algorithm.
112
113	// concreteTypes maps each concrete type to the set of interfaces that it implements.
114	// Keys are types.Type, values are unordered []*types.Interface.
115	// Only concrete types used as MakeInterface operands are included.
116	concreteTypes typeutil.Map
117
118	// interfaceTypes maps each interface type to
119	// the set of concrete types that implement it.
120	// Keys are *types.Interface, values are unordered []types.Type.
121	// Only interfaces used in "invoke"-mode CallInstructions are included.
122	interfaceTypes typeutil.Map
123	}
124
125	// addReachable marks a function as potentially callable at run-time,
126	// and ensures that it gets processed.
127	func (r rta) addReachable(f ssa.Function, addrTaken bool) {
128	reachable := r.result.Reachable
129	n := len(reachable)
130	v := reachable[f]
131	if addrTaken {
132	v.AddrTaken = true
133	}
134	reachable[f] = v
135	if len(reachable) > n {
136	// First time seeing f. Add it to the worklist.
137	r.worklist = append(r.worklist, f)
138	}
139	}
140
141	// addEdge adds the specified call graph edge, and marks it reachable.
142	// addrTaken indicates whether to mark the callee as "address-taken".
143	func (r rta) addEdge(site ssa.CallInstruction, callee ssa.Function, addrTaken bool) {
144	r.addReachable(callee, addrTaken)
145
146	if g := r.result.CallGraph; g != nil {
147	if site.Parent() == nil {
148	panic(site)
149	}
150	from := g.CreateNode(site.Parent())
151	to := g.CreateNode(callee)
152	callgraph.AddEdge(from, site, to)
153	}
154	}
155
156	// ---------- addrTakenFuncs × dynCallSites ----------
157
158	// visitAddrTakenFunc is called each time we encounter an address-taken function f.
159	func (r rta) visitAddrTakenFunc(f ssa.Function) {
160	// Create two-level map (Signature -> Function -> bool).
161	S := f.Signature
162	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
163	if funcs == nil {
164	funcs = make(map[*ssa.Function]bool)
165	r.addrTakenFuncsBySig.Set(S, funcs)
166	}
167	if !funcs[f] {
168	// First time seeing f.
169	funcs[f] = true
170
171	// If we've seen any dyncalls of this type, mark it reachable,
172	// and add call graph edges.
173	sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
174	for _, site := range sites {
175	r.addEdge(site, f, true)
176	}
177	}
178	}
179
180	// visitDynCall is called each time we encounter a dynamic "call"-mode call.
181	func (r *rta) visitDynCall(site ssa.CallInstruction) {
182	S := site.Common().Signature()
183
184	// Record the call site.
185	sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction)
186	r.dynCallSites.Set(S, append(sites, site))
187
188	// For each function of signature S that we know is address-taken,
189	// add an edge and mark it reachable.
190	funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool)
191	for g := range funcs {
192	r.addEdge(site, g, true)
193	}
194	}
195
196	// ---------- concrete types × invoke sites ----------
197
198	// addInvokeEdge is called for each new pair (site, C) in the matrix.
199	func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) {
200	// Ascertain the concrete method of C to be called.
201	imethod := site.Common().Method
202	cmethod := r.prog.MethodValue(r.prog.MethodSets.MethodSet(C).Lookup(imethod.Pkg(), imethod.Name()))
203	r.addEdge(site, cmethod, true)
204	}
205
206	// visitInvoke is called each time the algorithm encounters an "invoke"-mode call.
207	func (r *rta) visitInvoke(site ssa.CallInstruction) {
208	I := site.Common().Value.Type().Underlying().(*types.Interface)
209
210	// Record the invoke site.
211	sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
212	r.invokeSites.Set(I, append(sites, site))
213
214	// Add callgraph edge for each existing
215	// address-taken concrete type implementing I.
216	for _, C := range r.implementations(I) {
217	r.addInvokeEdge(site, C)
218	}
219	}
220
221	// ---------- main algorithm ----------
222
223	// visitFunc processes function f.
224	func (r rta) visitFunc(f ssa.Function) {
225	var space [32]*ssa.Value // preallocate space for common case
226
227	for _, b := range f.Blocks {
228	for _, instr := range b.Instrs {
229	rands := instr.Operands(space[:0])
230
231	switch instr := instr.(type) {
232	case ssa.CallInstruction:
233	call := instr.Common()
234	if call.IsInvoke() {
235	r.visitInvoke(instr)
236	} else if g := call.StaticCallee(); g != nil {
237	r.addEdge(instr, g, false)
238	} else if _, ok := call.Value.(*ssa.Builtin); !ok {
239	r.visitDynCall(instr)
240	}
241
242	// Ignore the call-position operand when
243	// looking for address-taken Functions.
244	// Hack: assume this is rands[0].
245	rands = rands[1:]
246
247	case *ssa.MakeInterface:
248	r.addRuntimeType(instr.X.Type(), false)
249	}
250
251	// Process all address-taken functions.
252	for _, op := range rands {
253	if g, ok := (op).(ssa.Function); ok {
254	r.visitAddrTakenFunc(g)
255	}
256	}
257	}
258	}
259	}
260
261	// Analyze performs Rapid Type Analysis, starting at the specified root
262	// functions. It returns nil if no roots were specified.
263	//
264	// If buildCallGraph is true, Result.CallGraph will contain a call
265	// graph; otherwise, only the other fields (reachable functions) are
266	// populated.
267	func Analyze(roots []ssa.Function, buildCallGraph bool) Result {
268	if len(roots) == 0 {
269	return nil
270	}
271
272	r := &rta{
273	result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })},
274	prog: roots[0].Prog,
275	}
276
277	if buildCallGraph {
278	// TODO(adonovan): change callgraph API to eliminate the
279	// notion of a distinguished root node. Some callgraphs
280	// have many roots, or none.
281	r.result.CallGraph = callgraph.New(roots[0])
282	}
283
284	hasher := typeutil.MakeHasher()
285	r.result.RuntimeTypes.SetHasher(hasher)
286	r.addrTakenFuncsBySig.SetHasher(hasher)
287	r.dynCallSites.SetHasher(hasher)
288	r.invokeSites.SetHasher(hasher)
289	r.concreteTypes.SetHasher(hasher)
290	r.interfaceTypes.SetHasher(hasher)
291
292	// Visit functions, processing their instructions, and adding
293	// new functions to the worklist, until a fixed point is
294	// reached.
295	var shadow []*ssa.Function // for efficiency, we double-buffer the worklist
296	r.worklist = append(r.worklist, roots...)
297	for len(r.worklist) > 0 {
298	shadow, r.worklist = r.worklist, shadow[:0]
299	for _, f := range shadow {
300	r.visitFunc(f)
301	}
302	}
303	return r.result
304	}
305
306	// interfaces(C) returns all currently known interfaces implemented by C.
307	func (r rta) interfaces(C types.Type) []types.Interface {
308	// Ascertain set of interfaces C implements
309	// and update 'implements' relation.
310	var ifaces []*types.Interface
311	r.interfaceTypes.Iterate(func(I types.Type, concs interface{}) {
312	if I := I.(*types.Interface); types.Implements(C, I) {
313	concs, _ := concs.([]types.Type)
314	r.interfaceTypes.Set(I, append(concs, C))
315	ifaces = append(ifaces, I)
316	}
317	})
318	r.concreteTypes.Set(C, ifaces)
319	return ifaces
320	}
321
322	// implementations(I) returns all currently known concrete types that implement I.
323	func (r rta) implementations(I types.Interface) []types.Type {
324	var concs []types.Type
325	if v := r.interfaceTypes.At(I); v != nil {
326	concs = v.([]types.Type)
327	} else {
328	// First time seeing this interface.
329	// Update the 'implements' relation.
330	r.concreteTypes.Iterate(func(C types.Type, ifaces interface{}) {
331	if types.Implements(C, I) {
332	ifaces, _ := ifaces.([]*types.Interface)
333	r.concreteTypes.Set(C, append(ifaces, I))
334	concs = append(concs, C)
335	}
336	})
337	r.interfaceTypes.Set(I, concs)
338	}
339	return concs
340	}
341
342	// addRuntimeType is called for each concrete type that can be the
343	// dynamic type of some interface or reflect.Value.
344	// Adapted from needMethods in go/ssa/builder.go
345	func (r *rta) addRuntimeType(T types.Type, skip bool) {
346	if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok {
347	if skip && !prev {
348	r.result.RuntimeTypes.Set(T, skip)
349	}
350	return
351	}
352	r.result.RuntimeTypes.Set(T, skip)
353
354	mset := r.prog.MethodSets.MethodSet(T)
355
356	if _, ok := T.Underlying().(*types.Interface); !ok {
357	// T is a new concrete type.
358	for i, n := 0, mset.Len(); i < n; i++ {
359	sel := mset.At(i)
360	m := sel.Obj()
361
362	if m.Exported() {
363	// Exported methods are always potentially callable via reflection.
364	r.addReachable(r.prog.MethodValue(sel), true)
365	}
366	}
367
368	// Add callgraph edge for each existing dynamic
369	// "invoke"-mode call via that interface.
370	for _, I := range r.interfaces(T) {
371	sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction)
372	for _, site := range sites {
373	r.addInvokeEdge(site, T)
374	}
375	}
376	}
377
378	// Precondition: T is not a method signature (*Signature with Recv()!=nil).
379	// Recursive case: skip => don't call makeMethods(T).
380	// Each package maintains its own set of types it has visited.
381
382	var n *types.Named
383	switch T := T.(type) {
384	case *types.Named:
385	n = T
386	case *types.Pointer:
387	n, _ = T.Elem().(*types.Named)
388	}
389	if n != nil {
390	owner := n.Obj().Pkg()
391	if owner == nil {
392	return // built-in error type
393	}
394	}
395
396	// Recursion over signatures of each exported method.
397	for i := 0; i < mset.Len(); i++ {
398	if mset.At(i).Obj().Exported() {
399	sig := mset.At(i).Type().(*types.Signature)
400	r.addRuntimeType(sig.Params(), true) // skip the Tuple itself
401	r.addRuntimeType(sig.Results(), true) // skip the Tuple itself
402	}
403	}
404
405	switch t := T.(type) {
406	case *types.Basic:
407	// nop
408
409	case *types.Interface:
410	// nop---handled by recursion over method set.
411
412	case *types.Pointer:
413	r.addRuntimeType(t.Elem(), false)
414
415	case *types.Slice:
416	r.addRuntimeType(t.Elem(), false)
417
418	case *types.Chan:
419	r.addRuntimeType(t.Elem(), false)
420
421	case *types.Map:
422	r.addRuntimeType(t.Key(), false)
423	r.addRuntimeType(t.Elem(), false)
424
425	case *types.Signature:
426	if t.Recv() != nil {
427	panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv()))
428	}
429	r.addRuntimeType(t.Params(), true) // skip the Tuple itself
430	r.addRuntimeType(t.Results(), true) // skip the Tuple itself
431
432	case *types.Named:
433	// A pointer-to-named type can be derived from a named
434	// type via reflection. It may have methods too.
435	r.addRuntimeType(types.NewPointer(T), false)
436
437	// Consider 'type T struct{S}' where S has methods.
438	// Reflection provides no way to get from T to struct{S},
439	// only to S, so the method set of struct{S} is unwanted,
440	// so set 'skip' flag during recursion.
441	r.addRuntimeType(t.Underlying(), true)
442
443	case *types.Array:
444	r.addRuntimeType(t.Elem(), false)
445
446	case *types.Struct:
447	for i, n := 0, t.NumFields(); i < n; i++ {
448	r.addRuntimeType(t.Field(i).Type(), false)
449	}
450
451	case *types.Tuple:
452	for i, n := 0, t.Len(); i < n; i++ {
453	r.addRuntimeType(t.At(i).Type(), false)
454	}
455
456	default:
457	panic(T)
458	}
459	}
460