1 | // Copyright 2021 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Package vta computes the call graph of a Go program using the Variable |
6 | // Type Analysis (VTA) algorithm originally described in “Practical Virtual |
7 | // Method Call Resolution for Java," Vijay Sundaresan, Laurie Hendren, |
8 | // Chrislain Razafimahefa, Raja Vallée-Rai, Patrick Lam, Etienne Gagnon, and |
9 | // Charles Godin. |
10 | // |
11 | // Note: this package is in experimental phase and its interface is |
12 | // subject to change. |
13 | // TODO(zpavlinovic): reiterate on documentation. |
14 | // |
15 | // The VTA algorithm overapproximates the set of types (and function literals) |
16 | // a variable can take during runtime by building a global type propagation |
17 | // graph and propagating types (and function literals) through the graph. |
18 | // |
19 | // A type propagation is a directed, labeled graph. A node can represent |
20 | // one of the following: |
21 | // - A field of a struct type. |
22 | // - A local (SSA) variable of a method/function. |
23 | // - All pointers to a non-interface type. |
24 | // - The return value of a method. |
25 | // - All elements in an array. |
26 | // - All elements in a slice. |
27 | // - All elements in a map. |
28 | // - All elements in a channel. |
29 | // - A global variable. |
30 | // |
31 | // In addition, the implementation used in this package introduces |
32 | // a few Go specific kinds of nodes: |
33 | // - (De)references of nested pointers to interfaces are modeled |
34 | // as a unique nestedPtrInterface node in the type propagation graph. |
35 | // - Each function literal is represented as a function node whose |
36 | // internal value is the (SSA) representation of the function. This |
37 | // is done to precisely infer flow of higher-order functions. |
38 | // |
39 | // Edges in the graph represent flow of types (and function literals) through |
40 | // the program. That is, the model 1) typing constraints that are induced by |
41 | // assignment statements or function and method calls and 2) higher-order flow |
42 | // of functions in the program. |
43 | // |
44 | // The labeling function maps each node to a set of types and functions that |
45 | // can intuitively reach the program construct the node represents. Initially, |
46 | // every node is assigned a type corresponding to the program construct it |
47 | // represents. Function nodes are also assigned the function they represent. |
48 | // The labeling function then propagates types and function through the graph. |
49 | // |
50 | // The result of VTA is a type propagation graph in which each node is labeled |
51 | // with a conservative overapproximation of the set of types (and functions) |
52 | // it may have. This information is then used to construct the call graph. |
53 | // For each unresolved call site, vta uses the set of types and functions |
54 | // reaching the node representing the call site to create a set of callees. |
55 | package vta |
56 | |
57 | // TODO(zpavlinovic): update VTA for how it handles generic function bodies and instantiation wrappers. |
58 | |
59 | import ( |
60 | "go/types" |
61 | |
62 | "golang.org/x/tools/go/callgraph" |
63 | "golang.org/x/tools/go/ssa" |
64 | ) |
65 | |
66 | // CallGraph uses the VTA algorithm to compute call graph for all functions |
67 | // f:true in funcs. VTA refines the results of initial call graph and uses it |
68 | // to establish interprocedural type flow. The resulting graph does not have |
69 | // a root node. |
70 | // |
71 | // CallGraph does not make any assumptions on initial types global variables |
72 | // and function/method inputs can have. CallGraph is then sound, modulo use of |
73 | // reflection and unsafe, if the initial call graph is sound. |
74 | func CallGraph(funcs map[*ssa.Function]bool, initial *callgraph.Graph) *callgraph.Graph { |
75 | vtaG, canon := typePropGraph(funcs, initial) |
76 | types := propagate(vtaG, canon) |
77 | |
78 | c := &constructor{types: types, initial: initial, cache: make(methodCache)} |
79 | return c.construct(funcs) |
80 | } |
81 | |
82 | // constructor type linearly traverses the input program |
83 | // and constructs a callgraph based on the results of the |
84 | // VTA type propagation phase. |
85 | type constructor struct { |
86 | types propTypeMap |
87 | cache methodCache |
88 | initial *callgraph.Graph |
89 | } |
90 | |
91 | func (c *constructor) construct(funcs map[*ssa.Function]bool) *callgraph.Graph { |
92 | cg := &callgraph.Graph{Nodes: make(map[*ssa.Function]*callgraph.Node)} |
93 | for f, in := range funcs { |
94 | if in { |
95 | c.constrct(cg, f) |
96 | } |
97 | } |
98 | return cg |
99 | } |
100 | |
101 | func (c *constructor) constrct(g *callgraph.Graph, f *ssa.Function) { |
102 | caller := g.CreateNode(f) |
103 | for _, call := range calls(f) { |
104 | for _, c := range c.callees(call) { |
105 | callgraph.AddEdge(caller, call, g.CreateNode(c)) |
106 | } |
107 | } |
108 | } |
109 | |
110 | // callees computes the set of functions to which VTA resolves `c`. The resolved |
111 | // functions are intersected with functions to which `initial` resolves `c`. |
112 | func (c *constructor) callees(call ssa.CallInstruction) []*ssa.Function { |
113 | cc := call.Common() |
114 | if cc.StaticCallee() != nil { |
115 | return []*ssa.Function{cc.StaticCallee()} |
116 | } |
117 | |
118 | // Skip builtins as they are not *ssa.Function. |
119 | if _, ok := cc.Value.(*ssa.Builtin); ok { |
120 | return nil |
121 | } |
122 | |
123 | // Cover the case of dynamic higher-order and interface calls. |
124 | return intersect(resolve(call, c.types, c.cache), siteCallees(call, c.initial)) |
125 | } |
126 | |
127 | // resolve returns a set of functions `c` resolves to based on the |
128 | // type propagation results in `types`. |
129 | func resolve(c ssa.CallInstruction, types propTypeMap, cache methodCache) []*ssa.Function { |
130 | n := local{val: c.Common().Value} |
131 | var funcs []*ssa.Function |
132 | for _, p := range types.propTypes(n) { |
133 | funcs = append(funcs, propFunc(p, c, cache)...) |
134 | } |
135 | return funcs |
136 | } |
137 | |
138 | // propFunc returns the functions modeled with the propagation type `p` |
139 | // assigned to call site `c`. If no such function exists, nil is returned. |
140 | func propFunc(p propType, c ssa.CallInstruction, cache methodCache) []*ssa.Function { |
141 | if p.f != nil { |
142 | return []*ssa.Function{p.f} |
143 | } |
144 | |
145 | if c.Common().Method == nil { |
146 | return nil |
147 | } |
148 | |
149 | return cache.methods(p.typ, c.Common().Method.Name(), c.Parent().Prog) |
150 | } |
151 | |
152 | // methodCache serves as a type -> method name -> methods |
153 | // cache when computing methods of a type using the |
154 | // ssa.Program.MethodSets and ssa.Program.MethodValue |
155 | // APIs. The cache is used to speed up querying of |
156 | // methods of a type as the mentioned APIs are expensive. |
157 | type methodCache map[types.Type]map[string][]*ssa.Function |
158 | |
159 | // methods returns methods of a type `t` named `name`. First consults |
160 | // `mc` and otherwise queries `prog` for the method. If no such method |
161 | // exists, nil is returned. |
162 | func (mc methodCache) methods(t types.Type, name string, prog *ssa.Program) []*ssa.Function { |
163 | if ms, ok := mc[t]; ok { |
164 | return ms[name] |
165 | } |
166 | |
167 | ms := make(map[string][]*ssa.Function) |
168 | mset := prog.MethodSets.MethodSet(t) |
169 | for i, n := 0, mset.Len(); i < n; i++ { |
170 | // f can be nil when t is an interface or some |
171 | // other type without any runtime methods. |
172 | if f := prog.MethodValue(mset.At(i)); f != nil { |
173 | ms[f.Name()] = append(ms[f.Name()], f) |
174 | } |
175 | } |
176 | mc[t] = ms |
177 | return ms[name] |
178 | } |
179 |
Members