1 | // Copyright 2013 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | package pointer |
6 | |
7 | // This file defines the main datatypes and Analyze function of the pointer analysis. |
8 | |
9 | import ( |
10 | "fmt" |
11 | "go/token" |
12 | "go/types" |
13 | "io" |
14 | "os" |
15 | "reflect" |
16 | "runtime" |
17 | "runtime/debug" |
18 | "sort" |
19 | "strings" |
20 | |
21 | "golang.org/x/tools/go/callgraph" |
22 | "golang.org/x/tools/go/ssa" |
23 | "golang.org/x/tools/go/types/typeutil" |
24 | ) |
25 | |
26 | const ( |
27 | // optimization options; enable all when committing |
28 | optRenumber = true // enable renumbering optimization (makes logs hard to read) |
29 | optHVN = true // enable pointer equivalence via Hash-Value Numbering |
30 | |
31 | // debugging options; disable all when committing |
32 | debugHVN = false // enable assertions in HVN |
33 | debugHVNVerbose = false // enable extra HVN logging |
34 | debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below) |
35 | debugTimers = false // show running time of each phase |
36 | ) |
37 | |
38 | // object.flags bitmask values. |
39 | const ( |
40 | otTagged = 1 << iota // type-tagged object |
41 | otIndirect // type-tagged object with indirect payload |
42 | otFunction // function object |
43 | ) |
44 | |
45 | // An object represents a contiguous block of memory to which some |
46 | // (generalized) pointer may point. |
47 | // |
48 | // (Note: most variables called 'obj' are not *objects but nodeids |
49 | // such that a.nodes[obj].obj != nil.) |
50 | type object struct { |
51 | // flags is a bitset of the node type (ot*) flags defined above. |
52 | flags uint32 |
53 | |
54 | // Number of following nodes belonging to the same "object" |
55 | // allocation. Zero for all other nodes. |
56 | size uint32 |
57 | |
58 | // data describes this object; it has one of these types: |
59 | // |
60 | // ssa.Value for an object allocated by an SSA operation. |
61 | // types.Type for an rtype instance object or *rtype-tagged object. |
62 | // string for an intrinsic object, e.g. the array behind os.Args. |
63 | // nil for an object allocated by an intrinsic. |
64 | // (cgn provides the identity of the intrinsic.) |
65 | data interface{} |
66 | |
67 | // The call-graph node (=context) in which this object was allocated. |
68 | // May be nil for global objects: Global, Const, some Functions. |
69 | cgn *cgnode |
70 | } |
71 | |
72 | // nodeid denotes a node. |
73 | // It is an index within analysis.nodes. |
74 | // We use small integers, not *node pointers, for many reasons: |
75 | // - they are smaller on 64-bit systems. |
76 | // - sets of them can be represented compactly in bitvectors or BDDs. |
77 | // - order matters; a field offset can be computed by simple addition. |
78 | type nodeid uint32 |
79 | |
80 | // A node is an equivalence class of memory locations. |
81 | // Nodes may be pointers, pointed-to locations, neither, or both. |
82 | // |
83 | // Nodes that are pointed-to locations ("labels") have an enclosing |
84 | // object (see analysis.enclosingObject). |
85 | type node struct { |
86 | // If non-nil, this node is the start of an object |
87 | // (addressable memory location). |
88 | // The following obj.size nodes implicitly belong to the object; |
89 | // they locate their object by scanning back. |
90 | obj *object |
91 | |
92 | // The type of the field denoted by this node. Non-aggregate, |
93 | // unless this is an tagged.T node (i.e. the thing |
94 | // pointed to by an interface) in which case typ is that type. |
95 | typ types.Type |
96 | |
97 | // subelement indicates which directly embedded subelement of |
98 | // an object of aggregate type (struct, tuple, array) this is. |
99 | subelement *fieldInfo // e.g. ".a.b[*].c" |
100 | |
101 | // Solver state for the canonical node of this pointer- |
102 | // equivalence class. Each node is created with its own state |
103 | // but they become shared after HVN. |
104 | solve *solverState |
105 | } |
106 | |
107 | // An analysis instance holds the state of a single pointer analysis problem. |
108 | type analysis struct { |
109 | config *Config // the client's control/observer interface |
110 | prog *ssa.Program // the program being analyzed |
111 | log io.Writer // log stream; nil to disable |
112 | panicNode nodeid // sink for panic, source for recover |
113 | nodes []*node // indexed by nodeid |
114 | flattenMemo map[types.Type][]*fieldInfo // memoization of flatten() |
115 | trackTypes map[types.Type]bool // memoization of shouldTrack() |
116 | constraints []constraint // set of constraints |
117 | cgnodes []*cgnode // all cgnodes |
118 | genq []*cgnode // queue of functions to generate constraints for |
119 | intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns |
120 | globalval map[ssa.Value]nodeid // node for each global ssa.Value |
121 | globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton |
122 | localval map[ssa.Value]nodeid // node for each local ssa.Value |
123 | localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton |
124 | atFuncs map[*ssa.Function]bool // address-taken functions (for presolver) |
125 | mapValues []nodeid // values of makemap objects (indirect in HVN) |
126 | work nodeset // solver's worklist |
127 | result *Result // results of the analysis |
128 | track track // pointerlike types whose aliasing we track |
129 | deltaSpace []int // working space for iterating over PTS deltas |
130 | |
131 | // Reflection & intrinsics: |
132 | hasher typeutil.Hasher // cache of type hashes |
133 | reflectValueObj types.Object // type symbol for reflect.Value (if present) |
134 | reflectValueCall *ssa.Function // (reflect.Value).Call |
135 | reflectRtypeObj types.Object // *types.TypeName for reflect.rtype (if present) |
136 | reflectRtypePtr *types.Pointer // *reflect.rtype |
137 | reflectType *types.Named // reflect.Type |
138 | rtypes typeutil.Map // nodeid of canonical *rtype-tagged object for type T |
139 | reflectZeros typeutil.Map // nodeid of canonical T-tagged object for zero value |
140 | runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer |
141 | } |
142 | |
143 | // enclosingObj returns the first node of the addressable memory |
144 | // object that encloses node id. Panic ensues if that node does not |
145 | // belong to any object. |
146 | func (a *analysis) enclosingObj(id nodeid) nodeid { |
147 | // Find previous node with obj != nil. |
148 | for i := id; i >= 0; i-- { |
149 | n := a.nodes[i] |
150 | if obj := n.obj; obj != nil { |
151 | if i+nodeid(obj.size) <= id { |
152 | break // out of bounds |
153 | } |
154 | return i |
155 | } |
156 | } |
157 | panic("node has no enclosing object") |
158 | } |
159 | |
160 | // labelFor returns the Label for node id. |
161 | // Panic ensues if that node is not addressable. |
162 | func (a *analysis) labelFor(id nodeid) *Label { |
163 | return &Label{ |
164 | obj: a.nodes[a.enclosingObj(id)].obj, |
165 | subelement: a.nodes[id].subelement, |
166 | } |
167 | } |
168 | |
169 | func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) { |
170 | msg := fmt.Sprintf(format, args...) |
171 | if a.log != nil { |
172 | fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg) |
173 | } |
174 | a.result.Warnings = append(a.result.Warnings, Warning{pos, msg}) |
175 | } |
176 | |
177 | // computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries. |
178 | func (a *analysis) computeTrackBits() { |
179 | if len(a.config.extendedQueries) != 0 { |
180 | // TODO(dh): only track the types necessary for the query. |
181 | a.track = trackAll |
182 | return |
183 | } |
184 | var queryTypes []types.Type |
185 | for v := range a.config.Queries { |
186 | queryTypes = append(queryTypes, v.Type()) |
187 | } |
188 | for v := range a.config.IndirectQueries { |
189 | queryTypes = append(queryTypes, mustDeref(v.Type())) |
190 | } |
191 | for _, t := range queryTypes { |
192 | switch t.Underlying().(type) { |
193 | case *types.Chan: |
194 | a.track |= trackChan |
195 | case *types.Map: |
196 | a.track |= trackMap |
197 | case *types.Pointer: |
198 | a.track |= trackPtr |
199 | case *types.Slice: |
200 | a.track |= trackSlice |
201 | case *types.Interface: |
202 | a.track = trackAll |
203 | return |
204 | } |
205 | if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) { |
206 | a.track = trackAll |
207 | return |
208 | } |
209 | } |
210 | } |
211 | |
212 | // Analyze runs the pointer analysis with the scope and options |
213 | // specified by config, and returns the (synthetic) root of the callgraph. |
214 | // |
215 | // Pointer analysis of a transitively closed well-typed program should |
216 | // always succeed. An error can occur only due to an internal bug. |
217 | func Analyze(config *Config) (result *Result, err error) { |
218 | if config.Mains == nil { |
219 | return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)") |
220 | } |
221 | defer func() { |
222 | if p := recover(); p != nil { |
223 | err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p) |
224 | fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:") |
225 | debug.PrintStack() |
226 | } |
227 | }() |
228 | |
229 | a := &analysis{ |
230 | config: config, |
231 | log: config.Log, |
232 | prog: config.prog(), |
233 | globalval: make(map[ssa.Value]nodeid), |
234 | globalobj: make(map[ssa.Value]nodeid), |
235 | flattenMemo: make(map[types.Type][]*fieldInfo), |
236 | trackTypes: make(map[types.Type]bool), |
237 | atFuncs: make(map[*ssa.Function]bool), |
238 | hasher: typeutil.MakeHasher(), |
239 | intrinsics: make(map[*ssa.Function]intrinsic), |
240 | result: &Result{ |
241 | Queries: make(map[ssa.Value]Pointer), |
242 | IndirectQueries: make(map[ssa.Value]Pointer), |
243 | }, |
244 | deltaSpace: make([]int, 0, 100), |
245 | } |
246 | |
247 | if false { |
248 | a.log = os.Stderr // for debugging crashes; extremely verbose |
249 | } |
250 | |
251 | if a.log != nil { |
252 | fmt.Fprintln(a.log, "==== Starting analysis") |
253 | } |
254 | |
255 | // Pointer analysis requires a complete program for soundness. |
256 | // Check to prevent accidental misconfiguration. |
257 | for _, pkg := range a.prog.AllPackages() { |
258 | // (This only checks that the package scope is complete, |
259 | // not that func bodies exist, but it's a good signal.) |
260 | if !pkg.Pkg.Complete() { |
261 | return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path()) |
262 | } |
263 | } |
264 | |
265 | if reflect := a.prog.ImportedPackage("reflect"); reflect != nil { |
266 | rV := reflect.Pkg.Scope().Lookup("Value") |
267 | a.reflectValueObj = rV |
268 | a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call") |
269 | a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named) |
270 | a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype") |
271 | a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type()) |
272 | |
273 | // Override flattening of reflect.Value, treating it like a basic type. |
274 | tReflectValue := a.reflectValueObj.Type() |
275 | a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}} |
276 | |
277 | // Override shouldTrack of reflect.Value and *reflect.rtype. |
278 | // Always track pointers of these types. |
279 | a.trackTypes[tReflectValue] = true |
280 | a.trackTypes[a.reflectRtypePtr] = true |
281 | |
282 | a.rtypes.SetHasher(a.hasher) |
283 | a.reflectZeros.SetHasher(a.hasher) |
284 | } |
285 | if runtime := a.prog.ImportedPackage("runtime"); runtime != nil { |
286 | a.runtimeSetFinalizer = runtime.Func("SetFinalizer") |
287 | } |
288 | a.computeTrackBits() |
289 | |
290 | a.generate() |
291 | a.showCounts() |
292 | |
293 | if optRenumber { |
294 | a.renumber() |
295 | } |
296 | |
297 | N := len(a.nodes) // excludes solver-created nodes |
298 | |
299 | if optHVN { |
300 | if debugHVNCrossCheck { |
301 | // Cross-check: run the solver once without |
302 | // optimization, once with, and compare the |
303 | // solutions. |
304 | savedConstraints := a.constraints |
305 | |
306 | a.solve() |
307 | a.dumpSolution("A.pts", N) |
308 | |
309 | // Restore. |
310 | a.constraints = savedConstraints |
311 | for _, n := range a.nodes { |
312 | n.solve = new(solverState) |
313 | } |
314 | a.nodes = a.nodes[:N] |
315 | |
316 | // rtypes is effectively part of the solver state. |
317 | a.rtypes = typeutil.Map{} |
318 | a.rtypes.SetHasher(a.hasher) |
319 | } |
320 | |
321 | a.hvn() |
322 | } |
323 | |
324 | if debugHVNCrossCheck { |
325 | runtime.GC() |
326 | runtime.GC() |
327 | } |
328 | |
329 | a.solve() |
330 | |
331 | // Compare solutions. |
332 | if optHVN && debugHVNCrossCheck { |
333 | a.dumpSolution("B.pts", N) |
334 | |
335 | if !diff("A.pts", "B.pts") { |
336 | return nil, fmt.Errorf("internal error: optimization changed solution") |
337 | } |
338 | } |
339 | |
340 | // Create callgraph.Nodes in deterministic order. |
341 | if cg := a.result.CallGraph; cg != nil { |
342 | for _, caller := range a.cgnodes { |
343 | cg.CreateNode(caller.fn) |
344 | } |
345 | } |
346 | |
347 | // Add dynamic edges to call graph. |
348 | var space [100]int |
349 | for _, caller := range a.cgnodes { |
350 | for _, site := range caller.sites { |
351 | for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) { |
352 | a.callEdge(caller, site, nodeid(callee)) |
353 | } |
354 | } |
355 | } |
356 | |
357 | return a.result, nil |
358 | } |
359 | |
360 | // callEdge is called for each edge in the callgraph. |
361 | // calleeid is the callee's object node (has otFunction flag). |
362 | func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) { |
363 | obj := a.nodes[calleeid].obj |
364 | if obj.flags&otFunction == 0 { |
365 | panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid)) |
366 | } |
367 | callee := obj.cgn |
368 | |
369 | if cg := a.result.CallGraph; cg != nil { |
370 | // TODO(adonovan): opt: I would expect duplicate edges |
371 | // (to wrappers) to arise due to the elimination of |
372 | // context information, but I haven't observed any. |
373 | // Understand this better. |
374 | callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn)) |
375 | } |
376 | |
377 | if a.log != nil { |
378 | fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee) |
379 | } |
380 | |
381 | // Warn about calls to functions that are handled unsoundly. |
382 | // TODO(adonovan): de-dup these messages. |
383 | fn := callee.fn |
384 | |
385 | // Warn about calls to non-intrinsic external functions. |
386 | if fn.Blocks == nil && a.findIntrinsic(fn) == nil { |
387 | a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn) |
388 | a.warnf(fn.Pos(), " (declared here)") |
389 | } |
390 | |
391 | // Warn about calls to generic function bodies. |
392 | if fn.TypeParams().Len() > 0 && len(fn.TypeArgs()) == 0 { |
393 | a.warnf(site.pos(), "unsound call to generic function body: %s (build with ssa.InstantiateGenerics)", fn) |
394 | a.warnf(fn.Pos(), " (declared here)") |
395 | } |
396 | |
397 | // Warn about calls to instantiation wrappers of generics functions. |
398 | if fn.Origin() != nil && strings.HasPrefix(fn.Synthetic, "instantiation wrapper ") { |
399 | a.warnf(site.pos(), "unsound call to instantiation wrapper of generic: %s (build with ssa.InstantiateGenerics)", fn) |
400 | a.warnf(fn.Pos(), " (declared here)") |
401 | } |
402 | } |
403 | |
404 | // dumpSolution writes the PTS solution to the specified file. |
405 | // |
406 | // It only dumps the nodes that existed before solving. The order in |
407 | // which solver-created nodes are created depends on pre-solver |
408 | // optimization, so we can't include them in the cross-check. |
409 | func (a *analysis) dumpSolution(filename string, N int) { |
410 | f, err := os.Create(filename) |
411 | if err != nil { |
412 | panic(err) |
413 | } |
414 | for id, n := range a.nodes[:N] { |
415 | if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil { |
416 | panic(err) |
417 | } |
418 | var sep string |
419 | for _, l := range n.solve.pts.AppendTo(a.deltaSpace) { |
420 | if l >= N { |
421 | break |
422 | } |
423 | fmt.Fprintf(f, "%s%d", sep, l) |
424 | sep = " " |
425 | } |
426 | fmt.Fprintf(f, "} : %s\n", n.typ) |
427 | } |
428 | if err := f.Close(); err != nil { |
429 | panic(err) |
430 | } |
431 | } |
432 | |
433 | // showCounts logs the size of the constraint system. A typical |
434 | // optimized distribution is 65% copy, 13% load, 11% addr, 5% |
435 | // offsetAddr, 4% store, 2% others. |
436 | func (a *analysis) showCounts() { |
437 | if a.log != nil { |
438 | counts := make(map[reflect.Type]int) |
439 | for _, c := range a.constraints { |
440 | counts[reflect.TypeOf(c)]++ |
441 | } |
442 | fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints)) |
443 | var lines []string |
444 | for t, n := range counts { |
445 | line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t) |
446 | lines = append(lines, line) |
447 | } |
448 | sort.Sort(sort.Reverse(sort.StringSlice(lines))) |
449 | for _, line := range lines { |
450 | fmt.Fprintf(a.log, "\t%s\n", line) |
451 | } |
452 | |
453 | fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes)) |
454 | |
455 | // Show number of pointer equivalence classes. |
456 | m := make(map[*solverState]bool) |
457 | for _, n := range a.nodes { |
458 | m[n.solve] = true |
459 | } |
460 | fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m)) |
461 | } |
462 | } |
463 |
Members