gopls/go/pointer/analysis.go

1	// Copyright 2013 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	package pointer
6
7	// This file defines the main datatypes and Analyze function of the pointer analysis.
8
9	import (
10	"fmt"
11	"go/token"
12	"go/types"
13	"io"
14	"os"
15	"reflect"
16	"runtime"
17	"runtime/debug"
18	"sort"
19	"strings"
20
21	"golang.org/x/tools/go/callgraph"
22	"golang.org/x/tools/go/ssa"
23	"golang.org/x/tools/go/types/typeutil"
24	)
25
26	const (
27	// optimization options; enable all when committing
28	optRenumber = true // enable renumbering optimization (makes logs hard to read)
29	optHVN = true // enable pointer equivalence via Hash-Value Numbering
30
31	// debugging options; disable all when committing
32	debugHVN = false // enable assertions in HVN
33	debugHVNVerbose = false // enable extra HVN logging
34	debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
35	debugTimers = false // show running time of each phase
36	)
37
38	// object.flags bitmask values.
39	const (
40	otTagged = 1 << iota // type-tagged object
41	otIndirect // type-tagged object with indirect payload
42	otFunction // function object
43	)
44
45	// An object represents a contiguous block of memory to which some
46	// (generalized) pointer may point.
47	//
48	// (Note: most variables called 'obj' are not *objects but nodeids
49	// such that a.nodes[obj].obj != nil.)
50	type object struct {
51	// flags is a bitset of the node type (ot*) flags defined above.
52	flags uint32
53
54	// Number of following nodes belonging to the same "object"
55	// allocation. Zero for all other nodes.
56	size uint32
57
58	// data describes this object; it has one of these types:
59	//
60	// ssa.Value for an object allocated by an SSA operation.
61	// types.Type for an rtype instance object or *rtype-tagged object.
62	// string for an intrinsic object, e.g. the array behind os.Args.
63	// nil for an object allocated by an intrinsic.
64	// (cgn provides the identity of the intrinsic.)
65	data interface{}
66
67	// The call-graph node (=context) in which this object was allocated.
68	// May be nil for global objects: Global, Const, some Functions.
69	cgn *cgnode
70	}
71
72	// nodeid denotes a node.
73	// It is an index within analysis.nodes.
74	// We use small integers, not *node pointers, for many reasons:
75	// - they are smaller on 64-bit systems.
76	// - sets of them can be represented compactly in bitvectors or BDDs.
77	// - order matters; a field offset can be computed by simple addition.
78	type nodeid uint32
79
80	// A node is an equivalence class of memory locations.
81	// Nodes may be pointers, pointed-to locations, neither, or both.
82	//
83	// Nodes that are pointed-to locations ("labels") have an enclosing
84	// object (see analysis.enclosingObject).
85	type node struct {
86	// If non-nil, this node is the start of an object
87	// (addressable memory location).
88	// The following obj.size nodes implicitly belong to the object;
89	// they locate their object by scanning back.
90	obj *object
91
92	// The type of the field denoted by this node. Non-aggregate,
93	// unless this is an tagged.T node (i.e. the thing
94	// pointed to by an interface) in which case typ is that type.
95	typ types.Type
96
97	// subelement indicates which directly embedded subelement of
98	// an object of aggregate type (struct, tuple, array) this is.
99	subelement fieldInfo // e.g. ".a.b[].c"
100
101	// Solver state for the canonical node of this pointer-
102	// equivalence class. Each node is created with its own state
103	// but they become shared after HVN.
104	solve *solverState
105	}
106
107	// An analysis instance holds the state of a single pointer analysis problem.
108	type analysis struct {
109	config *Config // the client's control/observer interface
110	prog *ssa.Program // the program being analyzed
111	log io.Writer // log stream; nil to disable
112	panicNode nodeid // sink for panic, source for recover
113	nodes []*node // indexed by nodeid
114	flattenMemo map[types.Type][]*fieldInfo // memoization of flatten()
115	trackTypes map[types.Type]bool // memoization of shouldTrack()
116	constraints []constraint // set of constraints
117	cgnodes []*cgnode // all cgnodes
118	genq []*cgnode // queue of functions to generate constraints for
119	intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns
120	globalval map[ssa.Value]nodeid // node for each global ssa.Value
121	globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
122	localval map[ssa.Value]nodeid // node for each local ssa.Value
123	localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
124	atFuncs map[*ssa.Function]bool // address-taken functions (for presolver)
125	mapValues []nodeid // values of makemap objects (indirect in HVN)
126	work nodeset // solver's worklist
127	result *Result // results of the analysis
128	track track // pointerlike types whose aliasing we track
129	deltaSpace []int // working space for iterating over PTS deltas
130
131	// Reflection & intrinsics:
132	hasher typeutil.Hasher // cache of type hashes
133	reflectValueObj types.Object // type symbol for reflect.Value (if present)
134	reflectValueCall *ssa.Function // (reflect.Value).Call
135	reflectRtypeObj types.Object // *types.TypeName for reflect.rtype (if present)
136	reflectRtypePtr types.Pointer // reflect.rtype
137	reflectType *types.Named // reflect.Type
138	rtypes typeutil.Map // nodeid of canonical *rtype-tagged object for type T
139	reflectZeros typeutil.Map // nodeid of canonical T-tagged object for zero value
140	runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer
141	}
142
143	// enclosingObj returns the first node of the addressable memory
144	// object that encloses node id. Panic ensues if that node does not
145	// belong to any object.
146	func (a *analysis) enclosingObj(id nodeid) nodeid {
147	// Find previous node with obj != nil.
148	for i := id; i >= 0; i-- {
149	n := a.nodes[i]
150	if obj := n.obj; obj != nil {
151	if i+nodeid(obj.size) <= id {
152	break // out of bounds
153	}
154	return i
155	}
156	}
157	panic("node has no enclosing object")
158	}
159
160	// labelFor returns the Label for node id.
161	// Panic ensues if that node is not addressable.
162	func (a analysis) labelFor(id nodeid) Label {
163	return &Label{
164	obj: a.nodes[a.enclosingObj(id)].obj,
165	subelement: a.nodes[id].subelement,
166	}
167	}
168
169	func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) {
170	msg := fmt.Sprintf(format, args...)
171	if a.log != nil {
172	fmt.Fprintf(a.log, "%s: warning: %s\n", a.prog.Fset.Position(pos), msg)
173	}
174	a.result.Warnings = append(a.result.Warnings, Warning{pos, msg})
175	}
176
177	// computeTrackBits sets a.track to the necessary 'track' bits for the pointer queries.
178	func (a *analysis) computeTrackBits() {
179	if len(a.config.extendedQueries) != 0 {
180	// TODO(dh): only track the types necessary for the query.
181	a.track = trackAll
182	return
183	}
184	var queryTypes []types.Type
185	for v := range a.config.Queries {
186	queryTypes = append(queryTypes, v.Type())
187	}
188	for v := range a.config.IndirectQueries {
189	queryTypes = append(queryTypes, mustDeref(v.Type()))
190	}
191	for _, t := range queryTypes {
192	switch t.Underlying().(type) {
193	case *types.Chan:
194	a.track \|= trackChan
195	case *types.Map:
196	a.track \|= trackMap
197	case *types.Pointer:
198	a.track \|= trackPtr
199	case *types.Slice:
200	a.track \|= trackSlice
201	case *types.Interface:
202	a.track = trackAll
203	return
204	}
205	if rVObj := a.reflectValueObj; rVObj != nil && types.Identical(t, rVObj.Type()) {
206	a.track = trackAll
207	return
208	}
209	}
210	}
211
212	// Analyze runs the pointer analysis with the scope and options
213	// specified by config, and returns the (synthetic) root of the callgraph.
214	//
215	// Pointer analysis of a transitively closed well-typed program should
216	// always succeed. An error can occur only due to an internal bug.
217	func Analyze(config Config) (result Result, err error) {
218	if config.Mains == nil {
219	return nil, fmt.Errorf("no main/test packages to analyze (check $GOROOT/$GOPATH)")
220	}
221	defer func() {
222	if p := recover(); p != nil {
223	err = fmt.Errorf("internal error in pointer analysis: %v (please report this bug)", p)
224	fmt.Fprintln(os.Stderr, "Internal panic in pointer analysis:")
225	debug.PrintStack()
226	}
227	}()
228
229	a := &analysis{
230	config: config,
231	log: config.Log,
232	prog: config.prog(),
233	globalval: make(map[ssa.Value]nodeid),
234	globalobj: make(map[ssa.Value]nodeid),
235	flattenMemo: make(map[types.Type][]*fieldInfo),
236	trackTypes: make(map[types.Type]bool),
237	atFuncs: make(map[*ssa.Function]bool),
238	hasher: typeutil.MakeHasher(),
239	intrinsics: make(map[*ssa.Function]intrinsic),
240	result: &Result{
241	Queries: make(map[ssa.Value]Pointer),
242	IndirectQueries: make(map[ssa.Value]Pointer),
243	},
244	deltaSpace: make([]int, 0, 100),
245	}
246
247	if false {
248	a.log = os.Stderr // for debugging crashes; extremely verbose
249	}
250
251	if a.log != nil {
252	fmt.Fprintln(a.log, "==== Starting analysis")
253	}
254
255	// Pointer analysis requires a complete program for soundness.
256	// Check to prevent accidental misconfiguration.
257	for _, pkg := range a.prog.AllPackages() {
258	// (This only checks that the package scope is complete,
259	// not that func bodies exist, but it's a good signal.)
260	if !pkg.Pkg.Complete() {
261	return nil, fmt.Errorf(`pointer analysis requires a complete program yet package %q was incomplete`, pkg.Pkg.Path())
262	}
263	}
264
265	if reflect := a.prog.ImportedPackage("reflect"); reflect != nil {
266	rV := reflect.Pkg.Scope().Lookup("Value")
267	a.reflectValueObj = rV
268	a.reflectValueCall = a.prog.LookupMethod(rV.Type(), nil, "Call")
269	a.reflectType = reflect.Pkg.Scope().Lookup("Type").Type().(*types.Named)
270	a.reflectRtypeObj = reflect.Pkg.Scope().Lookup("rtype")
271	a.reflectRtypePtr = types.NewPointer(a.reflectRtypeObj.Type())
272
273	// Override flattening of reflect.Value, treating it like a basic type.
274	tReflectValue := a.reflectValueObj.Type()
275	a.flattenMemo[tReflectValue] = []*fieldInfo{{typ: tReflectValue}}
276
277	// Override shouldTrack of reflect.Value and *reflect.rtype.
278	// Always track pointers of these types.
279	a.trackTypes[tReflectValue] = true
280	a.trackTypes[a.reflectRtypePtr] = true
281
282	a.rtypes.SetHasher(a.hasher)
283	a.reflectZeros.SetHasher(a.hasher)
284	}
285	if runtime := a.prog.ImportedPackage("runtime"); runtime != nil {
286	a.runtimeSetFinalizer = runtime.Func("SetFinalizer")
287	}
288	a.computeTrackBits()
289
290	a.generate()
291	a.showCounts()
292
293	if optRenumber {
294	a.renumber()
295	}
296
297	N := len(a.nodes) // excludes solver-created nodes
298
299	if optHVN {
300	if debugHVNCrossCheck {
301	// Cross-check: run the solver once without
302	// optimization, once with, and compare the
303	// solutions.
304	savedConstraints := a.constraints
305
306	a.solve()
307	a.dumpSolution("A.pts", N)
308
309	// Restore.
310	a.constraints = savedConstraints
311	for _, n := range a.nodes {
312	n.solve = new(solverState)
313	}
314	a.nodes = a.nodes[:N]
315
316	// rtypes is effectively part of the solver state.
317	a.rtypes = typeutil.Map{}
318	a.rtypes.SetHasher(a.hasher)
319	}
320
321	a.hvn()
322	}
323
324	if debugHVNCrossCheck {
325	runtime.GC()
326	runtime.GC()
327	}
328
329	a.solve()
330
331	// Compare solutions.
332	if optHVN && debugHVNCrossCheck {
333	a.dumpSolution("B.pts", N)
334
335	if !diff("A.pts", "B.pts") {
336	return nil, fmt.Errorf("internal error: optimization changed solution")
337	}
338	}
339
340	// Create callgraph.Nodes in deterministic order.
341	if cg := a.result.CallGraph; cg != nil {
342	for _, caller := range a.cgnodes {
343	cg.CreateNode(caller.fn)
344	}
345	}
346
347	// Add dynamic edges to call graph.
348	var space [100]int
349	for _, caller := range a.cgnodes {
350	for _, site := range caller.sites {
351	for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
352	a.callEdge(caller, site, nodeid(callee))
353	}
354	}
355	}
356
357	return a.result, nil
358	}
359
360	// callEdge is called for each edge in the callgraph.
361	// calleeid is the callee's object node (has otFunction flag).
362	func (a analysis) callEdge(caller cgnode, site *callsite, calleeid nodeid) {
363	obj := a.nodes[calleeid].obj
364	if obj.flags&otFunction == 0 {
365	panic(fmt.Sprintf("callEdge %s -> n%d: not a function object", site, calleeid))
366	}
367	callee := obj.cgn
368
369	if cg := a.result.CallGraph; cg != nil {
370	// TODO(adonovan): opt: I would expect duplicate edges
371	// (to wrappers) to arise due to the elimination of
372	// context information, but I haven't observed any.
373	// Understand this better.
374	callgraph.AddEdge(cg.CreateNode(caller.fn), site.instr, cg.CreateNode(callee.fn))
375	}
376
377	if a.log != nil {
378	fmt.Fprintf(a.log, "\tcall edge %s -> %s\n", site, callee)
379	}
380
381	// Warn about calls to functions that are handled unsoundly.
382	// TODO(adonovan): de-dup these messages.
383	fn := callee.fn
384
385	// Warn about calls to non-intrinsic external functions.
386	if fn.Blocks == nil && a.findIntrinsic(fn) == nil {
387	a.warnf(site.pos(), "unsound call to unknown intrinsic: %s", fn)
388	a.warnf(fn.Pos(), " (declared here)")
389	}
390
391	// Warn about calls to generic function bodies.
392	if fn.TypeParams().Len() > 0 && len(fn.TypeArgs()) == 0 {
393	a.warnf(site.pos(), "unsound call to generic function body: %s (build with ssa.InstantiateGenerics)", fn)
394	a.warnf(fn.Pos(), " (declared here)")
395	}
396
397	// Warn about calls to instantiation wrappers of generics functions.
398	if fn.Origin() != nil && strings.HasPrefix(fn.Synthetic, "instantiation wrapper ") {
399	a.warnf(site.pos(), "unsound call to instantiation wrapper of generic: %s (build with ssa.InstantiateGenerics)", fn)
400	a.warnf(fn.Pos(), " (declared here)")
401	}
402	}
403
404	// dumpSolution writes the PTS solution to the specified file.
405	//
406	// It only dumps the nodes that existed before solving. The order in
407	// which solver-created nodes are created depends on pre-solver
408	// optimization, so we can't include them in the cross-check.
409	func (a *analysis) dumpSolution(filename string, N int) {
410	f, err := os.Create(filename)
411	if err != nil {
412	panic(err)
413	}
414	for id, n := range a.nodes[:N] {
415	if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
416	panic(err)
417	}
418	var sep string
419	for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
420	if l >= N {
421	break
422	}
423	fmt.Fprintf(f, "%s%d", sep, l)
424	sep = " "
425	}
426	fmt.Fprintf(f, "} : %s\n", n.typ)
427	}
428	if err := f.Close(); err != nil {
429	panic(err)
430	}
431	}
432
433	// showCounts logs the size of the constraint system. A typical
434	// optimized distribution is 65% copy, 13% load, 11% addr, 5%
435	// offsetAddr, 4% store, 2% others.
436	func (a *analysis) showCounts() {
437	if a.log != nil {
438	counts := make(map[reflect.Type]int)
439	for _, c := range a.constraints {
440	counts[reflect.TypeOf(c)]++
441	}
442	fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
443	var lines []string
444	for t, n := range counts {
445	line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
446	lines = append(lines, line)
447	}
448	sort.Sort(sort.Reverse(sort.StringSlice(lines)))
449	for _, line := range lines {
450	fmt.Fprintf(a.log, "\t%s\n", line)
451	}
452
453	fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
454
455	// Show number of pointer equivalence classes.
456	m := make(map[*solverState]bool)
457	for _, n := range a.nodes {
458	m[n.solve] = true
459	}
460	fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
461	}
462	}
463