gopls/internal/fuzzy/input.go

1	// Copyright 2019 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	package fuzzy
6
7	import (
8	"unicode"
9	)
10
// RuneRole specifies the role of a rune in the context of an input.
// RuneRoles produces one RuneRole per byte of a candidate.
type RuneRole byte

const (
	// RNone specifies a rune without any role in the input: an ASCII byte that is
	// neither a letter, a digit, nor a segment separator (e.g. whitespace, '_' or '/').
	// Non-ASCII bytes never get this role; RuneRoles classifies them like lower-case letters.
	RNone RuneRole = iota
	// RSep specifies a rune with the role of segment separator ('.' or ':').
	RSep
	// RTail specifies a rune which is a lower-case tail in a word in the input.
	// Digits and non-ASCII bytes are handled like lower-case letters.
	RTail
	// RUCTail specifies a rune which is an upper-case tail in a word in the input,
	// i.e. an upper-case letter that continues a run of upper-case letters.
	RUCTail
	// RHead specifies a rune which is the first character in a word in the input.
	RHead
)
26
27	// RuneRoles detects the roles of each byte rune in an input string and stores it in the output
28	// slice. The rune role depends on the input type. Stops when it parsed all the runes in the string
29	// or when it filled the output. If output is nil, then it gets created.
30	func RuneRoles(candidate []byte, reuse []RuneRole) []RuneRole {
31	var output []RuneRole
32	if cap(reuse) < len(candidate) {
33	output = make([]RuneRole, 0, len(candidate))
34	} else {
35	output = reuse[:0]
36	}
37
38	prev, prev2 := rtNone, rtNone
39	for i := 0; i < len(candidate); i++ {
40	r := rune(candidate[i])
41
42	role := RNone
43
44	curr := rtLower
45	if candidate[i] <= unicode.MaxASCII {
46	curr = runeType(rt[candidate[i]] - '0')
47	}
48
49	if curr == rtLower {
50	if prev == rtNone \|\| prev == rtPunct {
51	role = RHead
52	} else {
53	role = RTail
54	}
55	} else if curr == rtUpper {
56	role = RHead
57
58	if prev == rtUpper {
59	// This and previous characters are both upper case.
60
61	if i+1 == len(candidate) {
62	// This is last character, previous was also uppercase -> this is UCTail
63	// i.e., (current char is C): aBC / BC / ABC
64	role = RUCTail
65	}
66	}
67	} else if curr == rtPunct {
68	switch r {
69	case '.', ':':
70	role = RSep
71	}
72	}
73	if curr != rtLower {
74	if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead \|\| output[i-2] == RUCTail) {
75	// The previous two characters were uppercase. The current one is not a lower case, so the
76	// previous one can't be a HEAD. Make it a UCTail.
77	// i.e., (last char is current char - B must be a UCTail): ABC / ZABC / AB.
78	output[i-1] = RUCTail
79	}
80	}
81
82	output = append(output, role)
83	prev2 = prev
84	prev = curr
85	}
86	return output
87	}
88
// runeType is the coarse character class of a single byte. RuneRoles derives
// each byte's RuneRole from its runeType and the types of the preceding bytes.
type runeType byte

const (
	// rtNone is the class of bytes that belong to no other class.
	rtNone runeType = iota
	// rtPunct is the class of the punctuation bytes '.', '/' and ':'.
	rtPunct
	// rtLower is the class of 'a'-'z' and '0'-'9'. RuneRoles also uses it for
	// every non-ASCII byte.
	rtLower
	// rtUpper is the class of 'A'-'Z'.
	rtUpper
)

// rt is a lookup table indexed by ASCII byte value. The character at index b is
// the decimal digit of the runeType constant for byte b, so the type is
// recovered with runeType(rt[b] - '0').
const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"
99
100	// LastSegment returns the substring representing the last segment from the input, where each
101	// byte has an associated RuneRole in the roles slice. This makes sense only for inputs of Symbol
102	// or Filename type.
103	func LastSegment(input string, roles []RuneRole) string {
104	// Exclude ending separators.
105	end := len(input) - 1
106	for end >= 0 && roles[end] == RSep {
107	end--
108	}
109	if end < 0 {
110	return ""
111	}
112
113	start := end - 1
114	for start >= 0 && roles[start] != RSep {
115	start--
116	}
117
118	return input[start+1 : end+1]
119	}
120
// fromChunks concatenates chunks into buffer and returns the filled prefix of
// buffer. At most cap(buffer) bytes are written; anything beyond that is
// silently dropped. The contents are always written starting at buffer[0],
// regardless of buffer's current length.
func fromChunks(chunks []string, buffer []byte) []byte {
	// Expose the full capacity so that a buffer with len < cap can be filled
	// without indexing past its length.
	buffer = buffer[:cap(buffer)]

	n := 0
	for _, chunk := range chunks {
		if n == len(buffer) {
			// Buffer is full; remaining chunks cannot contribute anything.
			break
		}
		n += copy(buffer[n:], chunk)
	}
	return buffer[:n]
}
135
// toLower returns a lower-cased copy of input. Only the ASCII letters 'A'-'Z'
// are converted; every other byte, including non-ASCII bytes, is copied
// unmodified.
//
// The result always has length len(input). It is stored in the backing array of
// reuse when cap(reuse) >= len(input) (regardless of reuse's current length);
// otherwise a new slice is allocated.
func toLower(input []byte, reuse []byte) []byte {
	var output []byte
	if cap(reuse) < len(input) {
		output = make([]byte, len(input))
	} else {
		// Reslice to the needed length so a reuse buffer with len < cap can
		// be written to without indexing past its length.
		output = reuse[:len(input)]
	}

	for i, c := range input {
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		output[i] = c
	}
	return output
}
157
// WordConsumer defines a consumer for a word delimited by the [start,end) byte offsets in an input
// (start is inclusive, end is exclusive). Words invokes it once for every word it finds.
type WordConsumer func(start, end int)
161
162	// Words find word delimiters in an input based on its bytes' mappings to rune roles. The offset
163	// delimiters for each word are fed to the provided consumer function.
164	func Words(roles []RuneRole, consume WordConsumer) {
165	var wordStart int
166	for i, r := range roles {
167	switch r {
168	case RUCTail, RTail:
169	case RHead, RNone, RSep:
170	if i != wordStart {
171	consume(wordStart, i)
172	}
173	wordStart = i
174	if r != RHead {
175	// Skip this character.
176	wordStart = i + 1
177	}
178	}
179	}
180	if wordStart != len(roles) {
181	consume(wordStart, len(roles))
182	}
183	}
184

Members

GoPLS Viewer