// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file implements FormatSelections and FormatText.
// FormatText is used to HTML-format Go and non-Go source
// text with line numbers and highlighted sections. It is
// built on top of FormatSelections, a generic formatter
// for "selected" text.
10 | |
11 | package godoc |
12 | |
13 | import ( |
14 | "fmt" |
15 | "go/scanner" |
16 | "go/token" |
17 | "io" |
18 | "regexp" |
19 | "strconv" |
20 | "text/template" |
21 | ) |
22 | |
23 | // ---------------------------------------------------------------------------- |
24 | // Implementation of FormatSelections |
25 | |
// Segment is a half-open interval [start, end) of offsets into a text.
// Its zero value is an empty segment and can be used as is.
type Segment struct {
	start, end int
}

// isEmpty reports whether seg covers no text at all, that is,
// whether its end does not lie beyond its start.
func (seg *Segment) isEmpty() bool {
	return seg.end <= seg.start
}
33 | |
34 | // A Selection is an "iterator" function returning a text segment. |
35 | // Repeated calls to a selection return consecutive, non-overlapping, |
36 | // non-empty segments, followed by an infinite sequence of empty |
37 | // segments. The first empty segment marks the end of the selection. |
38 | type Selection func() Segment |
39 | |
40 | // A LinkWriter writes some start or end "tag" to w for the text offset offs. |
41 | // It is called by FormatSelections at the start or end of each link segment. |
42 | type LinkWriter func(w io.Writer, offs int, start bool) |
43 | |
44 | // A SegmentWriter formats a text according to selections and writes it to w. |
45 | // The selections parameter is a bit set indicating which selections provided |
46 | // to FormatSelections overlap with the text segment: If the n'th bit is set |
47 | // in selections, the n'th selection provided to FormatSelections is overlapping |
48 | // with the text. |
49 | type SegmentWriter func(w io.Writer, text []byte, selections int) |
50 | |
51 | // FormatSelections takes a text and writes it to w using link and segment |
52 | // writers lw and sw as follows: lw is invoked for consecutive segment starts |
53 | // and ends as specified through the links selection, and sw is invoked for |
54 | // consecutive segments of text overlapped by the same selections as specified |
55 | // by selections. The link writer lw may be nil, in which case the links |
56 | // Selection is ignored. |
57 | func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) { |
58 | // If we have a link writer, make the links |
59 | // selection the last entry in selections |
60 | if lw != nil { |
61 | selections = append(selections, links) |
62 | } |
63 | |
64 | // compute the sequence of consecutive segment changes |
65 | changes := newMerger(selections) |
66 | |
67 | // The i'th bit in bitset indicates that the text |
68 | // at the current offset is covered by selections[i]. |
69 | bitset := 0 |
70 | lastOffs := 0 |
71 | |
72 | // Text segments are written in a delayed fashion |
73 | // such that consecutive segments belonging to the |
74 | // same selection can be combined (peephole optimization). |
75 | // last describes the last segment which has not yet been written. |
76 | var last struct { |
77 | begin, end int // valid if begin < end |
78 | bitset int |
79 | } |
80 | |
81 | // flush writes the last delayed text segment |
82 | flush := func() { |
83 | if last.begin < last.end { |
84 | sw(w, text[last.begin:last.end], last.bitset) |
85 | } |
86 | last.begin = last.end // invalidate last |
87 | } |
88 | |
89 | // segment runs the segment [lastOffs, end) with the selection |
90 | // indicated by bitset through the segment peephole optimizer. |
91 | segment := func(end int) { |
92 | if lastOffs < end { // ignore empty segments |
93 | if last.end != lastOffs || last.bitset != bitset { |
94 | // the last segment is not adjacent to or |
95 | // differs from the new one |
96 | flush() |
97 | // start a new segment |
98 | last.begin = lastOffs |
99 | } |
100 | last.end = end |
101 | last.bitset = bitset |
102 | } |
103 | } |
104 | |
105 | for { |
106 | // get the next segment change |
107 | index, offs, start := changes.next() |
108 | if index < 0 || offs > len(text) { |
109 | // no more segment changes or the next change |
110 | // is past the end of the text - we're done |
111 | break |
112 | } |
113 | // determine the kind of segment change |
114 | if lw != nil && index == len(selections)-1 { |
115 | // we have a link segment change (see start of this function): |
116 | // format the previous selection segment, write the |
117 | // link tag and start a new selection segment |
118 | segment(offs) |
119 | flush() |
120 | lastOffs = offs |
121 | lw(w, offs, start) |
122 | } else { |
123 | // we have a selection change: |
124 | // format the previous selection segment, determine |
125 | // the new selection bitset and start a new segment |
126 | segment(offs) |
127 | lastOffs = offs |
128 | mask := 1 << uint(index) |
129 | if start { |
130 | bitset |= mask |
131 | } else { |
132 | bitset &^= mask |
133 | } |
134 | } |
135 | } |
136 | segment(len(text)) |
137 | flush() |
138 | } |
139 | |
140 | // A merger merges a slice of Selections and produces a sequence of |
141 | // consecutive segment change events through repeated next() calls. |
142 | type merger struct { |
143 | selections []Selection |
144 | segments []Segment // segments[i] is the next segment of selections[i] |
145 | } |
146 | |
147 | const infinity int = 2e9 |
148 | |
149 | func newMerger(selections []Selection) *merger { |
150 | segments := make([]Segment, len(selections)) |
151 | for i, sel := range selections { |
152 | segments[i] = Segment{infinity, infinity} |
153 | if sel != nil { |
154 | if seg := sel(); !seg.isEmpty() { |
155 | segments[i] = seg |
156 | } |
157 | } |
158 | } |
159 | return &merger{selections, segments} |
160 | } |
161 | |
162 | // next returns the next segment change: index specifies the Selection |
163 | // to which the segment belongs, offs is the segment start or end offset |
164 | // as determined by the start value. If there are no more segment changes, |
165 | // next returns an index value < 0. |
166 | func (m *merger) next() (index, offs int, start bool) { |
167 | // find the next smallest offset where a segment starts or ends |
168 | offs = infinity |
169 | index = -1 |
170 | for i, seg := range m.segments { |
171 | switch { |
172 | case seg.start < offs: |
173 | offs = seg.start |
174 | index = i |
175 | start = true |
176 | case seg.end < offs: |
177 | offs = seg.end |
178 | index = i |
179 | start = false |
180 | } |
181 | } |
182 | if index < 0 { |
183 | // no offset found => all selections merged |
184 | return |
185 | } |
186 | // offset found - it's either the start or end offset but |
187 | // either way it is ok to consume the start offset: set it |
188 | // to infinity so it won't be considered in the following |
189 | // next call |
190 | m.segments[index].start = infinity |
191 | if start { |
192 | return |
193 | } |
194 | // end offset found - consume it |
195 | m.segments[index].end = infinity |
196 | // advance to the next segment for that selection |
197 | seg := m.selections[index]() |
198 | if !seg.isEmpty() { |
199 | m.segments[index] = seg |
200 | } |
201 | return |
202 | } |
203 | |
204 | // ---------------------------------------------------------------------------- |
205 | // Implementation of FormatText |
206 | |
207 | // lineSelection returns the line segments for text as a Selection. |
208 | func lineSelection(text []byte) Selection { |
209 | i, j := 0, 0 |
210 | return func() (seg Segment) { |
211 | // find next newline, if any |
212 | for j < len(text) { |
213 | j++ |
214 | if text[j-1] == '\n' { |
215 | break |
216 | } |
217 | } |
218 | if i < j { |
219 | // text[i:j] constitutes a line |
220 | seg = Segment{i, j} |
221 | i = j |
222 | } |
223 | return |
224 | } |
225 | } |
226 | |
227 | // tokenSelection returns, as a selection, the sequence of |
228 | // consecutive occurrences of token sel in the Go src text. |
229 | func tokenSelection(src []byte, sel token.Token) Selection { |
230 | var s scanner.Scanner |
231 | fset := token.NewFileSet() |
232 | file := fset.AddFile("", fset.Base(), len(src)) |
233 | s.Init(file, src, nil, scanner.ScanComments) |
234 | return func() (seg Segment) { |
235 | for { |
236 | pos, tok, lit := s.Scan() |
237 | if tok == token.EOF { |
238 | break |
239 | } |
240 | offs := file.Offset(pos) |
241 | if tok == sel { |
242 | seg = Segment{offs, offs + len(lit)} |
243 | break |
244 | } |
245 | } |
246 | return |
247 | } |
248 | } |
249 | |
250 | // makeSelection is a helper function to make a Selection from a slice of pairs. |
251 | // Pairs describing empty segments are ignored. |
252 | func makeSelection(matches [][]int) Selection { |
253 | i := 0 |
254 | return func() Segment { |
255 | for i < len(matches) { |
256 | m := matches[i] |
257 | i++ |
258 | if m[0] < m[1] { |
259 | // non-empty segment |
260 | return Segment{m[0], m[1]} |
261 | } |
262 | } |
263 | return Segment{} |
264 | } |
265 | } |
266 | |
267 | // regexpSelection computes the Selection for the regular expression expr in text. |
268 | func regexpSelection(text []byte, expr string) Selection { |
269 | var matches [][]int |
270 | if rx, err := regexp.Compile(expr); err == nil { |
271 | matches = rx.FindAllIndex(text, -1) |
272 | } |
273 | return makeSelection(matches) |
274 | } |
275 | |
276 | var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`) |
277 | |
278 | // RangeSelection computes the Selection for a text range described |
279 | // by the argument str; the range description must match the selRx |
280 | // regular expression. |
281 | func RangeSelection(str string) Selection { |
282 | m := selRx.FindStringSubmatch(str) |
283 | if len(m) >= 2 { |
284 | from, _ := strconv.Atoi(m[1]) |
285 | to, _ := strconv.Atoi(m[2]) |
286 | if from < to { |
287 | return makeSelection([][]int{{from, to}}) |
288 | } |
289 | } |
290 | return nil |
291 | } |
292 | |
var (
	// startTags holds the opening span tags for all the selection
	// combinations that FormatText may generate. Selections are
	// represented as a bit set, and the value of the bit set is the
	// index of the tag to use:
	//
	//	bit 0: comments
	//	bit 1: highlights
	//	bit 2: selections
	startTags = [][]byte{
		/* 000 */ []byte(``),
		/* 001 */ []byte(`<span class="comment">`),
		/* 010 */ []byte(`<span class="highlight">`),
		/* 011 */ []byte(`<span class="highlight-comment">`),
		/* 100 */ []byte(`<span class="selection">`),
		/* 101 */ []byte(`<span class="selection-comment">`),
		/* 110 */ []byte(`<span class="selection-highlight">`),
		/* 111 */ []byte(`<span class="selection-highlight-comment">`),
	}

	// endTag closes a span opened by any of the startTags.
	endTag = []byte(`</span>`)
)

// selectionTag HTML-escapes text and writes it to w, wrapped in the
// span that startTags defines for the bit set selections. The text is
// written without a span if selections has no entry in startTags or if
// its entry is empty. Write errors are not checked.
func selectionTag(w io.Writer, text []byte, selections int) {
	var open []byte
	if selections < len(startTags) {
		open = startTags[selections]
	}
	if len(open) == 0 {
		// no tag for this combination: plain escaped text
		template.HTMLEscape(w, text)
		return
	}
	w.Write(open)
	template.HTMLEscape(w, text)
	w.Write(endTag)
}
324 | |
325 | // FormatText HTML-escapes text and writes it to w. |
326 | // Consecutive text segments are wrapped in HTML spans (with tags as |
327 | // defined by startTags and endTag) as follows: |
328 | // |
329 | // - if line >= 0, line number (ln) spans are inserted before each line, |
330 | // starting with the value of line |
331 | // - if the text is Go source, comments get the "comment" span class |
332 | // - each occurrence of the regular expression pattern gets the "highlight" |
333 | // span class |
334 | // - text segments covered by selection get the "selection" span class |
335 | // |
336 | // Comments, highlights, and selections may overlap arbitrarily; the respective |
337 | // HTML span classes are specified in the startTags variable. |
338 | func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) { |
339 | var comments, highlights Selection |
340 | if goSource { |
341 | comments = tokenSelection(text, token.COMMENT) |
342 | } |
343 | if pattern != "" { |
344 | highlights = regexpSelection(text, pattern) |
345 | } |
346 | if line >= 0 || comments != nil || highlights != nil || selection != nil { |
347 | var lineTag LinkWriter |
348 | if line >= 0 { |
349 | lineTag = func(w io.Writer, _ int, start bool) { |
350 | if start { |
351 | fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line) |
352 | line++ |
353 | } |
354 | } |
355 | } |
356 | FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection) |
357 | } else { |
358 | template.HTMLEscape(w, text) |
359 | } |
360 | } |
361 |
Members