summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/internal/format/parser.go
blob: f4f37f4295e1fe92f37782e5a7695574f1bf010f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package format

import (
	"reflect"
	"unicode/utf8"
)

// A Parser parses a format string. The result from the parse are set in the
// struct fields.
type Parser struct {
	Verb rune

	WidthPresent bool
	PrecPresent  bool
	Minus        bool
	Plus         bool
	Sharp        bool
	Space        bool
	Zero         bool

	// For the formats %+v %#v, we set the plusV/sharpV flags
	// and clear the plus/sharp flags since %+v and %#v are in effect
	// different, flagless formats set at the top level.
	PlusV  bool
	SharpV bool

	HasIndex bool

	Width int
	Prec  int // precision

	// retain arguments across calls.
	Args []interface{}
	// retain current argument number across calls
	ArgNum int

	// reordered records whether the format string used argument reordering.
	Reordered bool
	// goodArgNum records whether the most recent reordering directive was valid.
	goodArgNum bool

	// position info
	format   string
	startPos int
	endPos   int
	Status   Status
}

// Reset initializes a parser to scan format strings for the given args.
func (p *Parser) Reset(args []interface{}) {
	p.Args = args
	p.ArgNum = 0
	p.startPos = 0
	p.Reordered = false
}

// Text returns the part of the format string that was parsed by the last call
// to Scan. It returns the original substitution clause if the current scan
// parsed a substitution.
func (p *Parser) Text() string { return p.format[p.startPos:p.endPos] }

// SetFormat sets a new format string to parse. It does not reset the argument
// count.
func (p *Parser) SetFormat(format string) {
	p.format = format
	p.startPos = 0
	p.endPos = 0
}

// Status indicates the result type of a call to Scan.
type Status int

const (
	StatusText Status = iota
	StatusSubstitution
	StatusBadWidthSubstitution
	StatusBadPrecSubstitution
	StatusNoVerb
	StatusBadArgNum
	StatusMissingArg
)

// ClearFlags reset the parser to default behavior.
func (p *Parser) ClearFlags() {
	p.WidthPresent = false
	p.PrecPresent = false
	p.Minus = false
	p.Plus = false
	p.Sharp = false
	p.Space = false
	p.Zero = false

	p.PlusV = false
	p.SharpV = false

	p.HasIndex = false
}

// Scan scans the next part of the format string and sets the status to
// indicate whether it scanned a string literal, substitution or error.
func (p *Parser) Scan() bool {
	p.Status = StatusText
	format := p.format
	end := len(format)
	if p.endPos >= end {
		return false
	}
	afterIndex := false // previous item in format was an index like [3].

	p.startPos = p.endPos
	p.goodArgNum = true
	i := p.startPos
	for i < end && format[i] != '%' {
		i++
	}
	if i > p.startPos {
		p.endPos = i
		return true
	}
	// Process one verb
	i++

	p.Status = StatusSubstitution

	// Do we have flags?
	p.ClearFlags()

simpleFormat:
	for ; i < end; i++ {
		c := p.format[i]
		switch c {
		case '#':
			p.Sharp = true
		case '0':
			p.Zero = !p.Minus // Only allow zero padding to the left.
		case '+':
			p.Plus = true
		case '-':
			p.Minus = true
			p.Zero = false // Do not pad with zeros to the right.
		case ' ':
			p.Space = true
		default:
			// Fast path for common case of ascii lower case simple verbs
			// without precision or width or argument indices.
			if 'a' <= c && c <= 'z' && p.ArgNum < len(p.Args) {
				if c == 'v' {
					// Go syntax
					p.SharpV = p.Sharp
					p.Sharp = false
					// Struct-field syntax
					p.PlusV = p.Plus
					p.Plus = false
				}
				p.Verb = rune(c)
				p.ArgNum++
				p.endPos = i + 1
				return true
			}
			// Format is more complex than simple flags and a verb or is malformed.
			break simpleFormat
		}
	}

	// Do we have an explicit argument index?
	i, afterIndex = p.updateArgNumber(format, i)

	// Do we have width?
	if i < end && format[i] == '*' {
		i++
		p.Width, p.WidthPresent = p.intFromArg()

		if !p.WidthPresent {
			p.Status = StatusBadWidthSubstitution
		}

		// We have a negative width, so take its value and ensure
		// that the minus flag is set
		if p.Width < 0 {
			p.Width = -p.Width
			p.Minus = true
			p.Zero = false // Do not pad with zeros to the right.
		}
		afterIndex = false
	} else {
		p.Width, p.WidthPresent, i = parsenum(format, i, end)
		if afterIndex && p.WidthPresent { // "%[3]2d"
			p.goodArgNum = false
		}
	}

	// Do we have precision?
	if i+1 < end && format[i] == '.' {
		i++
		if afterIndex { // "%[3].2d"
			p.goodArgNum = false
		}
		i, afterIndex = p.updateArgNumber(format, i)
		if i < end && format[i] == '*' {
			i++
			p.Prec, p.PrecPresent = p.intFromArg()
			// Negative precision arguments don't make sense
			if p.Prec < 0 {
				p.Prec = 0
				p.PrecPresent = false
			}
			if !p.PrecPresent {
				p.Status = StatusBadPrecSubstitution
			}
			afterIndex = false
		} else {
			p.Prec, p.PrecPresent, i = parsenum(format, i, end)
			if !p.PrecPresent {
				p.Prec = 0
				p.PrecPresent = true
			}
		}
	}

	if !afterIndex {
		i, afterIndex = p.updateArgNumber(format, i)
	}
	p.HasIndex = afterIndex

	if i >= end {
		p.endPos = i
		p.Status = StatusNoVerb
		return true
	}

	verb, w := utf8.DecodeRuneInString(format[i:])
	p.endPos = i + w
	p.Verb = verb

	switch {
	case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
		p.startPos = p.endPos - 1
		p.Status = StatusText
	case !p.goodArgNum:
		p.Status = StatusBadArgNum
	case p.ArgNum >= len(p.Args): // No argument left over to print for the current verb.
		p.Status = StatusMissingArg
	case verb == 'v':
		// Go syntax
		p.SharpV = p.Sharp
		p.Sharp = false
		// Struct-field syntax
		p.PlusV = p.Plus
		p.Plus = false
		fallthrough
	default:
		p.ArgNum++
	}
	return true
}

// intFromArg gets the ArgNumth element of Args. On return, isInt reports
// whether the argument has integer type.
func (p *Parser) intFromArg() (num int, isInt bool) {
	if p.ArgNum < len(p.Args) {
		arg := p.Args[p.ArgNum]
		num, isInt = arg.(int) // Almost always OK.
		if !isInt {
			// Work harder.
			switch v := reflect.ValueOf(arg); v.Kind() {
			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
				n := v.Int()
				if int64(int(n)) == n {
					num = int(n)
					isInt = true
				}
			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
				n := v.Uint()
				if int64(n) >= 0 && uint64(int(n)) == n {
					num = int(n)
					isInt = true
				}
			default:
				// Already 0, false.
			}
		}
		p.ArgNum++
		if tooLarge(num) {
			num = 0
			isInt = false
		}
	}
	return
}

// parseArgNumber returns the value of the bracketed number, minus 1
// (explicit argument numbers are one-indexed but we want zero-indexed).
// The opening bracket is known to be present at format[0].
// The returned values are the index, the number of bytes to consume
// up to the closing paren, if present, and whether the number parsed
// ok. The bytes to consume will be 1 if no closing paren is present.
func parseArgNumber(format string) (index int, wid int, ok bool) {
	// There must be at least 3 bytes: [n].
	if len(format) < 3 {
		return 0, 1, false
	}

	// Find closing bracket.
	for i := 1; i < len(format); i++ {
		if format[i] == ']' {
			width, ok, newi := parsenum(format, 1, i)
			if !ok || newi != i {
				return 0, i + 1, false
			}
			return width - 1, i + 1, true // arg numbers are one-indexed and skip paren.
		}
	}
	return 0, 1, false
}

// updateArgNumber returns the next argument to evaluate, which is either the value of the passed-in
// argNum or the value of the bracketed integer that begins format[i:]. It also returns
// the new value of i, that is, the index of the next byte of the format to process.
func (p *Parser) updateArgNumber(format string, i int) (newi int, found bool) {
	if len(format) <= i || format[i] != '[' {
		return i, false
	}
	p.Reordered = true
	index, wid, ok := parseArgNumber(format[i:])
	if ok && 0 <= index && index < len(p.Args) {
		p.ArgNum = index
		return i + wid, true
	}
	p.goodArgNum = false
	return i + wid, ok
}

// tooLarge reports whether the magnitude of the integer is
// too large to be used as a formatting width or precision.
func tooLarge(x int) bool {
	const max int = 1e6
	return x > max || x < -max
}

// parsenum converts ASCII to integer.  num is 0 (and isnum is false) if no number present.
func parsenum(s string, start, end int) (num int, isnum bool, newi int) {
	if start >= end {
		return 0, false, end
	}
	for newi = start; newi < end && '0' <= s[newi] && s[newi] <= '9'; newi++ {
		if tooLarge(num) {
			return 0, false, end // Overflow; crazy long number most likely.
		}
		num = num*10 + int(s[newi]-'0')
		isnum = true
	}
	return
}