summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/runes/cond.go
blob: df7aa02db6d3bf11a35b2a5892f85078c884d727 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runes

import (
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
// This is done for various reasons:
// - To retain the semantics of the Nop transformer: if input is passed to a Nop
//   one would expect it to be unchanged.
// - It would be very expensive to pass a converted RuneError to a transformer:
//   a transformer might need more source bytes after RuneError, meaning that
//   the only way to pass it safely is to create a new buffer and manage the
//   intermingling of RuneErrors and normal input.
// - Many transformers leave ill-formed UTF-8 as is, so this is not
//   inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
//   logical consequence of the operation (as for Map) or if it otherwise would
//   pose security concerns (as for Remove).
// - An alternative would be to return an error on ill-formed UTF-8, but this
//   would be inconsistent with other operations.

// If returns a transformer that routes each run of consecutive runes to one of
// two transformers: tIn handles runs of runes for which s.Contains(r) is true,
// and tNotIn handles runs for which it is false. The transformer taking over
// is Reset at the start of each run. A nil tIn or tNotIn is replaced by
// transform.Nop. Invalid UTF-8 is classified as RuneError when choosing the
// transformer, but the chosen transformer receives the original bytes.
func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
	if tIn == nil {
		if tNotIn == nil {
			// Nothing to apply on either side: the result is a plain no-op.
			return Transformer{transform.Nop}
		}
		tIn = transform.Nop
	}
	if tNotIn == nil {
		tNotIn = transform.Nop
	}

	// spanning returns tr itself when it already provides Span, and otherwise
	// wraps it in a dummySpan.
	spanning := func(tr transform.Transformer) transform.SpanningTransformer {
		if st, ok := tr.(transform.SpanningTransformer); ok {
			return st
		}
		return dummySpan{tr}
	}

	c := &cond{
		tIn:    spanning(tIn),
		tNotIn: spanning(tNotIn),
		f:      s.Contains,
	}
	c.Reset()
	return Transformer{c}
}

// dummySpan wraps a transform.Transformer and adds a Span method, so that the
// wrapped value can be stored where a transform.SpanningTransformer is needed.
type dummySpan struct {
	transform.Transformer
}

// Span ignores its arguments and always returns 0 together with
// transform.ErrEndOfSpan.
func (dummySpan) Span([]byte, bool) (int, error) {
	return 0, transform.ErrEndOfSpan
}

// cond is the transformer built by If. It alternates between tIn and tNotIn
// depending on what f reports for the runes of the input.
type cond struct {
	// tIn is used for runs of runes for which f returns true.
	tIn transform.SpanningTransformer
	// tNotIn is used for runs of runes for which f returns false.
	tNotIn transform.SpanningTransformer
	// f classifies a rune.
	f func(rune) bool

	// check is the current check to perform: it is either is or isNot and
	// reports whether a rune still belongs to the current run.
	check func(rune) bool
	// t is the current transformer to use: either tIn or tNotIn.
	t transform.SpanningTransformer
}

// Reset implements transform.Transformer. It makes tIn the current
// transformer, makes is the current check, and resets tIn.
func (t *cond) Reset() {
	t.t, t.check = t.tIn, t.is
	// tNotIn is not reset here; isNot's counterpart, is, resets it when the
	// first run handled by tNotIn begins.
	t.tIn.Reset()
}

// is is the check used while tIn is the current transformer. It reports
// whether r still belongs to the current run, that is, whether t.f(r) is true.
// When it is not, is switches the current transformer to tNotIn, resets it,
// installs isNot as the current check, and returns false.
func (t *cond) is(r rune) bool {
	if !t.f(r) {
		t.t, t.check = t.tNotIn, t.isNot
		t.tNotIn.Reset()
		return false
	}
	return true
}

// isNot is the check used while tNotIn is the current transformer. It reports
// whether r still belongs to the current run, that is, whether t.f(r) is
// false. When it is not, isNot switches the current transformer to tIn, resets
// it, installs is as the current check, and returns false.
func (t *cond) isNot(r rune) bool {
	if t.f(r) {
		t.t, t.check = t.tIn, t.is
		t.tIn.Reset()
		return false
	}
	return true
}

// Span splits src into runs of runes that t.f classifies the same way and
// forwards each run to the Span method of the transformer selected for it. It
// returns the sum of the byte counts reported by those calls. An error from a
// delegated Span call is returned immediately. If src ends in an incomplete
// rune and atEOF is false, the bytes before it are still forwarded and
// transform.ErrShortSrc is returned.
//
// This implementation of Span doesn't help all too much, but it needs to be
// there to satisfy this package's Transformer interface.
// TODO: there is certainly room for improvement, though. For example, if
// t.t == transform.Nop (which will be a common occurrence) it will save a
// bundle to special-case that loop.
func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
	p := 0 // scan position; src[n:p] is the part of the run not yet forwarded
	for n < len(src) && err == nil {
		// Don't process too much at a time as the Spanner that will be
		// called on this block may terminate early.
		const maxChunk = 4096
		limit := len(src)
		if v := n + maxChunk; v < limit {
			limit = v
		}
		atEnd := false
		size := 0
		// Remember the transformer of the current run: t.check replaces t.t
		// as a side effect when it detects the start of the next run.
		current := t.t
		for ; p < limit; p += size {
			r := rune(src[p])
			if r < utf8.RuneSelf {
				size = 1
			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
				// Ill-formed input. It is only an error if the sequence is
				// incomplete and more input may follow; otherwise r is
				// RuneError and is classified like any other rune.
				if !atEOF && !utf8.FullRune(src[p:]) {
					err = transform.ErrShortSrc
					break
				}
			}
			if !t.check(r) {
				// The rune at p is the start of a new run.
				atEnd = true
				break
			}
		}
		n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
		n += n2
		if err2 != nil {
			return n, err2
		}
		// Resume scanning at the first byte that was not forwarded. If a run
		// just ended, the rune there has already been classified (t.check
		// switched to the new run because of it), so scanning continues after
		// it. If the scan merely stopped at the chunk limit, the rune at n
		// has not been looked at yet and must not be skipped.
		p = n
		if atEnd {
			p += size
		}
	}
	return n, err
}

// Transform splits src into runs of runes that t.f classifies the same way
// and forwards each run to the Transform method of the transformer selected
// for it, writing the results consecutively into dst. It returns the total
// number of bytes written to dst and consumed from src, as reported by those
// calls. An error from a delegated Transform call is returned immediately. If
// src ends in an incomplete rune and atEOF is false, the bytes before it are
// still forwarded and transform.ErrShortSrc is returned.
func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	p := 0 // scan position; src[nSrc:p] is the part of the run not yet forwarded
	for nSrc < len(src) && err == nil {
		// Don't process too much at a time, as the work might be wasted if the
		// destination buffer isn't large enough to hold the result or a
		// transform returns an error early.
		const maxChunk = 4096
		limit := len(src)
		if v := nSrc + maxChunk; v < limit {
			limit = v
		}
		atEnd := false
		size := 0
		// Remember the transformer of the current run: t.check replaces t.t
		// as a side effect when it detects the start of the next run.
		current := t.t
		for ; p < limit; p += size {
			r := rune(src[p])
			if r < utf8.RuneSelf {
				size = 1
			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
				// Ill-formed input. It is only an error if the sequence is
				// incomplete and more input may follow; otherwise r is
				// RuneError and is classified like any other rune.
				if !atEOF && !utf8.FullRune(src[p:]) {
					err = transform.ErrShortSrc
					break
				}
			}
			if !t.check(r) {
				// The rune at p is the start of a new run.
				atEnd = true
				break
			}
		}
		nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
		nDst += nDst2
		nSrc += nSrc2
		if err2 != nil {
			return nDst, nSrc, err2
		}
		// Resume scanning at the first byte that was not forwarded. If a run
		// just ended, the rune there has already been classified (t.check
		// switched to the new run because of it), so scanning continues after
		// it. If the scan merely stopped at the chunk limit, the rune at nSrc
		// has not been looked at yet and must not be skipped.
		p = nSrc
		if atEnd {
			p += size
		}
	}
	return nDst, nSrc, err
}