summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/search/pattern.go
blob: 2497cfd2f1b287fc878f3070ad46bee204bba3fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package search

import (
	"golang.org/x/text/internal/colltab"
)

// TODO: handle variable primary weights?

func (p *Pattern) deleteEmptyElements() {
	k := 0
	for _, e := range p.ce {
		if !isIgnorable(p.m, e) {
			p.ce[k] = e
			k++
		}
	}
	p.ce = p.ce[:k]
}

func isIgnorable(m *Matcher, e colltab.Elem) bool {
	if e.Primary() > 0 {
		return false
	}
	if e.Secondary() > 0 {
		if !m.ignoreDiacritics {
			return false
		}
		// Primary value is 0 and ignoreDiacritics is true. In this case we
		// ignore the tertiary element, as it only pertains to the modifier.
		return true
	}
	// TODO: further distinguish once we have the new implementation.
	if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
		return false
	}
	// TODO: we ignore the Quaternary level for now.
	return true
}

// TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.

func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
	for start := 0; it.Next(); it.Reset(start) {
		nextStart := it.End()
		if end := p.searchOnce(it); end != -1 {
			return start, end
		}
		start = nextStart
	}
	return -1, -1
}

func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
	if it.Next() {
		if end := p.searchOnce(it); end != -1 {
			return 0, end
		}
	}
	return -1, -1
}

// next advances to the next weight in a pattern. f must return one of the
// weights of a collation element. next will advance to the first non-zero
// weight and return this weight and true if it exists, or 0, false otherwise.
func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
	for *i < len(p.ce) {
		v := f(p.ce[*i])
		*i++
		if v != 0 {
			// Skip successive ignorable values.
			for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
			}
			return v, true
		}
	}
	return 0, false
}

// TODO: remove this function once Elem is internal and Tertiary returns int.
func tertiary(e colltab.Elem) int {
	return int(e.Tertiary())
}

// searchOnce tries to match the pattern s.p at the text position i. s.buf needs
// to be filled with collation elements of the first segment, where n is the
// number of source bytes consumed for this segment. It will return the end
// position of the match or -1.
func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
	var pLevel [4]int

	m := p.m
	for {
		k := 0
		for ; k < it.N; k++ {
			if v := it.Elems[k].Primary(); v > 0 {
				if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
					return -1
				}
			}

			if !m.ignoreDiacritics {
				if v := it.Elems[k].Secondary(); v > 0 {
					if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
						return -1
					}
				}
			} else if it.Elems[k].Primary() == 0 {
				// We ignore tertiary values of collation elements of the
				// secondary level.
				continue
			}

			// TODO: distinguish between case and width. This will be easier to
			// implement after we moved to the new collation implementation.
			if !m.ignoreWidth && !m.ignoreCase {
				if v := it.Elems[k].Tertiary(); v > 0 {
					if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
						return -1
					}
				}
			}
			// TODO: check quaternary weight
		}
		it.Discard() // Remove the current segment from the buffer.

		// Check for completion.
		switch {
		// If any of these cases match, we are not at the end.
		case pLevel[0] < len(p.ce):
		case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
		case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
		default:
			// At this point, both the segment and pattern has matched fully.
			// However, the segment may still be have trailing modifiers.
			// This can be verified by another call to next.
			end = it.End()
			if it.Next() && it.Elems[0].Primary() == 0 {
				if !m.ignoreDiacritics {
					return -1
				}
				end = it.End()
			}
			return end
		}

		// Fill the buffer with the next batch of collation elements.
		if !it.Next() {
			return -1
		}
	}
}