summary refs log tree commit diff
path: root/vendor/golang.org/x/text/collate/tools/colcmp/gen.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/collate/tools/colcmp/gen.go')
-rw-r--r-- vendor/golang.org/x/text/collate/tools/colcmp/gen.go 183
1 files changed, 183 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/collate/tools/colcmp/gen.go b/vendor/golang.org/x/text/collate/tools/colcmp/gen.go
new file mode 100644
index 0000000..795be13
--- /dev/null
+++ b/vendor/golang.org/x/text/collate/tools/colcmp/gen.go
@@ -0,0 +1,183 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "math"
+ "math/rand"
+ "strings"
+ "unicode"
+ "unicode/utf16"
+ "unicode/utf8"
+
+ "golang.org/x/text/language"
+ "golang.org/x/text/unicode/norm"
+)
+
+// TODO: replace with functionality in language package.
+// parent returns the parent of the given locale identifier, which is the
+// identifier with its last "-"-separated part removed. An identifier that
+// contains no "-" has "und" as its parent. ok is false only when locale is
+// "und" itself, which has no parent.
+func parent(locale string) (parent string, ok bool) {
+	switch cut := strings.LastIndex(locale, "-"); {
+	case locale == "und":
+		return "", false
+	case cut >= 0:
+		return locale[:cut], true
+	default:
+		return "und", true
+	}
+}
+
+// rewriter is used both to deduplicate strings and to create variants of
+// strings to add to the test set.
+type rewriter struct {
+ seen map[string]bool // strings that insert has already appended
+ addCases bool // if set, rewrite also adds variants that differ in the case folding of the first rune
+}
+
+// newRewriter returns a rewriter that has not seen any string yet and has
+// addCases unset.
+func newRewriter() *rewriter {
+	r := new(rewriter)
+	r.seen = map[string]bool{}
+	return r
+}
+
+// insert appends s to a and marks it as seen, unless this rewriter has
+// already seen s, in which case a is returned unchanged.
+func (r *rewriter) insert(a []string, s string) []string {
+	if r.seen[s] {
+		return a
+	}
+	r.seen[s] = true
+	return append(a, s)
+}
+
+// rewrite takes a sequence of strings, adds variants of these strings based
+// on the rewriter's options, and removes duplicates. Strings are emitted in
+// input order, and a string already seen by this rewriter (in this or an
+// earlier call) is not emitted again.
+//
+// If addCases is set, each string is followed by the variants obtained by
+// replacing its first rune with every other rune in that rune's simple
+// case-folding orbit. Empty strings have no first rune and therefore get no
+// variants.
+//
+// The result is always non-nil.
+func (r *rewriter) rewrite(ss []string) []string {
+	ns := make([]string, 0, len(ss))
+	for _, s := range ss {
+		ns = r.insert(ns, s)
+		if !r.addCases {
+			continue
+		}
+		rs := []rune(s)
+		if len(rs) == 0 {
+			// Splitting an empty or double-spaced table entry yields empty
+			// strings; indexing rs[0] on those would panic.
+			continue
+		}
+		rn := rs[0]
+		// SimpleFold cycles through the orbit of rn and eventually returns
+		// to rn itself, which ends the loop.
+		for c := unicode.SimpleFold(rn); c != rn; c = unicode.SimpleFold(c) {
+			rs[0] = c
+			ns = r.insert(ns, string(rs))
+		}
+	}
+	return ns
+}
+
+// exemplarySet holds a parsed set of characters from the exemplarCharacters table.
+type exemplarySet struct {
+ typ exemplarType // NOTE(review): not read or written in the code visible here; confirm its use elsewhere
+ set []string // table entry split on spaces, then passed through rewriter.rewrite
+ charIndex int // cumulative total of phrases, including this set
+}
+
+// phraseGenerator serves phrases taken from the exemplar sets selected for a
+// locale and combines them into test inputs.
+type phraseGenerator struct {
+ sets [exN]exemplarySet // one entry per exemplar set index; populated by init
+ n int // total number of phrases over all sets; generate may lower it to limit the input size
+}
+
+// init fills the generator's sets for the locale identified by id.
+//
+// For every set index, the entry of the locale is used, or failing that the
+// entry of the nearest parent locale whose entry for that index is
+// non-empty. Each entry is split on spaces, run through one rewriter shared
+// by all sets (so a string occurs in at most one set), and the cumulative
+// phrase count is recorded in charIndex and g.n.
+func (g *phraseGenerator) init(id string) {
+	loc := language.Make(id).String()
+	rw := newRewriter()
+	rw.addCases = *cases
+	for i := range g.sets {
+		cur := &g.sets[i]
+		// Walk from the locale up through its parents until one of them
+		// defines this set.
+		for p, more := loc, true; more; p, more = parent(p) {
+			if entry, found := exemplarCharacters[p]; found && entry[i] != "" {
+				cur.set = strings.Split(entry[i], " ")
+				break
+			}
+		}
+		cur.set = rw.rewrite(cur.set)
+		// compute indexes
+		g.n += len(cur.set)
+		cur.charIndex = g.n
+	}
+}
+
+// phrase returns the ith phrase, where i < g.n. Phrases are numbered
+// consecutively across the sets in set order. It panics if i is not below
+// the charIndex of any set.
+func (g *phraseGenerator) phrase(i int) string {
+	for k := range g.sets {
+		s := &g.sets[k]
+		if i >= s.charIndex {
+			continue
+		}
+		// charIndex is the running total up to and including this set, so
+		// the first phrase of this set has this global index.
+		first := s.charIndex - len(s.set)
+		return s.set[i-first]
+	}
+	panic("index out of range")
+}
+
+// generate generates inputs by combining all pairs of exemplar strings: every
+// phrase is added once on its own and once followed by each phrase
+// (including itself).
+// If doNorm is true, all input strings are normalized to NFD.
+// The number of phrases used is capped at the square root of *limit; the cap
+// is stored back into g.n. The returned inputs are shuffled and each Input's
+// index field is set to its position in the shuffled order.
+// TODO: allow other variations, statistical models, and random
+// trailing sequences.
+func (g *phraseGenerator) generate(doNorm bool) []Input {
+	const (
+		M         = 1024 * 1024
+		buf8Size  = 30 * M
+		buf16Size = 10 * M
+	)
+	// TODO: use a better way to limit the input size.
+	if sq := int(math.Sqrt(float64(*limit))); g.n > sq {
+		g.n = sq
+	}
+	// Each of the g.n phrases is added by itself and in front of each of the
+	// g.n phrases, so g.n + g.n*g.n inputs are appended in total.
+	size := g.n * (g.n + 1)
+	a := make([]Input, 0, size)
+	buf8 := make([]byte, 0, buf8Size)
+	buf16 := make([]uint16, 0, buf16Size)
+
+	addInput := func(str string) {
+		// Continue in the unused tail of the current buffers so that every
+		// input gets its own region of a shared backing array.
+		buf8 = buf8[len(buf8):]
+		buf16 = buf16[len(buf16):]
+		// Start a fresh buffer once the remaining capacity is smaller than
+		// the string to add.
+		if len(str) > cap(buf8) {
+			buf8 = make([]byte, 0, buf8Size)
+		}
+		if len(str) > cap(buf16) {
+			buf16 = make([]uint16, 0, buf16Size)
+		}
+		if doNorm {
+			buf8 = norm.NFD.AppendString(buf8, str)
+		} else {
+			buf8 = append(buf8, str...)
+		}
+		buf16 = appendUTF16(buf16, buf8)
+		a = append(a, makeInput(buf8, buf16))
+	}
+	for i := 0; i < g.n; i++ {
+		p1 := g.phrase(i)
+		addInput(p1)
+		for j := 0; j < g.n; j++ {
+			p2 := g.phrase(j)
+			addInput(p1 + p2)
+		}
+	}
+	// permutate
+	rnd := rand.New(rand.NewSource(int64(rand.Int())))
+	for i := range a {
+		// Swap element i with a random element from the not yet fixed tail.
+		j := i + rnd.Intn(len(a)-i)
+		a[i], a[j] = a[j], a[i]
+		a[i].index = i // allow restoring this order if input is used multiple times.
+	}
+	return a
+}
+
+// appendUTF16 decodes the UTF-8 bytes in s and appends their UTF-16 encoding
+// to buf, returning the extended slice. Runes that need a surrogate pair are
+// appended as two code units, and every other rune as one. Each byte of
+// invalid UTF-8 is decoded as U+FFFD and appended as such.
+func appendUTF16(buf []uint16, s []byte) []uint16 {
+	for pos := 0; pos < len(s); {
+		r, width := utf8.DecodeRune(s[pos:])
+		pos += width
+		hi, lo := utf16.EncodeRune(r)
+		if hi == utf8.RuneError {
+			// EncodeRune returns U+FFFD for runes that have no surrogate
+			// pair; those fit in a single code unit.
+			buf = append(buf, uint16(r))
+			continue
+		}
+		buf = append(buf, uint16(hi), uint16(lo))
+	}
+	return buf
+}