From 621e49bb465f500cc46d47e39e828cf76d6381d7 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 24 Jul 2018 14:35:44 +0200 Subject: update vendor --- .../golang.org/x/text/collate/tools/colcmp/gen.go | 183 +++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 vendor/golang.org/x/text/collate/tools/colcmp/gen.go (limited to 'vendor/golang.org/x/text/collate/tools/colcmp/gen.go') diff --git a/vendor/golang.org/x/text/collate/tools/colcmp/gen.go b/vendor/golang.org/x/text/collate/tools/colcmp/gen.go new file mode 100644 index 0000000..795be13 --- /dev/null +++ b/vendor/golang.org/x/text/collate/tools/colcmp/gen.go @@ -0,0 +1,183 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "math" + "math/rand" + "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" + + "golang.org/x/text/language" + "golang.org/x/text/unicode/norm" +) + +// TODO: replace with functionality in language package. +// parent computes the parent language for the given language. +// It returns false if the parent is already root. +func parent(locale string) (parent string, ok bool) { + if locale == "und" { + return "", false + } + if i := strings.LastIndex(locale, "-"); i != -1 { + return locale[:i], true + } + return "und", true +} + +// rewriter is used to both unique strings and create variants of strings +// to add to the test set. +type rewriter struct { + seen map[string]bool + addCases bool +} + +func newRewriter() *rewriter { + return &rewriter{ + seen: make(map[string]bool), + } +} + +func (r *rewriter) insert(a []string, s string) []string { + if !r.seen[s] { + r.seen[s] = true + a = append(a, s) + } + return a +} + +// rewrite takes a sequence of strings in, adds variants of the these strings +// based on options and removes duplicates. +func (r *rewriter) rewrite(ss []string) []string { + ns := []string{} + for _, s := range ss { + ns = r.insert(ns, s) + if r.addCases { + rs := []rune(s) + rn := rs[0] + for c := unicode.SimpleFold(rn); c != rn; c = unicode.SimpleFold(c) { + rs[0] = c + ns = r.insert(ns, string(rs)) + } + } + } + return ns +} + +// exemplarySet holds a parsed set of characters from the exemplarCharacters table. +type exemplarySet struct { + typ exemplarType + set []string + charIndex int // cumulative total of phrases, including this set +} + +type phraseGenerator struct { + sets [exN]exemplarySet + n int +} + +func (g *phraseGenerator) init(id string) { + ec := exemplarCharacters + loc := language.Make(id).String() + // get sets for locale or parent locale if the set is not defined. + for i := range g.sets { + for p, ok := loc, true; ok; p, ok = parent(p) { + if set, ok := ec[p]; ok && set[i] != "" { + g.sets[i].set = strings.Split(set[i], " ") + break + } + } + } + r := newRewriter() + r.addCases = *cases + for i := range g.sets { + g.sets[i].set = r.rewrite(g.sets[i].set) + } + // compute indexes + for i, set := range g.sets { + g.n += len(set.set) + g.sets[i].charIndex = g.n + } +} + +// phrase returns the ith phrase, where i < g.n. +func (g *phraseGenerator) phrase(i int) string { + for _, set := range g.sets { + if i < set.charIndex { + return set.set[i-(set.charIndex-len(set.set))] + } + } + panic("index out of range") +} + +// generate generates inputs by combining all pairs of examplar strings. +// If doNorm is true, all input strings are normalized to NFC. +// TODO: allow other variations, statistical models, and random +// trailing sequences. +func (g *phraseGenerator) generate(doNorm bool) []Input { + const ( + M = 1024 * 1024 + buf8Size = 30 * M + buf16Size = 10 * M + ) + // TODO: use a better way to limit the input size. + if sq := int(math.Sqrt(float64(*limit))); g.n > sq { + g.n = sq + } + size := g.n * g.n + a := make([]Input, 0, size) + buf8 := make([]byte, 0, buf8Size) + buf16 := make([]uint16, 0, buf16Size) + + addInput := func(str string) { + buf8 = buf8[len(buf8):] + buf16 = buf16[len(buf16):] + if len(str) > cap(buf8) { + buf8 = make([]byte, 0, buf8Size) + } + if len(str) > cap(buf16) { + buf16 = make([]uint16, 0, buf16Size) + } + if doNorm { + buf8 = norm.NFD.AppendString(buf8, str) + } else { + buf8 = append(buf8, str...) + } + buf16 = appendUTF16(buf16, buf8) + a = append(a, makeInput(buf8, buf16)) + } + for i := 0; i < g.n; i++ { + p1 := g.phrase(i) + addInput(p1) + for j := 0; j < g.n; j++ { + p2 := g.phrase(j) + addInput(p1 + p2) + } + } + // permutate + rnd := rand.New(rand.NewSource(int64(rand.Int()))) + for i := range a { + j := i + rnd.Intn(len(a)-i) + a[i], a[j] = a[j], a[i] + a[i].index = i // allow restoring this order if input is used multiple times. + } + return a +} + +func appendUTF16(buf []uint16, s []byte) []uint16 { + for len(s) > 0 { + r, sz := utf8.DecodeRune(s) + s = s[sz:] + r1, r2 := utf16.EncodeRune(r) + if r1 != 0xFFFD { + buf = append(buf, uint16(r1), uint16(r2)) + } else { + buf = append(buf, uint16(r)) + } + } + return buf +} -- cgit v1.2.3