summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go')
-rw-r--r--vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go529
1 files changed, 529 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go b/vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go
new file mode 100644
index 0000000..ebd6012
--- /dev/null
+++ b/vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go
@@ -0,0 +1,529 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main // import "golang.org/x/text/collate/tools/colcmp"
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "runtime/pprof"
+ "sort"
+ "strconv"
+ "strings"
+ "text/template"
+ "time"
+
+ "golang.org/x/text/unicode/norm"
+)
+
+// Command-line flags. They are parsed by flag.Parse in main and read by
+// parseTests and by the individual commands.
+var (
+	doNorm  = flag.Bool("norm", false, "normalize input strings")
+	cases   = flag.Bool("case", false, "generate case variants")
+	verbose = flag.Bool("verbose", false, "print results")
+	debug   = flag.Bool("debug", false, "output debug information")
+	locales = flag.String("locale", "en_US", "the locale to use. May be a comma-separated list for some commands.")
+	col     = flag.String("col", "go", "collator to test")
+	gold    = flag.String("gold", "go", "collator used as the gold standard")
+	// usecmp is a boolean switch; the help text used to describe string
+	// values ("test", "gold", "both") that a Bool flag cannot accept.
+	usecmp     = flag.Bool("usecmp", false, "use comparison instead of sort keys when sorting")
+	cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
+	exclude    = flag.String("exclude", "", "exclude errors that contain any of the characters")
+	limit      = flag.Int("limit", 5000000, "maximum number of samples to generate for one run")
+)
+
+// failOnError logs err and panics when err is non-nil; a nil err is a no-op.
+func failOnError(err error) {
+	if err == nil {
+		return
+	}
+	log.Panic(err)
+}
+
+// Test holds test data for testing a locale-collator pair.
+// Test also provides functionality that is commonly used by the various commands.
+type Test struct {
+ ctxt *Context // the Context this test belongs to (set by parseTests)
+ Name string // prefix used in the status lines printed by SetStatus and Stop
+ Locale string // locale the collator and the generated input are created for
+ ColName string // name of the collator implementation, as given on the command line
+
+ Col Collator // collator under test
+ UseCompare bool // sort with Col.Compare instead of generated sort keys
+
+ Input []Input // the strings to be sorted
+ Duration time.Duration // total time accumulated by Stop over all operations
+
+ start time.Time // time at which the current operation was started by Start
+ msg string // description of the current operation
+ count int // number of operations counted since the last Start
+}
+
+// clear drops the collator and the input so that both can be garbage
+// collected once the test is no longer the current one.
+func (t *Test) clear() {
+	t.Input = nil
+	t.Col = nil
+}
+
+const (
+ msgGeneratingInput = "generating input" // status shown by GenerateInput
+ msgGeneratingKeys = "generating keys" // status shown by Sort while building sort keys
+ msgSorting = "sorting" // status shown by Sort while sorting
+)
+
+// lastLen is the number of characters written by the previous in-place
+// status message, so that SetStatus can blank it out.
+var lastLen int
+
+// SetStatus reports msg as the current activity. In debug or verbose mode it
+// is printed on a line of its own. Otherwise, if the context already has an
+// output writer, the previous status is overwritten in place: it is blanked
+// with spaces, the cursor is moved back with backspaces, and the new message
+// is written followed by backspaces so that later output overwrites it too.
+func (t *Test) SetStatus(msg string) {
+	if *debug || *verbose {
+		fmt.Printf("%s: %s...\n", t.Name, msg)
+		return
+	}
+	out := t.ctxt.out
+	if out == nil {
+		return
+	}
+	// Erase whatever the previous call left on the line.
+	fmt.Fprint(out, strings.Repeat(" ", lastLen))
+	fmt.Fprint(out, strings.Repeat("\b", lastLen))
+	// Write the new status and step back over it.
+	fmt.Fprint(out, msg, "...")
+	lastLen = len(msg) + 3
+	fmt.Fprint(out, strings.Repeat("\b", lastLen))
+}
+
+// Start is used by commands to signal the start of an operation. It shows
+// msg as the status, resets the operation counter and records the start time.
+func (t *Test) Start(msg string) {
+	t.SetStatus(msg)
+	t.msg, t.count = msg, 0
+	// Take the timestamp last so that status output is not part of the timing.
+	t.start = time.Now()
+}
+
+// Stop is used by commands to signal the end of an operation. It returns the
+// time elapsed since the matching Start together with the operation count,
+// and adds the elapsed time to t.Duration.
+func (t *Test) Stop() (time.Duration, int) {
+	now := time.Now()
+	elapsed := now.Sub(t.start)
+	t.Duration += elapsed
+	if *verbose || *debug {
+		fmt.Printf("%s: %s done. (%.3fs /%dK ops)\n", t.Name, t.msg, elapsed.Seconds(), t.count/1000)
+	}
+	return elapsed, t.count
+}
+
+// generateKeys generates sort keys for all the inputs and stores each key in
+// the corresponding Input. In debug mode every input is printed with its key.
+func (t *Test) generateKeys() {
+	for i := range t.Input {
+		in := t.Input[i]
+		key := t.Col.Key(in)
+		t.Input[i].key = key
+		if !*debug {
+			continue
+		}
+		fmt.Printf("%s (%X): %X\n", string(in.UTF8), in.UTF16, key)
+	}
+}
+
+// Sort sorts the inputs. It generates sort keys if this is required by the
+// chosen sort method.
+//
+// When t.UseCompare is set, or the collator returns a nil key, the inputs are
+// sorted with Col.Compare and only tsort/nsort are reported. Otherwise keys
+// are generated first (tkey/nkey) and the inputs are sorted by key
+// (tsort/nsort). The n values are operation counts: the number of keys
+// generated and the number of Less calls, respectively. With no input, all
+// results are zero.
+//
+// If the cpuprofile flag is set, a CPU profile of the call is written to that
+// file.
+func (t *Test) Sort() (tkey, tsort time.Duration, nkey, nsort int) {
+	if len(t.Input) == 0 {
+		// Nothing to sort; this also protects the t.Input[0] probe below.
+		return
+	}
+	if *cpuprofile != "" {
+		f, err := os.Create(*cpuprofile)
+		failOnError(err)
+		// Deferred calls run last-in first-out, so the profile is stopped
+		// (and flushed) before the file is closed.
+		defer f.Close()
+		failOnError(pprof.StartCPUProfile(f))
+		defer pprof.StopCPUProfile()
+	}
+	if t.UseCompare || t.Col.Key(t.Input[0]) == nil {
+		t.Start(msgSorting)
+		// testCompare embeds a copy of *t. The Input slice is shared, so the
+		// sort is visible through t, but the comparison counter lives in the
+		// copy and has to be carried back before Stop reads it.
+		tc := &testCompare{*t}
+		sort.Sort(tc)
+		t.count = tc.count
+		tsort, nsort = t.Stop()
+		return
+	}
+	t.Start(msgGeneratingKeys)
+	t.generateKeys()
+	t.count = len(t.Input)
+	tkey, nkey = t.Stop()
+	t.Start(msgSorting)
+	sort.Sort(t)
+	tsort, nsort = t.Stop()
+	return
+}
+
+// Swap exchanges the inputs at positions a and b (sort.Interface).
+func (t *Test) Swap(a, b int) {
+	in := t.Input
+	in[a], in[b] = in[b], in[a]
+}
+
+// Less reports whether the sort key of input a orders before that of input b
+// (sort.Interface). Every call is counted in t.count.
+func (t *Test) Less(a, b int) bool {
+	t.count++
+	ka, kb := t.Input[a].key, t.Input[b].key
+	return bytes.Compare(ka, kb) < 0
+}
+
+// Len returns the number of inputs (sort.Interface).
+func (t Test) Len() int {
+	n := len(t.Input)
+	return n
+}
+
+// testCompare sorts the inputs of the embedded Test with the collator's
+// Compare method instead of with sort keys.
+type testCompare struct {
+	Test
+}
+
+// Less reports whether input a collates before input b according to
+// Col.Compare. Every call is counted in the embedded Test's count field.
+func (t *testCompare) Less(a, b int) bool {
+	t.count++
+	x, y := t.Input[a], t.Input[b]
+	return t.Col.Compare(x, y) == -1
+}
+
+// testRestore sorts the inputs of the embedded Test by their index field.
+type testRestore struct {
+	Test
+}
+
+// Less orders inputs by ascending index.
+func (t *testRestore) Less(a, b int) bool {
+	in := t.Input
+	return in[a].index < in[b].index
+}
+
+// GenerateInput generates input phrases for the locale tested by t.
+func (t *Test) GenerateInput() {
+ t.Input = nil
+ if t.ctxt.lastLocale != t.Locale {
+ gen := phraseGenerator{}
+ gen.init(t.Locale)
+ t.SetStatus(msgGeneratingInput)
+ t.ctxt.lastInput = nil // allow the previous value to be garbage collected.
+ t.Input = gen.generate(*doNorm)
+ t.ctxt.lastInput = t.Input
+ t.ctxt.lastLocale = t.Locale
+ } else {
+ t.Input = t.ctxt.lastInput
+ for i := range t.Input {
+ t.Input[i].key = nil
+ }
+ sort.Sort(&testRestore{*t})
+ }
+}
+
+// Context holds all tests and settings translated from command line options.
+type Context struct {
+ test []*Test // all tests, in the order created by parseTests
+ last *Test // test most recently handed out by Test; cleared on the next call
+
+ lastLocale string // locale of the input cached in lastInput
+ lastInput []Input // input most recently generated by Test.GenerateInput
+
+ out io.Writer // output destination; nil until assertBuf sets it
+}
+
+// Printf formats according to format and writes to the context's output,
+// creating the output writer first if necessary.
+func (c *Context) Printf(format string, a ...interface{}) {
+	c.assertBuf()
+	w := c.out
+	fmt.Fprintf(w, format, a...)
+}
+
+// Print writes its operands to the context's output in the manner of
+// fmt.Fprint, creating the output writer first if necessary.
+func (c *Context) Print(a ...interface{}) {
+	c.assertBuf()
+	w := c.out
+	fmt.Fprint(w, a...)
+}
+
+// assertBuf sets up an io.Writer for output, if it doesn't already exist.
+// In debug and verbose mode, output is buffered so that the regular output
+// will not interfere with the additional output. Otherwise, output is
+// written directly to stdout for a more responsive feel.
+func (c *Context) assertBuf() {
+	switch {
+	case c.out != nil:
+		// Already set up; keep the existing writer.
+	case *debug || *verbose:
+		c.out = new(bytes.Buffer)
+	default:
+		c.out = os.Stdout
+	}
+}
+
+// flush flushes the contents of ts.out to stdout, if it is not stdout already.
+//
+// A writer that is an io.ReadCloser (as os.Stdout is) is treated as direct
+// output and left alone. Any other writer is copied to stdout provided it can
+// be read from (as the bytes.Buffer installed by assertBuf can); a writer
+// that is not an io.Reader is skipped rather than causing a panic. A failed
+// copy is logged.
+func (ts *Context) flush() {
+	if ts.out == nil {
+		return
+	}
+	if _, direct := ts.out.(io.ReadCloser); direct {
+		return
+	}
+	r, ok := ts.out.(io.Reader)
+	if !ok {
+		return
+	}
+	if _, err := io.Copy(os.Stdout, r); err != nil {
+		log.Printf("flushing output: %v", err)
+	}
+}
+
+// parseTests creates all tests from command lines and returns
+// a Context to hold them. One Test is created for every combination of a
+// locale from the locale flag and a collator name from the col flag; both
+// flags are comma-separated lists whose elements are trimmed of white space.
+func parseTests() *Context {
+	ctxt := new(Context)
+	names := strings.Split(*col, ",")
+	locs := strings.Split(*locales, ",")
+	for _, loc := range locs {
+		loc = strings.TrimSpace(loc)
+		for _, name := range names {
+			name = strings.TrimSpace(name)
+			t := &Test{
+				ctxt:       ctxt,
+				Locale:     loc,
+				ColName:    name,
+				UseCompare: *usecmp,
+				Col:        getCollator(name, loc),
+			}
+			ctxt.test = append(ctxt.test, t)
+		}
+	}
+	return ctxt
+}
+
+// Len returns the number of tests held by c.
+func (c *Context) Len() int {
+	n := len(c.test)
+	return n
+}
+
+// Test returns the i-th test and remembers it as the current one. The test
+// returned by the previous call is cleared first, releasing its collator and
+// input.
+func (c *Context) Test(i int) *Test {
+	if prev := c.last; prev != nil {
+		prev.clear()
+	}
+	cur := c.test[i]
+	c.last = cur
+	return cur
+}
+
+// parseInput converts command-line arguments to Inputs. Each argument is
+// decoded one character at a time with strconv.UnquoteChar, using the single
+// quote as the quote character, so Go escape sequences such as \u00e9 may be
+// used. If the norm flag is set the decoded string is converted to NFD.
+//
+// An argument that cannot be decoded terminates the program with an error
+// message. The decoding error used to be ignored, which silently dropped the
+// remainder of the argument and appended a NUL rune in its place.
+func parseInput(args []string) []Input {
+	input := make([]Input, 0, len(args))
+	for _, arg := range args {
+		var rs []rune
+		for rest := arg; len(rest) > 0; {
+			r, _, tail, err := strconv.UnquoteChar(rest, '\'')
+			if err != nil {
+				log.Fatalf("invalid input string %q: %v", arg, err)
+			}
+			rs = append(rs, r)
+			rest = tail
+		}
+		s := string(rs)
+		if *doNorm {
+			s = norm.NFD.String(s)
+		}
+		input = append(input, makeInputString(s))
+	}
+	return input
+}
+
+// A Command is an implementation of a colcmp command.
+type Command struct {
+ Run func(cmd *Context, args []string) // executes the command with the arguments that follow its name
+ Usage string // usage line; its first word is the command name (see Name)
+ Short string // one-line description listed in the general help
+ Long string // full description shown by "help <command>"
+}
+
+// Name returns the command's name: the part of Usage before the first space,
+// or all of Usage if it contains no space.
+func (cmd Command) Name() string {
+	usage := cmd.Usage
+	for i := 0; i < len(usage); i++ {
+		if usage[i] == ' ' {
+			return usage[:i]
+		}
+	}
+	return usage
+}
+
+var commands = []*Command{ // all commands; searched by name in main and runHelp
+ cmdSort,
+ cmdBench,
+ cmdRegress,
+}
+
+// sortHelp is the long description of the sort command.
+const sortHelp = `
+Sort sorts a given list of strings. Strings are separated by whitespace.
+`
+
+// cmdSort is the "sort" command; it is implemented by runSort.
+var cmdSort = &Command{
+ Run: runSort,
+ Usage: "sort <string>*",
+ Short: "sort a given list of strings",
+ Long: sortHelp,
+}
+
+// runSort implements the sort command. The arguments are parsed into inputs
+// and sorted once per configured test; each result is printed on one line.
+// When more than one test is configured, a header is printed and every line
+// is prefixed with the collator name and locale. The program exits with an
+// error if there are no arguments.
+func runSort(ctxt *Context, args []string) {
+	input := parseInput(args)
+	if len(input) == 0 {
+		log.Fatalf("Nothing to sort.")
+	}
+	multi := ctxt.Len() > 1
+	if multi {
+		ctxt.Print("COLL LOCALE RESULT\n")
+	}
+	for i, n := 0, ctxt.Len(); i < n; i++ {
+		t := ctxt.Test(i)
+		t.Input = append(t.Input, input...)
+		t.Sort()
+		if multi {
+			ctxt.Printf("%-5s %-5s ", t.ColName, t.Locale)
+		}
+		for j := range t.Input {
+			ctxt.Print(string(t.Input[j].UTF8), " ")
+		}
+		ctxt.Print("\n")
+	}
+}
+
+// benchHelp is the long description of the bench command.
+const benchHelp = `
+Bench runs a benchmark for the given list of collator implementations.
+If no collator implementations are given, the go collator will be used.
+`
+
+// cmdBench is the "bench" command; it is implemented by runBench.
+var cmdBench = &Command{
+ Run: runBench,
+ Usage: "bench",
+ Short: "benchmark a given list of collator implementations",
+ Long: benchHelp,
+}
+
+func runBench(ctxt *Context, args []string) {
+ ctxt.Printf("%-7s %-5s %-6s %-24s %-24s %-5s %s\n", "LOCALE", "COLL", "N", "KEYS", "SORT", "AVGLN", "TOTAL")
+ for i := 0; i < ctxt.Len(); i++ {
+ t := ctxt.Test(i)
+ ctxt.Printf("%-7s %-5s ", t.Locale, t.ColName)
+ t.GenerateInput()
+ ctxt.Printf("%-6s ", fmt.Sprintf("%dK", t.Len()/1000))
+ tkey, tsort, nkey, nsort := t.Sort()
+ p := func(dur time.Duration, n int) {
+ s := ""
+ if dur > 0 {
+ s = fmt.Sprintf("%6.3fs ", dur.Seconds())
+ if n > 0 {
+ s += fmt.Sprintf("%15s", fmt.Sprintf("(%4.2f ns/op)", float64(dur)/float64(n)))
+ }
+ }
+ ctxt.Printf("%-24s ", s)
+ }
+ p(tkey, nkey)
+ p(tsort, nsort)
+
+ total := 0
+ for _, s := range t.Input {
+ total += len(s.key)
+ }
+ ctxt.Printf("%-5d ", total/t.Len())
+ ctxt.Printf("%6.3fs\n", t.Duration.Seconds())
+ if *debug {
+ for _, s := range t.Input {
+ fmt.Print(string(s.UTF8), " ")
+ }
+ fmt.Println()
+ }
+ }
+}
+
+// regressHelp is the long description of the regress command.
+const regressHelp = `
+Regress runs a monkey test by comparing the results of randomly generated tests
+between two implementations of a collator. The user may optionally pass a list
+of strings to regress against instead of the default test set.
+`
+
+// cmdRegress is the "regress" command; it is implemented by runRegress.
+// The collator under test is selected with the -col flag (there is no -test
+// flag), and the reference collator with -gold.
+var cmdRegress = &Command{
+	Run:   runRegress,
+	Usage: "regress -gold=<col> -col=<col> [string]*",
+	Short: "run a monkey test between two collators",
+	Long:  regressHelp,
+}
+
+// failedKeyCompare is the report format used by runRegress when comparing the
+// sort keys of two adjacent inputs disagrees with the gold collator. Its
+// arguments are the locale, the position, then for each of a and b the
+// string, its runes and its key, followed by the result obtained, the result
+// wanted and the gold keys of a and b.
+const failedKeyCompare = `
+%s:%d: incorrect comparison result for input:
+ a: %q (%.4X)
+ key: %s
+ b: %q (%.4X)
+ key: %s
+ Compare(a, b) = %d; want %d.
+
+ gold keys:
+ a: %s
+ b: %s
+`
+
+// failedCompare is the report format used by runRegress when the Compare
+// method of the collator under test disagrees with the gold collator. Its
+// arguments are the locale, the position, the string and runes of a and of b,
+// the result obtained and the result wanted.
+const failedCompare = `
+%s:%d: incorrect comparison result for input:
+ a: %q (%.4X)
+ b: %q (%.4X)
+ Compare(a, b) = %d; want %d.
+`
+
+func keyStr(b []byte) string {
+ buf := &bytes.Buffer{}
+ for _, v := range b {
+ fmt.Fprintf(buf, "%.2X ", v)
+ }
+ return buf.String()
+}
+
+// runRegress implements the regress command. For every configured test it
+// sorts the input (the given strings, or generated input when none are
+// given) with the collator under test and then checks every pair of adjacent
+// inputs against the collator named by the gold flag. A pair is skipped when
+// either string contains a character from the exclude flag.
+//
+// For each pair the sort keys are compared first and, if they agree with the
+// gold result, the collator's Compare method is checked as well. The key
+// check is only made when both inputs have a key: when Sort used comparisons
+// no keys are generated, and comparing two missing keys (always equal) used
+// to be reported as a failure for every pair the gold collator does not
+// consider equal.
+func runRegress(ctxt *Context, args []string) {
+	input := parseInput(args)
+	for i := 0; i < ctxt.Len(); i++ {
+		t := ctxt.Test(i)
+		if len(input) > 0 {
+			t.Input = append(t.Input, input...)
+		} else {
+			t.GenerateInput()
+		}
+		t.Sort()
+		count := 0
+		goldCol := getCollator(*gold, t.Locale)
+		for j := 1; j < len(t.Input); j++ {
+			ia := t.Input[j-1]
+			ib := t.Input[j]
+			if bytes.IndexAny(ib.UTF8, *exclude) != -1 {
+				// ib would be the left-hand side of the next pair, so skip
+				// that pair as well.
+				j++
+				continue
+			}
+			if bytes.IndexAny(ia.UTF8, *exclude) != -1 {
+				continue
+			}
+			goldCmp := goldCol.Compare(ia, ib)
+			a := string(ia.UTF8)
+			b := string(ib.UTF8)
+			if ia.key != nil && ib.key != nil {
+				if cmp := bytes.Compare(ia.key, ib.key); cmp != goldCmp {
+					count++
+					fmt.Printf(failedKeyCompare, t.Locale, j-1, a, []rune(a), keyStr(ia.key), b, []rune(b), keyStr(ib.key), cmp, goldCmp, keyStr(goldCol.Key(ia)), keyStr(goldCol.Key(ib)))
+					// A pair is reported at most once.
+					continue
+				}
+			}
+			if cmp := t.Col.Compare(ia, ib); cmp != goldCmp {
+				count++
+				fmt.Printf(failedCompare, t.Locale, j-1, a, []rune(a), b, []rune(b), cmp, goldCmp)
+			}
+		}
+		if count > 0 {
+			ctxt.Printf("Found %d inconsistencies in %d entries.\n", count, t.Len()-1)
+		}
+	}
+}
+
+// helpTemplate is the text/template for the general help. It is executed
+// with the list of commands and prints the name and short description of
+// each. The closing hint names the tool "colcmp", matching the usage line
+// (it used to say "col").
+const helpTemplate = `
+colcmp is a tool for testing and benchmarking collation
+
+Usage: colcmp command [arguments]
+
+The commands are:
+{{range .}}
+ {{.Name | printf "%-11s"}} {{.Short}}{{end}}
+
+Use "colcmp help [topic]" for more information about that topic.
+`
+
+// detailedHelpTemplate is the text/template for the help of one command. It
+// is executed with a *Command and uses the "trim" function registered by
+// runHelp.
+const detailedHelpTemplate = `
+Usage: colcmp {{.Usage}}
+
+{{.Long | trim}}
+`
+
+// runHelp prints help to standard error and exits. Without arguments it
+// prints the general help listing all commands. Otherwise args[0] names the
+// command whose detailed help is printed; an unknown name is a fatal error.
+// runHelp does not return.
+func runHelp(args []string) {
+	t := template.New("help")
+	t.Funcs(template.FuncMap{"trim": strings.TrimSpace})
+	if len(args) == 0 {
+		template.Must(t.Parse(helpTemplate))
+		failOnError(t.Execute(os.Stderr, &commands))
+		os.Exit(0)
+	}
+	topic := args[0]
+	for _, cmd := range commands {
+		if cmd.Name() != topic {
+			continue
+		}
+		template.Must(t.Parse(detailedHelpTemplate))
+		failOnError(t.Execute(os.Stderr, cmd))
+		os.Exit(0)
+	}
+	log.Fatalf("Unknown command %q. Run 'colcmp help'.", topic)
+}
+
+// main parses the flags, builds the tests and dispatches to the command
+// named by the first non-flag argument. With no command, with "help", or
+// with an unknown command, help is shown instead.
+func main() {
+	flag.Parse()
+	log.SetFlags(0)
+
+	ctxt := parseTests()
+
+	if flag.NArg() < 1 {
+		runHelp(nil)
+	}
+	name, rest := flag.Arg(0), flag.Args()[1:]
+	if name == "help" {
+		runHelp(rest)
+	}
+	for _, cmd := range commands {
+		if cmd.Name() != name {
+			continue
+		}
+		cmd.Run(ctxt, rest)
+		ctxt.flush()
+		return
+	}
+	// Unknown command: runHelp reports it, using the name as the topic.
+	runHelp(flag.Args())
+}