From 621e49bb465f500cc46d47e39e828cf76d6381d7 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 24 Jul 2018 14:35:44 +0200 Subject: update vendor --- .../x/text/internal/cldrtree/cldrtree.go | 353 +++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 vendor/golang.org/x/text/internal/cldrtree/cldrtree.go (limited to 'vendor/golang.org/x/text/internal/cldrtree/cldrtree.go') diff --git a/vendor/golang.org/x/text/internal/cldrtree/cldrtree.go b/vendor/golang.org/x/text/internal/cldrtree/cldrtree.go new file mode 100644 index 0000000..7530831 --- /dev/null +++ b/vendor/golang.org/x/text/internal/cldrtree/cldrtree.go @@ -0,0 +1,353 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cldrtree builds and generates a CLDR index file, including all +// inheritance. +// +package cldrtree + +//go:generate go test -gen + +// cldrtree stores CLDR data in a tree-like structure called Tree. In the CLDR +// data each branch in the tree is indicated by either an element name or an +// attribute value. A Tree does not distinguish between these two cases, but +// rather assumes that all branches can be accessed by an enum with a compact +// range of positive integer values starting from 0. +// +// Each Tree consists of three parts: +// - a slice mapping compact language identifiers to an offset into a set of +// indices, +// - a set of indices, stored as a large blob of uint16 values that encode +// the actual tree structure of data, and +// - a set of buckets that each holds a collection of strings. +// each of which is explained in more detail below. +// +// +// Tree lookup +// A tree lookup is done by providing a locale and a "path", which is a +// sequence of enum values. The search starts with getting the index for the +// given locale and then incrementally jumping into the index using the path +// values. 
If an element cannot be found in the index, the search starts anew +// for the locale's parent locale. The path may change during lookup by means +// of aliasing, described below. +// +// Buckets +// Buckets hold the actual string data of the leaf values of the CLDR tree. +// This data is stored in buckets, rather than one large string, for multiple +// reasons: +// - it allows representing leaf values more compactly, by storing all leaf +// values in a single bucket and then needing only a uint16 to index +// into this bucket for all leaf values, +// - (TBD) allow multiple trees to share subsets of buckets, mostly to allow +// linking in a smaller amount of data if only a subset of the buckets is +// needed, +// - to be nice to go fmt and the compiler. +// +// indices +// An index is a slice of uint16 for which the values are interpreted in one of +// two ways: as a node or a set of leaf values. +// A set of leaf values has the following form: +// <max_size>, <bucket>, <offset>... +// max_size indicates the maximum enum value for which an offset is defined. +// An offset value of 0xFFFF (missingValue) also indicates an undefined value. +// If defined, offset indicates the offset within the given bucket of the string. +// A node value has the following form: +// <max_size>, <offset_or_alias>... +// max_size indicates the maximum value for which an offset is defined. +// A missing offset may also be indicated with 0. If the high bit (0x8000, or +// inheritMask) is not set, the offset points to the offset within the index +// for the current locale. +// An offset with high bit set is an alias. In this case the uint16 has the form +// bits: +// 15: 1 +// 14-12: negative offset into path relative to current position +// 0-11: new enum value for path element. +// On encountering an alias, the path is modified accordingly and the lookup is +// restarted for the given locale. 
+ +import ( + "fmt" + "reflect" + "regexp" + "strings" + "unicode/utf8" + + "golang.org/x/text/internal/gen" + "golang.org/x/text/language" + "golang.org/x/text/unicode/cldr" +) + +// TODO: +// - allow two Trees to share the same set of buckets. + +// A Builder allows storing CLDR data in compact form. +type Builder struct { + table []string + + rootMeta *metaData + locales []locale + strToBucket map[string]stringInfo + buckets [][]byte + enums []*enum + err error + + // Stats + size int + sizeAll int + bucketWaste int +} + +const ( + maxBucketSize = 8 * 1024 // 8K + maxStrlen = 254 // allow 0xFF sentinel +) + +func (b *Builder) setError(err error) { + if b.err == nil { + b.err = err + } +} + +func (b *Builder) addString(data string) stringInfo { + data = b.makeString(data) + info, ok := b.strToBucket[data] + if !ok { + b.size += len(data) + x := len(b.buckets) - 1 + bucket := b.buckets[x] + if len(bucket)+len(data) < maxBucketSize { + info.bucket = uint16(x) + info.bucketPos = uint16(len(bucket)) + b.buckets[x] = append(bucket, data...) + } else { + info.bucket = uint16(len(b.buckets)) + info.bucketPos = 0 + b.buckets = append(b.buckets, []byte(data)) + } + b.strToBucket[data] = info + } + return info +} + +func (b *Builder) addStringToBucket(data string, bucket uint16) stringInfo { + data = b.makeString(data) + info, ok := b.strToBucket[data] + if !ok || info.bucket != bucket { + if ok { + b.bucketWaste += len(data) + } + b.size += len(data) + bk := b.buckets[bucket] + info.bucket = bucket + info.bucketPos = uint16(len(bk)) + b.buckets[bucket] = append(bk, data...) 
+ b.strToBucket[data] = info + } + return info +} + +func (b *Builder) makeString(data string) string { + if len(data) > maxStrlen { + b.setError(fmt.Errorf("string %q exceeds maximum length of %d", data, maxStrlen)) + data = data[:maxStrlen] + for i := len(data) - 1; i > len(data)-4; i-- { + if utf8.RuneStart(data[i]) { + data = data[:i] + break + } + } + } + data = string([]byte{byte(len(data))}) + data + b.sizeAll += len(data) + return data +} + +type stringInfo struct { + bufferPos uint32 + bucket uint16 + bucketPos uint16 +} + +// New creates a new Builder. +func New(tableName string) *Builder { + b := &Builder{ + strToBucket: map[string]stringInfo{}, + buckets: [][]byte{nil}, // initialize with first bucket. + } + b.rootMeta = &metaData{ + b: b, + typeInfo: &typeInfo{}, + } + return b +} + +// Gen writes all the tables and types for the collected data. +func (b *Builder) Gen(w *gen.CodeWriter) error { + t, err := build(b) + if err != nil { + return err + } + return generate(b, t, w) +} + +// GenTestData generates tables useful for testing data generated with Gen. +func (b *Builder) GenTestData(w *gen.CodeWriter) error { + return generateTestData(b, w) +} + +type locale struct { + tag language.Tag + root *Index +} + +// Locale creates an index for the given locale. +func (b *Builder) Locale(t language.Tag) *Index { + index := &Index{ + meta: b.rootMeta, + } + b.locales = append(b.locales, locale{tag: t, root: index}) + return index +} + +// An Index holds a map of either leaf values or other indices. +type Index struct { + meta *metaData + + subIndex []*Index + values []keyValue +} + +func (i *Index) setError(err error) { i.meta.b.setError(err) } + +type keyValue struct { + key enumIndex + value stringInfo +} + +// Element is a CLDR XML element. +type Element interface { + GetCommon() *cldr.Common +} + +// Index creates a subindex where the type and enum values are not shared +// with siblings by default. The name is derived from the elem. 
If elem is +// an alias reference, the alias will be resolved and linked. If elem is nil +// Index returns nil. +func (i *Index) Index(elem Element, opt ...Option) *Index { + if elem == nil || reflect.ValueOf(elem).IsNil() { + return nil + } + c := elem.GetCommon() + o := &options{ + parent: i, + name: c.GetCommon().Element(), + } + o.fill(opt) + o.setAlias(elem) + return i.subIndexForKey(o) +} + +// IndexWithName is like Section but derives the name from the given name. +func (i *Index) IndexWithName(name string, opt ...Option) *Index { + o := &options{parent: i, name: name} + o.fill(opt) + return i.subIndexForKey(o) +} + +// IndexFromType creates a subindex the value of tye type attribute as key. It +// will also configure the Index to share the enumeration values with all +// sibling values. If elem is an alias, it will be resolved and linked. +func (i *Index) IndexFromType(elem Element, opts ...Option) *Index { + o := &options{ + parent: i, + name: elem.GetCommon().Type, + } + o.fill(opts) + o.setAlias(elem) + useSharedType()(o) + return i.subIndexForKey(o) +} + +// IndexFromAlt creates a subindex the value of tye alt attribute as key. It +// will also configure the Index to share the enumeration values with all +// sibling values. If elem is an alias, it will be resolved and linked. 
+func (i *Index) IndexFromAlt(elem Element, opts ...Option) *Index { + o := &options{ + parent: i, + name: elem.GetCommon().Alt, + } + o.fill(opts) + o.setAlias(elem) + useSharedType()(o) + return i.subIndexForKey(o) +} + +func (i *Index) subIndexForKey(opts *options) *Index { + key := opts.name + if len(i.values) > 0 { + panic(fmt.Errorf("cldrtree: adding Index for %q when value already exists", key)) + } + meta := i.meta.sub(key, opts) + for _, x := range i.subIndex { + if x.meta == meta { + return x + } + } + if alias := opts.alias; alias != nil { + if a := alias.GetCommon().Alias; a != nil { + if a.Source != "locale" { + i.setError(fmt.Errorf("cldrtree: non-locale alias not supported %v", a.Path)) + } + if meta.inheritOffset < 0 { + i.setError(fmt.Errorf("cldrtree: alias was already set %v", a.Path)) + } + path := a.Path + for ; strings.HasPrefix(path, "../"); path = path[len("../"):] { + meta.inheritOffset-- + } + m := aliasRe.FindStringSubmatch(path) + if m == nil { + i.setError(fmt.Errorf("cldrtree: could not parse alias %q", a.Path)) + } else { + key := m[4] + if key == "" { + key = m[1] + } + meta.inheritIndex = key + } + } + } + x := &Index{meta: meta} + i.subIndex = append(i.subIndex, x) + return x +} + +var aliasRe = regexp.MustCompile(`^([a-zA-Z]+)(\[@([a-zA-Z-]+)='([a-zA-Z-]+)'\])?`) + +// SetValue sets the value, the data from a CLDR XML element, for the given key. 
+func (i *Index) SetValue(key string, value Element, opt ...Option) { + if len(i.subIndex) > 0 { + panic(fmt.Errorf("adding value for key %q when index already exists", key)) + } + o := &options{parent: i} + o.fill(opt) + c := value.GetCommon() + if c.Alias != nil { + i.setError(fmt.Errorf("cldrtree: alias not supported for SetValue %v", c.Alias.Path)) + } + i.setValue(key, c.Data(), o) +} + +func (i *Index) setValue(key, data string, o *options) { + index, _ := i.meta.typeInfo.lookupSubtype(key, o) + kv := keyValue{key: index} + if len(i.values) > 0 { + // Add string to the same bucket as the other values. + bucket := i.values[0].value.bucket + kv.value = i.meta.b.addStringToBucket(data, bucket) + } else { + kv.value = i.meta.b.addString(data) + } + i.values = append(i.values, kv) +} -- cgit v1.2.3