summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/internal/cldrtree/cldrtree.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/internal/cldrtree/cldrtree.go')
-rw-r--r--vendor/golang.org/x/text/internal/cldrtree/cldrtree.go353
1 files changed, 353 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/internal/cldrtree/cldrtree.go b/vendor/golang.org/x/text/internal/cldrtree/cldrtree.go
new file mode 100644
index 0000000..7530831
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/cldrtree/cldrtree.go
@@ -0,0 +1,353 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cldrtree builds and generates a CLDR index file, including all
+// inheritance.
+//
+package cldrtree
+
+//go:generate go test -gen
+
+// cldrtree stores CLDR data in a tree-like structure called Tree. In the CLDR
+// data each branch in the tree is indicated by either an element name or an
+// attribute value. A Tree does not distinguish between these two cases, but
+// rather assumes that all branches can be accessed by an enum with a compact
+// range of positive integer values starting from 0.
+//
+// Each Tree consists of three parts:
+// - a slice mapping compact language identifiers to an offset into a set of
+// indices,
+// - a set of indices, stored as a large blob of uint16 values that encode
+// the actual tree structure of data, and
+// - a set of buckets that each holds a collection of strings.
+// each of which is explained in more detail below.
+//
+//
+// Tree lookup
+// A tree lookup is done by providing a locale and a "path", which is a
+// sequence of enum values. The search starts with getting the index for the
+// given locale and then incrementally jumping into the index using the path
+// values. If an element cannot be found in the index, the search starts anew
+// for the locale's parent locale. The path may change during lookup by means
+// of aliasing, described below.
+//
+// Buckets
+// Buckets hold the actual string data of the leaf values of the CLDR tree.
+// This data is stored in buckets, rather than one large string, for multiple
+// reasons:
+// - it allows representing leaf values more compactly, by storing all leaf
+// values in a single bucket and then needing only needing a uint16 to index
+// into this bucket for all leaf values,
+// - (TBD) allow multiple trees to share subsets of buckets, mostly to allow
+// linking in a smaller amount of data if only a subset of the buckets is
+// needed,
+// - to be nice to go fmt and the compiler.
+//
+// indices
+// An index is a slice of uint16 for which the values are interpreted in one of
+// two ways: as a node or a set of leaf values.
+// A set of leaf values has the following form:
+// <max_size>, <bucket>, <offset>...
+// max_size indicates the maximum enum value for which an offset is defined.
+// An offset value of 0xFFFF (missingValue) also indicates an undefined value.
+// If defined offset indicates the offset within the given bucket of the string.
+// A node value has the following form:
+// <max_size>, <offset_or_alias>...
+// max_size indicates the maximum value for which an offset is defined.
+// A missing offset may also be indicated with 0. If the high bit (0x8000, or
+// inheritMask) is not set, the offset points to the offset within the index
+// for the current locale.
+// An offset with high bit set is an alias. In this case the uint16 has the form
+// bits:
+// 15: 1
+// 14-12: negative offset into path relative to current position
+// 0-11: new enum value for path element.
+// On encountering an alias, the path is modified accordingly and the lookup is
+// restarted for the given locale.
+
+import (
+ "fmt"
+ "reflect"
+ "regexp"
+ "strings"
+ "unicode/utf8"
+
+ "golang.org/x/text/internal/gen"
+ "golang.org/x/text/language"
+ "golang.org/x/text/unicode/cldr"
+)
+
+// TODO:
+// - allow two Trees to share the same set of buckets.
+
+// A Builder allows storing CLDR data in compact form.
+type Builder struct {
+ table []string
+
+ rootMeta *metaData
+ locales []locale
+ strToBucket map[string]stringInfo
+ buckets [][]byte
+ enums []*enum
+ err error
+
+ // Stats
+ size int
+ sizeAll int
+ bucketWaste int
+}
+
+const (
+ maxBucketSize = 8 * 1024 // 8K
+ maxStrlen = 254 // allow 0xFF sentinel
+)
+
+func (b *Builder) setError(err error) {
+ if b.err == nil {
+ b.err = err
+ }
+}
+
+func (b *Builder) addString(data string) stringInfo {
+ data = b.makeString(data)
+ info, ok := b.strToBucket[data]
+ if !ok {
+ b.size += len(data)
+ x := len(b.buckets) - 1
+ bucket := b.buckets[x]
+ if len(bucket)+len(data) < maxBucketSize {
+ info.bucket = uint16(x)
+ info.bucketPos = uint16(len(bucket))
+ b.buckets[x] = append(bucket, data...)
+ } else {
+ info.bucket = uint16(len(b.buckets))
+ info.bucketPos = 0
+ b.buckets = append(b.buckets, []byte(data))
+ }
+ b.strToBucket[data] = info
+ }
+ return info
+}
+
+func (b *Builder) addStringToBucket(data string, bucket uint16) stringInfo {
+ data = b.makeString(data)
+ info, ok := b.strToBucket[data]
+ if !ok || info.bucket != bucket {
+ if ok {
+ b.bucketWaste += len(data)
+ }
+ b.size += len(data)
+ bk := b.buckets[bucket]
+ info.bucket = bucket
+ info.bucketPos = uint16(len(bk))
+ b.buckets[bucket] = append(bk, data...)
+ b.strToBucket[data] = info
+ }
+ return info
+}
+
+func (b *Builder) makeString(data string) string {
+ if len(data) > maxStrlen {
+ b.setError(fmt.Errorf("string %q exceeds maximum length of %d", data, maxStrlen))
+ data = data[:maxStrlen]
+ for i := len(data) - 1; i > len(data)-4; i-- {
+ if utf8.RuneStart(data[i]) {
+ data = data[:i]
+ break
+ }
+ }
+ }
+ data = string([]byte{byte(len(data))}) + data
+ b.sizeAll += len(data)
+ return data
+}
+
+type stringInfo struct {
+ bufferPos uint32
+ bucket uint16
+ bucketPos uint16
+}
+
+// New creates a new Builder.
+func New(tableName string) *Builder {
+ b := &Builder{
+ strToBucket: map[string]stringInfo{},
+ buckets: [][]byte{nil}, // initialize with first bucket.
+ }
+ b.rootMeta = &metaData{
+ b: b,
+ typeInfo: &typeInfo{},
+ }
+ return b
+}
+
+// Gen writes all the tables and types for the collected data.
+func (b *Builder) Gen(w *gen.CodeWriter) error {
+ t, err := build(b)
+ if err != nil {
+ return err
+ }
+ return generate(b, t, w)
+}
+
+// GenTestData generates tables useful for testing data generated with Gen.
+func (b *Builder) GenTestData(w *gen.CodeWriter) error {
+ return generateTestData(b, w)
+}
+
+type locale struct {
+ tag language.Tag
+ root *Index
+}
+
+// Locale creates an index for the given locale.
+func (b *Builder) Locale(t language.Tag) *Index {
+ index := &Index{
+ meta: b.rootMeta,
+ }
+ b.locales = append(b.locales, locale{tag: t, root: index})
+ return index
+}
+
+// An Index holds a map of either leaf values or other indices.
+type Index struct {
+ meta *metaData
+
+ subIndex []*Index
+ values []keyValue
+}
+
+func (i *Index) setError(err error) { i.meta.b.setError(err) }
+
+type keyValue struct {
+ key enumIndex
+ value stringInfo
+}
+
+// Element is a CLDR XML element.
+type Element interface {
+ GetCommon() *cldr.Common
+}
+
+// Index creates a subindex where the type and enum values are not shared
+// with siblings by default. The name is derived from the elem. If elem is
+// an alias reference, the alias will be resolved and linked. If elem is nil
+// Index returns nil.
+func (i *Index) Index(elem Element, opt ...Option) *Index {
+ if elem == nil || reflect.ValueOf(elem).IsNil() {
+ return nil
+ }
+ c := elem.GetCommon()
+ o := &options{
+ parent: i,
+ name: c.GetCommon().Element(),
+ }
+ o.fill(opt)
+ o.setAlias(elem)
+ return i.subIndexForKey(o)
+}
+
+// IndexWithName is like Section but derives the name from the given name.
+func (i *Index) IndexWithName(name string, opt ...Option) *Index {
+ o := &options{parent: i, name: name}
+ o.fill(opt)
+ return i.subIndexForKey(o)
+}
+
+// IndexFromType creates a subindex the value of tye type attribute as key. It
+// will also configure the Index to share the enumeration values with all
+// sibling values. If elem is an alias, it will be resolved and linked.
+func (i *Index) IndexFromType(elem Element, opts ...Option) *Index {
+ o := &options{
+ parent: i,
+ name: elem.GetCommon().Type,
+ }
+ o.fill(opts)
+ o.setAlias(elem)
+ useSharedType()(o)
+ return i.subIndexForKey(o)
+}
+
+// IndexFromAlt creates a subindex the value of tye alt attribute as key. It
+// will also configure the Index to share the enumeration values with all
+// sibling values. If elem is an alias, it will be resolved and linked.
+func (i *Index) IndexFromAlt(elem Element, opts ...Option) *Index {
+ o := &options{
+ parent: i,
+ name: elem.GetCommon().Alt,
+ }
+ o.fill(opts)
+ o.setAlias(elem)
+ useSharedType()(o)
+ return i.subIndexForKey(o)
+}
+
+func (i *Index) subIndexForKey(opts *options) *Index {
+ key := opts.name
+ if len(i.values) > 0 {
+ panic(fmt.Errorf("cldrtree: adding Index for %q when value already exists", key))
+ }
+ meta := i.meta.sub(key, opts)
+ for _, x := range i.subIndex {
+ if x.meta == meta {
+ return x
+ }
+ }
+ if alias := opts.alias; alias != nil {
+ if a := alias.GetCommon().Alias; a != nil {
+ if a.Source != "locale" {
+ i.setError(fmt.Errorf("cldrtree: non-locale alias not supported %v", a.Path))
+ }
+ if meta.inheritOffset < 0 {
+ i.setError(fmt.Errorf("cldrtree: alias was already set %v", a.Path))
+ }
+ path := a.Path
+ for ; strings.HasPrefix(path, "../"); path = path[len("../"):] {
+ meta.inheritOffset--
+ }
+ m := aliasRe.FindStringSubmatch(path)
+ if m == nil {
+ i.setError(fmt.Errorf("cldrtree: could not parse alias %q", a.Path))
+ } else {
+ key := m[4]
+ if key == "" {
+ key = m[1]
+ }
+ meta.inheritIndex = key
+ }
+ }
+ }
+ x := &Index{meta: meta}
+ i.subIndex = append(i.subIndex, x)
+ return x
+}
+
+var aliasRe = regexp.MustCompile(`^([a-zA-Z]+)(\[@([a-zA-Z-]+)='([a-zA-Z-]+)'\])?`)
+
+// SetValue sets the value, the data from a CLDR XML element, for the given key.
+func (i *Index) SetValue(key string, value Element, opt ...Option) {
+ if len(i.subIndex) > 0 {
+ panic(fmt.Errorf("adding value for key %q when index already exists", key))
+ }
+ o := &options{parent: i}
+ o.fill(opt)
+ c := value.GetCommon()
+ if c.Alias != nil {
+ i.setError(fmt.Errorf("cldrtree: alias not supported for SetValue %v", c.Alias.Path))
+ }
+ i.setValue(key, c.Data(), o)
+}
+
+func (i *Index) setValue(key, data string, o *options) {
+ index, _ := i.meta.typeInfo.lookupSubtype(key, o)
+ kv := keyValue{key: index}
+ if len(i.values) > 0 {
+ // Add string to the same bucket as the other values.
+ bucket := i.values[0].value.bucket
+ kv.value = i.meta.b.addStringToBucket(data, bucket)
+ } else {
+ kv.value = i.meta.b.addString(data)
+ }
+ i.values = append(i.values, kv)
+}