From 473acc61c8392dc7ae303d91568e179c4f105a76 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 2 Jul 2019 12:12:53 +0200 Subject: add black list --- vendor/golang.org/x/text/collate/build/builder.go | 702 ---------------------- 1 file changed, 702 deletions(-) delete mode 100644 vendor/golang.org/x/text/collate/build/builder.go (limited to 'vendor/golang.org/x/text/collate/build/builder.go') diff --git a/vendor/golang.org/x/text/collate/build/builder.go b/vendor/golang.org/x/text/collate/build/builder.go deleted file mode 100644 index 1104284..0000000 --- a/vendor/golang.org/x/text/collate/build/builder.go +++ /dev/null @@ -1,702 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package build // import "golang.org/x/text/collate/build" - -import ( - "fmt" - "io" - "log" - "sort" - "strings" - "unicode/utf8" - - "golang.org/x/text/internal/colltab" - "golang.org/x/text/language" - "golang.org/x/text/unicode/norm" -) - -// TODO: optimizations: -// - expandElem is currently 20K. By putting unique colElems in a separate -// table and having a byte array of indexes into this table, we can reduce -// the total size to about 7K. By also factoring out the length bytes, we -// can reduce this to about 6K. -// - trie valueBlocks are currently 100K. There are a lot of sparse blocks -// and many consecutive values with the same stride. This can be further -// compacted. -// - Compress secondary weights into 8 bits. -// - Some LDML specs specify a context element. Currently we simply concatenate -// those. Context can be implemented using the contraction trie. If Builder -// could analyze and detect when using a context makes sense, there is no -// need to expose this construct in the API. - -// A Builder builds a root collation table. The user must specify the -// collation elements for each entry. A common use will be to base the weights -// on those specified in the allkeys* file as provided by the UCA or CLDR. -type Builder struct { - index *trieBuilder - root ordering - locale []*Tailoring - t *table - err error - built bool - - minNonVar int // lowest primary recorded for a variable - varTop int // highest primary recorded for a non-variable - - // indexes used for reusing expansions and contractions - expIndex map[string]int // positions of expansions keyed by their string representation - ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes - ctElem map[string]int // contraction elements keyed by their string representation -} - -// A Tailoring builds a collation table based on another collation table. -// The table is defined by specifying tailorings to the underlying table. -// See http://unicode.org/reports/tr35/ for an overview of tailoring -// collation tables. The CLDR contains pre-defined tailorings for a variety -// of languages (See http://www.unicode.org/Public/cldr//core.zip.) -type Tailoring struct { - id string - builder *Builder - index *ordering - - anchor *entry - before bool -} - -// NewBuilder returns a new Builder. -func NewBuilder() *Builder { - return &Builder{ - index: newTrieBuilder(), - root: makeRootOrdering(), - expIndex: make(map[string]int), - ctHandle: make(map[string]ctHandle), - ctElem: make(map[string]int), - } -} - -// Tailoring returns a Tailoring for the given locale. One should -// have completed all calls to Add before calling Tailoring. -func (b *Builder) Tailoring(loc language.Tag) *Tailoring { - t := &Tailoring{ - id: loc.String(), - builder: b, - index: b.root.clone(), - } - t.index.id = t.id - b.locale = append(b.locale, t) - return t -} - -// Add adds an entry to the collation element table, mapping -// a slice of runes to a sequence of collation elements. -// A collation element is specified as list of weights: []int{primary, secondary, ...}. -// The entries are typically obtained from a collation element table -// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format. -// Note that the collation elements specified by colelems are only used -// as a guide. The actual weights generated by Builder may differ. -// The argument variables is a list of indices into colelems that should contain -// a value for each colelem that is a variable. (See the reference above.) -func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error { - str := string(runes) - elems := make([]rawCE, len(colelems)) - for i, ce := range colelems { - if len(ce) == 0 { - break - } - elems[i] = makeRawCE(ce, 0) - if len(ce) == 1 { - elems[i].w[1] = defaultSecondary - } - if len(ce) <= 2 { - elems[i].w[2] = defaultTertiary - } - if len(ce) <= 3 { - elems[i].w[3] = ce[0] - } - } - for i, ce := range elems { - p := ce.w[0] - isvar := false - for _, j := range variables { - if i == j { - isvar = true - } - } - if isvar { - if p >= b.minNonVar && b.minNonVar > 0 { - return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar) - } - if p > b.varTop { - b.varTop = p - } - } else if p > 1 { // 1 is a special primary value reserved for FFFE - if p <= b.varTop { - return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop) - } - if b.minNonVar == 0 || p < b.minNonVar { - b.minNonVar = p - } - } - } - elems, err := convertLargeWeights(elems) - if err != nil { - return err - } - cccs := []uint8{} - nfd := norm.NFD.String(str) - for i := range nfd { - cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC()) - } - if len(cccs) < len(elems) { - if len(cccs) > 2 { - return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems)) - } - p := len(elems) - 1 - for ; p > 0 && elems[p].w[0] == 0; p-- { - elems[p].ccc = cccs[len(cccs)-1] - } - for ; p >= 0; p-- { - elems[p].ccc = cccs[0] - } - } else { - for i := range elems { - elems[i].ccc = cccs[i] - } - } - // doNorm in collate.go assumes that the following conditions hold. - if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] { - return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs) - } - b.root.newEntry(str, elems) - return nil -} - -func (t *Tailoring) setAnchor(anchor string) error { - anchor = norm.NFC.String(anchor) - a := t.index.find(anchor) - if a == nil { - a = t.index.newEntry(anchor, nil) - a.implicit = true - a.modified = true - for _, r := range []rune(anchor) { - e := t.index.find(string(r)) - e.lock = true - } - } - t.anchor = a - return nil -} - -// SetAnchor sets the point after which elements passed in subsequent calls to -// Insert will be inserted. It is equivalent to the reset directive in an LDML -// specification. See Insert for an example. -// SetAnchor supports the following logical reset positions: -// , , , -// and . -func (t *Tailoring) SetAnchor(anchor string) error { - if err := t.setAnchor(anchor); err != nil { - return err - } - t.before = false - return nil -} - -// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to -// Insert will insert entries before the anchor. -func (t *Tailoring) SetAnchorBefore(anchor string) error { - if err := t.setAnchor(anchor); err != nil { - return err - } - t.before = true - return nil -} - -// Insert sets the ordering of str relative to the entry set by the previous -// call to SetAnchor or Insert. The argument extend corresponds -// to the extend elements as defined in LDML. A non-empty value for extend -// will cause the collation elements corresponding to extend to be appended -// to the collation elements generated for the entry added by Insert. -// This has the same net effect as sorting str after the string anchor+extend. -// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details -// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements -// for full details on LDML. -// -// Examples: create a tailoring for Swedish, where "ä" is ordered after "z" -// at the primary sorting level: -// t := b.Tailoring("se") -// t.SetAnchor("z") -// t.Insert(colltab.Primary, "ä", "") -// Order "ü" after "ue" at the secondary sorting level: -// t.SetAnchor("ue") -// t.Insert(colltab.Secondary, "ü","") -// or -// t.SetAnchor("u") -// t.Insert(colltab.Secondary, "ü", "e") -// Order "q" afer "ab" at the secondary level and "Q" after "q" -// at the tertiary level: -// t.SetAnchor("ab") -// t.Insert(colltab.Secondary, "q", "") -// t.Insert(colltab.Tertiary, "Q", "") -// Order "b" before "a": -// t.SetAnchorBefore("a") -// t.Insert(colltab.Primary, "b", "") -// Order "0" after the last primary ignorable: -// t.SetAnchor("") -// t.Insert(colltab.Primary, "0", "") -func (t *Tailoring) Insert(level colltab.Level, str, extend string) error { - if t.anchor == nil { - return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str) - } - str = norm.NFC.String(str) - e := t.index.find(str) - if e == nil { - e = t.index.newEntry(str, nil) - } else if e.logical != noAnchor { - return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str) - } - if e.lock { - return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str) - } - a := t.anchor - // Find the first element after the anchor which differs at a level smaller or - // equal to the given level. Then insert at this position. - // See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details. - e.before = t.before - if t.before { - t.before = false - if a.prev == nil { - a.insertBefore(e) - } else { - for a = a.prev; a.level > level; a = a.prev { - } - a.insertAfter(e) - } - e.level = level - } else { - for ; a.level > level; a = a.next { - } - e.level = a.level - if a != e { - a.insertAfter(e) - a.level = level - } else { - // We don't set a to prev itself. This has the effect of the entry - // getting new collation elements that are an increment of itself. - // This is intentional. - a.prev.level = level - } - } - e.extend = norm.NFD.String(extend) - e.exclude = false - e.modified = true - e.elems = nil - t.anchor = e - return nil -} - -func (o *ordering) getWeight(e *entry) []rawCE { - if len(e.elems) == 0 && e.logical == noAnchor { - if e.implicit { - for _, r := range e.runes { - e.elems = append(e.elems, o.getWeight(o.find(string(r)))...) - } - } else if e.before { - count := [colltab.Identity + 1]int{} - a := e - for ; a.elems == nil && !a.implicit; a = a.next { - count[a.level]++ - } - e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)} - for i := colltab.Primary; i < colltab.Quaternary; i++ { - if count[i] != 0 { - e.elems[0].w[i] -= count[i] - break - } - } - if e.prev != nil { - o.verifyWeights(e.prev, e, e.prev.level) - } - } else { - prev := e.prev - e.elems = nextWeight(prev.level, o.getWeight(prev)) - o.verifyWeights(e, e.next, e.level) - } - } - return e.elems -} - -func (o *ordering) addExtension(e *entry) { - if ex := o.find(e.extend); ex != nil { - e.elems = append(e.elems, ex.elems...) - } else { - for _, r := range []rune(e.extend) { - e.elems = append(e.elems, o.find(string(r)).elems...) - } - } - e.extend = "" -} - -func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error { - if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil { - return nil - } - for i := colltab.Primary; i < level; i++ { - if a.elems[0].w[i] < b.elems[0].w[i] { - return nil - } - } - if a.elems[0].w[level] >= b.elems[0].w[level] { - err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems) - log.Println(err) - // TODO: return the error instead, or better, fix the conflicting entry by making room. - } - return nil -} - -func (b *Builder) error(e error) { - if e != nil { - b.err = e - } -} - -func (b *Builder) errorID(locale string, e error) { - if e != nil { - b.err = fmt.Errorf("%s:%v", locale, e) - } -} - -// patchNorm ensures that NFC and NFD counterparts are consistent. -func (o *ordering) patchNorm() { - // Insert the NFD counterparts, if necessary. - for _, e := range o.ordered { - nfd := norm.NFD.String(e.str) - if nfd != e.str { - if e0 := o.find(nfd); e0 != nil && !e0.modified { - e0.elems = e.elems - } else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) { - e := o.newEntry(nfd, e.elems) - e.modified = true - } - } - } - // Update unchanged composed forms if one of their parts changed. - for _, e := range o.ordered { - nfd := norm.NFD.String(e.str) - if e.modified || nfd == e.str { - continue - } - if e0 := o.find(nfd); e0 != nil { - e.elems = e0.elems - } else { - e.elems = o.genColElems(nfd) - if norm.NFD.LastBoundary([]byte(nfd)) == 0 { - r := []rune(nfd) - head := string(r[0]) - tail := "" - for i := 1; i < len(r); i++ { - s := norm.NFC.String(head + string(r[i])) - if e0 := o.find(s); e0 != nil && e0.modified { - head = s - } else { - tail += string(r[i]) - } - } - e.elems = append(o.genColElems(head), o.genColElems(tail)...) - } - } - } - // Exclude entries for which the individual runes generate the same collation elements. - for _, e := range o.ordered { - if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) { - e.exclude = true - } - } -} - -func (b *Builder) buildOrdering(o *ordering) { - for _, e := range o.ordered { - o.getWeight(e) - } - for _, e := range o.ordered { - o.addExtension(e) - } - o.patchNorm() - o.sort() - simplify(o) - b.processExpansions(o) // requires simplify - b.processContractions(o) // requires simplify - - t := newNode() - for e := o.front(); e != nil; e, _ = e.nextIndexed() { - if !e.skip() { - ce, err := e.encode() - b.errorID(o.id, err) - t.insert(e.runes[0], ce) - } - } - o.handle = b.index.addTrie(t) -} - -func (b *Builder) build() (*table, error) { - if b.built { - return b.t, b.err - } - b.built = true - b.t = &table{ - Table: colltab.Table{ - MaxContractLen: utf8.UTFMax, - VariableTop: uint32(b.varTop), - }, - } - - b.buildOrdering(&b.root) - b.t.root = b.root.handle - for _, t := range b.locale { - b.buildOrdering(t.index) - if b.err != nil { - break - } - } - i, err := b.index.generate() - b.t.trie = *i - b.t.Index = colltab.Trie{ - Index: i.index, - Values: i.values, - Index0: i.index[blockSize*b.t.root.lookupStart:], - Values0: i.values[blockSize*b.t.root.valueStart:], - } - b.error(err) - return b.t, b.err -} - -// Build builds the root Collator. -func (b *Builder) Build() (colltab.Weighter, error) { - table, err := b.build() - if err != nil { - return nil, err - } - return table, nil -} - -// Build builds a Collator for Tailoring t. -func (t *Tailoring) Build() (colltab.Weighter, error) { - // TODO: implement. - return nil, nil -} - -// Print prints the tables for b and all its Tailorings as a Go file -// that can be included in the Collate package. -func (b *Builder) Print(w io.Writer) (n int, err error) { - p := func(nn int, e error) { - n += nn - if err == nil { - err = e - } - } - t, err := b.build() - if err != nil { - return 0, err - } - p(fmt.Fprintf(w, `var availableLocales = "und`)) - for _, loc := range b.locale { - if loc.id != "und" { - p(fmt.Fprintf(w, ",%s", loc.id)) - } - } - p(fmt.Fprint(w, "\"\n\n")) - p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop)) - p(fmt.Fprintln(w, "var locales = [...]tableIndex{")) - for _, loc := range b.locale { - if loc.id == "und" { - p(t.fprintIndex(w, loc.index.handle, loc.id)) - } - } - for _, loc := range b.locale { - if loc.id != "und" { - p(t.fprintIndex(w, loc.index.handle, loc.id)) - } - } - p(fmt.Fprint(w, "}\n\n")) - n, _, err = t.fprint(w, "main") - return -} - -// reproducibleFromNFKD checks whether the given expansion could be generated -// from an NFKD expansion. -func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool { - // Length must be equal. - if len(exp) != len(nfkd) { - return false - } - for i, ce := range exp { - // Primary and secondary values should be equal. - if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] { - return false - } - // Tertiary values should be equal to maxTertiary for third element onwards. - // TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can - // simply be dropped. Try this out by dropping the following code. - if i >= 2 && ce.w[2] != maxTertiary { - return false - } - if _, err := makeCE(ce); err != nil { - // Simply return false. The error will be caught elsewhere. - return false - } - } - return true -} - -func simplify(o *ordering) { - // Runes that are a starter of a contraction should not be removed. - // (To date, there is only Kannada character 0CCA.) - keep := make(map[rune]bool) - for e := o.front(); e != nil; e, _ = e.nextIndexed() { - if len(e.runes) > 1 { - keep[e.runes[0]] = true - } - } - // Tag entries for which the runes NFKD decompose to identical values. - for e := o.front(); e != nil; e, _ = e.nextIndexed() { - s := e.str - nfkd := norm.NFKD.String(s) - nfd := norm.NFD.String(s) - if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd { - continue - } - if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) { - e.decompose = true - } - } -} - -// appendExpansion converts the given collation sequence to -// collation elements and adds them to the expansion table. -// It returns an index to the expansion table. -func (b *Builder) appendExpansion(e *entry) int { - t := b.t - i := len(t.ExpandElem) - ce := uint32(len(e.elems)) - t.ExpandElem = append(t.ExpandElem, ce) - for _, w := range e.elems { - ce, err := makeCE(w) - if err != nil { - b.error(err) - return -1 - } - t.ExpandElem = append(t.ExpandElem, ce) - } - return i -} - -// processExpansions extracts data necessary to generate -// the extraction tables. -func (b *Builder) processExpansions(o *ordering) { - for e := o.front(); e != nil; e, _ = e.nextIndexed() { - if !e.expansion() { - continue - } - key := fmt.Sprintf("%v", e.elems) - i, ok := b.expIndex[key] - if !ok { - i = b.appendExpansion(e) - b.expIndex[key] = i - } - e.expansionIndex = i - } -} - -func (b *Builder) processContractions(o *ordering) { - // Collate contractions per starter rune. - starters := []rune{} - cm := make(map[rune][]*entry) - for e := o.front(); e != nil; e, _ = e.nextIndexed() { - if e.contraction() { - if len(e.str) > b.t.MaxContractLen { - b.t.MaxContractLen = len(e.str) - } - r := e.runes[0] - if _, ok := cm[r]; !ok { - starters = append(starters, r) - } - cm[r] = append(cm[r], e) - } - } - // Add entries of single runes that are at a start of a contraction. - for e := o.front(); e != nil; e, _ = e.nextIndexed() { - if !e.contraction() { - r := e.runes[0] - if _, ok := cm[r]; ok { - cm[r] = append(cm[r], e) - } - } - } - // Build the tries for the contractions. - t := b.t - for _, r := range starters { - l := cm[r] - // Compute suffix strings. There are 31 different contraction suffix - // sets for 715 contractions and 82 contraction starter runes as of - // version 6.0.0. - sufx := []string{} - hasSingle := false - for _, e := range l { - if len(e.runes) > 1 { - sufx = append(sufx, string(e.runes[1:])) - } else { - hasSingle = true - } - } - if !hasSingle { - b.error(fmt.Errorf("no single entry for starter rune %U found", r)) - continue - } - // Unique the suffix set. - sort.Strings(sufx) - key := strings.Join(sufx, "\n") - handle, ok := b.ctHandle[key] - if !ok { - var err error - handle, err = appendTrie(&t.ContractTries, sufx) - if err != nil { - b.error(err) - } - b.ctHandle[key] = handle - } - // Bucket sort entries in index order. - es := make([]*entry, len(l)) - for _, e := range l { - var p, sn int - if len(e.runes) > 1 { - str := []byte(string(e.runes[1:])) - p, sn = lookup(&t.ContractTries, handle, str) - if sn != len(str) { - log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str)) - } - } - if es[p] != nil { - log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0]) - } - es[p] = e - } - // Create collation elements for contractions. - elems := []uint32{} - for _, e := range es { - ce, err := e.encodeBase() - b.errorID(o.id, err) - elems = append(elems, ce) - } - key = fmt.Sprintf("%v", elems) - i, ok := b.ctElem[key] - if !ok { - i = len(t.ContractElem) - b.ctElem[key] = i - t.ContractElem = append(t.ContractElem, elems...) - } - // Store info in entry for starter rune. - es[0].contractionIndex = i - es[0].contractionHandle = handle - } -} -- cgit v1.2.3