From 621e49bb465f500cc46d47e39e828cf76d6381d7 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 24 Jul 2018 14:35:44 +0200 Subject: update vendor --- .../x/text/internal/colltab/collate_test.go | 121 +++++++ .../golang.org/x/text/internal/colltab/collelem.go | 371 +++++++++++++++++++++ .../x/text/internal/colltab/collelem_test.go | 183 ++++++++++ .../golang.org/x/text/internal/colltab/colltab.go | 105 ++++++ .../x/text/internal/colltab/colltab_test.go | 64 ++++ .../golang.org/x/text/internal/colltab/contract.go | 145 ++++++++ .../x/text/internal/colltab/contract_test.go | 131 ++++++++ vendor/golang.org/x/text/internal/colltab/iter.go | 178 ++++++++++ .../x/text/internal/colltab/iter_test.go | 63 ++++ .../golang.org/x/text/internal/colltab/numeric.go | 236 +++++++++++++ .../x/text/internal/colltab/numeric_test.go | 159 +++++++++ vendor/golang.org/x/text/internal/colltab/table.go | 275 +++++++++++++++ vendor/golang.org/x/text/internal/colltab/trie.go | 159 +++++++++ .../x/text/internal/colltab/trie_test.go | 106 ++++++ .../golang.org/x/text/internal/colltab/weighter.go | 31 ++ .../x/text/internal/colltab/weighter_test.go | 42 +++ 16 files changed, 2369 insertions(+) create mode 100644 vendor/golang.org/x/text/internal/colltab/collate_test.go create mode 100644 vendor/golang.org/x/text/internal/colltab/collelem.go create mode 100644 vendor/golang.org/x/text/internal/colltab/collelem_test.go create mode 100644 vendor/golang.org/x/text/internal/colltab/colltab.go create mode 100644 vendor/golang.org/x/text/internal/colltab/colltab_test.go create mode 100644 vendor/golang.org/x/text/internal/colltab/contract.go create mode 100644 vendor/golang.org/x/text/internal/colltab/contract_test.go create mode 100644 vendor/golang.org/x/text/internal/colltab/iter.go create mode 100644 vendor/golang.org/x/text/internal/colltab/iter_test.go create mode 100644 vendor/golang.org/x/text/internal/colltab/numeric.go create mode 100644 vendor/golang.org/x/text/internal/colltab/numeric_test.go create mode 100644 vendor/golang.org/x/text/internal/colltab/table.go create mode 100644 vendor/golang.org/x/text/internal/colltab/trie.go create mode 100644 vendor/golang.org/x/text/internal/colltab/trie_test.go create mode 100644 vendor/golang.org/x/text/internal/colltab/weighter.go create mode 100644 vendor/golang.org/x/text/internal/colltab/weighter_test.go (limited to 'vendor/golang.org/x/text/internal/colltab') diff --git a/vendor/golang.org/x/text/internal/colltab/collate_test.go b/vendor/golang.org/x/text/internal/colltab/collate_test.go new file mode 100644 index 0000000..580c85c --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/collate_test.go @@ -0,0 +1,121 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab_test + +// This file contains tests which need to import package collate, which causes +// an import cycle when done within package colltab itself. + +import ( + "bytes" + "testing" + "unicode" + + "golang.org/x/text/collate" + "golang.org/x/text/language" + "golang.org/x/text/unicode/rangetable" +) + +// assigned is used to only test runes that are inside the scope of the Unicode +// version used to generation the collation table. +var assigned = rangetable.Assigned(collate.UnicodeVersion) + +func TestNonDigits(t *testing.T) { + c := collate.New(language.English, collate.Loose, collate.Numeric) + + // Verify that all non-digit numbers sort outside of the number range. + for r, hi := rune(unicode.N.R16[0].Lo), rune(unicode.N.R32[0].Hi); r <= hi; r++ { + if unicode.In(r, unicode.Nd) || !unicode.In(r, assigned) { + continue + } + if a := string(r); c.CompareString(a, "0") != -1 && c.CompareString(a, "999999") != 1 { + t.Errorf("%+q non-digit number is collated as digit", a) + } + } +} + +func TestNumericCompare(t *testing.T) { + c := collate.New(language.English, collate.Loose, collate.Numeric) + + // Iterate over all digits. + for _, r16 := range unicode.Nd.R16 { + testDigitCompare(t, c, rune(r16.Lo), rune(r16.Hi)) + } + for _, r32 := range unicode.Nd.R32 { + testDigitCompare(t, c, rune(r32.Lo), rune(r32.Hi)) + } +} + +func testDigitCompare(t *testing.T, c *collate.Collator, zero, nine rune) { + if !unicode.In(zero, assigned) { + return + } + n := int(nine - zero + 1) + if n%10 != 0 { + t.Fatalf("len([%+q, %+q]) = %d; want a multiple of 10", zero, nine, n) + } + for _, tt := range []struct { + prefix string + b [11]string + }{ + { + prefix: "", + b: [11]string{ + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", + }, + }, + { + prefix: "1", + b: [11]string{ + "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", + }, + }, + { + prefix: "0", + b: [11]string{ + "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", + }, + }, + { + prefix: "00", + b: [11]string{ + "000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010", + }, + }, + { + prefix: "9", + b: [11]string{ + "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100", + }, + }, + } { + for k := 0; k <= n; k++ { + i := k % 10 + a := tt.prefix + string(zero+rune(i)) + for j, b := range tt.b { + want := 0 + switch { + case i < j: + want = -1 + case i > j: + want = 1 + } + got := c.CompareString(a, b) + if got != want { + t.Errorf("Compare(%+q, %+q) = %d; want %d", a, b, got, want) + return + } + } + } + } +} + +func BenchmarkNumericWeighter(b *testing.B) { + c := collate.New(language.English, collate.Numeric) + input := bytes.Repeat([]byte("Testing, testing 123..."), 100) + b.SetBytes(int64(2 * len(input))) + for i := 0; i < b.N; i++ { + c.Compare(input, input) + } +} diff --git a/vendor/golang.org/x/text/internal/colltab/collelem.go b/vendor/golang.org/x/text/internal/colltab/collelem.go new file mode 100644 index 0000000..2855589 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/collelem.go @@ -0,0 +1,371 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "fmt" + "unicode" +) + +// Level identifies the collation comparison level. +// The primary level corresponds to the basic sorting of text. +// The secondary level corresponds to accents and related linguistic elements. +// The tertiary level corresponds to casing and related concepts. +// The quaternary level is derived from the other levels by the +// various algorithms for handling variable elements. +type Level int + +const ( + Primary Level = iota + Secondary + Tertiary + Quaternary + Identity + + NumLevels +) + +const ( + defaultSecondary = 0x20 + defaultTertiary = 0x2 + maxTertiary = 0x1F + MaxQuaternary = 0x1FFFFF // 21 bits. +) + +// Elem is a representation of a collation element. This API provides ways to encode +// and decode Elems. Implementations of collation tables may use values greater +// or equal to PrivateUse for their own purposes. However, these should never be +// returned by AppendNext. +type Elem uint32 + +const ( + maxCE Elem = 0xAFFFFFFF + PrivateUse = minContract + minContract = 0xC0000000 + maxContract = 0xDFFFFFFF + minExpand = 0xE0000000 + maxExpand = 0xEFFFFFFF + minDecomp = 0xF0000000 +) + +type ceType int + +const ( + ceNormal ceType = iota // ceNormal includes implicits (ce == 0) + ceContractionIndex // rune can be a start of a contraction + ceExpansionIndex // rune expands into a sequence of collation elements + ceDecompose // rune expands using NFKC decomposition +) + +func (ce Elem) ctype() ceType { + if ce <= maxCE { + return ceNormal + } + if ce <= maxContract { + return ceContractionIndex + } else { + if ce <= maxExpand { + return ceExpansionIndex + } + return ceDecompose + } + panic("should not reach here") + return ceType(-1) +} + +// For normal collation elements, we assume that a collation element either has +// a primary or non-default secondary value, not both. +// Collation elements with a primary value are of the form +// 01pppppp pppppppp ppppppp0 ssssssss +// - p* is primary collation value +// - s* is the secondary collation value +// 00pppppp pppppppp ppppppps sssttttt, where +// - p* is primary collation value +// - s* offset of secondary from default value. +// - t* is the tertiary collation value +// 100ttttt cccccccc pppppppp pppppppp +// - t* is the tertiar collation value +// - c* is the canonical combining class +// - p* is the primary collation value +// Collation elements with a secondary value are of the form +// 1010cccc ccccssss ssssssss tttttttt, where +// - c* is the canonical combining class +// - s* is the secondary collation value +// - t* is the tertiary collation value +// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 +// - q* quaternary value +const ( + ceTypeMask = 0xC0000000 + ceTypeMaskExt = 0xE0000000 + ceIgnoreMask = 0xF00FFFFF + ceType1 = 0x40000000 + ceType2 = 0x00000000 + ceType3or4 = 0x80000000 + ceType4 = 0xA0000000 + ceTypeQ = 0xC0000000 + Ignore = ceType4 + firstNonPrimary = 0x80000000 + lastSpecialPrimary = 0xA0000000 + secondaryMask = 0x80000000 + hasTertiaryMask = 0x40000000 + primaryValueMask = 0x3FFFFE00 + maxPrimaryBits = 21 + compactPrimaryBits = 16 + maxSecondaryBits = 12 + maxTertiaryBits = 8 + maxCCCBits = 8 + maxSecondaryCompactBits = 8 + maxSecondaryDiffBits = 4 + maxTertiaryCompactBits = 5 + primaryShift = 9 + compactSecondaryShift = 5 + minCompactSecondary = defaultSecondary - 4 +) + +func makeImplicitCE(primary int) Elem { + return ceType1 | Elem(primary<= 1<= %x", w, 1<= 1<= %x", w, 1<= 1<= %x", w, 1<= 1<= %x", primary, 1<= 1<= %x", secondary, 1<= 1< %x", d, d, 1<= 1< %x", tertiary, 1<> 16) + } + return uint8(ce >> 20) + } + return 0 +} + +// Primary returns the primary collation weight for ce. +func (ce Elem) Primary() int { + if ce >= firstNonPrimary { + if ce > lastSpecialPrimary { + return 0 + } + return int(uint16(ce)) + } + return int(ce&primaryValueMask) >> primaryShift +} + +// Secondary returns the secondary collation weight for ce. +func (ce Elem) Secondary() int { + switch ce & ceTypeMask { + case ceType1: + return int(uint8(ce)) + case ceType2: + return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF) + case ceType3or4: + if ce < ceType4 { + return defaultSecondary + } + return int(ce>>8) & 0xFFF + case ceTypeQ: + return 0 + } + panic("should not reach here") +} + +// Tertiary returns the tertiary collation weight for ce. +func (ce Elem) Tertiary() uint8 { + if ce&hasTertiaryMask == 0 { + if ce&ceType3or4 == 0 { + return uint8(ce & 0x1F) + } + if ce&ceType4 == ceType4 { + return uint8(ce) + } + return uint8(ce>>24) & 0x1F // type 2 + } else if ce&ceTypeMask == ceType1 { + return defaultTertiary + } + // ce is a quaternary value. + return 0 +} + +func (ce Elem) updateTertiary(t uint8) Elem { + if ce&ceTypeMask == ceType1 { + // convert to type 4 + nce := ce & primaryValueMask + nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift + ce = nce + } else if ce&ceTypeMaskExt == ceType3or4 { + ce &= ^Elem(maxTertiary << 24) + return ce | (Elem(t) << 24) + } else { + // type 2 or 4 + ce &= ^Elem(maxTertiary) + } + return ce | Elem(t) +} + +// Quaternary returns the quaternary value if explicitly specified, +// 0 if ce == Ignore, or MaxQuaternary otherwise. +// Quaternary values are used only for shifted variants. +func (ce Elem) Quaternary() int { + if ce&ceTypeMask == ceTypeQ { + return int(ce&primaryValueMask) >> primaryShift + } else if ce&ceIgnoreMask == Ignore { + return 0 + } + return MaxQuaternary +} + +// Weight returns the collation weight for the given level. +func (ce Elem) Weight(l Level) int { + switch l { + case Primary: + return ce.Primary() + case Secondary: + return ce.Secondary() + case Tertiary: + return int(ce.Tertiary()) + case Quaternary: + return ce.Quaternary() + } + return 0 // return 0 (ignore) for undefined levels. +} + +// For contractions, collation elements are of the form +// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where +// - n* is the size of the first node in the contraction trie. +// - i* is the index of the first node in the contraction trie. +// - b* is the offset into the contraction collation element table. +// See contract.go for details on the contraction trie. +const ( + maxNBits = 4 + maxTrieIndexBits = 12 + maxContractOffsetBits = 13 +) + +func splitContractIndex(ce Elem) (index, n, offset int) { + n = int(ce & (1<>= maxNBits + index = int(ce & (1<>= maxTrieIndexBits + offset = int(ce & (1<> 8) +} + +const ( + // These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf. + minUnified rune = 0x4E00 + maxUnified = 0x9FFF + minCompatibility = 0xF900 + maxCompatibility = 0xFAFF + minRare = 0x3400 + maxRare = 0x4DBF +) +const ( + commonUnifiedOffset = 0x10000 + rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF + otherOffset = 0x50000 // largest rune in rare is U+2FA1D + illegalOffset = otherOffset + int(unicode.MaxRune) + maxPrimary = illegalOffset + 1 +) + +// implicitPrimary returns the primary weight for the a rune +// for which there is no entry for the rune in the collation table. +// We take a different approach from the one specified in +// http://unicode.org/reports/tr10/#Implicit_Weights, +// but preserve the resulting relative ordering of the runes. +func implicitPrimary(r rune) int { + if unicode.Is(unicode.Ideographic, r) { + if r >= minUnified && r <= maxUnified { + // The most common case for CJK. + return int(r) + commonUnifiedOffset + } + if r >= minCompatibility && r <= maxCompatibility { + // This will typically not hit. The DUCET explicitly specifies mappings + // for all characters that do not decompose. + return int(r) + commonUnifiedOffset + } + return int(r) + rareUnifiedOffset + } + return int(r) + otherOffset +} diff --git a/vendor/golang.org/x/text/internal/colltab/collelem_test.go b/vendor/golang.org/x/text/internal/colltab/collelem_test.go new file mode 100644 index 0000000..f131ecc --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/collelem_test.go @@ -0,0 +1,183 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "fmt" + "testing" + "unicode" +) + +func (e Elem) String() string { + q := "" + if v := e.Quaternary(); v == MaxQuaternary { + q = "max" + } else { + q = fmt.Sprint(v) + } + return fmt.Sprintf("[%d, %d, %d, %s]", + e.Primary(), + e.Secondary(), + e.Tertiary(), + q) +} + +type ceTest struct { + f func(inout []int) (Elem, ceType) + arg []int +} + +func makeCE(weights []int) Elem { + ce, _ := MakeElem(weights[0], weights[1], weights[2], uint8(weights[3])) + return ce +} + +var defaultValues = []int{0, defaultSecondary, defaultTertiary, 0} + +func e(w ...int) Elem { + return makeCE(append(w, defaultValues[len(w):]...)) +} + +func makeContractIndex(index, n, offset int) Elem { + const ( + contractID = 0xC0000000 + maxNBits = 4 + maxTrieIndexBits = 12 + maxContractOffsetBits = 13 + ) + ce := Elem(contractID) + ce += Elem(offset << (maxNBits + maxTrieIndexBits)) + ce += Elem(index << maxNBits) + ce += Elem(n) + return ce +} + +func makeExpandIndex(index int) Elem { + const expandID = 0xE0000000 + return expandID + Elem(index) +} + +func makeDecompose(t1, t2 int) Elem { + const decompID = 0xF0000000 + return Elem(t2<<8+t1) + decompID +} + +func normalCE(inout []int) (ce Elem, t ceType) { + ce = makeCE(inout) + inout[0] = ce.Primary() + inout[1] = ce.Secondary() + inout[2] = int(ce.Tertiary()) + inout[3] = int(ce.CCC()) + return ce, ceNormal +} + +func expandCE(inout []int) (ce Elem, t ceType) { + ce = makeExpandIndex(inout[0]) + inout[0] = splitExpandIndex(ce) + return ce, ceExpansionIndex +} + +func contractCE(inout []int) (ce Elem, t ceType) { + ce = makeContractIndex(inout[0], inout[1], inout[2]) + i, n, o := splitContractIndex(ce) + inout[0], inout[1], inout[2] = i, n, o + return ce, ceContractionIndex +} + +func decompCE(inout []int) (ce Elem, t ceType) { + ce = makeDecompose(inout[0], inout[1]) + t1, t2 := splitDecompose(ce) + inout[0], inout[1] = int(t1), int(t2) + return ce, ceDecompose +} + +var ceTests = []ceTest{ + {normalCE, []int{0, 0, 0, 0}}, + {normalCE, []int{0, 30, 3, 0}}, + {normalCE, []int{0, 30, 3, 0xFF}}, + {normalCE, []int{100, defaultSecondary, defaultTertiary, 0}}, + {normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}}, + {normalCE, []int{100, defaultSecondary, 3, 0}}, + {normalCE, []int{0x123, defaultSecondary, 8, 0xFF}}, + + {contractCE, []int{0, 0, 0}}, + {contractCE, []int{1, 1, 1}}, + {contractCE, []int{1, (1 << maxNBits) - 1, 1}}, + {contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}}, + {contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}}, + + {expandCE, []int{0}}, + {expandCE, []int{5}}, + {expandCE, []int{(1 << maxExpandIndexBits) - 1}}, + + {decompCE, []int{0, 0}}, + {decompCE, []int{1, 1}}, + {decompCE, []int{0x1F, 0x1F}}, +} + +func TestColElem(t *testing.T) { + for i, tt := range ceTests { + inout := make([]int, len(tt.arg)) + copy(inout, tt.arg) + ce, typ := tt.f(inout) + if ce.ctype() != typ { + t.Errorf("%d: type is %d; want %d (ColElem: %X)", i, ce.ctype(), typ, ce) + } + for j, a := range tt.arg { + if inout[j] != a { + t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", i, j, inout[j], a, ce) + } + } + } +} + +type implicitTest struct { + r rune + p int +} + +var implicitTests = []implicitTest{ + {0x33FF, 0x533FF}, + {0x3400, 0x23400}, + {0x4DC0, 0x54DC0}, + {0x4DFF, 0x54DFF}, + {0x4E00, 0x14E00}, + {0x9FCB, 0x19FCB}, + {0xA000, 0x5A000}, + {0xF8FF, 0x5F8FF}, + {0xF900, 0x1F900}, + {0xFA23, 0x1FA23}, + {0xFAD9, 0x1FAD9}, + {0xFB00, 0x5FB00}, + {0x20000, 0x40000}, + {0x2B81C, 0x4B81C}, + {unicode.MaxRune, 0x15FFFF}, // maximum primary value +} + +func TestImplicit(t *testing.T) { + for _, tt := range implicitTests { + if p := implicitPrimary(tt.r); p != tt.p { + t.Errorf("%U: was %X; want %X", tt.r, p, tt.p) + } + } +} + +func TestUpdateTertiary(t *testing.T) { + tests := []struct { + in, out Elem + t uint8 + }{ + {0x4000FE20, 0x0000FE8A, 0x0A}, + {0x4000FE21, 0x0000FEAA, 0x0A}, + {0x0000FE8B, 0x0000FE83, 0x03}, + {0x82FF0188, 0x9BFF0188, 0x1B}, + {0xAFF0CC02, 0xAFF0CC1B, 0x1B}, + } + for i, tt := range tests { + if out := tt.in.updateTertiary(tt.t); out != tt.out { + t.Errorf("%d: was %X; want %X", i, out, tt.out) + } + } +} diff --git a/vendor/golang.org/x/text/internal/colltab/colltab.go b/vendor/golang.org/x/text/internal/colltab/colltab.go new file mode 100644 index 0000000..02f2247 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/colltab.go @@ -0,0 +1,105 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package colltab contains functionality related to collation tables. +// It is only to be used by the collate and search packages. +package colltab // import "golang.org/x/text/internal/colltab" + +import ( + "sort" + + "golang.org/x/text/language" +) + +// MatchLang finds the index of t in tags, using a matching algorithm used for +// collation and search. tags[0] must be language.Und, the remaining tags should +// be sorted alphabetically. +// +// Language matching for collation and search is different from the matching +// defined by language.Matcher: the (inferred) base language must be an exact +// match for the relevant fields. For example, "gsw" should not match "de". +// Also the parent relation is different, as a parent may have a different +// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is +// zh. +func MatchLang(t language.Tag, tags []language.Tag) int { + // Canonicalize the values, including collapsing macro languages. + t, _ = language.All.Canonicalize(t) + + base, conf := t.Base() + // Estimate the base language, but only use high-confidence values. + if conf < language.High { + // The root locale supports "search" and "standard". We assume that any + // implementation will only use one of both. + return 0 + } + + // Maximize base and script and normalize the tag. + if _, s, r := t.Raw(); (r != language.Region{}) { + p, _ := language.Raw.Compose(base, s, r) + // Taking the parent forces the script to be maximized. + p = p.Parent() + // Add back region and extensions. + t, _ = language.Raw.Compose(p, r, t.Extensions()) + } else { + // Set the maximized base language. + t, _ = language.Raw.Compose(base, s, t.Extensions()) + } + + // Find start index of the language tag. + start := 1 + sort.Search(len(tags)-1, func(i int) bool { + b, _, _ := tags[i+1].Raw() + return base.String() <= b.String() + }) + if start < len(tags) { + if b, _, _ := tags[start].Raw(); b != base { + return 0 + } + } + + // Besides the base language, script and region, only the collation type and + // the custom variant defined in the 'u' extension are used to distinguish a + // locale. + // Strip all variants and extensions and add back the custom variant. + tdef, _ := language.Raw.Compose(t.Raw()) + tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va")) + + // First search for a specialized collation type, if present. + try := []language.Tag{tdef} + if co := t.TypeForKey("co"); co != "" { + tco, _ := tdef.SetTypeForKey("co", co) + try = []language.Tag{tco, tdef} + } + + for _, tx := range try { + for ; tx != language.Und; tx = parent(tx) { + for i, t := range tags[start:] { + if b, _, _ := t.Raw(); b != base { + break + } + if tx == t { + return start + i + } + } + } + } + return 0 +} + +// parent computes the structural parent. This means inheritance may change +// script. So, unlike the CLDR parent, parent(zh-Hant) == zh. +func parent(t language.Tag) language.Tag { + if t.TypeForKey("va") != "" { + t, _ = t.SetTypeForKey("va", "") + return t + } + result := language.Und + if b, s, r := t.Raw(); (r != language.Region{}) { + result, _ = language.Raw.Compose(b, s, t.Extensions()) + } else if (s != language.Script{}) { + result, _ = language.Raw.Compose(b, t.Extensions()) + } else if (b != language.Base{}) { + result, _ = language.Raw.Compose(t.Extensions()) + } + return result +} diff --git a/vendor/golang.org/x/text/internal/colltab/colltab_test.go b/vendor/golang.org/x/text/internal/colltab/colltab_test.go new file mode 100644 index 0000000..c403ac3 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/colltab_test.go @@ -0,0 +1,64 @@ +package colltab + +import ( + "testing" + + "golang.org/x/text/language" +) + +func TestMatchLang(t *testing.T) { + tags := []language.Tag{ + 0: language.Und, + 1: language.MustParse("bs"), + 2: language.German, + 3: language.English, + 4: language.AmericanEnglish, + 5: language.MustParse("en-US-u-va-posix"), + 6: language.Portuguese, + 7: language.Serbian, + 8: language.MustParse("sr-Latn"), + 9: language.Chinese, + 10: language.MustParse("zh-u-co-stroke"), + 11: language.MustParse("zh-Hant-u-co-pinyin"), + 12: language.TraditionalChinese, + } + for i, tc := range []struct { + x int + t language.Tag + }{ + {0, language.Und}, + {0, language.Persian}, // Default to first element when no match. + {3, language.English}, + {4, language.AmericanEnglish}, + {5, language.MustParse("en-US-u-va-posix")}, // Ext. variant match. + {4, language.MustParse("en-US-u-va-noposix")}, // Ext. variant mismatch. + {3, language.MustParse("en-UK-u-va-noposix")}, // Ext. variant mismatch. + {7, language.Serbian}, + {0, language.Croatian}, // Don't match to close language! + {0, language.MustParse("gsw")}, // Don't match to close language! + {1, language.MustParse("bs-Cyrl")}, // Odd, but correct. + {1, language.MustParse("bs-Latn")}, // Estimated script drops. + {8, language.MustParse("sr-Latn")}, + {9, language.Chinese}, + {9, language.SimplifiedChinese}, + {12, language.TraditionalChinese}, + {11, language.MustParse("zh-Hant-u-co-pinyin")}, + // TODO: should this be 12? Either inherited value (10) or default is + // fine in this case, though. Other locales are not affected. + {10, language.MustParse("zh-Hant-u-co-stroke")}, + // There is no "phonebk" sorting order for zh-Hant, so use default. + {12, language.MustParse("zh-Hant-u-co-phonebk")}, + {10, language.MustParse("zh-u-co-stroke")}, + {12, language.MustParse("und-TW")}, // Infer script and language. + {12, language.MustParse("und-HK")}, // Infer script and language. + {6, language.MustParse("und-BR")}, // Infer script and language. + {6, language.MustParse("und-PT")}, // Infer script and language. + {2, language.MustParse("und-Latn-DE")}, // Infer language. + {0, language.MustParse("und-Jpan-BR")}, // Infers "ja", so no match. + {0, language.MustParse("zu")}, // No match past index. + } { + if x := MatchLang(tc.t, tags); x != tc.x { + t.Errorf("%d: MatchLang(%q, tags) = %d; want %d", i, tc.t, x, tc.x) + } + } +} diff --git a/vendor/golang.org/x/text/internal/colltab/contract.go b/vendor/golang.org/x/text/internal/colltab/contract.go new file mode 100644 index 0000000..25649d4 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/contract.go @@ -0,0 +1,145 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import "unicode/utf8" + +// For a description of ContractTrieSet, see text/collate/build/contract.go. + +type ContractTrieSet []struct{ L, H, N, I uint8 } + +// ctScanner is used to match a trie to an input sequence. +// A contraction may match a non-contiguous sequence of bytes in an input string. +// For example, if there is a contraction for , it should match +// the sequence , as combining_cedilla does +// not block combining_ring. +// ctScanner does not automatically skip over non-blocking non-starters, but rather +// retains the state of the last match and leaves it up to the user to continue +// the match at the appropriate points. +type ctScanner struct { + states ContractTrieSet + s []byte + n int + index int + pindex int + done bool +} + +type ctScannerString struct { + states ContractTrieSet + s string + n int + index int + pindex int + done bool +} + +func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner { + return ctScanner{s: b, states: t[index:], n: n} +} + +func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString { + return ctScannerString{s: str, states: t[index:], n: n} +} + +// result returns the offset i and bytes consumed p so far. If no suffix +// matched, i and p will be 0. +func (s *ctScanner) result() (i, p int) { + return s.index, s.pindex +} + +func (s *ctScannerString) result() (i, p int) { + return s.index, s.pindex +} + +const ( + final = 0 + noIndex = 0xFF +) + +// scan matches the longest suffix at the current location in the input +// and returns the number of bytes consumed. +func (s *ctScanner) scan(p int) int { + pr := p // the p at the rune start + str := s.s + states, n := s.states, s.n + for i := 0; i < n && p < len(str); { + e := states[i] + c := str[p] + // TODO: a significant number of contractions are of a form that + // cannot match discontiguous UTF-8 in a normalized string. We could let + // a negative value of e.n mean that we can set s.done = true and avoid + // the need for additional matches. + if c >= e.L { + if e.L == c { + p++ + if e.I != noIndex { + s.index = int(e.I) + s.pindex = p + } + if e.N != final { + i, states, n = 0, states[int(e.H)+n:], int(e.N) + if p >= len(str) || utf8.RuneStart(str[p]) { + s.states, s.n, pr = states, n, p + } + } else { + s.done = true + return p + } + continue + } else if e.N == final && c <= e.H { + p++ + s.done = true + s.index = int(c-e.L) + int(e.I) + s.pindex = p + return p + } + } + i++ + } + return pr +} + +// scan is a verbatim copy of ctScanner.scan. +func (s *ctScannerString) scan(p int) int { + pr := p // the p at the rune start + str := s.s + states, n := s.states, s.n + for i := 0; i < n && p < len(str); { + e := states[i] + c := str[p] + // TODO: a significant number of contractions are of a form that + // cannot match discontiguous UTF-8 in a normalized string. We could let + // a negative value of e.n mean that we can set s.done = true and avoid + // the need for additional matches. + if c >= e.L { + if e.L == c { + p++ + if e.I != noIndex { + s.index = int(e.I) + s.pindex = p + } + if e.N != final { + i, states, n = 0, states[int(e.H)+n:], int(e.N) + if p >= len(str) || utf8.RuneStart(str[p]) { + s.states, s.n, pr = states, n, p + } + } else { + s.done = true + return p + } + continue + } else if e.N == final && c <= e.H { + p++ + s.done = true + s.index = int(c-e.L) + int(e.I) + s.pindex = p + return p + } + } + i++ + } + return pr +} diff --git a/vendor/golang.org/x/text/internal/colltab/contract_test.go b/vendor/golang.org/x/text/internal/colltab/contract_test.go new file mode 100644 index 0000000..ce2871d --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/contract_test.go @@ -0,0 +1,131 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "testing" +) + +type lookupStrings struct { + str string + offset int + n int // bytes consumed from input +} + +type LookupTest struct { + lookup []lookupStrings + n int + tries ContractTrieSet +} + +var lookupTests = []LookupTest{{ + []lookupStrings{ + {"abc", 1, 3}, + {"a", 0, 0}, + {"b", 0, 0}, + {"c", 0, 0}, + {"d", 0, 0}, + }, + 1, + ContractTrieSet{ + {'a', 0, 1, 0xFF}, + {'b', 0, 1, 0xFF}, + {'c', 'c', 0, 1}, + }, +}, { + []lookupStrings{ + {"abc", 1, 3}, + {"abd", 2, 3}, + {"abe", 3, 3}, + {"a", 0, 0}, + {"ab", 0, 0}, + {"d", 0, 0}, + {"f", 0, 0}, + }, + 1, + ContractTrieSet{ + {'a', 0, 1, 0xFF}, + {'b', 0, 1, 0xFF}, + {'c', 'e', 0, 1}, + }, +}, { + []lookupStrings{ + {"abc", 1, 3}, + {"ab", 2, 2}, + {"a", 3, 1}, + {"abcd", 1, 3}, + {"abe", 2, 2}, + }, + 1, + ContractTrieSet{ + {'a', 0, 1, 3}, + {'b', 0, 1, 2}, + {'c', 'c', 0, 1}, + }, +}, { + []lookupStrings{ + {"abc", 1, 3}, + {"abd", 2, 3}, + {"ab", 3, 2}, + {"ac", 4, 2}, + {"a", 5, 1}, + {"b", 6, 1}, + {"ba", 6, 1}, + }, + 2, + ContractTrieSet{ + {'b', 'b', 0, 6}, + {'a', 0, 2, 5}, + {'c', 'c', 0, 4}, + {'b', 0, 1, 3}, + {'c', 'd', 0, 1}, + }, +}, { + []lookupStrings{ + {"bcde", 2, 4}, + {"bc", 7, 2}, + {"ab", 6, 2}, + {"bcd", 5, 3}, + {"abcd", 1, 4}, + {"abc", 4, 3}, + {"bcdf", 3, 4}, + }, + 2, + ContractTrieSet{ + {'b', 3, 1, 0xFF}, + {'a', 0, 1, 0xFF}, + {'b', 0, 1, 6}, + {'c', 0, 1, 4}, + {'d', 'd', 0, 1}, + {'c', 0, 1, 7}, + {'d', 0, 1, 5}, + {'e', 'f', 0, 2}, + }, +}} + +func lookup(c *ContractTrieSet, nnode int, s []uint8) (i, n int) { + scan := c.scanner(0, nnode, s) + scan.scan(0) + return scan.result() +} + +func TestLookupContraction(t *testing.T) { + for i, tt := range lookupTests { + cts := ContractTrieSet(tt.tries) + for j, lu := range tt.lookup { + str := lu.str + for _, s := range []string{str, str + "X"} { + const msg = `%d:%d: %s of "%s" %v; want %v` + offset, n := lookup(&cts, tt.n, []byte(s)) + if offset != lu.offset { + t.Errorf(msg, i, j, "offset", s, offset, lu.offset) + } + if n != lu.n { + t.Errorf(msg, i, j, "bytes consumed", s, n, len(str)) + } + } + } + } +} diff --git a/vendor/golang.org/x/text/internal/colltab/iter.go b/vendor/golang.org/x/text/internal/colltab/iter.go new file mode 100644 index 0000000..c1b1ba8 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/iter.go @@ -0,0 +1,178 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +// An Iter incrementally converts chunks of the input text to collation +// elements, while ensuring that the collation elements are in normalized order +// (that is, they are in the order as if the input text were normalized first). +type Iter struct { + Weighter Weighter + Elems []Elem + // N is the number of elements in Elems that will not be reordered on + // subsequent iterations, N <= len(Elems). + N int + + bytes []byte + str string + // Because the Elems buffer may contain collation elements that are needed + // for look-ahead, we need two positions in the text (bytes or str): one for + // the end position in the text for the current iteration and one for the + // start of the next call to appendNext. + pEnd int // end position in text corresponding to N. + pNext int // pEnd <= pNext. +} + +// Reset sets the position in the current input text to p and discards any +// results obtained so far. +func (i *Iter) Reset(p int) { + i.Elems = i.Elems[:0] + i.N = 0 + i.pEnd = p + i.pNext = p +} + +// Len returns the length of the input text. +func (i *Iter) Len() int { + if i.bytes != nil { + return len(i.bytes) + } + return len(i.str) +} + +// Discard removes the collation elements up to N. +func (i *Iter) Discard() { + // TODO: change this such that only modifiers following starters will have + // to be copied. + i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])] + i.N = 0 +} + +// End returns the end position of the input text for which Next has returned +// results. +func (i *Iter) End() int { + return i.pEnd +} + +// SetInput resets i to input s. +func (i *Iter) SetInput(s []byte) { + i.bytes = s + i.str = "" + i.Reset(0) +} + +// SetInputString resets i to input s. +func (i *Iter) SetInputString(s string) { + i.str = s + i.bytes = nil + i.Reset(0) +} + +func (i *Iter) done() bool { + return i.pNext >= len(i.str) && i.pNext >= len(i.bytes) +} + +func (i *Iter) appendNext() bool { + if i.done() { + return false + } + var sz int + if i.bytes == nil { + i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:]) + } else { + i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:]) + } + if sz == 0 { + sz = 1 + } + i.pNext += sz + return true +} + +// Next appends Elems to the internal array. On each iteration, it will either +// add starters or modifiers. In the majority of cases, an Elem with a primary +// value > 0 will have a CCC of 0. The CCC values of collation elements are also +// used to detect if the input string was not normalized and to adjust the +// result accordingly. +func (i *Iter) Next() bool { + if i.N == len(i.Elems) && !i.appendNext() { + return false + } + + // Check if the current segment starts with a starter. + prevCCC := i.Elems[len(i.Elems)-1].CCC() + if prevCCC == 0 { + i.N = len(i.Elems) + i.pEnd = i.pNext + return true + } else if i.Elems[i.N].CCC() == 0 { + // set i.N to only cover part of i.Elems for which prevCCC == 0 and + // use rest for the next call to next. + for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ { + } + i.pEnd = i.pNext + return true + } + + // The current (partial) segment starts with modifiers. We need to collect + // all successive modifiers to ensure that they are normalized. + for { + p := len(i.Elems) + i.pEnd = i.pNext + if !i.appendNext() { + break + } + + if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters { + // Leave the starter for the next iteration. This ensures that we + // do not return sequences of collation elements that cross two + // segments. + // + // TODO: handle large number of combining characters by fully + // normalizing the input segment before iteration. This ensures + // results are consistent across the text repo. + i.N = p + return true + } else if ccc < prevCCC { + i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC. + } else { + prevCCC = ccc + } + } + + done := len(i.Elems) != i.N + i.N = len(i.Elems) + return done +} + +// nextNoNorm is the same as next, but does not "normalize" the collation +// elements. +func (i *Iter) nextNoNorm() bool { + // TODO: remove this function. Using this instead of next does not seem + // to improve performance in any significant way. We retain this until + // later for evaluation purposes. + if i.done() { + return false + } + i.appendNext() + i.N = len(i.Elems) + return true +} + +const maxCombiningCharacters = 30 + +// doNorm reorders the collation elements in i.Elems. +// It assumes that blocks of collation elements added with appendNext +// either start and end with the same CCC or start with CCC == 0. +// This allows for a single insertion point for the entire block. +// The correctness of this assumption is verified in builder.go. +func (i *Iter) doNorm(p int, ccc uint8) { + n := len(i.Elems) + k := p + for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- { + } + i.Elems = append(i.Elems, i.Elems[p:k]...) + copy(i.Elems[p:], i.Elems[k:]) + i.Elems = i.Elems[:n] +} diff --git a/vendor/golang.org/x/text/internal/colltab/iter_test.go b/vendor/golang.org/x/text/internal/colltab/iter_test.go new file mode 100644 index 0000000..5783534 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/iter_test.go @@ -0,0 +1,63 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "testing" +) + +func TestDoNorm(t *testing.T) { + const div = -1 // The insertion point of the next block. + tests := []struct { + in, out []int + }{{ + in: []int{4, div, 3}, + out: []int{3, 4}, + }, { + in: []int{4, div, 3, 3, 3}, + out: []int{3, 3, 3, 4}, + }, { + in: []int{0, 4, div, 3}, + out: []int{0, 3, 4}, + }, { + in: []int{0, 0, 4, 5, div, 3, 3}, + out: []int{0, 0, 3, 3, 4, 5}, + }, { + in: []int{0, 0, 1, 4, 5, div, 3, 3}, + out: []int{0, 0, 1, 3, 3, 4, 5}, + }, { + in: []int{0, 0, 1, 4, 5, div, 4, 4}, + out: []int{0, 0, 1, 4, 4, 4, 5}, + }, + } + for j, tt := range tests { + i := Iter{} + var w, p int + for k, cc := range tt.in { + + if cc == div { + w = 100 + p = k + continue + } + i.Elems = append(i.Elems, makeCE([]int{w, defaultSecondary, 2, cc})) + } + i.doNorm(p, i.Elems[p].CCC()) + if len(i.Elems) != len(tt.out) { + t.Errorf("%d: length was %d; want %d", j, len(i.Elems), len(tt.out)) + } + prevCCC := uint8(0) + for k, ce := range i.Elems { + if int(ce.CCC()) != tt.out[k] { + t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k]) + } + if k > 0 && ce.CCC() == prevCCC && i.Elems[k-1].Primary() > ce.Primary() { + t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k) + } + } + } + + // Combining rune overflow is tested in search/pattern_test.go. +} diff --git a/vendor/golang.org/x/text/internal/colltab/numeric.go b/vendor/golang.org/x/text/internal/colltab/numeric.go new file mode 100644 index 0000000..38c255c --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/numeric.go @@ -0,0 +1,236 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "unicode" + "unicode/utf8" +) + +// NewNumericWeighter wraps w to replace individual digits to sort based on their +// numeric value. +// +// Weighter w must have a free primary weight after the primary weight for 9. +// If this is not the case, numeric value will sort at the same primary level +// as the first primary sorting after 9. +func NewNumericWeighter(w Weighter) Weighter { + getElem := func(s string) Elem { + elems, _ := w.AppendNextString(nil, s) + return elems[0] + } + nine := getElem("9") + + // Numbers should order before zero, but the DUCET has no room for this. + // TODO: move before zero once we use fractional collation elements. + ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0) + + return &numericWeighter{ + Weighter: w, + + // We assume that w sorts digits of different kinds in order of numeric + // value and that the tertiary weight order is preserved. + // + // TODO: evaluate whether it is worth basing the ranges on the Elem + // encoding itself once the move to fractional weights is complete. + zero: getElem("0"), + zeroSpecialLo: getElem("0"), // U+FF10 FULLWIDTH DIGIT ZERO + zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO + nine: nine, + nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE + numberStart: ns, + } +} + +// A numericWeighter translates a stream of digits into a stream of weights +// representing the numeric value. +type numericWeighter struct { + Weighter + + // The Elems below all demarcate boundaries of specific ranges. With the + // current element encoding digits are in two ranges: normal (default + // tertiary value) and special. For most languages, digits have collation + // elements in the normal range. + // + // Note: the range tests are very specific for the element encoding used by + // this implementation. The tests in collate_test.go are designed to fail + // if this code is not updated when an encoding has changed. + + zero Elem // normal digit zero + zeroSpecialLo Elem // special digit zero, low tertiary value + zeroSpecialHi Elem // special digit zero, high tertiary value + nine Elem // normal digit nine + nineSpecialHi Elem // special digit nine + numberStart Elem +} + +// AppendNext calls the namesake of the underlying weigher, but replaces single +// digits with weights representing their value. +func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) { + ce, n = nw.Weighter.AppendNext(buf, s) + nc := numberConverter{ + elems: buf, + w: nw, + b: s, + } + isZero, ok := nc.checkNextDigit(ce) + if !ok { + return ce, n + } + // ce might have been grown already, so take it instead of buf. + nc.init(ce, len(buf), isZero) + for n < len(s) { + ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:]) + nc.b = s + n += sz + if !nc.update(ce) { + break + } + } + return nc.result(), n +} + +// AppendNextString calls the namesake of the underlying weigher, but replaces +// single digits with weights representing their value. +func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) { + ce, n = nw.Weighter.AppendNextString(buf, s) + nc := numberConverter{ + elems: buf, + w: nw, + s: s, + } + isZero, ok := nc.checkNextDigit(ce) + if !ok { + return ce, n + } + nc.init(ce, len(buf), isZero) + for n < len(s) { + ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:]) + nc.s = s + n += sz + if !nc.update(ce) { + break + } + } + return nc.result(), n +} + +type numberConverter struct { + w *numericWeighter + + elems []Elem + nDigits int + lenIndex int + + s string // set if the input was of type string + b []byte // set if the input was of type []byte +} + +// init completes initialization of a numberConverter and prepares it for adding +// more digits. elems is assumed to have a digit starting at oldLen. +func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) { + // Insert a marker indicating the start of a number and and a placeholder + // for the number of digits. + if isZero { + elems = append(elems[:oldLen], nc.w.numberStart, 0) + } else { + elems = append(elems, 0, 0) + copy(elems[oldLen+2:], elems[oldLen:]) + elems[oldLen] = nc.w.numberStart + elems[oldLen+1] = 0 + + nc.nDigits = 1 + } + nc.elems = elems + nc.lenIndex = oldLen + 1 +} + +// checkNextDigit reports whether bufNew adds a single digit relative to the old +// buffer. If it does, it also reports whether this digit is zero. +func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) { + if len(nc.elems) >= len(bufNew) { + return false, false + } + e := bufNew[len(nc.elems)] + if e < nc.w.zeroSpecialLo || nc.w.nine < e { + // Not a number. + return false, false + } + if e < nc.w.zero { + if e > nc.w.nineSpecialHi { + // Not a number. + return false, false + } + if !nc.isDigit() { + return false, false + } + isZero = e <= nc.w.zeroSpecialHi + } else { + // This is the common case if we encounter a digit. + isZero = e == nc.w.zero + } + // Test the remaining added collation elements have a zero primary value. + if n := len(bufNew) - len(nc.elems); n > 1 { + for i := len(nc.elems) + 1; i < len(bufNew); i++ { + if bufNew[i].Primary() != 0 { + return false, false + } + } + // In some rare cases, collation elements will encode runes in + // unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371) + // are not in Nd. Also some digits that clearly belong in unicode.No, + // like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have + // collation elements indistinguishable from normal digits. + // Unfortunately, this means we need to make this check for nearly all + // non-Latin digits. + // + // TODO: check the performance impact and find something better if it is + // an issue. + if !nc.isDigit() { + return false, false + } + } + return isZero, true +} + +func (nc *numberConverter) isDigit() bool { + if nc.b != nil { + r, _ := utf8.DecodeRune(nc.b) + return unicode.In(r, unicode.Nd) + } + r, _ := utf8.DecodeRuneInString(nc.s) + return unicode.In(r, unicode.Nd) +} + +// We currently support a maximum of about 2M digits (the number of primary +// values). Such numbers will compare correctly against small numbers, but their +// comparison against other large numbers is undefined. +// +// TODO: define a proper fallback, such as comparing large numbers textually or +// actually allowing numbers of unlimited length. +// +// TODO: cap this to a lower number (like 100) and maybe allow a larger number +// in an option? +const maxDigits = 1< 0 { + t.Errorf("got %f; want 0", n) + } +} diff --git a/vendor/golang.org/x/text/internal/colltab/table.go b/vendor/golang.org/x/text/internal/colltab/table.go new file mode 100644 index 0000000..e26e36d --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/table.go @@ -0,0 +1,275 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "unicode/utf8" + + "golang.org/x/text/unicode/norm" +) + +// Table holds all collation data for a given collation ordering. +type Table struct { + Index Trie // main trie + + // expansion info + ExpandElem []uint32 + + // contraction info + ContractTries ContractTrieSet + ContractElem []uint32 + MaxContractLen int + VariableTop uint32 +} + +func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) { + return t.appendNext(w, source{bytes: b}) +} + +func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) { + return t.appendNext(w, source{str: s}) +} + +func (t *Table) Start(p int, b []byte) int { + // TODO: implement + panic("not implemented") +} + +func (t *Table) StartString(p int, s string) int { + // TODO: implement + panic("not implemented") +} + +func (t *Table) Domain() []string { + // TODO: implement + panic("not implemented") +} + +func (t *Table) Top() uint32 { + return t.VariableTop +} + +type source struct { + str string + bytes []byte +} + +func (src *source) lookup(t *Table) (ce Elem, sz int) { + if src.bytes == nil { + return t.Index.lookupString(src.str) + } + return t.Index.lookup(src.bytes) +} + +func (src *source) tail(sz int) { + if src.bytes == nil { + src.str = src.str[sz:] + } else { + src.bytes = src.bytes[sz:] + } +} + +func (src *source) nfd(buf []byte, end int) []byte { + if src.bytes == nil { + return norm.NFD.AppendString(buf[:0], src.str[:end]) + } + return norm.NFD.Append(buf[:0], src.bytes[:end]...) +} + +func (src *source) rune() (r rune, sz int) { + if src.bytes == nil { + return utf8.DecodeRuneInString(src.str) + } + return utf8.DecodeRune(src.bytes) +} + +func (src *source) properties(f norm.Form) norm.Properties { + if src.bytes == nil { + return f.PropertiesString(src.str) + } + return f.Properties(src.bytes) +} + +// appendNext appends the weights corresponding to the next rune or +// contraction in s. If a contraction is matched to a discontinuous +// sequence of runes, the weights for the interstitial runes are +// appended as well. It returns a new slice that includes the appended +// weights and the number of bytes consumed from s. +func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) { + ce, sz := src.lookup(t) + tp := ce.ctype() + if tp == ceNormal { + if ce == 0 { + r, _ := src.rune() + const ( + hangulSize = 3 + firstHangul = 0xAC00 + lastHangul = 0xD7A3 + ) + if r >= firstHangul && r <= lastHangul { + // TODO: performance can be considerably improved here. + n = sz + var buf [16]byte // Used for decomposing Hangul. + for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] { + ce, sz = t.Index.lookup(b) + w = append(w, ce) + } + return w, n + } + ce = makeImplicitCE(implicitPrimary(r)) + } + w = append(w, ce) + } else if tp == ceExpansionIndex { + w = t.appendExpansion(w, ce) + } else if tp == ceContractionIndex { + n := 0 + src.tail(sz) + if src.bytes == nil { + w, n = t.matchContractionString(w, ce, src.str) + } else { + w, n = t.matchContraction(w, ce, src.bytes) + } + sz += n + } else if tp == ceDecompose { + // Decompose using NFKD and replace tertiary weights. + t1, t2 := splitDecompose(ce) + i := len(w) + nfkd := src.properties(norm.NFKD).Decomposition() + for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] { + w, p = t.appendNext(w, source{bytes: nfkd}) + } + w[i] = w[i].updateTertiary(t1) + if i++; i < len(w) { + w[i] = w[i].updateTertiary(t2) + for i++; i < len(w); i++ { + w[i] = w[i].updateTertiary(maxTertiary) + } + } + } + return w, sz +} + +func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem { + i := splitExpandIndex(ce) + n := int(t.ExpandElem[i]) + i++ + for _, ce := range t.ExpandElem[i : i+n] { + w = append(w, Elem(ce)) + } + return w +} + +func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) { + index, n, offset := splitContractIndex(ce) + + scan := t.ContractTries.scanner(index, n, suffix) + buf := [norm.MaxSegmentSize]byte{} + bufp := 0 + p := scan.scan(0) + + if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf { + // By now we should have filtered most cases. + p0 := p + bufn := 0 + rune := norm.NFD.Properties(suffix[p:]) + p += rune.Size() + if rune.LeadCCC() != 0 { + prevCC := rune.TrailCCC() + // A gap may only occur in the last normalization segment. + // This also ensures that len(scan.s) < norm.MaxSegmentSize. + if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 { + scan.s = suffix[:p+end] + } + for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf { + rune = norm.NFD.Properties(suffix[p:]) + if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc { + break + } + prevCC = rune.TrailCCC() + if pp := scan.scan(p); pp != p { + // Copy the interstitial runes for later processing. + bufn += copy(buf[bufn:], suffix[p0:p]) + if scan.pindex == pp { + bufp = bufn + } + p, p0 = pp, pp + } else { + p += rune.Size() + } + } + } + } + // Append weights for the matched contraction, which may be an expansion. + i, n := scan.result() + ce = Elem(t.ContractElem[i+offset]) + if ce.ctype() == ceNormal { + w = append(w, ce) + } else { + w = t.appendExpansion(w, ce) + } + // Append weights for the runes in the segment not part of the contraction. + for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] { + w, p = t.appendNext(w, source{bytes: b}) + } + return w, n +} + +// TODO: unify the two implementations. This is best done after first simplifying +// the algorithm taking into account the inclusion of both NFC and NFD forms +// in the table. +func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) { + index, n, offset := splitContractIndex(ce) + + scan := t.ContractTries.scannerString(index, n, suffix) + buf := [norm.MaxSegmentSize]byte{} + bufp := 0 + p := scan.scan(0) + + if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf { + // By now we should have filtered most cases. + p0 := p + bufn := 0 + rune := norm.NFD.PropertiesString(suffix[p:]) + p += rune.Size() + if rune.LeadCCC() != 0 { + prevCC := rune.TrailCCC() + // A gap may only occur in the last normalization segment. + // This also ensures that len(scan.s) < norm.MaxSegmentSize. + if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 { + scan.s = suffix[:p+end] + } + for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf { + rune = norm.NFD.PropertiesString(suffix[p:]) + if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc { + break + } + prevCC = rune.TrailCCC() + if pp := scan.scan(p); pp != p { + // Copy the interstitial runes for later processing. + bufn += copy(buf[bufn:], suffix[p0:p]) + if scan.pindex == pp { + bufp = bufn + } + p, p0 = pp, pp + } else { + p += rune.Size() + } + } + } + } + // Append weights for the matched contraction, which may be an expansion. + i, n := scan.result() + ce = Elem(t.ContractElem[i+offset]) + if ce.ctype() == ceNormal { + w = append(w, ce) + } else { + w = t.appendExpansion(w, ce) + } + // Append weights for the runes in the segment not part of the contraction. + for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] { + w, p = t.appendNext(w, source{bytes: b}) + } + return w, n +} diff --git a/vendor/golang.org/x/text/internal/colltab/trie.go b/vendor/golang.org/x/text/internal/colltab/trie.go new file mode 100644 index 0000000..a0eaa0d --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/trie.go @@ -0,0 +1,159 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The trie in this file is used to associate the first full character in an +// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte +// sequence are used to lookup offsets in the index table to be used for the +// next byte. The last byte is used to index into a table of collation elements. +// For a full description, see go.text/collate/build/trie.go. + +package colltab + +const blockSize = 64 + +type Trie struct { + Index0 []uint16 // index for first byte (0xC0-0xFF) + Values0 []uint32 // index for first byte (0x00-0x7F) + Index []uint16 + Values []uint32 +} + +const ( + t1 = 0x00 // 0000 0000 + tx = 0x80 // 1000 0000 + t2 = 0xC0 // 1100 0000 + t3 = 0xE0 // 1110 0000 + t4 = 0xF0 // 1111 0000 + t5 = 0xF8 // 1111 1000 + t6 = 0xFC // 1111 1100 + te = 0xFE // 1111 1110 +) + +func (t *Trie) lookupValue(n uint16, b byte) Elem { + return Elem(t.Values[int(n)<<6+int(b)]) +} + +// lookup returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. +func (t *Trie) lookup(s []byte) (v Elem, sz int) { + c0 := s[0] + switch { + case c0 < tx: + return Elem(t.Values0[c0]), 1 + case c0 < t2: + return 0, 1 + case c0 < t3: + if len(s) < 2 { + return 0, 0 + } + i := t.Index0[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + return t.lookupValue(i, c1), 2 + case c0 < t4: + if len(s) < 3 { + return 0, 0 + } + i := t.Index0[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := int(i)<<6 + int(c1) + i = t.Index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + return t.lookupValue(i, c2), 3 + case c0 < t5: + if len(s) < 4 { + return 0, 0 + } + i := t.Index0[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := int(i)<<6 + int(c1) + i = t.Index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + o = int(i)<<6 + int(c2) + i = t.Index[o] + c3 := s[3] + if c3 < tx || t2 <= c3 { + return 0, 3 + } + return t.lookupValue(i, c3), 4 + } + // Illegal rune + return 0, 1 +} + +// The body of lookupString is a verbatim copy of that of lookup. +func (t *Trie) lookupString(s string) (v Elem, sz int) { + c0 := s[0] + switch { + case c0 < tx: + return Elem(t.Values0[c0]), 1 + case c0 < t2: + return 0, 1 + case c0 < t3: + if len(s) < 2 { + return 0, 0 + } + i := t.Index0[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + return t.lookupValue(i, c1), 2 + case c0 < t4: + if len(s) < 3 { + return 0, 0 + } + i := t.Index0[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := int(i)<<6 + int(c1) + i = t.Index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + return t.lookupValue(i, c2), 3 + case c0 < t5: + if len(s) < 4 { + return 0, 0 + } + i := t.Index0[c0] + c1 := s[1] + if c1 < tx || t2 <= c1 { + return 0, 1 + } + o := int(i)<<6 + int(c1) + i = t.Index[o] + c2 := s[2] + if c2 < tx || t2 <= c2 { + return 0, 2 + } + o = int(i)<<6 + int(c2) + i = t.Index[o] + c3 := s[3] + if c3 < tx || t2 <= c3 { + return 0, 3 + } + return t.lookupValue(i, c3), 4 + } + // Illegal rune + return 0, 1 +} diff --git a/vendor/golang.org/x/text/internal/colltab/trie_test.go b/vendor/golang.org/x/text/internal/colltab/trie_test.go new file mode 100644 index 0000000..b056a81 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/trie_test.go @@ -0,0 +1,106 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( + "testing" +) + +// We take the smallest, largest and an arbitrary value for each +// of the UTF-8 sequence lengths. +var testRunes = []rune{ + 0x01, 0x0C, 0x7F, // 1-byte sequences + 0x80, 0x100, 0x7FF, // 2-byte sequences + 0x800, 0x999, 0xFFFF, // 3-byte sequences + 0x10000, 0x10101, 0x10FFFF, // 4-byte sequences + 0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block +} + +// Test cases for illegal runes. +type trietest struct { + size int + bytes []byte +} + +var tests = []trietest{ + // illegal runes + {1, []byte{0x80}}, + {1, []byte{0xFF}}, + {1, []byte{t2, tx - 1}}, + {1, []byte{t2, t2}}, + {2, []byte{t3, tx, tx - 1}}, + {2, []byte{t3, tx, t2}}, + {1, []byte{t3, tx - 1, tx}}, + {3, []byte{t4, tx, tx, tx - 1}}, + {3, []byte{t4, tx, tx, t2}}, + {1, []byte{t4, t2, tx, tx - 1}}, + {2, []byte{t4, tx, t2, tx - 1}}, + + // short runes + {0, []byte{t2}}, + {0, []byte{t3, tx}}, + {0, []byte{t4, tx, tx}}, + + // we only support UTF-8 up to utf8.UTFMax bytes (4 bytes) + {1, []byte{t5, tx, tx, tx, tx}}, + {1, []byte{t6, tx, tx, tx, tx, tx}}, +} + +func TestLookupTrie(t *testing.T) { + for i, r := range testRunes { + b := []byte(string(r)) + v, sz := testTrie.lookup(b) + if int(v) != i { + t.Errorf("lookup(%U): found value %#x, expected %#x", r, v, i) + } + if sz != len(b) { + t.Errorf("lookup(%U): found size %d, expected %d", r, sz, len(b)) + } + } + for i, tt := range tests { + v, sz := testTrie.lookup(tt.bytes) + if int(v) != 0 { + t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v) + } + if sz != tt.size { + t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size) + } + } +} + +// test data is taken from exp/collate/locale/build/trie_test.go +var testValues = [832]uint32{ + 0x000c: 0x00000001, + 0x007f: 0x00000002, + 0x00c0: 0x00000003, + 0x0100: 0x00000004, + 0x0140: 0x0000000c, 0x0141: 0x0000000d, 0x0142: 0x0000000e, + 0x0150: 0x0000000f, + 0x0155: 0x00000010, + 0x01bf: 0x00000005, + 0x01c0: 0x00000006, + 0x0219: 0x00000007, + 0x027f: 0x00000008, + 0x0280: 0x00000009, + 0x02c1: 0x0000000a, + 0x033f: 0x0000000b, +} + +var testLookup = [640]uint16{ + 0x0e0: 0x05, 0x0e6: 0x06, + 0x13f: 0x07, + 0x140: 0x08, 0x144: 0x09, + 0x190: 0x03, + 0x1ff: 0x0a, + 0x20f: 0x05, + 0x242: 0x01, 0x244: 0x02, + 0x248: 0x03, + 0x25f: 0x04, + 0x260: 0x01, + 0x26f: 0x02, + 0x270: 0x04, 0x274: 0x06, +} + +var testTrie = Trie{testLookup[6*blockSize:], testValues[:], testLookup[:], testValues[:]} diff --git a/vendor/golang.org/x/text/internal/colltab/weighter.go b/vendor/golang.org/x/text/internal/colltab/weighter.go new file mode 100644 index 0000000..f1ec45f --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/weighter.go @@ -0,0 +1,31 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab // import "golang.org/x/text/internal/colltab" + +// A Weighter can be used as a source for Collator and Searcher. +type Weighter interface { + // Start finds the start of the segment that includes position p. + Start(p int, b []byte) int + + // StartString finds the start of the segment that includes position p. + StartString(p int, s string) int + + // AppendNext appends Elems to buf corresponding to the longest match + // of a single character or contraction from the start of s. + // It returns the new buf and the number of bytes consumed. + AppendNext(buf []Elem, s []byte) (ce []Elem, n int) + + // AppendNextString appends Elems to buf corresponding to the longest match + // of a single character or contraction from the start of s. + // It returns the new buf and the number of bytes consumed. + AppendNextString(buf []Elem, s string) (ce []Elem, n int) + + // Domain returns a slice of all single characters and contractions for which + // collation elements are defined in this table. + Domain() []string + + // Top returns the highest variable primary value. + Top() uint32 +} diff --git a/vendor/golang.org/x/text/internal/colltab/weighter_test.go b/vendor/golang.org/x/text/internal/colltab/weighter_test.go new file mode 100644 index 0000000..b5f8487 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/weighter_test.go @@ -0,0 +1,42 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +// testWeighter is a simple Weighter that returns weights from a user-defined map. +type testWeighter map[string][]Elem + +func (t testWeighter) Start(int, []byte) int { return 0 } +func (t testWeighter) StartString(int, string) int { return 0 } +func (t testWeighter) Domain() []string { return nil } +func (t testWeighter) Top() uint32 { return 0 } + +// maxContractBytes is the maximum length of any key in the map. +const maxContractBytes = 10 + +func (t testWeighter) AppendNext(buf []Elem, s []byte) ([]Elem, int) { + n := len(s) + if n > maxContractBytes { + n = maxContractBytes + } + for i := n; i > 0; i-- { + if e, ok := t[string(s[:i])]; ok { + return append(buf, e...), i + } + } + panic("incomplete testWeighter: could not find " + string(s)) +} + +func (t testWeighter) AppendNextString(buf []Elem, s string) ([]Elem, int) { + n := len(s) + if n > maxContractBytes { + n = maxContractBytes + } + for i := n; i > 0; i-- { + if e, ok := t[s[:i]]; ok { + return append(buf, e...), i + } + } + panic("incomplete testWeighter: could not find " + s) +} -- cgit v1.2.3