From 473acc61c8392dc7ae303d91568e179c4f105a76 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 2 Jul 2019 12:12:53 +0200 Subject: add black list --- vendor/golang.org/x/text/cases/map_test.go | 950 ----------------------------- 1 file changed, 950 deletions(-) delete mode 100644 vendor/golang.org/x/text/cases/map_test.go (limited to 'vendor/golang.org/x/text/cases/map_test.go') diff --git a/vendor/golang.org/x/text/cases/map_test.go b/vendor/golang.org/x/text/cases/map_test.go deleted file mode 100644 index 8ac3911..0000000 --- a/vendor/golang.org/x/text/cases/map_test.go +++ /dev/null @@ -1,950 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package cases - -import ( - "bytes" - "fmt" - "path" - "strings" - "testing" - "unicode/utf8" - - "golang.org/x/text/internal/testtext" - "golang.org/x/text/language" - "golang.org/x/text/transform" - "golang.org/x/text/unicode/norm" -) - -type testCase struct { - lang string - src interface{} // string, []string, or nil to skip test - title interface{} // string, []string, or nil to skip test - lower interface{} // string, []string, or nil to skip test - upper interface{} // string, []string, or nil to skip test - opts options -} - -var testCases = []testCase{ - 0: { - lang: "und", - src: "abc aBc ABC abC İsıI ΕΣΆΣ", - title: "Abc Abc Abc Abc İsıi Εσάσ", - lower: "abc abc abc abc i\u0307sıi εσάσ", - upper: "ABC ABC ABC ABC İSII ΕΣΆΣ", - opts: getOpts(HandleFinalSigma(false)), - }, - - 1: { - lang: "und", - src: "abc aBc ABC abC İsıI ΕΣΆΣ Σ _Σ -Σ", - title: "Abc Abc Abc Abc İsıi Εσάς Σ _Σ -Σ", - lower: "abc abc abc abc i\u0307sıi εσάς σ _σ -σ", - upper: "ABC ABC ABC ABC İSII ΕΣΆΣ Σ _Σ -Σ", - opts: getOpts(HandleFinalSigma(true)), - }, - - 2: { // Title cased runes. - lang: supported, - src: "DžA", - title: "Dža", - lower: "dža", - upper: "DŽA", - }, - - 3: { - // Title breaking. - lang: supported, - src: []string{ - "FOO CASE TEST", - "DON'T DO THiS", - "χωΡΊΣ χωΡΊΣ^a χωΡΊΣ:a χωΡΊΣ:^a χωΡΊΣ^ όμΩΣ Σ", - "with-hyphens", - "49ers 49ers", - `"capitalize a^a -hyphen 0X _u a_u:a`, - "MidNumLet a.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg", - "MidNum a,b;c\u037ed\u0589e\u060cf\u2044g\ufe50h", - "\u0345 x\u3031x x\u05d0x \u05d0x a'.a a.a a4,a", - }, - title: []string{ - "Foo Case Test", - "Don't Do This", - "Χωρίς Χωρίσ^A Χωρίσ:a Χωρίσ:^A Χωρίς^ Όμως Σ", - "With-Hyphens", - // Note that 49Ers is correct according to the spec. - // TODO: provide some option to the user to treat different - // characters as cased. - "49Ers 49Ers", - `"Capitalize A^A -Hyphen 0X _U A_u:a`, - "Midnumlet A.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg", - "Midnum A,B;C\u037eD\u0589E\u060cF\u2044G\ufe50H", - "\u0399 X\u3031X X\u05d0x \u05d0X A'.A A.a A4,A", - }, - }, - - // TODO: These are known deviations from the options{} Unicode Word Breaking - // Algorithm. - // { - // "und", - // "x_\u3031_x a4,4a", - // "X_\u3031_x A4,4a", // Currently is "X_\U3031_X A4,4A". - // "x_\u3031_x a4,4a", - // "X_\u3031_X A4,4A", - // options{}, - // }, - - 4: { - // Tests title options - lang: "und", - src: "abc aBc ABC abC İsıI o'Brien", - title: "Abc ABc ABC AbC İsıI O'Brien", - opts: getOpts(NoLower), - }, - - 5: { - lang: "el", - src: "aBc ΟΔΌΣ Οδός Σο ΣΟ Σ oΣ ΟΣ σ ἕξ \u03ac", - title: "Abc Οδός Οδός Σο Σο Σ Oς Ος Σ Ἕξ \u0386", - lower: "abc οδός οδός σο σο σ oς ος σ ἕξ \u03ac", - upper: "ABC ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ Σ OΣ ΟΣ Σ ΕΞ \u0391", // Uppercase removes accents - }, - - 6: { - lang: "tr az", - src: "Isiİ İsıI I\u0307sIiİ İsıI\u0307 I\u0300\u0307", - title: "Isii İsıı I\u0307sıii İsıi I\u0300\u0307", - lower: "ısii isıı isıii isıi \u0131\u0300\u0307", - upper: "ISİİ İSII I\u0307SIİİ İSII\u0307 I\u0300\u0307", - }, - - 7: { - lang: "lt", - src: "I Ï J J̈ Į Į̈ Ì Í Ĩ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤", - title: "I Ï J J̈ Į Į̈ Ì Í Ĩ Xi̇̈ Xj̇̈ Xį̇̈ Xi̇̀ Xi̇́ Xi̇̃ Xi Xi̇̈ Xj Xj̇̈ Xį Xį̇̈ Xi̟̤", - lower: "i i̇̈ j j̇̈ į į̇̈ i̇̀ i̇́ i̇̃ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ xi xi̇̈ xj xj̇̈ xį xį̇̈ xi̟̤", - upper: "I Ï J J̈ Į Į̈ Ì Í Ĩ XÏ XJ̈ XĮ̈ XÌ XÍ XĨ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤", - }, - - 8: { - lang: "lt", - src: "\u012e\u0300 \u00cc i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307", - title: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307", - lower: "\u012f\u0307\u0300 i\u0307\u0300 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307", - upper: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307", - }, - - 9: { - lang: "nl", - src: "ijs IJs Ij Ijs İJ İJs aa aA 'ns 'S", - title: "IJs IJs IJ IJs İj İjs Aa Aa 'ns 's", - }, - - // Note: this specification is not currently part of CLDR. The same holds - // for the leading apostrophe handling for Dutch. - // See http://unicode.org/cldr/trac/ticket/7078. - 10: { - lang: "af", - src: "wag 'n bietjie", - title: "Wag 'n Bietjie", - lower: "wag 'n bietjie", - upper: "WAG 'N BIETJIE", - }, -} - -func TestCaseMappings(t *testing.T) { - for i, tt := range testCases { - src, ok := tt.src.([]string) - if !ok { - src = strings.Split(tt.src.(string), " ") - } - - for _, lang := range strings.Split(tt.lang, " ") { - tag := language.MustParse(lang) - testEntry := func(name string, mk func(language.Tag, options) transform.SpanningTransformer, gold interface{}) { - c := Caser{mk(tag, tt.opts)} - if gold != nil { - wants, ok := gold.([]string) - if !ok { - wants = strings.Split(gold.(string), " ") - } - for j, want := range wants { - if got := c.String(src[j]); got != want { - t.Errorf("%d:%s:\n%s.String(%+q):\ngot %+q;\nwant %+q", i, lang, name, src[j], got, want) - } - } - } - dst := make([]byte, 256) // big enough to hold any result - src := []byte(strings.Join(src, " ")) - v := testtext.AllocsPerRun(20, func() { - c.Transform(dst, src, true) - }) - if v > 1.1 { - t.Errorf("%d:%s:\n%s: number of allocs was %f; want 0", i, lang, name, v) - } - } - testEntry("Upper", makeUpper, tt.upper) - testEntry("Lower", makeLower, tt.lower) - testEntry("Title", makeTitle, tt.title) - } - } -} - -// TestAlloc tests that some mapping methods should not cause any allocation. -func TestAlloc(t *testing.T) { - dst := make([]byte, 256) // big enough to hold any result - src := []byte(txtNonASCII) - - for i, f := range []func() Caser{ - func() Caser { return Upper(language.Und) }, - func() Caser { return Lower(language.Und) }, - func() Caser { return Lower(language.Und, HandleFinalSigma(false)) }, - // TODO: use a shared copy for these casers as well, in order of - // importance, starting with the most important: - // func() Caser { return Title(language.Und) }, - // func() Caser { return Title(language.Und, HandleFinalSigma(false)) }, - } { - testtext.Run(t, "", func(t *testing.T) { - var c Caser - v := testtext.AllocsPerRun(10, func() { - c = f() - }) - if v > 0 { - // TODO: Right now only Upper has 1 allocation. Special-case Lower - // and Title as well to have less allocations for the root locale. - t.Errorf("%d:init: number of allocs was %f; want 0", i, v) - } - v = testtext.AllocsPerRun(2, func() { - c.Transform(dst, src, true) - }) - if v > 0 { - t.Errorf("%d:transform: number of allocs was %f; want 0", i, v) - } - }) - } -} - -func testHandover(t *testing.T, c Caser, src string) { - want := c.String(src) - // Find the common prefix. - pSrc := 0 - for ; pSrc < len(src) && pSrc < len(want) && want[pSrc] == src[pSrc]; pSrc++ { - } - - // Test handover for each substring of the prefix. - for i := 0; i < pSrc; i++ { - testtext.Run(t, fmt.Sprint("interleave/", i), func(t *testing.T) { - dst := make([]byte, 4*len(src)) - c.Reset() - nSpan, _ := c.Span([]byte(src[:i]), false) - copy(dst, src[:nSpan]) - nTransform, _, _ := c.Transform(dst[nSpan:], []byte(src[nSpan:]), true) - got := string(dst[:nSpan+nTransform]) - if got != want { - t.Errorf("full string: got %q; want %q", got, want) - } - }) - } -} - -func TestHandover(t *testing.T) { - testCases := []struct { - desc string - t Caser - first, second string - }{{ - "title/nosigma/single midword", - Title(language.Und, HandleFinalSigma(false)), - "A.", "a", - }, { - "title/nosigma/single midword", - Title(language.Und, HandleFinalSigma(false)), - "A", ".a", - }, { - "title/nosigma/double midword", - Title(language.Und, HandleFinalSigma(false)), - "A..", "a", - }, { - "title/nosigma/double midword", - Title(language.Und, HandleFinalSigma(false)), - "A.", ".a", - }, { - "title/nosigma/double midword", - Title(language.Und, HandleFinalSigma(false)), - "A", "..a", - }, { - "title/sigma/single midword", - Title(language.Und), - "ΟΣ.", "a", - }, { - "title/sigma/single midword", - Title(language.Und), - "ΟΣ", ".a", - }, { - "title/sigma/double midword", - Title(language.Und), - "ΟΣ..", "a", - }, { - "title/sigma/double midword", - Title(language.Und), - "ΟΣ.", ".a", - }, { - "title/sigma/double midword", - Title(language.Und), - "ΟΣ", "..a", - }, { - "title/af/leading apostrophe", - Title(language.Afrikaans), - "'", "n bietje", - }} - for _, tc := range testCases { - testtext.Run(t, tc.desc, func(t *testing.T) { - src := tc.first + tc.second - want := tc.t.String(src) - tc.t.Reset() - n, _ := tc.t.Span([]byte(tc.first), false) - - dst := make([]byte, len(want)) - copy(dst, tc.first[:n]) - - nDst, _, _ := tc.t.Transform(dst[n:], []byte(src[n:]), true) - got := string(dst[:n+nDst]) - if got != want { - t.Errorf("got %q; want %q", got, want) - } - }) - } -} - -// minBufSize is the size of the buffer by which the casing operation in -// this package are guaranteed to make progress. -const minBufSize = norm.MaxSegmentSize - -type bufferTest struct { - desc, src, want string - firstErr error - dstSize, srcSize int - t transform.SpanningTransformer -} - -var bufferTests []bufferTest - -func init() { - bufferTests = []bufferTest{{ - desc: "und/upper/short dst", - src: "abcdefg", - want: "ABCDEFG", - firstErr: transform.ErrShortDst, - dstSize: 3, - srcSize: minBufSize, - t: Upper(language.Und), - }, { - desc: "und/upper/short src", - src: "123é56", - want: "123É56", - firstErr: transform.ErrShortSrc, - dstSize: 4, - srcSize: 4, - t: Upper(language.Und), - }, { - desc: "und/upper/no error on short", - src: "12", - want: "12", - firstErr: nil, - dstSize: 1, - srcSize: 1, - t: Upper(language.Und), - }, { - desc: "und/lower/short dst", - src: "ABCDEFG", - want: "abcdefg", - firstErr: transform.ErrShortDst, - dstSize: 3, - srcSize: minBufSize, - t: Lower(language.Und), - }, { - desc: "und/lower/short src", - src: "123É56", - want: "123é56", - firstErr: transform.ErrShortSrc, - dstSize: 4, - srcSize: 4, - t: Lower(language.Und), - }, { - desc: "und/lower/no error on short", - src: "12", - want: "12", - firstErr: nil, - dstSize: 1, - srcSize: 1, - t: Lower(language.Und), - }, { - desc: "und/lower/simple (no final sigma)", - src: "ΟΣ ΟΣΣ", - want: "οσ οσσ", - dstSize: minBufSize, - srcSize: minBufSize, - t: Lower(language.Und, HandleFinalSigma(false)), - }, { - desc: "und/title/simple (no final sigma)", - src: "ΟΣ ΟΣΣ", - want: "Οσ Οσσ", - dstSize: minBufSize, - srcSize: minBufSize, - t: Title(language.Und, HandleFinalSigma(false)), - }, { - desc: "und/title/final sigma: no error", - src: "ΟΣ", - want: "Ος", - dstSize: minBufSize, - srcSize: minBufSize, - t: Title(language.Und), - }, { - desc: "und/title/final sigma: short source", - src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", - want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", - firstErr: transform.ErrShortSrc, - dstSize: minBufSize, - srcSize: 10, - t: Title(language.Und), - }, { - desc: "und/title/final sigma: short destination 1", - src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", - want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", - firstErr: transform.ErrShortDst, - dstSize: 10, - srcSize: minBufSize, - t: Title(language.Und), - }, { - desc: "und/title/final sigma: short destination 2", - src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", - want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", - firstErr: transform.ErrShortDst, - dstSize: 9, - srcSize: minBufSize, - t: Title(language.Und), - }, { - desc: "und/title/final sigma: short destination 3", - src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ", - want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς", - firstErr: transform.ErrShortDst, - dstSize: 8, - srcSize: minBufSize, - t: Title(language.Und), - }, { - desc: "und/title/clipped UTF-8 rune", - src: "σσσσσσσσσσσ", - want: "Σσσσσσσσσσσ", - firstErr: transform.ErrShortSrc, - dstSize: minBufSize, - srcSize: 5, - t: Title(language.Und), - }, { - desc: "und/title/clipped UTF-8 rune atEOF", - src: "σσσ" + string([]byte{0xCF}), - want: "Σσσ" + string([]byte{0xCF}), - dstSize: minBufSize, - srcSize: minBufSize, - t: Title(language.Und), - }, { - // Note: the choice to change the final sigma at the end in case of - // too many case ignorables is arbitrary. The main reason for this - // choice is that it results in simpler code. - desc: "und/title/final sigma: max ignorables", - src: "ΟΣ" + strings.Repeat(".", maxIgnorable) + "a", - want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", - dstSize: minBufSize, - srcSize: minBufSize, - t: Title(language.Und), - }, { - // Note: the choice to change the final sigma at the end in case of - // too many case ignorables is arbitrary. The main reason for this - // choice is that it results in simpler code. - desc: "und/title/long string", - src: "AA" + strings.Repeat(".", maxIgnorable+1) + "a", - want: "Aa" + strings.Repeat(".", maxIgnorable+1) + "A", - dstSize: minBufSize, - srcSize: len("AA" + strings.Repeat(".", maxIgnorable+1)), - t: Title(language.Und), - }, { - // Note: the choice to change the final sigma at the end in case of - // too many case ignorables is arbitrary. The main reason for this - // choice is that it results in simpler code. - desc: "und/title/final sigma: too many ignorables", - src: "ΟΣ" + strings.Repeat(".", maxIgnorable+1) + "a", - want: "Ος" + strings.Repeat(".", maxIgnorable+1) + "A", - dstSize: minBufSize, - srcSize: len("ΟΣ" + strings.Repeat(".", maxIgnorable+1)), - t: Title(language.Und), - }, { - desc: "und/title/final sigma: apostrophe", - src: "ΟΣ''a", - want: "Οσ''A", - dstSize: minBufSize, - srcSize: minBufSize, - t: Title(language.Und), - }, { - desc: "el/upper/max ignorables", - src: "ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313", - want: "Ο" + strings.Repeat("\u0321", maxIgnorable-1), - dstSize: minBufSize, - srcSize: minBufSize, - t: Upper(language.Greek), - }, { - desc: "el/upper/too many ignorables", - src: "ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313", - want: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313", - dstSize: minBufSize, - srcSize: len("ο" + strings.Repeat("\u0321", maxIgnorable)), - t: Upper(language.Greek), - }, { - desc: "el/upper/short dst", - src: "123ο", - want: "123Ο", - firstErr: transform.ErrShortDst, - dstSize: 3, - srcSize: minBufSize, - t: Upper(language.Greek), - }, { - desc: "lt/lower/max ignorables", - src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", - want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", - dstSize: minBufSize, - srcSize: minBufSize, - t: Lower(language.Lithuanian), - }, { - desc: "lt/lower/too many ignorables", - src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300", - want: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0300", - dstSize: minBufSize, - srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)), - t: Lower(language.Lithuanian), - }, { - desc: "lt/lower/decomposition with short dst buffer 1", - src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE - firstErr: transform.ErrShortDst, - want: "aaaaai\u0307\u0300", - dstSize: 5, - srcSize: minBufSize, - t: Lower(language.Lithuanian), - }, { - desc: "lt/lower/decomposition with short dst buffer 2", - src: "aaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE - firstErr: transform.ErrShortDst, - want: "aaaai\u0307\u0300", - dstSize: 5, - srcSize: minBufSize, - t: Lower(language.Lithuanian), - }, { - desc: "lt/upper/max ignorables", - src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", - want: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", - dstSize: minBufSize, - srcSize: minBufSize, - t: Upper(language.Lithuanian), - }, { - desc: "lt/upper/too many ignorables", - src: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", - want: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", - dstSize: minBufSize, - srcSize: len("i" + strings.Repeat("\u0321", maxIgnorable)), - t: Upper(language.Lithuanian), - }, { - desc: "lt/upper/short dst", - src: "12i\u0307\u0300", - want: "12\u00cc", - firstErr: transform.ErrShortDst, - dstSize: 3, - srcSize: minBufSize, - t: Upper(language.Lithuanian), - }, { - desc: "aztr/lower/max ignorables", - src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", - want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", - dstSize: minBufSize, - srcSize: minBufSize, - t: Lower(language.Turkish), - }, { - desc: "aztr/lower/too many ignorables", - src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", - want: "\u0131" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300", - dstSize: minBufSize, - srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)), - t: Lower(language.Turkish), - }, { - desc: "nl/title/pre-IJ cutoff", - src: " ij", - want: " IJ", - firstErr: transform.ErrShortDst, - dstSize: 2, - srcSize: minBufSize, - t: Title(language.Dutch), - }, { - desc: "nl/title/mid-IJ cutoff", - src: " ij", - want: " IJ", - firstErr: transform.ErrShortDst, - dstSize: 3, - srcSize: minBufSize, - t: Title(language.Dutch), - }, { - desc: "af/title/apostrophe", - src: "'n bietje", - want: "'n Bietje", - firstErr: transform.ErrShortDst, - dstSize: 3, - srcSize: minBufSize, - t: Title(language.Afrikaans), - }} -} - -func TestShortBuffersAndOverflow(t *testing.T) { - for i, tt := range bufferTests { - testtext.Run(t, tt.desc, func(t *testing.T) { - buf := make([]byte, tt.dstSize) - got := []byte{} - var nSrc, nDst int - var err error - for p := 0; p < len(tt.src); p += nSrc { - q := p + tt.srcSize - if q > len(tt.src) { - q = len(tt.src) - } - nDst, nSrc, err = tt.t.Transform(buf, []byte(tt.src[p:q]), q == len(tt.src)) - got = append(got, buf[:nDst]...) - - if p == 0 && err != tt.firstErr { - t.Errorf("%d:%s:\n error was %v; want %v", i, tt.desc, err, tt.firstErr) - break - } - } - if string(got) != tt.want { - t.Errorf("%d:%s:\ngot %+q;\nwant %+q", i, tt.desc, got, tt.want) - } - testHandover(t, Caser{tt.t}, tt.src) - }) - } -} - -func TestSpan(t *testing.T) { - for _, tt := range []struct { - desc string - src string - want string - atEOF bool - err error - t Caser - }{{ - desc: "und/upper/basic", - src: "abcdefg", - want: "", - atEOF: true, - err: transform.ErrEndOfSpan, - t: Upper(language.Und), - }, { - desc: "und/upper/short src", - src: "123É"[:4], - want: "123", - atEOF: false, - err: transform.ErrShortSrc, - t: Upper(language.Und), - }, { - desc: "und/upper/no error on short", - src: "12", - want: "12", - atEOF: false, - t: Upper(language.Und), - }, { - desc: "und/lower/basic", - src: "ABCDEFG", - want: "", - atEOF: true, - err: transform.ErrEndOfSpan, - t: Lower(language.Und), - }, { - desc: "und/lower/short src num", - src: "123é"[:4], - want: "123", - atEOF: false, - err: transform.ErrShortSrc, - t: Lower(language.Und), - }, { - desc: "und/lower/short src greek", - src: "αβγé"[:7], - want: "αβγ", - atEOF: false, - err: transform.ErrShortSrc, - t: Lower(language.Und), - }, { - desc: "und/lower/no error on short", - src: "12", - want: "12", - atEOF: false, - t: Lower(language.Und), - }, { - desc: "und/lower/simple (no final sigma)", - src: "ος οσσ", - want: "οσ οσσ", - atEOF: true, - t: Lower(language.Und, HandleFinalSigma(false)), - }, { - desc: "und/title/simple (no final sigma)", - src: "Οσ Οσσ", - want: "Οσ Οσσ", - atEOF: true, - t: Title(language.Und, HandleFinalSigma(false)), - }, { - desc: "und/lower/final sigma: no error", - src: "οΣ", // Oς - want: "ο", // Oς - err: transform.ErrEndOfSpan, - t: Lower(language.Und), - }, { - desc: "und/title/final sigma: no error", - src: "ΟΣ", // Oς - want: "Ο", // Oς - err: transform.ErrEndOfSpan, - t: Title(language.Und), - }, { - desc: "und/title/final sigma: no short source!", - src: "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ", - want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ", - err: transform.ErrEndOfSpan, - t: Title(language.Und), - }, { - desc: "und/title/clipped UTF-8 rune", - src: "Σσ" + string([]byte{0xCF}), - want: "Σσ", - atEOF: false, - err: transform.ErrShortSrc, - t: Title(language.Und), - }, { - desc: "und/title/clipped UTF-8 rune atEOF", - src: "Σσσ" + string([]byte{0xCF}), - want: "Σσσ" + string([]byte{0xCF}), - atEOF: true, - t: Title(language.Und), - }, { - // Note: the choice to change the final sigma at the end in case of - // too many case ignorables is arbitrary. The main reason for this - // choice is that it results in simpler code. - desc: "und/title/long string", - src: "A" + strings.Repeat("a", maxIgnorable+5), - want: "A" + strings.Repeat("a", maxIgnorable+5), - t: Title(language.Und), - }, { - // Note: the choice to change the final sigma at the end in case of - // too many case ignorables is arbitrary. The main reason for this - // choice is that it results in simpler code. - desc: "und/title/cyrillic", - src: "При", - want: "При", - atEOF: true, - t: Title(language.Und, HandleFinalSigma(false)), - }, { - // Note: the choice to change the final sigma at the end in case of - // too many case ignorables is arbitrary. The main reason for this - // choice is that it results in simpler code. - desc: "und/title/final sigma: max ignorables", - src: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", - want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", - t: Title(language.Und), - }, { - desc: "el/upper/max ignorables - not implemented", - src: "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313", - want: "", - err: transform.ErrEndOfSpan, - t: Upper(language.Greek), - }, { - desc: "el/upper/too many ignorables - not implemented", - src: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313", - want: "", - err: transform.ErrEndOfSpan, - t: Upper(language.Greek), - }, { - desc: "el/upper/short dst", - src: "123ο", - want: "", - err: transform.ErrEndOfSpan, - t: Upper(language.Greek), - }, { - desc: "lt/lower/max ignorables", - src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", - want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", - t: Lower(language.Lithuanian), - }, { - desc: "lt/lower/isLower", - src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300", - want: "", - err: transform.ErrEndOfSpan, - t: Lower(language.Lithuanian), - }, { - desc: "lt/lower/not identical", - src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE - err: transform.ErrEndOfSpan, - want: "aaaaa", - t: Lower(language.Lithuanian), - }, { - desc: "lt/lower/identical", - src: "aaaai\u0307\u0300", // U+00CC LATIN CAPITAL LETTER I GRAVE - want: "aaaai\u0307\u0300", - t: Lower(language.Lithuanian), - }, { - desc: "lt/upper/not implemented", - src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", - want: "", - err: transform.ErrEndOfSpan, - t: Upper(language.Lithuanian), - }, { - desc: "lt/upper/not implemented, ascii", - src: "AB", - want: "", - err: transform.ErrEndOfSpan, - t: Upper(language.Lithuanian), - }, { - desc: "nl/title/pre-IJ cutoff", - src: " IJ", - want: " IJ", - t: Title(language.Dutch), - }, { - desc: "nl/title/mid-IJ cutoff", - src: " Ia", - want: " Ia", - t: Title(language.Dutch), - }, { - desc: "af/title/apostrophe", - src: "'n Bietje", - want: "'n Bietje", - t: Title(language.Afrikaans), - }, { - desc: "af/title/apostrophe-incorrect", - src: "'N Bietje", - // The Single_Quote (a MidWord), needs to be retained as unspanned so - // that a successive call to Transform can detect that N should not be - // capitalized. - want: "", - err: transform.ErrEndOfSpan, - t: Title(language.Afrikaans), - }} { - testtext.Run(t, tt.desc, func(t *testing.T) { - for p := 0; p < len(tt.want); p += utf8.RuneLen([]rune(tt.src[p:])[0]) { - tt.t.Reset() - n, err := tt.t.Span([]byte(tt.src[:p]), false) - if err != nil && err != transform.ErrShortSrc { - t.Errorf("early failure:Span(%+q): %v (%d < %d)", tt.src[:p], err, n, len(tt.want)) - break - } - } - tt.t.Reset() - n, err := tt.t.Span([]byte(tt.src), tt.atEOF) - if n != len(tt.want) || err != tt.err { - t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tt.src, tt.atEOF, n, err, len(tt.want), tt.err) - } - testHandover(t, tt.t, tt.src) - }) - } -} - -var txtASCII = strings.Repeat("The quick brown fox jumps over the lazy dog. ", 50) - -// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/ -const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả. Nếu bạn sử -dụng, chuyển đổi, hoặc xây dựng dự án từ nội dung được chia sẻ này, bạn phải áp -dụng giấy phép này hoặc một giấy phép khác có các điều khoản tương tự như giấy -phép này cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào trên đây -cũng có thể được miễn bỏ nếu bạn được sự cho phép của người sở hữu bản quyền. -Phạm vi công chúng — Khi tác phẩm hoặc bất kỳ chương nào của tác phẩm đã trong -vùng dành cho công chúng theo quy định của pháp luật thì tình trạng của nó không -bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.` - -// http://creativecommons.org/licenses/by-sa/2.5/cn/ -const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、 -广播或通过信息网络传播本作品 创作演绎作品 -对本作品进行商业性使用 惟须遵守下列条件: -署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。 -相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作, -您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。` - -// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru -const txt_ru = `При обязательном соблюдении следующих условий: Attribution — Вы -должны атрибутировать произведение (указывать автора и источник) в порядке, -предусмотренном автором или лицензиаром (но только так, чтобы никоим образом не -подразумевалось, что они поддерживают вас или использование вами данного -произведения). Υπό τις ακόλουθες προϋποθέσεις:` - -// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/ -const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με -τον τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια (χωρίς -όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή τη χρήση του έργου -από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε, τροποποιήσετε ή δημιουργήσετε -περαιτέρω βασισμένοι στο έργο θα μπορείτε να διανέμετε το έργο που θα προκύψει -μόνο με την ίδια ή παρόμοια άδεια.` - -const txtNonASCII = txt_vn + txt_cn + txt_ru + txt_gr - -// TODO: Improve ASCII performance. - -func BenchmarkCasers(b *testing.B) { - for _, s := range []struct{ name, text string }{ - {"ascii", txtASCII}, - {"nonASCII", txtNonASCII}, - {"short", "При"}, - } { - src := []byte(s.text) - // Measure case mappings in bytes package for comparison. - for _, f := range []struct { - name string - fn func(b []byte) []byte - }{ - {"lower", bytes.ToLower}, - {"title", bytes.ToTitle}, - {"upper", bytes.ToUpper}, - } { - testtext.Bench(b, path.Join(s.name, "bytes", f.name), func(b *testing.B) { - b.SetBytes(int64(len(src))) - for i := 0; i < b.N; i++ { - f.fn(src) - } - }) - } - for _, t := range []struct { - name string - caser transform.SpanningTransformer - }{ - {"fold/default", Fold()}, - {"upper/default", Upper(language.Und)}, - {"lower/sigma", Lower(language.Und)}, - {"lower/simple", Lower(language.Und, HandleFinalSigma(false))}, - {"title/sigma", Title(language.Und)}, - {"title/simple", Title(language.Und, HandleFinalSigma(false))}, - } { - c := Caser{t.caser} - dst := make([]byte, len(src)) - testtext.Bench(b, path.Join(s.name, t.name, "transform"), func(b *testing.B) { - b.SetBytes(int64(len(src))) - for i := 0; i < b.N; i++ { - c.Reset() - c.Transform(dst, src, true) - } - }) - // No need to check span for simple cases, as they will be the same - // as sigma. - if strings.HasSuffix(t.name, "/simple") { - continue - } - spanSrc := c.Bytes(src) - testtext.Bench(b, path.Join(s.name, t.name, "span"), func(b *testing.B) { - c.Reset() - if n, _ := c.Span(spanSrc, true); n < len(spanSrc) { - b.Fatalf("spanner is not recognizing text %q as done (at %d)", spanSrc, n) - } - b.SetBytes(int64(len(spanSrc))) - for i := 0; i < b.N; i++ { - c.Reset() - c.Span(spanSrc, true) - } - }) - } - } -} -- cgit v1.2.3