vendor/golang.org/x/text/internal/export/idna/idna9.0.0_test.go


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !go1.10

package idna

import "testing"

// TestLabelErrors tests strings returned in case of error. All results should
// be identical to the reference implementation and can be verified at
// http://unicode.org/cldr/utility/idna.jsp. The reference implementation,
// however, seems to not display Bidi and ContextJ errors.
//
// In some cases the behavior of browsers is added as a comment. In all cases,
// whenever a resolve search returns an error here, Chrome will treat the input
// string as a search string (including those for Bidi and Context J errors),
// unless noted otherwise.
func TestLabelErrors(t *testing.T) {
	encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
	type kind struct {
		name string
		f    func(string) (string, error)
	}
	punyA := kind{"PunycodeA", punycode.ToASCII}
	resolve := kind{"ResolveA", Lookup.ToASCII}
	display := kind{"ToUnicode", Display.ToUnicode}
	p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
	lengthU := kind{"CheckLengthU", p.ToUnicode}
	lengthA := kind{"CheckLengthA", p.ToASCII}
	p = New(MapForLookup(), StrictDomainName(false))
	std3 := kind{"STD3", p.ToASCII}

	testCases := []struct {
		kind
		input   string
		want    string
		wantErr string
	}{
		{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
		{lengthA, "", "", "A4"},

		{lengthU, "xn--", "", "A4"},
		{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
		{lengthU, "xn--.foo", ".foo", "A4"},
		{lengthU, "foo.xn--.bar", "foo..bar", "A4"},

		{display, "xn--", "", ""},
		{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
		{display, "xn--.foo", ".foo", ""},
		{display, "foo.xn--.bar", "foo..bar", ""},

		{lengthA, "a..b", "a..b", "A4"},
		{punyA, ".b", ".b", ""},
		// For backwards compatibility, the Punycode profile does not map runes.
		{punyA, "\u3002b", "xn--b-83t", ""},
		{punyA, "..b", "..b", ""},
		// Only strip leading empty labels for certain profiles. Stripping
		// leading empty labels here but not for "empty" punycode above seems
		// inconsistent, but seems to be applied by both the conformance test
		// and Chrome. So we turn it off by default, support it as an option,
		// and enable it in profiles where it seems commonplace.
		{lengthA, ".b", "b", ""},
		{lengthA, "\u3002b", "b", ""},
		{lengthA, "..b", "b", ""},
		{lengthA, "b..", "b..", ""},

		{resolve, "a..b", "a..b", ""},
		{resolve, ".b", "b", ""},
		{resolve, "\u3002b", "b", ""},
		{resolve, "..b", "b", ""},
		{resolve, "b..", "b..", ""},

		// Raw punycode
		{punyA, "", "", ""},
		{punyA, "*.foo.com", "*.foo.com", ""},
		{punyA, "Foo.com", "Foo.com", ""},

		// STD3 rules
		{display, "*.foo.com", "*.foo.com", "P1"},
		{std3, "*.foo.com", "*.foo.com", ""},

		// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
		// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
		// lab9.be.
		{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
		{display, "lab⒐be", "lab⒐be", "P1"},

		{resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
		{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},

		// Chrome 54.0 recognizes the error and treats this input verbatim as a
		// search string.
		// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the
		// punycode on the result using transitional mapping.
		// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
		// seems to be nested punycode encodings.
		{resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
		{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},

		{resolve, "a\u200Cb", "ab", ""},
		{display, "a\u200Cb", "a\u200Cb", "C"},

		{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
		{display, "a\u200Cb", "a\u200Cb", "C"},

		{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
		{
			// Notice how the string gets transformed, even with an error.
			// Chrome will use the original string if it finds an error, so not
			// the transformed one.
			display,
			"gr\ufecb\ufeae\ufe91\ufef2.de",
			"gr\u0639\u0631\u0628\u064a.de",
			"B",
		},

		{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
		{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},

		// normalize input
		{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
		{display, "a\u0323\u0322", "\u1ea1\u0322", ""},

		// Non-normalized strings are not normalized when they originate from
		// punycode. Despite the error, Chrome, Safari and Firefox will attempt
		// to look up the input punycode.
		{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
		{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
	}

	for _, tc := range testCases {
		doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
	}
}