From 473acc61c8392dc7ae303d91568e179c4f105a76 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 2 Jul 2019 12:12:53 +0200 Subject: add black list --- .../x/text/encoding/internal/enctest/enctest.go | 180 --------------------- .../x/text/encoding/internal/identifier/gen.go | 7 +- .../encoding/internal/identifier/identifier.go | 2 +- .../x/text/encoding/internal/identifier/mib.go | 96 ++++++----- 4 files changed, 54 insertions(+), 231 deletions(-) delete mode 100644 vendor/golang.org/x/text/encoding/internal/enctest/enctest.go (limited to 'vendor/golang.org/x/text/encoding/internal') diff --git a/vendor/golang.org/x/text/encoding/internal/enctest/enctest.go b/vendor/golang.org/x/text/encoding/internal/enctest/enctest.go deleted file mode 100644 index 0cccae0..0000000 --- a/vendor/golang.org/x/text/encoding/internal/enctest/enctest.go +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package enctest - -import ( - "bytes" - "fmt" - "io" - "io/ioutil" - "strings" - "testing" - - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/internal/identifier" - "golang.org/x/text/transform" -) - -// Encoder or Decoder -type Transcoder interface { - transform.Transformer - Bytes([]byte) ([]byte, error) - String(string) (string, error) -} - -func TestEncoding(t *testing.T, e encoding.Encoding, encoded, utf8, prefix, suffix string) { - for _, direction := range []string{"Decode", "Encode"} { - t.Run(fmt.Sprintf("%v/%s", e, direction), func(t *testing.T) { - - var coder Transcoder - var want, src, wPrefix, sPrefix, wSuffix, sSuffix string - if direction == "Decode" { - coder, want, src = e.NewDecoder(), utf8, encoded - wPrefix, sPrefix, wSuffix, sSuffix = "", prefix, "", suffix - } else { - coder, want, src = e.NewEncoder(), encoded, utf8 - wPrefix, sPrefix, wSuffix, sSuffix = prefix, "", suffix, "" - } - - dst := make([]byte, len(wPrefix)+len(want)+len(wSuffix)) - nDst, nSrc, err := coder.Transform(dst, []byte(sPrefix+src+sSuffix), true) - if err != nil { - t.Fatal(err) - } - if nDst != len(wPrefix)+len(want)+len(wSuffix) { - t.Fatalf("nDst got %d, want %d", - nDst, len(wPrefix)+len(want)+len(wSuffix)) - } - if nSrc != len(sPrefix)+len(src)+len(sSuffix) { - t.Fatalf("nSrc got %d, want %d", - nSrc, len(sPrefix)+len(src)+len(sSuffix)) - } - if got := string(dst); got != wPrefix+want+wSuffix { - t.Fatalf("\ngot %q\nwant %q", got, wPrefix+want+wSuffix) - } - - for _, n := range []int{0, 1, 2, 10, 123, 4567} { - input := sPrefix + strings.Repeat(src, n) + sSuffix - g, err := coder.String(input) - if err != nil { - t.Fatalf("Bytes: n=%d: %v", n, err) - } - if len(g) == 0 && len(input) == 0 { - // If the input is empty then the output can be empty, - // regardless of whatever wPrefix is. 
- continue - } - got1, want1 := string(g), wPrefix+strings.Repeat(want, n)+wSuffix - if got1 != want1 { - t.Fatalf("ReadAll: n=%d\ngot %q\nwant %q", - n, trim(got1), trim(want1)) - } - } - }) - } -} - -func TestFile(t *testing.T, e encoding.Encoding) { - for _, dir := range []string{"Decode", "Encode"} { - t.Run(fmt.Sprintf("%s/%s", e, dir), func(t *testing.T) { - dst, src, transformer, err := load(dir, e) - if err != nil { - t.Fatalf("load: %v", err) - } - buf, err := transformer.Bytes(src) - if err != nil { - t.Fatalf("transform: %v", err) - } - if !bytes.Equal(buf, dst) { - t.Error("transformed bytes did not match golden file") - } - }) - } -} - -func Benchmark(b *testing.B, enc encoding.Encoding) { - for _, direction := range []string{"Decode", "Encode"} { - b.Run(fmt.Sprintf("%s/%s", enc, direction), func(b *testing.B) { - _, src, transformer, err := load(direction, enc) - if err != nil { - b.Fatal(err) - } - b.SetBytes(int64(len(src))) - b.ResetTimer() - for i := 0; i < b.N; i++ { - r := transform.NewReader(bytes.NewReader(src), transformer) - io.Copy(ioutil.Discard, r) - } - }) - } -} - -// testdataFiles are files in testdata/*.txt. 
-var testdataFiles = []struct { - mib identifier.MIB - basename, ext string -}{ - {identifier.Windows1252, "candide", "windows-1252"}, - {identifier.EUCPkdFmtJapanese, "rashomon", "euc-jp"}, - {identifier.ISO2022JP, "rashomon", "iso-2022-jp"}, - {identifier.ShiftJIS, "rashomon", "shift-jis"}, - {identifier.EUCKR, "unsu-joh-eun-nal", "euc-kr"}, - {identifier.GBK, "sunzi-bingfa-simplified", "gbk"}, - {identifier.HZGB2312, "sunzi-bingfa-gb-levels-1-and-2", "hz-gb2312"}, - {identifier.Big5, "sunzi-bingfa-traditional", "big5"}, - {identifier.UTF16LE, "candide", "utf-16le"}, - {identifier.UTF8, "candide", "utf-8"}, - {identifier.UTF32BE, "candide", "utf-32be"}, - - // GB18030 is a superset of GBK and is nominally a Simplified Chinese - // encoding, but it can also represent the entire Basic Multilingual - // Plane, including codepoints like 'â' that aren't encodable by GBK. - // GB18030 on Simplified Chinese should perform similarly to GBK on - // Simplified Chinese. GB18030 on "candide" is more interesting. 
- {identifier.GB18030, "candide", "gb18030"}, -} - -func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, error) { - basename, ext, count := "", "", 0 - for _, tf := range testdataFiles { - if mib, _ := enc.(identifier.Interface).ID(); tf.mib == mib { - basename, ext = tf.basename, tf.ext - count++ - } - } - if count != 1 { - if count == 0 { - return nil, nil, nil, fmt.Errorf("no testdataFiles for %s", enc) - } - return nil, nil, nil, fmt.Errorf("too many testdataFiles for %s", enc) - } - dstFile := fmt.Sprintf("../testdata/%s-%s.txt", basename, ext) - srcFile := fmt.Sprintf("../testdata/%s-utf-8.txt", basename) - var coder Transcoder = encoding.ReplaceUnsupported(enc.NewEncoder()) - if direction == "Decode" { - dstFile, srcFile = srcFile, dstFile - coder = enc.NewDecoder() - } - dst, err := ioutil.ReadFile(dstFile) - if err != nil { - if dst, err = ioutil.ReadFile("../" + dstFile); err != nil { - return nil, nil, nil, err - } - } - src, err := ioutil.ReadFile(srcFile) - if err != nil { - if src, err = ioutil.ReadFile("../" + srcFile); err != nil { - return nil, nil, nil, err - } - } - return dst, src, coder, nil -} - -func trim(s string) string { - if len(s) < 120 { - return s - } - return s[:50] + "..." + s[len(s)-50:] -} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go index 0c8eba7..26cfef9 100644 --- a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go +++ b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go @@ -109,7 +109,12 @@ func main() { use = use || a.Value != "person" } if a.Name.Local == "data" && use { - attr = a.Value + " " + // Patch up URLs to use https. From some links, the + // https version is different from the http one. 
+ s := a.Value + s = strings.Replace(s, "http://", "https://", -1) + s = strings.Replace(s, "/unicode/", "/", -1) + attr = s + " " } } } diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go index 7351b4e..5c9b85c 100644 --- a/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go +++ b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go @@ -34,7 +34,7 @@ package identifier // - http://www.iana.org/assignments/character-sets/character-sets.xhtml // - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib // - http://www.ietf.org/rfc/rfc2978.txt -// - http://www.unicode.org/reports/tr22/ +// - https://www.unicode.org/reports/tr22/ // - http://www.w3.org/TR/encoding/ // - https://encoding.spec.whatwg.org/ // - https://encoding.spec.whatwg.org/encodings.json diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go index 768842b..fc7df1b 100644 --- a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go +++ b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go @@ -538,8 +538,6 @@ const ( // ISO111ECMACyrillic is the MIB identifier with IANA name ECMA-cyrillic. // // ISO registry - // (formerly ECMA - // registry ) ISO111ECMACyrillic MIB = 77 // ISO121Canadian1 is the MIB identifier with IANA name CSA_Z243.4-1985-1. @@ -732,18 +730,18 @@ const ( // ISO885913 is the MIB identifier with IANA name ISO-8859-13. // - // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-13 http://www.iana.org/assignments/charset-reg/ISO-8859-13 + // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-13 https://www.iana.org/assignments/charset-reg/ISO-8859-13 ISO885913 MIB = 109 // ISO885914 is the MIB identifier with IANA name ISO-8859-14. 
// - // ISO See http://www.iana.org/assignments/charset-reg/ISO-8859-14 + // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-14 ISO885914 MIB = 110 // ISO885915 is the MIB identifier with IANA name ISO-8859-15. // // ISO - // Please see: http://www.iana.org/assignments/charset-reg/ISO-8859-15 + // Please see: https://www.iana.org/assignments/charset-reg/ISO-8859-15 ISO885915 MIB = 111 // ISO885916 is the MIB identifier with IANA name ISO-8859-16. @@ -754,41 +752,41 @@ const ( // GBK is the MIB identifier with IANA name GBK. // // Chinese IT Standardization Technical Committee - // Please see: http://www.iana.org/assignments/charset-reg/GBK + // Please see: https://www.iana.org/assignments/charset-reg/GBK GBK MIB = 113 // GB18030 is the MIB identifier with IANA name GB18030. // // Chinese IT Standardization Technical Committee - // Please see: http://www.iana.org/assignments/charset-reg/GB18030 + // Please see: https://www.iana.org/assignments/charset-reg/GB18030 GB18030 MIB = 114 // OSDEBCDICDF0415 is the MIB identifier with IANA name OSD_EBCDIC_DF04_15. // // Fujitsu-Siemens standard mainframe EBCDIC encoding - // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15 + // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15 OSDEBCDICDF0415 MIB = 115 // OSDEBCDICDF03IRV is the MIB identifier with IANA name OSD_EBCDIC_DF03_IRV. // // Fujitsu-Siemens standard mainframe EBCDIC encoding - // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV + // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV OSDEBCDICDF03IRV MIB = 116 // OSDEBCDICDF041 is the MIB identifier with IANA name OSD_EBCDIC_DF04_1. 
// // Fujitsu-Siemens standard mainframe EBCDIC encoding - // Please see: http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1 + // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1 OSDEBCDICDF041 MIB = 117 // ISO115481 is the MIB identifier with IANA name ISO-11548-1. // - // See http://www.iana.org/assignments/charset-reg/ISO-11548-1 + // See https://www.iana.org/assignments/charset-reg/ISO-11548-1 ISO115481 MIB = 118 // KZ1048 is the MIB identifier with IANA name KZ-1048. // - // See http://www.iana.org/assignments/charset-reg/KZ-1048 + // See https://www.iana.org/assignments/charset-reg/KZ-1048 KZ1048 MIB = 119 // Unicode is the MIB identifier with IANA name ISO-10646-UCS-2. @@ -855,7 +853,7 @@ const ( // SCSU is the MIB identifier with IANA name SCSU. // - // SCSU See http://www.iana.org/assignments/charset-reg/SCSU + // SCSU See https://www.iana.org/assignments/charset-reg/SCSU SCSU MIB = 1011 // UTF7 is the MIB identifier with IANA name UTF-7. @@ -884,27 +882,27 @@ const ( // CESU8 is the MIB identifier with IANA name CESU-8. // - // http://www.unicode.org/unicode/reports/tr26 + // https://www.unicode.org/reports/tr26 CESU8 MIB = 1016 // UTF32 is the MIB identifier with IANA name UTF-32. // - // http://www.unicode.org/unicode/reports/tr19/ + // https://www.unicode.org/reports/tr19/ UTF32 MIB = 1017 // UTF32BE is the MIB identifier with IANA name UTF-32BE. // - // http://www.unicode.org/unicode/reports/tr19/ + // https://www.unicode.org/reports/tr19/ UTF32BE MIB = 1018 // UTF32LE is the MIB identifier with IANA name UTF-32LE. // - // http://www.unicode.org/unicode/reports/tr19/ + // https://www.unicode.org/reports/tr19/ UTF32LE MIB = 1019 // BOCU1 is the MIB identifier with IANA name BOCU-1. // - // http://www.unicode.org/notes/tn6/ + // https://www.unicode.org/notes/tn6/ BOCU1 MIB = 1020 // Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1. 
@@ -1461,152 +1459,152 @@ const ( // IBM00858 is the MIB identifier with IANA name IBM00858. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM00858 + // IBM See https://www.iana.org/assignments/charset-reg/IBM00858 IBM00858 MIB = 2089 // IBM00924 is the MIB identifier with IANA name IBM00924. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM00924 + // IBM See https://www.iana.org/assignments/charset-reg/IBM00924 IBM00924 MIB = 2090 // IBM01140 is the MIB identifier with IANA name IBM01140. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01140 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01140 IBM01140 MIB = 2091 // IBM01141 is the MIB identifier with IANA name IBM01141. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01141 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01141 IBM01141 MIB = 2092 // IBM01142 is the MIB identifier with IANA name IBM01142. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01142 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01142 IBM01142 MIB = 2093 // IBM01143 is the MIB identifier with IANA name IBM01143. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01143 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01143 IBM01143 MIB = 2094 // IBM01144 is the MIB identifier with IANA name IBM01144. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01144 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01144 IBM01144 MIB = 2095 // IBM01145 is the MIB identifier with IANA name IBM01145. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01145 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01145 IBM01145 MIB = 2096 // IBM01146 is the MIB identifier with IANA name IBM01146. 
// - // IBM See http://www.iana.org/assignments/charset-reg/IBM01146 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01146 IBM01146 MIB = 2097 // IBM01147 is the MIB identifier with IANA name IBM01147. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01147 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01147 IBM01147 MIB = 2098 // IBM01148 is the MIB identifier with IANA name IBM01148. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01148 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01148 IBM01148 MIB = 2099 // IBM01149 is the MIB identifier with IANA name IBM01149. // - // IBM See http://www.iana.org/assignments/charset-reg/IBM01149 + // IBM See https://www.iana.org/assignments/charset-reg/IBM01149 IBM01149 MIB = 2100 // Big5HKSCS is the MIB identifier with IANA name Big5-HKSCS. // - // See http://www.iana.org/assignments/charset-reg/Big5-HKSCS + // See https://www.iana.org/assignments/charset-reg/Big5-HKSCS Big5HKSCS MIB = 2101 // IBM1047 is the MIB identifier with IANA name IBM1047. // - // IBM1047 (EBCDIC Latin 1/Open Systems) http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf + // IBM1047 (EBCDIC Latin 1/Open Systems) https://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf IBM1047 MIB = 2102 // PTCP154 is the MIB identifier with IANA name PTCP154. // - // See http://www.iana.org/assignments/charset-reg/PTCP154 + // See https://www.iana.org/assignments/charset-reg/PTCP154 PTCP154 MIB = 2103 // Amiga1251 is the MIB identifier with IANA name Amiga-1251. // - // See http://www.amiga.ultranet.ru/Amiga-1251.html + // See https://www.amiga.ultranet.ru/Amiga-1251.html Amiga1251 MIB = 2104 // KOI7switched is the MIB identifier with IANA name KOI7-switched. 
// - // See http://www.iana.org/assignments/charset-reg/KOI7-switched + // See https://www.iana.org/assignments/charset-reg/KOI7-switched KOI7switched MIB = 2105 // BRF is the MIB identifier with IANA name BRF. // - // See http://www.iana.org/assignments/charset-reg/BRF + // See https://www.iana.org/assignments/charset-reg/BRF BRF MIB = 2106 // TSCII is the MIB identifier with IANA name TSCII. // - // See http://www.iana.org/assignments/charset-reg/TSCII + // See https://www.iana.org/assignments/charset-reg/TSCII TSCII MIB = 2107 // CP51932 is the MIB identifier with IANA name CP51932. // - // See http://www.iana.org/assignments/charset-reg/CP51932 + // See https://www.iana.org/assignments/charset-reg/CP51932 CP51932 MIB = 2108 // Windows874 is the MIB identifier with IANA name windows-874. // - // See http://www.iana.org/assignments/charset-reg/windows-874 + // See https://www.iana.org/assignments/charset-reg/windows-874 Windows874 MIB = 2109 // Windows1250 is the MIB identifier with IANA name windows-1250. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1250 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1250 Windows1250 MIB = 2250 // Windows1251 is the MIB identifier with IANA name windows-1251. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1251 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1251 Windows1251 MIB = 2251 // Windows1252 is the MIB identifier with IANA name windows-1252. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1252 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1252 Windows1252 MIB = 2252 // Windows1253 is the MIB identifier with IANA name windows-1253. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1253 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1253 Windows1253 MIB = 2253 // Windows1254 is the MIB identifier with IANA name windows-1254. 
// - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1254 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1254 Windows1254 MIB = 2254 // Windows1255 is the MIB identifier with IANA name windows-1255. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1255 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1255 Windows1255 MIB = 2255 // Windows1256 is the MIB identifier with IANA name windows-1256. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1256 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1256 Windows1256 MIB = 2256 // Windows1257 is the MIB identifier with IANA name windows-1257. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1257 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1257 Windows1257 MIB = 2257 // Windows1258 is the MIB identifier with IANA name windows-1258. // - // Microsoft http://www.iana.org/assignments/charset-reg/windows-1258 + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1258 Windows1258 MIB = 2258 // TIS620 is the MIB identifier with IANA name TIS-620. @@ -1616,6 +1614,6 @@ const ( // CP50220 is the MIB identifier with IANA name CP50220. // - // See http://www.iana.org/assignments/charset-reg/CP50220 + // See https://www.iana.org/assignments/charset-reg/CP50220 CP50220 MIB = 2260 ) -- cgit v1.2.3