From 14bb08c1df8db9ec6c8a05520d4eee67971235d9 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Thu, 27 Sep 2018 20:03:23 +0200 Subject: mod tidy --- vendor/golang.org/x/net/html/charset/charset.go | 257 --------------------- .../x/net/html/charset/testdata/HTTP-charset.html | 48 ---- .../html/charset/testdata/HTTP-vs-UTF-8-BOM.html | 48 ---- .../charset/testdata/HTTP-vs-meta-charset.html | 49 ---- .../charset/testdata/HTTP-vs-meta-content.html | 49 ---- .../charset/testdata/No-encoding-declaration.html | 47 ---- .../golang.org/x/net/html/charset/testdata/README | 9 - .../x/net/html/charset/testdata/UTF-16BE-BOM.html | Bin 2670 -> 0 bytes .../x/net/html/charset/testdata/UTF-16LE-BOM.html | Bin 2682 -> 0 bytes .../testdata/UTF-8-BOM-vs-meta-charset.html | 49 ---- .../testdata/UTF-8-BOM-vs-meta-content.html | 48 ---- .../charset/testdata/meta-charset-attribute.html | 48 ---- .../charset/testdata/meta-content-attribute.html | 48 ---- 13 files changed, 700 deletions(-) delete mode 100644 vendor/golang.org/x/net/html/charset/charset.go delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/README delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html delete mode 100644 vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html (limited to 'vendor/golang.org/x/net/html/charset') diff --git a/vendor/golang.org/x/net/html/charset/charset.go b/vendor/golang.org/x/net/html/charset/charset.go deleted file mode 100644 index 13bed15..0000000 --- a/vendor/golang.org/x/net/html/charset/charset.go +++ /dev/null @@ -1,257 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package charset provides common text encodings for HTML documents. -// -// The mapping from encoding labels to encodings is defined at -// https://encoding.spec.whatwg.org/. -package charset // import "golang.org/x/net/html/charset" - -import ( - "bytes" - "fmt" - "io" - "mime" - "strings" - "unicode/utf8" - - "golang.org/x/net/html" - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/charmap" - "golang.org/x/text/encoding/htmlindex" - "golang.org/x/text/transform" -) - -// Lookup returns the encoding with the specified label, and its canonical -// name. It returns nil and the empty string if label is not one of the -// standard encodings for HTML. Matching is case-insensitive and ignores -// leading and trailing whitespace. Encoders will use HTML escape sequences for -// runes that are not supported by the character set. -func Lookup(label string) (e encoding.Encoding, name string) { - e, err := htmlindex.Get(label) - if err != nil { - return nil, "" - } - name, _ = htmlindex.Name(e) - return &htmlEncoding{e}, name -} - -type htmlEncoding struct{ encoding.Encoding } - -func (h *htmlEncoding) NewEncoder() *encoding.Encoder { - // HTML requires a non-terminating legacy encoder. We use HTML escapes to - // substitute unsupported code points. - return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder()) -} - -// DetermineEncoding determines the encoding of an HTML document by examining -// up to the first 1024 bytes of content and the declared Content-Type. -// -// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding -func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) { - if len(content) > 1024 { - content = content[:1024] - } - - for _, b := range boms { - if bytes.HasPrefix(content, b.bom) { - e, name = Lookup(b.enc) - return e, name, true - } - } - - if _, params, err := mime.ParseMediaType(contentType); err == nil { - if cs, ok := params["charset"]; ok { - if e, name = Lookup(cs); e != nil { - return e, name, true - } - } - } - - if len(content) > 0 { - e, name = prescan(content) - if e != nil { - return e, name, false - } - } - - // Try to detect UTF-8. - // First eliminate any partial rune at the end. - for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- { - b := content[i] - if b < 0x80 { - break - } - if utf8.RuneStart(b) { - content = content[:i] - break - } - } - hasHighBit := false - for _, c := range content { - if c >= 0x80 { - hasHighBit = true - break - } - } - if hasHighBit && utf8.Valid(content) { - return encoding.Nop, "utf-8", false - } - - // TODO: change default depending on user's locale? - return charmap.Windows1252, "windows-1252", false -} - -// NewReader returns an io.Reader that converts the content of r to UTF-8. -// It calls DetermineEncoding to find out what r's encoding is. -func NewReader(r io.Reader, contentType string) (io.Reader, error) { - preview := make([]byte, 1024) - n, err := io.ReadFull(r, preview) - switch { - case err == io.ErrUnexpectedEOF: - preview = preview[:n] - r = bytes.NewReader(preview) - case err != nil: - return nil, err - default: - r = io.MultiReader(bytes.NewReader(preview), r) - } - - if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop { - r = transform.NewReader(r, e.NewDecoder()) - } - return r, nil -} - -// NewReaderLabel returns a reader that converts from the specified charset to -// UTF-8. It uses Lookup to find the encoding that corresponds to label, and -// returns an error if Lookup returns nil. It is suitable for use as -// encoding/xml.Decoder's CharsetReader function. -func NewReaderLabel(label string, input io.Reader) (io.Reader, error) { - e, _ := Lookup(label) - if e == nil { - return nil, fmt.Errorf("unsupported charset: %q", label) - } - return transform.NewReader(input, e.NewDecoder()), nil -} - -func prescan(content []byte) (e encoding.Encoding, name string) { - z := html.NewTokenizer(bytes.NewReader(content)) - for { - switch z.Next() { - case html.ErrorToken: - return nil, "" - - case html.StartTagToken, html.SelfClosingTagToken: - tagName, hasAttr := z.TagName() - if !bytes.Equal(tagName, []byte("meta")) { - continue - } - attrList := make(map[string]bool) - gotPragma := false - - const ( - dontKnow = iota - doNeedPragma - doNotNeedPragma - ) - needPragma := dontKnow - - name = "" - e = nil - for hasAttr { - var key, val []byte - key, val, hasAttr = z.TagAttr() - ks := string(key) - if attrList[ks] { - continue - } - attrList[ks] = true - for i, c := range val { - if 'A' <= c && c <= 'Z' { - val[i] = c + 0x20 - } - } - - switch ks { - case "http-equiv": - if bytes.Equal(val, []byte("content-type")) { - gotPragma = true - } - - case "content": - if e == nil { - name = fromMetaElement(string(val)) - if name != "" { - e, name = Lookup(name) - if e != nil { - needPragma = doNeedPragma - } - } - } - - case "charset": - e, name = Lookup(string(val)) - needPragma = doNotNeedPragma - } - } - - if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma { - continue - } - - if strings.HasPrefix(name, "utf-16") { - name = "utf-8" - e = encoding.Nop - } - - if e != nil { - return e, name - } - } - } -} - -func fromMetaElement(s string) string { - for s != "" { - csLoc := strings.Index(s, "charset") - if csLoc == -1 { - return "" - } - s = s[csLoc+len("charset"):] - s = strings.TrimLeft(s, " \t\n\f\r") - if !strings.HasPrefix(s, "=") { - continue - } - s = s[1:] - s = strings.TrimLeft(s, " \t\n\f\r") - if s == "" { - return "" - } - if q := s[0]; q == '"' || q == '\'' { - s = s[1:] - closeQuote := strings.IndexRune(s, rune(q)) - if closeQuote == -1 { - return "" - } - return s[:closeQuote] - } - - end := strings.IndexAny(s, "; \t\n\f\r") - if end == -1 { - end = len(s) - } - return s[:end] - } - return "" -} - -var boms = []struct { - bom []byte - enc string -}{ - {[]byte{0xfe, 0xff}, "utf-16be"}, - {[]byte{0xff, 0xfe}, "utf-16le"}, - {[]byte{0xef, 0xbb, 0xbf}, "utf-8"}, -} diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html deleted file mode 100644 index 9915fa0..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - HTTP charset - - - - - - - - - - - -

HTTP charset

- - -
- - -
 
- - - - - -
-

The character encoding of a page can be set using the HTTP header charset declaration.

-

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15.

-
-
-
Next test
HTML5
-

the-input-byte-stream-001
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html deleted file mode 100644 index 26e5d8b..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - HTTP vs UTF-8 BOM - - - - - - - - - - - -

HTTP vs UTF-8 BOM

- - -
- - -
 
- - - - - -
-

A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.

-

The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

If the test is unsuccessful, the characters  should appear at the top of the page. These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding.

-
-
-
Next test
HTML5
-

the-input-byte-stream-034
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html deleted file mode 100644 index 2f07e95..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html +++ /dev/null @@ -1,49 +0,0 @@ - - - - HTTP vs meta charset - - - - - - - - - - - -

HTTP vs meta charset

- - -
- - -
 
- - - - - -
-

The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.

-

The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

-
-
-
Next test
HTML5
-

the-input-byte-stream-018
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html deleted file mode 100644 index 6853cdd..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html +++ /dev/null @@ -1,49 +0,0 @@ - - - - HTTP vs meta content - - - - - - - - - - - -

HTTP vs meta content

- - -
- - -
 
- - - - - -
-

The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.

-

The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

-
-
-
Next test
HTML5
-

the-input-byte-stream-016
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html b/vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html deleted file mode 100644 index 612e26c..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html +++ /dev/null @@ -1,47 +0,0 @@ - - - - No encoding declaration - - - - - - - - - - - -

No encoding declaration

- - -
- - -
 
- - - - - -
-

A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.

-

The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

-
-
-
Next test
HTML5
-

the-input-byte-stream-015
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/README b/vendor/golang.org/x/net/html/charset/testdata/README deleted file mode 100644 index 38ef0f9..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/README +++ /dev/null @@ -1,9 +0,0 @@ -These test cases come from -http://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics - -Distributed under both the W3C Test Suite License -(http://www.w3.org/Consortium/Legal/2008/04-testsuite-license) -and the W3C 3-clause BSD License -(http://www.w3.org/Consortium/Legal/2008/03-bsd-license). -To contribute to a W3C Test Suite, see the policies and contribution -forms (http://www.w3.org/2004/10/27-testcases). diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html deleted file mode 100644 index 3abf7a9..0000000 Binary files a/vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html and /dev/null differ diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html deleted file mode 100644 index 76254c9..0000000 Binary files a/vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html and /dev/null differ diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html deleted file mode 100644 index 83de433..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html +++ /dev/null @@ -1,49 +0,0 @@ - - - - UTF-8 BOM vs meta charset - - - - - - - - - - - -

UTF-8 BOM vs meta charset

- - -
- - -
 
- - - - - -
-

A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.

-

The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

-
-
-
Next test
HTML5
-

the-input-byte-stream-038
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html deleted file mode 100644 index 501aac2..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - UTF-8 BOM vs meta content - - - - - - - - - - - -

UTF-8 BOM vs meta content

- - -
- - -
 
- - - - - -
-

A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.

-

The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ýäè. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.

-
-
-
Next test
HTML5
-

the-input-byte-stream-037
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html b/vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html deleted file mode 100644 index 2d7d25a..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - meta charset attribute - - - - - - - - - - - -

meta charset attribute

- - -
- - -
 
- - - - - -
-

The character encoding of the page can be set by a meta element with charset attribute.

-

The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

-
-
-
Next test
HTML5
-

the-input-byte-stream-009
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - diff --git a/vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html b/vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html deleted file mode 100644 index 1c3f228..0000000 --- a/vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html +++ /dev/null @@ -1,48 +0,0 @@ - - - - meta content attribute - - - - - - - - - - - -

meta content attribute

- - -
- - -
 
- - - - - -
-

The character encoding of the page can be set by a meta element with http-equiv and content attributes.

-

The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15.

The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector .test div.ÜÀÚ. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.

-
-
-
Next test
HTML5
-

the-input-byte-stream-007
Result summary & related tests
Detailed results for this test
Link to spec

-
Assumptions:
  • The default encoding for the browser you are testing is not set to ISO 8859-15.
  • -
  • The test is read from a server that supports HTTP.
-
- - - - - - -- cgit v1.2.3