aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/net/html/charset
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/net/html/charset')
-rw-r--r--vendor/golang.org/x/net/html/charset/charset.go257
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html48
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html48
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html49
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html49
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html47
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/README9
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.htmlbin0 -> 2670 bytes
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.htmlbin0 -> 2682 bytes
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html49
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html48
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html48
-rw-r--r--vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html48
13 files changed, 700 insertions, 0 deletions
diff --git a/vendor/golang.org/x/net/html/charset/charset.go b/vendor/golang.org/x/net/html/charset/charset.go
new file mode 100644
index 0000000..13bed15
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/charset.go
@@ -0,0 +1,257 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package charset provides common text encodings for HTML documents.
+//
+// The mapping from encoding labels to encodings is defined at
+// https://encoding.spec.whatwg.org/.
+package charset // import "golang.org/x/net/html/charset"
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "mime"
+ "strings"
+ "unicode/utf8"
+
+ "golang.org/x/net/html"
+ "golang.org/x/text/encoding"
+ "golang.org/x/text/encoding/charmap"
+ "golang.org/x/text/encoding/htmlindex"
+ "golang.org/x/text/transform"
+)
+
+// Lookup returns the encoding with the specified label, and its canonical
+// name. It returns nil and the empty string if label is not one of the
+// standard encodings for HTML. Matching is case-insensitive and ignores
+// leading and trailing whitespace. Encoders will use HTML escape sequences for
+// runes that are not supported by the character set.
+func Lookup(label string) (e encoding.Encoding, name string) {
+ e, err := htmlindex.Get(label)
+ if err != nil {
+ return nil, ""
+ }
+ name, _ = htmlindex.Name(e)
+ return &htmlEncoding{e}, name
+}
+
+type htmlEncoding struct{ encoding.Encoding }
+
+func (h *htmlEncoding) NewEncoder() *encoding.Encoder {
+ // HTML requires a non-terminating legacy encoder. We use HTML escapes to
+ // substitute unsupported code points.
+ return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder())
+}
+
+// DetermineEncoding determines the encoding of an HTML document by examining
+// up to the first 1024 bytes of content and the declared Content-Type.
+//
+// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
+func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) {
+ if len(content) > 1024 {
+ content = content[:1024]
+ }
+
+ for _, b := range boms {
+ if bytes.HasPrefix(content, b.bom) {
+ e, name = Lookup(b.enc)
+ return e, name, true
+ }
+ }
+
+ if _, params, err := mime.ParseMediaType(contentType); err == nil {
+ if cs, ok := params["charset"]; ok {
+ if e, name = Lookup(cs); e != nil {
+ return e, name, true
+ }
+ }
+ }
+
+ if len(content) > 0 {
+ e, name = prescan(content)
+ if e != nil {
+ return e, name, false
+ }
+ }
+
+ // Try to detect UTF-8.
+ // First eliminate any partial rune at the end.
+ for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
+ b := content[i]
+ if b < 0x80 {
+ break
+ }
+ if utf8.RuneStart(b) {
+ content = content[:i]
+ break
+ }
+ }
+ hasHighBit := false
+ for _, c := range content {
+ if c >= 0x80 {
+ hasHighBit = true
+ break
+ }
+ }
+ if hasHighBit && utf8.Valid(content) {
+ return encoding.Nop, "utf-8", false
+ }
+
+ // TODO: change default depending on user's locale?
+ return charmap.Windows1252, "windows-1252", false
+}
+
+// NewReader returns an io.Reader that converts the content of r to UTF-8.
+// It calls DetermineEncoding to find out what r's encoding is.
+func NewReader(r io.Reader, contentType string) (io.Reader, error) {
+ preview := make([]byte, 1024)
+ n, err := io.ReadFull(r, preview)
+ switch {
+ case err == io.ErrUnexpectedEOF:
+ preview = preview[:n]
+ r = bytes.NewReader(preview)
+ case err != nil:
+ return nil, err
+ default:
+ r = io.MultiReader(bytes.NewReader(preview), r)
+ }
+
+ if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
+ r = transform.NewReader(r, e.NewDecoder())
+ }
+ return r, nil
+}
+
+// NewReaderLabel returns a reader that converts from the specified charset to
+// UTF-8. It uses Lookup to find the encoding that corresponds to label, and
+// returns an error if Lookup returns nil. It is suitable for use as
+// encoding/xml.Decoder's CharsetReader function.
+func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
+ e, _ := Lookup(label)
+ if e == nil {
+ return nil, fmt.Errorf("unsupported charset: %q", label)
+ }
+ return transform.NewReader(input, e.NewDecoder()), nil
+}
+
+func prescan(content []byte) (e encoding.Encoding, name string) {
+ z := html.NewTokenizer(bytes.NewReader(content))
+ for {
+ switch z.Next() {
+ case html.ErrorToken:
+ return nil, ""
+
+ case html.StartTagToken, html.SelfClosingTagToken:
+ tagName, hasAttr := z.TagName()
+ if !bytes.Equal(tagName, []byte("meta")) {
+ continue
+ }
+ attrList := make(map[string]bool)
+ gotPragma := false
+
+ const (
+ dontKnow = iota
+ doNeedPragma
+ doNotNeedPragma
+ )
+ needPragma := dontKnow
+
+ name = ""
+ e = nil
+ for hasAttr {
+ var key, val []byte
+ key, val, hasAttr = z.TagAttr()
+ ks := string(key)
+ if attrList[ks] {
+ continue
+ }
+ attrList[ks] = true
+ for i, c := range val {
+ if 'A' <= c && c <= 'Z' {
+ val[i] = c + 0x20
+ }
+ }
+
+ switch ks {
+ case "http-equiv":
+ if bytes.Equal(val, []byte("content-type")) {
+ gotPragma = true
+ }
+
+ case "content":
+ if e == nil {
+ name = fromMetaElement(string(val))
+ if name != "" {
+ e, name = Lookup(name)
+ if e != nil {
+ needPragma = doNeedPragma
+ }
+ }
+ }
+
+ case "charset":
+ e, name = Lookup(string(val))
+ needPragma = doNotNeedPragma
+ }
+ }
+
+ if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
+ continue
+ }
+
+ if strings.HasPrefix(name, "utf-16") {
+ name = "utf-8"
+ e = encoding.Nop
+ }
+
+ if e != nil {
+ return e, name
+ }
+ }
+ }
+}
+
+func fromMetaElement(s string) string {
+ for s != "" {
+ csLoc := strings.Index(s, "charset")
+ if csLoc == -1 {
+ return ""
+ }
+ s = s[csLoc+len("charset"):]
+ s = strings.TrimLeft(s, " \t\n\f\r")
+ if !strings.HasPrefix(s, "=") {
+ continue
+ }
+ s = s[1:]
+ s = strings.TrimLeft(s, " \t\n\f\r")
+ if s == "" {
+ return ""
+ }
+ if q := s[0]; q == '"' || q == '\'' {
+ s = s[1:]
+ closeQuote := strings.IndexRune(s, rune(q))
+ if closeQuote == -1 {
+ return ""
+ }
+ return s[:closeQuote]
+ }
+
+ end := strings.IndexAny(s, "; \t\n\f\r")
+ if end == -1 {
+ end = len(s)
+ }
+ return s[:end]
+ }
+ return ""
+}
+
+var boms = []struct {
+ bom []byte
+ enc string
+}{
+ {[]byte{0xfe, 0xff}, "utf-16be"},
+ {[]byte{0xff, 0xfe}, "utf-16le"},
+ {[]byte{0xef, 0xbb, 0xbf}, "utf-8"},
+}
diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html
new file mode 100644
index 0000000..9915fa0
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/HTTP-charset.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <title>HTTP charset</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The character encoding of a page can be set using the HTTP header charset declaration.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>HTTP charset</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The character encoding of a page can be set using the HTTP header charset declaration.</p>
+<div class="notes"><p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p><p>The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-003">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-001<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-001" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html
new file mode 100644
index 0000000..26e5d8b
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-UTF-8-BOM.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <title>HTTP vs UTF-8 BOM</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>HTTP vs UTF-8 BOM</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.</p>
+<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p><p>If the test is unsuccessful, the characters &#x00EF;&#x00BB;&#x00BF; should appear at the top of the page. These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-022">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-034<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-034" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html
new file mode 100644
index 0000000..2f07e95
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-charset.html
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <meta charset="iso-8859-1" > <title>HTTP vs meta charset</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.">
+<style type='text/css'>
+.test div { width: 50px; }.test div { width: 90px; }
+</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>HTTP vs meta charset</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.</p>
+<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-037">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-018<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-018" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html
new file mode 100644
index 0000000..6853cdd
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/HTTP-vs-meta-content.html
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" > <title>HTTP vs meta content</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.">
+<style type='text/css'>
+.test div { width: 50px; }.test div { width: 90px; }
+</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>HTTP vs meta content</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.</p>
+<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-018">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-016<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-016" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html b/vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html
new file mode 100644
index 0000000..612e26c
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/No-encoding-declaration.html
@@ -0,0 +1,47 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <title>No encoding declaration</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>No encoding declaration</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.</p>
+<div class="notes"><p><p>The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-034">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-015<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-015" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/README b/vendor/golang.org/x/net/html/charset/testdata/README
new file mode 100644
index 0000000..38ef0f9
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/README
@@ -0,0 +1,9 @@
+These test cases come from
+http://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics
+
+Distributed under both the W3C Test Suite License
+(http://www.w3.org/Consortium/Legal/2008/04-testsuite-license)
+and the W3C 3-clause BSD License
+(http://www.w3.org/Consortium/Legal/2008/03-bsd-license).
+To contribute to a W3C Test Suite, see the policies and contribution
+forms (http://www.w3.org/2004/10/27-testcases).
diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html
new file mode 100644
index 0000000..3abf7a9
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/UTF-16BE-BOM.html
Binary files differ
diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html
new file mode 100644
index 0000000..76254c9
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/UTF-16LE-BOM.html
Binary files differ
diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html
new file mode 100644
index 0000000..83de433
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <meta charset="iso-8859-15"> <title>UTF-8 BOM vs meta charset</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.">
+<style type='text/css'>
+.test div { width: 50px; }.test div { width: 90px; }
+</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>UTF-8 BOM vs meta charset</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.</p>
+<div class="notes"><p><p>The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-024">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-038<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-038" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html b/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html
new file mode 100644
index 0000000..501aac2
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/UTF-8-BOM-vs-meta-content.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>UTF-8 BOM vs meta content</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>UTF-8 BOM vs meta content</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.</p>
+<div class="notes"><p><p>The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-038">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-037<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-037" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html b/vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html
new file mode 100644
index 0000000..2d7d25a
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/meta-charset-attribute.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <meta charset="iso-8859-15"> <title>meta charset attribute</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The character encoding of the page can be set by a meta element with charset attribute.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>meta charset attribute</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with charset attribute.</p>
+<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-015">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-009<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-009" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html b/vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html
new file mode 100644
index 0000000..1c3f228
--- /dev/null
+++ b/vendor/golang.org/x/net/html/charset/testdata/meta-content-attribute.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html lang="en" >
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>meta content attribute</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The character encoding of the page can be set by a meta element with http-equiv and content attributes.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>meta content attribute</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with http-equiv and content attributes.</p>
+<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-009">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-007<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-007" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+ <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+