summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/encoding/htmlindex/gen.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/encoding/htmlindex/gen.go')
-rw-r--r--vendor/golang.org/x/text/encoding/htmlindex/gen.go173
1 files changed, 173 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go
new file mode 100644
index 0000000..ac6b4a7
--- /dev/null
+++ b/vendor/golang.org/x/text/encoding/htmlindex/gen.go
@@ -0,0 +1,173 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "log"
+ "strings"
+
+ "golang.org/x/text/internal/gen"
+)
+
+type group struct {
+ Encodings []struct {
+ Labels []string
+ Name string
+ }
+}
+
+func main() {
+ gen.Init()
+
+ r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
+ var groups []group
+ if err := json.NewDecoder(r).Decode(&groups); err != nil {
+ log.Fatalf("Error reading encodings.json: %v", err)
+ }
+
+ w := &bytes.Buffer{}
+ fmt.Fprintln(w, "type htmlEncoding byte")
+ fmt.Fprintln(w, "const (")
+ for i, g := range groups {
+ for _, e := range g.Encodings {
+ key := strings.ToLower(e.Name)
+ name := consts[key]
+ if name == "" {
+ log.Fatalf("No const defined for %s.", key)
+ }
+ if i == 0 {
+ fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
+ } else {
+ fmt.Fprintf(w, "%s\n", name)
+ }
+ }
+ }
+ fmt.Fprintln(w, "numEncodings")
+ fmt.Fprint(w, ")\n\n")
+
+ fmt.Fprintln(w, "var canonical = [numEncodings]string{")
+ for _, g := range groups {
+ for _, e := range g.Encodings {
+ fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
+ }
+ }
+ fmt.Fprint(w, "}\n\n")
+
+ fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
+ for _, g := range groups {
+ for _, e := range g.Encodings {
+ for _, l := range e.Labels {
+ key := strings.ToLower(e.Name)
+ name := consts[key]
+ fmt.Fprintf(w, "%q: %s,\n", l, name)
+ }
+ }
+ }
+ fmt.Fprint(w, "}\n\n")
+
+ var tags []string
+ fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
+ for _, loc := range locales {
+ tags = append(tags, loc.tag)
+ fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
+ }
+ fmt.Fprint(w, "}\n\n")
+
+ fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
+
+ gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
+}
+
+// consts maps canonical encoding name to internal constant.
+var consts = map[string]string{
+ "utf-8": "utf8",
+ "ibm866": "ibm866",
+ "iso-8859-2": "iso8859_2",
+ "iso-8859-3": "iso8859_3",
+ "iso-8859-4": "iso8859_4",
+ "iso-8859-5": "iso8859_5",
+ "iso-8859-6": "iso8859_6",
+ "iso-8859-7": "iso8859_7",
+ "iso-8859-8": "iso8859_8",
+ "iso-8859-8-i": "iso8859_8I",
+ "iso-8859-10": "iso8859_10",
+ "iso-8859-13": "iso8859_13",
+ "iso-8859-14": "iso8859_14",
+ "iso-8859-15": "iso8859_15",
+ "iso-8859-16": "iso8859_16",
+ "koi8-r": "koi8r",
+ "koi8-u": "koi8u",
+ "macintosh": "macintosh",
+ "windows-874": "windows874",
+ "windows-1250": "windows1250",
+ "windows-1251": "windows1251",
+ "windows-1252": "windows1252",
+ "windows-1253": "windows1253",
+ "windows-1254": "windows1254",
+ "windows-1255": "windows1255",
+ "windows-1256": "windows1256",
+ "windows-1257": "windows1257",
+ "windows-1258": "windows1258",
+ "x-mac-cyrillic": "macintoshCyrillic",
+ "gbk": "gbk",
+ "gb18030": "gb18030",
+ // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
+ "big5": "big5",
+ "euc-jp": "eucjp",
+ "iso-2022-jp": "iso2022jp",
+ "shift_jis": "shiftJIS",
+ "euc-kr": "euckr",
+ "replacement": "replacement",
+ "utf-16be": "utf16be",
+ "utf-16le": "utf16le",
+ "x-user-defined": "xUserDefined",
+}
+
+// locales is taken from
+// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
+var locales = []struct{ tag, name string }{
+ // The default value. Explicitly state latin to benefit from the exact
+ // script option, while still making 1252 the default encoding for languages
+ // written in Latin script.
+ {"und_Latn", "windows-1252"},
+ {"ar", "windows-1256"},
+ {"ba", "windows-1251"},
+ {"be", "windows-1251"},
+ {"bg", "windows-1251"},
+ {"cs", "windows-1250"},
+ {"el", "iso-8859-7"},
+ {"et", "windows-1257"},
+ {"fa", "windows-1256"},
+ {"he", "windows-1255"},
+ {"hr", "windows-1250"},
+ {"hu", "iso-8859-2"},
+ {"ja", "shift_jis"},
+ {"kk", "windows-1251"},
+ {"ko", "euc-kr"},
+ {"ku", "windows-1254"},
+ {"ky", "windows-1251"},
+ {"lt", "windows-1257"},
+ {"lv", "windows-1257"},
+ {"mk", "windows-1251"},
+ {"pl", "iso-8859-2"},
+ {"ru", "windows-1251"},
+ {"sah", "windows-1251"},
+ {"sk", "windows-1250"},
+ {"sl", "iso-8859-2"},
+ {"sr", "windows-1251"},
+ {"tg", "windows-1251"},
+ {"th", "windows-874"},
+ {"tr", "windows-1254"},
+ {"tt", "windows-1251"},
+ {"uk", "windows-1251"},
+ {"vi", "windows-1258"},
+ {"zh-hans", "gb18030"},
+ {"zh-hant", "big5"},
+}