From 87e820722cf02054225b47a58f97d0824118292f Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Sat, 6 Jul 2019 17:05:53 +0200 Subject: update vendor --- go.mod | 2 + vendor/golang.org/x/net/html/atom/gen.go | 712 --------- .../x/text/encoding/charmap/maketables.go | 556 ------- vendor/golang.org/x/text/encoding/htmlindex/gen.go | 173 --- .../x/text/encoding/internal/identifier/gen.go | 142 -- .../x/text/encoding/japanese/maketables.go | 161 --- .../x/text/encoding/korean/maketables.go | 143 -- .../text/encoding/simplifiedchinese/maketables.go | 161 --- .../text/encoding/traditionalchinese/maketables.go | 140 -- .../x/text/internal/language/compact/gen.go | 64 - .../x/text/internal/language/compact/gen_index.go | 113 -- .../text/internal/language/compact/gen_parents.go | 54 - vendor/golang.org/x/text/internal/language/gen.go | 1520 -------------------- .../x/text/internal/language/gen_common.go | 20 - vendor/golang.org/x/text/language/gen.go | 305 ---- vendor/modules.txt | 14 +- 16 files changed, 9 insertions(+), 4271 deletions(-) delete mode 100644 vendor/golang.org/x/net/html/atom/gen.go delete mode 100644 vendor/golang.org/x/text/encoding/charmap/maketables.go delete mode 100644 vendor/golang.org/x/text/encoding/htmlindex/gen.go delete mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/gen.go delete mode 100644 vendor/golang.org/x/text/encoding/japanese/maketables.go delete mode 100644 vendor/golang.org/x/text/encoding/korean/maketables.go delete mode 100644 vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go delete mode 100644 vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go delete mode 100644 vendor/golang.org/x/text/internal/language/compact/gen.go delete mode 100644 vendor/golang.org/x/text/internal/language/compact/gen_index.go delete mode 100644 vendor/golang.org/x/text/internal/language/compact/gen_parents.go delete mode 100644 vendor/golang.org/x/text/internal/language/gen.go delete mode 100644 vendor/golang.org/x/text/internal/language/gen_common.go delete mode 100644 vendor/golang.org/x/text/language/gen.go diff --git a/go.mod b/go.mod index 60c7779..b568446 100644 --- a/go.mod +++ b/go.mod @@ -8,3 +8,5 @@ require ( golang.org/x/net v0.0.0-20190628185345-da137c7871d7 golang.org/x/text v0.3.2 // indirect ) + +go 1.13 diff --git a/vendor/golang.org/x/net/html/atom/gen.go b/vendor/golang.org/x/net/html/atom/gen.go deleted file mode 100644 index 5d05278..0000000 --- a/vendor/golang.org/x/net/html/atom/gen.go +++ /dev/null @@ -1,712 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -//go:generate go run gen.go -//go:generate go run gen.go -test - -package main - -import ( - "bytes" - "flag" - "fmt" - "go/format" - "io/ioutil" - "math/rand" - "os" - "sort" - "strings" -) - -// identifier converts s to a Go exported identifier. -// It converts "div" to "Div" and "accept-charset" to "AcceptCharset". -func identifier(s string) string { - b := make([]byte, 0, len(s)) - cap := true - for _, c := range s { - if c == '-' { - cap = true - continue - } - if cap && 'a' <= c && c <= 'z' { - c -= 'a' - 'A' - } - cap = false - b = append(b, byte(c)) - } - return string(b) -} - -var test = flag.Bool("test", false, "generate table_test.go") - -func genFile(name string, buf *bytes.Buffer) { - b, err := format.Source(buf.Bytes()) - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - if err := ioutil.WriteFile(name, b, 0644); err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } -} - -func main() { - flag.Parse() - - var all []string - all = append(all, elements...) - all = append(all, attributes...) - all = append(all, eventHandlers...) - all = append(all, extra...) - sort.Strings(all) - - // uniq - lists have dups - w := 0 - for _, s := range all { - if w == 0 || all[w-1] != s { - all[w] = s - w++ - } - } - all = all[:w] - - if *test { - var buf bytes.Buffer - fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") - fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n") - fmt.Fprintln(&buf, "package atom\n") - fmt.Fprintln(&buf, "var testAtomList = []string{") - for _, s := range all { - fmt.Fprintf(&buf, "\t%q,\n", s) - } - fmt.Fprintln(&buf, "}") - - genFile("table_test.go", &buf) - return - } - - // Find hash that minimizes table size. - var best *table - for i := 0; i < 1000000; i++ { - if best != nil && 1<<(best.k-1) < len(all) { - break - } - h := rand.Uint32() - for k := uint(0); k <= 16; k++ { - if best != nil && k >= best.k { - break - } - var t table - if t.init(h, k, all) { - best = &t - break - } - } - } - if best == nil { - fmt.Fprintf(os.Stderr, "failed to construct string table\n") - os.Exit(1) - } - - // Lay out strings, using overlaps when possible. - layout := append([]string{}, all...) - - // Remove strings that are substrings of other strings - for changed := true; changed; { - changed = false - for i, s := range layout { - if s == "" { - continue - } - for j, t := range layout { - if i != j && t != "" && strings.Contains(s, t) { - changed = true - layout[j] = "" - } - } - } - } - - // Join strings where one suffix matches another prefix. - for { - // Find best i, j, k such that layout[i][len-k:] == layout[j][:k], - // maximizing overlap length k. - besti := -1 - bestj := -1 - bestk := 0 - for i, s := range layout { - if s == "" { - continue - } - for j, t := range layout { - if i == j { - continue - } - for k := bestk + 1; k <= len(s) && k <= len(t); k++ { - if s[len(s)-k:] == t[:k] { - besti = i - bestj = j - bestk = k - } - } - } - } - if bestk > 0 { - layout[besti] += layout[bestj][bestk:] - layout[bestj] = "" - continue - } - break - } - - text := strings.Join(layout, "") - - atom := map[string]uint32{} - for _, s := range all { - off := strings.Index(text, s) - if off < 0 { - panic("lost string " + s) - } - atom[s] = uint32(off<<8 | len(s)) - } - - var buf bytes.Buffer - // Generate the Go code. - fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") - fmt.Fprintln(&buf, "//go:generate go run gen.go\n") - fmt.Fprintln(&buf, "package atom\n\nconst (") - - // compute max len - maxLen := 0 - for _, s := range all { - if maxLen < len(s) { - maxLen = len(s) - } - fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s]) - } - fmt.Fprintln(&buf, ")\n") - - fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0) - fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen) - - fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k) - for i, s := range best.tab { - if s == "" { - continue - } - fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s) - } - fmt.Fprintf(&buf, "}\n") - datasize := (1 << best.k) * 4 - - fmt.Fprintln(&buf, "const atomText =") - textsize := len(text) - for len(text) > 60 { - fmt.Fprintf(&buf, "\t%q +\n", text[:60]) - text = text[60:] - } - fmt.Fprintf(&buf, "\t%q\n\n", text) - - genFile("table.go", &buf) - - fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) -} - -type byLen []string - -func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } -func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (x byLen) Len() int { return len(x) } - -// fnv computes the FNV hash with an arbitrary starting value h. -func fnv(h uint32, s string) uint32 { - for i := 0; i < len(s); i++ { - h ^= uint32(s[i]) - h *= 16777619 - } - return h -} - -// A table represents an attempt at constructing the lookup table. -// The lookup table uses cuckoo hashing, meaning that each string -// can be found in one of two positions. -type table struct { - h0 uint32 - k uint - mask uint32 - tab []string -} - -// hash returns the two hashes for s. -func (t *table) hash(s string) (h1, h2 uint32) { - h := fnv(t.h0, s) - h1 = h & t.mask - h2 = (h >> 16) & t.mask - return -} - -// init initializes the table with the given parameters. -// h0 is the initial hash value, -// k is the number of bits of hash value to use, and -// x is the list of strings to store in the table. -// init returns false if the table cannot be constructed. -func (t *table) init(h0 uint32, k uint, x []string) bool { - t.h0 = h0 - t.k = k - t.tab = make([]string, 1< len(t.tab) { - return false - } - s := t.tab[i] - h1, h2 := t.hash(s) - j := h1 + h2 - i - if t.tab[j] != "" && !t.push(j, depth+1) { - return false - } - t.tab[j] = s - return true -} - -// The lists of element names and attribute keys were taken from -// https://html.spec.whatwg.org/multipage/indices.html#index -// as of the "HTML Living Standard - Last Updated 16 April 2018" version. - -// "command", "keygen" and "menuitem" have been removed from the spec, -// but are kept here for backwards compatibility. -var elements = []string{ - "a", - "abbr", - "address", - "area", - "article", - "aside", - "audio", - "b", - "base", - "bdi", - "bdo", - "blockquote", - "body", - "br", - "button", - "canvas", - "caption", - "cite", - "code", - "col", - "colgroup", - "command", - "data", - "datalist", - "dd", - "del", - "details", - "dfn", - "dialog", - "div", - "dl", - "dt", - "em", - "embed", - "fieldset", - "figcaption", - "figure", - "footer", - "form", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "head", - "header", - "hgroup", - "hr", - "html", - "i", - "iframe", - "img", - "input", - "ins", - "kbd", - "keygen", - "label", - "legend", - "li", - "link", - "main", - "map", - "mark", - "menu", - "menuitem", - "meta", - "meter", - "nav", - "noscript", - "object", - "ol", - "optgroup", - "option", - "output", - "p", - "param", - "picture", - "pre", - "progress", - "q", - "rp", - "rt", - "ruby", - "s", - "samp", - "script", - "section", - "select", - "slot", - "small", - "source", - "span", - "strong", - "style", - "sub", - "summary", - "sup", - "table", - "tbody", - "td", - "template", - "textarea", - "tfoot", - "th", - "thead", - "time", - "title", - "tr", - "track", - "u", - "ul", - "var", - "video", - "wbr", -} - -// https://html.spec.whatwg.org/multipage/indices.html#attributes-3 -// -// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", -// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, -// but are kept here for backwards compatibility. -var attributes = []string{ - "abbr", - "accept", - "accept-charset", - "accesskey", - "action", - "allowfullscreen", - "allowpaymentrequest", - "allowusermedia", - "alt", - "as", - "async", - "autocomplete", - "autofocus", - "autoplay", - "challenge", - "charset", - "checked", - "cite", - "class", - "color", - "cols", - "colspan", - "command", - "content", - "contenteditable", - "contextmenu", - "controls", - "coords", - "crossorigin", - "data", - "datetime", - "default", - "defer", - "dir", - "dirname", - "disabled", - "download", - "draggable", - "dropzone", - "enctype", - "for", - "form", - "formaction", - "formenctype", - "formmethod", - "formnovalidate", - "formtarget", - "headers", - "height", - "hidden", - "high", - "href", - "hreflang", - "http-equiv", - "icon", - "id", - "inputmode", - "integrity", - "is", - "ismap", - "itemid", - "itemprop", - "itemref", - "itemscope", - "itemtype", - "keytype", - "kind", - "label", - "lang", - "list", - "loop", - "low", - "manifest", - "max", - "maxlength", - "media", - "mediagroup", - "method", - "min", - "minlength", - "multiple", - "muted", - "name", - "nomodule", - "nonce", - "novalidate", - "open", - "optimum", - "pattern", - "ping", - "placeholder", - "playsinline", - "poster", - "preload", - "radiogroup", - "readonly", - "referrerpolicy", - "rel", - "required", - "reversed", - "rows", - "rowspan", - "sandbox", - "spellcheck", - "scope", - "scoped", - "seamless", - "selected", - "shape", - "size", - "sizes", - "sortable", - "sorted", - "slot", - "span", - "spellcheck", - "src", - "srcdoc", - "srclang", - "srcset", - "start", - "step", - "style", - "tabindex", - "target", - "title", - "translate", - "type", - "typemustmatch", - "updateviacache", - "usemap", - "value", - "width", - "workertype", - "wrap", -} - -// "onautocomplete", "onautocompleteerror", "onmousewheel", -// "onshow" and "onsort" have been removed from the spec, -// but are kept here for backwards compatibility. -var eventHandlers = []string{ - "onabort", - "onautocomplete", - "onautocompleteerror", - "onauxclick", - "onafterprint", - "onbeforeprint", - "onbeforeunload", - "onblur", - "oncancel", - "oncanplay", - "oncanplaythrough", - "onchange", - "onclick", - "onclose", - "oncontextmenu", - "oncopy", - "oncuechange", - "oncut", - "ondblclick", - "ondrag", - "ondragend", - "ondragenter", - "ondragexit", - "ondragleave", - "ondragover", - "ondragstart", - "ondrop", - "ondurationchange", - "onemptied", - "onended", - "onerror", - "onfocus", - "onhashchange", - "oninput", - "oninvalid", - "onkeydown", - "onkeypress", - "onkeyup", - "onlanguagechange", - "onload", - "onloadeddata", - "onloadedmetadata", - "onloadend", - "onloadstart", - "onmessage", - "onmessageerror", - "onmousedown", - "onmouseenter", - "onmouseleave", - "onmousemove", - "onmouseout", - "onmouseover", - "onmouseup", - "onmousewheel", - "onwheel", - "onoffline", - "ononline", - "onpagehide", - "onpageshow", - "onpaste", - "onpause", - "onplay", - "onplaying", - "onpopstate", - "onprogress", - "onratechange", - "onreset", - "onresize", - "onrejectionhandled", - "onscroll", - "onsecuritypolicyviolation", - "onseeked", - "onseeking", - "onselect", - "onshow", - "onsort", - "onstalled", - "onstorage", - "onsubmit", - "onsuspend", - "ontimeupdate", - "ontoggle", - "onunhandledrejection", - "onunload", - "onvolumechange", - "onwaiting", -} - -// extra are ad-hoc values not covered by any of the lists above. -var extra = []string{ - "acronym", - "align", - "annotation", - "annotation-xml", - "applet", - "basefont", - "bgsound", - "big", - "blink", - "center", - "color", - "desc", - "face", - "font", - "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. - "foreignobject", - "frame", - "frameset", - "image", - "isindex", - "listing", - "malignmark", - "marquee", - "math", - "mglyph", - "mi", - "mn", - "mo", - "ms", - "mtext", - "nobr", - "noembed", - "noframes", - "plaintext", - "prompt", - "public", - "rb", - "rtc", - "spacer", - "strike", - "svg", - "system", - "tt", - "xmp", -} diff --git a/vendor/golang.org/x/text/encoding/charmap/maketables.go b/vendor/golang.org/x/text/encoding/charmap/maketables.go deleted file mode 100644 index f794170..0000000 --- a/vendor/golang.org/x/text/encoding/charmap/maketables.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" - "unicode/utf8" - - "golang.org/x/text/encoding" - "golang.org/x/text/internal/gen" -) - -const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + - ` !"#$%&'()*+,-./0123456789:;<=>?` + - `@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` + - "`abcdefghijklmnopqrstuvwxyz{|}~\u007f" - -var encodings = []struct { - name string - mib string - comment string - varName string - replacement byte - mapping string -}{ - { - "IBM Code Page 037", - "IBM037", - "", - "CodePage037", - 0x3f, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm", - }, - { - "IBM Code Page 437", - "PC8CodePage437", - "", - "CodePage437", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm", - }, - { - "IBM Code Page 850", - "PC850Multilingual", - "", - "CodePage850", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm", - }, - { - "IBM Code Page 852", - "PCp852", - "", - "CodePage852", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm", - }, - { - "IBM Code Page 855", - "IBM855", - "", - "CodePage855", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm", - }, - { - "Windows Code Page 858", // PC latin1 with Euro - "IBM00858", - "", - "CodePage858", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm", - }, - { - "IBM Code Page 860", - "IBM860", - "", - "CodePage860", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm", - }, - { - "IBM Code Page 862", - "PC862LatinHebrew", - "", - "CodePage862", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm", - }, - { - "IBM Code Page 863", - "IBM863", - "", - "CodePage863", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm", - }, - { - "IBM Code Page 865", - "IBM865", - "", - "CodePage865", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm", - }, - { - "IBM Code Page 866", - "IBM866", - "", - "CodePage866", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-ibm866.txt", - }, - { - "IBM Code Page 1047", - "IBM1047", - "", - "CodePage1047", - 0x3f, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm", - }, - { - "IBM Code Page 1140", - "IBM01140", - "", - "CodePage1140", - 0x3f, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm", - }, - { - "ISO 8859-1", - "ISOLatin1", - "", - "ISO8859_1", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm", - }, - { - "ISO 8859-2", - "ISOLatin2", - "", - "ISO8859_2", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-2.txt", - }, - { - "ISO 8859-3", - "ISOLatin3", - "", - "ISO8859_3", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-3.txt", - }, - { - "ISO 8859-4", - "ISOLatin4", - "", - "ISO8859_4", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-4.txt", - }, - { - "ISO 8859-5", - "ISOLatinCyrillic", - "", - "ISO8859_5", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-5.txt", - }, - { - "ISO 8859-6", - "ISOLatinArabic", - "", - "ISO8859_6,ISO8859_6E,ISO8859_6I", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-6.txt", - }, - { - "ISO 8859-7", - "ISOLatinGreek", - "", - "ISO8859_7", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-7.txt", - }, - { - "ISO 8859-8", - "ISOLatinHebrew", - "", - "ISO8859_8,ISO8859_8E,ISO8859_8I", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-8.txt", - }, - { - "ISO 8859-9", - "ISOLatin5", - "", - "ISO8859_9", - encoding.ASCIISub, - "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm", - }, - { - "ISO 8859-10", - "ISOLatin6", - "", - "ISO8859_10", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-10.txt", - }, - { - "ISO 8859-13", - "ISO885913", - "", - "ISO8859_13", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-13.txt", - }, - { - "ISO 8859-14", - "ISO885914", - "", - "ISO8859_14", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-14.txt", - }, - { - "ISO 8859-15", - "ISO885915", - "", - "ISO8859_15", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-15.txt", - }, - { - "ISO 8859-16", - "ISO885916", - "", - "ISO8859_16", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-iso-8859-16.txt", - }, - { - "KOI8-R", - "KOI8R", - "", - "KOI8R", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-koi8-r.txt", - }, - { - "KOI8-U", - "KOI8U", - "", - "KOI8U", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-koi8-u.txt", - }, - { - "Macintosh", - "Macintosh", - "", - "Macintosh", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-macintosh.txt", - }, - { - "Macintosh Cyrillic", - "MacintoshCyrillic", - "", - "MacintoshCyrillic", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt", - }, - { - "Windows 874", - "Windows874", - "", - "Windows874", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-874.txt", - }, - { - "Windows 1250", - "Windows1250", - "", - "Windows1250", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1250.txt", - }, - { - "Windows 1251", - "Windows1251", - "", - "Windows1251", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1251.txt", - }, - { - "Windows 1252", - "Windows1252", - "", - "Windows1252", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1252.txt", - }, - { - "Windows 1253", - "Windows1253", - "", - "Windows1253", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1253.txt", - }, - { - "Windows 1254", - "Windows1254", - "", - "Windows1254", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1254.txt", - }, - { - "Windows 1255", - "Windows1255", - "", - "Windows1255", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1255.txt", - }, - { - "Windows 1256", - "Windows1256", - "", - "Windows1256", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1256.txt", - }, - { - "Windows 1257", - "Windows1257", - "", - "Windows1257", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1257.txt", - }, - { - "Windows 1258", - "Windows1258", - "", - "Windows1258", - encoding.ASCIISub, - "http://encoding.spec.whatwg.org/index-windows-1258.txt", - }, - { - "X-User-Defined", - "XUserDefined", - "It is defined at http://encoding.spec.whatwg.org/#x-user-defined", - "XUserDefined", - encoding.ASCIISub, - ascii + - "\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" + - "\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" + - "\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" + - "\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" + - "\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" + - "\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" + - "\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" + - "\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" + - "\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" + - "\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" + - "\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" + - "\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" + - "\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" + - "\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" + - "\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" + - "\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff", - }, -} - -func getWHATWG(url string) string { - res, err := http.Get(url) - if err != nil { - log.Fatalf("%q: Get: %v", url, err) - } - defer res.Body.Close() - - mapping := make([]rune, 128) - for i := range mapping { - mapping[i] = '\ufffd' - } - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := 0, 0 - if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 128 <= x { - log.Fatalf("code %d is out of range", x) - } - if 0x80 <= y && y < 0xa0 { - // We diverge from the WHATWG spec by mapping control characters - // in the range [0x80, 0xa0) to U+FFFD. - continue - } - mapping[x] = rune(y) - } - return ascii + string(mapping) -} - -func getUCM(url string) string { - res, err := http.Get(url) - if err != nil { - log.Fatalf("%q: Get: %v", url, err) - } - defer res.Body.Close() - - mapping := make([]rune, 256) - for i := range mapping { - mapping[i] = '\ufffd' - } - - charsFound := 0 - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - var c byte - var r rune - if _, err := fmt.Sscanf(s, ` \x%x |0`, &r, &c); err != nil { - continue - } - mapping[c] = r - charsFound++ - } - - if charsFound < 200 { - log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound) - } - - return string(mapping) -} - -func main() { - mibs := map[string]bool{} - all := []string{} - - w := gen.NewCodeWriter() - defer w.WriteGoFile("tables.go", "charmap") - - printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) } - - printf("import (\n") - printf("\t\"golang.org/x/text/encoding\"\n") - printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n") - printf(")\n\n") - for _, e := range encodings { - varNames := strings.Split(e.varName, ",") - all = append(all, varNames...) - varName := varNames[0] - switch { - case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"): - e.mapping = getWHATWG(e.mapping) - case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"): - e.mapping = getUCM(e.mapping) - } - - asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00 - if asciiSuperset { - low = 0x80 - } - lvn := 1 - if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") { - lvn = 3 - } - lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:] - printf("// %s is the %s encoding.\n", varName, e.name) - if e.comment != "" { - printf("//\n// %s\n", e.comment) - } - printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n", - varName, lowerVarName, lowerVarName, e.name) - if mibs[e.mib] { - log.Fatalf("MIB type %q declared multiple times.", e.mib) - } - printf("mib: identifier.%s,\n", e.mib) - printf("asciiSuperset: %t,\n", asciiSuperset) - printf("low: 0x%02x,\n", low) - printf("replacement: 0x%02x,\n", e.replacement) - - printf("decode: [256]utf8Enc{\n") - i, backMapping := 0, map[rune]byte{} - for _, c := range e.mapping { - if _, ok := backMapping[c]; !ok && c != utf8.RuneError { - backMapping[c] = byte(i) - } - var buf [8]byte - n := utf8.EncodeRune(buf[:], c) - if n > 3 { - panic(fmt.Sprintf("rune %q (%U) is too long", c, c)) - } - printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2]) - if i%2 == 1 { - printf("\n") - } - i++ - } - printf("},\n") - - printf("encode: [256]uint32{\n") - encode := make([]uint32, 0, 256) - for c, i := range backMapping { - encode = append(encode, uint32(i)<<24|uint32(c)) - } - sort.Sort(byRune(encode)) - for len(encode) < cap(encode) { - encode = append(encode, encode[len(encode)-1]) - } - for i, enc := range encode { - printf("0x%08x,", enc) - if i%8 == 7 { - printf("\n") - } - } - printf("},\n}\n") - - // Add an estimate of the size of a single Charmap{} struct value, which - // includes two 256 elem arrays of 4 bytes and some extra fields, which - // align to 3 uint64s on 64-bit architectures. - w.Size += 2*4*256 + 3*8 - } - // TODO: add proper line breaking. - printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n")) -} - -type byRune []uint32 - -func (b byRune) Len() int { return len(b) } -func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff } -func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go deleted file mode 100644 index ac6b4a7..0000000 --- a/vendor/golang.org/x/text/encoding/htmlindex/gen.go +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bytes" - "encoding/json" - "fmt" - "log" - "strings" - - "golang.org/x/text/internal/gen" -) - -type group struct { - Encodings []struct { - Labels []string - Name string - } -} - -func main() { - gen.Init() - - r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json") - var groups []group - if err := json.NewDecoder(r).Decode(&groups); err != nil { - log.Fatalf("Error reading encodings.json: %v", err) - } - - w := &bytes.Buffer{} - fmt.Fprintln(w, "type htmlEncoding byte") - fmt.Fprintln(w, "const (") - for i, g := range groups { - for _, e := range g.Encodings { - key := strings.ToLower(e.Name) - name := consts[key] - if name == "" { - log.Fatalf("No const defined for %s.", key) - } - if i == 0 { - fmt.Fprintf(w, "%s htmlEncoding = iota\n", name) - } else { - fmt.Fprintf(w, "%s\n", name) - } - } - } - fmt.Fprintln(w, "numEncodings") - fmt.Fprint(w, ")\n\n") - - fmt.Fprintln(w, "var canonical = [numEncodings]string{") - for _, g := range groups { - for _, e := range g.Encodings { - fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name)) - } - } - fmt.Fprint(w, "}\n\n") - - fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") - for _, g := range groups { - for _, e := range g.Encodings { - for _, l := range e.Labels { - key := strings.ToLower(e.Name) - name := consts[key] - fmt.Fprintf(w, "%q: %s,\n", l, name) - } - } - } - fmt.Fprint(w, "}\n\n") - - var tags []string - fmt.Fprintln(w, "var localeMap = []htmlEncoding{") - for _, loc := range locales { - tags = append(tags, loc.tag) - fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag) - } - fmt.Fprint(w, "}\n\n") - - fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) - - gen.WriteGoFile("tables.go", "htmlindex", w.Bytes()) -} - -// consts maps canonical encoding name to internal constant. -var consts = map[string]string{ - "utf-8": "utf8", - "ibm866": "ibm866", - "iso-8859-2": "iso8859_2", - "iso-8859-3": "iso8859_3", - "iso-8859-4": "iso8859_4", - "iso-8859-5": "iso8859_5", - "iso-8859-6": "iso8859_6", - "iso-8859-7": "iso8859_7", - "iso-8859-8": "iso8859_8", - "iso-8859-8-i": "iso8859_8I", - "iso-8859-10": "iso8859_10", - "iso-8859-13": "iso8859_13", - "iso-8859-14": "iso8859_14", - "iso-8859-15": "iso8859_15", - "iso-8859-16": "iso8859_16", - "koi8-r": "koi8r", - "koi8-u": "koi8u", - "macintosh": "macintosh", - "windows-874": "windows874", - "windows-1250": "windows1250", - "windows-1251": "windows1251", - "windows-1252": "windows1252", - "windows-1253": "windows1253", - "windows-1254": "windows1254", - "windows-1255": "windows1255", - "windows-1256": "windows1256", - "windows-1257": "windows1257", - "windows-1258": "windows1258", - "x-mac-cyrillic": "macintoshCyrillic", - "gbk": "gbk", - "gb18030": "gb18030", - // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG - "big5": "big5", - "euc-jp": "eucjp", - "iso-2022-jp": "iso2022jp", - "shift_jis": "shiftJIS", - "euc-kr": "euckr", - "replacement": "replacement", - "utf-16be": "utf16be", - "utf-16le": "utf16le", - "x-user-defined": "xUserDefined", -} - -// locales is taken from -// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm. -var locales = []struct{ tag, name string }{ - // The default value. Explicitly state latin to benefit from the exact - // script option, while still making 1252 the default encoding for languages - // written in Latin script. - {"und_Latn", "windows-1252"}, - {"ar", "windows-1256"}, - {"ba", "windows-1251"}, - {"be", "windows-1251"}, - {"bg", "windows-1251"}, - {"cs", "windows-1250"}, - {"el", "iso-8859-7"}, - {"et", "windows-1257"}, - {"fa", "windows-1256"}, - {"he", "windows-1255"}, - {"hr", "windows-1250"}, - {"hu", "iso-8859-2"}, - {"ja", "shift_jis"}, - {"kk", "windows-1251"}, - {"ko", "euc-kr"}, - {"ku", "windows-1254"}, - {"ky", "windows-1251"}, - {"lt", "windows-1257"}, - {"lv", "windows-1257"}, - {"mk", "windows-1251"}, - {"pl", "iso-8859-2"}, - {"ru", "windows-1251"}, - {"sah", "windows-1251"}, - {"sk", "windows-1250"}, - {"sl", "iso-8859-2"}, - {"sr", "windows-1251"}, - {"tg", "windows-1251"}, - {"th", "windows-874"}, - {"tr", "windows-1254"}, - {"tt", "windows-1251"}, - {"uk", "windows-1251"}, - {"vi", "windows-1258"}, - {"zh-hans", "gb18030"}, - {"zh-hant", "big5"}, -} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go deleted file mode 100644 index 26cfef9..0000000 --- a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "bytes" - "encoding/xml" - "fmt" - "io" - "log" - "strings" - - "golang.org/x/text/internal/gen" -) - -type registry struct { - XMLName xml.Name `xml:"registry"` - Updated string `xml:"updated"` - Registry []struct { - ID string `xml:"id,attr"` - Record []struct { - Name string `xml:"name"` - Xref []struct { - Type string `xml:"type,attr"` - Data string `xml:"data,attr"` - } `xml:"xref"` - Desc struct { - Data string `xml:",innerxml"` - // Any []struct { - // Data string `xml:",chardata"` - // } `xml:",any"` - // Data string `xml:",chardata"` - } `xml:"description,"` - MIB string `xml:"value"` - Alias []string `xml:"alias"` - MIME string `xml:"preferred_alias"` - } `xml:"record"` - } `xml:"registry"` -} - -func main() { - r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") - reg := ®istry{} - if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { - log.Fatalf("Error decoding charset registry: %v", err) - } - if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { - log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) - } - - w := &bytes.Buffer{} - fmt.Fprintf(w, "const (\n") - for _, rec := range reg.Registry[0].Record { - constName := "" - for _, a := range rec.Alias { - if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 { - // Some of the constant definitions have comments in them. Strip those. - constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0]) - } - } - if constName == "" { - switch rec.MIB { - case "2085": - constName = "HZGB2312" // Not listed as alias for some reason. - default: - log.Fatalf("No cs alias defined for %s.", rec.MIB) - } - } - if rec.MIME != "" { - rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME) - } - fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME) - if len(rec.Desc.Data) > 0 { - fmt.Fprint(w, "// ") - d := xml.NewDecoder(strings.NewReader(rec.Desc.Data)) - inElem := true - attr := "" - for { - t, err := d.Token() - if err != nil { - if err != io.EOF { - log.Fatal(err) - } - break - } - switch x := t.(type) { - case xml.CharData: - attr = "" // Don't need attribute info. - a := bytes.Split([]byte(x), []byte("\n")) - for i, b := range a { - if b = bytes.TrimSpace(b); len(b) != 0 { - if !inElem && i > 0 { - fmt.Fprint(w, "\n// ") - } - inElem = false - fmt.Fprintf(w, "%s ", string(b)) - } - } - case xml.StartElement: - if x.Name.Local == "xref" { - inElem = true - use := false - for _, a := range x.Attr { - if a.Name.Local == "type" { - use = use || a.Value != "person" - } - if a.Name.Local == "data" && use { - // Patch up URLs to use https. From some links, the - // https version is different from the http one. - s := a.Value - s = strings.Replace(s, "http://", "https://", -1) - s = strings.Replace(s, "/unicode/", "/", -1) - attr = s + " " - } - } - } - case xml.EndElement: - inElem = false - fmt.Fprint(w, attr) - } - } - fmt.Fprint(w, "\n") - } - for _, x := range rec.Xref { - switch x.Type { - case "rfc": - fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data)) - case "uri": - fmt.Fprintf(w, "// Reference: %s\n", x.Data) - } - } - fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB) - fmt.Fprintln(w) - } - fmt.Fprintln(w, ")") - - gen.WriteGoFile("mib.go", "identifier", w.Bytes()) -} diff --git a/vendor/golang.org/x/text/encoding/japanese/maketables.go b/vendor/golang.org/x/text/encoding/japanese/maketables.go deleted file mode 100644 index 023957a..0000000 --- a/vendor/golang.org/x/text/encoding/japanese/maketables.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -// TODO: Emoji extensions? -// https://www.unicode.org/faq/emoji_dingbats.html -// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -type entry struct { - jisCode, table int -} - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n") - fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n") - - reverse := [65536]entry{} - for i := range reverse { - reverse[i].table = -1 - } - - tables := []struct { - url string - name string - }{ - {"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"}, - {"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"}, - } - for i, table := range tables { - res, err := http.Get(table.url) - if err != nil { - log.Fatalf("%q: Get: %v", table.url, err) - } - defer res.Body.Close() - - mapping := [65536]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := 0, uint16(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("%q: could not parse %q", table.url, s) - } - if x < 0 || 120*94 <= x { - log.Fatalf("%q: JIS code %d is out of range", table.url, x) - } - mapping[x] = y - if reverse[y].table == -1 { - reverse[y] = entry{jisCode: x, table: i} - } - } - if err := scanner.Err(); err != nil { - log.Fatalf("%q: scanner error: %v", table.url, err) - } - - fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n", - table.name, table.name, table.url) - fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name) - for i, m := range mapping { - if m != 0 { - fmt.Printf("\t%d: 0x%04X,\n", i, m) - } - } - fmt.Printf("}\n\n") - } - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v.table == -1 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const (\n") - fmt.Printf("\tjis0208 = 1\n") - fmt.Printf("\tjis0212 = 2\n") - fmt.Printf("\tcodeMask = 0x7f\n") - fmt.Printf("\tcodeShift = 7\n") - fmt.Printf("\ttableShift = 14\n") - fmt.Printf(")\n\n") - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("//\n") - fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n") - fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n") - fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n") - fmt.Printf("// JIS code (94*j1 + j2) within that table.\n") - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x.table == -1 { - continue - } - fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n", - j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/korean/maketables.go b/vendor/golang.org/x/text/encoding/korean/maketables.go deleted file mode 100644 index c84034f..0000000 --- a/vendor/golang.org/x/text/encoding/korean/maketables.go +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n") - fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n") - - res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - mapping := [65536]uint16{} - reverse := [65536]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint16(0), uint16(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x { - log.Fatalf("EUC-KR code %d is out of range", x) - } - mapping[x] = y - if reverse[y] == 0 { - c0, c1 := uint16(0), uint16(0) - if x < 178*(0xc7-0x81) { - c0 = uint16(x/178) + 0x81 - c1 = uint16(x % 178) - switch { - case c1 < 1*26: - c1 += 0x41 - case c1 < 2*26: - c1 += 0x47 - default: - c1 += 0x4d - } - } else { - x -= 178 * (0xc7 - 0x81) - c0 = uint16(x/94) + 0xc7 - c1 = uint16(x%94) + 0xa1 - } - reverse[y] = c0<<8 | c1 - } - } - if err := scanner.Err(); err != nil { - log.Fatalf("scanner error: %v", err) - } - - fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n") - fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n") - fmt.Printf("var decode = [...]uint16{\n") - for i, v := range mapping { - if v != 0 { - fmt.Printf("\t%d: 0x%04X,\n", i, v) - } - } - fmt.Printf("}\n\n") - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v == 0 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x == 0 { - continue - } - fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go deleted file mode 100644 index 55016c7..0000000 --- a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n") - fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n") - - printGB18030() - printGBK() -} - -func printGB18030() { - res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n") - fmt.Printf("var gb18030 = [...][2]uint16{\n") - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint32(0), uint32(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0x10000 && y < 0x10000 { - fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y) - } - } - fmt.Printf("}\n\n") -} - -func printGBK() { - res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - mapping := [65536]uint16{} - reverse := [65536]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint16(0), uint16(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 126*190 <= x { - log.Fatalf("GBK code %d is out of range", x) - } - mapping[x] = y - if reverse[y] == 0 { - c0, c1 := x/190, x%190 - if c1 >= 0x3f { - c1++ - } - reverse[y] = (0x81+c0)<<8 | (0x40 + c1) - } - } - if err := scanner.Err(); err != nil { - log.Fatalf("scanner error: %v", err) - } - - fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n") - fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n") - fmt.Printf("var decode = [...]uint16{\n") - for i, v := range mapping { - if v != 0 { - fmt.Printf("\t%d: 0x%04X,\n", i, v) - } - } - fmt.Printf("}\n\n") - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v == 0 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x == 0 { - continue - } - fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go deleted file mode 100644 index cf7fdb3..0000000 --- a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This program generates tables.go: -// go run maketables.go | gofmt > tables.go - -import ( - "bufio" - "fmt" - "log" - "net/http" - "sort" - "strings" -) - -func main() { - fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n") - fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n") - fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n") - - res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt") - if err != nil { - log.Fatalf("Get: %v", err) - } - defer res.Body.Close() - - mapping := [65536]uint32{} - reverse := [65536 * 4]uint16{} - - scanner := bufio.NewScanner(res.Body) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if s == "" || s[0] == '#' { - continue - } - x, y := uint16(0), uint32(0) - if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil { - log.Fatalf("could not parse %q", s) - } - if x < 0 || 126*157 <= x { - log.Fatalf("Big5 code %d is out of range", x) - } - mapping[x] = y - - // The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that - // "The index pointer for code point in index is the first pointer - // corresponding to code point in index", which would normally mean - // that the code below should be guarded by "if reverse[y] == 0", but - // last instead of first seems to match the behavior of - // "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in - // http://encoding.spec.whatwg.org/index-big5.txt, as index 2148 - // (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc") - // and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc". - c0, c1 := x/157, x%157 - if c1 < 0x3f { - c1 += 0x40 - } else { - c1 += 0x62 - } - reverse[y] = (0x81+c0)<<8 | c1 - } - if err := scanner.Err(); err != nil { - log.Fatalf("scanner error: %v", err) - } - - fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n") - fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n") - fmt.Printf("var decode = [...]uint32{\n") - for i, v := range mapping { - if v != 0 { - fmt.Printf("\t%d: 0x%08X,\n", i, v) - } - } - fmt.Printf("}\n\n") - - // Any run of at least separation continuous zero entries in the reverse map will - // be a separate encode table. - const separation = 1024 - - intervals := []interval(nil) - low, high := -1, -1 - for i, v := range reverse { - if v == 0 { - continue - } - if low < 0 { - low = i - } else if i-high >= separation { - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - low = i - } - high = i + 1 - } - if high >= 0 { - intervals = append(intervals, interval{low, high}) - } - sort.Sort(byDecreasingLength(intervals)) - - fmt.Printf("const numEncodeTables = %d\n\n", len(intervals)) - fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n") - fmt.Printf("// sorted by decreasing length.\n") - for i, v := range intervals { - fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high) - } - fmt.Printf("\n") - - for i, v := range intervals { - fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high) - fmt.Printf("var encode%d = [...]uint16{\n", i) - for j := v.low; j < v.high; j++ { - x := reverse[j] - if x == 0 { - continue - } - fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x) - } - fmt.Printf("}\n\n") - } -} - -// interval is a half-open interval [low, high). -type interval struct { - low, high int -} - -func (i interval) len() int { return i.high - i.low } - -// byDecreasingLength sorts intervals by decreasing length. -type byDecreasingLength []interval - -func (b byDecreasingLength) Len() int { return len(b) } -func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() } -func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] } diff --git a/vendor/golang.org/x/text/internal/language/compact/gen.go b/vendor/golang.org/x/text/internal/language/compact/gen.go deleted file mode 100644 index 0c36a05..0000000 --- a/vendor/golang.org/x/text/internal/language/compact/gen.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Language tag table generator. -// Data read from the web. - -package main - -import ( - "flag" - "fmt" - "log" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/unicode/cldr" -) - -var ( - test = flag.Bool("test", - false, - "test existing tables; can be used to compare web data with package data.") - outputFile = flag.String("output", - "tables.go", - "output file for generated tables") -) - -func main() { - gen.Init() - - w := gen.NewCodeWriter() - defer w.WriteGoFile("tables.go", "compact") - - fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`) - - b := newBuilder(w) - gen.WriteCLDRVersion(w) - - b.writeCompactIndex() -} - -type builder struct { - w *gen.CodeWriter - data *cldr.CLDR - supp *cldr.SupplementalData -} - -func newBuilder(w *gen.CodeWriter) *builder { - r := gen.OpenCLDRCoreZip() - defer r.Close() - d := &cldr.Decoder{} - data, err := d.DecodeZip(r) - if err != nil { - log.Fatal(err) - } - b := builder{ - w: w, - data: data, - supp: data.Supplemental(), - } - return &b -} diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_index.go b/vendor/golang.org/x/text/internal/language/compact/gen_index.go deleted file mode 100644 index 136cefa..0000000 --- a/vendor/golang.org/x/text/internal/language/compact/gen_index.go +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -// This file generates derivative tables based on the language package itself. - -import ( - "fmt" - "log" - "sort" - "strings" - - "golang.org/x/text/internal/language" -) - -// Compact indices: -// Note -va-X variants only apply to localization variants. -// BCP variants only ever apply to language. -// The only ambiguity between tags is with regions. - -func (b *builder) writeCompactIndex() { - // Collect all language tags for which we have any data in CLDR. - m := map[language.Tag]bool{} - for _, lang := range b.data.Locales() { - // We include all locales unconditionally to be consistent with en_US. - // We want en_US, even though it has no data associated with it. - - // TODO: put any of the languages for which no data exists at the end - // of the index. This allows all components based on ICU to use that - // as the cutoff point. - // if x := data.RawLDML(lang); false || - // x.LocaleDisplayNames != nil || - // x.Characters != nil || - // x.Delimiters != nil || - // x.Measurement != nil || - // x.Dates != nil || - // x.Numbers != nil || - // x.Units != nil || - // x.ListPatterns != nil || - // x.Collations != nil || - // x.Segmentations != nil || - // x.Rbnf != nil || - // x.Annotations != nil || - // x.Metadata != nil { - - // TODO: support POSIX natively, albeit non-standard. - tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) - m[tag] = true - // } - } - - // TODO: plural rules are also defined for the deprecated tags: - // iw mo sh tl - // Consider removing these as compact tags. - - // Include locales for plural rules, which uses a different structure. - for _, plurals := range b.supp.Plurals { - for _, rules := range plurals.PluralRules { - for _, lang := range strings.Split(rules.Locales, " ") { - m[language.Make(lang)] = true - } - } - } - - var coreTags []language.CompactCoreInfo - var special []string - - for t := range m { - if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { - log.Fatalf("Unexpected extension %v in %v", x, t) - } - if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { - cci, ok := language.GetCompactCore(t) - if !ok { - log.Fatalf("Locale for non-basic language %q", t) - } - coreTags = append(coreTags, cci) - } else { - special = append(special, t.String()) - } - } - - w := b.w - - sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] }) - sort.Strings(special) - - w.WriteComment(` - NumCompactTags is the number of common tags. The maximum tag is - NumCompactTags-1.`) - w.WriteConst("NumCompactTags", len(m)) - - fmt.Fprintln(w, "const (") - for i, t := range coreTags { - fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i) - } - for i, t := range special { - fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags)) - } - fmt.Fprintln(w, ")") - - w.WriteVar("coreTags", coreTags) - - w.WriteConst("specialTagsStr", strings.Join(special, " ")) -} - -func ident(s string) string { - return strings.Replace(s, "-", "", -1) + "Index" -} diff --git a/vendor/golang.org/x/text/internal/language/compact/gen_parents.go b/vendor/golang.org/x/text/internal/language/compact/gen_parents.go deleted file mode 100644 index 9543d58..0000000 --- a/vendor/golang.org/x/text/internal/language/compact/gen_parents.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -package main - -import ( - "log" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/language" - "golang.org/x/text/internal/language/compact" - "golang.org/x/text/unicode/cldr" -) - -func main() { - r := gen.OpenCLDRCoreZip() - defer r.Close() - - d := &cldr.Decoder{} - data, err := d.DecodeZip(r) - if err != nil { - log.Fatalf("DecodeZip: %v", err) - } - - w := gen.NewCodeWriter() - defer w.WriteGoFile("parents.go", "compact") - - // Create parents table. - type ID uint16 - parents := make([]ID, compact.NumCompactTags) - for _, loc := range data.Locales() { - tag := language.MustParse(loc) - index, ok := compact.FromTag(tag) - if !ok { - continue - } - parentIndex := compact.ID(0) // und - for p := tag.Parent(); p != language.Und; p = p.Parent() { - if x, ok := compact.FromTag(p); ok { - parentIndex = x - break - } - } - parents[index] = ID(parentIndex) - } - - w.WriteComment(` - parents maps a compact index of a tag to the compact index of the parent of - this tag.`) - w.WriteVar("parents", parents) -} diff --git a/vendor/golang.org/x/text/internal/language/gen.go b/vendor/golang.org/x/text/internal/language/gen.go deleted file mode 100644 index cdcc7fe..0000000 --- a/vendor/golang.org/x/text/internal/language/gen.go +++ /dev/null @@ -1,1520 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build ignore - -// Language tag table generator. -// Data read from the web. - -package main - -import ( - "bufio" - "flag" - "fmt" - "io" - "io/ioutil" - "log" - "math" - "reflect" - "regexp" - "sort" - "strconv" - "strings" - - "golang.org/x/text/internal/gen" - "golang.org/x/text/internal/tag" - "golang.org/x/text/unicode/cldr" -) - -var ( - test = flag.Bool("test", - false, - "test existing tables; can be used to compare web data with package data.") - outputFile = flag.String("output", - "tables.go", - "output file for generated tables") -) - -var comment = []string{ - ` -lang holds an alphabetically sorted list of ISO-639 language identifiers. -All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. -For 2-byte language identifiers, the two successive bytes have the following meaning: - - if the first letter of the 2- and 3-letter ISO codes are the same: - the second and third letter of the 3-letter ISO code. - - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. -For 3-byte language identifiers the 4th byte is 0.`, - ` -langNoIndex is a bit vector of all 3-letter language codes that are not used as an index -in lookup tables. The language ids for these language codes are derived directly -from the letters and are not consecutive.`, - ` -altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives -to 2-letter language codes that cannot be derived using the method described above. -Each 3-letter code is followed by its 1-byte langID.`, - ` -altLangIndex is used to convert indexes in altLangISO3 to langIDs.`, - ` -AliasMap maps langIDs to their suggested replacements.`, - ` -script is an alphabetically sorted list of ISO 15924 codes. The index -of the script in the string, divided by 4, is the internal scriptID.`, - ` -isoRegionOffset needs to be added to the index of regionISO to obtain the regionID -for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for -the UN.M49 codes used for groups.)`, - ` -regionISO holds a list of alphabetically sorted 2-letter ISO region codes. -Each 2-letter codes is followed by two bytes with the following meaning: - - [A-Z}{2}: the first letter of the 2-letter code plus these two - letters form the 3-letter ISO code. - - 0, n: index into altRegionISO3.`, - ` -regionTypes defines the status of a region for various standards.`, - ` -m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are -codes indicating collections of regions.`, - ` -m49Index gives indexes into fromM49 based on the three most significant bits -of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in - fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] -for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. -The region code is stored in the 9 lsb of the indexed value.`, - ` -fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`, - ` -altRegionISO3 holds a list of 3-letter region codes that cannot be -mapped to 2-letter codes using the default algorithm. This is a short list.`, - ` -altRegionIDs holds a list of regionIDs the positions of which match those -of the 3-letter ISO codes in altRegionISO3.`, - ` -variantNumSpecialized is the number of specialized variants in variants.`, - ` -suppressScript is an index from langID to the dominant script for that language, -if it exists. If a script is given, it should be suppressed from the language tag.`, - ` -likelyLang is a lookup table, indexed by langID, for the most likely -scripts and regions given incomplete information. If more entries exist for a -given language, region and script are the index and size respectively -of the list in likelyLangList.`, - ` -likelyLangList holds lists info associated with likelyLang.`, - ` -likelyRegion is a lookup table, indexed by regionID, for the most likely -languages and scripts given incomplete information. If more entries exist -for a given regionID, lang and script are the index and size respectively -of the list in likelyRegionList. -TODO: exclude containers and user-definable regions from the list.`, - ` -likelyRegionList holds lists info associated with likelyRegion.`, - ` -likelyScript is a lookup table, indexed by scriptID, for the most likely -languages and regions given a script.`, - ` -nRegionGroups is the number of region groups.`, - ` -regionInclusion maps region identifiers to sets of regions in regionInclusionBits, -where each set holds all groupings that are directly connected in a region -containment graph.`, - ` -regionInclusionBits is an array of bit vectors where every vector represents -a set of region groupings. These sets are used to compute the distance -between two regions for the purpose of language matching.`, - ` -regionInclusionNext marks, for each entry in regionInclusionBits, the set of -all groups that are reachable from the groups set in the respective entry.`, -} - -// TODO: consider changing some of these structures to tries. This can reduce -// memory, but may increase the need for memory allocations. This could be -// mitigated if we can piggyback on language tags for common cases. - -func failOnError(e error) { - if e != nil { - log.Panic(e) - } -} - -type setType int - -const ( - Indexed setType = 1 + iota // all elements must be of same size - Linear -) - -type stringSet struct { - s []string - sorted, frozen bool - - // We often need to update values after the creation of an index is completed. - // We include a convenience map for keeping track of this. - update map[string]string - typ setType // used for checking. -} - -func (ss *stringSet) clone() stringSet { - c := *ss - c.s = append([]string(nil), c.s...) - return c -} - -func (ss *stringSet) setType(t setType) { - if ss.typ != t && ss.typ != 0 { - log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ) - } -} - -// parse parses a whitespace-separated string and initializes ss with its -// components. -func (ss *stringSet) parse(s string) { - scan := bufio.NewScanner(strings.NewReader(s)) - scan.Split(bufio.ScanWords) - for scan.Scan() { - ss.add(scan.Text()) - } -} - -func (ss *stringSet) assertChangeable() { - if ss.frozen { - log.Panic("attempt to modify a frozen stringSet") - } -} - -func (ss *stringSet) add(s string) { - ss.assertChangeable() - ss.s = append(ss.s, s) - ss.sorted = ss.frozen -} - -func (ss *stringSet) freeze() { - ss.compact() - ss.frozen = true -} - -func (ss *stringSet) compact() { - if ss.sorted { - return - } - a := ss.s - sort.Strings(a) - k := 0 - for i := 1; i < len(a); i++ { - if a[k] != a[i] { - a[k+1] = a[i] - k++ - } - } - ss.s = a[:k+1] - ss.sorted = ss.frozen -} - -type funcSorter struct { - fn func(a, b string) bool - sort.StringSlice -} - -func (s funcSorter) Less(i, j int) bool { - return s.fn(s.StringSlice[i], s.StringSlice[j]) -} - -func (ss *stringSet) sortFunc(f func(a, b string) bool) { - ss.compact() - sort.Sort(funcSorter{f, sort.StringSlice(ss.s)}) -} - -func (ss *stringSet) remove(s string) { - ss.assertChangeable() - if i, ok := ss.find(s); ok { - copy(ss.s[i:], ss.s[i+1:]) - ss.s = ss.s[:len(ss.s)-1] - } -} - -func (ss *stringSet) replace(ol, nu string) { - ss.s[ss.index(ol)] = nu - ss.sorted = ss.frozen -} - -func (ss *stringSet) index(s string) int { - ss.setType(Indexed) - i, ok := ss.find(s) - if !ok { - if i < len(ss.s) { - log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i]) - } - log.Panicf("find: item %q is not in list", s) - - } - return i -} - -func (ss *stringSet) find(s string) (int, bool) { - ss.compact() - i := sort.SearchStrings(ss.s, s) - return i, i != len(ss.s) && ss.s[i] == s -} - -func (ss *stringSet) slice() []string { - ss.compact() - return ss.s -} - -func (ss *stringSet) updateLater(v, key string) { - if ss.update == nil { - ss.update = map[string]string{} - } - ss.update[v] = key -} - -// join joins the string and ensures that all entries are of the same length. -func (ss *stringSet) join() string { - ss.setType(Indexed) - n := len(ss.s[0]) - for _, s := range ss.s { - if len(s) != n { - log.Panicf("join: not all entries are of the same length: %q", s) - } - } - ss.s = append(ss.s, strings.Repeat("\xff", n)) - return strings.Join(ss.s, "") -} - -// ianaEntry holds information for an entry in the IANA Language Subtag Repository. -// All types use the same entry. -// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various -// fields. -type ianaEntry struct { - typ string - description []string - scope string - added string - preferred string - deprecated string - suppressScript string - macro string - prefix []string -} - -type builder struct { - w *gen.CodeWriter - hw io.Writer // MultiWriter for w and w.Hash - data *cldr.CLDR - supp *cldr.SupplementalData - - // indices - locale stringSet // common locales - lang stringSet // canonical language ids (2 or 3 letter ISO codes) with data - langNoIndex stringSet // 3-letter ISO codes with no associated data - script stringSet // 4-letter ISO codes - region stringSet // 2-letter ISO or 3-digit UN M49 codes - variant stringSet // 4-8-alphanumeric variant code. - - // Region codes that are groups with their corresponding group IDs. - groups map[int]index - - // langInfo - registry map[string]*ianaEntry -} - -type index uint - -func newBuilder(w *gen.CodeWriter) *builder { - r := gen.OpenCLDRCoreZip() - defer r.Close() - d := &cldr.Decoder{} - data, err := d.DecodeZip(r) - failOnError(err) - b := builder{ - w: w, - hw: io.MultiWriter(w, w.Hash), - data: data, - supp: data.Supplemental(), - } - b.parseRegistry() - return &b -} - -func (b *builder) parseRegistry() { - r := gen.OpenIANAFile("assignments/language-subtag-registry") - defer r.Close() - b.registry = make(map[string]*ianaEntry) - - scan := bufio.NewScanner(r) - scan.Split(bufio.ScanWords) - var record *ianaEntry - for more := scan.Scan(); more; { - key := scan.Text() - more = scan.Scan() - value := scan.Text() - switch key { - case "Type:": - record = &ianaEntry{typ: value} - case "Subtag:", "Tag:": - if s := strings.SplitN(value, "..", 2); len(s) > 1 { - for a := s[0]; a <= s[1]; a = inc(a) { - b.addToRegistry(a, record) - } - } else { - b.addToRegistry(value, record) - } - case "Suppress-Script:": - record.suppressScript = value - case "Added:": - record.added = value - case "Deprecated:": - record.deprecated = value - case "Macrolanguage:": - record.macro = value - case "Preferred-Value:": - record.preferred = value - case "Prefix:": - record.prefix = append(record.prefix, value) - case "Scope:": - record.scope = value - case "Description:": - buf := []byte(value) - for more = scan.Scan(); more; more = scan.Scan() { - b := scan.Bytes() - if b[0] == '%' || b[len(b)-1] == ':' { - break - } - buf = append(buf, ' ') - buf = append(buf, b...) - } - record.description = append(record.description, string(buf)) - continue - default: - continue - } - more = scan.Scan() - } - if scan.Err() != nil { - log.Panic(scan.Err()) - } -} - -func (b *builder) addToRegistry(key string, entry *ianaEntry) { - if info, ok := b.registry[key]; ok { - if info.typ != "language" || entry.typ != "extlang" { - log.Fatalf("parseRegistry: tag %q already exists", key) - } - } else { - b.registry[key] = entry - } -} - -var commentIndex = make(map[string]string) - -func init() { - for _, s := range comment { - key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0]) - commentIndex[key] = s - } -} - -func (b *builder) comment(name string) { - if s := commentIndex[name]; len(s) > 0 { - b.w.WriteComment(s) - } else { - fmt.Fprintln(b.w) - } -} - -func (b *builder) pf(f string, x ...interface{}) { - fmt.Fprintf(b.hw, f, x...) - fmt.Fprint(b.hw, "\n") -} - -func (b *builder) p(x ...interface{}) { - fmt.Fprintln(b.hw, x...) -} - -func (b *builder) addSize(s int) { - b.w.Size += s - b.pf("// Size: %d bytes", s) -} - -func (b *builder) writeConst(name string, x interface{}) { - b.comment(name) - b.w.WriteConst(name, x) -} - -// writeConsts computes f(v) for all v in values and writes the results -// as constants named _v to a single constant block. -func (b *builder) writeConsts(f func(string) int, values ...string) { - b.pf("const (") - for _, v := range values { - b.pf("\t_%s = %v", v, f(v)) - } - b.pf(")") -} - -// writeType writes the type of the given value, which must be a struct. -func (b *builder) writeType(value interface{}) { - b.comment(reflect.TypeOf(value).Name()) - b.w.WriteType(value) -} - -func (b *builder) writeSlice(name string, ss interface{}) { - b.writeSliceAddSize(name, 0, ss) -} - -func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) { - b.comment(name) - b.w.Size += extraSize - v := reflect.ValueOf(ss) - t := v.Type().Elem() - b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len()) - - fmt.Fprintf(b.w, "var %s = ", name) - b.w.WriteArray(ss) - b.p() -} - -type FromTo struct { - From, To uint16 -} - -func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) { - ss.sortFunc(func(a, b string) bool { - return index(a) < index(b) - }) - m := []FromTo{} - for _, s := range ss.s { - m = append(m, FromTo{index(s), index(ss.update[s])}) - } - b.writeSlice(name, m) -} - -const base = 'z' - 'a' + 1 - -func strToInt(s string) uint { - v := uint(0) - for i := 0; i < len(s); i++ { - v *= base - v += uint(s[i] - 'a') - } - return v -} - -// converts the given integer to the original ASCII string passed to strToInt. -// len(s) must match the number of characters obtained. -func intToStr(v uint, s []byte) { - for i := len(s) - 1; i >= 0; i-- { - s[i] = byte(v%base) + 'a' - v /= base - } -} - -func (b *builder) writeBitVector(name string, ss []string) { - vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8))) - for _, s := range ss { - v := strToInt(s) - vec[v/8] |= 1 << (v % 8) - } - b.writeSlice(name, vec) -} - -// TODO: convert this type into a list or two-stage trie. -func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) { - b.comment(name) - v := reflect.ValueOf(m) - sz := v.Len() * (2 + int(v.Type().Key().Size())) - for _, k := range m { - sz += len(k) - } - b.addSize(sz) - keys := []string{} - b.pf(`var %s = map[string]uint16{`, name) - for k := range m { - keys = append(keys, k) - } - sort.Strings(keys) - for _, k := range keys { - b.pf("\t%q: %v,", k, f(m[k])) - } - b.p("}") -} - -func (b *builder) writeMap(name string, m interface{}) { - b.comment(name) - v := reflect.ValueOf(m) - sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size())) - b.addSize(sz) - f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool { - return strings.IndexRune("{}, ", r) != -1 - }) - sort.Strings(f[1:]) - b.pf(`var %s = %s{`, name, f[0]) - for _, kv := range f[1:] { - b.pf("\t%s,", kv) - } - b.p("}") -} - -func (b *builder) langIndex(s string) uint16 { - if s == "und" { - return 0 - } - if i, ok := b.lang.find(s); ok { - return uint16(i) - } - return uint16(strToInt(s)) + uint16(len(b.lang.s)) -} - -// inc advances the string to its lexicographical successor. -func inc(s string) string { - const maxTagLength = 4 - var buf [maxTagLength]byte - intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)]) - for i := 0; i < len(s); i++ { - if s[i] <= 'Z' { - buf[i] -= 'a' - 'A' - } - } - return string(buf[:len(s)]) -} - -func (b *builder) parseIndices() { - meta := b.supp.Metadata - - for k, v := range b.registry { - var ss *stringSet - switch v.typ { - case "language": - if len(k) == 2 || v.suppressScript != "" || v.scope == "special" { - b.lang.add(k) - continue - } else { - ss = &b.langNoIndex - } - case "region": - ss = &b.region - case "script": - ss = &b.script - case "variant": - ss = &b.variant - default: - continue - } - ss.add(k) - } - // Include any language for which there is data. - for _, lang := range b.data.Locales() { - if x := b.data.RawLDML(lang); false || - x.LocaleDisplayNames != nil || - x.Characters != nil || - x.Delimiters != nil || - x.Measurement != nil || - x.Dates != nil || - x.Numbers != nil || - x.Units != nil || - x.ListPatterns != nil || - x.Collations != nil || - x.Segmentations != nil || - x.Rbnf != nil || - x.Annotations != nil || - x.Metadata != nil { - - from := strings.Split(lang, "_") - if lang := from[0]; lang != "root" { - b.lang.add(lang) - } - } - } - // Include locales for plural rules, which uses a different structure. - for _, plurals := range b.data.Supplemental().Plurals { - for _, rules := range plurals.PluralRules { - for _, lang := range strings.Split(rules.Locales, " ") { - if lang = strings.Split(lang, "_")[0]; lang != "root" { - b.lang.add(lang) - } - } - } - } - // Include languages in likely subtags. - for _, m := range b.supp.LikelySubtags.LikelySubtag { - from := strings.Split(m.From, "_") - b.lang.add(from[0]) - } - // Include ISO-639 alpha-3 bibliographic entries. - for _, a := range meta.Alias.LanguageAlias { - if a.Reason == "bibliographic" { - b.langNoIndex.add(a.Type) - } - } - // Include regions in territoryAlias (not all are in the IANA registry!) - for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { - if len(reg.Type) == 2 { - b.region.add(reg.Type) - } - } - - for _, s := range b.lang.s { - if len(s) == 3 { - b.langNoIndex.remove(s) - } - } - b.writeConst("NumLanguages", len(b.lang.slice())+len(b.langNoIndex.slice())) - b.writeConst("NumScripts", len(b.script.slice())) - b.writeConst("NumRegions", len(b.region.slice())) - - // Add dummy codes at the start of each list to represent "unspecified". - b.lang.add("---") - b.script.add("----") - b.region.add("---") - - // common locales - b.locale.parse(meta.DefaultContent.Locales) -} - -// TODO: region inclusion data will probably not be use used in future matchers. - -func (b *builder) computeRegionGroups() { - b.groups = make(map[int]index) - - // Create group indices. - for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID. - b.groups[i] = index(len(b.groups)) - } - for _, g := range b.supp.TerritoryContainment.Group { - // Skip UN and EURO zone as they are flattening the containment - // relationship. - if g.Type == "EZ" || g.Type == "UN" { - continue - } - group := b.region.index(g.Type) - if _, ok := b.groups[group]; !ok { - b.groups[group] = index(len(b.groups)) - } - } - if len(b.groups) > 64 { - log.Fatalf("only 64 groups supported, found %d", len(b.groups)) - } - b.writeConst("nRegionGroups", len(b.groups)) -} - -var langConsts = []string{ - "af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de", "el", "en", "es", - "et", "fa", "fi", "fil", "fr", "gu", "he", "hi", "hr", "hu", "hy", "id", "is", - "it", "ja", "ka", "kk", "km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml", - "mn", "mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa", "pl", "pt", - "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr", "sv", "sw", "ta", "te", "th", - "tl", "tn", "tr", "uk", "ur", "uz", "vi", "zh", "zu", - - // constants for grandfathered tags (if not already defined) - "jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay", "tsu", - "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn", -} - -// writeLanguage generates all tables needed for language canonicalization. -func (b *builder) writeLanguage() { - meta := b.supp.Metadata - - b.writeConst("nonCanonicalUnd", b.lang.index("und")) - b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...) - b.writeConst("langPrivateStart", b.langIndex("qaa")) - b.writeConst("langPrivateEnd", b.langIndex("qtz")) - - // Get language codes that need to be mapped (overlong 3-letter codes, - // deprecated 2-letter codes, legacy and grandfathered tags.) - langAliasMap := stringSet{} - aliasTypeMap := map[string]AliasType{} - - // altLangISO3 get the alternative ISO3 names that need to be mapped. - altLangISO3 := stringSet{} - // Add dummy start to avoid the use of index 0. - altLangISO3.add("---") - altLangISO3.updateLater("---", "aa") - - lang := b.lang.clone() - for _, a := range meta.Alias.LanguageAlias { - if a.Replacement == "" { - a.Replacement = "und" - } - // TODO: support mapping to tags - repl := strings.SplitN(a.Replacement, "_", 2)[0] - if a.Reason == "overlong" { - if len(a.Replacement) == 2 && len(a.Type) == 3 { - lang.updateLater(a.Replacement, a.Type) - } - } else if len(a.Type) <= 3 { - switch a.Reason { - case "macrolanguage": - aliasTypeMap[a.Type] = Macro - case "deprecated": - // handled elsewhere - continue - case "bibliographic", "legacy": - if a.Type == "no" { - continue - } - aliasTypeMap[a.Type] = Legacy - default: - log.Fatalf("new %s alias: %s", a.Reason, a.Type) - } - langAliasMap.add(a.Type) - langAliasMap.updateLater(a.Type, repl) - } - } - // Manually add the mapping of "nb" (Norwegian) to its macro language. - // This can be removed if CLDR adopts this change. - langAliasMap.add("nb") - langAliasMap.updateLater("nb", "no") - aliasTypeMap["nb"] = Macro - - for k, v := range b.registry { - // Also add deprecated values for 3-letter ISO codes, which CLDR omits. - if v.typ == "language" && v.deprecated != "" && v.preferred != "" { - langAliasMap.add(k) - langAliasMap.updateLater(k, v.preferred) - aliasTypeMap[k] = Deprecated - } - } - // Fix CLDR mappings. - lang.updateLater("tl", "tgl") - lang.updateLater("sh", "hbs") - lang.updateLater("mo", "mol") - lang.updateLater("no", "nor") - lang.updateLater("tw", "twi") - lang.updateLater("nb", "nob") - lang.updateLater("ak", "aka") - lang.updateLater("bh", "bih") - - // Ensure that each 2-letter code is matched with a 3-letter code. - for _, v := range lang.s[1:] { - s, ok := lang.update[v] - if !ok { - if s, ok = lang.update[langAliasMap.update[v]]; !ok { - continue - } - lang.update[v] = s - } - if v[0] != s[0] { - altLangISO3.add(s) - altLangISO3.updateLater(s, v) - } - } - - // Complete canonicalized language tags. - lang.freeze() - for i, v := range lang.s { - // We can avoid these manual entries by using the IANA registry directly. - // Seems easier to update the list manually, as changes are rare. - // The panic in this loop will trigger if we miss an entry. - add := "" - if s, ok := lang.update[v]; ok { - if s[0] == v[0] { - add = s[1:] - } else { - add = string([]byte{0, byte(altLangISO3.index(s))}) - } - } else if len(v) == 3 { - add = "\x00" - } else { - log.Panicf("no data for long form of %q", v) - } - lang.s[i] += add - } - b.writeConst("lang", tag.Index(lang.join())) - - b.writeConst("langNoIndexOffset", len(b.lang.s)) - - // space of all valid 3-letter language identifiers. - b.writeBitVector("langNoIndex", b.langNoIndex.slice()) - - altLangIndex := []uint16{} - for i, s := range altLangISO3.slice() { - altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))}) - if i > 0 { - idx := b.lang.index(altLangISO3.update[s]) - altLangIndex = append(altLangIndex, uint16(idx)) - } - } - b.writeConst("altLangISO3", tag.Index(altLangISO3.join())) - b.writeSlice("altLangIndex", altLangIndex) - - b.writeSortedMap("AliasMap", &langAliasMap, b.langIndex) - types := make([]AliasType, len(langAliasMap.s)) - for i, s := range langAliasMap.s { - types[i] = aliasTypeMap[s] - } - b.writeSlice("AliasTypes", types) -} - -var scriptConsts = []string{ - "Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy", - "Zzzz", -} - -func (b *builder) writeScript() { - b.writeConsts(b.script.index, scriptConsts...) - b.writeConst("script", tag.Index(b.script.join())) - - supp := make([]uint8, len(b.lang.slice())) - for i, v := range b.lang.slice()[1:] { - if sc := b.registry[v].suppressScript; sc != "" { - supp[i+1] = uint8(b.script.index(sc)) - } - } - b.writeSlice("suppressScript", supp) - - // There is only one deprecated script in CLDR. This value is hard-coded. - // We check here if the code must be updated. - for _, a := range b.supp.Metadata.Alias.ScriptAlias { - if a.Type != "Qaai" { - log.Panicf("unexpected deprecated stript %q", a.Type) - } - } -} - -func parseM49(s string) int16 { - if len(s) == 0 { - return 0 - } - v, err := strconv.ParseUint(s, 10, 10) - failOnError(err) - return int16(v) -} - -var regionConsts = []string{ - "001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US", - "ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo. -} - -func (b *builder) writeRegion() { - b.writeConsts(b.region.index, regionConsts...) - - isoOffset := b.region.index("AA") - m49map := make([]int16, len(b.region.slice())) - fromM49map := make(map[int16]int) - altRegionISO3 := "" - altRegionIDs := []uint16{} - - b.writeConst("isoRegionOffset", isoOffset) - - // 2-letter region lookup and mapping to numeric codes. - regionISO := b.region.clone() - regionISO.s = regionISO.s[isoOffset:] - regionISO.sorted = false - - regionTypes := make([]byte, len(b.region.s)) - - // Is the region valid BCP 47? - for s, e := range b.registry { - if len(s) == 2 && s == strings.ToUpper(s) { - i := b.region.index(s) - for _, d := range e.description { - if strings.Contains(d, "Private use") { - regionTypes[i] = iso3166UserAssigned - } - } - regionTypes[i] |= bcp47Region - } - } - - // Is the region a valid ccTLD? - r := gen.OpenIANAFile("domains/root/db") - defer r.Close() - - buf, err := ioutil.ReadAll(r) - failOnError(err) - re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) - for _, m := range re.FindAllSubmatch(buf, -1) { - i := b.region.index(strings.ToUpper(string(m[1]))) - regionTypes[i] |= ccTLD - } - - b.writeSlice("regionTypes", regionTypes) - - iso3Set := make(map[string]int) - update := func(iso2, iso3 string) { - i := regionISO.index(iso2) - if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] { - regionISO.s[i] += iso3[1:] - iso3Set[iso3] = -1 - } else { - if ok && j >= 0 { - regionISO.s[i] += string([]byte{0, byte(j)}) - } else { - iso3Set[iso3] = len(altRegionISO3) - regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))}) - altRegionISO3 += iso3 - altRegionIDs = append(altRegionIDs, uint16(isoOffset+i)) - } - } - } - for _, tc := range b.supp.CodeMappings.TerritoryCodes { - i := regionISO.index(tc.Type) + isoOffset - if d := m49map[i]; d != 0 { - log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d) - } - m49 := parseM49(tc.Numeric) - m49map[i] = m49 - if r := fromM49map[m49]; r == 0 { - fromM49map[m49] = i - } else if r != i { - dep := b.registry[regionISO.s[r-isoOffset]].deprecated - if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) { - fromM49map[m49] = i - } - } - } - for _, ta := range b.supp.Metadata.Alias.TerritoryAlias { - if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 { - from := parseM49(ta.Type) - if r := fromM49map[from]; r == 0 { - fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset - } - } - } - for _, tc := range b.supp.CodeMappings.TerritoryCodes { - if len(tc.Alpha3) == 3 { - update(tc.Type, tc.Alpha3) - } - } - // This entries are not included in territoryCodes. Mostly 3-letter variants - // of deleted codes and an entry for QU. - for _, m := range []struct{ iso2, iso3 string }{ - {"CT", "CTE"}, - {"DY", "DHY"}, - {"HV", "HVO"}, - {"JT", "JTN"}, - {"MI", "MID"}, - {"NH", "NHB"}, - {"NQ", "ATN"}, - {"PC", "PCI"}, - {"PU", "PUS"}, - {"PZ", "PCZ"}, - {"RH", "RHO"}, - {"VD", "VDR"}, - {"WK", "WAK"}, - // These three-letter codes are used for others as well. - {"FQ", "ATF"}, - } { - update(m.iso2, m.iso3) - } - for i, s := range regionISO.s { - if len(s) != 4 { - regionISO.s[i] = s + " " - } - } - b.writeConst("regionISO", tag.Index(regionISO.join())) - b.writeConst("altRegionISO3", altRegionISO3) - b.writeSlice("altRegionIDs", altRegionIDs) - - // Create list of deprecated regions. - // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only - // Transitionally-reserved mapping not included. - regionOldMap := stringSet{} - // Include regions in territoryAlias (not all are in the IANA registry!) - for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { - if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 { - regionOldMap.add(reg.Type) - regionOldMap.updateLater(reg.Type, reg.Replacement) - i, _ := regionISO.find(reg.Type) - j, _ := regionISO.find(reg.Replacement) - if k := m49map[i+isoOffset]; k == 0 { - m49map[i+isoOffset] = m49map[j+isoOffset] - } - } - } - b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 { - return uint16(b.region.index(s)) - }) - // 3-digit region lookup, groupings. - for i := 1; i < isoOffset; i++ { - m := parseM49(b.region.s[i]) - m49map[i] = m - fromM49map[m] = i - } - b.writeSlice("m49", m49map) - - const ( - searchBits = 7 - regionBits = 9 - ) - if len(m49map) >= 1< %d", len(m49map), 1<>searchBits] = int16(len(fromM49)) - } - b.writeSlice("m49Index", m49Index) - b.writeSlice("fromM49", fromM49) -} - -const ( - // TODO: put these lists in regionTypes as user data? Could be used for - // various optimizations and refinements and could be exposed in the API. - iso3166Except = "AC CP DG EA EU FX IC SU TA UK" - iso3166Trans = "AN BU CS NT TP YU ZR" // SF is not in our set of Regions. - // DY and RH are actually not deleted, but indeterminately reserved. - iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD" -) - -const ( - iso3166UserAssigned = 1 << iota - ccTLD - bcp47Region -) - -func find(list []string, s string) int { - for i, t := range list { - if t == s { - return i - } - } - return -1 -} - -// writeVariants generates per-variant information and creates a map from variant -// name to index value. We assign index values such that sorting multiple -// variants by index value will result in the correct order. -// There are two types of variants: specialized and general. Specialized variants -// are only applicable to certain language or language-script pairs. Generalized -// variants apply to any language. Generalized variants always sort after -// specialized variants. We will therefore always assign a higher index value -// to a generalized variant than any other variant. Generalized variants are -// sorted alphabetically among themselves. -// Specialized variants may also sort after other specialized variants. Such -// variants will be ordered after any of the variants they may follow. -// We assume that if a variant x is followed by a variant y, then for any prefix -// p of x, p-x is a prefix of y. This allows us to order tags based on the -// maximum of the length of any of its prefixes. -// TODO: it is possible to define a set of Prefix values on variants such that -// a total order cannot be defined to the point that this algorithm breaks. -// In other words, we cannot guarantee the same order of variants for the -// future using the same algorithm or for non-compliant combinations of -// variants. For this reason, consider using simple alphabetic sorting -// of variants and ignore Prefix restrictions altogether. -func (b *builder) writeVariant() { - generalized := stringSet{} - specialized := stringSet{} - specializedExtend := stringSet{} - // Collate the variants by type and check assumptions. - for _, v := range b.variant.slice() { - e := b.registry[v] - if len(e.prefix) == 0 { - generalized.add(v) - continue - } - c := strings.Split(e.prefix[0], "-") - hasScriptOrRegion := false - if len(c) > 1 { - _, hasScriptOrRegion = b.script.find(c[1]) - if !hasScriptOrRegion { - _, hasScriptOrRegion = b.region.find(c[1]) - - } - } - if len(c) == 1 || len(c) == 2 && hasScriptOrRegion { - // Variant is preceded by a language. - specialized.add(v) - continue - } - // Variant is preceded by another variant. - specializedExtend.add(v) - prefix := c[0] + "-" - if hasScriptOrRegion { - prefix += c[1] - } - for _, p := range e.prefix { - // Verify that the prefix minus the last element is a prefix of the - // predecessor element. - i := strings.LastIndex(p, "-") - pred := b.registry[p[i+1:]] - if find(pred.prefix, p[:i]) < 0 { - log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v) - } - // The sorting used below does not work in the general case. It works - // if we assume that variants that may be followed by others only have - // prefixes of the same length. Verify this. - count := strings.Count(p[:i], "-") - for _, q := range pred.prefix { - if c := strings.Count(q, "-"); c != count { - log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count) - } - } - if !strings.HasPrefix(p, prefix) { - log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix) - } - } - } - - // Sort extended variants. - a := specializedExtend.s - less := func(v, w string) bool { - // Sort by the maximum number of elements. - maxCount := func(s string) (max int) { - for _, p := range b.registry[s].prefix { - if c := strings.Count(p, "-"); c > max { - max = c - } - } - return - } - if cv, cw := maxCount(v), maxCount(w); cv != cw { - return cv < cw - } - // Sort by name as tie breaker. - return v < w - } - sort.Sort(funcSorter{less, sort.StringSlice(a)}) - specializedExtend.frozen = true - - // Create index from variant name to index. - variantIndex := make(map[string]uint8) - add := func(s []string) { - for _, v := range s { - variantIndex[v] = uint8(len(variantIndex)) - } - } - add(specialized.slice()) - add(specializedExtend.s) - numSpecialized := len(variantIndex) - add(generalized.slice()) - if n := len(variantIndex); n > 255 { - log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n) - } - b.writeMap("variantIndex", variantIndex) - b.writeConst("variantNumSpecialized", numSpecialized) -} - -func (b *builder) writeLanguageInfo() { -} - -// writeLikelyData writes tables that are used both for finding parent relations and for -// language matching. Each entry contains additional bits to indicate the status of the -// data to know when it cannot be used for parent relations. -func (b *builder) writeLikelyData() { - const ( - isList = 1 << iota - scriptInFrom - regionInFrom - ) - type ( // generated types - likelyScriptRegion struct { - region uint16 - script uint8 - flags uint8 - } - likelyLangScript struct { - lang uint16 - script uint8 - flags uint8 - } - likelyLangRegion struct { - lang uint16 - region uint16 - } - // likelyTag is used for getting likely tags for group regions, where - // the likely region might be a region contained in the group. - likelyTag struct { - lang uint16 - region uint16 - script uint8 - } - ) - var ( // generated variables - likelyRegionGroup = make([]likelyTag, len(b.groups)) - likelyLang = make([]likelyScriptRegion, len(b.lang.s)) - likelyRegion = make([]likelyLangScript, len(b.region.s)) - likelyScript = make([]likelyLangRegion, len(b.script.s)) - likelyLangList = []likelyScriptRegion{} - likelyRegionList = []likelyLangScript{} - ) - type fromTo struct { - from, to []string - } - langToOther := map[int][]fromTo{} - regionToOther := map[int][]fromTo{} - for _, m := range b.supp.LikelySubtags.LikelySubtag { - from := strings.Split(m.From, "_") - to := strings.Split(m.To, "_") - if len(to) != 3 { - log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to)) - } - if len(from) > 3 { - log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from)) - } - if from[0] != to[0] && from[0] != "und" { - log.Fatalf("unexpected language change in expansion: %s -> %s", from, to) - } - if len(from) == 3 { - if from[2] != to[2] { - log.Fatalf("unexpected region change in expansion: %s -> %s", from, to) - } - if from[0] != "und" { - log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to) - } - } - if len(from) == 1 || from[0] != "und" { - id := 0 - if from[0] != "und" { - id = b.lang.index(from[0]) - } - langToOther[id] = append(langToOther[id], fromTo{from, to}) - } else if len(from) == 2 && len(from[1]) == 4 { - sid := b.script.index(from[1]) - likelyScript[sid].lang = uint16(b.langIndex(to[0])) - likelyScript[sid].region = uint16(b.region.index(to[2])) - } else { - r := b.region.index(from[len(from)-1]) - if id, ok := b.groups[r]; ok { - if from[0] != "und" { - log.Fatalf("region changed unexpectedly: %s -> %s", from, to) - } - likelyRegionGroup[id].lang = uint16(b.langIndex(to[0])) - likelyRegionGroup[id].script = uint8(b.script.index(to[1])) - likelyRegionGroup[id].region = uint16(b.region.index(to[2])) - } else { - regionToOther[r] = append(regionToOther[r], fromTo{from, to}) - } - } - } - b.writeType(likelyLangRegion{}) - b.writeSlice("likelyScript", likelyScript) - - for id := range b.lang.s { - list := langToOther[id] - if len(list) == 1 { - likelyLang[id].region = uint16(b.region.index(list[0].to[2])) - likelyLang[id].script = uint8(b.script.index(list[0].to[1])) - } else if len(list) > 1 { - likelyLang[id].flags = isList - likelyLang[id].region = uint16(len(likelyLangList)) - likelyLang[id].script = uint8(len(list)) - for _, x := range list { - flags := uint8(0) - if len(x.from) > 1 { - if x.from[1] == x.to[2] { - flags = regionInFrom - } else { - flags = scriptInFrom - } - } - likelyLangList = append(likelyLangList, likelyScriptRegion{ - region: uint16(b.region.index(x.to[2])), - script: uint8(b.script.index(x.to[1])), - flags: flags, - }) - } - } - } - // TODO: merge suppressScript data with this table. - b.writeType(likelyScriptRegion{}) - b.writeSlice("likelyLang", likelyLang) - b.writeSlice("likelyLangList", likelyLangList) - - for id := range b.region.s { - list := regionToOther[id] - if len(list) == 1 { - likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0])) - likelyRegion[id].script = uint8(b.script.index(list[0].to[1])) - if len(list[0].from) > 2 { - likelyRegion[id].flags = scriptInFrom - } - } else if len(list) > 1 { - likelyRegion[id].flags = isList - likelyRegion[id].lang = uint16(len(likelyRegionList)) - likelyRegion[id].script = uint8(len(list)) - for i, x := range list { - if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 { - log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i) - } - x := likelyLangScript{ - lang: uint16(b.langIndex(x.to[0])), - script: uint8(b.script.index(x.to[1])), - } - if len(list[0].from) > 2 { - x.flags = scriptInFrom - } - likelyRegionList = append(likelyRegionList, x) - } - } - } - b.writeType(likelyLangScript{}) - b.writeSlice("likelyRegion", likelyRegion) - b.writeSlice("likelyRegionList", likelyRegionList) - - b.writeType(likelyTag{}) - b.writeSlice("likelyRegionGroup", likelyRegionGroup) -} - -func (b *builder) writeRegionInclusionData() { - var ( - // mm holds for each group the set of groups with a distance of 1. - mm = make(map[int][]index) - - // containment holds for each group the transitive closure of - // containment of other groups. - containment = make(map[index][]index) - ) - for _, g := range b.supp.TerritoryContainment.Group { - // Skip UN and EURO zone as they are flattening the containment - // relationship. - if g.Type == "EZ" || g.Type == "UN" { - continue - } - group := b.region.index(g.Type) - groupIdx := b.groups[group] - for _, mem := range strings.Split(g.Contains, " ") { - r := b.region.index(mem) - mm[r] = append(mm[r], groupIdx) - if g, ok := b.groups[r]; ok { - mm[group] = append(mm[group], g) - containment[groupIdx] = append(containment[groupIdx], g) - } - } - } - - regionContainment := make([]uint64, len(b.groups)) - for _, g := range b.groups { - l := containment[g] - - // Compute the transitive closure of containment. - for i := 0; i < len(l); i++ { - l = append(l, containment[l[i]]...) - } - - // Compute the bitmask. - regionContainment[g] = 1 << g - for _, v := range l { - regionContainment[g] |= 1 << v - } - } - b.writeSlice("regionContainment", regionContainment) - - regionInclusion := make([]uint8, len(b.region.s)) - bvs := make(map[uint64]index) - // Make the first bitvector positions correspond with the groups. - for r, i := range b.groups { - bv := uint64(1 << i) - for _, g := range mm[r] { - bv |= 1 << g - } - bvs[bv] = i - regionInclusion[r] = uint8(bvs[bv]) - } - for r := 1; r < len(b.region.s); r++ { - if _, ok := b.groups[r]; !ok { - bv := uint64(0) - for _, g := range mm[r] { - bv |= 1 << g - } - if bv == 0 { - // Pick the world for unspecified regions. - bv = 1 << b.groups[b.region.index("001")] - } - if _, ok := bvs[bv]; !ok { - bvs[bv] = index(len(bvs)) - } - regionInclusion[r] = uint8(bvs[bv]) - } - } - b.writeSlice("regionInclusion", regionInclusion) - regionInclusionBits := make([]uint64, len(bvs)) - for k, v := range bvs { - regionInclusionBits[v] = uint64(k) - } - // Add bit vectors for increasingly large distances until a fixed point is reached. - regionInclusionNext := []uint8{} - for i := 0; i < len(regionInclusionBits); i++ { - bits := regionInclusionBits[i] - next := bits - for i := uint(0); i < uint(len(b.groups)); i++ { - if bits&(1< 6 { - log.Fatalf("Too many groups: %d", i) - } - idToIndex[mv.Id] = uint8(i + 1) - // TODO: also handle '-' - for _, r := range strings.Split(mv.Value, "+") { - todo := []string{r} - for k := 0; k < len(todo); k++ { - r := todo[k] - regionToGroups[b.regionIndex(r)] |= 1 << uint8(i) - todo = append(todo, regionHierarchy[r]...) - } - } - } - b.w.WriteVar("regionToGroups", regionToGroups) - - // maps language id to in- and out-of-group region. - paradigmLocales := [][3]uint16{} - locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ") - for i := 0; i < len(locales); i += 2 { - x := [3]uint16{} - for j := 0; j < 2; j++ { - pc := strings.SplitN(locales[i+j], "-", 2) - x[0] = b.langIndex(pc[0]) - if len(pc) == 2 { - x[1+j] = uint16(b.regionIndex(pc[1])) - } - } - paradigmLocales = append(paradigmLocales, x) - } - b.w.WriteVar("paradigmLocales", paradigmLocales) - - b.w.WriteType(mutualIntelligibility{}) - b.w.WriteType(scriptIntelligibility{}) - b.w.WriteType(regionIntelligibility{}) - - matchLang := []mutualIntelligibility{} - matchScript := []scriptIntelligibility{} - matchRegion := []regionIntelligibility{} - // Convert the languageMatch entries in lists keyed by desired language. - for _, m := range lm[0].LanguageMatch { - // Different versions of CLDR use different separators. - desired := strings.Replace(m.Desired, "-", "_", -1) - supported := strings.Replace(m.Supported, "-", "_", -1) - d := strings.Split(desired, "_") - s := strings.Split(supported, "_") - if len(d) != len(s) { - log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) - continue - } - distance, _ := strconv.ParseInt(m.Distance, 10, 8) - switch len(d) { - case 2: - if desired == supported && desired == "*_*" { - continue - } - // language-script pair. - matchScript = append(matchScript, scriptIntelligibility{ - wantLang: uint16(b.langIndex(d[0])), - haveLang: uint16(b.langIndex(s[0])), - wantScript: uint8(b.scriptIndex(d[1])), - haveScript: uint8(b.scriptIndex(s[1])), - distance: uint8(distance), - }) - if m.Oneway != "true" { - matchScript = append(matchScript, scriptIntelligibility{ - wantLang: uint16(b.langIndex(s[0])), - haveLang: uint16(b.langIndex(d[0])), - wantScript: uint8(b.scriptIndex(s[1])), - haveScript: uint8(b.scriptIndex(d[1])), - distance: uint8(distance), - }) - } - case 1: - if desired == supported && desired == "*" { - continue - } - if distance == 1 { - // nb == no is already handled by macro mapping. Check there - // really is only this case. - if d[0] != "no" || s[0] != "nb" { - log.Fatalf("unhandled equivalence %s == %s", s[0], d[0]) - } - continue - } - // TODO: consider dropping oneway field and just doubling the entry. - matchLang = append(matchLang, mutualIntelligibility{ - want: uint16(b.langIndex(d[0])), - have: uint16(b.langIndex(s[0])), - distance: uint8(distance), - oneway: m.Oneway == "true", - }) - case 3: - if desired == supported && desired == "*_*_*" { - continue - } - if desired != supported { - // This is now supported by CLDR, but only one case, which - // should already be covered by paradigm locales. For instance, - // test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in - // testdata/CLDRLocaleMatcherTest.txt tests this. - if supported != "en_*_GB" { - log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) - } - continue - } - ri := regionIntelligibility{ - lang: b.langIndex(d[0]), - distance: uint8(distance), - } - if d[1] != "*" { - ri.script = uint8(b.scriptIndex(d[1])) - } - switch { - case d[2] == "*": - ri.group = 0x80 // not contained in anything - case strings.HasPrefix(d[2], "$!"): - ri.group = 0x80 - d[2] = "$" + d[2][len("$!"):] - fallthrough - case strings.HasPrefix(d[2], "$"): - ri.group |= idToIndex[d[2]] - } - matchRegion = append(matchRegion, ri) - default: - log.Fatalf("not supported: desired=%q; supported=%q", desired, supported) - } - } - sort.SliceStable(matchLang, func(i, j int) bool { - return matchLang[i].distance < matchLang[j].distance - }) - b.w.WriteComment(` - matchLang holds pairs of langIDs of base languages that are typically - mutually intelligible. Each pair is associated with a confidence and - whether the intelligibility goes one or both ways.`) - b.w.WriteVar("matchLang", matchLang) - - b.w.WriteComment(` - matchScript holds pairs of scriptIDs where readers of one script - can typically also read the other. Each is associated with a confidence.`) - sort.SliceStable(matchScript, func(i, j int) bool { - return matchScript[i].distance < matchScript[j].distance - }) - b.w.WriteVar("matchScript", matchScript) - - sort.SliceStable(matchRegion, func(i, j int) bool { - return matchRegion[i].distance < matchRegion[j].distance - }) - b.w.WriteVar("matchRegion", matchRegion) -} diff --git a/vendor/modules.txt b/vendor/modules.txt index 1564db8..769efec 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -11,25 +11,25 @@ github.com/hashicorp/golang-lru github.com/hashicorp/golang-lru/simplelru # golang.org/x/net v0.0.0-20190628185345-da137c7871d7 golang.org/x/net/html -golang.org/x/net/html/charset -golang.org/x/net/proxy golang.org/x/net/html/atom +golang.org/x/net/html/charset golang.org/x/net/internal/socks +golang.org/x/net/proxy # golang.org/x/text v0.3.2 golang.org/x/text/encoding golang.org/x/text/encoding/charmap golang.org/x/text/encoding/htmlindex -golang.org/x/text/transform -golang.org/x/text/encoding/internal/identifier golang.org/x/text/encoding/internal +golang.org/x/text/encoding/internal/identifier golang.org/x/text/encoding/japanese golang.org/x/text/encoding/korean golang.org/x/text/encoding/simplifiedchinese golang.org/x/text/encoding/traditionalchinese golang.org/x/text/encoding/unicode -golang.org/x/text/language -golang.org/x/text/internal/utf8internal -golang.org/x/text/runes golang.org/x/text/internal/language golang.org/x/text/internal/language/compact golang.org/x/text/internal/tag +golang.org/x/text/internal/utf8internal +golang.org/x/text/language +golang.org/x/text/runes +golang.org/x/text/transform -- cgit v1.2.3