summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/internal/export/idna/gen.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/internal/export/idna/gen.go')
-rw-r--r--vendor/golang.org/x/text/internal/export/idna/gen.go276
1 files changed, 0 insertions, 276 deletions
diff --git a/vendor/golang.org/x/text/internal/export/idna/gen.go b/vendor/golang.org/x/text/internal/export/idna/gen.go
deleted file mode 100644
index 4ad9804..0000000
--- a/vendor/golang.org/x/text/internal/export/idna/gen.go
+++ /dev/null
@@ -1,276 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-// This program generates the trie for idna operations. IDNA processing
-// requires the lookup of various properties and mappings for each rune. The
-// table generated by this generator combines several of the most frequently
-// used of these into a single trie so that they can be accessed with a single
-// lookup.
-package main
-
-import (
- "fmt"
- "io"
- "log"
- "unicode"
- "unicode/utf8"
-
- "golang.org/x/text/internal/gen"
- "golang.org/x/text/internal/triegen"
- "golang.org/x/text/internal/ucd"
- "golang.org/x/text/unicode/bidi"
-)
-
-// main initializes the gen package, generates the tables and then calls
-// gen.Repackage for each generator source file that is shared with package
-// idna.
-func main() {
-	gen.Init()
-	genTables()
-
-	// Each pair holds the two file names handed to gen.Repackage.
-	// NOTE(review): presumably (source, destination) — confirm against gen.
-	for _, f := range [...][2]string{
-		{"gen_trieval.go", "trieval.go"},
-		{"gen_common.go", "common_test.go"},
-	} {
-		gen.Repackage(f[0], f[1], "idna")
-	}
-}
-
-// runes holds the extra per-rune info bits collected by genTables from the
-// Unicode data files. genTables ORs them into the category of every
-// non-mapped rune before inserting it into the trie.
-var runes = map[rune]info{}
-
-// genTables builds the idna trie from the Unicode data files and writes it,
-// together with the mappings and xorData tables, to tables.go.
-//
-// Per-rune property bits are first collected in runes, in this order:
-// normalization quick-check data, combining class / mark / bidi data, and
-// joining types. They are then merged into the category of each non-mapped
-// entry of IdnaMappingTable.txt when that entry is inserted into the trie.
-// The order matters because some passes assign to runes[r] and thereby
-// replace what earlier passes stored.
-func genTables() {
- t := triegen.NewTrie("idna")
-
- // Pass 1: flag every rune that has an NFC_QC entry as possibly needing
- // normalization.
- ucd.Parse(gen.OpenUCDFile("DerivedNormalizationProps.txt"), func(p *ucd.Parser) {
- r := p.Rune(0)
- if p.String(1) == "NFC_QC" { // p.String(2) is "N" or "M"
- runes[r] = mayNeedNorm
- }
- })
- // Pass 2: viramas, marks and right-to-left runes.
- ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
- r := p.Rune(0)
-
- // Plain assignment: this replaces anything pass 1 recorded for the rune.
- const cccVirama = 9
- if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
- runes[p.Rune(0)] = viramaModifier
- }
- // Marks are OR'ed in, so a virama flag set just above is preserved.
- switch {
- case unicode.In(r, unicode.Mark):
- runes[r] |= modifier | mayNeedNorm
- }
- // TODO: by using UnicodeData.txt we don't mark undefined codepoints
- // that are earmarked as RTL properly. However, an undefined cp will
- // always fail, so there is no need to store this info.
- switch p, _ := bidi.LookupRune(r); p.Class() {
- case bidi.R, bidi.AL, bidi.AN:
- // rtl overwrites the stored value, so only "nothing" or a bare
- // mayNeedNorm may be replaced; anything else aborts the generator.
- if x := runes[r]; x != 0 && x != mayNeedNorm {
- log.Fatalf("%U: rune both modifier and RTL letter/number", r)
- }
- runes[r] = rtl
- }
- })
-
- // Pass 3: OR in the joining type for runes of type L, D, T or R.
- ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
- switch v := p.String(1); v {
- case "L", "D", "T", "R":
- runes[p.Rune(0)] |= joinType[v] << joinShift
- }
- })
-
- // Pass 4: insert one trie value per rune of the IDNA mapping table.
- ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
- r := p.Rune(0)
-
- // The mappings table explicitly defines surrogates as invalid.
- if !utf8.ValidRune(r) {
- return
- }
-
- cat := catFromEntry(p)
- isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation
- if !isMapped {
- // Only include additional category information for non-mapped
- // runes. The additional information is only used after mapping and
- // the bits would clash with mapping information.
- // TODO: it would be possible to inline this data and avoid
- // additional lookups. This is quite tedious, though, so let's first
- // see if we need this.
- cat |= category(runes[r])
- }
-
- // Field 2 holds the replacement runes; it must be empty unless the
- // category is one of the mapped ones.
- s := string(p.Runes(2))
- if s != "" && !isMapped {
- log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
- }
- // The trie value is the entry built by makeEntry plus the category.
- t.Insert(r, uint64(makeEntry(r, s))+uint64(cat))
- })
-
- // The deferred call runs when genTables returns, i.e. after everything
- // below has been added to w. It does not run if log.Fatal exits first.
- w := gen.NewCodeWriter()
- defer w.WriteVersionedGoFile("tables.go", "idna")
-
- gen.WriteUnicodeVersion(w)
-
- w.WriteVar("mappings", string(mappings))
- w.WriteVar("xorData", string(xorData))
-
- // Generate the trie, using normCompacter as its compacter.
- sz, err := t.Gen(w, triegen.Compact(&normCompacter{}))
- if err != nil {
- log.Fatal(err)
- }
- w.Size += sz
-}
-
-// Tables filled in by makeEntry and written out by genTables.
-var (
- // mappings contains replacement strings for mapped runes, each prefixed
- // with a byte containing the length of the following string.
- mappings = []byte{}
- // mapCache maps a replacement string to the index of its length byte in
- // mappings, so that identical strings are stored only once.
- mapCache = map[string]int{}
-
- // xorData is like mappings, except that it contains XOR data.
- // We split these two tables so that we don't get an overflow.
- xorData = []byte{}
- // xorCache maps an XOR mask to the index of its length byte in xorData,
- // so that identical masks are stored only once.
- xorCache = map[string]int{}
-)
-
-// makeEntry creates a trie entry.
-func makeEntry(r rune, mapped string) info {
- orig := string(r)
-
- if len(orig) != len(mapped) {
- // Store the mapped value as is in the mappings table.
- index := len(mappings)
- if x, ok := mapCache[mapped]; ok {
- index = x
- } else {
- mapCache[mapped] = index
- mappings = append(mappings, byte(len(mapped)))
- mappings = append(mappings, mapped...)
- }
- return info(index) << indexShift
- }
-
- // Create per-byte XOR mask.
- var b []byte
- for i := 0; i < len(orig); i++ {
- b = append(b, orig[i]^mapped[i])
- }
-
- // Remove leading 0 bytes, but keep at least one byte.
- for ; len(b) > 1 && b[0] == 0; b = b[1:] {
- }
-
- if len(b) == 1 {
- return xorBit | inlineXOR | info(b[0])<<indexShift
- }
- mapped = string(b)
-
- // Store the mapped value as is in the mappings table.
- index := len(xorData)
- if x, ok := xorCache[mapped]; ok {
- index = x
- } else {
- xorCache[mapped] = index
- xorData = append(xorData, byte(len(mapped)))
- xorData = append(xorData, mapped...)
- }
- return xorBit | info(index)<<indexShift
-}
-
-// The following code implements a triegen.Compacter that was originally
-// designed for normalization. The IDNA table has some similarities with the
-// norm table. Using this compacter, together with the XOR pattern approach,
-// reduces the table size by roughly 100K. It can probably be compressed further
-// by also including elements of the compacter used by cases, but for now it is
-// good enough.
-
-// maxSparseEntries is the largest count, as computed by countSparseEntries,
-// for which normCompacter.Size still accepts a block.
-const maxSparseEntries = 16
-
-// normCompacter collects the blocks handed to Store so that Print can emit
-// them as the idnaSparseOffset and idnaSparseValues tables.
-type normCompacter struct {
- sparseBlocks [][]uint64 // blocks passed to Store, in call order
- sparseOffset []uint16 // per stored block: value of sparseCount when it was stored
- sparseCount int // running total of countSparseEntries(v)+1 over all stored blocks
-}
-
-func mostFrequentStride(a []uint64) int {
- counts := make(map[int]int)
- var v int
- for _, x := range a {
- if stride := int(x) - v; v != 0 && stride >= 0 {
- counts[stride]++
- }
- v = int(x)
- }
- var maxs, maxc int
- for stride, cnt := range counts {
- if cnt > maxc || (cnt == maxc && stride < maxs) {
- maxs, maxc = stride, cnt
- }
- }
- return maxs
-}
-
-func countSparseEntries(a []uint64) int {
- stride := mostFrequentStride(a)
- var v, count int
- for _, tv := range a {
- if int(tv)-v != stride {
- if tv != 0 {
- count++
- }
- }
- v = int(tv)
- }
- return count
-}
-
-// Size reports whether v can be stored by this compacter and, if so, the size
-// it accounts for: four per sparse entry plus one extra entry, plus two.
-// NOTE(review): presumably bytes, matching the 4-byte values and 2-byte
-// offsets reported by Print — confirm against triegen.
-func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
-	n := countSparseEntries(v)
-	if n > maxSparseEntries {
-		return 0, false
-	}
-	return (n+1)*4 + 2, true
-}
-
-// Store records v for later printing and returns its handle, which is the
-// number of blocks stored before it. The block's offset is the current
-// sparseCount, which then grows by the block's sparse entries plus one.
-func (c *normCompacter) Store(v []uint64) uint32 {
-	handle := uint32(len(c.sparseOffset))
-	offset := uint16(c.sparseCount)
-
-	c.sparseOffset = append(c.sparseOffset, offset)
-	c.sparseBlocks = append(c.sparseBlocks, v)
-	c.sparseCount += 1 + countSparseEntries(v)
-	return handle
-}
-
-// Handler returns "idnaSparse.lookup".
-// NOTE(review): presumably the name of the lookup routine the generated trie
-// code calls for blocks stored by this compacter — confirm against triegen.
-func (c *normCompacter) Handler() string {
- return "idnaSparse.lookup"
-}
-
-// Print writes the Go source of the idnaSparseOffset and idnaSparseValues
-// tables for all stored blocks to w. It returns the first error encountered
-// while writing, or nil.
-func (c *normCompacter) Print(w io.Writer) (retErr error) {
- // p formats to w and remembers only the first write error in retErr.
- p := func(f string, x ...interface{}) {
- if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
- retErr = err
- }
- }
-
- // One offset per stored block, reported as 2 bytes each.
- ls := len(c.sparseBlocks)
- p("// idnaSparseOffset: %d entries, %d bytes\n", ls, ls*2)
- p("var idnaSparseOffset = %#v\n\n", c.sparseOffset)
-
- // One value range per counted entry, reported as 4 bytes each.
- ns := c.sparseCount
- p("// idnaSparseValues: %d entries, %d bytes\n", ns, ns*4)
- p("var idnaSparseValues = [%d]valueRange {", ns)
- for i, b := range c.sparseBlocks {
- p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
- var v int
- stride := mostFrequentStride(b)
- n := countSparseEntries(b)
- // The first entry of a block is a header: value holds the block's most
- // frequent stride and lo its number of sparse entries.
- p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
- // Note: this i shadows the block index; here it is the position in b.
- // NOTE(review): lo/hi are offset by 0x80, presumably because a block is
- // indexed by a UTF-8 continuation byte — confirm against the lookup code.
- for i, nv := range b {
- // A step that differs from the stride closes the open range (if the
- // previous value was non-zero) and opens a new one (if the current
- // value is non-zero).
- if int(nv)-v != stride {
- if v != 0 {
- p(",hi:%#02x},", 0x80+i-1)
- }
- if nv != 0 {
- p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
- }
- }
- v = int(nv)
- }
- // Close the range that is still open at the end of the block.
- if v != 0 {
- p(",hi:%#02x},", 0x80+len(b)-1)
- }
- }
- p("\n}\n\n")
- return
-}