summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/secure/precis/gen.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/secure/precis/gen.go')
-rw-r--r--vendor/golang.org/x/text/secure/precis/gen.go310
1 files changed, 310 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/secure/precis/gen.go b/vendor/golang.org/x/text/secure/precis/gen.go
new file mode 100644
index 0000000..946acba
--- /dev/null
+++ b/vendor/golang.org/x/text/secure/precis/gen.go
@@ -0,0 +1,310 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Unicode table generator.
+// Data read from the web.
+
+// +build ignore
+
+package main
+
+import (
+ "flag"
+ "log"
+ "unicode"
+ "unicode/utf8"
+
+ "golang.org/x/text/internal/gen"
+ "golang.org/x/text/internal/triegen"
+ "golang.org/x/text/internal/ucd"
+ "golang.org/x/text/unicode/norm"
+ "golang.org/x/text/unicode/rangetable"
+)
+
+// outputFile is the value of the -output flag: the file the generated tables
+// are written to by writeTables.
+var outputFile = flag.String("output", "tables.go", "output file for generated tables; default tables.go")
+
+// assigned and disallowedRunes are populated in main before writeTables reads
+// them. assigned is rangetable.Assigned(unicode.Version); disallowedRunes is
+// built from the runes main collects from DerivedCoreProperties.txt,
+// PropList.txt and HangulSyllableType.txt.
+var assigned, disallowedRunes *unicode.RangeTable
+
+// runeCategory maps a rune to the category recorded for it. A rune without an
+// entry yields the zero category on lookup.
+var runeCategory = map[rune]category{}
+
+// overrides maps a category already recorded for a rune to the category that
+// setCategory stores instead when that same rune is subsequently reported as
+// joiningT.
+var overrides = map[category]category{
+ viramaModifier: viramaJoinT,
+ greek: greekJoinT,
+ hebrew: hebrewJoinT,
+}
+
+// setCategory records cat as the category of r in runeCategory.
+//
+// A rune may normally be categorized only once. The single exception is a
+// rune that is reported as joiningT after a category with an entry in
+// overrides has been recorded for it: the combined category from overrides is
+// then stored. Any other repeated assignment aborts the generator through
+// log.Fatalf.
+func setCategory(r rune, cat category) {
+	prev, seen := runeCategory[r]
+	if seen {
+		combined, mergeable := overrides[prev]
+		if cat != joiningT || !mergeable {
+			log.Fatalf("%U: multiple categories for rune (%v and %v)", r, prev, cat)
+		}
+		cat = combined
+	}
+	runeCategory[r] = cat
+}
+
+// init aborts the generator when numCategories exceeds 1<<propShift, the
+// maximum quoted in the error message.
+func init() {
+	if numCategories <= 1<<propShift {
+		return
+	}
+	log.Fatalf("Number of categories is %d; may at most be %d", numCategories, 1<<propShift)
+}
+
+// main reads the Unicode Character Database files, fills disallowedRunes,
+// assigned and runeCategory from them, and then calls writeTables.
+//
+// The order of the steps matters: categories from UnicodeData.txt and
+// Scripts.txt are recorded first, the exceptions table then overwrites them,
+// and the joining types are applied last through setCategory.
+func main() {
+ gen.Init()
+
+ // Load data
+ runes := []rune{}
+ // PrecisIgnorableProperties: https://tools.ietf.org/html/rfc7564#section-9.13
+ ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
+ if p.String(1) == "Default_Ignorable_Code_Point" {
+ runes = append(runes, p.Rune(0))
+ }
+ })
+ ucd.Parse(gen.OpenUCDFile("PropList.txt"), func(p *ucd.Parser) {
+ switch p.String(1) {
+ case "Noncharacter_Code_Point":
+ runes = append(runes, p.Rune(0))
+ }
+ })
+ // OldHangulJamo: https://tools.ietf.org/html/rfc5892#section-2.9
+ ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) {
+ switch p.String(1) {
+ case "L", "V", "T":
+ runes = append(runes, p.Rune(0))
+ }
+ })
+
+ // Everything collected above ends up in one table; writeTables marks its
+ // members (together with unicode.Cc) as disallowed.
+ disallowedRunes = rangetable.New(runes...)
+ assigned = rangetable.Assigned(unicode.Version)
+
+ // Load category data.
+ // 'l' is written directly into the map, bypassing the duplicate check in
+ // setCategory.
+ runeCategory['l'] = latinSmallL
+ ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
+ const cccVirama = 9
+ if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
+ setCategory(p.Rune(0), viramaModifier)
+ }
+ })
+ ucd.Parse(gen.OpenUCDFile("Scripts.txt"), func(p *ucd.Parser) {
+ switch p.String(1) {
+ case "Greek":
+ setCategory(p.Rune(0), greek)
+ case "Hebrew":
+ setCategory(p.Rune(0), hebrew)
+ case "Hiragana", "Katakana", "Han":
+ setCategory(p.Rune(0), japanese)
+ }
+ })
+
+ // Set the rule categories associated with exceptions. This overrides any
+ // previously set categories. The original categories are manually
+ // reintroduced in the categoryTransitions table.
+ // Exceptions whose cat is zero leave the recorded category untouched.
+ for r, e := range exceptions {
+ if e.cat != 0 {
+ runeCategory[r] = e.cat
+ }
+ }
+ // cat translates the joining type letters used in DerivedJoiningType.txt
+ // into the corresponding category constants.
+ cat := map[string]category{
+ "L": joiningL,
+ "D": joiningD,
+ "T": joiningT,
+
+ "R": joiningR,
+ }
+ // Joining types go through setCategory, so a rune that already has a
+ // category is only accepted when the new type is joiningT and the recorded
+ // category has an entry in overrides; otherwise the generator aborts.
+ ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
+ switch v := p.String(1); v {
+ case "L", "D", "T", "R":
+ setCategory(p.Rune(0), cat[v])
+ }
+ })
+
+ writeTables()
+ // NOTE(review): presumably copies gen_trieval.go to trieval.go under
+ // package precis - confirm against internal/gen.
+ gen.Repackage("gen_trieval.go", "trieval.go", "precis")
+}
+
+// exception is the value type of the exceptions map. prop is the property
+// assigned to the rune; cat is an optional rule category, left at its zero
+// value when the rune has none.
+type exception struct {
+ prop property
+ cat category
+}
+
+// init registers the ten digits starting at U+0660 and the ten digits starting
+// at U+06F0 in the exceptions map.
+func init() {
+	// Each digit is stored as disallowed together with its digit category.
+	// The exceptions map literal is initialized before this function runs, so
+	// these assignments replace whatever the literal holds for the same runes.
+	// See the ContextO|J comment in the exceptions map for why context runes
+	// are marked disallowed.
+	for digit := rune(0); digit < 10; digit++ {
+		exceptions[0x0660+digit] = exception{prop: disallowed, cat: arabicIndicDigit}
+		exceptions[0x06F0+digit] = exception{prop: disallowed, cat: extendedArabicIndicDigit}
+	}
+}
+
+// The Exceptions class as defined in RFC 5892
+// https://tools.ietf.org/html/rfc5892#section-2.6
+//
+// exceptions maps a rune to the property, and optionally the rule category,
+// that takes precedence over every other derivation step in writeTables.
+var exceptions = map[rune]exception{
+	0x00DF: {prop: pValid},
+	0x03C2: {prop: pValid},
+	0x06FD: {prop: pValid},
+	0x06FE: {prop: pValid},
+	0x0F0B: {prop: pValid},
+	0x3007: {prop: pValid},
+
+	// ContextO|J rules are marked as disallowed, taking a "guilty until proven
+	// innocent" approach. The main reason for this is that the check for
+	// whether a context rule should be applied can be moved to the logic for
+	// handling disallowed runes, taking it off the common path. The exception
+	// to this rule is for katakanaMiddleDot, as the rule logic is handled
+	// without using a rule function.
+
+	// ContextJ (Join control)
+	0x200C: {prop: disallowed, cat: zeroWidthNonJoiner},
+	0x200D: {prop: disallowed, cat: zeroWidthJoiner},
+
+	// ContextO
+	0x00B7: {prop: disallowed, cat: middleDot},
+	0x0375: {prop: disallowed, cat: greekLowerNumeralSign},
+	0x05F3: {prop: disallowed, cat: hebrewPreceding}, // punctuation Geresh
+	0x05F4: {prop: disallowed, cat: hebrewPreceding}, // punctuation Gershayim
+	0x30FB: {prop: pValid, cat: katakanaMiddleDot},
+
+	// The digits U+0660..U+0669 and U+06F0..U+06F9 are not listed in this
+	// literal. init adds them programmatically as disallowed with their digit
+	// categories, and because init runs after this literal is evaluated, any
+	// entry written here for those runes would be overwritten.
+
+	0x0640: {prop: disallowed},
+	0x07FA: {prop: disallowed},
+	0x302E: {prop: disallowed},
+	0x302F: {prop: disallowed},
+	0x3031: {prop: disallowed},
+	0x3032: {prop: disallowed},
+	0x3033: {prop: disallowed},
+	0x3034: {prop: disallowed},
+	0x3035: {prop: disallowed},
+	0x303B: {prop: disallowed},
+}
+
+// isLetterDigits reports whether r is a member of the LetterDigits class
+// (https://tools.ietf.org/html/rfc5892#section-2.1), that is, whether r is in
+// one of the tables Ll, Lu, Lo, Nd, Lm, Mn or Mc.
+func isLetterDigits(r rune) bool {
+	letterDigits := [...]*unicode.RangeTable{
+		unicode.Ll, unicode.Lu, unicode.Lm, unicode.Lo, // Letters
+		unicode.Mn, unicode.Mc, // Modifiers
+		unicode.Nd, // Digits
+	}
+	for _, table := range letterDigits {
+		if unicode.Is(table, r) {
+			return true
+		}
+	}
+	return false
+}
+
+// isIdDisAndFreePVal reports whether r is in one of the tables that make up
+// the OtherLetterDigits, Spaces, Symbols and Punctuation classes.
+func isIdDisAndFreePVal(r rune) bool {
+	classes := []*unicode.RangeTable{
+		// OtherLetterDigits: https://tools.ietf.org/html/rfc7564#section-9.18
+		// r in {Lt, Nl, No, Me}
+		unicode.Lt, unicode.Nl, unicode.No, // Other letters / numbers
+		unicode.Me, // Modifiers
+
+		// Spaces: https://tools.ietf.org/html/rfc7564#section-9.14
+		// r in {Zs}
+		unicode.Zs,
+
+		// Symbols: https://tools.ietf.org/html/rfc7564#section-9.15
+		// r in {Sm, Sc, Sk, So}
+		unicode.Sm, unicode.Sc, unicode.Sk, unicode.So,
+
+		// Punctuation: https://tools.ietf.org/html/rfc7564#section-9.16
+		// r in {Pc, Pd, Ps, Pe, Pi, Pf, Po}
+		unicode.Pc, unicode.Pd, unicode.Ps, unicode.Pe,
+		unicode.Pi, unicode.Pf, unicode.Po,
+	}
+	return unicode.In(r, classes...)
+}
+
+// hasCompat reports whether r belongs to the HasCompat class
+// (https://tools.ietf.org/html/rfc7564#section-9.17): it is true when the
+// string consisting of just r is not already in NFKC form.
+func hasCompat(r rune) bool {
+	if norm.NFKC.IsNormalString(string(r)) {
+		return false
+	}
+	return true
+}
+
+// From https://tools.ietf.org/html/rfc7564#section-8 (the PRECIS adaptation
+// of the RFC 5892 derivation algorithm):
+//
+// If .cp. .in. Exceptions Then Exceptions(cp);
+// Else If .cp. .in. BackwardCompatible Then BackwardCompatible(cp);
+// Else If .cp. .in. Unassigned Then UNASSIGNED;
+// Else If .cp. .in. ASCII7 Then PVALID;
+// Else If .cp. .in. JoinControl Then CONTEXTJ;
+// Else If .cp. .in. OldHangulJamo Then DISALLOWED;
+// Else If .cp. .in. PrecisIgnorableProperties Then DISALLOWED;
+// Else If .cp. .in. Controls Then DISALLOWED;
+// Else If .cp. .in. HasCompat Then ID_DIS or FREE_PVAL;
+// Else If .cp. .in. LetterDigits Then PVALID;
+// Else If .cp. .in. OtherLetterDigits Then ID_DIS or FREE_PVAL;
+// Else If .cp. .in. Spaces Then ID_DIS or FREE_PVAL;
+// Else If .cp. .in. Symbols Then ID_DIS or FREE_PVAL;
+// Else If .cp. .in. Punctuation Then ID_DIS or FREE_PVAL;
+// Else DISALLOWED;
+
+// writeTables derives a property for every valid rune, combines it with the
+// rune's category and inserts the result into the "derivedProperties" trie,
+// which is then generated into the code writer.
+//
+// The cases of the switch are evaluated in order, so the first matching step
+// decides the property; an entry in exceptions wins over everything else.
+func writeTables() {
+	propTrie := triegen.NewTrie("derivedProperties")
+	w := gen.NewCodeWriter()
+	// Deferred so that the output named by -output is written when this
+	// function returns; log.Fatal below exits without running it.
+	defer w.WriteVersionedGoFile(*outputFile, "precis")
+	gen.WriteUnicodeVersion(w)
+
+	// Iterate over all the runes. The upper bound is exclusive, so
+	// unicode.MaxRune itself is never inserted.
+	// NOTE(review): confirm that leaving it at the trie default is intended.
+	for r := rune(0); r < unicode.MaxRune; r++ {
+		if !utf8.ValidRune(r) {
+			continue
+		}
+
+		var p property
+		exc, isException := exceptions[r]
+		switch {
+		case isException:
+			p = exc.prop
+		case !unicode.In(r, assigned):
+			p = unassigned
+		case 0x0021 <= r && r <= 0x007e: // Is ASCII 7
+			p = pValid
+		case unicode.In(r, disallowedRunes, unicode.Cc):
+			p = disallowed
+		case hasCompat(r):
+			p = idDisOrFreePVal
+		case isLetterDigits(r):
+			p = pValid
+		case isIdDisAndFreePVal(r):
+			p = idDisOrFreePVal
+		default:
+			p = disallowed
+		}
+
+		// Disallowed runes only keep the category given by their exception
+		// entry, which is the zero category when there is none.
+		cat := runeCategory[r]
+		if p == disallowed {
+			cat = exc.cat
+		}
+		propTrie.Insert(r, uint64(p)|uint64(cat))
+	}
+
+	size, err := propTrie.Gen(w)
+	if err != nil {
+		log.Fatal(err)
+	}
+	w.Size += size
+}