summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/feature/plural/gen.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/feature/plural/gen.go')
-rw-r--r--vendor/golang.org/x/text/feature/plural/gen.go513
1 files changed, 513 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/feature/plural/gen.go b/vendor/golang.org/x/text/feature/plural/gen.go
new file mode 100644
index 0000000..a0de986
--- /dev/null
+++ b/vendor/golang.org/x/text/feature/plural/gen.go
@@ -0,0 +1,513 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This file generates data for the CLDR plural rules, as defined in
+// http://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules
+//
+// We assume a slightly simplified grammar:
+//
+// condition = and_condition ('or' and_condition)* samples
+// and_condition = relation ('and' relation)*
+// relation = expr ('=' | '!=') range_list
+// expr = operand ('%' '10' '0'* )?
+// operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
+// range_list = (range | value) (',' range_list)*
+// range = value'..'value
+// value = digit+
+// digit = 0|1|2|3|4|5|6|7|8|9
+//
+// samples = ('@integer' sampleList)?
+// ('@decimal' sampleList)?
+// sampleList = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
+// sampleRange = decimalValue ('~' decimalValue)?
+// decimalValue = value ('.' value)?
+//
+// Symbol Value
+// n absolute value of the source number (integer and decimals).
+// i integer digits of n.
+// v number of visible fraction digits in n, with trailing zeros.
+// w number of visible fraction digits in n, without trailing zeros.
+// f visible fractional digits in n, with trailing zeros.
+// t visible fractional digits in n, without trailing zeros.
+//
+// The algorithm for which the data is generated is based on the following
+// observations
+//
+// - the number of different sets of numbers which the plural rules use to
+// test inclusion is limited,
+// - most numbers that are tested on are < 100
+//
+// This allows us to define a bitmap for each number < 100 where a bit i
+// indicates whether this number is included in some defined set i.
+// The function matchPlural in plural.go defines how we can subsequently use
+// this data to determine inclusion.
+//
+// There are a few languages for which this doesn't work. For one Italian and
+// Azerbaijan, which both test against numbers > 100 for ordinals and Breton,
+// which considers whether numbers are multiples of hundreds. The model here
+// could be extended to handle Italian and Azerbaijan fairly easily (by
+// considering the numbers 100, 200, 300, ..., 800, 900 in addition to the first
+// 100), but for now it seems easier to just hard-code these cases.
+
+import (
+ "bufio"
+ "bytes"
+ "flag"
+ "fmt"
+ "log"
+ "strconv"
+ "strings"
+
+ "golang.org/x/text/internal"
+ "golang.org/x/text/internal/gen"
+ "golang.org/x/text/language"
+ "golang.org/x/text/unicode/cldr"
+)
+
+var (
+ test = flag.Bool("test", false,
+ "test existing tables; can be used to compare web data with package data.")
+ outputFile = flag.String("output", "tables.go", "output file")
+ outputTestFile = flag.String("testoutput", "data_test.go", "output file")
+
+ draft = flag.String("draft",
+ "contributed",
+ `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
+)
+
+func main() {
+ gen.Init()
+
+ const pkg = "plural"
+
+ gen.Repackage("gen_common.go", "common.go", pkg)
+ // Read the CLDR zip file.
+ r := gen.OpenCLDRCoreZip()
+ defer r.Close()
+
+ d := &cldr.Decoder{}
+ d.SetDirFilter("supplemental", "main")
+ d.SetSectionFilter("numbers", "plurals")
+ data, err := d.DecodeZip(r)
+ if err != nil {
+ log.Fatalf("DecodeZip: %v", err)
+ }
+
+ w := gen.NewCodeWriter()
+ defer w.WriteGoFile(*outputFile, pkg)
+
+ gen.WriteCLDRVersion(w)
+
+ genPlurals(w, data)
+
+ w = gen.NewCodeWriter()
+ defer w.WriteGoFile(*outputTestFile, pkg)
+
+ genPluralsTests(w, data)
+}
+
+type pluralTest struct {
+ locales string // space-separated list of locales for this test
+ form int // Use int instead of Form to simplify generation.
+ integer []string // Entries of the form \d+ or \d+~\d+
+ decimal []string // Entries of the form \f+ or \f+ +~\f+, where f is \d+\.\d+
+}
+
+func genPluralsTests(w *gen.CodeWriter, data *cldr.CLDR) {
+ w.WriteType(pluralTest{})
+
+ for _, plurals := range data.Supplemental().Plurals {
+ if plurals.Type == "" {
+ // The empty type is reserved for plural ranges.
+ continue
+ }
+ tests := []pluralTest{}
+
+ for _, pRules := range plurals.PluralRules {
+ for _, rule := range pRules.PluralRule {
+ test := pluralTest{
+ locales: pRules.Locales,
+ form: int(countMap[rule.Count]),
+ }
+ scan := bufio.NewScanner(strings.NewReader(rule.Data()))
+ scan.Split(splitTokens)
+ var p *[]string
+ for scan.Scan() {
+ switch t := scan.Text(); t {
+ case "@integer":
+ p = &test.integer
+ case "@decimal":
+ p = &test.decimal
+ case ",", "…":
+ default:
+ if p != nil {
+ *p = append(*p, t)
+ }
+ }
+ }
+ tests = append(tests, test)
+ }
+ }
+ w.WriteVar(plurals.Type+"Tests", tests)
+ }
+}
+
+func genPlurals(w *gen.CodeWriter, data *cldr.CLDR) {
+ for _, plurals := range data.Supplemental().Plurals {
+ if plurals.Type == "" {
+ continue
+ }
+ // Initialize setMap and inclusionMasks. They are already populated with
+ // a few entries to serve as an example and to assign nice numbers to
+ // common cases.
+
+ // setMap contains sets of numbers represented by boolean arrays where
+ // a true value for element i means that the number i is included.
+ setMap := map[[numN]bool]int{
+ // The above init func adds an entry for including all numbers.
+ [numN]bool{1: true}: 1, // fix {1} to a nice value
+ [numN]bool{2: true}: 2, // fix {2} to a nice value
+ [numN]bool{0: true}: 3, // fix {0} to a nice value
+ }
+
+ // inclusionMasks contains bit masks for every number under numN to
+ // indicate in which set the number is included. Bit 1 << x will be set
+ // if it is included in set x.
+ inclusionMasks := [numN]uint64{
+ // Note: these entries are not complete: more bits will be set along the way.
+ 0: 1 << 3,
+ 1: 1 << 1,
+ 2: 1 << 2,
+ }
+
+ // Create set {0..99}. We will assign this set the identifier 0.
+ var all [numN]bool
+ for i := range all {
+ // Mark number i as being included in the set (which has identifier 0).
+ inclusionMasks[i] |= 1 << 0
+ // Mark number i as included in the set.
+ all[i] = true
+ }
+ // Register the identifier for the set.
+ setMap[all] = 0
+
+ rules := []pluralCheck{}
+ index := []byte{0}
+ langMap := map[int]byte{0: 0} // From compact language index to index
+
+ for _, pRules := range plurals.PluralRules {
+ // Parse the rules.
+ var conds []orCondition
+ for _, rule := range pRules.PluralRule {
+ form := countMap[rule.Count]
+ conds = parsePluralCondition(conds, rule.Data(), form)
+ }
+ // Encode the rules.
+ for _, c := range conds {
+ // If an or condition only has filters, we create an entry for
+ // this filter and the set that contains all values.
+ empty := true
+ for _, b := range c.used {
+ empty = empty && !b
+ }
+ if empty {
+ rules = append(rules, pluralCheck{
+ cat: byte(opMod<<opShift) | byte(c.form),
+ setID: 0, // all values
+ })
+ continue
+ }
+ // We have some entries with values.
+ for i, set := range c.set {
+ if !c.used[i] {
+ continue
+ }
+ index, ok := setMap[set]
+ if !ok {
+ index = len(setMap)
+ setMap[set] = index
+ for i := range inclusionMasks {
+ if set[i] {
+ inclusionMasks[i] |= 1 << uint64(index)
+ }
+ }
+ }
+ rules = append(rules, pluralCheck{
+ cat: byte(i<<opShift | andNext),
+ setID: byte(index),
+ })
+ }
+ // Now set the last entry to the plural form the rule matches.
+ rules[len(rules)-1].cat &^= formMask
+ rules[len(rules)-1].cat |= byte(c.form)
+ }
+ // Point the relevant locales to the created entries.
+ for _, loc := range strings.Split(pRules.Locales, " ") {
+ if strings.TrimSpace(loc) == "" {
+ continue
+ }
+ lang, ok := language.CompactIndex(language.MustParse(loc))
+ if !ok {
+ log.Printf("No compact index for locale %q", loc)
+ }
+ langMap[lang] = byte(len(index) - 1)
+ }
+ index = append(index, byte(len(rules)))
+ }
+ w.WriteVar(plurals.Type+"Rules", rules)
+ w.WriteVar(plurals.Type+"Index", index)
+ // Expand the values.
+ langToIndex := make([]byte, language.NumCompactTags)
+ for i := range langToIndex {
+ for p := i; ; p = int(internal.Parent[p]) {
+ if x, ok := langMap[p]; ok {
+ langToIndex[i] = x
+ break
+ }
+ }
+ }
+ w.WriteVar(plurals.Type+"LangToIndex", langToIndex)
+ // Need to convert array to slice because of golang.org/issue/7651.
+ // This will allow tables to be dropped when unused. This is especially
+ // relevant for the ordinal data, which I suspect won't be used as much.
+ w.WriteVar(plurals.Type+"InclusionMasks", inclusionMasks[:])
+
+ if len(rules) > 0xFF {
+ log.Fatalf("Too many entries for rules: %#x", len(rules))
+ }
+ if len(index) > 0xFF {
+ log.Fatalf("Too many entries for index: %#x", len(index))
+ }
+ if len(setMap) > 64 { // maximum number of bits.
+ log.Fatalf("Too many entries for setMap: %d", len(setMap))
+ }
+ w.WriteComment(
+ "Slots used for %s: %X of 0xFF rules; %X of 0xFF indexes; %d of 64 sets",
+ plurals.Type, len(rules), len(index), len(setMap))
+ // Prevent comment from attaching to the next entry.
+ fmt.Fprint(w, "\n\n")
+ }
+}
+
+type orCondition struct {
+ original string // for debugging
+
+ form Form
+ used [32]bool
+ set [32][numN]bool
+}
+
+func (o *orCondition) add(op opID, mod int, v []int) (ok bool) {
+ ok = true
+ for _, x := range v {
+ if x >= maxMod {
+ ok = false
+ break
+ }
+ }
+ for i := 0; i < numN; i++ {
+ m := i
+ if mod != 0 {
+ m = i % mod
+ }
+ if !intIn(m, v) {
+ o.set[op][i] = false
+ }
+ }
+ if ok {
+ o.used[op] = true
+ }
+ return ok
+}
+
+func intIn(x int, a []int) bool {
+ for _, y := range a {
+ if x == y {
+ return true
+ }
+ }
+ return false
+}
+
+var operandIndex = map[string]opID{
+ "i": opI,
+ "n": opN,
+ "f": opF,
+ "v": opV,
+ "w": opW,
+}
+
+// parsePluralCondition parses the condition of a single pluralRule and appends
+// the resulting or conditions to conds.
+//
+// Example rules:
+// // Category "one" in English: only allow 1 with no visible fraction
+// i = 1 and v = 0 @integer 1
+//
+// // Category "few" in Czech: all numbers with visible fractions
+// v != 0 @decimal ...
+//
+// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or
+// // numbers with a fraction 11..19 and no trailing zeros.
+// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ...
+//
+// @integer and @decimal are followed by examples and are not relevant for the
+// rule itself. The are used here to signal the termination of the rule.
+func parsePluralCondition(conds []orCondition, s string, f Form) []orCondition {
+ scan := bufio.NewScanner(strings.NewReader(s))
+ scan.Split(splitTokens)
+ for {
+ cond := orCondition{original: s, form: f}
+ // Set all numbers to be allowed for all number classes and restrict
+ // from here on.
+ for i := range cond.set {
+ for j := range cond.set[i] {
+ cond.set[i][j] = true
+ }
+ }
+ andLoop:
+ for {
+ var token string
+ scan.Scan() // Must exist.
+ switch class := scan.Text(); class {
+ case "t":
+ class = "w" // equal to w for t == 0
+ fallthrough
+ case "n", "i", "f", "v", "w":
+ op := scanToken(scan)
+ opCode := operandIndex[class]
+ mod := 0
+ if op == "%" {
+ opCode |= opMod
+
+ switch v := scanUint(scan); v {
+ case 10, 100:
+ mod = v
+ case 1000:
+ // A more general solution would be to allow checking
+ // against multiples of 100 and include entries for the
+ // numbers 100..900 in the inclusion masks. At the
+ // moment this would only help Azerbaijan and Italian.
+
+ // Italian doesn't use '%', so this must be Azerbaijan.
+ cond.used[opAzerbaijan00s] = true
+ return append(conds, cond)
+
+ case 1000000:
+ cond.used[opBretonM] = true
+ return append(conds, cond)
+
+ default:
+ log.Fatalf("Modulo value not supported %d", v)
+ }
+ op = scanToken(scan)
+ }
+ if op != "=" && op != "!=" {
+ log.Fatalf("Unexpected op %q", op)
+ }
+ if op == "!=" {
+ opCode |= opNotEqual
+ }
+ a := []int{}
+ v := scanUint(scan)
+ if class == "w" && v != 0 {
+ log.Fatalf("Must compare against zero for operand type %q", class)
+ }
+ token = scanToken(scan)
+ for {
+ switch token {
+ case "..":
+ end := scanUint(scan)
+ for ; v <= end; v++ {
+ a = append(a, v)
+ }
+ token = scanToken(scan)
+ default: // ",", "or", "and", "@..."
+ a = append(a, v)
+ }
+ if token != "," {
+ break
+ }
+ v = scanUint(scan)
+ token = scanToken(scan)
+ }
+ if !cond.add(opCode, mod, a) {
+ // Detected large numbers. As we ruled out Azerbaijan, this
+ // must be the many rule for Italian ordinals.
+ cond.set[opItalian800] = cond.set[opN]
+ cond.used[opItalian800] = true
+ }
+
+ case "@integer", "@decimal": // "other" entry: tests only.
+ return conds
+ default:
+ log.Fatalf("Unexpected operand class %q (%s)", class, s)
+ }
+ switch token {
+ case "or":
+ conds = append(conds, cond)
+ break andLoop
+ case "@integer", "@decimal": // examples
+ // There is always an example in practice, so we always terminate here.
+ if err := scan.Err(); err != nil {
+ log.Fatal(err)
+ }
+ return append(conds, cond)
+ case "and":
+ // keep accumulating
+ default:
+ log.Fatalf("Unexpected token %q", token)
+ }
+ }
+ }
+}
+
+func scanToken(scan *bufio.Scanner) string {
+ scan.Scan()
+ return scan.Text()
+}
+
+func scanUint(scan *bufio.Scanner) int {
+ scan.Scan()
+ val, err := strconv.ParseUint(scan.Text(), 10, 32)
+ if err != nil {
+ log.Fatal(err)
+ }
+ return int(val)
+}
+
+// splitTokens can be used with bufio.Scanner to tokenize CLDR plural rules.
+func splitTokens(data []byte, atEOF bool) (advance int, token []byte, err error) {
+ condTokens := [][]byte{
+ []byte(".."),
+ []byte(","),
+ []byte("!="),
+ []byte("="),
+ }
+ advance, token, err = bufio.ScanWords(data, atEOF)
+ for _, t := range condTokens {
+ if len(t) >= len(token) {
+ continue
+ }
+ switch p := bytes.Index(token, t); {
+ case p == -1:
+ case p == 0:
+ advance = len(t)
+ token = token[:len(t)]
+ return advance - len(token) + len(t), token[:len(t)], err
+ case p < advance:
+ // Don't split when "=" overlaps "!=".
+ if t[0] == '=' && token[p-1] == '!' {
+ continue
+ }
+ advance = p
+ token = token[:p]
+ }
+ }
+ return advance, token, err
+}