summary refs log tree commit diff
path: root/vendor/golang.org/x/text/internal/ucd
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/internal/ucd')
-rw-r--r-- vendor/golang.org/x/text/internal/ucd/example_test.go 81
-rw-r--r-- vendor/golang.org/x/text/internal/ucd/ucd.go 371
-rw-r--r-- vendor/golang.org/x/text/internal/ucd/ucd_test.go 105
3 files changed, 557 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/internal/ucd/example_test.go b/vendor/golang.org/x/text/internal/ucd/example_test.go
new file mode 100644
index 0000000..338a50d
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/ucd/example_test.go
@@ -0,0 +1,81 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ucd_test
+
+import (
+ "fmt"
+ "strings"
+
+ "golang.org/x/text/internal/ucd"
+)
+
+func Example() {
+ // Read rune-by-rune from UnicodeData.
+ var count int
+ p := ucd.New(strings.NewReader(unicodeData))
+ for p.Next() {
+ count++
+ if lower := p.Runes(ucd.SimpleLowercaseMapping); lower != nil {
+ fmt.Printf("lower(%U) -> %U\n", p.Rune(0), lower[0])
+ }
+ }
+ if err := p.Err(); err != nil {
+ fmt.Println(err)
+ }
+ fmt.Println("Number of runes visited:", count)
+
+ // Read raw ranges from Scripts.
+ p = ucd.New(strings.NewReader(scripts), ucd.KeepRanges)
+ for p.Next() {
+ start, end := p.Range(0)
+ fmt.Printf("%04X..%04X: %s\n", start, end, p.String(1))
+ }
+ if err := p.Err(); err != nil {
+ fmt.Println(err)
+ }
+
+ // Output:
+ // lower(U+00C0) -> U+00E0
+ // lower(U+00C1) -> U+00E1
+ // lower(U+00C2) -> U+00E2
+ // lower(U+00C3) -> U+00E3
+ // lower(U+00C4) -> U+00E4
+ // Number of runes visited: 6594
+ // 0000..001F: Common
+ // 0020..0020: Common
+ // 0021..0023: Common
+ // 0024..0024: Common
+}
+
+// Excerpt from UnicodeData.txt
+const unicodeData = `
+00B9;SUPERSCRIPT ONE;No;0;EN;<super> 0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;;
+00BA;MASCULINE ORDINAL INDICATOR;Lo;0;L;<super> 006F;;;;N;;;;;
+00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;;;;
+00BC;VULGAR FRACTION ONE QUARTER;No;0;ON;<fraction> 0031 2044 0034;;;1/4;N;FRACTION ONE QUARTER;;;;
+00BD;VULGAR FRACTION ONE HALF;No;0;ON;<fraction> 0031 2044 0032;;;1/2;N;FRACTION ONE HALF;;;;
+00BE;VULGAR FRACTION THREE QUARTERS;No;0;ON;<fraction> 0033 2044 0034;;;3/4;N;FRACTION THREE QUARTERS;;;;
+00BF;INVERTED QUESTION MARK;Po;0;ON;;;;;N;;;;;
+00C0;LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;LATIN CAPITAL LETTER A GRAVE;;;00E0;
+00C1;LATIN CAPITAL LETTER A WITH ACUTE;Lu;0;L;0041 0301;;;;N;LATIN CAPITAL LETTER A ACUTE;;;00E1;
+00C2;LATIN CAPITAL LETTER A WITH CIRCUMFLEX;Lu;0;L;0041 0302;;;;N;LATIN CAPITAL LETTER A CIRCUMFLEX;;;00E2;
+00C3;LATIN CAPITAL LETTER A WITH TILDE;Lu;0;L;0041 0303;;;;N;LATIN CAPITAL LETTER A TILDE;;;00E3;
+00C4;LATIN CAPITAL LETTER A WITH DIAERESIS;Lu;0;L;0041 0308;;;;N;LATIN CAPITAL LETTER A DIAERESIS;;;00E4;
+
+# A legacy rune range.
+3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
+4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
+`
+
+// Excerpt from Scripts.txt
+const scripts = `
+# Property: Script
+# ================================================
+
+0000..001F ; Common # Cc [32] <control-0000>..<control-001F>
+0020 ; Common # Zs SPACE
+0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN
+0024 ; Common # Sc DOLLAR SIGN
+`
diff --git a/vendor/golang.org/x/text/internal/ucd/ucd.go b/vendor/golang.org/x/text/internal/ucd/ucd.go
new file mode 100644
index 0000000..8c45b5f
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/ucd/ucd.go
@@ -0,0 +1,371 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package ucd provides a parser for Unicode Character Database files, the
+// format of which is defined in http://www.unicode.org/reports/tr44/. See
+// http://www.unicode.org/Public/UCD/latest/ucd/ for example files.
+//
+// It currently does not support substitutions of missing fields.
+package ucd // import "golang.org/x/text/internal/ucd"
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+ "log"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// UnicodeData.txt fields.
+const (
+ CodePoint = iota
+ Name
+ GeneralCategory
+ CanonicalCombiningClass
+ BidiClass
+ DecompMapping
+ DecimalValue
+ DigitValue
+ NumericValue
+ BidiMirrored
+ Unicode1Name
+ ISOComment
+ SimpleUppercaseMapping
+ SimpleLowercaseMapping
+ SimpleTitlecaseMapping
+)
+
+// Parse calls f for each entry in the given reader of a UCD file. It will close
+// the reader upon return. It will call log.Fatal if any error occurs.
+//
+// This implements the most common usage pattern of using Parser.
+func Parse(r io.ReadCloser, f func(p *Parser)) {
+ defer r.Close()
+
+ p := New(r)
+ for p.Next() {
+ f(p)
+ }
+ if err := p.Err(); err != nil {
+ r.Close() // os.Exit will cause defers not to be called.
+ log.Fatal(err)
+ }
+}
+
+// An Option is used to configure a Parser.
+type Option func(p *Parser)
+
+func keepRanges(p *Parser) {
+ p.keepRanges = true
+}
+
+var (
+ // KeepRanges prevents the expansion of ranges. The raw ranges can be
+ // obtained by calling Range(0) on the parser.
+ KeepRanges Option = keepRanges
+)
+
+// The Part option registers a handler for lines starting with a '@'. The text
+// after a '@' is available as the first field. Comments are handled as usual.
+func Part(f func(p *Parser)) Option {
+ return func(p *Parser) {
+ p.partHandler = f
+ }
+}
+
+// The CommentHandler option passes comments that are on a line by themselves to
+// a given handler.
+func CommentHandler(f func(s string)) Option {
+ return func(p *Parser) {
+ p.commentHandler = f
+ }
+}
+
+// A Parser parses Unicode Character Database (UCD) files.
+type Parser struct {
+ scanner *bufio.Scanner
+
+ keepRanges bool // Don't expand rune ranges in field 0.
+
+ err error
+ comment string
+ field []string
+ // parsedRange is needed in case Range(0) is called more than once for one
+ // field. In some cases this requires scanning ahead.
+ line int
+ parsedRange bool
+ rangeStart, rangeEnd rune
+
+ partHandler func(p *Parser)
+ commentHandler func(s string)
+}
+
+func (p *Parser) setError(err error, msg string) {
+ if p.err == nil && err != nil {
+ if msg == "" {
+ p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
+ } else {
+ p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
+ }
+ }
+}
+
+func (p *Parser) getField(i int) string {
+ if i >= len(p.field) {
+ return ""
+ }
+ return p.field[i]
+}
+
+// Err returns a non-nil error if any error occurred during parsing.
+func (p *Parser) Err() error {
+ return p.err
+}
+
+// New returns a Parser for the given Reader.
+func New(r io.Reader, o ...Option) *Parser {
+ p := &Parser{
+ scanner: bufio.NewScanner(r),
+ }
+ for _, f := range o {
+ f(p)
+ }
+ return p
+}
+
+// Next parses the next line in the file. It returns true if a line was parsed
+// and false if it reached the end of the file.
+func (p *Parser) Next() bool {
+ if !p.keepRanges && p.rangeStart < p.rangeEnd {
+ p.rangeStart++
+ return true
+ }
+ p.comment = ""
+ p.field = p.field[:0]
+ p.parsedRange = false
+
+ for p.scanner.Scan() && p.err == nil {
+ p.line++
+ s := p.scanner.Text()
+ if s == "" {
+ continue
+ }
+ if s[0] == '#' {
+ if p.commentHandler != nil {
+ p.commentHandler(strings.TrimSpace(s[1:]))
+ }
+ continue
+ }
+
+ // Parse line
+ if i := strings.IndexByte(s, '#'); i != -1 {
+ p.comment = strings.TrimSpace(s[i+1:])
+ s = s[:i]
+ }
+ if s[0] == '@' {
+ if p.partHandler != nil {
+ p.field = append(p.field, strings.TrimSpace(s[1:]))
+ p.partHandler(p)
+ p.field = p.field[:0]
+ }
+ p.comment = ""
+ continue
+ }
+ for {
+ i := strings.IndexByte(s, ';')
+ if i == -1 {
+ p.field = append(p.field, strings.TrimSpace(s))
+ break
+ }
+ p.field = append(p.field, strings.TrimSpace(s[:i]))
+ s = s[i+1:]
+ }
+ if !p.keepRanges {
+ p.rangeStart, p.rangeEnd = p.getRange(0)
+ }
+ return true
+ }
+ p.setError(p.scanner.Err(), "scanner failed")
+ return false
+}
+
+func parseRune(b string) (rune, error) {
+ if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
+ b = b[2:]
+ }
+ x, err := strconv.ParseUint(b, 16, 32)
+ return rune(x), err
+}
+
+func (p *Parser) parseRune(s string) rune {
+ x, err := parseRune(s)
+ p.setError(err, "failed to parse rune")
+ return x
+}
+
+// Rune parses and returns field i as a rune.
+func (p *Parser) Rune(i int) rune {
+ if i > 0 || p.keepRanges {
+ return p.parseRune(p.getField(i))
+ }
+ return p.rangeStart
+}
+
+// Runes interprets and returns field i as a sequence of runes.
+func (p *Parser) Runes(i int) (runes []rune) {
+ add := func(s string) {
+ if s = strings.TrimSpace(s); len(s) > 0 {
+ runes = append(runes, p.parseRune(s))
+ }
+ }
+ for b := p.getField(i); ; {
+ i := strings.IndexByte(b, ' ')
+ if i == -1 {
+ add(b)
+ break
+ }
+ add(b[:i])
+ b = b[i+1:]
+ }
+ return
+}
+
+var (
+ errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
+
+ // reRange matches one line of a legacy rune range.
+ reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
+)
+
+// Range parses and returns field i as a rune range. A range is inclusive at
+// both ends. If the field only has one rune, first and last will be identical.
+// It supports the legacy format for ranges used in UnicodeData.txt.
+func (p *Parser) Range(i int) (first, last rune) {
+ if !p.keepRanges {
+ return p.rangeStart, p.rangeStart
+ }
+ return p.getRange(i)
+}
+
+func (p *Parser) getRange(i int) (first, last rune) {
+ b := p.getField(i)
+ if k := strings.Index(b, ".."); k != -1 {
+ return p.parseRune(b[:k]), p.parseRune(b[k+2:])
+ }
+ // The first field may not be a rune, in which case we may ignore any error
+ // and set the range as 0..0.
+ x, err := parseRune(b)
+ if err != nil {
+ // Disable range parsing henceforth. This ensures that an error will be
+ // returned if the user subsequently tries to parse this field as
+ // a Rune.
+ p.keepRanges = true
+ }
+ // Special case for UnicodeData that was retained for backwards compatibility.
+ if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
+ if p.parsedRange {
+ return p.rangeStart, p.rangeEnd
+ }
+ mf := reRange.FindStringSubmatch(p.scanner.Text())
+ p.line++
+ if mf == nil || !p.scanner.Scan() {
+ p.setError(errIncorrectLegacyRange, "")
+ return x, x
+ }
+ // Using Bytes would be more efficient here, but Text is a lot easier
+ // and this is not a frequent case.
+ ml := reRange.FindStringSubmatch(p.scanner.Text())
+ if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
+ p.setError(errIncorrectLegacyRange, "")
+ return x, x
+ }
+ p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
+ p.parsedRange = true
+ return p.rangeStart, p.rangeEnd
+ }
+ return x, x
+}
+
+// bools recognizes all valid UCD boolean values.
+var bools = map[string]bool{
+ "": false,
+ "N": false,
+ "No": false,
+ "F": false,
+ "False": false,
+ "Y": true,
+ "Yes": true,
+ "T": true,
+ "True": true,
+}
+
+// Bool parses and returns field i as a boolean value.
+func (p *Parser) Bool(i int) bool {
+ f := p.getField(i)
+ for s, v := range bools {
+ if f == s {
+ return v
+ }
+ }
+ p.setError(strconv.ErrSyntax, "error parsing bool")
+ return false
+}
+
+// Int parses and returns field i as an integer value.
+func (p *Parser) Int(i int) int {
+ x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
+ p.setError(err, "error parsing int")
+ return int(x)
+}
+
+// Uint parses and returns field i as an unsigned integer value.
+func (p *Parser) Uint(i int) uint {
+ x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
+ p.setError(err, "error parsing uint")
+ return uint(x)
+}
+
+// Float parses and returns field i as a decimal value.
+func (p *Parser) Float(i int) float64 {
+ x, err := strconv.ParseFloat(string(p.getField(i)), 64)
+ p.setError(err, "error parsing float")
+ return x
+}
+
+// String parses and returns field i as a string value.
+func (p *Parser) String(i int) string {
+ return string(p.getField(i))
+}
+
+// Strings parses and returns field i as a space-separated list of strings.
+func (p *Parser) Strings(i int) []string {
+ ss := strings.Split(string(p.getField(i)), " ")
+ for i, s := range ss {
+ ss[i] = strings.TrimSpace(s)
+ }
+ return ss
+}
+
+// Comment returns the comments for the current line.
+func (p *Parser) Comment() string {
+ return string(p.comment)
+}
+
+var errUndefinedEnum = errors.New("ucd: undefined enum value")
+
+// Enum interprets and returns field i as a value that must be one of the values
+// in enum.
+func (p *Parser) Enum(i int, enum ...string) string {
+ f := p.getField(i)
+ for _, s := range enum {
+ if f == s {
+ return s
+ }
+ }
+ p.setError(errUndefinedEnum, "error parsing enum")
+ return ""
+}
diff --git a/vendor/golang.org/x/text/internal/ucd/ucd_test.go b/vendor/golang.org/x/text/internal/ucd/ucd_test.go
new file mode 100644
index 0000000..11a6542
--- /dev/null
+++ b/vendor/golang.org/x/text/internal/ucd/ucd_test.go
@@ -0,0 +1,105 @@
+package ucd
+
+import (
+ "strings"
+ "testing"
+)
+
+const file = `
+# Comments should be skipped
+# rune; bool; uint; int; float; runes; # Y
+0..0005; Y; 0; 2; -5.25 ; 0 1 2 3 4 5;
+6..0007; Yes ; 6; 1; -4.25 ; 0006 0007;
+8; T ; 8 ; 0 ;-3.25 ;;# T
+9; True ;9 ; -1;-2.25 ; 0009;
+
+# more comments to be ignored
+@Part0
+
+A; N; 10 ; -2; -1.25; ;# N
+B; No; 11 ; -3; -0.25;
+C; False;12; -4; 0.75;
+D; ;13;-5;1.75;
+
+@Part1 # Another part.
+# We test part comments get removed by not commenting the the next line.
+E..10FFFF; F; 14 ; -6; 2.75;
+`
+
+var want = []struct {
+ start, end rune
+}{
+ {0x00, 0x05},
+ {0x06, 0x07},
+ {0x08, 0x08},
+ {0x09, 0x09},
+ {0x0A, 0x0A},
+ {0x0B, 0x0B},
+ {0x0C, 0x0C},
+ {0x0D, 0x0D},
+ {0x0E, 0x10FFFF},
+}
+
+func TestGetters(t *testing.T) {
+ parts := [][2]string{
+ {"Part0", ""},
+ {"Part1", "Another part."},
+ }
+ handler := func(p *Parser) {
+ if len(parts) == 0 {
+ t.Error("Part handler invoked too many times.")
+ return
+ }
+ want := parts[0]
+ parts = parts[1:]
+ if got0, got1 := p.String(0), p.Comment(); got0 != want[0] || got1 != want[1] {
+ t.Errorf(`part: got %q, %q; want %q"`, got0, got1, want)
+ }
+ }
+
+ p := New(strings.NewReader(file), KeepRanges, Part(handler))
+ for i := 0; p.Next(); i++ {
+ start, end := p.Range(0)
+ w := want[i]
+ if start != w.start || end != w.end {
+ t.Fatalf("%d:Range(0); got %#x..%#x; want %#x..%#x", i, start, end, w.start, w.end)
+ }
+ if w.start == w.end && p.Rune(0) != w.start {
+ t.Errorf("%d:Range(0).start: got %U; want %U", i, p.Rune(0), w.start)
+ }
+ if got, want := p.Bool(1), w.start <= 9; got != want {
+ t.Errorf("%d:Bool(1): got %v; want %v", i, got, want)
+ }
+ if got := p.Rune(4); got != 0 || p.Err() == nil {
+ t.Errorf("%d:Rune(%q): got no error; want error", i, p.String(1))
+ }
+ p.err = nil
+ if got := p.Uint(2); rune(got) != start {
+ t.Errorf("%d:Uint(2): got %v; want %v", i, got, start)
+ }
+ if got, want := p.Int(3), 2-i; got != want {
+ t.Errorf("%d:Int(3): got %v; want %v", i, got, want)
+ }
+ if got, want := p.Float(4), -5.25+float64(i); got != want {
+ t.Errorf("%d:Int(3): got %v; want %v", i, got, want)
+ }
+ if got := p.Runes(5); got == nil {
+ if p.String(5) != "" {
+ t.Errorf("%d:Runes(5): expected non-empty list", i)
+ }
+ } else {
+ if got[0] != start || got[len(got)-1] != end {
+ t.Errorf("%d:Runes(5): got %#x; want %#x..%#x", i, got, start, end)
+ }
+ }
+ if got := p.Comment(); got != "" && got != p.String(1) {
+ t.Errorf("%d:Comment(): got %v; want %v", i, got, p.String(1))
+ }
+ }
+ if err := p.Err(); err != nil {
+ t.Errorf("Parser error: %v", err)
+ }
+ if len(parts) != 0 {
+ t.Errorf("expected %d more invocations of part handler", len(parts))
+ }
+}