diff options
author | Dimitri Sokolyuk <demon@dim13.org> | 2019-07-02 12:12:53 +0200 |
---|---|---|
committer | Dimitri Sokolyuk <demon@dim13.org> | 2019-07-02 12:12:53 +0200 |
commit | 473acc61c8392dc7ae303d91568e179c4f105a76 (patch) | |
tree | a2070cba25f918cda460387e587dd60551b23894 /vendor/golang.org/x/net/idna/idna.go | |
parent | dd45f63209a8e51979b11182253ee80b5289c10a (diff) |
add black list
Diffstat (limited to 'vendor/golang.org/x/net/idna/idna.go')
-rw-r--r-- | vendor/golang.org/x/net/idna/idna.go | 732 |
1 files changed, 0 insertions, 732 deletions
diff --git a/vendor/golang.org/x/net/idna/idna.go b/vendor/golang.org/x/net/idna/idna.go deleted file mode 100644 index 346fe44..0000000 --- a/vendor/golang.org/x/net/idna/idna.go +++ /dev/null @@ -1,732 +0,0 @@ -// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. - -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package idna implements IDNA2008 using the compatibility processing -// defined by UTS (Unicode Technical Standard) #46, which defines a standard to -// deal with the transition from IDNA2003. -// -// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC -// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894. -// UTS #46 is defined in http://www.unicode.org/reports/tr46. -// See http://unicode.org/cldr/utility/idna.jsp for a visualization of the -// differences between these two standards. -package idna // import "golang.org/x/net/idna" - -import ( - "fmt" - "strings" - "unicode/utf8" - - "golang.org/x/text/secure/bidirule" - "golang.org/x/text/unicode/bidi" - "golang.org/x/text/unicode/norm" -) - -// NOTE: Unlike common practice in Go APIs, the functions will return a -// sanitized domain name in case of errors. Browsers sometimes use a partially -// evaluated string as lookup. -// TODO: the current error handling is, in my opinion, the least opinionated. -// Other strategies are also viable, though: -// Option 1) Return an empty string in case of error, but allow the user to -// specify explicitly which errors to ignore. -// Option 2) Return the partially evaluated string if it is itself a valid -// string, otherwise return the empty string in case of error. -// Option 3) Option 1 and 2. -// Option 4) Always return an empty string for now and implement Option 1 as -// needed, and document that the return string may not be empty in case of -// error in the future. -// I think Option 1 is best, but it is quite opinionated. - -// ToASCII is a wrapper for Punycode.ToASCII. -func ToASCII(s string) (string, error) { - return Punycode.process(s, true) -} - -// ToUnicode is a wrapper for Punycode.ToUnicode. -func ToUnicode(s string) (string, error) { - return Punycode.process(s, false) -} - -// An Option configures a Profile at creation time. -type Option func(*options) - -// Transitional sets a Profile to use the Transitional mapping as defined in UTS -// #46. This will cause, for example, "ß" to be mapped to "ss". Using the -// transitional mapping provides a compromise between IDNA2003 and IDNA2008 -// compatibility. It is used by most browsers when resolving domain names. This -// option is only meaningful if combined with MapForLookup. -func Transitional(transitional bool) Option { - return func(o *options) { o.transitional = true } -} - -// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts -// are longer than allowed by the RFC. -func VerifyDNSLength(verify bool) Option { - return func(o *options) { o.verifyDNSLength = verify } -} - -// RemoveLeadingDots removes leading label separators. Leading runes that map to -// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. -// -// This is the behavior suggested by the UTS #46 and is adopted by some -// browsers. -func RemoveLeadingDots(remove bool) Option { - return func(o *options) { o.removeLeadingDots = remove } -} - -// ValidateLabels sets whether to check the mandatory label validation criteria -// as defined in Section 5.4 of RFC 5891. This includes testing for correct use -// of hyphens ('-'), normalization, validity of runes, and the context rules. -func ValidateLabels(enable bool) Option { - return func(o *options) { - // Don't override existing mappings, but set one that at least checks - // normalization if it is not set. - if o.mapping == nil && enable { - o.mapping = normalize - } - o.trie = trie - o.validateLabels = enable - o.fromPuny = validateFromPunycode - } -} - -// StrictDomainName limits the set of permissible ASCII characters to those -// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the -// hyphen). This is set by default for MapForLookup and ValidateForRegistration. -// -// This option is useful, for instance, for browsers that allow characters -// outside this range, for example a '_' (U+005F LOW LINE). See -// http://www.rfc-editor.org/std/std3.txt for more details This option -// corresponds to the UseSTD3ASCIIRules option in UTS #46. -func StrictDomainName(use bool) Option { - return func(o *options) { - o.trie = trie - o.useSTD3Rules = use - o.fromPuny = validateFromPunycode - } -} - -// NOTE: the following options pull in tables. The tables should not be linked -// in as long as the options are not used. - -// BidiRule enables the Bidi rule as defined in RFC 5893. Any application -// that relies on proper validation of labels should include this rule. -func BidiRule() Option { - return func(o *options) { o.bidirule = bidirule.ValidString } -} - -// ValidateForRegistration sets validation options to verify that a given IDN is -// properly formatted for registration as defined by Section 4 of RFC 5891. -func ValidateForRegistration() Option { - return func(o *options) { - o.mapping = validateRegistration - StrictDomainName(true)(o) - ValidateLabels(true)(o) - VerifyDNSLength(true)(o) - BidiRule()(o) - } -} - -// MapForLookup sets validation and mapping options such that a given IDN is -// transformed for domain name lookup according to the requirements set out in -// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894, -// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option -// to add this check. -// -// The mappings include normalization and mapping case, width and other -// compatibility mappings. -func MapForLookup() Option { - return func(o *options) { - o.mapping = validateAndMap - StrictDomainName(true)(o) - ValidateLabels(true)(o) - } -} - -type options struct { - transitional bool - useSTD3Rules bool - validateLabels bool - verifyDNSLength bool - removeLeadingDots bool - - trie *idnaTrie - - // fromPuny calls validation rules when converting A-labels to U-labels. - fromPuny func(p *Profile, s string) error - - // mapping implements a validation and mapping step as defined in RFC 5895 - // or UTS 46, tailored to, for example, domain registration or lookup. - mapping func(p *Profile, s string) (mapped string, isBidi bool, err error) - - // bidirule, if specified, checks whether s conforms to the Bidi Rule - // defined in RFC 5893. - bidirule func(s string) bool -} - -// A Profile defines the configuration of an IDNA mapper. -type Profile struct { - options -} - -func apply(o *options, opts []Option) { - for _, f := range opts { - f(o) - } -} - -// New creates a new Profile. -// -// With no options, the returned Profile is the most permissive and equals the -// Punycode Profile. Options can be passed to further restrict the Profile. The -// MapForLookup and ValidateForRegistration options set a collection of options, -// for lookup and registration purposes respectively, which can be tailored by -// adding more fine-grained options, where later options override earlier -// options. -func New(o ...Option) *Profile { - p := &Profile{} - apply(&p.options, o) - return p -} - -// ToASCII converts a domain or domain label to its ASCII form. For example, -// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and -// ToASCII("golang") is "golang". If an error is encountered it will return -// an error and a (partially) processed result. -func (p *Profile) ToASCII(s string) (string, error) { - return p.process(s, true) -} - -// ToUnicode converts a domain or domain label to its Unicode form. For example, -// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and -// ToUnicode("golang") is "golang". If an error is encountered it will return -// an error and a (partially) processed result. -func (p *Profile) ToUnicode(s string) (string, error) { - pp := *p - pp.transitional = false - return pp.process(s, false) -} - -// String reports a string with a description of the profile for debugging -// purposes. The string format may change with different versions. -func (p *Profile) String() string { - s := "" - if p.transitional { - s = "Transitional" - } else { - s = "NonTransitional" - } - if p.useSTD3Rules { - s += ":UseSTD3Rules" - } - if p.validateLabels { - s += ":ValidateLabels" - } - if p.verifyDNSLength { - s += ":VerifyDNSLength" - } - return s -} - -var ( - // Punycode is a Profile that does raw punycode processing with a minimum - // of validation. - Punycode *Profile = punycode - - // Lookup is the recommended profile for looking up domain names, according - // to Section 5 of RFC 5891. The exact configuration of this profile may - // change over time. - Lookup *Profile = lookup - - // Display is the recommended profile for displaying domain names. - // The configuration of this profile may change over time. - Display *Profile = display - - // Registration is the recommended profile for checking whether a given - // IDN is valid for registration, according to Section 4 of RFC 5891. - Registration *Profile = registration - - punycode = &Profile{} - lookup = &Profile{options{ - transitional: true, - useSTD3Rules: true, - validateLabels: true, - trie: trie, - fromPuny: validateFromPunycode, - mapping: validateAndMap, - bidirule: bidirule.ValidString, - }} - display = &Profile{options{ - useSTD3Rules: true, - validateLabels: true, - trie: trie, - fromPuny: validateFromPunycode, - mapping: validateAndMap, - bidirule: bidirule.ValidString, - }} - registration = &Profile{options{ - useSTD3Rules: true, - validateLabels: true, - verifyDNSLength: true, - trie: trie, - fromPuny: validateFromPunycode, - mapping: validateRegistration, - bidirule: bidirule.ValidString, - }} - - // TODO: profiles - // Register: recommended for approving domain names: don't do any mappings - // but rather reject on invalid input. Bundle or block deviation characters. -) - -type labelError struct{ label, code_ string } - -func (e labelError) code() string { return e.code_ } -func (e labelError) Error() string { - return fmt.Sprintf("idna: invalid label %q", e.label) -} - -type runeError rune - -func (e runeError) code() string { return "P1" } -func (e runeError) Error() string { - return fmt.Sprintf("idna: disallowed rune %U", e) -} - -// process implements the algorithm described in section 4 of UTS #46, -// see http://www.unicode.org/reports/tr46. -func (p *Profile) process(s string, toASCII bool) (string, error) { - var err error - var isBidi bool - if p.mapping != nil { - s, isBidi, err = p.mapping(p, s) - } - // Remove leading empty labels. - if p.removeLeadingDots { - for ; len(s) > 0 && s[0] == '.'; s = s[1:] { - } - } - // TODO: allow for a quick check of the tables data. - // It seems like we should only create this error on ToASCII, but the - // UTS 46 conformance tests suggests we should always check this. - if err == nil && p.verifyDNSLength && s == "" { - err = &labelError{s, "A4"} - } - labels := labelIter{orig: s} - for ; !labels.done(); labels.next() { - label := labels.label() - if label == "" { - // Empty labels are not okay. The label iterator skips the last - // label if it is empty. - if err == nil && p.verifyDNSLength { - err = &labelError{s, "A4"} - } - continue - } - if strings.HasPrefix(label, acePrefix) { - u, err2 := decode(label[len(acePrefix):]) - if err2 != nil { - if err == nil { - err = err2 - } - // Spec says keep the old label. - continue - } - isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight - labels.set(u) - if err == nil && p.validateLabels { - err = p.fromPuny(p, u) - } - if err == nil { - // This should be called on NonTransitional, according to the - // spec, but that currently does not have any effect. Use the - // original profile to preserve options. - err = p.validateLabel(u) - } - } else if err == nil { - err = p.validateLabel(label) - } - } - if isBidi && p.bidirule != nil && err == nil { - for labels.reset(); !labels.done(); labels.next() { - if !p.bidirule(labels.label()) { - err = &labelError{s, "B"} - break - } - } - } - if toASCII { - for labels.reset(); !labels.done(); labels.next() { - label := labels.label() - if !ascii(label) { - a, err2 := encode(acePrefix, label) - if err == nil { - err = err2 - } - label = a - labels.set(a) - } - n := len(label) - if p.verifyDNSLength && err == nil && (n == 0 || n > 63) { - err = &labelError{label, "A4"} - } - } - } - s = labels.result() - if toASCII && p.verifyDNSLength && err == nil { - // Compute the length of the domain name minus the root label and its dot. - n := len(s) - if n > 0 && s[n-1] == '.' { - n-- - } - if len(s) < 1 || n > 253 { - err = &labelError{s, "A4"} - } - } - return s, err -} - -func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) { - // TODO: consider first doing a quick check to see if any of these checks - // need to be done. This will make it slower in the general case, but - // faster in the common case. - mapped = norm.NFC.String(s) - isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft - return mapped, isBidi, nil -} - -func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) { - // TODO: filter need for normalization in loop below. - if !norm.NFC.IsNormalString(s) { - return s, false, &labelError{s, "V1"} - } - for i := 0; i < len(s); { - v, sz := trie.lookupString(s[i:]) - if sz == 0 { - return s, bidi, runeError(utf8.RuneError) - } - bidi = bidi || info(v).isBidi(s[i:]) - // Copy bytes not copied so far. - switch p.simplify(info(v).category()) { - // TODO: handle the NV8 defined in the Unicode idna data set to allow - // for strict conformance to IDNA2008. - case valid, deviation: - case disallowed, mapped, unknown, ignored: - r, _ := utf8.DecodeRuneInString(s[i:]) - return s, bidi, runeError(r) - } - i += sz - } - return s, bidi, nil -} - -func (c info) isBidi(s string) bool { - if !c.isMapped() { - return c&attributesMask == rtl - } - // TODO: also store bidi info for mapped data. This is possible, but a bit - // cumbersome and not for the common case. - p, _ := bidi.LookupString(s) - switch p.Class() { - case bidi.R, bidi.AL, bidi.AN: - return true - } - return false -} - -func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) { - var ( - b []byte - k int - ) - // combinedInfoBits contains the or-ed bits of all runes. We use this - // to derive the mayNeedNorm bit later. This may trigger normalization - // overeagerly, but it will not do so in the common case. The end result - // is another 10% saving on BenchmarkProfile for the common case. - var combinedInfoBits info - for i := 0; i < len(s); { - v, sz := trie.lookupString(s[i:]) - if sz == 0 { - b = append(b, s[k:i]...) - b = append(b, "\ufffd"...) - k = len(s) - if err == nil { - err = runeError(utf8.RuneError) - } - break - } - combinedInfoBits |= info(v) - bidi = bidi || info(v).isBidi(s[i:]) - start := i - i += sz - // Copy bytes not copied so far. - switch p.simplify(info(v).category()) { - case valid: - continue - case disallowed: - if err == nil { - r, _ := utf8.DecodeRuneInString(s[start:]) - err = runeError(r) - } - continue - case mapped, deviation: - b = append(b, s[k:start]...) - b = info(v).appendMapping(b, s[start:i]) - case ignored: - b = append(b, s[k:start]...) - // drop the rune - case unknown: - b = append(b, s[k:start]...) - b = append(b, "\ufffd"...) - } - k = i - } - if k == 0 { - // No changes so far. - if combinedInfoBits&mayNeedNorm != 0 { - s = norm.NFC.String(s) - } - } else { - b = append(b, s[k:]...) - if norm.NFC.QuickSpan(b) != len(b) { - b = norm.NFC.Bytes(b) - } - // TODO: the punycode converters require strings as input. - s = string(b) - } - return s, bidi, err -} - -// A labelIter allows iterating over domain name labels. -type labelIter struct { - orig string - slice []string - curStart int - curEnd int - i int -} - -func (l *labelIter) reset() { - l.curStart = 0 - l.curEnd = 0 - l.i = 0 -} - -func (l *labelIter) done() bool { - return l.curStart >= len(l.orig) -} - -func (l *labelIter) result() string { - if l.slice != nil { - return strings.Join(l.slice, ".") - } - return l.orig -} - -func (l *labelIter) label() string { - if l.slice != nil { - return l.slice[l.i] - } - p := strings.IndexByte(l.orig[l.curStart:], '.') - l.curEnd = l.curStart + p - if p == -1 { - l.curEnd = len(l.orig) - } - return l.orig[l.curStart:l.curEnd] -} - -// next sets the value to the next label. It skips the last label if it is empty. -func (l *labelIter) next() { - l.i++ - if l.slice != nil { - if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" { - l.curStart = len(l.orig) - } - } else { - l.curStart = l.curEnd + 1 - if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' { - l.curStart = len(l.orig) - } - } -} - -func (l *labelIter) set(s string) { - if l.slice == nil { - l.slice = strings.Split(l.orig, ".") - } - l.slice[l.i] = s -} - -// acePrefix is the ASCII Compatible Encoding prefix. -const acePrefix = "xn--" - -func (p *Profile) simplify(cat category) category { - switch cat { - case disallowedSTD3Mapped: - if p.useSTD3Rules { - cat = disallowed - } else { - cat = mapped - } - case disallowedSTD3Valid: - if p.useSTD3Rules { - cat = disallowed - } else { - cat = valid - } - case deviation: - if !p.transitional { - cat = valid - } - case validNV8, validXV8: - // TODO: handle V2008 - cat = valid - } - return cat -} - -func validateFromPunycode(p *Profile, s string) error { - if !norm.NFC.IsNormalString(s) { - return &labelError{s, "V1"} - } - // TODO: detect whether string may have to be normalized in the following - // loop. - for i := 0; i < len(s); { - v, sz := trie.lookupString(s[i:]) - if sz == 0 { - return runeError(utf8.RuneError) - } - if c := p.simplify(info(v).category()); c != valid && c != deviation { - return &labelError{s, "V6"} - } - i += sz - } - return nil -} - -const ( - zwnj = "\u200c" - zwj = "\u200d" -) - -type joinState int8 - -const ( - stateStart joinState = iota - stateVirama - stateBefore - stateBeforeVirama - stateAfter - stateFAIL -) - -var joinStates = [][numJoinTypes]joinState{ - stateStart: { - joiningL: stateBefore, - joiningD: stateBefore, - joinZWNJ: stateFAIL, - joinZWJ: stateFAIL, - joinVirama: stateVirama, - }, - stateVirama: { - joiningL: stateBefore, - joiningD: stateBefore, - }, - stateBefore: { - joiningL: stateBefore, - joiningD: stateBefore, - joiningT: stateBefore, - joinZWNJ: stateAfter, - joinZWJ: stateFAIL, - joinVirama: stateBeforeVirama, - }, - stateBeforeVirama: { - joiningL: stateBefore, - joiningD: stateBefore, - joiningT: stateBefore, - }, - stateAfter: { - joiningL: stateFAIL, - joiningD: stateBefore, - joiningT: stateAfter, - joiningR: stateStart, - joinZWNJ: stateFAIL, - joinZWJ: stateFAIL, - joinVirama: stateAfter, // no-op as we can't accept joiners here - }, - stateFAIL: { - 0: stateFAIL, - joiningL: stateFAIL, - joiningD: stateFAIL, - joiningT: stateFAIL, - joiningR: stateFAIL, - joinZWNJ: stateFAIL, - joinZWJ: stateFAIL, - joinVirama: stateFAIL, - }, -} - -// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are -// already implicitly satisfied by the overall implementation. -func (p *Profile) validateLabel(s string) (err error) { - if s == "" { - if p.verifyDNSLength { - return &labelError{s, "A4"} - } - return nil - } - if !p.validateLabels { - return nil - } - trie := p.trie // p.validateLabels is only set if trie is set. - if len(s) > 4 && s[2] == '-' && s[3] == '-' { - return &labelError{s, "V2"} - } - if s[0] == '-' || s[len(s)-1] == '-' { - return &labelError{s, "V3"} - } - // TODO: merge the use of this in the trie. - v, sz := trie.lookupString(s) - x := info(v) - if x.isModifier() { - return &labelError{s, "V5"} - } - // Quickly return in the absence of zero-width (non) joiners. - if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { - return nil - } - st := stateStart - for i := 0; ; { - jt := x.joinType() - if s[i:i+sz] == zwj { - jt = joinZWJ - } else if s[i:i+sz] == zwnj { - jt = joinZWNJ - } - st = joinStates[st][jt] - if x.isViramaModifier() { - st = joinStates[st][joinVirama] - } - if i += sz; i == len(s) { - break - } - v, sz = trie.lookupString(s[i:]) - x = info(v) - } - if st == stateFAIL || st == stateAfter { - return &labelError{s, "C"} - } - return nil -} - -func ascii(s string) bool { - for i := 0; i < len(s); i++ { - if s[i] >= utf8.RuneSelf { - return false - } - } - return true -} |