From 473acc61c8392dc7ae303d91568e179c4f105a76 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Tue, 2 Jul 2019 12:12:53 +0200 Subject: add black list --- vendor/golang.org/x/text/cases/map.go | 816 ---------------------------------- 1 file changed, 816 deletions(-) delete mode 100644 vendor/golang.org/x/text/cases/map.go (limited to 'vendor/golang.org/x/text/cases/map.go') diff --git a/vendor/golang.org/x/text/cases/map.go b/vendor/golang.org/x/text/cases/map.go deleted file mode 100644 index 4baebaa..0000000 --- a/vendor/golang.org/x/text/cases/map.go +++ /dev/null @@ -1,816 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package cases - -// This file contains the definitions of case mappings for all supported -// languages. The rules for the language-specific tailorings were taken and -// modified from the CLDR transform definitions in common/transforms. - -import ( - "strings" - "unicode" - "unicode/utf8" - - "golang.org/x/text/internal" - "golang.org/x/text/language" - "golang.org/x/text/transform" - "golang.org/x/text/unicode/norm" -) - -// A mapFunc takes a context set to the current rune and writes the mapped -// version to the same context. It may advance the context to the next rune. It -// returns whether a checkpoint is possible: whether the pDst bytes written to -// dst so far won't need changing as we see more source bytes. -type mapFunc func(*context) bool - -// A spanFunc takes a context set to the current rune and returns whether this -// rune would be altered when written to the output. It may advance the context -// to the next rune. It returns whether a checkpoint is possible. -type spanFunc func(*context) bool - -// maxIgnorable defines the maximum number of ignorables to consider for -// lookahead operations. -const maxIgnorable = 30 - -// supported lists the language tags for which we have tailorings. -const supported = "und af az el lt nl tr" - -func init() { - tags := []language.Tag{} - for _, s := range strings.Split(supported, " ") { - tags = append(tags, language.MustParse(s)) - } - matcher = internal.NewInheritanceMatcher(tags) - Supported = language.NewCoverage(tags) -} - -var ( - matcher *internal.InheritanceMatcher - - Supported language.Coverage - - // We keep the following lists separate, instead of having a single per- - // language struct, to give the compiler a chance to remove unused code. - - // Some uppercase mappers are stateless, so we can precompute the - // Transformers and save a bit on runtime allocations. - upperFunc = []struct { - upper mapFunc - span spanFunc - }{ - {nil, nil}, // und - {nil, nil}, // af - {aztrUpper(upper), isUpper}, // az - {elUpper, noSpan}, // el - {ltUpper(upper), noSpan}, // lt - {nil, nil}, // nl - {aztrUpper(upper), isUpper}, // tr - } - - undUpper transform.SpanningTransformer = &undUpperCaser{} - undLower transform.SpanningTransformer = &undLowerCaser{} - undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{} - - lowerFunc = []mapFunc{ - nil, // und - nil, // af - aztrLower, // az - nil, // el - ltLower, // lt - nil, // nl - aztrLower, // tr - } - - titleInfos = []struct { - title mapFunc - lower mapFunc - titleSpan spanFunc - rewrite func(*context) - }{ - {title, lower, isTitle, nil}, // und - {title, lower, isTitle, afnlRewrite}, // af - {aztrUpper(title), aztrLower, isTitle, nil}, // az - {title, lower, isTitle, nil}, // el - {ltUpper(title), ltLower, noSpan, nil}, // lt - {nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl - {aztrUpper(title), aztrLower, isTitle, nil}, // tr - } -) - -func makeUpper(t language.Tag, o options) transform.SpanningTransformer { - _, i, _ := matcher.Match(t) - f := upperFunc[i].upper - if f == nil { - return undUpper - } - return &simpleCaser{f: f, span: upperFunc[i].span} -} - -func makeLower(t language.Tag, o options) transform.SpanningTransformer { - _, i, _ := matcher.Match(t) - f := lowerFunc[i] - if f == nil { - if o.ignoreFinalSigma { - return undLowerIgnoreSigma - } - return undLower - } - if o.ignoreFinalSigma { - return &simpleCaser{f: f, span: isLower} - } - return &lowerCaser{ - first: f, - midWord: finalSigma(f), - } -} - -func makeTitle(t language.Tag, o options) transform.SpanningTransformer { - _, i, _ := matcher.Match(t) - x := &titleInfos[i] - lower := x.lower - if o.noLower { - lower = (*context).copy - } else if !o.ignoreFinalSigma { - lower = finalSigma(lower) - } - return &titleCaser{ - title: x.title, - lower: lower, - titleSpan: x.titleSpan, - rewrite: x.rewrite, - } -} - -func noSpan(c *context) bool { - c.err = transform.ErrEndOfSpan - return false -} - -// TODO: consider a similar special case for the fast majority lower case. This -// is a bit more involved so will require some more precise benchmarking to -// justify it. - -type undUpperCaser struct{ transform.NopResetter } - -// undUpperCaser implements the Transformer interface for doing an upper case -// mapping for the root locale (und). It eliminates the need for an allocation -// as it prevents escaping by not using function pointers. -func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { - c := context{dst: dst, src: src, atEOF: atEOF} - for c.next() { - upper(&c) - c.checkpoint() - } - return c.ret() -} - -func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) { - c := context{src: src, atEOF: atEOF} - for c.next() && isUpper(&c) { - c.checkpoint() - } - return c.retSpan() -} - -// undLowerIgnoreSigmaCaser implements the Transformer interface for doing -// a lower case mapping for the root locale (und) ignoring final sigma -// handling. This casing algorithm is used in some performance-critical packages -// like secure/precis and x/net/http/idna, which warrants its special-casing. -type undLowerIgnoreSigmaCaser struct{ transform.NopResetter } - -func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { - c := context{dst: dst, src: src, atEOF: atEOF} - for c.next() && lower(&c) { - c.checkpoint() - } - return c.ret() - -} - -// Span implements a generic lower-casing. This is possible as isLower works -// for all lowercasing variants. All lowercase variants only vary in how they -// transform a non-lowercase letter. They will never change an already lowercase -// letter. In addition, there is no state. -func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) { - c := context{src: src, atEOF: atEOF} - for c.next() && isLower(&c) { - c.checkpoint() - } - return c.retSpan() -} - -type simpleCaser struct { - context - f mapFunc - span spanFunc -} - -// simpleCaser implements the Transformer interface for doing a case operation -// on a rune-by-rune basis. -func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { - c := context{dst: dst, src: src, atEOF: atEOF} - for c.next() && t.f(&c) { - c.checkpoint() - } - return c.ret() -} - -func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) { - c := context{src: src, atEOF: atEOF} - for c.next() && t.span(&c) { - c.checkpoint() - } - return c.retSpan() -} - -// undLowerCaser implements the Transformer interface for doing a lower case -// mapping for the root locale (und) ignoring final sigma handling. This casing -// algorithm is used in some performance-critical packages like secure/precis -// and x/net/http/idna, which warrants its special-casing. -type undLowerCaser struct{ transform.NopResetter } - -func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { - c := context{dst: dst, src: src, atEOF: atEOF} - - for isInterWord := true; c.next(); { - if isInterWord { - if c.info.isCased() { - if !lower(&c) { - break - } - isInterWord = false - } else if !c.copy() { - break - } - } else { - if c.info.isNotCasedAndNotCaseIgnorable() { - if !c.copy() { - break - } - isInterWord = true - } else if !c.hasPrefix("Σ") { - if !lower(&c) { - break - } - } else if !finalSigmaBody(&c) { - break - } - } - c.checkpoint() - } - return c.ret() -} - -func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) { - c := context{src: src, atEOF: atEOF} - for c.next() && isLower(&c) { - c.checkpoint() - } - return c.retSpan() -} - -// lowerCaser implements the Transformer interface. The default Unicode lower -// casing requires different treatment for the first and subsequent characters -// of a word, most notably to handle the Greek final Sigma. -type lowerCaser struct { - undLowerIgnoreSigmaCaser - - context - - first, midWord mapFunc -} - -func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { - t.context = context{dst: dst, src: src, atEOF: atEOF} - c := &t.context - - for isInterWord := true; c.next(); { - if isInterWord { - if c.info.isCased() { - if !t.first(c) { - break - } - isInterWord = false - } else if !c.copy() { - break - } - } else { - if c.info.isNotCasedAndNotCaseIgnorable() { - if !c.copy() { - break - } - isInterWord = true - } else if !t.midWord(c) { - break - } - } - c.checkpoint() - } - return c.ret() -} - -// titleCaser implements the Transformer interface. Title casing algorithms -// distinguish between the first letter of a word and subsequent letters of the -// same word. It uses state to avoid requiring a potentially infinite lookahead. -type titleCaser struct { - context - - // rune mappings used by the actual casing algorithms. - title mapFunc - lower mapFunc - titleSpan spanFunc - - rewrite func(*context) -} - -// Transform implements the standard Unicode title case algorithm as defined in -// Chapter 3 of The Unicode Standard: -// toTitlecase(X): Find the word boundaries in X according to Unicode Standard -// Annex #29, "Unicode Text Segmentation." For each word boundary, find the -// first cased character F following the word boundary. If F exists, map F to -// Titlecase_Mapping(F); then map all characters C between F and the following -// word boundary to Lowercase_Mapping(C). -func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { - t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord} - c := &t.context - - if !c.next() { - return c.ret() - } - - for { - p := c.info - if t.rewrite != nil { - t.rewrite(c) - } - - wasMid := p.isMid() - // Break out of this loop on failure to ensure we do not modify the - // state incorrectly. - if p.isCased() { - if !c.isMidWord { - if !t.title(c) { - break - } - c.isMidWord = true - } else if !t.lower(c) { - break - } - } else if !c.copy() { - break - } else if p.isBreak() { - c.isMidWord = false - } - - // As we save the state of the transformer, it is safe to call - // checkpoint after any successful write. - if !(c.isMidWord && wasMid) { - c.checkpoint() - } - - if !c.next() { - break - } - if wasMid && c.info.isMid() { - c.isMidWord = false - } - } - return c.ret() -} - -func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) { - t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord} - c := &t.context - - if !c.next() { - return c.retSpan() - } - - for { - p := c.info - if t.rewrite != nil { - t.rewrite(c) - } - - wasMid := p.isMid() - // Break out of this loop on failure to ensure we do not modify the - // state incorrectly. - if p.isCased() { - if !c.isMidWord { - if !t.titleSpan(c) { - break - } - c.isMidWord = true - } else if !isLower(c) { - break - } - } else if p.isBreak() { - c.isMidWord = false - } - // As we save the state of the transformer, it is safe to call - // checkpoint after any successful write. - if !(c.isMidWord && wasMid) { - c.checkpoint() - } - - if !c.next() { - break - } - if wasMid && c.info.isMid() { - c.isMidWord = false - } - } - return c.retSpan() -} - -// finalSigma adds Greek final Sigma handing to another casing function. It -// determines whether a lowercased sigma should be σ or ς, by looking ahead for -// case-ignorables and a cased letters. -func finalSigma(f mapFunc) mapFunc { - return func(c *context) bool { - if !c.hasPrefix("Σ") { - return f(c) - } - return finalSigmaBody(c) - } -} - -func finalSigmaBody(c *context) bool { - // Current rune must be ∑. - - // ::NFD(); - // # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA - // Σ } [:case-ignorable:]* [:cased:] → σ; - // [:cased:] [:case-ignorable:]* { Σ → ς; - // ::Any-Lower; - // ::NFC(); - - p := c.pDst - c.writeString("ς") - - // TODO: we should do this here, but right now this will never have an - // effect as this is called when the prefix is Sigma, whereas Dutch and - // Afrikaans only test for an apostrophe. - // - // if t.rewrite != nil { - // t.rewrite(c) - // } - - // We need to do one more iteration after maxIgnorable, as a cased - // letter is not an ignorable and may modify the result. - wasMid := false - for i := 0; i < maxIgnorable+1; i++ { - if !c.next() { - return false - } - if !c.info.isCaseIgnorable() { - // All Midword runes are also case ignorable, so we are - // guaranteed to have a letter or word break here. As we are - // unreading the run, there is no need to unset c.isMidWord; - // the title caser will handle this. - if c.info.isCased() { - // p+1 is guaranteed to be in bounds: if writing ς was - // successful, p+1 will contain the second byte of ς. If not, - // this function will have returned after c.next returned false. - c.dst[p+1]++ // ς → σ - } - c.unreadRune() - return true - } - // A case ignorable may also introduce a word break, so we may need - // to continue searching even after detecting a break. - isMid := c.info.isMid() - if (wasMid && isMid) || c.info.isBreak() { - c.isMidWord = false - } - wasMid = isMid - c.copy() - } - return true -} - -// finalSigmaSpan would be the same as isLower. - -// elUpper implements Greek upper casing, which entails removing a predefined -// set of non-blocked modifiers. Note that these accents should not be removed -// for title casing! -// Example: "Οδός" -> "ΟΔΟΣ". -func elUpper(c *context) bool { - // From CLDR: - // [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ; - // [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ; - - r, _ := utf8.DecodeRune(c.src[c.pSrc:]) - oldPDst := c.pDst - if !upper(c) { - return false - } - if !unicode.Is(unicode.Greek, r) { - return true - } - i := 0 - // Take the properties of the uppercased rune that is already written to the - // destination. This saves us the trouble of having to uppercase the - // decomposed rune again. - if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil { - // Restore the destination position and process the decomposed rune. - r, sz := utf8.DecodeRune(b) - if r <= 0xFF { // See A.6.1 - return true - } - c.pDst = oldPDst - // Insert the first rune and ignore the modifiers. See A.6.2. - c.writeBytes(b[:sz]) - i = len(b[sz:]) / 2 // Greek modifiers are always of length 2. - } - - for ; i < maxIgnorable && c.next(); i++ { - switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r { - // Above and Iota Subscript - case 0x0300, // U+0300 COMBINING GRAVE ACCENT - 0x0301, // U+0301 COMBINING ACUTE ACCENT - 0x0304, // U+0304 COMBINING MACRON - 0x0306, // U+0306 COMBINING BREVE - 0x0308, // U+0308 COMBINING DIAERESIS - 0x0313, // U+0313 COMBINING COMMA ABOVE - 0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE - 0x0342, // U+0342 COMBINING GREEK PERISPOMENI - 0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI - // No-op. Gobble the modifier. - - default: - switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() { - case cccZero: - c.unreadRune() - return true - - // We don't need to test for IotaSubscript as the only rune that - // qualifies (U+0345) was already excluded in the switch statement - // above. See A.4. - - case cccAbove: - return c.copy() - default: - // Some other modifier. We're still allowed to gobble Greek - // modifiers after this. - c.copy() - } - } - } - return i == maxIgnorable -} - -// TODO: implement elUpperSpan (low-priority: complex and infrequent). - -func ltLower(c *context) bool { - // From CLDR: - // # Introduce an explicit dot above when lowercasing capital I's and J's - // # whenever there are more accents above. - // # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) - // # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I - // # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J - // # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK - // # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE - // # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE - // # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE - // ::NFD(); - // I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307; - // J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307; - // I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307; - // I \u0300 (Ì) → i \u0307 \u0300; - // I \u0301 (Í) → i \u0307 \u0301; - // I \u0303 (Ĩ) → i \u0307 \u0303; - // ::Any-Lower(); - // ::NFC(); - - i := 0 - if r := c.src[c.pSrc]; r < utf8.RuneSelf { - lower(c) - if r != 'I' && r != 'J' { - return true - } - } else { - p := norm.NFD.Properties(c.src[c.pSrc:]) - if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') { - // UTF-8 optimization: the decomposition will only have an above - // modifier if the last rune of the decomposition is in [U+300-U+311]. - // In all other cases, a decomposition starting with I is always - // an I followed by modifiers that are not cased themselves. See A.2. - if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4. - if !c.writeBytes(d[:1]) { - return false - } - c.dst[c.pDst-1] += 'a' - 'A' // lower - - // Assumption: modifier never changes on lowercase. See A.1. - // Assumption: all modifiers added have CCC = Above. See A.2.3. - return c.writeString("\u0307") && c.writeBytes(d[1:]) - } - // In all other cases the additional modifiers will have a CCC - // that is less than 230 (Above). We will insert the U+0307, if - // needed, after these modifiers so that a string in FCD form - // will remain so. See A.2.2. - lower(c) - i = 1 - } else { - return lower(c) - } - } - - for ; i < maxIgnorable && c.next(); i++ { - switch c.info.cccType() { - case cccZero: - c.unreadRune() - return true - case cccAbove: - return c.writeString("\u0307") && c.copy() // See A.1. - default: - c.copy() // See A.1. - } - } - return i == maxIgnorable -} - -// ltLowerSpan would be the same as isLower. - -func ltUpper(f mapFunc) mapFunc { - return func(c *context) bool { - // Unicode: - // 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE - // - // From CLDR: - // # Remove \u0307 following soft-dotteds (i, j, and the like), with possible - // # intervening non-230 marks. - // ::NFD(); - // [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ; - // ::Any-Upper(); - // ::NFC(); - - // TODO: See A.5. A soft-dotted rune never has an exception. This would - // allow us to overload the exception bit and encode this property in - // info. Need to measure performance impact of this. - r, _ := utf8.DecodeRune(c.src[c.pSrc:]) - oldPDst := c.pDst - if !f(c) { - return false - } - if !unicode.Is(unicode.Soft_Dotted, r) { - return true - } - - // We don't need to do an NFD normalization, as a soft-dotted rune never - // contains U+0307. See A.3. - - i := 0 - for ; i < maxIgnorable && c.next(); i++ { - switch c.info.cccType() { - case cccZero: - c.unreadRune() - return true - case cccAbove: - if c.hasPrefix("\u0307") { - // We don't do a full NFC, but rather combine runes for - // some of the common cases. (Returning NFC or - // preserving normal form is neither a requirement nor - // a possibility anyway). - if !c.next() { - return false - } - if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc { - s := "" - switch c.src[c.pSrc+1] { - case 0x80: // U+0300 COMBINING GRAVE ACCENT - s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE - case 0x81: // U+0301 COMBINING ACUTE ACCENT - s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE - case 0x83: // U+0303 COMBINING TILDE - s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE - case 0x88: // U+0308 COMBINING DIAERESIS - s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS - default: - } - if s != "" { - c.pDst = oldPDst - return c.writeString(s) - } - } - } - return c.copy() - default: - c.copy() - } - } - return i == maxIgnorable - } -} - -// TODO: implement ltUpperSpan (low priority: complex and infrequent). - -func aztrUpper(f mapFunc) mapFunc { - return func(c *context) bool { - // i→İ; - if c.src[c.pSrc] == 'i' { - return c.writeString("İ") - } - return f(c) - } -} - -func aztrLower(c *context) (done bool) { - // From CLDR: - // # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri - // # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE - // İ→i; - // # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. - // # This matches the behavior of the canonically equivalent I-dot_above - // # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE - // # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. - // # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I - // I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; - // I→ı ; - // ::Any-Lower(); - if c.hasPrefix("\u0130") { // İ - return c.writeString("i") - } - if c.src[c.pSrc] != 'I' { - return lower(c) - } - - // We ignore the lower-case I for now, but insert it later when we know - // which form we need. - start := c.pSrc + c.sz - - i := 0 -Loop: - // We check for up to n ignorables before \u0307. As \u0307 is an - // ignorable as well, n is maxIgnorable-1. - for ; i < maxIgnorable && c.next(); i++ { - switch c.info.cccType() { - case cccAbove: - if c.hasPrefix("\u0307") { - return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307 - } - done = true - break Loop - case cccZero: - c.unreadRune() - done = true - break Loop - default: - // We'll write this rune after we know which starter to use. - } - } - if i == maxIgnorable { - done = true - } - return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done -} - -// aztrLowerSpan would be the same as isLower. - -func nlTitle(c *context) bool { - // From CLDR: - // # Special titlecasing for Dutch initial "ij". - // ::Any-Title(); - // # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29) - // [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; - if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' { - return title(c) - } - - if !c.writeString("I") || !c.next() { - return false - } - if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' { - return c.writeString("J") - } - c.unreadRune() - return true -} - -func nlTitleSpan(c *context) bool { - // From CLDR: - // # Special titlecasing for Dutch initial "ij". - // ::Any-Title(); - // # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29) - // [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; - if c.src[c.pSrc] != 'I' { - return isTitle(c) - } - if !c.next() || c.src[c.pSrc] == 'j' { - return false - } - if c.src[c.pSrc] != 'J' { - c.unreadRune() - } - return true -} - -// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078. -func afnlRewrite(c *context) { - if c.hasPrefix("'") || c.hasPrefix("’") { - c.isMidWord = true - } -} -- cgit v1.2.3