diff options
author | Dimitri Sokolyuk <demon@dim13.org> | 2019-07-02 12:12:53 +0200 |
---|---|---|
committer | Dimitri Sokolyuk <demon@dim13.org> | 2019-07-02 12:12:53 +0200 |
commit | 473acc61c8392dc7ae303d91568e179c4f105a76 (patch) | |
tree | a2070cba25f918cda460387e587dd60551b23894 /vendor/golang.org/x/text/cases/info.go | |
parent | dd45f63209a8e51979b11182253ee80b5289c10a (diff) |
add black list
Diffstat (limited to 'vendor/golang.org/x/text/cases/info.go')
-rw-r--r-- | vendor/golang.org/x/text/cases/info.go | 82 |
1 files changed, 0 insertions, 82 deletions
diff --git a/vendor/golang.org/x/text/cases/info.go b/vendor/golang.org/x/text/cases/info.go deleted file mode 100644 index 3b51f03..0000000 --- a/vendor/golang.org/x/text/cases/info.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package cases - -func (c info) cccVal() info { - if c&exceptionBit != 0 { - return info(exceptions[c>>exceptionShift]) & cccMask - } - return c & cccMask -} - -func (c info) cccType() info { - ccc := c.cccVal() - if ccc <= cccZero { - return cccZero - } - return ccc -} - -// TODO: Implement full Unicode breaking algorithm: -// 1) Implement breaking in separate package. -// 2) Use the breaker here. -// 3) Compare table size and performance of using the more generic breaker. -// -// Note that we can extend the current algorithm to be much more accurate. This -// only makes sense, though, if the performance and/or space penalty of using -// the generic breaker is big. Extra data will only be needed for non-cased -// runes, which means there are sufficient bits left in the caseType. -// ICU prohibits breaking in such cases as well. - -// For the purpose of title casing we use an approximation of the Unicode Word -// Breaking algorithm defined in Annex #29: -// http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table. -// -// For our approximation, we group the Word Break types into the following -// categories, with associated rules: -// -// 1) Letter: -// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend, Format_FE, ZWJ. -// Rule: Never break between consecutive runes of this category. -// -// 2) Mid: -// MidLetter, MidNumLet, Single_Quote. -// (Cf. case-ignorable: MidLetter, MidNumLet, Single_Quote or cat is Mn, -// Me, Cf, Lm or Sk). -// Rule: Don't break between Letter and Mid, but break between two Mids. -// -// 3) Break: -// Any other category: NewLine, MidNum, CR, LF, Double_Quote, Katakana, and -// Other. -// These categories should always result in a break between two cased letters. -// Rule: Always break. -// -// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in -// preventing a break between two cased letters. For now we will ignore this -// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and -// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].) -// -// Note 2: the rule for Mid is very approximate, but works in most cases. To -// improve, we could store the categories in the trie value and use a FA to -// manage breaks. See TODO comment above. -// -// Note 3: according to the spec, it is possible for the Extend category to -// introduce breaks between other categories grouped in Letter. However, this -// is undesirable for our purposes. ICU prevents breaks in such cases as well. - -// isBreak returns whether this rune should introduce a break. -func (c info) isBreak() bool { - return c.cccVal() == cccBreak -} - -// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter, -// Numeric, ExtendNumLet, or Extend. -func (c info) isLetter() bool { - ccc := c.cccVal() - if ccc == cccZero { - return !c.isCaseIgnorable() - } - return ccc != cccBreak -} |