summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/encoding/encoding.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/encoding/encoding.go')
-rw-r--r--vendor/golang.org/x/text/encoding/encoding.go335
1 files changed, 0 insertions, 335 deletions
diff --git a/vendor/golang.org/x/text/encoding/encoding.go b/vendor/golang.org/x/text/encoding/encoding.go
deleted file mode 100644
index a0bd7cd..0000000
--- a/vendor/golang.org/x/text/encoding/encoding.go
+++ /dev/null
@@ -1,335 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package encoding defines an interface for character encodings, such as Shift
-// JIS and Windows 1252, that can convert to and from UTF-8.
-//
-// Encoding implementations are provided in other packages, such as
-// golang.org/x/text/encoding/charmap and
-// golang.org/x/text/encoding/japanese.
-package encoding // import "golang.org/x/text/encoding"
-
-import (
- "errors"
- "io"
- "strconv"
- "unicode/utf8"
-
- "golang.org/x/text/encoding/internal/identifier"
- "golang.org/x/text/transform"
-)
-
-// TODO:
-// - There seems to be some inconsistency in when decoders return errors
-// and when not. Also documentation seems to suggest they shouldn't return
-// errors at all (except for UTF-16).
-// - Encoders seem to rely on or at least benefit from the input being in NFC
-// normal form. Perhaps add an example how users could prepare their output.
-
-// Encoding is a character set encoding that can be transformed to and from
-// UTF-8.
-type Encoding interface {
- // NewDecoder returns a Decoder.
- NewDecoder() *Decoder
-
- // NewEncoder returns an Encoder.
- NewEncoder() *Encoder
-}
-
-// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
-//
-// Transforming source bytes that are not of that encoding will not result in an
-// error per se. Each byte that cannot be transcoded will be represented in the
-// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
-type Decoder struct {
- transform.Transformer
-
- // This forces external creators of Decoders to use names in struct
- // initializers, allowing for future extendibility without having to break
- // code.
- _ struct{}
-}
-
-// Bytes converts the given encoded bytes to UTF-8. It returns the converted
-// bytes or nil, err if any error occurred.
-func (d *Decoder) Bytes(b []byte) ([]byte, error) {
- b, _, err := transform.Bytes(d, b)
- if err != nil {
- return nil, err
- }
- return b, nil
-}
-
-// String converts the given encoded string to UTF-8. It returns the converted
-// string or "", err if any error occurred.
-func (d *Decoder) String(s string) (string, error) {
- s, _, err := transform.String(d, s)
- if err != nil {
- return "", err
- }
- return s, nil
-}
-
-// Reader wraps another Reader to decode its bytes.
-//
-// The Decoder may not be used for any other operation as long as the returned
-// Reader is in use.
-func (d *Decoder) Reader(r io.Reader) io.Reader {
- return transform.NewReader(r, d)
-}
-
-// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
-//
-// Each rune that cannot be transcoded will result in an error. In this case,
-// the transform will consume all source byte up to, not including the offending
-// rune. Transforming source bytes that are not valid UTF-8 will be replaced by
-// `\uFFFD`. To return early with an error instead, use transform.Chain to
-// preprocess the data with a UTF8Validator.
-type Encoder struct {
- transform.Transformer
-
- // This forces external creators of Encoders to use names in struct
- // initializers, allowing for future extendibility without having to break
- // code.
- _ struct{}
-}
-
-// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
-// any error occurred.
-func (e *Encoder) Bytes(b []byte) ([]byte, error) {
- b, _, err := transform.Bytes(e, b)
- if err != nil {
- return nil, err
- }
- return b, nil
-}
-
-// String converts a string from UTF-8. It returns the converted string or
-// "", err if any error occurred.
-func (e *Encoder) String(s string) (string, error) {
- s, _, err := transform.String(e, s)
- if err != nil {
- return "", err
- }
- return s, nil
-}
-
-// Writer wraps another Writer to encode its UTF-8 output.
-//
-// The Encoder may not be used for any other operation as long as the returned
-// Writer is in use.
-func (e *Encoder) Writer(w io.Writer) io.Writer {
- return transform.NewWriter(w, e)
-}
-
-// ASCIISub is the ASCII substitute character, as recommended by
-// https://unicode.org/reports/tr36/#Text_Comparison
-const ASCIISub = '\x1a'
-
-// Nop is the nop encoding. Its transformed bytes are the same as the source
-// bytes; it does not replace invalid UTF-8 sequences.
-var Nop Encoding = nop{}
-
-type nop struct{}
-
-func (nop) NewDecoder() *Decoder {
- return &Decoder{Transformer: transform.Nop}
-}
-func (nop) NewEncoder() *Encoder {
- return &Encoder{Transformer: transform.Nop}
-}
-
-// Replacement is the replacement encoding. Decoding from the replacement
-// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
-// the replacement encoding yields the same as the source bytes except that
-// invalid UTF-8 is converted to '\uFFFD'.
-//
-// It is defined at http://encoding.spec.whatwg.org/#replacement
-var Replacement Encoding = replacement{}
-
-type replacement struct{}
-
-func (replacement) NewDecoder() *Decoder {
- return &Decoder{Transformer: replacementDecoder{}}
-}
-
-func (replacement) NewEncoder() *Encoder {
- return &Encoder{Transformer: replacementEncoder{}}
-}
-
-func (replacement) ID() (mib identifier.MIB, other string) {
- return identifier.Replacement, ""
-}
-
-type replacementDecoder struct{ transform.NopResetter }
-
-func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- if len(dst) < 3 {
- return 0, 0, transform.ErrShortDst
- }
- if atEOF {
- const fffd = "\ufffd"
- dst[0] = fffd[0]
- dst[1] = fffd[1]
- dst[2] = fffd[2]
- nDst = 3
- }
- return nDst, len(src), nil
-}
-
-type replacementEncoder struct{ transform.NopResetter }
-
-func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- r, size := rune(0), 0
-
- for ; nSrc < len(src); nSrc += size {
- r = rune(src[nSrc])
-
- // Decode a 1-byte rune.
- if r < utf8.RuneSelf {
- size = 1
-
- } else {
- // Decode a multi-byte rune.
- r, size = utf8.DecodeRune(src[nSrc:])
- if size == 1 {
- // All valid runes of size 1 (those below utf8.RuneSelf) were
- // handled above. We have invalid UTF-8 or we haven't seen the
- // full character yet.
- if !atEOF && !utf8.FullRune(src[nSrc:]) {
- err = transform.ErrShortSrc
- break
- }
- r = '\ufffd'
- }
- }
-
- if nDst+utf8.RuneLen(r) > len(dst) {
- err = transform.ErrShortDst
- break
- }
- nDst += utf8.EncodeRune(dst[nDst:], r)
- }
- return nDst, nSrc, err
-}
-
-// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
-// repertoire of the destination encoding with HTML escape sequences.
-//
-// This wrapper exists to comply to URL and HTML forms requiring a
-// non-terminating legacy encoder. The produced sequences may lead to data
-// loss as they are indistinguishable from legitimate input. To avoid this
-// issue, use UTF-8 encodings whenever possible.
-func HTMLEscapeUnsupported(e *Encoder) *Encoder {
- return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
-}
-
-// ReplaceUnsupported wraps encoders to replace source runes outside the
-// repertoire of the destination encoding with an encoding-specific
-// replacement.
-//
-// This wrapper is only provided for backwards compatibility and legacy
-// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
-func ReplaceUnsupported(e *Encoder) *Encoder {
- return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
-}
-
-type errorHandler struct {
- *Encoder
- handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
-}
-
-// TODO: consider making this error public in some form.
-type repertoireError interface {
- Replacement() byte
-}
-
-func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
- for err != nil {
- rerr, ok := err.(repertoireError)
- if !ok {
- return nDst, nSrc, err
- }
- r, sz := utf8.DecodeRune(src[nSrc:])
- n, ok := h.handler(dst[nDst:], r, rerr)
- if !ok {
- return nDst, nSrc, transform.ErrShortDst
- }
- err = nil
- nDst += n
- if nSrc += sz; nSrc < len(src) {
- var dn, sn int
- dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
- nDst += dn
- nSrc += sn
- }
- }
- return nDst, nSrc, err
-}
-
-func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
- buf := [8]byte{}
- b := strconv.AppendUint(buf[:0], uint64(r), 10)
- if n = len(b) + len("&#;"); n >= len(dst) {
- return 0, false
- }
- dst[0] = '&'
- dst[1] = '#'
- dst[copy(dst[2:], b)+2] = ';'
- return n, true
-}
-
-func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
- if len(dst) == 0 {
- return 0, false
- }
- dst[0] = err.Replacement()
- return 1, true
-}
-
-// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
-var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
-
-// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
-// input byte that is not valid UTF-8.
-var UTF8Validator transform.Transformer = utf8Validator{}
-
-type utf8Validator struct{ transform.NopResetter }
-
-func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- n := len(src)
- if n > len(dst) {
- n = len(dst)
- }
- for i := 0; i < n; {
- if c := src[i]; c < utf8.RuneSelf {
- dst[i] = c
- i++
- continue
- }
- _, size := utf8.DecodeRune(src[i:])
- if size == 1 {
- // All valid runes of size 1 (those below utf8.RuneSelf) were
- // handled above. We have invalid UTF-8 or we haven't seen the
- // full character yet.
- err = ErrInvalidUTF8
- if !atEOF && !utf8.FullRune(src[i:]) {
- err = transform.ErrShortSrc
- }
- return i, i, err
- }
- if i+size > len(dst) {
- return i, i, transform.ErrShortDst
- }
- for ; size > 0; size-- {
- dst[i] = src[i]
- i++
- }
- }
- if len(src) > len(dst) {
- err = transform.ErrShortDst
- }
- return n, n, err
-}