From e1e8d058a33f7566f9c565d04b0d8b56f9645c35 Mon Sep 17 00:00:00 2001
From: Dimitri Sokolyuk
Date: Wed, 25 Apr 2018 09:28:54 +0200
Subject: add vendor

---
 vendor/golang.org/x/image/vector/gen.go | 447 ++++++++++++++++++++++++++++++++
 1 file changed, 447 insertions(+)
 create mode 100644 vendor/golang.org/x/image/vector/gen.go

(limited to 'vendor/golang.org/x/image/vector/gen.go')

diff --git a/vendor/golang.org/x/image/vector/gen.go b/vendor/golang.org/x/image/vector/gen.go
new file mode 100644
index 0000000..28b298b
--- /dev/null
+++ b/vendor/golang.org/x/image/vector/gen.go
@@ -0,0 +1,447 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"io/ioutil"
+	"log"
+	"strings"
+	"text/template"
+)
+
+const (
+	copyright = "" +
+		"// Copyright 2016 The Go Authors. All rights reserved.\n" +
+		"// Use of this source code is governed by a BSD-style\n" +
+		"// license that can be found in the LICENSE file.\n"
+
+	doNotEdit = "// generated by go run gen.go; DO NOT EDIT\n"
+
+	dashDashDash = "// --------"
+)
+
+func main() {
+	tmpl, err := ioutil.ReadFile("gen_acc_amd64.s.tmpl")
+	if err != nil {
+		log.Fatalf("ReadFile: %v", err)
+	}
+	if !bytes.HasPrefix(tmpl, []byte(copyright)) {
+		log.Fatal("source template did not start with the copyright header")
+	}
+	tmpl = tmpl[len(copyright):]
+
+	preamble := []byte(nil)
+	if i := bytes.Index(tmpl, []byte(dashDashDash)); i < 0 {
+		log.Fatalf("source template did not contain %q", dashDashDash)
+	} else {
+		preamble, tmpl = tmpl[:i], tmpl[i:]
+	}
+
+	t, err := template.New("").Parse(string(tmpl))
+	if err != nil {
+		log.Fatalf("Parse: %v", err)
+	}
+
+	out := bytes.NewBuffer(nil)
+	out.WriteString(doNotEdit)
+	out.Write(preamble)
+
+	for i, v := range instances {
+		if i != 0 {
+			out.WriteString("\n")
+		}
+		if strings.Contains(v.LoadArgs, "{{.ShortName}}") {
+			v.LoadArgs = strings.Replace(v.LoadArgs, "{{.ShortName}}", v.ShortName, -1)
+		}
+		if err := t.Execute(out, v); err != nil {
+			log.Fatalf("Execute(%q): %v", v.ShortName, err)
+		}
+	}
+
+	if err := ioutil.WriteFile("acc_amd64.s", out.Bytes(), 0666); err != nil {
+		log.Fatalf("WriteFile: %v", err)
+	}
+}
+
+var instances = []struct {
+	LongName       string
+	ShortName      string
+	FrameSize      string
+	ArgsSize       string
+	Args           string
+	DstElemSize1   int
+	DstElemSize4   int
+	XMM3           string
+	XMM4           string
+	XMM5           string
+	XMM6           string
+	XMM8           string
+	XMM9           string
+	XMM10          string
+	LoadArgs       string
+	Setup          string
+	LoadXMMRegs    string
+	Add            string
+	ClampAndScale  string
+	ConvertToInt32 string
+	Store4         string
+	Store1         string
+}{{
+	LongName:       "fixedAccumulateOpOver",
+	ShortName:      "fxAccOpOver",
+	FrameSize:      fxFrameSize,
+	ArgsSize:       twoArgArgsSize,
+	Args:           "dst []uint8, src []uint32",
+	DstElemSize1:   1 * sizeOfUint8,
+	DstElemSize4:   4 * sizeOfUint8,
+	XMM3:           fxXMM3,
+	XMM4:           fxXMM4,
+	XMM5:           fxXMM5,
+	XMM6:           opOverXMM6,
+	XMM8:           opOverXMM8,
+	XMM9:           opOverXMM9,
+	XMM10:          opOverXMM10,
+	LoadArgs:       twoArgLoadArgs,
+	Setup:          fxSetup,
+	LoadXMMRegs:    fxLoadXMMRegs + "\n" + opOverLoadXMMRegs,
+	Add:            fxAdd,
+	ClampAndScale:  fxClampAndScale,
+	ConvertToInt32: fxConvertToInt32,
+	Store4:         opOverStore4,
+	Store1:         opOverStore1,
+}, {
+	LongName:       "fixedAccumulateOpSrc",
+	ShortName:      "fxAccOpSrc",
+	FrameSize:      fxFrameSize,
+	ArgsSize:       twoArgArgsSize,
+	Args:           "dst []uint8, src []uint32",
+	DstElemSize1:   1 * sizeOfUint8,
+	DstElemSize4:   4 * sizeOfUint8,
+	XMM3:           fxXMM3,
+	XMM4:           fxXMM4,
+	XMM5:           fxXMM5,
+	XMM6:           opSrcXMM6,
+	XMM8:           opSrcXMM8,
+	XMM9:           opSrcXMM9,
+	XMM10:          opSrcXMM10,
+	LoadArgs:       twoArgLoadArgs,
+	Setup:          fxSetup,
+	LoadXMMRegs:    fxLoadXMMRegs + "\n" + opSrcLoadXMMRegs,
+	Add:            fxAdd,
+	ClampAndScale:  fxClampAndScale,
+	ConvertToInt32: fxConvertToInt32,
+	Store4:         opSrcStore4,
+	Store1:         opSrcStore1,
+}, {
+	LongName:       "fixedAccumulateMask",
+	ShortName:      "fxAccMask",
+	FrameSize:      fxFrameSize,
+	ArgsSize:       oneArgArgsSize,
+	Args:           "buf []uint32",
+	DstElemSize1:   1 * sizeOfUint32,
+	DstElemSize4:   4 * sizeOfUint32,
+	XMM3:           fxXMM3,
+	XMM4:           fxXMM4,
+	XMM5:           fxXMM5,
+	XMM6:           maskXMM6,
+	XMM8:           maskXMM8,
+	XMM9:           maskXMM9,
+	XMM10:          maskXMM10,
+	LoadArgs:       oneArgLoadArgs,
+	Setup:          fxSetup,
+	LoadXMMRegs:    fxLoadXMMRegs + "\n" + maskLoadXMMRegs,
+	Add:            fxAdd,
+	ClampAndScale:  fxClampAndScale,
+	ConvertToInt32: fxConvertToInt32,
+	Store4:         maskStore4,
+	Store1:         maskStore1,
+}, {
+	LongName:       "floatingAccumulateOpOver",
+	ShortName:      "flAccOpOver",
+	FrameSize:      flFrameSize,
+	ArgsSize:       twoArgArgsSize,
+	Args:           "dst []uint8, src []float32",
+	DstElemSize1:   1 * sizeOfUint8,
+	DstElemSize4:   4 * sizeOfUint8,
+	XMM3:           flXMM3,
+	XMM4:           flXMM4,
+	XMM5:           flXMM5,
+	XMM6:           opOverXMM6,
+	XMM8:           opOverXMM8,
+	XMM9:           opOverXMM9,
+	XMM10:          opOverXMM10,
+	LoadArgs:       twoArgLoadArgs,
+	Setup:          flSetup,
+	LoadXMMRegs:    flLoadXMMRegs + "\n" + opOverLoadXMMRegs,
+	Add:            flAdd,
+	ClampAndScale:  flClampAndScale,
+	ConvertToInt32: flConvertToInt32,
+	Store4:         opOverStore4,
+	Store1:         opOverStore1,
+}, {
+	LongName:       "floatingAccumulateOpSrc",
+	ShortName:      "flAccOpSrc",
+	FrameSize:      flFrameSize,
+	ArgsSize:       twoArgArgsSize,
+	Args:           "dst []uint8, src []float32",
+	DstElemSize1:   1 * sizeOfUint8,
+	DstElemSize4:   4 * sizeOfUint8,
+	XMM3:           flXMM3,
+	XMM4:           flXMM4,
+	XMM5:           flXMM5,
+	XMM6:           opSrcXMM6,
+	XMM8:           opSrcXMM8,
+	XMM9:           opSrcXMM9,
+	XMM10:          opSrcXMM10,
+	LoadArgs:       twoArgLoadArgs,
+	Setup:          flSetup,
+	LoadXMMRegs:    flLoadXMMRegs + "\n" + opSrcLoadXMMRegs,
+	Add:            flAdd,
+	ClampAndScale:  flClampAndScale,
+	ConvertToInt32: flConvertToInt32,
+	Store4:         opSrcStore4,
+	Store1:         opSrcStore1,
+}, {
+	LongName:       "floatingAccumulateMask",
+	ShortName:      "flAccMask",
+	FrameSize:      flFrameSize,
+	ArgsSize:       twoArgArgsSize,
+	Args:           "dst []uint32, src []float32",
+	DstElemSize1:   1 * sizeOfUint32,
+	DstElemSize4:   4 * sizeOfUint32,
+	XMM3:           flXMM3,
+	XMM4:           flXMM4,
+	XMM5:           flXMM5,
+	XMM6:           maskXMM6,
+	XMM8:           maskXMM8,
+	XMM9:           maskXMM9,
+	XMM10:          maskXMM10,
+	LoadArgs:       twoArgLoadArgs,
+	Setup:          flSetup,
+	LoadXMMRegs:    flLoadXMMRegs + "\n" + maskLoadXMMRegs,
+	Add:            flAdd,
+	ClampAndScale:  flClampAndScale,
+	ConvertToInt32: flConvertToInt32,
+	Store4:         maskStore4,
+	Store1:         maskStore1,
+}}
+
+const (
+	fxFrameSize = `0`
+	flFrameSize = `8`
+
+	oneArgArgsSize = `24`
+	twoArgArgsSize = `48`
+
+	sizeOfUint8  = 1
+	sizeOfUint32 = 4
+
+	fxXMM3 = `-`
+	flXMM3 = `flSignMask`
+
+	fxXMM4 = `-`
+	flXMM4 = `flOne`
+
+	fxXMM5 = `fxAlmost65536`
+	flXMM5 = `flAlmost65536`
+
+	oneArgLoadArgs = `
+		MOVQ buf_base+0(FP), DI
+		MOVQ buf_len+8(FP), BX
+		MOVQ buf_base+0(FP), SI
+		MOVQ buf_len+8(FP), R10
+		`
+	twoArgLoadArgs = `
+		MOVQ dst_base+0(FP), DI
+		MOVQ dst_len+8(FP), BX
+		MOVQ src_base+24(FP), SI
+		MOVQ src_len+32(FP), R10
+		// Sanity check that len(dst) >= len(src).
+		CMPQ BX, R10
+		JLT  {{.ShortName}}End
+		`
+
+	fxSetup = ``
+	flSetup = `
+		// Prepare to set MXCSR bits 13 and 14, so that the CVTPS2PL below is
+		// "Round To Zero".
+		STMXCSR mxcsrOrig-8(SP)
+		MOVL    mxcsrOrig-8(SP), AX
+		ORL     $0x6000, AX
+		MOVL    AX, mxcsrNew-4(SP)
+		`
+
+	fxLoadXMMRegs = `
+		// fxAlmost65536 := XMM(0x0000ffff repeated four times) // Maximum of a uint16.
+		MOVOU fxAlmost65536<>(SB), X5
+		`
+	flLoadXMMRegs = `
+		// flSignMask    := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32.
+		// flOne         := XMM(0x3f800000 repeated four times) // 1 as a float32.
+		// flAlmost65536 := XMM(0x477fffff repeated four times) // 255.99998 * 256 as a float32.
+		MOVOU flSignMask<>(SB), X3
+		MOVOU flOne<>(SB), X4
+		MOVOU flAlmost65536<>(SB), X5
+		`
+
+	fxAdd = `PADDD`
+	flAdd = `ADDPS`
+
+	fxClampAndScale = `
+		// y = abs(x)
+		// y >>= 2 // Shift by 2*ϕ - 16.
+		// y = min(y, fxAlmost65536)
+		//
+		// pabsd  %xmm1,%xmm2
+		// psrld  $0x2,%xmm2
+		// pminud %xmm5,%xmm2
+		//
+		// Hopefully we'll get these opcode mnemonics into the assembler for Go
+		// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
+		// it's similar.
+		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
+		BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
+		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
+		`
+	flClampAndScale = `
+		// y = x & flSignMask
+		// y = min(y, flOne)
+		// y = mul(y, flAlmost65536)
+		MOVOU X3, X2
+		ANDPS X1, X2
+		MINPS X4, X2
+		MULPS X5, X2
+		`
+
+	fxConvertToInt32 = `
+		// z = convertToInt32(y)
+		// No-op.
+		`
+	flConvertToInt32 = `
+		// z = convertToInt32(y)
+		LDMXCSR mxcsrNew-4(SP)
+		CVTPS2PL X2, X2
+		LDMXCSR mxcsrOrig-8(SP)
+		`
+
+	opOverStore4 = `
+		// Blend over the dst's prior value. SIMD for i in 0..3:
+		//
+		// dstA := uint32(dst[i]) * 0x101
+		// maskA := z@i
+		// outA := dstA*(0xffff-maskA)/0xffff + maskA
+		// dst[i] = uint8(outA >> 8)
+		//
+		// First, set X0 to dstA*(0xffff-maskA).
+		MOVL   (DI), X0
+		PSHUFB X8, X0
+		MOVOU  X9, X11
+		PSUBL  X2, X11
+		PMULLD X11, X0
+		// We implement uint32 division by 0xffff as multiplication by a magic
+		// constant (0x80008001) and then a shift by a magic constant (47).
+		// See TestDivideByFFFF for a justification.
+		//
+		// That multiplication widens from uint32 to uint64, so we have to
+		// duplicate and shift our four uint32s from one XMM register (X0) to
+		// two XMM registers (X0 and X11).
+		//
+		// Move the second and fourth uint32s in X0 to be the first and third
+		// uint32s in X11.
+		MOVOU X0, X11
+		PSRLQ $32, X11
+		// Multiply by magic, shift by magic.
+		//
+		// pmuludq %xmm10,%xmm0
+		// pmuludq %xmm10,%xmm11
+		BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
+		BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
+		PSRLQ $47, X0
+		PSRLQ $47, X11
+		// Merge the two registers back to one, X11, and add maskA.
+		PSLLQ $32, X11
+		XORPS X0, X11
+		PADDD X11, X2
+		// As per opSrcStore4, shuffle and copy the 4 second-lowest bytes.
+		PSHUFB X6, X2
+		MOVL   X2, (DI)
+		`
+	opSrcStore4 = `
+		// z = shuffleTheSecondLowestBytesOfEach4ByteElement(z)
+		// copy(dst[:4], low4BytesOf(z))
+		PSHUFB X6, X2
+		MOVL   X2, (DI)
+		`
+	maskStore4 = `
+		// copy(dst[:4], z)
+		MOVOU X2, (DI)
+		`
+
+	opOverStore1 = `
+		// Blend over the dst's prior value.
+		//
+		// dstA := uint32(dst[0]) * 0x101
+		// maskA := z
+		// outA := dstA*(0xffff-maskA)/0xffff + maskA
+		// dst[0] = uint8(outA >> 8)
+		MOVBLZX (DI), R12
+		IMULL   $0x101, R12
+		MOVL    X2, R13
+		MOVL    $0xffff, AX
+		SUBL    R13, AX
+		MULL    R12             // MULL's implicit arg is AX, and the result is stored in DX:AX.
+		MOVL    $0x80008001, BX // Dividing by 0xffff is done by first multiplying by a magic constant...
+		MULL    BX              // MULL's implicit arg is AX, and the result is stored in DX:AX.
+		SHRL    $15, DX         // ...and then shifting by another magic constant (47 - 32 = 15).
+		ADDL    DX, R13
+		SHRL    $8, R13
+		MOVB    R13, (DI)
+		`
+	opSrcStore1 = `
+		// dst[0] = uint8(z>>8)
+		MOVL X2, BX
+		SHRL $8, BX
+		MOVB BX, (DI)
+		`
+	maskStore1 = `
+		// dst[0] = uint32(z)
+		MOVL X2, (DI)
+		`
+
+	opOverXMM6 = `gather`
+	opSrcXMM6  = `gather`
+	maskXMM6   = `-`
+
+	opOverXMM8 = `scatterAndMulBy0x101`
+	opSrcXMM8  = `-`
+	maskXMM8   = `-`
+
+	opOverXMM9 = `fxAlmost65536`
+	opSrcXMM9  = `-`
+	maskXMM9   = `-`
+
+	opOverXMM10 = `inverseFFFF`
+	opSrcXMM10  = `-`
+	maskXMM10   = `-`
+
+	opOverLoadXMMRegs = `
+		// gather               := XMM(see above) // PSHUFB shuffle mask.
+		// scatterAndMulBy0x101 := XMM(see above) // PSHUFB shuffle mask.
+		// fxAlmost65536        := XMM(0x0000ffff repeated four times) // 0xffff.
+		// inverseFFFF          := XMM(0x80008001 repeated four times) // Magic constant for dividing by 0xffff.
+		MOVOU gather<>(SB), X6
+		MOVOU scatterAndMulBy0x101<>(SB), X8
+		MOVOU fxAlmost65536<>(SB), X9
+		MOVOU inverseFFFF<>(SB), X10
+		`
+	opSrcLoadXMMRegs = `
+		// gather := XMM(see above) // PSHUFB shuffle mask.
+		MOVOU gather<>(SB), X6
+		`
+	maskLoadXMMRegs = ``
+)
--
cgit v1.2.3
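The fixed-point clamp-and-scale in fxClampAndScale (PABSD, PSRLD, PMINUD, emitted as raw BYTEs because the assembler of the day lacked the mnemonics) is easier to follow one element at a time in plain Go. The sketch below is illustrative only, not the package's own non-SIMD fallback; it assumes the buf deltas are signed values stored in uint32s (which is what the PABSD implies) and that ϕ = 9, as implied by the comment "Shift by 2*ϕ - 16" next to the shift of 2.

package main

import "fmt"

// fixedAccumulateMaskScalar mirrors, one element at a time, what the generated
// fixedAccumulateMask does four lanes at a time: accumulate signed fixed-point
// deltas (PADDD), take the absolute value (PABSD), shift down to 16 bits
// (PSRLD $2, i.e. 2*ϕ - 16), clamp to 0xffff (PMINUD), and store (maskStore).
func fixedAccumulateMaskScalar(buf []uint32) {
	acc := int32(0)
	for i, delta := range buf {
		acc += int32(delta)
		y := acc
		if y < 0 {
			y = -y
		}
		z := uint32(y) >> 2
		if z > 0xffff {
			z = 0xffff
		}
		buf[i] = z
	}
}

func main() {
	// +0.5, +0.5 and -1.0 of pixel coverage in 2ϕ-bit fixed point (ϕ = 9 assumed).
	deltas := []int32{1 << 17, 1 << 17, -(1 << 18)}
	buf := make([]uint32, len(deltas))
	for i, d := range deltas {
		buf[i] = uint32(d)
	}
	fixedAccumulateMaskScalar(buf)
	fmt.Printf("%#x\n", buf) // [0x8000 0xffff 0x0]
}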
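The floating-point path does the same clamp and scale with ANDPS/MINPS/MULPS, and the MXCSR setup in flSetup exists only so that CVTPS2PL truncates toward zero, which is exactly what a Go int32 conversion does. A scalar sketch of the OpSrc variant, again illustrative rather than the package's actual fallback code:

package main

import "fmt"

const flAlmost65536 = 255.99998 * 256 // the 0x477fffff constant above

// floatingAccumulateOpSrcScalar mirrors one generated floating-point
// accumulate step per element: running sum (ADDPS), abs via the sign mask
// (ANDPS), clamp to 1 (MINPS), scale (MULPS), truncate (CVTPS2PL under
// round-to-zero), then store the second-lowest byte (opSrcStore).
func floatingAccumulateOpSrcScalar(dst []uint8, src []float32) {
	acc := float32(0)
	for i, x := range src {
		acc += x
		y := acc
		if y < 0 {
			y = -y
		}
		if y > 1 {
			y = 1
		}
		z := int32(y * flAlmost65536) // int32() truncates, i.e. rounds to zero
		dst[i] = uint8(z >> 8)
	}
}

func main() {
	src := []float32{0.25, 0.5, 0.5, -1.5}
	dst := make([]uint8, len(src))
	floatingAccumulateOpSrcScalar(dst, src)
	fmt.Println(dst) // [63 191 255 63]
}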
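The "over" stores blend the new coverage against the destination's prior value using the formula spelled out in the opOverStore comments. For a single pixel, the arithmetic looks like this in plain Go (a sketch of the formula only; blendOver1 is a hypothetical helper name, not part of the package):

package main

import "fmt"

// blendOver1 is the scalar arithmetic behind opOverStore1: widen the 8-bit
// destination alpha to 16 bits (×0x101), apply "over" against the 16-bit
// mask value, then narrow back to 8 bits.
func blendOver1(dst uint8, maskA uint32) uint8 {
	dstA := uint32(dst) * 0x101
	outA := dstA*(0xffff-maskA)/0xffff + maskA
	return uint8(outA >> 8)
}

func main() {
	// Half coverage (0x8000) over a half-opaque destination (0x80).
	fmt.Println(blendOver1(0x80, 0x8000)) // 192
}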
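The generated code avoids the uint32 division in that formula: as the opOverStore4 comment notes, dividing by 0xffff is replaced by a multiply by 0x80008001 and a shift by 47 (or, in the 32-bit scalar tail of opOverStore1, a MULL followed by a shift of the high half by 15). The standalone check below is not the package's TestDivideByFFFF, just a sketch that exercises the identity on the kinds of values that occur here, i.e. products dstA*(0xffff-maskA) of at most 0xffff*0xffff:

package main

import "fmt"

func main() {
	const magic, shift = 0x80008001, 47
	// Sample inputs up to 0xffff*0xffff, the largest value the blend produces.
	for _, x := range []uint64{0, 1, 0xfffe, 0xffff, 0x10000, 0x12345678, 0xfffe * 0xffff, 0xffff * 0xffff} {
		got := (x * magic) >> shift
		want := x / 0xffff
		if got != want {
			fmt.Printf("mismatch at %#x: got %d, want %d\n", x, got, want)
			return
		}
	}
	fmt.Println("x/0xffff == (x*0x80008001)>>47 for all sampled x")
}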