aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl')
-rw-r--r--vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl171
1 files changed, 0 insertions, 171 deletions
diff --git a/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl b/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl
deleted file mode 100644
index 66b21a1..0000000
--- a/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl
+++ /dev/null
@@ -1,171 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build go1.6
-// +build !noasm
-
-#include "textflag.h"
-
-// fl is short for floating point math. fx is short for fixed point math.
-
-DATA flAlmost65536<>+0x00(SB)/8, $0x477fffff477fffff
-DATA flAlmost65536<>+0x08(SB)/8, $0x477fffff477fffff
-DATA flOne<>+0x00(SB)/8, $0x3f8000003f800000
-DATA flOne<>+0x08(SB)/8, $0x3f8000003f800000
-DATA flSignMask<>+0x00(SB)/8, $0x7fffffff7fffffff
-DATA flSignMask<>+0x08(SB)/8, $0x7fffffff7fffffff
-
-// scatterAndMulBy0x101 is a PSHUFB mask that brings the low four bytes of an
-// XMM register to the low byte of that register's four uint32 values. It
-// duplicates those bytes, effectively multiplying each uint32 by 0x101.
-//
-// It transforms a little-endian 16-byte XMM value from
-// ijkl????????????
-// to
-// ii00jj00kk00ll00
-DATA scatterAndMulBy0x101<>+0x00(SB)/8, $0x8080010180800000
-DATA scatterAndMulBy0x101<>+0x08(SB)/8, $0x8080030380800202
-
-// gather is a PSHUFB mask that brings the second-lowest byte of the XMM
-// register's four uint32 values to the low four bytes of that register.
-//
-// It transforms a little-endian 16-byte XMM value from
-// ?i???j???k???l??
-// to
-// ijkl000000000000
-DATA gather<>+0x00(SB)/8, $0x808080800d090501
-DATA gather<>+0x08(SB)/8, $0x8080808080808080
-
-DATA fxAlmost65536<>+0x00(SB)/8, $0x0000ffff0000ffff
-DATA fxAlmost65536<>+0x08(SB)/8, $0x0000ffff0000ffff
-DATA inverseFFFF<>+0x00(SB)/8, $0x8000800180008001
-DATA inverseFFFF<>+0x08(SB)/8, $0x8000800180008001
-
-GLOBL flAlmost65536<>(SB), (NOPTR+RODATA), $16
-GLOBL flOne<>(SB), (NOPTR+RODATA), $16
-GLOBL flSignMask<>(SB), (NOPTR+RODATA), $16
-GLOBL scatterAndMulBy0x101<>(SB), (NOPTR+RODATA), $16
-GLOBL gather<>(SB), (NOPTR+RODATA), $16
-GLOBL fxAlmost65536<>(SB), (NOPTR+RODATA), $16
-GLOBL inverseFFFF<>(SB), (NOPTR+RODATA), $16
-
-// func haveSSE4_1() bool
-TEXT ·haveSSE4_1(SB), NOSPLIT, $0
- MOVQ $1, AX
- CPUID
- SHRQ $19, CX
- ANDQ $1, CX
- MOVB CX, ret+0(FP)
- RET
-
-// ----------------------------------------------------------------------------
-
-// func {{.LongName}}SIMD({{.Args}})
-//
-// XMM registers. Variable names are per
-// https://github.com/google/font-rs/blob/master/src/accumulate.c
-//
-// xmm0 scratch
-// xmm1 x
-// xmm2 y, z
-// xmm3 {{.XMM3}}
-// xmm4 {{.XMM4}}
-// xmm5 {{.XMM5}}
-// xmm6 {{.XMM6}}
-// xmm7 offset
-// xmm8 {{.XMM8}}
-// xmm9 {{.XMM9}}
-// xmm10 {{.XMM10}}
-TEXT ·{{.LongName}}SIMD(SB), NOSPLIT, ${{.FrameSize}}-{{.ArgsSize}}
- {{.LoadArgs}}
-
- // R10 = len(src) &^ 3
- // R11 = len(src)
- MOVQ R10, R11
- ANDQ $-4, R10
-
- {{.Setup}}
-
- {{.LoadXMMRegs}}
-
- // offset := XMM(0x00000000 repeated four times) // Cumulative sum.
- XORPS X7, X7
-
- // i := 0
- MOVQ $0, R9
-
-{{.ShortName}}Loop4:
- // for i < (len(src) &^ 3)
- CMPQ R9, R10
- JAE {{.ShortName}}Loop1
-
- // x = XMM(s0, s1, s2, s3)
- //
- // Where s0 is src[i+0], s1 is src[i+1], etc.
- MOVOU (SI), X1
-
- // scratch = XMM(0, s0, s1, s2)
- // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3)
- MOVOU X1, X0
- PSLLO $4, X0
- {{.Add}} X0, X1
-
- // scratch = XMM(0, 0, 0, 0)
- // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1)
- // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3)
- XORPS X0, X0
- SHUFPS $0x40, X1, X0
- {{.Add}} X0, X1
-
- // x += offset
- {{.Add}} X7, X1
-
- {{.ClampAndScale}}
-
- {{.ConvertToInt32}}
-
- {{.Store4}}
-
- // offset = XMM(x@3, x@3, x@3, x@3)
- MOVOU X1, X7
- SHUFPS $0xff, X1, X7
-
- // i += 4
- // dst = dst[4:]
- // src = src[4:]
- ADDQ $4, R9
- ADDQ ${{.DstElemSize4}}, DI
- ADDQ $16, SI
- JMP {{.ShortName}}Loop4
-
-{{.ShortName}}Loop1:
- // for i < len(src)
- CMPQ R9, R11
- JAE {{.ShortName}}End
-
- // x = src[i] + offset
- MOVL (SI), X1
- {{.Add}} X7, X1
-
- {{.ClampAndScale}}
-
- {{.ConvertToInt32}}
-
- {{.Store1}}
-
- // offset = x
- MOVOU X1, X7
-
- // i += 1
- // dst = dst[1:]
- // src = src[1:]
- ADDQ $1, R9
- ADDQ ${{.DstElemSize1}}, DI
- ADDQ $4, SI
- JMP {{.ShortName}}Loop1
-
-{{.ShortName}}End:
- RET