From 14bb08c1df8db9ec6c8a05520d4eee67971235d9 Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Thu, 27 Sep 2018 20:03:23 +0200 Subject: mod tidy --- .../golang.org/x/image/vector/gen_acc_amd64.s.tmpl | 171 --------------------- 1 file changed, 171 deletions(-) delete mode 100644 vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl (limited to 'vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl') diff --git a/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl b/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl deleted file mode 100644 index 66b21a1..0000000 --- a/vendor/golang.org/x/image/vector/gen_acc_amd64.s.tmpl +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build go1.6 -// +build !noasm - -#include "textflag.h" - -// fl is short for floating point math. fx is short for fixed point math. - -DATA flAlmost65536<>+0x00(SB)/8, $0x477fffff477fffff -DATA flAlmost65536<>+0x08(SB)/8, $0x477fffff477fffff -DATA flOne<>+0x00(SB)/8, $0x3f8000003f800000 -DATA flOne<>+0x08(SB)/8, $0x3f8000003f800000 -DATA flSignMask<>+0x00(SB)/8, $0x7fffffff7fffffff -DATA flSignMask<>+0x08(SB)/8, $0x7fffffff7fffffff - -// scatterAndMulBy0x101 is a PSHUFB mask that brings the low four bytes of an -// XMM register to the low byte of that register's four uint32 values. It -// duplicates those bytes, effectively multiplying each uint32 by 0x101. -// -// It transforms a little-endian 16-byte XMM value from -// ijkl???????????? -// to -// ii00jj00kk00ll00 -DATA scatterAndMulBy0x101<>+0x00(SB)/8, $0x8080010180800000 -DATA scatterAndMulBy0x101<>+0x08(SB)/8, $0x8080030380800202 - -// gather is a PSHUFB mask that brings the second-lowest byte of the XMM -// register's four uint32 values to the low four bytes of that register. -// -// It transforms a little-endian 16-byte XMM value from -// ?i???j???k???l?? -// to -// ijkl000000000000 -DATA gather<>+0x00(SB)/8, $0x808080800d090501 -DATA gather<>+0x08(SB)/8, $0x8080808080808080 - -DATA fxAlmost65536<>+0x00(SB)/8, $0x0000ffff0000ffff -DATA fxAlmost65536<>+0x08(SB)/8, $0x0000ffff0000ffff -DATA inverseFFFF<>+0x00(SB)/8, $0x8000800180008001 -DATA inverseFFFF<>+0x08(SB)/8, $0x8000800180008001 - -GLOBL flAlmost65536<>(SB), (NOPTR+RODATA), $16 -GLOBL flOne<>(SB), (NOPTR+RODATA), $16 -GLOBL flSignMask<>(SB), (NOPTR+RODATA), $16 -GLOBL scatterAndMulBy0x101<>(SB), (NOPTR+RODATA), $16 -GLOBL gather<>(SB), (NOPTR+RODATA), $16 -GLOBL fxAlmost65536<>(SB), (NOPTR+RODATA), $16 -GLOBL inverseFFFF<>(SB), (NOPTR+RODATA), $16 - -// func haveSSE4_1() bool -TEXT ·haveSSE4_1(SB), NOSPLIT, $0 - MOVQ $1, AX - CPUID - SHRQ $19, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - -// ---------------------------------------------------------------------------- - -// func {{.LongName}}SIMD({{.Args}}) -// -// XMM registers. Variable names are per -// https://github.com/google/font-rs/blob/master/src/accumulate.c -// -// xmm0 scratch -// xmm1 x -// xmm2 y, z -// xmm3 {{.XMM3}} -// xmm4 {{.XMM4}} -// xmm5 {{.XMM5}} -// xmm6 {{.XMM6}} -// xmm7 offset -// xmm8 {{.XMM8}} -// xmm9 {{.XMM9}} -// xmm10 {{.XMM10}} -TEXT ·{{.LongName}}SIMD(SB), NOSPLIT, ${{.FrameSize}}-{{.ArgsSize}} - {{.LoadArgs}} - - // R10 = len(src) &^ 3 - // R11 = len(src) - MOVQ R10, R11 - ANDQ $-4, R10 - - {{.Setup}} - - {{.LoadXMMRegs}} - - // offset := XMM(0x00000000 repeated four times) // Cumulative sum. - XORPS X7, X7 - - // i := 0 - MOVQ $0, R9 - -{{.ShortName}}Loop4: - // for i < (len(src) &^ 3) - CMPQ R9, R10 - JAE {{.ShortName}}Loop1 - - // x = XMM(s0, s1, s2, s3) - // - // Where s0 is src[i+0], s1 is src[i+1], etc. - MOVOU (SI), X1 - - // scratch = XMM(0, s0, s1, s2) - // x += scratch // yields x == XMM(s0, s0+s1, s1+s2, s2+s3) - MOVOU X1, X0 - PSLLO $4, X0 - {{.Add}} X0, X1 - - // scratch = XMM(0, 0, 0, 0) - // scratch = XMM(scratch@0, scratch@0, x@0, x@1) // yields scratch == XMM(0, 0, s0, s0+s1) - // x += scratch // yields x == XMM(s0, s0+s1, s0+s1+s2, s0+s1+s2+s3) - XORPS X0, X0 - SHUFPS $0x40, X1, X0 - {{.Add}} X0, X1 - - // x += offset - {{.Add}} X7, X1 - - {{.ClampAndScale}} - - {{.ConvertToInt32}} - - {{.Store4}} - - // offset = XMM(x@3, x@3, x@3, x@3) - MOVOU X1, X7 - SHUFPS $0xff, X1, X7 - - // i += 4 - // dst = dst[4:] - // src = src[4:] - ADDQ $4, R9 - ADDQ ${{.DstElemSize4}}, DI - ADDQ $16, SI - JMP {{.ShortName}}Loop4 - -{{.ShortName}}Loop1: - // for i < len(src) - CMPQ R9, R11 - JAE {{.ShortName}}End - - // x = src[i] + offset - MOVL (SI), X1 - {{.Add}} X7, X1 - - {{.ClampAndScale}} - - {{.ConvertToInt32}} - - {{.Store1}} - - // offset = x - MOVOU X1, X7 - - // i += 1 - // dst = dst[1:] - // src = src[1:] - ADDQ $1, R9 - ADDQ ${{.DstElemSize1}}, DI - ADDQ $4, SI - JMP {{.ShortName}}Loop1 - -{{.ShortName}}End: - RET -- cgit v1.2.3