From 62a404d90ebd1b1d7a2bbfe65694326b76c55731 Mon Sep 17 00:00:00 2001 From: lukechampine Date: Fri, 6 Jul 2018 17:10:20 -0400 Subject: [PATCH] fix SSE segfault --- xor_amd64.s | 71 ++++++++++++++--------------------------------------- xor_test.go | 16 ++++++++++-- 2 files changed, 33 insertions(+), 54 deletions(-) diff --git a/xor_amd64.s b/xor_amd64.s index c137b67..24a2e14 100644 --- a/xor_amd64.s +++ b/xor_amd64.s @@ -8,68 +8,34 @@ #define N R12 // func xorBytesSSE(dst, a, b []byte, n int) -TEXT ·xorBytesSSE(SB), NOSPLIT ,$0 +TEXT ·xorBytesSSE(SB), NOSPLIT, $0 MOVQ dst_data+0(FP), Dst MOVQ a_data+24(FP), A MOVQ b_data+48(FP), B MOVQ n+72(FP), N -XOR_LOOP_128_SSE: - CMPQ N, $128 - JB XOR_LOOP_64_SSE - - MOVOU (A), X0 - MOVOU 16(A), X1 - MOVOU 32(A), X2 - MOVOU 48(A), X3 - MOVOU 64(A), X4 - MOVOU 80(A), X5 - MOVOU 96(A), X6 - MOVOU 112(A), X7 - - PXOR (B), X0 - PXOR 16(B), X1 - PXOR 32(B), X2 - PXOR 48(B), X3 - PXOR 64(B), X4 - PXOR 80(B), X5 - PXOR 96(B), X6 - PXOR 112(B), X7 - - MOVOU X0, (Dst) - MOVOU X1, 16(Dst) - MOVOU X2, 32(Dst) - MOVOU X3, 48(Dst) - MOVOU X4, 64(Dst) - MOVOU X5, 80(Dst) - MOVOU X6, 96(Dst) - MOVOU X7, 112(Dst) - - ADDQ $128, A - ADDQ $128, B - ADDQ $128, Dst - SUBQ $128, N - JNZ XOR_LOOP_128_SSE - RET - XOR_LOOP_64_SSE: CMPQ N, $64 JB XOR_LOOP_16_SSE - MOVOU (A), X0 - MOVOU 16(A), X1 - MOVOU 32(A), X2 - MOVOU 48(A), X3 + MOVOU 0*16(A), X0 + MOVOU 1*16(A), X1 + MOVOU 2*16(A), X2 + MOVOU 3*16(A), X3 + MOVOU 0*16(B), X4 + MOVOU 1*16(B), X5 + MOVOU 2*16(B), X6 + MOVOU 3*16(B), X7 - PXOR (B), X0 - PXOR 16(B), X1 - PXOR 32(B), X2 - PXOR 48(B), X3 + PXOR X4, X0 + PXOR X5, X1 + PXOR X6, X2 + PXOR X7, X3 - MOVOU X0, (Dst) - MOVOU X1, 16(Dst) - MOVOU X2, 32(Dst) - MOVOU X3, 48(Dst) + MOVOU X0, 0*16(Dst) + MOVOU X1, 1*16(Dst) + MOVOU X2, 2*16(Dst) + MOVOU X3, 3*16(Dst) ADDQ $64, A ADDQ $64, B @@ -82,7 +48,8 @@ XOR_LOOP_16_SSE: CMPQ N, $16 JB XOR_LOOP_FINAL_SSE MOVOU (A), X0 - PXOR (B), X0 + MOVOU (B), X1 + PXOR X1, X0 MOVOU X0, (Dst) ADDQ $16, A ADDQ $16, B diff --git a/xor_test.go b/xor_test.go index 6fea00b..6cf2c0f 100644 --- a/xor_test.go +++ b/xor_test.go @@ -56,8 +56,10 @@ func refBlock(dst, a, b []byte) { func TestBytes(t *testing.T) { err := quick.Check(func(a, b []byte) bool { - // double size to increase chances of reaching 64 bytes + // quadruple size to increase coverage a = append(a, a...) + a = append(a, a...) + b = append(b, b...) b = append(b, b...) if len(a) < 8 { return true @@ -72,7 +74,17 @@ func TestBytes(t *testing.T) { return bytes.Equal(dst1, dst2) }, &quick.Config{MaxCount: 10000}) if err != nil { - t.Fatal(err) + t.Error(err) + } + + // test a large slice + a := make([]byte, 512-1) + for i := range a { + a[i] = byte(i) + } + Bytes(a, a, a) + if !bytes.Equal(a, make([]byte, len(a))) { + t.Error("bad xor of large slice") } }