Speed up Block by casting to array, not slice

This commit is contained in:
lukechampine
2018-10-11 17:48:17 -04:00
parent 62a404d90e
commit ab5bbcecc5
3 changed files with 11 additions and 9 deletions
+4 -4
View File
@@ -53,9 +53,9 @@ roughly twice as fast as SSE, which is unsurprising since it can operate on
twice as many bits per cycle. Lastly, for very small slices, the cost of the
function call starts to outweigh the benefit of AVX/SSE (the Go compiler never
inlines handwritten asm). If you need to xor exactly 16 bytes (common in block
ciphers), the specialized `Block` function outperforms the more generic
`Bytes`:
ciphers), the specialized `Block` function is about 6 times faster than the
more generic `Bytes`:
```
BenchmarkBlock-4 1000000000 2.72 ns/op 5888.02 MB/s
```
BenchmarkBlock-4 2000000000 1.18 ns/op 13546.30 MB/s
```
+3 -3
View File
@@ -97,9 +97,9 @@ func Byte(dst, a []byte, b byte) int {
func Block(dst, a, b []byte) {
	// profiling indicates that for 16-byte blocks, the cost of a function
	// call outweighs the SSE/AVX speedup
	// --- removed by this commit: reinterpreting each slice header as a
	// []uintptr keeps the result a slice, so every dw[i]/aw[i]/bw[i] below
	// still pays a slice-header load and a bounds check.
	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
	aw := *(*[]uintptr)(unsafe.Pointer(&a))
	bw := *(*[]uintptr)(unsafe.Pointer(&b))
	// +++ added by this commit: casting the address of the first element to
	// a fixed-size *[2]uintptr gives the compiler a known array length, so
	// the two word accesses below need no bounds checks; the README hunk in
	// this commit reports the per-op time dropping from 2.72ns to 1.18ns.
	// The &dst[0] indexing also (unlike the old version) panics early if a
	// caller passes an empty slice, rather than reading a bad header.
	// NOTE(review): [2]uintptr spans 16 bytes only where uintptr is 8 bytes;
	// on 32-bit GOARCH it would cover just 8 of the 16 — confirm this
	// package is amd64-only or gated by build tags.
	dw := (*[2]uintptr)(unsafe.Pointer(&dst[0]))
	aw := (*[2]uintptr)(unsafe.Pointer(&a[0]))
	bw := (*[2]uintptr)(unsafe.Pointer(&b[0]))
	dw[0] = aw[0] ^ bw[0]
	dw[1] = aw[1] ^ bw[1]
}
+4 -2
View File
@@ -181,10 +181,12 @@ func BenchmarkRefByte(b *testing.B) {
}
func BenchmarkBlock(b *testing.B) {
	// --- removed by this commit: one shared buffer served as destination
	// and both sources, so the benchmark measured XORing a buffer with
	// itself (the destination is all zeros after the first iteration).
	buf := make([]byte, 16)
	// +++ added by this commit: distinct destination and source buffers
	// exercise the realistic three-operand call.
	dst := make([]byte, 16)
	bufA := make([]byte, 16)
	bufB := make([]byte, 16)
	// 16 bytes processed per op, so the runner can report MB/s.
	b.SetBytes(16)
	for i := 0; i < b.N; i++ {
		// --- removed (old call):
		Block(buf, buf, buf)
		// +++ added (new call):
		Block(dst, bufA, bufB)
	}
}