speedup Block by casting to array, not slice
This commit is contained in:
@@ -53,9 +53,9 @@ roughly twice as fast as SSE, which is unsurprising since it can operate on
|
||||
twice as many bits per cycle. Lastly, for very small slices, the cost of the
|
||||
function call starts to outweigh the benefit of AVX/SSE (the Go compiler never
|
||||
inlines handwritten asm). If you need to xor exactly 16 bytes (common in block
|
||||
ciphers), the specialized `Block` function outperforms the more generic
|
||||
`Bytes`:
|
||||
ciphers), the specialized `Block` function is about 6 times faster than the
|
||||
more generic `Bytes`:
|
||||
|
||||
```
|
||||
BenchmarkBlock-4 1000000000 2.72 ns/op 5888.02 MB/s
|
||||
```
|
||||
BenchmarkBlock-4 2000000000 1.18 ns/op 13546.30 MB/s
|
||||
```
|
||||
|
||||
+3
-3
@@ -97,9 +97,9 @@ func Byte(dst, a []byte, b byte) int {
|
||||
// Block XORs a with b one uintptr-sized word at a time and stores the two
// resulting words in dst.
//
// NOTE(review): this listing is a rendered diff, so two sets of dw/aw/bw
// declarations appear below. Going by the commit title ("casting to array,
// not slice"), the slice-header casts look like the removed lines and the
// [2]uintptr array-pointer casts the added ones — confirm against the file.
// NOTE(review): no length check is visible. The array-pointer form only
// indexes element 0 of each slice, so callers presumably must pass at least
// two words (16 bytes where uintptr is 8 bytes) — confirm with callers.
func Block(dst, a, b []byte) {
|
||||
// profiling indicates that for 16-byte blocks, the cost of a function
|
||||
// call outweighs the SSE/AVX speedup
|
||||
// Slice-header form: reinterprets each []byte header as a []uintptr header.
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
// Array-pointer form: views the memory starting at each slice's first byte
// as a [2]uintptr.
dw := (*[2]uintptr)(unsafe.Pointer(&dst[0]))
|
||||
aw := (*[2]uintptr)(unsafe.Pointer(&a[0]))
|
||||
bw := (*[2]uintptr)(unsafe.Pointer(&b[0]))
|
||||
// XOR the two words; together they cover 2*sizeof(uintptr) bytes.
dw[0] = aw[0] ^ bw[0]
|
||||
dw[1] = aw[1] ^ bw[1]
|
||||
}
|
||||
|
||||
+4
-2
@@ -181,10 +181,12 @@ func BenchmarkRefByte(b *testing.B) {
|
||||
}
|
||||
|
||||
// BenchmarkBlock times Block on 16-byte buffers; b.SetBytes(16) makes the
// benchmark report throughput per 16-byte call.
//
// NOTE(review): this listing is a rendered diff, so two variants of the setup
// and of the Block call appear below. The single shared buf looks like the
// removed variant and the separate dst/bufA/bufB buffers the added one —
// confirm against the file.
func BenchmarkBlock(b *testing.B) {
|
||||
// Single-buffer variant: the same slice is used as destination and both inputs.
buf := make([]byte, 16)
|
||||
// Three-buffer variant: distinct destination and input slices.
dst := make([]byte, 16)
|
||||
bufA := make([]byte, 16)
|
||||
bufB := make([]byte, 16)
|
||||
b.SetBytes(16)
|
||||
for i := 0; i < b.N; i++ {
|
||||
Block(buf, buf, buf)
|
||||
Block(dst, bufA, bufB)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user