Speed up Block by casting to array, not slice
This commit is contained in:
@@ -53,9 +53,9 @@ roughly twice as fast as SSE, which is unsurprising since it can operate on
|
|||||||
twice as many bits per cycle. Lastly, for very small slices, the cost of the
|
twice as many bits per cycle. Lastly, for very small slices, the cost of the
|
||||||
function call starts to outweigh the benefit of AVX/SSE (the Go compiler never
|
function call starts to outweigh the benefit of AVX/SSE (the Go compiler never
|
||||||
inlines handwritten asm). If you need to xor exactly 16 bytes (common in block
|
inlines handwritten asm). If you need to xor exactly 16 bytes (common in block
|
||||||
ciphers), the specialized `Block` function outperforms the more generic
|
ciphers), the specialized `Block` function is about 6 times faster than the
|
||||||
`Bytes`:
|
more generic `Bytes`:
|
||||||
|
|
||||||
```
|
```
|
||||||
BenchmarkBlock-4 1000000000 2.72 ns/op 5888.02 MB/s
|
BenchmarkBlock-4 2000000000 1.18 ns/op 13546.30 MB/s
|
||||||
```
|
```
|
||||||
+3
-3
@@ -97,9 +97,9 @@ func Byte(dst, a []byte, b byte) int {
|
|||||||
func Block(dst, a, b []byte) {
|
func Block(dst, a, b []byte) {
|
||||||
// profiling indicates that for 16-byte blocks, the cost of a function
|
// profiling indicates that for 16-byte blocks, the cost of a function
|
||||||
// call outweighs the SSE/AVX speedup
|
// call outweighs the SSE/AVX speedup
|
||||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
dw := (*[2]uintptr)(unsafe.Pointer(&dst[0]))
|
||||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
aw := (*[2]uintptr)(unsafe.Pointer(&a[0]))
|
||||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
bw := (*[2]uintptr)(unsafe.Pointer(&b[0]))
|
||||||
dw[0] = aw[0] ^ bw[0]
|
dw[0] = aw[0] ^ bw[0]
|
||||||
dw[1] = aw[1] ^ bw[1]
|
dw[1] = aw[1] ^ bw[1]
|
||||||
}
|
}
|
||||||
|
|||||||
+4
-2
@@ -181,10 +181,12 @@ func BenchmarkRefByte(b *testing.B) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkBlock(b *testing.B) {
|
func BenchmarkBlock(b *testing.B) {
|
||||||
buf := make([]byte, 16)
|
dst := make([]byte, 16)
|
||||||
|
bufA := make([]byte, 16)
|
||||||
|
bufB := make([]byte, 16)
|
||||||
b.SetBytes(16)
|
b.SetBytes(16)
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
Block(buf, buf, buf)
|
Block(dst, bufA, bufB)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user