aboutsummaryrefslogtreecommitdiff
path: root/internal/cryptocore/randprefetch.go
diff options
context:
space:
mode:
author Jakob Unterwurzacher 2017-06-09 21:52:26 +0200
committer Jakob Unterwurzacher 2017-06-09 22:05:14 +0200
commit 80516ed3351477793eec882508969b6b29b69b0a (patch)
tree c461bd49e79fd6d8bf7f5dc8f28058faf2ba3078 /internal/cryptocore/randprefetch.go
parent da1bd742461e397abefc814bb0c0a21a6d8ec3d6 (diff)
cryptocore: prefetch nonces in 512-byte blocks
On my machine, reading 512-byte blocks from /dev/urandom (same via the
getentropy syscall) is a lot faster in terms of throughput:

Blocksize  Throughput
16         28.18 MB/s
512        83.75 MB/s

For a single-threaded streaming write, this drops the CPU usage of
nonceGenerator.Get to almost 1/3:

        flat   flat%   sum%    cum    cum%
Before  0      0%      95.08%  0.35s  2.92%  github.com/rfjakob/gocryptfs/internal/cryptocore.(*nonceGenerator).Get
After   0.01s  0.092%  92.34%  0.13s  1.20%  github.com/rfjakob/gocryptfs/internal/cryptocore.(*nonceGenerator).Get

This change makes the nonce reading single-threaded, which may hurt
massively-parallel writes.
Diffstat (limited to 'internal/cryptocore/randprefetch.go')
-rw-r--r-- internal/cryptocore/randprefetch.go 50
1 files changed, 50 insertions, 0 deletions
diff --git a/internal/cryptocore/randprefetch.go b/internal/cryptocore/randprefetch.go
new file mode 100644
index 0000000..8825a05
--- /dev/null
+++ b/internal/cryptocore/randprefetch.go
@@ -0,0 +1,50 @@
+package cryptocore
+
+import (
+ "bytes"
+ "log"
+ "sync"
+)
+
+// prefetchN is the number of bytes to prefetch in one go.
+//
+// 512 looks like a good compromise between throughput and latency.
+// Benchmark results per block size:
+//
+//	Benchmark16-2      3000000   567 ns/op   28.18 MB/s
+//	Benchmark64-2      5000000   293 ns/op   54.51 MB/s
+//	Benchmark128-2    10000000   220 ns/op   72.48 MB/s
+//	Benchmark256-2    10000000   210 ns/op   76.17 MB/s
+//	Benchmark512-2    10000000   191 ns/op   83.75 MB/s
+//	Benchmark1024-2   10000000   171 ns/op   93.48 MB/s
+//	Benchmark2048-2   10000000   165 ns/op   96.45 MB/s
+//	Benchmark4096-2   10000000   165 ns/op   96.58 MB/s
+//	Benchmark40960-2  10000000   147 ns/op  108.82 MB/s
+const prefetchN = 512
+
+// randPrefetcherT holds a buffer of prefetched bytes together with the
+// mutex that serializes access to it.
+type randPrefetcherT struct {
+	// Embedded lock; must be held while buf is read or written.
+	sync.Mutex
+	// buf stores the prefetched bytes that have not been handed out yet.
+	buf bytes.Buffer
+}
+
+// read returns a freshly allocated slice of want bytes taken from the
+// prefetch buffer.
+//
+// If the buffer cannot supply want bytes (it is empty, or holds fewer
+// than want bytes), whatever is left is discarded and the buffer is
+// refilled with prefetchN bytes obtained from RandBytes before the read
+// is retried once.
+//
+// read panics (via log.Panicf) if the request still cannot be satisfied
+// after the refill, for example when want is larger than prefetchN. The
+// mutex is released before panicking, so a caller that recovers from the
+// panic does not leave the prefetcher locked forever.
+func (r *randPrefetcherT) read(want int) (out []byte) {
+	out = make([]byte, want)
+	r.Lock()
+	// Note: don't use defer, it slows us down!
+	have, err := r.buf.Read(out)
+	if have == want && err == nil {
+		r.Unlock()
+		return out
+	}
+	// Buffer was empty or ran short -> drop the remainder and re-fill.
+	// The second Read overwrites any partial data already copied to out.
+	r.buf.Reset()
+	r.buf.Write(RandBytes(prefetchN))
+	have, err = r.buf.Read(out)
+	// Release the lock on every path, including the panic below.
+	r.Unlock()
+	if have != want || err != nil {
+		log.Panicf("randPrefetcher could not satisfy read: have=%d want=%d err=%v", have, want, err)
+	}
+	return out
+}
+
+// randPrefetcher is the package-level prefetcher instance. Its zero value
+// is ready to use: both the embedded mutex and the buffer need no
+// initialization.
+var randPrefetcher randPrefetcherT