diff options
author | Jakob Unterwurzacher | 2017-05-22 22:26:59 +0200 |
---|---|---|
committer | Jakob Unterwurzacher | 2017-05-22 22:36:54 +0200 |
commit | 245b84c887955d12cd1113e9a6701ee7338c8255 (patch) | |
tree | d810d14f63800a81c7fa073e91f71c34bdfb963e | |
parent | 5672e41f5a1ac0fb6c46e99c6bdeae0946a448c7 (diff) |
nametransform: diriv cache: fall back to the grandparent
When a user calls into a deep directory hierarchy, we often
get a sequence like this from the kernel:
LOOKUP a
LOOKUP a/b
LOOKUP a/b/c
LOOKUP a/b/c/d
The diriv cache was not effective for this pattern, because it
was designed for this:
LOOKUP a/a
LOOKUP a/b
LOOKUP a/c
LOOKUP a/d
By also using the cached entry of the grandparent we can avoid lots
of diriv reads.
This benchmark is against a large encrypted directory hosted on NFS:
Before:
$ time ls -R nfs-backed-mount > /dev/null
real 1m35.976s
user 0m0.248s
sys 0m0.281s
After:
$ time ls -R nfs-backed-mount > /dev/null
real 1m3.670s
user 0m0.217s
sys 0m0.403s
-rw-r--r-- | internal/nametransform/diriv.go | 32 |
1 files changed, 25 insertions, 7 deletions
diff --git a/internal/nametransform/diriv.go b/internal/nametransform/diriv.go
index 00d059f..e74592a 100644
--- a/internal/nametransform/diriv.go
+++ b/internal/nametransform/diriv.go
@@ -90,7 +90,8 @@ func WriteDirIV(dir string) error {
 	return nil
 }
 
-// EncryptPathDirIV - encrypt relative plaintext path using EME with DirIV.
+// EncryptPathDirIV - encrypt relative plaintext path "plainPath" using EME with
+// DirIV. "rootDir" is the backing storage root directory.
 // Components that are longer than 255 bytes are hashed if be.longnames == true.
 func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cipherPath string, err error) {
 	// Empty string means root directory
@@ -103,7 +104,8 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip
 	if len(baseName) > syscall.NAME_MAX {
 		return "", syscall.ENAMETOOLONG
 	}
-	// Check if the DirIV is cached
+	// Check if the DirIV is cached. This catches the case of the user iterating
+	// over files in a directory pretty well.
 	parentDir := filepath.Dir(plainPath)
 	iv, cParentDir := be.DirIVCache.lookup(parentDir)
 	if iv != nil {
@@ -114,10 +116,27 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip
 		cipherPath = filepath.Join(cParentDir, cBaseName)
 		return cipherPath, nil
 	}
-	// Not cached - walk the directory tree
-	var wd = rootDir
-	var encryptedNames []string
+	// We have to walk the directory tree, in the worst case starting at the root
+	// directory.
+	wd := rootDir
 	plainNames := strings.Split(plainPath, "/")
+	// So the DirIV we need is not cached. But maybe one level higher is
+	// cached. Then we can skip a few items in the directory walk.
+	// The catches the case of walking directories recursively.
+	parentDir2 := filepath.Dir(parentDir)
+	iv, cParentDir = be.DirIVCache.lookup(parentDir2)
+	if iv != nil {
+		parentDirBase := filepath.Base(parentDir)
+		cBaseName := be.EncryptName(parentDirBase, iv)
+		if be.longNames && len(cBaseName) > syscall.NAME_MAX {
+			cBaseName = be.HashLongName(cBaseName)
+		}
+		wd = filepath.Join(wd, cParentDir, cBaseName)
+		cipherPath = filepath.Join(cParentDir, cBaseName)
+		skip := len(strings.Split(cipherPath, "/"))
+		plainNames = plainNames[skip:]
+	}
+	// Walk the directory tree starting at "wd"
 	for _, plainName := range plainNames {
 		iv, err = ReadDirIV(wd)
 		if err != nil {
@@ -127,10 +146,9 @@ func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cip
 		if be.longNames && len(encryptedName) > syscall.NAME_MAX {
 			encryptedName = be.HashLongName(encryptedName)
 		}
-		encryptedNames = append(encryptedNames, encryptedName)
+		cipherPath = filepath.Join(cipherPath, encryptedName)
 		wd = filepath.Join(wd, encryptedName)
 	}
-	cipherPath = filepath.Join(encryptedNames...)
 	// Cache the final DirIV
 	cParentDir = filepath.Dir(cipherPath)
 	be.DirIVCache.store(parentDir, iv, cParentDir)