From e80b5f2049edb794e340da629bce9e44485a4dbb Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Wed, 9 Aug 2017 21:44:15 +0200 Subject: nametransform: extend diriv cache to 100 entries * extend the diriv cache to 100 entries * add special handling for the immutable root diriv The better cache allows to shed some complexity from the path encryption logic (parent-of-parent check). Mitigates https://github.com/rfjakob/gocryptfs/issues/127 --- internal/nametransform/diriv.go | 59 ++++++++----------- internal/nametransform/dirivcache/dirivcache.go | 77 ++++++++++++++++--------- 2 files changed, 75 insertions(+), 61 deletions(-) diff --git a/internal/nametransform/diriv.go b/internal/nametransform/diriv.go index 87e7887..ffaf785 100644 --- a/internal/nametransform/diriv.go +++ b/internal/nametransform/diriv.go @@ -111,56 +111,45 @@ func (be *NameTransform) encryptAndHashName(name string, iv []byte) string { // EncryptPathDirIV - encrypt relative plaintext path "plainPath" using EME with // DirIV. "rootDir" is the backing storage root directory. // Components that are longer than 255 bytes are hashed if be.longnames == true. -func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cipherPath string, err error) { +func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (string, error) { + var err error // Empty string means root directory if plainPath == "" { return plainPath, nil } - // Reject names longer than 255 bytes already here. This relieves everybody - // who uses hashed long names from checking for that later. + // Reject names longer than 255 bytes. baseName := filepath.Base(plainPath) if len(baseName) > syscall.NAME_MAX { return "", syscall.ENAMETOOLONG } - // Check if the DirIV is cached. This catches the case of the user iterating - // over files in a directory pretty well. 
- parentDir := filepath.Dir(plainPath) - iv, cParentDir := be.DirIVCache.Lookup(parentDir) - if iv != nil { + // If we have the iv and the encrypted directory name in the cache, we + // can skip the directory walk. This optimization yields a 10% improvement + // in the tar extract benchmark. + parentDir := Dir(plainPath) + if iv, cParentDir := be.DirIVCache.Lookup(parentDir); iv != nil { cBaseName := be.encryptAndHashName(baseName, iv) return filepath.Join(cParentDir, cBaseName), nil } - // We have to walk the directory tree, in the worst case starting at the root - // directory. - wd := rootDir + // We have to walk the directory tree, starting at the root directory. + // ciphertext working directory (relative path) + cipherWD := "" + // plaintext working directory (relative path) + plainWD := "" plainNames := strings.Split(plainPath, "/") - // So the DirIV we need is not cached. But maybe one level higher is - // cached. Then we can skip a few items in the directory walk. - // This catches the case of walking directories recursively. 
- parentDir2 := filepath.Dir(parentDir) - iv, cParentDir = be.DirIVCache.Lookup(parentDir2) - if iv != nil { - parentDirBase := filepath.Base(parentDir) - cBaseName := be.encryptAndHashName(parentDirBase, iv) - wd = filepath.Join(wd, cParentDir, cBaseName) - cipherPath = filepath.Join(cParentDir, cBaseName) - skip := len(strings.Split(cipherPath, "/")) - plainNames = plainNames[skip:] - } - // Walk the directory tree starting at "wd" for _, plainName := range plainNames { - iv, err = ReadDirIV(wd) - if err != nil { - return "", err + iv, _ := be.DirIVCache.Lookup(plainWD) + if iv == nil { + iv, err = ReadDirIV(filepath.Join(rootDir, cipherWD)) + if err != nil { + return "", err + } + be.DirIVCache.Store(plainWD, iv, cipherWD) } - encryptedName := be.encryptAndHashName(plainName, iv) - cipherPath = filepath.Join(cipherPath, encryptedName) - wd = filepath.Join(wd, encryptedName) + cipherName := be.encryptAndHashName(plainName, iv) + cipherWD = filepath.Join(cipherWD, cipherName) + plainWD = filepath.Join(plainWD, plainName) } - // Cache the final DirIV - cParentDir = filepath.Dir(cipherPath) - be.DirIVCache.Store(parentDir, iv, cParentDir) - return cipherPath, nil + return cipherWD, nil } // Dir is like filepath.Dir but returns "" instead of ".". diff --git a/internal/nametransform/dirivcache/dirivcache.go b/internal/nametransform/dirivcache/dirivcache.go index 890ebac..2708d89 100644 --- a/internal/nametransform/dirivcache/dirivcache.go +++ b/internal/nametransform/dirivcache/dirivcache.go @@ -5,59 +5,84 @@ import ( "time" ) -// Single-entry DirIV cache. Stores the directory IV and the encrypted -// path. +const ( + maxEntries = 100 + expireTime = 1 * time.Second +) + +type cacheEntry struct { + // DirIV of the directory. + iv []byte + // Relative ciphertext path of the directory. + cDir string +} + +// DirIVCache stores up to "maxEntries" directory IVs. type DirIVCache struct { - // Directory the DirIV belongs to - dir string - // Time the entry expires. 
+ // data in the cache, indexed by relative plaintext path + // of the directory. + data map[string]cacheEntry + + // The DirIV of the root directory gets special treatment because it + // cannot change (the root directory cannot be renamed or deleted). + // It is unaffected by the expiry timer and cache clears. + rootDirIV []byte + + // expiry is the time when the whole cache expires. // The cached entry may become out-of-date if the ciphertext directory is // modified behind the back of gocryptfs. Having an expiry time limits the // inconsistency to one second, like attr_timeout does for the kernel // getattr cache. expiry time.Time - // The DirIV - iv []byte - // Encrypted version of "dir" - cDir string - - // Invalidated? - cleared bool sync.RWMutex } -// lookup - fetch entry for "dir" from the cache +// Lookup - fetch entry for "dir" from the cache func (c *DirIVCache) Lookup(dir string) ([]byte, string) { c.RLock() defer c.RUnlock() - if c.cleared || c.dir != dir { + if dir == "" { + return c.rootDirIV, "" + } + if c.data == nil { return nil, "" } if time.Since(c.expiry) > 0 { - c.cleared = true + c.data = nil return nil, "" } - return c.iv, c.cDir + v := c.data[dir] + return v.iv, v.cDir } -// store - write entry for "dir" into the cache +// Store - write entry for "dir" into the cache func (c *DirIVCache) Store(dir string, iv []byte, cDir string) { c.Lock() defer c.Unlock() - c.cleared = false - c.iv = iv - c.dir = dir - c.cDir = cDir - // Set expiry time one second into the future - c.expiry = time.Now().Add(1 * time.Second) + if dir == "" { + c.rootDirIV = iv + } + if c.data == nil { + c.data = make(map[string]cacheEntry, maxEntries) + // Set expiry time one second into the future + c.expiry = time.Now().Add(expireTime) + } + // Delete a random entry from the map if reached maxEntries + if len(c.data) >= maxEntries { + for k := range c.data { + delete(c.data, k) + break + } + } + c.data[dir] = cacheEntry{iv, cDir} } // Clear ... clear the cache. 
-// Exported because it is called from fusefrontend when directories are -// renamed or deleted. +// Called from fusefrontend when directories are renamed or deleted. func (c *DirIVCache) Clear() { c.Lock() defer c.Unlock() - c.cleared = true + // Will be re-initialized in the next Store() + c.data = nil } -- cgit v1.2.3