From e80b5f2049edb794e340da629bce9e44485a4dbb Mon Sep 17 00:00:00 2001
From: Jakob Unterwurzacher
Date: Wed, 9 Aug 2017 21:44:15 +0200
Subject: nametransform: extend diriv cache to 100 entries

* extend the diriv cache to 100 entries
* add special handling for the immutable root diriv

The larger cache allows us to shed some complexity from the path
encryption logic (the parent-of-parent check).

Mitigates https://github.com/rfjakob/gocryptfs/issues/127
---
 internal/nametransform/diriv.go                 | 59 ++++++++-----------
 internal/nametransform/dirivcache/dirivcache.go | 77 ++++++++++++++++---------
 2 files changed, 75 insertions(+), 61 deletions(-)

(limited to 'internal')

diff --git a/internal/nametransform/diriv.go b/internal/nametransform/diriv.go
index 87e7887..ffaf785 100644
--- a/internal/nametransform/diriv.go
+++ b/internal/nametransform/diriv.go
@@ -111,56 +111,45 @@ func (be *NameTransform) encryptAndHashName(name string, iv []byte) string {
 // EncryptPathDirIV - encrypt relative plaintext path "plainPath" using EME with
 // DirIV. "rootDir" is the backing storage root directory.
 // Components that are longer than 255 bytes are hashed if be.longnames == true.
-func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (cipherPath string, err error) {
+func (be *NameTransform) EncryptPathDirIV(plainPath string, rootDir string) (string, error) {
+	var err error
 	// Empty string means root directory
 	if plainPath == "" {
 		return plainPath, nil
 	}
-	// Reject names longer than 255 bytes already here. This relieves everybody
-	// who uses hashed long names from checking for that later.
+	// Reject names longer than 255 bytes.
 	baseName := filepath.Base(plainPath)
 	if len(baseName) > syscall.NAME_MAX {
 		return "", syscall.ENAMETOOLONG
 	}
-	// Check if the DirIV is cached. This catches the case of the user iterating
-	// over files in a directory pretty well.
-	parentDir := filepath.Dir(plainPath)
-	iv, cParentDir := be.DirIVCache.Lookup(parentDir)
-	if iv != nil {
+	// If we have the iv and the encrypted directory name in the cache, we
+	// can skip the directory walk. This optimization yields a 10% improvement
+	// in the tar extract benchmark.
+	parentDir := Dir(plainPath)
+	if iv, cParentDir := be.DirIVCache.Lookup(parentDir); iv != nil {
 		cBaseName := be.encryptAndHashName(baseName, iv)
 		return filepath.Join(cParentDir, cBaseName), nil
 	}
-	// We have to walk the directory tree, in the worst case starting at the root
-	// directory.
-	wd := rootDir
+	// We have to walk the directory tree, starting at the root directory.
+	// ciphertext working directory (relative path)
+	cipherWD := ""
+	// plaintext working directory (relative path)
+	plainWD := ""
 	plainNames := strings.Split(plainPath, "/")
-	// So the DirIV we need is not cached. But maybe one level higher is
-	// cached. Then we can skip a few items in the directory walk.
-	// This catches the case of walking directories recursively.
-	parentDir2 := filepath.Dir(parentDir)
-	iv, cParentDir = be.DirIVCache.Lookup(parentDir2)
-	if iv != nil {
-		parentDirBase := filepath.Base(parentDir)
-		cBaseName := be.encryptAndHashName(parentDirBase, iv)
-		wd = filepath.Join(wd, cParentDir, cBaseName)
-		cipherPath = filepath.Join(cParentDir, cBaseName)
-		skip := len(strings.Split(cipherPath, "/"))
-		plainNames = plainNames[skip:]
-	}
-	// Walk the directory tree starting at "wd"
 	for _, plainName := range plainNames {
-		iv, err = ReadDirIV(wd)
-		if err != nil {
-			return "", err
+		iv, _ := be.DirIVCache.Lookup(plainWD)
+		if iv == nil {
+			iv, err = ReadDirIV(filepath.Join(rootDir, cipherWD))
+			if err != nil {
+				return "", err
+			}
+			be.DirIVCache.Store(plainWD, iv, cipherWD)
 		}
-		encryptedName := be.encryptAndHashName(plainName, iv)
-		cipherPath = filepath.Join(cipherPath, encryptedName)
-		wd = filepath.Join(wd, encryptedName)
+		cipherName := be.encryptAndHashName(plainName, iv)
+		cipherWD = filepath.Join(cipherWD, cipherName)
+		plainWD = filepath.Join(plainWD, plainName)
 	}
-	// Cache the final DirIV
-	cParentDir = filepath.Dir(cipherPath)
-	be.DirIVCache.Store(parentDir, iv, cParentDir)
-	return cipherPath, nil
+	return cipherWD, nil
 }
 
 // Dir is like filepath.Dir but returns "" instead of ".".
diff --git a/internal/nametransform/dirivcache/dirivcache.go b/internal/nametransform/dirivcache/dirivcache.go
index 890ebac..2708d89 100644
--- a/internal/nametransform/dirivcache/dirivcache.go
+++ b/internal/nametransform/dirivcache/dirivcache.go
@@ -5,59 +5,84 @@ import (
 	"time"
 )
 
-// Single-entry DirIV cache. Stores the directory IV and the encrypted
-// path.
+const (
+	maxEntries = 100
+	expireTime = 1 * time.Second
+)
+
+type cacheEntry struct {
+	// DirIV of the directory.
+	iv []byte
+	// Relative ciphertext path of the directory.
+	cDir string
+}
+
+// DirIVCache stores up to "maxEntries" directory IVs.
 type DirIVCache struct {
-	// Directory the DirIV belongs to
-	dir string
-	// Time the entry expires.
+	// data in the cache, indexed by relative plaintext path
+	// of the directory.
+	data map[string]cacheEntry
+
+	// The DirIV of the root directory gets special treatment because it
+	// cannot change (the root directory cannot be renamed or deleted).
+	// It is unaffected by the expiry timer and cache clears.
+	rootDirIV []byte
+
+	// expiry is the time when the whole cache expires.
 	// The cached entry my become out-of-date if the ciphertext directory is
 	// modifed behind the back of gocryptfs. Having an expiry time limits the
 	// inconstency to one second, like attr_timeout does for the kernel
 	// getattr cache.
 	expiry time.Time
 
-	// The DirIV
-	iv []byte
-	// Ecrypted version of "dir"
-	cDir string
-
-	// Invalidated?
-	cleared bool
 	sync.RWMutex
 }
 
-// lookup - fetch entry for "dir" from the cache
+// Lookup - fetch entry for "dir" from the cache
 func (c *DirIVCache) Lookup(dir string) ([]byte, string) {
 	c.RLock()
 	defer c.RUnlock()
-	if c.cleared || c.dir != dir {
+	if dir == "" {
+		return c.rootDirIV, ""
+	}
+	if c.data == nil {
 		return nil, ""
 	}
 	if time.Since(c.expiry) > 0 {
-		c.cleared = true
+		c.data = nil
 		return nil, ""
 	}
-	return c.iv, c.cDir
+	v := c.data[dir]
+	return v.iv, v.cDir
 }
 
-// store - write entry for "dir" into the cache
+// Store - write entry for "dir" into the cache
 func (c *DirIVCache) Store(dir string, iv []byte, cDir string) {
 	c.Lock()
 	defer c.Unlock()
-	c.cleared = false
-	c.iv = iv
-	c.dir = dir
-	c.cDir = cDir
-	// Set expiry time one second into the future
-	c.expiry = time.Now().Add(1 * time.Second)
+	if dir == "" {
+		c.rootDirIV = iv
+	}
+	if c.data == nil {
+		c.data = make(map[string]cacheEntry, maxEntries)
+		// Set expiry time one second into the future
+		c.expiry = time.Now().Add(expireTime)
+	}
+	// Delete a random entry from the map if reached maxEntries
+	if len(c.data) >= maxEntries {
+		for k := range c.data {
+			delete(c.data, k)
+			break
+		}
+	}
+	c.data[dir] = cacheEntry{iv, cDir}
 }
 
 // Clear ... clear the cache.
-// Exported because it is called from fusefrontend when directories are
-// renamed or deleted.
+// Called from fusefrontend when directories are renamed or deleted.
 func (c *DirIVCache) Clear() {
 	c.Lock()
 	defer c.Unlock()
-	c.cleared = true
+	// Will be re-initialized in the next Store()
+	c.data = nil
 }
-- 
cgit v1.2.3