v2api: implement Create

author: Jakob Unterwurzacher 2020-06-21 13:25:12 +0200
committer: Jakob Unterwurzacher 2020-06-21 13:25:12 +0200
commit: f6ded09e36a679695354f4b9bc74242ef399be09 (patch)
tree: 4ed4c3986682a3b342152e3693dee8ba707dc276 /internal
parent: 74a4accf0cc1fd3265abd8fa53b0721cd72c2158 (diff)
6 files changed, 920 insertions, 0 deletions
diff --git a/internal/fusefrontend/file2.go b/internal/fusefrontend/file2.go
new file mode 100644
index 0000000..2882732
--- /dev/null
+++ b/internal/fusefrontend/file2.go
@@ -0,0 +1,476 @@
+package fusefrontend
+
+// FUSE operations on file handles
+
+import (
+	"bytes"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/hanwen/go-fuse/v2/fuse"
+	"github.com/hanwen/go-fuse/v2/fuse/nodefs"
+
+	"github.com/rfjakob/gocryptfs/internal/contentenc"
+	"github.com/rfjakob/gocryptfs/internal/inomap"
+	"github.com/rfjakob/gocryptfs/internal/openfiletable"
+	"github.com/rfjakob/gocryptfs/internal/serialize_reads"
+	"github.com/rfjakob/gocryptfs/internal/stupidgcm"
+	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
+	"github.com/rfjakob/gocryptfs/internal/tlog"
+)
+
+var _ nodefs.File = &File2{} // Verify that File2 implements the interface.
+
+// File2 - based on loopbackFile in go-fuse/fuse/nodefs/files.go
+type File2 struct {
+	fd *os.File
+	// Has Release() already been called on this file? This also means that the
+	// wlock entry has been freed, so let's not crash trying to access it.
+	// Due to concurrency, Release can overtake other operations. These will
+	// return EBADF in that case.
+	released bool
+	// fdLock prevents the fd to be closed while we are in the middle of
+	// an operation.
+	// Every FUSE entrypoint should RLock(). The only user of Lock() is
+	// Release(), which closes the fd and sets "released" to true.
+	fdLock sync.RWMutex
+	// Content encryption helper
+	contentEnc *contentenc.ContentEnc
+	// Device and inode number uniquely identify the backing file
+	qIno inomap.QIno
+	// Entry in the open file table
+	fileTableEntry *openfiletable.Entry
+	// Store where the last byte was written
+	lastWrittenOffset int64
+	// The opCount is used to judge whether "lastWrittenOffset" is still
+	// guaranteed to be correct.
+	lastOpCount uint64
+	// Parent filesystem
+	rootNode *RootNode
+	// We embed a nodefs.NewDefaultFile() that returns ENOSYS for every operation we
+	// have not implemented. This prevents build breakage when the go-fuse library
+	// adds new methods to the nodefs.File interface.
+	nodefs.File
+}
+
+// NewFile2 returns a new go-fuse File instance.
+func NewFile2(fd *os.File, rn *RootNode, st *syscall.Stat_t) *File2 {
+	qi := inomap.QInoFromStat(st)
+	e := openfiletable.Register(qi)
+
+	return &File2{
+		fd:             fd,
+		contentEnc:     rn.contentEnc,
+		qIno:           qi,
+		fileTableEntry: e,
+		rootNode:       rn,
+		File:           nodefs.NewDefaultFile(),
+	}
+}
+
+// intFd - return the backing file descriptor as an integer.
+func (f *File2) intFd() int {
+	return int(f.fd.Fd())
+}
+
+// readFileID loads the file header from disk and extracts the file ID.
+// Returns io.EOF if the file is empty.
+func (f *File2) readFileID() ([]byte, error) {
+	// We read +1 byte to determine if the file has actual content
+	// and not only the header. A header-only file will be considered empty.
+	// This makes File ID poisoning more difficult.
+	readLen := contentenc.HeaderLen + 1
+	buf := make([]byte, readLen)
+	n, err := f.fd.ReadAt(buf, 0)
+	if err != nil {
+		if err == io.EOF && n != 0 {
+			tlog.Warn.Printf("readFileID %d: incomplete file, got %d instead of %d bytes",
+				f.qIno.Ino, n, readLen)
+			f.rootNode.reportMitigatedCorruption(fmt.Sprint(f.qIno.Ino))
+		}
+		return nil, err
+	}
+	buf = buf[:contentenc.HeaderLen]
+	h, err := contentenc.ParseHeader(buf)
+	if err != nil {
+		return nil, err
+	}
+	return h.ID, nil
+}
+
+// createHeader creates a new random header and writes it to disk.
+// Returns the new file ID.
+// The caller must hold fileIDLock.Lock().
+func (f *File2) createHeader() (fileID []byte, err error) {
+	h := contentenc.RandomHeader()
+	buf := h.Pack()
+	// Prevent partially written (=corrupt) header by preallocating the space beforehand
+	if !f.rootNode.args.NoPrealloc {
+		err = syscallcompat.EnospcPrealloc(f.intFd(), 0, contentenc.HeaderLen)
+		if err != nil {
+			if !syscallcompat.IsENOSPC(err) {
+				tlog.Warn.Printf("ino%d: createHeader: prealloc failed: %s\n", f.qIno.Ino, err.Error())
+			}
+			return nil, err
+		}
+	}
+	// Actually write header
+	_, err = f.fd.WriteAt(buf, 0)
+	if err != nil {
+		return nil, err
+	}
+	return h.ID, err
+}
+
+// doRead - read "length" plaintext bytes from plaintext offset "off" and append
+// to "dst".
+// Arguments "length" and "off" do not have to be block-aligned.
+//
+// doRead reads the corresponding ciphertext blocks from disk, decrypts them and
+// returns the requested part of the plaintext.
+//
+// Called by Read() for normal reading,
+// by Write() and Truncate() via doWrite() for Read-Modify-Write.
+func (f *File2) doRead(dst []byte, off uint64, length uint64) ([]byte, fuse.Status) {
+	// Get the file ID, either from the open file table, or from disk.
+	var fileID []byte
+	f.fileTableEntry.IDLock.Lock()
+	if f.fileTableEntry.ID != nil {
+		// Use the cached value in the file table
+		fileID = f.fileTableEntry.ID
+	} else {
+		// Not cached, we have to read it from disk.
+		var err error
+		fileID, err = f.readFileID()
+		if err != nil {
+			f.fileTableEntry.IDLock.Unlock()
+			if err == io.EOF {
+				// Empty file
+				return nil, fuse.OK
+			}
+			buf := make([]byte, 100)
+			n, _ := f.fd.ReadAt(buf, 0)
+			buf = buf[:n]
+			hexdump := hex.EncodeToString(buf)
+			tlog.Warn.Printf("doRead %d: corrupt header: %v\nFile hexdump (%d bytes): %s",
+				f.qIno.Ino, err, n, hexdump)
+			return nil, fuse.EIO
+		}
+		// Save into the file table
+		f.fileTableEntry.ID = fileID
+	}
+	f.fileTableEntry.IDLock.Unlock()
+	if fileID == nil {
+		log.Panicf("fileID=%v", fileID)
+	}
+	// Read the backing ciphertext in one go
+	blocks := f.contentEnc.ExplodePlainRange(off, length)
+	alignedOffset, alignedLength := blocks[0].JointCiphertextRange(blocks)
+	skip := blocks[0].Skip
+	tlog.Debug.Printf("doRead: off=%d len=%d -> off=%d len=%d skip=%d\n",
+		off, length, alignedOffset, alignedLength, skip)
+
+	ciphertext := f.rootNode.contentEnc.CReqPool.Get()
+	ciphertext = ciphertext[:int(alignedLength)]
+	n, err := f.fd.ReadAt(ciphertext, int64(alignedOffset))
+	if err != nil && err != io.EOF {
+		tlog.Warn.Printf("read: ReadAt: %s", err.Error())
+		return nil, fuse.ToStatus(err)
+	}
+	// The ReadAt came back empty. We can skip all the decryption and return early.
+	if n == 0 {
+		f.rootNode.contentEnc.CReqPool.Put(ciphertext)
+		return dst, fuse.OK
+	}
+	// Truncate ciphertext buffer down to actually read bytes
+	ciphertext = ciphertext[0:n]
+
+	firstBlockNo := blocks[0].BlockNo
+	tlog.Debug.Printf("ReadAt offset=%d bytes (%d blocks), want=%d, got=%d", alignedOffset, firstBlockNo, alignedLength, n)
+
+	// Decrypt it
+	plaintext, err := f.contentEnc.DecryptBlocks(ciphertext, firstBlockNo, fileID)
+	f.rootNode.contentEnc.CReqPool.Put(ciphertext)
+	if err != nil {
+		if f.rootNode.args.ForceDecode && err == stupidgcm.ErrAuth {
+			// We do not have the information which block was corrupt here anymore,
+			// but DecryptBlocks() has already logged it anyway.
+			tlog.Warn.Printf("doRead %d: off=%d len=%d: returning corrupt data due to forcedecode",
+				f.qIno.Ino, off, length)
+		} else {
+			curruptBlockNo := firstBlockNo + f.contentEnc.PlainOffToBlockNo(uint64(len(plaintext)))
+			tlog.Warn.Printf("doRead %d: corrupt block #%d: %v", f.qIno.Ino, curruptBlockNo, err)
+			return nil, fuse.EIO
+		}
+	}
+
+	// Crop down to the relevant part
+	var out []byte
+	lenHave := len(plaintext)
+	lenWant := int(skip + length)
+	if lenHave > lenWant {
+		out = plaintext[skip:lenWant]
+	} else if lenHave > int(skip) {
+		out = plaintext[skip:lenHave]
+	}
+	// else: out stays empty, file was smaller than the requested offset
+
+	out = append(dst, out...)
+	f.rootNode.contentEnc.PReqPool.Put(plaintext)
+
+	return out, fuse.OK
+}
+
+// Read - FUSE call
+func (f *File2) Read(buf []byte, off int64) (resultData fuse.ReadResult, code fuse.Status) {
+	if len(buf) > fuse.MAX_KERNEL_WRITE {
+		// This would crash us due to our fixed-size buffer pool
+		tlog.Warn.Printf("Read: rejecting oversized request with EMSGSIZE, len=%d", len(buf))
+		return nil, fuse.Status(syscall.EMSGSIZE)
+	}
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+
+	f.fileTableEntry.ContentLock.RLock()
+	defer f.fileTableEntry.ContentLock.RUnlock()
+
+	tlog.Debug.Printf("ino%d: FUSE Read: offset=%d length=%d", f.qIno.Ino, off, len(buf))
+	if f.rootNode.args.SerializeReads {
+		serialize_reads.Wait(off, len(buf))
+	}
+	out, status := f.doRead(buf[:0], uint64(off), uint64(len(buf)))
+	if f.rootNode.args.SerializeReads {
+		serialize_reads.Done()
+	}
+	if status != fuse.OK {
+		return nil, status
+	}
+	tlog.Debug.Printf("ino%d: Read: status %v, returning %d bytes", f.qIno.Ino, status, len(out))
+	return fuse.ReadResultData(out), status
+}
+
+// doWrite - encrypt "data" and write it to plaintext offset "off"
+//
+// Arguments do not have to be block-aligned, read-modify-write is
+// performed internally as necessary
+//
+// Called by Write() for normal writing,
+// and by Truncate() to rewrite the last file block.
+//
+// Empty writes do nothing and are allowed.
+func (f *File2) doWrite(data []byte, off int64) (uint32, fuse.Status) {
+	fileWasEmpty := false
+	// Get the file ID, create a new one if it does not exist yet.
+	var fileID []byte
+	// The caller has exclusively locked ContentLock, which blocks all other
+	// readers and writers. No need to take IDLock.
+	if f.fileTableEntry.ID != nil {
+		fileID = f.fileTableEntry.ID
+	} else {
+		// If the file ID is not cached, read it from disk
+		var err error
+		fileID, err = f.readFileID()
+		// Write a new file header if the file is empty
+		if err == io.EOF {
+			fileID, err = f.createHeader()
+			fileWasEmpty = true
+		}
+		if err != nil {
+			return 0, fuse.ToStatus(err)
+		}
+		f.fileTableEntry.ID = fileID
+	}
+	// Handle payload data
+	dataBuf := bytes.NewBuffer(data)
+	blocks := f.contentEnc.ExplodePlainRange(uint64(off), uint64(len(data)))
+	toEncrypt := make([][]byte, len(blocks))
+	for i, b := range blocks {
+		blockData := dataBuf.Next(int(b.Length))
+		// Incomplete block -> Read-Modify-Write
+		if b.IsPartial() {
+			// Read
+			oldData, status := f.doRead(nil, b.BlockPlainOff(), f.contentEnc.PlainBS())
+			if status != fuse.OK {
+				tlog.Warn.Printf("ino%d fh%d: RMW read failed: %s", f.qIno.Ino, f.intFd(), status.String())
+				return 0, status
+			}
+			// Modify
+			blockData = f.contentEnc.MergeBlocks(oldData, blockData, int(b.Skip))
+			tlog.Debug.Printf("len(oldData)=%d len(blockData)=%d", len(oldData), len(blockData))
+		}
+		tlog.Debug.Printf("ino%d: Writing %d bytes to block #%d",
+			f.qIno.Ino, len(blockData), b.BlockNo)
+		// Write into the to-encrypt list
+		toEncrypt[i] = blockData
+	}
+	// Encrypt all blocks
+	ciphertext := f.contentEnc.EncryptBlocks(toEncrypt, blocks[0].BlockNo, f.fileTableEntry.ID)
+	// Preallocate so we cannot run out of space in the middle of the write.
+	// This prevents partially written (=corrupt) blocks.
+	var err error
+	cOff := int64(blocks[0].BlockCipherOff())
+	if !f.rootNode.args.NoPrealloc {
+		err = syscallcompat.EnospcPrealloc(f.intFd(), cOff, int64(len(ciphertext)))
+		if err != nil {
+			if !syscallcompat.IsENOSPC(err) {
+				tlog.Warn.Printf("ino%d fh%d: doWrite: prealloc failed: %v", f.qIno.Ino, f.intFd(), err)
+			}
+			if fileWasEmpty {
+				// Kill the file header again
+				f.fileTableEntry.ID = nil
+				err2 := syscall.Ftruncate(f.intFd(), 0)
+				if err2 != nil {
+					tlog.Warn.Printf("ino%d fh%d: doWrite: rollback failed: %v", f.qIno.Ino, f.intFd(), err2)
+				}
+			}
+			return 0, fuse.ToStatus(err)
+		}
+	}
+	// Write
+	_, err = f.fd.WriteAt(ciphertext, cOff)
+	// Return memory to CReqPool
+	f.rootNode.contentEnc.CReqPool.Put(ciphertext)
+	if err != nil {
+		tlog.Warn.Printf("ino%d fh%d: doWrite: WriteAt off=%d len=%d failed: %v",
+			f.qIno.Ino, f.intFd(), cOff, len(ciphertext), err)
+		return 0, fuse.ToStatus(err)
+	}
+	return uint32(len(data)), fuse.OK
+}
+
+// isConsecutiveWrite returns true if the current write
+// directly (in time and space) follows the last write.
+// This is an optimisation for streaming writes on NFS where a
+// Stat() call is very expensive.
+// The caller must hold f.fileTableEntry.ContentLock, otherwise this check would be racy.
+func (f *File2) isConsecutiveWrite(off int64) bool {
+	opCount := openfiletable.WriteOpCount()
+	return opCount == f.lastOpCount+1 && off == f.lastWrittenOffset+1
+}
+
+// Write - FUSE call
+//
+// If the write creates a hole, pads the file to the next block boundary.
+func (f *File2) Write(data []byte, off int64) (uint32, fuse.Status) {
+	if len(data) > fuse.MAX_KERNEL_WRITE {
+		// This would crash us due to our fixed-size buffer pool
+		tlog.Warn.Printf("Write: rejecting oversized request with EMSGSIZE, len=%d", len(data))
+		return 0, fuse.Status(syscall.EMSGSIZE)
+	}
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+	if f.released {
+		// The file descriptor has been closed concurrently
+		tlog.Warn.Printf("ino%d fh%d: Write on released file", f.qIno.Ino, f.intFd())
+		return 0, fuse.EBADF
+	}
+	f.fileTableEntry.ContentLock.Lock()
+	defer f.fileTableEntry.ContentLock.Unlock()
+	tlog.Debug.Printf("ino%d: FUSE Write: offset=%d length=%d", f.qIno.Ino, off, len(data))
+	// If the write creates a file hole, we have to zero-pad the last block.
+	// But if the write directly follows an earlier write, it cannot create a
+	// hole, and we can save one Stat() call.
+	if !f.isConsecutiveWrite(off) {
+		status := f.writePadHole(off)
+		if !status.Ok() {
+			return 0, status
+		}
+	}
+	n, status := f.doWrite(data, off)
+	if status.Ok() {
+		f.lastOpCount = openfiletable.WriteOpCount()
+		f.lastWrittenOffset = off + int64(len(data)) - 1
+	}
+	return n, status
+}
+
+// Release - FUSE call, close file
+func (f *File2) Release() {
+	f.fdLock.Lock()
+	if f.released {
+		log.Panicf("ino%d fh%d: double release", f.qIno.Ino, f.intFd())
+	}
+	f.released = true
+	openfiletable.Unregister(f.qIno)
+	f.fd.Close()
+	f.fdLock.Unlock()
+}
+
+// Flush - FUSE call
+func (f *File2) Flush() fuse.Status {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+
+	// Since Flush() may be called for each dup'd fd, we don't
+	// want to really close the file, we just want to flush. This
+	// is achieved by closing a dup'd fd.
+	newFd, err := syscall.Dup(f.intFd())
+
+	if err != nil {
+		return fuse.ToStatus(err)
+	}
+	err = syscall.Close(newFd)
+	return fuse.ToStatus(err)
+}
+
+// Fsync FUSE call
+func (f *File2) Fsync(flags int) (code fuse.Status) {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+
+	return fuse.ToStatus(syscall.Fsync(f.intFd()))
+}
+
+// Chmod FUSE call
+func (f *File2) Chmod(mode uint32) fuse.Status {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+
+	// os.File.Chmod goes through the "syscallMode" translation function that messes
+	// up the suid and sgid bits. So use syscall.Fchmod directly.
+	err := syscall.Fchmod(f.intFd(), mode)
+	return fuse.ToStatus(err)
+}
+
+// Chown FUSE call
+func (f *File2) Chown(uid uint32, gid uint32) fuse.Status {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+
+	return fuse.ToStatus(f.fd.Chown(int(uid), int(gid)))
+}
+
+// GetAttr FUSE call (like stat)
+func (f *File2) GetAttr(a *fuse.Attr) fuse.Status {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+
+	tlog.Debug.Printf("file.GetAttr()")
+	st := syscall.Stat_t{}
+	err := syscall.Fstat(f.intFd(), &st)
+	if err != nil {
+		return fuse.ToStatus(err)
+	}
+	f.rootNode.inoMap.TranslateStat(&st)
+	a.FromStat(&st)
+	a.Size = f.contentEnc.CipherSizeToPlainSize(a.Size)
+	if f.rootNode.args.ForceOwner != nil {
+		a.Owner = *f.rootNode.args.ForceOwner
+	}
+
+	return fuse.OK
+}
+
+// Utimens FUSE call
+func (f *File2) Utimens(a *time.Time, m *time.Time) fuse.Status {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+	err := syscallcompat.FutimesNano(f.intFd(), a, m)
+	return fuse.ToStatus(err)
+}
diff --git a/internal/fusefrontend/file2_allocate_truncate.go b/internal/fusefrontend/file2_allocate_truncate.go
new file mode 100644
index 0000000..f799a3e
--- /dev/null
+++ b/internal/fusefrontend/file2_allocate_truncate.go
@@ -0,0 +1,217 @@
+package fusefrontend
+
+// FUSE operations Truncate and Allocate on file handles
+// i.e. ftruncate and fallocate
+
+import (
+	"log"
+	"syscall"
+
+	"github.com/hanwen/go-fuse/v2/fuse"
+
+	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
+	"github.com/rfjakob/gocryptfs/internal/tlog"
+)
+
+// Allocate - FUSE call for fallocate(2)
+//
+// mode=FALLOC_FL_KEEP_SIZE is implemented directly.
+//
+// mode=FALLOC_DEFAULT is implemented as a two-step process:
+//
+//   (1) Allocate the space using FALLOC_FL_KEEP_SIZE
+//   (2) Set the file size using ftruncate (via truncateGrowFile)
+//
+// This allows us to reuse the file grow mechanics from Truncate as they are
+// complicated and hard to get right.
+//
+// Other modes (hole punching, zeroing) are not supported.
+func (f *File2) Allocate(off uint64, sz uint64, mode uint32) fuse.Status {
+	if mode != FALLOC_DEFAULT && mode != FALLOC_FL_KEEP_SIZE {
+		f := func() {
+			tlog.Info.Printf("fallocate: only mode 0 (default) and 1 (keep size) are supported")
+		}
+		allocateWarnOnce.Do(f)
+		return fuse.Status(syscall.EOPNOTSUPP)
+	}
+
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+	if f.released {
+		return fuse.EBADF
+	}
+	f.fileTableEntry.ContentLock.Lock()
+	defer f.fileTableEntry.ContentLock.Unlock()
+
+	blocks := f.contentEnc.ExplodePlainRange(off, sz)
+	firstBlock := blocks[0]
+	lastBlock := blocks[len(blocks)-1]
+
+	// Step (1): Allocate the space the user wants using FALLOC_FL_KEEP_SIZE.
+	// This will fill file holes and/or allocate additional space past the end of
+	// the file.
+	cipherOff := firstBlock.BlockCipherOff()
+	cipherSz := lastBlock.BlockCipherOff() - cipherOff +
+		f.contentEnc.BlockOverhead() + lastBlock.Skip + lastBlock.Length
+	err := syscallcompat.Fallocate(f.intFd(), FALLOC_FL_KEEP_SIZE, int64(cipherOff), int64(cipherSz))
+	tlog.Debug.Printf("Allocate off=%d sz=%d mode=%x cipherOff=%d cipherSz=%d\n",
+		off, sz, mode, cipherOff, cipherSz)
+	if err != nil {
+		return fuse.ToStatus(err)
+	}
+	if mode == FALLOC_FL_KEEP_SIZE {
+		// The user did not want to change the apparent size. We are done.
+		return fuse.OK
+	}
+	// Step (2): Grow the apparent file size
+	// We need the old file size to determine if we are growing the file at all.
+	newPlainSz := off + sz
+	oldPlainSz, err := f.statPlainSize()
+	if err != nil {
+		return fuse.ToStatus(err)
+	}
+	if newPlainSz <= oldPlainSz {
+		// The new size is smaller (or equal). Fallocate with mode = 0 never
+		// truncates a file, so we are done.
+		return fuse.OK
+	}
+	// The file grows. The space has already been allocated in (1), so what is
+	// left to do is to pad the first and last block and call truncate.
+	// truncateGrowFile does just that.
+	return f.truncateGrowFile(oldPlainSz, newPlainSz)
+}
+
+// Truncate - FUSE call. Sets the plaintext size of the file to newSize.
+func (f *File2) Truncate(newSize uint64) fuse.Status {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+	if f.released {
+		// The file descriptor has been closed concurrently.
+		tlog.Warn.Printf("ino%d fh%d: Truncate on released file", f.qIno.Ino, f.intFd())
+		return fuse.EBADF
+	}
+	f.fileTableEntry.ContentLock.Lock()
+	defer f.fileTableEntry.ContentLock.Unlock()
+	var err error
+	// Common case first: Truncate to zero
+	if newSize == 0 {
+		err = syscall.Ftruncate(int(f.fd.Fd()), 0)
+		if err != nil {
+			tlog.Warn.Printf("ino%d fh%d: Ftruncate(fd, 0) returned error: %v", f.qIno.Ino, f.intFd(), err)
+			return fuse.ToStatus(err)
+		}
+		// Truncate to zero kills the file header
+		f.fileTableEntry.ID = nil
+		return fuse.OK
+	}
+	// We need the old file size to determine if we are growing or shrinking
+	// the file
+	oldSize, err := f.statPlainSize()
+	if err != nil {
+		return fuse.ToStatus(err)
+	}
+
+	oldB := float32(oldSize) / float32(f.contentEnc.PlainBS())
+	newB := float32(newSize) / float32(f.contentEnc.PlainBS())
+	tlog.Debug.Printf("ino%d: FUSE Truncate from %.2f to %.2f blocks (%d to %d bytes)", f.qIno.Ino, oldB, newB, oldSize, newSize)
+
+	// File size stays the same - nothing to do
+	if newSize == oldSize {
+		return fuse.OK
+	}
+	// File grows
+	if newSize > oldSize {
+		return f.truncateGrowFile(oldSize, newSize)
+	}
+
+	// File shrinks
+	blockNo := f.contentEnc.PlainOffToBlockNo(newSize)
+	cipherOff := f.contentEnc.BlockNoToCipherOff(blockNo)
+	plainOff := f.contentEnc.BlockNoToPlainOff(blockNo)
+	lastBlockLen := newSize - plainOff
+	var data []byte
+	if lastBlockLen > 0 {
+		var status fuse.Status
+		data, status = f.doRead(nil, plainOff, lastBlockLen)
+		if status != fuse.OK {
+			tlog.Warn.Printf("Truncate: shrink doRead returned error: %v", status)
+			return status
+		}
+	}
+	// Truncate down to the last complete block
+	err = syscall.Ftruncate(int(f.fd.Fd()), int64(cipherOff))
+	if err != nil {
+		tlog.Warn.Printf("Truncate: shrink Ftruncate returned error: %v", err)
+		return fuse.ToStatus(err)
+	}
+	// Append partial block
+	if lastBlockLen > 0 {
+		_, status := f.doWrite(data, int64(plainOff))
+		return status
+	}
+	return fuse.OK
+}
+
+// statPlainSize stats the file and returns the plaintext size
+func (f *File2) statPlainSize() (uint64, error) {
+	fi, err := f.fd.Stat()
+	if err != nil {
+		tlog.Warn.Printf("ino%d fh%d: statPlainSize: %v", f.qIno.Ino, f.intFd(), err)
+		return 0, err
+	}
+	cipherSz := uint64(fi.Size())
+	plainSz := uint64(f.contentEnc.CipherSizeToPlainSize(cipherSz))
+	return plainSz, nil
+}
+
+// truncateGrowFile extends a file using seeking or ftruncate performing RMW on
+// the first and last block as necessary. New blocks in the middle become
+// file holes unless they have been fallocate()'d beforehand.
+func (f *File2) truncateGrowFile(oldPlainSz uint64, newPlainSz uint64) fuse.Status {
+	if newPlainSz <= oldPlainSz {
+		log.Panicf("BUG: newSize=%d <= oldSize=%d", newPlainSz, oldPlainSz)
+	}
+	newEOFOffset := newPlainSz - 1
+	if oldPlainSz > 0 {
+		n1 := f.contentEnc.PlainOffToBlockNo(oldPlainSz - 1)
+		n2 := f.contentEnc.PlainOffToBlockNo(newEOFOffset)
+		// The file is grown within one block, no need to pad anything.
+		// Write a single zero to the last byte and let doWrite figure out the RMW.
+		if n1 == n2 {
+			buf := make([]byte, 1)
+			_, status := f.doWrite(buf, int64(newEOFOffset))
+			return status
+		}
+	}
+	// The truncate creates at least one new block.
+	//
+	// Make sure the old last block is padded to the block boundary. This call
+	// is a no-op if it is already block-aligned.
+	status := f.zeroPad(oldPlainSz)
+	if !status.Ok() {
+		return status
+	}
+	// The new size is block-aligned. In this case we can do everything ourselves
+	// and avoid the call to doWrite.
+	if newPlainSz%f.contentEnc.PlainBS() == 0 {
+		// The file was empty, so it did not have a header. Create one.
+		if oldPlainSz == 0 {
+			id, err := f.createHeader()
+			if err != nil {
+				return fuse.ToStatus(err)
+			}
+			f.fileTableEntry.ID = id
+		}
+		cSz := int64(f.contentEnc.PlainSizeToCipherSize(newPlainSz))
+		err := syscall.Ftruncate(f.intFd(), cSz)
+		if err != nil {
+			tlog.Warn.Printf("Truncate: grow Ftruncate returned error: %v", err)
+		}
+		return fuse.ToStatus(err)
+	}
+	// The new size is NOT aligned, so we need to write a partial block.
+	// Write a single zero to the last byte and let doWrite figure it out.
+	buf := make([]byte, 1)
+	_, status = f.doWrite(buf, int64(newEOFOffset))
+	return status
+}
diff --git a/internal/fusefrontend/file2_holes.go b/internal/fusefrontend/file2_holes.go
new file mode 100644
index 0000000..5e06981
--- /dev/null
+++ b/internal/fusefrontend/file2_holes.go
@@ -0,0 +1,92 @@
+package fusefrontend
+
+// Helper functions for sparse files (files with holes)
+
+import (
+	"runtime"
+	"syscall"
+
+	"github.com/hanwen/go-fuse/v2/fuse"
+
+	"github.com/rfjakob/gocryptfs/internal/tlog"
+)
+
+// Will a write to plaintext offset "targetOff" create a file hole in the
+// ciphertext? If yes, zero-pad the last ciphertext block.
+func (f *File2) writePadHole(targetOff int64) fuse.Status {
+	// Get the current file size.
+	fi, err := f.fd.Stat()
+	if err != nil {
+		tlog.Warn.Printf("writePadHole: Fstat failed: %v", err)
+		return fuse.ToStatus(err)
+	}
+	plainSize := f.contentEnc.CipherSizeToPlainSize(uint64(fi.Size()))
+	// Appending a single byte to the file (equivalent to writing to
+	// offset=plainSize) would write to "nextBlock".
+	nextBlock := f.contentEnc.PlainOffToBlockNo(plainSize)
+	// targetBlock is the block the user wants to write to.
+	targetBlock := f.contentEnc.PlainOffToBlockNo(uint64(targetOff))
+	// The write goes into an existing block or (if the last block was full)
+	// starts a new one directly after the last block. Nothing to do.
+	if targetBlock <= nextBlock {
+		return fuse.OK
+	}
+	// The write goes past the next block. nextBlock has
+	// to be zero-padded to the block boundary and (at least) nextBlock+1
+	// will contain a file hole in the ciphertext.
+	status := f.zeroPad(plainSize)
+	if status != fuse.OK {
+		return status
+	}
+	return fuse.OK
+}
+
+// Zero-pad the file of size plainSize to the next block boundary. This is a no-op
+// if the file is already block-aligned.
+func (f *File2) zeroPad(plainSize uint64) fuse.Status {
+	lastBlockLen := plainSize % f.contentEnc.PlainBS()
+	if lastBlockLen == 0 {
+		// Already block-aligned
+		return fuse.OK
+	}
+	missing := f.contentEnc.PlainBS() - lastBlockLen
+	pad := make([]byte, missing)
+	tlog.Debug.Printf("zeroPad: Writing %d bytes\n", missing)
+	_, status := f.doWrite(pad, int64(plainSize))
+	return status
+}
+
+// SeekData calls the lseek syscall with SEEK_DATA. It returns the offset of the
+// next data bytes, skipping over file holes.
+func (f *File2) SeekData(oldOffset int64) (int64, error) {
+	if runtime.GOOS != "linux" {
+		// Does MacOS support something like this?
+		return 0, syscall.EOPNOTSUPP
+	}
+	const SEEK_DATA = 3
+
+	// Convert plaintext offset to ciphertext offset and round down to the
+	// start of the current block. File holes smaller than a full block will
+	// be ignored.
+	blockNo := f.contentEnc.PlainOffToBlockNo(uint64(oldOffset))
+	oldCipherOff := int64(f.contentEnc.BlockNoToCipherOff(blockNo))
+
+	// Determine the next data offset. If the old offset points to (or beyond)
+	// the end of the file, the Seek syscall fails with syscall.ENXIO.
+	newCipherOff, err := syscall.Seek(f.intFd(), oldCipherOff, SEEK_DATA)
+	if err != nil {
+		return 0, err
+	}
+
+	// Convert ciphertext offset back to plaintext offset. At this point,
+	// newCipherOff should always be >= contentenc.HeaderLen. Round down,
+	// but ensure that the result is never smaller than the initial offset
+	// (to avoid endless loops).
+	blockNo = f.contentEnc.CipherOffToBlockNo(uint64(newCipherOff))
+	newOffset := int64(f.contentEnc.BlockNoToPlainOff(blockNo))
+	if newOffset < oldOffset {
+		newOffset = oldOffset
+	}
+
+	return newOffset, nil
+}
diff --git a/internal/fusefrontend/node.go b/internal/fusefrontend/node.go
index 9074f72..28e606a 100644
--- a/internal/fusefrontend/node.go
+++ b/internal/fusefrontend/node.go
@@ -2,6 +2,7 @@ package fusefrontend
 
 import (
 	"context"
+	"os"
 	"path/filepath"
 	"syscall"
 
@@ -10,7 +11,9 @@ import (
 	"github.com/hanwen/go-fuse/v2/fs"
 	"github.com/hanwen/go-fuse/v2/fuse"
 
+	"github.com/rfjakob/gocryptfs/internal/nametransform"
 	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
+	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 
 // Node is a file or directory in the filesystem tree
@@ -31,6 +34,9 @@ func (n *Node) rootNode() *RootNode {
 func (n *Node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) {
 	rn := n.rootNode()
 	p := filepath.Join(n.path(), name)
+	if rn.isFiltered(p) {
+		return nil, syscall.EPERM
+	}
 	dirfd, cName, err := rn.openBackingDir(p)
 	if err != nil {
 		return nil, fs.ToErrno(err)
@@ -71,3 +77,68 @@ func (n *Node) Getattr(ctx context.Context, f fs.FileHandle, out *fuse.AttrOut)
 	out.Attr.FromStat(st)
 	return 0
 }
+
+// Create - FUSE call. Creates and opens the file "name" in this directory
+// and returns the new inode together with an open file handle.
+func (n *Node) Create(ctx context.Context, name string, flags uint32, mode uint32, out *fuse.EntryOut) (inode *fs.Inode, fh fs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
+	rn := n.rootNode()
+	path := filepath.Join(n.path(), name)
+	if rn.isFiltered(path) {
+		return nil, nil, 0, syscall.EPERM
+	}
+	dirfd, cName, err := rn.openBackingDir(path)
+	if err != nil {
+		return nil, nil, 0, fs.ToErrno(err)
+	}
+	defer syscall.Close(dirfd)
+
+	fd := -1
+	// Make sure context is nil if we don't want to preserve the owner
+	if !rn.args.PreserveOwner {
+		ctx = nil
+	}
+	newFlags := rn.mangleOpenFlags(flags)
+	// Handle long file name
+	if !rn.args.PlaintextNames && nametransform.IsLongContent(cName) {
+		// Create ".name"
+		err = rn.nameTransform.WriteLongNameAt(dirfd, cName, path)
+		if err != nil {
+			return nil, nil, 0, fs.ToErrno(err)
+		}
+		// Create content
+		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, ctx)
+		if err != nil {
+			nametransform.DeleteLongNameAt(dirfd, cName)
+		}
+	} else {
+		// Create content, normal (short) file name
+		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, ctx)
+	}
+	if err != nil {
+		// xfstests generic/488 triggers this
+		if err == syscall.EMFILE {
+			var lim syscall.Rlimit
+			syscall.Getrlimit(syscall.RLIMIT_NOFILE, &lim)
+			tlog.Warn.Printf("Create %q: too many open files. Current \"ulimit -n\": %d", cName, lim.Cur)
+		}
+		return nil, nil, 0, fs.ToErrno(err)
+	}
+
+	// Get device number and inode number into `st`
+	st, err := syscallcompat.Fstatat2(dirfd, cName, unix.AT_SYMLINK_NOFOLLOW)
+	if err != nil {
+		// Do not leak the freshly opened fd when we bail out
+		syscall.Close(fd)
+		return nil, nil, 0, fs.ToErrno(err)
+	}
+	// Get unique inode number
+	rn.inoMap.TranslateStat(st)
+	out.Attr.FromStat(st)
+	// Create child node
+	id := fs.StableAttr{Mode: uint32(st.Mode), Gen: 1, Ino: st.Ino}
+	node := &Node{}
+	ch := n.NewInode(ctx, node, id)
+
+	f := os.NewFile(uintptr(fd), cName)
+	return ch, NewFile2(f, rn, st), 0, 0
+}
diff --git a/internal/fusefrontend/root_node.go b/internal/fusefrontend/root_node.go
index 1116a41..c84ac93 100644
--- a/internal/fusefrontend/root_node.go
+++ b/internal/fusefrontend/root_node.go
@@ -1,11 +1,16 @@
 package fusefrontend
 
 import (
+	"os"
+	"sync/atomic"
+	"syscall"
 	"time"
 
+	"github.com/rfjakob/gocryptfs/internal/configfile"
 	"github.com/rfjakob/gocryptfs/internal/contentenc"
 	"github.com/rfjakob/gocryptfs/internal/inomap"
 	"github.com/rfjakob/gocryptfs/internal/nametransform"
+	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
 	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 
@@ -47,6 +52,30 @@ func NewRootNode(args Args, c *contentenc.ContentEnc, n nametransform.NameTransf
 	}
 }
 
+// mangleOpenFlags is used by Create() and Open() to convert the open flags the user
+// wants to the flags we internally use to open the backing file.
+// The returned flags always contain O_NOFOLLOW.
+func (rn *RootNode) mangleOpenFlags(flags uint32) (newFlags int) {
+	newFlags = int(flags)
+	// Convert WRONLY to RDWR. We always need read access to do read-modify-write cycles.
+	if (newFlags & syscall.O_ACCMODE) == syscall.O_WRONLY {
+		newFlags = newFlags ^ os.O_WRONLY | os.O_RDWR
+	}
+	// We also cannot open the file in append mode, we need to seek back for RMW
+	newFlags = newFlags &^ os.O_APPEND
+	// O_DIRECT accesses must be aligned in both offset and length. Due to our
+	// crypto header, alignment will be off, even if userspace makes aligned
+	// accesses. Running xfstests generic/013 on ext4 used to trigger lots of
+	// EINVAL errors due to missing alignment. Just fall back to buffered IO.
+	newFlags = newFlags &^ syscallcompat.O_DIRECT
+	// Create and Open are two separate FUSE operations, so O_CREAT should not
+	// be part of the open flags.
+	newFlags = newFlags &^ syscall.O_CREAT
+	// We always want O_NOFOLLOW to be safe against symlink races
+	newFlags |= syscall.O_NOFOLLOW
+	return newFlags
+}
+
 // reportMitigatedCorruption is used to report a corruption that was transparently
 // mitigated and did not return an error to the user. Pass the name of the corrupt
 // item (filename for OpenDir(), xattr name for ListXAttr() etc).
@@ -63,3 +92,23 @@ func (rn *RootNode) reportMitigatedCorruption(item string) {
 		return
 	}
 }
+
+// isFiltered - check if plaintext "path" should be forbidden
+//
+// Prevents name clashes with internal files when file names are not encrypted
+func (rn *RootNode) isFiltered(path string) bool {
+	atomic.StoreUint32(&rn.IsIdle, 0)
+
+	if !rn.args.PlaintextNames {
+		return false
+	}
+	// gocryptfs.conf in the root directory is forbidden
+	if path == configfile.ConfDefaultName {
+		tlog.Info.Printf("The name /%s is reserved when -plaintextnames is used\n",
+			configfile.ConfDefaultName)
+		return true
+	}
+	// Note: gocryptfs.diriv is NOT forbidden because diriv and plaintextnames
+	// are exclusive
+	return false
+}
diff --git a/internal/syscallcompat/sys_linux.go b/internal/syscallcompat/sys_linux.go
index e2b19bb..02064ac 100644
--- a/internal/syscallcompat/sys_linux.go
+++ b/internal/syscallcompat/sys_linux.go
@@ -2,6 +2,7 @@
 package syscallcompat
 
 import (
+	"context"
 	"fmt"
 	"io/ioutil"
 	"runtime"
@@ -88,6 +89,20 @@ func getSupplementaryGroups(pid uint32) (gids []int) {
 	return nil
 }
 
+// OpenatUserCtx tries to extract a fuse.Context from the generic ctx and
+// calls OpenatUser.
+func OpenatUserCtx(dirfd int, path string, flags int, mode uint32, ctx context.Context) (fd int, err error) {
+	var ctx2 *fuse.Context
+	if ctx != nil {
+		if caller, ok := fuse.FromContext(ctx); ok {
+			ctx2 = &fuse.Context{
+				Caller: *caller,
+			}
+		}
+	}
+	return OpenatUser(dirfd, path, flags, mode, ctx2)
+}
+
 // OpenatUser runs the Openat syscall in the context of a different user.
 func OpenatUser(dirfd int, path string, flags int, mode uint32, context *fuse.Context) (fd int, err error) {
 	if context != nil {
author	Jakob Unterwurzacher	2020-06-21 13:25:12 +0200
committer	Jakob Unterwurzacher	2020-06-21 13:25:12 +0200
commit	f6ded09e36a679695354f4b9bc74242ef399be09 (patch)
tree	4ed4c3986682a3b342152e3693dee8ba707dc276 /internal
parent	74a4accf0cc1fd3265abd8fa53b0721cd72c2158 (diff)