diff options
| -rw-r--r-- | internal/fusefrontend/file2.go | 476 | ||||
| -rw-r--r-- | internal/fusefrontend/file2_allocate_truncate.go | 217 | ||||
| -rw-r--r-- | internal/fusefrontend/file2_holes.go | 92 | ||||
| -rw-r--r-- | internal/fusefrontend/node.go | 71 | ||||
| -rw-r--r-- | internal/fusefrontend/root_node.go | 49 | ||||
| -rw-r--r-- | internal/syscallcompat/sys_linux.go | 15 | 
6 files changed, 920 insertions, 0 deletions
| diff --git a/internal/fusefrontend/file2.go b/internal/fusefrontend/file2.go new file mode 100644 index 0000000..2882732 --- /dev/null +++ b/internal/fusefrontend/file2.go @@ -0,0 +1,476 @@ +package fusefrontend + +// FUSE operations on file handles + +import ( +	"bytes" +	"encoding/hex" +	"fmt" +	"io" +	"log" +	"os" +	"sync" +	"syscall" +	"time" + +	"github.com/hanwen/go-fuse/v2/fuse" +	"github.com/hanwen/go-fuse/v2/fuse/nodefs" + +	"github.com/rfjakob/gocryptfs/internal/contentenc" +	"github.com/rfjakob/gocryptfs/internal/inomap" +	"github.com/rfjakob/gocryptfs/internal/openfiletable" +	"github.com/rfjakob/gocryptfs/internal/serialize_reads" +	"github.com/rfjakob/gocryptfs/internal/stupidgcm" +	"github.com/rfjakob/gocryptfs/internal/syscallcompat" +	"github.com/rfjakob/gocryptfs/internal/tlog" +) + +var _ nodefs.File = &File2{} // Verify that File2 implements the nodefs.File interface. + +// File2 - based on loopbackFile in go-fuse/fuse/nodefs/files.go +type File2 struct { +	fd *os.File +	// Has Release() already been called on this file? This also means that the +	// wlock entry has been freed, so let's not crash trying to access it. +	// Due to concurrency, Release can overtake other operations. These will +	// return EBADF in that case. +	released bool +	// fdLock prevents the fd to be closed while we are in the middle of +	// an operation. +	// Every FUSE entrypoint should RLock(). The only user of Lock() is +	// Release(), which closes the fd and sets "released" to true. +	fdLock sync.RWMutex +	// Content encryption helper +	contentEnc *contentenc.ContentEnc +	// Device and inode number uniquely identify the backing file +	qIno inomap.QIno +	// Entry in the open file table +	fileTableEntry *openfiletable.Entry +	// Store where the last byte was written +	lastWrittenOffset int64 +	// The opCount is used to judge whether "lastWrittenOffset" is still +	// guaranteed to be correct. 
+	lastOpCount uint64 +	// Parent filesystem +	rootNode *RootNode +	// We embed a nodefs.NewDefaultFile() that returns ENOSYS for every operation we +	// have not implemented. This prevents build breakage when the go-fuse library +	// adds new methods to the nodefs.File interface. +	nodefs.File +} + +// NewFile2 returns a new go-fuse File2 instance for fd. It derives the QIno from st +// and registers it in the open file table. +func NewFile2(fd *os.File, rn *RootNode, st *syscall.Stat_t) *File2 { +	qi := inomap.QInoFromStat(st) +	e := openfiletable.Register(qi) + +	return &File2{ +		fd:             fd, +		contentEnc:     rn.contentEnc, +		qIno:           qi, +		fileTableEntry: e, +		rootNode:       rn, +		File:           nodefs.NewDefaultFile(), +	} +} + +// intFd - return the backing file descriptor as an integer. +func (f *File2) intFd() int { +	return int(f.fd.Fd()) +} + +// readFileID loads the file header from disk and extracts the file ID. +// Returns io.EOF if the file is empty. +func (f *File2) readFileID() ([]byte, error) { +	// We read +1 byte to determine if the file has actual content +	// and not only the header. A header-only file will be considered empty. +	// This makes File ID poisoning more difficult. +	readLen := contentenc.HeaderLen + 1 +	buf := make([]byte, readLen) +	n, err := f.fd.ReadAt(buf, 0) +	if err != nil { +		if err == io.EOF && n != 0 { +			tlog.Warn.Printf("readFileID %d: incomplete file, got %d instead of %d bytes", +				f.qIno.Ino, n, readLen) +			f.rootNode.reportMitigatedCorruption(fmt.Sprint(f.qIno.Ino)) +		} +		return nil, err +	} +	buf = buf[:contentenc.HeaderLen] +	h, err := contentenc.ParseHeader(buf) +	if err != nil { +		return nil, err +	} +	return h.ID, nil +} + +// createHeader creates a new random header and writes it to disk. +// Returns the new file ID. +// The caller must hold fileIDLock.Lock(). 
+func (f *File2) createHeader() (fileID []byte, err error) { +	h := contentenc.RandomHeader() +	buf := h.Pack() +	// Prevent partially written (=corrupt) header by preallocating the space beforehand +	if !f.rootNode.args.NoPrealloc { +		err = syscallcompat.EnospcPrealloc(f.intFd(), 0, contentenc.HeaderLen) +		if err != nil { +			if !syscallcompat.IsENOSPC(err) { +				tlog.Warn.Printf("ino%d: createHeader: prealloc failed: %s\n", f.qIno.Ino, err.Error()) +			} +			return nil, err +		} +	} +	// Actually write header +	_, err = f.fd.WriteAt(buf, 0) +	if err != nil { +		return nil, err +	} +	return h.ID, err +} + +// doRead - read "length" plaintext bytes from plaintext offset "off" and append +// to "dst". +// Arguments "length" and "off" do not have to be block-aligned. +// +// doRead reads the corresponding ciphertext blocks from disk, decrypts them and +// returns the requested part of the plaintext. +// +// Called by Read() for normal reading, +// by Write() and Truncate() via doWrite() for Read-Modify-Write. +func (f *File2) doRead(dst []byte, off uint64, length uint64) ([]byte, fuse.Status) { +	// Get the file ID, either from the open file table, or from disk. +	var fileID []byte +	f.fileTableEntry.IDLock.Lock() +	if f.fileTableEntry.ID != nil { +		// Use the cached value in the file table +		fileID = f.fileTableEntry.ID +	} else { +		// Not cached, we have to read it from disk. 
+		var err error +		fileID, err = f.readFileID() +		if err != nil { +			f.fileTableEntry.IDLock.Unlock() +			if err == io.EOF { +				// Empty file +				return nil, fuse.OK +			} +			buf := make([]byte, 100) +			n, _ := f.fd.ReadAt(buf, 0) +			buf = buf[:n] +			hexdump := hex.EncodeToString(buf) +			tlog.Warn.Printf("doRead %d: corrupt header: %v\nFile hexdump (%d bytes): %s", +				f.qIno.Ino, err, n, hexdump) +			return nil, fuse.EIO +		} +		// Save into the file table +		f.fileTableEntry.ID = fileID +	} +	f.fileTableEntry.IDLock.Unlock() +	if fileID == nil { +		log.Panicf("fileID=%v", fileID) +	} +	// Read the backing ciphertext in one go +	blocks := f.contentEnc.ExplodePlainRange(off, length) +	alignedOffset, alignedLength := blocks[0].JointCiphertextRange(blocks) +	skip := blocks[0].Skip +	tlog.Debug.Printf("doRead: off=%d len=%d -> off=%d len=%d skip=%d\n", +		off, length, alignedOffset, alignedLength, skip) + +	ciphertext := f.rootNode.contentEnc.CReqPool.Get() +	ciphertext = ciphertext[:int(alignedLength)] +	n, err := f.fd.ReadAt(ciphertext, int64(alignedOffset)) +	if err != nil && err != io.EOF { +		tlog.Warn.Printf("read: ReadAt: %s", err.Error()) +		return nil, fuse.ToStatus(err) +	} +	// The ReadAt came back empty. We can skip all the decryption and return early. +	if n == 0 { +		f.rootNode.contentEnc.CReqPool.Put(ciphertext) +		return dst, fuse.OK +	} +	// Truncate ciphertext buffer down to actually read bytes +	ciphertext = ciphertext[0:n] + +	firstBlockNo := blocks[0].BlockNo +	tlog.Debug.Printf("ReadAt offset=%d bytes (%d blocks), want=%d, got=%d", alignedOffset, firstBlockNo, alignedLength, n) + +	// Decrypt it +	plaintext, err := f.contentEnc.DecryptBlocks(ciphertext, firstBlockNo, fileID) +	f.rootNode.contentEnc.CReqPool.Put(ciphertext) +	if err != nil { +		if f.rootNode.args.ForceDecode && err == stupidgcm.ErrAuth { +			// We do not have the information which block was corrupt here anymore, +			// but DecryptBlocks() has already logged it anyway. 
+			tlog.Warn.Printf("doRead %d: off=%d len=%d: returning corrupt data due to forcedecode", +				f.qIno.Ino, off, length) +		} else { +			curruptBlockNo := firstBlockNo + f.contentEnc.PlainOffToBlockNo(uint64(len(plaintext))) +			tlog.Warn.Printf("doRead %d: corrupt block #%d: %v", f.qIno.Ino, curruptBlockNo, err) +			return nil, fuse.EIO +		} +	} + +	// Crop down to the relevant part +	var out []byte +	lenHave := len(plaintext) +	lenWant := int(skip + length) +	if lenHave > lenWant { +		out = plaintext[skip:lenWant] +	} else if lenHave > int(skip) { +		out = plaintext[skip:lenHave] +	} +	// else: out stays empty, file was smaller than the requested offset + +	out = append(dst, out...) +	f.rootNode.contentEnc.PReqPool.Put(plaintext) + +	return out, fuse.OK +} + +// Read - FUSE call +func (f *File2) Read(buf []byte, off int64) (resultData fuse.ReadResult, code fuse.Status) { +	if len(buf) > fuse.MAX_KERNEL_WRITE { +		// This would crash us due to our fixed-size buffer pool +		tlog.Warn.Printf("Read: rejecting oversized request with EMSGSIZE, len=%d", len(buf)) +		return nil, fuse.Status(syscall.EMSGSIZE) +	} +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() + +	f.fileTableEntry.ContentLock.RLock() +	defer f.fileTableEntry.ContentLock.RUnlock() + +	tlog.Debug.Printf("ino%d: FUSE Read: offset=%d length=%d", f.qIno.Ino, off, len(buf)) +	if f.rootNode.args.SerializeReads { +		serialize_reads.Wait(off, len(buf)) +	} +	out, status := f.doRead(buf[:0], uint64(off), uint64(len(buf))) +	if f.rootNode.args.SerializeReads { +		serialize_reads.Done() +	} +	if status != fuse.OK { +		return nil, status +	} +	tlog.Debug.Printf("ino%d: Read: status %v, returning %d bytes", f.qIno.Ino, status, len(out)) +	return fuse.ReadResultData(out), status +} + +// doWrite - encrypt "data" and write it to plaintext offset "off" +// +// Arguments do not have to be block-aligned, read-modify-write is +// performed internally as necessary +// +// Called by Write() for normal writing, +// and by 
Truncate() to rewrite the last file block. +// +// Empty writes do nothing and are allowed. +func (f *File2) doWrite(data []byte, off int64) (uint32, fuse.Status) { +	fileWasEmpty := false +	// Get the file ID, create a new one if it does not exist yet. +	var fileID []byte +	// The caller has exclusively locked ContentLock, which blocks all other +	// readers and writers. No need to take IDLock. +	if f.fileTableEntry.ID != nil { +		fileID = f.fileTableEntry.ID +	} else { +		// If the file ID is not cached, read it from disk +		var err error +		fileID, err = f.readFileID() +		// Write a new file header if the file is empty +		if err == io.EOF { +			fileID, err = f.createHeader() +			fileWasEmpty = true +		} +		if err != nil { +			return 0, fuse.ToStatus(err) +		} +		f.fileTableEntry.ID = fileID +	} +	// Handle payload data +	dataBuf := bytes.NewBuffer(data) +	blocks := f.contentEnc.ExplodePlainRange(uint64(off), uint64(len(data))) +	toEncrypt := make([][]byte, len(blocks)) +	for i, b := range blocks { +		blockData := dataBuf.Next(int(b.Length)) +		// Incomplete block -> Read-Modify-Write +		if b.IsPartial() { +			// Read +			oldData, status := f.doRead(nil, b.BlockPlainOff(), f.contentEnc.PlainBS()) +			if status != fuse.OK { +				tlog.Warn.Printf("ino%d fh%d: RMW read failed: %s", f.qIno.Ino, f.intFd(), status.String()) +				return 0, status +			} +			// Modify +			blockData = f.contentEnc.MergeBlocks(oldData, blockData, int(b.Skip)) +			tlog.Debug.Printf("len(oldData)=%d len(blockData)=%d", len(oldData), len(blockData)) +		} +		tlog.Debug.Printf("ino%d: Writing %d bytes to block #%d", +			f.qIno.Ino, len(blockData), b.BlockNo) +		// Write into the to-encrypt list +		toEncrypt[i] = blockData +	} +	// Encrypt all blocks +	ciphertext := f.contentEnc.EncryptBlocks(toEncrypt, blocks[0].BlockNo, f.fileTableEntry.ID) +	// Preallocate so we cannot run out of space in the middle of the write. +	// This prevents partially written (=corrupt) blocks. 
+	var err error +	cOff := int64(blocks[0].BlockCipherOff()) +	if !f.rootNode.args.NoPrealloc { +		err = syscallcompat.EnospcPrealloc(f.intFd(), cOff, int64(len(ciphertext))) +		if err != nil { +			if !syscallcompat.IsENOSPC(err) { +				tlog.Warn.Printf("ino%d fh%d: doWrite: prealloc failed: %v", f.qIno.Ino, f.intFd(), err) +			} +			if fileWasEmpty { +				// Kill the file header again +				f.fileTableEntry.ID = nil +				err2 := syscall.Ftruncate(f.intFd(), 0) +				if err2 != nil { +					tlog.Warn.Printf("ino%d fh%d: doWrite: rollback failed: %v", f.qIno.Ino, f.intFd(), err2) +				} +			} +			return 0, fuse.ToStatus(err) +		} +	} +	// Write +	_, err = f.fd.WriteAt(ciphertext, cOff) +	// Return memory to CReqPool +	f.rootNode.contentEnc.CReqPool.Put(ciphertext) +	if err != nil { +		tlog.Warn.Printf("ino%d fh%d: doWrite: WriteAt off=%d len=%d failed: %v", +			f.qIno.Ino, f.intFd(), cOff, len(ciphertext), err) +		return 0, fuse.ToStatus(err) +	} +	return uint32(len(data)), fuse.OK +} + +// isConsecutiveWrite returns true if the current write +// directly (in time and space) follows the last write. +// This is an optimisation for streaming writes on NFS where a +// Stat() call is very expensive. +// The caller must hold the file's write lock (Write() takes +// fileTableEntry.ContentLock.Lock() before calling), otherwise this check would be racy. +func (f *File2) isConsecutiveWrite(off int64) bool { +	opCount := openfiletable.WriteOpCount() +	return opCount == f.lastOpCount+1 && off == f.lastWrittenOffset+1 +} + +// Write - FUSE call +// +// If the write creates a hole, pads the file to the next block boundary. 
+func (f *File2) Write(data []byte, off int64) (uint32, fuse.Status) { +	if len(data) > fuse.MAX_KERNEL_WRITE { +		// This would crash us due to our fixed-size buffer pool +		tlog.Warn.Printf("Write: rejecting oversized request with EMSGSIZE, len=%d", len(data)) +		return 0, fuse.Status(syscall.EMSGSIZE) +	} +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() +	if f.released { +		// The file descriptor has been closed concurrently +		tlog.Warn.Printf("ino%d fh%d: Write on released file", f.qIno.Ino, f.intFd()) +		return 0, fuse.EBADF +	} +	f.fileTableEntry.ContentLock.Lock() +	defer f.fileTableEntry.ContentLock.Unlock() +	tlog.Debug.Printf("ino%d: FUSE Write: offset=%d length=%d", f.qIno.Ino, off, len(data)) +	// If the write creates a file hole, we have to zero-pad the last block. +	// But if the write directly follows an earlier write, it cannot create a +	// hole, and we can save one Stat() call. +	if !f.isConsecutiveWrite(off) { +		status := f.writePadHole(off) +		if !status.Ok() { +			return 0, status +		} +	} +	n, status := f.doWrite(data, off) +	if status.Ok() { +		f.lastOpCount = openfiletable.WriteOpCount() +		f.lastWrittenOffset = off + int64(len(data)) - 1 +	} +	return n, status +} + +// Release - FUSE call, close file +func (f *File2) Release() { +	f.fdLock.Lock() +	if f.released { +		log.Panicf("ino%d fh%d: double release", f.qIno.Ino, f.intFd()) +	} +	f.released = true +	openfiletable.Unregister(f.qIno) +	f.fd.Close() +	f.fdLock.Unlock() +} + +// Flush - FUSE call +func (f *File2) Flush() fuse.Status { +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() + +	// Since Flush() may be called for each dup'd fd, we don't +	// want to really close the file, we just want to flush. This +	// is achieved by closing a dup'd fd. 
+	newFd, err := syscall.Dup(f.intFd()) + +	if err != nil { +		return fuse.ToStatus(err) +	} +	err = syscall.Close(newFd) +	return fuse.ToStatus(err) +} + +// Fsync FUSE call +func (f *File2) Fsync(flags int) (code fuse.Status) { +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() + +	return fuse.ToStatus(syscall.Fsync(f.intFd())) +} + +// Chmod FUSE call +func (f *File2) Chmod(mode uint32) fuse.Status { +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() + +	// os.File.Chmod goes through the "syscallMode" translation function that messes +	// up the suid and sgid bits. So use syscall.Fchmod directly. +	err := syscall.Fchmod(f.intFd(), mode) +	return fuse.ToStatus(err) +} + +// Chown FUSE call +func (f *File2) Chown(uid uint32, gid uint32) fuse.Status { +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() + +	return fuse.ToStatus(f.fd.Chown(int(uid), int(gid))) +} + +// GetAttr FUSE call (like stat) +func (f *File2) GetAttr(a *fuse.Attr) fuse.Status { +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() + +	tlog.Debug.Printf("file.GetAttr()") +	st := syscall.Stat_t{} +	err := syscall.Fstat(f.intFd(), &st) +	if err != nil { +		return fuse.ToStatus(err) +	} +	f.rootNode.inoMap.TranslateStat(&st) +	a.FromStat(&st) +	a.Size = f.contentEnc.CipherSizeToPlainSize(a.Size) +	if f.rootNode.args.ForceOwner != nil { +		a.Owner = *f.rootNode.args.ForceOwner +	} + +	return fuse.OK +} + +// Utimens FUSE call +func (f *File2) Utimens(a *time.Time, m *time.Time) fuse.Status { +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() +	err := syscallcompat.FutimesNano(f.intFd(), a, m) +	return fuse.ToStatus(err) +} diff --git a/internal/fusefrontend/file2_allocate_truncate.go b/internal/fusefrontend/file2_allocate_truncate.go new file mode 100644 index 0000000..f799a3e --- /dev/null +++ b/internal/fusefrontend/file2_allocate_truncate.go @@ -0,0 +1,217 @@ +package fusefrontend + +// FUSE operations Truncate and Allocate on file handles +// i.e. 
ftruncate and fallocate + +import ( +	"log" +	"syscall" + +	"github.com/hanwen/go-fuse/v2/fuse" + +	"github.com/rfjakob/gocryptfs/internal/syscallcompat" +	"github.com/rfjakob/gocryptfs/internal/tlog" +) + +// Allocate - FUSE call for fallocate(2) +// +// mode=FALLOC_FL_KEEP_SIZE is implemented directly. +// +// mode=FALLOC_DEFAULT is implemented as a two-step process: +// +//   (1) Allocate the space using FALLOC_FL_KEEP_SIZE +//   (2) Set the file size using ftruncate (via truncateGrowFile) +// +// This allows us to reuse the file grow mechanics from Truncate as they are +// complicated and hard to get right. +// +// Other modes (hole punching, zeroing) are not supported. +func (f *File2) Allocate(off uint64, sz uint64, mode uint32) fuse.Status { +	if mode != FALLOC_DEFAULT && mode != FALLOC_FL_KEEP_SIZE { +		f := func() { +			tlog.Info.Printf("fallocate: only mode 0 (default) and 1 (keep size) are supported") +		} +		allocateWarnOnce.Do(f) +		return fuse.Status(syscall.EOPNOTSUPP) +	} + +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() +	if f.released { +		return fuse.EBADF +	} +	f.fileTableEntry.ContentLock.Lock() +	defer f.fileTableEntry.ContentLock.Unlock() + +	blocks := f.contentEnc.ExplodePlainRange(off, sz) +	firstBlock := blocks[0] +	lastBlock := blocks[len(blocks)-1] + +	// Step (1): Allocate the space the user wants using FALLOC_FL_KEEP_SIZE. +	// This will fill file holes and/or allocate additional space past the end of +	// the file. +	cipherOff := firstBlock.BlockCipherOff() +	cipherSz := lastBlock.BlockCipherOff() - cipherOff + +		f.contentEnc.BlockOverhead() + lastBlock.Skip + lastBlock.Length +	err := syscallcompat.Fallocate(f.intFd(), FALLOC_FL_KEEP_SIZE, int64(cipherOff), int64(cipherSz)) +	tlog.Debug.Printf("Allocate off=%d sz=%d mode=%x cipherOff=%d cipherSz=%d\n", +		off, sz, mode, cipherOff, cipherSz) +	if err != nil { +		return fuse.ToStatus(err) +	} +	if mode == FALLOC_FL_KEEP_SIZE { +		// The user did not want to change the apparent size. 
We are done. +		return fuse.OK +	} +	// Step (2): Grow the apparent file size +	// We need the old file size to determine if we are growing the file at all. +	newPlainSz := off + sz +	oldPlainSz, err := f.statPlainSize() +	if err != nil { +		return fuse.ToStatus(err) +	} +	if newPlainSz <= oldPlainSz { +		// The new size is smaller (or equal). Fallocate with mode = 0 never +		// truncates a file, so we are done. +		return fuse.OK +	} +	// The file grows. The space has already been allocated in (1), so what is +	// left to do is to pad the first and last block and call truncate. +	// truncateGrowFile does just that. +	return f.truncateGrowFile(oldPlainSz, newPlainSz) +} + +// Truncate - FUSE call. Resizes the file to newSize plaintext bytes: truncating to zero drops the file header, growing goes through truncateGrowFile, shrinking rewrites the partial last block. +func (f *File2) Truncate(newSize uint64) fuse.Status { +	f.fdLock.RLock() +	defer f.fdLock.RUnlock() +	if f.released { +		// The file descriptor has been closed concurrently. +		tlog.Warn.Printf("ino%d fh%d: Truncate on released file", f.qIno.Ino, f.intFd()) +		return fuse.EBADF +	} +	f.fileTableEntry.ContentLock.Lock() +	defer f.fileTableEntry.ContentLock.Unlock() +	var err error +	// Common case first: Truncate to zero +	if newSize == 0 { +		err = syscall.Ftruncate(int(f.fd.Fd()), 0) +		if err != nil { +			tlog.Warn.Printf("ino%d fh%d: Ftruncate(fd, 0) returned error: %v", f.qIno.Ino, f.intFd(), err) +			return fuse.ToStatus(err) +		} +		// Truncate to zero kills the file header +		f.fileTableEntry.ID = nil +		return fuse.OK +	} +	// We need the old file size to determine if we are growing or shrinking +	// the file +	oldSize, err := f.statPlainSize() +	if err != nil { +		return fuse.ToStatus(err) +	} + +	oldB := float32(oldSize) / float32(f.contentEnc.PlainBS()) +	newB := float32(newSize) / float32(f.contentEnc.PlainBS()) +	tlog.Debug.Printf("ino%d: FUSE Truncate from %.2f to %.2f blocks (%d to %d bytes)", f.qIno.Ino, oldB, newB, oldSize, newSize) + +	// File size stays the same - nothing to do +	if newSize == oldSize { +		return fuse.OK +	} +	// File grows +	if newSize 
> oldSize { +		return f.truncateGrowFile(oldSize, newSize) +	} + +	// File shrinks +	blockNo := f.contentEnc.PlainOffToBlockNo(newSize) +	cipherOff := f.contentEnc.BlockNoToCipherOff(blockNo) +	plainOff := f.contentEnc.BlockNoToPlainOff(blockNo) +	lastBlockLen := newSize - plainOff +	var data []byte +	if lastBlockLen > 0 { +		var status fuse.Status +		data, status = f.doRead(nil, plainOff, lastBlockLen) +		if status != fuse.OK { +			tlog.Warn.Printf("Truncate: shrink doRead returned error: %v", status) // err is nil here; doRead reports failure via status +			return status +		} +	} +	// Truncate down to the last complete block +	err = syscall.Ftruncate(int(f.fd.Fd()), int64(cipherOff)) +	if err != nil { +		tlog.Warn.Printf("Truncate: shrink Ftruncate returned error: %v", err) +		return fuse.ToStatus(err) +	} +	// Append partial block +	if lastBlockLen > 0 { +		_, status := f.doWrite(data, int64(plainOff)) +		return status +	} +	return fuse.OK +} + +// statPlainSize stats the file and returns the plaintext size +func (f *File2) statPlainSize() (uint64, error) { +	fi, err := f.fd.Stat() +	if err != nil { +		tlog.Warn.Printf("ino%d fh%d: statPlainSize: %v", f.qIno.Ino, f.intFd(), err) +		return 0, err +	} +	cipherSz := uint64(fi.Size()) +	plainSz := uint64(f.contentEnc.CipherSizeToPlainSize(cipherSz)) +	return plainSz, nil +} + +// truncateGrowFile extends a file using seeking or ftruncate performing RMW on +// the first and last block as necessary. New blocks in the middle become +// file holes unless they have been fallocate()'d beforehand. +func (f *File2) truncateGrowFile(oldPlainSz uint64, newPlainSz uint64) fuse.Status { +	if newPlainSz <= oldPlainSz { +		log.Panicf("BUG: newSize=%d <= oldSize=%d", newPlainSz, oldPlainSz) +	} +	newEOFOffset := newPlainSz - 1 +	if oldPlainSz > 0 { +		n1 := f.contentEnc.PlainOffToBlockNo(oldPlainSz - 1) +		n2 := f.contentEnc.PlainOffToBlockNo(newEOFOffset) +		// The file is grown within one block, no need to pad anything. 
+		// Write a single zero to the last byte and let doWrite figure out the RMW. +		if n1 == n2 { +			buf := make([]byte, 1) +			_, status := f.doWrite(buf, int64(newEOFOffset)) +			return status +		} +	} +	// The truncate creates at least one new block. +	// +	// Make sure the old last block is padded to the block boundary. This call +	// is a no-op if it is already block-aligned. +	status := f.zeroPad(oldPlainSz) +	if !status.Ok() { +		return status +	} +	// The new size is block-aligned. In this case we can do everything ourselves +	// and avoid the call to doWrite. +	if newPlainSz%f.contentEnc.PlainBS() == 0 { +		// The file was empty, so it did not have a header. Create one. +		if oldPlainSz == 0 { +			id, err := f.createHeader() +			if err != nil { +				return fuse.ToStatus(err) +			} +			f.fileTableEntry.ID = id +		} +		cSz := int64(f.contentEnc.PlainSizeToCipherSize(newPlainSz)) +		err := syscall.Ftruncate(f.intFd(), cSz) +		if err != nil { +			tlog.Warn.Printf("Truncate: grow Ftruncate returned error: %v", err) +		} +		return fuse.ToStatus(err) +	} +	// The new size is NOT aligned, so we need to write a partial block. +	// Write a single zero to the last byte and let doWrite figure it out. +	buf := make([]byte, 1) +	_, status = f.doWrite(buf, int64(newEOFOffset)) +	return status +} diff --git a/internal/fusefrontend/file2_holes.go b/internal/fusefrontend/file2_holes.go new file mode 100644 index 0000000..5e06981 --- /dev/null +++ b/internal/fusefrontend/file2_holes.go @@ -0,0 +1,92 @@ +package fusefrontend + +// Helper functions for sparse files (files with holes) + +import ( +	"runtime" +	"syscall" + +	"github.com/hanwen/go-fuse/v2/fuse" + +	"github.com/rfjakob/gocryptfs/internal/tlog" +) + +// Will a write to plaintext offset "targetOff" create a file hole in the +// ciphertext? If yes, zero-pad the last ciphertext block. +func (f *File2) writePadHole(targetOff int64) fuse.Status { +	// Get the current file size. 
+	fi, err := f.fd.Stat() +	if err != nil { +		tlog.Warn.Printf("checkAndPadHole: Fstat failed: %v", err) +		return fuse.ToStatus(err) +	} +	plainSize := f.contentEnc.CipherSizeToPlainSize(uint64(fi.Size())) +	// Appending a single byte to the file (equivalent to writing to +	// offset=plainSize) would write to "nextBlock". +	nextBlock := f.contentEnc.PlainOffToBlockNo(plainSize) +	// targetBlock is the block the user wants to write to. +	targetBlock := f.contentEnc.PlainOffToBlockNo(uint64(targetOff)) +	// The write goes into an existing block or (if the last block was full) +	// starts a new one directly after the last block. Nothing to do. +	if targetBlock <= nextBlock { +		return fuse.OK +	} +	// The write goes past the next block. nextBlock has +	// to be zero-padded to the block boundary and (at least) nextBlock+1 +	// will contain a file hole in the ciphertext. +	status := f.zeroPad(plainSize) +	if status != fuse.OK { +		return status +	} +	return fuse.OK +} + +// Zero-pad the file of size plainSize to the next block boundary. This is a no-op +// if the file is already block-aligned. +func (f *File2) zeroPad(plainSize uint64) fuse.Status { +	lastBlockLen := plainSize % f.contentEnc.PlainBS() +	if lastBlockLen == 0 { +		// Already block-aligned +		return fuse.OK +	} +	missing := f.contentEnc.PlainBS() - lastBlockLen +	pad := make([]byte, missing) +	tlog.Debug.Printf("zeroPad: Writing %d bytes\n", missing) +	_, status := f.doWrite(pad, int64(plainSize)) +	return status +} + +// SeekData calls the lseek syscall with SEEK_DATA. It returns the offset of the +// next data bytes, skipping over file holes. +func (f *File2) SeekData(oldOffset int64) (int64, error) { +	if runtime.GOOS != "linux" { +		// Does MacOS support something like this? +		return 0, syscall.EOPNOTSUPP +	} +	const SEEK_DATA = 3 + +	// Convert plaintext offset to ciphertext offset and round down to the +	// start of the current block. File holes smaller than a full block will +	// be ignored. 
+	blockNo := f.contentEnc.PlainOffToBlockNo(uint64(oldOffset)) +	oldCipherOff := int64(f.contentEnc.BlockNoToCipherOff(blockNo)) + +	// Determine the next data offset. If the old offset points to (or beyond) +	// the end of the file, the Seek syscall fails with syscall.ENXIO. +	newCipherOff, err := syscall.Seek(f.intFd(), oldCipherOff, SEEK_DATA) +	if err != nil { +		return 0, err +	} + +	// Convert ciphertext offset back to plaintext offset. At this point, +	// newCipherOff should always be >= contentenc.HeaderLen. Round down, +	// but ensure that the result is never smaller than the initial offset +	// (to avoid endless loops). +	blockNo = f.contentEnc.CipherOffToBlockNo(uint64(newCipherOff)) +	newOffset := int64(f.contentEnc.BlockNoToPlainOff(blockNo)) +	if newOffset < oldOffset { +		newOffset = oldOffset +	} + +	return newOffset, nil +} diff --git a/internal/fusefrontend/node.go b/internal/fusefrontend/node.go index 9074f72..28e606a 100644 --- a/internal/fusefrontend/node.go +++ b/internal/fusefrontend/node.go @@ -2,6 +2,7 @@ package fusefrontend  import (  	"context" +	"os"  	"path/filepath"  	"syscall" @@ -10,7 +11,9 @@ import (  	"github.com/hanwen/go-fuse/v2/fs"  	"github.com/hanwen/go-fuse/v2/fuse" +	"github.com/rfjakob/gocryptfs/internal/nametransform"  	"github.com/rfjakob/gocryptfs/internal/syscallcompat" +	"github.com/rfjakob/gocryptfs/internal/tlog"  )  // Node is a file or directory in the filesystem tree @@ -31,6 +34,9 @@ func (n *Node) rootNode() *RootNode {  func (n *Node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) {  	rn := n.rootNode()  	p := filepath.Join(n.path(), name) +	if rn.isFiltered(p) { +		return nil, syscall.EPERM +	}  	dirfd, cName, err := rn.openBackingDir(p)  	if err != nil {  		return nil, fs.ToErrno(err) @@ -71,3 +77,68 @@ func (n *Node) Getattr(ctx context.Context, f fs.FileHandle, out *fuse.AttrOut)  	out.Attr.FromStat(st)  	return 0  } + +func (n *Node) Create(ctx context.Context, 
name string, flags uint32, mode uint32, out *fuse.EntryOut) (inode *fs.Inode, fh fs.FileHandle, fuseFlags uint32, errno syscall.Errno) { +	rn := n.rootNode() +	path := filepath.Join(n.path(), name) +	if rn.isFiltered(path) { +		return nil, nil, 0, syscall.EPERM +	} +	dirfd, cName, err := rn.openBackingDir(path) +	if err != nil { +		return nil, nil, 0, fs.ToErrno(err) +	} +	defer syscall.Close(dirfd) + +	fd := -1 +	// Make sure context is nil if we don't want to preserve the owner +	if !rn.args.PreserveOwner { +		ctx = nil +	} +	newFlags := rn.mangleOpenFlags(flags) +	// Handle long file name +	if !rn.args.PlaintextNames && nametransform.IsLongContent(cName) { +		// Create ".name" +		err = rn.nameTransform.WriteLongNameAt(dirfd, cName, path) +		if err != nil { +			return nil, nil, 0, fs.ToErrno(err) +		} +		// Create content +		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, ctx) +		if err != nil { +			nametransform.DeleteLongNameAt(dirfd, cName) +		} +	} else { +		// Create content, normal (short) file name +		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, ctx) +	} +	if err != nil { +		// xfstests generic/488 triggers this +		if err == syscall.EMFILE { +			var lim syscall.Rlimit +			syscall.Getrlimit(syscall.RLIMIT_NOFILE, &lim) +			tlog.Warn.Printf("Create %q: too many open files. 
Current \"ulimit -n\": %d", cName, lim.Cur) +		} +		return nil, nil, 0, fs.ToErrno(err) +	} + +	// Get device number and inode number into `st` +	st, err := syscallcompat.Fstatat2(dirfd, cName, unix.AT_SYMLINK_NOFOLLOW) +	if err != nil { +		// Nothing has taken ownership of the freshly opened fd yet, so close it +		// here to avoid leaking the descriptor on this error path. +		syscall.Close(fd) +		return nil, nil, 0, fs.ToErrno(err) +	} +	// Get unique inode number +	rn.inoMap.TranslateStat(st) +	out.Attr.FromStat(st) +	// Create child node +	id := fs.StableAttr{ +		Mode: uint32(st.Mode), +		Gen:  1, +		Ino:  st.Ino, +	} +	node := &Node{} +	ch := n.NewInode(ctx, node, id) + +	f := os.NewFile(uintptr(fd), cName) +	return ch, NewFile2(f, rn, st), 0, 0 +} diff --git a/internal/fusefrontend/root_node.go b/internal/fusefrontend/root_node.go index 1116a41..c84ac93 100644 --- a/internal/fusefrontend/root_node.go +++ b/internal/fusefrontend/root_node.go @@ -1,11 +1,16 @@  package fusefrontend  import ( +	"os" +	"sync/atomic" +	"syscall"  	"time" +	"github.com/rfjakob/gocryptfs/internal/configfile"  	"github.com/rfjakob/gocryptfs/internal/contentenc"  	"github.com/rfjakob/gocryptfs/internal/inomap"  	"github.com/rfjakob/gocryptfs/internal/nametransform" +	"github.com/rfjakob/gocryptfs/internal/syscallcompat"  	"github.com/rfjakob/gocryptfs/internal/tlog"  ) @@ -47,6 +52,30 @@ func NewRootNode(args Args, c *contentenc.ContentEnc, n nametransform.NameTransf  	}  } +// mangleOpenFlags is used by Create() and Open() to convert the open flags the user +// wants to the flags we internally use to open the backing file. +// The returned flags always contain O_NOFOLLOW. +func (rn *RootNode) mangleOpenFlags(flags uint32) (newFlags int) { +	newFlags = int(flags) +	// Convert WRONLY to RDWR. We always need read access to do read-modify-write cycles. +	if (newFlags & syscall.O_ACCMODE) == syscall.O_WRONLY { +		newFlags = newFlags ^ os.O_WRONLY | os.O_RDWR +	} +	// We also cannot open the file in append mode, we need to seek back for RMW +	newFlags = newFlags &^ os.O_APPEND +	// O_DIRECT accesses must be aligned in both offset and length. 
Due to our +// crypto header, alignment will be off, even if userspace makes aligned +// accesses. Running xfstests generic/013 on ext4 used to trigger lots of +// EINVAL errors due to missing alignment. Just fall back to buffered IO. +	newFlags = newFlags &^ syscallcompat.O_DIRECT +	// Create and Open are two separate FUSE operations, so O_CREAT should not +	// be part of the open flags. +	newFlags = newFlags &^ syscall.O_CREAT +	// We always want O_NOFOLLOW to be safe against symlink races +	newFlags |= syscall.O_NOFOLLOW +	return newFlags +} +  // reportMitigatedCorruption is used to report a corruption that was transparently  // mitigated and did not return an error to the user. Pass the name of the corrupt  // item (filename for OpenDir(), xattr name for ListXAttr() etc). @@ -63,3 +92,23 @@ func (rn *RootNode) reportMitigatedCorruption(item string) {  		return  	}  } + +// isFiltered - check if plaintext "path" should be forbidden +// +// Prevents name clashes with internal files when file names are not encrypted +func (rn *RootNode) isFiltered(path string) bool { +	atomic.StoreUint32(&rn.IsIdle, 0) + +	if !rn.args.PlaintextNames { +		return false +	} +	// gocryptfs.conf in the root directory is forbidden +	if path == configfile.ConfDefaultName { +		tlog.Info.Printf("The name /%s is reserved when -plaintextnames is used\n", +			configfile.ConfDefaultName) +		return true +	} +	// Note: gocryptfs.diriv is NOT forbidden because diriv and plaintextnames +	// are exclusive +	return false +} diff --git a/internal/syscallcompat/sys_linux.go b/internal/syscallcompat/sys_linux.go index e2b19bb..02064ac 100644 --- a/internal/syscallcompat/sys_linux.go +++ b/internal/syscallcompat/sys_linux.go @@ -2,6 +2,7 @@  package syscallcompat  import ( +	"context"  	"fmt"  	"io/ioutil"  	"runtime" @@ -88,6 +89,20 @@ func getSupplementaryGroups(pid uint32) (gids []int) {  	return nil  } +// OpenatUserCtx tries to extract a fuse.Context from the generic ctx and +// calls 
OpenatUser. +func OpenatUserCtx(dirfd int, path string, flags int, mode uint32, ctx context.Context) (fd int, err error) { +	var ctx2 *fuse.Context +	if ctx != nil { +		if caller, ok := fuse.FromContext(ctx); ok { +			ctx2 = &fuse.Context{ +				Caller: *caller, +			} +		} +	} +	return OpenatUser(dirfd, path, flags, mode, ctx2) +} +  // OpenatUser runs the Openat syscall in the context of a different user.  func OpenatUser(dirfd int, path string, flags int, mode uint32, context *fuse.Context) (fd int, err error) {  	if context != nil { | 
