From f6ded09e36a679695354f4b9bc74242ef399be09 Mon Sep 17 00:00:00 2001
From: Jakob Unterwurzacher
Date: Sun, 21 Jun 2020 13:25:12 +0200
Subject: v2api: implement Create

---
 internal/fusefrontend/file2.go                   | 476 +++++++++++++++++++++++
 internal/fusefrontend/file2_allocate_truncate.go | 217 +++++++++++
 internal/fusefrontend/file2_holes.go             |  92 +++++
 internal/fusefrontend/node.go                    |  71 ++++
 internal/fusefrontend/root_node.go               |  49 +++
 internal/syscallcompat/sys_linux.go              |  15 +
 6 files changed, 920 insertions(+)
 create mode 100644 internal/fusefrontend/file2.go
 create mode 100644 internal/fusefrontend/file2_allocate_truncate.go
 create mode 100644 internal/fusefrontend/file2_holes.go

diff --git a/internal/fusefrontend/file2.go b/internal/fusefrontend/file2.go
new file mode 100644
index 0000000..2882732
--- /dev/null
+++ b/internal/fusefrontend/file2.go
@@ -0,0 +1,476 @@
+package fusefrontend
+
+// FUSE operations on file handles
+
+import (
+	"bytes"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/hanwen/go-fuse/v2/fuse"
+	"github.com/hanwen/go-fuse/v2/fuse/nodefs"
+
+	"github.com/rfjakob/gocryptfs/internal/contentenc"
+	"github.com/rfjakob/gocryptfs/internal/inomap"
+	"github.com/rfjakob/gocryptfs/internal/openfiletable"
+	"github.com/rfjakob/gocryptfs/internal/serialize_reads"
+	"github.com/rfjakob/gocryptfs/internal/stupidgcm"
+	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
+	"github.com/rfjakob/gocryptfs/internal/tlog"
+)
+
+var _ nodefs.File = &File2{} // Verify that File2 (not the old File) implements the interface.
+
+// File2 - based on loopbackFile in go-fuse/fuse/nodefs/files.go
+type File2 struct {
+	fd *os.File
+	// Has Release() already been called on this file? This also means that the
+	// wlock entry has been freed, so let's not crash trying to access it.
+	// Due to concurrency, Release can overtake other operations. These will
+	// return EBADF in that case.
+	released bool
+	// fdLock prevents the fd to be closed while we are in the middle of
+	// an operation.
+	// Every FUSE entrypoint should RLock(). The only user of Lock() is
+	// Release(), which closes the fd and sets "released" to true.
+	fdLock sync.RWMutex
+	// Content encryption helper
+	contentEnc *contentenc.ContentEnc
+	// Device and inode number uniquely identify the backing file
+	qIno inomap.QIno
+	// Entry in the open file table
+	fileTableEntry *openfiletable.Entry
+	// Store where the last byte was written
+	lastWrittenOffset int64
+	// The opCount is used to judge whether "lastWrittenOffset" is still
+	// guaranteed to be correct.
+	lastOpCount uint64
+	// Parent filesystem
+	rootNode *RootNode
+	// We embed a nodefs.NewDefaultFile() that returns ENOSYS for every operation we
+	// have not implemented. This prevents build breakage when the go-fuse library
+	// adds new methods to the nodefs.File interface.
+	nodefs.File
+}
+
+// NewFile2 registers the file in the open file table and returns a new File2 instance.
+func NewFile2(fd *os.File, rn *RootNode, st *syscall.Stat_t) *File2 {
+	qi := inomap.QInoFromStat(st)
+	e := openfiletable.Register(qi)
+
+	return &File2{
+		fd:             fd,
+		contentEnc:     rn.contentEnc,
+		qIno:           qi,
+		fileTableEntry: e,
+		rootNode:       rn,
+		File:           nodefs.NewDefaultFile(),
+	}
+}
+
+// intFd - return the backing file descriptor as an integer.
+func (f *File2) intFd() int {
+	return int(f.fd.Fd())
+}
+
+// readFileID loads the file header from disk and extracts the file ID.
+// Returns io.EOF if the file is empty.
+func (f *File2) readFileID() ([]byte, error) {
+	// We read +1 byte to determine if the file has actual content
+	// and not only the header. A header-only file will be considered empty.
+	// This makes File ID poisoning more difficult.
+ readLen := contentenc.HeaderLen + 1 + buf := make([]byte, readLen) + n, err := f.fd.ReadAt(buf, 0) + if err != nil { + if err == io.EOF && n != 0 { + tlog.Warn.Printf("readFileID %d: incomplete file, got %d instead of %d bytes", + f.qIno.Ino, n, readLen) + f.rootNode.reportMitigatedCorruption(fmt.Sprint(f.qIno.Ino)) + } + return nil, err + } + buf = buf[:contentenc.HeaderLen] + h, err := contentenc.ParseHeader(buf) + if err != nil { + return nil, err + } + return h.ID, nil +} + +// createHeader creates a new random header and writes it to disk. +// Returns the new file ID. +// The caller must hold fileIDLock.Lock(). +func (f *File2) createHeader() (fileID []byte, err error) { + h := contentenc.RandomHeader() + buf := h.Pack() + // Prevent partially written (=corrupt) header by preallocating the space beforehand + if !f.rootNode.args.NoPrealloc { + err = syscallcompat.EnospcPrealloc(f.intFd(), 0, contentenc.HeaderLen) + if err != nil { + if !syscallcompat.IsENOSPC(err) { + tlog.Warn.Printf("ino%d: createHeader: prealloc failed: %s\n", f.qIno.Ino, err.Error()) + } + return nil, err + } + } + // Actually write header + _, err = f.fd.WriteAt(buf, 0) + if err != nil { + return nil, err + } + return h.ID, err +} + +// doRead - read "length" plaintext bytes from plaintext offset "off" and append +// to "dst". +// Arguments "length" and "off" do not have to be block-aligned. +// +// doRead reads the corresponding ciphertext blocks from disk, decrypts them and +// returns the requested part of the plaintext. +// +// Called by Read() for normal reading, +// by Write() and Truncate() via doWrite() for Read-Modify-Write. +func (f *File2) doRead(dst []byte, off uint64, length uint64) ([]byte, fuse.Status) { + // Get the file ID, either from the open file table, or from disk. 
+ var fileID []byte + f.fileTableEntry.IDLock.Lock() + if f.fileTableEntry.ID != nil { + // Use the cached value in the file table + fileID = f.fileTableEntry.ID + } else { + // Not cached, we have to read it from disk. + var err error + fileID, err = f.readFileID() + if err != nil { + f.fileTableEntry.IDLock.Unlock() + if err == io.EOF { + // Empty file + return nil, fuse.OK + } + buf := make([]byte, 100) + n, _ := f.fd.ReadAt(buf, 0) + buf = buf[:n] + hexdump := hex.EncodeToString(buf) + tlog.Warn.Printf("doRead %d: corrupt header: %v\nFile hexdump (%d bytes): %s", + f.qIno.Ino, err, n, hexdump) + return nil, fuse.EIO + } + // Save into the file table + f.fileTableEntry.ID = fileID + } + f.fileTableEntry.IDLock.Unlock() + if fileID == nil { + log.Panicf("fileID=%v", fileID) + } + // Read the backing ciphertext in one go + blocks := f.contentEnc.ExplodePlainRange(off, length) + alignedOffset, alignedLength := blocks[0].JointCiphertextRange(blocks) + skip := blocks[0].Skip + tlog.Debug.Printf("doRead: off=%d len=%d -> off=%d len=%d skip=%d\n", + off, length, alignedOffset, alignedLength, skip) + + ciphertext := f.rootNode.contentEnc.CReqPool.Get() + ciphertext = ciphertext[:int(alignedLength)] + n, err := f.fd.ReadAt(ciphertext, int64(alignedOffset)) + if err != nil && err != io.EOF { + tlog.Warn.Printf("read: ReadAt: %s", err.Error()) + return nil, fuse.ToStatus(err) + } + // The ReadAt came back empty. We can skip all the decryption and return early. 
+ if n == 0 { + f.rootNode.contentEnc.CReqPool.Put(ciphertext) + return dst, fuse.OK + } + // Truncate ciphertext buffer down to actually read bytes + ciphertext = ciphertext[0:n] + + firstBlockNo := blocks[0].BlockNo + tlog.Debug.Printf("ReadAt offset=%d bytes (%d blocks), want=%d, got=%d", alignedOffset, firstBlockNo, alignedLength, n) + + // Decrypt it + plaintext, err := f.contentEnc.DecryptBlocks(ciphertext, firstBlockNo, fileID) + f.rootNode.contentEnc.CReqPool.Put(ciphertext) + if err != nil { + if f.rootNode.args.ForceDecode && err == stupidgcm.ErrAuth { + // We do not have the information which block was corrupt here anymore, + // but DecryptBlocks() has already logged it anyway. + tlog.Warn.Printf("doRead %d: off=%d len=%d: returning corrupt data due to forcedecode", + f.qIno.Ino, off, length) + } else { + curruptBlockNo := firstBlockNo + f.contentEnc.PlainOffToBlockNo(uint64(len(plaintext))) + tlog.Warn.Printf("doRead %d: corrupt block #%d: %v", f.qIno.Ino, curruptBlockNo, err) + return nil, fuse.EIO + } + } + + // Crop down to the relevant part + var out []byte + lenHave := len(plaintext) + lenWant := int(skip + length) + if lenHave > lenWant { + out = plaintext[skip:lenWant] + } else if lenHave > int(skip) { + out = plaintext[skip:lenHave] + } + // else: out stays empty, file was smaller than the requested offset + + out = append(dst, out...) 
+ f.rootNode.contentEnc.PReqPool.Put(plaintext) + + return out, fuse.OK +} + +// Read - FUSE call +func (f *File2) Read(buf []byte, off int64) (resultData fuse.ReadResult, code fuse.Status) { + if len(buf) > fuse.MAX_KERNEL_WRITE { + // This would crash us due to our fixed-size buffer pool + tlog.Warn.Printf("Read: rejecting oversized request with EMSGSIZE, len=%d", len(buf)) + return nil, fuse.Status(syscall.EMSGSIZE) + } + f.fdLock.RLock() + defer f.fdLock.RUnlock() + + f.fileTableEntry.ContentLock.RLock() + defer f.fileTableEntry.ContentLock.RUnlock() + + tlog.Debug.Printf("ino%d: FUSE Read: offset=%d length=%d", f.qIno.Ino, off, len(buf)) + if f.rootNode.args.SerializeReads { + serialize_reads.Wait(off, len(buf)) + } + out, status := f.doRead(buf[:0], uint64(off), uint64(len(buf))) + if f.rootNode.args.SerializeReads { + serialize_reads.Done() + } + if status != fuse.OK { + return nil, status + } + tlog.Debug.Printf("ino%d: Read: status %v, returning %d bytes", f.qIno.Ino, status, len(out)) + return fuse.ReadResultData(out), status +} + +// doWrite - encrypt "data" and write it to plaintext offset "off" +// +// Arguments do not have to be block-aligned, read-modify-write is +// performed internally as necessary +// +// Called by Write() for normal writing, +// and by Truncate() to rewrite the last file block. +// +// Empty writes do nothing and are allowed. +func (f *File2) doWrite(data []byte, off int64) (uint32, fuse.Status) { + fileWasEmpty := false + // Get the file ID, create a new one if it does not exist yet. + var fileID []byte + // The caller has exclusively locked ContentLock, which blocks all other + // readers and writers. No need to take IDLock. 
+ if f.fileTableEntry.ID != nil { + fileID = f.fileTableEntry.ID + } else { + // If the file ID is not cached, read it from disk + var err error + fileID, err = f.readFileID() + // Write a new file header if the file is empty + if err == io.EOF { + fileID, err = f.createHeader() + fileWasEmpty = true + } + if err != nil { + return 0, fuse.ToStatus(err) + } + f.fileTableEntry.ID = fileID + } + // Handle payload data + dataBuf := bytes.NewBuffer(data) + blocks := f.contentEnc.ExplodePlainRange(uint64(off), uint64(len(data))) + toEncrypt := make([][]byte, len(blocks)) + for i, b := range blocks { + blockData := dataBuf.Next(int(b.Length)) + // Incomplete block -> Read-Modify-Write + if b.IsPartial() { + // Read + oldData, status := f.doRead(nil, b.BlockPlainOff(), f.contentEnc.PlainBS()) + if status != fuse.OK { + tlog.Warn.Printf("ino%d fh%d: RMW read failed: %s", f.qIno.Ino, f.intFd(), status.String()) + return 0, status + } + // Modify + blockData = f.contentEnc.MergeBlocks(oldData, blockData, int(b.Skip)) + tlog.Debug.Printf("len(oldData)=%d len(blockData)=%d", len(oldData), len(blockData)) + } + tlog.Debug.Printf("ino%d: Writing %d bytes to block #%d", + f.qIno.Ino, len(blockData), b.BlockNo) + // Write into the to-encrypt list + toEncrypt[i] = blockData + } + // Encrypt all blocks + ciphertext := f.contentEnc.EncryptBlocks(toEncrypt, blocks[0].BlockNo, f.fileTableEntry.ID) + // Preallocate so we cannot run out of space in the middle of the write. + // This prevents partially written (=corrupt) blocks. 
+ var err error + cOff := int64(blocks[0].BlockCipherOff()) + if !f.rootNode.args.NoPrealloc { + err = syscallcompat.EnospcPrealloc(f.intFd(), cOff, int64(len(ciphertext))) + if err != nil { + if !syscallcompat.IsENOSPC(err) { + tlog.Warn.Printf("ino%d fh%d: doWrite: prealloc failed: %v", f.qIno.Ino, f.intFd(), err) + } + if fileWasEmpty { + // Kill the file header again + f.fileTableEntry.ID = nil + err2 := syscall.Ftruncate(f.intFd(), 0) + if err2 != nil { + tlog.Warn.Printf("ino%d fh%d: doWrite: rollback failed: %v", f.qIno.Ino, f.intFd(), err2) + } + } + return 0, fuse.ToStatus(err) + } + } + // Write + _, err = f.fd.WriteAt(ciphertext, cOff) + // Return memory to CReqPool + f.rootNode.contentEnc.CReqPool.Put(ciphertext) + if err != nil { + tlog.Warn.Printf("ino%d fh%d: doWrite: WriteAt off=%d len=%d failed: %v", + f.qIno.Ino, f.intFd(), cOff, len(ciphertext), err) + return 0, fuse.ToStatus(err) + } + return uint32(len(data)), fuse.OK +} + +// isConsecutiveWrite returns true if the current write +// directly (in time and space) follows the last write. +// This is an optimisation for streaming writes on NFS where a +// Stat() call is very expensive. +// The caller must "wlock.lock(f.devIno.ino)" otherwise this check would be racy. +func (f *File2) isConsecutiveWrite(off int64) bool { + opCount := openfiletable.WriteOpCount() + return opCount == f.lastOpCount+1 && off == f.lastWrittenOffset+1 +} + +// Write - FUSE call +// +// If the write creates a hole, pads the file to the next block boundary. 
+func (f *File2) Write(data []byte, off int64) (uint32, fuse.Status) { + if len(data) > fuse.MAX_KERNEL_WRITE { + // This would crash us due to our fixed-size buffer pool + tlog.Warn.Printf("Write: rejecting oversized request with EMSGSIZE, len=%d", len(data)) + return 0, fuse.Status(syscall.EMSGSIZE) + } + f.fdLock.RLock() + defer f.fdLock.RUnlock() + if f.released { + // The file descriptor has been closed concurrently + tlog.Warn.Printf("ino%d fh%d: Write on released file", f.qIno.Ino, f.intFd()) + return 0, fuse.EBADF + } + f.fileTableEntry.ContentLock.Lock() + defer f.fileTableEntry.ContentLock.Unlock() + tlog.Debug.Printf("ino%d: FUSE Write: offset=%d length=%d", f.qIno.Ino, off, len(data)) + // If the write creates a file hole, we have to zero-pad the last block. + // But if the write directly follows an earlier write, it cannot create a + // hole, and we can save one Stat() call. + if !f.isConsecutiveWrite(off) { + status := f.writePadHole(off) + if !status.Ok() { + return 0, status + } + } + n, status := f.doWrite(data, off) + if status.Ok() { + f.lastOpCount = openfiletable.WriteOpCount() + f.lastWrittenOffset = off + int64(len(data)) - 1 + } + return n, status +} + +// Release - FUSE call, close file +func (f *File2) Release() { + f.fdLock.Lock() + if f.released { + log.Panicf("ino%d fh%d: double release", f.qIno.Ino, f.intFd()) + } + f.released = true + openfiletable.Unregister(f.qIno) + f.fd.Close() + f.fdLock.Unlock() +} + +// Flush - FUSE call +func (f *File2) Flush() fuse.Status { + f.fdLock.RLock() + defer f.fdLock.RUnlock() + + // Since Flush() may be called for each dup'd fd, we don't + // want to really close the file, we just want to flush. This + // is achieved by closing a dup'd fd. 
+ newFd, err := syscall.Dup(f.intFd()) + + if err != nil { + return fuse.ToStatus(err) + } + err = syscall.Close(newFd) + return fuse.ToStatus(err) +} + +// Fsync FUSE call +func (f *File2) Fsync(flags int) (code fuse.Status) { + f.fdLock.RLock() + defer f.fdLock.RUnlock() + + return fuse.ToStatus(syscall.Fsync(f.intFd())) +} + +// Chmod FUSE call +func (f *File2) Chmod(mode uint32) fuse.Status { + f.fdLock.RLock() + defer f.fdLock.RUnlock() + + // os.File.Chmod goes through the "syscallMode" translation function that messes + // up the suid and sgid bits. So use syscall.Fchmod directly. + err := syscall.Fchmod(f.intFd(), mode) + return fuse.ToStatus(err) +} + +// Chown FUSE call +func (f *File2) Chown(uid uint32, gid uint32) fuse.Status { + f.fdLock.RLock() + defer f.fdLock.RUnlock() + + return fuse.ToStatus(f.fd.Chown(int(uid), int(gid))) +} + +// GetAttr FUSE call (like stat) +func (f *File2) GetAttr(a *fuse.Attr) fuse.Status { + f.fdLock.RLock() + defer f.fdLock.RUnlock() + + tlog.Debug.Printf("file.GetAttr()") + st := syscall.Stat_t{} + err := syscall.Fstat(f.intFd(), &st) + if err != nil { + return fuse.ToStatus(err) + } + f.rootNode.inoMap.TranslateStat(&st) + a.FromStat(&st) + a.Size = f.contentEnc.CipherSizeToPlainSize(a.Size) + if f.rootNode.args.ForceOwner != nil { + a.Owner = *f.rootNode.args.ForceOwner + } + + return fuse.OK +} + +// Utimens FUSE call +func (f *File2) Utimens(a *time.Time, m *time.Time) fuse.Status { + f.fdLock.RLock() + defer f.fdLock.RUnlock() + err := syscallcompat.FutimesNano(f.intFd(), a, m) + return fuse.ToStatus(err) +} diff --git a/internal/fusefrontend/file2_allocate_truncate.go b/internal/fusefrontend/file2_allocate_truncate.go new file mode 100644 index 0000000..f799a3e --- /dev/null +++ b/internal/fusefrontend/file2_allocate_truncate.go @@ -0,0 +1,217 @@ +package fusefrontend + +// FUSE operations Truncate and Allocate on file handles +// i.e. 
ftruncate and fallocate + +import ( + "log" + "syscall" + + "github.com/hanwen/go-fuse/v2/fuse" + + "github.com/rfjakob/gocryptfs/internal/syscallcompat" + "github.com/rfjakob/gocryptfs/internal/tlog" +) + +// Allocate - FUSE call for fallocate(2) +// +// mode=FALLOC_FL_KEEP_SIZE is implemented directly. +// +// mode=FALLOC_DEFAULT is implemented as a two-step process: +// +// (1) Allocate the space using FALLOC_FL_KEEP_SIZE +// (2) Set the file size using ftruncate (via truncateGrowFile) +// +// This allows us to reuse the file grow mechanics from Truncate as they are +// complicated and hard to get right. +// +// Other modes (hole punching, zeroing) are not supported. +func (f *File2) Allocate(off uint64, sz uint64, mode uint32) fuse.Status { + if mode != FALLOC_DEFAULT && mode != FALLOC_FL_KEEP_SIZE { + f := func() { + tlog.Info.Printf("fallocate: only mode 0 (default) and 1 (keep size) are supported") + } + allocateWarnOnce.Do(f) + return fuse.Status(syscall.EOPNOTSUPP) + } + + f.fdLock.RLock() + defer f.fdLock.RUnlock() + if f.released { + return fuse.EBADF + } + f.fileTableEntry.ContentLock.Lock() + defer f.fileTableEntry.ContentLock.Unlock() + + blocks := f.contentEnc.ExplodePlainRange(off, sz) + firstBlock := blocks[0] + lastBlock := blocks[len(blocks)-1] + + // Step (1): Allocate the space the user wants using FALLOC_FL_KEEP_SIZE. + // This will fill file holes and/or allocate additional space past the end of + // the file. + cipherOff := firstBlock.BlockCipherOff() + cipherSz := lastBlock.BlockCipherOff() - cipherOff + + f.contentEnc.BlockOverhead() + lastBlock.Skip + lastBlock.Length + err := syscallcompat.Fallocate(f.intFd(), FALLOC_FL_KEEP_SIZE, int64(cipherOff), int64(cipherSz)) + tlog.Debug.Printf("Allocate off=%d sz=%d mode=%x cipherOff=%d cipherSz=%d\n", + off, sz, mode, cipherOff, cipherSz) + if err != nil { + return fuse.ToStatus(err) + } + if mode == FALLOC_FL_KEEP_SIZE { + // The user did not want to change the apparent size. We are done. 
+		return fuse.OK
+	}
+	// Step (2): Grow the apparent file size
+	// We need the old file size to determine if we are growing the file at all.
+	newPlainSz := off + sz
+	oldPlainSz, err := f.statPlainSize()
+	if err != nil {
+		return fuse.ToStatus(err)
+	}
+	if newPlainSz <= oldPlainSz {
+		// The new size is smaller (or equal). Fallocate with mode = 0 never
+		// truncates a file, so we are done.
+		return fuse.OK
+	}
+	// The file grows. The space has already been allocated in (1), so what is
+	// left to do is to pad the first and last block and call truncate.
+	// truncateGrowFile does just that.
+	return f.truncateGrowFile(oldPlainSz, newPlainSz)
+}
+
+// Truncate - FUSE call. Handles truncate-to-zero, growing and shrinking; takes ContentLock exclusively.
+func (f *File2) Truncate(newSize uint64) fuse.Status {
+	f.fdLock.RLock()
+	defer f.fdLock.RUnlock()
+	if f.released {
+		// The file descriptor has been closed concurrently.
+		tlog.Warn.Printf("ino%d fh%d: Truncate on released file", f.qIno.Ino, f.intFd())
+		return fuse.EBADF
+	}
+	f.fileTableEntry.ContentLock.Lock()
+	defer f.fileTableEntry.ContentLock.Unlock()
+	var err error
+	// Common case first: Truncate to zero
+	if newSize == 0 {
+		err = syscall.Ftruncate(int(f.fd.Fd()), 0)
+		if err != nil {
+			tlog.Warn.Printf("ino%d fh%d: Ftruncate(fd, 0) returned error: %v", f.qIno.Ino, f.intFd(), err)
+			return fuse.ToStatus(err)
+		}
+		// Truncate to zero kills the file header
+		f.fileTableEntry.ID = nil
+		return fuse.OK
+	}
+	// We need the old file size to determine if we are growing or shrinking
+	// the file
+	oldSize, err := f.statPlainSize()
+	if err != nil {
+		return fuse.ToStatus(err)
+	}
+
+	oldB := float32(oldSize) / float32(f.contentEnc.PlainBS())
+	newB := float32(newSize) / float32(f.contentEnc.PlainBS())
+	tlog.Debug.Printf("ino%d: FUSE Truncate from %.2f to %.2f blocks (%d to %d bytes)", f.qIno.Ino, oldB, newB, oldSize, newSize)
+
+	// File size stays the same - nothing to do
+	if newSize == oldSize {
+		return fuse.OK
+	}
+	// File grows
+	if newSize > oldSize {
+		return f.truncateGrowFile(oldSize, newSize)
+	}
+
+	// File shrinks: save the partial last block, cut at the block boundary, re-append it
+	blockNo := f.contentEnc.PlainOffToBlockNo(newSize)
+	cipherOff := f.contentEnc.BlockNoToCipherOff(blockNo)
+	plainOff := f.contentEnc.BlockNoToPlainOff(blockNo)
+	lastBlockLen := newSize - plainOff
+	var data []byte
+	if lastBlockLen > 0 {
+		var status fuse.Status
+		data, status = f.doRead(nil, plainOff, lastBlockLen)
+		if status != fuse.OK {
+			tlog.Warn.Printf("Truncate: shrink doRead returned error: %v", status)
+			return status
+		}
+	}
+	// Truncate down to the last complete block
+	err = syscall.Ftruncate(int(f.fd.Fd()), int64(cipherOff))
+	if err != nil {
+		tlog.Warn.Printf("Truncate: shrink Ftruncate returned error: %v", err)
+		return fuse.ToStatus(err)
+	}
+	// Append partial block
+	if lastBlockLen > 0 {
+		_, status := f.doWrite(data, int64(plainOff))
+		return status
+	}
+	return fuse.OK
+}
+
+// statPlainSize stats the file and returns the plaintext size
+func (f *File2) statPlainSize() (uint64, error) {
+	fi, err := f.fd.Stat()
+	if err != nil {
+		tlog.Warn.Printf("ino%d fh%d: statPlainSize: %v", f.qIno.Ino, f.intFd(), err)
+		return 0, err
+	}
+	cipherSz := uint64(fi.Size())
+	plainSz := uint64(f.contentEnc.CipherSizeToPlainSize(cipherSz))
+	return plainSz, nil
+}
+
+// truncateGrowFile extends a file using seeking or ftruncate performing RMW on
+// the first and last block as necessary. New blocks in the middle become
+// file holes unless they have been fallocate()'d beforehand.
+func (f *File2) truncateGrowFile(oldPlainSz uint64, newPlainSz uint64) fuse.Status {
+	if newPlainSz <= oldPlainSz {
+		log.Panicf("BUG: newSize=%d <= oldSize=%d", newPlainSz, oldPlainSz)
+	}
+	newEOFOffset := newPlainSz - 1
+	if oldPlainSz > 0 {
+		n1 := f.contentEnc.PlainOffToBlockNo(oldPlainSz - 1)
+		n2 := f.contentEnc.PlainOffToBlockNo(newEOFOffset)
+		// The file is grown within one block, no need to pad anything.
+		// Write a single zero to the last byte and let doWrite figure out the RMW.
+ if n1 == n2 { + buf := make([]byte, 1) + _, status := f.doWrite(buf, int64(newEOFOffset)) + return status + } + } + // The truncate creates at least one new block. + // + // Make sure the old last block is padded to the block boundary. This call + // is a no-op if it is already block-aligned. + status := f.zeroPad(oldPlainSz) + if !status.Ok() { + return status + } + // The new size is block-aligned. In this case we can do everything ourselves + // and avoid the call to doWrite. + if newPlainSz%f.contentEnc.PlainBS() == 0 { + // The file was empty, so it did not have a header. Create one. + if oldPlainSz == 0 { + id, err := f.createHeader() + if err != nil { + return fuse.ToStatus(err) + } + f.fileTableEntry.ID = id + } + cSz := int64(f.contentEnc.PlainSizeToCipherSize(newPlainSz)) + err := syscall.Ftruncate(f.intFd(), cSz) + if err != nil { + tlog.Warn.Printf("Truncate: grow Ftruncate returned error: %v", err) + } + return fuse.ToStatus(err) + } + // The new size is NOT aligned, so we need to write a partial block. + // Write a single zero to the last byte and let doWrite figure it out. + buf := make([]byte, 1) + _, status = f.doWrite(buf, int64(newEOFOffset)) + return status +} diff --git a/internal/fusefrontend/file2_holes.go b/internal/fusefrontend/file2_holes.go new file mode 100644 index 0000000..5e06981 --- /dev/null +++ b/internal/fusefrontend/file2_holes.go @@ -0,0 +1,92 @@ +package fusefrontend + +// Helper functions for sparse files (files with holes) + +import ( + "runtime" + "syscall" + + "github.com/hanwen/go-fuse/v2/fuse" + + "github.com/rfjakob/gocryptfs/internal/tlog" +) + +// Will a write to plaintext offset "targetOff" create a file hole in the +// ciphertext? If yes, zero-pad the last ciphertext block. +func (f *File2) writePadHole(targetOff int64) fuse.Status { + // Get the current file size. 
+ fi, err := f.fd.Stat() + if err != nil { + tlog.Warn.Printf("checkAndPadHole: Fstat failed: %v", err) + return fuse.ToStatus(err) + } + plainSize := f.contentEnc.CipherSizeToPlainSize(uint64(fi.Size())) + // Appending a single byte to the file (equivalent to writing to + // offset=plainSize) would write to "nextBlock". + nextBlock := f.contentEnc.PlainOffToBlockNo(plainSize) + // targetBlock is the block the user wants to write to. + targetBlock := f.contentEnc.PlainOffToBlockNo(uint64(targetOff)) + // The write goes into an existing block or (if the last block was full) + // starts a new one directly after the last block. Nothing to do. + if targetBlock <= nextBlock { + return fuse.OK + } + // The write goes past the next block. nextBlock has + // to be zero-padded to the block boundary and (at least) nextBlock+1 + // will contain a file hole in the ciphertext. + status := f.zeroPad(plainSize) + if status != fuse.OK { + return status + } + return fuse.OK +} + +// Zero-pad the file of size plainSize to the next block boundary. This is a no-op +// if the file is already block-aligned. +func (f *File2) zeroPad(plainSize uint64) fuse.Status { + lastBlockLen := plainSize % f.contentEnc.PlainBS() + if lastBlockLen == 0 { + // Already block-aligned + return fuse.OK + } + missing := f.contentEnc.PlainBS() - lastBlockLen + pad := make([]byte, missing) + tlog.Debug.Printf("zeroPad: Writing %d bytes\n", missing) + _, status := f.doWrite(pad, int64(plainSize)) + return status +} + +// SeekData calls the lseek syscall with SEEK_DATA. It returns the offset of the +// next data bytes, skipping over file holes. +func (f *File2) SeekData(oldOffset int64) (int64, error) { + if runtime.GOOS != "linux" { + // Does MacOS support something like this? + return 0, syscall.EOPNOTSUPP + } + const SEEK_DATA = 3 + + // Convert plaintext offset to ciphertext offset and round down to the + // start of the current block. File holes smaller than a full block will + // be ignored. 
+	blockNo := f.contentEnc.PlainOffToBlockNo(uint64(oldOffset))
+	oldCipherOff := int64(f.contentEnc.BlockNoToCipherOff(blockNo))
+
+	// Determine the next data offset. If the old offset points to (or beyond)
+	// the end of the file, the Seek syscall fails with syscall.ENXIO.
+	newCipherOff, err := syscall.Seek(f.intFd(), oldCipherOff, SEEK_DATA)
+	if err != nil {
+		return 0, err
+	}
+
+	// Convert ciphertext offset back to plaintext offset. At this point,
+	// newCipherOff should always be >= contentenc.HeaderLen. Round down,
+	// but ensure that the result is never smaller than the initial offset
+	// (to avoid endless loops).
+	blockNo = f.contentEnc.CipherOffToBlockNo(uint64(newCipherOff))
+	newOffset := int64(f.contentEnc.BlockNoToPlainOff(blockNo))
+	if newOffset < oldOffset {
+		newOffset = oldOffset
+	}
+
+	return newOffset, nil
+}
diff --git a/internal/fusefrontend/node.go b/internal/fusefrontend/node.go
index 9074f72..28e606a 100644
--- a/internal/fusefrontend/node.go
+++ b/internal/fusefrontend/node.go
@@ -2,6 +2,7 @@ package fusefrontend
 
 import (
 	"context"
+	"os"
 	"path/filepath"
 	"syscall"
 
@@ -10,7 +11,9 @@ import (
 	"github.com/hanwen/go-fuse/v2/fs"
 	"github.com/hanwen/go-fuse/v2/fuse"
 
+	"github.com/rfjakob/gocryptfs/internal/nametransform"
 	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
+	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 
 // Node is a file or directory in the filesystem tree
@@ -31,6 +34,9 @@ func (n *Node) rootNode() *RootNode {
 func (n *Node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fs.Inode, syscall.Errno) {
 	rn := n.rootNode()
 	p := filepath.Join(n.path(), name)
+	if rn.isFiltered(p) {
+		return nil, syscall.EPERM
+	}
 	dirfd, cName, err := rn.openBackingDir(p)
 	if err != nil {
 		return nil, fs.ToErrno(err)
@@ -71,3 +77,71 @@ func (n *Node) Getattr(ctx context.Context, f fs.FileHandle, out *fuse.AttrOut)
 	out.Attr.FromStat(st)
 	return 0
 }
+
+// Create - FUSE call. Creates the backing file with O_CREAT|O_EXCL and returns the new inode plus an open file handle.
+func (n *Node) Create(ctx context.Context, name string, flags uint32, mode uint32, out *fuse.EntryOut) (inode *fs.Inode, fh fs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
+	rn := n.rootNode()
+	path := filepath.Join(n.path(), name)
+	if rn.isFiltered(path) {
+		return nil, nil, 0, syscall.EPERM
+	}
+	dirfd, cName, err := rn.openBackingDir(path)
+	if err != nil {
+		return nil, nil, 0, fs.ToErrno(err)
+	}
+	defer syscall.Close(dirfd)
+
+	fd := -1
+	// Make sure context is nil if we don't want to preserve the owner
+	if !rn.args.PreserveOwner {
+		ctx = nil
+	}
+	newFlags := rn.mangleOpenFlags(flags)
+	// Handle long file name
+	if !rn.args.PlaintextNames && nametransform.IsLongContent(cName) {
+		// Create ".name"
+		err = rn.nameTransform.WriteLongNameAt(dirfd, cName, path)
+		if err != nil {
+			return nil, nil, 0, fs.ToErrno(err)
+		}
+		// Create content
+		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, ctx)
+		if err != nil {
+			nametransform.DeleteLongNameAt(dirfd, cName)
+		}
+	} else {
+		// Create content, normal (short) file name
+		fd, err = syscallcompat.OpenatUserCtx(dirfd, cName, newFlags|syscall.O_CREAT|syscall.O_EXCL, mode, ctx)
+	}
+	if err != nil {
+		// xfstests generic/488 triggers this
+		if err == syscall.EMFILE {
+			var lim syscall.Rlimit
+			syscall.Getrlimit(syscall.RLIMIT_NOFILE, &lim)
+			tlog.Warn.Printf("Create %q: too many open files. Current \"ulimit -n\": %d", cName, lim.Cur)
+		}
+		return nil, nil, 0, fs.ToErrno(err)
+	}
+
+	// Get device number and inode number into `st`
+	st, err := syscallcompat.Fstatat2(dirfd, cName, unix.AT_SYMLINK_NOFOLLOW)
+	if err != nil {
+		// No file handle is returned on this path, so close fd here or it leaks.
+		syscall.Close(fd)
+		return nil, nil, 0, fs.ToErrno(err)
+	}
+	// Get unique inode number
+	rn.inoMap.TranslateStat(st)
+	out.Attr.FromStat(st)
+	// Create child node
+	id := fs.StableAttr{
+		Mode: uint32(st.Mode),
+		Gen:  1,
+		Ino:  st.Ino,
+	}
+	node := &Node{}
+	ch := n.NewInode(ctx, node, id)
+
+	f := os.NewFile(uintptr(fd), cName)
+	return ch, NewFile2(f, rn, st), 0, 0
+}
diff --git a/internal/fusefrontend/root_node.go b/internal/fusefrontend/root_node.go
index 1116a41..c84ac93 100644
--- a/internal/fusefrontend/root_node.go
+++ b/internal/fusefrontend/root_node.go
@@ -1,11 +1,16 @@
 package fusefrontend
 
 import (
+	"os"
+	"sync/atomic"
+	"syscall"
 	"time"
 
+	"github.com/rfjakob/gocryptfs/internal/configfile"
 	"github.com/rfjakob/gocryptfs/internal/contentenc"
 	"github.com/rfjakob/gocryptfs/internal/inomap"
 	"github.com/rfjakob/gocryptfs/internal/nametransform"
+	"github.com/rfjakob/gocryptfs/internal/syscallcompat"
 	"github.com/rfjakob/gocryptfs/internal/tlog"
 )
 
@@ -47,6 +52,30 @@ func NewRootNode(args Args, c *contentenc.ContentEnc, n nametransform.NameTransf
 	}
 }
 
+// mangleOpenFlags is used by Create() and Open() to convert the open flags the user
+// wants to the flags we internally use to open the backing file.
+// The returned flags always contain O_NOFOLLOW.
+func (rn *RootNode) mangleOpenFlags(flags uint32) (newFlags int) {
+	newFlags = int(flags)
+	// Convert WRONLY to RDWR. We always need read access to do read-modify-write cycles.
+	if (newFlags & syscall.O_ACCMODE) == syscall.O_WRONLY {
+		newFlags = newFlags ^ os.O_WRONLY | os.O_RDWR
+	}
+	// We also cannot open the file in append mode, we need to seek back for RMW
+	newFlags = newFlags &^ os.O_APPEND
+	// O_DIRECT accesses must be aligned in both offset and length.
Due to our
+	// crypto header, alignment will be off, even if userspace makes aligned
+	// accesses. Running xfstests generic/013 on ext4 used to trigger lots of
+	// EINVAL errors due to missing alignment. Just fall back to buffered IO.
+	newFlags = newFlags &^ syscallcompat.O_DIRECT
+	// Create and Open are two separate FUSE operations, so O_CREAT should not
+	// be part of the open flags.
+	newFlags = newFlags &^ syscall.O_CREAT
+	// We always want O_NOFOLLOW to be safe against symlink races
+	newFlags |= syscall.O_NOFOLLOW
+	return newFlags
+}
+
 // reportMitigatedCorruption is used to report a corruption that was transparently
 // mitigated and did not return an error to the user. Pass the name of the corrupt
 // item (filename for OpenDir(), xattr name for ListXAttr() etc).
@@ -63,3 +92,23 @@ func (rn *RootNode) reportMitigatedCorruption(item string) {
 		return
 	}
 }
+
+// isFiltered - check if plaintext "path" should be forbidden
+//
+// Prevents name clashes with internal files when file names are not encrypted
+func (rn *RootNode) isFiltered(path string) bool {
+	atomic.StoreUint32(&rn.IsIdle, 0)
+
+	if !rn.args.PlaintextNames {
+		return false
+	}
+	// gocryptfs.conf in the root directory is forbidden
+	if path == configfile.ConfDefaultName {
+		tlog.Info.Printf("The name /%s is reserved when -plaintextnames is used\n",
+			configfile.ConfDefaultName)
+		return true
+	}
+	// Note: gocryptfs.diriv is NOT forbidden because diriv and plaintextnames
+	// are exclusive
+	return false
+}
diff --git a/internal/syscallcompat/sys_linux.go b/internal/syscallcompat/sys_linux.go
index e2b19bb..02064ac 100644
--- a/internal/syscallcompat/sys_linux.go
+++ b/internal/syscallcompat/sys_linux.go
@@ -2,6 +2,7 @@
 package syscallcompat
 
 import (
+	"context"
 	"fmt"
 	"io/ioutil"
 	"runtime"
@@ -88,6 +89,20 @@ func getSupplementaryGroups(pid uint32) (gids []int) {
 	return nil
 }
 
+// OpenatUserCtx tries to extract a fuse.Context from the generic ctx and
+// calls OpenatUser. A nil ctx, or one without caller info, passes a nil fuse.Context.
+func OpenatUserCtx(dirfd int, path string, flags int, mode uint32, ctx context.Context) (fd int, err error) {
+	var ctx2 *fuse.Context
+	if ctx != nil {
+		if caller, ok := fuse.FromContext(ctx); ok {
+			ctx2 = &fuse.Context{
+				Caller: *caller,
+			}
+		}
+	}
+	return OpenatUser(dirfd, path, flags, mode, ctx2)
+}
+
 // OpenatUser runs the Openat syscall in the context of a different user.
 func OpenatUser(dirfd int, path string, flags int, mode uint32, context *fuse.Context) (fd int, err error) {
 	if context != nil {
-- 
cgit v1.2.3