aboutsummaryrefslogtreecommitdiff
path: root/internal/syscallcompat/sys_linux.go
blob: 3714b7572877a8d2135b41325b4a24cdc7488e10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
// Package syscallcompat wraps Linux-specific syscalls.
package syscallcompat

import (
	"fmt"
	"io/ioutil"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"golang.org/x/sys/unix"

	"github.com/hanwen/go-fuse/v2/fuse"

	"github.com/rfjakob/gocryptfs/v2/internal/tlog"
)

const (
	_FALLOC_FL_KEEP_SIZE = 0x01

	// O_DIRECT means oncached I/O on Linux. No direct equivalent on MacOS and defined
	// to zero there.
	O_DIRECT = syscall.O_DIRECT

	// O_PATH is only defined on Linux
	O_PATH = unix.O_PATH

	// Only defined on Linux
	RENAME_NOREPLACE = unix.RENAME_NOREPLACE
	RENAME_WHITEOUT  = unix.RENAME_WHITEOUT
	RENAME_EXCHANGE  = unix.RENAME_EXCHANGE
	F_OFD_SETLKW     = unix.F_OFD_SETLKW
)

var preallocWarn sync.Once

// EnospcPrealloc preallocates ciphertext space without changing the file
// size. This guarantees that we don't run out of space while writing a
// ciphertext block (that would corrupt the block).
func EnospcPrealloc(fd int, off int64, len int64) (err error) {
	for {
		err = syscall.Fallocate(fd, _FALLOC_FL_KEEP_SIZE, off, len)
		if err == syscall.EINTR {
			// fallocate, like many syscalls, can return EINTR. This is not an
			// error and just signifies that the operation was interrupted by a
			// signal and we should try again.
			continue
		}
		if err == syscall.EOPNOTSUPP {
			// ZFS and ext3 do not support fallocate. Warn but continue anyway.
			// https://github.com/rfjakob/gocryptfs/issues/22
			preallocWarn.Do(func() {
				tlog.Warn.Printf("Warning: The underlying filesystem " +
					"does not support fallocate(2). gocryptfs will continue working " +
					"but is no longer resistant against out-of-space errors.\n")
			})
			return nil
		}
		return err
	}
}

// Fallocate wraps the Fallocate syscall.
func Fallocate(fd int, mode uint32, off int64, len int64) (err error) {
	return syscall.Fallocate(fd, mode, off, len)
}

func getSupplementaryGroups(pid uint32) (gids []int) {
	procPath := fmt.Sprintf("/proc/%d/task/%d/status", pid, pid)
	blob, err := ioutil.ReadFile(procPath)
	if err != nil {
		return nil
	}

	lines := strings.Split(string(blob), "\n")
	for _, line := range lines {
		if strings.HasPrefix(line, "Groups:") {
			f := strings.Fields(line[7:])
			gids = make([]int, len(f))
			for i := range gids {
				val, err := strconv.ParseInt(f[i], 10, 32)
				if err != nil {
					return nil
				}
				gids[i] = int(val)
			}
			return gids
		}
	}

	return nil
}

// asUser runs `f()` under the effective uid, gid, groups specified
// in `context`.
//
// If `context` is nil, `f()` is executed directly without switching user id.
func asUser(f func() (int, error), context *fuse.Context) (int, error) {
	if context == nil {
		return f()
	}

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Since go1.16beta1 (commit d1b1145cace8b968307f9311ff611e4bb810710c ,
	// https://go-review.googlesource.com/c/go/+/210639 )
	// syscall.{Setgroups,Setregid,Setreuid} affects all threads, which
	// is exactly what we not want.
	//
	// We now use unix.{Setgroups,Setregid,Setreuid} instead.

	err := unix.Setgroups(getSupplementaryGroups(context.Pid))
	if err != nil {
		return -1, err
	}
	defer unix.Setgroups(nil)

	err = unix.Setregid(-1, int(context.Owner.Gid))
	if err != nil {
		return -1, err
	}
	defer unix.Setregid(-1, 0)

	err = unix.Setreuid(-1, int(context.Owner.Uid))
	if err != nil {
		return -1, err
	}
	defer unix.Setreuid(-1, 0)

	return f()
}

// OpenatUser runs the Openat syscall in the context of a different user.
//
// It switches the current thread to the new user, performs the syscall,
// and switches back.
//
// If `context` is nil, this function behaves like ordinary Openat (no
// user switching).
func OpenatUser(dirfd int, path string, flags int, mode uint32, context *fuse.Context) (fd int, err error) {
	f := func() (int, error) {
		return Openat(dirfd, path, flags, mode)
	}
	return asUser(f, context)
}

// Mknodat wraps the Mknodat syscall.
func Mknodat(dirfd int, path string, mode uint32, dev int) (err error) {
	return syscall.Mknodat(dirfd, path, mode, dev)
}

// MknodatUser runs the Mknodat syscall in the context of a different user.
// If `context` is nil, this function behaves like ordinary Mknodat.
//
// See OpenatUser() for how this works.
func MknodatUser(dirfd int, path string, mode uint32, dev int, context *fuse.Context) (err error) {
	f := func() (int, error) {
		err := Mknodat(dirfd, path, mode, dev)
		return -1, err
	}
	_, err = asUser(f, context)
	return err
}

// Dup3 wraps the Dup3 syscall. We want to use Dup3 rather than Dup2 because Dup2
// is not implemented on arm64.
func Dup3(oldfd int, newfd int, flags int) (err error) {
	return syscall.Dup3(oldfd, newfd, flags)
}

// FchmodatNofollow is like Fchmodat but never follows symlinks.
//
// This should be handled by the AT_SYMLINK_NOFOLLOW flag, but Linux
// does not implement it, so we have to perform an elaborate dance
// with O_PATH and /proc/self/fd.
//
// See also: Qemu implemented the same logic as fchmodat_nofollow():
// https://git.qemu.org/?p=qemu.git;a=blob;f=hw/9pfs/9p-local.c#l335
func FchmodatNofollow(dirfd int, path string, mode uint32) (err error) {
	// Open handle to the filename (but without opening the actual file).
	// This succeeds even when we don't have read permissions to the file.
	fd, err := syscall.Openat(dirfd, path, syscall.O_NOFOLLOW|O_PATH, 0)
	if err != nil {
		return err
	}
	defer syscall.Close(fd)

	// Now we can check the type without the risk of race-conditions.
	// Return syscall.ELOOP if it is a symlink.
	var st syscall.Stat_t
	err = syscall.Fstat(fd, &st)
	if err != nil {
		return err
	}
	if st.Mode&syscall.S_IFMT == syscall.S_IFLNK {
		return syscall.ELOOP
	}

	// Change mode of the actual file. Fchmod does not work with O_PATH,
	// but Chmod via /proc/self/fd works.
	procPath := fmt.Sprintf("/proc/self/fd/%d", fd)
	return syscall.Chmod(procPath, mode)
}

// SymlinkatUser runs the Symlinkat syscall in the context of a different user.
// If `context` is nil, this function behaves like ordinary Symlinkat.
//
// See OpenatUser() for how this works.
func SymlinkatUser(oldpath string, newdirfd int, newpath string, context *fuse.Context) (err error) {
	f := func() (int, error) {
		err := unix.Symlinkat(oldpath, newdirfd, newpath)
		return -1, err
	}
	_, err = asUser(f, context)
	return err
}

// MkdiratUser runs the Mkdirat syscall in the context of a different user.
// If `context` is nil, this function behaves like ordinary Mkdirat.
//
// See OpenatUser() for how this works.
func MkdiratUser(dirfd int, path string, mode uint32, context *fuse.Context) (err error) {
	f := func() (int, error) {
		err := unix.Mkdirat(dirfd, path, mode)
		return -1, err
	}
	_, err = asUser(f, context)
	return err
}

// LsetxattrUser runs the Lsetxattr syscall in the context of a different user.
// This is useful when setting ACLs, as the result depends on the user running
// the operation (see fuse-xfstests generic/375).
//
// If `context` is nil, this function behaves like ordinary Lsetxattr.
func LsetxattrUser(path string, attr string, data []byte, flags int, context *fuse.Context) (err error) {
	f := func() (int, error) {
		err := unix.Lsetxattr(path, attr, data, flags)
		return -1, err
	}
	_, err = asUser(f, context)
	return err
}

func timesToTimespec(a *time.Time, m *time.Time) []unix.Timespec {
	ts := make([]unix.Timespec, 2)
	ts[0] = unix.Timespec(fuse.UtimeToTimespec(a))
	ts[1] = unix.Timespec(fuse.UtimeToTimespec(m))
	return ts
}

// FutimesNano syscall.
func FutimesNano(fd int, a *time.Time, m *time.Time) (err error) {
	ts := timesToTimespec(a, m)
	// To avoid introducing a separate syscall wrapper for futimens()
	// (as done in go-fuse, for example), we instead use the /proc/self/fd trick.
	procPath := fmt.Sprintf("/proc/self/fd/%d", fd)
	return unix.UtimesNanoAt(unix.AT_FDCWD, procPath, ts, 0)
}

// UtimesNanoAtNofollow is like UtimesNanoAt but never follows symlinks.
// Retries on EINTR.
func UtimesNanoAtNofollow(dirfd int, path string, a *time.Time, m *time.Time) (err error) {
	ts := timesToTimespec(a, m)
	err = retryEINTR(func() error {
		return unix.UtimesNanoAt(dirfd, path, ts, unix.AT_SYMLINK_NOFOLLOW)
	})
	return err
}

// Getdents syscall with "." and ".." filtered out.
func Getdents(fd int) ([]fuse.DirEntry, error) {
	entries, _, err := getdents(fd)
	return entries, err
}

// GetdentsSpecial calls the Getdents syscall,
// with normal entries and "." / ".." split into two slices.
func GetdentsSpecial(fd int) (entries []fuse.DirEntry, entriesSpecial []fuse.DirEntry, err error) {
	return getdents(fd)
}

// Renameat2 does not exist on Darwin, so we have to wrap it here.
// Retries on EINTR.
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) (err error) {
	err = retryEINTR(func() error {
		return unix.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags)
	})
	return err
}