aboutsummaryrefslogtreecommitdiff
path: root/internal/syscallcompat/sys_linux.go
blob: 5a2308489b29d0bef73cc296e568f269490fe86d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
// Package syscallcompat wraps Linux-specific syscalls.
package syscallcompat

import (
	"context"
	"fmt"
	"io/ioutil"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"golang.org/x/sys/unix"

	"github.com/hanwen/go-fuse/v2/fuse"

	"github.com/rfjakob/gocryptfs/internal/tlog"
)

const (
	// _FALLOC_FL_KEEP_SIZE is the fallocate(2) mode flag that EnospcPrealloc
	// passes so that preallocating space does not change the file size.
	_FALLOC_FL_KEEP_SIZE = 0x01

	// O_DIRECT means uncached I/O on Linux. No direct equivalent on MacOS and defined
	// to zero there.
	O_DIRECT = syscall.O_DIRECT

	// O_PATH is only defined on Linux
	O_PATH = unix.O_PATH
)

// preallocWarn ensures that the "fallocate(2) not supported" warning issued
// by EnospcPrealloc is printed at most once.
var preallocWarn sync.Once

// EnospcPrealloc reserves `len` bytes of ciphertext space starting at `off`
// in the file `fd` while leaving the file size untouched. Reserving up front
// guarantees that writing a ciphertext block cannot fail halfway with an
// out-of-space error (which would corrupt that block).
//
// The call is retried for as long as it returns EINTR. EOPNOTSUPP is not
// treated as an error: a warning is logged once and nil is returned. Every
// other result of fallocate (including success) is returned unchanged.
func EnospcPrealloc(fd int, off int64, len int64) (err error) {
	for {
		switch err = syscall.Fallocate(fd, _FALLOC_FL_KEEP_SIZE, off, len); err {
		case syscall.EINTR:
			// Interrupted by a signal. This is not a failure; simply
			// issue the call again.
			continue
		case syscall.EOPNOTSUPP:
			// ZFS and ext3 do not support fallocate. Warn but continue anyway.
			// https://github.com/rfjakob/gocryptfs/issues/22
			preallocWarn.Do(func() {
				tlog.Warn.Printf("Warning: The underlying filesystem " +
					"does not support fallocate(2). gocryptfs will continue working " +
					"but is no longer resistant against out-of-space errors.\n")
			})
			return nil
		default:
			return err
		}
	}
}

// Fallocate is a thin wrapper around syscall.Fallocate. All arguments are
// forwarded as-is and the syscall's error is returned without modification
// (in particular, EINTR is not retried here).
func Fallocate(fd int, mode uint32, off int64, len int64) (err error) {
	err = syscall.Fallocate(fd, mode, off, len)
	return err
}

// getSupplementaryGroups reads /proc/<pid>/task/<pid>/status and returns the
// group ids listed on its "Groups:" line.
//
// nil is returned when the status file cannot be read, when it has no
// "Groups:" line, or when any entry on that line fails to parse as a
// base-10 integer that fits into 32 bits. A "Groups:" line without entries
// yields an empty, non-nil slice.
func getSupplementaryGroups(pid uint32) (gids []int) {
	const groupsPrefix = "Groups:"

	statusPath := fmt.Sprintf("/proc/%d/task/%d/status", pid, pid)
	content, err := ioutil.ReadFile(statusPath)
	if err != nil {
		return nil
	}

	for _, row := range strings.Split(string(content), "\n") {
		if !strings.HasPrefix(row, groupsPrefix) {
			continue
		}
		// Only the first "Groups:" line is considered.
		fields := strings.Fields(row[len(groupsPrefix):])
		gids = make([]int, 0, len(fields))
		for _, field := range fields {
			gid, err := strconv.ParseInt(field, 10, 32)
			if err != nil {
				return nil
			}
			gids = append(gids, int(gid))
		}
		return gids
	}

	return nil
}

// OpenatUserCtx is a tries to extract a fuse.Context from the generic ctx and
// calls OpenatUser.
func OpenatUserCtx(dirfd int, path string, flags int, mode uint32, ctx context.Context) (fd int, err error) {
	var ctx2 *fuse.Context
	if ctx != nil {
		if caller, ok := fuse.FromContext(ctx); ok {
			ctx2 = &fuse.Context{
				Caller: *caller,
			}
		}
	}
	return OpenatUser(dirfd, path, flags, mode, ctx2)
}

// OpenatUser runs the Openat syscall in the context of a different user.
//
// With a nil `context`, Openat is called directly and no credentials are
// touched. Otherwise the goroutine is locked to its OS thread, the
// supplementary groups of context.Pid are installed, and the gid and uid
// are switched (via Setregid/Setreuid with -1 as first argument) to
// context.Owner.Gid and context.Owner.Uid. If any of these steps fails,
// -1 and the error are returned and Openat is not attempted.
//
// The deferred calls undo the switch in reverse order when the function
// returns: uid and gid are set back to 0 and the supplementary groups are
// cleared. Errors from these restore calls are not checked.
//
// NOTE(review): since Go 1.16 the syscall.Setgroups/Setregid/Setreuid
// wrappers on Linux are documented to apply to all threads of the process —
// confirm this per-thread switch still behaves as intended.
func OpenatUser(dirfd int, path string, flags int, mode uint32, context *fuse.Context) (fd int, err error) {
	if context == nil {
		return Openat(dirfd, path, flags, mode)
	}

	// Stay on a single OS thread for the whole switch-and-restore sequence.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	if err = syscall.Setgroups(getSupplementaryGroups(context.Pid)); err != nil {
		return -1, err
	}
	defer syscall.Setgroups(nil)

	if err = syscall.Setregid(-1, int(context.Owner.Gid)); err != nil {
		return -1, err
	}
	defer syscall.Setregid(-1, 0)

	if err = syscall.Setreuid(-1, int(context.Owner.Uid)); err != nil {
		return -1, err
	}
	defer syscall.Setreuid(-1, 0)

	return Openat(dirfd, path, flags, mode)
}

// Mknodat is a thin wrapper around syscall.Mknodat. All arguments are
// forwarded as-is and the syscall's error is returned without modification.
func Mknodat(dirfd int, path string, mode uint32, dev int) (err error) {
	err = syscall.Mknodat(dirfd, path, mode, dev)
	return err
}

// MknodatUserCtx is a tries to extract a fuse.Context from the generic ctx and
// calls OpenatUser.
func MknodatUserCtx(dirfd int, path string, mode uint32, dev int, ctx context.Context) (err error) {
	var ctx2 *fuse.Context
	if ctx != nil {
		if caller, ok := fuse.FromContext(ctx); ok {
			ctx2 = &fuse.Context{
				Caller: *caller,
			}
		}
	}
	return MknodatUser(dirfd, path, mode, dev, ctx2)
}

// MknodatUser runs the Mknodat syscall in the context of a different user.
//
// With a nil `context`, Mknodat is called directly and no credentials are
// touched. Otherwise the goroutine is locked to its OS thread, the
// supplementary groups of context.Pid are installed, and the gid and uid
// are switched (via Setregid/Setreuid with -1 as first argument) to
// context.Owner.Gid and context.Owner.Uid. If any of these steps fails,
// its error is returned and Mknodat is not attempted.
//
// The deferred calls undo the switch in reverse order when the function
// returns: uid and gid are set back to 0 and the supplementary groups are
// cleared. Errors from these restore calls are not checked.
//
// NOTE(review): since Go 1.16 the syscall.Setgroups/Setregid/Setreuid
// wrappers on Linux are documented to apply to all threads of the process —
// confirm this per-thread switch still behaves as intended.
func MknodatUser(dirfd int, path string, mode uint32, dev int, context *fuse.Context) (err error) {
	if context == nil {
		return Mknodat(dirfd, path, mode, dev)
	}

	// Stay on a single OS thread for the whole switch-and-restore sequence.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	if err = syscall.Setgroups(getSupplementaryGroups(context.Pid)); err != nil {
		return err
	}
	defer syscall.Setgroups(nil)

	if err = syscall.Setregid(-1, int(context.Owner.Gid)); err != nil {
		return err
	}
	defer syscall.Setregid(-1, 0)

	if err = syscall.Setreuid(-1, int(context.Owner.Uid)); err != nil {
		return err
	}
	defer syscall.Setreuid(-1, 0)

	return Mknodat(dirfd, path, mode, dev)
}

// Dup3 is a thin wrapper around syscall.Dup3. Dup3 is used in preference to
// Dup2 because Dup2 is not implemented on arm64. All arguments are forwarded
// as-is and the syscall's error is returned without modification.
func Dup3(oldfd int, newfd int, flags int) (err error) {
	err = syscall.Dup3(oldfd, newfd, flags)
	return err
}

// FchmodatNofollow is like Fchmodat but never follows symlinks.
//
// The AT_SYMLINK_NOFOLLOW flag would be the natural way to get this, but
// Linux does not implement it for fchmodat. Instead the target is opened
// with O_PATH|O_NOFOLLOW, checked for being a symlink (syscall.ELOOP is
// returned if it is one), and then chmod'ed through its /proc/self/fd entry.
//
// See also: Qemu implemented the same logic as fchmodat_nofollow():
// https://git.qemu.org/?p=qemu.git;a=blob;f=hw/9pfs/9p-local.c#l335
func FchmodatNofollow(dirfd int, path string, mode uint32) (err error) {
	// Obtain a handle to the name without opening the file itself. This
	// works even when we have no read permission on the file.
	handle, err := syscall.Openat(dirfd, path, syscall.O_NOFOLLOW|O_PATH, 0)
	if err != nil {
		return err
	}
	defer syscall.Close(handle)

	// Inspecting the type through the handle is free of race conditions.
	var info syscall.Stat_t
	if err = syscall.Fstat(handle, &info); err != nil {
		return err
	}
	if info.Mode&syscall.S_IFMT == syscall.S_IFLNK {
		return syscall.ELOOP
	}

	// Fchmod does not work on an O_PATH handle, but Chmod on the
	// corresponding /proc/self/fd path does.
	return syscall.Chmod("/proc/self/fd/"+strconv.Itoa(handle), mode)
}

// SymlinkatUser runs the Symlinkat syscall in the context of a different user.
//
// With a nil `context`, Symlinkat is called directly and no credentials are
// touched. Otherwise the goroutine is locked to its OS thread, the
// supplementary groups of context.Pid are installed, and the gid and uid
// are switched (via Setregid/Setreuid with -1 as first argument) to
// context.Owner.Gid and context.Owner.Uid. If any of these steps fails,
// its error is returned and Symlinkat is not attempted.
//
// The deferred calls undo the switch in reverse order when the function
// returns: uid and gid are set back to 0 and the supplementary groups are
// cleared. Errors from these restore calls are not checked.
//
// NOTE(review): since Go 1.16 the syscall.Setgroups/Setregid/Setreuid
// wrappers on Linux are documented to apply to all threads of the process —
// confirm this per-thread switch still behaves as intended.
func SymlinkatUser(oldpath string, newdirfd int, newpath string, context *fuse.Context) (err error) {
	if context == nil {
		return Symlinkat(oldpath, newdirfd, newpath)
	}

	// Stay on a single OS thread for the whole switch-and-restore sequence.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	if err = syscall.Setgroups(getSupplementaryGroups(context.Pid)); err != nil {
		return err
	}
	defer syscall.Setgroups(nil)

	if err = syscall.Setregid(-1, int(context.Owner.Gid)); err != nil {
		return err
	}
	defer syscall.Setregid(-1, 0)

	if err = syscall.Setreuid(-1, int(context.Owner.Uid)); err != nil {
		return err
	}
	defer syscall.Setreuid(-1, 0)

	return Symlinkat(oldpath, newdirfd, newpath)
}

// MkdiratUser runs the Mkdirat syscall in the context of a different user.
//
// With a nil `caller`, Mkdirat is called directly and no credentials are
// touched. Otherwise the goroutine is locked to its OS thread, the
// supplementary groups of caller.Pid are installed, and the gid and uid are
// switched (via Setregid/Setreuid with -1 as first argument) to caller.Gid
// and caller.Uid. If any of these steps fails, its error is returned and
// Mkdirat is not attempted.
//
// The deferred calls undo the switch in reverse order when the function
// returns: uid and gid are set back to 0 and the supplementary groups are
// cleared. Errors from these restore calls are not checked.
//
// NOTE(review): since Go 1.16 the syscall.Setgroups/Setregid/Setreuid
// wrappers on Linux are documented to apply to all threads of the process —
// confirm this per-thread switch still behaves as intended.
func MkdiratUser(dirfd int, path string, mode uint32, caller *fuse.Caller) (err error) {
	if caller == nil {
		return Mkdirat(dirfd, path, mode)
	}

	// Stay on a single OS thread for the whole switch-and-restore sequence.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	if err = syscall.Setgroups(getSupplementaryGroups(caller.Pid)); err != nil {
		return err
	}
	defer syscall.Setgroups(nil)

	if err = syscall.Setregid(-1, int(caller.Gid)); err != nil {
		return err
	}
	defer syscall.Setregid(-1, 0)

	if err = syscall.Setreuid(-1, int(caller.Uid)); err != nil {
		return err
	}
	defer syscall.Setreuid(-1, 0)

	return Mkdirat(dirfd, path, mode)
}

// timesToTimespec builds the two-element timespec slice passed to
// unix.UtimesNanoAt: element 0 is derived from `a`, element 1 from `m`.
// Each value is converted with fuse.UtimeToTimespec.
func timesToTimespec(a *time.Time, m *time.Time) []unix.Timespec {
	return []unix.Timespec{
		unix.Timespec(fuse.UtimeToTimespec(a)),
		unix.Timespec(fuse.UtimeToTimespec(m)),
	}
}

// FutimesNano syscall.
func FutimesNano(fd int, a *time.Time, m *time.Time) (err error) {
	ts := timesToTimespec(a, m)
	// To avoid introducing a separate syscall wrapper for futimens()
	// (as done in go-fuse, for example), we instead use the /proc/self/fd trick.
	procPath := fmt.Sprintf("/proc/self/fd/%d", fd)
	return unix.UtimesNanoAt(unix.AT_FDCWD, procPath, ts, 0)
}

// UtimesNanoAtNofollow is like UtimesNanoAt but never follows symlinks.
func UtimesNanoAtNofollow(dirfd int, path string, a *time.Time, m *time.Time) (err error) {
	ts := timesToTimespec(a, m)
	return unix.UtimesNanoAt(dirfd, path, ts, unix.AT_SYMLINK_NOFOLLOW)
}

// Getdents is the exported entry point for the package-internal getdents
// function. It forwards `fd` and returns that function's result unchanged.
func Getdents(fd int) ([]fuse.DirEntry, error) {
	entries, err := getdents(fd)
	return entries, err
}