-
Notifications
You must be signed in to change notification settings - Fork 149
Expand file tree
/
Copy pathfork_exec.c
More file actions
456 lines (413 loc) · 15.3 KB
/
fork_exec.c
File metadata and controls
456 lines (413 loc) · 15.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
/* Copyright (c) 2021, 2025, Oracle and/or its affiliates.
* Copyright (C) 1996-2020 Python Software Foundation
*
* Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
*/
#if defined(__gnu_linux__) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE 1
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
// These definitions emulate CPython's equivalents so that the copy&pasted code below works without too many changes
// HAVE_DIRFD selects the dirfd()-based branches in _close_open_fds_maybe_unsafe();
// HAVE_SETSID enables the setsid() call in child_exec().
#define HAVE_DIRFD 1
#define HAVE_SETSID 1
// Declared here and called by _Py_set_inheritable_async_safe(); no definition is visible in this file.
// Callers below treat a negative return value as failure.
int32_t set_inheritable(int32_t fd, int32_t inheritable);
// TODO JDK also handles EINTR for dup2 calls
// TODO neither CPython nor JDK handle EINTR returned from close(), see NOTES on man page for close(2)
// Lowercase hexadecimal digits; child_exec() indexes this table to encode errno for the error pipe.
static const char *Py_hexdigits = "0123456789abcdef";
/* Adapter with the signature the copied CPython code expects.
 *
 * fd                - descriptor whose inheritable state is changed
 * inheritable       - forwarded unchanged to set_inheritable()
 * atomic_flag_works - must be NULL; it is only checked by the assertion
 *
 * Returns whatever set_inheritable() returns. */
static int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    assert(atomic_flag_works == NULL);

    const int status = set_inheritable(fd, inheritable);
    return status;
}
/* Write count bytes from buf to fd, retrying for as long as write() fails
 * with EINTR. Any other failure, and any successful (possibly short) write,
 * ends the attempt; nothing is reported back to the caller. */
static void
_Py_write_noraise(int fd, const void *buf, size_t count)
{
    for (;;) {
        if (write(fd, buf, count) >= 0) {
            return;     /* write() accepted the request. */
        }
        if (errno != EINTR) {
            return;     /* Real failure: give up silently. */
        }
        /* Interrupted by a signal: try again. */
    }
}
/* Stand-in for CPython's _Py_RestoreSignals(): intentionally does nothing.
 * child_exec() calls it when restore_signals is nonzero.
 *
 * Declared with (void) so the compiler has a real prototype and rejects
 * calls that pass arguments. */
static void
_Py_RestoreSignals(void)
{
}
// The following code is taken directly from CPython's _posixsubprocess.c with some minor changes:
// - py_fds_to_keep is not a PyObject, but an int array
// - there's no preexec_fn
// Directory that _close_open_fds_maybe_unsafe() scans; each entry whose name is a decimal number is treated as an open fd.
#if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__))
# define FD_DIR "/dev/fd"
#else
# define FD_DIR "/proc/self/fd"
#endif
// Evaluates `call` once and jumps to the enclosing function's `error` label when it yields -1.
// Usable only inside a function that defines such a label (child_exec() in this file).
#define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0)
/* Convert a string of ASCII decimal digits to a non-negative int without
 * calling into libc.
 *
 * name - NUL-terminated string to parse
 *
 * Returns the parsed value, or -1 if the string contains a non-digit
 * character or the value does not fit in an int. An empty string yields 0. */
static int
_pos_int_from_ascii(const char *name)
{
    int num = 0;

    for (; *name >= '0' && *name <= '9'; ++name) {
        const int digit = *name - '0';
        /* Reject before multiplying: num * 10 + digit must stay <= INT_MAX,
         * otherwise the signed arithmetic would be undefined behavior. */
        if (num > (INT_MAX - digit) / 10) {
            return -1;
        }
        num = num * 10 + digit;
    }
    if (*name != '\0') {
        return -1;  /* Non digit found, not a number. */
    }
    return num;
}
/* Binary-search a sorted (ascending) array of file descriptors.
 *
 * fd              - descriptor to look for
 * fd_sequence     - array of descriptors in ascending order
 * fd_sequence_len - number of entries in fd_sequence
 *
 * Returns 1 if fd is present, 0 otherwise (including for an empty array). */
static int
_is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence, ssize_t fd_sequence_len)
{
    ssize_t lo = 0;
    ssize_t hi = fd_sequence_len - 1;

    while (lo <= hi) {
        const ssize_t mid = (lo + hi) / 2;
        const int candidate = fd_sequence[mid];

        if (candidate == fd) {
            return 1;
        }
        if (candidate < fd) {
            lo = mid + 1;   /* Target lies in the upper half. */
        } else {
            hi = mid - 1;   /* Target lies in the lower half. */
        }
    }
    return 0;
}
/* Mark every descriptor in fds_to_keep as inheritable, except errpipe_write.
 *
 * fds_to_keep   - array of descriptors to process
 * len           - number of entries in fds_to_keep
 * errpipe_write - descriptor that is skipped even when listed
 *
 * Returns 0 on success, -1 as soon as one descriptor cannot be updated. */
static int
make_inheritable(int *fds_to_keep, ssize_t len, int errpipe_write)
{
    for (ssize_t idx = 0; idx < len; ++idx) {
        const int keep_fd = fds_to_keep[idx];
        assert(0 <= keep_fd && keep_fd <= INT_MAX);

        /* errpipe_write is part of py_fds_to_keep. It must be closed at
           exec(), but kept open in the child process until exec() is
           called, so it is deliberately left untouched here. */
        if (keep_fd != errpipe_write &&
                _Py_set_inheritable_async_safe(keep_fd, 1, NULL) < 0) {
            return -1;
        }
    }
    return 0;
}
/* Report an upper bound for the file descriptor numbers this process may
 * have open. Intended for use between fork() and exec().
 *
 * Sources are tried in order, each only where the platform macros enable it:
 * fcntl(F_MAXFD) on NetBSD, getrlimit(RLIMIT_NOFILE) on OpenBSD, then
 * sysconf(_SC_OPEN_MAX). If none yields a value, 256 is returned. */
static long
safe_get_max_fd(void)
{
    long max_fd;
#if defined(__NetBSD__)
    max_fd = fcntl(0, F_MAXFD);
    if (max_fd >= 0) {
        return max_fd;
    }
#endif
#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__)
    struct rlimit limits;
    /* Not on the POSIX async signal safe functions list but likely
     * safe.  TODO - Someone should audit OpenBSD to make sure. */
    if (getrlimit(RLIMIT_NOFILE, &limits) >= 0) {
        return (long) limits.rlim_max;
    }
#endif
#ifdef _SC_OPEN_MAX
    max_fd = sysconf(_SC_OPEN_MAX);
    if (max_fd != -1) {
        return max_fd;
    }
#endif
    max_fd = 256;  /* Matches legacy Lib/subprocess.py behavior. */
    return max_fd;
}
/* Call close() on every descriptor number in [start_fd, safe_get_max_fd())
 * that is not listed in fds_to_keep, whether or not it is actually open.
 * For a large range this is slow, since every candidate number is tried.
 *
 * start_fd        - first descriptor number to consider
 * fds_to_keep     - ascending array of descriptors that must stay open
 * num_fds_to_keep - number of entries in fds_to_keep
 *
 * It isn't possible to know for sure what the max fd to go up to
 * is for processes with the capability of raising their maximum. */
static void
_close_fds_by_brute_force(long start_fd, int *fds_to_keep, ssize_t num_fds_to_keep)
{
    const long limit_fd = safe_get_max_fd();
    int fd;
    ssize_t k = 0;

    /* The keep list is sorted, so walk it once and close each gap between
     * consecutive kept descriptors that falls inside our range. */
    while (k < num_fds_to_keep) {
        const int kept = fds_to_keep[k];
        ++k;
        if (kept >= start_fd) {
            for (fd = start_fd; fd < kept; ++fd) {
                close(fd);
            }
            start_fd = kept + 1;
        }
    }

    /* Close whatever remains above the last kept descriptor. */
    if (start_fd <= limit_fd) {
        for (fd = start_fd; fd < limit_fd; ++fd) {
            close(fd);
        }
    }
}
/* Close all open file descriptors from start_fd and higher.
 * Do not close any in the sorted fds_to_keep array.
 *
 * start_fd        - lowest descriptor number eligible for closing
 * fds_to_keep     - ascending array of descriptors that must stay open
 * fds_to_keep_len - number of entries in fds_to_keep
 *
 * The open descriptors are discovered by listing FD_DIR. If that directory
 * cannot be opened, or reading it fails, the function falls back to
 * _close_fds_by_brute_force(). Return values of close() are ignored.
 *
 * This function violates the strict use of async signal safe functions. :(
 * It calls opendir(), readdir() and closedir(). Of these, the one most
 * likely to ever cause a problem is opendir() as it performs an internal
 * malloc(). Practically this should not be a problem. The Java VM makes the
 * same calls between fork and exec in its own UNIXProcess_md.c implementation.
 *
 * readdir_r() is not used because it provides no benefit. It is typically
 * implemented as readdir() followed by memcpy(). See also:
 * http://womble.decadent.org.uk/readdir_r-advisory.html
 */
static void
_close_open_fds_maybe_unsafe(long start_fd, int* fds_to_keep, ssize_t fds_to_keep_len)
{
DIR *proc_fd_dir;
/* This file defines HAVE_DIRFD, so the following fallback is not compiled
 * here; it is kept from the CPython original. */
#ifndef HAVE_DIRFD
while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep, fds_to_keep_len)) {
++start_fd;
}
/* Close our lowest fd before we call opendir so that it is likely to
 * reuse that fd otherwise we might close opendir's file descriptor in
 * our loop. This trick assumes that fd's are allocated on a lowest
 * available basis. */
close(start_fd);
++start_fd;
#endif
/* NOTE(review): _is_fdescfs_mounted_on_dev_fd() is not defined in the part of
 * this file shown here; a FreeBSD build presumably needs it - confirm. */
#if defined(__FreeBSD__)
if (!_is_fdescfs_mounted_on_dev_fd())
proc_fd_dir = NULL;
else
#endif
proc_fd_dir = opendir(FD_DIR);
if (!proc_fd_dir) {
/* No way to get a list of open fds. */
_close_fds_by_brute_force(start_fd, fds_to_keep, fds_to_keep_len);
} else {
struct dirent *dir_entry;
/* Remember the descriptor backing the directory stream so the loop below
 * does not close it while it is still being read. */
#ifdef HAVE_DIRFD
int fd_used_by_opendir = dirfd(proc_fd_dir);
#else
int fd_used_by_opendir = start_fd - 1;
#endif
/* readdir() returns NULL both at end of directory and on error; clearing
 * errno before each call lets the check after the loop tell them apart. */
errno = 0;
while ((dir_entry = readdir(proc_fd_dir))) {
int fd;
if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
continue; /* Not a number. */
if (fd != fd_used_by_opendir && fd >= start_fd &&
!_is_fd_in_sorted_fd_sequence(fd, fds_to_keep, fds_to_keep_len)) {
close(fd);
}
/* Discard any errno left behind by close() before the next readdir(). */
errno = 0;
}
if (errno) {
/* readdir error, revert behavior. Highly Unlikely. */
/* NOTE(review): the brute-force pass does not exclude fd_used_by_opendir,
 * so it may close that descriptor before closedir() below - confirm this
 * is acceptable. */
_close_fds_by_brute_force(start_fd, fds_to_keep, fds_to_keep_len);
}
closedir(proc_fd_dir);
}
}
/* Name used by child_exec(); this port always uses the directory-listing variant. */
#define _close_open_fds _close_open_fds_maybe_unsafe
/*
 * This function is code executed in the child process immediately after fork
 * to set things up and call exec().
 *
 * All of the code in this function must only use async-signal-safe functions,
 * listed at `man 7 signal` or
 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html.
 * (Known exception: _close_open_fds(), see the comment on its definition.)
 *
 * This restriction is documented at
 * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html.
 *
 * Parameters:
 *   exec_array       - NULL-terminated list of executable paths, tried in order
 *   argv             - argument vector passed to execve()/execv()
 *   envp             - environment for execve(); if NULL, execv() is used instead
 *   cwd              - directory to chdir() into before exec, or NULL to skip
 *   p2cread/p2cwrite - stdin pipe ends; -1 means "not used"
 *   c2pread/c2pwrite - stdout pipe ends; -1 means "not used"
 *   errread/errwrite - stderr pipe ends; -1 means "not used"
 *   errpipe_read     - closed unconditionally in the child
 *   errpipe_write    - failure reports are written here
 *   close_fds        - nonzero: close every fd >= 3 that is not in fds_to_keep
 *   restore_signals  - nonzero: call _Py_RestoreSignals()
 *   call_setsid      - nonzero: call setsid() (only if HAVE_SETSID is defined)
 *   fds_to_keep, fds_to_keep_len - sorted array of fds to make inheritable and
 *                      to spare when close_fds is set
 *
 * The function returns only if something failed. Before returning it writes
 * "OSError:<errno in lowercase hex>:<err_msg>" to errpipe_write, or
 * "SubprocessError:0:<err_msg>" when errno is 0.
 */
static void
child_exec(char *const exec_array[],
char *const argv[],
char *const envp[],
const char *cwd,
int p2cread, int p2cwrite,
int c2pread, int c2pwrite,
int errread, int errwrite,
int errpipe_read, int errpipe_write,
int close_fds, int restore_signals,
int call_setsid,
int *fds_to_keep,
ssize_t fds_to_keep_len)
{
int i, saved_errno;
/* Buffer large enough to hold a hex integer. We can't malloc.
   It is filled from the end and written with an explicit length, so no
   NUL terminator is stored in it. */
char hex_errno[sizeof(saved_errno)*2+1];
/* Suffix of the failure report; "noexec" covers every failure that happens
   before the exec attempts start. */
const char *err_msg = "noexec";
if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0)
goto error;
/* Close parent's pipe ends. */
if (p2cwrite != -1)
POSIX_CALL(close(p2cwrite));
if (c2pread != -1)
POSIX_CALL(close(c2pread));
if (errread != -1)
POSIX_CALL(close(errread));
POSIX_CALL(close(errpipe_read));
/* When duping fds, if there arises a situation where one of the fds is
either 0, 1 or 2, it is possible that it is overwritten (#12607). */
if (c2pwrite == 0) {
POSIX_CALL(c2pwrite = dup(c2pwrite));
/* issue32270 */
if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) {
goto error;
}
}
/* Keep duplicating until errwrite is neither 0 nor 1. */
while (errwrite == 0 || errwrite == 1) {
POSIX_CALL(errwrite = dup(errwrite));
/* issue32270 */
if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) {
goto error;
}
}
/* Dup fds for child.
dup2() removes the CLOEXEC flag but we must do it ourselves if dup2()
would be a no-op (issue #10806). */
if (p2cread == 0) {
if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0)
goto error;
}
else if (p2cread != -1)
POSIX_CALL(dup2(p2cread, 0)); /* stdin */
if (c2pwrite == 1) {
if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0)
goto error;
}
else if (c2pwrite != -1)
POSIX_CALL(dup2(c2pwrite, 1)); /* stdout */
if (errwrite == 2) {
if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0)
goto error;
}
else if (errwrite != -1)
POSIX_CALL(dup2(errwrite, 2)); /* stderr */
/* We no longer manually close p2cread, c2pwrite, and errwrite here as
 * _close_open_fds takes care when it is not already non-inheritable. */
if (cwd) {
if (chdir(cwd) == -1) {
err_msg = "noexec:chdir";
goto error;
}
}
if (restore_signals)
_Py_RestoreSignals();
#ifdef HAVE_SETSID
if (call_setsid)
POSIX_CALL(setsid());
#endif
/* From here on a failure is reported with an empty message suffix. */
err_msg = "";
/* In CPython the fds are closed after preexec_fn has run, because it might
   open fds; this port has no preexec_fn, but the ordering is kept. */
if (close_fds) {
/* TODO HP-UX could use pstat_getproc() if anyone cares about it. */
_close_open_fds(3, fds_to_keep, fds_to_keep_len);
}
/* This loop matches the Lib/os.py _execvpe()'s PATH search when */
/* given the executable_list generated by Lib/subprocess.py. */
saved_errno = 0;
for (i = 0; exec_array[i] != NULL; ++i) {
const char *executable = exec_array[i];
/* A successful exec never returns; reaching the code after it means this
   candidate failed. */
if (envp) {
execve(executable, argv, envp);
} else {
execv(executable, argv);
}
/* Remember only the first error other than ENOENT/ENOTDIR. */
if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) {
saved_errno = errno;
}
}
/* Report the first exec error, not the last. */
if (saved_errno)
errno = saved_errno;
error:
saved_errno = errno;
/* Report the posix error to our parent process. */
/* We ignore all write() return values as the total size of our writes is
less than PIPEBUF and we cannot do anything about an error anyways.
Use _Py_write_noraise() to retry write() if it is interrupted by a
signal (fails with EINTR). */
if (saved_errno) {
char *cur;
_Py_write_noraise(errpipe_write, "OSError:", 8);
/* Emit the hex digits right to left, starting at the end of the buffer. */
cur = hex_errno + sizeof(hex_errno);
while (saved_errno != 0 && cur != hex_errno) {
*--cur = Py_hexdigits[saved_errno % 16];
saved_errno /= 16;
}
_Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur);
_Py_write_noraise(errpipe_write, ":", 1);
/* We can't call strerror(saved_errno). It is not async signal safe.
 * The parent process will look the error message up. */
} else {
_Py_write_noraise(errpipe_write, "SubprocessError:0:", 18);
}
_Py_write_noraise(errpipe_write, err_msg, strlen(err_msg));
}
/*
 * Fork the current process and, in the child, set up file descriptors and
 * exec the requested program via child_exec().
 *
 * data, offsets, offsetsLen, argsPos, envPos, cwdPos - see comment in NFiPosixSupport.forkExec()
 *     Each offsets[i] is turned into a pointer into data (or NULL when it is -1);
 *     the pointers are stored in place, in the memory of the offsets array itself.
 *     The resulting table starts with the executable candidates; argsPos, envPos and
 *     cwdPos index into it (envPos/cwdPos of -1 mean "no environment"/"no cwd").
 * stdinRdFd   - read end of the pipe for the child's stdin - closed by parent, dupped to fd 0 by child
 * stdinWrFd   - write end of the pipe for the child's stdin - closed by child, written to by parent
 * stdoutRdFd  - read end of the pipe for the child's stdout - closed by child, read from by parent
 * stdoutWrFd  - write end of the pipe for the child's stdout - closed by parent, dupped to fd 1 by child
 * stderrRdFd  - read end of the pipe for the child's stderr - closed by child, read from by parent
 * stderrWrFd  - write end of the pipe for the child's stderr - closed by parent, dupped to fd 2 by child
 * errPipeRdFd - read end of the pipe for reporting exec errors to parent - closed by child, read from by parent
 * errPipeWrFd - write end of the pipe for reporting exec errors to parent - closed by parent, written to by child
 *               if an error occurs; the child process closes this fd in any case:
 *                 - on success, no data is written and the fd is closed by exec() because its O_CLOEXEC is set
 *                 - on error, the error code is written and the fd is closed because the child process exits
 * closeFds    - nonzero if all fds except 0, 1, 2 and those in fdsToKeep should be explicitly closed by the child
 *               (if nonzero, then errPipeWrFd must be in fdsToKeep)
 * restoreSignals - forwarded to the child, where it currently has no effect
 * callSetsid  - if nonzero, the child calls setsid before exec()
 * fdsToKeep, fdsToKeepLen - a sorted list of fds to keep open (the child clears their O_CLOEXEC)
 *
 * Returns the value of fork() in the calling process. The child never returns
 * from this function: if exec fails it terminates with _exit(255).
 */
int32_t fork_exec(
        char *data, int64_t *offsets, int32_t offsetsLen, int32_t argsPos, int32_t envPos, int32_t cwdPos,
        int32_t stdinRdFd, int32_t stdinWrFd,
        int32_t stdoutRdFd, int32_t stdoutWrFd,
        int32_t stderrRdFd, int32_t stderrWrFd,
        int32_t errPipeRdFd, int32_t errPipeWrFd,
        int32_t closeFds,
        int32_t restoreSignals,
        int32_t callSetsid,
        int32_t *fdsToKeep, int64_t fdsToKeepLen
) {
    // The offsets array doubles as storage for the pointer table, which avoids
    // allocating (and having to reliably free) a second array.
    char **table = (char **) offsets;
    for (int32_t idx = 0; idx < offsetsLen; ++idx) {
        // Fetch the offset before its slot is overwritten with the pointer.
        const int64_t off = offsets[idx];
        if (off == -1) {
            table[idx] = NULL;
        } else {
            table[idx] = data + off;
        }
    }

    char **child_argv = table + argsPos;

    char **child_envp = NULL;
    if (envPos != -1) {
        child_envp = table + envPos;
    }

    char *child_cwd = NULL;
    if (cwdPos != -1) {
        child_cwd = table[cwdPos];
    }

    pid_t pid = fork();
    if (pid == 0) {
        // Child: child_exec() comes back only when setup or exec failed.
        child_exec(
            table, child_argv, child_envp, child_cwd,
            stdinRdFd, stdinWrFd,
            stdoutRdFd, stdoutWrFd,
            stderrRdFd, stderrWrFd,
            errPipeRdFd, errPipeWrFd,
            closeFds,
            restoreSignals,
            callSetsid,
            fdsToKeep, fdsToKeepLen
        );
        _exit(255);
    }
    return pid;
}