|
32 | 32 | import com.google.devtools.build.lib.shell.Command; |
33 | 33 | import com.google.devtools.build.lib.shell.CommandException; |
34 | 34 | import com.google.devtools.build.lib.shell.CommandResult; |
| 35 | +import com.google.devtools.build.lib.shell.FutureCommandResult; |
35 | 36 | import com.google.devtools.build.lib.shell.TimeoutKillableObserver; |
36 | 37 | import com.google.devtools.build.lib.vfs.FileSystemUtils; |
37 | 38 | import com.google.devtools.build.lib.vfs.Path; |
|
49 | 50 | import io.grpc.StatusException; |
50 | 51 | import io.grpc.protobuf.StatusProto; |
51 | 52 | import io.grpc.stub.StreamObserver; |
| 53 | +import java.io.ByteArrayInputStream; |
52 | 54 | import java.io.ByteArrayOutputStream; |
53 | 55 | import java.io.File; |
54 | 56 | import java.io.IOException; |
|
71 | 73 | final class ExecutionServer extends ExecutionImplBase { |
72 | 74 | private static final Logger logger = Logger.getLogger(ExecutionServer.class.getName()); |
73 | 75 |
|
| 76 | + private final Object lock = new Object(); |
| 77 | + |
74 | 78 | // The name of the container image entry in the Platform proto |
75 | 79 | // (see third_party/googleapis/devtools/remoteexecution/*/remote_execution.proto and |
76 | 80 | // experimental_remote_platform_override in |
@@ -200,41 +204,41 @@ private ActionResult execute(Action action, Path execRoot) |
200 | 204 | execRoot.getPathString()); |
201 | 205 | long startTime = System.currentTimeMillis(); |
202 | 206 | CommandResult cmdResult = null; |
203 | | - // Linux does not provide a safe API for a multi-threaded program to fork a subprocess. Consider |
204 | | - // the case where two threads both write an executable file and then try to execute it. It can |
205 | | - // happen that the first thread writes its executable file, with the file descriptor still |
206 | | - // being open when the second thread forks, with the fork inheriting a copy of the file |
207 | | - // descriptor. Then the first thread closes the original file descriptor, and proceeds to |
208 | | - // execute the file. At that point Linux sees an open file descriptor to the file and returns |
209 | | - // ETXTBSY (Text file busy) as an error. This race is inherent in the fork / exec duality, with |
210 | | - // fork always inheriting a copy of the file descriptor table; if there was a way to fork |
211 | | - // without copying the entire file descriptor table (e.g., only copy specific entries), we could |
212 | | - // avoid this race. |
213 | | - // |
214 | | - // I was able to reproduce this problem reliably by running significantly more threads than |
215 | | - // there are CPU cores on my workstation - the more threads the more likely it happens. |
216 | | - // |
217 | | - // As a workaround, we retry up to two times before we let the exception propagate. |
218 | | - int attempt = 0; |
219 | | - while (true) { |
| 207 | + |
| 208 | + FutureCommandResult futureCmdResult = null; |
| 209 | + synchronized (lock) { |
| 210 | + // Linux does not provide a safe API for a multi-threaded program to fork a subprocess. |
| 211 | + // Consider the case where two threads both write an executable file and then try to execute |
| 212 | + // it. It can happen that the first thread writes its executable file, with the file |
| 213 | + // descriptor still being open when the second thread forks, with the fork inheriting a copy |
| 214 | + // of the file descriptor. Then the first thread closes the original file descriptor, and |
| 215 | + // proceeds to execute the file. At that point Linux sees an open file descriptor to the file |
| 216 | + // and returns ETXTBSY (Text file busy) as an error. This race is inherent in the fork / exec |
| 217 | + // duality, with fork always inheriting a copy of the file descriptor table; if there was a |
| 218 | + // way to fork without copying the entire file descriptor table (e.g., only copy specific |
| 219 | + // entries), we could avoid this race. |
| 220 | + // |
| 221 | + // I was able to reproduce this problem reliably by running significantly more threads than |
| 222 | + // there are CPU cores on my workstation - the more threads the more likely it happens. |
| 223 | + // |
| 224 | + // As a workaround, we put a synchronized block around the fork. |
| 225 | + try { |
| 226 | + futureCmdResult = cmd |
| 227 | + .executeAsynchronously(new ByteArrayInputStream(new byte[0]), Command.NO_OBSERVER, |
| 228 | + stdoutBuffer, stderrBuffer, true, false); |
| 229 | + } catch (CommandException e) { |
| 230 | + Throwables.throwIfInstanceOf(e.getCause(), IOException.class); |
| 231 | + } |
| 232 | + } |
| 233 | + |
| 234 | + if (futureCmdResult != null) { |
220 | 235 | try { |
221 | | - cmdResult = |
222 | | - cmd.execute(Command.NO_INPUT, Command.NO_OBSERVER, stdoutBuffer, stderrBuffer, true); |
| 236 | + cmdResult = futureCmdResult.get(); |
223 | 237 | } catch (AbnormalTerminationException e) { |
224 | 238 | cmdResult = e.getResult(); |
225 | | - } catch (CommandException e) { |
226 | | - // As of this writing, the cause can only be an IOException from the underlying library. |
227 | | - IOException cause = (IOException) e.getCause(); |
228 | | - if ((attempt++ < 3) && cause.getMessage().endsWith("Text file busy")) { |
229 | | - // We wait a bit to give the other forks some time to close their open file descriptors. |
230 | | - Thread.sleep(10); |
231 | | - continue; |
232 | | - } else { |
233 | | - throw cause; |
234 | | - } |
235 | 239 | } |
236 | | - break; |
237 | 240 | } |
| 241 | + |
238 | 242 | long timeoutMillis = |
239 | 243 | action.hasTimeout() |
240 | 244 | ? Durations.toMillis(action.getTimeout()) |
|
0 commit comments