Skip to content

Commit ca08e17

Browse files
authored
Fix some async hangs in the functional tests (#5007)
For #4992 Two root causes tree-kill spawns a process it never cleans up after on Windows liveshare guest shutdown was not using a promise based method, so it would shutdown and then immediately exit. Meaning the test would exit before the shutdown finished. Also fixed an intermittent problem with Jupyter never connecting to us. Forcing two retries now instead.
1 parent acad9cd commit ca08e17

13 files changed

Lines changed: 214 additions & 78 deletions

File tree

build/.mocha.functional.opts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
--ui=tdd
55
--recursive
66
--colors
7-
--timeout=120000
87
--exit
8+
--timeout=120000
99
--reporter mocha-multi-reporters
1010
--reporter-options configFile=build/.mocha-multi-reporters.config

build/ci/vscode-python-ci.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,41 +213,47 @@ jobs:
213213
NeedsPythonFunctionalReqs: true
214214
# This tells the functional tests to not mock out Jupyter...
215215
VSCODE_PYTHON_ROLLING: true
216+
VSC_PYTHON_FORCE_LOGGING: true
216217
'Linux-Py3.7 Functional':
217218
PythonVersion: '3.7'
218219
VMImageName: 'ubuntu-16.04'
219220
TestsToRun: 'testfunctional'
220221
NeedsPythonTestReqs: true
221222
NeedsPythonFunctionalReqs: true
222223
VSCODE_PYTHON_ROLLING: true
224+
VSC_PYTHON_FORCE_LOGGING: true
223225
'Mac-Py3.7 Functional':
224226
PythonVersion: '3.7'
225227
VMImageName: 'macos-10.13'
226228
TestsToRun: 'testfunctional'
227229
NeedsPythonTestReqs: true
228230
NeedsPythonFunctionalReqs: true
229231
VSCODE_PYTHON_ROLLING: true
232+
VSC_PYTHON_FORCE_LOGGING: true
230233
'Windows-Py3.6 Functional':
231234
PythonVersion: '3.6'
232235
VMImageName: 'vs2017-win2016'
233236
TestsToRun: 'testfunctional'
234237
NeedsPythonTestReqs: true
235238
NeedsPythonFunctionalReqs: true
236239
VSCODE_PYTHON_ROLLING: true
240+
VSC_PYTHON_FORCE_LOGGING: true
237241
'Linux-Py3.6 Functional':
238242
PythonVersion: '3.6'
239243
VMImageName: 'ubuntu-16.04'
240244
TestsToRun: 'testfunctional'
241245
NeedsPythonTestReqs: true
242246
NeedsPythonFunctionalReqs: true
243247
VSCODE_PYTHON_ROLLING: true
248+
VSC_PYTHON_FORCE_LOGGING: true
244249
'Mac-Py3.6 Functional':
245250
PythonVersion: '3.6'
246251
VMImageName: 'macos-10.13'
247252
TestsToRun: 'testfunctional'
248253
NeedsPythonTestReqs: true
249254
NeedsPythonFunctionalReqs: true
250255
VSCODE_PYTHON_ROLLING: true
256+
VSC_PYTHON_FORCE_LOGGING: true
251257

252258
pool:
253259
vmImage: $(VMImageName)

news/2 Fixes/4992.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Force mocha to exit so that hanging promises (which might be from Jupyter) don't stop tests from running.
1+
Fix hangs in functional tests.

package-lock.json

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2434,6 +2434,7 @@
24342434
"webpack-fix-default-import-plugin": "^1.0.3",
24352435
"webpack-merge": "^4.1.4",
24362436
"webpack-node-externals": "^1.7.2",
2437+
"why-is-node-running": "^2.0.3",
24372438
"yargs": "^12.0.2"
24382439
},
24392440
"__metadata": {

src/client/common/process/proc.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
3-
import { ChildProcess, exec, spawn } from 'child_process';
3+
import { ChildProcess, exec, execSync, spawn } from 'child_process';
44
import { Observable } from 'rxjs/Observable';
5-
import * as tk from 'tree-kill';
5+
66
import { IDisposable } from '../types';
77
import { createDeferred } from '../utils/async';
88
import { EnvironmentVariables } from '../variables/types';
@@ -32,7 +32,12 @@ export class ProcessService implements IProcessService, IDisposable {
3232
}
3333
public static kill(pid: number): void {
3434
try {
35-
tk(pid);
35+
if (process.platform === 'win32') {
36+
// Windows doesn't support SIGTERM, so execute taskkill to kill the process
37+
execSync(`taskkill /pid ${pid} /T /F`);
38+
} else {
39+
process.kill(pid);
40+
}
3641
} catch {
3742
// Ignore.
3843
}
@@ -103,7 +108,10 @@ export class ProcessService implements IProcessService, IDisposable {
103108
out: output,
104109
dispose: () => {
105110
if (proc && !proc.killed) {
106-
tk(proc.pid);
111+
ProcessService.kill(proc.pid);
112+
}
113+
if (proc) {
114+
proc.unref();
107115
}
108116
}
109117
};

src/client/datascience/jupyter/jupyterExecution.ts

Lines changed: 96 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import {
3535
} from '../types';
3636
import { JupyterConnection, JupyterServerInfo } from './jupyterConnection';
3737
import { JupyterKernelSpec } from './jupyterKernelSpec';
38+
import { JupyterWaitForIdleError } from './jupyterWaitForIdleError';
3839

3940
enum ModuleExistsResult {
4041
NotFound,
@@ -124,66 +125,65 @@ export class JupyterExecutionBase implements IJupyterExecution {
124125
return this.isNotebookSupported(cancelToken);
125126
}
126127

128+
//tslint:disable:cyclomatic-complexity
127129
public connectToNotebookServer(options?: INotebookServerOptions, cancelToken?: CancellationToken): Promise<INotebookServer | undefined> {
128130
// Return nothing if we cancel
129131
return Cancellation.race(async () => {
130-
let connection: IConnection;
131-
let kernelSpec: IJupyterKernelSpec | undefined;
132-
133-
// If our uri is undefined or if it's set to local launch we need to launch a server locally
134-
if (!options || !options.uri) {
135-
const launchResults = await this.startNotebookServer(options && options.useDefaultConfig ? true : false, cancelToken);
136-
if (launchResults) {
137-
connection = launchResults.connection;
138-
kernelSpec = launchResults.kernelSpec;
139-
} else {
140-
// Throw a cancellation error if we were canceled.
141-
Cancellation.throwIfCanceled(cancelToken);
142-
143-
// Otherwise we can't connect
144-
throw new Error(localize.DataScience.jupyterNotebookFailure().format(''));
145-
}
146-
} else {
147-
// If we have a URI spec up a connection info for it
148-
connection = this.createRemoteConnectionInfo(options.uri);
149-
kernelSpec = undefined;
150-
}
151-
152-
try {
153-
// If we don't have a kernel spec yet, check using our current connection
154-
if (!kernelSpec) {
155-
kernelSpec = await this.getMatchingKernelSpec(connection, cancelToken);
156-
}
157-
158-
// If still not found, log an error (this seems possible for some people, so use the default)
159-
if (!kernelSpec) {
160-
this.logger.logError(localize.DataScience.jupyterKernelSpecNotFound());
161-
}
162-
163-
// Try to connect to our jupyter process
164-
const result = this.serviceContainer.get<INotebookServer>(INotebookServer);
165-
const info = await this.interpreterService.getActiveInterpreter();
166-
// Populate the launch info that we are starting our server with
167-
const launchInfo: INotebookServerLaunchInfo = {
168-
connectionInfo: connection,
169-
currentInterpreter: info,
170-
kernelSpec: kernelSpec,
171-
usingDarkTheme: options && options.usingDarkTheme ? options.usingDarkTheme : false,
172-
workingDir: options ? options.workingDir : undefined,
173-
uri: options ? options.uri : undefined,
174-
purpose: options ? options.purpose : uuid()
175-
};
176-
await result.connect(launchInfo, cancelToken);
177-
sendTelemetryEvent(launchInfo.uri ? Telemetry.ConnectRemoteJupyter : Telemetry.ConnectLocalJupyter);
178-
return result;
179-
} catch (err) {
180-
// Something else went wrong
181-
if (options && options.uri) {
182-
sendTelemetryEvent(Telemetry.ConnectRemoteFailedJupyter);
183-
throw new Error(localize.DataScience.jupyterNotebookRemoteConnectFailed().format(connection.baseUrl, err));
184-
} else {
185-
sendTelemetryEvent(Telemetry.ConnectFailedJupyter);
186-
throw new Error(localize.DataScience.jupyterNotebookConnectFailed().format(connection.baseUrl, err));
132+
let result: INotebookServer | undefined;
133+
let startInfo: {connection: IConnection; kernelSpec: IJupyterKernelSpec | undefined} | undefined;
134+
traceInfo(`Connecting to ${options ? options.purpose : 'unknown type of'} server`);
135+
const interpreter = await this.interpreterService.getActiveInterpreter();
136+
137+
// Try to connect to our jupyter process. Give it at most 2 tries.
138+
let tryCount = 0;
139+
while (tryCount < 2) {
140+
try {
141+
// Start or connect to the process
142+
startInfo = await this.startOrConnect(options, cancelToken);
143+
144+
// Create a server that we will then attempt to connect to.
145+
result = this.serviceContainer.get<INotebookServer>(INotebookServer);
146+
147+
// Populate the launch info that we are starting our server with
148+
const launchInfo: INotebookServerLaunchInfo = {
149+
connectionInfo: startInfo.connection,
150+
currentInterpreter: interpreter,
151+
kernelSpec: startInfo.kernelSpec,
152+
usingDarkTheme: options && options.usingDarkTheme ? options.usingDarkTheme : false,
153+
workingDir: options ? options.workingDir : undefined,
154+
uri: options ? options.uri : undefined,
155+
purpose: options ? options.purpose : uuid()
156+
};
157+
158+
traceInfo(`Connecting to process for ${options ? options.purpose : 'unknown type of'} server`);
159+
await result.connect(launchInfo, cancelToken);
160+
traceInfo(`Connection complete for ${options ? options.purpose : 'unknown type of'} server`);
161+
162+
sendTelemetryEvent(launchInfo.uri ? Telemetry.ConnectRemoteJupyter : Telemetry.ConnectLocalJupyter);
163+
return result;
164+
} catch (err) {
165+
// Cleanup after ourselves. server may be running partially.
166+
if (result) {
167+
traceInfo('Killing server because of error');
168+
await result.dispose();
169+
}
170+
if (err instanceof JupyterWaitForIdleError && tryCount < 2) {
171+
// Special case. This sometimes happens where jupyter doesn't ever connect. Cleanup after
172+
// ourselves and propagate the failure outwards.
173+
traceInfo('Retry because of wait for idle problem.');
174+
tryCount += 1;
175+
} else if (startInfo) {
176+
// Something else went wrong
177+
if (options && options.uri) {
178+
sendTelemetryEvent(Telemetry.ConnectRemoteFailedJupyter);
179+
throw new Error(localize.DataScience.jupyterNotebookRemoteConnectFailed().format(startInfo.connection.baseUrl, err));
180+
} else {
181+
sendTelemetryEvent(Telemetry.ConnectFailedJupyter);
182+
throw new Error(localize.DataScience.jupyterNotebookConnectFailed().format(startInfo.connection.baseUrl, err));
183+
}
184+
} else {
185+
throw err;
186+
}
187187
}
188188
}
189189
}, cancelToken);
@@ -258,6 +258,45 @@ export class JupyterExecutionBase implements IJupyterExecution {
258258
}
259259
}
260260

261+
private async startOrConnect(options?: INotebookServerOptions, cancelToken?: CancellationToken) : Promise<{connection: IConnection; kernelSpec: IJupyterKernelSpec | undefined}> {
262+
let connection: IConnection | undefined;
263+
let kernelSpec: IJupyterKernelSpec | undefined;
264+
265+
// If our uri is undefined or if it's set to local launch we need to launch a server locally
266+
if (!options || !options.uri) {
267+
traceInfo(`Launching ${options ? options.purpose : 'unknown type of'} server`);
268+
const launchResults = await this.startNotebookServer(options && options.useDefaultConfig ? true : false, cancelToken);
269+
if (launchResults) {
270+
connection = launchResults.connection;
271+
kernelSpec = launchResults.kernelSpec;
272+
} else {
273+
// Throw a cancellation error if we were canceled.
274+
Cancellation.throwIfCanceled(cancelToken);
275+
276+
// Otherwise we can't connect
277+
throw new Error(localize.DataScience.jupyterNotebookFailure().format(''));
278+
}
279+
} else {
280+
// If we have a URI spec up a connection info for it
281+
connection = this.createRemoteConnectionInfo(options.uri);
282+
kernelSpec = undefined;
283+
}
284+
285+
// If we don't have a kernel spec yet, check using our current connection
286+
if (!kernelSpec && connection.localLaunch) {
287+
traceInfo(`Getting kernel specs for ${options ? options.purpose : 'unknown type of'} server`);
288+
kernelSpec = await this.getMatchingKernelSpec(connection, cancelToken);
289+
}
290+
291+
// If still not found, log an error (this seems possible for some people, so use the default)
292+
if (!kernelSpec && connection.localLaunch) {
293+
this.logger.logError(localize.DataScience.jupyterKernelSpecNotFound());
294+
}
295+
296+
// Return the data we found.
297+
return {connection, kernelSpec};
298+
}
299+
261300
private createRemoteConnectionInfo = (uri: string): IConnection => {
262301
let url: URL;
263302
try {

src/client/datascience/jupyter/jupyterServer.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import { CancellationToken } from 'vscode-jsonrpc';
1515

1616
import { ILiveShareApi } from '../../common/application/types';
1717
import { CancellationError } from '../../common/cancellation';
18-
import { traceWarning } from '../../common/logger';
18+
import { traceInfo, traceWarning } from '../../common/logger';
1919
import { IAsyncDisposableRegistry, IConfigurationService, IDisposableRegistry, ILogger } from '../../common/types';
2020
import { createDeferred, Deferred, sleep } from '../../common/utils/async';
2121
import * as localize from '../../common/utils/localize';
@@ -134,7 +134,7 @@ export class JupyterServerBase implements INotebookServer {
134134
}
135135

136136
public async connect(launchInfo: INotebookServerLaunchInfo, cancelToken?: CancellationToken): Promise<void> {
137-
this.logger.logInformation(`Connecting server ${this.id}`);
137+
traceInfo(`Connecting server ${this.id}`);
138138

139139
// Save our launch info
140140
this.launchInfo = launchInfo;
@@ -154,15 +154,21 @@ export class JupyterServerBase implements INotebookServer {
154154
// Start our session
155155
this.session = await this.sessionManager.startNew(launchInfo.connectionInfo, launchInfo.kernelSpec, cancelToken);
156156

157+
traceInfo(`Started session ${this.id}`);
158+
157159
if (this.session) {
158160
// Setup our start time. We reject anything that comes in before this time during execute
159161
this.sessionStartTime = Date.now();
160162

161163
// Wait for it to be ready
164+
traceInfo(`Waiting for idle ${this.id}`);
162165
await this.session.waitForIdle();
163166

167+
traceInfo(`Performing initial setup ${this.id}`);
164168
// Run our initial setup and plot magics
165169
await this.initialNotebookSetup(cancelToken);
170+
171+
traceInfo(`Finished connecting ${this.id}`);
166172
}
167173
}
168174

0 commit comments

Comments
 (0)