Skip to content

Commit ee83e54

Browse files
authored
Simplify import telemetry (microsoft#5246)
1 parent 4c2adb3 commit ee83e54

7 files changed

Lines changed: 100 additions & 121 deletions

File tree

.vscode/extensions.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// See https://go.microsoft.com/fwlink/?LinkId=827846
33
// for the documentation about the extensions.json format
44
"recommendations": [
5-
"eg2.tslint",
5+
"ms-vscode.vscode-typescript-tslint-plugin",
66
"editorconfig.editorconfig"
77
]
88
}

.vscode/launch.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@
131131
"--ui=tdd",
132132
"--recursive",
133133
"--colors",
134-
"--timeout=300000",
135-
"--grep="
134+
//"--grep", "<suite name>",
135+
"--timeout=300000"
136136
],
137137
"outFiles": [
138138
"${workspaceFolder}/out/**/*.js"
@@ -152,8 +152,8 @@
152152
"--ui=tdd",
153153
"--recursive",
154154
"--colors",
155-
"--timeout=300000",
156-
"--grep="
155+
//"--grep", "<suite name>",
156+
"--timeout=300000"
157157
],
158158
"outFiles": [
159159
"${workspaceFolder}/out/**/*.js"

news/3 Code Health/4852.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Hash imports so we can get every import and not just known ones. Should happen at most once every 5 seconds.
1+
Hash imports of top-level packages to see what users need supported.

src/client/telemetry/constants.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ export enum EventName {
7272
SELECT_LINTER = 'LINTING.SELECT',
7373

7474
LINTER_NOT_INSTALLED_PROMPT = 'LINTER_NOT_INSTALLED_PROMPT',
75-
KNOWN_IMPORT_FROM_FILE = 'KNOWN_IMPORT_FROM_FILE',
76-
KNOWN_IMPORT_FROM_EXECUTION = 'KNOWN_IMPORT_FROM_EXECUTION'
75+
HASHED_PACKAGE_NAME = 'HASHED_PACKAGE_NAME'
7776
}
7877

7978
export enum PlatformErrors {

src/client/telemetry/importTracker.ts

Lines changed: 44 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,27 @@ import { sendTelemetryEvent } from '.';
1111
import { noop } from '../../test/core';
1212
import { IDocumentManager } from '../common/application/types';
1313
import { isTestExecution } from '../common/constants';
14-
import { IHistoryProvider } from '../datascience/types';
15-
import { ICodeExecutionManager } from '../terminals/types';
1614
import { EventName } from './constants';
1715
import { IImportTracker } from './types';
1816

19-
const ImportRegEx = /^(?!['"#]).*from\s+([a-zA-Z0-9_\.]+)\s+import.*(?!['"])|^(?!['"#]).*import\s+([a-zA-Z0-9_\., ]+).*(?!['"])/;
17+
/*
18+
Python has a fairly rich import statement, but luckily we only care about top-level (public) packages.
19+
That means we can ignore:
20+
21+
- Relative imports
22+
- `as` rebindings
23+
- The`fromlist`
24+
25+
We can also ignore multi-line/parenthesized imports for simplicity since we don't' need 100% accuracy,
26+
just enough to be able to tell what packages user's rely on to make sure we are covering our bases
27+
in terms of support.
28+
29+
We can rely on the fact that the use of the `from` and `import` keywords from the start of a line are
30+
only usable for imports (`from` can also be used when raising an exception, but `raise` would be the
31+
first keyword on a line in that instance). We also get to rely on the fact that we only care about
32+
the top-level package, keeping the regex extremely greedy and simple for performance.
33+
*/
34+
const ImportRegEx = /^\s*(from\s+(?<fromImport>\w+)|import\s+(?<importImport>(\w+(?:\s*,\s*)?)+))/;
2035
const MAX_DOCUMENT_LINES = 1000;
2136

2237
// Capture isTestExecution on module load so that a test can turn it off and still
@@ -32,25 +47,14 @@ export class ImportTracker implements IImportTracker {
3247
private hashFn = require('hash.js').sha256;
3348

3449
constructor(
35-
@inject(IDocumentManager) private documentManager: IDocumentManager,
36-
@inject(IHistoryProvider) private historyProvider: IHistoryProvider,
37-
@inject(ICodeExecutionManager) private executionManager: ICodeExecutionManager
50+
@inject(IDocumentManager) private documentManager: IDocumentManager
3851
) {
39-
// Sign up for document open/save events so we can track known imports
4052
this.documentManager.onDidOpenTextDocument((t) => this.onOpenedOrSavedDocument(t));
4153
this.documentManager.onDidSaveTextDocument((t) => this.onOpenedOrSavedDocument(t));
42-
43-
// Sign up for history execution events (user can input code here too)
44-
this.historyProvider.onExecutedCode(c => this.onExecutedCode(c));
45-
46-
// Sign up for terminal execution events (user can send code to the terminal)
47-
// However we won't get any text typed directly into the terminal. Not part of the VS code API
48-
// Could potentially hook stdin? Not sure that's possible.
49-
this.executionManager.onExecutedCode(c => this.onExecutedCode(c));
5054
}
5155

5256
public async activate(): Promise<void> {
53-
// Act like all of our open documents just opened. Our timeout will make sure this is delayed
57+
// Act like all of our open documents just opened; our timeout will make sure this is delayed.
5458
this.documentManager.textDocuments.forEach(d => this.onOpenedOrSavedDocument(d));
5559
}
5660

@@ -66,9 +70,8 @@ export class ImportTracker implements IImportTracker {
6670
}
6771

6872
private onOpenedOrSavedDocument(document: TextDocument) {
69-
// Make sure this is a python file.
73+
// Make sure this is a Python file.
7074
if (path.extname(document.fileName) === '.py') {
71-
// Parse the contents of the document, looking for import matches on each line
7275
this.scheduleDocument(document);
7376
}
7477
}
@@ -94,48 +97,39 @@ export class ImportTracker implements IImportTracker {
9497
private checkDocument(document: TextDocument) {
9598
this.pendingDocs.delete(document.fileName);
9699
const lines = this.getDocumentLines(document);
97-
this.lookForImports(lines, EventName.KNOWN_IMPORT_FROM_FILE);
100+
this.lookForImports(lines);
98101
}
99102

100-
private onExecutedCode(code: string) {
101-
const lines = code.splitLines({ trim: true, removeEmptyEntries: true });
102-
this.lookForImports(lines, EventName.KNOWN_IMPORT_FROM_EXECUTION);
103+
private sendTelemetry(packageName: string) {
104+
// No need to send duplicate telemetry or waste CPU cycles on an unneeded hash.
105+
if (this.sentMatches.has(packageName)) {
106+
return;
107+
}
108+
this.sentMatches.add(packageName);
109+
// Hash the package name so that we will never accidentally see a
110+
// user's private package name.
111+
const hash = this.hashFn().update(packageName).digest('hex');
112+
sendTelemetryEvent(EventName.HASHED_PACKAGE_NAME, undefined, {hashedName: hash});
103113
}
104114

105-
private lookForImports(lines: (string | undefined)[], eventName: string) {
115+
private lookForImports(lines: (string | undefined)[]) {
106116
try {
107-
// Use a regex to parse each line, looking for imports
108-
const matches: Set<string> = new Set<string>();
109117
for (const s of lines) {
110118
const match = s ? ImportRegEx.exec(s) : null;
111-
if (match && match.length > 2) {
112-
// Could be a from or a straight import. from is the first entry.
113-
const actual = match[1] ? match[1] : match[2];
114-
115-
// Use just the bits to the left of ' as '
116-
const left = actual.split(' as ')[0];
117-
118-
// Now split this based on, and chop off all .
119-
const baseNames = left.split(',').map(l => l.split('.')[0].trim());
120-
baseNames.forEach(l => {
121-
// Hash this value and save this in our import
122-
const hash = this.hashFn().update(l).digest('hex');
123-
if (!this.sentMatches.has(hash)) {
124-
matches.add(hash);
125-
}
126-
});
119+
if (match !== null && match.groups !== undefined) {
120+
if (match.groups.fromImport !== undefined) {
121+
// `from pkg ...`
122+
this.sendTelemetry(match.groups.fromImport);
123+
} else if (match.groups.importImport !== undefined) {
124+
// `import pkg1, pkg2, ...`
125+
const packageNames = match.groups.importImport.split(',').map(rawPackageName => rawPackageName.trim());
126+
// Can't pass in `this.sendTelemetry` directly as that rebinds `this`.
127+
packageNames.forEach(p => this.sendTelemetry(p));
128+
}
127129
}
128130
}
129-
130-
// For each unique match, emit a new telemetry event.
131-
matches.forEach(s => {
132-
sendTelemetryEvent(
133-
eventName === EventName.KNOWN_IMPORT_FROM_FILE ? EventName.KNOWN_IMPORT_FROM_FILE : EventName.KNOWN_IMPORT_FROM_EXECUTION,
134-
0,
135-
{ import: s });
136-
this.sentMatches.add(s);
137-
});
138131
} catch {
132+
// Don't care about failures since this is just telemetry.
139133
noop();
140134
}
141135
}

src/client/telemetry/index.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,8 +274,7 @@ export interface IEventNamePropertyMapping {
274274
[EventName.FORMAT_SORT_IMPORTS]: never | undefined;
275275
[EventName.GO_TO_OBJECT_DEFINITION]: never | undefined;
276276
[EventName.HOVER_DEFINITION]: never | undefined;
277-
[EventName.KNOWN_IMPORT_FROM_FILE]: { import: string };
278-
[EventName.KNOWN_IMPORT_FROM_EXECUTION]: { import: string };
277+
[EventName.HASHED_PACKAGE_NAME]: { hashedName: string };
279278
[EventName.LINTER_NOT_INSTALLED_PROMPT]: LinterInstallPromptTelemetry;
280279
[EventName.PYTHON_INSTALL_PACKAGE]: { installer: string };
281280
[EventName.LINTING]: LintingTelemetry;

0 commit comments

Comments
 (0)