Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// See https://go.microsoft.com/fwlink/?LinkId=827846
// for the documentation about the extensions.json format
"recommendations": [
"eg2.tslint",
"ms-vscode.vscode-typescript-tslint-plugin",
"editorconfig.editorconfig"
]
}
8 changes: 4 additions & 4 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@
"--ui=tdd",
"--recursive",
"--colors",
"--timeout=300000",
"--grep="
//"--grep", "<suite name>",
"--timeout=300000"
],
"outFiles": [
"${workspaceFolder}/out/**/*.js"
Expand All @@ -152,8 +152,8 @@
"--ui=tdd",
"--recursive",
"--colors",
"--timeout=300000",
"--grep="
//"--grep", "<suite name>",
"--timeout=300000"
],
"outFiles": [
"${workspaceFolder}/out/**/*.js"
Expand Down
2 changes: 1 addition & 1 deletion news/3 Code Health/4852.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Hash imports so we can get every import and not just known ones. Should happen at most once every 5 seconds.
Hash imports of top-level packages to see what users need supported.
3 changes: 1 addition & 2 deletions src/client/telemetry/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ export enum EventName {
SELECT_LINTER = 'LINTING.SELECT',

LINTER_NOT_INSTALLED_PROMPT = 'LINTER_NOT_INSTALLED_PROMPT',
KNOWN_IMPORT_FROM_FILE = 'KNOWN_IMPORT_FROM_FILE',
KNOWN_IMPORT_FROM_EXECUTION = 'KNOWN_IMPORT_FROM_EXECUTION'
HASHED_PACKAGE_NAME = 'HASHED_PACKAGE_NAME'
}

export enum PlatformErrors {
Expand Down
94 changes: 44 additions & 50 deletions src/client/telemetry/importTracker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,27 @@ import { sendTelemetryEvent } from '.';
import { noop } from '../../test/core';
import { IDocumentManager } from '../common/application/types';
import { isTestExecution } from '../common/constants';
import { IHistoryProvider } from '../datascience/types';
import { ICodeExecutionManager } from '../terminals/types';
import { EventName } from './constants';
import { IImportTracker } from './types';

const ImportRegEx = /^(?!['"#]).*from\s+([a-zA-Z0-9_\.]+)\s+import.*(?!['"])|^(?!['"#]).*import\s+([a-zA-Z0-9_\., ]+).*(?!['"])/;
/*
Python has a fairly rich import statement, but luckily we only care about top-level (public) packages.
That means we can ignore:

- Relative imports
- `as` rebindings
- The`fromlist`

We can also ignore multi-line/parenthesized imports for simplicity since we don't' need 100% accuracy,
just enough to be able to tell what packages user's rely on to make sure we are covering our bases
in terms of support.

We can rely on the fact that the use of the `from` and `import` keywords from the start of a line are
only usable for imports (`from` can also be used when raising an exception, but `raise` would be the
first keyword on a line in that instance). We also get to rely on the fact that we only care about
the top-level package, keeping the regex extremely greedy and simple for performance.
*/
const ImportRegEx = /^\s*(from\s+(?<fromImport>\w+)|import\s+(?<importImport>(\w+(?:\s*,\s*)?)+))/;
const MAX_DOCUMENT_LINES = 1000;

// Capture isTestExecution on module load so that a test can turn it off and still
Expand All @@ -32,25 +47,14 @@ export class ImportTracker implements IImportTracker {
private hashFn = require('hash.js').sha256;

constructor(
@inject(IDocumentManager) private documentManager: IDocumentManager,
@inject(IHistoryProvider) private historyProvider: IHistoryProvider,
@inject(ICodeExecutionManager) private executionManager: ICodeExecutionManager
@inject(IDocumentManager) private documentManager: IDocumentManager
) {
// Sign up for document open/save events so we can track known imports
this.documentManager.onDidOpenTextDocument((t) => this.onOpenedOrSavedDocument(t));
this.documentManager.onDidSaveTextDocument((t) => this.onOpenedOrSavedDocument(t));

// Sign up for history execution events (user can input code here too)
this.historyProvider.onExecutedCode(c => this.onExecutedCode(c));

// Sign up for terminal execution events (user can send code to the terminal)
// However we won't get any text typed directly into the terminal. Not part of the VS code API
// Could potentially hook stdin? Not sure that's possible.
this.executionManager.onExecutedCode(c => this.onExecutedCode(c));
}

public async activate(): Promise<void> {
// Act like all of our open documents just opened. Our timeout will make sure this is delayed
// Act like all of our open documents just opened; our timeout will make sure this is delayed.
this.documentManager.textDocuments.forEach(d => this.onOpenedOrSavedDocument(d));
}

Expand All @@ -66,9 +70,8 @@ export class ImportTracker implements IImportTracker {
}

private onOpenedOrSavedDocument(document: TextDocument) {
// Make sure this is a python file.
// Make sure this is a Python file.
if (path.extname(document.fileName) === '.py') {
// Parse the contents of the document, looking for import matches on each line
this.scheduleDocument(document);
}
}
Expand All @@ -94,48 +97,39 @@ export class ImportTracker implements IImportTracker {
private checkDocument(document: TextDocument) {
this.pendingDocs.delete(document.fileName);
const lines = this.getDocumentLines(document);
this.lookForImports(lines, EventName.KNOWN_IMPORT_FROM_FILE);
this.lookForImports(lines);
}

private onExecutedCode(code: string) {
const lines = code.splitLines({ trim: true, removeEmptyEntries: true });
this.lookForImports(lines, EventName.KNOWN_IMPORT_FROM_EXECUTION);
private sendTelemetry(packageName: string) {
// No need to send duplicate telemetry or waste CPU cycles on an unneeded hash.
if (this.sentMatches.has(packageName)) {
return;
}
this.sentMatches.add(packageName);
// Hash the package name so that we will never accidentally see a
// user's private package name.
const hash = this.hashFn().update(packageName).digest('hex');
sendTelemetryEvent(EventName.HASHED_PACKAGE_NAME, undefined, {hashedName: hash});
}

private lookForImports(lines: (string | undefined)[], eventName: string) {
private lookForImports(lines: (string | undefined)[]) {
try {
// Use a regex to parse each line, looking for imports
const matches: Set<string> = new Set<string>();
for (const s of lines) {
const match = s ? ImportRegEx.exec(s) : null;
if (match && match.length > 2) {
// Could be a from or a straight import. from is the first entry.
const actual = match[1] ? match[1] : match[2];

// Use just the bits to the left of ' as '
const left = actual.split(' as ')[0];

// Now split this based on, and chop off all .
const baseNames = left.split(',').map(l => l.split('.')[0].trim());
baseNames.forEach(l => {
// Hash this value and save this in our import
const hash = this.hashFn().update(l).digest('hex');
if (!this.sentMatches.has(hash)) {
matches.add(hash);
}
});
if (match !== null && match.groups !== undefined) {
if (match.groups.fromImport !== undefined) {
// `from pkg ...`
this.sendTelemetry(match.groups.fromImport);
} else if (match.groups.importImport !== undefined) {
// `import pkg1, pkg2, ...`
const packageNames = match.groups.importImport.split(',').map(rawPackageName => rawPackageName.trim());
// Can't pass in `this.sendTelemetry` directly as that rebinds `this`.
packageNames.forEach(p => this.sendTelemetry(p));
Comment thread
brettcannon marked this conversation as resolved.
}
}
}

// For each unique match, emit a new telemetry event.
matches.forEach(s => {
sendTelemetryEvent(
eventName === EventName.KNOWN_IMPORT_FROM_FILE ? EventName.KNOWN_IMPORT_FROM_FILE : EventName.KNOWN_IMPORT_FROM_EXECUTION,
0,
{ import: s });
this.sentMatches.add(s);
});
} catch {
// Don't care about failures since this is just telemetry.
noop();
}
}
Expand Down
3 changes: 1 addition & 2 deletions src/client/telemetry/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,7 @@ export interface IEventNamePropertyMapping {
[EventName.FORMAT_SORT_IMPORTS]: never | undefined;
[EventName.GO_TO_OBJECT_DEFINITION]: never | undefined;
[EventName.HOVER_DEFINITION]: never | undefined;
[EventName.KNOWN_IMPORT_FROM_FILE]: { import: string };
[EventName.KNOWN_IMPORT_FROM_EXECUTION]: { import: string };
[EventName.HASHED_PACKAGE_NAME]: { hashedName: string };
[EventName.LINTER_NOT_INSTALLED_PROMPT]: LinterInstallPromptTelemetry;
[EventName.PYTHON_INSTALL_PACKAGE]: { installer: string };
[EventName.LINTING]: LintingTelemetry;
Expand Down
Loading