forked from microsoft/vscode-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimportTracker.ts
More file actions
147 lines (130 loc) · 6.12 KB
/
importTracker.ts
File metadata and controls
147 lines (130 loc) · 6.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
'use strict';
import { inject, injectable } from 'inversify';
import * as path from 'path';
import { TextDocument } from 'vscode';
import { captureTelemetry, sendTelemetryEvent } from '.';
import { IDocumentManager } from '../common/application/types';
import { isTestExecution } from '../common/constants';
import '../common/extensions';
import { noop } from '../common/utils/misc';
import { EventName } from './constants';
import { IImportTracker } from './types';
/*
Python has a fairly rich import statement. Originally the matching regexp was kept simple for
performance worries, but it led to false-positives due to matching things like docstrings with
phrases along the lines of "from the thing" or "import the thing". To minimize false-positives the
regexp does its best to validate the structure of the import line _within reason_. This leads to
us supporting the following (where `pkg` represents what we are actually capturing for telemetry):
- `from pkg import _`
- `from pkg import _, _`
- `from pkg import _ as _`
- `import pkg`
- `import pkg, pkg`
- `import pkg as _`
We are ignoring the following for simplicity/performance:
- `from pkg import (...)` (this includes single-line and multi-line imports with parentheses)
- `import pkg # ... and anything else with a trailing comment.`
- Non-standard whitespace separators within the import statement (i.e. more than a single space, tabs)
*/
// Matches one whole line holding a Python import statement; the pattern is anchored at both ends
// and only leading whitespace is tolerated.
// - `fromImport` captures just the top-level package of a `from pkg.sub import ...` line; the dotted
//   submodule path is matched but not captured.
// - `importImport` captures the comma-separated package list of an `import pkg, pkg` line (an
//   optional trailing ` as name` is matched but not captured).
// A dotted plain import such as `import pkg.sub` does not match, because `\w+` excludes `.`.
const ImportRegEx = /^\s*(from (?<fromImport>\w+)(?:\.\w+)* import \w+(?:, \w+)*(?: as \w+)?|import (?<importImport>\w+(?:, \w+)*)(?: as \w+)?)$/;
// Upper bound on how many lines (counted from the top of a document) are scanned for imports.
const MAX_DOCUMENT_LINES = 1000;
// Capture isTestExecution on module load so that a test can turn it off and still
// have this value set.
const testExecution = isTestExecution();
/**
 * Watches Python documents as they are opened or saved, looks for import statements near the top
 * of each one, and reports a SHA-256 hash of every imported package name as telemetry. Each
 * package name is reported at most once per tracker instance.
 */
@injectable()
export class ImportTracker implements IImportTracker {
    /** Pending scan timers, keyed by document file name. */
    private pendingDocs = new Map<string, NodeJS.Timer | number>();
    /** Package names that have already been reported. */
    private sentMatches: Set<string> = new Set<string>();
    /** Factory for SHA-256 hashers from `hash.js`, loaded lazily via `require`. */
    // tslint:disable-next-line:no-require-imports
    private hashFn = require('hash.js').sha256;

    /**
     * Subscribes to the document manager's open and save events.
     *
     * @param documentManager Source of document events and of the currently open documents.
     */
    constructor(@inject(IDocumentManager) private documentManager: IDocumentManager) {
        const handleDocument = (doc: TextDocument) => this.onOpenedOrSavedDocument(doc);
        this.documentManager.onDidOpenTextDocument(handleDocument);
        this.documentManager.onDidSaveTextDocument(handleDocument);
    }

    /**
     * Treats every document that is already open as if it had just been opened. Outside of test
     * execution the actual scan is deferred by the scheduling timeout.
     */
    public async activate(): Promise<void> {
        for (const openDocument of this.documentManager.textDocuments) {
            this.onOpenedOrSavedDocument(openDocument);
        }
    }

    /**
     * Collects the text of the non-blank lines among the first `MAX_DOCUMENT_LINES` lines.
     *
     * @param document Document to read.
     * @returns Line texts in document order; blank and whitespace-only lines are left out.
     */
    private getDocumentLines(document: TextDocument): (string | undefined)[] {
        const lineLimit = Math.min(document.lineCount, MAX_DOCUMENT_LINES);
        const collected: (string | undefined)[] = [];
        for (let index = 0; index < lineLimit; index += 1) {
            const current = document.lineAt(index);
            if (!current || current.isEmptyOrWhitespace) {
                continue;
            }
            const text = current.text;
            if (text) {
                collected.push(text);
            }
        }
        return collected;
    }

    /**
     * Event entry point: schedules a scan when the document's file name ends in `.py`.
     *
     * @param document Document that was opened or saved.
     */
    private onOpenedOrSavedDocument(document: TextDocument) {
        // Anything that is not a Python file is ignored.
        if (path.extname(document.fileName) !== '.py') {
            return;
        }
        this.scheduleDocument(document);
    }

    /**
     * Cancels any pending scan for this document and arranges a new one: immediately during test
     * execution, otherwise after a five second delay.
     *
     * @param document Document to scan.
     */
    private scheduleDocument(document: TextDocument) {
        const key = document.fileName;

        // Drop a scan that is still waiting for the same file.
        const pending = this.pendingDocs.get(key);
        if (pending) {
            // tslint:disable-next-line: no-any
            clearTimeout(pending as any);
            this.pendingDocs.delete(key);
        }

        if (testExecution) {
            // Tests need the check to happen synchronously.
            this.checkDocument(document);
            return;
        }

        // Delay by five seconds so repeated open/save events for one file collapse into one scan.
        const timer = setTimeout(() => this.checkDocument(document), 5000);
        this.pendingDocs.set(key, timer);
    }

    /**
     * Performs the scan of one document: clears its pending entry, reads its lines and looks for
     * imports in them. Wrapped by `captureTelemetry` with the `HASHED_PACKAGE_PERF` event.
     *
     * @param document Document to scan.
     */
    @captureTelemetry(EventName.HASHED_PACKAGE_PERF)
    private checkDocument(document: TextDocument) {
        this.pendingDocs.delete(document.fileName);
        this.lookForImports(this.getDocumentLines(document));
    }

    /**
     * Reports a package name as telemetry, hashed so that a user's private package name is never
     * seen in the clear. Names that were already reported are skipped, which also avoids the
     * cost of hashing them again.
     *
     * @param packageName Package name exactly as captured from the import line.
     */
    private sendTelemetry(packageName: string) {
        if (!this.sentMatches.has(packageName)) {
            this.sentMatches.add(packageName);
            const hashedName = this.hashFn()
                .update(packageName)
                .digest('hex');
            sendTelemetryEvent(EventName.HASHED_PACKAGE_NAME, undefined, { hashedName });
        }
    }

    /**
     * Runs the import regular expression over each line and reports every package it captures.
     * Any error raised while doing so is swallowed, since this is only telemetry.
     *
     * @param lines Candidate lines; falsy entries are skipped.
     */
    private lookForImports(lines: (string | undefined)[]) {
        try {
            for (const candidate of lines) {
                if (!candidate) {
                    continue;
                }
                const found = ImportRegEx.exec(candidate);
                if (found === null || found.groups === undefined) {
                    continue;
                }
                const { fromImport, importImport } = found.groups;
                if (fromImport !== undefined) {
                    // `from pkg ...`
                    this.sendTelemetry(fromImport);
                    continue;
                }
                if (importImport !== undefined) {
                    // `import pkg1, pkg2, ...`
                    for (const rawName of importImport.split(',')) {
                        this.sendTelemetry(rawName.trim());
                    }
                }
            }
        } catch {
            // Failures are deliberately ignored: this is best-effort telemetry.
            noop();
        }
    }
}