@@ -11,12 +11,27 @@ import { sendTelemetryEvent } from '.';
1111import { noop } from '../../test/core' ;
1212import { IDocumentManager } from '../common/application/types' ;
1313import { isTestExecution } from '../common/constants' ;
14- import { IHistoryProvider } from '../datascience/types' ;
15- import { ICodeExecutionManager } from '../terminals/types' ;
1614import { EventName } from './constants' ;
1715import { IImportTracker } from './types' ;
1816
/*
Python has a fairly rich import statement, but luckily we only care about top-level (public) packages.
That means we can ignore:

- Relative imports
- `as` rebindings
- The `fromlist`

We can also ignore multi-line/parenthesized imports for simplicity since we don't need 100% accuracy,
just enough to be able to tell what packages users rely on to make sure we are covering our bases
in terms of support.

We can rely on the fact that the use of the `from` and `import` keywords from the start of a line are
only usable for imports (`from` can also be used when raising an exception, but `raise` would be the
first keyword on a line in that instance). We also get to rely on the fact that we only care about
the top-level package, keeping the regex extremely greedy and simple for performance.

Named groups:

- `fromImport`: the first name after `from` (e.g. `numpy` for `from numpy.random import rand`).
- `importImport`: the comma-separated names directly after `import` (e.g. `os, sys` for `import os, sys`).
  Matching stops at the first character that is neither a word character nor a comma separator, so
  `import os.path` yields `os` and `import numpy as np` yields `numpy`.
*/
const ImportRegEx = /^\s*(from\s+(?<fromImport>\w+)|import\s+(?<importImport>(\w+(?:\s*,\s*)?)+))/;
2035const MAX_DOCUMENT_LINES = 1000 ;
2136
2237// Capture isTestExecution on module load so that a test can turn it off and still
@@ -32,25 +47,14 @@ export class ImportTracker implements IImportTracker {
3247 private hashFn = require ( 'hash.js' ) . sha256 ;
3348
/**
 * Subscribes to the document manager's open and save events so that every
 * opened or saved document is passed to `onOpenedOrSavedDocument`.
 *
 * @param documentManager Source of the document open/save events.
 */
constructor(
    @inject(IDocumentManager) private documentManager: IDocumentManager
) {
    this.documentManager.onDidOpenTextDocument((t) => this.onOpenedOrSavedDocument(t));
    this.documentManager.onDidSaveTextDocument((t) => this.onOpenedOrSavedDocument(t));
}
5155
/**
 * Runs every document the document manager currently lists through
 * `onOpenedOrSavedDocument`.
 */
public async activate(): Promise<void> {
    // Act like all of our open documents just opened; our timeout will make sure this is delayed.
    this.documentManager.textDocuments.forEach(d => this.onOpenedOrSavedDocument(d));
}
5660
@@ -66,9 +70,8 @@ export class ImportTracker implements IImportTracker {
6670 }
6771
/**
 * Schedules a document for import scanning when its file name has a `.py` extension.
 *
 * @param document The document that was opened or saved.
 */
private onOpenedOrSavedDocument(document: TextDocument) {
    // Make sure this is a Python file.
    if (path.extname(document.fileName) === '.py') {
        this.scheduleDocument(document);
    }
}
@@ -94,48 +97,39 @@ export class ImportTracker implements IImportTracker {
/**
 * Removes the document from the pending set, then scans its lines for imports.
 *
 * @param document The document whose lines should be scanned.
 */
private checkDocument(document: TextDocument) {
    this.pendingDocs.delete(document.fileName);
    const lines = this.getDocumentLines(document);
    this.lookForImports(lines);
}
99102
/**
 * Sends a telemetry event carrying the SHA-256 hex digest of a package name.
 * Names already recorded in `this.sentMatches` are skipped.
 *
 * @param packageName The package name to report.
 */
private sendTelemetry(packageName: string) {
    // No need to send duplicate telemetry or waste CPU cycles on an unneeded hash.
    if (this.sentMatches.has(packageName)) {
        return;
    }
    this.sentMatches.add(packageName);
    // Hash the package name so that we will never accidentally see a
    // user's private package name.
    const hash = this.hashFn().update(packageName).digest('hex');
    sendTelemetryEvent(EventName.HASHED_PACKAGE_NAME, undefined, { hashedName: hash });
}
104114
/**
 * Scans lines of Python source for import statements and passes each
 * package name found to `sendTelemetry`.
 *
 * Any exception raised while scanning is swallowed on purpose: this is
 * best-effort telemetry.
 *
 * @param lines Lines to scan; `undefined` and empty entries are skipped.
 */
private lookForImports(lines: (string | undefined)[]) {
    try {
        for (const s of lines) {
            const match = s ? ImportRegEx.exec(s) : null;
            if (match === null || match.groups === undefined) {
                continue;
            }
            if (match.groups.fromImport !== undefined) {
                // `from pkg ...`
                this.sendTelemetry(match.groups.fromImport);
            } else if (match.groups.importImport !== undefined) {
                // `import pkg1, pkg2, ...`
                const packageNames = match.groups.importImport
                    .split(',')
                    .map(rawPackageName => rawPackageName.trim())
                    // A trailing comma (`import a,`) leaves an empty entry behind; never report it.
                    .filter(packageName => packageName.length > 0);
                // Can't pass in `this.sendTelemetry` directly as that rebinds `this`.
                packageNames.forEach(p => this.sendTelemetry(p));
            }
        }
    } catch {
        // Don't care about failures since this is just telemetry.
        noop();
    }
}
0 commit comments