Skip to content

Commit 5ad7a59

Browse files
Add a common, dense format for classification operations to lower the cost of processing on the host side.
We now just return an array of triples to represent classified results. Each triple contains: 1) the start of the classification, 2) the length of the classification, and 3) the type of the classification. We also encode this into a comma-separated string when passing it over to the managed side (as opposed to a JSON array). That way we don't pay such a high JSON parsing cost. Instead, we can just do a string.split(",") on the encoded triples and process each element ourselves.
1 parent d1607cf commit 5ad7a59

File tree

5 files changed

+232
-115
lines changed

5 files changed

+232
-115
lines changed

src/harness/harnessLanguageService.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ module Harness.LanguageService {
241241
class ClassifierShimProxy implements ts.Classifier {
242242
constructor(private shim: ts.ClassifierShim) {
243243
}
244+
getLexicalClassifications2(text: string, lexState: ts.EndOfLineState, classifyKeywordsInGenerics?: boolean): ts.Classifications {
245+
throw new Error("NYI");
246+
}
244247
getClassificationsForLine(text: string, lexState: ts.EndOfLineState, classifyKeywordsInGenerics?: boolean): ts.ClassificationResult {
245248
var result = this.shim.getClassificationsForLine(text, lexState, classifyKeywordsInGenerics).split('\n');
246249
var entries: ts.ClassificationInfo[] = [];
@@ -306,10 +309,10 @@ module Harness.LanguageService {
306309
getSemanticClassifications(fileName: string, span: ts.TextSpan): ts.ClassifiedSpan[] {
307310
return unwrapJSONCallResult(this.shim.getSemanticClassifications(fileName, span.start, span.length));
308311
}
309-
getSyntacticClassifications2(fileName: string, span: ts.TextSpan): number[] {
312+
getSyntacticClassifications2(fileName: string, span: ts.TextSpan): ts.Classifications {
310313
return unwrapJSONCallResult(this.shim.getSyntacticClassifications2(fileName, span.start, span.length));
311314
}
312-
getSemanticClassifications2(fileName: string, span: ts.TextSpan): number[] {
315+
getSemanticClassifications2(fileName: string, span: ts.TextSpan): ts.Classifications {
313316
return unwrapJSONCallResult(this.shim.getSemanticClassifications2(fileName, span.start, span.length));
314317
}
315318
getCompletionsAtPosition(fileName: string, position: number): ts.CompletionInfo {

src/server/client.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,11 +533,11 @@ module ts.server {
533533
throw new Error("Not Implemented Yet.");
534534
}
535535

536-
getSyntacticClassifications2(fileName: string, span: TextSpan): number[] {
536+
getSyntacticClassifications2(fileName: string, span: TextSpan): Classifications {
537537
throw new Error("Not Implemented Yet.");
538538
}
539539

540-
getSemanticClassifications2(fileName: string, span: TextSpan): number[] {
540+
getSemanticClassifications2(fileName: string, span: TextSpan): Classifications {
541541
throw new Error("Not Implemented Yet.");
542542
}
543543

src/services/services.ts

Lines changed: 140 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -972,12 +972,19 @@ module ts {
972972
getSemanticDiagnostics(fileName: string): Diagnostic[];
973973
getCompilerOptionsDiagnostics(): Diagnostic[];
974974

975+
/**
976+
* @deprecated Use getSyntacticClassifications2 instead.
977+
*/
975978
getSyntacticClassifications(fileName: string, span: TextSpan): ClassifiedSpan[];
979+
980+
/**
981+
* @deprecated Use getSemanticClassifications2 instead.
982+
*/
976983
getSemanticClassifications(fileName: string, span: TextSpan): ClassifiedSpan[];
977984

978985
// The 'spans' array of the result is encoded as triples of [start, length, ClassificationType].
979-
getSyntacticClassifications2(fileName: string, span: TextSpan): number[];
980-
getSemanticClassifications2(fileName: string, span: TextSpan): number[];
986+
getSyntacticClassifications2(fileName: string, span: TextSpan): Classifications;
987+
getSemanticClassifications2(fileName: string, span: TextSpan): Classifications;
981988

982989
getCompletionsAtPosition(fileName: string, position: number): CompletionInfo;
983990
getCompletionEntryDetails(fileName: string, position: number, entryName: string): CompletionEntryDetails;
@@ -1022,6 +1029,11 @@ module ts {
10221029
dispose(): void;
10231030
}
10241031

1032+
export interface Classifications {
1033+
spans: number[],
1034+
endOfLineState: EndOfLineState
1035+
}
1036+
10251037
export interface ClassifiedSpan {
10261038
textSpan: TextSpan;
10271039
classificationType: string; // ClassificationTypeNames
@@ -1265,7 +1277,7 @@ module ts {
12651277
}
12661278

12671279
export const enum EndOfLineState {
1268-
Start,
1280+
None,
12691281
InMultiLineCommentTrivia,
12701282
InSingleQuoteStringLiteral,
12711283
InDoubleQuoteStringLiteral,
@@ -1315,8 +1327,10 @@ module ts {
13151327
* classifications which may be incorrectly categorized will be given
13161328
* back as Identifiers in order to allow the syntactic classifier to
13171329
* subsume the classification.
1330+
* @deprecated Use getLexicalClassifications2 instead.
13181331
*/
13191332
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult;
1333+
getLexicalClassifications2(text: string, endOfLineState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications;
13201334
}
13211335

13221336
/**
@@ -1501,15 +1515,17 @@ module ts {
15011515
numericLiteral = 4,
15021516
operator = 5,
15031517
stringLiteral = 6,
1504-
whiteSpace = 7,
1505-
text = 8,
1506-
punctuation = 9,
1507-
className = 10,
1508-
enumName = 11,
1509-
interfaceName = 12,
1510-
moduleName = 13,
1511-
typeParameterName = 14,
1512-
typeAlias = 15,
1518+
regularExpressionLiteral = 7,
1519+
whiteSpace = 8,
1520+
text = 9,
1521+
punctuation = 10,
1522+
className = 11,
1523+
enumName = 12,
1524+
interfaceName = 13,
1525+
moduleName = 14,
1526+
typeParameterName = 15,
1527+
typeAlias = 16,
1528+
parameterName = 17
15131529
}
15141530

15151531
/// Language Service
@@ -5833,7 +5849,7 @@ module ts {
58335849
return convertClassifications(getSemanticClassifications2(fileName, span));
58345850
}
58355851

5836-
function getSemanticClassifications2(fileName: string, span: TextSpan): number[] {
5852+
function getSemanticClassifications2(fileName: string, span: TextSpan): Classifications {
58375853
synchronizeHostData();
58385854

58395855
let sourceFile = getValidSourceFile(fileName);
@@ -5842,7 +5858,7 @@ module ts {
58425858
let result: number[] = [];
58435859
processNode(sourceFile);
58445860

5845-
return result;
5861+
return { spans: result, endOfLineState: EndOfLineState.None };
58465862

58475863
function pushClassification(start: number, length: number, type: ClassificationType) {
58485864
result.push(start);
@@ -5930,8 +5946,9 @@ module ts {
59305946
}
59315947
}
59325948

5933-
function convertClassifications(dense: number[]): ClassifiedSpan[] {
5934-
Debug.assert(dense.length % 3 === 0);
5949+
function convertClassifications(classifications: Classifications): ClassifiedSpan[] {
5950+
Debug.assert(classifications.spans.length % 3 === 0);
5951+
let dense = classifications.spans;
59355952
let result: ClassifiedSpan[] = [];
59365953
for (let i = 0, n = dense.length; i < n; i += 3) {
59375954
result.push({
@@ -5947,7 +5964,7 @@ module ts {
59475964
return convertClassifications(getSyntacticClassifications2(fileName, span));
59485965
}
59495966

5950-
function getSyntacticClassifications2(fileName: string, span: TextSpan): number[] {
5967+
function getSyntacticClassifications2(fileName: string, span: TextSpan): Classifications {
59515968
// doesn't use compiler - no need to synchronize with host
59525969
let sourceFile = syntaxTreeCache.getCurrentSourceFile(fileName);
59535970

@@ -5958,7 +5975,7 @@ module ts {
59585975
let result: number[] = [];
59595976
processElement(sourceFile);
59605977

5961-
return result;
5978+
return { spans: result, endOfLineState: EndOfLineState.None };
59625979

59635980
function pushClassification(start: number, length: number, type: ClassificationType) {
59645981
result.push(start);
@@ -6606,10 +6623,67 @@ module ts {
66066623
// if there are more cases we want the classifier to be better at.
66076624
return true;
66086625
}
6609-
6626+
6627+
function convertClassifications(classifications: Classifications, text: string): ClassificationResult {
6628+
var entries: ClassificationInfo[] = [];
6629+
let dense = classifications.spans;
6630+
let lastEnd = 0;
6631+
6632+
for (let i = 0, n = dense.length; i < n; i += 3) {
6633+
let start = dense[i];
6634+
let length = dense[i + 1];
6635+
let type = <ClassificationType>dense[i + 2];
6636+
6637+
// Make a whitespace entry between the last item and this one.
6638+
if (lastEnd >= 0) {
6639+
let whitespaceLength = start - lastEnd;
6640+
if (whitespaceLength > 0) {
6641+
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
6642+
}
6643+
}
6644+
6645+
entries.push({ length, classification: convertClassification(type) });
6646+
lastEnd = start + length;
6647+
}
6648+
6649+
let whitespaceLength = text.length - lastEnd;
6650+
if (whitespaceLength > 0) {
6651+
entries.push({ length: whitespaceLength, classification: TokenClass.Whitespace });
6652+
}
6653+
6654+
return { entries, finalLexState: classifications.endOfLineState };
6655+
}
6656+
6657+
function convertClassification(type: ClassificationType): TokenClass {
6658+
switch (type) {
6659+
case ClassificationType.comment: return TokenClass.Comment;
6660+
case ClassificationType.keyword: return TokenClass.Keyword;
6661+
case ClassificationType.numericLiteral: return TokenClass.NumberLiteral;
6662+
case ClassificationType.operator: return TokenClass.Operator;
6663+
case ClassificationType.stringLiteral: return TokenClass.StringLiteral;
6664+
case ClassificationType.whiteSpace: return TokenClass.Whitespace;
6665+
case ClassificationType.punctuation: return TokenClass.Punctuation;
6666+
case ClassificationType.identifier:
6667+
case ClassificationType.className:
6668+
case ClassificationType.enumName:
6669+
case ClassificationType.interfaceName:
6670+
case ClassificationType.moduleName:
6671+
case ClassificationType.typeParameterName:
6672+
case ClassificationType.typeAlias:
6673+
case ClassificationType.text:
6674+
case ClassificationType.parameterName:
6675+
default:
6676+
return TokenClass.Identifier;
6677+
}
6678+
}
6679+
6680+
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
6681+
return convertClassifications(getLexicalClassifications2(text, lexState, syntacticClassifierAbsent), text);
6682+
}
6683+
66106684
// If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
66116685
// we will be more conservative in order to avoid conflicting with the syntactic classifier.
6612-
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult {
6686+
function getLexicalClassifications2(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): Classifications {
66136687
let offset = 0;
66146688
let token = SyntaxKind.Unknown;
66156689
let lastNonTriviaToken = SyntaxKind.Unknown;
@@ -6652,9 +6726,9 @@ module ts {
66526726

66536727
scanner.setText(text);
66546728

6655-
let result: ClassificationResult = {
6656-
finalLexState: EndOfLineState.Start,
6657-
entries: []
6729+
let result: Classifications = {
6730+
endOfLineState: EndOfLineState.None,
6731+
spans: []
66586732
};
66596733

66606734
// We can run into an unfortunate interaction between the lexical and syntactic classifier
@@ -6767,7 +6841,7 @@ module ts {
67676841
let start = scanner.getTokenPos();
67686842
let end = scanner.getTextPos();
67696843

6770-
addResult(end - start, classFromKind(token));
6844+
addResult(start, end, classFromKind(token));
67716845

67726846
if (end >= text.length) {
67736847
if (token === SyntaxKind.StringLiteral) {
@@ -6784,7 +6858,7 @@ module ts {
67846858
// If we have an odd number of backslashes, then the multiline string is unclosed
67856859
if (numBackslashes & 1) {
67866860
let quoteChar = tokenText.charCodeAt(0);
6787-
result.finalLexState = quoteChar === CharacterCodes.doubleQuote
6861+
result.endOfLineState = quoteChar === CharacterCodes.doubleQuote
67886862
? EndOfLineState.InDoubleQuoteStringLiteral
67896863
: EndOfLineState.InSingleQuoteStringLiteral;
67906864
}
@@ -6793,37 +6867,51 @@ module ts {
67936867
else if (token === SyntaxKind.MultiLineCommentTrivia) {
67946868
// Check to see if the multiline comment was unclosed.
67956869
if (scanner.isUnterminated()) {
6796-
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
6870+
result.endOfLineState = EndOfLineState.InMultiLineCommentTrivia;
67976871
}
67986872
}
67996873
else if (isTemplateLiteralKind(token)) {
68006874
if (scanner.isUnterminated()) {
68016875
if (token === SyntaxKind.TemplateTail) {
6802-
result.finalLexState = EndOfLineState.InTemplateMiddleOrTail;
6876+
result.endOfLineState = EndOfLineState.InTemplateMiddleOrTail;
68036877
}
68046878
else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) {
6805-
result.finalLexState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
6879+
result.endOfLineState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
68066880
}
68076881
else {
68086882
Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
68096883
}
68106884
}
68116885
}
68126886
else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) {
6813-
result.finalLexState = EndOfLineState.InTemplateSubstitutionPosition;
6887+
result.endOfLineState = EndOfLineState.InTemplateSubstitutionPosition;
68146888
}
68156889
}
68166890
}
68176891

6818-
function addResult(length: number, classification: TokenClass): void {
6819-
if (length > 0) {
6820-
// If this is the first classification we're adding to the list, then remove any
6821-
// offset we have if we were continuing a construct from the previous line.
6822-
if (result.entries.length === 0) {
6823-
length -= offset;
6824-
}
6892+
function addResult(start: number, end: number, classification: ClassificationType): void {
6893+
if (classification === ClassificationType.whiteSpace) {
6894+
// Don't bother with whitespace classifications. They're not needed.
6895+
return;
6896+
}
6897+
6898+
if (start === 0 && offset > 0) {
6899+
// We're classifying the first token, and this was a case where we prepended
6900+
// text. We should consider the start of this token to be at the start of
6901+
// the original text.
6902+
start += offset;
6903+
}
6904+
6905+
// All our tokens are in relation to the augmented text. Move them back to be
6906+
// relative to the original text.
6907+
start -= offset;
6908+
end -= offset;
6909+
let length = end - start;
68256910

6826-
result.entries.push({ length: length, classification: classification });
6911+
if (length > 0) {
6912+
result.spans.push(start);
6913+
result.spans.push(length);
6914+
result.spans.push(classification);
68276915
}
68286916
}
68296917
}
@@ -6890,41 +6978,44 @@ module ts {
68906978
return token >= SyntaxKind.FirstKeyword && token <= SyntaxKind.LastKeyword;
68916979
}
68926980

6893-
function classFromKind(token: SyntaxKind) {
6981+
function classFromKind(token: SyntaxKind): ClassificationType {
68946982
if (isKeyword(token)) {
6895-
return TokenClass.Keyword;
6983+
return ClassificationType.keyword;
68966984
}
68976985
else if (isBinaryExpressionOperatorToken(token) || isPrefixUnaryExpressionOperatorToken(token)) {
6898-
return TokenClass.Operator;
6986+
return ClassificationType.operator;
68996987
}
69006988
else if (token >= SyntaxKind.FirstPunctuation && token <= SyntaxKind.LastPunctuation) {
6901-
return TokenClass.Punctuation;
6989+
return ClassificationType.punctuation;
69026990
}
69036991

69046992
switch (token) {
69056993
case SyntaxKind.NumericLiteral:
6906-
return TokenClass.NumberLiteral;
6994+
return ClassificationType.numericLiteral;
69076995
case SyntaxKind.StringLiteral:
6908-
return TokenClass.StringLiteral;
6996+
return ClassificationType.stringLiteral;
69096997
case SyntaxKind.RegularExpressionLiteral:
6910-
return TokenClass.RegExpLiteral;
6998+
return ClassificationType.regularExpressionLiteral;
69116999
case SyntaxKind.ConflictMarkerTrivia:
69127000
case SyntaxKind.MultiLineCommentTrivia:
69137001
case SyntaxKind.SingleLineCommentTrivia:
6914-
return TokenClass.Comment;
7002+
return ClassificationType.comment;
69157003
case SyntaxKind.WhitespaceTrivia:
69167004
case SyntaxKind.NewLineTrivia:
6917-
return TokenClass.Whitespace;
7005+
return ClassificationType.whiteSpace;
69187006
case SyntaxKind.Identifier:
69197007
default:
69207008
if (isTemplateLiteralKind(token)) {
6921-
return TokenClass.StringLiteral;
7009+
return ClassificationType.stringLiteral;
69227010
}
6923-
return TokenClass.Identifier;
7011+
return ClassificationType.identifier;
69247012
}
69257013
}
69267014

6927-
return { getClassificationsForLine };
7015+
return {
7016+
getClassificationsForLine,
7017+
getLexicalClassifications2
7018+
};
69287019
}
69297020

69307021
/// getDefaultLibraryFilePath

0 commit comments

Comments
 (0)