Skip to content

Commit f686a20

Browse files
committed
add provider to readable-stream logic, microsoft#41985
1 parent 36708c0 commit f686a20

3 files changed

Lines changed: 72 additions & 59 deletions

File tree

src/vs/base/node/encoding.ts

Lines changed: 16 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,21 @@ export const UTF16be = 'utf16be';
1818
export const UTF16le = 'utf16le';
1919

2020
export interface IDecodeStreamOptions {
21-
minBytesRequiredForDetection: number;
22-
guessEncoding: boolean;
23-
overwriteEncoding(detected: string): string;
21+
guessEncoding?: boolean;
22+
minBytesRequiredForDetection?: number;
23+
overwriteEncoding?(detected: string): string;
2424
}
2525

26-
export function toDecodeStream(readable: Readable, opts: IDecodeStreamOptions): TPromise<{ detected: IDetectedEncodingResult, stream: NodeJS.ReadableStream }> {
26+
export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions): TPromise<{ detected: IDetectedEncodingResult, stream: NodeJS.ReadableStream }> {
27+
28+
if (!options.minBytesRequiredForDetection) {
29+
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
30+
}
31+
32+
if (!options.overwriteEncoding) {
33+
options.overwriteEncoding = detected => detected || UTF8;
34+
}
35+
2736
return new TPromise<{ detected: IDetectedEncodingResult, stream: NodeJS.ReadableStream }>((resolve, reject) => {
2837
readable.pipe(new class extends Writable {
2938

@@ -55,7 +64,7 @@ export function toDecodeStream(readable: Readable, opts: IDecodeStreamOptions):
5564
// waiting for the decoder to be ready
5665
this._decodeStreamConstruction.then(_ => callback(), err => callback(err));
5766

58-
} else if (this._bytesBuffered >= opts.minBytesRequiredForDetection) {
67+
} else if (this._bytesBuffered >= options.minBytesRequiredForDetection) {
5968
// buffered enough data, create stream and forward data
6069
this._startDecodeStream(callback);
6170

@@ -69,8 +78,8 @@ export function toDecodeStream(readable: Readable, opts: IDecodeStreamOptions):
6978

7079
this._decodeStreamConstruction = TPromise.as(detectEncodingFromBuffer({
7180
buffer: Buffer.concat(this._buffer), bytesRead: this._bytesBuffered
72-
}, opts.guessEncoding)).then(detected => {
73-
detected.encoding = opts.overwriteEncoding(detected.encoding); // default encoding
81+
}, options.guessEncoding)).then(detected => {
82+
detected.encoding = options.overwriteEncoding(detected.encoding); // default encoding
7483
this._decodeStream = decodeStream(detected.encoding);
7584
for (const buffer of this._buffer) {
7685
this._decodeStream.write(buffer);
@@ -256,17 +265,6 @@ const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to
256265
const NO_GUESS_BUFFER_MAX_LEN = 512; // when not auto guessing the encoding, small number of bytes are enough
257266
const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
258267

259-
export function maxEncodingDetectionBufferLen(arg1?: DetectEncodingOption | boolean): number {
260-
let autoGuessEncoding: boolean;
261-
if (typeof arg1 === 'boolean') {
262-
autoGuessEncoding = arg1;
263-
} else {
264-
autoGuessEncoding = arg1 && arg1.autoGuessEncoding;
265-
}
266-
267-
return autoGuessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
268-
}
269-
270268
export interface IDetectedEncodingResult {
271269
encoding: string;
272270
seemsBinary: boolean;

src/vs/base/test/node/encoding/encoding.test.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,7 @@ suite('Encoding', () => {
189189
}
190190
});
191191

192-
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 4, guessEncoding: true, overwriteEncoding() { return encoding.UTF8; } });
192+
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 4 });
193193

194194
assert.ok(detected);
195195
assert.ok(stream);
@@ -209,7 +209,7 @@ suite('Encoding', () => {
209209
}
210210
});
211211

212-
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 64, guessEncoding: true, overwriteEncoding() { return encoding.UTF8; } });
212+
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 64 });
213213

214214
assert.ok(detected);
215215
assert.ok(stream);
@@ -226,7 +226,7 @@ suite('Encoding', () => {
226226
}
227227
});
228228

229-
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 512, guessEncoding: true, overwriteEncoding() { return encoding.UTF8; } });
229+
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 512 });
230230

231231
assert.ok(detected);
232232
assert.ok(stream);
@@ -241,7 +241,7 @@ suite('Encoding', () => {
241241
let path = require.toUrl('./fixtures/some_utf16be.css');
242242
let source = fs.createReadStream(path);
243243

244-
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 64, guessEncoding: true, overwriteEncoding(detected) { return detected; } });
244+
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 64 });
245245

246246
assert.equal(detected.encoding, 'utf16be');
247247
assert.equal(detected.seemsBinary, false);
@@ -256,7 +256,7 @@ suite('Encoding', () => {
256256

257257
let path = require.toUrl('./fixtures/empty.txt');
258258
let source = fs.createReadStream(path);
259-
let { detected, stream } = await encoding.toDecodeStream(source, { minBytesRequiredForDetection: 64, guessEncoding: true, overwriteEncoding() { return encoding.UTF8; } });
259+
let { detected, stream } = await encoding.toDecodeStream(source, {});
260260

261261
let expected = await readAndDecodeFromDisk(path, detected.encoding);
262262
let actual = await readAllAsString(stream);

src/vs/workbench/services/files/electron-browser/remoteFileService.ts

Lines changed: 51 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ import { posix } from 'path';
1212
import { IDisposable } from 'vs/base/common/lifecycle';
1313
import { isFalsyOrEmpty, distinct } from 'vs/base/common/arrays';
1414
import { Schemas } from 'vs/base/common/network';
15-
import { decodeStream, encode, UTF8, UTF8_with_bom, detectEncodingFromBuffer, maxEncodingDetectionBufferLen } from 'vs/base/node/encoding';
15+
import { encode, UTF8, UTF8_with_bom, toDecodeStream } from 'vs/base/node/encoding';
1616
import { TernarySearchTree } from 'vs/base/common/map';
1717
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
1818
import { IWorkspaceContextService } from 'vs/platform/workspace/common/workspace';
@@ -23,6 +23,7 @@ import { ITextResourceConfigurationService } from 'vs/editor/common/services/res
2323
import { IExtensionService } from 'vs/workbench/services/extensions/common/extensions';
2424
import { localize } from 'vs/nls';
2525
import { INotificationService } from 'vs/platform/notification/common/notification';
26+
import { Readable } from 'stream';
2627

2728
function toIFileStat(provider: IFileSystemProvider, tuple: [URI, IStat], recurse?: (tuple: [URI, IStat]) => boolean): TPromise<IFileStat> {
2829
const [resource, stat] = tuple;
@@ -220,6 +221,25 @@ export class RemoteFileService extends FileService {
220221
}
221222
}
222223

224+
private _createReadStream(provider: IFileSystemProvider, resource: URI): Readable {
225+
return new class extends Readable {
226+
_done: boolean = false;
227+
_read(size?: number): void {
228+
if (this._done) {
229+
this.push(null);
230+
return;
231+
}
232+
provider.readFile(resource).then(data => {
233+
this._done = true;
234+
this.push(data);
235+
}, err => {
236+
this._done = true;
237+
this.emit('error', err);
238+
});
239+
}
240+
};
241+
}
242+
223243
private _readFile(resource: URI, options: IResolveContentOptions = Object.create(null)): TPromise<IStreamContent> {
224244
return this._withProvider(resource).then(provider => {
225245

@@ -242,49 +262,44 @@ export class RemoteFileService extends FileService {
242262
);
243263
}
244264

245-
const guessEncoding = options.autoGuessEncoding;
246-
const count = maxEncodingDetectionBufferLen(options);
247-
let buffer: Buffer;
265+
return toDecodeStream(this._createReadStream(provider, resource), {
266+
guessEncoding: options.autoGuessEncoding,
267+
overwriteEncoding: detected => {
268+
let preferredEncoding: string;
269+
if (options && options.encoding) {
270+
if (detected === UTF8 && options.encoding === UTF8) {
271+
preferredEncoding = UTF8_with_bom; // indicate the file has BOM if we are to resolve with UTF 8
272+
} else {
273+
preferredEncoding = options.encoding; // give passed in encoding highest priority
274+
}
275+
} else if (detected) {
276+
if (detected === UTF8) {
277+
preferredEncoding = UTF8_with_bom; // if we detected UTF-8, it can only be because of a BOM
278+
} else {
279+
preferredEncoding = detected;
280+
}
281+
// todo@remote - encoding logic should not be kept
282+
// hostage inside the node file service
283+
// } else if (super.configuredEncoding(resource) === UTF8_with_bom) {
284+
} else {
285+
preferredEncoding = UTF8; // if we did not detect UTF 8 BOM before, this can only be UTF 8 then
286+
}
287+
return preferredEncoding;
288+
}
248289

249-
return provider.readFile(resource).then(data => {
250-
buffer = Buffer.from(data);
251-
return detectEncodingFromBuffer({ bytesRead: Math.min(count, buffer.length), buffer }, guessEncoding);
252-
}).then(detected => {
253-
if (options.acceptTextOnly && detected.seemsBinary) {
290+
}).then(data => {
291+
292+
if (options.acceptTextOnly && data.detected.seemsBinary) {
254293
return TPromise.wrapError<IStreamContent>(new FileOperationError(
255294
localize('fileBinaryError', "File seems to be binary and cannot be opened as text"),
256295
FileOperationResult.FILE_IS_BINARY,
257296
options
258297
));
259298
}
260299

261-
let preferredEncoding: string;
262-
if (options && options.encoding) {
263-
if (detected.encoding === UTF8 && options.encoding === UTF8) {
264-
preferredEncoding = UTF8_with_bom; // indicate the file has BOM if we are to resolve with UTF 8
265-
} else {
266-
preferredEncoding = options.encoding; // give passed in encoding highest priority
267-
}
268-
} else if (detected.encoding) {
269-
if (detected.encoding === UTF8) {
270-
preferredEncoding = UTF8_with_bom; // if we detected UTF-8, it can only be because of a BOM
271-
} else {
272-
preferredEncoding = detected.encoding;
273-
}
274-
// todo@remote - encoding logic should not be kept
275-
// hostage inside the node file service
276-
// } else if (super.configuredEncoding(resource) === UTF8_with_bom) {
277-
} else {
278-
preferredEncoding = UTF8; // if we did not detect UTF 8 BOM before, this can only be UTF 8 then
279-
}
280-
281-
// const encoding = this.getEncoding(resource);
282-
const stream = decodeStream(preferredEncoding);
283-
stream.end(buffer);
284-
285-
return {
286-
encoding: preferredEncoding,
287-
value: stream,
300+
return <IStreamContent>{
301+
encoding: data.detected.encoding,
302+
value: data.stream,
288303
resource: fileStat.resource,
289304
name: fileStat.name,
290305
etag: fileStat.etag,

0 commit comments

Comments
 (0)