@@ -15,7 +15,7 @@ import { TPromise } from 'vs/base/common/winjs.base';
1515import { ISerializedFileMatch } from '../search' ;
1616import * as baseMime from 'vs/base/common/mime' ;
1717import { ILineMatch } from 'vs/platform/search/common/search' ;
18- import { UTF16le , UTF16be , UTF8 , UTF8_with_bom , encodingExists , decode } from 'vs/base/node/encoding' ;
18+ import { UTF16le , UTF16be , UTF8 , UTF8_with_bom , encodingExists , decode , bomLength } from 'vs/base/node/encoding' ;
1919import { detectMimeAndEncodingFromBuffer } from 'vs/base/node/mime' ;
2020
2121import { ISearchWorker , ISearchWorkerSearchArgs , ISearchWorkerSearchResult } from './searchWorkerIpc' ;
@@ -66,6 +66,9 @@ interface IFileSearchResult {
6666 limitReached ?: boolean ;
6767}
6868
69+ const LF = 0x0a ;
70+ const CR = 0x0d ;
71+
6972export class SearchWorkerEngine {
7073 private nextSearch = TPromise . wrap ( null ) ;
7174 private isCanceled = false ;
@@ -205,31 +208,42 @@ export class SearchWorkerEngine {
205208
206209 // Detect encoding and mime when this is the beginning of the file
207210 if ( isFirstRead ) {
208- let mimeAndEncoding = detectMimeAndEncodingFromBuffer ( buffer , bytesRead ) ;
211+ const mimeAndEncoding = detectMimeAndEncodingFromBuffer ( buffer , bytesRead ) ;
209212 if ( mimeAndEncoding . mimes [ mimeAndEncoding . mimes . length - 1 ] !== baseMime . MIME_TEXT ) {
210213 return clb ( null ) ; // skip files that seem binary
211214 }
212215
213216 // Check for BOM offset
214217 switch ( mimeAndEncoding . encoding ) {
215218 case UTF8 :
216- pos = i = 3 ;
219+ pos = i = bomLength ( UTF8 ) ;
217220 options . encoding = UTF8 ;
218221 break ;
219222 case UTF16be :
220- pos = i = 2 ;
223+ pos = i = bomLength ( UTF16be ) ;
221224 options . encoding = UTF16be ;
222225 break ;
223226 case UTF16le :
224- pos = i = 2 ;
227+ pos = i = bomLength ( UTF16le ) ;
225228 options . encoding = UTF16le ;
226229 break ;
227230 }
228231 }
229232
233+ // when we are running with UTF16le, LF and CR are encoded as
234+ // 0A 00 (LF) and 0D 00 (CR). the zero bytes are at the end
235+ // due to little endianness. since we want to split our buffer
236+ // into lines, we need to skip over the 00 bytes after LF and CR
237+ // so UTF16-LE gets a multiplier of 2, otherwise we would include
238+ // bad 00 bytes in our resulting buffer.
239+ let byteOffsetMultiplier = 1 ;
240+ if ( options . encoding === UTF16le ) {
241+ byteOffsetMultiplier = 2 ;
242+ }
243+
230244 if ( lastBufferHadTraillingCR ) {
231- if ( buffer [ i ] === 0x0a ) { // LF (Line Feed)
232- lineFinished ( 1 ) ;
245+ if ( buffer [ i ] === LF ) {
246+ lineFinished ( 1 * byteOffsetMultiplier ) ;
233247 i ++ ;
234248 } else {
235249 lineFinished ( 0 ) ;
@@ -239,16 +253,16 @@ export class SearchWorkerEngine {
239253 }
240254
241255 for ( ; i < bytesRead ; ++ i ) {
242- if ( buffer [ i ] === 0x0a ) { // LF (Line Feed)
243- lineFinished ( 1 ) ;
244- } else if ( buffer [ i ] === 0x0d ) { // CR (Carriage Return)
256+ if ( buffer [ i ] === LF ) {
257+ lineFinished ( 1 * byteOffsetMultiplier ) ;
258+ } else if ( buffer [ i ] === CR ) { // CR (Carriage Return)
245259 if ( i + 1 === bytesRead ) {
246260 lastBufferHadTraillingCR = true ;
247- } else if ( buffer [ i + 1 ] === 0x0a ) { // LF (Line Feed)
248- lineFinished ( 2 ) ;
261+ } else if ( buffer [ i + 1 ] === LF ) {
262+ lineFinished ( 2 * byteOffsetMultiplier ) ;
249263 i ++ ;
250264 } else {
251- lineFinished ( 1 ) ;
265+ lineFinished ( 1 * byteOffsetMultiplier ) ;
252266 }
253267 }
254268 }
@@ -339,7 +353,7 @@ export class LineMatch implements ILineMatch {
339353 }
340354
341355 serialize ( ) : ILineMatch {
342- let result = {
356+ const result = {
343357 preview : this . preview ,
344358 lineNumber : this . lineNumber ,
345359 offsetAndLengths : this . offsetAndLengths
0 commit comments