Skip to content

Commit b34ff73

Browse files
committed
convertUTF8ToUTF16() Should Check for ASCII Input
https://bugs.webkit.org/show_bug.cgi?id=99739 Reviewed by Geoffrey Garen. Source/JavaScriptCore: Using the updated convertUTF8ToUTF16(), we can determine if it makes more sense to create a string using the 8 bit source. Added a new OpaqueJSString::create(LChar*, unsigned). Had to add a cast in JSStringCreateWithCFString to differentiate which create() to call. * API/JSStringRef.cpp: (JSStringCreateWithUTF8CString): * API/JSStringRefCF.cpp: (JSStringCreateWithCFString): * API/OpaqueJSString.h: (OpaqueJSString::create): (OpaqueJSString): (OpaqueJSString::OpaqueJSString): Source/WTF: Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion. This is used to check to see if all characters are ASCII and is returned via a bool*. * wtf/unicode/UTF8.cpp: (WTF::Unicode::convertUTF8ToUTF16): * wtf/unicode/UTF8.h: Canonical link: https://commits.webkit.org/117772@main git-svn-id: https://svn.webkit.org/repository/webkit/trunk@131836 268f45cc-cd09-0410-ab3c-d52691b4dbfc
1 parent 6ae1a09 commit b34ff73

7 files changed

Lines changed: 67 additions & 7 deletions

File tree

Source/JavaScriptCore/API/JSStringRef.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,12 @@ JSStringRef JSStringCreateWithUTF8CString(const char* string)
4646
size_t length = strlen(string);
4747
Vector<UChar, 1024> buffer(length);
4848
UChar* p = buffer.data();
49-
if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length))
49+
bool sourceIsAllASCII;
50+
if (conversionOK == convertUTF8ToUTF16(&string, string + length, &p, p + length, &sourceIsAllASCII)) {
51+
if (sourceIsAllASCII)
52+
return OpaqueJSString::create(reinterpret_cast<const LChar*>(string), length).leakRef();
5053
return OpaqueJSString::create(buffer.data(), p - buffer.data()).leakRef();
54+
}
5155
}
5256

5357
// Null string.

Source/JavaScriptCore/API/JSStringRefCF.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ JSStringRef JSStringCreateWithCFString(CFStringRef string)
4646
COMPILE_ASSERT(sizeof(UniChar) == sizeof(UChar), unichar_and_uchar_must_be_same_size);
4747
return OpaqueJSString::create(reinterpret_cast<UChar*>(buffer.get()), length).leakRef();
4848
} else {
49-
return OpaqueJSString::create(0, 0).leakRef();
49+
return OpaqueJSString::create(static_cast<const LChar*>(0), 0).leakRef();
5050
}
5151
}
5252

Source/JavaScriptCore/API/OpaqueJSString.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ struct OpaqueJSString : public ThreadSafeRefCounted<OpaqueJSString> {
4141
return adoptRef(new OpaqueJSString);
4242
}
4343

44+
static PassRefPtr<OpaqueJSString> create(const LChar* characters, unsigned length)
45+
{
46+
return adoptRef(new OpaqueJSString(characters, length));
47+
}
48+
4449
static PassRefPtr<OpaqueJSString> create(const UChar* characters, unsigned length)
4550
{
4651
return adoptRef(new OpaqueJSString(characters, length));
@@ -70,6 +75,11 @@ struct OpaqueJSString : public ThreadSafeRefCounted<OpaqueJSString> {
7075
m_string = String(string.characters16(), string.length());
7176
}
7277

78+
OpaqueJSString(const LChar* characters, unsigned length)
79+
{
80+
m_string = String(characters, length);
81+
}
82+
7383
OpaqueJSString(const UChar* characters, unsigned length)
7484
{
7585
m_string = String(characters, length);

Source/JavaScriptCore/ChangeLog

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
2012-10-18 Michael Saboff <msaboff@apple.com>
2+
3+
convertUTF8ToUTF16() Should Check for ASCII Input
4+
https://bugs.webkit.org/show_bug.cgi?id=99739
5+
6+
Reviewed by Geoffrey Garen.
7+
8+
Using the updated convertUTF8ToUTF16(), we can determine if it makes more sense to
9+
create a string using the 8 bit source. Added a new OpaqueJSString::create(LChar*, unsigned).
10+
Had to add a cast in JSStringCreateWithCFString to differentiate which create() to call.
11+
12+
* API/JSStringRef.cpp:
13+
(JSStringCreateWithUTF8CString):
14+
* API/JSStringRefCF.cpp:
15+
(JSStringCreateWithCFString):
16+
* API/OpaqueJSString.h:
17+
(OpaqueJSString::create):
18+
(OpaqueJSString):
19+
(OpaqueJSString::OpaqueJSString):
20+
121
2012-10-18 Oliver Hunt <oliver@apple.com>
222

323
Unbreak jsc tests. Last minute "clever"-ness is clearly just not

Source/WTF/ChangeLog

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
2012-10-18 Michael Saboff <msaboff@apple.com>
2+
3+
convertUTF8ToUTF16() Should Check for ASCII Input
4+
https://bugs.webkit.org/show_bug.cgi?id=99739
5+
6+
Reviewed by Geoffrey Garen.
7+
8+
Added code to accumulate the "or" of all characters seen during the UTF8 to UTF16 conversion. This is
9+
used to check to see if all characters are ASCII and is returned via a bool*.
10+
11+
* wtf/unicode/UTF8.cpp:
12+
(WTF::Unicode::convertUTF8ToUTF16):
13+
* wtf/unicode/UTF8.h:
14+
115
2012-10-18 Michael Saboff <msaboff@apple.com>
216

317
Mac WTF build checks dependencies before copying header files

Source/WTF/wtf/unicode/UTF8.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,11 +297,12 @@ static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length)
297297

298298
ConversionResult convertUTF8ToUTF16(
299299
const char** sourceStart, const char* sourceEnd,
300-
UChar** targetStart, UChar* targetEnd, bool strict)
300+
UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict)
301301
{
302302
ConversionResult result = conversionOK;
303303
const char* source = *sourceStart;
304304
UChar* target = *targetStart;
305+
UChar orAllData = 0;
305306
while (source < sourceEnd) {
306307
int utf8SequenceLength = inlineUTF8SequenceLength(*source);
307308
if (sourceEnd - source < utf8SequenceLength) {
@@ -329,10 +330,14 @@ ConversionResult convertUTF8ToUTF16(
329330
source -= utf8SequenceLength; // return to the illegal value itself
330331
result = sourceIllegal;
331332
break;
332-
} else
333+
} else {
333334
*target++ = replacementCharacter;
334-
} else
335+
orAllData |= replacementCharacter;
336+
}
337+
} else {
335338
*target++ = character; // normal case
339+
orAllData |= character;
340+
}
336341
} else if (U_IS_SUPPLEMENTARY(character)) {
337342
// target is a character in range 0xFFFF - 0x10FFFF
338343
if (target + 1 >= targetEnd) {
@@ -342,17 +347,24 @@ ConversionResult convertUTF8ToUTF16(
342347
}
343348
*target++ = U16_LEAD(character);
344349
*target++ = U16_TRAIL(character);
350+
orAllData = 0xffff;
345351
} else {
346352
if (strict) {
347353
source -= utf8SequenceLength; // return to the start
348354
result = sourceIllegal;
349355
break; // Bail out; shouldn't continue
350-
} else
356+
} else {
351357
*target++ = replacementCharacter;
358+
orAllData |= replacementCharacter;
359+
}
352360
}
353361
}
354362
*sourceStart = source;
355363
*targetStart = target;
364+
365+
if (sourceAllASCII)
366+
*sourceAllASCII = !(orAllData & 0x7f);
367+
356368
return result;
357369
}
358370

Source/WTF/wtf/unicode/UTF8.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ namespace Unicode {
6464

6565
WTF_EXPORT_PRIVATE ConversionResult convertUTF8ToUTF16(
6666
const char** sourceStart, const char* sourceEnd,
67-
UChar** targetStart, UChar* targetEnd, bool strict = true);
67+
UChar** targetStart, UChar* targetEnd, bool* isSourceAllASCII = 0, bool strict = true);
6868

6969
WTF_EXPORT_PRIVATE ConversionResult convertLatin1ToUTF8(
7070
const LChar** sourceStart, const LChar* sourceEnd,

0 commit comments

Comments
 (0)