Skip to content

Commit 56fde66

Browse files
committed
src: redo unaligned access workaround
Introduce two-byte overloads of node::Encode() and StringBytes::Encode() that ensure that the input is suitably aligned. Revisits commit 535fec8 from yesterday.
1 parent a60056d commit 56fde66

6 files changed

Lines changed: 100 additions & 35 deletions

File tree

src/node.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,13 +1219,15 @@ enum encoding ParseEncoding(Isolate* isolate,
12191219
}
12201220

12211221
Local<Value> Encode(Isolate* isolate,
1222-
const void* buf,
1222+
const char* buf,
12231223
size_t len,
12241224
enum encoding encoding) {
1225-
return StringBytes::Encode(isolate,
1226-
static_cast<const char*>(buf),
1227-
len,
1228-
encoding);
1225+
CHECK_NE(encoding, UCS2);
1226+
return StringBytes::Encode(isolate, buf, len, encoding);
1227+
}
1228+
1229+
Local<Value> Encode(Isolate* isolate, const uint16_t* buf, size_t len) {
1230+
return StringBytes::Encode(isolate, buf, len);
12291231
}
12301232

12311233
// Returns -1 if the handle was not valid for decoding

src/node.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ NODE_EXTERN v8::Handle<v8::Value> MakeCallback(
142142
#endif
143143

144144
#include <assert.h>
145+
#include <stdint.h>
145146

146147
#ifndef NODE_STRINGIFY
147148
#define NODE_STRINGIFY(n) NODE_STRINGIFY_HELPER(n)
@@ -267,16 +268,29 @@ NODE_DEPRECATED("Use FatalException(isolate, ...)",
267268
return FatalException(v8::Isolate::GetCurrent(), try_catch);
268269
})
269270

271+
// Don't call with encoding=UCS2.
270272
NODE_EXTERN v8::Local<v8::Value> Encode(v8::Isolate* isolate,
271-
const void* buf,
273+
const char* buf,
272274
size_t len,
273275
enum encoding encoding = BINARY);
276+
277+
NODE_EXTERN v8::Local<v8::Value> Encode(v8::Isolate* isolate,
278+
const uint16_t* buf,
279+
size_t len);
280+
274281
NODE_DEPRECATED("Use Encode(isolate, ...)",
275282
inline v8::Local<v8::Value> Encode(
276283
const void* buf,
277284
size_t len,
278285
enum encoding encoding = BINARY) {
279-
return Encode(v8::Isolate::GetCurrent(), buf, len, encoding);
286+
v8::Isolate* isolate = v8::Isolate::GetCurrent();
287+
if (encoding == UCS2) {
288+
assert(reinterpret_cast<uintptr_t>(buf) % sizeof(uint16_t) == 0 &&
289+
"UCS2 buffer must be aligned on two-byte boundary.");
290+
const uint16_t* that = static_cast<const uint16_t*>(buf);
291+
return Encode(isolate, that, len / sizeof(*that));
292+
}
293+
return Encode(isolate, static_cast<const char*>(buf), len, encoding);
280294
})
281295

282296
// Returns -1 if the handle was not valid for decoding

src/node_buffer.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,40 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
264264
}
265265

266266

267+
template <>
268+
void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
269+
Environment* env = Environment::GetCurrent(args);
270+
271+
ARGS_THIS(args.This())
272+
SLICE_START_END(args[0], args[1], obj_length)
273+
length /= 2;
274+
275+
const char* data = obj_data + start;
276+
const uint16_t* buf;
277+
bool release = false;
278+
279+
if (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0) {
280+
buf = reinterpret_cast<const uint16_t*>(data);
281+
} else {
282+
// Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte().
283+
uint16_t* copy = new uint16_t[length];
284+
for (size_t i = 0, k = 0; i < length; i += 1, k += 2) {
285+
// Assumes that the input is little endian.
286+
const uint8_t lo = static_cast<uint8_t>(data[k + 0]);
287+
const uint8_t hi = static_cast<uint8_t>(data[k + 1]);
288+
copy[i] = lo | hi << 8;
289+
}
290+
buf = copy;
291+
release = true;
292+
}
293+
294+
args.GetReturnValue().Set(StringBytes::Encode(env->isolate(), buf, length));
295+
296+
if (release)
297+
delete[] buf;
298+
}
299+
300+
267301
void BinarySlice(const FunctionCallbackInfo<Value>& args) {
268302
StringSlice<BINARY>(args);
269303
}

src/node_crypto.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,8 +1447,8 @@ void SSLWrap<Base>::GetSession(const FunctionCallbackInfo<Value>& args) {
14471447
int slen = i2d_SSL_SESSION(sess, nullptr);
14481448
CHECK_GT(slen, 0);
14491449

1450-
unsigned char* sbuf = new unsigned char[slen];
1451-
unsigned char* p = sbuf;
1450+
char* sbuf = new char[slen];
1451+
unsigned char* p = reinterpret_cast<unsigned char*>(sbuf);
14521452
i2d_SSL_SESSION(sess, &p);
14531453
args.GetReturnValue().Set(Encode(env->isolate(), sbuf, slen, BUFFER));
14541454
delete[] sbuf;

src/string_bytes.cc

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ Local<Value> StringBytes::Encode(Isolate* isolate,
690690
enum encoding encoding) {
691691
EscapableHandleScope scope(isolate);
692692

693+
CHECK_NE(encoding, UCS2);
693694
CHECK_LE(buflen, Buffer::kMaxLength);
694695
if (!buflen && encoding != BUFFER)
695696
return scope.Escape(String::Empty(isolate));
@@ -747,32 +748,6 @@ Local<Value> StringBytes::Encode(Isolate* isolate,
747748
break;
748749
}
749750

750-
case UCS2: {
751-
// Node's "ucs2" encoding expects LE character data inside a
752-
// Buffer, so we need to reorder on BE platforms. See
753-
// http://nodejs.org/api/buffer.html regarding Node's "ucs2"
754-
// encoding specification. Note that we have to make a copy
755-
// to avoid pointer aliasing and unaligned access, something
756-
// that we can't guarantee by just reinterpret_casting |buf|.
757-
size_t srclen = buflen / 2;
758-
uint16_t* src = new uint16_t[srclen];
759-
for (size_t i = 0, k = 0; i < srclen; i += 1, k += 2) {
760-
const uint8_t lo = static_cast<uint8_t>(buf[k + 0]);
761-
const uint8_t hi = static_cast<uint8_t>(buf[k + 1]);
762-
src[i] = lo | hi << 8;
763-
}
764-
if (buflen < EXTERN_APEX) {
765-
val = String::NewFromTwoByte(isolate,
766-
src,
767-
String::kNormalString,
768-
srclen);
769-
} else {
770-
val = ExternTwoByteString::NewFromCopy(isolate, src, srclen);
771-
}
772-
delete[] src;
773-
break;
774-
}
775-
776751
case HEX: {
777752
size_t dlen = buflen * 2;
778753
char* dst = new char[dlen];
@@ -796,4 +771,39 @@ Local<Value> StringBytes::Encode(Isolate* isolate,
796771
return scope.Escape(val);
797772
}
798773

774+
775+
Local<Value> StringBytes::Encode(Isolate* isolate,
776+
const uint16_t* buf,
777+
size_t buflen) {
778+
const uint16_t* src = buf;
779+
780+
// Node's "ucs2" encoding expects LE character data inside a
781+
// Buffer, so we need to reorder on BE platforms. See
782+
// http://nodejs.org/api/buffer.html regarding Node's "ucs2"
783+
// encoding specification.
784+
if (IsBigEndian()) {
785+
// Inefficient, see StringSlice<UCS2>() in src/node_buffer.cc;
786+
// this is potentially the second copy of the actual input.
787+
uint16_t* copy = new uint16_t[buflen];
788+
for (size_t i = 0; i < buflen; i += 1)
789+
copy[i] = buf[i] << 8 | buf[i] >> 8;
790+
src = copy;
791+
}
792+
793+
Local<String> val;
794+
if (buflen < EXTERN_APEX) {
795+
val = String::NewFromTwoByte(isolate,
796+
src,
797+
String::kNormalString,
798+
buflen);
799+
} else {
800+
val = ExternTwoByteString::NewFromCopy(isolate, src, buflen);
801+
}
802+
803+
if (src != buf)
804+
delete[] src;
805+
806+
return val;
807+
}
808+
799809
} // namespace node

src/string_bytes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,16 @@ class StringBytes {
7171
int* chars_written = nullptr);
7272

7373
// Take the bytes in the src, and turn it into a Buffer or String.
74+
// Don't call with encoding=UCS2.
7475
static v8::Local<v8::Value> Encode(v8::Isolate* isolate,
7576
const char* buf,
7677
size_t buflen,
7778
enum encoding encoding);
7879

80+
static v8::Local<v8::Value> Encode(v8::Isolate* isolate,
81+
const uint16_t* buf,
82+
size_t buflen);
83+
7984
// Deprecated legacy interface
8085

8186
NODE_DEPRECATED("Use IsValidString(isolate, ...)",

0 commit comments

Comments
 (0)