nodejs · mertcanaltin · May 16, 2026 · May 16, 2026
diff --git a/benchmark/fs/readfile-utf8-fastpath.js b/benchmark/fs/readfile-utf8-fastpath.js
@@ -0,0 +1,62 @@
+'use strict';
+
+const common = require('../common.js');
+const fs = require('fs');
+const path = require('path');
+const tmpdir = require('../../test/common/tmpdir');
+
+const bench = common.createBenchmark(main, {
+  size: [64, 1024, 16384, 262144, 4194304], // Fixture file size in bytes.
+  content: ['ascii', 'latin1', 'utf8_mixed'], // Byte pattern from buildContent().
+  source: ['path', 'fd'], // What readFileSync receives: a path or an open fd.
+  n: [3e3], // Timed readFileSync calls per configuration.
+});
+
+function buildContent(kind, size) {
+  if (kind === 'ascii') {
+    return Buffer.alloc(size, 0x61); // 'a'
+  }
+  if (kind === 'latin1') {
+    // 'é' in UTF-8 is 0xC3 0xA9 (2 bytes per char)
+    const pair = Buffer.from([0xC3, 0xA9]);
+    const buf = Buffer.alloc(size);
+    for (let i = 0; i + 2 <= size; i += 2) pair.copy(buf, i);
+    return buf;
+  }
+  if (kind === 'utf8_mixed') {
+    // mixed ASCII + 3-byte CJK (U+4E2D 中 = E4 B8 AD)
+    const cjk = Buffer.from([0xE4, 0xB8, 0xAD]);
+    const buf = Buffer.alloc(size);
+    let i = 0;
+    while (i + 4 <= size) {
+      buf[i++] = 0x61;
+      cjk.copy(buf, i);
+      i += 3;
+    }
+    return buf;
+  }
+  throw new Error('unknown content: ' + kind);
+}
+
+function main({ n, size, content, source }) {
+  tmpdir.refresh();
+  const file = path.join(tmpdir.path, `bench-${content}-${size}.bin`);
+  fs.writeFileSync(file, buildContent(content, size));
+
+  let arg;
+  let shouldClose = false;
+  if (source === 'fd') {
+    arg = fs.openSync(file, 'r');
+    shouldClose = true;
+  } else {
+    arg = file;
+  }
+
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    fs.readFileSync(arg, 'utf8');
+  }
+  bench.end(n);
+
+  if (shouldClose) fs.closeSync(arg);
+}
diff --git a/src/node_file.cc b/src/node_file.cc
@@ -38,6 +38,7 @@
 #include "tracing/trace_event.h"
 
 #include "req_wrap-inl.h"
+#include "simdutf.h"
 #include "stream_base-inl.h"
 #include "string_bytes.h"
 #include "uv.h"
@@ -2938,12 +2939,86 @@ static void ReadFileUtf8(const FunctionCallbackInfo<Value>& args) {
   }
   FS_SYNC_TRACE_END(read);
 
-  Local<Value> val;
-  if (!ToV8Value(env->context(), result, isolate).ToLocal(&val)) {
+  const char* data = result.data();
+  const size_t total = result.size();
+
+  if (total == 0) {
+    args.GetReturnValue().Set(String::Empty(isolate));
+    return;
+  }
+  if (total > static_cast<size_t>(v8::String::kMaxLength)) {
+    isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
+    return;
+  }
+
+  // ASCII: skip V8 UTF-8 validation.
+  if (!simdutf::validate_ascii_with_errors(data, total).error) [[likely]] {
+    Local<String> str;
+    if (!String::NewFromOneByte(isolate,
+                                reinterpret_cast<const uint8_t*>(data),
+                                v8::NewStringType::kNormal,
+                                static_cast<int>(total))
+             .ToLocal(&str)) {
+      isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
+      return;
+    }
+    args.GetReturnValue().Set(str);
+    return;
+  }
+
+  // Latin1-fits: one-byte V8 string, half the heap of UTF-16.
+  MaybeStackBuffer<char, 4096> latin1;
+  latin1.AllocateSufficientStorage(total);
+  simdutf::result l1 =
+      simdutf::convert_utf8_to_latin1_with_errors(data, total, latin1.out());
+  if (!l1.error) {
+    Local<String> str;
+    if (!String::NewFromOneByte(isolate,
+                                reinterpret_cast<const uint8_t*>(latin1.out()),
+                                v8::NewStringType::kNormal,
+                                static_cast<int>(l1.count))
+             .ToLocal(&str)) {
+      isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
+      return;
+    }
+    args.GetReturnValue().Set(str);
     return;
   }
 
-  args.GetReturnValue().Set(val);
+  // Multibyte UTF-8: simdutf to UTF-16 in one pass.
+  if (l1.error == simdutf::error_code::TOO_LARGE) {
+    MaybeStackBuffer<uint16_t, 2048> u16;
+    u16.AllocateSufficientStorage(total);
+    simdutf::result r = simdutf::convert_utf8_to_utf16_with_errors(
+        data, total, reinterpret_cast<char16_t*>(u16.out()));
+    if (!r.error) {
+      if (r.count > static_cast<size_t>(v8::String::kMaxLength)) {
+        isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
+        return;
+      }
+      Local<String> str;
+      if (!String::NewFromTwoByte(isolate,
+                                  u16.out(),
+                                  v8::NewStringType::kNormal,
+                                  static_cast<int>(r.count))
+               .ToLocal(&str)) {
+        isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
+        return;
+      }
+      args.GetReturnValue().Set(str);
+      return;
+    }
+  }
+
+  // Invalid UTF-8: fall back to V8 for replacement-char (U+FFFD) semantics.
+  {
+    Local<Value> val;
+    if (!ToV8Value(env->context(), result, isolate).ToLocal(&val))
+        [[unlikely]] {
+      return;
+    }
+    args.GetReturnValue().Set(val);
+  }
 }
 
 // Wrapper for readv(2).

diff --git a/test/parallel/test-fs-readfile-utf8-fast-path.js b/test/parallel/test-fs-readfile-utf8-fast-path.js
@@ -0,0 +1,103 @@
+'use strict';
+
+require('../common');
+const fs = require('node:fs');
+const path = require('node:path');
+const assert = require('node:assert');
+const { describe, it } = require('node:test');
+const tmpdir = require('../common/tmpdir');
+
+tmpdir.refresh();
+
+function writeFile(name, buf) {
+  const p = path.join(tmpdir.path, name);
+  fs.writeFileSync(p, buf);
+  return p;
+}
+
+function expectMatches(filePath, rawBuf) {
+  assert.strictEqual(
+    fs.readFileSync(filePath, 'utf8'),
+    rawBuf.toString('utf8'),
+  );
+}
+
+describe('fs.readFileSync utf8 simdutf dispatch', () => {
+  it('empty file', () => {
+    const p = writeFile('empty.txt', Buffer.alloc(0));
+    assert.strictEqual(fs.readFileSync(p, 'utf8'), '');
+  });
+
+  it('ascii small', () => {
+    const buf = Buffer.from('hello');
+    expectMatches(writeFile('tiny-ascii.txt', buf), buf);
+  });
+
+  it('ascii 20KB', () => {
+    const buf = Buffer.alloc(20 * 1024, 0x41);
+    expectMatches(writeFile('medium-ascii.txt', buf), buf);
+  });
+
+  it('ascii 1MB', () => {
+    const buf = Buffer.alloc(1024 * 1024, 0x61);
+    expectMatches(writeFile('large-ascii.txt', buf), buf);
+  });
+
+  it('fd input', () => {
+    const buf = Buffer.alloc(50 * 1024, 0x62);
+    const p = writeFile('fd-ascii.txt', buf);
+    const fd = fs.openSync(p, 'r');
+    try {
+      assert.strictEqual(fs.readFileSync(fd, 'utf8'), buf.toString('utf8'));
+    } finally {
+      fs.closeSync(fd);
+    }
+  });
+
+  it('multibyte UTF-8', () => {
+    const buf = Buffer.from('中文测试 — café — 🚀'.repeat(500), 'utf8');
+    expectMatches(writeFile('multibyte.txt', buf), buf);
+  });
+
+  it('latin1-fits utf8', () => {
+    const buf = Buffer.from('naïve café résumé — niño Köln '.repeat(500), 'utf8');
+    expectMatches(writeFile('latin1-fits.txt', buf), buf);
+  });
+
+  it('invalid: lone continuation byte', () => {
+    const buf = Buffer.from([0x68, 0x69, 0x80, 0x21]);
+    expectMatches(writeFile('invalid-cont.txt', buf), buf);
+  });
+
+  it('invalid: overlong', () => {
+    const buf = Buffer.from([0x41, 0xC0, 0xAF, 0x42]);
+    expectMatches(writeFile('invalid-overlong.txt', buf), buf);
+  });
+
+  it('invalid: surrogate', () => {
+    const buf = Buffer.from([0x41, 0xED, 0xA0, 0x80, 0x42]);
+    expectMatches(writeFile('invalid-surrogate.txt', buf), buf);
+  });
+
+  it('latin1 boundary U+00FF', () => {
+    const buf = Buffer.from('ÿ'.repeat(2048), 'utf8');
+    expectMatches(writeFile('latin1-boundary.txt', buf), buf);
+  });
+
+  it('above latin1 U+0100', () => {
+    const buf = Buffer.from('ĀāĂ'.repeat(1024), 'utf8');
+    expectMatches(writeFile('above-latin1.txt', buf), buf);
+  });
+
+  it('single codepoint each UTF-8 length', () => {
+    for (const cp of [0x41, 0x00E9, 0x4E2D, 0x1F600]) {
+      const buf = Buffer.from(String.fromCodePoint(cp), 'utf8');
+      expectMatches(writeFile(`single-cp-${cp.toString(16)}.txt`, buf), buf);
+    }
+  });
+
+  it('truncated multibyte at EOF', () => {
+    const buf = Buffer.from([0x41, 0xE4, 0xB8]);
+    expectMatches(writeFile('truncated-multibyte.txt', buf), buf);
+  });
+});