diff --git a/benchmark/misc/compile-cache-timing.js b/benchmark/misc/compile-cache-timing.js
new file mode 100644
index 00000000000000..bdeae8c5cd2e93
--- /dev/null
+++ b/benchmark/misc/compile-cache-timing.js
@@ -0,0 +1,88 @@
+'use strict';
+
+// Startup benchmark for the compile cache (including the zstd dictionary).
+// Compares no-cache / cold-cache / warm-cache for two workloads:
+//   big  - one large module (the typescript.js fixture)
+//   many - many small modules (generated here, side-effect-free)
+// The modules are generated into a temp dir so the benchmark is self-contained
+// and reproducible, and never executes unrelated code.
+
+const common = require('../common.js');
+const { spawnSync } = require('child_process');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+
+const bench = common.createBenchmark(main, {
+  workload: ['big', 'many'],
+  cache: ['none', 'cold', 'warm'],
+  n: [30],
+});
+
+const BIG = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js');
+
+// Generate `count` small, side-effect-free modules and return the require()
+// code that loads them all in one child.
+function makeManyModules(dir, count) {
+  fs.mkdirSync(dir, { recursive: true });
+  const reqs = [];
+  for (let i = 0; i < count; i++) {
+    const file = path.join(dir, `mod-${i}.js`);
+    fs.writeFileSync(
+      file,
+      `'use strict';\n` +
+      `module.exports = function value${i}(a, b) {\n` +
+      `  const sum = a + b + ${i};\n` +
+      `  return { id: ${i}, sum, label: 'module-${i}' };\n` +
+      `};\n`);
+    reqs.push(`require(${JSON.stringify(file)});`);
+  }
+  return reqs.join('');
+}
+
+// Spawn `cmd args` synchronously with NODE_COMPILE_CACHE set to `cacheDir`,
+// or removed from the environment when `cacheDir` is falsy. Throws if the
+// child cannot be spawned or does not exit cleanly, so that a broken child
+// is never silently reported as a (fast) benchmark result.
+function run(cmd, args, cacheDir) {
+  const env = { ...process.env };
+  if (cacheDir) env.NODE_COMPILE_CACHE = cacheDir;
+  else delete env.NODE_COMPILE_CACHE;
+  const child = spawnSync(cmd, args, { env, stdio: 'ignore' });
+  if (child.error) throw child.error;
+  if (child.status !== 0) {
+    throw new Error(
+      `Child process stopped with exit code ${child.status}` +
+      (child.signal ? ` (signal ${child.signal})` : ''));
+  }
+}
+
+// Time `n` sequential child startups for one workload/cache combination:
+//   none - NODE_COMPILE_CACHE is unset in the child
+//   cold - the cache directory is deleted before every child
+//   warm - the cache is populated by one untimed child first
+// All files live in a fresh temp dir that is removed on exit, even on errors.
+function main({ n, workload, cache }) {
+  const cmd = process.execPath || process.argv[0];
+  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-bench-'));
+  const cacheDir = cache === 'none' ? null : path.join(tmp, 'cache');
+
+  try {
+    // Generate the modules inside the try block so that `tmp` is cleaned up
+    // even if writing them fails half-way.
+    const args = workload === 'big' ?
+      [BIG] :
+      ['-e', makeManyModules(path.join(tmp, 'mods'), 120)];
+    if (cache === 'warm') run(cmd, args, cacheDir);  // populate once
+    bench.start();
+    for (let i = 0; i < n; i++) {
+      if (cache === 'cold' && cacheDir) {
+        fs.rmSync(cacheDir, { recursive: true, force: true });
+      }
+      run(cmd, args, cacheDir);
+    }
+    bench.end(n);
+  } finally {
+    fs.rmSync(tmp, { recursive: true, force: true });
+  }
+}
diff --git a/node.gyp b/node.gyp
index d2dbce19992b10..ab54cc27437e72 100644
--- a/node.gyp
+++ b/node.gyp
@@ -1110,6 +1110,22 @@
             '<@(linked_module_files)',
           ],
         },
+        {
+          'action_name': 'generate_compile_cache_zstd_dict',
+          'inputs': [  # the checked-in dictionary and its generator script
+            'src/compile_cache_zstd.dict',
+            'tools/generate_compile_cache_dict.py',
+          ],
+          'outputs': [  # header included by src/compile_cache.cc
+            '<(SHARED_INTERMEDIATE_DIR)/compile_cache_zstd_dict.h',
+          ],
+          'action': [  # the script takes: input .dict path, output .h path
+            '<(python)',
+            'tools/generate_compile_cache_dict.py',
+            'src/compile_cache_zstd.dict',
+            '<@(_outputs)',
+          ],
+        },
       ],
     }, # node_base
     {
@@ -1123,6 +1139,7 @@
         'src',
         'deps/v8/include',
         'deps/uv/include',
+        '<(SHARED_INTERMEDIATE_DIR)',  # for compile_cache_zstd_dict.h etc.
       ],
 
       'dependencies': [
diff --git a/src/compile_cache.cc b/src/compile_cache.cc
index dd097acd86f8e4..35c4b02cc70d9a 100644
--- a/src/compile_cache.cc
+++ b/src/compile_cache.cc
@@ -1,4 +1,6 @@
 #include "compile_cache.h"
+#include <array>
+#include <memory>
 #include <string>
 #include "debug_utils-inl.h"
 #include "env-inl.h"
@@ -8,6 +10,11 @@
 #include "path.h"
 #include "util.h"
 #include "zlib.h"
+#include "zstd.h"
+// kCompileCacheZstdDict + kCompileCacheZstdDictSize come from the header
+// generated at build time by the GYP action (from src/compile_cache_zstd.dict).
+// The include directory (SHARED_INTERMEDIATE_DIR) is added by node.gyp.
+#include "compile_cache_zstd_dict.h"
 
 #ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
 #include <unistd.h>  // getuid
@@ -25,6 +32,29 @@ using v8::ScriptCompiler;
 using v8::String;
 
 namespace {
+// The compile-cache zstd dictionary is immutable and embedded in the binary,
+// so the prepared CDict/DDict are created once and shared across all handlers
+// (and all Environments/Workers) instead of per handler. They live for the
+// lifetime of the process. Returns nullptr if preparation fails, in which
+// case callers fall back to plain (dictionary-less) zstd.
+ZSTD_CDict* GetCompileCacheCDict() {
+  static ZSTD_CDict* const prepared = ZSTD_createCDict(  // Runs only once.
+      kCompileCacheZstdDict, kCompileCacheZstdDictSize, /*level=*/1);
+  return prepared;
+}
+
+ZSTD_DDict* GetCompileCacheDDict() {
+  static ZSTD_DDict* const prepared = ZSTD_createDDict(  // Runs only once.
+      kCompileCacheZstdDict, kCompileCacheZstdDictSize);
+  return prepared;
+}
+
+// The dictionary only helps small/medium caches; for larger inputs zstd's own
+// adaptive model dominates and the dictionary never wins, so we skip the
+// (otherwise wasted) second compression above this raw size. Decompression is
+// unaffected: a single DDict decodes both dict-assisted and plain frames.
+constexpr uint32_t kCompileCacheDictMaxRawSize = 256 * 1024;
+
 std::string Uint32ToHex(uint32_t crc) {
   std::string str;
   str.reserve(8);
@@ -75,18 +105,21 @@ inline void CompileCacheHandler::Debug(const char* format,
   }
 }
 
-ScriptCompiler::CachedData* CompileCacheEntry::CopyCache() const {
+ScriptCompiler::CachedData* CompileCacheEntry::WrapCache() const {
   DCHECK_NOT_NULL(cache);
-  int cache_size = cache->length;
-  uint8_t* data = new uint8_t[cache_size];
-  memcpy(data, cache->data, cache_size);
+  // The returned CachedData does not own the buffer - it's a view into
+  // the buffer owned by this entry, which outlives the synchronous
+  // consumption of the cache during compilation, so no copy is necessary.
   return new ScriptCompiler::CachedData(
-      data, cache_size, ScriptCompiler::CachedData::BufferOwned);
+      cache->data, cache->length, ScriptCompiler::CachedData::BufferNotOwned);
 }
 
 // Used for identifying and verifying a file is a compile cache file.
 // See comments in CompileCacheHandler::Persist().
-constexpr uint32_t kCacheMagicNumber = 0x8adfdbb2;
+// The last byte is bumped whenever the format of the cache file changes
+// so that files in an older format are discarded as cache misses and
+// then overwritten with the new format.
+constexpr uint32_t kCacheMagicNumber = 0x8adfdbb3;
 
 const char* CompileCacheEntry::type_name() const {
   switch (type) {
@@ -124,10 +157,21 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
     uv_fs_req_cleanup(&close_req);
   });
 
+  // Get the file size upfront so that the cache can be read with a single
+  // exactly-sized read, and truncated or trailing data can be detected
+  // without additional read attempts.
+  int err = uv_fs_fstat(nullptr, &req, file, nullptr);
+  if (err < 0) {
+    Debug("fstat failed, %s\n", uv_strerror(err));
+    return;
+  }
+  uint64_t file_size = req.statbuf.st_size;
+  uv_fs_req_cleanup(&req);
+
   // Read the headers.
-  std::vector<uint32_t> headers(kHeaderCount);
-  uv_buf_t headers_buf = uv_buf_init(reinterpret_cast<char*>(headers.data()),
-                                     kHeaderCount * sizeof(uint32_t));
+  std::array<uint32_t, kHeaderCount> headers;
+  uv_buf_t headers_buf =
+      uv_buf_init(reinterpret_cast<char*>(headers.data()), kHeaderSize);
   const int r = uv_fs_read(nullptr, &req, file, &headers_buf, 1, 0, nullptr);
   if (r != static_cast<int>(headers_buf.len)) {
     Debug("reading header failed, bytes read %d", r);
@@ -137,13 +181,15 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
     Debug("\n");
     return;
   }
+  uv_fs_req_cleanup(&req);
 
-  Debug("[%d %d %d %d %d]...",
+  Debug("[%d %d %d %d %d %d]...",
         headers[kMagicNumberOffset],
         headers[kCodeSizeOffset],
         headers[kCacheSizeOffset],
         headers[kCodeHashOffset],
-        headers[kCacheHashOffset]);
+        headers[kCacheHashOffset],
+        headers[kCacheRawSizeOffset]);
 
   if (headers[kMagicNumberOffset] != kCacheMagicNumber) {
     Debug("magic number mismatch: expected %d, actual %d\n",
@@ -166,50 +212,56 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
     return;
   }
 
-  // Read the cache, grow the buffer exponentially whenever it fills up.
-  size_t offset = headers_buf.len;
-  size_t capacity = 4096;  // Initial buffer capacity
-  size_t total_read = 0;
-  uint8_t* buffer = new uint8_t[capacity];
-
-  while (true) {
-    // If there is not enough space to read more data, do a simple
-    // realloc here (we don't actually realloc because V8 requires
-    // the underlying buffer to be delete[]-able).
-    if (total_read == capacity) {
-      size_t new_capacity = capacity * 2;
-      auto* new_buffer = new uint8_t[new_capacity];
-      memcpy(new_buffer, buffer, capacity);
-      delete[] buffer;
-      buffer = new_buffer;
-      capacity = new_capacity;
-    }
+  uint32_t cache_size = headers[kCacheSizeOffset];
+  uint32_t raw_size = headers[kCacheRawSizeOffset];
 
-    uv_buf_t iov = uv_buf_init(reinterpret_cast<char*>(buffer + total_read),
-                               capacity - total_read);
-    int bytes_read =
-        uv_fs_read(nullptr, &req, file, &iov, 1, offset + total_read, nullptr);
+  // Check the cache size. The headers were read successfully, so
+  // file_size >= kHeaderSize here. The file must contain exactly the
+  // headers followed by cache_size bytes of cache content.
+  if (file_size - kHeaderSize != cache_size) {
+    Debug("cache size mismatch: expected %d, actual %d\n",
+          cache_size,
+          file_size - kHeaderSize);
+    return;
+  }
+
+  // The cache content is stored uncompressed when cache_size == raw_size,
+  // and zstd-compressed when cache_size < raw_size (see
+  // CompileCacheHandler::Persist()). Anything else is invalid.
+  if (cache_size > raw_size) {
+    Debug(
+        "invalid cache size %d > uncompressed size %d\n", cache_size, raw_size);
+    return;
+  }
+
+  // Read the cache content in one go with an exactly-sized buffer,
+  // looping only in case of short reads.
+  std::unique_ptr<uint8_t[]> disk_data(new uint8_t[cache_size]);
+  size_t total_read = 0;
+  while (total_read < cache_size) {
+    uv_buf_t iov =
+        uv_buf_init(reinterpret_cast<char*>(disk_data.get() + total_read),
+                    cache_size - total_read);
+    int bytes_read = uv_fs_read(
+        nullptr, &req, file, &iov, 1, kHeaderSize + total_read, nullptr);
     if (req.result < 0) {  // Error.
       // req will be cleaned up by scope leave.
-      delete[] buffer;
       Debug(" %s\n", uv_strerror(req.result));
       return;
     }
     uv_fs_req_cleanup(&req);
-    if (bytes_read <= 0) {
-      break;
+    if (bytes_read == 0) {  // Unexpected EOF - the file shrank under us.
+      Debug("cache size mismatch: expected %d, actual %d\n",
+            cache_size,
+            total_read);
+      return;
     }
     total_read += bytes_read;
   }
 
-  // Check the cache size and hash.
-  if (headers[kCacheSizeOffset] != total_read) {
-    Debug("cache size mismatch: expected %d, actual %d\n",
-          headers[kCacheSizeOffset],
-          total_read);
-    return;
-  }
-  uint32_t cache_hash = GetHash(reinterpret_cast<char*>(buffer), total_read);
+  // Check the cache hash of the on-disk content before decompressing.
+  uint32_t cache_hash =
+      GetHash(reinterpret_cast<char*>(disk_data.get()), cache_size);
   if (headers[kCacheHashOffset] != cache_hash) {
     Debug("cache hash mismatch: expected %d, actual %d\n",
           headers[kCacheHashOffset],
@@ -217,9 +269,58 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
     return;
   }
 
-  entry->cache.reset(new ScriptCompiler::CachedData(
-      buffer, total_read, ScriptCompiler::CachedData::BufferOwned));
-  Debug(" success, size=%d\n", total_read);
+  if (cache_size == raw_size) {
+    // Stored uncompressed - hand the buffer to V8 directly.
+    entry->cache.reset(new ScriptCompiler::CachedData(
+        disk_data.release(),
+        raw_size,
+        ScriptCompiler::CachedData::BufferOwned));
+  } else {
+    // Cross-check the content size embedded in the zstd frame before
+    // allocating, in case the headers are corrupted.
+    unsigned long long content_size =  // NOLINT(runtime/int)
+        ZSTD_getFrameContentSize(disk_data.get(), cache_size);
+    if (content_size != raw_size) {
+      Debug("uncompressed size mismatch: expected %d, actual %d\n",
+            raw_size,
+            content_size);
+      return;
+    }
+    // Lazily create the decompression context on first use and reuse it
+    // for subsequent reads - recreating its workspace for every file
+    // costs more than the decompression itself for small caches.
+    if (zstd_dctx_ == nullptr && (zstd_dctx_ = ZSTD_createDCtx()) == nullptr) {
+      Debug("failed to create zstd context\n");
+      return;
+    }
+    // Decompress directly into the buffer handed to V8. The embedded
+    // dictionary is referenced via a shared, prepared DDict; plain frames
+    // (which carry no dictID) decompress correctly with it as well.
+    std::unique_ptr<uint8_t[]> raw_data(new uint8_t[raw_size]);
+    ZSTD_DDict* ddict = GetCompileCacheDDict();
+    size_t decompressed_size;
+    if (ddict != nullptr) {
+      decompressed_size = ZSTD_decompress_usingDDict(
+          zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size,
+          ddict);
+    } else {
+      decompressed_size = ZSTD_decompressDCtx(
+          zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size);
+    }
+    if (ZSTD_isError(decompressed_size)) {
+      Debug("decompression failed: %s\n", ZSTD_getErrorName(decompressed_size));
+      return;
+    }
+    if (decompressed_size != raw_size) {
+      Debug("decompressed size mismatch: expected %d, actual %d\n",
+            raw_size,
+            decompressed_size);
+      return;
+    }
+    entry->cache.reset(new ScriptCompiler::CachedData(
+        raw_data.release(), raw_size, ScriptCompiler::CachedData::BufferOwned));
+  }
+  Debug(" success, size=%d\n", raw_size);
 }
 
 static std::string GetRelativePath(std::string_view path,
@@ -280,11 +381,18 @@ CompileCacheEntry* CompileCacheHandler::GetOrInsert(Local<String> code,
     return loaded->second.get();
   }
 
-  // If the code hash mismatches, the code has changed, discard the stale entry
-  // and create a new one.
-  auto emplaced =
-      compiler_cache_store_.emplace(key, std::make_unique<CompileCacheEntry>());
-  auto* result = emplaced.first->second.get();
+  // If the code hash mismatches, the code has changed, reset the stale
+  // entry in place. Otherwise insert a new one.
+  CompileCacheEntry* result;
+  if (loaded != compiler_cache_store_.end()) {
+    result = loaded->second.get();
+    result->refreshed = false;
+    result->persisted = false;
+  } else {
+    result = compiler_cache_store_
+                 .emplace(key, std::make_unique<CompileCacheEntry>())
+                 .first->second.get();
+  }
 
   result->code_hash = code_hash;
   result->code_size = code_utf8.length();
@@ -391,6 +499,16 @@ void CompileCacheHandler::Persist() {
   // finished. In that case, the off-thread writes should finish long
   // before any attempt of flushing is made so the method would then only
   // incur a negligible overhead from thread synchronization.
+
+  // The compression context is created lazily when there is anything to
+  // compress and reused for all the entries in this invocation.
+  ZSTD_CCtx* cctx = nullptr;
+  auto cleanup_cctx = OnScopeLeave([&cctx]() {
+    if (cctx != nullptr) {
+      ZSTD_freeCCtx(cctx);
+    }
+  });
+
   for (auto& pair : compiler_cache_store_) {
     auto* entry = pair.second.get();
     const char* type_name = entry->type_name();
@@ -418,18 +536,71 @@ void CompileCacheHandler::Persist() {
 
     DCHECK_EQ(entry->cache->buffer_policy,
               ScriptCompiler::CachedData::BufferOwned);
-    char* cache_ptr =
+    char* raw_ptr =
         reinterpret_cast<char*>(const_cast<uint8_t*>(entry->cache->data));
-    uint32_t cache_size = static_cast<uint32_t>(entry->cache->length);
+    uint32_t raw_size = static_cast<uint32_t>(entry->cache->length);
+
+    // Compress the cache with zstd to reduce the size on disk. Compression
+    // level 1 prioritizes speed - persistence usually happens on process
+    // shutdown and should add as little overhead as possible. If the data
+    // is not compressible, store it uncompressed, which is indicated by
+    // the cache size being equal to the uncompressed size in the headers.
+    //
+    // We also try the embedded trained dictionary and keep whichever frame is
+    // smaller (still subject to the "only store if < raw" policy). The
+    // dictionary mainly helps the small/medium caches that dominate real
+    // compile cache usage; for inputs where plain zstd already wins we keep
+    // the plain frame.
+    char* cache_ptr = raw_ptr;
+    uint32_t cache_size = raw_size;
+    std::unique_ptr<uint8_t[]> compressed;
+    std::unique_ptr<uint8_t[]> compressed_dict;
+    if (cctx != nullptr || (cctx = ZSTD_createCCtx()) != nullptr) {
+      size_t compressed_bound = ZSTD_compressBound(raw_size);
+      compressed.reset(new uint8_t[compressed_bound]);
+      size_t compressed_size = ZSTD_compressCCtx(
+          cctx, compressed.get(), compressed_bound, raw_ptr, raw_size, 1);
+      char* best_ptr = reinterpret_cast<char*>(compressed.get());
+      // Only attempt the dictionary for small/medium entries (see
+      // kCompileCacheDictMaxRawSize); for large blobs it never wins and the
+      // extra compression would be wasted work.
+      ZSTD_CDict* cdict = raw_size <= kCompileCacheDictMaxRawSize
+                              ? GetCompileCacheCDict()
+                              : nullptr;
+      if (cdict != nullptr) {
+        // Compress into a separate buffer so the selected frame's bytes and
+        // size always stay in sync (the plain buffer is left untouched).
+        compressed_dict.reset(new uint8_t[compressed_bound]);
+        size_t dict_size = ZSTD_compress_usingCDict(
+            cctx, compressed_dict.get(), compressed_bound, raw_ptr, raw_size,
+            cdict);
+        if (!ZSTD_isError(dict_size) &&
+            (ZSTD_isError(compressed_size) || dict_size < compressed_size)) {
+          compressed_size = dict_size;
+          best_ptr = reinterpret_cast<char*>(compressed_dict.get());
+        }
+      }
+      if (!ZSTD_isError(compressed_size) && compressed_size < raw_size) {
+        cache_ptr = best_ptr;
+        cache_size = static_cast<uint32_t>(compressed_size);
+      }
+    }
+    Debug("[compile cache] compressed cache for %s %s: %d -> %d bytes\n",
+          type_name,
+          entry->source_filename,
+          raw_size,
+          cache_size);
+
     uint32_t cache_hash = GetHash(cache_ptr, cache_size);
 
     // Generating headers.
-    std::vector<uint32_t> headers(kHeaderCount);
+    std::array<uint32_t, kHeaderCount> headers;
     headers[kMagicNumberOffset] = kCacheMagicNumber;
     headers[kCodeSizeOffset] = entry->code_size;
     headers[kCacheSizeOffset] = cache_size;
     headers[kCodeHashOffset] = entry->code_hash;
     headers[kCacheHashOffset] = cache_hash;
+    headers[kCacheRawSizeOffset] = raw_size;
 
     // Generate the temporary filename.
     // The temporary file should be placed in a location like:
@@ -459,7 +630,7 @@ void CompileCacheHandler::Persist() {
     Debug(" -> %s\n", mkstemp_req.path);
     Debug("[compile cache] writing cache for %s %s to temporary file %s [%d "
           "%d %d "
-          "%d %d]...",
+          "%d %d %d]...",
           type_name,
           entry->source_filename,
           mkstemp_req.path,
@@ -467,12 +638,13 @@ void CompileCacheHandler::Persist() {
           headers[kCodeSizeOffset],
           headers[kCacheSizeOffset],
           headers[kCodeHashOffset],
-          headers[kCacheHashOffset]);
+          headers[kCacheHashOffset],
+          headers[kCacheRawSizeOffset]);
 
     // Write to the temporary file.
-    uv_buf_t headers_buf = uv_buf_init(reinterpret_cast<char*>(headers.data()),
-                                       headers.size() * sizeof(uint32_t));
-    uv_buf_t data_buf = uv_buf_init(cache_ptr, entry->cache->length);
+    uv_buf_t headers_buf =
+        uv_buf_init(reinterpret_cast<char*>(headers.data()), kHeaderSize);
+    uv_buf_t data_buf = uv_buf_init(cache_ptr, cache_size);
     uv_buf_t bufs[] = {headers_buf, data_buf};
 
     uv_fs_t write_req;
@@ -529,6 +701,12 @@ CompileCacheHandler::CompileCacheHandler(Environment* env)
       is_debug_(
           env->enabled_debug_list()->enabled(DebugCategory::COMPILE_CACHE)) {}
 
+CompileCacheHandler::~CompileCacheHandler() {
+  // The context is created lazily in ReadCacheFile() and may never exist.
+  if (zstd_dctx_ == nullptr) return;
+  ZSTD_freeDCtx(zstd_dctx_);
+}
+
 // Directory structure:
 // - Compile cache directory (from NODE_COMPILE_CACHE)
 //   - $NODE_VERSION-$ARCH-$CACHE_DATA_VERSION_TAG-$UID
diff --git a/src/compile_cache.h b/src/compile_cache.h
index 62934332103661..78501dff693d39 100644
--- a/src/compile_cache.h
+++ b/src/compile_cache.h
@@ -10,6 +10,8 @@
 #include <unordered_map>
 #include "v8.h"
 
+struct ZSTD_DCtx_s;
+
 namespace node {
 class Environment;
 
@@ -36,9 +38,11 @@ struct CompileCacheEntry {
   bool refreshed = false;
   bool persisted = false;
 
-  // Copy the cache into a new store for V8 to consume. Caller takes
-  // ownership.
-  v8::ScriptCompiler::CachedData* CopyCache() const;
+  // Wrap the cache into a non-owning CachedData for V8 to consume.
+  // The caller takes ownership of the returned wrapper object, while
+  // the underlying buffer remains owned by this entry and must outlive
+  // the consumption of the wrapper.
+  v8::ScriptCompiler::CachedData* WrapCache() const;
   const char* type_name() const;
 };
 
@@ -65,6 +69,7 @@ enum class EnableOption : uint8_t { DEFAULT, PORTABLE };
 class CompileCacheHandler {
  public:
   explicit CompileCacheHandler(Environment* env);
+  ~CompileCacheHandler();
   CompileCacheEnableResult Enable(Environment* env,
                                   const std::string& dir,
                                   EnableOption option = EnableOption::DEFAULT);
@@ -99,7 +104,9 @@ class CompileCacheHandler {
   static constexpr size_t kCacheSizeOffset = 2;
   static constexpr size_t kCodeHashOffset = 3;
   static constexpr size_t kCacheHashOffset = 4;
-  static constexpr size_t kHeaderCount = 5;
+  static constexpr size_t kCacheRawSizeOffset = 5;
+  static constexpr size_t kHeaderCount = 6;
+  static constexpr size_t kHeaderSize = kHeaderCount * sizeof(uint32_t);
 
   v8::Isolate* isolate_ = nullptr;
   bool is_debug_ = false;
@@ -109,6 +116,9 @@ class CompileCacheHandler {
   EnableOption portable_ = EnableOption::DEFAULT;
   std::unordered_map<uint32_t, std::unique_ptr<CompileCacheEntry>>
       compiler_cache_store_;
+  // Lazily created zstd decompression context, reused across cache reads
+  // to avoid recreating its workspace for every file.
+  ZSTD_DCtx_s* zstd_dctx_ = nullptr;
 };
 }  // namespace node
 
diff --git a/src/compile_cache_zstd.dict b/src/compile_cache_zstd.dict
new file mode 100644
index 00000000000000..b64455d45b1d82
Binary files /dev/null and b/src/compile_cache_zstd.dict differ
diff --git a/src/module_wrap.cc b/src/module_wrap.cc
index 87a8b4d57726af..1f0c0912dea013 100644
--- a/src/module_wrap.cc
+++ b/src/module_wrap.cc
@@ -521,8 +521,9 @@ MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
   }
 
   if (cache_entry != nullptr && cache_entry->cache != nullptr) {
-    // source will take ownership of cached_data.
-    cached_data = cache_entry->CopyCache();
+    // source only takes ownership of the cached_data wrapper - the
+    // underlying buffer is still owned by the compile cache entry.
+    cached_data = cache_entry->WrapCache();
   }
 
   ScriptCompiler::Source source(source_text, origin, cached_data);
diff --git a/src/node_contextify.cc b/src/node_contextify.cc
index f319420ae02f35..3d615dbe81e115 100644
--- a/src/node_contextify.cc
+++ b/src/node_contextify.cc
@@ -1675,8 +1675,9 @@ static MaybeLocal<Function> CompileFunctionForCJSLoader(
         code, filename, CachedCodeType::kCommonJS);
   }
   if (cache_entry != nullptr && cache_entry->cache != nullptr) {
-    // source will take ownership of cached_data.
-    cached_data = cache_entry->CopyCache();
+    // source only takes ownership of the cached_data wrapper - the
+    // underlying buffer is still owned by the compile cache entry.
+    cached_data = cache_entry->WrapCache();
   }
 
   ScriptCompiler::Source source(code, origin, cached_data);
diff --git a/test/parallel/test-compile-cache-corrupted.js b/test/parallel/test-compile-cache-corrupted.js
new file mode 100644
index 00000000000000..b63b82440c66e2
--- /dev/null
+++ b/test/parallel/test-compile-cache-corrupted.js
@@ -0,0 +1,128 @@
+'use strict';
+
+// This tests that NODE_COMPILE_CACHE gracefully discards corrupted
+// cache files and regenerates them.
+
+require('../common');
+const { spawnSyncAndAssert } = require('../common/child_process');
+const assert = require('assert');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+const tmpdir = require('../common/tmpdir');
+
+// Offsets into the cache file headers (see src/compile_cache.h).
+const kHeaderSize = 6 * 4;
+const kCacheRawSizeOffset = 5 * 4;
+
+function readU32(buf, offset) {  // Header fields are in host byte order.
+  const littleEndian = os.endianness() === 'LE';
+  return littleEndian ? buf.readUInt32LE(offset) : buf.readUInt32BE(offset);
+}
+
+// Stores `value` at `offset` as an unsigned 32-bit integer in host byte
+// order, mirroring readU32().
+function writeU32(buf, value, offset) {
+  const littleEndian = os.endianness() === 'LE';
+  const store = littleEndian ? buf.writeUInt32LE : buf.writeUInt32BE;
+  store.call(buf, value, offset);
+}
+
+tmpdir.refresh();
+const dir = tmpdir.resolve('.compile_cache_dir');
+const script = tmpdir.resolve('script.js');
+fs.writeFileSync(script, 'const foo = 1;', 'utf-8');
+
+const env = {
+  ...process.env,
+  NODE_DEBUG_NATIVE: 'COMPILE_CACHE',
+  NODE_COMPILE_CACHE: dir,
+};
+
+function getCacheFile() {  // Expects exactly one subdirectory with one file.
+  const versionDirs = fs.readdirSync(dir);
+  assert.strictEqual(versionDirs.length, 1);
+  const cacheFiles = fs.readdirSync(path.join(dir, versionDirs[0]));
+  assert.strictEqual(cacheFiles.length, 1);
+  return path.join(dir, versionDirs[0], cacheFiles[0]);
+}
+
+// Re-runs the script against the corrupted cache file. The debug output must
+// match `mismatchRE` (the reason the cache was discarded) and must show that
+// a fresh cache was written afterwards.
+function expectRecovery(mismatchRE) {
+  const rewrittenRE = /writing cache for .*script\.js.*success/;
+  const spawnOptions = { env, cwd: tmpdir.path };
+  spawnSyncAndAssert(process.execPath, [script], spawnOptions, {
+    stderr(output) {
+      console.log(output);  // Logging for debugging.
+      for (const pattern of [mismatchRE, rewrittenRE]) {
+        assert.match(output, pattern);
+      }
+      return true;
+    }
+  });
+}
+
+// Warm the cache.
+spawnSyncAndAssert(
+  process.execPath,
+  [script],
+  { env, cwd: tmpdir.path },
+  {
+    stderr(output) {
+      console.log(output);  // Logging for debugging.
+      assert.match(output, /writing cache for .*script\.js.*success/);
+      return true;
+    }
+  });
+const cacheFile = getCacheFile();
+assert(fs.readFileSync(cacheFile).length > kHeaderSize);
+
+{
+  // Corrupt the magic number.
+  const data = fs.readFileSync(cacheFile);
+  for (let i = 0; i < 4; i++) data[i] ^= 0xff;
+  fs.writeFileSync(cacheFile, data);
+  expectRecovery(
+    /reading cache from .* for CommonJS .*script\.js.*magic number mismatch/);
+}
+
+{
+  // Truncate the cache content.
+  const data = fs.readFileSync(cacheFile);
+  fs.writeFileSync(cacheFile, data.subarray(0, data.length - 3));
+  expectRecovery(
+    /reading cache from .* for CommonJS .*script\.js.*cache size mismatch/);
+}
+
+{
+  // Flip a byte in the middle of the cache content.
+  const data = fs.readFileSync(cacheFile);
+  data[kHeaderSize + Math.floor((data.length - kHeaderSize) / 2)] ^= 0xff;
+  fs.writeFileSync(cacheFile, data);
+  expectRecovery(
+    /reading cache from .* for CommonJS .*script\.js.*cache hash mismatch/);
+}
+
+{
+  // Corrupt the uncompressed size field in the headers.
+  const data = fs.readFileSync(cacheFile);
+  writeU32(data, readU32(data, kCacheRawSizeOffset) + 1, kCacheRawSizeOffset);
+  fs.writeFileSync(cacheFile, data);
+  expectRecovery(
+    /reading cache from .* for CommonJS .*script\.js.*uncompressed size mismatch/);
+}
+
+// After the last recovery the cache should be consumed just fine.
+spawnSyncAndAssert(
+  process.execPath,
+  [script],
+  { env, cwd: tmpdir.path },
+  {
+    stderr(output) {
+      console.log(output);  // Logging for debugging.
+      assert.match(output, /cache for .*script\.js was accepted/);
+      return true;
+    }
+  });
diff --git a/test/parallel/test-compile-cache-success.js b/test/parallel/test-compile-cache-success.js
index c02a6243286972..9417b6bf63c490 100644
--- a/test/parallel/test-compile-cache-success.js
+++ b/test/parallel/test-compile-cache-success.js
@@ -64,3 +64,83 @@ const path = require('path');
       }
     });
 }
+
+// Exercise the dictionary-compressed path (added on top of #63861) for many
+// small modules, which is where the embedded dictionary helps most. We write
+// the cache, then read it back and assert every entry is accepted - this
+// proves each dict-compressed frame decompresses to exactly the bytes that
+// were persisted.
+{
+  tmpdir.refresh();
+  const dir = tmpdir.resolve('.compile_cache_dir');
+
+  // Generate a handful of small modules so the dictionary path is exercised.
+  const count = 8;
+  const modules = [];
+  for (let i = 0; i < count; i++) {
+    const file = tmpdir.resolve(`mod-${i}.js`);
+    fs.writeFileSync(
+      file,
+      `'use strict';\n` +
+      `module.exports = function value${i}(a, b) {\n` +
+      `  const sum = a + b + ${i};\n` +
+      `  return { id: ${i}, sum, label: 'module-${i}' };\n` +
+      `};\n`);
+    modules.push(file);
+  }
+  const reqCode = modules.map((m) => `require(${JSON.stringify(m)});`).join('');
+
+  // First run writes the cache for every module.
+  spawnSyncAndAssert(
+    process.execPath,
+    ['-e', reqCode],
+    {
+      env: {
+        ...process.env,
+        NODE_DEBUG_NATIVE: 'COMPILE_CACHE',
+        NODE_COMPILE_CACHE: dir
+      },
+      cwd: tmpdir.path
+    },
+    {
+      stderr(output) {
+        for (const m of modules) {
+          const name = path.basename(m).replace(/[.]/g, '\\.');
+          assert.match(output, new RegExp(`writing cache for .*${name}.*success`));
+        }
+        return true;
+      }
+    });
+
+  const cacheDirs = fs.readdirSync(dir);
+  assert.strictEqual(cacheDirs.length, 1);
+  // At least one entry per module (the `-e` runner is cached too).
+  const entries = fs.readdirSync(path.join(dir, cacheDirs[0]));
+  assert(entries.length >= count, `expected >= ${count} entries, got ${entries.length}`);
+
+  // Second run reads every cached entry back. "was accepted" is only logged
+  // when V8 did not reject the cached data that was read (and decompressed)
+  // from disk, so this covers the roundtrip of dictionary-compressed entries.
+  spawnSyncAndAssert(
+    process.execPath,
+    ['-e', reqCode],
+    {
+      env: {
+        ...process.env,
+        NODE_DEBUG_NATIVE: 'COMPILE_CACHE',
+        NODE_COMPILE_CACHE: dir
+      },
+      cwd: tmpdir.path
+    },
+    {
+      stderr(output) {
+        for (const m of modules) {
+          const name = path.basename(m).replace(/[.]/g, '\\.');
+          assert.match(
+            output,
+            new RegExp(`cache for .*${name} was accepted, keeping the in-memory entry`));
+        }
+        return true;
+      }
+    });
+}
diff --git a/tools/generate_compile_cache_dict.py b/tools/generate_compile_cache_dict.py
new file mode 100644
index 00000000000000..8794889775bb63
--- /dev/null
+++ b/tools/generate_compile_cache_dict.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""Generate compile_cache_zstd_dict.h from a trained zstd .dict file.
+
+Invoked by the GYP action in node.gyp at build time. Only the small binary
+.dict (src/compile_cache_zstd.dict) is checked into the repository; the C
+array it produces is generated into SHARED_INTERMEDIATE_DIR.
+"""
+import os
+import sys
+
+
+def main(dict_path, out_path):
+    """Write the bytes of `dict_path` to `out_path` as a C/C++ header.
+
+    The header defines `kCompileCacheZstdDict` (the dictionary bytes) and
+    `kCompileCacheZstdDictSize` (their count). Exits with an error message
+    if the dictionary file is empty, because an initializer list without
+    elements cannot give the array a valid size.
+    """
+    with open(dict_path, 'rb') as f:
+        data = f.read()
+    if not data:
+        sys.exit('%s: dictionary file is empty' % dict_path)
+
+    lines = [
+        '// Generated by tools/generate_compile_cache_dict.py',
+        '// from %s' % os.path.basename(dict_path),
+        '// The .dict file is the source of truth; do not edit by hand.',
+        '',
+        # size_t is used below; include its header so that the generated
+        # file does not depend on what the including file included first.
+        '#include <stddef.h>',
+        '',
+        'static const unsigned char kCompileCacheZstdDict[] = {',
+    ]
+    for i in range(0, len(data), 12):
+        chunk = data[i:i + 12]
+        lines.append('  %s,' % ', '.join('0x%02x' % b for b in chunk))
+    lines.append('};')
+    lines.append('static const size_t kCompileCacheZstdDictSize = %d;' %
+                 len(data))
+
+    with open(out_path, 'w') as f:
+        f.write('\n'.join(lines) + '\n')
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        sys.exit('Usage: %s <input.dict> <output.h>' % sys.argv[0])
+    main(sys.argv[1], sys.argv[2])