From ca1120b309d1defbb1068b69b78e68fd05acbf1a Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Tue, 8 Apr 2025 22:40:43 +0000 Subject: [PATCH 1/2] Add support for more source map fields Read the "sourcesContent", "file", and "sourceRoot" fields from incoming source maps, attach them to the wasm IR module, and write them back to the output source map. These fields are unchanged by Binaryen's updates to the mappings, so they do not need to be decoded or interpreted. Fixes #6805 --- src/wasm.h | 8 +++- src/wasm/source-map.cpp | 27 ++++++++++++ src/wasm/wasm-binary.cpp | 43 +++++++++++++------- test/gtest/source-map.cpp | 35 ++++++++++++++++ test/lit/sourcemap-sourceroot-file.wat | 14 +++++++ test/lit/sourcemap-sourceroot-file.wat.map | 9 ++++ test/lit/sourcemap-sourceroot-file.wat.wasm | Bin 0 -> 8 bytes 7 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 test/lit/sourcemap-sourceroot-file.wat create mode 100644 test/lit/sourcemap-sourceroot-file.wat.map create mode 100644 test/lit/sourcemap-sourceroot-file.wat.wasm diff --git a/src/wasm.h b/src/wasm.h index 97098fd749a..3d4ce837b94 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -2406,9 +2406,15 @@ class Module { // Optional user section IR representation. std::unique_ptr dylinkSection; - // Source maps debug info. + // Source maps debug info. All of these fields are read directly in from the + // source map and are encoded as in the original JSON (UTF-8 encoded + // with escaped quotes and slashes). The string values are uninterpreted in + // Binaryen, and they are written directly back out without re-encoding. std::vector debugInfoFileNames; std::vector debugInfoSymbolNames; + std::string debugInfoSourceRoot; + std::string debugInfoFile; + std::vector debugInfoSourcesContent; // `features` are the features allowed to be used in this module and should be // respected regardless of the value of`hasFeaturesSection`. 
diff --git a/src/wasm/source-map.cpp b/src/wasm/source-map.cpp index adba2504350..7844d2d113b 100644 --- a/src/wasm/source-map.cpp +++ b/src/wasm/source-map.cpp @@ -59,6 +59,17 @@ void SourceMapReader::parse(Module& wasm) { wasm.debugInfoFileNames.push_back(v->getCString()); } + if (json.has("sourcesContent")) { + json::Ref sc = json["sourcesContent"]; + if (!sc->isArray()) { + throw MapParseException("Source map sourcesContent is not an array"); + } + for (size_t i = 0; i < sc->size(); i++) { + wasm.debugInfoSourcesContent.push_back(sc[i]->getCString()); + } + } + + if (json.has("names")) { json::Ref n = json["names"]; if (!n->isArray()) { @@ -73,6 +84,22 @@ void SourceMapReader::parse(Module& wasm) { } } + if (json.has("sourceRoot")) { + json::Ref sr = json["sourceRoot"]; + if (!sr->isString()) { + throw MapParseException("Source map sourceRoot is not a string"); + } + wasm.debugInfoSourceRoot = sr->getCString(); + } + + if (json.has("file")) { + json::Ref f = json["file"]; + if (!f->isString()) { + throw MapParseException("Source map file is not a string"); + } + wasm.debugInfoFile = f->getCString(); + } + if (!json.has("mappings")) { throw MapParseException("Source map mappings missing"); } diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 415fc9a8730..b2c5842f4a4 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -1226,25 +1226,40 @@ void WasmBinaryWriter::writeSourceMapProlog() { } } - *sourceMap << "\"sources\":["; - for (size_t i = 0; i < wasm->debugInfoFileNames.size(); i++) { - if (i > 0) { - *sourceMap << ","; + auto writeOptionalString = [&](const char* name, const std::string& str) { + if (!str.empty()) { + *sourceMap << "\"" << name << "\":\"" << str << "\","; } - // TODO respect JSON string encoding, e.g. quotes and control chars. 
- *sourceMap << "\"" << wasm->debugInfoFileNames[i] << "\""; - } - *sourceMap << "],\"names\":["; + }; - for (size_t i = 0; i < wasm->debugInfoSymbolNames.size(); i++) { - if (i > 0) { - *sourceMap << ","; + writeOptionalString("file", wasm->debugInfoFile); + writeOptionalString("sourceRoot", wasm->debugInfoSourceRoot); + + auto writeStringVector = [&](const char* name, + const std::vector& vec) { + *sourceMap << "\"" << name << "\":["; + for (size_t i = 0; i < vec.size(); i++) { + if (i > 0) { + *sourceMap << ","; + } + *sourceMap << "\"" << vec[i] << "\""; } - // TODO respect JSON string encoding, e.g. quotes and control chars. - *sourceMap << "\"" << wasm->debugInfoSymbolNames[i] << "\""; + *sourceMap << "],"; + }; + + writeStringVector("sources", wasm->debugInfoFileNames); + + if (!wasm->debugInfoSourcesContent.empty()) { + writeStringVector("sourcesContent", wasm->debugInfoSourcesContent); } - *sourceMap << "],\"mappings\":\""; + // TODO: This field is optional; maybe we should omit if it's empty. + // TODO: Binaryen actually does not correctly preserve symbol names when it + // rewrites the mappings. We should maybe just drop them, or else handle + // them correctly. + writeStringVector("names", wasm->debugInfoSymbolNames); + + *sourceMap << "\"mappings\":\""; } static void writeBase64VLQ(std::ostream& out, int32_t n) { diff --git a/test/gtest/source-map.cpp b/test/gtest/source-map.cpp index 5076fb8ded1..c6d0ef72309 100644 --- a/test/gtest/source-map.cpp +++ b/test/gtest/source-map.cpp @@ -153,3 +153,38 @@ TEST_F(SourceMapTest, Fibonacci) { // program? 
ExpectDbgLocEq(9999, 0, 8, 0, std::nullopt); } + +TEST_F(SourceMapTest, SourceMapSourceRootFile) { + std::string sourceMap = R"( + { + "version":3, + "file": "foo.wasm", + "sources":[], + "names":[], + "mappings": "", + "sourceRoot": "/foo/bar" + } + )"; + parseMap(sourceMap); + EXPECT_EQ(wasm.debugInfoSourceRoot, "/foo/bar"); + EXPECT_EQ(wasm.debugInfoFile, "foo.wasm"); +} + + +TEST_F(SourceMapTest, SourcesContent) { + // The backslash escapes appear in the JSON encoding, and are preserved in + // the internal representation. The string values are uninterpreted in + // Binaryen, and they are written directly back out without re-encoding. + std::string sourceMap = R"( + { + "version": 3, + "sources": ["foo.c"], + "sourcesContent": ["#include int main()\n{ printf(\"Gr\u00fc\u00df Gott, Welt!\"); return 0;}"], + "mappings" : "" + } + )"; + parseMap(sourceMap); + ASSERT_EQ(wasm.debugInfoSourcesContent.size(), 1); + EXPECT_EQ(wasm.debugInfoSourcesContent[0], + "#include int main()\\n{ printf(\\\"Gr\\u00fc\\u00df Gott, Welt!\\\"); return 0;}"); +} diff --git a/test/lit/sourcemap-sourceroot-file.wat b/test/lit/sourcemap-sourceroot-file.wat new file mode 100644 index 00000000000..12bcc693d50 --- /dev/null +++ b/test/lit/sourcemap-sourceroot-file.wat @@ -0,0 +1,14 @@ +;; RUN: wasm-opt %s.wasm -ism %s.map -osm %t -o %t2 +;; Running multiple times is needed here because the output is all on one line. +;; RUN: cat %t | filecheck %s --check-prefix=FILE +;; RUN: cat %t | filecheck %s --check-prefix=SOURCEROOT +;; RUN: cat %t | filecheck %s --check-prefix=CONTENT + + +;; This wat file is not actually part of the test (the binary file is used), +;; but no comments are allowed in JSON so the RUN and CHECK lines are here. 
+ +;; FILE: "file":"foo.wasm", +;; SOURCEROOT: "sourceRoot":"/foo/bar", +;; CONTENT: "sourcesContent":["#include int main()\n{ printf(\"Gr\u00fc\u00df Gott, Welt!\"); return 0;}"] +(module) diff --git a/test/lit/sourcemap-sourceroot-file.wat.map b/test/lit/sourcemap-sourceroot-file.wat.map new file mode 100644 index 00000000000..3f98ccd28a2 --- /dev/null +++ b/test/lit/sourcemap-sourceroot-file.wat.map @@ -0,0 +1,9 @@ +{ + "version":3, + "file": "foo.wasm", + "sources":[], + "names":[], + "mappings": "", + "sourceRoot": "/foo/bar", + "sourcesContent": ["#include int main()\n{ printf(\"Gr\u00fc\u00df Gott, Welt!\"); return 0;}"] +} diff --git a/test/lit/sourcemap-sourceroot-file.wat.wasm b/test/lit/sourcemap-sourceroot-file.wat.wasm new file mode 100644 index 0000000000000000000000000000000000000000..d8fc92d022fbf4d1072da17bc8e0840054b51ddc GIT binary patch literal 8 PcmZQbEY4+QU|;|M2ZjMd literal 0 HcmV?d00001 From 10c20dba651b2f667a82965b3472daf627cbfe1d Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Wed, 9 Apr 2025 17:50:25 +0000 Subject: [PATCH 2/2] clang-format --- src/wasm/source-map.cpp | 1 - test/gtest/source-map.cpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/wasm/source-map.cpp b/src/wasm/source-map.cpp index 7844d2d113b..cce8a5967f1 100644 --- a/src/wasm/source-map.cpp +++ b/src/wasm/source-map.cpp @@ -69,7 +69,6 @@ void SourceMapReader::parse(Module& wasm) { } } - if (json.has("names")) { json::Ref n = json["names"]; if (!n->isArray()) { diff --git a/test/gtest/source-map.cpp b/test/gtest/source-map.cpp index c6d0ef72309..657034befcb 100644 --- a/test/gtest/source-map.cpp +++ b/test/gtest/source-map.cpp @@ -170,7 +170,6 @@ TEST_F(SourceMapTest, SourceMapSourceRootFile) { EXPECT_EQ(wasm.debugInfoFile, "foo.wasm"); } - TEST_F(SourceMapTest, SourcesContent) { // The backslash escapes appear in the JSON encoding, and are preserved in // the internal representation. 
The string values are uninterpreted in @@ -186,5 +185,6 @@ TEST_F(SourceMapTest, SourcesContent) { parseMap(sourceMap); ASSERT_EQ(wasm.debugInfoSourcesContent.size(), 1); EXPECT_EQ(wasm.debugInfoSourcesContent[0], - "#include int main()\\n{ printf(\\\"Gr\\u00fc\\u00df Gott, Welt!\\\"); return 0;}"); + "#include int main()\\n{ printf(\\\"Gr\\u00fc\\u00df " + "Gott, Welt!\\\"); return 0;}"); }