From 065fecc57e9a962c97d488ac121df2816f680082 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Fri, 29 Jul 2022 10:55:26 +0200 Subject: [PATCH 1/4] Swift: extract precompiled swiftmodule files Previously we were not extracting any `swiftmodule` file that was not a system or a built-in one. This was done to avoid re-extracting `swiftmodule` files that were built previously in the same build, but it turned out to be too eager, as there are legitimate cases where a non-system, non-built-in precompiled swift module can be used. An example of that is the `PackageDescription` module used in Swift Package Manager manifest files (`Package.swift`). We now relax the test and trigger module extraction on all loaded modules that do not have source files (we trigger source file extraction for those). The catch, is that we also create empty trap files for current output `swiftmodule` files (including possible alias locations set up by XCode). This means that if a following extractor run loads a previously built `swiftmodule` file, although it will trigger module extraction, this will however be skipped as it will find its target file already present (this is done via the `TargetFile` semantics). --- swift/extractor/SwiftExtractor.cpp | 37 ++++++++++++------- swift/extractor/SwiftExtractorConfiguration.h | 4 ++ swift/extractor/SwiftOutputRewrite.cpp | 12 +++++- swift/extractor/SwiftOutputRewrite.h | 5 ++- swift/extractor/main.cpp | 1 + .../partial-modules/Modules.expected | 1 + swift/integration-tests/runner.py | 2 + 7 files changed, 46 insertions(+), 16 deletions(-) diff --git a/swift/extractor/SwiftExtractor.cpp b/swift/extractor/SwiftExtractor.cpp index 666b7a900444..bbf2a2551fd2 100644 --- a/swift/extractor/SwiftExtractor.cpp +++ b/swift/extractor/SwiftExtractor.cpp @@ -165,22 +165,31 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config, auto inputFiles = collectInputFilenames(compiler); auto modules = collectModules(compiler); + // we want to make sure any following extractor run will not try to extract things from + // the swiftmodule files we are creating in this run, as those things will already have been + // extracted from source with more information. We do this by creating empty trap files. + // TargetFile semantics will ensure any following run trying to extract that swiftmodule will just + // skip doing it + auto outputModuleTrapSuffix = "-" + compiler.getMainModule()->getName().str().str() + ".trap"; + for (const auto& output : config.outputSwiftModules) { + TargetFile::create(output + outputModuleTrapSuffix, config.trapDir, config.getTempTrapDir()); + } for (auto& module : modules) { - // We only extract system and builtin modules here as the other "user" modules can be built - // during the build process and then re-used at a later stage. In this case, we extract the - // user code twice: once during the module build in a form of a source file, and then as - // a pre-built module during building of the dependent source files. - if (module->isSystemModule() || module->isBuiltinModule()) { - extractDeclarations(config, compiler, *module); - } else { - for (auto file : module->getFiles()) { - auto sourceFile = llvm::dyn_cast(file); - if (!sourceFile || inputFiles.count(sourceFile->getFilename().str()) == 0) { - continue; - } - archiveFile(config, *sourceFile); - extractDeclarations(config, compiler, *module, sourceFile); + bool isFromSourceFile = false; + for (auto file : module->getFiles()) { + auto sourceFile = llvm::dyn_cast(file); + if (!sourceFile) { + continue; + } + isFromSourceFile = true; + if (inputFiles.count(sourceFile->getFilename().str()) == 0) { + continue; } + archiveFile(config, *sourceFile); + extractDeclarations(config, compiler, *module, sourceFile); + } + if (!isFromSourceFile) { + extractDeclarations(config, compiler, *module); } } } diff --git a/swift/extractor/SwiftExtractorConfiguration.h b/swift/extractor/SwiftExtractorConfiguration.h index 3365da5f268b..eec73e7dc21b 100644 --- a/swift/extractor/SwiftExtractorConfiguration.h +++ b/swift/extractor/SwiftExtractorConfiguration.h @@ -32,5 +32,9 @@ struct SwiftExtractorConfiguration { // A temporary directory that contains build artifacts generated by the extractor during the // overall extraction process. std::string getTempArtifactDir() const { return scratchDir + "/swift-extraction-artifacts"; } + + // Output swiftmodule files. This also includes possible locations where XCode internally moves + // modules + std::vector outputSwiftModules; }; } // namespace codeql diff --git a/swift/extractor/SwiftOutputRewrite.cpp b/swift/extractor/SwiftOutputRewrite.cpp index 35a38512ff8f..f85290f5beba 100644 --- a/swift/extractor/SwiftOutputRewrite.cpp +++ b/swift/extractor/SwiftOutputRewrite.cpp @@ -163,7 +163,7 @@ static std::vector computeModuleAliases(llvm::StringRef modulePath, namespace codeql { std::unordered_map rewriteOutputsInPlace( - SwiftExtractorConfiguration& config, + const SwiftExtractorConfiguration& config, std::vector& CLIArgs) { std::unordered_map remapping; @@ -323,5 +323,15 @@ std::vector collectVFSFiles(const SwiftExtractorConfiguration& conf return overlays; } +std::vector getOutputSwiftModules( + const std::unordered_map& remapping) { + std::vector ret; + for (const auto& [oldPath, newPath] : remapping) { + if (llvm::StringRef(oldPath).endswith(".swiftmodule")) { + ret.push_back(oldPath); + } + } + return ret; +} } // namespace codeql diff --git a/swift/extractor/SwiftOutputRewrite.h b/swift/extractor/SwiftOutputRewrite.h index b7ee7fa38293..61ef2d98ea66 100644 --- a/swift/extractor/SwiftOutputRewrite.h +++ b/swift/extractor/SwiftOutputRewrite.h @@ -13,7 +13,7 @@ struct SwiftExtractorConfiguration; // artifacts produced by the actual Swift compiler. // Returns the map containing remapping oldpath -> newPath. std::unordered_map rewriteOutputsInPlace( - SwiftExtractorConfiguration& config, + const SwiftExtractorConfiguration& config, std::vector& CLIArgs); // Create directories for all the redirected new paths as the Swift compiler expects them to exist. @@ -29,4 +29,7 @@ void storeRemappingForVFS(const SwiftExtractorConfiguration& config, // This is separate from storeRemappingForVFS as we also collect files produced by other processes. std::vector collectVFSFiles(const SwiftExtractorConfiguration& config); +// Returns a list of output remapped swift module files +std::vector getOutputSwiftModules( + const std::unordered_map& remapping); } // namespace codeql diff --git a/swift/extractor/main.cpp b/swift/extractor/main.cpp index bde37b0ccb5e..d9ad34754772 100644 --- a/swift/extractor/main.cpp +++ b/swift/extractor/main.cpp @@ -68,6 +68,7 @@ int main(int argc, char** argv) { codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions); codeql::ensureDirectoriesForNewPathsExist(remapping); codeql::storeRemappingForVFS(configuration, remapping); + configuration.outputSwiftModules = codeql::getOutputSwiftModules(remapping); std::vector args; for (auto& arg : configuration.patchedFrontendOptions) { diff --git a/swift/integration-tests/posix-only/partial-modules/Modules.expected b/swift/integration-tests/posix-only/partial-modules/Modules.expected index 4c738975f343..3cdcff9b9804 100644 --- a/swift/integration-tests/posix-only/partial-modules/Modules.expected +++ b/swift/integration-tests/posix-only/partial-modules/Modules.expected @@ -1,4 +1,5 @@ | file://:0:0:0:0 | A | | file://:0:0:0:0 | B | +| file://:0:0:0:0 | PackageDescription | | file://:0:0:0:0 | main | | file://:0:0:0:0 | partial_modules | diff --git a/swift/integration-tests/runner.py b/swift/integration-tests/runner.py index b9e39325fd96..77a62bfb81ba 100755 --- a/swift/integration-tests/runner.py +++ b/swift/integration-tests/runner.py @@ -62,6 +62,8 @@ def main(opts): ] if opts.check_databases: cmd.append("--check-databases") + else: + cmd.append("--no-check-databases") if opts.learn: cmd.append("--learn") cmd.extend(str(t.parent) for t in succesful_db_creation) From 604328ea5f69aeedc84ad63eebfc7d8eef769e0c Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Fri, 29 Jul 2022 12:25:11 +0200 Subject: [PATCH 2/4] Swift: strip suffix from swiftmodule trap files --- swift/extractor/SwiftExtractor.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/swift/extractor/SwiftExtractor.cpp b/swift/extractor/SwiftExtractor.cpp index 9d19d31c646b..46204ee597e4 100644 --- a/swift/extractor/SwiftExtractor.cpp +++ b/swift/extractor/SwiftExtractor.cpp @@ -56,8 +56,10 @@ static std::string getFilename(swift::ModuleDecl& module, swift::SourceFile* pri } // PCM clang module if (module.isNonSwiftModule()) { - // Several modules with different name might come from .pcm (clang module) files + // Several modules with different names might come from .pcm (clang module) files // In this case we want to differentiate them + // Moreover, pcm files may come from caches located in different directories, but are + // unambiguously identified by the base file name, so we can discard the absolute directory std::string filename = "/pcms/"s + llvm::sys::path::filename(module.getModuleFilename()).str(); filename += "-"; filename += module.getName().str(); @@ -175,9 +177,8 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config, // extracted from source with more information. We do this by creating empty trap files. // TargetFile semantics will ensure any following run trying to extract that swiftmodule will just // skip doing it - auto outputModuleTrapSuffix = "-" + compiler.getMainModule()->getName().str().str() + ".trap"; for (const auto& output : config.outputSwiftModules) { - TargetFile::create(output + outputModuleTrapSuffix, config.trapDir, config.getTempTrapDir()); + TargetFile::create(output, config.trapDir, config.getTempTrapDir()); } for (auto& module : modules) { bool isFromSourceFile = false; From 099ab0e0c2e6e3b4d64718b05339661f9290466e Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Fri, 29 Jul 2022 12:26:33 +0200 Subject: [PATCH 3/4] Swift: readd .trap suffix to swiftmodule trap files --- swift/extractor/SwiftExtractor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/extractor/SwiftExtractor.cpp b/swift/extractor/SwiftExtractor.cpp index 46204ee597e4..debbc5c99af9 100644 --- a/swift/extractor/SwiftExtractor.cpp +++ b/swift/extractor/SwiftExtractor.cpp @@ -178,7 +178,7 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config, // TargetFile semantics will ensure any following run trying to extract that swiftmodule will just // skip doing it for (const auto& output : config.outputSwiftModules) { - TargetFile::create(output, config.trapDir, config.getTempTrapDir()); + TargetFile::create(output + ".trap", config.trapDir, config.getTempTrapDir()); } for (auto& module : modules) { bool isFromSourceFile = false; From daf1fa3c31df2d21aa3cea0c812fc68c26614479 Mon Sep 17 00:00:00 2001 From: Paolo Tranquilli Date: Fri, 29 Jul 2022 16:27:55 +0200 Subject: [PATCH 4/4] Swift: lock built swiftmodule traps in main This should cover `-merge-modules` mode. Dumping of the configuration to the target files was moved to a separate pair of header/source files, as now it is also done in `SwiftOutputRewrite.cpp`. --- swift/extractor/BUILD.bazel | 12 +++------ swift/extractor/SwiftExtractor.cpp | 27 ++----------------- swift/extractor/SwiftExtractorConfiguration.h | 7 +++-- swift/extractor/SwiftOutputRewrite.cpp | 14 ++++++---- swift/extractor/SwiftOutputRewrite.h | 6 ++--- swift/extractor/TargetTrapFile.cpp | 23 ++++++++++++++++ swift/extractor/TargetTrapFile.h | 11 ++++++++ swift/extractor/main.cpp | 2 +- 8 files changed, 56 insertions(+), 46 deletions(-) create mode 100644 swift/extractor/TargetTrapFile.cpp create mode 100644 swift/extractor/TargetTrapFile.h diff --git a/swift/extractor/BUILD.bazel b/swift/extractor/BUILD.bazel index b22841ca819b..bf5c9e65d520 100644 --- a/swift/extractor/BUILD.bazel +++ b/swift/extractor/BUILD.bazel @@ -2,14 +2,10 @@ load("//swift:rules.bzl", "swift_cc_binary") swift_cc_binary( name = "extractor", - srcs = [ - "SwiftOutputRewrite.cpp", - "SwiftOutputRewrite.h", - "SwiftExtractor.cpp", - "SwiftExtractor.h", - "SwiftExtractorConfiguration.h", - "main.cpp", - ], + srcs = glob([ + "*.h", + "*.cpp", + ]), visibility = ["//swift:__pkg__"], deps = [ "//swift/extractor/infra", diff --git a/swift/extractor/SwiftExtractor.cpp b/swift/extractor/SwiftExtractor.cpp index debbc5c99af9..22f4f01a470a 100644 --- a/swift/extractor/SwiftExtractor.cpp +++ b/swift/extractor/SwiftExtractor.cpp @@ -14,7 +14,7 @@ #include "swift/extractor/trap/generated/TrapClasses.h" #include "swift/extractor/trap/TrapDomain.h" #include "swift/extractor/visitors/SwiftVisitor.h" -#include "swift/extractor/infra/TargetFile.h" +#include "swift/extractor/TargetTrapFile.h" using namespace codeql; using namespace std::string_literals; @@ -80,20 +80,6 @@ static llvm::SmallVector getTopLevelDecls(swift::ModuleDecl& modul return ret; } -static void dumpArgs(TargetFile& out, const SwiftExtractorConfiguration& config) { - out << "/* extractor-args:\n"; - for (const auto& opt : config.frontendOptions) { - out << " " << std::quoted(opt) << " \\\n"; - } - out << "\n*/\n"; - - out << "/* swift-frontend-args:\n"; - for (const auto& opt : config.patchedFrontendOptions) { - out << " " << std::quoted(opt) << " \\\n"; - } - out << "\n*/\n"; -} - static void extractDeclarations(const SwiftExtractorConfiguration& config, swift::CompilerInstance& compiler, swift::ModuleDecl& module, @@ -103,12 +89,11 @@ static void extractDeclarations(const SwiftExtractorConfiguration& config, // The extractor can be called several times from different processes with // the same input file(s). Using `TargetFile` the first process will win, and the following // will just skip the work - auto trapTarget = TargetFile::create(filename + ".trap", config.trapDir, config.getTempTrapDir()); + auto trapTarget = createTargetTrapFile(config, filename); if (!trapTarget) { // another process arrived first, nothing to do for us return; } - dumpArgs(*trapTarget, config); TrapDomain trap{*trapTarget}; // TODO: remove this and recreate it with IPA when we have that @@ -172,14 +157,6 @@ void codeql::extractSwiftFiles(const SwiftExtractorConfiguration& config, auto inputFiles = collectInputFilenames(compiler); auto modules = collectModules(compiler); - // we want to make sure any following extractor run will not try to extract things from - // the swiftmodule files we are creating in this run, as those things will already have been - // extracted from source with more information. We do this by creating empty trap files. - // TargetFile semantics will ensure any following run trying to extract that swiftmodule will just - // skip doing it - for (const auto& output : config.outputSwiftModules) { - TargetFile::create(output + ".trap", config.trapDir, config.getTempTrapDir()); - } for (auto& module : modules) { bool isFromSourceFile = false; for (auto file : module->getFiles()) { diff --git a/swift/extractor/SwiftExtractorConfiguration.h b/swift/extractor/SwiftExtractorConfiguration.h index eec73e7dc21b..cd4ed51cdcd4 100644 --- a/swift/extractor/SwiftExtractorConfiguration.h +++ b/swift/extractor/SwiftExtractorConfiguration.h @@ -3,6 +3,8 @@ #include #include +#include "swift/extractor/infra/TargetFile.h" + namespace codeql { struct SwiftExtractorConfiguration { // The location for storing TRAP files to be imported by CodeQL engine. @@ -32,9 +34,6 @@ struct SwiftExtractorConfiguration { // A temporary directory that contains build artifacts generated by the extractor during the // overall extraction process. std::string getTempArtifactDir() const { return scratchDir + "/swift-extraction-artifacts"; } - - // Output swiftmodule files. This also includes possible locations where XCode internally moves - // modules - std::vector outputSwiftModules; }; + } // namespace codeql diff --git a/swift/extractor/SwiftOutputRewrite.cpp b/swift/extractor/SwiftOutputRewrite.cpp index f85290f5beba..b09a558d1873 100644 --- a/swift/extractor/SwiftOutputRewrite.cpp +++ b/swift/extractor/SwiftOutputRewrite.cpp @@ -1,5 +1,6 @@ #include "SwiftOutputRewrite.h" #include "swift/extractor/SwiftExtractorConfiguration.h" +#include "swift/extractor/TargetTrapFile.h" #include #include @@ -323,15 +324,18 @@ std::vector collectVFSFiles(const SwiftExtractorConfiguration& conf return overlays; } -std::vector getOutputSwiftModules( - const std::unordered_map& remapping) { - std::vector ret; + +void lockOutputSwiftModuleTraps(const SwiftExtractorConfiguration& config, + const std::unordered_map& remapping) { for (const auto& [oldPath, newPath] : remapping) { if (llvm::StringRef(oldPath).endswith(".swiftmodule")) { - ret.push_back(oldPath); + if (auto target = createTargetTrapFile(config, oldPath)) { + *target << "// trap file deliberately empty\n" + "// this swiftmodule was created during the build, so its entities must have" + " been extracted directly from source files"; + } } } - return ret; } } // namespace codeql diff --git a/swift/extractor/SwiftOutputRewrite.h b/swift/extractor/SwiftOutputRewrite.h index 61ef2d98ea66..94f1eeb6aaab 100644 --- a/swift/extractor/SwiftOutputRewrite.h +++ b/swift/extractor/SwiftOutputRewrite.h @@ -29,7 +29,7 @@ void storeRemappingForVFS(const SwiftExtractorConfiguration& config, // This is separate from storeRemappingForVFS as we also collect files produced by other processes. std::vector collectVFSFiles(const SwiftExtractorConfiguration& config); -// Returns a list of output remapped swift module files -std::vector getOutputSwiftModules( - const std::unordered_map& remapping); +// Creates empty trap files for output swiftmodule files +void lockOutputSwiftModuleTraps(const SwiftExtractorConfiguration& config, + const std::unordered_map& remapping); } // namespace codeql diff --git a/swift/extractor/TargetTrapFile.cpp b/swift/extractor/TargetTrapFile.cpp new file mode 100644 index 000000000000..2275575ecfa7 --- /dev/null +++ b/swift/extractor/TargetTrapFile.cpp @@ -0,0 +1,23 @@ +#include "swift/extractor/TargetTrapFile.h" +#include +namespace codeql { +std::optional createTargetTrapFile(const SwiftExtractorConfiguration& configuration, + std::string_view target) { + std::string trap{target}; + trap += ".trap"; + auto ret = TargetFile::create(trap, configuration.trapDir, configuration.getTempTrapDir()); + if (ret) { + *ret << "/* extractor-args:\n"; + for (const auto& opt : configuration.frontendOptions) { + *ret << " " << std::quoted(opt) << " \\\n"; + } + *ret << "\n*/\n" + "/* swift-frontend-args:\n"; + for (const auto& opt : configuration.patchedFrontendOptions) { + *ret << " " << std::quoted(opt) << " \\\n"; + } + *ret << "\n*/\n"; + } + return ret; +} +} // namespace codeql diff --git a/swift/extractor/TargetTrapFile.h b/swift/extractor/TargetTrapFile.h new file mode 100644 index 000000000000..eb8de4c206f5 --- /dev/null +++ b/swift/extractor/TargetTrapFile.h @@ -0,0 +1,11 @@ +#pragma once + +#include "swift/extractor/infra/TargetFile.h" +#include "swift/extractor/SwiftExtractorConfiguration.h" + +namespace codeql { + +std::optional createTargetTrapFile(const SwiftExtractorConfiguration& configuration, + std::string_view target); + +} // namespace codeql diff --git a/swift/extractor/main.cpp b/swift/extractor/main.cpp index d9ad34754772..5f9afef4e812 100644 --- a/swift/extractor/main.cpp +++ b/swift/extractor/main.cpp @@ -68,7 +68,7 @@ int main(int argc, char** argv) { codeql::rewriteOutputsInPlace(configuration, configuration.patchedFrontendOptions); codeql::ensureDirectoriesForNewPathsExist(remapping); codeql::storeRemappingForVFS(configuration, remapping); - configuration.outputSwiftModules = codeql::getOutputSwiftModules(remapping); + codeql::lockOutputSwiftModuleTraps(configuration, remapping); std::vector args; for (auto& arg : configuration.patchedFrontendOptions) {