From 74022b7a6a78a37570e2d9255284d83e51281465 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Fri, 19 Jun 2026 18:22:31 +0200 Subject: [PATCH 1/3] feat: Expand the jar checks to stricter agent jar validation * Maintains the size check. * Verify some required entries * Ensure there's a minimum number of classes in the whole jar * Ensure products are correctly included and have at least one class * Light size check on the indexes * Fixed list of packages that should not appear in the jar * Run checks as part of the build job, to catch issues earlier --- .gitlab-ci.yml | 2 +- dd-java-agent/build.gradle | 113 ++++++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 8 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index eee9eadfc68..c31f251ef52 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -336,7 +336,7 @@ build: script: - if [ $CI_PIPELINE_SOURCE == "schedule" ] ; then ./gradlew resolveAndLockAll --write-locks $GRADLE_ARGS; fi - ./gradlew --version - - ./gradlew clean :dd-java-agent:shadowJar :dd-trace-api:jar :dd-trace-ot:shadowJar -PskipTests $GRADLE_ARGS + - ./gradlew clean :dd-java-agent:shadowJar :dd-java-agent:check :dd-trace-api:jar :dd-trace-ot:shadowJar -PskipTests -x spotlessCheck $GRADLE_ARGS - echo UPSTREAM_TRACER_VERSION=$(java -jar workspace/dd-java-agent/build/libs/*.jar) >> upstream.env - echo "BUILD_JOB_NAME=$CI_JOB_NAME" >> build.env - echo "BUILD_JOB_ID=$CI_JOB_ID" >> build.env diff --git a/dd-java-agent/build.gradle b/dd-java-agent/build.gradle index a2001cb4ee8..ae78df4410c 100644 --- a/dd-java-agent/build.gradle +++ b/dd-java-agent/build.gradle @@ -1,5 +1,6 @@ import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar import java.util.concurrent.atomic.AtomicBoolean +import java.util.jar.JarFile plugins { id 'com.gradleup.shadow' @@ -19,6 +20,10 @@ configurations { def includedAgentDir = project.layout.buildDirectory.dir("generated/included") def includedJarFileTree = fileTree(includedAgentDir) +// 
Populated automatically by includeShadowJar for every product dir registered in this build. +// Used by verifyAgentJarContents to check that all included products land in the assembled jar. +ext.includedProductPrefixes = objects.setProperty(String) + def pomPropertiesDir = project.layout.buildDirectory.dir("generated/maven-metadata") def pomPropertiesFileTree = fileTree(pomPropertiesDir) @@ -183,6 +188,8 @@ def generalShadowJarConfig(ShadowJar shadowJarTask) { } def includeShadowJar(TaskProvider includedShadowJarTask, String agentDir, FileTree includedJarFileTree) { + includedProductPrefixes.add(agentDir) + def expandTask = project.tasks.register("expandAgentShadowJar${agentDir.capitalize()}", Sync) { it.group = LifecycleBasePlugin.BUILD_GROUP it.description = "Expand the included shadow jar into the agent jar under ${agentDir}" @@ -468,16 +475,108 @@ tasks.withType(Test).configureEach { dependsOn "shadowJar" } -tasks.register('checkAgentJarSize') { +tasks.register('verifyAgentJarContents') { + group = LifecycleBasePlugin.VERIFICATION_GROUP + description = 'Verify the agent jar contains required entries and meets structural invariants' + + def jarProvider = tasks.named('shadowJar', ShadowJar).flatMap { it.archiveFile } + inputs.file(jarProvider) + inputs.property('productPrefixes', includedProductPrefixes) + outputs.file(project.layout.buildDirectory.file("tmp/${it.name}/.verified")) + doLast { - // Arbitrary limit to prevent unintentional increases to the agent jar size - // Raise or lower as required - assert tasks.named("shadowJar", ShadowJar).get().archiveFile.get().getAsFile().length() <= 33 * 1024 * 1024 - } + File jarFile = jarProvider.get().asFile + List failures = [] + Map entries = [:] + + // Jar size ceiling — raise only when the growth is intentional + def sizeCeiling = 33L * 1024 * 1024 + if (jarFile.length() > sizeCeiling) { + failures << "Jar size ${jarFile.length()} B exceeds ceiling ${sizeCeiling} B (33 MiB)" + } - dependsOn "shadowJar" + // 
Inspect jar content + new JarFile(jarFile).withCloseable { jf -> + jf.entries().each { ze -> entries[ze.name] = ze.size } + } + + // Required entries + [ + // Runtime index, loaded at startup to resolve classdata paths + // Generated by :dd-java-agent:generateAgentJarIndex + 'dd-java-agent.index', + // Premain-Class: Java 6 pre check + 'datadog/trace/bootstrap/AgentPreCheck.class', + // Agent-Class: main bootstrap entry point + 'datadog/trace/bootstrap/AgentBootstrap.class', + // Additional checks for Java 11 + 'datadog/trace/bootstrap/AdvancedAgentChecks.class', + // Instrumentation indexes + // * :dd-java-agent:instrumentation:generateInstrumenterIndex + // * :dd-java-agent:instrumentation:generateKnownTypesIndex + // Without instrumenter.index, zero instrumentations load at runtime. + 'inst/instrumenter.index', + 'inst/known-types.index', + // OTel drop-in support, embedded via otel-bootstrap + otel-shim shadowInclude + 'datadog/trace/bootstrap/otel/api/', + 'datadog/trace/bootstrap/otel/context/', + 'datadog/trace/bootstrap/otel/shim/', + 'META-INF/maven/com.datadoghq/dd-java-agent/pom.properties', + ].each { required -> + if (!entries.containsKey(required)) { + failures << "Missing required entry: ${required}" + } + } + + // Sanity check on the minimum number of classes; update as needed. Set to about 98% of that number. + def classCount = entries.keySet().count { it.endsWith('.class') || it.endsWith('.classdata') } + def classFloor = 17_000 // a bit moe than 98% of 17,279 at time of writing + if (classCount < classFloor) { + failures << "Class count ${classCount} is below floor ${classFloor}" + } + + // Each registered product must contribute at least one .classdata entry. + // Catches a product wired into the build but producing no classes. 
+ def classdataPrefixes = entries.keySet() + .findAll { it.endsWith('.classdata') } + .collect { it.split('/')[0] } + .toSet() + includedProductPrefixes.get().each { dir -> + if (!classdataPrefixes.contains(dir)) { + failures << "Product '${dir}' has no .classdata entries in the assembled jar" + } + } + + // All *.index files in the jar must be non-empty + entries.findAll { name, size -> name.endsWith('.index') && size == 0 }.each { name, _ -> + failures << "Empty index file: ${name}" + } + + // Packages that must not appear anywhere in the jar after relocation. + // NOTE: Hardcoded to catch accidental removal of relocate() calls in generalShadowJarConfig or in a nested shadow jar. + def productPrefixes = includedProductPrefixes.get() + ['org/slf4j/', 'org/jctools/', 'net/jpountz/', 'org/objectweb/asm/', 'io/airlift/'].each { pkg -> + def leaked = entries.keySet().findAll { entry -> + entry.startsWith(pkg) || productPrefixes.any { prefix -> entry.startsWith("${prefix}/${pkg}") } + } + if (!leaked.empty) { + def sample = leaked.take(3).toString() + failures << "Unrelocated package '${pkg}': ${sample}${leaked.size() > 3 ? ' ...' : ''}" + } + } + + if (!failures.empty) { + throw new GradleException( + "Agent jar verification failed (${failures.size()} issue(s)):\n" + + failures.collect { " - ${it}" }.join('\n')) + } + + def marker = outputs.files.singleFile + marker.parentFile.mkdirs() + marker.text = 'verified' + } } tasks.named('check') { - dependsOn 'checkAgentJarSize' + dependsOn 'verifyAgentJarContents' } From f5d3e582317b4d7525f38acbc0ca208cf4413ca1 Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Mon, 22 Jun 2026 12:46:27 +0200 Subject: [PATCH 2/3] feat: verify shipped agent integrations Run `--list-integrations` on the assembled jar as part of check. This exercises dd-java-agent.index, inst/instrumenter.index, and integration class loading end-to-end. 
The current 208 integration names are stored in `expected-integrations.txt` and compared against the runtime output. A diff is shown on mismatch. Run `updateAgentJarIntegrationsGolden` after intentional changes and commit the result. --- dd-java-agent/build.gradle | 72 +++++++- dd-java-agent/expected-integrations.txt | 208 ++++++++++++++++++++++++ 2 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 dd-java-agent/expected-integrations.txt diff --git a/dd-java-agent/build.gradle b/dd-java-agent/build.gradle index ae78df4410c..f389efea2fb 100644 --- a/dd-java-agent/build.gradle +++ b/dd-java-agent/build.gradle @@ -530,7 +530,7 @@ tasks.register('verifyAgentJarContents') { // Sanity check on the minimum number of classes; update as needed. Set to about 98% of that number. def classCount = entries.keySet().count { it.endsWith('.class') || it.endsWith('.classdata') } - def classFloor = 17_000 // a bit moe than 98% of 17,279 at time of writing + def classFloor = 17_000 // a bit more than 98% of 17,279 at time of writing if (classCount < classFloor) { failures << "Class count ${classCount} is below floor ${classFloor}" } @@ -577,6 +577,74 @@ tasks.register('verifyAgentJarContents') { } } +def integrationsGoldenFile = project.file('expected-integrations.txt') + +tasks.register('verifyAgentJarIntegrations', JavaExec) { + group = LifecycleBasePlugin.VERIFICATION_GROUP + description = 'Verify the agent jar lists exactly the integrations in expected-integrations.txt' + + def jarProvider = tasks.named('shadowJar', ShadowJar).flatMap { it.archiveFile } + inputs.file(jarProvider) + inputs.file(integrationsGoldenFile) + outputs.file(project.layout.buildDirectory.file("tmp/${it.name}/.verified")) + + // Run the assembled agent jar directly — this exercises dd-java-agent.index, + // inst/instrumenter.index, and instrumentation class loading end-to-end. 
+ mainClass = 'datadog.trace.bootstrap.AgentBootstrap' + classpath = objects.fileCollection().from(jarProvider) + args = ['--list-integrations'] + + def capturedOutput = new ByteArrayOutputStream() + standardOutput = capturedOutput + + doLast { + if (!integrationsGoldenFile.exists()) { + throw new GradleException( + "${integrationsGoldenFile.name} not found. " + + "Run './gradlew :dd-java-agent:updateAgentJarIntegrationsGolden' to create it.") + } + + def actual = capturedOutput.toString().readLines().findAll { !it.isBlank() }.toSorted() + def expected = integrationsGoldenFile.readLines().findAll { !it.isBlank() }.toSorted() + def added = actual - expected + def removed = expected - actual + + if (added || removed) { + def msg = new StringBuilder("Integration list differs from ${integrationsGoldenFile.name}.") + msg.append(" Run './gradlew :dd-java-agent:updateAgentJarIntegrationsGolden' to update it.\n") + added.each { msg.append(" + ${it}\n") } + removed.each { msg.append(" - ${it}\n") } + throw new GradleException(msg.toString()) + } + + def marker = outputs.files.singleFile + marker.parentFile.mkdirs() + marker.text = 'verified' + } +} + +// Run after adding/removing integrations to update expected-integrations.txt, then commit the diff. 
+tasks.register('updateAgentJarIntegrationsGolden', JavaExec) { + group = LifecycleBasePlugin.VERIFICATION_GROUP + description = 'Regenerate expected-integrations.txt from the current agent jar' + + def jarProvider = tasks.named('shadowJar', ShadowJar).flatMap { it.archiveFile } + inputs.file(jarProvider) + + mainClass = 'datadog.trace.bootstrap.AgentBootstrap' + classpath = objects.fileCollection().from(jarProvider) + args = ['--list-integrations'] + + def capturedOutput = new ByteArrayOutputStream() + standardOutput = capturedOutput + + doLast { + def integrations = capturedOutput.toString().readLines().findAll { !it.isBlank() }.toSorted() + integrationsGoldenFile.text = integrations.join('\n') + '\n' + logger.lifecycle("Updated ${integrationsGoldenFile.name} with ${integrations.size()} integrations") + } +} + tasks.named('check') { - dependsOn 'verifyAgentJarContents' + dependsOn 'verifyAgentJarContents', 'verifyAgentJarIntegrations' } diff --git a/dd-java-agent/expected-integrations.txt b/dd-java-agent/expected-integrations.txt new file mode 100644 index 00000000000..dc0d8bd7e7b --- /dev/null +++ b/dd-java-agent/expected-integrations.txt @@ -0,0 +1,208 @@ +IastInstrumentation +aerospike +akka-http +akka-http2 +akka_actor_mailbox +akka_actor_receive +akka_actor_send +akka_concurrent +allocatedirect +amqp +apache-httpclient +armeria-grpc-client +armeria-grpc-server +armeria-jetty +avro +aws-lambda +aws-sdk +axis2 +axway-api +azure-functions +caffeine +cassandra +ci-visibility +cics +classloading +commons-fileupload +commons-http-client +confluent-schema-registry +couchbase +cucumber +cxf +datanucleus +defineclass +dropwizard +dynamodb +elasticsearch +emr-aws-sdk +eventbridge +ffm-native-tracing +finatra +freemarker +gax +glassfish +google-http-client +google-pubsub +gradle +graphql-java +grizzly +grizzly-client +grizzly-filterchain +grpc +gson +guava +hazelcast +hazelcast_legacy +hibernate +httpasyncclient +httpasyncclient5 +httpclient +httpclient5 +httpcore 
+httpcore-5 +httpurlconnection +hystrix +ignite +inputStream +jackson +jackson-core +jacoco +jakarta-jms +jakarta-mail +jakarta-rs +jakarta-websocket +jakarta-ws +java-http-client +java-lang +java-lang-appsec +java-lang-management +java-module +java-net +java_completablefuture +java_concurrent +java_timer +javax-mail +javax-websocket +jax-rs +jax-ws +jboss-logmanager +jdbc +jdbc-datasource +jedis +jersey +jetty +jetty-client +jetty-concurrent +jms +jni +jsp +jwt +kafka +kotlin_coroutine +lettuce +liberty +log4j +logback +maven +micronaut +mmap +mongo +mule +native-image +netty +netty-concurrent +netty-promise +not-not-trace +ognl +okhttp +openai-java +opensearch +opentelemetry-annotations +opentelemetry-beta +opentelemetry-logs +opentelemetry-metrics +opentelemetry.experimental +opentracing +org-json +pekko-http +pekko-http2 +pekko_actor_mailbox +pekko_actor_receive +pekko_actor_send +play +play-ws +protobuf +quartz +ratpack +ratpack-request-body +reactive-streams +reactor-core +reactor-netty +rediscala +redisson +renaissance +resilience4j +resilience4j-reactor +resteasy +restlet-http +rmi +rxjava +s3 +scala_concurrent +servicetalk +servlet +servlet-filter +servlet-request-body +servlet-service +sfn +shutdown +slick +snakeyaml +sns +socket +sofarpc +spark +spark-executor +spark-exit +spark-launcher +spark-openlineage +sparkjava +spray-http +spring-async +spring-beans +spring-boot +spring-boot-span-origin +spring-cloud-zuul +spring-core +spring-data +spring-jms +spring-messaging +spring-rabbit +spring-scheduling +spring-security +spring-web +spring-web-code-origin +spring-webflux +spring-ws +spymemcached +sqs +sslsocket +synapse3-client +synapse3-server +testng +throwables +thymeleaf +tibco +tinylog +tomcat +trace +twilio-sdk +undertow +urlconnection +valkey +velocity +vertx +wallclock +websphere-jmx +wildfly +zio.experimental From 0fb73aea860fe41a4b4ee4b83ffcd879cbbccf0d Mon Sep 17 00:00:00 2001 From: Brice Dutheil Date: Mon, 22 Jun 2026 14:41:01 +0200 Subject: 
[PATCH 3/3] fix: Also track the stderr when verifying integrations Multi-version integration issues could be misreported as valid. InstrumenterIndex.buildModule() logs ERROR and returns null when a module fails to load, while the process exits 0. For integrations with multiple versioned modules sharing one name (akka-http, vertx, servlet...), if any one of the versioned modules fails to load, the failure is invisible because the shared name is still listed. This commit changes that. In a clean run stderr is empty; any output indicates a module load failure and fails the task immediately. --- dd-java-agent/build.gradle | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dd-java-agent/build.gradle b/dd-java-agent/build.gradle index f389efea2fb..e086e3f054f 100644 --- a/dd-java-agent/build.gradle +++ b/dd-java-agent/build.gradle @@ -594,10 +594,21 @@ tasks.register('verifyAgentJarIntegrations', JavaExec) { classpath = objects.fileCollection().from(jarProvider) args = ['--list-integrations'] + // Capture both stdout and stderr: InstrumenterIndex.buildModule() logs ERROR and returns null when a module + // fails to load, while the process exits with status 0. def capturedOutput = new ByteArrayOutputStream() + def capturedError = new ByteArrayOutputStream() standardOutput = capturedOutput + errorOutput = capturedError doLast { + def stderr = capturedError.toString() + if (!stderr.isBlank()) { + throw new GradleException( + "--list-integrations produced unexpected stderr output " + + "(likely a module load failure; see InstrumenterIndex.buildModule):\n${stderr}") + } + if (!integrationsGoldenFile.exists()) { throw new GradleException( "${integrationsGoldenFile.name} not found. " + @@ -623,7 +634,7 @@ tasks.register('verifyAgentJarIntegrations', JavaExec) { } } -// Run after adding/removing integrations to update expected-integrations.txt, then commit the diff. +// Run manually after adding/removing integrations to update expected-integrations.txt, then commit it together with the integration change. 
tasks.register('updateAgentJarIntegrationsGolden', JavaExec) { group = LifecycleBasePlugin.VERIFICATION_GROUP description = 'Regenerate expected-integrations.txt from the current agent jar'