From 0ba6188ebc80450a848de8c9232dfd0313089165 Mon Sep 17 00:00:00 2001 From: Chad Retz Date: Fri, 19 Jul 2024 07:46:52 -0500 Subject: [PATCH 01/25] Experimental cloud operations client (#2146) Fixes #2059 --- .github/workflows/ci.yml | 39 ++++++ .gitmodules | 3 + .../client/CloudOperationsClient.java | 37 +++++ .../client/CloudOperationsClientImpl.java | 36 +++++ .../client/CloudOperationsClientTest.java | 63 +++++++++ temporal-serviceclient/build.gradle | 9 ++ .../serviceclient/ChannelManager.java | 22 +++ .../serviceclient/CloudServiceStubs.java | 55 ++++++++ .../serviceclient/CloudServiceStubsImpl.java | 125 +++++++++++++++++ .../CloudServiceStubsOptions.java | 130 ++++++++++++++++++ temporal-serviceclient/src/main/protocloud | 1 + 11 files changed, 520 insertions(+) create mode 100644 temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClient.java create mode 100644 temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClientImpl.java create mode 100644 temporal-sdk/src/test/java/io/temporal/client/CloudOperationsClientTest.java create mode 100644 temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubs.java create mode 100644 temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsImpl.java create mode 100644 temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsOptions.java create mode 160000 temporal-serviceclient/src/main/protocloud diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index caf85fb810..da620fd5e8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -79,6 +79,45 @@ jobs: with: report_paths: '**/build/test-results/test/TEST-*.xml' + unit_test_cloud: + name: Unit test with cloud + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + ref: ${{ github.event.pull_request.head.sha }} + + - name: Set up Java + uses: 
actions/setup-java@v4 + with: + java-version: "11" + distribution: "temurin" + + - name: Set up Gradle + uses: gradle/actions/setup-gradle@v3 + + - name: Run cloud test + # Only supported in non-fork runs, since secrets are not available in forks. We intentionally + # are only doing this check on the step instead of the job so we require job passing in CI + # even for those that can't run this step. + if: ${{ github.event.pull_request.head.repo.full_name == '' || github.event.pull_request.head.repo.full_name == 'temporalio/sdk-java' }} + env: + USER: unittest + TEMPORAL_CLIENT_CLOUD_NAMESPACE: sdk-ci.a2dd6 + TEMPORAL_CLIENT_CLOUD_API_KEY: ${{ secrets.TEMPORAL_CLIENT_CLOUD_API_KEY }} + TEMPORAL_CLIENT_CLOUD_API_VERSION: 2024-05-13-00 + run: ./gradlew --no-daemon :temporal-sdk:test --tests '*CloudOperationsClientTest' + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v4 + if: success() || failure() # always run even if the previous step fails + with: + report_paths: '**/build/test-results/test/TEST-*.xml' + copyright: name: Copyright and code format runs-on: ubuntu-latest diff --git a/.gitmodules b/.gitmodules index 3222a3d131..74a657d3f5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "temporal-serviceclient/src/main/proto"] path = temporal-serviceclient/src/main/proto url = https://github.com/temporalio/api.git +[submodule "temporal-serviceclient/src/main/protocloud"] + path = temporal-serviceclient/src/main/protocloud + url = https://github.com/temporalio/api-cloud.git diff --git a/temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClient.java b/temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClient.java new file mode 100644 index 0000000000..fa46eb60cf --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClient.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.client; + +import io.temporal.common.Experimental; +import io.temporal.serviceclient.CloudServiceStubs; + +/** Client to the Temporal Cloud operations service for performing cloud operations. */ +@Experimental +public interface CloudOperationsClient { + @Experimental + static CloudOperationsClient newInstance(CloudServiceStubs service) { + return new CloudOperationsClientImpl(service); + } + + /** Get the raw cloud service stubs. */ + @Experimental + CloudServiceStubs getCloudServiceStubs(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClientImpl.java b/temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClientImpl.java new file mode 100644 index 0000000000..fea9a736c7 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/client/CloudOperationsClientImpl.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.client; + +import io.temporal.serviceclient.CloudServiceStubs; + +class CloudOperationsClientImpl implements CloudOperationsClient { + private final CloudServiceStubs cloudServiceStubs; + + CloudOperationsClientImpl(CloudServiceStubs cloudServiceStubs) { + this.cloudServiceStubs = cloudServiceStubs; + } + + @Override + public CloudServiceStubs getCloudServiceStubs() { + return cloudServiceStubs; + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/client/CloudOperationsClientTest.java b/temporal-sdk/src/test/java/io/temporal/client/CloudOperationsClientTest.java new file mode 100644 index 0000000000..7cf394cb06 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/client/CloudOperationsClientTest.java @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.client; + +import io.temporal.api.cloud.cloudservice.v1.GetNamespaceRequest; +import io.temporal.api.cloud.cloudservice.v1.GetNamespaceResponse; +import io.temporal.serviceclient.CloudServiceStubs; +import io.temporal.serviceclient.CloudServiceStubsOptions; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; + +public class CloudOperationsClientTest { + private String namespace; + private String apiKey; + private String apiVersion; + + @Before + public void checkCloudEnvVars() { + namespace = System.getenv("TEMPORAL_CLIENT_CLOUD_NAMESPACE"); + apiKey = System.getenv("TEMPORAL_CLIENT_CLOUD_API_KEY"); + apiVersion = System.getenv("TEMPORAL_CLIENT_CLOUD_API_VERSION"); + Assume.assumeTrue( + "Cloud environment variables not present", namespace != null && apiKey != null); + } + + @Test + public void simpleCall() { + CloudOperationsClient client = + CloudOperationsClient.newInstance( + CloudServiceStubs.newServiceStubs( + CloudServiceStubsOptions.newBuilder() + .addApiKey(() -> apiKey) + .setVersion(apiVersion) + .build())); + // Do simple get namespace call + GetNamespaceResponse resp = + client + .getCloudServiceStubs() + .blockingStub() + .getNamespace(GetNamespaceRequest.newBuilder().setNamespace(namespace).build()); + Assert.assertEquals(namespace, resp.getNamespace().getNamespace()); + } +} diff --git a/temporal-serviceclient/build.gradle b/temporal-serviceclient/build.gradle index 8b51ba6bae..77ec231034 100644 --- a/temporal-serviceclient/build.gradle +++ b/temporal-serviceclient/build.gradle @@ -56,6 +56,15 @@ sourcesJar { .setDuplicatesStrategy(DuplicatesStrategy.EXCLUDE) } +// Putting protocloud as an additional proto source set +sourceSets { + main { + proto { + srcDir 'src/main/protocloud' + } + } +} + protobuf { // version/variables substitution is not supported in protobuf section. 
// protoc and protoc-gen-grpc-java versions are selected to be compatible diff --git a/temporal-serviceclient/src/main/java/io/temporal/serviceclient/ChannelManager.java b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/ChannelManager.java index 38fa4e1703..bca4113a3b 100644 --- a/temporal-serviceclient/src/main/java/io/temporal/serviceclient/ChannelManager.java +++ b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/ChannelManager.java @@ -75,6 +75,10 @@ final class ChannelManager { private static final Metadata.Key CLIENT_NAME_HEADER_KEY = Metadata.Key.of("client-name", Metadata.ASCII_STRING_MARSHALLER); + /** refers to the name of the gRPC header that contains the cloud service version */ + private static final Metadata.Key CLOUD_VERSION_HEADER_KEY = + Metadata.Key.of("temporal-cloud-api-version", Metadata.ASCII_STRING_MARSHALLER); + private static final String CLIENT_NAME_HEADER_VALUE = "temporal-java"; private final ServiceStubsOptions options; @@ -93,6 +97,18 @@ final class ChannelManager { public ChannelManager( ServiceStubsOptions options, List additionalHeadInterceptors) { + this(options, additionalHeadInterceptors, null); + } + + public ChannelManager( + ServiceStubsOptions options, + List additionalHeadInterceptors, + @Nullable Capabilities fixedServerCapabilities) { + // If fixed capabilities are present, set them on the future + if (fixedServerCapabilities != null) { + serverCapabilitiesFuture.complete(fixedServerCapabilities); + } + // Do not shutdown a channel passed to the constructor from outside this.channelNeedsShutdown = options.getChannel() == null; @@ -154,6 +170,12 @@ private Channel applyHeadStandardInterceptors(Channel channel) { headers.put(LIBRARY_VERSION_HEADER_KEY, Version.LIBRARY_VERSION); headers.put(SUPPORTED_SERVER_VERSIONS_HEADER_KEY, Version.SUPPORTED_SERVER_VERSIONS); headers.put(CLIENT_NAME_HEADER_KEY, CLIENT_NAME_HEADER_VALUE); + if (options instanceof CloudServiceStubsOptions) { + String version = 
((CloudServiceStubsOptions) options).getVersion(); + if (version != null) { + headers.put(CLOUD_VERSION_HEADER_KEY, version); + } + } return ClientInterceptors.intercept( channel, diff --git a/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubs.java b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubs.java new file mode 100644 index 0000000000..77128ca427 --- /dev/null +++ b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubs.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.serviceclient; + +import static io.temporal.internal.WorkflowThreadMarker.enforceNonWorkflowThread; + +import io.temporal.api.cloud.cloudservice.v1.CloudServiceGrpc; +import io.temporal.internal.WorkflowThreadMarker; + +/** + * Initializes and holds gRPC blocking and future stubs. + * + * <p>WARNING: The cloud service is currently experimental. + */ +public interface CloudServiceStubs + extends ServiceStubs< + CloudServiceGrpc.CloudServiceBlockingStub, CloudServiceGrpc.CloudServiceFutureStub> { + String HEALTH_CHECK_SERVICE_NAME = "temporal.api.cloud.cloudservice.v1.CloudService"; + + /** Creates CloudService gRPC stubs pointed at Temporal Cloud. */ + static CloudServiceStubs newCloudServiceStubs() { + return newServiceStubs(CloudServiceStubsOptions.getDefaultInstance()); + } + + /** + * Creates CloudService gRPC stubs
+ * This method creates stubs with lazy connectivity, connection is not performed during the + * creation time and happens on the first request. + * + * @param options stub options to use + */ + static CloudServiceStubs newServiceStubs(CloudServiceStubsOptions options) { + enforceNonWorkflowThread(); + return WorkflowThreadMarker.protectFromWorkflowThread( + new CloudServiceStubsImpl(options), CloudServiceStubs.class); + } +} diff --git a/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsImpl.java b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsImpl.java new file mode 100644 index 0000000000..1ff5176914 --- /dev/null +++ b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsImpl.java @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.serviceclient; + +import io.grpc.ClientInterceptor; +import io.grpc.ManagedChannel; +import io.grpc.health.v1.HealthCheckResponse; +import io.temporal.api.cloud.cloudservice.v1.CloudServiceGrpc; +import io.temporal.api.workflowservice.v1.GetSystemInfoResponse; +import java.time.Duration; +import java.util.Collections; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +final class CloudServiceStubsImpl implements CloudServiceStubs { + private static final Logger log = LoggerFactory.getLogger(CloudServiceStubsImpl.class); + + private final ChannelManager channelManager; + + private final CloudServiceGrpc.CloudServiceBlockingStub blockingStub; + private final CloudServiceGrpc.CloudServiceFutureStub futureStub; + + /** + * Creates gRPC Channel and Stubs that connects to the {@link CloudServiceGrpc} according to the + * specified options. + */ + CloudServiceStubsImpl(CloudServiceStubsOptions options) { + ClientInterceptor deadlineInterceptor = + new GrpcDeadlineInterceptor(options.getRpcTimeout(), null, null); + + options = CloudServiceStubsOptions.newBuilder(options).validateAndBuildWithDefaults(); + + this.channelManager = + new ChannelManager( + options, + Collections.singletonList(deadlineInterceptor), + GetSystemInfoResponse.Capabilities.newBuilder() + .setInternalErrorDifferentiation(true) + .build()); + + log.info("Created CloudServiceStubs for channel: {}", channelManager.getRawChannel()); + + this.blockingStub = CloudServiceGrpc.newBlockingStub(channelManager.getInterceptedChannel()); + this.futureStub = CloudServiceGrpc.newFutureStub(channelManager.getInterceptedChannel()); + } + + @Override + public ManagedChannel getRawChannel() { + return channelManager.getRawChannel(); + } + + @Override + public CloudServiceGrpc.CloudServiceBlockingStub blockingStub() { + return blockingStub; + } + + @Override + public 
CloudServiceGrpc.CloudServiceFutureStub futureStub() { + return futureStub; + } + + @Override + public void shutdown() { + log.info("shutdown"); + channelManager.shutdown(); + } + + @Override + public void shutdownNow() { + log.info("shutdownNow"); + channelManager.shutdownNow(); + } + + @Override + public boolean isShutdown() { + return channelManager.isShutdown(); + } + + @Override + public boolean isTerminated() { + return channelManager.isTerminated(); + } + + @Override + public boolean awaitTermination(long timeout, TimeUnit unit) { + return channelManager.awaitTermination(timeout, unit); + } + + @Override + public void connect(@Nullable Duration timeout) { + channelManager.connect(HEALTH_CHECK_SERVICE_NAME, timeout); + } + + @Override + public HealthCheckResponse healthCheck() { + // no need to pass timeout, timeout will be assigned by GrpcDeadlineInterceptor + return this.channelManager.healthCheck(HEALTH_CHECK_SERVICE_NAME, null); + } + + @Override + public Supplier getServerCapabilities() { + return this.channelManager.getServerCapabilities(); + } +} diff --git a/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsOptions.java b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsOptions.java new file mode 100644 index 0000000000..bc7a94af83 --- /dev/null +++ b/temporal-serviceclient/src/main/java/io/temporal/serviceclient/CloudServiceStubsOptions.java @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.serviceclient; + +import io.grpc.ManagedChannel; +import java.util.Objects; + +/** + * Options for cloud service. + * + * <p>WARNING: The cloud service is currently experimental. + */ +public final class CloudServiceStubsOptions extends ServiceStubsOptions { + public static final String DEFAULT_CLOUD_TARGET = "saas-api.tmprl.cloud:443"; + + private static final CloudServiceStubsOptions DEFAULT_INSTANCE = + newBuilder().validateAndBuildWithDefaults(); + + /** Version header if any. */ + private final String version; + + public static Builder newBuilder() { + return new Builder(); + } + + public static Builder newBuilder(CloudServiceStubsOptions options) { + // We intentionally only accept our options and not the base class of + // options to ensure our defaults were originally applied at some point + // when the options class was first created. + return new Builder(options); + } + + public static CloudServiceStubsOptions getDefaultInstance() { + return DEFAULT_INSTANCE; + } + + private CloudServiceStubsOptions(ServiceStubsOptions serviceStubsOptions, String version) { + super(serviceStubsOptions); + this.version = version; + } + + /** + * @return Returns the version used for the version header if any. + */ + public String getVersion() { + return version; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + CloudServiceStubsOptions that = (CloudServiceStubsOptions) o; + return Objects.equals(version, that.version); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), version); + } + + /** Builder is the builder for CloudServiceStubsOptions. */ + public static class Builder extends ServiceStubsOptions.Builder { + private String version; + + private Builder() { + // Set defaults only in this constructor + setTarget(DEFAULT_CLOUD_TARGET); + setEnableHttps(true); + } + + private Builder(CloudServiceStubsOptions options) { + super(options); + this.version = options.version; + } + + /** Set a cloud operation service version. 
This sets the version header for each call. */ + public Builder setVersion(String version) { + this.version = version; + return this; + } + + /** Default is {@link #DEFAULT_CLOUD_TARGET}. See inherited method for more details. */ + @Override + public Builder setTarget(String target) { + return super.setTarget(target); + } + + @Override + public Builder setChannel(ManagedChannel channel) { + // Unset our defaults + setEnableHttps(false); + setTarget(null); + return super.setChannel(channel); + } + + /** + * Builds and returns a CloudServiceStubsOptions object. + * + * @return CloudServiceStubsOptions object with the specified params. + */ + public CloudServiceStubsOptions build() { + return new CloudServiceStubsOptions(super.build(), this.version); + } + + public CloudServiceStubsOptions validateAndBuildWithDefaults() { + ServiceStubsOptions serviceStubsOptions = super.validateAndBuildWithDefaults(); + return new CloudServiceStubsOptions(serviceStubsOptions, this.version); + } + } +} diff --git a/temporal-serviceclient/src/main/protocloud b/temporal-serviceclient/src/main/protocloud new file mode 160000 index 0000000000..508379bad5 --- /dev/null +++ b/temporal-serviceclient/src/main/protocloud @@ -0,0 +1 @@ +Subproject commit 508379bad5b0ec0275dc5c0f4a8a884fbb6e7123 From b95322f1ca7976aeafac9685a953d2c57913a1a5 Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Tue, 23 Jul 2024 15:09:59 -0700 Subject: [PATCH 02/25] Reintroduce slot supplier & add many tests (#2143) --- .github/workflows/ci.yml | 6 + temporal-sdk/build.gradle | 13 + .../activity/ActivityPollResponseToInfo.java | 34 +++ .../internal/worker/ActivityPollTask.java | 26 +- .../internal/worker/ActivityTask.java | 9 + .../internal/worker/ActivityWorker.java | 63 ++-- .../worker/EagerActivityDispatcher.java | 19 +- .../worker/EagerActivitySlotsReservation.java | 52 ++-- .../worker/LocalActivityAttemptTask.java | 11 - .../worker/LocalActivityExecutionContext.java | 11 + .../LocalActivitySlotSupplierQueue.java | 138 +++++++++ 
.../internal/worker/LocalActivityWorker.java | 154 +++++----- .../internal/worker/PollTaskExecutor.java | 20 -- .../internal/worker/ShutdownManager.java | 33 +- .../internal/worker/SingleWorkerOptions.java | 16 - .../internal/worker/SlotReservationData.java | 33 ++ .../internal/worker/SyncActivityWorker.java | 13 +- .../internal/worker/SyncWorkflowWorker.java | 14 +- .../internal/worker/TrackingSlotSupplier.java | 211 +++++++++++++ .../internal/worker/WorkflowPollTask.java | 24 +- .../internal/worker/WorkflowTask.java | 8 +- .../internal/worker/WorkflowWorker.java | 87 +++--- .../java/io/temporal/worker/MetricsType.java | 15 + .../main/java/io/temporal/worker/Worker.java | 50 ++- .../io/temporal/worker/WorkerOptions.java | 61 +++- .../worker/WorkflowTaskDispatchHandle.java | 22 +- .../worker/tuning/ActivitySlotInfo.java | 80 +++++ .../worker/tuning/CompositeTuner.java | 83 +++++ .../worker/tuning/FixedSizeSlotSupplier.java | 75 +++++ .../worker/tuning/JVMSystemResourceInfo.java | 91 ++++++ .../worker/tuning/LocalActivitySlotInfo.java | 80 +++++ .../temporal/worker/tuning/PIDController.java | 186 ++++++++++++ .../tuning/ResourceBasedController.java | 131 ++++++++ .../ResourceBasedControllerOptions.java | 172 +++++++++++ .../tuning/ResourceBasedSlotOptions.java | 127 ++++++++ .../tuning/ResourceBasedSlotSupplier.java | 165 ++++++++++ .../worker/tuning/ResourceBasedTuner.java | 137 +++++++++ .../io/temporal/worker/tuning/SlotInfo.java | 29 ++ .../worker/tuning/SlotMarkUsedContext.java | 36 +++ .../io/temporal/worker/tuning/SlotPermit.java | 44 +++ .../worker/tuning/SlotReleaseContext.java | 44 +++ .../worker/tuning/SlotReleaseReason.java | 88 ++++++ .../worker/tuning/SlotReserveContext.java | 54 ++++ .../temporal/worker/tuning/SlotSupplier.java | 96 ++++++ .../worker/tuning/SystemResourceInfo.java | 39 +++ .../temporal/worker/tuning/WorkerTuner.java | 46 +++ .../worker/tuning/WorkflowSlotInfo.java | 145 +++++++++ .../internal/worker/SlotSupplierTest.java | 137 
+++++++++ .../worker/StickyQueueBacklogTest.java | 10 +- .../WorkflowSlotGrpcInterceptedTests.java | 261 ++++++++++++++++ .../WorkflowSlotMaxConcurrentTests.java | 191 ++++++++++++ .../internal/worker/WorkflowSlotTests.java | 197 +++++++++--- .../worker/WorkflowSlotsSmallSizeTests.java | 285 ++++++++++++++++++ .../internal/worker/WorkflowWorkerTest.java | 26 +- .../testUtils/CountingSlotSupplier.java | 63 ++++ .../worker/IndependentResourceBasedTests.java | 23 ++ .../worker/ResourceBasedTunerTests.java | 174 +++++++++++ .../io/temporal/worker/WorkerOptionsTest.java | 47 +++ .../EagerWorkflowTaskDispatchTest.java | 29 +- .../EagerActivityDispatchingTest.java | 44 +-- .../TestActivityEnvironmentInternal.java | 3 +- 61 files changed, 4225 insertions(+), 356 deletions(-) create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/activity/ActivityPollResponseToInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/worker/SlotReservationData.java create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/ActivitySlotInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/CompositeTuner.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/JVMSystemResourceInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/LocalActivitySlotInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/PIDController.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedController.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedControllerOptions.java create mode 100644 
temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotOptions.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotMarkUsedContext.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotPermit.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseContext.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseReason.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReserveContext.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/SystemResourceInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkerTuner.java create mode 100644 temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkflowSlotInfo.java create mode 100644 temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java create mode 100644 temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotGrpcInterceptedTests.java create mode 100644 temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotMaxConcurrentTests.java create mode 100644 temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java create mode 100644 temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java create mode 100644 temporal-sdk/src/test/java/io/temporal/worker/IndependentResourceBasedTests.java create mode 100644 temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
da620fd5e8..b531cc7431 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,12 @@ jobs: USE_DOCKER_SERVICE: false run: ./gradlew --no-daemon test -x checkLicenseMain -x checkLicenses -x spotlessCheck -x spotlessApply -x spotlessJava -P edgeDepsTest + - name: Run independent resource tuner test + env: + USER: unittest + USE_DOCKER_SERVICE: false + run: ./gradlew --no-daemon temporal-sdk:testResourceIndependent -x checkLicenseMain -x checkLicenses -x spotlessCheck -x spotlessApply -x spotlessJava -P edgeDepsTest + - name: Publish Test Report uses: mikepenz/action-junit-report@v4 if: success() || failure() # always run even if the previous step fails diff --git a/temporal-sdk/build.gradle b/temporal-sdk/build.gradle index e8371f29b5..4bd3fdddcb 100644 --- a/temporal-sdk/build.gradle +++ b/temporal-sdk/build.gradle @@ -36,3 +36,16 @@ task registerNamespace(type: JavaExec) { } test.dependsOn 'registerNamespace' + +test { + useJUnit { + excludeCategories 'io.temporal.worker.IndependentResourceBasedTests' + } +} + +task testResourceIndependent(type: Test) { + useJUnit { + includeCategories 'io.temporal.worker.IndependentResourceBasedTests' + maxParallelForks = 1 + } +} \ No newline at end of file diff --git a/temporal-sdk/src/main/java/io/temporal/internal/activity/ActivityPollResponseToInfo.java b/temporal-sdk/src/main/java/io/temporal/internal/activity/ActivityPollResponseToInfo.java new file mode 100644 index 0000000000..9af4ae9d60 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/activity/ActivityPollResponseToInfo.java @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.internal.activity; + +import io.temporal.activity.ActivityInfo; +import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponseOrBuilder; + +public class ActivityPollResponseToInfo { + public static ActivityInfo toActivityInfoImpl( + PollActivityTaskQueueResponseOrBuilder response, + String namespace, + String activityTaskQueue, + boolean local) { + return new ActivityInfoImpl(response, namespace, activityTaskQueue, local, null); + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java index a08dc38ca2..effabf3f23 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java @@ -33,8 +33,8 @@ import io.temporal.internal.common.ProtobufTimeUtils; import io.temporal.serviceclient.WorkflowServiceStubs; import io.temporal.worker.MetricsType; +import io.temporal.worker.tuning.*; import java.util.Objects; -import java.util.concurrent.Semaphore; import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -45,7 +45,7 @@ final class ActivityPollTask implements Poller.PollTask { private static final Logger log = LoggerFactory.getLogger(ActivityPollTask.class); private final WorkflowServiceStubs service; - private final Semaphore pollSemaphore; + private final TrackingSlotSupplier slotSupplier; private final Scope metricsScope; private final 
PollActivityTaskQueueRequest pollRequest; @@ -57,11 +57,11 @@ public ActivityPollTask( @Nullable String buildId, boolean useBuildIdForVersioning, double activitiesPerSecond, - Semaphore pollSemaphore, + @Nonnull TrackingSlotSupplier slotSupplier, @Nonnull Scope metricsScope, @Nonnull Supplier serverCapabilities) { this.service = Objects.requireNonNull(service); - this.pollSemaphore = pollSemaphore; + this.slotSupplier = slotSupplier; this.metricsScope = Objects.requireNonNull(metricsScope); PollActivityTaskQueueRequest.Builder pollRequest = @@ -92,13 +92,22 @@ public ActivityTask poll() { log.trace("poll request begin: " + pollRequest); } PollActivityTaskQueueResponse response; + SlotPermit permit; boolean isSuccessful = false; try { - pollSemaphore.acquire(); + permit = + slotSupplier.reserveSlot( + new SlotReservationData( + pollRequest.getTaskQueue().getName(), + pollRequest.getIdentity(), + pollRequest.getWorkerVersionCapabilities().getBuildId())); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return null; + } catch (Exception e) { + log.warn("Error while trying to reserve a slot for an activity", e.getCause()); + return null; } try { @@ -118,9 +127,12 @@ public ActivityTask poll() { ProtobufTimeUtils.toM3Duration( response.getStartedTime(), response.getCurrentAttemptScheduledTime())); isSuccessful = true; - return new ActivityTask(response, pollSemaphore::release); + return new ActivityTask( + response, + permit, + () -> slotSupplier.releaseSlot(SlotReleaseReason.taskComplete(), permit)); } finally { - if (!isSuccessful) pollSemaphore.release(); + if (!isSuccessful) slotSupplier.releaseSlot(SlotReleaseReason.neverUsed(), permit); } } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityTask.java index ebc874c1f4..9340f08ebe 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityTask.java +++ 
b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityTask.java @@ -21,17 +21,21 @@ package io.temporal.internal.worker; import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponseOrBuilder; +import io.temporal.worker.tuning.SlotPermit; import io.temporal.workflow.Functions; import javax.annotation.Nonnull; public final class ActivityTask { private final @Nonnull PollActivityTaskQueueResponseOrBuilder response; + private final @Nonnull SlotPermit permit; private final @Nonnull Functions.Proc completionCallback; public ActivityTask( @Nonnull PollActivityTaskQueueResponseOrBuilder response, + @Nonnull SlotPermit permit, @Nonnull Functions.Proc completionCallback) { this.response = response; + this.permit = permit; this.completionCallback = completionCallback; } @@ -48,4 +52,9 @@ public PollActivityTaskQueueResponseOrBuilder getResponse() { public Functions.Proc getCompletionCallback() { return completionCallback; } + + @Nonnull + public SlotPermit getPermit() { + return permit; + } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityWorker.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityWorker.java index 73b037f21f..17ebf3d646 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityWorker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityWorker.java @@ -30,6 +30,7 @@ import io.temporal.api.command.v1.ScheduleActivityTaskCommandAttributesOrBuilder; import io.temporal.api.common.v1.WorkflowExecution; import io.temporal.api.workflowservice.v1.*; +import io.temporal.internal.activity.ActivityPollResponseToInfo; import io.temporal.internal.common.ProtobufTimeUtils; import io.temporal.internal.logging.LoggerTag; import io.temporal.internal.retryer.GrpcRetryer; @@ -39,9 +40,10 @@ import io.temporal.serviceclient.rpcretry.DefaultStubServiceOperationRpcRetryOptions; import io.temporal.worker.MetricsType; import io.temporal.worker.WorkerMetricsTag; +import 
io.temporal.worker.tuning.*; import java.util.Objects; +import java.util.Optional; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import org.slf4j.Logger; @@ -64,8 +66,7 @@ final class ActivityWorker implements SuspendableWorker { private final Scope workerMetricsScope; private final GrpcRetryer grpcRetryer; private final GrpcRetryer.GrpcRetryerOptions replyGrpcRetryerOptions; - private final int executorSlots; - private final Semaphore executorSlotsSemaphore; + private final TrackingSlotSupplier slotSupplier; public ActivityWorker( @Nonnull WorkflowServiceStubs service, @@ -73,7 +74,8 @@ public ActivityWorker( @Nonnull String taskQueue, double taskQueueActivitiesPerSecond, @Nonnull SingleWorkerOptions options, - @Nonnull ActivityTaskHandler handler) { + @Nonnull ActivityTaskHandler handler, + @Nonnull SlotSupplier slotSupplier) { this.service = Objects.requireNonNull(service); this.namespace = Objects.requireNonNull(namespace); this.taskQueue = Objects.requireNonNull(taskQueue); @@ -87,8 +89,8 @@ public ActivityWorker( this.replyGrpcRetryerOptions = new GrpcRetryer.GrpcRetryerOptions( DefaultStubServiceOperationRpcRetryOptions.INSTANCE, null); - this.executorSlots = options.getTaskExecutorThreadPoolSize(); - this.executorSlotsSemaphore = new Semaphore(executorSlots); + + this.slotSupplier = new TrackingSlotSupplier<>(slotSupplier, this.workerMetricsScope); } @Override @@ -101,8 +103,7 @@ public boolean start() { options.getIdentity(), new TaskHandlerImpl(handler), pollerOptions, - options.getTaskExecutorThreadPoolSize(), - workerMetricsScope, + slotSupplier.maximumSlots().orElse(Integer.MAX_VALUE), true); poller = new Poller<>( @@ -115,7 +116,7 @@ public boolean start() { options.getBuildId(), options.isUsingBuildIdForVersioning(), taskQueueActivitiesPerSecond, - executorSlotsSemaphore, + this.slotSupplier, workerMetricsScope, 
service.getServerCapabilities()), this.pollTaskExecutor, @@ -131,14 +132,14 @@ public boolean start() { @Override public CompletableFuture shutdown(ShutdownManager shutdownManager, boolean interruptTasks) { - String semaphoreName = this + "#executorSlotsSemaphore"; + String supplierName = this + "#executorSlots"; return poller .shutdown(shutdownManager, interruptTasks) .thenCompose( ignore -> !interruptTasks - ? shutdownManager.waitForSemaphorePermitsReleaseUntimed( - executorSlotsSemaphore, executorSlots, semaphoreName) + ? shutdownManager.waitForSupplierPermitsReleasedUnlimited( + slotSupplier, supplierName) : CompletableFuture.completedFuture(null)) .thenCompose( ignore -> @@ -224,6 +225,15 @@ private TaskHandlerImpl(ActivityTaskHandler handler) { @Override public void handle(ActivityTask task) throws Exception { PollActivityTaskQueueResponseOrBuilder pollResponse = task.getResponse(); + + slotSupplier.markSlotUsed( + new ActivitySlotInfo( + ActivityPollResponseToInfo.toActivityInfoImpl( + pollResponse, namespace, taskQueue, false), + options.getIdentity(), + options.getBuildId()), + task.getPermit()); + Scope metricsScope = workerMetricsScope.tagged( ImmutableMap.of( @@ -416,23 +426,34 @@ private void logExceptionDuringResultReporting( private final class EagerActivityDispatcherImpl implements EagerActivityDispatcher { @Override - public boolean tryReserveActivitySlot( + public Optional tryReserveActivitySlot( ScheduleActivityTaskCommandAttributesOrBuilder commandAttributes) { - return WorkerLifecycleState.ACTIVE.equals(ActivityWorker.this.getLifecycleState()) - && Objects.equals( - commandAttributes.getTaskQueue().getName(), ActivityWorker.this.taskQueue) - && ActivityWorker.this.executorSlotsSemaphore.tryAcquire(); + if (!WorkerLifecycleState.ACTIVE.equals(ActivityWorker.this.getLifecycleState()) + || !Objects.equals( + commandAttributes.getTaskQueue().getName(), ActivityWorker.this.taskQueue)) { + return Optional.empty(); + } + return 
ActivityWorker.this.slotSupplier.tryReserveSlot( + new SlotReservationData( + ActivityWorker.this.taskQueue, options.getIdentity(), options.getBuildId())); } @Override - public void releaseActivitySlotReservations(int slotCounts) { - ActivityWorker.this.executorSlotsSemaphore.release(slotCounts); + public void releaseActivitySlotReservations(Iterable permits) { + for (SlotPermit permit : permits) { + ActivityWorker.this.slotSupplier.releaseSlot(SlotReleaseReason.neverUsed(), permit); + } } @Override - public void dispatchActivity(PollActivityTaskQueueResponse activity) { + public void dispatchActivity(PollActivityTaskQueueResponse activity, SlotPermit permit) { ActivityWorker.this.pollTaskExecutor.process( - new ActivityTask(activity, ActivityWorker.this.executorSlotsSemaphore::release)); + new ActivityTask( + activity, + permit, + () -> + ActivityWorker.this.slotSupplier.releaseSlot( + SlotReleaseReason.taskComplete(), permit))); } } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivityDispatcher.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivityDispatcher.java index 721bdfa796..92b0b6e88b 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivityDispatcher.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivityDispatcher.java @@ -22,30 +22,33 @@ import io.temporal.api.command.v1.ScheduleActivityTaskCommandAttributesOrBuilder; import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponse; +import io.temporal.worker.tuning.SlotPermit; +import java.util.Optional; public interface EagerActivityDispatcher { - boolean tryReserveActivitySlot(ScheduleActivityTaskCommandAttributesOrBuilder commandAttributes); + Optional tryReserveActivitySlot( + ScheduleActivityTaskCommandAttributesOrBuilder commandAttributes); - void releaseActivitySlotReservations(int slotCounts); + void releaseActivitySlotReservations(Iterable permits); - void 
dispatchActivity(PollActivityTaskQueueResponse activity); + void dispatchActivity(PollActivityTaskQueueResponse activity, SlotPermit permit); class NoopEagerActivityDispatcher implements EagerActivityDispatcher { @Override - public boolean tryReserveActivitySlot( + public Optional tryReserveActivitySlot( ScheduleActivityTaskCommandAttributesOrBuilder commandAttributes) { - return false; + return Optional.empty(); } @Override - public void releaseActivitySlotReservations(int slotCounts) { - if (slotCounts > 0) + public void releaseActivitySlotReservations(Iterable permits) { + if (permits.iterator().hasNext()) throw new IllegalStateException( "Trying to release activity slots on a NoopEagerActivityDispatcher"); } @Override - public void dispatchActivity(PollActivityTaskQueueResponse activity) { + public void dispatchActivity(PollActivityTaskQueueResponse activity, SlotPermit permit) { throw new IllegalStateException( "Trying to dispatch activity on a NoopEagerActivityDispatcher"); } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivitySlotsReservation.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivitySlotsReservation.java index 2da101321e..e1a5c1e887 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivitySlotsReservation.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/EagerActivitySlotsReservation.java @@ -28,14 +28,18 @@ import io.temporal.api.workflowservice.v1.RespondWorkflowTaskCompletedRequest; import io.temporal.api.workflowservice.v1.RespondWorkflowTaskCompletedResponse; import io.temporal.internal.Config; +import io.temporal.worker.tuning.SlotPermit; import java.io.Closeable; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; import javax.annotation.concurrent.NotThreadSafe; /** This class is not thread safe and shouldn't leave the boundaries of one activity executor */ @NotThreadSafe class EagerActivitySlotsReservation implements 
Closeable { private final EagerActivityDispatcher eagerActivityDispatcher; - private int outstandingReservationSlotsCount = 0; + private final List reservedSlots = new ArrayList<>(); EagerActivitySlotsReservation(EagerActivityDispatcher eagerActivityDispatcher) { this.eagerActivityDispatcher = eagerActivityDispatcher; @@ -49,10 +53,14 @@ public void applyToRequest(RespondWorkflowTaskCompletedRequest.Builder mutableRe ScheduleActivityTaskCommandAttributes commandAttributes = command.getScheduleActivityTaskCommandAttributes(); if (!commandAttributes.getRequestEagerExecution()) continue; + boolean atLimit = this.reservedSlots.size() >= Config.EAGER_ACTIVITIES_LIMIT; + Optional permit = Optional.empty(); + if (!atLimit) { + permit = this.eagerActivityDispatcher.tryReserveActivitySlot(commandAttributes); + } - if (this.outstandingReservationSlotsCount < Config.EAGER_ACTIVITIES_LIMIT - && this.eagerActivityDispatcher.tryReserveActivitySlot(commandAttributes)) { - this.outstandingReservationSlotsCount++; + if (permit.isPresent()) { + this.reservedSlots.add(permit.get()); } else { mutableRequest.setCommands( i, @@ -66,34 +74,32 @@ public void applyToRequest(RespondWorkflowTaskCompletedRequest.Builder mutableRe public void handleResponse(RespondWorkflowTaskCompletedResponse serverResponse) { int activityTasksCount = serverResponse.getActivityTasksCount(); Preconditions.checkArgument( - activityTasksCount <= this.outstandingReservationSlotsCount, + activityTasksCount <= this.reservedSlots.size(), "Unexpectedly received %s eager activities though we only requested %s", activityTasksCount, - this.outstandingReservationSlotsCount); - - releaseSlots(this.outstandingReservationSlotsCount - activityTasksCount); + this.reservedSlots.size()); for (PollActivityTaskQueueResponse act : serverResponse.getActivityTasksList()) { - // don't release slots here, instead the semaphore release reference is passed to the activity - // worker to release when the activity is done - 
this.eagerActivityDispatcher.dispatchActivity(act); + // don't release slots here, instead the release function is called in the activity worker to + // release when the activity is done + SlotPermit permit = this.reservedSlots.remove(0); + this.eagerActivityDispatcher.dispatchActivity(act, permit); } - this.outstandingReservationSlotsCount = 0; + // Release any remaining that we won't be using + try { + this.eagerActivityDispatcher.releaseActivitySlotReservations(this.reservedSlots); + } finally { + this.reservedSlots.clear(); + } } @Override public void close() { - if (this.outstandingReservationSlotsCount > 0) - releaseSlots(this.outstandingReservationSlotsCount); - } - - private void releaseSlots(int slotsToRelease) { - if (slotsToRelease > this.outstandingReservationSlotsCount) - throw new IllegalStateException( - "Trying to release more activity slots than outstanding reservations"); - - this.eagerActivityDispatcher.releaseActivitySlotReservations(slotsToRelease); - this.outstandingReservationSlotsCount -= slotsToRelease; + if (!this.reservedSlots.isEmpty()) { + // Release all slots + this.eagerActivityDispatcher.releaseActivitySlotReservations(this.reservedSlots); + this.reservedSlots.clear(); + } } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityAttemptTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityAttemptTask.java index 146f56333c..ec38c5b6e9 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityAttemptTask.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityAttemptTask.java @@ -21,7 +21,6 @@ package io.temporal.internal.worker; import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponse; -import io.temporal.workflow.Functions; import java.util.concurrent.ScheduledFuture; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -29,17 +28,14 @@ class LocalActivityAttemptTask { private final @Nonnull 
LocalActivityExecutionContext executionContext; private final @Nonnull PollActivityTaskQueueResponse.Builder attemptTask; - private final @Nullable Functions.Proc takenFromQueueCallback; private final @Nullable ScheduledFuture scheduleToStartFuture; public LocalActivityAttemptTask( @Nonnull LocalActivityExecutionContext executionContext, @Nonnull PollActivityTaskQueueResponse.Builder attemptTask, - @Nullable Functions.Proc takenFromQueueCallback, @Nullable ScheduledFuture scheduleToStartFuture) { this.executionContext = executionContext; this.attemptTask = attemptTask; - this.takenFromQueueCallback = takenFromQueueCallback; this.scheduleToStartFuture = scheduleToStartFuture; } @@ -57,13 +53,6 @@ public PollActivityTaskQueueResponse.Builder getAttemptTask() { return attemptTask; } - public void markAsTakenFromQueue() { - executionContext.newAttempt(); - if (takenFromQueueCallback != null) { - takenFromQueueCallback.apply(); - } - } - @Nullable public ScheduledFuture getScheduleToStartFuture() { return scheduleToStartFuture; diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityExecutionContext.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityExecutionContext.java index 061415b010..153570e9d4 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityExecutionContext.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityExecutionContext.java @@ -26,6 +26,7 @@ import io.temporal.api.workflowservice.v1.DescribeWorkflowExecutionRequest; import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponse; import io.temporal.internal.statemachines.ExecuteLocalActivityParameters; +import io.temporal.worker.tuning.SlotPermit; import io.temporal.workflow.Functions; import java.time.Duration; import java.util.Objects; @@ -44,6 +45,7 @@ class LocalActivityExecutionContext { private @Nullable ScheduledFuture scheduleToCloseFuture; private final @Nonnull CompletableFuture 
executionResult = new CompletableFuture<>(); + private @Nullable SlotPermit permit; public LocalActivityExecutionContext( @Nonnull ExecuteLocalActivityParameters executionParams, @@ -164,4 +166,13 @@ public boolean isCompleted() { public void newAttempt() { executionParams.getOnNewAttemptCallback().apply(); } + + public void setPermit(SlotPermit permit) { + this.permit = permit; + } + + @Nullable + public SlotPermit getPermit() { + return permit; + } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java new file mode 100644 index 0000000000..94892ab499 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.worker; + +import io.temporal.worker.tuning.LocalActivitySlotInfo; +import io.temporal.worker.tuning.SlotPermit; +import io.temporal.workflow.Functions; +import java.util.concurrent.PriorityBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class LocalActivitySlotSupplierQueue { + static final class QueuedLARequest { + final boolean isRetry; + final SlotReservationData data; + final LocalActivityAttemptTask task; + + QueuedLARequest(boolean isRetry, SlotReservationData data, LocalActivityAttemptTask task) { + this.isRetry = isRetry; + this.data = data; + this.task = task; + } + } + + private final PriorityBlockingQueue requestQueue; + private final Semaphore newExecutionsBackpressureSemaphore; + private final TrackingSlotSupplier slotSupplier; + private final Functions.Proc1 afterReservedCallback; + private final Thread queueThread; + private static final Logger log = + LoggerFactory.getLogger(LocalActivitySlotSupplierQueue.class.getName()); + private volatile boolean running = true; + + LocalActivitySlotSupplierQueue( + TrackingSlotSupplier slotSupplier, + Functions.Proc1 afterReservedCallback) { + this.afterReservedCallback = afterReservedCallback; + // TODO: See if there's a better option than fixed number for no-max suppliers + // https://github.com/temporalio/sdk-java/issues/2149 + int maximumSlots = slotSupplier.maximumSlots().orElse(50) * 2; + this.newExecutionsBackpressureSemaphore = new Semaphore(maximumSlots); + this.requestQueue = + new PriorityBlockingQueue<>( + maximumSlots, + (r1, r2) -> { + // Prioritize retries + if (r1.isRetry && !r2.isRetry) { + return -1; + } else if (!r1.isRetry && r2.isRetry) { + return 1; + } + return 0; + }); + this.slotSupplier = slotSupplier; + this.queueThread = new Thread(this::processQueue, "LocalActivitySlotSupplierQueue"); + 
this.queueThread.start(); + } + + private void processQueue() { + try { + while (running) { + QueuedLARequest request = requestQueue.take(); + SlotPermit slotPermit; + try { + slotPermit = slotSupplier.reserveSlot(request.data); + } catch (Exception e) { + log.error( + "Error reserving local activity slot, dropped activity id {}", + request.task.getActivityId(), + e); + continue; + } + request.task.getExecutionContext().setPermit(slotPermit); + afterReservedCallback.apply(request.task); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + void shutdown() { + running = false; + queueThread.interrupt(); + } + + boolean waitOnBackpressure(@Nullable Long acceptanceTimeoutMs) throws InterruptedException { + boolean accepted; + if (acceptanceTimeoutMs == null) { + newExecutionsBackpressureSemaphore.acquire(); + accepted = true; + } else { + if (acceptanceTimeoutMs > 0) { + accepted = + newExecutionsBackpressureSemaphore.tryAcquire( + acceptanceTimeoutMs, TimeUnit.MILLISECONDS); + } else { + accepted = newExecutionsBackpressureSemaphore.tryAcquire(); + } + } + return accepted; + } + + void submitAttempt(SlotReservationData data, boolean isRetry, LocalActivityAttemptTask task) { + QueuedLARequest request = new QueuedLARequest(isRetry, data, task); + requestQueue.add(request); + + if (!isRetry) { + // If this attempt isn't a retry, that means it had to get a permit from the backpressure + // semaphore, and therefore we should release that permit now. 
+ newExecutionsBackpressureSemaphore.release(); + } + } + + TrackingSlotSupplier getSlotSupplier() { + return slotSupplier; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java index 5c6254a7c4..8fbd5771c3 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java @@ -36,6 +36,7 @@ import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponseOrBuilder; import io.temporal.common.RetryOptions; import io.temporal.failure.ApplicationFailure; +import io.temporal.internal.activity.ActivityPollResponseToInfo; import io.temporal.internal.common.ProtobufTimeUtils; import io.temporal.internal.common.RetryOptionsUtils; import io.temporal.internal.logging.LoggerTag; @@ -43,6 +44,7 @@ import io.temporal.serviceclient.MetricsTag; import io.temporal.worker.MetricsType; import io.temporal.worker.WorkerMetricsTag; +import io.temporal.worker.tuning.*; import io.temporal.workflow.Functions; import java.time.Duration; import java.util.Objects; @@ -70,48 +72,59 @@ final class LocalActivityWorker implements Startable, Shutdownable { private ScheduledExecutorService scheduledExecutor; private PollTaskExecutor activityAttemptTaskExecutor; + private final TrackingSlotSupplier slotSupplier; + private final LocalActivitySlotSupplierQueue slotQueue; public LocalActivityWorker( @Nonnull String namespace, @Nonnull String taskQueue, @Nonnull SingleWorkerOptions options, - @Nonnull ActivityTaskHandler handler) { + @Nonnull ActivityTaskHandler handler, + @Nonnull SlotSupplier slotSupplier) { this.namespace = Objects.requireNonNull(namespace); this.taskQueue = Objects.requireNonNull(taskQueue); this.handler = handler; - this.laScheduler = new LocalActivityDispatcherImpl(2 * options.getTaskExecutorThreadPoolSize()); this.options = 
Objects.requireNonNull(options); this.pollerOptions = getPollerOptions(options); this.workerMetricsScope = MetricsTag.tagged( options.getMetricsScope(), WorkerMetricsTag.WorkerType.LOCAL_ACTIVITY_WORKER); + this.slotSupplier = + new TrackingSlotSupplier<>(Objects.requireNonNull(slotSupplier), this.workerMetricsScope); + this.slotQueue = + new LocalActivitySlotSupplierQueue( + this.slotSupplier, (t) -> activityAttemptTaskExecutor.process(t)); + this.laScheduler = new LocalActivityDispatcherImpl(); } private void submitRetry( @Nonnull LocalActivityExecutionContext executionContext, @Nonnull PollActivityTaskQueueResponse.Builder activityTask) { - submitAttempt(executionContext, activityTask, null); + submitAttempt(executionContext, activityTask, true); } private void submitAttempt( @Nonnull LocalActivityExecutionContext executionContext, @Nonnull PollActivityTaskQueueResponse.Builder activityTask, - @Nullable Functions.Proc leftQueueCallback) { + boolean isRetry) { @Nullable Duration scheduleToStartTimeout = executionContext.getScheduleToStartTimeout(); - @Nullable ScheduledFuture scheduleToStartFuture = null; + @Nullable final ScheduledFuture scheduleToStartFuture; if (scheduleToStartTimeout != null) { scheduleToStartFuture = scheduledExecutor.schedule( new FinalTimeoutHandler(TimeoutType.TIMEOUT_TYPE_SCHEDULE_TO_START, executionContext), scheduleToStartTimeout.toMillis(), TimeUnit.MILLISECONDS); + } else { + scheduleToStartFuture = null; } + SlotReservationData reservationDat = + new SlotReservationData(taskQueue, options.getIdentity(), options.getBuildId()); activityTask.setCurrentAttemptScheduledTime(ProtobufTimeUtils.getCurrentProtoTime()); - LocalActivityAttemptTask task = - new LocalActivityAttemptTask( - executionContext, activityTask, leftQueueCallback, scheduleToStartFuture); - activityAttemptTaskExecutor.process(task); + final LocalActivityAttemptTask task = + new LocalActivityAttemptTask(executionContext, activityTask, scheduleToStartFuture); + 
slotQueue.submitAttempt(reservationDat, isRetry, task); } /** @@ -191,18 +204,6 @@ private void scheduleNextAttempt( } private class LocalActivityDispatcherImpl implements LocalActivityDispatcher { - /** - * Retries always get a green light, but we have a backpressure for new tasks if the queue fills - * up with not picked up new executions - */ - private final Semaphore newExecutionsBackpressureSemaphore; - - public LocalActivityDispatcherImpl(int semaphorePermits) { - // number of permits for this semaphore is not that important, because we allow submitter to - // block and wait till the workflow task heartbeat to allow the worker to tolerate spikes of - // short local activity executions. - this.newExecutionsBackpressureSemaphore = new Semaphore(semaphorePermits); - } @Override public boolean dispatch( @@ -258,25 +259,16 @@ private boolean submitANewExecution( @Nonnull PollActivityTaskQueueResponse.Builder activityTask, @Nullable Deadline acceptanceDeadline) { try { - boolean accepted; - if (acceptanceDeadline == null) { - newExecutionsBackpressureSemaphore.acquire(); - accepted = true; - } else { - long acceptanceTimeoutMs = acceptanceDeadline.timeRemaining(TimeUnit.MILLISECONDS); - if (acceptanceTimeoutMs > 0) { - accepted = - newExecutionsBackpressureSemaphore.tryAcquire( - acceptanceTimeoutMs, TimeUnit.MILLISECONDS); - } else { - accepted = newExecutionsBackpressureSemaphore.tryAcquire(); - } - if (!accepted) { - log.warn( - "LocalActivity queue is full and submitting timed out for activity {} with acceptanceTimeoutMs: {}", - activityTask.getActivityId(), - acceptanceTimeoutMs); - } + Long acceptanceTimeoutMs = + acceptanceDeadline != null + ? 
acceptanceDeadline.timeRemaining(TimeUnit.MILLISECONDS) + : null; + boolean accepted = slotQueue.waitOnBackpressure(acceptanceTimeoutMs); + if (!accepted) { + log.warn( + "LocalActivity queue is full and submitting timed out for activity {} with acceptanceTimeoutMs: {}", + activityTask.getActivityId(), + acceptanceTimeoutMs); } if (accepted) { @@ -293,8 +285,7 @@ private boolean submitANewExecution( TimeUnit.MILLISECONDS); executionContext.setScheduleToCloseFuture(scheduleToCloseFuture); } - submitAttempt( - executionContext, activityTask, newExecutionsBackpressureSemaphore::release); + submitAttempt(executionContext, activityTask, false); log.trace("LocalActivity queued: {}", activityTask.getActivityId()); } return accepted; @@ -416,7 +407,7 @@ private AttemptTaskHandlerImpl(ActivityTaskHandler handler) { @Override public void handle(LocalActivityAttemptTask attemptTask) throws Exception { - attemptTask.markAsTakenFromQueue(); + SlotReleaseReason reason = SlotReleaseReason.taskComplete(); // cancel scheduleToStart timeout if not already fired @Nullable ScheduledFuture scheduleToStartFuture = attemptTask.getScheduleToStartFuture(); @@ -424,32 +415,41 @@ public void handle(LocalActivityAttemptTask attemptTask) throws Exception { scheduleToStartFuture != null && !scheduleToStartFuture.cancel(false); LocalActivityExecutionContext executionContext = attemptTask.getExecutionContext(); + executionContext.newAttempt(); PollActivityTaskQueueResponseOrBuilder activityTask = attemptTask.getAttemptTask(); - // if an activity was already completed by any mean like scheduleToClose or scheduleToStart, - // discard this attempt, this execution is completed. - // The scheduleToStartFired check here is a bit overkill, but allows to catch an edge case - // where - // scheduleToStart is already fired, but didn't report a completion yet. 
- boolean shouldDiscardTheAttempt = scheduleToStartFired || executionContext.isCompleted(); - if (shouldDiscardTheAttempt) { - return; - } - - Scope metricsScope = - workerMetricsScope.tagged( - ImmutableMap.of( - MetricsTag.ACTIVITY_TYPE, - activityTask.getActivityType().getName(), - MetricsTag.WORKFLOW_TYPE, - activityTask.getWorkflowType().getName())); - - MDC.put(LoggerTag.ACTIVITY_ID, activityTask.getActivityId()); - MDC.put(LoggerTag.ACTIVITY_TYPE, activityTask.getActivityType().getName()); - MDC.put(LoggerTag.WORKFLOW_ID, activityTask.getWorkflowExecution().getWorkflowId()); - MDC.put(LoggerTag.WORKFLOW_TYPE, activityTask.getWorkflowType().getName()); - MDC.put(LoggerTag.RUN_ID, activityTask.getWorkflowExecution().getRunId()); try { + // if an activity was already completed by any mean like scheduleToClose or scheduleToStart, + // discard this attempt, this execution is completed. + // The scheduleToStartFired check here is a bit overkill, but allows to catch an edge case + // where scheduleToStart is already fired, but didn't report a completion yet. 
+ boolean shouldDiscardTheAttempt = scheduleToStartFired || executionContext.isCompleted(); + if (shouldDiscardTheAttempt) { + return; + } + + Scope metricsScope = + workerMetricsScope.tagged( + ImmutableMap.of( + MetricsTag.ACTIVITY_TYPE, + activityTask.getActivityType().getName(), + MetricsTag.WORKFLOW_TYPE, + activityTask.getWorkflowType().getName())); + + MDC.put(LoggerTag.ACTIVITY_ID, activityTask.getActivityId()); + MDC.put(LoggerTag.ACTIVITY_TYPE, activityTask.getActivityType().getName()); + MDC.put(LoggerTag.WORKFLOW_ID, activityTask.getWorkflowExecution().getWorkflowId()); + MDC.put(LoggerTag.WORKFLOW_TYPE, activityTask.getWorkflowType().getName()); + MDC.put(LoggerTag.RUN_ID, activityTask.getWorkflowExecution().getRunId()); + + slotSupplier.markSlotUsed( + new LocalActivitySlotInfo( + ActivityPollResponseToInfo.toActivityInfoImpl( + activityTask, namespace, taskQueue, true), + options.getIdentity(), + options.getBuildId()), + executionContext.getPermit()); + ScheduledFuture startToCloseTimeoutFuture = null; if (activityTask.hasStartToCloseTimeout()) { @@ -468,7 +468,10 @@ public void handle(LocalActivityAttemptTask attemptTask) throws Exception { Stopwatch sw = metricsScope.timer(MetricsType.LOCAL_ACTIVITY_EXECUTION_LATENCY).start(); try { activityHandlerResult = - handler.handle(new ActivityTask(activityTask, () -> {}), metricsScope, true); + handler.handle( + new ActivityTask(activityTask, executionContext.getPermit(), () -> {}), + metricsScope, + true); } finally { sw.stop(); } @@ -488,7 +491,7 @@ public void handle(LocalActivityAttemptTask attemptTask) throws Exception { return; } - handleResult(activityHandlerResult, attemptTask, metricsScope); + reason = handleResult(activityHandlerResult, attemptTask, metricsScope); } catch (Throwable ex) { // handleLocalActivity is expected to never throw an exception and return a result // that can be used for a workflow callback if this method throws, it's a bug. 
@@ -497,6 +500,7 @@ public void handle(LocalActivityAttemptTask attemptTask) throws Exception { processingFailed(activityTask.getActivityId(), activityTask.getAttempt(), ex)); throw ex; } finally { + slotSupplier.releaseSlot(reason, executionContext.getPermit()); MDC.remove(LoggerTag.ACTIVITY_ID); MDC.remove(LoggerTag.ACTIVITY_TYPE); MDC.remove(LoggerTag.WORKFLOW_ID); @@ -505,13 +509,14 @@ public void handle(LocalActivityAttemptTask attemptTask) throws Exception { } } - private void handleResult( + private SlotReleaseReason handleResult( ActivityTaskHandler.Result activityHandlerResult, LocalActivityAttemptTask attemptTask, Scope metricsScope) { LocalActivityExecutionContext executionContext = attemptTask.getExecutionContext(); PollActivityTaskQueueResponseOrBuilder activityTask = attemptTask.getAttemptTask(); int currentAttempt = activityTask.getAttempt(); + SlotReleaseReason releaseReason = SlotReleaseReason.taskComplete(); // Success if (activityHandlerResult.getTaskCompleted() != null) { @@ -528,14 +533,14 @@ private void handleResult( System.currentTimeMillis() - executionContext.getOriginalScheduledTimestamp()); metricsScope.timer(MetricsType.LOCAL_ACTIVITY_SUCCEED_E2E_LATENCY).record(e2eDuration); } - return; + return releaseReason; } // Cancellation if (activityHandlerResult.getTaskCanceled() != null) { executionContext.callback( LocalActivityResult.cancelled(activityHandlerResult, currentAttempt)); - return; + return releaseReason; } // Failure @@ -552,12 +557,14 @@ private void handleResult( executionContext, activityTask, activityHandlerResult.getTaskFailed().getFailure()); if (retryDecision.doNextAttempt()) { + releaseReason = SlotReleaseReason.willRetry(); scheduleNextAttempt( executionContext, Objects.requireNonNull( retryDecision.nextAttemptBackoff, "nextAttemptBackoff is expected to not be null"), executionFailure); } else if (retryDecision.failWorkflowTask()) { + releaseReason = SlotReleaseReason.error(new Exception(executionThrowable)); 
executionContext.callback( processingFailed(executionContext.getActivityId(), currentAttempt, executionThrowable)); } else { @@ -569,6 +576,7 @@ private void handleResult( executionFailure, retryDecision.nextAttemptBackoff)); } + return releaseReason; } @Override @@ -677,8 +685,7 @@ public boolean start() { options.getIdentity(), new AttemptTaskHandlerImpl(handler), pollerOptions, - options.getTaskExecutorThreadPoolSize(), - workerMetricsScope, + slotSupplier.maximumSlots().orElse(Integer.MAX_VALUE), false); this.workerMetricsScope.counter(MetricsType.WORKER_START_COUNTER).inc(1); @@ -691,6 +698,7 @@ public boolean start() { @Override public CompletableFuture shutdown(ShutdownManager shutdownManager, boolean interruptTasks) { if (activityAttemptTaskExecutor != null && !activityAttemptTaskExecutor.isShutdown()) { + slotQueue.shutdown(); return activityAttemptTaskExecutor .shutdown(shutdownManager, interruptTasks) .thenCompose( diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/PollTaskExecutor.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/PollTaskExecutor.java index 08c65f92f6..38cdfcd21f 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/PollTaskExecutor.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/PollTaskExecutor.java @@ -21,12 +21,9 @@ package io.temporal.internal.worker; import com.google.common.base.Preconditions; -import com.uber.m3.tally.Scope; import io.temporal.internal.logging.LoggerTag; -import io.temporal.worker.MetricsType; import java.util.Objects; import java.util.concurrent.*; -import java.util.concurrent.atomic.AtomicInteger; import javax.annotation.Nonnull; import org.slf4j.MDC; @@ -43,10 +40,8 @@ public interface TaskHandler { private final String identity; private final TaskHandler handler; private final PollerOptions pollerOptions; - private final Scope metricsScope; private final ThreadPoolExecutor taskExecutor; - private final AtomicInteger availableTaskSlots; private 
final String pollThreadNamePrefix; PollTaskExecutor( @@ -56,14 +51,12 @@ public interface TaskHandler { @Nonnull TaskHandler handler, @Nonnull PollerOptions pollerOptions, int workerTaskSlots, - @Nonnull Scope metricsScope, boolean synchronousQueue) { this.namespace = Objects.requireNonNull(namespace); this.taskQueue = Objects.requireNonNull(taskQueue); this.identity = Objects.requireNonNull(identity); this.handler = Objects.requireNonNull(handler); this.pollerOptions = Objects.requireNonNull(pollerOptions); - this.metricsScope = Objects.requireNonNull(metricsScope); this.taskExecutor = new ThreadPoolExecutor( @@ -79,9 +72,6 @@ public interface TaskHandler { synchronousQueue ? new SynchronousQueue<>() : new LinkedBlockingQueue<>()); this.taskExecutor.allowCoreThreadTimeOut(true); - this.availableTaskSlots = new AtomicInteger(workerTaskSlots); - publishSlotsMetric(); - this.pollThreadNamePrefix = pollerOptions.getPollThreadNamePrefix().replaceFirst("Poller", "Executor"); @@ -97,8 +87,6 @@ public void process(@Nonnull T task) { Preconditions.checkNotNull(task, "task"); taskExecutor.execute( () -> { - availableTaskSlots.decrementAndGet(); - publishSlotsMetric(); try { MDC.put(LoggerTag.NAMESPACE, namespace); MDC.put(LoggerTag.TASK_QUEUE, taskQueue); @@ -117,8 +105,6 @@ public void process(@Nonnull T task) { // throw (Error)e; // } } finally { - availableTaskSlots.incrementAndGet(); - publishSlotsMetric(); MDC.remove(LoggerTag.NAMESPACE); MDC.remove(LoggerTag.TASK_QUEUE); } @@ -156,10 +142,4 @@ public String toString() { // that will simplify such kind of logging through workers. 
return String.format("PollTaskExecutor{name=%s, identity=%s}", pollThreadNamePrefix, identity); } - - private void publishSlotsMetric() { - this.metricsScope - .gauge(MetricsType.WORKER_TASK_SLOTS_AVAILABLE) - .update(availableTaskSlots.get()); - } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/ShutdownManager.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/ShutdownManager.java index 61c5c85a29..f790febb42 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/ShutdownManager.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/ShutdownManager.java @@ -68,12 +68,10 @@ public CompletableFuture shutdownExecutorUntimed( return untimedWait(executorToShutdown, executorName); } - public CompletableFuture waitForSemaphorePermitsReleaseUntimed( - Semaphore semaphore, int initialSemaphorePermits, String semaphoreName) { + public CompletableFuture waitForSupplierPermitsReleasedUnlimited( + TrackingSlotSupplier slotSupplier, String name) { CompletableFuture future = new CompletableFuture<>(); - scheduledExecutorService.submit( - new SemaphoreReportingDelayShutdown( - semaphore, initialSemaphorePermits, semaphoreName, future)); + scheduledExecutorService.submit(new SlotSupplierDelayShutdown(slotSupplier, name, future)); return future; } @@ -267,30 +265,25 @@ void onSlowSuccessfulTermination() { } } - private class SemaphoreReportingDelayShutdown extends ReportingDelayShutdown { - private final Semaphore semaphore; - private final int initialSemaphorePermits; - private final String semaphoreName; + private class SlotSupplierDelayShutdown extends ReportingDelayShutdown { + private final TrackingSlotSupplier slotSupplier; + private final String name; - public SemaphoreReportingDelayShutdown( - Semaphore semaphore, - int initialSemaphorePermits, - String semaphoreName, - CompletableFuture promise) { + public SlotSupplierDelayShutdown( + TrackingSlotSupplier supplier, String name, CompletableFuture promise) { 
super(promise); - this.semaphore = semaphore; - this.initialSemaphorePermits = initialSemaphorePermits; - this.semaphoreName = semaphoreName; + this.slotSupplier = supplier; + this.name = name; } @Override boolean isTerminated() { - return semaphore.availablePermits() == initialSemaphorePermits; + return slotSupplier.getIssuedSlots() == 0; } @Override void onSlowTermination() { - log.warn("Wait for release of slots of {} takes a long time", semaphoreName); + log.warn("Wait for release of slots of {} takes a long time", name); } @Override @@ -298,7 +291,7 @@ void onSuccessfulTermination() {} @Override void onSlowSuccessfulTermination() { - log.warn("All slots of {} were successfully released", semaphoreName); + log.warn("All slots of {} were successfully released", name); } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/SingleWorkerOptions.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/SingleWorkerOptions.java index 3a6f998ddb..990a9ce46f 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/SingleWorkerOptions.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/SingleWorkerOptions.java @@ -47,13 +47,11 @@ public static final class Builder { private String buildId; private boolean useBuildIdForVersioning; private DataConverter dataConverter; - private int taskExecutorThreadPoolSize = 100; private PollerOptions pollerOptions; private Scope metricsScope; private boolean enableLoggingInReplay; private List contextPropagators; private WorkerInterceptor[] workerInterceptors; - private Duration stickyQueueScheduleToStartTimeout; private long defaultDeadlockDetectionTimeout; private Duration maxHeartbeatThrottleInterval; @@ -70,7 +68,6 @@ private Builder(SingleWorkerOptions options) { this.binaryChecksum = options.getBinaryChecksum(); this.dataConverter = options.getDataConverter(); this.pollerOptions = options.getPollerOptions(); - this.taskExecutorThreadPoolSize = options.getTaskExecutorThreadPoolSize(); 
this.metricsScope = options.getMetricsScope(); this.enableLoggingInReplay = options.getEnableLoggingInReplay(); this.contextPropagators = options.getContextPropagators(); @@ -100,11 +97,6 @@ public Builder setDataConverter(DataConverter dataConverter) { return this; } - public Builder setTaskExecutorThreadPoolSize(int taskExecutorThreadPoolSize) { - this.taskExecutorThreadPoolSize = taskExecutorThreadPoolSize; - return this; - } - public Builder setPollerOptions(PollerOptions pollerOptions) { this.pollerOptions = pollerOptions; return this; @@ -195,7 +187,6 @@ public SingleWorkerOptions build() { this.buildId, this.useBuildIdForVersioning, dataConverter, - this.taskExecutorThreadPoolSize, pollerOptions, metricsScope, this.enableLoggingInReplay, @@ -214,7 +205,6 @@ public SingleWorkerOptions build() { private final String buildId; private final boolean useBuildIdForVersioning; private final DataConverter dataConverter; - private final int taskExecutorThreadPoolSize; private final PollerOptions pollerOptions; private final Scope metricsScope; private final boolean enableLoggingInReplay; @@ -232,7 +222,6 @@ private SingleWorkerOptions( String buildId, boolean useBuildIdForVersioning, DataConverter dataConverter, - int taskExecutorThreadPoolSize, PollerOptions pollerOptions, Scope metricsScope, boolean enableLoggingInReplay, @@ -248,7 +237,6 @@ private SingleWorkerOptions( this.buildId = buildId; this.useBuildIdForVersioning = useBuildIdForVersioning; this.dataConverter = dataConverter; - this.taskExecutorThreadPoolSize = taskExecutorThreadPoolSize; this.pollerOptions = pollerOptions; this.metricsScope = metricsScope; this.enableLoggingInReplay = enableLoggingInReplay; @@ -289,10 +277,6 @@ public DataConverter getDataConverter() { return dataConverter; } - public int getTaskExecutorThreadPoolSize() { - return taskExecutorThreadPoolSize; - } - public PollerOptions getPollerOptions() { return pollerOptions; } diff --git 
a/temporal-sdk/src/main/java/io/temporal/internal/worker/SlotReservationData.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/SlotReservationData.java new file mode 100644 index 0000000000..e3378d5c0f --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/SlotReservationData.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.worker; + +public class SlotReservationData { + public final String taskQueue; + public final String workerIdentity; + public final String workerBuildId; + + public SlotReservationData(String taskQueue, String workerIdentity, String workerBuildId) { + this.taskQueue = taskQueue; + this.workerIdentity = workerIdentity; + this.workerBuildId = workerBuildId; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncActivityWorker.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncActivityWorker.java index 8e1537b7f1..5fd7a96e79 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncActivityWorker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncActivityWorker.java @@ -24,6 +24,8 @@ import io.temporal.internal.activity.ActivityExecutionContextFactoryImpl; import io.temporal.internal.activity.ActivityTaskHandlerImpl; import io.temporal.serviceclient.WorkflowServiceStubs; +import io.temporal.worker.tuning.ActivitySlotInfo; +import io.temporal.worker.tuning.SlotSupplier; import java.time.Duration; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; @@ -49,7 +51,8 @@ public SyncActivityWorker( String namespace, String taskQueue, double taskQueueActivitiesPerSecond, - SingleWorkerOptions options) { + SingleWorkerOptions options, + SlotSupplier slotSupplier) { this.identity = options.getIdentity(); this.namespace = namespace; this.taskQueue = taskQueue; @@ -83,7 +86,13 @@ public SyncActivityWorker( options.getContextPropagators()); this.worker = new ActivityWorker( - service, namespace, taskQueue, taskQueueActivitiesPerSecond, options, taskHandler); + service, + namespace, + taskQueue, + taskQueueActivitiesPerSecond, + options, + taskHandler, + slotSupplier); } public void registerActivityImplementations(Object... 
activitiesImplementation) { diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncWorkflowWorker.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncWorkflowWorker.java index a1ece312f5..5d83efbb8a 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncWorkflowWorker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/SyncWorkflowWorker.java @@ -34,6 +34,9 @@ import io.temporal.serviceclient.WorkflowServiceStubs; import io.temporal.worker.WorkflowImplementationOptions; import io.temporal.worker.WorkflowTaskDispatchHandle; +import io.temporal.worker.tuning.LocalActivitySlotInfo; +import io.temporal.worker.tuning.SlotSupplier; +import io.temporal.worker.tuning.WorkflowSlotInfo; import io.temporal.workflow.Functions.Func; import java.lang.reflect.Type; import java.time.Duration; @@ -80,7 +83,9 @@ public SyncWorkflowWorker( @Nonnull WorkflowExecutorCache cache, String stickyTaskQueueName, @Nonnull WorkflowThreadExecutor workflowThreadExecutor, - @Nonnull EagerActivityDispatcher eagerActivityDispatcher) { + @Nonnull EagerActivityDispatcher eagerActivityDispatcher, + @Nonnull SlotSupplier slotSupplier, + @Nonnull SlotSupplier laSlotSupplier) { this.identity = singleWorkerOptions.getIdentity(); this.namespace = namespace; this.taskQueue = taskQueue; @@ -104,7 +109,9 @@ public SyncWorkflowWorker( laActivityExecutionContextFactory, localActivityOptions.getWorkerInterceptors(), localActivityOptions.getContextPropagators()); - laWorker = new LocalActivityWorker(namespace, taskQueue, localActivityOptions, laTaskHandler); + laWorker = + new LocalActivityWorker( + namespace, taskQueue, localActivityOptions, laTaskHandler, laSlotSupplier); TaskQueue stickyTaskQueue = null; if (stickyTaskQueueName != null) { stickyTaskQueue = createStickyTaskQueue(stickyTaskQueueName, taskQueue); @@ -131,7 +138,8 @@ public SyncWorkflowWorker( runLocks, cache, taskHandler, - eagerActivityDispatcher); + eagerActivityDispatcher, + 
slotSupplier); // Exists to support Worker#replayWorkflowExecution functionality. // This handler has to be non-sticky to avoid evicting actual executions from the cache diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java new file mode 100644 index 0000000000..6ee36d6398 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.internal.worker; + +import com.uber.m3.tally.Scope; +import io.temporal.worker.MetricsType; +import io.temporal.worker.tuning.*; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Wraps a slot supplier and supplements it with additional tracking information that is useful to + * provide to all implementations. This type is used internally rather than {@link SlotSupplier} + * directly. 
+ * + * @param The slot info type + */ +public class TrackingSlotSupplier { + private final SlotSupplier inner; + private final AtomicInteger issuedSlots = new AtomicInteger(); + private final Map usedSlots = new ConcurrentHashMap<>(); + private final Scope metricsScope; + + public TrackingSlotSupplier(SlotSupplier inner, Scope metricsScope) { + this.inner = inner; + this.metricsScope = metricsScope; + publishSlotsMetric(); + } + + public SlotPermit reserveSlot(SlotReservationData dat) throws InterruptedException { + SlotPermit p = inner.reserveSlot(createCtx(dat)); + issuedSlots.incrementAndGet(); + return p; + } + + public Optional tryReserveSlot(SlotReservationData dat) { + Optional p = inner.tryReserveSlot(createCtx(dat)); + if (p.isPresent()) { + issuedSlots.incrementAndGet(); + } + return p; + } + + public void markSlotUsed(SI slotInfo, SlotPermit permit) { + if (permit == null) { + throw new IllegalArgumentException( + "Permit cannot be null when marking slot as used. This is an SDK bug."); + } + if (usedSlots.put(permit, slotInfo) != null) { + throw new IllegalStateException("Slot is being marked used twice. This is an SDK bug."); + } + inner.markSlotUsed(new SlotMarkUsedContextImpl(slotInfo, permit)); + publishSlotsMetric(); + } + + public void releaseSlot(SlotReleaseReason reason, SlotPermit permit) { + if (permit == null) { + throw new IllegalArgumentException( + "Permit cannot be null when releasing a slot. 
This is an SDK bug."); + } + SI slotInfo = usedSlots.get(permit); + inner.releaseSlot(new SlotReleaseContextImpl(reason, permit, slotInfo)); + issuedSlots.decrementAndGet(); + usedSlots.remove(permit); + publishSlotsMetric(); + } + + public Optional maximumSlots() { + return inner.getMaximumSlots(); + } + + public int getIssuedSlots() { + return issuedSlots.get(); + } + + Map getUsedSlots() { + return usedSlots; + } + + private void publishSlotsMetric() { + if (maximumSlots().isPresent()) { + this.metricsScope + .gauge(MetricsType.WORKER_TASK_SLOTS_AVAILABLE) + .update(maximumSlots().get() - usedSlots.size()); + } + this.metricsScope.gauge(MetricsType.WORKER_TASK_SLOTS_USED).update(usedSlots.size()); + } + + private SlotReserveContext createCtx(SlotReservationData dat) { + return new SlotReserveContextImpl( + dat.taskQueue, + Collections.unmodifiableMap(usedSlots), + dat.workerIdentity, + dat.workerBuildId, + issuedSlots); + } + + private class SlotReserveContextImpl implements SlotReserveContext { + private final String taskQueue; + private final Map usedSlots; + private final String workerIdentity; + private final String workerBuildId; + private final AtomicInteger issuedSlots; + + private SlotReserveContextImpl( + String taskQueue, + Map usedSlots, + String workerIdentity, + String workerBuildId, + AtomicInteger issuedSlots) { + this.taskQueue = taskQueue; + this.usedSlots = usedSlots; + this.workerIdentity = workerIdentity; + this.workerBuildId = workerBuildId; + this.issuedSlots = issuedSlots; + } + + @Override + public String getTaskQueue() { + return taskQueue; + } + + @Override + public Map getUsedSlots() { + return usedSlots; + } + + @Override + public String getWorkerIdentity() { + return workerIdentity; + } + + @Override + public String getWorkerBuildId() { + return workerBuildId; + } + + @Override + public int getNumIssuedSlots() { + return issuedSlots.get(); + } + } + + private class SlotMarkUsedContextImpl implements SlotMarkUsedContext { + private 
final SI slotInfo; + private final SlotPermit slotPermit; + + protected SlotMarkUsedContextImpl(SI slotInfo, SlotPermit slotPermit) { + this.slotInfo = slotInfo; + this.slotPermit = slotPermit; + } + + @Override + public SI getSlotInfo() { + return slotInfo; + } + + @Override + public SlotPermit getSlotPermit() { + return slotPermit; + } + } + + private class SlotReleaseContextImpl implements SlotReleaseContext { + private final SlotPermit slotPermit; + private final SlotReleaseReason reason; + private final SI slotInfo; + + protected SlotReleaseContextImpl(SlotReleaseReason reason, SlotPermit slotPermit, SI slotInfo) { + this.slotPermit = slotPermit; + this.reason = reason; + this.slotInfo = slotInfo; + } + + @Override + public SlotReleaseReason getSlotReleaseReason() { + return reason; + } + + @Override + public SlotPermit getSlotPermit() { + return slotPermit; + } + + @Override + public SI getSlotInfo() { + return slotInfo; + } + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java index f1dcb60ec6..29fff4e51b 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java @@ -35,8 +35,8 @@ import io.temporal.serviceclient.MetricsTag; import io.temporal.serviceclient.WorkflowServiceStubs; import io.temporal.worker.MetricsType; +import io.temporal.worker.tuning.*; import java.util.Objects; -import java.util.concurrent.Semaphore; import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -46,7 +46,7 @@ final class WorkflowPollTask implements Poller.PollTask { private static final Logger log = LoggerFactory.getLogger(WorkflowPollTask.class); - private final Semaphore workflowTaskExecutorSemaphore; + private final TrackingSlotSupplier slotSupplier; private final StickyQueueBalancer 
stickyQueueBalancer; private final Scope metricsScope; private final Scope stickyMetricsScope; @@ -62,11 +62,11 @@ public WorkflowPollTask( @Nonnull String identity, @Nullable String buildId, boolean useBuildIdForVersioning, - @Nonnull Semaphore workflowTaskExecutorSemaphore, + @Nonnull TrackingSlotSupplier slotSupplier, @Nonnull StickyQueueBalancer stickyQueueBalancer, @Nonnull Scope workerMetricsScope, @Nonnull Supplier serverCapabilities) { - this.workflowTaskExecutorSemaphore = Objects.requireNonNull(workflowTaskExecutorSemaphore); + this.slotSupplier = Objects.requireNonNull(slotSupplier); this.stickyQueueBalancer = Objects.requireNonNull(stickyQueueBalancer); this.metricsScope = Objects.requireNonNull(workerMetricsScope); this.stickyMetricsScope = @@ -120,11 +120,20 @@ public WorkflowPollTask( @Override public WorkflowTask poll() { boolean isSuccessful = false; + SlotPermit permit; try { - workflowTaskExecutorSemaphore.acquire(); + permit = + slotSupplier.reserveSlot( + new SlotReservationData( + pollRequest.getTaskQueue().getName(), + pollRequest.getIdentity(), + pollRequest.getWorkerVersionCapabilities().getBuildId())); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return null; + } catch (Exception e) { + log.warn("Error while trying to reserve a slot for workflow task", e); + return null; } TaskQueueKind taskQueueKind = stickyQueueBalancer.makePoll(); @@ -140,10 +149,11 @@ public WorkflowTask poll() { } isSuccessful = true; stickyQueueBalancer.finishPoll(taskQueueKind, response.getBacklogCountHint()); - return new WorkflowTask(response, workflowTaskExecutorSemaphore::release); + slotSupplier.markSlotUsed(new WorkflowSlotInfo(response, pollRequest), permit); + return new WorkflowTask(response, (rr) -> slotSupplier.releaseSlot(rr, permit)); } finally { if (!isSuccessful) { - workflowTaskExecutorSemaphore.release(); + slotSupplier.releaseSlot(SlotReleaseReason.neverUsed(), permit);
stickyQueueBalancer.finishPoll(taskQueueKind, 0); } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowTask.java index 7e4462f80f..9946b2e360 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowTask.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowTask.java @@ -21,15 +21,17 @@ package io.temporal.internal.worker; import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse; +import io.temporal.worker.tuning.SlotReleaseReason; import io.temporal.workflow.Functions; import javax.annotation.Nonnull; public class WorkflowTask { @Nonnull private final PollWorkflowTaskQueueResponse response; - @Nonnull private final Functions.Proc completionCallback; + @Nonnull private final Functions.Proc1 completionCallback; public WorkflowTask( - @Nonnull PollWorkflowTaskQueueResponse response, @Nonnull Functions.Proc completionCallback) { + @Nonnull PollWorkflowTaskQueueResponse response, + @Nonnull Functions.Proc1 completionCallback) { this.response = response; this.completionCallback = completionCallback; } @@ -44,7 +46,7 @@ public PollWorkflowTaskQueueResponse getResponse() { * completed. 
*/ @Nonnull - public Functions.Proc getCompletionCallback() { + public Functions.Proc1 getCompletionCallback() { return completionCallback; } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowWorker.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowWorker.java index 42a9ae450f..aa5f1d25b5 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowWorker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowWorker.java @@ -41,11 +41,13 @@ import io.temporal.worker.MetricsType; import io.temporal.worker.WorkerMetricsTag; import io.temporal.worker.WorkflowTaskDispatchHandle; +import io.temporal.worker.tuning.SlotReleaseReason; +import io.temporal.worker.tuning.SlotSupplier; +import io.temporal.worker.tuning.WorkflowSlotInfo; import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -69,8 +71,7 @@ final class WorkflowWorker implements SuspendableWorker { private final Scope workerMetricsScope; private final GrpcRetryer grpcRetryer; private final EagerActivityDispatcher eagerActivityDispatcher; - private final int executorSlots; - private final Semaphore executorSlotsSemaphore; + private final TrackingSlotSupplier slotSupplier; private PollTaskExecutor pollTaskExecutor; @@ -89,7 +90,8 @@ public WorkflowWorker( @Nonnull WorkflowRunLockManager runLocks, @Nonnull WorkflowExecutorCache cache, @Nonnull WorkflowTaskHandler handler, - @Nonnull EagerActivityDispatcher eagerActivityDispatcher) { + @Nonnull EagerActivityDispatcher eagerActivityDispatcher, + @Nonnull SlotSupplier slotSupplier) { this.service = Objects.requireNonNull(service); this.namespace = Objects.requireNonNull(namespace); this.taskQueue = Objects.requireNonNull(taskQueue); @@ -103,8 
+105,7 @@ public WorkflowWorker( this.handler = Objects.requireNonNull(handler); this.grpcRetryer = new GrpcRetryer(service.getServerCapabilities()); this.eagerActivityDispatcher = eagerActivityDispatcher; - this.executorSlots = options.getTaskExecutorThreadPoolSize(); - this.executorSlotsSemaphore = new Semaphore(executorSlots); + this.slotSupplier = new TrackingSlotSupplier<>(slotSupplier, this.workerMetricsScope); } @Override @@ -117,8 +118,7 @@ public boolean start() { options.getIdentity(), new TaskHandlerImpl(handler), pollerOptions, - options.getTaskExecutorThreadPoolSize(), - workerMetricsScope, + this.slotSupplier.maximumSlots().orElse(Integer.MAX_VALUE), true); stickyQueueBalancer = new StickyQueueBalancer( @@ -135,7 +135,7 @@ public boolean start() { options.getIdentity(), options.getBuildId(), options.isUsingBuildIdForVersioning(), - executorSlotsSemaphore, + slotSupplier, stickyQueueBalancer, workerMetricsScope, service.getServerCapabilities()), @@ -154,7 +154,7 @@ public boolean start() { @Override public CompletableFuture shutdown(ShutdownManager shutdownManager, boolean interruptTasks) { - String semaphoreName = this + "#executorSlotsSemaphore"; + String supplierName = this + "#executorSlots"; boolean stickyQueueBalancerDrainEnabled = !interruptTasks @@ -173,8 +173,8 @@ public CompletableFuture shutdown(ShutdownManager shutdownManager, boolean .thenCompose( ignore -> !interruptTasks - ? shutdownManager.waitForSemaphorePermitsReleaseUntimed( - executorSlotsSemaphore, executorSlots, semaphoreName) + ? 
shutdownManager.waitForSupplierPermitsReleasedUnlimited( + slotSupplier, supplierName) : CompletableFuture.completedFuture(null)) .thenCompose( ignore -> @@ -240,32 +240,40 @@ private PollerOptions getPollerOptions(SingleWorkerOptions options) { @Nullable public WorkflowTaskDispatchHandle reserveWorkflowExecutor() { - // to avoid pollTaskExecutor to become null inside the lambda, we are caching it here + // to avoid pollTaskExecutor becoming null inside the lambda, we cache it here final PollTaskExecutor executor = pollTaskExecutor; - return executor != null && !isSuspended() && executorSlotsSemaphore.tryAcquire() - ? new WorkflowTaskDispatchHandle( - workflowTask -> { - String queueName = - workflowTask.getResponse().getWorkflowExecutionTaskQueue().getName(); - TaskQueueKind queueKind = - workflowTask.getResponse().getWorkflowExecutionTaskQueue().getKind(); - Preconditions.checkArgument( - this.taskQueue.equals(queueName) - || TaskQueueKind.TASK_QUEUE_KIND_STICKY.equals(queueKind) - && this.stickyTaskQueueName.equals(queueName), - "Got a WFT for a wrong queue %s, expected %s or %s", - queueName, - this.taskQueue, - this.stickyTaskQueueName); - try { - pollTaskExecutor.process(workflowTask); - return true; - } catch (RejectedExecutionException e) { - return false; - } - }, - executorSlotsSemaphore) - : null; + if (executor == null || isSuspended()) { + return null; + } + return slotSupplier + .tryReserveSlot( + new SlotReservationData(taskQueue, options.getIdentity(), options.getBuildId())) + .map( + slotPermit -> + new WorkflowTaskDispatchHandle( + workflowTask -> { + String queueName = + workflowTask.getResponse().getWorkflowExecutionTaskQueue().getName(); + TaskQueueKind queueKind = + workflowTask.getResponse().getWorkflowExecutionTaskQueue().getKind(); + Preconditions.checkArgument( + this.taskQueue.equals(queueName) + || TaskQueueKind.TASK_QUEUE_KIND_STICKY.equals(queueKind) + && this.stickyTaskQueueName.equals(queueName), + "Got a WFT for a wrong queue %s, 
expected %s or %s", + queueName, + this.taskQueue, + this.stickyTaskQueueName); + try { + executor.process(workflowTask); + return true; + } catch (RejectedExecutionException e) { + return false; + } + }, + slotSupplier, + slotPermit)) + .orElse(null); } @Override @@ -301,6 +309,7 @@ public void handle(WorkflowTask task) throws Exception { Stopwatch swTotal = workflowTypeScope.timer(MetricsType.WORKFLOW_TASK_EXECUTION_TOTAL_LATENCY).start(); + SlotReleaseReason releaseReason = SlotReleaseReason.taskComplete(); try { if (!Strings.isNullOrEmpty(stickyTaskQueueName)) { // Serialize workflow task processing for a particular workflow run. @@ -377,6 +386,7 @@ public void handle(WorkflowTask task) throws Exception { } } catch (Exception e) { logExceptionDuringResultReporting(e, currentTask, result); + releaseReason = SlotReleaseReason.error(e); // if we failed to report the workflow task completion back to the server, // our cached version of the workflow may be more advanced than the server is aware of.
// We should discard this execution and perform a clean replay based on what server @@ -413,12 +423,11 @@ public void handle(WorkflowTask task) throws Exception { } while (nextWFTResponse.isPresent()); } finally { swTotal.stop(); + task.getCompletionCallback().apply(releaseReason); MDC.remove(LoggerTag.WORKFLOW_ID); MDC.remove(LoggerTag.WORKFLOW_TYPE); MDC.remove(LoggerTag.RUN_ID); - task.getCompletionCallback().apply(); - if (locked) { runLocks.unlock(runId); } diff --git a/temporal-sdk/src/main/java/io/temporal/worker/MetricsType.java b/temporal-sdk/src/main/java/io/temporal/worker/MetricsType.java index f3627beb87..e8752ec4fc 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/MetricsType.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/MetricsType.java @@ -20,6 +20,8 @@ package io.temporal.worker; +import io.temporal.common.Experimental; + public final class MetricsType { private MetricsType() {} @@ -136,6 +138,10 @@ private MetricsType() {} public static final String WORKER_TASK_SLOTS_AVAILABLE = TEMPORAL_METRICS_PREFIX + "worker_task_slots_available"; + @Experimental + public static final String WORKER_TASK_SLOTS_USED = + TEMPORAL_METRICS_PREFIX + "worker_task_slots_used"; + // // Worker Factory // @@ -169,4 +175,13 @@ private MetricsType() {} // gauge public static final String WORKFLOW_ACTIVE_THREAD_COUNT = TEMPORAL_METRICS_PREFIX + "workflow_active_thread_count"; + + // + // Resource tuner + // + // Tagged with namespace & task_queue + public static final String RESOURCE_MEM_USAGE = "resource_slots_mem_usage"; + public static final String RESOURCE_CPU_USAGE = "resource_slots_cpu_usage"; + public static final String RESOURCE_MEM_PID = "resource_slots_mem_pid_output"; + public static final String RESOURCE_CPU_PID = "resource_slots_cpu_pid_output"; } diff --git a/temporal-sdk/src/main/java/io/temporal/worker/Worker.java b/temporal-sdk/src/main/java/io/temporal/worker/Worker.java index 85ea85d71b..619b9fe491 100644 --- 
a/temporal-sdk/src/main/java/io/temporal/worker/Worker.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/Worker.java @@ -39,6 +39,7 @@ import io.temporal.internal.worker.WorkflowExecutorCache; import io.temporal.serviceclient.MetricsTag; import io.temporal.serviceclient.WorkflowServiceStubs; +import io.temporal.worker.tuning.*; import io.temporal.workflow.Functions.Func; import io.temporal.workflow.WorkflowMethod; import java.time.Duration; @@ -65,7 +66,6 @@ public final class Worker { final SyncWorkflowWorker workflowWorker; final SyncActivityWorker activityWorker; private final AtomicBoolean started = new AtomicBoolean(); - private final Scope metricsScope; /** * Creates worker that connects to an instance of the Temporal Service. @@ -100,20 +100,27 @@ public final class Worker { String namespace = clientOptions.getNamespace(); Map tags = new ImmutableMap.Builder(1).put(MetricsTag.TASK_QUEUE, taskQueue).build(); - this.metricsScope = metricsScope.tagged(tags); + Scope taggedScope = metricsScope.tagged(tags); SingleWorkerOptions activityOptions = toActivityOptions( - factoryOptions, this.options, clientOptions, contextPropagators, this.metricsScope); + factoryOptions, this.options, clientOptions, contextPropagators, taggedScope); if (this.options.isLocalActivityWorkerOnly()) { activityWorker = null; } else { + SlotSupplier activitySlotSupplier = + this.options.getWorkerTuner() == null + ? 
new FixedSizeSlotSupplier<>(this.options.getMaxConcurrentActivityExecutionSize()) + : this.options.getWorkerTuner().getActivityTaskSlotSupplier(); + attachMetricsToResourceController(taggedScope, activitySlotSupplier); + activityWorker = new SyncActivityWorker( service, namespace, taskQueue, this.options.getMaxTaskQueueActivitiesPerSecond(), - activityOptions); + activityOptions, + activitySlotSupplier); } EagerActivityDispatcher eagerActivityDispatcher = @@ -128,10 +135,21 @@ public final class Worker { clientOptions, taskQueue, contextPropagators, - this.metricsScope); + taggedScope); SingleWorkerOptions localActivityOptions = toLocalActivityOptions( - factoryOptions, this.options, clientOptions, contextPropagators, this.metricsScope); + factoryOptions, this.options, clientOptions, contextPropagators, taggedScope); + + SlotSupplier workflowSlotSupplier = + this.options.getWorkerTuner() == null + ? new FixedSizeSlotSupplier<>(this.options.getMaxConcurrentWorkflowTaskExecutionSize()) + : this.options.getWorkerTuner().getWorkflowTaskSlotSupplier(); + attachMetricsToResourceController(taggedScope, workflowSlotSupplier); + SlotSupplier localActivitySlotSupplier = + this.options.getWorkerTuner() == null + ? new FixedSizeSlotSupplier<>(this.options.getMaxConcurrentLocalActivityExecutionSize()) + : this.options.getWorkerTuner().getLocalActivitySlotSupplier(); + attachMetricsToResourceController(taggedScope, localActivitySlotSupplier); workflowWorker = new SyncWorkflowWorker( service, @@ -143,7 +161,9 @@ public final class Worker { cache, useStickyTaskQueue ? 
getStickyTaskQueueName(client.getOptions().getIdentity()) : null, workflowThreadExecutor, - eagerActivityDispatcher); + eagerActivityDispatcher, + workflowSlotSupplier, + localActivitySlotSupplier); } /** @@ -508,7 +528,6 @@ private static SingleWorkerOptions toActivityOptions( .setMaximumPollRatePerSecond(options.getMaxWorkerActivitiesPerSecond()) .setPollThreadCount(options.getMaxConcurrentActivityTaskPollers()) .build()) - .setTaskExecutorThreadPoolSize(options.getMaxConcurrentActivityExecutionSize()) .setMetricsScope(metricsScope) .build(); } @@ -541,7 +560,6 @@ private static SingleWorkerOptions toWorkflowWorkerOptions( return toSingleWorkerOptions(factoryOptions, options, clientOptions, contextPropagators) .setPollerOptions( PollerOptions.newBuilder().setPollThreadCount(maxConcurrentWorkflowTaskPollers).build()) - .setTaskExecutorThreadPoolSize(options.getMaxConcurrentWorkflowTaskExecutionSize()) .setStickyQueueScheduleToStartTimeout(stickyQueueScheduleToStartTimeout) .setStickyTaskQueueDrainTimeout(options.getStickyTaskQueueDrainTimeout()) .setDefaultDeadlockDetectionTimeout(options.getDefaultDeadlockDetectionTimeout()) @@ -557,7 +575,6 @@ private static SingleWorkerOptions toLocalActivityOptions( Scope metricsScope) { return toSingleWorkerOptions(factoryOptions, options, clientOptions, contextPropagators) .setPollerOptions(PollerOptions.newBuilder().setPollThreadCount(1).build()) - .setTaskExecutorThreadPoolSize(options.getMaxConcurrentLocalActivityExecutionSize()) .setMetricsScope(metricsScope) .build(); } @@ -591,4 +608,17 @@ private static SingleWorkerOptions.Builder toSingleWorkerOptions( .setMaxHeartbeatThrottleInterval(options.getMaxHeartbeatThrottleInterval()) .setDefaultHeartbeatThrottleInterval(options.getDefaultHeartbeatThrottleInterval()); } + + /** + * If any slot supplier is resource-based, we want to attach a metrics scope to the controller + * (before it's labelled with the worker type). 
+ */ + private static void attachMetricsToResourceController( + Scope metricsScope, SlotSupplier supplier) { + if (supplier instanceof ResourceBasedSlotSupplier) { + ((ResourceBasedSlotSupplier) supplier) + .getResourceController() + .setMetricsScope(metricsScope); + } + } } diff --git a/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java b/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java index 52d5a75bf9..d9fce2e5ec 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java @@ -25,6 +25,7 @@ import com.google.common.base.Preconditions; import io.temporal.common.Experimental; import io.temporal.serviceclient.WorkflowServiceStubsOptions; +import io.temporal.worker.tuning.*; import java.time.Duration; import java.util.Objects; import javax.annotation.Nonnull; @@ -82,6 +83,7 @@ public static final class Builder { private String buildId; private boolean useBuildIdForVersioning; private Duration stickyTaskQueueDrainTimeout; + private WorkerTuner workerTuner; private String identity; private Builder() {} @@ -94,6 +96,7 @@ private Builder(WorkerOptions o) { this.maxConcurrentActivityExecutionSize = o.maxConcurrentActivityExecutionSize; this.maxConcurrentWorkflowTaskExecutionSize = o.maxConcurrentWorkflowTaskExecutionSize; this.maxConcurrentLocalActivityExecutionSize = o.maxConcurrentLocalActivityExecutionSize; + this.workerTuner = o.workerTuner; this.maxTaskQueueActivitiesPerSecond = o.maxTaskQueueActivitiesPerSecond; this.maxConcurrentWorkflowTaskPollers = o.maxConcurrentWorkflowTaskPollers; this.maxConcurrentActivityTaskPollers = o.maxConcurrentActivityTaskPollers; @@ -131,6 +134,7 @@ public Builder setMaxWorkerActivitiesPerSecond(double maxWorkerActivitiesPerSeco * @param maxConcurrentActivityExecutionSize Maximum number of activities executed in parallel. * Default is 200, which is chosen if set to zero. * @return {@code this} + *

<p>Note: this setting is mutually exclusive with {@link #setWorkerTuner(WorkerTuner)} */ public Builder setMaxConcurrentActivityExecutionSize(int maxConcurrentActivityExecutionSize) { if (maxConcurrentActivityExecutionSize < 0) { @@ -148,6 +152,7 @@ public Builder setMaxConcurrentActivityExecutionSize(int maxConcurrentActivityEx * @return {@code this} *

<p>Note that this is not related to the total number of open workflows which do not need * to be loaded in a worker when they are not making state transitions. + *

<p>Note: this setting is mutually exclusive with {@link #setWorkerTuner(WorkerTuner)} */ public Builder setMaxConcurrentWorkflowTaskExecutionSize( int maxConcurrentWorkflowTaskExecutionSize) { @@ -164,6 +169,7 @@ public Builder setMaxConcurrentWorkflowTaskExecutionSize( * @param maxConcurrentLocalActivityExecutionSize Maximum number of local activities executed in * parallel. Default is 200, which is chosen if set to zero. * @return {@code this} + *

<p>Note: this setting is mutually exclusive with {@link #setWorkerTuner(WorkerTuner)} */ public Builder setMaxConcurrentLocalActivityExecutionSize( int maxConcurrentLocalActivityExecutionSize) { @@ -371,6 +377,16 @@ public Builder setStickyTaskQueueDrainTimeout(Duration stickyTaskQueueDrainTimeo return this; } + /** + * Set a {@link WorkerTuner} to determine how slots will be allocated for different types of + * tasks. + */ + @Experimental + public Builder setWorkerTuner(WorkerTuner workerTuner) { + this.workerTuner = workerTuner; + return this; + } + /** Override identity of the worker primary specified in a WorkflowClient options. */ public Builder setIdentity(String identity) { this.identity = identity; @@ -383,6 +399,7 @@ public WorkerOptions build() { maxConcurrentActivityExecutionSize, maxConcurrentWorkflowTaskExecutionSize, maxConcurrentLocalActivityExecutionSize, + workerTuner, maxTaskQueueActivitiesPerSecond, maxConcurrentWorkflowTaskPollers, maxConcurrentActivityTaskPollers, @@ -409,6 +426,21 @@ public WorkerOptions validateAndBuildWithDefaults() { Preconditions.checkState( maxConcurrentLocalActivityExecutionSize >= 0, "negative maxConcurrentLocalActivityExecutionSize"); + if (workerTuner != null) { + Preconditions.checkState( + maxConcurrentActivityExecutionSize == 0, + "maxConcurrentActivityExecutionSize must not be set if workerTuner is set"); + } + if (workerTuner != null) { + Preconditions.checkState( + maxConcurrentWorkflowTaskExecutionSize == 0, + "maxConcurrentWorkflowTaskExecutionSize must not be set if workerTuner is set"); + } + if (workerTuner != null) { + Preconditions.checkState( + maxConcurrentLocalActivityExecutionSize == 0, + "maxConcurrentLocalActivityExecutionSize must not be set if workerTuner is set"); + } Preconditions.checkState( maxTaskQueueActivitiesPerSecond >= 0, "negative taskQueueActivitiesPerSecond"); Preconditions.checkState( @@ -441,6 +473,7 @@ public WorkerOptions validateAndBuildWithDefaults() { 
maxConcurrentLocalActivityExecutionSize == 0 ? DEFAULT_MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE : maxConcurrentLocalActivityExecutionSize, + workerTuner, maxTaskQueueActivitiesPerSecond, maxConcurrentWorkflowTaskPollers == 0 ? DEFAULT_MAX_CONCURRENT_WORKFLOW_TASK_POLLERS @@ -475,6 +508,7 @@ public WorkerOptions validateAndBuildWithDefaults() { private final int maxConcurrentActivityExecutionSize; private final int maxConcurrentWorkflowTaskExecutionSize; private final int maxConcurrentLocalActivityExecutionSize; + private final WorkerTuner workerTuner; private final double maxTaskQueueActivitiesPerSecond; private final int maxConcurrentWorkflowTaskPollers; private final int maxConcurrentActivityTaskPollers; @@ -492,8 +526,9 @@ public WorkerOptions validateAndBuildWithDefaults() { private WorkerOptions( double maxWorkerActivitiesPerSecond, int maxConcurrentActivityExecutionSize, - int maxConcurrentWorkflowExecutionSize, + int maxConcurrentWorkflowTaskExecutionSize, int maxConcurrentLocalActivityExecutionSize, + WorkerTuner workerTuner, double maxTaskQueueActivitiesPerSecond, int workflowPollThreadCount, int activityPollThreadCount, @@ -509,8 +544,9 @@ private WorkerOptions( String identity) { this.maxWorkerActivitiesPerSecond = maxWorkerActivitiesPerSecond; this.maxConcurrentActivityExecutionSize = maxConcurrentActivityExecutionSize; - this.maxConcurrentWorkflowTaskExecutionSize = maxConcurrentWorkflowExecutionSize; + this.maxConcurrentWorkflowTaskExecutionSize = maxConcurrentWorkflowTaskExecutionSize; this.maxConcurrentLocalActivityExecutionSize = maxConcurrentLocalActivityExecutionSize; + this.workerTuner = workerTuner; this.maxTaskQueueActivitiesPerSecond = maxTaskQueueActivitiesPerSecond; this.maxConcurrentWorkflowTaskPollers = workflowPollThreadCount; this.maxConcurrentActivityTaskPollers = activityPollThreadCount; @@ -607,6 +643,10 @@ public Duration getStickyTaskQueueDrainTimeout() { return stickyTaskQueueDrainTimeout; } + public WorkerTuner 
getWorkerTuner() { + return workerTuner; + } + @Nullable public String getIdentity() { return identity; @@ -617,21 +657,22 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; WorkerOptions that = (WorkerOptions) o; - return compare(that.maxWorkerActivitiesPerSecond, maxWorkerActivitiesPerSecond) == 0 + return compare(maxWorkerActivitiesPerSecond, that.maxWorkerActivitiesPerSecond) == 0 && maxConcurrentActivityExecutionSize == that.maxConcurrentActivityExecutionSize && maxConcurrentWorkflowTaskExecutionSize == that.maxConcurrentWorkflowTaskExecutionSize && maxConcurrentLocalActivityExecutionSize == that.maxConcurrentLocalActivityExecutionSize - && compare(that.maxTaskQueueActivitiesPerSecond, maxTaskQueueActivitiesPerSecond) == 0 + && compare(maxTaskQueueActivitiesPerSecond, that.maxTaskQueueActivitiesPerSecond) == 0 && maxConcurrentWorkflowTaskPollers == that.maxConcurrentWorkflowTaskPollers && maxConcurrentActivityTaskPollers == that.maxConcurrentActivityTaskPollers && localActivityWorkerOnly == that.localActivityWorkerOnly && defaultDeadlockDetectionTimeout == that.defaultDeadlockDetectionTimeout + && disableEagerExecution == that.disableEagerExecution + && useBuildIdForVersioning == that.useBuildIdForVersioning + && Objects.equals(workerTuner, that.workerTuner) && Objects.equals(maxHeartbeatThrottleInterval, that.maxHeartbeatThrottleInterval) && Objects.equals(defaultHeartbeatThrottleInterval, that.defaultHeartbeatThrottleInterval) && Objects.equals(stickyQueueScheduleToStartTimeout, that.stickyQueueScheduleToStartTimeout) - && disableEagerExecution == that.disableEagerExecution - && useBuildIdForVersioning == that.useBuildIdForVersioning - && Objects.equals(that.buildId, buildId) + && Objects.equals(buildId, that.buildId) && Objects.equals(stickyTaskQueueDrainTimeout, that.stickyTaskQueueDrainTimeout) && Objects.equals(identity, that.identity); } @@ -643,6 +684,7 @@ public int hashCode() { 
maxConcurrentActivityExecutionSize, maxConcurrentWorkflowTaskExecutionSize, maxConcurrentLocalActivityExecutionSize, + workerTuner, maxTaskQueueActivitiesPerSecond, maxConcurrentWorkflowTaskPollers, maxConcurrentActivityTaskPollers, @@ -669,6 +711,8 @@ public String toString() { + maxConcurrentWorkflowTaskExecutionSize + ", maxConcurrentLocalActivityExecutionSize=" + maxConcurrentLocalActivityExecutionSize + + ", workerTuner=" + + workerTuner + ", maxTaskQueueActivitiesPerSecond=" + maxTaskQueueActivitiesPerSecond + ", maxConcurrentWorkflowTaskPollers=" @@ -691,7 +735,8 @@ public String toString() { + useBuildIdForVersioning + ", buildId='" + buildId - + ", stickyTaskQueueDrainTimeout='" + + '\'' + + ", stickyTaskQueueDrainTimeout=" + stickyTaskQueueDrainTimeout + ", identity=" + identity diff --git a/temporal-sdk/src/main/java/io/temporal/worker/WorkflowTaskDispatchHandle.java b/temporal-sdk/src/main/java/io/temporal/worker/WorkflowTaskDispatchHandle.java index 9189d65668..a8fba8b5f9 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/WorkflowTaskDispatchHandle.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/WorkflowTaskDispatchHandle.java @@ -22,9 +22,12 @@ import com.google.common.base.Preconditions; import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse; +import io.temporal.internal.worker.TrackingSlotSupplier; import io.temporal.internal.worker.WorkflowTask; +import io.temporal.worker.tuning.SlotPermit; +import io.temporal.worker.tuning.SlotReleaseReason; +import io.temporal.worker.tuning.WorkflowSlotInfo; import java.io.Closeable; -import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; import javax.annotation.Nonnull; @@ -32,17 +35,20 @@ public class WorkflowTaskDispatchHandle implements Closeable { private final AtomicBoolean completed = new AtomicBoolean(); private final Function dispatchCallback; - private final Semaphore executorSlotsSemaphore; + private 
final TrackingSlotSupplier slotSupplier; + private final SlotPermit permit; /** * @param dispatchCallback callback into a {@code WorkflowWorker} to dispatch a workflow task. - * @param executorSlotsSemaphore worker executor slots semaphore that was used to reserve this - * dispatch handle on + * @param slotSupplier slot supplier that was used to reserve a slot for this workflow task */ public WorkflowTaskDispatchHandle( - DispatchCallback dispatchCallback, Semaphore executorSlotsSemaphore) { + DispatchCallback dispatchCallback, + TrackingSlotSupplier slotSupplier, + SlotPermit permit) { this.dispatchCallback = dispatchCallback; - this.executorSlotsSemaphore = executorSlotsSemaphore; + this.slotSupplier = slotSupplier; + this.permit = permit; } /** @@ -55,7 +61,7 @@ public boolean dispatch(@Nonnull PollWorkflowTaskQueueResponse workflowTask) { Preconditions.checkNotNull(workflowTask, "workflowTask"); if (completed.compareAndSet(false, true)) { return dispatchCallback.apply( - new WorkflowTask(workflowTask, executorSlotsSemaphore::release)); + new WorkflowTask(workflowTask, (rr) -> slotSupplier.releaseSlot(rr, permit))); } else { return false; } @@ -64,7 +70,7 @@ public boolean dispatch(@Nonnull PollWorkflowTaskQueueResponse workflowTask) { @Override public void close() { if (completed.compareAndSet(false, true)) { - executorSlotsSemaphore.release(); + slotSupplier.releaseSlot(SlotReleaseReason.neverUsed(), permit); } } diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ActivitySlotInfo.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ActivitySlotInfo.java new file mode 100644 index 0000000000..2a86e4a656 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ActivitySlotInfo.java @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.activity.ActivityInfo; +import io.temporal.common.Experimental; +import java.util.Objects; + +/** Contains information about a slot that is being used to execute an activity task. */ +@Experimental +public class ActivitySlotInfo extends SlotInfo { + private final ActivityInfo activityInfo; + private final String workerIdentity; + private final String workerBuildId; + + public ActivitySlotInfo(ActivityInfo activityInfo, String workerIdentity, String workerBuildId) { + this.activityInfo = activityInfo; + this.workerIdentity = workerIdentity; + this.workerBuildId = workerBuildId; + } + + public ActivityInfo getActivityInfo() { + return activityInfo; + } + + public String getWorkerIdentity() { + return workerIdentity; + } + + public String getWorkerBuildId() { + return workerBuildId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ActivitySlotInfo that = (ActivitySlotInfo) o; + return Objects.equals(activityInfo, that.activityInfo) + && Objects.equals(workerIdentity, that.workerIdentity) + && Objects.equals(workerBuildId, that.workerBuildId); + } + + @Override + public int hashCode() { + return Objects.hash(activityInfo, workerIdentity, workerBuildId); + } + + @Override + public String toString() { + return "ActivitySlotInfo{" + + "activityInfo=" + 
+ activityInfo + + ", workerIdentity='" + + workerIdentity + + '\'' + + ", workerBuildId='" + + workerBuildId + + '\'' + + '}'; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/CompositeTuner.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/CompositeTuner.java new file mode 100644 index 0000000000..679050493c --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/CompositeTuner.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import java.util.Objects; +import javax.annotation.Nonnull; + +/** + * Can be used to create a {@link WorkerTuner} which uses specific {@link SlotSupplier}s for each + * type of slot. 
+ */ +@Experimental +public class CompositeTuner implements WorkerTuner { + private final @Nonnull SlotSupplier workflowTaskSlotSupplier; + private final @Nonnull SlotSupplier activityTaskSlotSupplier; + private final @Nonnull SlotSupplier localActivitySlotSupplier; + + public CompositeTuner( + @Nonnull SlotSupplier workflowTaskSlotSupplier, + @Nonnull SlotSupplier activityTaskSlotSupplier, + @Nonnull SlotSupplier localActivitySlotSupplier) { + this.workflowTaskSlotSupplier = Objects.requireNonNull(workflowTaskSlotSupplier); + this.activityTaskSlotSupplier = Objects.requireNonNull(activityTaskSlotSupplier); + this.localActivitySlotSupplier = Objects.requireNonNull(localActivitySlotSupplier); + + // All resource-based slot suppliers must use the same controller + validateResourceController(workflowTaskSlotSupplier, activityTaskSlotSupplier); + validateResourceController(workflowTaskSlotSupplier, localActivitySlotSupplier); + validateResourceController(activityTaskSlotSupplier, localActivitySlotSupplier); + } + + @Nonnull + @Override + public SlotSupplier getWorkflowTaskSlotSupplier() { + return workflowTaskSlotSupplier; + } + + @Nonnull + @Override + public SlotSupplier getActivityTaskSlotSupplier() { + return activityTaskSlotSupplier; + } + + @Nonnull + @Override + public SlotSupplier getLocalActivitySlotSupplier() { + return localActivitySlotSupplier; + } + + private void validateResourceController( + @Nonnull SlotSupplier supplier1, @Nonnull SlotSupplier supplier2) { + if (supplier1 instanceof ResourceBasedSlotSupplier + && supplier2 instanceof ResourceBasedSlotSupplier) { + ResourceBasedController controller1 = + ((ResourceBasedSlotSupplier) supplier1).getResourceController(); + ResourceBasedController controller2 = + ((ResourceBasedSlotSupplier) supplier2).getResourceController(); + if (controller1 != controller2) { + throw new IllegalArgumentException( + "All resource-based slot suppliers must use the same ResourceController"); + } + } + } +} diff --git 
a/temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java new file mode 100644 index 0000000000..e9b08d3441 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import com.google.common.base.Preconditions; +import java.util.Optional; +import java.util.concurrent.*; + +/** + * This implementation of {@link SlotSupplier} provides a fixed number of slots backed by a + * semaphore, and is the default behavior when a custom supplier is not explicitly specified. + * + * @param The slot info type for this supplier. 
+ */ +public class FixedSizeSlotSupplier implements SlotSupplier { + private final int numSlots; + private final Semaphore executorSlotsSemaphore; + + public FixedSizeSlotSupplier(int numSlots) { + Preconditions.checkArgument(numSlots > 0, "FixedSizeSlotSupplier must have at least one slot"); + this.numSlots = numSlots; + executorSlotsSemaphore = new Semaphore(numSlots); + } + + @Override + public SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException { + executorSlotsSemaphore.acquire(); + return new SlotPermit(); + } + + @Override + public Optional tryReserveSlot(SlotReserveContext ctx) { + boolean gotOne = executorSlotsSemaphore.tryAcquire(); + if (gotOne) { + return Optional.of(new SlotPermit()); + } + return Optional.empty(); + } + + @Override + public void markSlotUsed(SlotMarkUsedContext ctx) {} + + @Override + public void releaseSlot(SlotReleaseContext ctx) { + executorSlotsSemaphore.release(); + } + + @Override + public Optional getMaximumSlots() { + return Optional.of(numSlots); + } + + @Override + public String toString() { + return "FixedSizeSlotSupplier{" + "numSlots=" + numSlots + '}'; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/JVMSystemResourceInfo.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/JVMSystemResourceInfo.java new file mode 100644 index 0000000000..ca20d2473c --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/JVMSystemResourceInfo.java @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import com.sun.management.OperatingSystemMXBean; +import io.temporal.common.Experimental; +import java.lang.management.ManagementFactory; +import java.time.Instant; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +/** {@link SystemResourceInfo} implementation that uses JVM-specific APIs to get resource usage. */ +@Experimental +public class JVMSystemResourceInfo implements SystemResourceInfo { + // As of relatively recent Java versions (including backports), this class will properly deal with + // containerized environments as well as running on bare metal. + // See https://bugs.openjdk.org/browse/JDK-8226575 for more details on which versions the fixes + // have been backported to. 
+ OperatingSystemMXBean osBean = ManagementFactory.getPlatformMXBean(OperatingSystemMXBean.class); + + private final Lock refreshLock = new ReentrantLock(); + private SystemInfo lastSystemInfo; + + @Override + public double getCPUUsagePercent() { + return refresh().cpuUsagePercent; + } + + @Override + public double getMemoryUsagePercent() { + return refresh().memoryUsagePercent; + } + + @SuppressWarnings("deprecation") // deprecated APIs needed since replacements are for Java 14+ + private SystemInfo refresh() { + + refreshLock.lock(); + try { + if (lastSystemInfo == null + || Instant.now().isAfter(lastSystemInfo.refreshed.plusMillis(100))) { + // This can return NaN seemingly when usage is very low + double lastCpuUsage = osBean.getSystemCpuLoad(); + if (lastCpuUsage < 0 || Double.isNaN(lastCpuUsage)) { + lastCpuUsage = 0; + } + + Runtime runtime = Runtime.getRuntime(); + long jvmUsedMemory = runtime.totalMemory() - runtime.freeMemory(); + long jvmMaxMemory = runtime.maxMemory(); + + double lastMemUsage = ((double) jvmUsedMemory / jvmMaxMemory); + Instant lastRefresh = Instant.now(); + lastSystemInfo = new SystemInfo(lastRefresh, lastCpuUsage, lastMemUsage); + } + } finally { + refreshLock.unlock(); + } + + return lastSystemInfo; + } + + private static class SystemInfo { + private final Instant refreshed; + private final double cpuUsagePercent; + private final double memoryUsagePercent; + + private SystemInfo(Instant refreshed, double cpuUsagePercent, double memoryUsagePercent) { + this.refreshed = refreshed; + this.cpuUsagePercent = cpuUsagePercent; + this.memoryUsagePercent = memoryUsagePercent; + } + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/LocalActivitySlotInfo.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/LocalActivitySlotInfo.java new file mode 100644 index 0000000000..6e22358664 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/LocalActivitySlotInfo.java @@ -0,0 +1,80 @@ +/* + * Copyright 
(C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.activity.ActivityInfo; +import io.temporal.common.Experimental; +import java.util.Objects; + +/** Contains information about a slot that is being used to execute a local activity. 
*/ +@Experimental +public class LocalActivitySlotInfo extends SlotInfo { + private final ActivityInfo activityInfo; + private final String workerIdentity; + private final String workerBuildId; + + public LocalActivitySlotInfo(ActivityInfo info, String workerIdentity, String workerBuildId) { + this.activityInfo = info; + this.workerIdentity = workerIdentity; + this.workerBuildId = workerBuildId; + } + + public ActivityInfo getActivityInfo() { + return activityInfo; + } + + public String getWorkerIdentity() { + return workerIdentity; + } + + public String getWorkerBuildId() { + return workerBuildId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + LocalActivitySlotInfo that = (LocalActivitySlotInfo) o; + return Objects.equals(activityInfo, that.activityInfo) + && Objects.equals(workerIdentity, that.workerIdentity) + && Objects.equals(workerBuildId, that.workerBuildId); + } + + @Override + public int hashCode() { + return Objects.hash(activityInfo, workerIdentity, workerBuildId); + } + + @Override + public String toString() { + return "LocalActivitySlotInfo{" + + "activityInfo=" + + activityInfo + + ", workerIdentity='" + + workerIdentity + + '\'' + + ", workerBuildId='" + + workerBuildId + + '\'' + + '}'; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/PIDController.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/PIDController.java new file mode 100644 index 0000000000..d9135bae22 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/PIDController.java @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +/** + * A simple PID closed control loop.
+ *
+ * License : MIT
+ *
+ * @author Charles Grassin
+ */
+class PIDController {
+  // PID coefficients
+  private double setPoint;
+  private double kP, kI, kD;
+
+  /** Limit bound of the output. NaN means the corresponding bound is disabled. */
+  private double minLimit = Double.NaN, maxLimit = Double.NaN;
+
+  // Dynamic variables. previousTime is NaN until the first sample after construction or reset.
+  private double previousTime = Double.NaN;
+  private double lastError = 0;
+  private double integralError = 0;
+
+  /**
+   * Constructs a new PID with set coefficients.
+   *
+   * @param setPoint The initial target value.
+   * @param kP The proportional gain coefficient.
+   * @param kI The integral gain coefficient.
+   * @param kD The derivative gain coefficient.
+   */
+  PIDController(final double setPoint, final double kP, final double kI, final double kD) {
+    this.setSetpoint(setPoint);
+    this.kP = kP;
+    this.kI = kI;
+    this.kD = kD;
+  }
+
+  /**
+   * Updates the controller with the current time and value and outputs the PID controller output.
+   *
+   * @param currentTime The current time (in arbitrary time unit, such as seconds). If the PID is
+   *     assumed to run at a constant frequency, you can simply put '1'.
+   * @param currentValue The current, measured value.
+   * @return The PID controller output.
+   */
+  double getOutput(final double currentTime, final double currentValue) {
+    final double error = setPoint - currentValue;
+    final double dt = (!Double.isNaN(previousTime)) ? (currentTime - previousTime) : 0;
+
+    // Compute Integral & Derivative error; with dt == 0 both terms contribute nothing new
+    final double derivativeError = (dt != 0) ? ((error - lastError) / dt) : 0;
+    integralError += error * dt;
+
+    // Save history
+    previousTime = currentTime;
+    lastError = error;
+
+    return checkLimits((kP * error) + (kI * integralError) + (kD * derivativeError));
+  }
+
+  /** Resets the error history and time reference so the next update starts with dt = 0. */
+  void reset() {
+    previousTime = Double.NaN;
+    lastError = 0;
+    integralError = 0;
+  }
+
+  /**
+   * Bounds the PID output between the lower limit and the upper limit.
+   *
+   * @param output The target output value.
+   * @return The output value, bounded to the limits.
+   */
+  private double checkLimits(final double output) {
+    if (!Double.isNaN(minLimit) && output < minLimit) return minLimit;
+    else if (!Double.isNaN(maxLimit) && output > maxLimit) return maxLimit;
+    else return output;
+  }
+
+  // Getters & Setters
+
+  /**
+   * Sets the output limits of the PID controller. If the minLimit is superior to the maxLimit, it
+   * will use the smallest as the minLimit.
+   *
+   * @param minLimit The lower limit of the PID output.
+   * @param maxLimit The upper limit of the PID output.
+   */
+  void setOuputLimits(final double minLimit, final double maxLimit) {
+    if (minLimit < maxLimit) {
+      this.minLimit = minLimit;
+      this.maxLimit = maxLimit;
+    } else {
+      this.minLimit = maxLimit;
+      this.maxLimit = minLimit;
+    }
+  }
+
+  /** Removes the output limits of the PID controller */
+  void removeOuputLimits() {
+    this.minLimit = Double.NaN;
+    this.maxLimit = Double.NaN;
+  }
+
+  /**
+   * @return the kP parameter
+   */
+  public double getkP() {
+    return kP;
+  }
+
+  /**
+   * @param kP the kP parameter to set
+   */
+  void setkP(double kP) {
+    this.kP = kP;
+    reset();
+  }
+
+  /**
+   * @return the kI parameter
+   */
+  double getkI() {
+    return kI;
+  }
+
+  /**
+   * @param kI the kI parameter to set
+   */
+  void setkI(double kI) {
+    this.kI = kI;
+    reset();
+  }
+
+  /**
+   * @return the kD parameter
+   */
+  double getkD() {
+    return kD;
+  }
+
+  /**
+   * @param kD the kD parameter to set
+   */
+  void setkD(double kD) {
+    this.kD = kD;
+    reset();
+  }
+
+  /**
+   * @return the setPoint
+   */
+  double getSetPoint() {
+    return setPoint;
+  }
+
+  /**
+   * Establishes a new set point for the PID controller.
+   *
+   * @param setPoint The new target point.
+   */
+  void setSetpoint(final double setPoint) {
+    reset();
+    this.setPoint = setPoint;
+  }
+}
diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedController.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedController.java
new file mode 100644
index 0000000000..111df0de3a
--- /dev/null
+++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedController.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved.
+ *
+ * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this material except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.temporal.worker.tuning;
+
+import com.uber.m3.tally.Gauge;
+import com.uber.m3.tally.Scope;
+import io.temporal.common.Experimental;
+import io.temporal.worker.MetricsType;
+import java.time.Instant;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * Is used by {@link ResourceBasedSlotSupplier} and {@link ResourceBasedTuner} to make decisions
+ * about whether slots should be handed out based on system resource usage.
+ */
+@Experimental
+public class ResourceBasedController {
+  public final ResourceBasedControllerOptions options;
+
+  private final ReentrantLock decisionLock = new ReentrantLock();
+  private final PIDController memoryController;
+  private final PIDController cpuController;
+  private final SystemResourceInfo systemInfoSupplier;
+  private Instant lastPidRefresh = Instant.now();
+
+  private final AtomicReference<Metrics> metrics = new AtomicReference<>();
+
+  /**
+   * Construct a controller with the given options. If you want to use resource-based tuning for all
+   * slot suppliers, prefer {@link ResourceBasedTuner}.
+   */
+  public static ResourceBasedController newSystemInfoController(
+      ResourceBasedControllerOptions options) {
+    return new ResourceBasedController(options, new JVMSystemResourceInfo());
+  }
+
+  /**
+   * Construct a controller with the given options and system info supplier. Users should prefer
+   * {@link #newSystemInfoController(ResourceBasedControllerOptions)}.
+   */
+  public ResourceBasedController(
+      ResourceBasedControllerOptions options, SystemResourceInfo systemInfoSupplier) {
+    this.options = options;
+    this.systemInfoSupplier = systemInfoSupplier;
+    this.memoryController =
+        new PIDController(
+            options.getTargetMemoryUsage(),
+            options.getMemoryPGain(),
+            options.getMemoryIGain(),
+            options.getMemoryDGain());
+    this.cpuController =
+        new PIDController(
+            options.getTargetCPUUsage(),
+            options.getCpuPGain(),
+            options.getCpuIGain(),
+            options.getCpuDGain());
+  }
+
+  /**
+   * @return True if the PID controllers and other constraints would allow another slot
+   */
+  boolean pidDecision() {
+    decisionLock.lock();
+    try {
+      double memoryUsage = systemInfoSupplier.getMemoryUsagePercent();
+      double cpuUsage = systemInfoSupplier.getCPUUsagePercent();
+      double memoryOutput =
+          memoryController.getOutput(lastPidRefresh.getEpochSecond(), memoryUsage);
+      double cpuOutput = cpuController.getOutput(lastPidRefresh.getEpochSecond(), cpuUsage);
+      lastPidRefresh = Instant.now();
+
+      Metrics metrics = this.metrics.get();
+      if (metrics != null) {
+        metrics.memUsage.update(memoryUsage);
+        metrics.cpuUsage.update(cpuUsage);
+        metrics.memPidOut.update(memoryOutput);
+        metrics.cpuPidOut.update(cpuOutput);
+      }
+
+      return memoryOutput > options.getMemoryOutputThreshold()
+          && cpuOutput > options.getCpuOutputThreshold()
+          && canReserve();
+    } finally {
+      decisionLock.unlock();
+    }
+  }
+
+  private boolean canReserve() {
+    return systemInfoSupplier.getMemoryUsagePercent() < options.getTargetMemoryUsage();
+  }
+
+  /** Visible for internal usage. Can only be set once; later calls are ignored. */
+  public void setMetricsScope(Scope metricsScope) {
+    if (metrics.get() == null) {
+      metrics.compareAndSet(null, new Metrics(metricsScope));
+    }
+  }
+
+  private static class Metrics {
+    private final Gauge memUsage;
+    private final Gauge cpuUsage;
+    private final Gauge memPidOut;
+    private final Gauge cpuPidOut;
+
+    private Metrics(Scope scope) {
+      memUsage = scope.gauge(MetricsType.RESOURCE_MEM_USAGE);
+      cpuUsage = scope.gauge(MetricsType.RESOURCE_CPU_USAGE);
+      memPidOut = scope.gauge(MetricsType.RESOURCE_MEM_PID);
+      cpuPidOut = scope.gauge(MetricsType.RESOURCE_CPU_PID);
+    }
+  }
+}
diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedControllerOptions.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedControllerOptions.java
new file mode 100644
index 0000000000..47ca105315
--- /dev/null
+++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedControllerOptions.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved.
+ *
+ * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this material except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import com.google.common.base.Preconditions; +import io.temporal.common.Experimental; + +/** Options for a {@link ResourceBasedController} */ +@Experimental +public class ResourceBasedControllerOptions { + + public static ResourceBasedControllerOptions.Builder newBuilder( + double targetMemoryUsage, double targetCPUUsage) { + return new ResourceBasedControllerOptions.Builder() + .setTargetMemoryUsage(targetMemoryUsage) + .setTargetCPUUsage(targetCPUUsage); + } + + public static final class Builder { + private double targetMemoryUsage; + private double targetCPUUsage; + private double memoryPGain = 5; + private double memoryIGain = 0; + private double memoryDGain = 1; + private double memoryOutputThreshold = 0.25; + private double cpuPGain = 5; + private double cpuIGain = 0; + private double cpuDGain = 1; + private double cpuOutputThreshold = 0.05; + + public Builder setTargetMemoryUsage(double targetMemoryUsage) { + this.targetMemoryUsage = targetMemoryUsage; + return this; + } + + public Builder setTargetCPUUsage(double targetCPUUsage) { + this.targetCPUUsage = targetCPUUsage; + return this; + } + + public Builder setMemoryPGain(double memoryPGain) { + this.memoryPGain = memoryPGain; + return this; + } + + public Builder setMemoryIGain(double memoryIGain) { + this.memoryIGain = memoryIGain; + return this; + } + + public Builder setMemoryDGain(double memoryDGain) { + this.memoryDGain = memoryDGain; + return this; + } + + public Builder setMemoryOutputThreshold(double memoryOutputThreshold) { + 
this.memoryOutputThreshold = memoryOutputThreshold; + return this; + } + + public Builder setCpuPGain(double cpuPGain) { + this.cpuPGain = cpuPGain; + return this; + } + + public Builder setCpuIGain(double cpuIGain) { + this.cpuIGain = cpuIGain; + return this; + } + + public Builder setCpuDGain(double cpuDGain) { + this.cpuDGain = cpuDGain; + return this; + } + + public Builder setCpuOutputThreshold(double cpuOutputThreshold) { + this.cpuOutputThreshold = cpuOutputThreshold; + return this; + } + + public ResourceBasedControllerOptions build() { + Preconditions.checkState( + targetMemoryUsage > 0, "targetMemoryUsage must be set and greater than 0"); + Preconditions.checkState(targetCPUUsage > 0, "targetCPUUsage must be set and greater than 0"); + return new ResourceBasedControllerOptions(this); + } + } + + private final double targetMemoryUsage; + private final double targetCPUUsage; + + private final double memoryPGain; + private final double memoryIGain; + private final double memoryDGain; + private final double memoryOutputThreshold; + + private final double cpuPGain; + private final double cpuIGain; + private final double cpuDGain; + private final double cpuOutputThreshold; + + private ResourceBasedControllerOptions(Builder builder) { + this.targetMemoryUsage = builder.targetMemoryUsage; + this.targetCPUUsage = builder.targetCPUUsage; + this.memoryPGain = builder.memoryPGain; + this.memoryIGain = builder.memoryIGain; + this.memoryDGain = builder.memoryDGain; + this.memoryOutputThreshold = builder.memoryOutputThreshold; + this.cpuPGain = builder.cpuPGain; + this.cpuIGain = builder.cpuIGain; + this.cpuDGain = builder.cpuDGain; + this.cpuOutputThreshold = builder.cpuOutputThreshold; + } + + public double getTargetMemoryUsage() { + return targetMemoryUsage; + } + + public double getTargetCPUUsage() { + return targetCPUUsage; + } + + public double getMemoryPGain() { + return memoryPGain; + } + + public double getMemoryIGain() { + return memoryIGain; + } + + public 
double getMemoryDGain() { + return memoryDGain; + } + + public double getMemoryOutputThreshold() { + return memoryOutputThreshold; + } + + public double getCpuPGain() { + return cpuPGain; + } + + public double getCpuIGain() { + return cpuIGain; + } + + public double getCpuDGain() { + return cpuDGain; + } + + public double getCpuOutputThreshold() { + return cpuOutputThreshold; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotOptions.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotOptions.java new file mode 100644 index 0000000000..3dad1ea39f --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotOptions.java @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import java.time.Duration; +import java.util.Objects; + +/** Options resource-based slot suppliers */ +@Experimental +public class ResourceBasedSlotOptions { + private final int minimumSlots; + private final int maximumSlots; + private final Duration rampThrottle; + + public static Builder newBuilder() { + return new Builder(); + } + + public static final class Builder { + private int minimumSlots; + private int maximumSlots; + private Duration rampThrottle; + + private Builder() {} + + /** + * @param minimumSlots minimum number of slots that will be issued without any resource checks + */ + public Builder setMinimumSlots(int minimumSlots) { + this.minimumSlots = minimumSlots; + return this; + } + + /** + * @param maximumSlots maximum number of slots that will ever be issued + */ + public Builder setMaximumSlots(int maximumSlots) { + this.maximumSlots = maximumSlots; + return this; + } + + /** + * @param rampThrottle time to wait between slot issuance. This value matters because how many + * resources a task will use cannot be determined ahead of time, and thus the system should + * wait to see how much resources are used before issuing more slots. + */ + public Builder setRampThrottle(Duration rampThrottle) { + this.rampThrottle = rampThrottle; + return this; + } + + public ResourceBasedSlotOptions build() { + return new ResourceBasedSlotOptions(minimumSlots, maximumSlots, rampThrottle); + } + } + + /** + * @param minimumSlots minimum number of slots that will be issued without any resource checks + * @param maximumSlots maximum number of slots that will ever be issued + * @param rampThrottle time to wait between slot issuance. This value matters because how many + * resources a task will use cannot be determined ahead of time, and thus the system should + * wait to see how much resources are used before issuing more slots. 
+ */ + private ResourceBasedSlotOptions(int minimumSlots, int maximumSlots, Duration rampThrottle) { + this.minimumSlots = minimumSlots; + this.maximumSlots = maximumSlots; + this.rampThrottle = rampThrottle; + } + + public int getMinimumSlots() { + return minimumSlots; + } + + public int getMaximumSlots() { + return maximumSlots; + } + + public Duration getRampThrottle() { + return rampThrottle; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ResourceBasedSlotOptions that = (ResourceBasedSlotOptions) o; + return minimumSlots == that.minimumSlots + && maximumSlots == that.maximumSlots + && Objects.equals(rampThrottle, that.rampThrottle); + } + + @Override + public int hashCode() { + return Objects.hash(minimumSlots, maximumSlots, rampThrottle); + } + + @Override + public String toString() { + return "ResourceBasedSlotOptions{" + + "minimumSlots=" + + minimumSlots + + ", maximumSlots=" + + maximumSlots + + ", rampThrottle=" + + rampThrottle + + '}'; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java new file mode 100644 index 0000000000..b01ed9dbb8 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; + +/** Implements a {@link SlotSupplier} based on resource usage for a particular slot type. */ +@Experimental +public class ResourceBasedSlotSupplier implements SlotSupplier { + + private final ResourceBasedController resourceController; + private final ResourceBasedSlotOptions options; + private Instant lastSlotIssuedAt = Instant.EPOCH; + + /** + * Construct a slot supplier for workflow tasks with the given resource controller and options. + * + *

The resource controller must be the same among all slot suppliers in a worker. If you want + * to use resource-based tuning for all slot suppliers, prefer {@link ResourceBasedTuner}. + */ + public static ResourceBasedSlotSupplier createForWorkflow( + ResourceBasedController resourceBasedController, ResourceBasedSlotOptions options) { + return new ResourceBasedSlotSupplier<>( + WorkflowSlotInfo.class, resourceBasedController, options); + } + + /** + * Construct a slot supplier for activity tasks with the given resource controller and options. + * + *

The resource controller must be the same among all slot suppliers in a worker. If you want + * to use resource-based tuning for all slot suppliers, prefer {@link ResourceBasedTuner}. + */ + public static ResourceBasedSlotSupplier createForActivity( + ResourceBasedController resourceBasedController, ResourceBasedSlotOptions options) { + return new ResourceBasedSlotSupplier<>( + ActivitySlotInfo.class, resourceBasedController, options); + } + + /** + * Construct a slot supplier for local activities with the given resource controller and options. + * + *

The resource controller must be the same among all slot suppliers in a worker. If you want + * to use resource-based tuning for all slot suppliers, prefer {@link ResourceBasedTuner}. + */ + public static ResourceBasedSlotSupplier createForLocalActivity( + ResourceBasedController resourceBasedController, ResourceBasedSlotOptions options) { + return new ResourceBasedSlotSupplier<>( + LocalActivitySlotInfo.class, resourceBasedController, options); + } + + private ResourceBasedSlotSupplier( + Class clazz, + ResourceBasedController resourceBasedController, + ResourceBasedSlotOptions options) { + this.resourceController = resourceBasedController; + // Merge default options for any unset fields + if (WorkflowSlotInfo.class.isAssignableFrom(clazz)) { + this.options = + ResourceBasedSlotOptions.newBuilder() + .setMinimumSlots( + options.getMinimumSlots() == 0 + ? ResourceBasedTuner.DEFAULT_WORKFLOW_SLOT_OPTIONS.getMinimumSlots() + : options.getMinimumSlots()) + .setMaximumSlots( + options.getMaximumSlots() == 0 + ? ResourceBasedTuner.DEFAULT_WORKFLOW_SLOT_OPTIONS.getMaximumSlots() + : options.getMaximumSlots()) + .setRampThrottle( + options.getRampThrottle() == null + ? ResourceBasedTuner.DEFAULT_WORKFLOW_SLOT_OPTIONS.getRampThrottle() + : options.getRampThrottle()) + .build(); + } else { + this.options = + ResourceBasedSlotOptions.newBuilder() + .setMinimumSlots( + options.getMinimumSlots() == 0 + ? ResourceBasedTuner.DEFAULT_ACTIVITY_SLOT_OPTIONS.getMinimumSlots() + : options.getMinimumSlots()) + .setMaximumSlots( + options.getMaximumSlots() == 0 + ? ResourceBasedTuner.DEFAULT_ACTIVITY_SLOT_OPTIONS.getMaximumSlots() + : options.getMaximumSlots()) + .setRampThrottle( + options.getRampThrottle() == null + ? 
ResourceBasedTuner.DEFAULT_ACTIVITY_SLOT_OPTIONS.getRampThrottle() + : options.getRampThrottle()) + .build(); + } + } + + @Override + public SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException { + while (true) { + if (ctx.getNumIssuedSlots() < options.getMinimumSlots()) { + return new SlotPermit(); + } else { + Duration mustWaitFor; + try { + mustWaitFor = options.getRampThrottle().minus(timeSinceLastSlotIssued()); + } catch (ArithmeticException e) { + mustWaitFor = Duration.ZERO; + } + if (mustWaitFor.compareTo(Duration.ZERO) > 0) { + Thread.sleep(mustWaitFor.toMillis()); + } + + Optional permit = tryReserveSlot(ctx); + if (permit.isPresent()) { + return permit.get(); + } else { + Thread.sleep(10); + } + } + } + } + + @Override + public Optional tryReserveSlot(SlotReserveContext ctx) { + int numIssued = ctx.getNumIssuedSlots(); + if (numIssued < options.getMinimumSlots() + || (timeSinceLastSlotIssued().compareTo(options.getRampThrottle()) > 0 + && numIssued < options.getMaximumSlots() + && resourceController.pidDecision())) { + lastSlotIssuedAt = Instant.now(); + return Optional.of(new SlotPermit()); + } + return Optional.empty(); + } + + @Override + public void markSlotUsed(SlotMarkUsedContext ctx) {} + + @Override + public void releaseSlot(SlotReleaseContext ctx) {} + + public ResourceBasedController getResourceController() { + return resourceController; + } + + private Duration timeSinceLastSlotIssued() { + return Duration.between(lastSlotIssuedAt, Instant.now()); + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java new file mode 100644 index 0000000000..47ad34e191 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import java.time.Duration; +import javax.annotation.Nonnull; + +/** A {@link WorkerTuner} that attempts to allocate slots based on available system resources. */ +@Experimental +public class ResourceBasedTuner implements WorkerTuner { + public static final ResourceBasedSlotOptions DEFAULT_WORKFLOW_SLOT_OPTIONS = + ResourceBasedSlotOptions.newBuilder() + .setMinimumSlots(5) + .setMaximumSlots(500) + .setRampThrottle(Duration.ZERO) + .build(); + public static final ResourceBasedSlotOptions DEFAULT_ACTIVITY_SLOT_OPTIONS = + ResourceBasedSlotOptions.newBuilder() + .setMinimumSlots(1) + .setMaximumSlots(1000) + .setRampThrottle(Duration.ofMillis(50)) + .build(); + + private final ResourceBasedController controller; + private final ResourceBasedSlotOptions workflowSlotOptions; + private final ResourceBasedSlotOptions activitySlotOptions; + private final ResourceBasedSlotOptions localActivitySlotOptions; + + public static Builder newBuilder() { + return new Builder(); + } + + public static final class Builder { + private ResourceBasedControllerOptions controllerOptions; + private @Nonnull ResourceBasedSlotOptions workflowSlotOptions = DEFAULT_WORKFLOW_SLOT_OPTIONS; + private @Nonnull ResourceBasedSlotOptions 
activitySlotOptions = DEFAULT_ACTIVITY_SLOT_OPTIONS; + private @Nonnull ResourceBasedSlotOptions localActivitySlotOptions = + DEFAULT_ACTIVITY_SLOT_OPTIONS; + + private Builder() {} + + public Builder setControllerOptions(ResourceBasedControllerOptions controllerOptions) { + this.controllerOptions = controllerOptions; + return this; + } + + /** + * Set the slot options for workflow tasks. Has no effect after the worker using this tuner + * starts. + * + *

Defaults to minimum 5 slots, maximum 500 slots, and no ramp throttle. + */ + public Builder setWorkflowSlotOptions(@Nonnull ResourceBasedSlotOptions workflowSlotOptions) { + this.workflowSlotOptions = workflowSlotOptions; + return this; + } + + /** + * Set the slot options for activity tasks. Has no effect after the worker using this tuner + * starts. + * + *

Defaults to minimum 1 slot, maximum 1000 slots, and 50ms ramp throttle. + */ + public Builder setActivitySlotOptions(@Nonnull ResourceBasedSlotOptions activitySlotOptions) { + this.activitySlotOptions = activitySlotOptions; + return this; + } + + /** + * Set the slot options for local activity tasks. Has no effect after the worker using this + * tuner starts. + * + *

Defaults to minimum 1 slot, maximum 1000 slots, and 50ms ramp throttle. + */ + public Builder setLocalActivitySlotOptions( + @Nonnull ResourceBasedSlotOptions localActivitySlotOptions) { + this.localActivitySlotOptions = localActivitySlotOptions; + return this; + } + + public ResourceBasedTuner build() { + return new ResourceBasedTuner( + controllerOptions, workflowSlotOptions, activitySlotOptions, localActivitySlotOptions); + } + } + + /** + * @param controllerOptions options for the {@link ResourceBasedController} used by this tuner + */ + public ResourceBasedTuner( + ResourceBasedControllerOptions controllerOptions, + ResourceBasedSlotOptions workflowSlotOptions, + ResourceBasedSlotOptions activitySlotOptions, + ResourceBasedSlotOptions localActivitySlotOptions) { + this.controller = ResourceBasedController.newSystemInfoController(controllerOptions); + this.workflowSlotOptions = workflowSlotOptions; + this.activitySlotOptions = activitySlotOptions; + this.localActivitySlotOptions = localActivitySlotOptions; + } + + @Nonnull + @Override + public SlotSupplier getWorkflowTaskSlotSupplier() { + return ResourceBasedSlotSupplier.createForWorkflow(controller, workflowSlotOptions); + } + + @Nonnull + @Override + public SlotSupplier getActivityTaskSlotSupplier() { + return ResourceBasedSlotSupplier.createForActivity(controller, activitySlotOptions); + } + + @Nonnull + @Override + public SlotSupplier getLocalActivitySlotSupplier() { + return ResourceBasedSlotSupplier.createForLocalActivity(controller, localActivitySlotOptions); + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotInfo.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotInfo.java new file mode 100644 index 0000000000..16c4d17fec --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotInfo.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; + +/** The base class that all slot info types used by {@link SlotSupplier} extend. */ +@Experimental +public abstract class SlotInfo { + SlotInfo() {} +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotMarkUsedContext.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotMarkUsedContext.java new file mode 100644 index 0000000000..24ecba9d1b --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotMarkUsedContext.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; + +@Experimental +public interface SlotMarkUsedContext { + /** + * @return The information associated with the slot that is being marked as used. + */ + SI getSlotInfo(); + + /** + * @return The previously reserved permit that is being used with this slot. + */ + SlotPermit getSlotPermit(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotPermit.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotPermit.java new file mode 100644 index 0000000000..4fd625cba7 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotPermit.java @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; + +/** + * This class is handed out by implementations of {@link SlotSupplier}. Permits are held until the + * tasks they are associated with (if any) are finished processing, or if the reservation is no + * longer needed. Your supplier implementation may store additional data in the permit, if desired. + * + *

When {@link SlotSupplier#releaseSlot(SlotReleaseContext)} is called, the exact same instance + * of the permit is passed back to the supplier. + */ +@Experimental +public final class SlotPermit { + public final Object userData; + + public SlotPermit() { + this.userData = null; + } + + public SlotPermit(Object userData) { + this.userData = userData; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseContext.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseContext.java new file mode 100644 index 0000000000..bf9600e99a --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseContext.java @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import javax.annotation.Nullable; + +@Experimental +public interface SlotReleaseContext { + /** + * @return The reason the slot is being released. + */ + SlotReleaseReason getSlotReleaseReason(); + + /** + * @return The permit the slot was using that is now being released. + */ + SlotPermit getSlotPermit(); + + /** + * @return The information associated with the slot that is being released. 
May be null if the + * slot was never marked as used. + */ + @Nullable + SI getSlotInfo(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseReason.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseReason.java new file mode 100644 index 0000000000..9181dfb754 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReleaseReason.java @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import javax.annotation.Nullable; + +@Experimental +public abstract class SlotReleaseReason { + SlotReleaseReason() {} + + public static SlotReleaseReason taskComplete() { + return new TaskComplete(); + } + + public static SlotReleaseReason willRetry() { + return new WillRetry(); + } + + public static SlotReleaseReason neverUsed() { + return new NeverUsed(); + } + + public static SlotReleaseReason error(Exception exception) { + return new Error(exception); + } + + public boolean isError() { + return false; + } + + /** + * @return the exception that caused the slot to be released, if this is a reason of type {@link + * Error}. 
+ */ + public @Nullable Exception getException() { + return null; + } + + /** The slot was released because the task was completed (regardless of status). */ + public static class TaskComplete extends SlotReleaseReason {} + + /** The slot was released because the task will be retried. */ + public static class WillRetry extends SlotReleaseReason {} + + /** The slot was released because it was never needed. */ + public static class NeverUsed extends SlotReleaseReason {} + + /** + * The slot was released because some error was encountered before the slot could be used to + * actually process the task. + */ + public static class Error extends SlotReleaseReason { + private final Exception exception; + + private Error(Exception exception) { + this.exception = exception; + } + + @Override + public boolean isError() { + return true; + } + + @Override + public Exception getException() { + return exception; + } + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReserveContext.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReserveContext.java new file mode 100644 index 0000000000..57007d57d7 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotReserveContext.java @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import java.util.Map; + +@Experimental +public interface SlotReserveContext { + /** + * @return the Task Queue with which this reservation request is associated. + */ + String getTaskQueue(); + + /** + * @return A read-only & safe for concurrent access mapping of slot permits to the information + * associated with the in-use slot. This map is changed internally any time new slots are + * used. + */ + Map getUsedSlots(); + + /** + * @return The worker's identity that is associated with this reservation request. + */ + String getWorkerIdentity(); + + /** + * @return The worker's build ID that is associated with this reservation request. + */ + String getWorkerBuildId(); + + /** + * @return The number of currently outstanding slot permits of this type, whether used or not. + */ + int getNumIssuedSlots(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java new file mode 100644 index 0000000000..37c1f0ab77 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import java.util.Optional; + +/** + * A SlotSupplier is responsible for managing the number of slots available for a given type of + * task. The three types of tasks are workflow, activity, and local activity. Implementing this + * interface allows you to carefully control how many tasks of any given type a worker will process + * at once. + * + * @param The type of information that will be used to reserve a slot. The three info types are + * {@link WorkflowSlotInfo}, {@link ActivitySlotInfo}, and {@link LocalActivitySlotInfo}. + */ +@Experimental +public interface SlotSupplier { + /** + * This function is called before polling for new tasks. Your implementation should block until a + * slot is available then return a permit to use that slot. + * + * @param ctx The context for slot reservation. + * @return A permit to use the slot which may be populated with your own data. + * @throws InterruptedException The worker may choose to interrupt the thread in order to cancel + * the reservation, or during shutdown. You may perform cleanup, and then should rethrow the + * exception. + */ + SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException; + + /** + * This function is called when trying to reserve slots for "eager" workflow and activity tasks. + * Eager tasks are those which are returned as a result of completing a workflow task, rather than + * from polling. 
Your implementation must not block. If a slot is available, return a permit + * to use that slot. + * + * @param ctx The context for slot reservation. + * @return Maybe a permit to use the slot which may be populated with your own data. + */ + Optional tryReserveSlot(SlotReserveContext ctx); + + /** + * This function is called once a slot is actually being used to process some task, which may be + * some time after the slot was reserved originally. For example, if there is no work for a + * worker, a number of slots equal to the number of active pollers may already be reserved, but + * none of them are being used yet. This call should be non-blocking. + * + * @param ctx The context for marking a slot as used. + */ + void markSlotUsed(SlotMarkUsedContext ctx); + + /** + * This function is called once a permit is no longer needed. This could be because the task has + * finished, whether successfully or not, or because the slot was no longer needed (ex: the number + * of active pollers decreased). This call should be non-blocking. + * + * @param ctx The context for releasing a slot. + */ + void releaseSlot(SlotReleaseContext ctx); + + /** + * Because we currently use thread pools to execute tasks, there must be *some* defined + * upper-limit on the size of the thread pool for each kind of task. You must not hand out more + * permits than this number. If unspecified, the default is {@link Integer#MAX_VALUE}. Be aware + * that if your implementation hands out unreasonable numbers of permits, you could easily + * oversubscribe the worker, and cause it to run out of resources. + * + *

If a non-empty value is returned, it is assumed to be meaningful, and the worker will emit + * {@link io.temporal.worker.MetricsType#WORKER_TASK_SLOTS_AVAILABLE} metrics based on this value. + * + *

This value should never change during the lifetime of the supplier. + * + * @return the maximum number of slots that can ever be in use at one time for this slot type. + */ + default Optional getMaximumSlots() { + return Optional.empty(); + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SystemResourceInfo.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SystemResourceInfo.java new file mode 100644 index 0000000000..ff5cf6a071 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SystemResourceInfo.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; + +/** Implementors determine how resource usage is measured. */ +@Experimental +public interface SystemResourceInfo { + /** + * @return System-wide CPU usage as a percentage [0.0, 1.0] + */ + double getCPUUsagePercent(); + + /** + * @return Memory usage as a percentage [0.0, 1.0]. Memory usage should reflect either system-wide + * usage or JVM-specific usage, whichever is higher, to avoid running out of memory in either + * way.
+ */ + double getMemoryUsagePercent(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkerTuner.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkerTuner.java new file mode 100644 index 0000000000..a25099569c --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkerTuner.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.common.Experimental; +import javax.annotation.Nonnull; + +/** WorkerTuners allow for the dynamic customization of some aspects of worker configuration. */ +@Experimental +public interface WorkerTuner { + /** + * @return A {@link SlotSupplier} for workflow tasks. + */ + @Nonnull + SlotSupplier getWorkflowTaskSlotSupplier(); + + /** + * @return A {@link SlotSupplier} for activity tasks. + */ + @Nonnull + SlotSupplier getActivityTaskSlotSupplier(); + + /** + * @return A {@link SlotSupplier} for local activities. 
+ */ + @Nonnull + SlotSupplier getLocalActivitySlotSupplier(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkflowSlotInfo.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkflowSlotInfo.java new file mode 100644 index 0000000000..0995603a9e --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/WorkflowSlotInfo.java @@ -0,0 +1,145 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker.tuning; + +import io.temporal.api.enums.v1.TaskQueueKind; +import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueRequest; +import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse; +import io.temporal.common.Experimental; +import java.util.Objects; +import javax.annotation.Nonnull; + +/** Contains information about a slot that is being used to execute a workflow task. */ +@Experimental +public class WorkflowSlotInfo extends SlotInfo { + private final String workflowType; + private final String taskQueue; + private final String workflowId; + private final String runId; + private final String workerIdentity; + private final String workerBuildId; + private final boolean fromStickyQueue; + + /** Don't rely on this constructor. 
It is for internal use by the SDK. */ + public WorkflowSlotInfo( + @Nonnull PollWorkflowTaskQueueResponse response, + @Nonnull PollWorkflowTaskQueueRequest request) { + this.workflowType = response.getWorkflowType().getName(); + this.taskQueue = request.getTaskQueue().getNormalName(); + this.workflowId = response.getWorkflowExecution().getWorkflowId(); + this.runId = response.getWorkflowExecution().getRunId(); + this.workerIdentity = request.getIdentity(); + this.workerBuildId = request.getWorkerVersionCapabilities().getBuildId(); + this.fromStickyQueue = request.getTaskQueue().getKind() == TaskQueueKind.TASK_QUEUE_KIND_STICKY; + } + + /** Don't rely on this constructor. It is for internal use by the SDK. */ + public WorkflowSlotInfo( + String workflowType, + String taskQueue, + String workflowId, + String runId, + String workerIdentity, + String workerBuildId, + boolean fromStickyQueue) { + this.workflowType = workflowType; + this.taskQueue = taskQueue; + this.workflowId = workflowId; + this.runId = runId; + this.workerIdentity = workerIdentity; + this.workerBuildId = workerBuildId; + this.fromStickyQueue = fromStickyQueue; + } + + public String getWorkflowType() { + return workflowType; + } + + public String getWorkflowId() { + return workflowId; + } + + public String getRunId() { + return runId; + } + + public String getTaskQueue() { + return taskQueue; + } + + public String getWorkerIdentity() { + return workerIdentity; + } + + public String getWorkerBuildId() { + return workerBuildId; + } + + public boolean isFromStickyQueue() { + return fromStickyQueue; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + WorkflowSlotInfo that = (WorkflowSlotInfo) o; + return fromStickyQueue == that.fromStickyQueue + && Objects.equals(workflowType, that.workflowType) + && Objects.equals(taskQueue, that.taskQueue) + && Objects.equals(workflowId, that.workflowId) + && 
Objects.equals(runId, that.runId) + && Objects.equals(workerIdentity, that.workerIdentity) + && Objects.equals(workerBuildId, that.workerBuildId); + } + + @Override + public int hashCode() { + return Objects.hash( + workflowType, taskQueue, workflowId, runId, workerIdentity, workerBuildId, fromStickyQueue); + } + + @Override + public String toString() { + return "WorkflowSlotInfo{" + + "workflowType='" + + workflowType + + '\'' + + ", taskQueue='" + + taskQueue + + '\'' + + ", workflowId='" + + workflowId + + '\'' + + ", runId='" + + runId + + '\'' + + ", workerIdentity='" + + workerIdentity + + '\'' + + ", workerBuildId='" + + workerBuildId + + '\'' + + ", fromStickyQueue=" + + fromStickyQueue + + '}'; + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java new file mode 100644 index 0000000000..8a393bd7ed --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.worker; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +import com.google.protobuf.ByteString; +import com.uber.m3.tally.RootScopeBuilder; +import com.uber.m3.tally.Scope; +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.api.common.v1.WorkflowType; +import io.temporal.api.workflowservice.v1.GetSystemInfoResponse; +import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse; +import io.temporal.api.workflowservice.v1.WorkflowServiceGrpc; +import io.temporal.common.reporter.TestStatsReporter; +import io.temporal.serviceclient.WorkflowServiceStubs; +import io.temporal.worker.tuning.*; +import java.util.Objects; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.stubbing.OngoingStubbing; + +@RunWith(Parameterized.class) +public class SlotSupplierTest { + private final TestStatsReporter reporter = new TestStatsReporter(); + private static final String WORKFLOW_ID = "test-workflow-id"; + private static final String RUN_ID = "test-run-id"; + private static final String WORKFLOW_TYPE = "test-workflow-type"; + private static final String TASK_QUEUE = "test-task-queue"; + + @Parameterized.Parameter public boolean throwOnPoll; + + @Parameterized.Parameters() + public static Object[] data() { + return new Object[][] {{true}, {false}}; + } + + @Test + public void supplierIsCalledAppropriately() throws InterruptedException, TimeoutException { + WorkflowServiceStubs client = mock(WorkflowServiceStubs.class); + when(client.getServerCapabilities()) + .thenReturn(() -> GetSystemInfoResponse.Capabilities.newBuilder().build()); + WorkflowServiceGrpc.WorkflowServiceBlockingStub blockingStub = + 
mock(WorkflowServiceGrpc.WorkflowServiceBlockingStub.class); + when(client.blockingStub()).thenReturn(blockingStub); + when(blockingStub.withOption(any(), any())).thenReturn(blockingStub); + + SlotSupplier mockSupplier = mock(SlotSupplier.class); + AtomicInteger usedSlotsWhenCalled = new AtomicInteger(-1); + when(mockSupplier.reserveSlot( + argThat( + src -> { + usedSlotsWhenCalled.set(src.getUsedSlots().size()); + return true; + }))) + .thenReturn(new SlotPermit()); + + StickyQueueBalancer stickyQueueBalancer = new StickyQueueBalancer(5, true); + Scope metricsScope = + new RootScopeBuilder() + .reporter(reporter) + .reportEvery(com.uber.m3.util.Duration.ofMillis(1)); + TrackingSlotSupplier trackingSS = + new TrackingSlotSupplier<>(mockSupplier, metricsScope); + + WorkflowPollTask poller = + new WorkflowPollTask( + client, + "default", + TASK_QUEUE, + "stickytaskqueue", + "", + "", + false, + trackingSS, + stickyQueueBalancer, + metricsScope, + () -> GetSystemInfoResponse.Capabilities.newBuilder().build()); + + PollWorkflowTaskQueueResponse pollResponse = + PollWorkflowTaskQueueResponse.newBuilder() + .setTaskToken(ByteString.copyFrom("token", UTF_8)) + .setWorkflowExecution( + WorkflowExecution.newBuilder().setWorkflowId(WORKFLOW_ID).setRunId(RUN_ID).build()) + .setWorkflowType(WorkflowType.newBuilder().setName(WORKFLOW_TYPE).build()) + .build(); + + OngoingStubbing pollMock = + when(blockingStub.pollWorkflowTaskQueue(any())); + if (throwOnPoll) { + pollMock.thenThrow(new RuntimeException("Poll failed")); + } else { + pollMock.thenReturn(pollResponse); + } + + if (throwOnPoll) { + assertThrows(RuntimeException.class, poller::poll); + verify(mockSupplier, times(1)).reserveSlot(any()); + verify(mockSupplier, times(1)).releaseSlot(any()); + assertEquals(0, trackingSS.getUsedSlots().size()); + } else { + WorkflowTask task = poller.poll(); + assertNotNull(task); + // We can't test this in the verifier, since it will get an up-to-date reference to the map + // where the 
slot *is* used. + assertEquals(0, usedSlotsWhenCalled.get()); + verify(mockSupplier, times(1)) + .reserveSlot(argThat(arg -> Objects.equals(arg.getTaskQueue(), TASK_QUEUE))); + verify(mockSupplier, times(0)).releaseSlot(any()); + assertEquals(1, trackingSS.getUsedSlots().size()); + } + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/StickyQueueBacklogTest.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/StickyQueueBacklogTest.java index 67a325d40a..1c48ebd7dc 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/StickyQueueBacklogTest.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/StickyQueueBacklogTest.java @@ -41,7 +41,8 @@ import io.temporal.api.workflowservice.v1.WorkflowServiceGrpc; import io.temporal.common.reporter.TestStatsReporter; import io.temporal.serviceclient.WorkflowServiceStubs; -import java.util.concurrent.Semaphore; +import io.temporal.worker.tuning.FixedSizeSlotSupplier; +import io.temporal.worker.tuning.WorkflowSlotInfo; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -72,13 +73,14 @@ public void stickyQueueBacklogResetTest() { when(client.blockingStub()).thenReturn(blockingStub); when(blockingStub.withOption(any(), any())).thenReturn(blockingStub); - Semaphore executorSlotsSemaphore = new Semaphore(10); StickyQueueBalancer stickyQueueBalancer = new StickyQueueBalancer(2, true); - Scope metricsScope = new RootScopeBuilder() .reporter(reporter) .reportEvery(com.uber.m3.util.Duration.ofMillis(1)); + TrackingSlotSupplier slotSupplier = + new TrackingSlotSupplier<>(new FixedSizeSlotSupplier<>(10), metricsScope); + WorkflowPollTask poller = new WorkflowPollTask( client, @@ -88,7 +90,7 @@ public void stickyQueueBacklogResetTest() { "", "", false, - executorSlotsSemaphore, + slotSupplier, stickyQueueBalancer, metricsScope, () -> GetSystemInfoResponse.Capabilities.newBuilder().build()); diff --git 
a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotGrpcInterceptedTests.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotGrpcInterceptedTests.java new file mode 100644 index 0000000000..274a066b2f --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotGrpcInterceptedTests.java @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.worker; + +import static org.junit.Assert.assertEquals; + +import com.uber.m3.tally.RootScopeBuilder; +import com.uber.m3.tally.Scope; +import com.uber.m3.util.ImmutableMap; +import io.grpc.*; +import io.temporal.activity.ActivityOptions; +import io.temporal.api.workflowservice.v1.WorkflowServiceGrpc; +import io.temporal.client.WorkflowClient; +import io.temporal.client.WorkflowOptions; +import io.temporal.common.RetryOptions; +import io.temporal.common.reporter.TestStatsReporter; +import io.temporal.serviceclient.WorkflowServiceStubsOptions; +import io.temporal.testUtils.CountingSlotSupplier; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.worker.MetricsType; +import io.temporal.worker.WorkerOptions; +import io.temporal.worker.tuning.ActivitySlotInfo; +import io.temporal.worker.tuning.CompositeTuner; +import io.temporal.worker.tuning.LocalActivitySlotInfo; +import io.temporal.worker.tuning.WorkflowSlotInfo; +import io.temporal.workflow.Workflow; +import io.temporal.workflow.shared.TestActivities; +import io.temporal.workflow.shared.TestWorkflows; +import java.time.Duration; +import java.util.Collections; +import java.util.Map; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +public class WorkflowSlotGrpcInterceptedTests { + private final int MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE = 100; + private final int MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE = 1000; + private final int MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE = 10000; + private final CountingSlotSupplier workflowTaskSlotSupplier = + new CountingSlotSupplier<>(MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE); + private final CountingSlotSupplier activityTaskSlotSupplier = + new CountingSlotSupplier<>(MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE); + private final CountingSlotSupplier localActivitySlotSupplier = + new CountingSlotSupplier<>(MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + private 
final TestStatsReporter reporter = new TestStatsReporter(); + private static final MaybeFailWFTResponseInterceptor MAYBE_FAIL_INTERCEPTOR = + new MaybeFailWFTResponseInterceptor(); + Scope metricsScope = + new RootScopeBuilder().reporter(reporter).reportEvery(com.uber.m3.util.Duration.ofMillis(1)); + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder() + .setWorkflowServiceStubsOptions( + WorkflowServiceStubsOptions.newBuilder() + .setGrpcClientInterceptors(Collections.singletonList(MAYBE_FAIL_INTERCEPTOR)) + .build()) + .setWorkerOptions( + WorkerOptions.newBuilder() + .setWorkerTuner( + new CompositeTuner( + workflowTaskSlotSupplier, + activityTaskSlotSupplier, + localActivitySlotSupplier)) + .build()) + .setMetricsScope(metricsScope) + .setActivityImplementations(new TestActivities.TestActivitiesImpl()) + .setWorkflowTypes(UnblockableWorkflow.class) + .setDoNotStart(true) + .build(); + + @Before + public void setup() { + reporter.flush(); + MAYBE_FAIL_INTERCEPTOR.reset(); + } + + @After + public void tearDown() { + testWorkflowRule.getTestEnvironment().close(); + assertEquals( + workflowTaskSlotSupplier.reservedCount.get(), workflowTaskSlotSupplier.releasedCount.get()); + assertEquals( + activityTaskSlotSupplier.reservedCount.get(), activityTaskSlotSupplier.releasedCount.get()); + assertEquals( + localActivitySlotSupplier.reservedCount.get(), + localActivitySlotSupplier.releasedCount.get()); + } + + public static class UnblockableWorkflow implements TestWorkflows.TestSignaledWorkflow { + private boolean unblocked = false; + + private final TestActivities.VariousTestActivities activities = + Workflow.newActivityStub( + TestActivities.VariousTestActivities.class, + ActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofSeconds(10)) + // If the task comes eagerly, since the interceptor blows us up we drop it + // and that can cause timeouts. 
+ .setDisableEagerExecution(true) + .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(1).build()) + .validateAndBuildWithDefaults()); + + @Override + public String execute() { + Workflow.await(() -> unblocked); + activities.activity(); + return ""; + } + + @Override + public void signal(String arg) { + unblocked = true; + } + } + + @Test + public void TestWFTResponseFailsThenWorks() { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflows.TestSignaledWorkflow workflow = + client.newWorkflowStub( + TestWorkflows.TestSignaledWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + // If the task comes eagerly, since the interceptor blows us up we drop it + // and that can cause timeouts. + .setDisableEagerExecution(true) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::execute); + workflow.signal("whatever"); + workflow.execute(); + // All slots should be available + assertAllSlotsAvailable(); + } + + @Test + public void TestActivityResponseFailsThenWorks() { + MAYBE_FAIL_INTERCEPTOR.failActivity = true; + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflows.TestSignaledWorkflow workflow = + client.newWorkflowStub( + TestWorkflows.TestSignaledWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::execute); + workflow.signal("whatever"); + workflow.execute(); + // All slots should be available + assertAllSlotsAvailable(); + } + + private void assertAllSlotsAvailable() { + try { + // There can be a delay in metrics emission, another option if this + // is too flaky is to poll the metrics. 
+ Thread.sleep(100); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + getWorkerTags("WorkflowWorker"), + MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + getWorkerTags("ActivityWorker"), + MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + getWorkerTags("LocalActivityWorker"), + MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + reporter.assertGauge(MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("WorkflowWorker"), 0); + reporter.assertGauge(MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("ActivityWorker"), 0); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("LocalActivityWorker"), 0); + } + + private Map getWorkerTags(String workerType) { + return ImmutableMap.of( + "worker_type", + workerType, + "task_queue", + testWorkflowRule.getTaskQueue(), + "namespace", + "UnitTest"); + } + + private static class MaybeFailWFTResponseInterceptor implements ClientInterceptor { + boolean didFailOnce = false; + boolean failActivity; + + @Override + public ClientCall interceptCall( + MethodDescriptor method, CallOptions callOptions, Channel next) { + if (!failActivity && method == WorkflowServiceGrpc.getRespondWorkflowTaskCompletedMethod()) { + return new MaybeFailWFTResponseInterceptor.FailResponseCall<>( + next.newCall(method, callOptions)); + } else if (failActivity + && method == WorkflowServiceGrpc.getRespondActivityTaskCompletedMethod()) { + return new MaybeFailWFTResponseInterceptor.FailResponseCall<>( + next.newCall(method, callOptions)); + } + return next.newCall(method, callOptions); + } + + public void reset() { + didFailOnce = false; + failActivity = false; + } + + private final class FailResponseCall + extends ForwardingClientCall.SimpleForwardingClientCall { + + FailResponseCall(ClientCall call) { + super(call); + } + + 
@Override + public void start(Listener responseListener, Metadata headers) { + + responseListener = + new ForwardingClientCallListener.SimpleForwardingClientCallListener( + responseListener) { + @Override + public void onMessage(RespT message) { + if (!didFailOnce) { + didFailOnce = true; + // Throw some non-retryable error code + throw new StatusRuntimeException(Status.UNAUTHENTICATED); + } + super.onMessage(message); + } + }; + super.start(responseListener, headers); + } + } + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotMaxConcurrentTests.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotMaxConcurrentTests.java new file mode 100644 index 0000000000..30dd9a9d0f --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotMaxConcurrentTests.java @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.worker; + +import com.uber.m3.tally.RootScopeBuilder; +import com.uber.m3.tally.Scope; +import io.temporal.activity.ActivityInterface; +import io.temporal.activity.ActivityMethod; +import io.temporal.activity.ActivityOptions; +import io.temporal.activity.LocalActivityOptions; +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.WorkflowClient; +import io.temporal.client.WorkflowOptions; +import io.temporal.client.WorkflowStub; +import io.temporal.common.RetryOptions; +import io.temporal.common.reporter.TestStatsReporter; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.worker.WorkerOptions; +import io.temporal.workflow.Workflow; +import io.temporal.workflow.WorkflowInterface; +import io.temporal.workflow.WorkflowMethod; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +// Verifies using the worker options without an explicit slot supplier still does the right thing +public class WorkflowSlotMaxConcurrentTests { + private static final int MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE = 2; + private static final int MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE = 2; + private static final int MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE = 2; + private final TestStatsReporter reporter = new TestStatsReporter(); + static AtomicInteger concurrentActivityHighMark = new AtomicInteger(); + static AtomicInteger concurrentLocalActivityHighMark = new AtomicInteger(); + + Scope metricsScope = + new RootScopeBuilder().reporter(reporter).reportEvery(com.uber.m3.util.Duration.ofMillis(1)); + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder() + .setWorkerOptions( + WorkerOptions.newBuilder() + .setMaxConcurrentWorkflowTaskExecutionSize( + 
MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE) + .setMaxConcurrentActivityExecutionSize(MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE) + .setMaxConcurrentLocalActivityExecutionSize( + MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE) + .build()) + .setMetricsScope(metricsScope) + .setActivityImplementations(new TestActivityImpl()) + .setWorkflowTypes(SleepingWorkflowImpl.class) + .setDoNotStart(true) + .build(); + + @Before + public void setup() { + reporter.flush(); + concurrentActivityHighMark.set(0); + concurrentLocalActivityHighMark.set(0); + } + + @After + public void tearDown() { + testWorkflowRule.getTestEnvironment().close(); + } + + @WorkflowInterface + public interface TestWorkflow { + @WorkflowMethod + String workflow(String action); + } + + public static class SleepingWorkflowImpl implements TestWorkflow { + private final TestActivity activity = + Workflow.newActivityStub( + TestActivity.class, + ActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofSeconds(10)) + .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(1).build()) + .validateAndBuildWithDefaults()); + + private final TestActivity localActivity = + Workflow.newLocalActivityStub( + TestActivity.class, + LocalActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofSeconds(10)) + .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(1).build()) + .validateAndBuildWithDefaults()); + + @Override + public String workflow(String action) { + if (action.equals("local-activity")) { + localActivity.activity(true); + } else if (action.equals("activity")) { + activity.activity(false); + } + return "ok"; + } + } + + @ActivityInterface + public interface TestActivity { + + @ActivityMethod + String activity(boolean isLocal); + } + + public static class TestActivityImpl implements TestActivity { + @Override + public String activity(boolean isLocal) { + if (isLocal) { + int current = concurrentLocalActivityHighMark.incrementAndGet(); + if (current > 
MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE) { + throw new IllegalStateException("Too many local activities running concurrently"); + } + } else { + int current = concurrentActivityHighMark.incrementAndGet(); + if (current > MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE) { + throw new IllegalStateException("Too many activities running concurrently"); + } + } + + try { + Thread.sleep(500); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + if (isLocal) { + concurrentLocalActivityHighMark.decrementAndGet(); + } else { + concurrentActivityHighMark.decrementAndGet(); + } + + return ""; + } + } + + @Test + public void TestSlotsNotExceeded() { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + WorkflowOptions workflowOptions = + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .validateBuildWithDefaults(); + // Run a handful of workflows concurrently + List executions = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + executions.add( + WorkflowClient.start( + client.newWorkflowStub(TestWorkflow.class, workflowOptions)::workflow, + "local-activity")); + } + for (int i = 0; i < 5; i++) { + executions.add( + WorkflowClient.start( + client.newWorkflowStub(TestWorkflow.class, workflowOptions)::workflow, "activity")); + } + + // wait for all of them to finish + for (WorkflowExecution execution : executions) { + WorkflowStub workflowStub = client.newUntypedWorkflowStub(execution, Optional.empty()); + workflowStub.getResult(String.class); + } + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotTests.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotTests.java index fdab84da9f..45dfb2884f 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotTests.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotTests.java @@ -20,27 +20,29 @@ package 
io.temporal.internal.worker; +import static org.junit.Assert.assertEquals; + import com.uber.m3.tally.RootScopeBuilder; import com.uber.m3.tally.Scope; import com.uber.m3.util.ImmutableMap; -import io.temporal.activity.ActivityInterface; -import io.temporal.activity.ActivityMethod; -import io.temporal.activity.ActivityOptions; -import io.temporal.activity.LocalActivityOptions; +import io.temporal.activity.*; import io.temporal.client.WorkflowClient; import io.temporal.client.WorkflowOptions; import io.temporal.common.RetryOptions; import io.temporal.common.reporter.TestStatsReporter; +import io.temporal.testUtils.CountingSlotSupplier; import io.temporal.testing.internal.SDKTestWorkflowRule; import io.temporal.worker.MetricsType; import io.temporal.worker.WorkerOptions; -import io.temporal.workflow.SignalMethod; -import io.temporal.workflow.Workflow; -import io.temporal.workflow.WorkflowInterface; -import io.temporal.workflow.WorkflowMethod; +import io.temporal.worker.tuning.ActivitySlotInfo; +import io.temporal.worker.tuning.CompositeTuner; +import io.temporal.worker.tuning.LocalActivitySlotInfo; +import io.temporal.worker.tuning.WorkflowSlotInfo; +import io.temporal.workflow.*; import java.time.Duration; import java.util.Map; import java.util.concurrent.CountDownLatch; +import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -49,9 +51,16 @@ public class WorkflowSlotTests { private final int MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE = 100; private final int MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE = 1000; private final int MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE = 10000; + private final CountingSlotSupplier workflowTaskSlotSupplier = + new CountingSlotSupplier<>(MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE); + private final CountingSlotSupplier activityTaskSlotSupplier = + new CountingSlotSupplier<>(MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE); + private final CountingSlotSupplier localActivitySlotSupplier = + new 
CountingSlotSupplier<>(MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); private final TestStatsReporter reporter = new TestStatsReporter(); static CountDownLatch activityBlockLatch = new CountDownLatch(1); static CountDownLatch activityRunningLatch = new CountDownLatch(1); + static boolean didFail = false; Scope metricsScope = new RootScopeBuilder().reporter(reporter).reportEvery(com.uber.m3.util.Duration.ofMillis(1)); @@ -61,11 +70,11 @@ public class WorkflowSlotTests { SDKTestWorkflowRule.newBuilder() .setWorkerOptions( WorkerOptions.newBuilder() - .setMaxConcurrentWorkflowTaskExecutionSize( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE) - .setMaxConcurrentActivityExecutionSize(MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE) - .setMaxConcurrentLocalActivityExecutionSize( - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE) + .setWorkerTuner( + new CompositeTuner( + workflowTaskSlotSupplier, + activityTaskSlotSupplier, + localActivitySlotSupplier)) .build()) .setMetricsScope(metricsScope) .setActivityImplementations(new TestActivityImpl()) @@ -78,8 +87,23 @@ public void setup() { reporter.flush(); activityBlockLatch = new CountDownLatch(1); activityRunningLatch = new CountDownLatch(1); + localActivitySlotSupplier.usedCount.set(0); + didFail = false; + } + + @After + public void tearDown() { + testWorkflowRule.getTestEnvironment().close(); + assertEquals( + workflowTaskSlotSupplier.reservedCount.get(), workflowTaskSlotSupplier.releasedCount.get()); + assertEquals( + activityTaskSlotSupplier.reservedCount.get(), activityTaskSlotSupplier.releasedCount.get()); + assertEquals( + localActivitySlotSupplier.reservedCount.get(), + localActivitySlotSupplier.releasedCount.get()); } + // Arguments are the number of used slots by type private void assertWorkerSlotCount(int worker, int activity, int localActivity) { try { // There can be a delay in metrics emission, another option if this @@ -89,21 +113,29 @@ private void assertWorkerSlotCount(int worker, int activity, int localActivity) 
throw new RuntimeException(e); } reporter.assertGauge( - MetricsType.WORKER_TASK_SLOTS_AVAILABLE, getWorkerTags("WorkflowWorker"), worker); - // All slots should be available + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + getWorkerTags("WorkflowWorker"), + MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE - worker); reporter.assertGauge( - MetricsType.WORKER_TASK_SLOTS_AVAILABLE, getWorkerTags("ActivityWorker"), activity); - // All slots should be available + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + getWorkerTags("ActivityWorker"), + MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE - activity); reporter.assertGauge( MetricsType.WORKER_TASK_SLOTS_AVAILABLE, getWorkerTags("LocalActivityWorker"), - localActivity); + MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE - localActivity); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("WorkflowWorker"), worker); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("ActivityWorker"), activity); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("LocalActivityWorker"), localActivity); } @WorkflowInterface public interface TestWorkflow { @WorkflowMethod - String workflow(boolean useLocalActivity); + String workflow(String action); @SignalMethod void unblock(); @@ -117,7 +149,7 @@ public static class SleepingWorkflowImpl implements TestWorkflow { TestActivity.class, ActivityOptions.newBuilder() .setStartToCloseTimeout(Duration.ofSeconds(10)) - .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(1).build()) + .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(4).build()) .validateAndBuildWithDefaults()); private final TestActivity localActivity = @@ -125,15 +157,24 @@ public static class SleepingWorkflowImpl implements TestWorkflow { TestActivity.class, LocalActivityOptions.newBuilder() .setStartToCloseTimeout(Duration.ofSeconds(10)) - .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(1).build()) + .setRetryOptions( + 
RetryOptions.newBuilder() + .setMaximumAttempts(4) + .setInitialInterval(Duration.ofMillis(1)) + .build()) .validateAndBuildWithDefaults()); @Override - public String workflow(boolean useLocalActivity) { + public String workflow(String action) { Workflow.await(() -> unblocked); - if (useLocalActivity) { + if (action.equals("fail") && !didFail) { + didFail = true; + throw new RuntimeException("fail on purpose"); + } else if (action.equals("local-activity")) { localActivity.activity("test"); - } else { + } else if (action.equals("local-activity-fail")) { + localActivity.activity("fail"); + } else if (action.equals("activity")) { activity.activity("test"); } return "ok"; @@ -157,6 +198,10 @@ public static class TestActivityImpl implements TestActivity { public String activity(String input) { activityRunningLatch.countDown(); try { + ActivityExecutionContext executionContext = Activity.getExecutionContext(); + if (input.equals("fail") && executionContext.getInfo().getAttempt() < 4) { + throw new RuntimeException("fail on purpose"); + } activityBlockLatch.await(); } catch (InterruptedException e) { throw new RuntimeException(e); @@ -187,10 +232,7 @@ public void TestTaskSlotsEmittedOnStart() { // Start the worker testWorkflowRule.getTestEnvironment().start(); // All slots should be available - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + assertWorkerSlotCount(0, 0, 0); } @Test @@ -203,23 +245,17 @@ public void TestActivityTaskSlots() throws InterruptedException { WorkflowOptions.newBuilder() .setTaskQueue(testWorkflowRule.getTaskQueue()) .validateBuildWithDefaults()); - WorkflowClient.start(workflow::workflow, false); + WorkflowClient.start(workflow::workflow, "activity"); workflow.unblock(); activityRunningLatch.await(); // The activity slot should be taken and the workflow slot should not be taken - assertWorkerSlotCount( - 
MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE - 1, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + assertWorkerSlotCount(0, 1, 0); activityBlockLatch.countDown(); // Wait for the workflow to finish - workflow.workflow(false); + workflow.workflow("activity"); // All slots should be available - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + assertWorkerSlotCount(0, 0, 0); } @Test @@ -232,22 +268,85 @@ public void TestLocalActivityTaskSlots() throws InterruptedException { WorkflowOptions.newBuilder() .setTaskQueue(testWorkflowRule.getTaskQueue()) .validateBuildWithDefaults()); - WorkflowClient.start(workflow::workflow, true); + WorkflowClient.start(workflow::workflow, "local-activity"); workflow.unblock(); activityRunningLatch.await(); // The local activity slot should be taken and the workflow slot should be taken - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE - 1, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE - 1); + assertWorkerSlotCount(1, 0, 1); activityBlockLatch.countDown(); - // Wait for the workflow to finish - workflow.workflow(true); + workflow.workflow("local-activity"); + // All slots should be available + assertWorkerSlotCount(0, 0, 0); + } + + @Test + public void TestLocalActivityHeartbeat() throws InterruptedException { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflow workflow = + client.newWorkflowStub( + TestWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .setWorkflowTaskTimeout(Duration.ofSeconds(1)) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::workflow, "local-activity"); + workflow.unblock(); + activityRunningLatch.await(); + // The local activity slot should be taken and the 
workflow slot should be taken + assertWorkerSlotCount(1, 0, 1); + // Take long enough to heartbeat + Thread.sleep(1000); + assertWorkerSlotCount(1, 0, 1); + + activityBlockLatch.countDown(); + workflow.workflow("local-activity"); + // All slots should be available + assertWorkerSlotCount(0, 0, 0); + } + + @Test + public void TestLocalActivityFailsThenPasses() throws InterruptedException { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflow workflow = + client.newWorkflowStub( + TestWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .setWorkflowTaskTimeout(Duration.ofSeconds(1)) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::workflow, "local-activity-fail"); + workflow.unblock(); + activityRunningLatch.await(); + // The local activity slot should be taken and the workflow slot should be taken + assertWorkerSlotCount(1, 0, 1); + + activityBlockLatch.countDown(); + workflow.workflow("local-activity-fail"); + assertWorkerSlotCount(0, 0, 0); + // LA slots should only have been used once per attempt + assertEquals(4, localActivitySlotSupplier.usedCount.get()); + // We should have seen releases *per* attempt as well + assertEquals(4, localActivitySlotSupplier.releasedCount.get()); + } + + @Test + public void TestWFTFailure() { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflow workflow = + client.newWorkflowStub( + TestWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .setWorkflowTaskTimeout(Duration.ofMillis(500)) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::workflow, "fail"); + workflow.unblock(); + workflow.workflow("fail"); // All slots should be available - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, - 
MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + assertWorkerSlotCount(0, 0, 0); } } diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java new file mode 100644 index 0000000000..1d23e8f349 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java @@ -0,0 +1,285 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.worker; + +import static org.junit.Assert.assertEquals; + +import com.uber.m3.tally.RootScopeBuilder; +import com.uber.m3.tally.Scope; +import com.uber.m3.util.ImmutableMap; +import io.temporal.activity.ActivityInterface; +import io.temporal.activity.ActivityMethod; +import io.temporal.activity.ActivityOptions; +import io.temporal.activity.LocalActivityOptions; +import io.temporal.client.WorkflowClient; +import io.temporal.client.WorkflowOptions; +import io.temporal.common.RetryOptions; +import io.temporal.common.reporter.TestStatsReporter; +import io.temporal.testUtils.CountingSlotSupplier; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.worker.MetricsType; +import io.temporal.worker.WorkerOptions; +import io.temporal.worker.tuning.ActivitySlotInfo; +import io.temporal.worker.tuning.CompositeTuner; +import io.temporal.worker.tuning.LocalActivitySlotInfo; +import io.temporal.worker.tuning.WorkflowSlotInfo; +import io.temporal.workflow.*; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Semaphore; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class WorkflowSlotsSmallSizeTests { + private final int MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE = 2; + private final int MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE = 2; + private final int MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE = 2; + private final CountingSlotSupplier workflowTaskSlotSupplier = + new CountingSlotSupplier<>(MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE); + private final CountingSlotSupplier activityTaskSlotSupplier = + new CountingSlotSupplier<>(MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE); + private final CountingSlotSupplier localActivitySlotSupplier = + new 
CountingSlotSupplier<>(MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + private final TestStatsReporter reporter = new TestStatsReporter(); + static Semaphore parallelSemRunning = new Semaphore(0); + static Semaphore parallelSemBlocked = new Semaphore(0); + + Scope metricsScope = + new RootScopeBuilder().reporter(reporter).reportEvery(com.uber.m3.util.Duration.ofMillis(1)); + + @Parameterized.Parameter public boolean activitiesAreLocal; + + @Parameterized.Parameters() + public static Object[] data() { + return new Object[][] {{true}, {false}}; + } + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder() + .setWorkerOptions( + WorkerOptions.newBuilder() + .setWorkerTuner( + new CompositeTuner( + workflowTaskSlotSupplier, + activityTaskSlotSupplier, + localActivitySlotSupplier)) + .build()) + .setMetricsScope(metricsScope) + .setActivityImplementations(new TestActivitySemaphoreImpl()) + .setWorkflowTypes(ParallelActivities.class) + .setDoNotStart(true) + .build(); + + @Before + public void setup() { + reporter.flush(); + parallelSemRunning = new Semaphore(0); + parallelSemBlocked = new Semaphore(0); + } + + @After + public void tearDown() { + testWorkflowRule.getTestEnvironment().close(); + assertEquals( + workflowTaskSlotSupplier.reservedCount.get(), workflowTaskSlotSupplier.releasedCount.get()); + assertEquals( + activityTaskSlotSupplier.reservedCount.get(), activityTaskSlotSupplier.releasedCount.get()); + assertEquals( + localActivitySlotSupplier.reservedCount.get(), + localActivitySlotSupplier.releasedCount.get()); + } + + private void assertWorkerSlotCount(int worker, int activity, int localActivity) { + try { + // There can be a delay in metrics emission, another option if this + // is too flaky is to poll the metrics. 
+ Thread.sleep(100); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, getWorkerTags("WorkflowWorker"), worker); + // All slots should be available + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, getWorkerTags("ActivityWorker"), activity); + // All slots should be available + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + getWorkerTags("LocalActivityWorker"), + localActivity); + } + + @WorkflowInterface + public interface TestWorkflow { + @WorkflowMethod + String workflow(boolean useLocalActivity); + + @SignalMethod + void unblock(); + } + + public static class ParallelActivities implements TestWorkflow { + boolean unblocked = false; + + private final TestActivity activity = + Workflow.newActivityStub( + TestActivity.class, + ActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofSeconds(10)) + .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(1).build()) + .validateAndBuildWithDefaults()); + + private final TestActivity localActivity = + Workflow.newLocalActivityStub( + TestActivity.class, + LocalActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofSeconds(10)) + .setRetryOptions(RetryOptions.newBuilder().setMaximumAttempts(1).build()) + .validateAndBuildWithDefaults()); + + @Override + public String workflow(boolean useLocalActivity) { + Workflow.await(() -> unblocked); + List> laResults = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + if (useLocalActivity) { + laResults.add(Async.function(localActivity::activity, String.valueOf(i))); + } else { + laResults.add(Async.function(activity::activity, String.valueOf(i))); + } + } + Promise.allOf(laResults).get(); + return "ok"; + } + + @Override + public void unblock() { + unblocked = true; + } + } + + @ActivityInterface + public interface TestActivity { + + @ActivityMethod + String activity(String input); + } + + public static class 
TestActivitySemaphoreImpl implements TestActivity { + @Override + public String activity(String input) { + parallelSemRunning.release(); + try { + parallelSemBlocked.acquire(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + return ""; + } + } + + private Map getWorkerTags(String workerType) { + return ImmutableMap.of( + "worker_type", + workerType, + "task_queue", + testWorkflowRule.getTaskQueue(), + "namespace", + "UnitTest"); + } + + private void assertIntraWFTSlotCount(int allowedToRun) { + int runningLAs = activitiesAreLocal ? allowedToRun : 0; + int runningAs = activitiesAreLocal ? 0 : allowedToRun; + int runningWFTs = activitiesAreLocal ? 1 : 0; + assertWorkerSlotCount( + MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE - runningWFTs, + MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE - runningAs, + MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE - runningLAs); + } + + @Test + public void TestLocalActivitySlotAtLimit() throws InterruptedException { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflow workflow = + client.newWorkflowStub( + TestWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::workflow, activitiesAreLocal); + workflow.unblock(); + for (int i = 0; i < 5; i++) { + parallelSemRunning.acquire(2); + assertIntraWFTSlotCount(2); + parallelSemBlocked.release(2); + } + workflow.workflow(true); + // All slots should be available + assertWorkerSlotCount( + MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, + MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, + MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + } + + @Test + public void TestLocalActivitySlotHitsCapacity() throws InterruptedException { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflow workflow = + client.newWorkflowStub( + 
TestWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .setWorkflowTaskTimeout(Duration.ofSeconds(1)) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::workflow, activitiesAreLocal); + workflow.unblock(); + for (int i = 0; i < 5; i++) { + parallelSemRunning.acquire(2); + assertIntraWFTSlotCount(2); + parallelSemBlocked.release(2); + // Take too long (hit WFT timeout while trying to schedule LAs) + if (i == 2) { + Thread.sleep(1000); + } + } + // Because the WFT fails, the LAs may be re-run, and it's not clearly defined how many of them + // will, so ensure there are enough permits for the test to complete. What matters is that the + // slot counts end up at the appropriate values after everything finishes. + parallelSemBlocked.release(100); + workflow.workflow(true); + // All slots should be available + assertWorkerSlotCount( + MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, + MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, + MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java index a8645f2097..2c97e29534 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java @@ -42,6 +42,9 @@ import io.temporal.serviceclient.WorkflowServiceStubs; import io.temporal.testUtils.HistoryUtils; import io.temporal.worker.MetricsType; +import io.temporal.worker.tuning.FixedSizeSlotSupplier; +import io.temporal.worker.tuning.SlotSupplier; +import io.temporal.worker.tuning.WorkflowSlotInfo; import java.time.Duration; import java.util.UUID; import java.util.concurrent.*; @@ -71,6 +74,7 @@ public void concurrentPollRequestLockTest() throws Exception { new RootScopeBuilder() .reporter(reporter) .reportEvery(com.uber.m3.util.Duration.ofMillis(1)); + 
SlotSupplier slotSupplier = new FixedSizeSlotSupplier<>(100); WorkflowExecutorCache cache = new WorkflowExecutorCache(10, runLockManager, metricsScope); WorkflowTaskHandler taskHandler = mock(WorkflowTaskHandler.class); @@ -92,7 +96,8 @@ public void concurrentPollRequestLockTest() throws Exception { runLockManager, cache, taskHandler, - eagerActivityDispatcher); + eagerActivityDispatcher, + slotSupplier); WorkflowServiceGrpc.WorkflowServiceBlockingStub blockingStub = mock(WorkflowServiceGrpc.WorkflowServiceBlockingStub.class); @@ -107,11 +112,17 @@ public void concurrentPollRequestLockTest() throws Exception { .setWorkflowType(WorkflowType.newBuilder().setName(WORKFLOW_TYPE).build()) .build(); + CountDownLatch blockFirstPollLatch = new CountDownLatch(1); CountDownLatch pollTaskQueueLatch = new CountDownLatch(1); CountDownLatch blockPollTaskQueueLatch = new CountDownLatch(1); when(blockingStub.pollWorkflowTaskQueue(any(PollWorkflowTaskQueueRequest.class))) - .thenReturn(pollResponse) + .thenAnswer( + (Answer) + invocation -> { + blockFirstPollLatch.await(); + return pollResponse; + }) .thenReturn(pollResponse) .thenAnswer( (Answer) @@ -175,6 +186,8 @@ public void concurrentPollRequestLockTest() throws Exception { MetricsType.WORKER_TASK_SLOTS_AVAILABLE, ImmutableMap.of("worker_type", "WorkflowWorker"), 100.0); + // Unblock the first poll + blockFirstPollLatch.countDown(); // Wait until we have got all the polls pollTaskQueueLatch.await(); // Wait until the worker handles at least one WFT @@ -221,6 +234,7 @@ public void respondWorkflowTaskFailureMetricTest() throws Exception { .reporter(reporter) .reportEvery(com.uber.m3.util.Duration.ofMillis(1)); WorkflowExecutorCache cache = new WorkflowExecutorCache(10, runLockManager, metricsScope); + SlotSupplier slotSupplier = new FixedSizeSlotSupplier<>(10); WorkflowTaskHandler taskHandler = mock(WorkflowTaskHandler.class); when(taskHandler.isAnyTypeSupported()).thenReturn(true); @@ -241,7 +255,8 @@ public void 
respondWorkflowTaskFailureMetricTest() throws Exception { runLockManager, cache, taskHandler, - eagerActivityDispatcher); + eagerActivityDispatcher, + slotSupplier); WorkflowServiceGrpc.WorkflowServiceBlockingStub blockingStub = mock(WorkflowServiceGrpc.WorkflowServiceBlockingStub.class); @@ -315,6 +330,8 @@ public void resetWorkflowIdFromWorkflowTaskTest() throws Throwable { Scope metricScope = new NoopScope(); WorkflowExecutorCache cache = new WorkflowExecutorCache(1, runLockManager, metricScope); + SlotSupplier slotSupplier = new FixedSizeSlotSupplier<>(1); + WorkflowTaskHandler rootTaskHandler = new ReplayWorkflowTaskHandler( "namespace", @@ -369,7 +386,8 @@ public boolean isAnyTypeSupported() { runLockManager, cache, taskHandler, - eagerActivityDispatcher); + eagerActivityDispatcher, + slotSupplier); WorkflowServiceGrpc.WorkflowServiceBlockingStub blockingStub = mock(WorkflowServiceGrpc.WorkflowServiceBlockingStub.class); diff --git a/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java b/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java new file mode 100644 index 0000000000..e7a6bdbbd9 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.testUtils; + +import io.temporal.worker.tuning.*; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; + +public class CountingSlotSupplier extends FixedSizeSlotSupplier { + public final AtomicInteger reservedCount = new AtomicInteger(); + public final AtomicInteger releasedCount = new AtomicInteger(); + public final AtomicInteger usedCount = new AtomicInteger(); + + public CountingSlotSupplier(int numSlots) { + super(numSlots); + } + + @Override + public SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException { + SlotPermit p = super.reserveSlot(ctx); + reservedCount.incrementAndGet(); + return p; + } + + @Override + public Optional tryReserveSlot(SlotReserveContext ctx) { + Optional p = super.tryReserveSlot(ctx); + if (p.isPresent()) { + reservedCount.incrementAndGet(); + } + return p; + } + + @Override + public void markSlotUsed(SlotMarkUsedContext ctx) { + usedCount.incrementAndGet(); + super.markSlotUsed(ctx); + } + + @Override + public void releaseSlot(SlotReleaseContext ctx) { + super.releaseSlot(ctx); + releasedCount.incrementAndGet(); + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/worker/IndependentResourceBasedTests.java b/temporal-sdk/src/test/java/io/temporal/worker/IndependentResourceBasedTests.java new file mode 100644 index 0000000000..c78f943a14 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/worker/IndependentResourceBasedTests.java @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.worker; + +interface IndependentResourceBasedTests {} diff --git a/temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java b/temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java new file mode 100644 index 0000000000..98b850ed68 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.worker; + +import static io.temporal.testing.internal.SDKTestWorkflowRule.NAMESPACE; + +import com.uber.m3.tally.RootScopeBuilder; +import com.uber.m3.util.ImmutableMap; +import io.temporal.activity.ActivityInterface; +import io.temporal.activity.ActivityOptions; +import io.temporal.activity.LocalActivityOptions; +import io.temporal.common.reporter.TestStatsReporter; +import io.temporal.serviceclient.MetricsTag; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.worker.tuning.*; +import io.temporal.workflow.*; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +public class ResourceBasedTunerTests { + + private final TestStatsReporter reporter = new TestStatsReporter(); + private static final Map TAGS_NAMESPACE = + new ImmutableMap.Builder().putAll(MetricsTag.defaultTags(NAMESPACE)).build(); + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder() + .setWorkerOptions( + WorkerOptions.newBuilder() + .setWorkerTuner( + ResourceBasedTuner.newBuilder() + .setControllerOptions( + ResourceBasedControllerOptions.newBuilder(0.7, 0.7).build()) + .build()) + .build()) + .setActivityImplementations(new ActivitiesImpl()) + .setWorkflowTypes(ResourceTunerWorkflowImpl.class) + .setMetricsScope( + new RootScopeBuilder() + .reporter(reporter) + .reportEvery(com.uber.m3.util.Duration.ofMillis(10))) + .build(); + + @Test + public void canRunWithResourceBasedTuner() throws InterruptedException { + ResourceTunerWorkflow workflow = testWorkflowRule.newWorkflowStub(ResourceTunerWorkflow.class); + workflow.execute(5, 5, 1000); + Map nsAndTaskQueue = + new ImmutableMap.Builder() + .putAll(TAGS_NAMESPACE) + .put(MetricsTag.TASK_QUEUE, testWorkflowRule.getTaskQueue()) + .build(); + reporter.assertGauge(MetricsType.RESOURCE_MEM_USAGE, nsAndTaskQueue, 
(val) -> val > 0); + reporter.assertGauge( + MetricsType.RESOURCE_CPU_USAGE, + nsAndTaskQueue, + (val) -> { + // CPU use can be so low as to be 0, so can't really make any assertion here. + return true; + }); + reporter.assertGauge(MetricsType.RESOURCE_MEM_PID, nsAndTaskQueue, (val) -> true); + reporter.assertGauge(MetricsType.RESOURCE_CPU_PID, nsAndTaskQueue, (val) -> true); + // Verify no slots are leaked + Thread.sleep(100); // wait a beat for slots to actually get released + reporter.assertGauge(MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("WorkflowWorker"), 0); + reporter.assertGauge(MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("ActivityWorker"), 0); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("LocalActivityWorker"), 0); + } + + @Category(IndependentResourceBasedTests.class) + @Test(timeout = 300 * 1000) + public void canRunHeavyMemoryWithResourceBasedTuner() { + ResourceTunerWorkflow workflow = testWorkflowRule.newWorkflowStub(ResourceTunerWorkflow.class); + workflow.execute(50, 50, 30000000); + } + + @WorkflowInterface + public interface ResourceTunerWorkflow { + @WorkflowMethod + String execute(int numActivities, int localActivities, int memCeiling); + } + + public static class ResourceTunerWorkflowImpl implements ResourceTunerWorkflow { + @Override + public String execute(int numActivities, int localActivities, int memCeiling) { + SleepActivity activity = + Workflow.newActivityStub( + SleepActivity.class, + ActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofMinutes(1)) + .setHeartbeatTimeout(Duration.ofSeconds(20)) + .build()); + + SleepActivity localActivity = + Workflow.newLocalActivityStub( + SleepActivity.class, + LocalActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofMinutes(1)) + .build()); + + List> promises = new ArrayList<>(); + for (int j = 0; j < numActivities; j++) { + Promise promise = Async.procedure(activity::useResources, memCeiling); + promises.add(promise); + } + for 
(int j = 0; j < localActivities; j++) { + Promise promise = Async.procedure(localActivity::useResources, memCeiling); + promises.add(promise); + } + + for (Promise promise : promises) { + promise.get(); + } + + return "I'm done"; + } + } + + @ActivityInterface + public interface SleepActivity { + void useResources(int memCeiling); + } + + public static class ActivitiesImpl implements SleepActivity { + @Override + public void useResources(int memCeiling) { + try { + int randNumBytes = (int) (Math.random() * memCeiling); + @SuppressWarnings("unused") + byte[] bytes = new byte[randNumBytes]; + // Need to wait at least a second to give metrics a chance to be reported + // (and also simulate some actual work in the activity) + Thread.sleep(1100); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + + private Map getWorkerTags(String workerType) { + return ImmutableMap.of( + "worker_type", + workerType, + "task_queue", + testWorkflowRule.getTaskQueue(), + "namespace", + "UnitTest"); + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java b/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java index 934e014cdb..7b051de07b 100644 --- a/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java +++ b/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java @@ -21,7 +21,9 @@ package io.temporal.worker; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import io.temporal.worker.tuning.*; import org.junit.Test; public class WorkerOptionsTest { @@ -47,4 +49,49 @@ public void verifyWorkerOptionsEquality() { WorkerOptions w2 = WorkerOptions.newBuilder().build(); assertEquals(w1, w2); } + + @Test + public void canBuildMixedSlotSupplierTuner() { + ResourceBasedController resourceController = + ResourceBasedController.newSystemInfoController( + ResourceBasedControllerOptions.newBuilder(0.5, 0.5).build()); + + SlotSupplier 
workflowTaskSlotSupplier = new FixedSizeSlotSupplier<>(10); + SlotSupplier activityTaskSlotSupplier = + ResourceBasedSlotSupplier.createForActivity( + resourceController, ResourceBasedTuner.DEFAULT_ACTIVITY_SLOT_OPTIONS); + SlotSupplier localActivitySlotSupplier = + ResourceBasedSlotSupplier.createForLocalActivity( + resourceController, ResourceBasedTuner.DEFAULT_ACTIVITY_SLOT_OPTIONS); + + WorkerOptions.newBuilder() + .setWorkerTuner( + new CompositeTuner( + workflowTaskSlotSupplier, activityTaskSlotSupplier, localActivitySlotSupplier)) + .build(); + } + + @Test + public void throwsIfResourceControllerIsNotSame() { + ResourceBasedController resourceController1 = + ResourceBasedController.newSystemInfoController( + ResourceBasedControllerOptions.newBuilder(0.5, 0.5).build()); + ResourceBasedController resourceController2 = + ResourceBasedController.newSystemInfoController( + ResourceBasedControllerOptions.newBuilder(0.2, 0.3).build()); + + SlotSupplier workflowTaskSlotSupplier = new FixedSizeSlotSupplier<>(10); + SlotSupplier activityTaskSlotSupplier = + ResourceBasedSlotSupplier.createForActivity( + resourceController1, ResourceBasedTuner.DEFAULT_ACTIVITY_SLOT_OPTIONS); + SlotSupplier localActivitySlotSupplier = + ResourceBasedSlotSupplier.createForLocalActivity( + resourceController2, ResourceBasedTuner.DEFAULT_ACTIVITY_SLOT_OPTIONS); + + assertThrows( + IllegalArgumentException.class, + () -> + new CompositeTuner( + workflowTaskSlotSupplier, activityTaskSlotSupplier, localActivitySlotSupplier)); + } } diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/EagerWorkflowTaskDispatchTest.java b/temporal-sdk/src/test/java/io/temporal/workflow/EagerWorkflowTaskDispatchTest.java index e8fc4cb12f..5ad08635ac 100644 --- a/temporal-sdk/src/test/java/io/temporal/workflow/EagerWorkflowTaskDispatchTest.java +++ b/temporal-sdk/src/test/java/io/temporal/workflow/EagerWorkflowTaskDispatchTest.java @@ -31,9 +31,12 @@ import io.temporal.client.WorkflowOptions; import 
io.temporal.client.WorkflowStub; import io.temporal.serviceclient.WorkflowServiceStubsOptions; +import io.temporal.testUtils.CountingSlotSupplier; import io.temporal.testing.internal.SDKTestWorkflowRule; import io.temporal.worker.Worker; import io.temporal.worker.WorkerFactory; +import io.temporal.worker.WorkerOptions; +import io.temporal.worker.tuning.*; import io.temporal.workflow.shared.TestWorkflows; import java.util.ArrayList; import java.util.Collections; @@ -44,6 +47,12 @@ public class EagerWorkflowTaskDispatchTest { private static final StartCallInterceptor START_CALL_INTERCEPTOR = new StartCallInterceptor(); + private final CountingSlotSupplier workflowTaskSlotSupplier = + new CountingSlotSupplier<>(100); + private final CountingSlotSupplier activityTaskSlotSupplier = + new CountingSlotSupplier<>(100); + private final CountingSlotSupplier localActivitySlotSupplier = + new CountingSlotSupplier<>(100); @Rule public SDKTestWorkflowRule testWorkflowRule = @@ -53,7 +62,7 @@ public class EagerWorkflowTaskDispatchTest { .setGrpcClientInterceptors(Collections.singletonList(START_CALL_INTERCEPTOR)) .build()) .setWorkflowTypes(EagerWorkflowTaskWorkflowImpl.class) - // stop built-in worker factory to it's not in our way + // stop built-in worker factory so it's not in our way .setDoNotStart(true) .build(); @@ -65,6 +74,13 @@ public void tearDown() throws Exception { this.workerFactories.forEach(wf -> wf.awaitTermination(10, TimeUnit.SECONDS)); this.workerFactories.clear(); START_CALL_INTERCEPTOR.clear(); + assertEquals( + workflowTaskSlotSupplier.reservedCount.get(), workflowTaskSlotSupplier.releasedCount.get()); + assertEquals( + activityTaskSlotSupplier.reservedCount.get(), activityTaskSlotSupplier.releasedCount.get()); + assertEquals( + localActivitySlotSupplier.reservedCount.get(), + localActivitySlotSupplier.releasedCount.get()); } private WorkerFactory setupWorkerFactory( @@ -78,7 +94,16 @@ private WorkerFactory setupWorkerFactory( WorkerFactory workerFactory = 
WorkerFactory.newInstance(workflowClient); workerFactories.add(workerFactory); - Worker worker = workerFactory.newWorker(testWorkflowRule.getTaskQueue()); + Worker worker = + workerFactory.newWorker( + testWorkflowRule.getTaskQueue(), + WorkerOptions.newBuilder() + .setWorkerTuner( + new CompositeTuner( + workflowTaskSlotSupplier, + activityTaskSlotSupplier, + localActivitySlotSupplier)) + .build()); if (registerWorkflows) { worker.registerWorkflowImplementationTypes(EagerWorkflowTaskWorkflowImpl.class); } diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/activityTests/EagerActivityDispatchingTest.java b/temporal-sdk/src/test/java/io/temporal/workflow/activityTests/EagerActivityDispatchingTest.java index 1ca987eed0..a64e381a73 100644 --- a/temporal-sdk/src/test/java/io/temporal/workflow/activityTests/EagerActivityDispatchingTest.java +++ b/temporal-sdk/src/test/java/io/temporal/workflow/activityTests/EagerActivityDispatchingTest.java @@ -30,12 +30,17 @@ import io.temporal.client.WorkflowStub; import io.temporal.common.WorkflowExecutionHistory; import io.temporal.internal.Config; +import io.temporal.testUtils.CountingSlotSupplier; import io.temporal.testing.TestWorkflowEnvironment; import io.temporal.testing.internal.ExternalServiceTestConfigurator; import io.temporal.testing.internal.SDKTestWorkflowRule; import io.temporal.worker.Worker; import io.temporal.worker.WorkerFactory; import io.temporal.worker.WorkerOptions; +import io.temporal.worker.tuning.ActivitySlotInfo; +import io.temporal.worker.tuning.CompositeTuner; +import io.temporal.worker.tuning.LocalActivitySlotInfo; +import io.temporal.worker.tuning.WorkflowSlotInfo; import io.temporal.workflow.*; import io.temporal.workflow.shared.TestActivities; import io.temporal.workflow.shared.TestActivities.TestActivitiesImpl; @@ -52,6 +57,10 @@ public class EagerActivityDispatchingTest { private ArrayList workerFactories; private final TestActivitiesImpl activitiesImpl = new TestActivitiesImpl(); + 
CountingSlotSupplier workflowTaskSlotSupplier = new CountingSlotSupplier<>(100); + CountingSlotSupplier activityTaskSlotSupplier = new CountingSlotSupplier<>(100); + CountingSlotSupplier localActivitySlotSupplier = + new CountingSlotSupplier<>(100); @Before public void setUp() throws Exception { @@ -69,10 +78,17 @@ public void tearDown() throws Exception { this.workerFactories = null; env.close(); + assertEquals( + workflowTaskSlotSupplier.reservedCount.get(), workflowTaskSlotSupplier.releasedCount.get()); + assertEquals( + activityTaskSlotSupplier.reservedCount.get(), activityTaskSlotSupplier.releasedCount.get()); + assertEquals( + localActivitySlotSupplier.reservedCount.get(), + localActivitySlotSupplier.releasedCount.get()); } private void setupWorker( - String workerIdentity, WorkerOptions workerOptions, boolean registerWorkflows) { + String workerIdentity, WorkerOptions.Builder workerOptions, boolean registerWorkflows) { WorkflowClient workflowClient = WorkflowClient.newInstance( env.getWorkflowServiceStubs(), @@ -80,7 +96,10 @@ private void setupWorker( WorkerFactory workerFactory = WorkerFactory.newInstance(workflowClient); workerFactories.add(workerFactory); - Worker worker = workerFactory.newWorker(TASK_QUEUE, workerOptions); + workerOptions.setWorkerTuner( + new CompositeTuner( + workflowTaskSlotSupplier, activityTaskSlotSupplier, localActivitySlotSupplier)); + Worker worker = workerFactory.newWorker(TASK_QUEUE, workerOptions.build()); worker.registerActivitiesImplementations(activitiesImpl); if (registerWorkflows) worker.registerWorkflowImplementationTypes(EagerActivityTestWorkflowImpl.class); @@ -99,13 +118,10 @@ public void testEagerActivities() { WorkerOptions.newBuilder() .setMaxConcurrentWorkflowTaskPollers(2) .setMaxConcurrentActivityTaskPollers(1) - .setDisableEagerExecution(false) - .build(), + .setDisableEagerExecution(false), true); setupWorker( - "worker2", - WorkerOptions.newBuilder().setMaxConcurrentActivityTaskPollers(2).build(), - false); 
+ "worker2", WorkerOptions.newBuilder().setMaxConcurrentActivityTaskPollers(2), false); EagerActivityTestWorkflow workflowStub = env.getWorkflowClient() @@ -139,13 +155,10 @@ public void testNoEagerActivitiesIfDisabledOnWorker() { WorkerOptions.newBuilder() .setMaxConcurrentWorkflowTaskPollers(2) .setMaxConcurrentActivityTaskPollers(1) - .setDisableEagerExecution(true) - .build(), + .setDisableEagerExecution(true), true); setupWorker( - "worker2", - WorkerOptions.newBuilder().setMaxConcurrentActivityTaskPollers(2).build(), - false); + "worker2", WorkerOptions.newBuilder().setMaxConcurrentActivityTaskPollers(2), false); EagerActivityTestWorkflow workflowStub = env.getWorkflowClient() @@ -179,13 +192,10 @@ public void testNoEagerActivitiesIfDisabledOnActivity() { WorkerOptions.newBuilder() .setMaxConcurrentWorkflowTaskPollers(2) .setMaxConcurrentActivityTaskPollers(1) - .setDisableEagerExecution(false) - .build(), + .setDisableEagerExecution(false), true); setupWorker( - "worker2", - WorkerOptions.newBuilder().setMaxConcurrentActivityTaskPollers(2).build(), - false); + "worker2", WorkerOptions.newBuilder().setMaxConcurrentActivityTaskPollers(2), false); EagerActivityTestWorkflow workflowStub = env.getWorkflowClient() diff --git a/temporal-testing/src/main/java/io/temporal/testing/TestActivityEnvironmentInternal.java b/temporal-testing/src/main/java/io/temporal/testing/TestActivityEnvironmentInternal.java index 1760edf5c1..d93c5dd671 100644 --- a/temporal-testing/src/main/java/io/temporal/testing/TestActivityEnvironmentInternal.java +++ b/temporal-testing/src/main/java/io/temporal/testing/TestActivityEnvironmentInternal.java @@ -354,11 +354,12 @@ public LocalActivityOutput executeLocalActivity(LocalActivityInput i) */ private Result executeActivity( PollActivityTaskQueueResponse activityTask, boolean localActivity) { + //noinspection DataFlowIssue -- no permit for the LA in this test Future activityFuture = activityWorkerExecutor.submit( () -> 
activityTaskHandler.handle( - new ActivityTask(activityTask, () -> {}), + new ActivityTask(activityTask, null, () -> {}), testEnvironmentOptions.getMetricsScope(), localActivity)); From eabd51fbdc62a7435c8f4d8dc1ac24da26f6022d Mon Sep 17 00:00:00 2001 From: Dustin Dobervich Date: Wed, 24 Jul 2024 23:48:27 +0200 Subject: [PATCH 03/25] Ensure identity copied to Builder from source WorkerOptions (#2151) --- .../io/temporal/worker/WorkerOptions.java | 1 + .../io/temporal/worker/WorkerOptionsTest.java | 58 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java b/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java index d9fce2e5ec..12306d2c69 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/WorkerOptions.java @@ -109,6 +109,7 @@ private Builder(WorkerOptions o) { this.useBuildIdForVersioning = o.useBuildIdForVersioning; this.buildId = o.buildId; this.stickyTaskQueueDrainTimeout = o.stickyTaskQueueDrainTimeout; + this.identity = o.identity; } /** diff --git a/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java b/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java index 7b051de07b..8bc91d2741 100644 --- a/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java +++ b/temporal-sdk/src/test/java/io/temporal/worker/WorkerOptionsTest.java @@ -21,9 +21,12 @@ package io.temporal.worker; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.mock; import io.temporal.worker.tuning.*; +import java.time.Duration; import org.junit.Test; public class WorkerOptionsTest { @@ -50,6 +53,61 @@ public void verifyWorkerOptionsEquality() { assertEquals(w1, w2); } + @Test + public void verifyNewBuilderFromExistingWorkerOptions() { + WorkerOptions w1 = + 
WorkerOptions.newBuilder() + .setMaxWorkerActivitiesPerSecond(100) + .setMaxConcurrentActivityExecutionSize(1000) + .setMaxConcurrentWorkflowTaskExecutionSize(500) + .setMaxConcurrentLocalActivityExecutionSize(200) + .setWorkerTuner(mock(WorkerTuner.class)) + .setMaxTaskQueueActivitiesPerSecond(50) + .setMaxConcurrentWorkflowTaskPollers(4) + .setMaxConcurrentActivityTaskPollers(3) + .setLocalActivityWorkerOnly(false) + .setDefaultDeadlockDetectionTimeout(2) + .setMaxHeartbeatThrottleInterval(Duration.ofSeconds(10)) + .setDefaultHeartbeatThrottleInterval(Duration.ofSeconds(7)) + .setStickyQueueScheduleToStartTimeout(Duration.ofSeconds(60)) + .setDisableEagerExecution(false) + .setUseBuildIdForVersioning(false) + .setBuildId("build-id") + .setStickyTaskQueueDrainTimeout(Duration.ofSeconds(15)) + .setIdentity("worker-identity") + .build(); + + WorkerOptions w2 = WorkerOptions.newBuilder(w1).build(); + + assertEquals(w1.getMaxWorkerActivitiesPerSecond(), w2.getMaxWorkerActivitiesPerSecond(), 0); + assertEquals( + w1.getMaxConcurrentActivityExecutionSize(), w2.getMaxConcurrentActivityExecutionSize()); + assertEquals( + w1.getMaxConcurrentWorkflowTaskExecutionSize(), + w2.getMaxConcurrentWorkflowTaskExecutionSize()); + assertEquals( + w1.getMaxConcurrentLocalActivityExecutionSize(), + w2.getMaxConcurrentLocalActivityExecutionSize()); + assertSame(w1.getWorkerTuner(), w2.getWorkerTuner()); + assertEquals( + w1.getMaxTaskQueueActivitiesPerSecond(), w2.getMaxTaskQueueActivitiesPerSecond(), 0); + assertEquals( + w1.getMaxConcurrentWorkflowTaskPollers(), w2.getMaxConcurrentWorkflowTaskPollers()); + assertEquals( + w1.getMaxConcurrentActivityTaskPollers(), w2.getMaxConcurrentActivityTaskPollers()); + assertEquals(w1.isLocalActivityWorkerOnly(), w2.isLocalActivityWorkerOnly()); + assertEquals(w1.getMaxHeartbeatThrottleInterval(), w2.getMaxHeartbeatThrottleInterval()); + assertEquals( + w1.getDefaultHeartbeatThrottleInterval(), w2.getDefaultHeartbeatThrottleInterval()); + 
assertEquals( + w1.getStickyQueueScheduleToStartTimeout(), w2.getStickyQueueScheduleToStartTimeout()); + assertEquals(w1.isEagerExecutionDisabled(), w2.isEagerExecutionDisabled()); + assertEquals(w1.isUsingBuildIdForVersioning(), w2.isUsingBuildIdForVersioning()); + assertEquals(w1.getBuildId(), w2.getBuildId()); + assertEquals(w1.getStickyTaskQueueDrainTimeout(), w2.getStickyTaskQueueDrainTimeout()); + assertEquals(w1.getIdentity(), w2.getIdentity()); + } + @Test public void canBuildMixedSlotSupplierTuner() { ResourceBasedController resourceController = From 6b39e44738f62611e8ddcb62a7f61e5996d355f4 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Tue, 30 Jul 2024 08:11:32 -0700 Subject: [PATCH 04/25] Align Update API across test server and real server (#2153) Align test server update APIs with real server update APIs --- .../statemachines/WorkflowStateMachines.java | 5 + .../internal/testing/WorkflowTestingTest.java | 6 +- .../internal/testservice/StateMachines.java | 53 +- .../TestWorkflowMutableStateImpl.java | 316 +++++--- .../testservice/TestWorkflowService.java | 61 +- .../functional/WorkflowUpdateTest.java | 761 ++++++++++++++++++ .../functional/common/TestWorkflows.java | 26 +- 7 files changed, 1059 insertions(+), 169 deletions(-) create mode 100644 temporal-test-server/src/test/java/io/temporal/testserver/functional/WorkflowUpdateTest.java diff --git a/temporal-sdk/src/main/java/io/temporal/internal/statemachines/WorkflowStateMachines.java b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/WorkflowStateMachines.java index 89e87ce340..067082f88a 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/statemachines/WorkflowStateMachines.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/WorkflowStateMachines.java @@ -387,6 +387,8 @@ private void handleSingleEventLookahead(HistoryEvent event) { // other state machines because a rejected update produces no event in history. 
protocolStateMachines.entrySet().removeIf(entry -> entry.getValue().isFinalState()); break; + default: + break; } } @@ -625,6 +627,9 @@ public List takeMessages() { List result = new ArrayList<>(messageOutbox.size()); result.addAll(messageOutbox); messageOutbox.clear(); + // Remove any finished update protocol state machines. We can't remove them on an event like + // other state machines because a rejected update produces no event in history. + protocolStateMachines.entrySet().removeIf(entry -> entry.getValue().isFinalState()); return result; } diff --git a/temporal-sdk/src/test/java/io/temporal/internal/testing/WorkflowTestingTest.java b/temporal-sdk/src/test/java/io/temporal/internal/testing/WorkflowTestingTest.java index 3eac25eb24..35e952b154 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/testing/WorkflowTestingTest.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/testing/WorkflowTestingTest.java @@ -60,11 +60,7 @@ import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Rule; -import org.junit.Test; +import org.junit.*; import org.junit.rules.TestWatcher; import org.junit.rules.Timeout; import org.junit.runner.Description; diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java index e26641552b..c64059b255 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java @@ -423,19 +423,19 @@ public String toString() { /** Represents an accepted update workflow execution request */ static final class UpdateWorkflowExecutionData { final String id; - final CompletableFuture acceptance; - final CompletableFuture complete; + final 
CompletableFuture accepted; + final CompletableFuture outcome; final Request initialRequest; public UpdateWorkflowExecutionData( String id, Request initialRequest, - CompletableFuture acceptance, - CompletableFuture complete) { + CompletableFuture accepted, + CompletableFuture outcome) { this.id = id; this.initialRequest = initialRequest; - this.acceptance = acceptance; - this.complete = complete; + this.accepted = accepted; + this.outcome = outcome; } @Override @@ -560,10 +560,10 @@ public static StateMachine newChildWorkflowStateMachine( public static StateMachine newUpdateWorkflowExecution( String updateId, Request initialRequest, - CompletableFuture acceptance, - CompletableFuture complete) { + CompletableFuture accepted, + CompletableFuture outcome) { return new StateMachine<>( - new UpdateWorkflowExecutionData(updateId, initialRequest, acceptance, complete)) + new UpdateWorkflowExecutionData(updateId, initialRequest, accepted, outcome)) .add(NONE, START, STARTED, StateMachines::acceptUpdate) .add(STARTED, COMPLETE, COMPLETED, StateMachines::completeUpdate); } @@ -1805,19 +1805,10 @@ private static void acceptUpdate( if (!ctx.getWorkflowMutableState().isTerminalState()) { ctx.addEvent(event); } - - UpdateWorkflowExecutionResponse response = - UpdateWorkflowExecutionResponse.newBuilder() - .setUpdateRef( - UpdateRef.newBuilder() - .setWorkflowExecution(ctx.getExecution()) - .setUpdateId(data.id)) - .setStage( - UpdateWorkflowExecutionLifecycleStage - .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED) - .build(); - - data.acceptance.complete(response); + ctx.onCommit( + (int historySize) -> { + data.accepted.complete(true); + }); } catch (InvalidProtocolBufferException e) { throw new RuntimeException(e); } @@ -1849,20 +1840,10 @@ private static void completeUpdate( if (!ctx.getWorkflowMutableState().isTerminalState()) { ctx.addEvent(event); } - - UpdateWorkflowExecutionResponse updateResponse = - UpdateWorkflowExecutionResponse.newBuilder() - .setUpdateRef( - 
UpdateRef.newBuilder() - .setWorkflowExecution(ctx.getExecution()) - .setUpdateId(data.id)) - .setOutcome(response.getOutcome()) - .setStage( - UpdateWorkflowExecutionLifecycleStage - .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED) - .build(); - - data.complete.complete(updateResponse); + ctx.onCommit( + (int historySize) -> { + data.outcome.complete(response.getOutcome()); + }); } catch (InvalidProtocolBufferException e) { throw new RuntimeException(e); } diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java index 0595d61633..757e344257 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java @@ -20,6 +20,7 @@ package io.temporal.internal.testservice; +import static io.temporal.api.enums.v1.UpdateWorkflowExecutionLifecycleStage.*; import static io.temporal.internal.testservice.CronUtils.getBackoffInterval; import static io.temporal.internal.testservice.StateMachines.DEFAULT_WORKFLOW_EXECUTION_TIMEOUT_MILLISECONDS; import static io.temporal.internal.testservice.StateMachines.DEFAULT_WORKFLOW_TASK_TIMEOUT_MILLISECONDS; @@ -101,12 +102,7 @@ import io.temporal.serviceclient.StatusUtils; import java.time.Duration; import java.util.*; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ForkJoinPool; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; +import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; @@ -1602,7 +1598,7 @@ private void processAcceptanceMessage( 
update = StateMachines.newUpdateWorkflowExecution( - protocolInstanceId, u.getRequest().getRequest(), u.getAcceptance(), u.getCompletion()); + protocolInstanceId, u.getRequest().getRequest(), u.getAccepted(), u.getOutcome()); updates.put(protocolInstanceId, update); update.action(StateMachines.Action.START, ctx, msg, workflowTaskCompletedId); } @@ -1620,18 +1616,11 @@ private void processRejectionMessage( UpdateWorkflowExecution u = workflowTaskStateMachine.getData().updateRequest.get(msg.getProtocolInstanceId()); // If an update validation fail, do not write to history and do not store the update. - UpdateWorkflowExecutionResponse response = - UpdateWorkflowExecutionResponse.newBuilder() - .setUpdateRef( - UpdateRef.newBuilder() - .setUpdateId(rejection.getRejectedRequest().getMeta().getUpdateId()) - .setWorkflowExecution(ctx.getExecution())) - .setOutcome(Outcome.newBuilder().setFailure(rejection.getFailure()).build()) - .setStage( - UpdateWorkflowExecutionLifecycleStage - .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED) - .build(); - u.getAcceptance().complete(response); + ctx.onCommit( + (int historySize) -> { + u.getOutcome().complete(Outcome.newBuilder().setFailure(rejection.getFailure()).build()); + u.getAccepted().complete(false); + }); } private void processOutcomeMessage( @@ -2094,39 +2083,68 @@ public void signalFromWorkflow(SignalExternalWorkflowExecutionCommandAttributes @Override public UpdateWorkflowExecutionResponse updateWorkflowExecution( UpdateWorkflowExecutionRequest request, Deadline deadline) { + if (request + .getWaitPolicy() + .getLifecycleStage() + .equals(UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_UNSPECIFIED) + || !request.hasWaitPolicy()) { + throw Status.INVALID_ARGUMENT + .withDescription("LifeCycle stage is required") + .asRuntimeException(); + } + if (request + .getWaitPolicy() + .getLifecycleStage() + .equals(UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED)) { + throw Status.PERMISSION_DENIED + .withDescription("Admitted 
stage is not supported") + .asRuntimeException(); + } + // If the workflow is in a terminal state, return the current state of the update if it + // completed + if (isTerminalState()) { + UpdateHandle updateHandle = getUpdate(request.getRequest().getMeta().getUpdateId()); + if (updateHandle.getOutcome().isDone()) { + return UpdateWorkflowExecutionResponse.newBuilder() + .setUpdateRef(updateHandle.getRef()) + .setStage(updateHandle.getStage()) + .setOutcome(updateHandle.getOutcomeNow()) + .build(); + } else { + throw Status.NOT_FOUND + .withDescription("workflow execution already completed") + .asRuntimeException(); + } + } + // Now that we have validated the request we can create the update handle and wait for it to + // reach the desired stage. + UpdateHandle updateHandle = getOrCreateUpdate(request); try { - UpdateHandle updateHandle = getOrCreateUpdate(request); - switch (request.getWaitPolicy().getLifecycleStage()) { - case UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED: - UpdateWorkflowExecutionResponse acceptResponse = - updateHandle - .getAcceptance() - .get( - deadline != null - ? deadline.timeRemaining(TimeUnit.MILLISECONDS) - : Long.MAX_VALUE, - TimeUnit.MILLISECONDS); - if (acceptResponse.getOutcome().hasFailure()) { - return acceptResponse; - } - return updateHandle - .getCompletion() - .get( - deadline != null ? deadline.timeRemaining(TimeUnit.MILLISECONDS) : Long.MAX_VALUE, - TimeUnit.MILLISECONDS); - case UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED: - return updateHandle - .getAcceptance() - .get( - deadline != null ? 
deadline.timeRemaining(TimeUnit.MILLISECONDS) : Long.MAX_VALUE, - TimeUnit.MILLISECONDS); - default: - throw Status.INTERNAL - .withDescription( - "TestServer does not support this wait policy: " - + request.getWaitPolicy().getLifecycleStage()) - .asRuntimeException(); + UpdateWorkflowExecutionLifecycleStage reachedStage = + updateHandle.waitForStage( + request.getWaitPolicy().getLifecycleStage(), + deadline.timeRemaining(TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS); + UpdateWorkflowExecutionResponse.Builder response = + UpdateWorkflowExecutionResponse.newBuilder() + .setUpdateRef(updateHandle.getRef()) + .setStage(reachedStage); + if (reachedStage == UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED) { + response.setOutcome(updateHandle.getOutcomeNow()); + } + return response.build(); + } catch (TimeoutException e) { + UpdateWorkflowExecutionLifecycleStage stage = updateHandle.getStage(); + UpdateWorkflowExecutionResponse.Builder response = + UpdateWorkflowExecutionResponse.newBuilder() + .setUpdateRef(updateHandle.getRef()) + .setStage(stage); + if (stage + == UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED) { + response.setOutcome(updateHandle.getOutcomeNow()); } + return response.build(); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ExecutionException e) { @@ -2138,37 +2156,78 @@ public UpdateWorkflowExecutionResponse updateWorkflowExecution( .withCause(cause) .withDescription(cause.getMessage()) .asRuntimeException(); - } catch (TimeoutException e) { - throw Status.DEADLINE_EXCEEDED - .withCause(e) - .withDescription("update deadline exceeded") - .asRuntimeException(); } } @Override public PollWorkflowExecutionUpdateResponse pollUpdateWorkflowExecution( PollWorkflowExecutionUpdateRequest request, Deadline deadline) { + UpdateHandle updateHandle = getUpdate(request.getUpdateRef().getUpdateId()); try { - UpdateHandle updateHandle = getUpdate(request.getUpdateRef().getUpdateId()); - 
UpdateWorkflowExecutionResponse completionResponse = - updateHandle - .getCompletion() - .get( - deadline != null ? deadline.timeRemaining(TimeUnit.MILLISECONDS) : Long.MAX_VALUE, - TimeUnit.MILLISECONDS); - - return PollWorkflowExecutionUpdateResponse.newBuilder() - .setOutcome(completionResponse.getOutcome()) - .build(); + // If the workflow is in a terminal state, return the current state of the update if it + // completed + if (isTerminalState()) { + if (updateHandle.getOutcome().isDone()) { + return PollWorkflowExecutionUpdateResponse.newBuilder() + .setUpdateRef(updateHandle.getRef()) + .setStage(updateHandle.getStage()) + .setOutcome(updateHandle.getOutcomeNow()) + .build(); + } else { + throw Status.NOT_FOUND + .withDescription("workflow execution already completed") + .asRuntimeException(); + } + } + + // If no wait policy is specified or is ADMITTED, return the current state of the update + if (!request.hasWaitPolicy() + || request + .getWaitPolicy() + .getLifecycleStage() + .equals(UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_UNSPECIFIED) + || request + .getWaitPolicy() + .getLifecycleStage() + .equals(UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED)) { + UpdateWorkflowExecutionLifecycleStage stage = updateHandle.getStage(); + PollWorkflowExecutionUpdateResponse.Builder response = + PollWorkflowExecutionUpdateResponse.newBuilder() + .setUpdateRef(updateHandle.getRef()) + .setStage(stage); + if (stage + == UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED) { + response.setOutcome(updateHandle.getOutcomeNow()); + } + return response.build(); + } + + // Wait for the update to reach the specified stage + UpdateWorkflowExecutionLifecycleStage reachedStage = + updateHandle.waitForStage( + request.getWaitPolicy().getLifecycleStage(), + deadline.timeRemaining(TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS); + PollWorkflowExecutionUpdateResponse.Builder response = + PollWorkflowExecutionUpdateResponse.newBuilder() + 
.setUpdateRef(updateHandle.getRef()) + .setStage(reachedStage); + if (reachedStage == UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED) { + response.setOutcome(updateHandle.getOutcomeNow()); + } + return response.build(); } catch (TimeoutException e) { - PollWorkflowExecutionUpdateResponse resp = - PollWorkflowExecutionUpdateResponse.getDefaultInstance(); - return resp.toBuilder() - .setStage( - UpdateWorkflowExecutionLifecycleStage - .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED) - .build(); + PollWorkflowExecutionUpdateResponse.Builder response = + PollWorkflowExecutionUpdateResponse.newBuilder() + .setUpdateRef(request.getUpdateRef()) + .setStage(updateHandle.getStage()); + if (updateHandle.getStage() + == UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED) { + response.setOutcome(updateHandle.getOutcomeNow()); + } + return response.build(); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof StatusRuntimeException) { @@ -2194,15 +2253,17 @@ UpdateHandle getOrCreateUpdate(UpdateWorkflowExecutionRequest updateRequest) { if (inflightUpdate.isPresent()) { return new UpdateHandle( inflightUpdate.get().getId(), - inflightUpdate.get().getAcceptance(), - inflightUpdate.get().getCompletion()); + getExecutionId().getExecution(), + inflightUpdate.get().getAccepted(), + inflightUpdate.get().getOutcome()); } StateMachine acceptedUpdate = updates.get(updateId); if (acceptedUpdate != null) { return new UpdateHandle( acceptedUpdate.getData().id, - acceptedUpdate.getData().acceptance, - acceptedUpdate.getData().complete); + getExecutionId().getExecution(), + acceptedUpdate.getData().accepted, + acceptedUpdate.getData().outcome); } UpdateWorkflowExecution update = new UpdateWorkflowExecution(updateRequest); @@ -2213,7 +2274,11 @@ UpdateHandle getOrCreateUpdate(UpdateWorkflowExecutionRequest updateRequest) { } workflowTaskStateMachine.action(Action.UPDATE_WORKFLOW_EXECUTION, ctx, update, 0); }); 
- return new UpdateHandle(update.getId(), update.getAcceptance(), update.getCompletion()); + return new UpdateHandle( + update.getId(), + getExecutionId().getExecution(), + update.getAccepted(), + update.getOutcome()); } finally { lock.unlock(); } @@ -2229,15 +2294,17 @@ UpdateHandle getUpdate(String updateId) { if (inflightUpdate.isPresent()) { return new UpdateHandle( inflightUpdate.get().getId(), - inflightUpdate.get().getAcceptance(), - inflightUpdate.get().getCompletion()); + getExecutionId().getExecution(), + inflightUpdate.get().getAccepted(), + inflightUpdate.get().getOutcome()); } StateMachine acceptedUpdate = updates.get(updateId); if (acceptedUpdate != null) { return new UpdateHandle( acceptedUpdate.getData().id, - acceptedUpdate.getData().acceptance, - acceptedUpdate.getData().complete); + getExecutionId().getExecution(), + acceptedUpdate.getData().accepted, + acceptedUpdate.getData().outcome); } throw Status.NOT_FOUND .withDescription("update " + updateId + " not found") @@ -2436,10 +2503,8 @@ public String toString() { static class UpdateWorkflowExecution { private final String id; private final UpdateWorkflowExecutionRequest request; - private final CompletableFuture acceptance = - new CompletableFuture<>(); - private final CompletableFuture completion = - new CompletableFuture<>(); + private final CompletableFuture accepted = new CompletableFuture<>(); + private final CompletableFuture outcome = new CompletableFuture<>(); private UpdateWorkflowExecution(UpdateWorkflowExecutionRequest request) { this.request = request; @@ -2451,12 +2516,12 @@ public UpdateWorkflowExecutionRequest getRequest() { return request; } - public CompletableFuture getAcceptance() { - return acceptance; + public CompletableFuture getAccepted() { + return accepted; } - public CompletableFuture getCompletion() { - return completion; + public CompletableFuture getOutcome() { + return outcome; } public String getId() { @@ -2471,39 +2536,82 @@ public String toString() { + '\'' + ", 
request=" + request - + ", acceptance=" - + acceptance - + ", completion=" - + completion + + ", accepted=" + + accepted + + ", outcome=" + + outcome + '}'; } } static class UpdateHandle { private final String id; - private final CompletableFuture acceptance; - private final CompletableFuture completion; + private final WorkflowExecution execution; + private final CompletableFuture accepted; + private final CompletableFuture outcome; private UpdateHandle( String id, - CompletableFuture acceptance, - CompletableFuture completion) { + WorkflowExecution execution, + CompletableFuture accepted, + CompletableFuture outcome) { this.id = id; - this.acceptance = acceptance; - this.completion = completion; + this.execution = execution; + this.accepted = accepted; + this.outcome = outcome; } - public CompletableFuture getAcceptance() { - return acceptance; + public Future getAccepted() { + return accepted; } - public CompletableFuture getCompletion() { - return completion; + public Future getOutcome() { + return outcome; + } + + public Outcome getOutcomeNow() { + try { + return outcome.get(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + + public UpdateWorkflowExecutionLifecycleStage waitForStage( + UpdateWorkflowExecutionLifecycleStage stage, long timeout, TimeUnit unit) + throws ExecutionException, InterruptedException, TimeoutException { + switch (stage) { + case UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED: + break; + case UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED: + accepted.get(timeout, unit); + break; + case UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED: + outcome.get(timeout, unit); + break; + } + return getStage(); + } + + public UpdateWorkflowExecutionLifecycleStage getStage() { + if (!accepted.isDone()) { + return UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED; + } else if (!outcome.isDone()) { + return 
UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED; + } + return UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED; } public String getId() { return id; } + + public UpdateRef getRef() { + return UpdateRef.newBuilder().setUpdateId(id).setWorkflowExecution(execution).build(); + } } private QueryWorkflowResponse stronglyConsistentQuery( diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java index 81aaffb909..710c4f1e31 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java @@ -802,17 +802,22 @@ public void signalWorkflowExecution( public void updateWorkflowExecution( UpdateWorkflowExecutionRequest request, StreamObserver responseObserver) { - try { - ExecutionId executionId = - new ExecutionId(request.getNamespace(), request.getWorkflowExecution()); - TestWorkflowMutableState mutableState = getMutableState(executionId); - @Nullable Deadline deadline = Context.current().getDeadline(); - UpdateWorkflowExecutionResponse response = - mutableState.updateWorkflowExecution(request, deadline); - responseObserver.onNext(response); - responseObserver.onCompleted(); - } catch (StatusRuntimeException e) { - handleStatusRuntimeException(e, responseObserver); + try (Context.CancellableContext ctx = deadlineCtx(getUpdatePollDeadline())) { + Context toRestore = ctx.attach(); + try { + ExecutionId executionId = + new ExecutionId(request.getNamespace(), request.getWorkflowExecution()); + TestWorkflowMutableState mutableState = getMutableState(executionId); + @Nullable Deadline deadline = Context.current().getDeadline(); + UpdateWorkflowExecutionResponse response = + mutableState.updateWorkflowExecution(request, deadline); + 
responseObserver.onNext(response); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } finally { + ctx.detach(toRestore); + } } } @@ -820,17 +825,22 @@ public void updateWorkflowExecution( public void pollWorkflowExecutionUpdate( PollWorkflowExecutionUpdateRequest request, StreamObserver responseObserver) { - try { - ExecutionId executionId = - new ExecutionId(request.getNamespace(), request.getUpdateRef().getWorkflowExecution()); - TestWorkflowMutableState mutableState = getMutableState(executionId); - @Nullable Deadline deadline = Context.current().getDeadline(); - PollWorkflowExecutionUpdateResponse response = - mutableState.pollUpdateWorkflowExecution(request, deadline); - responseObserver.onNext(response); - responseObserver.onCompleted(); - } catch (StatusRuntimeException e) { - handleStatusRuntimeException(e, responseObserver); + try (Context.CancellableContext ctx = deadlineCtx(getUpdatePollDeadline())) { + Context toRestore = ctx.attach(); + try { + ExecutionId executionId = + new ExecutionId(request.getNamespace(), request.getUpdateRef().getWorkflowExecution()); + TestWorkflowMutableState mutableState = getMutableState(executionId); + @Nullable Deadline deadline = Context.current().getDeadline(); + PollWorkflowExecutionUpdateResponse response = + mutableState.pollUpdateWorkflowExecution(request, deadline); + responseObserver.onNext(response); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } finally { + ctx.detach(toRestore); + } } } @@ -1250,6 +1260,13 @@ private Deadline getLongPollDeadline() { return deadline != null ? 
deadline.minimum(maximumDeadline) : maximumDeadline; } + private Deadline getUpdatePollDeadline() { + @Nullable Deadline deadline = Context.current().getDeadline(); + Deadline maximumDeadline = + Deadline.after(Duration.ofSeconds(10).toMillis(), TimeUnit.MILLISECONDS); + return deadline != null ? deadline.minimum(maximumDeadline) : maximumDeadline; + } + private void handleStatusRuntimeException( StatusRuntimeException e, StreamObserver responseObserver) { if (e.getStatus().getCode() == Status.Code.INTERNAL) { diff --git a/temporal-test-server/src/test/java/io/temporal/testserver/functional/WorkflowUpdateTest.java b/temporal-test-server/src/test/java/io/temporal/testserver/functional/WorkflowUpdateTest.java new file mode 100644 index 0000000000..4a49e9b981 --- /dev/null +++ b/temporal-test-server/src/test/java/io/temporal/testserver/functional/WorkflowUpdateTest.java @@ -0,0 +1,761 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.testserver.functional; + +import static org.junit.Assume.assumeFalse; + +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import io.temporal.api.common.v1.Payloads; +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.api.enums.v1.UpdateWorkflowExecutionLifecycleStage; +import io.temporal.api.update.v1.*; +import io.temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateRequest; +import io.temporal.api.workflowservice.v1.PollWorkflowExecutionUpdateResponse; +import io.temporal.api.workflowservice.v1.UpdateWorkflowExecutionRequest; +import io.temporal.api.workflowservice.v1.UpdateWorkflowExecutionResponse; +import io.temporal.client.*; +import io.temporal.common.converter.DefaultDataConverter; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.testserver.functional.common.TestWorkflows; +import io.temporal.workflow.Workflow; +import java.time.Duration; +import java.util.concurrent.TimeUnit; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +public class WorkflowUpdateTest { + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder().setWorkflowTypes(UpdateWorkflowImpl.class).build(); + + @Test + public void updateBadWorkflow() { + // Assert that we can't update a non-existent workflow. Expect a NOT_FOUND error. 
+ WorkflowExecution badExec = WorkflowExecution.newBuilder().setWorkflowId("workflowId").build(); + StatusRuntimeException exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + updateWorkflow( + badExec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.BLOCK)); + // Server does not return a consistent error message here, so we can't assert on the message + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + } + + @Test + public void pollUpdateBadWorkflow() { + // Assert that we can't poll an update for a non-existent workflow. Expect a NOT_FOUND error. + WorkflowExecution badExec = WorkflowExecution.newBuilder().setWorkflowId("workflowId").build(); + StatusRuntimeException exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + pollWorkflowUpdate( + badExec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + } + + @Test + public void pollUpdateBadUpdate() { + // Assert that we can't poll an update for a non-existent update ID. Expect a NOT_FOUND error. 
+ WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + StatusRuntimeException exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + } + + @Test + public void updateAndPollCompletedWorkflow() { + // Assert that we can't update or poll a new update request for a completed workflow. Expect a + // NOT_FOUND + // error. + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + workflowStub.signal(); + workflowStub.execute(); + + StatusRuntimeException exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.BLOCK)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + + exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + } + + @Test + public void update() { + // Assert that we can update a 
workflow and poll the update. + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse updateResponse = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.BLOCK); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + updateResponse.getStage()); + + PollWorkflowExecutionUpdateResponse pollUpdateResponse = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + pollUpdateResponse.getStage()); + } + + @Test + public void updateCompleteWorkflow() { + // Assert that an update completed in the same WFT as the workflow is completed is reported. 
+ WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.FINISH_WORKFLOW); + + PollWorkflowExecutionUpdateResponse pollUpdateResponse = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + pollUpdateResponse.getStage()); + } + + @Test + public void updateAdmittedNotSupported() { + // Assert that we can't send an update with wait stage ADMITTED. Expect a + // PERMISSION_DENIED error. 
+ WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + StatusRuntimeException exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED, + TestWorkflows.UpdateType.BLOCK)); + Assert.assertEquals(Status.PERMISSION_DENIED.getCode(), exception.getStatus().getCode()); + + UpdateWorkflowExecutionResponse updateResponse = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.COMPLETE); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + updateResponse.getStage()); + Assert.assertEquals( + Outcome.newBuilder() + .setSuccess( + Payloads.newBuilder() + .addPayloads(DefaultDataConverter.newDefaultInstance().toPayload(null).get()) + .build()) + .build(), + updateResponse.getOutcome()); + + PollWorkflowExecutionUpdateResponse response = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED); + Assert.assertEquals(updateResponse.getOutcome(), response.getOutcome()); + Assert.assertEquals(updateResponse.getUpdateRef(), response.getUpdateRef()); + Assert.assertEquals(updateResponse.getStage(), response.getStage()); + } + + @Test + public void duplicateUpdate() { + // Assert that sending a duplicate update request returns the same response as the original + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + 
TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse updateResponse1 = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.COMPLETE); + + UpdateWorkflowExecutionResponse updateResponse2 = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.BLOCK); + Assert.assertEquals(updateResponse1, updateResponse2); + } + + @Test + public void duplicateRejectedUpdate() { + // Assert that rejected updates are not stored and a new request can use the same update id. + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse updateResponse1 = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.REJECT); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + updateResponse1.getStage()); + + UpdateWorkflowExecutionResponse updateResponse2 = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.COMPLETE); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + updateResponse2.getStage()); + + 
workflowStub.signal(); + workflowStub.execute(); + Assert.assertEquals(updateResponse1.getUpdateRef(), updateResponse2.getUpdateRef()); + Assert.assertNotEquals(updateResponse1.getOutcome(), updateResponse2.getOutcome()); + } + + @Test + public void updateRejected() { + // Assert that we can't poll for a rejected update. Expect a NOT_FOUND error. + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse updateResponse = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.REJECT); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + updateResponse.getStage()); + + StatusRuntimeException exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + } + + @Test + public void updateWaitStage() { + // Assert that wait for stage returns when the update has reached at least the specified stage. 
+ WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse updateResponse = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.COMPLETE); + // Current behaviour is not defined, server may return ACCEPTED or COMPLETED + Assert.assertTrue( + updateResponse + .getStage() + .equals( + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED) + || updateResponse + .getStage() + .equals( + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED)); + + PollWorkflowExecutionUpdateResponse pollUpdateResponse = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + pollUpdateResponse.getStage()); + } + + @Test(timeout = 120000) + public void updateNotAcceptedTimeout() { + // Assert that if an update cannot be accepted it will be considered admitted. 
+ WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + testWorkflowRule.getTestEnvironment().shutdownNow(); + testWorkflowRule.getTestEnvironment().awaitTermination(5, TimeUnit.SECONDS); + + UpdateWorkflowExecutionResponse updateResponse = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.COMPLETE); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED, + updateResponse.getStage()); + + updateResponse = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.COMPLETE); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED, + updateResponse.getStage()); + + PollWorkflowExecutionUpdateResponse pollUpdateResponse = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED, + pollUpdateResponse.getStage()); + + pollUpdateResponse = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED, + pollUpdateResponse.getStage()); + + pollUpdateResponse = pollWorkflowUpdate(exec, "updateId", null); + Assert.assertEquals( + 
UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ADMITTED, + pollUpdateResponse.getStage()); + } + + @Test(timeout = 60000) + public void updateWaitCompletedTimeout() { + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse updateResponse = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.BLOCK); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + updateResponse.getStage()); + + PollWorkflowExecutionUpdateResponse pollUpdateResponse = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + pollUpdateResponse.getStage()); + + pollUpdateResponse = pollWorkflowUpdate(exec, "updateId", null); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + pollUpdateResponse.getStage()); + } + + @Test + public void updateAndPollByWorkflowId() { + // Assert that we can update and poll a workflow by its workflowId without specifying the runId + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + 
WorkflowExecution workflowExecution = + WorkflowExecution.newBuilder().setWorkflowId(exec.getWorkflowId()).build(); + UpdateWorkflowExecutionResponse updateResponse = + updateWorkflow( + workflowExecution, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.COMPLETE); + Assert.assertEquals(updateResponse.getUpdateRef().getWorkflowExecution(), exec); + + PollWorkflowExecutionUpdateResponse pollUpdateResponse = + pollWorkflowUpdate( + workflowExecution, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED); + assertPollResponseEqualsUpdateResponse(pollUpdateResponse, updateResponse); + } + + @Test + public void getCompletedUpdateOfCompletedWorkflow() { + // Assert that we can get and poll a completed update from a completed workflow. + assumeFalse("Skipping as real server has a bug", SDKTestWorkflowRule.useExternalService); + + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse updateResponse1 = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.COMPLETE); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + updateResponse1.getStage()); + + workflowStub.signal(); + workflowStub.execute(); + + UpdateWorkflowExecutionResponse updateResponse2 = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.BLOCK); + 
Assert.assertEquals(updateResponse1, updateResponse2); + + PollWorkflowExecutionUpdateResponse pollUpdateResponse = + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED); + assertPollResponseEqualsUpdateResponse(pollUpdateResponse, updateResponse1); + } + + @Test + public void getIncompleteUpdateOfCompletedWorkflow() { + // Assert that we can't get an incomplete update of a completed workflow. Expect a NOT_FOUND + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + + TestWorkflows.WorkflowWithUpdate workflowStub = + testWorkflowRule + .getWorkflowClient() + .newWorkflowStub(TestWorkflows.WorkflowWithUpdate.class, options); + WorkflowExecution exec = WorkflowClient.start(workflowStub::execute); + + UpdateWorkflowExecutionResponse response = + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.BLOCK); + Assert.assertEquals( + UpdateWorkflowExecutionLifecycleStage.UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + response.getStage()); + + workflowStub.signal(); + workflowStub.execute(); + + StatusRuntimeException exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED, + TestWorkflows.UpdateType.BLOCK)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + updateWorkflow( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED, + TestWorkflows.UpdateType.BLOCK)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + exception = + Assert.assertThrows( + 
StatusRuntimeException.class, + () -> + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + exception = + Assert.assertThrows( + StatusRuntimeException.class, + () -> + pollWorkflowUpdate( + exec, + "updateId", + UpdateWorkflowExecutionLifecycleStage + .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_COMPLETED)); + Assert.assertEquals(Status.NOT_FOUND.getCode(), exception.getStatus().getCode()); + } + + private UpdateWorkflowExecutionResponse updateWorkflow( + WorkflowExecution execution, + String updateId, + UpdateWorkflowExecutionLifecycleStage stage, + TestWorkflows.UpdateType type) { + UpdateWorkflowExecutionResponse response = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .updateWorkflowExecution( + UpdateWorkflowExecutionRequest.newBuilder() + .setNamespace(testWorkflowRule.getWorkflowClient().getOptions().getNamespace()) + .setWorkflowExecution(execution) + .setRequest( + Request.newBuilder() + .setInput( + Input.newBuilder() + .setName("update") + .setArgs( + DefaultDataConverter.newDefaultInstance() + .toPayloads(type) + .get()) + .build()) + .setMeta(Meta.newBuilder().setUpdateId(updateId).build())) + .setWaitPolicy(WaitPolicy.newBuilder().setLifecycleStage(stage).build()) + .build()); + + // There are some assertions that we can always make... 
+ Assert.assertEquals(updateId, response.getUpdateRef().getUpdateId()); + Assert.assertEquals( + execution.getWorkflowId(), response.getUpdateRef().getWorkflowExecution().getWorkflowId()); + return response; + } + + private PollWorkflowExecutionUpdateResponse pollWorkflowUpdate( + WorkflowExecution execution, String updateId, UpdateWorkflowExecutionLifecycleStage stage) { + WaitPolicy.Builder waitPolicy = WaitPolicy.newBuilder(); + if (stage != null) { + waitPolicy.setLifecycleStage(stage); + } + return testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowExecutionUpdate( + PollWorkflowExecutionUpdateRequest.newBuilder() + .setNamespace(testWorkflowRule.getWorkflowClient().getOptions().getNamespace()) + .setWaitPolicy(waitPolicy.build()) + .setUpdateRef( + UpdateRef.newBuilder() + .setUpdateId(updateId) + .setWorkflowExecution(execution) + .build()) + .build()); + } + + private void assertPollResponseEqualsUpdateResponse( + PollWorkflowExecutionUpdateResponse pollResponse, + UpdateWorkflowExecutionResponse updateResponse) { + Assert.assertEquals(pollResponse.getStage(), updateResponse.getStage()); + Assert.assertEquals(pollResponse.getOutcome(), updateResponse.getOutcome()); + Assert.assertEquals(pollResponse.getUpdateRef(), updateResponse.getUpdateRef()); + } + + public static class UpdateWorkflowImpl implements TestWorkflows.WorkflowWithUpdate { + boolean unblock = false; + + @Override + public void execute() { + // wait forever to keep it in running state + Workflow.await(() -> unblock); + } + + @Override + public void update(TestWorkflows.UpdateType type) { + if (type == TestWorkflows.UpdateType.DELAYED_COMPLETE) { + Workflow.sleep(Duration.ofSeconds(1)); + } else if (type == TestWorkflows.UpdateType.BLOCK) { + Workflow.await(() -> false); + } else if (type == TestWorkflows.UpdateType.FINISH_WORKFLOW) { + unblock = true; + } + } + + @Override + public void updateValidator(TestWorkflows.UpdateType type) { + if 
(type == TestWorkflows.UpdateType.REJECT) { + throw new IllegalArgumentException("REJECT"); + } + } + + @Override + public void signal() { + unblock = true; + } + } +} diff --git a/temporal-test-server/src/test/java/io/temporal/testserver/functional/common/TestWorkflows.java b/temporal-test-server/src/test/java/io/temporal/testserver/functional/common/TestWorkflows.java index b67154d656..9805492121 100644 --- a/temporal-test-server/src/test/java/io/temporal/testserver/functional/common/TestWorkflows.java +++ b/temporal-test-server/src/test/java/io/temporal/testserver/functional/common/TestWorkflows.java @@ -20,8 +20,7 @@ package io.temporal.testserver.functional.common; -import io.temporal.workflow.WorkflowInterface; -import io.temporal.workflow.WorkflowMethod; +import io.temporal.workflow.*; public class TestWorkflows { @WorkflowInterface @@ -47,4 +46,27 @@ public interface PrimitiveChildWorkflow { @WorkflowMethod void execute(); } + + @WorkflowInterface + public interface WorkflowWithUpdate { + @WorkflowMethod + void execute(); + + @UpdateMethod + void update(UpdateType type); + + @UpdateValidatorMethod(updateName = "update") + void updateValidator(UpdateType type); + + @SignalMethod + void signal(); + } + + public enum UpdateType { + REJECT, + COMPLETE, + DELAYED_COMPLETE, + BLOCK, + FINISH_WORKFLOW, + } } From bbf2de7a7e1f72a8f1d4a6008006813dd7e56f85 Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Tue, 30 Jul 2024 09:53:48 -0700 Subject: [PATCH 05/25] Move workflow update polling inside of interceptor (#2159) --- .github/workflows/features.yml | 3 +- ...TracingWorkflowClientCallsInterceptor.java | 3 +- .../io/temporal/client/WorkflowStubImpl.java | 51 ++++++------------ .../WorkflowClientCallsInterceptor.java | 49 ++++------------- .../WorkflowClientCallsInterceptorBase.java | 3 +- .../client/CompletedUpdateHandleImpl.java | 7 +-- .../client/LazyUpdateHandleImpl.java | 9 ++-- .../client/RootWorkflowClientInvoker.java | 28 +++++++--- 
.../workflow/updateTest/UpdateTest.java | 53 +++++++++++++++++++ 9 files changed, 115 insertions(+), 91 deletions(-) rename temporal-sdk/src/main/java/io/temporal/{ => internal}/client/CompletedUpdateHandleImpl.java (86%) rename temporal-sdk/src/main/java/io/temporal/{ => internal}/client/LazyUpdateHandleImpl.java (94%) diff --git a/.github/workflows/features.yml b/.github/workflows/features.yml index 41891b1b6d..9c0a1b8e7a 100644 --- a/.github/workflows/features.yml +++ b/.github/workflows/features.yml @@ -7,4 +7,5 @@ jobs: with: java-repo-path: ${{github.event.pull_request.head.repo.full_name}} version: ${{github.event.pull_request.head.ref}} - version-is-repo-ref: true \ No newline at end of file + version-is-repo-ref: true + features-repo-ref: java-update-iceptor-change \ No newline at end of file diff --git a/temporal-opentracing/src/main/java/io/temporal/opentracing/internal/OpenTracingWorkflowClientCallsInterceptor.java b/temporal-opentracing/src/main/java/io/temporal/opentracing/internal/OpenTracingWorkflowClientCallsInterceptor.java index 3ddd42d297..fecd48d32b 100644 --- a/temporal-opentracing/src/main/java/io/temporal/opentracing/internal/OpenTracingWorkflowClientCallsInterceptor.java +++ b/temporal-opentracing/src/main/java/io/temporal/opentracing/internal/OpenTracingWorkflowClientCallsInterceptor.java @@ -23,6 +23,7 @@ import io.opentracing.Scope; import io.opentracing.Span; import io.opentracing.Tracer; +import io.temporal.client.UpdateHandle; import io.temporal.common.interceptors.WorkflowClientCallsInterceptor; import io.temporal.common.interceptors.WorkflowClientCallsInterceptorBase; import io.temporal.opentracing.OpenTracingOptions; @@ -119,7 +120,7 @@ public QueryOutput query(QueryInput input) { } @Override - public StartUpdateOutput startUpdate(StartUpdateInput input) { + public UpdateHandle startUpdate(StartUpdateInput input) { Span workflowStartUpdateSpan = contextAccessor.writeSpanContextToHeader( () -> diff --git 
a/temporal-sdk/src/main/java/io/temporal/client/WorkflowStubImpl.java b/temporal-sdk/src/main/java/io/temporal/client/WorkflowStubImpl.java index 0232834772..51fe9cc373 100644 --- a/temporal-sdk/src/main/java/io/temporal/client/WorkflowStubImpl.java +++ b/temporal-sdk/src/main/java/io/temporal/client/WorkflowStubImpl.java @@ -31,6 +31,7 @@ import io.temporal.common.interceptors.Header; import io.temporal.common.interceptors.WorkflowClientCallsInterceptor; import io.temporal.failure.CanceledFailure; +import io.temporal.internal.client.LazyUpdateHandleImpl; import io.temporal.serviceclient.CheckedExceptionWrapper; import io.temporal.serviceclient.StatusUtils; import java.lang.reflect.Type; @@ -331,42 +332,20 @@ public UpdateHandle startUpdate(UpdateOptions options, Object... args) options.validate(); WorkflowExecution targetExecution = execution.get(); try { - WorkflowClientCallsInterceptor.StartUpdateOutput result = - workflowClientInvoker.startUpdate( - new WorkflowClientCallsInterceptor.StartUpdateInput<>( - targetExecution, - options.getUpdateName(), - Header.empty(), - options.getUpdateId(), - args, - options.getResultClass(), - options.getResultType(), - options.getFirstExecutionRunId(), - WaitPolicy.newBuilder() - .setLifecycleStage(options.getWaitForStage().getProto()) - .build())); - - if (result.hasResult()) { - return new CompletedUpdateHandleImpl<>( - result.getReference().getUpdateId(), - result.getReference().getWorkflowExecution(), - result.getResult()); - } else { - LazyUpdateHandleImpl handle = - new LazyUpdateHandleImpl<>( - workflowClientInvoker, - workflowType.orElse(null), - options.getUpdateName(), - result.getReference().getUpdateId(), - result.getReference().getWorkflowExecution(), - options.getResultClass(), - options.getResultType()); - if (options.getWaitForStage() == WorkflowUpdateStage.COMPLETED) { - // Don't return the handle until completed, since that's what's been asked for - handle.waitCompleted(); - } - return handle; - } + return 
workflowClientInvoker.startUpdate( + new WorkflowClientCallsInterceptor.StartUpdateInput<>( + targetExecution, + workflowType, + options.getUpdateName(), + Header.empty(), + options.getUpdateId(), + args, + options.getResultClass(), + options.getResultType(), + options.getFirstExecutionRunId(), + WaitPolicy.newBuilder() + .setLifecycleStage(options.getWaitForStage().getProto()) + .build())); } catch (Exception e) { Throwable throwable = throwAsWorkflowFailureException(e, targetExecution); throw new WorkflowServiceException(targetExecution, workflowType.orElse(null), throwable); diff --git a/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptor.java b/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptor.java index 66cd5a6bc2..0d32e05ddf 100644 --- a/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptor.java +++ b/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptor.java @@ -22,8 +22,8 @@ import io.temporal.api.common.v1.WorkflowExecution; import io.temporal.api.enums.v1.WorkflowExecutionStatus; -import io.temporal.api.update.v1.UpdateRef; import io.temporal.api.update.v1.WaitPolicy; +import io.temporal.client.UpdateHandle; import io.temporal.client.WorkflowOptions; import io.temporal.common.Experimental; import java.lang.reflect.Type; @@ -78,7 +78,7 @@ public interface WorkflowClientCallsInterceptor { QueryOutput query(QueryInput input); @Experimental - StartUpdateOutput startUpdate(StartUpdateInput input); + UpdateHandle startUpdate(StartUpdateInput input); @Experimental PollWorkflowUpdateOutput pollWorkflowUpdate(PollWorkflowUpdateInput input); @@ -383,6 +383,7 @@ public WorkflowExecution getWorkflowExecution() { @Experimental final class StartUpdateInput { private final WorkflowExecution workflowExecution; + private final Optional workflowType; private final String updateName; private final Header header; private final 
Object[] arguments; @@ -394,6 +395,7 @@ final class StartUpdateInput { public StartUpdateInput( WorkflowExecution workflowExecution, + Optional workflowType, String updateName, Header header, String updateId, @@ -403,6 +405,7 @@ public StartUpdateInput( String firstExecutionRunId, WaitPolicy waitPolicy) { this.workflowExecution = workflowExecution; + this.workflowType = workflowType; this.header = header; this.updateId = updateId; this.updateName = updateName; @@ -417,6 +420,10 @@ public WorkflowExecution getWorkflowExecution() { return workflowExecution; } + public Optional getWorkflowType() { + return workflowType; + } + public String getUpdateName() { return updateName; } @@ -450,44 +457,6 @@ public WaitPolicy getWaitPolicy() { } } - @Experimental - final class UpdateOutput { - private final R result; - - public UpdateOutput(R result) { - this.result = result; - } - - public R getResult() { - return result; - } - } - - @Experimental - final class StartUpdateOutput { - private final UpdateRef reference; - private final R result; - private final boolean hasResult; - - public StartUpdateOutput(UpdateRef reference, boolean hasResult, R result) { - this.reference = reference; - this.result = result; - this.hasResult = hasResult; - } - - public UpdateRef getReference() { - return reference; - } - - public boolean hasResult() { - return hasResult; - } - - public R getResult() { - return result; - } - } - @Experimental final class PollWorkflowUpdateInput { private final WorkflowExecution workflowExecution; diff --git a/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptorBase.java b/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptorBase.java index 511b4f2518..26ce5ecafb 100644 --- a/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptorBase.java +++ b/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowClientCallsInterceptorBase.java @@ -20,6 
+20,7 @@ package io.temporal.common.interceptors; +import io.temporal.client.UpdateHandle; import java.util.concurrent.TimeoutException; /** Convenience base class for {@link WorkflowClientCallsInterceptor} implementations. */ @@ -62,7 +63,7 @@ public QueryOutput query(QueryInput input) { } @Override - public StartUpdateOutput startUpdate(StartUpdateInput input) { + public UpdateHandle startUpdate(StartUpdateInput input) { return next.startUpdate(input); } diff --git a/temporal-sdk/src/main/java/io/temporal/client/CompletedUpdateHandleImpl.java b/temporal-sdk/src/main/java/io/temporal/internal/client/CompletedUpdateHandleImpl.java similarity index 86% rename from temporal-sdk/src/main/java/io/temporal/client/CompletedUpdateHandleImpl.java rename to temporal-sdk/src/main/java/io/temporal/internal/client/CompletedUpdateHandleImpl.java index 40566be0a5..06fe6040b6 100644 --- a/temporal-sdk/src/main/java/io/temporal/client/CompletedUpdateHandleImpl.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/client/CompletedUpdateHandleImpl.java @@ -18,21 +18,22 @@ * limitations under the License. 
*/ -package io.temporal.client; +package io.temporal.internal.client; import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.UpdateHandle; import io.temporal.common.Experimental; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; @Experimental -final class CompletedUpdateHandleImpl implements UpdateHandle { +public final class CompletedUpdateHandleImpl implements UpdateHandle { private final String id; private final WorkflowExecution execution; private final T result; - CompletedUpdateHandleImpl(String id, WorkflowExecution execution, T result) { + public CompletedUpdateHandleImpl(String id, WorkflowExecution execution, T result) { this.id = id; this.execution = execution; this.result = result; diff --git a/temporal-sdk/src/main/java/io/temporal/client/LazyUpdateHandleImpl.java b/temporal-sdk/src/main/java/io/temporal/internal/client/LazyUpdateHandleImpl.java similarity index 94% rename from temporal-sdk/src/main/java/io/temporal/client/LazyUpdateHandleImpl.java rename to temporal-sdk/src/main/java/io/temporal/internal/client/LazyUpdateHandleImpl.java index 024774d13e..e48c49808d 100644 --- a/temporal-sdk/src/main/java/io/temporal/client/LazyUpdateHandleImpl.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/client/LazyUpdateHandleImpl.java @@ -18,11 +18,14 @@ * limitations under the License. 
*/ -package io.temporal.client; +package io.temporal.internal.client; import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.UpdateHandle; +import io.temporal.client.WorkflowException; +import io.temporal.client.WorkflowServiceException; import io.temporal.common.Experimental; import io.temporal.common.interceptors.WorkflowClientCallsInterceptor; import io.temporal.serviceclient.CheckedExceptionWrapper; @@ -33,7 +36,7 @@ import java.util.concurrent.TimeoutException; @Experimental -final class LazyUpdateHandleImpl implements UpdateHandle { +public final class LazyUpdateHandleImpl implements UpdateHandle { private final WorkflowClientCallsInterceptor workflowClientInvoker; private final String workflowType; @@ -44,7 +47,7 @@ final class LazyUpdateHandleImpl implements UpdateHandle { private final Type resultType; private WorkflowClientCallsInterceptor.PollWorkflowUpdateOutput waitCompletedPollCall; - LazyUpdateHandleImpl( + public LazyUpdateHandleImpl( WorkflowClientCallsInterceptor workflowClientInvoker, String workflowType, String updateName, diff --git a/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java b/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java index 8a337f0249..439304e3f9 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java @@ -31,8 +31,7 @@ import io.temporal.api.query.v1.WorkflowQuery; import io.temporal.api.update.v1.*; import io.temporal.api.workflowservice.v1.*; -import io.temporal.client.WorkflowClientOptions; -import io.temporal.client.WorkflowUpdateException; +import io.temporal.client.*; import io.temporal.common.converter.DataConverter; import io.temporal.common.interceptors.WorkflowClientCallsInterceptor; import 
io.temporal.internal.client.external.GenericWorkflowClient; @@ -298,7 +297,7 @@ public QueryOutput query(QueryInput input) { } @Override - public StartUpdateOutput startUpdate(StartUpdateInput input) { + public UpdateHandle startUpdate(StartUpdateInput input) { DataConverter dataConverterWithWorkflowContext = clientOptions .getDataConverter() @@ -337,10 +336,11 @@ public StartUpdateOutput startUpdate(StartUpdateInput input) { // Re-attempt the update until it is at least accepted, or passes the lifecycle stage specified // by the user. UpdateWorkflowExecutionResponse result; + UpdateWorkflowExecutionLifecycleStage waitForStage = input.getWaitPolicy().getLifecycleStage(); do { Deadline pollTimeoutDeadline = Deadline.after(POLL_UPDATE_TIMEOUT_S, TimeUnit.SECONDS); result = genericClient.update(updateRequest, pollTimeoutDeadline); - } while (result.getStage().getNumber() < input.getWaitPolicy().getLifecycleStage().getNumber() + } while (result.getStage().getNumber() < waitForStage.getNumber() && result.getStage().getNumber() < UpdateWorkflowExecutionLifecycleStage .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED @@ -356,7 +356,10 @@ public StartUpdateOutput startUpdate(StartUpdateInput input) { input.getResultClass(), input.getResultType(), dataConverterWithWorkflowContext); - return new StartUpdateOutput(result.getUpdateRef(), true, resultValue); + return new CompletedUpdateHandleImpl<>( + result.getUpdateRef().getUpdateId(), + result.getUpdateRef().getWorkflowExecution(), + resultValue); case FAILURE: throw new WorkflowUpdateException( result.getUpdateRef().getWorkflowExecution(), @@ -370,7 +373,20 @@ public StartUpdateOutput startUpdate(StartUpdateInput input) { + result.getOutcome().getValueCase()); } } else { - return new StartUpdateOutput(result.getUpdateRef(), false, null); + LazyUpdateHandleImpl handle = + new LazyUpdateHandleImpl<>( + this, + input.getWorkflowType().orElse(null), + input.getUpdateName(), + result.getUpdateRef().getUpdateId(), + 
result.getUpdateRef().getWorkflowExecution(), + input.getResultClass(), + input.getResultType()); + if (waitForStage == WorkflowUpdateStage.COMPLETED.getProto()) { + // Don't return the handle until completed, since that's what's been asked for + handle.waitCompleted(); + } + return handle; } } diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTest.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTest.java index 9be5433360..9b9470618f 100644 --- a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTest.java +++ b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTest.java @@ -30,7 +30,11 @@ import io.temporal.api.workflowservice.v1.ResetWorkflowExecutionRequest; import io.temporal.api.workflowservice.v1.ResetWorkflowExecutionResponse; import io.temporal.client.*; +import io.temporal.common.interceptors.WorkflowClientCallsInterceptor; +import io.temporal.common.interceptors.WorkflowClientCallsInterceptorBase; +import io.temporal.common.interceptors.WorkflowClientInterceptorBase; import io.temporal.failure.ApplicationFailure; +import io.temporal.internal.client.CompletedUpdateHandleImpl; import io.temporal.testing.internal.SDKTestOptions; import io.temporal.testing.internal.SDKTestWorkflowRule; import io.temporal.worker.WorkerOptions; @@ -103,6 +107,55 @@ public void testUpdate() { assertEquals("Execute-Hello Update Execute-Hello Update 2", result); } + private static class FakesResultUpdateInterceptor extends WorkflowClientInterceptorBase { + @Override + public WorkflowClientCallsInterceptor workflowClientCallsInterceptor( + WorkflowClientCallsInterceptor next) { + return new WorkflowClientCallsInterceptorBase(next) { + @Override + public UpdateHandle startUpdate(StartUpdateInput input) { + super.startUpdate(input); + return new CompletedUpdateHandleImpl<>( + "someid", input.getWorkflowExecution(), (R) "fake"); + } + }; + } + } + + @Test + public void testUpdateIntercepted() { + String 
workflowId = UUID.randomUUID().toString(); + WorkflowClient workflowClient = + WorkflowClient.newInstance( + testWorkflowRule.getWorkflowServiceStubs(), + WorkflowClientOptions.newBuilder(testWorkflowRule.getWorkflowClient().getOptions()) + .setInterceptors(new FakesResultUpdateInterceptor()) + .validateAndBuildWithDefaults()); + WorkflowOptions options = + SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() + .setWorkflowId(workflowId) + .build(); + WorkflowWithUpdate workflow = workflowClient.newWorkflowStub(WorkflowWithUpdate.class, options); + // To execute workflow client.execute() would do. But we want to start workflow and immediately + // return. + WorkflowExecution execution = WorkflowClient.start(workflow::execute); + + SDKTestWorkflowRule.waitForOKQuery(workflow); + assertEquals("initial", workflow.getState()); + assertEquals(workflowId, execution.getWorkflowId()); + + assertEquals("fake", workflow.update(0, "Hello Update")); + assertEquals("fake", workflow.update(1, "Hello Update 2")); + workflow.complete(); + + String result = + testWorkflowRule + .getWorkflowClient() + .newUntypedWorkflowStub(execution, Optional.empty()) + .getResult(String.class); + assertEquals("Execute-Hello Update Execute-Hello Update 2", result); + } + @Test public void testUpdateUntyped() throws ExecutionException, InterruptedException { WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); From b92c97d3f2f3f0329bb15943a7fcc0bb480ff2a8 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Tue, 30 Jul 2024 10:58:24 -0700 Subject: [PATCH 06/25] Workflow-friendly concurrency primitives (#2133) Workflow-friendly concurrency primitives --- .../internal/sync/WorkflowInternal.java | 12 +- .../internal/sync/WorkflowLockImpl.java | 82 ++++ .../internal/sync/WorkflowSemaphoreImpl.java | 107 ++++++ .../java/io/temporal/workflow/Workflow.java | 21 + .../io/temporal/workflow/WorkflowLock.java | 67 ++++ 
.../temporal/workflow/WorkflowSemaphore.java | 135 +++++++ .../sync/WorkflowInternalLockTest.java | 326 ++++++++++++++++ .../sync/WorkflowInternalSemaphoreTest.java | 360 ++++++++++++++++++ 8 files changed, 1108 insertions(+), 2 deletions(-) create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowLockImpl.java create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowSemaphoreImpl.java create mode 100644 temporal-sdk/src/main/java/io/temporal/workflow/WorkflowLock.java create mode 100644 temporal-sdk/src/main/java/io/temporal/workflow/WorkflowSemaphore.java create mode 100644 temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalLockTest.java create mode 100644 temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalSemaphoreTest.java diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java index 03c4da2b2f..6eeaac1ae7 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java @@ -109,6 +109,14 @@ public static WorkflowQueue newWorkflowQueue(int capacity) { return new WorkflowQueueImpl<>(capacity); } + public static WorkflowLock newWorkflowLock() { + return new WorkflowLockImpl(); + } + + public static WorkflowSemaphore newWorkflowSemaphore(int permits) { + return new WorkflowSemaphoreImpl(permits); + } + public static CompletablePromise newCompletablePromise() { return new CompletablePromiseImpl<>(); } @@ -479,13 +487,13 @@ public static R executeActivity( public static void await(String reason, Supplier unblockCondition) throws DestroyWorkflowThreadError { - assertNotReadOnly("await"); + assertNotReadOnly(reason); getWorkflowOutboundInterceptor().await(reason, unblockCondition); } public static boolean await(Duration timeout, String reason, Supplier unblockCondition) throws 
DestroyWorkflowThreadError { - assertNotReadOnly("await with timeout"); + assertNotReadOnly(reason); return getWorkflowOutboundInterceptor().await(timeout, reason, unblockCondition); } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowLockImpl.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowLockImpl.java new file mode 100644 index 0000000000..9cf6d34e21 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowLockImpl.java @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.sync; + +import static io.temporal.internal.sync.WorkflowInternal.assertNotReadOnly; + +import com.google.common.base.Preconditions; +import io.temporal.workflow.CancellationScope; +import io.temporal.workflow.WorkflowLock; +import java.time.Duration; + +class WorkflowLockImpl implements WorkflowLock { + private boolean locked = false; + + @Override + public void lock() { + WorkflowInternal.await( + "WorkflowLock.lock", + () -> { + CancellationScope.throwCanceled(); + return !locked; + }); + locked = true; + } + + @Override + public boolean tryLock() { + assertNotReadOnly("WorkflowLock.tryLock"); + if (!locked) { + locked = true; + return true; + } + return false; + } + + @Override + public boolean tryLock(Duration timeout) { + boolean unlocked = + WorkflowInternal.await( + timeout, + "WorkflowLock.tryLock", + () -> { + CancellationScope.throwCanceled(); + return !locked; + }); + if (unlocked) { + locked = true; + return true; + } + return false; + } + + @Override + public void unlock() { + assertNotReadOnly("WorkflowLock.unlock"); + Preconditions.checkState(locked, "WorkflowLock.unlock called when not locked"); + locked = false; + } + + @Override + public boolean isHeld() { + return locked; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowSemaphoreImpl.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowSemaphoreImpl.java new file mode 100644 index 0000000000..d4cc712bd2 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowSemaphoreImpl.java @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.internal.sync; + +import static io.temporal.internal.sync.WorkflowInternal.assertNotReadOnly; + +import com.google.common.base.Preconditions; +import io.temporal.workflow.CancellationScope; +import io.temporal.workflow.WorkflowSemaphore; +import java.time.Duration; + +class WorkflowSemaphoreImpl implements WorkflowSemaphore { + private int currentPermits; + + public WorkflowSemaphoreImpl(int permits) { + this.currentPermits = permits; + } + + @Override + public void acquire() { + acquire(1); + } + + @Override + public void acquire(int permits) { + Preconditions.checkArgument( + permits >= 0, "WorkflowSemaphore.acquire called with negative permits"); + WorkflowInternal.await( + "WorkflowSemaphore.acquire", + () -> { + CancellationScope.throwCanceled(); + return currentPermits >= permits; + }); + currentPermits -= permits; + } + + @Override + public boolean tryAcquire() { + return tryAcquire(1); + } + + @Override + public boolean tryAcquire(Duration timeout) { + return tryAcquire(1, timeout); + } + + @Override + public boolean tryAcquire(int permits) { + assertNotReadOnly("WorkflowSemaphore.tryAcquire"); + Preconditions.checkArgument( + permits >= 0, "WorkflowSemaphore.tryAcquire called with negative permits"); + if (currentPermits >= permits) { + currentPermits -= permits; + return true; + } + return false; + } + + @Override + public boolean tryAcquire(int permits, Duration timeout) { + Preconditions.checkArgument( + permits >= 0, "WorkflowSemaphore.tryAcquire called with negative permits"); + boolean acquired = + 
WorkflowInternal.await( + timeout, + "WorkflowSemaphore.tryAcquire", + () -> { + CancellationScope.throwCanceled(); + return currentPermits >= permits; + }); + if (acquired) { + currentPermits -= permits; + } + return acquired; + } + + @Override + public void release() { + release(1); + } + + @Override + public void release(int permits) { + assertNotReadOnly("WorkflowSemaphore.release"); + Preconditions.checkArgument( + permits >= 0, "WorkflowSemaphore.release called with negative permits"); + currentPermits += permits; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java index 8e4bea4869..99e9696464 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java @@ -507,6 +507,27 @@ public static Promise newFailedPromise(Exception failure) { return WorkflowInternal.newFailedPromise(failure); } + /** + * Creates a {@link WorkflowLock} implementation that can be used from workflow code. + * + * @apiNote The lock returned is not reentrant. If a workflow thread tries to acquire a lock that + * it already holds, the call will block indefinitely. + * @return new instance of {@link WorkflowLock} + */ + public static WorkflowLock newWorkflowLock() { + return WorkflowInternal.newWorkflowLock(); + } + + /** + * Creates a {@link WorkflowSemaphore} implementation that can be used from workflow code. + * + * @param permits the given number of permits for the semaphore. + * @return new instance of {@link WorkflowSemaphore} + */ + public static WorkflowSemaphore newWorkflowSemaphore(int permits) { + return WorkflowInternal.newWorkflowSemaphore(permits); + } + /** * Registers an implementation object. The object must implement at least one interface annotated * with {@link WorkflowInterface}. 
All its methods annotated with @{@link SignalMethod} diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/WorkflowLock.java b/temporal-sdk/src/main/java/io/temporal/workflow/WorkflowLock.java new file mode 100644 index 0000000000..a13ac52f2e --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/workflow/WorkflowLock.java @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.workflow; + +import java.time.Duration; + +/** + * Workflow lock is an alternative to {@link java.util.concurrent.locks.Lock} that is deterministic + * and compatible with Temporal's concurrency model. API is designed to be used in a workflow code + * only. It is not allowed to be used in an activity code. + * + *

In Temporal concurrency model, only one thread in a workflow code can execute at a time. + */ +public interface WorkflowLock { + /** + * Acquires the lock. + * + * @throws io.temporal.failure.CanceledFailure if thread (or current {@link CancellationScope}) + * was canceled. + */ + void lock(); + + /** + * Acquires the lock only if it is free at the time of invocation. + * + * @return true if the lock was acquired and false otherwise + */ + boolean tryLock(); + + /** + * Acquires the lock if it is free within the given waiting time. + * + * @throws io.temporal.failure.CanceledFailure if thread (or current {@link CancellationScope}) + * was canceled. + * @return true if the lock was acquired and false if the waiting time elapsed before the lock was + * acquired. + */ + boolean tryLock(Duration timeout); + + /** Releases the lock. */ + void unlock(); + + /** + * Checks if a lock is held. + * + * @return true if the lock is held and false otherwise. + */ + boolean isHeld(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/WorkflowSemaphore.java b/temporal-sdk/src/main/java/io/temporal/workflow/WorkflowSemaphore.java new file mode 100644 index 0000000000..f8fe93f3b1 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/workflow/WorkflowSemaphore.java @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.workflow; + +import java.time.Duration; + +/** + * Workflow semaphore is an alternative to {@link java.util.concurrent.Semaphore} that is + * deterministic and compatible with Temporal's concurrency model. API is designed to be used in a + * workflow code only. It is not allowed to be used in an activity code. + * + *

In Temporal concurrency model, only one thread in a workflow code can execute at a time. + */ +public interface WorkflowSemaphore { + /** + * Acquires a permit from this semaphore, blocking until one is available. + * + *

Acquires a permit, if one is available and returns immediately, reducing the number of + * available permits by one. + * + * @throws io.temporal.failure.CanceledFailure if thread (or current {@link CancellationScope}) + * was canceled. + */ + void acquire(); + + /** + * Acquires the given number of permits from this semaphore, blocking until all are available. + * + *

Acquires the given number of permits, if they are available, and returns immediately, + * reducing the number of available permits by the given amount. + * + * @param permits the number of permits to acquire + * @throws io.temporal.failure.CanceledFailure if thread (or current {@link CancellationScope}) + * was canceled. + * @throws IllegalArgumentException if permits is negative + */ + void acquire(int permits); + + /** + * Acquires a permit from this semaphore, only if one is available at the time of + * invocation. + * + *

Acquires a permit, if one is available and returns immediately, with the value true, + * reducing the number of available permits by one. + * + * @return true if the permit was acquired and false otherwise + */ + boolean tryAcquire(); + + /** + * Acquires a permit from this semaphore, if one becomes available within the given waiting time. + * + *

Acquires a permit, if one is available and returns immediately, with the value true, + * reducing the number of available permits by one. + * + * @param timeout the maximum time to wait for a permit + * @return true if a permit was acquired and false if the waiting time elapsed before a permit was + * acquired + * @throws io.temporal.failure.CanceledFailure if thread (or current {@link CancellationScope}) + * was canceled. + */ + boolean tryAcquire(Duration timeout); + + /** + * Acquires the given number of permits from this semaphore, only if all are available at the time + * of invocation. + * + *

Acquires the given number of permits, if they are available, and returns immediately, with + * the value true, reducing the number of available permits by the given amount. + * + *

If insufficient permits are available then this method will return immediately with the + * value false and the number of available permits is unchanged. + * + * @param permits the number of permits to acquire + * @return true if the permits were acquired and false otherwise + * @throws IllegalArgumentException if permits is negative + */ + boolean tryAcquire(int permits); + + /** + * Acquires the given number of permits from this semaphore, if all become available within the + * given waiting time. + * + *

Acquires the given number of permits, if they are available and returns immediately, with + * the value true, reducing the number of available permits by the given amount. + * + * @param permits the number of permits to acquire + * @param timeout the maximum duration to wait for the permits + * @return true if the permits were acquired and false if the waiting time elapsed before the + * permits were acquired + * @throws io.temporal.failure.CanceledFailure if thread (or current {@link CancellationScope}) + * was canceled. + * @throws IllegalArgumentException if permits is negative + */ + boolean tryAcquire(int permits, Duration timeout); + + /** + * Releases a permit, returning it to the semaphore. + * + *

There is no requirement that a coroutine that releases a permit must have acquired that + * permit by calling {@link #acquire()}. Correct usage of a semaphore is established by + * programming convention in the application. + */ + void release(); + + /** + * Releases the given number of permits, returning them to the semaphore. + * + *

There is no requirement that a coroutine that releases a permit must have acquired that + * permit by calling {@link #acquire()}. Correct usage of a semaphore is established by + * programming convention in the application. + * + * @param permits the number of permits to release + * @throws IllegalArgumentException if permits is negative + */ + void release(int permits); +} diff --git a/temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalLockTest.java b/temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalLockTest.java new file mode 100644 index 0000000000..f0408238ba --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalLockTest.java @@ -0,0 +1,326 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.sync; + +import static org.junit.Assert.*; + +import io.temporal.client.WorkflowOptions; +import io.temporal.failure.CanceledFailure; +import io.temporal.testing.TestWorkflowEnvironment; +import io.temporal.worker.Worker; +import io.temporal.workflow.*; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; + +public class WorkflowInternalLockTest { + @Rule public final Tracer trace = new Tracer(); + + private static ExecutorService threadPool; + + @BeforeClass + public static void beforeClass() { + threadPool = new ThreadPoolExecutor(1, 1000, 1, TimeUnit.SECONDS, new SynchronousQueue<>()); + } + + @AfterClass + public static void afterClass() { + threadPool.shutdown(); + } + + @Test + public void testThreadInterleaving() { + DeterministicRunner r = + DeterministicRunner.newRunner( + threadPool::submit, + DummySyncWorkflowContext.newDummySyncWorkflowContext(), + () -> { + WorkflowLock l1 = WorkflowInternal.newWorkflowLock(); + WorkflowLock l2 = WorkflowInternal.newWorkflowLock(); + trace.add("root begin"); + l2.lock(); + trace.add("l1.isHeld " + l1.isHeld()); + trace.add("l2.isHeld " + l2.isHeld()); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + l1.lock(); + trace.add("thread1 lock 1 success"); + l2.lock(); + trace.add("thread1 lock 2 success"); + l1.unlock(); + trace.add("thread1 unlock 1 success"); + }, + false) + .start(); + WorkflowThread.newThread( + () -> { + trace.add("thread2 begin"); + l2.unlock(); + trace.add("thread2 unlock 2 success"); + l1.lock(); + trace.add("thread2 lock 1 success"); + l1.unlock(); + trace.add("thread2 unlock 1 success"); + }, + false) + 
.start(); + trace.add("root done"); + }); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + String[] expected = + new String[] { + "root begin", + "l1.isHeld false", + "l2.isHeld true", + "root done", + "thread1 begin", + "thread1 lock 1 success", + "thread2 begin", + "thread2 unlock 2 success", + "thread1 lock 2 success", + "thread1 unlock 1 success", + "thread2 lock 1 success", + "thread2 unlock 1 success", + }; + trace.setExpected(expected); + r.close(); + } + + @Test + public void testLockCanceled() { + DeterministicRunner r = + DeterministicRunner.newRunner( + threadPool::submit, + DummySyncWorkflowContext.newDummySyncWorkflowContext(), + () -> { + WorkflowLock l = WorkflowInternal.newWorkflowLock(); + trace.add("root begin"); + l.lock(); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + try { + l.lock(); + } catch (CanceledFailure e) { + trace.add("thread1 CanceledFailure"); + } + trace.add("thread1 done"); + }, + false) + .start(); + trace.add("root done"); + }); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + r.cancel("test"); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + + String[] expected = + new String[] { + "root begin", "root done", "thread1 begin", "thread1 CanceledFailure", "thread1 done", + }; + trace.setExpected(expected); + } + + @Test + public void testTryLock() { + DeterministicRunner r = + DeterministicRunner.newRunner( + threadPool::submit, + DummySyncWorkflowContext.newDummySyncWorkflowContext(), + () -> { + WorkflowLock l = WorkflowInternal.newWorkflowLock(); + trace.add("root begin"); + l.lock(); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + if (l.tryLock()) { + trace.add("thread1 tryLock success"); + } else { + trace.add("thread1 tryLock failure"); + } + trace.add("thread1 done"); + l.unlock(); + }, + false) + .start(); + WorkflowThread.newThread( + () -> { + trace.add("thread2 begin"); 
+ if (l.tryLock()) { + trace.add("thread2 tryLock success"); + } else { + trace.add("thread2 tryLock failure"); + } + trace.add("thread2 done"); + l.unlock(); + }, + false) + .start(); + trace.add("root done"); + }); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + + String[] expected = + new String[] { + "root begin", + "root done", + "thread1 begin", + "thread1 tryLock failure", + "thread1 done", + "thread2 begin", + "thread2 tryLock success", + "thread2 done", + }; + trace.setExpected(expected); + } + + @WorkflowInterface + public interface WorkflowLockTestWorkflow { + @WorkflowMethod + List test(); + } + + public static class TestLockTimeout implements WorkflowLockTestWorkflow { + @Override + public List test() { + List trace = new ArrayList<>(); + WorkflowLock l = WorkflowInternal.newWorkflowLock(); + trace.add("root begin"); + trace.add("tryLock " + l.tryLock()); + trace.add("tryLock " + l.tryLock()); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + Workflow.sleep(2000); + l.unlock(); + trace.add("thread1 unlock"); + }, + false) + .start(); + // Try to lock again before the above thread unlocks + trace.add("tryLock with timeout " + l.tryLock(Duration.ofMillis(1000))); + // Try to lock again after the above thread unlocks + trace.add("tryLock with timeout " + l.tryLock(Duration.ofMillis(2000))); + trace.add("root done"); + return trace; + } + } + + @Test + public void testLockTimeout() { + TestWorkflowEnvironment testEnv = TestWorkflowEnvironment.newInstance(); + try { + String testTaskQueue = "testTaskQueue"; + Worker worker = testEnv.newWorker(testTaskQueue); + worker.registerWorkflowImplementationTypes(TestLockTimeout.class); + testEnv.start(); + WorkflowLockTestWorkflow workflow = + testEnv + .getWorkflowClient() + .newWorkflowStub( + WorkflowLockTestWorkflow.class, + WorkflowOptions.newBuilder().setTaskQueue(testTaskQueue).build()); + List trace = workflow.test(); + List expected = + Arrays.asList( 
+ "root begin", + "tryLock true", + "tryLock false", + "thread1 begin", + "tryLock with timeout false", + "thread1 unlock", + "tryLock with timeout true", + "root done"); + assertEquals(expected, trace); + } finally { + testEnv.close(); + } + } + + public static class TestTryLockCancelled implements WorkflowLockTestWorkflow { + @Override + public List test() { + List trace = new ArrayList<>(); + WorkflowLock l = WorkflowInternal.newWorkflowLock(); + trace.add("root begin"); + trace.add("tryLock " + l.tryLock()); + WorkflowThread t1 = + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + try { + trace.add("tryLock with timeout " + l.tryLock(Duration.ofMillis(2000))); + } catch (CanceledFailure e) { + trace.add("thread1 CanceledFailure"); + } + trace.add("thread1 done"); + }, + false); + t1.start(); + Workflow.sleep(1000); + t1.cancel(); + trace.add("tryLock with timeout " + l.tryLock(Duration.ofMillis(2000))); + trace.add("root done"); + return trace; + } + } + + @Test + public void tesTryLockCancelled() { + TestWorkflowEnvironment testEnv = TestWorkflowEnvironment.newInstance(); + try { + String testTaskQueue = "testTaskQueue"; + Worker worker = testEnv.newWorker(testTaskQueue); + worker.registerWorkflowImplementationTypes(TestTryLockCancelled.class); + testEnv.start(); + WorkflowLockTestWorkflow workflow = + testEnv + .getWorkflowClient() + .newWorkflowStub( + WorkflowLockTestWorkflow.class, + WorkflowOptions.newBuilder().setTaskQueue(testTaskQueue).build()); + List trace = workflow.test(); + List expected = + Arrays.asList( + "root begin", + "tryLock true", + "thread1 begin", + "thread1 CanceledFailure", + "thread1 done", + "tryLock with timeout false", + "root done"); + assertEquals(expected, trace); + } finally { + testEnv.close(); + } + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalSemaphoreTest.java b/temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalSemaphoreTest.java new file mode 
100644 index 0000000000..834309dafc --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/internal/sync/WorkflowInternalSemaphoreTest.java @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.internal.sync; + +import static org.junit.Assert.assertEquals; + +import io.temporal.client.WorkflowOptions; +import io.temporal.failure.CanceledFailure; +import io.temporal.testing.TestWorkflowEnvironment; +import io.temporal.worker.Worker; +import io.temporal.workflow.*; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.*; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; + +public class WorkflowInternalSemaphoreTest { + @Rule public final Tracer trace = new Tracer(); + + private static ExecutorService threadPool; + + @BeforeClass + public static void beforeClass() { + threadPool = new ThreadPoolExecutor(1, 1000, 1, TimeUnit.SECONDS, new SynchronousQueue<>()); + } + + @AfterClass + public static void afterClass() { + threadPool.shutdown(); + } + + @Test + public void testThreadInterleaving() { + DeterministicRunner r = + 
DeterministicRunner.newRunner( + threadPool::submit, + DummySyncWorkflowContext.newDummySyncWorkflowContext(), + () -> { + WorkflowSemaphore s1 = WorkflowInternal.newWorkflowSemaphore(100); + trace.add("root begin"); + s1.acquire(10); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + s1.acquire(50); + trace.add("thread1 acquire 50 success"); + s1.acquire(50); + trace.add("thread1 acquire 50 success"); + s1.release(100); + trace.add("thread1 release 100 success"); + }, + false) + .start(); + WorkflowThread.newThread( + () -> { + trace.add("thread2 begin"); + s1.release(10); + trace.add("thread2 release 10 success"); + }, + false) + .start(); + trace.add("root done"); + }); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + String[] expected = + new String[] { + "root begin", + "root done", + "thread1 begin", + "thread1 acquire 50 success", + "thread2 begin", + "thread2 release 10 success", + "thread1 acquire 50 success", + "thread1 release 100 success", + }; + trace.setExpected(expected); + r.close(); + } + + @Test + public void testSemaphoreReleaseWithoutAcquire() { + DeterministicRunner r = + DeterministicRunner.newRunner( + threadPool::submit, + DummySyncWorkflowContext.newDummySyncWorkflowContext(), + () -> { + WorkflowSemaphore s1 = WorkflowInternal.newWorkflowSemaphore(0); + trace.add("root begin"); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + s1.acquire(50); + trace.add("thread1 acquire 50 success"); + s1.acquire(50); + trace.add("thread1 acquire 50 success"); + s1.release(100); + trace.add("thread1 release 100 success"); + }, + false) + .start(); + WorkflowThread.newThread( + () -> { + trace.add("thread2 begin"); + // There is no requirement to acquire before release + s1.release(100); + trace.add("thread2 release 100 success"); + }, + false) + .start(); + trace.add("root done"); + }); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + String[] 
expected = + new String[] { + "root begin", + "root done", + "thread1 begin", + "thread2 begin", + "thread2 release 100 success", + "thread1 acquire 50 success", + "thread1 acquire 50 success", + "thread1 release 100 success", + }; + trace.setExpected(expected); + r.close(); + } + + @Test + public void testSemaphoreAcquireCanceled() { + DeterministicRunner r = + DeterministicRunner.newRunner( + threadPool::submit, + DummySyncWorkflowContext.newDummySyncWorkflowContext(), + () -> { + WorkflowSemaphore s = WorkflowInternal.newWorkflowSemaphore(100); + trace.add("root begin"); + s.acquire(100); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + try { + s.acquire(); + } catch (CanceledFailure e) { + trace.add("thread1 CanceledFailure"); + } + trace.add("thread1 done"); + }, + false) + .start(); + trace.add("root done"); + }); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + r.cancel("test"); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + + String[] expected = + new String[] { + "root begin", "root done", "thread1 begin", "thread1 CanceledFailure", "thread1 done", + }; + trace.setExpected(expected); + } + + @Test + public void testTryAcquire() { + DeterministicRunner r = + DeterministicRunner.newRunner( + threadPool::submit, + DummySyncWorkflowContext.newDummySyncWorkflowContext(), + () -> { + WorkflowSemaphore s = WorkflowInternal.newWorkflowSemaphore(100); + trace.add("root begin"); + s.acquire(100); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + if (s.tryAcquire()) { + trace.add("thread1 tryAcquire success"); + } else { + trace.add("thread1 tryAcquire failure"); + } + trace.add("thread1 done"); + s.release(100); + }, + false) + .start(); + WorkflowThread.newThread( + () -> { + trace.add("thread2 begin"); + if (s.tryAcquire()) { + trace.add("thread2 tryAcquire success"); + } else { + trace.add("thread2 tryAcquire failure"); + } + trace.add("thread2 
done"); + s.release(); + }, + false) + .start(); + trace.add("root done"); + }); + r.runUntilAllBlocked(DeterministicRunner.DEFAULT_DEADLOCK_DETECTION_TIMEOUT_MS); + + String[] expected = + new String[] { + "root begin", + "root done", + "thread1 begin", + "thread1 tryAcquire failure", + "thread1 done", + "thread2 begin", + "thread2 tryAcquire success", + "thread2 done", + }; + trace.setExpected(expected); + } + + @WorkflowInterface + public interface WorkflowSemaphoreTestWorkflow { + @WorkflowMethod + List test(); + } + + public static class TestAcquireTimeout implements WorkflowSemaphoreTestWorkflow { + @Override + public List test() { + List trace = new ArrayList<>(); + WorkflowSemaphore s = WorkflowInternal.newWorkflowSemaphore(100); + trace.add("root begin"); + trace.add("tryAcquire " + s.tryAcquire(100)); + trace.add("tryAcquire " + s.tryAcquire(100)); + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + Workflow.sleep(2000); + s.release(100); + trace.add("thread1 release"); + }, + false) + .start(); + // Try to lock again before the above thread unlocks + trace.add("tryAcquire with timeout " + s.tryAcquire(100, Duration.ofMillis(1000))); + // Try to lock again after the above thread unlocks + trace.add("tryAcquire with timeout " + s.tryAcquire(100, Duration.ofMillis(2000))); + trace.add("root done"); + return trace; + } + } + + @Test + public void testAcquireTimeout() { + TestWorkflowEnvironment testEnv = TestWorkflowEnvironment.newInstance(); + try { + String testTaskQueue = "testTaskQueue"; + Worker worker = testEnv.newWorker(testTaskQueue); + worker.registerWorkflowImplementationTypes(TestAcquireTimeout.class); + testEnv.start(); + WorkflowSemaphoreTestWorkflow workflow = + testEnv + .getWorkflowClient() + .newWorkflowStub( + WorkflowSemaphoreTestWorkflow.class, + WorkflowOptions.newBuilder().setTaskQueue(testTaskQueue).build()); + List trace = workflow.test(); + List expected = + Arrays.asList( + "root begin", + "tryAcquire true", + 
"tryAcquire false", + "thread1 begin", + "tryAcquire with timeout false", + "thread1 release", + "tryAcquire with timeout true", + "root done"); + assertEquals(expected, trace); + } finally { + testEnv.close(); + } + } + + public static class TestTryAcquireCancelled implements WorkflowSemaphoreTestWorkflow { + @Override + public List test() { + List trace = new ArrayList<>(); + WorkflowSemaphore s = WorkflowInternal.newWorkflowSemaphore(100); + trace.add("root begin"); + trace.add("tryAcquire " + s.tryAcquire(100)); + WorkflowThread t1 = + WorkflowThread.newThread( + () -> { + trace.add("thread1 begin"); + try { + trace.add("tryAcquire with timeout " + s.tryAcquire(Duration.ofMillis(2000))); + } catch (CanceledFailure e) { + trace.add("thread1 CanceledFailure"); + } + trace.add("thread1 done"); + }, + false); + t1.start(); + Workflow.sleep(1000); + t1.cancel(); + trace.add("tryAcquire with timeout " + s.tryAcquire(Duration.ofMillis(2000))); + trace.add("root done"); + return trace; + } + } + + @Test + public void tesTryLockCancelled() { + TestWorkflowEnvironment testEnv = TestWorkflowEnvironment.newInstance(); + try { + String testTaskQueue = "testTaskQueue"; + Worker worker = testEnv.newWorker(testTaskQueue); + worker.registerWorkflowImplementationTypes(TestTryAcquireCancelled.class); + testEnv.start(); + WorkflowSemaphoreTestWorkflow workflow = + testEnv + .getWorkflowClient() + .newWorkflowStub( + WorkflowSemaphoreTestWorkflow.class, + WorkflowOptions.newBuilder().setTaskQueue(testTaskQueue).build()); + List trace = workflow.test(); + List expected = + Arrays.asList( + "root begin", + "tryAcquire true", + "thread1 begin", + "thread1 CanceledFailure", + "thread1 done", + "tryAcquire with timeout false", + "root done"); + assertEquals(expected, trace); + } finally { + testEnv.close(); + } + } +} From f7c7341ff5364ad7e38a620160ac454cf45573e5 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Tue, 30 Jul 2024 11:32:36 -0700 Subject: [PATCH 07/25] Fix transition in 
LA when handling canceled child wf (#2156) * Fix transition in LA when handling canceled child wf --- .../LocalActivityStateMachine.java | 7 + .../LocalActivityStateMachine.puml | 1 + .../LocalActivityStateMachineTest.java | 102 ++++- .../statemachines/TestHistoryBuilder.java | 15 +- .../LocalActivityAfterCancelTest.java | 119 ++++++ .../testLocalActivityAfterCancelTest.json | 386 ++++++++++++++++++ 6 files changed, 626 insertions(+), 4 deletions(-) create mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/activityTests/LocalActivityAfterCancelTest.java create mode 100644 temporal-sdk/src/test/resources/testLocalActivityAfterCancelTest.json diff --git a/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.java b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.java index 60842f368d..2adc856816 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.java @@ -121,6 +121,13 @@ enum State { State.REQUEST_PREPARED, LocalActivityStateMachine::sendRequest) .add(State.REQUEST_PREPARED, ExplicitEvent.MARK_AS_SENT, State.REQUEST_SENT) + // This is to cover an edge case where the event loop is + // run more than once while processing a workflow task. 
+ // This can happen due to external cancellation + .add( + State.REQUEST_PREPARED, + ExplicitEvent.NON_REPLAY_WORKFLOW_TASK_STARTED, + State.REQUEST_PREPARED) .add( State.REQUEST_SENT, ExplicitEvent.NON_REPLAY_WORKFLOW_TASK_STARTED, diff --git a/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.puml b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.puml index 27e068360c..330a65b1a5 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.puml +++ b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/LocalActivityStateMachine.puml @@ -31,6 +31,7 @@ EXECUTING --> REQUEST_PREPARED: SCHEDULE MARKER_COMMAND_CREATED --> RESULT_NOTIFIED: RECORD_MARKER REPLAYING --> WAITING_MARKER_EVENT: SCHEDULE REQUEST_PREPARED --> REQUEST_SENT: MARK_AS_SENT +REQUEST_PREPARED --> REQUEST_PREPARED: NON_REPLAY_WORKFLOW_TASK_STARTED REQUEST_SENT --> REQUEST_SENT: NON_REPLAY_WORKFLOW_TASK_STARTED REQUEST_SENT --> MARKER_COMMAND_CREATED: HANDLE_RESULT RESULT_NOTIFIED --> MARKER_COMMAND_RECORDED: MARKER_RECORDED diff --git a/temporal-sdk/src/test/java/io/temporal/internal/statemachines/LocalActivityStateMachineTest.java b/temporal-sdk/src/test/java/io/temporal/internal/statemachines/LocalActivityStateMachineTest.java index bdac8ca23f..42b1fb3a18 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/statemachines/LocalActivityStateMachineTest.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/statemachines/LocalActivityStateMachineTest.java @@ -27,20 +27,25 @@ import static org.junit.Assert.fail; import io.temporal.api.command.v1.Command; +import io.temporal.api.command.v1.StartChildWorkflowExecutionCommandAttributes; import io.temporal.api.common.v1.ActivityType; import io.temporal.api.common.v1.Payloads; +import io.temporal.api.common.v1.WorkflowExecution; import io.temporal.api.enums.v1.CommandType; import io.temporal.api.enums.v1.EventType; 
-import io.temporal.api.history.v1.MarkerRecordedEventAttributes; +import io.temporal.api.history.v1.*; import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponse; import io.temporal.api.workflowservice.v1.RespondActivityTaskCompletedRequest; import io.temporal.common.converter.DataConverter; import io.temporal.common.converter.DefaultDataConverter; import io.temporal.internal.history.LocalActivityMarkerUtils; import io.temporal.internal.worker.LocalActivityResult; +import io.temporal.workflow.ChildWorkflowCancellationType; +import io.temporal.workflow.Functions; import java.util.ArrayList; import java.util.List; import java.util.Optional; +import java.util.concurrent.atomic.AtomicReference; import org.junit.AfterClass; import org.junit.Test; @@ -343,4 +348,99 @@ protected void buildWorkflow(AsyncWorkflowBuilder builder) { List commands = stateMachines.takeCommands(); assertTrue(commands.isEmpty()); } + + @Test + public void testLocalActivityStateMachineDuplicateTask() { + class TestListener extends TestEntityManagerListenerBase { + @Override + protected void buildWorkflow(AsyncWorkflowBuilder builder) { + StartChildWorkflowExecutionParameters childRequest = + new StartChildWorkflowExecutionParameters( + StartChildWorkflowExecutionCommandAttributes.newBuilder(), + ChildWorkflowCancellationType.WAIT_CANCELLATION_REQUESTED); + ExecuteLocalActivityParameters parameters1 = + new ExecuteLocalActivityParameters( + PollActivityTaskQueueResponse.newBuilder() + .setActivityId("id1") + .setActivityType(ActivityType.newBuilder().setName("activity1")), + null, + System.currentTimeMillis(), + null, + false, + null); + // TODO: This is a workaround for the lack of support for child workflow in the test + // framework. + // The test framework has no support for state machines with multiple callbacks. 
+ AtomicReference cc = new AtomicReference<>(); + AtomicReference, Exception>> completionCallback = + new AtomicReference<>(); + builder + .add2( + (r, c) -> + cc.set( + stateMachines.startChildWorkflow( + childRequest, + c, + (r1, c1) -> { + completionCallback.get().apply(r1, c1); + }))) + .add((r) -> cc.get().apply()) + ., Exception>add2( + (r, c) -> { + completionCallback.set(c); + }) + ., LocalActivityCallback.LocalActivityFailedException>add2( + (r, c) -> stateMachines.scheduleLocalActivityTask(parameters1, c)); + } + } + /* + 1: EVENT_TYPE_WORKFLOW_EXECUTION_STARTED + 2: EVENT_TYPE_WORKFLOW_TASK_SCHEDULED + 3: EVENT_TYPE_WORKFLOW_TASK_STARTED + 4: EVENT_TYPE_WORKFLOW_TASK_COMPLETED + 5: EVENT_TYPE_START_CHILD_WORKFLOW_EXECUTION_INITIATED + 6: EVENT_TYPE_CHILD_WORKFLOW_EXECUTION_STARTED + 7: EVENT_TYPE_WORKFLOW_TASK_SCHEDULED + 8: EVENT_TYPE_WORKFLOW_TASK_STARTED + 9: EVENT_TYPE_WORKFLOW_TASK_COMPLETED + 10: EVENT_TYPE_REQUEST_CANCEL_EXTERNAL_WORKFLOW_EXECUTION_INITIATED + 11: EVENT_TYPE_EXTERNAL_WORKFLOW_EXECUTION_CANCEL_REQUESTED + 12: EVENT_TYPE_WORKFLOW_TASK_SCHEDULED + 13: EVENT_TYPE_WORKFLOW_TASK_STARTED + */ + TestHistoryBuilder h = + new TestHistoryBuilder() + .add(EventType.EVENT_TYPE_WORKFLOW_EXECUTION_STARTED) + .addWorkflowTask() + .add( + EventType.EVENT_TYPE_START_CHILD_WORKFLOW_EXECUTION_INITIATED, + StartChildWorkflowExecutionInitiatedEventAttributes.newBuilder().build()) + .add( + EventType.EVENT_TYPE_CHILD_WORKFLOW_EXECUTION_STARTED, + ChildWorkflowExecutionStartedEventAttributes.newBuilder() + .setInitiatedEventId(5) + .build()) + .addWorkflowTask() + .add( + EventType.EVENT_TYPE_REQUEST_CANCEL_EXTERNAL_WORKFLOW_EXECUTION_INITIATED, + RequestCancelExternalWorkflowExecutionInitiatedEventAttributes.newBuilder().build()) + .addWorkflowTaskScheduled() + .add( + EventType.EVENT_TYPE_EXTERNAL_WORKFLOW_EXECUTION_CANCEL_REQUESTED, + ExternalWorkflowExecutionCancelRequestedEventAttributes.newBuilder() + .setInitiatedEventId(10) + .build()) + 
.addWorkflowTaskScheduled() + .addWorkflowTaskStarted(); + + TestListener listener = new TestListener(); + stateMachines = newStateMachines(listener); + + h.handleWorkflowTask(stateMachines); + List requests = stateMachines.takeLocalActivityRequests(); + assertEquals(1, requests.size()); + assertEquals("id1", requests.get(0).getActivityId()); + List commands = stateMachines.takeCommands(); + assertTrue(commands.isEmpty()); + } } diff --git a/temporal-sdk/src/test/java/io/temporal/internal/statemachines/TestHistoryBuilder.java b/temporal-sdk/src/test/java/io/temporal/internal/statemachines/TestHistoryBuilder.java index 1595b40395..2c85e06e5b 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/statemachines/TestHistoryBuilder.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/statemachines/TestHistoryBuilder.java @@ -553,18 +553,27 @@ private HistoryEvent newAttributes(EventType type, Object attributes) { result.setWorkflowExecutionUpdateCompletedEventAttributes( (WorkflowExecutionUpdateCompletedEventAttributes) attributes); break; + case EVENT_TYPE_START_CHILD_WORKFLOW_EXECUTION_INITIATED: + result.setStartChildWorkflowExecutionInitiatedEventAttributes( + (StartChildWorkflowExecutionInitiatedEventAttributes) attributes); + break; + case EVENT_TYPE_REQUEST_CANCEL_EXTERNAL_WORKFLOW_EXECUTION_INITIATED: + result.setRequestCancelExternalWorkflowExecutionInitiatedEventAttributes( + (RequestCancelExternalWorkflowExecutionInitiatedEventAttributes) attributes); + break; + case EVENT_TYPE_EXTERNAL_WORKFLOW_EXECUTION_CANCEL_REQUESTED: + result.setExternalWorkflowExecutionCancelRequestedEventAttributes( + (ExternalWorkflowExecutionCancelRequestedEventAttributes) attributes); + break; case EVENT_TYPE_UNSPECIFIED: case EVENT_TYPE_WORKFLOW_EXECUTION_FAILED: case EVENT_TYPE_WORKFLOW_EXECUTION_TIMED_OUT: case EVENT_TYPE_WORKFLOW_EXECUTION_CANCEL_REQUESTED: case EVENT_TYPE_WORKFLOW_EXECUTION_CANCELED: - case 
EVENT_TYPE_REQUEST_CANCEL_EXTERNAL_WORKFLOW_EXECUTION_INITIATED: case EVENT_TYPE_REQUEST_CANCEL_EXTERNAL_WORKFLOW_EXECUTION_FAILED: - case EVENT_TYPE_EXTERNAL_WORKFLOW_EXECUTION_CANCEL_REQUESTED: case EVENT_TYPE_WORKFLOW_EXECUTION_TERMINATED: case EVENT_TYPE_WORKFLOW_EXECUTION_CONTINUED_AS_NEW: - case EVENT_TYPE_START_CHILD_WORKFLOW_EXECUTION_INITIATED: case EVENT_TYPE_START_CHILD_WORKFLOW_EXECUTION_FAILED: case EVENT_TYPE_SIGNAL_EXTERNAL_WORKFLOW_EXECUTION_INITIATED: case EVENT_TYPE_SIGNAL_EXTERNAL_WORKFLOW_EXECUTION_FAILED: diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/activityTests/LocalActivityAfterCancelTest.java b/temporal-sdk/src/test/java/io/temporal/workflow/activityTests/LocalActivityAfterCancelTest.java new file mode 100644 index 0000000000..88ea3439e8 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/workflow/activityTests/LocalActivityAfterCancelTest.java @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.workflow.activityTests; + +import static org.junit.Assert.assertThrows; + +import io.temporal.activity.LocalActivityOptions; +import io.temporal.api.enums.v1.EventType; +import io.temporal.api.enums.v1.ParentClosePolicy; +import io.temporal.client.WorkflowClient; +import io.temporal.client.WorkflowFailedException; +import io.temporal.client.WorkflowStub; +import io.temporal.failure.TemporalFailure; +import io.temporal.testing.WorkflowReplayer; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.workflow.*; +import io.temporal.workflow.shared.TestActivities.TestActivitiesImpl; +import io.temporal.workflow.shared.TestActivities.VariousTestActivities; +import io.temporal.workflow.shared.TestWorkflows; +import io.temporal.workflow.shared.TestWorkflows.TestWorkflow1; +import java.time.Duration; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +public class LocalActivityAfterCancelTest { + private final TestActivitiesImpl activitiesImpl = new TestActivitiesImpl(); + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder() + .setWorkflowTypes(TestLocalActivityRetry.class, BlockingWorkflow.class) + .setActivityImplementations(activitiesImpl) + .build(); + + @Test + public void localActivityAfterChildWorkflowCanceled() { + TestWorkflow1 workflowStub = + testWorkflowRule.newWorkflowStubTimeoutOptions(TestWorkflow1.class); + WorkflowClient.execute(workflowStub::execute, "sada"); + WorkflowStub.fromTyped(workflowStub).cancel(); + WorkflowFailedException exception = + Assert.assertThrows(WorkflowFailedException.class, () -> workflowStub.execute("sada")); + Assert.assertEquals( + EventType.EVENT_TYPE_WORKFLOW_EXECUTION_CANCELED, exception.getWorkflowCloseEventType()); + } + + @Test + public void testLocalActivityAfterChildWorkflowCanceledReplay() { + assertThrows( + RuntimeException.class, + () -> + WorkflowReplayer.replayWorkflowExecutionFromResource( + 
"testLocalActivityAfterCancelTest.json", + LocalActivityAfterCancelTest.TestLocalActivityRetry.class)); + } + + @WorkflowInterface + public static class BlockingWorkflow implements TestWorkflows.TestWorkflowReturnString { + @Override + public String execute() { + Workflow.await(() -> false); + return ""; + } + } + + public static class TestLocalActivityRetry implements TestWorkflow1 { + + @Override + public String execute(String taskQueue) { + try { + ChildWorkflowOptions childOptions = + ChildWorkflowOptions.newBuilder() + .setWorkflowId(Workflow.getInfo().getWorkflowId() + "-child1") + .setCancellationType(ChildWorkflowCancellationType.WAIT_CANCELLATION_REQUESTED) + .setParentClosePolicy(ParentClosePolicy.PARENT_CLOSE_POLICY_REQUEST_CANCEL) + .validateAndBuildWithDefaults(); + TestWorkflows.TestWorkflowReturnString child = + Workflow.newChildWorkflowStub( + TestWorkflows.TestWorkflowReturnString.class, childOptions); + child.execute(); + } catch (TemporalFailure e) { + if (CancellationScope.current().isCancelRequested()) { + Workflow.newDetachedCancellationScope( + () -> { + VariousTestActivities act = + Workflow.newLocalActivityStub( + VariousTestActivities.class, + LocalActivityOptions.newBuilder() + .setStartToCloseTimeout(Duration.ofSeconds(5)) + .validateAndBuildWithDefaults()); + act.activity1(10); + }) + .run(); + throw e; + } + } + return "dsadsa"; + } + } +} diff --git a/temporal-sdk/src/test/resources/testLocalActivityAfterCancelTest.json b/temporal-sdk/src/test/resources/testLocalActivityAfterCancelTest.json new file mode 100644 index 0000000000..829a60ed92 --- /dev/null +++ b/temporal-sdk/src/test/resources/testLocalActivityAfterCancelTest.json @@ -0,0 +1,386 @@ +{ + "events": [ + { + "eventId": "1", + "eventTime": "2024-07-26T20:33:46.186015Z", + "eventType": "EVENT_TYPE_WORKFLOW_EXECUTION_STARTED", + "taskId": "1052132", + "workflowExecutionStartedEventAttributes": { + "workflowType": { + "name": "TestWorkflow1" + }, + "taskQueue": { + "name": 
"WorkflowTest-localActivityAfterChildWorkflowCanceled-2900c992-1205-4ca4-9334-c229b0d7f9ce", + "kind": "TASK_QUEUE_KIND_NORMAL" + }, + "input": { + "payloads": [ + { + "metadata": { + "encoding": "anNvbi9wbGFpbg==" + }, + "data": "InNhZGEi" + } + ] + }, + "workflowExecutionTimeout": "0s", + "workflowRunTimeout": "200s", + "workflowTaskTimeout": "5s", + "originalExecutionRunId": "656867fc-004f-4108-bb2e-1ec880bea2ec", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "firstExecutionRunId": "656867fc-004f-4108-bb2e-1ec880bea2ec", + "attempt": 1, + "firstWorkflowTaskBackoff": "0s", + "header": {}, + "workflowId": "0ae91468-42a9-4ed9-a0a6-2ca607589f00" + } + }, + { + "eventId": "2", + "eventTime": "2024-07-26T20:33:46.186060Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_SCHEDULED", + "taskId": "1052133", + "workflowTaskScheduledEventAttributes": { + "taskQueue": { + "name": "WorkflowTest-localActivityAfterChildWorkflowCanceled-2900c992-1205-4ca4-9334-c229b0d7f9ce", + "kind": "TASK_QUEUE_KIND_NORMAL" + }, + "startToCloseTimeout": "5s", + "attempt": 1 + } + }, + { + "eventId": "3", + "eventTime": "2024-07-26T20:33:46.187952Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_STARTED", + "taskId": "1052139", + "workflowTaskStartedEventAttributes": { + "scheduledEventId": "2", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "requestId": "ca34ead6-8853-4521-9b00-8a83507a4a7c", + "historySizeBytes": "477" + } + }, + { + "eventId": "4", + "eventTime": "2024-07-26T20:33:46.292342Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_COMPLETED", + "taskId": "1052143", + "workflowTaskCompletedEventAttributes": { + "scheduledEventId": "2", + "startedEventId": "3", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "workerVersion": {}, + "sdkMetadata": { + "langUsedFlags": [ + 1 + ] + }, + "meteringMetadata": {} + } + }, + { + "eventId": "5", + "eventTime": "2024-07-26T20:33:46.292481Z", + "eventType": "EVENT_TYPE_START_CHILD_WORKFLOW_EXECUTION_INITIATED", + "taskId": 
"1052144", + "startChildWorkflowExecutionInitiatedEventAttributes": { + "namespace": "UnitTest", + "namespaceId": "ffe13594-49ff-4f24-8d0b-18a445504f60", + "workflowId": "0ae91468-42a9-4ed9-a0a6-2ca607589f00-child1", + "workflowType": { + "name": "TestWorkflowReturnString" + }, + "taskQueue": { + "name": "WorkflowTest-localActivityAfterChildWorkflowCanceled-2900c992-1205-4ca4-9334-c229b0d7f9ce", + "kind": "TASK_QUEUE_KIND_NORMAL" + }, + "workflowExecutionTimeout": "0s", + "workflowRunTimeout": "0s", + "workflowTaskTimeout": "10s", + "parentClosePolicy": "PARENT_CLOSE_POLICY_REQUEST_CANCEL", + "workflowTaskCompletedEventId": "4", + "workflowIdReusePolicy": "WORKFLOW_ID_REUSE_POLICY_ALLOW_DUPLICATE", + "header": {} + } + }, + { + "eventId": "6", + "eventTime": "2024-07-26T20:33:46.195588Z", + "eventType": "EVENT_TYPE_WORKFLOW_EXECUTION_CANCEL_REQUESTED", + "taskId": "1052145", + "workflowExecutionCancelRequestedEventAttributes": { + "identity": "20074@Quinn-Klassens-MacBook-Pro.local" + } + }, + { + "eventId": "7", + "eventTime": "2024-07-26T20:33:46.292522Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_SCHEDULED", + "taskId": "1052146", + "workflowTaskScheduledEventAttributes": { + "taskQueue": { + "name": "20074@Quinn-Klassens-MacBook-Pro.local:67459403-89e6-46a5-826e-c41b7581e287", + "kind": "TASK_QUEUE_KIND_STICKY", + "normalName": "WorkflowTest-localActivityAfterChildWorkflowCanceled-2900c992-1205-4ca4-9334-c229b0d7f9ce" + }, + "startToCloseTimeout": "5s", + "attempt": 1 + } + }, + { + "eventId": "8", + "eventTime": "2024-07-26T20:33:46.297397Z", + "eventType": "EVENT_TYPE_CHILD_WORKFLOW_EXECUTION_STARTED", + "taskId": "1052156", + "childWorkflowExecutionStartedEventAttributes": { + "namespace": "UnitTest", + "namespaceId": "ffe13594-49ff-4f24-8d0b-18a445504f60", + "initiatedEventId": "5", + "workflowExecution": { + "workflowId": "0ae91468-42a9-4ed9-a0a6-2ca607589f00-child1", + "runId": "d272610e-9d7b-45d6-916c-496b1b7facb6" + }, + "workflowType": { + "name": 
"TestWorkflowReturnString" + }, + "header": {} + } + }, + { + "eventId": "9", + "eventTime": "2024-07-26T20:33:46.297738Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_STARTED", + "taskId": "1052159", + "workflowTaskStartedEventAttributes": { + "scheduledEventId": "7", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "requestId": "aabebe33-340e-43a2-a39e-5cfd53985c47", + "historySizeBytes": "1393" + } + }, + { + "eventId": "10", + "eventTime": "2024-07-26T20:33:46.309659Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_COMPLETED", + "taskId": "1052173", + "workflowTaskCompletedEventAttributes": { + "scheduledEventId": "7", + "startedEventId": "9", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "workerVersion": {}, + "meteringMetadata": {} + } + }, + { + "eventId": "11", + "eventTime": "2024-07-26T20:33:46.309720Z", + "eventType": "EVENT_TYPE_REQUEST_CANCEL_EXTERNAL_WORKFLOW_EXECUTION_INITIATED", + "taskId": "1052174", + "requestCancelExternalWorkflowExecutionInitiatedEventAttributes": { + "workflowTaskCompletedEventId": "10", + "namespaceId": "ffe13594-49ff-4f24-8d0b-18a445504f60", + "workflowExecution": { + "workflowId": "0ae91468-42a9-4ed9-a0a6-2ca607589f00-child1" + }, + "childWorkflowOnly": true + } + }, + { + "eventId": "12", + "eventTime": "2024-07-26T20:33:46.312159Z", + "eventType": "EVENT_TYPE_EXTERNAL_WORKFLOW_EXECUTION_CANCEL_REQUESTED", + "taskId": "1052182", + "externalWorkflowExecutionCancelRequestedEventAttributes": { + "initiatedEventId": "11", + "namespace": "UnitTest", + "namespaceId": "ffe13594-49ff-4f24-8d0b-18a445504f60", + "workflowExecution": { + "workflowId": "0ae91468-42a9-4ed9-a0a6-2ca607589f00-child1" + } + } + }, + { + "eventId": "13", + "eventTime": "2024-07-26T20:33:46.312164Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_SCHEDULED", + "taskId": "1052183", + "workflowTaskScheduledEventAttributes": { + "taskQueue": { + "name": "20074@Quinn-Klassens-MacBook-Pro.local:67459403-89e6-46a5-826e-c41b7581e287", + "kind": 
"TASK_QUEUE_KIND_STICKY", + "normalName": "WorkflowTest-localActivityAfterChildWorkflowCanceled-2900c992-1205-4ca4-9334-c229b0d7f9ce" + }, + "startToCloseTimeout": "5s", + "attempt": 1 + } + }, + { + "eventId": "14", + "eventTime": "2024-07-26T20:33:46.312882Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_STARTED", + "taskId": "1052190", + "workflowTaskStartedEventAttributes": { + "scheduledEventId": "13", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "requestId": "7d7c24b4-1c8e-4bfd-aeaf-0ebf492cb8f7", + "historySizeBytes": "2023" + } + }, + { + "eventId": "15", + "eventTime": "2024-07-26T20:33:46.361586Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_FAILED", + "taskId": "1052201", + "workflowTaskFailedEventAttributes": { + "scheduledEventId": "13", + "startedEventId": "14", + "cause": "WORKFLOW_TASK_FAILED_CAUSE_UNHANDLED_COMMAND", + "failure": { + "message": "UnhandledCommand", + "serverFailureInfo": {} + }, + "identity": "20074@Quinn-Klassens-MacBook-Pro.local" + } + }, + { + "eventId": "16", + "eventTime": "2024-07-26T20:33:46.324307Z", + "eventType": "EVENT_TYPE_CHILD_WORKFLOW_EXECUTION_CANCELED", + "taskId": "1052202", + "childWorkflowExecutionCanceledEventAttributes": { + "namespace": "UnitTest", + "namespaceId": "ffe13594-49ff-4f24-8d0b-18a445504f60", + "workflowExecution": { + "workflowId": "0ae91468-42a9-4ed9-a0a6-2ca607589f00-child1", + "runId": "d272610e-9d7b-45d6-916c-496b1b7facb6" + }, + "workflowType": { + "name": "TestWorkflowReturnString" + }, + "initiatedEventId": "5", + "startedEventId": "8" + } + }, + { + "eventId": "17", + "eventTime": "2024-07-26T20:33:46.361595Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_SCHEDULED", + "taskId": "1052203", + "workflowTaskScheduledEventAttributes": { + "taskQueue": { + "name": "WorkflowTest-localActivityAfterChildWorkflowCanceled-2900c992-1205-4ca4-9334-c229b0d7f9ce", + "kind": "TASK_QUEUE_KIND_NORMAL" + }, + "startToCloseTimeout": "5s", + "attempt": 1 + } + }, + { + "eventId": "18", + "eventTime": 
"2024-07-26T20:33:46.363845Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_STARTED", + "taskId": "1052206", + "workflowTaskStartedEventAttributes": { + "scheduledEventId": "17", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "requestId": "27f82ca1-a459-42fc-bc21-f32a61b8ab26", + "historySizeBytes": "2547" + } + }, + { + "eventId": "19", + "eventTime": "2024-07-26T20:33:46.372112Z", + "eventType": "EVENT_TYPE_WORKFLOW_TASK_COMPLETED", + "taskId": "1052210", + "workflowTaskCompletedEventAttributes": { + "scheduledEventId": "17", + "startedEventId": "18", + "identity": "20074@Quinn-Klassens-MacBook-Pro.local", + "workerVersion": {}, + "meteringMetadata": {} + } + }, + { + "eventId": "20", + "eventTime": "2024-07-26T20:33:46.372129Z", + "eventType": "EVENT_TYPE_MARKER_RECORDED", + "taskId": "1052211", + "markerRecordedEventAttributes": { + "markerName": "LocalActivity", + "details": { + "activityId": { + "payloads": [ + { + "metadata": { + "encoding": "anNvbi9wbGFpbg==" + }, + "data": "IjYyZDgxMTgxLWVlYzQtM2M5YS1hZTA4LWJiMTMzMDQzYzc5MSI=" + } + ] + }, + "input": { + "payloads": [ + { + "metadata": { + "encoding": "anNvbi9wbGFpbg==" + }, + "data": "MTA=" + } + ] + }, + "meta": { + "payloads": [ + { + "metadata": { + "encoding": "anNvbi9wbGFpbg==" + }, + "data": "eyJmaXJzdFNrZCI6MTcyMjAyNjAyNjM2OCwiYXRwdCI6MSwiYmFja29mZiI6bnVsbH0=" + } + ] + }, + "result": { + "payloads": [ + { + "metadata": { + "encoding": "anNvbi9wbGFpbg==" + }, + "data": "MTA=" + } + ] + }, + "time": { + "payloads": [ + { + "metadata": { + "encoding": "anNvbi9wbGFpbg==" + }, + "data": "MTcyMjAyNjAyNjM2Mw==" + } + ] + }, + "type": { + "payloads": [ + { + "metadata": { + "encoding": "anNvbi9wbGFpbg==" + }, + "data": "ImN1c3RvbUFjdGl2aXR5MSI=" + } + ] + } + }, + "workflowTaskCompletedEventId": "19" + } + }, + { + "eventId": "21", + "eventTime": "2024-07-26T20:33:46.372132Z", + "eventType": "EVENT_TYPE_WORKFLOW_EXECUTION_CANCELED", + "taskId": "1052212", + 
"workflowExecutionCanceledEventAttributes": { + "workflowTaskCompletedEventId": "19" + } + } + ] +} \ No newline at end of file From e5c08a194ceaf75bfeb9e56815d8f37bbb45e257 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Tue, 30 Jul 2024 13:03:58 -0700 Subject: [PATCH 08/25] Enable next retry delay test for server (#2129) Enable next retry delay test for server --- .../java/io/temporal/activity/ActivityNextRetryDelayTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/temporal-sdk/src/test/java/io/temporal/activity/ActivityNextRetryDelayTest.java b/temporal-sdk/src/test/java/io/temporal/activity/ActivityNextRetryDelayTest.java index 54acb2be6f..73b1480210 100644 --- a/temporal-sdk/src/test/java/io/temporal/activity/ActivityNextRetryDelayTest.java +++ b/temporal-sdk/src/test/java/io/temporal/activity/ActivityNextRetryDelayTest.java @@ -21,7 +21,6 @@ package io.temporal.activity; import static org.junit.Assert.*; -import static org.junit.Assume.assumeFalse; import io.temporal.failure.ApplicationFailure; import io.temporal.testing.internal.SDKTestOptions; @@ -46,8 +45,6 @@ public class ActivityNextRetryDelayTest { @Test public void activityNextRetryDelay() { - assumeFalse( - "Real Server doesn't support next retry delay yet", SDKTestWorkflowRule.useExternalService); TestWorkflowReturnDuration workflow = testWorkflowRule.newWorkflowStub(TestWorkflowReturnDuration.class); Duration result = workflow.execute(false); From 1acafa392348f3c9e61a29fb074b4fda45cf5523 Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Tue, 30 Jul 2024 15:28:57 -0700 Subject: [PATCH 09/25] Ensure shutdown of LA slot queue isn't swallowed (#2161) --- .../LocalActivitySlotSupplierQueue.java | 7 +++--- .../internal/worker/LocalActivityWorker.java | 1 + .../worker/WorkflowSlotsSmallSizeTests.java | 24 +++++++++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java 
b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java index 94892ab499..4cf18e090c 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java @@ -84,6 +84,9 @@ private void processQueue() { SlotPermit slotPermit; try { slotPermit = slotSupplier.reserveSlot(request.data); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return; } catch (Exception e) { log.error( "Error reserving local activity slot, dropped activity id {}", @@ -131,8 +134,4 @@ void submitAttempt(SlotReservationData data, boolean isRetry, LocalActivityAttem newExecutionsBackpressureSemaphore.release(); } } - - TrackingSlotSupplier getSlotSupplier() { - return slotSupplier; - } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java index 8fbd5771c3..a15e0c8769 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivityWorker.java @@ -717,6 +717,7 @@ public CompletableFuture shutdown(ShutdownManager shutdownManager, boolean @Override public void awaitTermination(long timeout, TimeUnit unit) { + slotQueue.shutdown(); long timeoutMillis = unit.toMillis(timeout); ShutdownManager.awaitTermination(scheduledExecutor, timeoutMillis); } diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java index 1d23e8f349..ba39fd335b 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java @@ -47,6 +47,7 @@ import 
java.util.List; import java.util.Map; import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -249,6 +250,29 @@ public void TestLocalActivitySlotAtLimit() throws InterruptedException { MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); } + @Test + public void TestLocalActivityShutdownWhileWaitingOnSlot() throws InterruptedException { + testWorkflowRule.getTestEnvironment().start(); + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + TestWorkflow workflow = + client.newWorkflowStub( + TestWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::workflow, activitiesAreLocal); + workflow.unblock(); + parallelSemRunning.acquire(2); + testWorkflowRule.getTestEnvironment().getWorkerFactory().shutdownNow(); + parallelSemBlocked.release(2); + testWorkflowRule.getTestEnvironment().getWorkerFactory().awaitTermination(3, TimeUnit.SECONDS); + // All slots should be available + assertWorkerSlotCount( + MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, + MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, + MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + } + @Test public void TestLocalActivitySlotHitsCapacity() throws InterruptedException { testWorkflowRule.getTestEnvironment().start(); From 27a1fc25242dd5d00d79fbc897b48b433087f68b Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Wed, 31 Jul 2024 09:22:42 -0700 Subject: [PATCH 10/25] Add support for query in listSchedules (#2163) --- .../ListScheduleListDescriptionIterator.java | 17 ++++++++++- .../client/schedules/ScheduleClient.java | 10 +++++++ .../client/schedules/ScheduleClientImpl.java | 10 +++++-- .../ScheduleClientCallsInterceptor.java | 8 +++++- .../client/RootScheduleClientInvoker.java | 2 +- .../client/schedules/ScheduleTest.java | 28 +++++++++++++++---- 6 files changed, 65 insertions(+), 10 deletions(-) diff --git 
a/temporal-sdk/src/main/java/io/temporal/client/ListScheduleListDescriptionIterator.java b/temporal-sdk/src/main/java/io/temporal/client/ListScheduleListDescriptionIterator.java index bb6ced0ff3..1d76866b96 100644 --- a/temporal-sdk/src/main/java/io/temporal/client/ListScheduleListDescriptionIterator.java +++ b/temporal-sdk/src/main/java/io/temporal/client/ListScheduleListDescriptionIterator.java @@ -34,6 +34,7 @@ public final class ListScheduleListDescriptionIterator extends EagerPaginator { private final @Nonnull String namespace; + private final @Nullable String query; private final @Nullable Integer pageSize; private final @Nonnull GenericWorkflowClient genericClient; @@ -42,6 +43,18 @@ public ListScheduleListDescriptionIterator( @Nullable Integer pageSize, @Nonnull GenericWorkflowClient genericClient) { this.namespace = namespace; + this.query = null; + this.pageSize = pageSize; + this.genericClient = genericClient; + } + + public ListScheduleListDescriptionIterator( + @Nonnull String namespace, + @Nullable String query, + @Nullable Integer pageSize, + @Nonnull GenericWorkflowClient genericClient) { + this.namespace = namespace; + this.query = query; this.pageSize = pageSize; this.genericClient = genericClient; } @@ -51,10 +64,12 @@ CompletableFuture performRequest(@Nonnull ByteString next ListSchedulesRequest.Builder request = ListSchedulesRequest.newBuilder().setNamespace(namespace).setNextPageToken(nextPageToken); + if (query != null) { + request.setQuery(query); + } if (pageSize != null) { request.setMaximumPageSize(pageSize); } - return genericClient.listSchedulesAsync(request.build()); } diff --git a/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClient.java b/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClient.java index afd69b9cf5..5b30948dc3 100644 --- a/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClient.java +++ b/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClient.java @@ 
-87,4 +87,14 @@ static ScheduleClient newInstance(WorkflowServiceStubs service, ScheduleClientOp * @return sequential stream that performs remote pagination under the hood */ Stream listSchedules(@Nullable Integer pageSize); + + /** + * List schedules. + * + * @param query Temporal Visibility Query, for syntax see Visibility docs + * @param pageSize how many results to fetch from the Server at a time. Default is 100. + * @return sequential stream that performs remote pagination under the hood + */ + Stream listSchedules(@Nullable String query, @Nullable Integer pageSize); } diff --git a/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClientImpl.java b/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClientImpl.java index cbfa3ba3b7..5dce7b60a5 100644 --- a/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClientImpl.java +++ b/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleClientImpl.java @@ -97,15 +97,21 @@ public ScheduleHandle getHandle(String scheduleID) { @Override public Stream listSchedules() { - return this.listSchedules(null); + return this.listSchedules(null, null); } @Override public Stream listSchedules(@Nullable Integer pageSize) { + return this.listSchedules(null, pageSize); + } + + @Override + public Stream listSchedules( + @Nullable String query, @Nullable Integer pageSize) { return scheduleClientCallsInvoker .listSchedules( new ScheduleClientCallsInterceptor.ListSchedulesInput( - pageSize == null ? 100 : pageSize)) + query, pageSize == null ? 
100 : pageSize)) .getStream(); } } diff --git a/temporal-sdk/src/main/java/io/temporal/common/interceptors/ScheduleClientCallsInterceptor.java b/temporal-sdk/src/main/java/io/temporal/common/interceptors/ScheduleClientCallsInterceptor.java index 328b552b1f..b14a4f9bd7 100644 --- a/temporal-sdk/src/main/java/io/temporal/common/interceptors/ScheduleClientCallsInterceptor.java +++ b/temporal-sdk/src/main/java/io/temporal/common/interceptors/ScheduleClientCallsInterceptor.java @@ -81,15 +81,21 @@ public ScheduleOptions getOptions() { } class ListSchedulesInput { + private final String query; private final int pageSize; - public ListSchedulesInput(int pageSize) { + public ListSchedulesInput(String query, int pageSize) { + this.query = query; this.pageSize = pageSize; } public int getPageSize() { return pageSize; } + + public String getQuery() { + return query; + } } class ListScheduleOutput { diff --git a/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java b/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java index 2f31d83616..79f0bd36b8 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java @@ -126,7 +126,7 @@ public void createSchedule(CreateScheduleInput input) { public ListScheduleOutput listSchedules(ListSchedulesInput input) { ListScheduleListDescriptionIterator iterator = new ListScheduleListDescriptionIterator( - clientOptions.getNamespace(), input.getPageSize(), genericClient); + clientOptions.getNamespace(), input.getQuery(), input.getPageSize(), genericClient); iterator.init(); Iterator wrappedIterator = Iterators.transform( diff --git a/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java b/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java index 383fb55e3c..7cd8151a5e 100644 --- 
a/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java +++ b/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java @@ -25,6 +25,8 @@ import io.temporal.api.enums.v1.ScheduleOverlapPolicy; import io.temporal.client.WorkflowOptions; import io.temporal.common.RetryOptions; +import io.temporal.common.SearchAttributeKey; +import io.temporal.common.SearchAttributes; import io.temporal.common.converter.EncodedValues; import io.temporal.common.interceptors.ScheduleClientInterceptor; import io.temporal.testing.internal.SDKTestWorkflowRule; @@ -43,6 +45,9 @@ import org.junit.Test; public class ScheduleTest { + static final SearchAttributeKey CUSTOM_KEYWORD_SA = + SearchAttributeKey.forKeyword("CustomKeywordField"); + @Rule public SDKTestWorkflowRule testWorkflowRule = SDKTestWorkflowRule.newBuilder() @@ -484,10 +489,12 @@ public void updateSchedules() { public void listSchedules() { ScheduleClient client = createScheduleClient(); // Create the schedule - ScheduleOptions options = + ScheduleOptions.Builder optionsBuilder = ScheduleOptions.newBuilder() .setMemo(Collections.singletonMap("memokey2", "memoval2")) - .build(); + .setTypedSearchAttributes( + SearchAttributes.newBuilder().set(CUSTOM_KEYWORD_SA, "keyword").build()); + ScheduleOptions options = optionsBuilder.build(); Schedule schedule = createTestSchedule() .setState(ScheduleState.newBuilder().setPaused(true).setNote("schedule list").build()) @@ -532,8 +539,9 @@ public void listSchedules() { (ScheduleListActionStartWorkflow) listDescription.getSchedule().getAction(); Assert.assertEquals("TestWorkflow1", action.getWorkflow()); // Create two additional schedules - client.createSchedule(scheduleIdPrefix + UUID.randomUUID(), schedule, options); - client.createSchedule(scheduleIdPrefix + UUID.randomUUID(), schedule, options); + optionsBuilder = optionsBuilder.setTypedSearchAttributes(null); + client.createSchedule(scheduleIdPrefix + UUID.randomUUID(), schedule, 
optionsBuilder.build()); + client.createSchedule(scheduleIdPrefix + UUID.randomUUID(), schedule, optionsBuilder.build()); // Add delay for schedules to appear testWorkflowRule.sleep(Duration.ofSeconds(2)); // List all schedules and filter @@ -541,8 +549,18 @@ public void listSchedules() { long listedSchedulesCount = scheduleStream.filter(s -> s.getScheduleId().startsWith(scheduleIdPrefix)).count(); Assert.assertEquals(3, listedSchedulesCount); + // List all schedules with a null filter + scheduleStream = client.listSchedules(null, 10); + listedSchedulesCount = + scheduleStream.filter(s -> s.getScheduleId().startsWith(scheduleIdPrefix)).count(); + Assert.assertEquals(3, listedSchedulesCount); + // List schedules with a query + scheduleStream = client.listSchedules("CustomKeywordField = 'keyword'", null); + listedSchedulesCount = + scheduleStream.filter(s -> s.getScheduleId().startsWith(scheduleIdPrefix)).count(); + Assert.assertEquals(1, listedSchedulesCount); // Cleanup all schedules - scheduleStream = client.listSchedules(null); + scheduleStream = client.listSchedules(null, null); scheduleStream .filter(s -> s.getScheduleId().startsWith(scheduleIdPrefix)) .forEach( From 5d22bb5d8f15f6b0c651deafd5de95da1ddd0ea6 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Thu, 1 Aug 2024 23:38:58 -0700 Subject: [PATCH 11/25] Add getCurrentUpdateInfo (#2158) Add getCurrentUpdateInfo --- .../internal/replay/ReplayWorkflow.java | 1 + .../replay/ReplayWorkflowExecutor.java | 1 + .../temporal/internal/sync/SyncWorkflow.java | 54 ++++---- .../internal/sync/SyncWorkflowContext.java | 17 ++- .../internal/sync/UpdateInfoImpl.java | 55 ++++++++ .../internal/sync/WorkflowInternal.java | 4 + .../java/io/temporal/workflow/UpdateInfo.java | 34 +++++ .../java/io/temporal/workflow/Workflow.java | 21 +++ ...eplayWorkflowRunTaskHandlerCacheTests.java | 1 + .../workflow/updateTest/UpdateInfoTest.java | 128 ++++++++++++++++++ 10 files changed, 286 insertions(+), 30 deletions(-) create mode 
100644 temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateInfoImpl.java create mode 100644 temporal-sdk/src/main/java/io/temporal/workflow/UpdateInfo.java create mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateInfoTest.java diff --git a/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflow.java b/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflow.java index 8065745e58..b345f8f2fa 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflow.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflow.java @@ -41,6 +41,7 @@ public interface ReplayWorkflow { /** Handle an update workflow execution event */ void handleUpdate( String updateName, + String updateId, Optional input, long eventId, Header header, diff --git a/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java b/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java index 299162f220..1da0775a48 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java @@ -161,6 +161,7 @@ public void handleWorkflowExecutionUpdated(UpdateMessage updateMessage) { Optional args = Optional.ofNullable(input.getArgs()); this.workflow.handleUpdate( input.getName(), + protocolMessage.getProtocolInstanceId(), args, protocolMessage.getEventId(), input.getHeader(), diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java index 248f29ac54..555607e3b6 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java @@ -39,6 +39,7 @@ import io.temporal.internal.worker.WorkflowExecutionException; import io.temporal.internal.worker.WorkflowExecutorCache; 
import io.temporal.worker.WorkflowImplementationOptions; +import io.temporal.workflow.UpdateInfo; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -151,38 +152,45 @@ public void handleSignal( @Override public void handleUpdate( String updateName, + String updateId, Optional input, long eventId, Header header, UpdateProtocolCallback callbacks) { + final UpdateInfo updateInfo = new UpdateInfoImpl(updateName, updateId); runner.executeInWorkflowThread( "update " + updateName, () -> { - // Skip validator on replay - if (!callbacks.isReplaying()) { + try { + workflowContext.setCurrentUpdateInfo(updateInfo); + // Skip validator on replay + if (!callbacks.isReplaying()) { + try { + workflowContext.setReadOnly(true); + workflowProc.handleValidateUpdate(updateName, input, eventId, header); + } catch (ReadOnlyException r) { + // Rethrow instead on rejecting the update to fail the WFT + throw r; + } catch (Exception e) { + callbacks.reject( + workflowContext + .getDataConverterWithCurrentWorkflowContext() + .exceptionToFailure(e)); + return; + } finally { + workflowContext.setReadOnly(false); + } + } + callbacks.accept(); try { - workflowContext.setReadOnly(true); - workflowProc.handleValidateUpdate(updateName, input, eventId, header); - } catch (ReadOnlyException r) { - // Rethrow instead on rejecting the update to fail the WFT - throw r; - } catch (Exception e) { - callbacks.reject( - workflowContext - .getDataConverterWithCurrentWorkflowContext() - .exceptionToFailure(e)); - return; - } finally { - workflowContext.setReadOnly(false); + Optional result = + workflowProc.handleExecuteUpdate(updateName, input, eventId, header); + callbacks.complete(result, null); + } catch (WorkflowExecutionException e) { + callbacks.complete(Optional.empty(), e.getFailure()); } - } - callbacks.accept(); - try { - Optional result = - workflowProc.handleExecuteUpdate(updateName, input, eventId, header); - callbacks.complete(result, null); - } catch 
(WorkflowExecutionException e) { - callbacks.complete(Optional.empty(), e.getFailure()); + } finally { + workflowContext.setCurrentUpdateInfo(null); } }); } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java index 78a3b714ba..208af532eb 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java @@ -68,14 +68,8 @@ import io.temporal.payload.context.ActivitySerializationContext; import io.temporal.payload.context.WorkflowSerializationContext; import io.temporal.worker.WorkflowImplementationOptions; -import io.temporal.workflow.CancellationScope; -import io.temporal.workflow.ChildWorkflowOptions; -import io.temporal.workflow.CompletablePromise; -import io.temporal.workflow.ContinueAsNewOptions; -import io.temporal.workflow.Functions; +import io.temporal.workflow.*; import io.temporal.workflow.Functions.Func; -import io.temporal.workflow.Promise; -import io.temporal.workflow.Workflow; import java.lang.reflect.Type; import java.time.Duration; import java.time.Instant; @@ -124,6 +118,7 @@ final class SyncWorkflowContext implements WorkflowContext, WorkflowOutboundCall private LocalActivityOptions defaultLocalActivityOptions = null; private Map localActivityOptionsMap; private boolean readOnly = false; + private final WorkflowThreadLocal currentUpdateInfo = new WorkflowThreadLocal<>(); public SyncWorkflowContext( @Nonnull String namespace, @@ -1275,6 +1270,14 @@ public Map getPropagatedContexts() { return contextData; } + public void setCurrentUpdateInfo(UpdateInfo updateInfo) { + currentUpdateInfo.set(updateInfo); + } + + public Optional getCurrentUpdateInfo() { + return Optional.ofNullable(currentUpdateInfo.get()); + } + /** Simple wrapper over a failure just to allow completing the CompletablePromise as a failure */ private static class 
FailureWrapperException extends RuntimeException { private final Failure failure; diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateInfoImpl.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateInfoImpl.java new file mode 100644 index 0000000000..1332db6d9d --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateInfoImpl.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.sync; + +import io.temporal.workflow.UpdateInfo; + +public final class UpdateInfoImpl implements UpdateInfo { + final String updateName; + final String updateId; + + UpdateInfoImpl(String updateName, String updateId) { + this.updateName = updateName; + this.updateId = updateId; + } + + @Override + public String getUpdateName() { + return updateName; + } + + @Override + public String getUpdateId() { + return updateId; + } + + @Override + public String toString() { + return "UpdateInfoImpl{" + + "updateName='" + + updateName + + '\'' + + ", updateId='" + + updateId + + '\'' + + '}'; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java index 6eeaac1ae7..7189b904b8 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java @@ -623,6 +623,10 @@ public static WorkflowInfo getWorkflowInfo() { return new WorkflowInfoImpl(getRootWorkflowContext().getReplayContext()); } + public static Optional getCurrentUpdateInfo() { + return getRootWorkflowContext().getCurrentUpdateInfo(); + } + public static Scope getMetricsScope() { return getRootWorkflowContext().getMetricsScope(); } diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/UpdateInfo.java b/temporal-sdk/src/main/java/io/temporal/workflow/UpdateInfo.java new file mode 100644 index 0000000000..349e825014 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/workflow/UpdateInfo.java @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.workflow; + +/** Provides information about the current workflow Update. */ +public interface UpdateInfo { + /** + * @return Update name + */ + String getUpdateName(); + + /** + * @return Update ID + */ + String getUpdateId(); +} diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java index 99e9696464..98e4341d27 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java @@ -339,10 +339,31 @@ public static void continueAsNew( WorkflowInternal.continueAsNew(workflowType, options, args); } + /** + * Returns information about current workflow execution. + * + *

Note: Can only be called within the context of a workflow. Will throw an error if called + * outside a workflow context. + * + * @return current workflow info. + */ public static WorkflowInfo getInfo() { return WorkflowInternal.getWorkflowInfo(); } + /** + * Returns information about current workflow update. + * + *

Note: Should only be called within the context of an update handler thread in a workflow. + * Will return an empty Optional if called outside an update handler thread inside a workflow + * context. Will throw an error if called outside a workflow context. + * + * @return current workflow update info. + */ + public static Optional getCurrentUpdateInfo() { + return WorkflowInternal.getCurrentUpdateInfo(); + } + /** * Extract deserialized Memo associated with given key * diff --git a/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerCacheTests.java b/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerCacheTests.java index 0fe1d11d6c..19a36c931c 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerCacheTests.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerCacheTests.java @@ -299,6 +299,7 @@ public void handleSignal( @Override public void handleUpdate( String updateName, + String updateId, Optional input, long eventId, Header header, diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateInfoTest.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateInfoTest.java new file mode 100644 index 0000000000..1bd9a0db81 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateInfoTest.java @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.workflow.updateTest; + +import static org.junit.Assert.*; + +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.*; +import io.temporal.failure.ApplicationFailure; +import io.temporal.testing.internal.SDKTestOptions; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.workflow.CompletablePromise; +import io.temporal.workflow.UpdateInfo; +import io.temporal.workflow.Workflow; +import io.temporal.workflow.shared.TestWorkflows.WorkflowWithUpdate; +import java.util.*; +import java.util.concurrent.ExecutionException; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +public class UpdateInfoTest { + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder().setWorkflowTypes(TestUpdateWorkflowImpl.class).build(); + + @Test + public void testUpdateInfo() throws ExecutionException, InterruptedException { + String workflowId = UUID.randomUUID().toString(); + WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); + WorkflowOptions options = + SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() + .setWorkflowId(workflowId) + .build(); + WorkflowWithUpdate workflow = workflowClient.newWorkflowStub(WorkflowWithUpdate.class, options); + // To execute workflow client.execute() would do. But we want to start workflow and immediately + // return. 
+ WorkflowExecution execution = WorkflowClient.start(workflow::execute); + WorkflowStub stub = WorkflowStub.fromTyped(workflow); + UpdateOptions.Builder updateOptionsBuilder = + UpdateOptions.newBuilder(String.class) + .setUpdateName("update") + .setWaitForStage(WorkflowUpdateStage.COMPLETED); + + UpdateHandle handle1 = + stub.startUpdate(updateOptionsBuilder.setUpdateId("update id 1").build(), 0, ""); + assertEquals("update:update id 1", handle1.getResultAsync().get()); + + UpdateHandle handle2 = + stub.startUpdate(updateOptionsBuilder.setUpdateId("update id 2").build(), 0, ""); + assertEquals("update:update id 2", handle2.getResultAsync().get()); + + Assert.assertThrows( + WorkflowUpdateException.class, + () -> + stub.startUpdate(updateOptionsBuilder.setUpdateId("reject").build(), 0, "") + .getResultAsync()); + + workflow.complete(); + String result = + testWorkflowRule + .getWorkflowClient() + .newUntypedWorkflowStub(execution, Optional.empty()) + .getResult(String.class); + assertEquals(" update id 1 update id 2", result); + } + + public static class TestUpdateWorkflowImpl implements WorkflowWithUpdate { + String state = "initial"; + List updates = new ArrayList<>(); + CompletablePromise promise = Workflow.newPromise(); + + @Override + public String execute() { + if (Workflow.getCurrentUpdateInfo().isPresent()) { + throw ApplicationFailure.newFailure("update info should not be present", "TestFailure"); + } + promise.get(); + return updates.stream().reduce("", (a, b) -> a + " " + b); + } + + @Override + public String getState() { + return state; + } + + @Override + public String update(Integer index, String value) { + UpdateInfo updateInfo = Workflow.getCurrentUpdateInfo().get(); + updates.add(updateInfo.getUpdateId()); + return updateInfo.getUpdateName() + ":" + updateInfo.getUpdateId(); + } + + @Override + public void updateValidator(Integer index, String value) { + UpdateInfo updateInfo = Workflow.getCurrentUpdateInfo().get(); + if 
(updateInfo.getUpdateId().equals("reject")) { + throw new RuntimeException("Rejecting update"); + } + } + + @Override + public void complete() { + promise.complete(null); + } + + @Override + public void completeValidator() {} + } +} From 4871168388bbb1003af22c31ee4d4f443aea37cf Mon Sep 17 00:00:00 2001 From: pdoerner <122412190+pdoerner@users.noreply.github.com> Date: Mon, 5 Aug 2024 10:36:09 -0700 Subject: [PATCH 12/25] Test server Nexus endpoint operator apis (#2162) * Bump API version to v1.36.0 * Nexus endpoint test server CRUD API implementation * cleanup * functional tests * test operator service external setup * test environment setup * test environment setup * skip functional tests with external server --- temporal-serviceclient/src/main/proto | 2 +- .../testservice/TestNexusEndpointStore.java | 44 ++ .../TestNexusEndpointStoreImpl.java | 189 +++++++++ .../testservice/TestOperatorService.java | 86 +++- .../testservice/TestServicesStarter.java | 3 +- .../functional/NexusEndpointTest.java | 400 ++++++++++++++++++ .../testing/TestWorkflowEnvironment.java | 6 + .../TestWorkflowEnvironmentInternal.java | 5 + 8 files changed, 732 insertions(+), 3 deletions(-) create mode 100644 temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java create mode 100644 temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java create mode 100644 temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java diff --git a/temporal-serviceclient/src/main/proto b/temporal-serviceclient/src/main/proto index 2227a14f48..39b0f69d19 160000 --- a/temporal-serviceclient/src/main/proto +++ b/temporal-serviceclient/src/main/proto @@ -1 +1 @@ -Subproject commit 2227a14f482ae48fc440a5e9829cf6797009d5b8 +Subproject commit 39b0f69d19b67731e1f35fd2d231f2c871091359 diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java 
b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java
new file mode 100644
index 0000000000..dcb9b9c23e
--- /dev/null
+++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved.
+ *
+ * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this material except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.temporal.internal.testservice;
+
+import io.temporal.api.nexus.v1.Endpoint;
+import io.temporal.api.nexus.v1.EndpointSpec;
+import java.io.Closeable;
+import java.util.List;
+
+/** Store backing the Nexus endpoint CRUD operations served by the test server. */
+public interface TestNexusEndpointStore extends Closeable {
+
+  /** Validates {@code spec} and registers a new endpoint for it. */
+  Endpoint createEndpoint(EndpointSpec spec);
+
+  /** Replaces the spec of endpoint {@code id} if {@code version} matches its current version. */
+  Endpoint updateEndpoint(String id, long version, EndpointSpec spec);
+
+  /** Removes endpoint {@code id} if {@code version} matches its current version. */
+  void deleteEndpoint(String id, long version);
+
+  /** Returns the endpoint registered under {@code id}. */
+  Endpoint getEndpoint(String id);
+
+  /**
+   * Lists endpoints one page at a time.
+   *
+   * @param pageSize maximum number of endpoints to return
+   * @param nextPageToken token continuing a previous listing, or empty to start from the beginning
+   * @param name when non-empty, restricts the result to the endpoint with this name
+   */
+  List<Endpoint> listEndpoints(long pageSize, byte[] nextPageToken, String name);
+
+  /** Checks that {@code spec} is acceptable to this store. */
+  void validateEndpointSpec(EndpointSpec spec);
+
+  /** Redeclared without {@code throws IOException}, unlike {@link Closeable#close()}. */
+  @Override
+  void close();
+}
diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java
new file mode 100644
index 0000000000..dbaa2ddf31
--- /dev/null
+++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved.
+ *
+ * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Modifications copyright (C) 2017 Uber Technologies, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this material except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.temporal.internal.testservice;
+
+import io.grpc.Status;
+import io.temporal.api.nexus.v1.Endpoint;
+import io.temporal.api.nexus.v1.EndpointSpec;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * TestNexusEndpointStoreImpl is an in-memory implementation of Nexus endpoint CRUD operations for
+ * use with the test server. Because conflict resolution is not required, there is no handling for
+ * created or updated timestamps.
+ *
+ * <p>Mutating operations are synchronized so that the ID map and the name index always change
+ * together; lookups and listings read the concurrent map without locking.
+ */
+public class TestNexusEndpointStoreImpl implements TestNexusEndpointStore {
+
+  private static final Pattern ENDPOINT_NAME_REGEX = Pattern.compile("^[a-zA-Z_][a-zA-Z0-9_]*$");
+
+  /** Endpoints keyed by ID; kept sorted so that listing and pagination have a stable order. */
+  private final NavigableMap<String, Endpoint> endpoints = new ConcurrentSkipListMap<>();
+
+  /** Names currently in use. Not thread-safe: only touched from the synchronized mutators. */
+  private final Set<String> endpointNames = new HashSet<>();
+
+  /**
+   * Validates the spec, reserves its name and stores a new endpoint at version 1 under a random
+   * UUID.
+   *
+   * @throws io.grpc.StatusRuntimeException INVALID_ARGUMENT if the spec is invalid, ALREADY_EXISTS
+   *     if the name is taken
+   */
+  @Override
+  public synchronized Endpoint createEndpoint(EndpointSpec spec) {
+    validateEndpointSpec(spec);
+
+    if (!endpointNames.add(spec.getName())) {
+      throw Status.ALREADY_EXISTS
+          .withDescription("Nexus endpoint already registered with name: " + spec.getName())
+          .asRuntimeException();
+    }
+
+    String id = UUID.randomUUID().toString();
+    Endpoint endpoint = Endpoint.newBuilder().setId(id).setVersion(1).setSpec(spec).build();
+
+    if (endpoints.putIfAbsent(id, endpoint) != null) {
+      // This should never happen in practice; release the name reserved above so it is not leaked
+      endpointNames.remove(spec.getName());
+      throw Status.ALREADY_EXISTS
+          .withDescription("Nexus endpoint already exists with ID: " + id)
+          .asRuntimeException();
+    }
+
+    return endpoint;
+  }
+
+  /**
+   * Replaces the spec of an existing endpoint and bumps its version by one.
+   *
+   * @throws io.grpc.StatusRuntimeException INVALID_ARGUMENT if the spec is invalid or the version
+   *     does not match, NOT_FOUND if the ID is unknown, ALREADY_EXISTS if the new name is taken by
+   *     another endpoint
+   */
+  @Override
+  public synchronized Endpoint updateEndpoint(String id, long version, EndpointSpec spec) {
+    validateEndpointSpec(spec);
+
+    Endpoint prev = endpoints.get(id);
+
+    if (prev == null) {
+      throw Status.NOT_FOUND
+          .withDescription("Could not find Nexus endpoint with ID: " + id)
+          .asRuntimeException();
+    }
+
+    if (prev.getVersion() != version) {
+      throw Status.INVALID_ARGUMENT
+          .withDescription(
+              "Error updating Nexus endpoint: version mismatch."
+                  + " Expected: "
+                  + prev.getVersion()
+                  + " Received: "
+                  + version)
+          .asRuntimeException();
+    }
+
+    String prevName = prev.getSpec().getName();
+    String newName = spec.getName();
+    // Only touch the name index on an actual rename. An update that keeps the name must leave it
+    // reserved, otherwise a later create could register a duplicate.
+    if (!prevName.equals(newName)) {
+      if (!endpointNames.add(newName)) {
+        throw Status.ALREADY_EXISTS
+            .withDescription(
+                "Error updating Nexus endpoint: "
+                    + "endpoint already registered with updated name: "
+                    + newName)
+            .asRuntimeException();
+      }
+      endpointNames.remove(prevName);
+    }
+
+    Endpoint updated = Endpoint.newBuilder(prev).setVersion(version + 1).setSpec(spec).build();
+
+    endpoints.put(id, updated);
+    return updated;
+  }
+
+  /**
+   * Removes an endpoint and frees its name for reuse.
+   *
+   * @throws io.grpc.StatusRuntimeException NOT_FOUND if the ID is unknown, INVALID_ARGUMENT if the
+   *     version does not match
+   */
+  @Override
+  public synchronized void deleteEndpoint(String id, long version) {
+    Endpoint existing = endpoints.get(id);
+
+    if (existing == null) {
+      throw Status.NOT_FOUND
+          .withDescription("Could not find Nexus endpoint with ID: " + id)
+          .asRuntimeException();
+    }
+
+    if (existing.getVersion() != version) {
+      throw Status.INVALID_ARGUMENT
+          .withDescription(
+              "Error deleting Nexus endpoint: version mismatch."
+                  + " Expected "
+                  + existing.getVersion()
+                  + " Received: "
+                  + version)
+          .asRuntimeException();
+    }
+
+    endpoints.remove(id);
+    // Release the name as well, otherwise it could never be registered again
+    endpointNames.remove(existing.getSpec().getName());
+  }
+
+  /**
+   * Returns the endpoint stored under {@code id}.
+   *
+   * @throws io.grpc.StatusRuntimeException NOT_FOUND if the ID is unknown
+   */
+  @Override
+  public Endpoint getEndpoint(String id) {
+    Endpoint endpoint = endpoints.get(id);
+    if (endpoint == null) {
+      throw Status.NOT_FOUND
+          .withDescription("Could not find Nexus endpoint with ID: " + id)
+          .asRuntimeException();
+    }
+    return endpoint;
+  }
+
+  /**
+   * Lists endpoints in ascending ID order.
+   *
+   * <p>With a non-empty {@code name} the result holds at most the one endpoint with that name and
+   * the paging arguments are ignored. Otherwise {@code nextPageToken} is read as the UTF-8 bytes
+   * of the last ID already returned, and the page starts strictly after it.
+   *
+   * <p>NOTE(review): {@code pageSize} is passed straight to {@code Stream.limit}, so 0 yields an
+   * empty page and a negative value is rejected by the stream API; confirm whether a default page
+   * size should apply.
+   */
+  @Override
+  public List<Endpoint> listEndpoints(long pageSize, byte[] nextPageToken, String name) {
+    if (name != null && !name.isEmpty()) {
+      return endpoints.values().stream()
+          .filter(ep -> ep.getSpec().getName().equals(name))
+          .limit(1)
+          .collect(Collectors.toList());
+    }
+
+    Collection<Endpoint> candidates = endpoints.values();
+    if (nextPageToken != null && nextPageToken.length > 0) {
+      String lastSeenId = new String(nextPageToken, StandardCharsets.UTF_8);
+      // Exclusive tail view: correct even if the endpoint behind the token was deleted meanwhile
+      candidates = endpoints.tailMap(lastSeenId, false).values();
+    }
+    return candidates.stream().limit(pageSize).collect(Collectors.toList());
+  }
+
+  /**
+   * Rejects specs with an empty or malformed name, without a target, or with a target other than a
+   * worker target.
+   *
+   * @throws io.grpc.StatusRuntimeException INVALID_ARGUMENT describing the first failed check
+   */
+  @Override
+  public void validateEndpointSpec(EndpointSpec spec) {
+    if (spec.getName().isEmpty()) {
+      throw Status.INVALID_ARGUMENT
+          .withDescription("Nexus endpoint name cannot be empty")
+          .asRuntimeException();
+    }
+    if (!ENDPOINT_NAME_REGEX.matcher(spec.getName()).matches()) {
+      throw Status.INVALID_ARGUMENT
+          .withDescription(
+              "Nexus endpoint name ("
+                  + spec.getName()
+                  + ") does not match expected pattern: "
+                  + ENDPOINT_NAME_REGEX.pattern())
+          .asRuntimeException();
+    }
+    if (!spec.hasTarget()) {
+      throw Status.INVALID_ARGUMENT
+          .withDescription("Nexus endpoint spec must have a target")
+          .asRuntimeException();
+    }
+    if (!spec.getTarget().hasWorker()) {
+      throw Status.INVALID_ARGUMENT
+          .withDescription("Test server only supports Nexus endpoints with worker targets")
+          .asRuntimeException();
+    }
+  }
+
+  /** Nothing to release: all state lives on the heap. */
+  @Override
+  public void close() {}
+}
diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestOperatorService.java
b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestOperatorService.java index 3dc328aacb..feb0ef0849 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestOperatorService.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestOperatorService.java @@ -20,12 +20,15 @@ package io.temporal.internal.testservice; +import com.google.protobuf.ByteString; import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; import io.temporal.api.enums.v1.IndexedValueType; +import io.temporal.api.nexus.v1.Endpoint; import io.temporal.api.operatorservice.v1.*; import java.io.Closeable; +import java.util.List; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,9 +43,12 @@ final class TestOperatorService extends OperatorServiceGrpc.OperatorServiceImplB private static final Logger log = LoggerFactory.getLogger(TestOperatorService.class); private final TestVisibilityStore visibilityStore; + private final TestNexusEndpointStore nexusEndpointStore; - public TestOperatorService(TestVisibilityStore visibilityStore) { + public TestOperatorService( + TestVisibilityStore visibilityStore, TestNexusEndpointStore nexusEndpointStore) { this.visibilityStore = visibilityStore; + this.nexusEndpointStore = nexusEndpointStore; } @Override @@ -93,6 +99,84 @@ public void removeSearchAttributes( } } + @Override + public void getNexusEndpoint( + GetNexusEndpointRequest request, StreamObserver responseObserver) { + try { + Endpoint endpoint = nexusEndpointStore.getEndpoint(request.getId()); + responseObserver.onNext(GetNexusEndpointResponse.newBuilder().setEndpoint(endpoint).build()); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } + } + + @Override + public void createNexusEndpoint( + CreateNexusEndpointRequest request, + StreamObserver responseObserver) { + try { + Endpoint 
created = nexusEndpointStore.createEndpoint(request.getSpec()); + responseObserver.onNext( + CreateNexusEndpointResponse.newBuilder().setEndpoint(created).build()); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } + } + + @Override + public void updateNexusEndpoint( + UpdateNexusEndpointRequest request, + StreamObserver responseObserver) { + try { + Endpoint updated = + nexusEndpointStore.updateEndpoint( + request.getId(), request.getVersion(), request.getSpec()); + responseObserver.onNext( + UpdateNexusEndpointResponse.newBuilder().setEndpoint(updated).build()); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } + } + + @Override + public void deleteNexusEndpoint( + DeleteNexusEndpointRequest request, + StreamObserver responseObserver) { + try { + nexusEndpointStore.deleteEndpoint(request.getId(), request.getVersion()); + responseObserver.onNext(DeleteNexusEndpointResponse.newBuilder().build()); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } + } + + @Override + public void listNexusEndpoints( + ListNexusEndpointsRequest request, + StreamObserver responseObserver) { + try { + List endpoints = + nexusEndpointStore.listEndpoints( + request.getPageSize(), request.getNextPageToken().toByteArray(), request.getName()); + ByteString nextPageToken = + (!endpoints.isEmpty() && endpoints.size() == request.getPageSize()) + ? 
endpoints.get(endpoints.size() - 1).getIdBytes() + : ByteString.empty(); + responseObserver.onNext( + ListNexusEndpointsResponse.newBuilder() + .addAllEndpoints(endpoints) + .setNextPageToken(nextPageToken) + .build()); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } + } + private void handleStatusRuntimeException( StatusRuntimeException e, StreamObserver responseObserver) { if (e.getStatus().getCode() == Status.Code.INTERNAL) { diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java index 1162c92dad..1ade1ff7ce 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java @@ -30,6 +30,7 @@ public class TestServicesStarter implements Closeable { private final SelfAdvancingTimerImpl selfAdvancingTimer; private final TestVisibilityStore visibilityStore = new TestVisibilityStoreImpl(); + private final TestNexusEndpointStore nexusEndpointStore = new TestNexusEndpointStoreImpl(); private final TestWorkflowStore workflowStore; private final TestOperatorService operatorService; private final TestWorkflowService workflowService; @@ -46,7 +47,7 @@ public TestServicesStarter(boolean lockTimeSkipping, long initialTimeMillis) { this.selfAdvancingTimer = new SelfAdvancingTimerImpl(initialTimeMillis, Clock.systemDefaultZone()); this.workflowStore = new TestWorkflowStoreImpl(this.selfAdvancingTimer); - this.operatorService = new TestOperatorService(this.visibilityStore); + this.operatorService = new TestOperatorService(this.visibilityStore, this.nexusEndpointStore); this.testService = new TestService(this.workflowStore, this.selfAdvancingTimer, lockTimeSkipping); this.workflowService = diff --git 
a/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java b/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java new file mode 100644 index 0000000000..0a80308e8a --- /dev/null +++ b/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java @@ -0,0 +1,400 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.testserver.functional; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assume.assumeFalse; + +import com.google.protobuf.ByteString; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import io.temporal.api.common.v1.Payload; +import io.temporal.api.nexus.v1.Endpoint; +import io.temporal.api.nexus.v1.EndpointSpec; +import io.temporal.api.nexus.v1.EndpointTarget; +import io.temporal.api.operatorservice.v1.*; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.UUID; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +public class NexusEndpointTest { + @Rule public SDKTestWorkflowRule testWorkflowRule = SDKTestWorkflowRule.newBuilder().build(); + + @Before + public void checkExternal() { + // TODO: remove this skip once 1.25.0 is officially released and + // https://github.com/temporalio/sdk-java/issues/2165 is resolved + assumeFalse( + "Nexus APIs are not supported for server versions < 1.25.0", + testWorkflowRule.isUseExternalService()); + } + + @Test + public void testValidateEndpointSpec() { + // Create and Update use same validation logic, so just test once + EndpointSpec.Builder specBuilder = getTestEndpointSpecBuilder("valid_name_01"); + + // Valid + Endpoint testEndpoint = createTestEndpoint(specBuilder); + assertEquals(1, testEndpoint.getVersion()); + assertEquals(specBuilder.build(), testEndpoint.getSpec()); + + // Missing name + specBuilder.setName(""); + StatusRuntimeException ex = + assertThrows(StatusRuntimeException.class, () -> createTestEndpoint(specBuilder)); + assertEquals(Status.Code.INVALID_ARGUMENT, ex.getStatus().getCode()); + assertEquals("Nexus endpoint name cannot be empty", ex.getStatus().getDescription()); + + // Name contains invalid characters + specBuilder.setName("*(test)_- :invalid"); + ex = 
assertThrows(StatusRuntimeException.class, () -> createTestEndpoint(specBuilder)); + assertEquals(Status.Code.INVALID_ARGUMENT, ex.getStatus().getCode()); + assertEquals( + "Nexus endpoint name (" + + specBuilder.getName() + + ") does not match expected pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$", + ex.getStatus().getDescription()); + + // Missing target + specBuilder.setName("valid_name_02"); + specBuilder.clearTarget(); + ex = assertThrows(StatusRuntimeException.class, () -> createTestEndpoint(specBuilder)); + assertEquals(Status.Code.INVALID_ARGUMENT, ex.getStatus().getCode()); + assertEquals("Nexus endpoint spec must have a target", ex.getStatus().getDescription()); + + // External target (test server only supports worker targets) + specBuilder.setTarget( + EndpointTarget.newBuilder() + .setExternal(EndpointTarget.External.newBuilder().setUrl("localhost:8080"))); + ex = assertThrows(StatusRuntimeException.class, () -> createTestEndpoint(specBuilder)); + assertEquals(Status.Code.INVALID_ARGUMENT, ex.getStatus().getCode()); + assertEquals( + "Test server only supports Nexus endpoints with worker targets", + ex.getStatus().getDescription()); + } + + @Test + public void testCreate() { + EndpointSpec.Builder specBuilder = getTestEndpointSpecBuilder("valid_create_test_endpoint"); + + // Valid create + Endpoint testEndpoint = createTestEndpoint(specBuilder); + assertEquals(1, testEndpoint.getVersion()); + assertEquals(specBuilder.build(), testEndpoint.getSpec()); + + // Name already registered + StatusRuntimeException ex = + assertThrows(StatusRuntimeException.class, () -> createTestEndpoint(specBuilder)); + assertEquals(Status.Code.ALREADY_EXISTS, ex.getStatus().getCode()); + assertEquals( + "Nexus endpoint already registered with name: " + specBuilder.getName(), + ex.getStatus().getDescription()); + } + + @Test + public void testUpdate() { + // Setup + Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("update_test_endpoint")); + assertEquals(1, 
testEndpoint.getVersion()); + EndpointSpec updatedSpec = + EndpointSpec.newBuilder(testEndpoint.getSpec()) + .setDescription( + Payload.newBuilder().setData(ByteString.copyFromUtf8("updated description"))) + .build(); + + // Not found + String missingID = UUID.randomUUID().toString(); + StatusRuntimeException ex = + assertThrows( + StatusRuntimeException.class, + () -> + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .updateNexusEndpoint( + UpdateNexusEndpointRequest.newBuilder() + .setId(missingID) + .setVersion(testEndpoint.getVersion()) + .setSpec(updatedSpec) + .build())); + assertEquals(Status.Code.NOT_FOUND, ex.getStatus().getCode()); + assertEquals( + "Could not find Nexus endpoint with ID: " + missingID, ex.getStatus().getDescription()); + + // Version mismatch + ex = + assertThrows( + StatusRuntimeException.class, + () -> + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .updateNexusEndpoint( + UpdateNexusEndpointRequest.newBuilder() + .setId(testEndpoint.getId()) + .setVersion(15) + .setSpec(updatedSpec) + .build())); + assertEquals(Status.Code.INVALID_ARGUMENT, ex.getStatus().getCode()); + assertEquals( + "Error updating Nexus endpoint: version mismatch." 
+ + " Expected: " + + testEndpoint.getVersion() + + " Received: " + + 15, + ex.getStatus().getDescription()); + + // Updated name already registered + EndpointSpec.Builder otherSpec = getTestEndpointSpecBuilder("other_test_endpoint"); + createTestEndpoint(otherSpec); + ex = + assertThrows( + StatusRuntimeException.class, + () -> + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .updateNexusEndpoint( + UpdateNexusEndpointRequest.newBuilder() + .setId(testEndpoint.getId()) + .setVersion(testEndpoint.getVersion()) + .setSpec(otherSpec.build()) + .build())); + assertEquals(Status.Code.ALREADY_EXISTS, ex.getStatus().getCode()); + assertEquals( + "Error updating Nexus endpoint: " + + "endpoint already registered with updated name: " + + otherSpec.getName(), + ex.getStatus().getDescription()); + + // Valid update + UpdateNexusEndpointResponse resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .updateNexusEndpoint( + UpdateNexusEndpointRequest.newBuilder() + .setId(testEndpoint.getId()) + .setVersion(testEndpoint.getVersion()) + .setSpec(updatedSpec) + .build()); + assertEquals(2, resp.getEndpoint().getVersion()); + assertEquals(updatedSpec, resp.getEndpoint().getSpec()); + } + + @Test + public void testDelete() { + // Setup + Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("delete_test_endpoint")); + assertEquals(1, testEndpoint.getVersion()); + + // Not found + String missingID = UUID.randomUUID().toString(); + StatusRuntimeException ex = + assertThrows( + StatusRuntimeException.class, + () -> + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .deleteNexusEndpoint( + DeleteNexusEndpointRequest.newBuilder() + .setId(missingID) + .setVersion(testEndpoint.getVersion()) + .build())); + assertEquals(Status.Code.NOT_FOUND, ex.getStatus().getCode()); + assertEquals( + "Could not find Nexus endpoint with ID: " + missingID, 
ex.getStatus().getDescription()); + + // Version mismatch + ex = + assertThrows( + StatusRuntimeException.class, + () -> + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .deleteNexusEndpoint( + DeleteNexusEndpointRequest.newBuilder() + .setId(testEndpoint.getId()) + .setVersion(15) + .build())); + assertEquals(Status.Code.INVALID_ARGUMENT, ex.getStatus().getCode()); + assertEquals( + "Error deleting Nexus endpoint: version mismatch." + + " Expected " + + testEndpoint.getVersion() + + " Received: " + + 15, + ex.getStatus().getDescription()); + + // Valid delete + DeleteNexusEndpointResponse resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .deleteNexusEndpoint( + DeleteNexusEndpointRequest.newBuilder() + .setId(testEndpoint.getId()) + .setVersion(testEndpoint.getVersion()) + .build()); + assertEquals(DeleteNexusEndpointResponse.newBuilder().build(), resp); + } + + @Test + public void testGet() { + // Setup + Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("get_test_endpoint")); + assertEquals(1, testEndpoint.getVersion()); + + // Not found + String missingID = UUID.randomUUID().toString(); + StatusRuntimeException ex = + assertThrows( + StatusRuntimeException.class, + () -> + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .getNexusEndpoint( + GetNexusEndpointRequest.newBuilder().setId(missingID).build())); + assertEquals(Status.Code.NOT_FOUND, ex.getStatus().getCode()); + assertEquals( + "Could not find Nexus endpoint with ID: " + missingID, ex.getStatus().getDescription()); + + // Valid get + GetNexusEndpointResponse resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .getNexusEndpoint( + GetNexusEndpointRequest.newBuilder().setId(testEndpoint.getId()).build()); + assertEquals(testEndpoint, resp.getEndpoint()); + } + + @Test + public void testList() { + // Setup + 
List testEndpoints = new ArrayList<>(3); + for (int i = 0; i < 3; i++) { + testEndpoints.add(createTestEndpoint(getTestEndpointSpecBuilder("list_test_endpoint_" + i))); + } + testEndpoints.sort(Comparator.comparing(Endpoint::getId)); + + // List with filter for non-existent name + ListNexusEndpointsResponse resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .listNexusEndpoints( + ListNexusEndpointsRequest.newBuilder().setName("some_missing_name").build()); + assertEquals(0, resp.getEndpointsCount()); + + // List with filter for existing name + resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .listNexusEndpoints( + ListNexusEndpointsRequest.newBuilder() + .setName(testEndpoints.get(1).getSpec().getName()) + .build()); + assertEquals(1, resp.getEndpointsCount()); + assertEquals(testEndpoints.get(1), resp.getEndpoints(0)); + + // List all + resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .listNexusEndpoints(ListNexusEndpointsRequest.newBuilder().setPageSize(10).build()); + assertEquals(testEndpoints.size(), resp.getEndpointsCount()); + assertEquals(ByteString.empty(), resp.getNextPageToken()); + assertEquals(testEndpoints, resp.getEndpointsList()); + + // List page 1 + resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .listNexusEndpoints(ListNexusEndpointsRequest.newBuilder().setPageSize(2).build()); + assertEquals(2, resp.getEndpointsCount()); + assertEquals(testEndpoints.get(1).getIdBytes(), resp.getNextPageToken()); + assertEquals(testEndpoints.subList(0, 2), resp.getEndpointsList()); + + // List page 2 + resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .listNexusEndpoints( + ListNexusEndpointsRequest.newBuilder() + .setPageSize(2) + .setNextPageToken(resp.getNextPageToken()) + .build()); + assertEquals(1, 
resp.getEndpointsCount()); + assertEquals(ByteString.empty(), resp.getNextPageToken()); + assertEquals(testEndpoints.subList(2, testEndpoints.size()), resp.getEndpointsList()); + } + + private EndpointSpec.Builder getTestEndpointSpecBuilder(String name) { + return EndpointSpec.newBuilder() + .setName(name) + .setDescription(Payload.newBuilder().setData(ByteString.copyFromUtf8("test endpoint"))) + .setTarget( + EndpointTarget.newBuilder() + .setWorker( + EndpointTarget.Worker.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue(testWorkflowRule.getTaskQueue()))); + } + + private Endpoint createTestEndpoint(EndpointSpec.Builder spec) { + CreateNexusEndpointResponse resp = + testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .createNexusEndpoint(CreateNexusEndpointRequest.newBuilder().setSpec(spec).build()); + return resp.getEndpoint(); + } +} diff --git a/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironment.java b/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironment.java index 7bffd57a78..b03e21cd4b 100644 --- a/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironment.java +++ b/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironment.java @@ -24,6 +24,7 @@ import io.temporal.api.enums.v1.IndexedValueType; import io.temporal.client.WorkflowClient; import io.temporal.common.WorkflowExecutionHistory; +import io.temporal.serviceclient.OperatorServiceStubs; import io.temporal.serviceclient.WorkflowServiceStubs; import io.temporal.worker.Worker; import io.temporal.worker.WorkerFactory; @@ -170,6 +171,11 @@ static TestWorkflowEnvironment newInstance(TestEnvironmentOptions options) { */ WorkflowServiceStubs getWorkflowServiceStubs(); + /** + * @return {@link io.temporal.serviceclient.OperatorServiceStubs} connected to the test server + */ + OperatorServiceStubs getOperatorServiceStubs(); + String getNamespace(); 
/** diff --git a/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironmentInternal.java b/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironmentInternal.java index fdc4b964fb..0c08685846 100644 --- a/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironmentInternal.java +++ b/temporal-testing/src/main/java/io/temporal/testing/TestWorkflowEnvironmentInternal.java @@ -229,6 +229,11 @@ public WorkflowServiceStubs getWorkflowServiceStubs() { return workflowServiceStubs; } + @Override + public OperatorServiceStubs getOperatorServiceStubs() { + return operatorServiceStubs; + } + @Override public String getNamespace() { return workflowClientOptions.getNamespace(); From 98b2e78ec8fccaa61479dff4f19c87412c67ecc7 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Tue, 6 Aug 2024 11:23:09 -0700 Subject: [PATCH 13/25] Disallow continue as new in update handlers (#2167) Disallow continue as new in an update handler --- .../UnsupportedContinueAsNewRequest.java | 34 ++++ .../sync/WorkflowExecutionHandler.java | 4 + .../internal/sync/WorkflowInternal.java | 9 + .../UpdateContinueAsNewInHandlerTest.java | 114 ++++++++++++ .../UpdateContinueAsNewNonDeterminism.java | 120 ------------- .../UpdateContinueAsNewWFTFailure.java | 108 ------------ .../updateTest/UpdateTestContinueAsNew.java | 162 ------------------ ...ocalActivityInTheLastWorkflowTaskTest.java | 14 +- 8 files changed, 166 insertions(+), 399 deletions(-) create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/statemachines/UnsupportedContinueAsNewRequest.java create mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewInHandlerTest.java delete mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewNonDeterminism.java delete mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewWFTFailure.java delete mode 100644 
temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTestContinueAsNew.java diff --git a/temporal-sdk/src/main/java/io/temporal/internal/statemachines/UnsupportedContinueAsNewRequest.java b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/UnsupportedContinueAsNewRequest.java new file mode 100644 index 0000000000..56347a2260 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/statemachines/UnsupportedContinueAsNewRequest.java @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.internal.statemachines; + +/** + * Thrown when {@link io.temporal.workflow.Workflow#continueAsNew} is called from an unsupported + * location. + * + *

The reason this class extends Error is for application workflow code to not catch it by + * mistake. The default behavior of the SDK is to block workflow execution while Error is thrown. + */ +public class UnsupportedContinueAsNewRequest extends Error { + public UnsupportedContinueAsNewRequest(String message) { + super(message); + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java index 1a78df46f5..ebb87604a5 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java @@ -29,6 +29,7 @@ import io.temporal.failure.CanceledFailure; import io.temporal.failure.TemporalFailure; import io.temporal.internal.replay.ReplayWorkflowContext; +import io.temporal.internal.statemachines.UnsupportedContinueAsNewRequest; import io.temporal.internal.worker.WorkflowExecutionException; import io.temporal.worker.WorkflowImplementationOptions; import io.temporal.workflow.Workflow; @@ -123,6 +124,9 @@ public Optional handleExecuteUpdate( io.temporal.api.common.v1.Header header) { try { return context.handleExecuteUpdate(updateName, input, eventId, new Header(header)); + } catch (UnsupportedContinueAsNewRequest e) { + // Re-throw to fail the workflow task + throw e; } catch (Throwable e) { applyWorkflowFailurePolicyAndRethrow(e); } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java index 7189b904b8..04edcbaee7 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java @@ -45,6 +45,7 @@ import io.temporal.internal.common.NonIdempotentHandle; import io.temporal.internal.common.SearchAttributesUtil; import 
io.temporal.internal.logging.ReplayAwareLogger; +import io.temporal.internal.statemachines.UnsupportedContinueAsNewRequest; import io.temporal.serviceclient.CheckedExceptionWrapper; import io.temporal.workflow.*; import io.temporal.workflow.Functions.Func; @@ -576,6 +577,7 @@ public static R retry( public static void continueAsNew( @Nullable String workflowType, @Nullable ContinueAsNewOptions options, Object[] args) { assertNotReadOnly("continue as new"); + assertNotInUpdateHandler("ContinueAsNew is not supported in an update handler"); getWorkflowOutboundInterceptor() .continueAsNew( new WorkflowOutboundCallsInterceptor.ContinueAsNewInput( @@ -588,6 +590,7 @@ public static void continueAsNew( Object[] args, WorkflowOutboundCallsInterceptor outboundCallsInterceptor) { assertNotReadOnly("continue as new"); + assertNotInUpdateHandler("ContinueAsNew is not supported in an update handler"); outboundCallsInterceptor.continueAsNew( new WorkflowOutboundCallsInterceptor.ContinueAsNewInput( workflowType, options, args, Header.empty())); @@ -760,6 +763,12 @@ static void assertNotReadOnly(String action) { } } + static void assertNotInUpdateHandler(String message) { + if (getCurrentUpdateInfo().isPresent()) { + throw new UnsupportedContinueAsNewRequest(message); + } + } + private static WorkflowThread getWorkflowThread() { return DeterministicRunnerImpl.currentThreadInternal(); } diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewInHandlerTest.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewInHandlerTest.java new file mode 100644 index 0000000000..8757abc647 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewInHandlerTest.java @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.workflow.updateTest; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; + +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.*; +import io.temporal.failure.ApplicationFailure; +import io.temporal.internal.statemachines.UnsupportedContinueAsNewRequest; +import io.temporal.testing.internal.SDKTestOptions; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.worker.WorkflowImplementationOptions; +import io.temporal.workflow.CompletablePromise; +import io.temporal.workflow.Workflow; +import io.temporal.workflow.shared.TestWorkflows.WorkflowWithUpdate; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import org.junit.Rule; +import org.junit.Test; + +public class UpdateContinueAsNewInHandlerTest { + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder() + .setWorkflowTypes( + WorkflowImplementationOptions.newBuilder() + .setFailWorkflowExceptionTypes(UnsupportedContinueAsNewRequest.class) + .build(), + TestUpdateWorkflowImpl.class) + .build(); + + @Test + public void continueAsNewInUpdateHandler() { + String workflowId = UUID.randomUUID().toString(); + WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); + 
WorkflowOptions options = + SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() + .setWorkflowId(workflowId) + .build(); + WorkflowWithUpdate workflow = workflowClient.newWorkflowStub(WorkflowWithUpdate.class, options); + + WorkflowExecution execution = WorkflowClient.start(workflow::execute); + WorkflowStub stub = WorkflowStub.fromTyped(workflow); + stub.startUpdate( + UpdateOptions.newBuilder(String.class) + .setUpdateName("update") + .setWaitForStage(WorkflowUpdateStage.ACCEPTED) + .build()); + WorkflowFailedException e = + assertThrows(WorkflowFailedException.class, () -> stub.getResult(String.class)); + assertEquals( + "io.temporal.internal.statemachines.UnsupportedContinueAsNewRequest", + ((ApplicationFailure) e.getCause()).getType()); + } + + public static class TestUpdateWorkflowImpl implements WorkflowWithUpdate { + String state = "initial"; + List updates = new ArrayList<>(); + CompletablePromise promise = Workflow.newPromise(); + + @Override + public String execute() { + promise.get(); + return ""; + } + + @Override + public String getState() { + return state; + } + + @Override + public String update(Integer index, String value) { + // Sleep to make sure the update can be accepted before trying to continueAsNew + Workflow.sleep(Duration.ofSeconds(1)); + // This should throw UnsupportedContinueAsNewRequest + Workflow.continueAsNew(); + return ""; + } + + @Override + public void updateValidator(Integer index, String value) {} + + @Override + public void complete() { + promise.complete(null); + } + + @Override + public void completeValidator() {} + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewNonDeterminism.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewNonDeterminism.java deleted file mode 100644 index 49751c7f4b..0000000000 --- a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewNonDeterminism.java +++ 
/dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. - * - * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Modifications copyright (C) 2017 Uber Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this material except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.temporal.workflow.updateTest; - -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.Assert.assertEquals; - -import io.temporal.client.WorkflowClient; -import io.temporal.client.WorkflowFailedException; -import io.temporal.client.WorkflowOptions; -import io.temporal.failure.ApplicationFailure; -import io.temporal.testing.internal.SDKTestOptions; -import io.temporal.testing.internal.SDKTestWorkflowRule; -import io.temporal.worker.NonDeterministicException; -import io.temporal.worker.WorkflowImplementationOptions; -import io.temporal.workflow.*; -import java.time.Duration; -import java.util.UUID; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; - -public class UpdateContinueAsNewNonDeterminism { - private static final CompletableFuture continueAsNew = new CompletableFuture<>(); - - @Rule - public SDKTestWorkflowRule testWorkflowRule = - SDKTestWorkflowRule.newBuilder() - .setWorkflowTypes( - 
WorkflowImplementationOptions.newBuilder() - .setFailWorkflowExceptionTypes(Throwable.class) - .build(), - TestUpdateWorkflowImpl.class) - .build(); - - @Test - public void testUpdateContinueAsNewNonDeterminism() - throws ExecutionException, InterruptedException { - // Verify we report nondeterminism when an update handler is nondeterministic and calls continue - // as new on replay - String workflowId = UUID.randomUUID().toString(); - WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); - WorkflowOptions options = - SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() - .setWorkflowId(workflowId) - .setWorkflowTaskTimeout(Duration.ofSeconds(1)) - .build(); - TestUpdateWorkflow client = workflowClient.newWorkflowStub(TestUpdateWorkflow.class, options); - - WorkflowClient.start(client::execute, false); - for (int i = 0; i < 5; i++) { - client.update(); - } - continueAsNew.complete(true); - // Force replay, expected to fail with NonDeterministicException - testWorkflowRule.invalidateWorkflowCache(); - // Use a signal here because an update would block - client.signal(); - WorkflowFailedException e = - Assert.assertThrows(WorkflowFailedException.class, () -> client.execute(false)); - assertThat(e.getCause(), is(instanceOf(ApplicationFailure.class))); - assertEquals( - NonDeterministicException.class.getName(), ((ApplicationFailure) e.getCause()).getType()); - } - - @WorkflowInterface - public interface TestUpdateWorkflow { - - @WorkflowMethod - String execute(boolean finish); - - @UpdateMethod - void update() throws ExecutionException, InterruptedException; - - @SignalMethod - void signal(); - } - - public static class TestUpdateWorkflowImpl implements TestUpdateWorkflow { - - @Override - public String execute(boolean finish) { - Workflow.await(() -> finish); - return "finished"; - } - - @Override - public void update() throws ExecutionException, InterruptedException { - // Intentionally introduce non determinism 
- if (continueAsNew.getNow(false)) { - Workflow.continueAsNew(true); - } - } - - @Override - public void signal() {} - } -} diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewWFTFailure.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewWFTFailure.java deleted file mode 100644 index 2683b0834d..0000000000 --- a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateContinueAsNewWFTFailure.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. - * - * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Modifications copyright (C) 2017 Uber Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this material except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package io.temporal.workflow.updateTest; - -import static org.junit.Assume.assumeTrue; - -import io.temporal.client.WorkflowClient; -import io.temporal.client.WorkflowOptions; -import io.temporal.testing.internal.SDKTestOptions; -import io.temporal.testing.internal.SDKTestWorkflowRule; -import io.temporal.workflow.*; -import java.time.Duration; -import java.util.UUID; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Semaphore; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; - -public class UpdateContinueAsNewWFTFailure { - private static final Semaphore workflowTaskProcessed = new Semaphore(1); - - private static final CompletableFuture continueAsNew = new CompletableFuture<>(); - - @Rule - public SDKTestWorkflowRule testWorkflowRule = - SDKTestWorkflowRule.newBuilder().setWorkflowTypes(TestUpdateWorkflowImpl.class).build(); - - @Test - public void testUpdateContinueAsNewAfterWFTFailure() throws InterruptedException { - // TODO(https://github.com/temporalio/sdk-java/issues/1903) - assumeTrue("Test Server hangs here", SDKTestWorkflowRule.useExternalService); - - String workflowId = UUID.randomUUID().toString(); - WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); - WorkflowOptions options = - SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() - .setWorkflowId(workflowId) - .setWorkflowTaskTimeout(Duration.ofSeconds(1)) - .build(); - TestUpdateWorkflow client = workflowClient.newWorkflowStub(TestUpdateWorkflow.class, options); - - WorkflowClient.start(client::execute, false); - for (int i = 0; i < 3; i++) { - workflowTaskProcessed.acquire(); - // Start update in a separate thread to avoid blocking since admitted is not supported. 
- Thread asyncUpdate = - new Thread( - () -> { - try { - client.update(); - } catch (Exception e) { - } - }); - asyncUpdate.start(); - } - continueAsNew.complete(true); - - Assert.assertEquals("finished", client.execute(false)); - } - - @WorkflowInterface - public interface TestUpdateWorkflow { - - @WorkflowMethod - String execute(boolean finish); - - @UpdateMethod - void update() throws ExecutionException, InterruptedException; - } - - public static class TestUpdateWorkflowImpl implements TestUpdateWorkflow { - - @Override - public String execute(boolean finish) { - Workflow.await(() -> finish); - return "finished"; - } - - @Override - public void update() throws ExecutionException, InterruptedException { - if (continueAsNew.getNow(false)) { - Workflow.continueAsNew(true); - } - workflowTaskProcessed.release(); - throw new RuntimeException("Intentionally fail workflow task"); - } - } -} diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTestContinueAsNew.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTestContinueAsNew.java deleted file mode 100644 index 74d448b2c7..0000000000 --- a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateTestContinueAsNew.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. - * - * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Modifications copyright (C) 2017 Uber Technologies, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this material except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.temporal.workflow.updateTest; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThrows; - -import io.temporal.activity.Activity; -import io.temporal.activity.ActivityInterface; -import io.temporal.activity.ActivityMethod; -import io.temporal.activity.ActivityOptions; -import io.temporal.api.common.v1.WorkflowExecution; -import io.temporal.client.*; -import io.temporal.testing.internal.SDKTestOptions; -import io.temporal.testing.internal.SDKTestWorkflowRule; -import io.temporal.worker.WorkerOptions; -import io.temporal.workflow.CompletablePromise; -import io.temporal.workflow.Workflow; -import io.temporal.workflow.shared.TestActivities; -import io.temporal.workflow.shared.TestWorkflows.WorkflowWithUpdate; -import java.time.Duration; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.UUID; -import org.junit.Rule; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class UpdateTestContinueAsNew { - - private static final Logger log = LoggerFactory.getLogger(UpdateTestContinueAsNew.class); - - @Rule - public SDKTestWorkflowRule testWorkflowRule = - SDKTestWorkflowRule.newBuilder() - .setWorkerOptions(WorkerOptions.newBuilder().build()) - .setWorkflowTypes(TestUpdateWorkflowImpl.class) - .setActivityImplementations(new ActivityImpl()) - .build(); - - @Test - public void testContinueAsNewInAUpdate() { - String workflowId = UUID.randomUUID().toString(); - WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); - WorkflowOptions options = - SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() - .setWorkflowId(workflowId) - .build(); - WorkflowWithUpdate workflow = workflowClient.newWorkflowStub(WorkflowWithUpdate.class, options); - // To execute workflow client.execute() 
would do. But we want to start workflow and immediately - // return. - WorkflowExecution execution = WorkflowClient.start(workflow::execute); - - SDKTestWorkflowRule.waitForOKQuery(workflow); - assertEquals("initial", workflow.getState()); - - assertEquals(workflowId, execution.getWorkflowId()); - - assertEquals("Execute-Hello Update", workflow.update(0, "Hello Update")); - assertEquals("Execute-Hello Update 2", workflow.update(0, "Hello Update 2")); - // Complete should fail since we have not continued as new yet - assertThrows(WorkflowUpdateException.class, () -> workflow.complete()); - - // Send an update to continue as new, must be async since the update won't complete - WorkflowStub workflowStub = WorkflowStub.fromTyped(workflow); - workflowStub.startUpdate("update", WorkflowUpdateStage.ACCEPTED, String.class, 0, ""); - - testWorkflowRule.waitForTheEndOfWFT(execution.getWorkflowId()); - testWorkflowRule.invalidateWorkflowCache(); - - assertEquals("Execute-Hello Update", workflow.update(0, "Hello Update")); - assertEquals("Execute-Hello Update 2", workflow.update(0, "Hello Update 2")); - - workflow.complete(); - - String result = - testWorkflowRule - .getWorkflowClient() - .newUntypedWorkflowStub(execution, Optional.empty()) - .getResult(String.class); - assertEquals("Execute-Hello Update Execute-Hello Update 2", result); - } - - public static class TestUpdateWorkflowImpl implements WorkflowWithUpdate { - String state = "initial"; - List updates = new ArrayList<>(); - CompletablePromise promise = Workflow.newPromise(); - private final TestActivities.TestActivity1 activity = - Workflow.newActivityStub( - TestActivities.TestActivity1.class, - ActivityOptions.newBuilder().setScheduleToCloseTimeout(Duration.ofHours(1)).build()); - - @Override - public String execute() { - promise.get(); - return updates.get(0) + " " + updates.get(1); - } - - @Override - public String getState() { - return state; - } - - @Override - public String update(Integer index, String value) 
{ - if (value.isEmpty()) { - Workflow.newContinueAsNewStub(WorkflowWithUpdate.class).execute(); - } - String result = activity.execute(value); - updates.add(result); - return result; - } - - @Override - public void updateValidator(Integer index, String value) {} - - @Override - public void complete() { - promise.complete(null); - } - - @Override - public void completeValidator() { - if (updates.size() < 2 || !Workflow.getInfo().getContinuedExecutionRunId().isPresent()) { - throw new RuntimeException("Workflow not ready to complete"); - } - } - } - - @ActivityInterface - public interface GreetingActivities { - @ActivityMethod - String hello(String input); - } - - public static class ActivityImpl implements TestActivities.TestActivity1 { - @Override - public String execute(String input) { - return Activity.getExecutionContext().getInfo().getActivityType() + "-" + input; - } - } -} diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateWithLocalActivityInTheLastWorkflowTaskTest.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateWithLocalActivityInTheLastWorkflowTaskTest.java index e32e6773a1..0cb0edc7c0 100644 --- a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateWithLocalActivityInTheLastWorkflowTaskTest.java +++ b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateWithLocalActivityInTheLastWorkflowTaskTest.java @@ -44,9 +44,8 @@ public class UpdateWithLocalActivityInTheLastWorkflowTaskTest { .build(); @Test - @Parameters({"true, true", "false, true", "true, false", "false, false"}) - public void testUpdateWithLocalActivityInTheLastWorkflowTask( - Boolean waitOnLA, Boolean continueAsNew) { + @Parameters({"true", "false"}) + public void testUpdateWithLocalActivityInTheLastWorkflowTask(Boolean waitOnLA) { WorkflowWithUpdate client = testWorkflowRule.newWorkflowStub(WorkflowWithUpdate.class); WorkflowStub.fromTyped(client).start(true); @@ -54,7 +53,7 @@ public void 
testUpdateWithLocalActivityInTheLastWorkflowTask( new Thread( () -> { try { - client.update(waitOnLA, continueAsNew); + client.update(waitOnLA); } catch (Exception e) { } }); @@ -70,7 +69,7 @@ public interface WorkflowWithUpdate { String execute(Boolean finish); @UpdateMethod - String update(Boolean waitOnLA, Boolean continueAsNew); + String update(Boolean waitOnLA); } public static class WorkflowWithUpdateImpl implements WorkflowWithUpdate { @@ -91,16 +90,13 @@ public String execute(Boolean wait) { } @Override - public String update(Boolean waitOnLA, Boolean continueAsNew) { + public String update(Boolean waitOnLA) { if (waitOnLA) { Promise promise = Async.procedure(activities::sleepActivity, (long) 10, 0); Async.procedure(activities::sleepActivity, (long) 10000, 0); promise.get(); } - if (continueAsNew) { - Workflow.continueAsNew(false); - } finish = true; activities.sleepActivity(1000, 0); return "update"; From 531d3cb2d4fae3a16c7ac5ae7a0ea0f762fc4a1e Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Tue, 6 Aug 2024 14:52:23 -0700 Subject: [PATCH 14/25] Wrap GRPC::CANCELED and DEADLINE_EXCEEDED in new exception type (#2172) * Wrap GRPC::CANCELED and DEADLINE_EXCEEDED --- ...flowUpdateTimeoutOrCancelledException.java | 36 ++++++ .../internal/client/LazyUpdateHandleImpl.java | 5 +- .../client/RootWorkflowClientInvoker.java | 32 +++-- .../client/functional/UpdateTestTimeout.java | 10 +- .../updateTest/UpdateExceptionWrapped.java | 116 ++++++++++++++++++ 5 files changed, 180 insertions(+), 19 deletions(-) create mode 100644 temporal-sdk/src/main/java/io/temporal/client/WorkflowUpdateTimeoutOrCancelledException.java create mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateExceptionWrapped.java diff --git a/temporal-sdk/src/main/java/io/temporal/client/WorkflowUpdateTimeoutOrCancelledException.java b/temporal-sdk/src/main/java/io/temporal/client/WorkflowUpdateTimeoutOrCancelledException.java new file mode 100644 index 0000000000..fb7d24c82a 
--- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/client/WorkflowUpdateTimeoutOrCancelledException.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.client; + +import io.temporal.api.common.v1.WorkflowExecution; + +/** + * Error that occurs when an update call times out or is cancelled. + * + *

<p>Note, this is not related to any general concept of timing out or cancelling a running update, + * this is only related to the client call itself. + */ +public class WorkflowUpdateTimeoutOrCancelledException extends WorkflowServiceException { + public WorkflowUpdateTimeoutOrCancelledException( + WorkflowExecution execution, String updateId, String updateName, Throwable cause) { + super(execution, "", cause); + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/client/LazyUpdateHandleImpl.java b/temporal-sdk/src/main/java/io/temporal/internal/client/LazyUpdateHandleImpl.java index e48c49808d..8ca5a611a9 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/client/LazyUpdateHandleImpl.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/client/LazyUpdateHandleImpl.java @@ -110,12 +110,13 @@ public CompletableFuture getResultAsync(long timeout, TimeUnit unit) { // does not exist or because the update ID does not exist. throw sre; } + throw sre; } else if (failure instanceof WorkflowException) { throw (WorkflowException) failure; } else if (failure instanceof TimeoutException) { - throw new CompletionException((TimeoutException) failure); + throw new CompletionException(failure); } - throw new WorkflowServiceException(execution, workflowType, (Throwable) failure); + throw new WorkflowServiceException(execution, workflowType, failure); }); } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java b/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java index 439304e3f9..500cacf0a7 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/client/RootWorkflowClientInvoker.java @@ -339,8 +339,18 @@ public UpdateHandle startUpdate(StartUpdateInput input) { UpdateWorkflowExecutionLifecycleStage waitForStage = input.getWaitPolicy().getLifecycleStage(); do { Deadline
pollTimeoutDeadline = Deadline.after(POLL_UPDATE_TIMEOUT_S, TimeUnit.SECONDS); - result = genericClient.update(updateRequest, pollTimeoutDeadline); - } while (result.getStage().getNumber() < waitForStage.getNumber() + try { + result = genericClient.update(updateRequest, pollTimeoutDeadline); + } catch (StatusRuntimeException e) { + if (e.getStatus().getCode() == Status.Code.DEADLINE_EXCEEDED + || e.getStatus().getCode() == Status.Code.CANCELLED) { + throw new WorkflowUpdateTimeoutOrCancelledException( + input.getWorkflowExecution(), input.getUpdateName(), input.getUpdateId(), e); + } + throw e; + } + + } while (result.getStage().getNumber() < input.getWaitPolicy().getLifecycleStage().getNumber() && result.getStage().getNumber() < UpdateWorkflowExecutionLifecycleStage .UPDATE_WORKFLOW_EXECUTION_LIFECYCLE_STAGE_ACCEPTED @@ -466,17 +476,17 @@ private void pollWorkflowUpdateHelper( return; } if ((e instanceof StatusRuntimeException - && ((StatusRuntimeException) e).getStatus().getCode() - == Status.Code.DEADLINE_EXCEEDED) + && (((StatusRuntimeException) e).getStatus().getCode() + == Status.Code.DEADLINE_EXCEEDED + || ((StatusRuntimeException) e).getStatus().getCode() + == Status.Code.CANCELLED)) || deadline.isExpired()) { resultCF.completeExceptionally( - new TimeoutException( - "WorkflowId=" - + request.getUpdateRef().getWorkflowExecution().getWorkflowId() - + ", runId=" - + request.getUpdateRef().getWorkflowExecution().getRunId() - + ", updateId=" - + request.getUpdateRef().getUpdateId())); + new WorkflowUpdateTimeoutOrCancelledException( + request.getUpdateRef().getWorkflowExecution(), + request.getUpdateRef().getUpdateId(), + "", + e)); } else if (e != null) { resultCF.completeExceptionally(e); } else { diff --git a/temporal-sdk/src/test/java/io/temporal/client/functional/UpdateTestTimeout.java b/temporal-sdk/src/test/java/io/temporal/client/functional/UpdateTestTimeout.java index f5219d6f84..717999167c 100644 --- 
a/temporal-sdk/src/test/java/io/temporal/client/functional/UpdateTestTimeout.java +++ b/temporal-sdk/src/test/java/io/temporal/client/functional/UpdateTestTimeout.java @@ -27,17 +27,13 @@ import static org.junit.Assert.assertEquals; import com.google.common.base.Stopwatch; -import io.temporal.client.UpdateHandle; -import io.temporal.client.WorkflowClient; -import io.temporal.client.WorkflowStub; -import io.temporal.client.WorkflowUpdateStage; +import io.temporal.client.*; import io.temporal.testing.internal.SDKTestOptions; import io.temporal.testing.internal.SDKTestWorkflowRule; import io.temporal.workflow.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import org.junit.Rule; import org.junit.Test; @@ -111,7 +107,9 @@ public void WorkflowUpdateGetResultTimeout() throws ExecutionException, Interrup // Verify get throws the correct exception in around the right amount of time Stopwatch stopWatch = Stopwatch.createStarted(); ExecutionException executionException = assertThrows(ExecutionException.class, result::get); - assertThat(executionException.getCause(), is(instanceOf(TimeoutException.class))); + assertThat( + executionException.getCause(), + is(instanceOf(WorkflowUpdateTimeoutOrCancelledException.class))); stopWatch.stop(); long elapsedSeconds = stopWatch.elapsed(TimeUnit.SECONDS); assertTrue( diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateExceptionWrapped.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateExceptionWrapped.java new file mode 100644 index 0000000000..8a51e4bbb7 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateExceptionWrapped.java @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.workflow.updateTest; + +import static org.junit.Assert.assertThrows; + +import io.grpc.Context; +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.*; +import io.temporal.testing.internal.SDKTestOptions; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.worker.WorkerOptions; +import io.temporal.workflow.CompletablePromise; +import io.temporal.workflow.Workflow; +import io.temporal.workflow.shared.TestWorkflows.WorkflowWithUpdate; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicReference; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; + +public class UpdateExceptionWrapped { + + private static ScheduledExecutorService scheduledExecutor = Executors.newScheduledThreadPool(1); + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder() + .setWorkerOptions(WorkerOptions.newBuilder().build()) + .setWorkflowTypes(TestUpdateWorkflowImpl.class) + .build(); + + @Test + public void testUpdateStart() { + String workflowId = UUID.randomUUID().toString(); + WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); + WorkflowOptions options = + 
SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() + .setWorkflowId(workflowId) + .build(); + WorkflowWithUpdate workflow = workflowClient.newWorkflowStub(WorkflowWithUpdate.class, options); + // To execute workflow client.execute() would do. But we want to start workflow and immediately + // return. + WorkflowExecution execution = WorkflowClient.start(workflow::execute); + testWorkflowRule.getTestEnvironment().shutdownNow(); + testWorkflowRule.getTestEnvironment().awaitTermination(1000, TimeUnit.MILLISECONDS); + + final AtomicReference exception = + new AtomicReference<>(); + + Context.current() + .withDeadlineAfter(500, TimeUnit.MILLISECONDS, scheduledExecutor) + .run( + () -> + exception.set( + assertThrows( + WorkflowUpdateTimeoutOrCancelledException.class, + () -> workflow.update(0, "")))); + Assert.assertEquals(execution.getWorkflowId(), exception.get().getExecution().getWorkflowId()); + } + + public static class TestUpdateWorkflowImpl implements WorkflowWithUpdate { + String state = "initial"; + List updates = new ArrayList<>(); + CompletablePromise promise = Workflow.newPromise(); + + @Override + public String execute() { + promise.get(); + return ""; + } + + @Override + public String getState() { + return state; + } + + @Override + public String update(Integer index, String value) { + Workflow.await(() -> false); + return ""; + } + + @Override + public void updateValidator(Integer index, String value) {} + + @Override + public void complete() { + promise.complete(null); + } + + @Override + public void completeValidator() {} + } +} From e0851f05cb531ef7f094ac12a92cf4576a8fab1f Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Thu, 8 Aug 2024 10:45:39 -0700 Subject: [PATCH 15/25] Build omes worker image in CI (#2171) --- .github/workflows/omes.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/omes.yml diff --git a/.github/workflows/omes.yml b/.github/workflows/omes.yml 
new file mode 100644 index 0000000000..59e6bce918 --- /dev/null +++ b/.github/workflows/omes.yml @@ -0,0 +1,17 @@ +name: Omes testing +on: + push: + branches: + - master + +jobs: + omes-image-build: + uses: temporalio/omes/.github/workflows/docker-images.yml@main + secrets: inherit + with: + lang: java + sdk-repo-url: ${{ github.event.pull_request.head.repo.full_name || 'temporalio/sdk-java' }} + sdk-repo-ref: ${{ github.event.pull_request.head.ref || github.ref }} + # TODO: Remove once we have a good way of cleaning up sha-based pushed images + docker-tag-ext: ci-latest + do-push: true From 59c485e93f9b02984d7d141f21f0023ab70fd738 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Thu, 8 Aug 2024 13:40:56 -0700 Subject: [PATCH 16/25] Filter out third party protos (#2174) --- temporal-serviceclient/build.gradle | 2 ++ 1 file changed, 2 insertions(+) diff --git a/temporal-serviceclient/build.gradle b/temporal-serviceclient/build.gradle index 77ec231034..00ab0b71cc 100644 --- a/temporal-serviceclient/build.gradle +++ b/temporal-serviceclient/build.gradle @@ -61,6 +61,8 @@ sourceSets { main { proto { srcDir 'src/main/protocloud' + // TODO(https://github.com/temporalio/api/issues/400): Remove this exclusion once the 3rd party protos are removed. + exclude '**/google/**/*' } } } From e2d2608b377620178751186bf5f93e8ebb0564b9 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Sun, 11 Aug 2024 23:16:25 -0700 Subject: [PATCH 17/25] Warn on dangling handlers and add method to help await on all handlers. 
(#2144) Warn on dangling handlers and add method to help await on all handlers --- .../WorkflowOutboundCallsInterceptor.java | 43 +++- .../replay/ReplayWorkflowExecutor.java | 61 ++++- .../internal/replay/WorkflowContext.java | 6 + .../internal/sync/SignalDispatcher.java | 34 ++- .../internal/sync/SignalHandlerInfo.java | 56 +++++ .../temporal/internal/sync/SyncWorkflow.java | 9 +- .../internal/sync/SyncWorkflowContext.java | 27 +- .../internal/sync/UpdateDispatcher.java | 55 +++-- .../internal/sync/UpdateHandlerInfo.java | 57 +++++ .../sync/WorkflowExecutionHandler.java | 6 +- .../internal/sync/WorkflowInternal.java | 8 + .../workflow/DynamicQueryHandler.java | 1 + .../workflow/DynamicSignalHandler.java | 6 + .../workflow/DynamicUpdateHandler.java | 5 + .../workflow/HandlerUnfinishedPolicy.java | 40 +++ .../io/temporal/workflow/SignalMethod.java | 3 + .../io/temporal/workflow/UpdateMethod.java | 3 + .../java/io/temporal/workflow/Workflow.java | 16 ++ ...orkflowRunTaskHandlerTaskHandlerTests.java | 6 + .../replay/WarnUnfinishedHandlers.java | 233 ++++++++++++++++++ .../SignalAllHandlersFinished.java | 95 +++++++ .../updateTest/UpdateAllHandlersFinished.java | 121 +++++++++ .../src/test/resources/logback-test.xml | 2 +- 23 files changed, 852 insertions(+), 41 deletions(-) create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/sync/SignalHandlerInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateHandlerInfo.java create mode 100644 temporal-sdk/src/main/java/io/temporal/workflow/HandlerUnfinishedPolicy.java create mode 100644 temporal-sdk/src/test/java/io/temporal/internal/replay/WarnUnfinishedHandlers.java create mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java create mode 100644 temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java diff --git 
a/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowOutboundCallsInterceptor.java b/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowOutboundCallsInterceptor.java index 629bd79d76..cac927692e 100644 --- a/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowOutboundCallsInterceptor.java +++ b/temporal-sdk/src/main/java/io/temporal/common/interceptors/WorkflowOutboundCallsInterceptor.java @@ -371,16 +371,32 @@ public Header getHeader() { final class SignalRegistrationRequest { private final String signalType; + private final HandlerUnfinishedPolicy unfinishedPolicy; private final Class[] argTypes; private final Type[] genericArgTypes; private final Functions.Proc1 callback; + // Kept for backward compatibility public SignalRegistrationRequest( String signalType, Class[] argTypes, Type[] genericArgTypes, Functions.Proc1 callback) { this.signalType = signalType; + this.unfinishedPolicy = HandlerUnfinishedPolicy.WARN_AND_ABANDON; + this.argTypes = argTypes; + this.genericArgTypes = genericArgTypes; + this.callback = callback; + } + + public SignalRegistrationRequest( + String signalType, + HandlerUnfinishedPolicy unfinishedPolicy, + Class[] argTypes, + Type[] genericArgTypes, + Functions.Proc1 callback) { + this.signalType = signalType; + this.unfinishedPolicy = unfinishedPolicy; this.argTypes = argTypes; this.genericArgTypes = genericArgTypes; this.callback = callback; @@ -390,6 +406,10 @@ public String getSignalType() { return signalType; } + public HandlerUnfinishedPolicy getUnfinishedPolicy() { + return unfinishedPolicy; + } + public Class[] getArgTypes() { return argTypes; } @@ -417,19 +437,22 @@ public List getRequests() { @Experimental final class UpdateRegistrationRequest { - private final Functions.Func1 executeCallback; - private final Functions.Proc1 validateCallback; private final String updateName; + private final HandlerUnfinishedPolicy unfinishedPolicy; private final Class[] argTypes; private final Type[] 
genericArgTypes; + private final Functions.Func1 executeCallback; + private final Functions.Proc1 validateCallback; public UpdateRegistrationRequest( String updateName, + HandlerUnfinishedPolicy unfinishedPolicy, Class[] argTypes, Type[] genericArgTypes, Functions.Proc1 validateCallback, Functions.Func1 executeCallback) { this.updateName = updateName; + this.unfinishedPolicy = unfinishedPolicy; this.argTypes = argTypes; this.genericArgTypes = genericArgTypes; this.validateCallback = validateCallback; @@ -440,12 +463,8 @@ public String getUpdateName() { return updateName; } - public Functions.Proc1 getValidateCallback() { - return validateCallback; - } - - public Functions.Func1 getExecuteCallback() { - return executeCallback; + public HandlerUnfinishedPolicy getUnfinishedPolicy() { + return unfinishedPolicy; } public Class[] getArgTypes() { @@ -455,6 +474,14 @@ public Class[] getArgTypes() { public Type[] getGenericArgTypes() { return genericArgTypes; } + + public Functions.Proc1 getValidateCallback() { + return validateCallback; + } + + public Functions.Func1 getExecuteCallback() { + return executeCallback; + } } @Experimental diff --git a/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java b/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java index 1da0775a48..2a2b98266d 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/replay/ReplayWorkflowExecutor.java @@ -20,6 +20,7 @@ package io.temporal.internal.replay; +import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.Timestamps; import com.uber.m3.tally.Scope; @@ -36,13 +37,44 @@ import io.temporal.internal.common.ProtobufTimeUtils; import io.temporal.internal.common.UpdateMessage; import io.temporal.internal.statemachines.WorkflowStateMachines; +import 
io.temporal.internal.sync.SignalHandlerInfo; +import io.temporal.internal.sync.UpdateHandlerInfo; import io.temporal.internal.worker.WorkflowExecutionException; import io.temporal.worker.MetricsType; import io.temporal.worker.NonDeterministicException; +import io.temporal.workflow.HandlerUnfinishedPolicy; +import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import javax.annotation.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.MDC; final class ReplayWorkflowExecutor { + @VisibleForTesting + public static final String unfinishedUpdateHandlesWarnMessage = + "[TMPRL1102] Workflow finished while update handlers are still running. This may " + + "have interrupted work that the update handler was doing, and the client " + + "that sent the update will receive a 'workflow execution already completed' " + + "Exception instead of the update result. You can wait for all update and " + + "signal handlers to complete by using `await workflow.Await(() -> workflow.isEveryHandlerFinished())`. " + + "Alternatively, if both you and the clients sending the update are okay with " + + "interrupting running handlers when the workflow finishes, and causing " + + "clients to receive errors, then you can disable this warning via the update " + + "handler annotations: `@UpdateMethod(unfinishedPolicy = HandlerUnfinishedPolicy.ABANDON)`."; + + @VisibleForTesting + public static final String unfinishedSignalHandlesWarnMessage = + "[TMPRL1102] Workflow finished while signal handlers are still running. This may " + + "have interrupted work that the signal handler was doing. You can wait for all update and " + + "signal handlers to complete by using `await workflow.Await(() -> workflow.isEveryHandlerFinished())`. 
" + + "Alternatively, if both you and the clients sending the signal are okay with " + + "interrupting running handlers when the workflow finishes you can disable this warning via the signal " + + "handler annotations: `@SignalMethod(unfinishedPolicy = HandlerUnfinishedPolicy.ABANDON)`."; + + private static final Logger log = LoggerFactory.getLogger(ReplayWorkflowExecutor.class); private final ReplayWorkflow workflow; @@ -89,6 +121,33 @@ public void eventLoop() { } private void completeWorkflow(@Nullable WorkflowExecutionException failure) { + // If the workflow is failed we do not log any warnings about unfinished handlers. + if (log.isWarnEnabled() && (failure == null || context.isCancelRequested())) { + Map runningSignalHandlers = + workflow.getWorkflowContext().getRunningSignalHandlers(); + List unfinishedSignalHandlers = + runningSignalHandlers.values().stream() + .filter(a -> a.getPolicy() == HandlerUnfinishedPolicy.WARN_AND_ABANDON) + .collect(Collectors.toList()); + if (!unfinishedSignalHandlers.isEmpty()) { + MDC.put("Signals", unfinishedSignalHandlers.toString()); + log.warn(unfinishedSignalHandlesWarnMessage); + MDC.remove("Signals"); + } + + Map runningUpdateHandlers = + workflow.getWorkflowContext().getRunningUpdateHandlers(); + List unfinishedUpdateHandlers = + runningUpdateHandlers.values().stream() + .filter(a -> a.getPolicy() == HandlerUnfinishedPolicy.WARN_AND_ABANDON) + .collect(Collectors.toList()); + if (!unfinishedUpdateHandlers.isEmpty()) { + MDC.put("Updates", unfinishedUpdateHandlers.toString()); + log.warn(unfinishedUpdateHandlesWarnMessage); + MDC.remove("Updates"); + } + } + if (context.isCancelRequested()) { workflowStateMachines.cancelWorkflow(); metricsScope.counter(MetricsType.WORKFLOW_CANCELED_COUNTER).inc(1); @@ -161,7 +220,7 @@ public void handleWorkflowExecutionUpdated(UpdateMessage updateMessage) { Optional args = Optional.ofNullable(input.getArgs()); this.workflow.handleUpdate( input.getName(), - 
protocolMessage.getProtocolInstanceId(), + update.getMeta().getUpdateId(), args, protocolMessage.getEventId(), input.getHeader(), diff --git a/temporal-sdk/src/main/java/io/temporal/internal/replay/WorkflowContext.java b/temporal-sdk/src/main/java/io/temporal/internal/replay/WorkflowContext.java index c32f3fd424..49dfeebd0a 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/replay/WorkflowContext.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/replay/WorkflowContext.java @@ -22,6 +22,8 @@ import io.temporal.api.failure.v1.Failure; import io.temporal.common.context.ContextPropagator; +import io.temporal.internal.sync.SignalHandlerInfo; +import io.temporal.internal.sync.UpdateHandlerInfo; import io.temporal.worker.WorkflowImplementationOptions; import java.lang.reflect.Type; import java.util.List; @@ -66,4 +68,8 @@ public interface WorkflowContext { * ContextPropagator#getCurrentContext()} */ Map getPropagatedContexts(); + + Map getRunningSignalHandlers(); + + Map getRunningUpdateHandlers(); } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/SignalDispatcher.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/SignalDispatcher.java index 952bbe9040..5900cf051c 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/SignalDispatcher.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/SignalDispatcher.java @@ -29,13 +29,9 @@ import io.temporal.common.interceptors.WorkflowOutboundCallsInterceptor; import io.temporal.worker.MetricsType; import io.temporal.workflow.DynamicSignalHandler; +import io.temporal.workflow.HandlerUnfinishedPolicy; import io.temporal.workflow.Workflow; -import java.util.ArrayDeque; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Queue; +import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,6 +48,8 @@ class SignalDispatcher { /** Buffers signals which don't have a registered 
listener. */ private final Queue signalBuffer = new ArrayDeque<>(); + private Map runningSignalHandlers = new LinkedHashMap<>(); + public SignalDispatcher(DataConverter dataConverterWithWorkflowContext) { this.dataConverterWithWorkflowContext = dataConverterWithWorkflowContext; } @@ -77,17 +75,23 @@ public void handleInterceptedSignal(WorkflowInboundCallsInterceptor.SignalInput } } + public Map getRunningSignalHandlers() { + return runningSignalHandlers; + } + public void handleSignal( String signalName, Optional input, long eventId, Header header) { WorkflowOutboundCallsInterceptor.SignalRegistrationRequest handler = signalCallbacks.get(signalName); Object[] args; + HandlerUnfinishedPolicy policy; if (handler == null) { if (dynamicSignalHandler == null) { signalBuffer.add(new SignalData(signalName, input, eventId, header)); return; } args = new Object[] {new EncodedValues(input, dataConverterWithWorkflowContext)}; + policy = dynamicSignalHandler.getUnfinishedPolicy(signalName); } else { try { args = @@ -97,9 +101,23 @@ public void handleSignal( logSerializationException(signalName, eventId, e); return; } + policy = handler.getUnfinishedPolicy(); + } + // Track the signal handler + boolean threadDestroyed = false; + runningSignalHandlers.put(eventId, new SignalHandlerInfo(eventId, signalName, policy)); + try { + inboundCallsInterceptor.handleSignal( + new WorkflowInboundCallsInterceptor.SignalInput(signalName, args, eventId, header)); + } catch (DestroyWorkflowThreadError e) { + threadDestroyed = true; + throw e; + } finally { + // If the thread was destroyed the user did not finish the handler + if (!threadDestroyed) { + runningSignalHandlers.remove(eventId); + } } - inboundCallsInterceptor.handleSignal( - new WorkflowInboundCallsInterceptor.SignalInput(signalName, args, eventId, header)); } public void registerSignalHandlers( diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/SignalHandlerInfo.java 
b/temporal-sdk/src/main/java/io/temporal/internal/sync/SignalHandlerInfo.java new file mode 100644 index 0000000000..10bbcf6cfd --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/SignalHandlerInfo.java @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.sync; + +import io.temporal.workflow.HandlerUnfinishedPolicy; + +public class SignalHandlerInfo { + private final long eventId; + private final String name; + private final HandlerUnfinishedPolicy policy; + + public SignalHandlerInfo(long eventId, String name, HandlerUnfinishedPolicy policy) { + this.eventId = eventId; + this.name = name; + this.policy = policy; + } + + public String getName() { + return name; + } + + public HandlerUnfinishedPolicy getPolicy() { + return policy; + } + + @Override + public String toString() { + return "SignalHandlerInfo{" + + "eventId=" + + eventId + + ", name='" + + name + + '\'' + + ", policy=" + + policy + + '}'; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java index 555607e3b6..d46f1ce766 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflow.java @@ -144,9 +144,12 @@ public void start(HistoryEvent event, ReplayWorkflowContext context) { @Override public void handleSignal( String signalName, Optional input, long eventId, Header header) { + // Signals can trigger completion runner.executeInWorkflowThread( "signal " + signalName, - () -> workflowProc.handleSignal(signalName, input, eventId, header)); + () -> { + workflowProc.handleSignal(signalName, input, eventId, header); + }); } @Override @@ -167,7 +170,7 @@ public void handleUpdate( if (!callbacks.isReplaying()) { try { workflowContext.setReadOnly(true); - workflowProc.handleValidateUpdate(updateName, input, eventId, header); + workflowProc.handleValidateUpdate(updateName, updateId, input, eventId, header); } catch (ReadOnlyException r) { // Rethrow instead on rejecting the update to fail the WFT throw r; @@ -184,7 +187,7 @@ public void handleUpdate( callbacks.accept(); try { Optional result = - 
workflowProc.handleExecuteUpdate(updateName, input, eventId, header); + workflowProc.handleExecuteUpdate(updateName, updateId, input, eventId, header); callbacks.complete(result, null); } catch (WorkflowExecutionException e) { callbacks.complete(Optional.empty(), e.getFailure()); diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java index 208af532eb..094b1c8fe2 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/SyncWorkflowContext.java @@ -119,6 +119,10 @@ final class SyncWorkflowContext implements WorkflowContext, WorkflowOutboundCall private Map localActivityOptionsMap; private boolean readOnly = false; private final WorkflowThreadLocal currentUpdateInfo = new WorkflowThreadLocal<>(); + // Map of all running update handlers. Key is the update Id of the update request. + private Map runningUpdateHandlers = new HashMap<>(); + // Map of all running signal handlers. Key is the event Id of the signal event. 
+ private Map runningSignalHandlers = new HashMap<>(); public SyncWorkflowContext( @Nonnull String namespace, @@ -317,13 +321,13 @@ public void handleSignal( } public void handleValidateUpdate( - String updateName, Optional input, long eventId, Header header) { - updateDispatcher.handleValidateUpdate(updateName, input, eventId, header); + String updateName, String updateId, Optional input, long eventId, Header header) { + updateDispatcher.handleValidateUpdate(updateName, updateId, input, eventId, header); } public Optional handleExecuteUpdate( - String updateName, Optional input, long eventId, Header header) { - return updateDispatcher.handleExecuteUpdate(updateName, input, eventId, header); + String updateName, String updateId, Optional input, long eventId, Header header) { + return updateDispatcher.handleExecuteUpdate(updateName, updateId, input, eventId, header); } public void handleInterceptedValidateUpdate(WorkflowInboundCallsInterceptor.UpdateInput input) { @@ -344,6 +348,11 @@ public Optional handleQuery(String queryName, Header header, Optional< return queryDispatcher.handleQuery(queryName, header, input); } + public boolean isEveryHandlerFinished() { + return updateDispatcher.getRunningUpdateHandlers().isEmpty() + && signalDispatcher.getRunningSignalHandlers().isEmpty(); + } + private class ActivityCallback { private final CompletablePromise> result = Workflow.newPromise(); @@ -1019,6 +1028,16 @@ void setReadOnly(boolean readOnly) { this.readOnly = readOnly; } + @Override + public Map getRunningSignalHandlers() { + return signalDispatcher.getRunningSignalHandlers(); + } + + @Override + public Map getRunningUpdateHandlers() { + return updateDispatcher.getRunningUpdateHandlers(); + } + @Override public ReplayWorkflowContext getReplayContext() { return replayContext; diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateDispatcher.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateDispatcher.java index aaa96060e6..9edec581e3 
100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateDispatcher.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateDispatcher.java @@ -30,9 +30,8 @@ import io.temporal.common.interceptors.WorkflowOutboundCallsInterceptor; import io.temporal.common.interceptors.WorkflowOutboundCallsInterceptor.UpdateRegistrationRequest; import io.temporal.workflow.DynamicUpdateHandler; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; +import io.temporal.workflow.HandlerUnfinishedPolicy; +import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,6 +44,7 @@ class UpdateDispatcher { private DynamicUpdateHandler dynamicUpdateHandler; private WorkflowInboundCallsInterceptor inboundCallsInterceptor; + private Map runningUpdateHandlers = new TreeMap<>(); public UpdateDispatcher(DataConverter dataConverterWithWorkflowContext) { this.dataConverterWithWorkflowContext = dataConverterWithWorkflowContext; @@ -55,48 +55,71 @@ public void setInboundCallsInterceptor(WorkflowInboundCallsInterceptor inboundCa } public void handleValidateUpdate( - String updateName, Optional input, long eventId, Header header) { + String updateName, String updateId, Optional input, long eventId, Header header) { WorkflowOutboundCallsInterceptor.UpdateRegistrationRequest handler = updateCallbacks.get(updateName); Object[] args; + HandlerUnfinishedPolicy policy; if (handler == null) { if (dynamicUpdateHandler == null) { throw new IllegalArgumentException( "Unknown update name: " + updateName + ", knownTypes=" + updateCallbacks.keySet()); } args = new Object[] {new EncodedValues(input, dataConverterWithWorkflowContext)}; + policy = dynamicUpdateHandler.getUnfinishedPolicy(updateName); } else { args = dataConverterWithWorkflowContext.fromPayloads( input, handler.getArgTypes(), handler.getGenericArgTypes()); + policy = handler.getUnfinishedPolicy(); + } + runningUpdateHandlers.put(updateId, new UpdateHandlerInfo(updateId, 
updateName, policy)); + try { + inboundCallsInterceptor.validateUpdate( + new WorkflowInboundCallsInterceptor.UpdateInput(updateName, header, args)); + } finally { + runningUpdateHandlers.remove(updateId); } - - inboundCallsInterceptor.validateUpdate( - new WorkflowInboundCallsInterceptor.UpdateInput(updateName, header, args)); } public Optional handleExecuteUpdate( - String updateName, Optional input, long eventId, Header header) { + String updateName, String updateId, Optional input, long eventId, Header header) { WorkflowOutboundCallsInterceptor.UpdateRegistrationRequest handler = updateCallbacks.get(updateName); Object[] args; + HandlerUnfinishedPolicy policy; if (handler == null) { if (dynamicUpdateHandler == null) { throw new IllegalArgumentException( "Unknown update name: " + updateName + ", knownTypes=" + updateCallbacks.keySet()); } args = new Object[] {new EncodedValues(input, dataConverterWithWorkflowContext)}; + policy = dynamicUpdateHandler.getUnfinishedPolicy(updateName); } else { args = dataConverterWithWorkflowContext.fromPayloads( input, handler.getArgTypes(), handler.getGenericArgTypes()); + policy = handler.getUnfinishedPolicy(); + } + + runningUpdateHandlers.put(updateId, new UpdateHandlerInfo(updateId, updateName, policy)); + boolean threadDestroyed = false; + try { + Object result = + inboundCallsInterceptor + .executeUpdate( + new WorkflowInboundCallsInterceptor.UpdateInput(updateName, header, args)) + .getResult(); + return dataConverterWithWorkflowContext.toPayloads(result); + } catch (DestroyWorkflowThreadError e) { + threadDestroyed = true; + throw e; + } finally { + // If the thread was destroyed the user did not finish the handler + if (!threadDestroyed) { + runningUpdateHandlers.remove(updateId); + } } - Object result = - inboundCallsInterceptor - .executeUpdate( - new WorkflowInboundCallsInterceptor.UpdateInput(updateName, header, args)) - .getResult(); - return dataConverterWithWorkflowContext.toPayloads(result); } public void 
registerUpdateHandlers( @@ -146,4 +169,8 @@ public UpdateOutput handleInterceptedExecuteUpdate(UpdateInput input) { } return new WorkflowInboundCallsInterceptor.UpdateOutput(result); } + + public Map getRunningUpdateHandlers() { + return runningUpdateHandlers; + } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateHandlerInfo.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateHandlerInfo.java new file mode 100644 index 0000000000..56a8dfacb3 --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/UpdateHandlerInfo.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.internal.sync; + +import io.temporal.workflow.HandlerUnfinishedPolicy; + +public class UpdateHandlerInfo { + private String updateId; + private String name; + private HandlerUnfinishedPolicy policy; + + public UpdateHandlerInfo(String updateId, String name, HandlerUnfinishedPolicy policy) { + this.updateId = updateId; + this.name = name; + this.policy = policy; + } + + public String getName() { + return name; + } + + public HandlerUnfinishedPolicy getPolicy() { + return policy; + } + + @Override + public String toString() { + return "UpdateHandlerInfo{" + + "updateId='" + + updateId + + '\'' + + ", name='" + + name + + '\'' + + ", policy=" + + policy + + '}'; + } +} diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java index ebb87604a5..fff430d90c 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowExecutionHandler.java @@ -107,11 +107,12 @@ public Optional handleQuery( public void handleValidateUpdate( String updateName, + String updateId, Optional input, long eventId, io.temporal.api.common.v1.Header header) { try { - context.handleValidateUpdate(updateName, input, eventId, new Header(header)); + context.handleValidateUpdate(updateName, updateId, input, eventId, new Header(header)); } catch (Throwable e) { applyWorkflowFailurePolicyAndRethrow(e); } @@ -119,11 +120,12 @@ public void handleValidateUpdate( public Optional handleExecuteUpdate( String updateName, + String updateId, Optional input, long eventId, io.temporal.api.common.v1.Header header) { try { - return context.handleExecuteUpdate(updateName, input, eventId, new Header(header)); + return context.handleExecuteUpdate(updateName, updateId, input, eventId, new Header(header)); } catch (UnsupportedContinueAsNewRequest e) { // Re-throw to fail the 
workflow task throw e; diff --git a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java index 04edcbaee7..1250acbc3e 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/sync/WorkflowInternal.java @@ -182,9 +182,11 @@ public static void registerListener(Object implementation) { List requests = new ArrayList<>(); for (POJOWorkflowMethodMetadata methodMetadata : workflowMetadata.getSignalMethods()) { Method method = methodMetadata.getWorkflowMethod(); + SignalMethod signalMethod = method.getAnnotation(SignalMethod.class); requests.add( new WorkflowOutboundCallsInterceptor.SignalRegistrationRequest( methodMetadata.getName(), + signalMethod.unfinishedPolicy(), method.getParameterTypes(), method.getGenericParameterTypes(), (args) -> { @@ -220,6 +222,7 @@ public static void registerListener(Object implementation) { for (POJOWorkflowMethodMetadata methodMetadata : workflowMetadata.getUpdateMethods()) { Method method = methodMetadata.getWorkflowMethod(); UpdateMethod updateMethod = method.getAnnotation(UpdateMethod.class); + // Get the update name, defaulting to the method name if not specified. 
String updateMethodName = updateMethod.name(); if (updateMethodName.isEmpty()) { updateMethodName = method.getName(); @@ -241,6 +244,7 @@ public static void registerListener(Object implementation) { updateRequests.add( new WorkflowOutboundCallsInterceptor.UpdateRegistrationRequest( methodMetadata.getName(), + updateMethod.unfinishedPolicy(), method.getParameterTypes(), method.getGenericParameterTypes(), (args) -> { @@ -745,6 +749,10 @@ public static Optional getPreviousRunFailure() { .map(f -> getDataConverterWithCurrentWorkflowContext().failureToException(f)); } + public static boolean isEveryHandlerFinished() { + return getRootWorkflowContext().isEveryHandlerFinished(); + } + private static WorkflowOutboundCallsInterceptor getWorkflowOutboundInterceptor() { return getRootWorkflowContext().getWorkflowOutboundInterceptor(); } diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/DynamicQueryHandler.java b/temporal-sdk/src/main/java/io/temporal/workflow/DynamicQueryHandler.java index e64aabfde3..e7ee15df79 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/DynamicQueryHandler.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/DynamicQueryHandler.java @@ -33,6 +33,7 @@ * * @see DynamicSignalHandler * @see DynamicWorkflow + * @see DynamicUpdateHandler */ public interface DynamicQueryHandler { Object handle(String queryType, EncodedValues args); diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/DynamicSignalHandler.java b/temporal-sdk/src/main/java/io/temporal/workflow/DynamicSignalHandler.java index e9d1f8df91..9f28502303 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/DynamicSignalHandler.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/DynamicSignalHandler.java @@ -33,7 +33,13 @@ * * @see DynamicQueryHandler * @see DynamicWorkflow + * @see DynamicUpdateHandler */ public interface DynamicSignalHandler { void handle(String signalName, EncodedValues args); + + /** Returns the actions taken if a workflow exits 
with a running instance of this handler. */ + default HandlerUnfinishedPolicy getUnfinishedPolicy(String signalName) { + return HandlerUnfinishedPolicy.WARN_AND_ABANDON; + } } diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/DynamicUpdateHandler.java b/temporal-sdk/src/main/java/io/temporal/workflow/DynamicUpdateHandler.java index e82bca9fc1..c816197af9 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/DynamicUpdateHandler.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/DynamicUpdateHandler.java @@ -40,4 +40,9 @@ public interface DynamicUpdateHandler { default void handleValidate(String updateName, EncodedValues args) {} EncodedValues handleExecute(String updateName, EncodedValues args); + + /** Returns the actions taken if a workflow exits with a running instance of this handler. */ + default HandlerUnfinishedPolicy getUnfinishedPolicy(String updateName) { + return HandlerUnfinishedPolicy.WARN_AND_ABANDON; + } } diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/HandlerUnfinishedPolicy.java b/temporal-sdk/src/main/java/io/temporal/workflow/HandlerUnfinishedPolicy.java new file mode 100644 index 0000000000..383418a30d --- /dev/null +++ b/temporal-sdk/src/main/java/io/temporal/workflow/HandlerUnfinishedPolicy.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.workflow; + +/** + * Actions taken if a workflow terminates with running handlers. + * + *

Policy defining actions taken when a workflow exits while update or signal handlers are + * running. The workflow exit may be due to successful return, failure, cancellation, or + * continue-as-new. + */ +public enum HandlerUnfinishedPolicy { + /** Issue a warning in addition to abandon. */ + WARN_AND_ABANDON, + /** + * Abandon the handler. + * + *

In the case of an update handler this means that the client will receive an error rather + * than the update result. + */ + ABANDON, +} diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/SignalMethod.java b/temporal-sdk/src/main/java/io/temporal/workflow/SignalMethod.java index c8260b6e36..e57c87e9c6 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/SignalMethod.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/SignalMethod.java @@ -59,4 +59,7 @@ * tags. And systems like prometheus ignore metrics which have tags with unsupported characters. */ String name() default ""; + + /** Sets the actions taken if a workflow exits with a running instance of this handler. */ + HandlerUnfinishedPolicy unfinishedPolicy() default HandlerUnfinishedPolicy.WARN_AND_ABANDON; } diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/UpdateMethod.java b/temporal-sdk/src/main/java/io/temporal/workflow/UpdateMethod.java index 1e106ed29b..4c52cc3e6e 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/UpdateMethod.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/UpdateMethod.java @@ -43,4 +43,7 @@ * tags. And systems like prometheus ignore metrics which have tags with unsupported characters. */ String name() default ""; + + /** Sets the actions taken if a workflow exits with a running instance of this handler. */ + HandlerUnfinishedPolicy unfinishedPolicy() default HandlerUnfinishedPolicy.WARN_AND_ABANDON; } diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java index 98e4341d27..104a77cfb5 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java @@ -1264,6 +1264,22 @@ public static void applyLocalActivityOptions( WorkflowInternal.applyLocalActivityOptions(activityTypeToOptions); } + /** + * Checks if all update and signal handlers have finished executing. + * + *

Consider waiting on this condition before workflow return or continue-as-new, to prevent + * interruption of in-progress handlers by workflow return: + * + *


+   *    Workflow.await(() -> Workflow.isAllHandlersFinished());
+   *  
will eventually have search attributes as: + * + * @return true if all handlers are finished, false otherwise. + */ + public static boolean isEveryHandlerFinished() { + return WorkflowInternal.isEveryHandlerFinished(); + } + /** Prohibit instantiation. */ private Workflow() {} } diff --git a/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerTaskHandlerTests.java b/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerTaskHandlerTests.java index 3f7d91d9b3..29ae017d6c 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerTaskHandlerTests.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/replay/ReplayWorkflowRunTaskHandlerTaskHandlerTests.java @@ -47,6 +47,7 @@ import io.temporal.testUtils.HistoryUtils; import io.temporal.testing.internal.SDKTestWorkflowRule; import java.time.Duration; +import java.util.HashMap; import java.util.List; import java.util.Optional; import org.junit.Rule; @@ -212,6 +213,11 @@ private ReplayWorkflowFactory setUpMockWorkflowFactory() throws Throwable { when(mockFactory.getWorkflow(any(), any())).thenReturn(mockWorkflow); when(mockWorkflow.eventLoop()).thenReturn(true); when(mockWorkflow.getOutput()).thenReturn(Optional.empty()); + + WorkflowContext mockWorkflowContext = mock(WorkflowContext.class); + when(mockWorkflowContext.getRunningUpdateHandlers()).thenReturn(new HashMap<>()); + when(mockWorkflowContext.getRunningUpdateHandlers()).thenReturn(new HashMap<>()); + when(mockWorkflow.getWorkflowContext()).thenReturn(mockWorkflowContext); return mockFactory; } } diff --git a/temporal-sdk/src/test/java/io/temporal/internal/replay/WarnUnfinishedHandlers.java b/temporal-sdk/src/test/java/io/temporal/internal/replay/WarnUnfinishedHandlers.java new file mode 100644 index 0000000000..1a0252ab91 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/internal/replay/WarnUnfinishedHandlers.java @@ -0,0 +1,233 @@ +/* + * Copyright (C) 
2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.internal.replay; + +import static io.temporal.internal.replay.ReplayWorkflowExecutor.unfinishedSignalHandlesWarnMessage; +import static io.temporal.internal.replay.ReplayWorkflowExecutor.unfinishedUpdateHandlesWarnMessage; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; + +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.Logger; +import ch.qos.logback.classic.spi.ILoggingEvent; +import ch.qos.logback.core.read.ListAppender; +import io.temporal.client.*; +import io.temporal.failure.ApplicationFailure; +import io.temporal.testing.internal.SDKTestOptions; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.workflow.*; +import java.util.List; +import java.util.UUID; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.slf4j.LoggerFactory; + +public class WarnUnfinishedHandlers { + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder().setWorkflowTypes(TestSignalWorkflowImpl.class).build(); + + // Get Logback Logger + Logger replayWorkflowExecutorLogger = + (Logger) LoggerFactory.getLogger(ReplayWorkflowExecutor.class); + 
ListAppender listAppender = new ListAppender<>(); + + @Before + public void setUp() { + listAppender.start(); + replayWorkflowExecutorLogger.addAppender(listAppender); + } + + private WorkflowWithDanglingHandlers setupWorkflow() { + String workflowId = UUID.randomUUID().toString(); + WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); + WorkflowOptions options = + SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() + .setWorkflowId(workflowId) + .build(); + WorkflowWithDanglingHandlers workflow = + workflowClient.newWorkflowStub(WorkflowWithDanglingHandlers.class, options); + WorkflowClient.start(workflow::execute); + testWorkflowRule.waitForTheEndOfWFT(workflowId); + // Send a bunch of signals to warn + for (int i = 0; i < 5; i++) { + workflow.warningSignalHandler(); + // Wait for the signal to be processed to ensure the signal event ID is deterministic + testWorkflowRule.waitForTheEndOfWFT(workflowId); + } + for (int i = 0; i < 5; i++) { + workflow.nonWarningSignalHandler(); + testWorkflowRule.waitForTheEndOfWFT(workflowId); + } + // Send a bunch of updates to warn + WorkflowStub stub = WorkflowStub.fromTyped(workflow); + for (int i = 0; i < 5; i++) { + // Set an update ID to ensure the update event ID is deterministic + stub.startUpdate( + UpdateOptions.newBuilder(Integer.class) + .setUpdateName("warningUpdateHandler") + .setWaitForStage(WorkflowUpdateStage.ACCEPTED) + .setUpdateId(String.valueOf(i)) + .build()); + } + for (int i = 0; i < 5; i++) { + stub.startUpdate("nonWarningUpdateHandler", WorkflowUpdateStage.ACCEPTED, Integer.class); + } + return workflow; + } + + void assertLogs(Boolean expectLogs) { + List logsList = listAppender.list; + if (expectLogs) { + assertEquals(Level.WARN, logsList.get(0).getLevel()); + assertEquals(unfinishedSignalHandlesWarnMessage, logsList.get(0).getMessage()); + assertEquals( + logsList.get(0).getMDCPropertyMap().get("Signals"), + "[SignalHandlerInfo{eventId=5, 
name='warningSignalHandler', policy=WARN_AND_ABANDON}, SignalHandlerInfo{eventId=9, name='warningSignalHandler', policy=WARN_AND_ABANDON}, SignalHandlerInfo{eventId=13, name='warningSignalHandler', policy=WARN_AND_ABANDON}, SignalHandlerInfo{eventId=17, name='warningSignalHandler', policy=WARN_AND_ABANDON}, SignalHandlerInfo{eventId=21, name='warningSignalHandler', policy=WARN_AND_ABANDON}]"); + + assertEquals(Level.WARN, logsList.get(1).getLevel()); + assertEquals(unfinishedUpdateHandlesWarnMessage, logsList.get(1).getMessage()); + assertEquals( + logsList.get(1).getMDCPropertyMap().get("Updates"), + "[UpdateHandlerInfo{updateId='0', name='warningUpdateHandler', policy=WARN_AND_ABANDON}, UpdateHandlerInfo{updateId='1', name='warningUpdateHandler', policy=WARN_AND_ABANDON}, UpdateHandlerInfo{updateId='2', name='warningUpdateHandler', policy=WARN_AND_ABANDON}, UpdateHandlerInfo{updateId='3', name='warningUpdateHandler', policy=WARN_AND_ABANDON}, UpdateHandlerInfo{updateId='4', name='warningUpdateHandler', policy=WARN_AND_ABANDON}]"); + } else { + assertEquals(0, logsList.size()); + } + } + + @Test + public void warnOnWorkflowComplete() { + WorkflowWithDanglingHandlers workflow = setupWorkflow(); + // Try to complete the workflow + workflow.complete(CompletionMethod.COMPLETE); + assertEquals(0, workflow.execute()); + assertLogs(true); + } + + @Test + public void doesNotWarnOnWorkflowFail() { + WorkflowWithDanglingHandlers workflow = setupWorkflow(); + // Try to complete the workflow + workflow.complete(CompletionMethod.FAIL); + assertThrows(WorkflowFailedException.class, () -> workflow.execute()); + assertLogs(false); + } + + @Test + public void warnOnWorkflowCancelled() { + WorkflowWithDanglingHandlers workflow = setupWorkflow(); + // Cancel the workflow + WorkflowStub.fromTyped(workflow).cancel(); + assertThrows(WorkflowFailedException.class, () -> workflow.execute()); + assertLogs(true); + } + + @Test + public void warnOnWorkflowContinueAsNew() { + 
WorkflowWithDanglingHandlers workflow = setupWorkflow(); + // Request the workflow CAN + workflow.complete(CompletionMethod.CONTINUE_AS_NEW); + assertEquals(0, workflow.execute()); + assertLogs(true); + } + + @WorkflowInterface + public interface WorkflowWithDanglingHandlers { + + @WorkflowMethod + int execute(); + + @SignalMethod + void warningSignalHandler(); + + @SignalMethod(unfinishedPolicy = HandlerUnfinishedPolicy.ABANDON) + void nonWarningSignalHandler(); + + @UpdateMethod + void warningUpdateHandler(); + + @UpdateMethod(unfinishedPolicy = HandlerUnfinishedPolicy.ABANDON) + void nonWarningUpdateHandler(); + + @SignalMethod + void complete(CompletionMethod method); + } + + public static class TestSignalWorkflowImpl implements WorkflowWithDanglingHandlers { + int handlersFinished = 0; + boolean blocked = false; + CompletablePromise promise = Workflow.newPromise(); + + @Override + public int execute() { + if (Workflow.getInfo().getContinuedExecutionRunId().isPresent()) { + return 0; + } + CompletionMethod method = promise.cancellableGet(); + if (method == CompletionMethod.COMPLETE) { + return handlersFinished; + } else if (method == CompletionMethod.FAIL) { + blocked = true; + throw ApplicationFailure.newFailure("test failure", "TestFailure"); + } else if (method == CompletionMethod.CONTINUE_AS_NEW) { + Workflow.continueAsNew(); + } + return handlersFinished; + } + + @Override + public void complete(CompletionMethod method) { + promise.complete(method); + } + + @Override + public void warningSignalHandler() { + Workflow.await(() -> blocked); + handlersFinished++; + } + + @Override + public void nonWarningSignalHandler() { + Workflow.await(() -> false); + handlersFinished++; + } + + @Override + public void warningUpdateHandler() { + Workflow.await(() -> blocked); + handlersFinished++; + } + + @Override + public void nonWarningUpdateHandler() { + Workflow.await(() -> false); + handlersFinished++; + } + } + + enum CompletionMethod { + COMPLETE, + FAIL, + 
CONTINUE_AS_NEW, + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java b/temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java new file mode 100644 index 0000000000..59d4f823cb --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.workflow.signalTests; + +import static org.junit.Assert.assertEquals; + +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.*; +import io.temporal.testing.internal.SDKTestOptions; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.workflow.*; +import java.time.Duration; +import java.util.UUID; +import org.junit.Rule; +import org.junit.Test; + +public class SignalAllHandlersFinished { + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder().setWorkflowTypes(TestSignalWorkflowImpl.class).build(); + + @Test + public void isAllHandlersFinished() { + String workflowId = UUID.randomUUID().toString(); + WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); + WorkflowOptions options = + SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() + .setWorkflowId(workflowId) + .build(); + WorkflowWithSignal workflow = workflowClient.newWorkflowStub(WorkflowWithSignal.class, options); + WorkflowExecution execution = WorkflowClient.start(workflow::execute); + // Send a bunch of signals to the workflow + for (int i = 0; i < 5; i++) { + workflow.signal(); + } + // Try to complete the workflow, expecting that it will block until all signals are processed + workflow.tryComplete(); + assertEquals(5, workflow.execute()); + } + + @WorkflowInterface + public interface WorkflowWithSignal { + + @WorkflowMethod + int execute(); + + @SignalMethod + void signal(); + + @SignalMethod + void tryComplete(); + } + + public static class TestSignalWorkflowImpl implements WorkflowWithSignal { + int handlersFinished = 0; + CompletablePromise promise = Workflow.newPromise(); + + @Override + public int execute() { + promise.get(); + Workflow.await(() -> Workflow.isEveryHandlerFinished()); + return handlersFinished; + } + + @Override + public void tryComplete() { + promise.complete(null); + } + + @Override + public void signal() { + 
Workflow.sleep(Duration.ofSeconds(5)); + handlersFinished++; + } + } +} diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java new file mode 100644 index 0000000000..e0c1fc2bec --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.workflow.updateTest; + +import static org.junit.Assert.assertEquals; + +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.client.*; +import io.temporal.testing.internal.SDKTestOptions; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import io.temporal.workflow.*; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import org.junit.Rule; +import org.junit.Test; + +public class UpdateAllHandlersFinished { + + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder().setWorkflowTypes(TestUpdateWorkflowImpl.class).build(); + + @Test + public void isAllHandlersFinished() throws ExecutionException, InterruptedException { + String workflowId = UUID.randomUUID().toString(); + WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); + WorkflowOptions options = + SDKTestOptions.newWorkflowOptionsWithTimeouts(testWorkflowRule.getTaskQueue()).toBuilder() + .setWorkflowId(workflowId) + .build(); + WorkflowWithUpdate workflow = workflowClient.newWorkflowStub(WorkflowWithUpdate.class, options); + WorkflowExecution execution = WorkflowClient.start(workflow::execute); + + WorkflowStub untypedStub = workflowClient.newUntypedWorkflowStub(execution.getWorkflowId()); + List> updateHandles = new ArrayList<>(); + // Send a bunch of update requests + for (int i = 0; i < 5; i++) { + updateHandles.add( + untypedStub.startUpdate( + "update", WorkflowUpdateStage.ACCEPTED, String.class, "update request " + i)); + } + // Try to complete the workflow, expect workflow to wait for the update handlers to finish first + workflow.tryComplete(); + assertEquals( + " update request 0 update request 1 update request 2 update request 3 update request 4", + workflow.execute()); + // Ensure that all update handlers actually finished + for (int i = 0; i < 5; i++) { + assertEquals("update request " + i, 
updateHandles.get(i).getResultAsync().get()); + } + } + + @WorkflowInterface + public interface WorkflowWithUpdate { + + @WorkflowMethod + String execute(); + + @UpdateMethod + String update(String value); + + @UpdateValidatorMethod(updateName = "update") + void updateValidator(String value); + + @SignalMethod + void tryComplete(); + } + + public static class TestUpdateWorkflowImpl implements WorkflowWithUpdate { + List updates = new ArrayList<>(); + CompletablePromise promise = Workflow.newPromise(); + + @Override + public String execute() { + promise.get(); + Workflow.await(() -> Workflow.isEveryHandlerFinished()); + return updates.stream().reduce("", (a, b) -> a + " " + b); + } + + @Override + public void tryComplete() { + promise.complete(null); + } + + @Override + public String update(String value) { + promise.get(); + updates.add(value); + Workflow.sleep(Duration.ofSeconds(5)); + return value; + } + + @Override + public void updateValidator(String value) { + if (Workflow.isEveryHandlerFinished()) { + throw new IllegalArgumentException("Workflow.isEveryHandlerFinished() should return false"); + } + } + } +} diff --git a/temporal-sdk/src/test/resources/logback-test.xml b/temporal-sdk/src/test/resources/logback-test.xml index c05b899b85..ebb0f744d6 100644 --- a/temporal-sdk/src/test/resources/logback-test.xml +++ b/temporal-sdk/src/test/resources/logback-test.xml @@ -34,7 +34,7 @@ - + From a885812eac093740eee46c67125aad538013b51c Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Mon, 12 Aug 2024 16:08:37 -0700 Subject: [PATCH 18/25] Add support for updating schedule search attributes (#2168) Support update SA on a schedule --- .../client/schedules/ScheduleUpdate.java | 30 +++++++++++ .../client/RootScheduleClientInvoker.java | 16 ++++-- .../client/schedules/ScheduleTest.java | 32 +++++++++++- .../io/temporal/testUtils/Eventually.java | 52 +++++++++++++++++++ 4 files changed, 125 insertions(+), 5 deletions(-) create mode 100644
temporal-sdk/src/test/java/io/temporal/testUtils/Eventually.java diff --git a/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleUpdate.java b/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleUpdate.java index 3da8bdd3e9..08410ac055 100644 --- a/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleUpdate.java +++ b/temporal-sdk/src/main/java/io/temporal/client/schedules/ScheduleUpdate.java @@ -20,12 +20,33 @@ package io.temporal.client.schedules; +import io.temporal.common.SearchAttributes; + /** An update returned from a schedule updater. */ public final class ScheduleUpdate { private final Schedule schedule; + private final SearchAttributes typedSearchAttributes; + /** + * Create a new ScheduleUpdate. + * + * @param schedule schedule to replace the existing schedule with + */ public ScheduleUpdate(Schedule schedule) { this.schedule = schedule; + this.typedSearchAttributes = null; + } + + /** + * Create a new ScheduleUpdate. + * + * @param schedule schedule to replace the existing schedule with + * @param typedSearchAttributes search attributes to replace the existing search attributes with. + * Returning null will not update the search attributes. + */ + public ScheduleUpdate(Schedule schedule, SearchAttributes typedSearchAttributes) { + this.schedule = schedule; + this.typedSearchAttributes = typedSearchAttributes; } /** @@ -36,4 +57,13 @@ public ScheduleUpdate(Schedule schedule) { public Schedule getSchedule() { return schedule; } + + /** + * Get the search attributes to update. 
+ * + * @return search attributes to update + */ + public SearchAttributes getTypedSearchAttributes() { + return typedSearchAttributes; + } } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java b/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java index 79f0bd36b8..0ffc40589a 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/client/RootScheduleClientInvoker.java @@ -26,6 +26,7 @@ import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.temporal.api.common.v1.Memo; +import io.temporal.api.common.v1.SearchAttributes; import io.temporal.api.schedule.v1.*; import io.temporal.api.workflowservice.v1.*; import io.temporal.client.ListScheduleListDescriptionIterator; @@ -277,16 +278,23 @@ public void updateSchedule(UpdateScheduleInput input) { return; } - UpdateScheduleRequest request = + UpdateScheduleRequest.Builder request = UpdateScheduleRequest.newBuilder() .setNamespace(clientOptions.getNamespace()) .setIdentity(clientOptions.getIdentity()) .setScheduleId(input.getDescription().getId()) .setRequestId(UUID.randomUUID().toString()) - .setSchedule(scheduleRequestHeader.scheduleToProto(schedule.getSchedule())) - .build(); + .setSchedule(scheduleRequestHeader.scheduleToProto(schedule.getSchedule())); + if (schedule.getTypedSearchAttributes() != null) { + SearchAttributes encodedSa = + SearchAttributesUtil.encodeTyped(schedule.getTypedSearchAttributes()); + if (encodedSa == null) { + encodedSa = SearchAttributes.getDefaultInstance(); + } + request.setSearchAttributes(encodedSa); + } try { - genericClient.updateSchedule(request); + genericClient.updateSchedule(request.build()); } catch (Exception e) { throw new ScheduleException(e); } diff --git a/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java 
b/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java index 7cd8151a5e..1c37c579a0 100644 --- a/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java +++ b/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java @@ -29,6 +29,7 @@ import io.temporal.common.SearchAttributes; import io.temporal.common.converter.EncodedValues; import io.temporal.common.interceptors.ScheduleClientInterceptor; +import io.temporal.testUtils.Eventually; import io.temporal.testing.internal.SDKTestWorkflowRule; import io.temporal.workflow.shared.TestWorkflows; import java.time.Duration; @@ -415,9 +416,12 @@ public void describeSchedules() { public void updateSchedules() { ScheduleClient client = createScheduleClient(); // Create the schedule + String keywordSAValue = "keyword"; ScheduleOptions options = ScheduleOptions.newBuilder() .setMemo(Collections.singletonMap("memokey2", "memoval2")) + .setTypedSearchAttributes( + SearchAttributes.newBuilder().set(CUSTOM_KEYWORD_SA, keywordSAValue).build()) .build(); String scheduleId = UUID.randomUUID().toString(); Schedule schedule = createTestSchedule().build(); @@ -469,7 +473,7 @@ public void updateSchedules() { .setAction(input.getDescription().getSchedule().getAction()) .setSpec(ScheduleSpec.newBuilder().build()); builder.setState(ScheduleState.newBuilder().setPaused(true).build()); - return new ScheduleUpdate(builder.build()); + return new ScheduleUpdate(builder.build(), null); }); description = handle.describe(); // @@ -481,6 +485,32 @@ public void updateSchedules() { // Assert.assertNotEquals(expectedUpdateTime, description.getInfo().getLastUpdatedAt()); Assert.assertEquals(true, description.getSchedule().getState().isPaused()); + Assert.assertEquals(1, description.getTypedSearchAttributes().size()); + Assert.assertEquals( + keywordSAValue, description.getTypedSearchAttributes().get(CUSTOM_KEYWORD_SA)); + // Update the schedule search attribute by clearing them + handle.update( + 
(ScheduleUpdateInput input) -> + new ScheduleUpdate(input.getDescription().getSchedule(), SearchAttributes.EMPTY)); + Eventually.assertEventually( + Duration.ofSeconds(1), + () -> { + ScheduleDescription desc = handle.describe(); + Assert.assertEquals(0, desc.getTypedSearchAttributes().size()); + }); + // Update the schedule search attribute by adding a new search attribute + handle.update( + (ScheduleUpdateInput input) -> + new ScheduleUpdate( + input.getDescription().getSchedule(), + SearchAttributes.newBuilder().set(CUSTOM_KEYWORD_SA, "newkeyword").build())); + Eventually.assertEventually( + Duration.ofSeconds(1), + () -> { + ScheduleDescription desc = handle.describe(); + Assert.assertEquals(1, desc.getTypedSearchAttributes().size()); + Assert.assertEquals("newkeyword", desc.getTypedSearchAttributes().get(CUSTOM_KEYWORD_SA)); + }); // Cleanup schedule handle.delete(); } diff --git a/temporal-sdk/src/test/java/io/temporal/testUtils/Eventually.java b/temporal-sdk/src/test/java/io/temporal/testUtils/Eventually.java new file mode 100644 index 0000000000..2d388a95e5 --- /dev/null +++ b/temporal-sdk/src/test/java/io/temporal/testUtils/Eventually.java @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.temporal.testUtils; + +import java.time.Duration; +import java.time.Instant; + +public class Eventually { + public static void assertEventually(Duration timeout, Runnable command) { + final Instant start = Instant.now(); + final Instant deadline = start.plus(timeout); + + boolean failed; + do { + try { + command.run(); + failed = false; + } catch (Throwable t) { + failed = true; + if (Instant.now().isBefore(deadline)) { + // Try again after a short nap + try { + Thread.sleep(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } else { + throw t; + } + } + } while (failed); + } +} From a5d6e604ebbb4c80def0768ddaa5b7c300300d56 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Wed, 14 Aug 2024 07:45:07 -0700 Subject: [PATCH 19/25] Fix isEveryHandlerFinished doc string (#2182) --- temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java | 2 +- .../workflow/signalTests/SignalAllHandlersFinished.java | 2 +- .../temporal/workflow/updateTest/UpdateAllHandlersFinished.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java index 104a77cfb5..e4ead58ea8 100644 --- a/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java +++ b/temporal-sdk/src/main/java/io/temporal/workflow/Workflow.java @@ -1271,7 +1271,7 @@ public static void applyLocalActivityOptions( * interruption of in-progress handlers by workflow return: * *

-   *    Workflow.await(() -> Workflow.isAllHandlersFinished());
+   *    Workflow.await(() -> Workflow.isEveryHandlerFinished());
    *  
will eventually have search attributes as: * * @return true if all handlers are finished, false otherwise. diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java b/temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java index 59d4f823cb..684d9fb9a6 100644 --- a/temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java +++ b/temporal-sdk/src/test/java/io/temporal/workflow/signalTests/SignalAllHandlersFinished.java @@ -39,7 +39,7 @@ public class SignalAllHandlersFinished { SDKTestWorkflowRule.newBuilder().setWorkflowTypes(TestSignalWorkflowImpl.class).build(); @Test - public void isAllHandlersFinished() { + public void isEveryHandlerFinished() { String workflowId = UUID.randomUUID().toString(); WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); WorkflowOptions options = diff --git a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java index e0c1fc2bec..f8515d9eed 100644 --- a/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java +++ b/temporal-sdk/src/test/java/io/temporal/workflow/updateTest/UpdateAllHandlersFinished.java @@ -42,7 +42,7 @@ public class UpdateAllHandlersFinished { SDKTestWorkflowRule.newBuilder().setWorkflowTypes(TestUpdateWorkflowImpl.class).build(); @Test - public void isAllHandlersFinished() throws ExecutionException, InterruptedException { + public void isEveryHandlerFinished() throws ExecutionException, InterruptedException { String workflowId = UUID.randomUUID().toString(); WorkflowClient workflowClient = testWorkflowRule.getWorkflowClient(); WorkflowOptions options = From abc53233a2bc6fdad6c35f0a71519ce48aded6bc Mon Sep 17 00:00:00 2001 From: pdoerner <122412190+pdoerner@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:56:32 -0700 Subject: 
[PATCH 20/25] Implement test server support for sync Nexus operation commands (#2176) * Implement test server support for sync Nexus operations * Nexus operations command implementations * test cleanup * cleanup * tests --- .../internal/testservice/CommandVerifier.java | 26 +- .../internal/testservice/NexusTaskToken.java | 114 ++++ .../internal/testservice/RequestContext.java | 10 + .../internal/testservice/StateMachines.java | 413 +++++++++++++- .../testservice/TestNexusEndpointStore.java | 2 + .../TestNexusEndpointStoreImpl.java | 56 +- .../testservice/TestServicesStarter.java | 6 +- .../testservice/TestWorkflowMutableState.java | 6 + .../TestWorkflowMutableStateImpl.java | 313 +++++++++-- .../testservice/TestWorkflowService.java | 70 +++ .../testservice/TestWorkflowStore.java | 28 +- .../testservice/TestWorkflowStoreImpl.java | 42 +- .../functional/NexusEndpointTest.java | 20 +- .../functional/NexusWorkflowTest.java | 507 ++++++++++++++++++ 14 files changed, 1516 insertions(+), 97 deletions(-) create mode 100644 temporal-test-server/src/main/java/io/temporal/internal/testservice/NexusTaskToken.java create mode 100644 temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusWorkflowTest.java diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/CommandVerifier.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/CommandVerifier.java index 1a4dfb75fe..a1b4dded19 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/CommandVerifier.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/CommandVerifier.java @@ -28,9 +28,12 @@ class CommandVerifier { private final TestVisibilityStore visibilityStore; + private final TestNexusEndpointStore nexusEndpointStore; - public CommandVerifier(TestVisibilityStore visibilityStore) { + public CommandVerifier( + TestVisibilityStore visibilityStore, TestNexusEndpointStore nexusEndpointStore) { this.visibilityStore = 
visibilityStore; + this.nexusEndpointStore = nexusEndpointStore; } InvalidCommandResult verifyCommand(RequestContext ctx, Command d) { @@ -52,6 +55,27 @@ InvalidCommandResult verifyCommand(RequestContext ctx, Command d) { eventAttributesFailure, e); } + break; + case COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION: + try { + nexusEndpointStore.getEndpointByName( + d.getScheduleNexusOperationCommandAttributes().getEndpoint()); + } catch (StatusRuntimeException e) { + ServerFailure eventAttributesFailure = + new ServerFailure( + ProtoEnumNameUtils.uniqueToSimplifiedName( + WorkflowTaskFailedCause + .WORKFLOW_TASK_FAILED_CAUSE_BAD_SCHEDULE_NEXUS_OPERATION_ATTRIBUTES) + + ": " + + e.getStatus().getDescription(), + true); + return new InvalidCommandResult( + WorkflowTaskFailedCause + .WORKFLOW_TASK_FAILED_CAUSE_BAD_SCHEDULE_NEXUS_OPERATION_ATTRIBUTES, + eventAttributesFailure, + e); + } + break; } return null; } diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/NexusTaskToken.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/NexusTaskToken.java new file mode 100644 index 0000000000..298bfa2c2f --- /dev/null +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/NexusTaskToken.java @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.internal.testservice; + +import com.google.protobuf.ByteString; +import io.grpc.Status; +import io.temporal.api.common.v1.WorkflowExecution; +import java.io.*; +import java.util.Objects; +import javax.annotation.Nonnull; + +class NexusTaskToken { + + @Nonnull private final ExecutionId executionId; + private final long scheduledEventId; + private final int attempt; + + NexusTaskToken( + @Nonnull String namespace, + @Nonnull WorkflowExecution execution, + long scheduledEventId, + int attempt) { + this( + new ExecutionId(Objects.requireNonNull(namespace), Objects.requireNonNull(execution)), + scheduledEventId, + attempt); + } + + NexusTaskToken( + @Nonnull String namespace, + @Nonnull String workflowId, + @Nonnull String runId, + long scheduledEventId, + int attempt) { + this( + namespace, + WorkflowExecution.newBuilder() + .setWorkflowId(Objects.requireNonNull(workflowId)) + .setRunId(Objects.requireNonNull(runId)) + .build(), + scheduledEventId, + attempt); + } + + NexusTaskToken(@Nonnull ExecutionId executionId, long scheduledEventId, int attempt) { + this.executionId = Objects.requireNonNull(executionId); + this.scheduledEventId = scheduledEventId; + this.attempt = attempt; + } + + public ExecutionId getExecutionId() { + return executionId; + } + + public long getScheduledEventId() { + return scheduledEventId; + } + + public long getAttempt() { + return attempt; + } + + /** Used for task tokens. 
*/ + public ByteString toBytes() { + try (ByteArrayOutputStream bout = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(bout)) { + out.writeUTF(executionId.getNamespace()); + WorkflowExecution execution = executionId.getExecution(); + out.writeUTF(execution.getWorkflowId()); + out.writeUTF(execution.getRunId()); + out.writeLong(scheduledEventId); + out.writeInt(attempt); + return ByteString.copyFrom(bout.toByteArray()); + } catch (IOException e) { + throw Status.INTERNAL.withCause(e).withDescription(e.getMessage()).asRuntimeException(); + } + } + + static NexusTaskToken fromBytes(ByteString serialized) { + ByteArrayInputStream bin = new ByteArrayInputStream(serialized.toByteArray()); + DataInputStream in = new DataInputStream(bin); + try { + String namespace = in.readUTF(); + String workflowId = in.readUTF(); + String runId = in.readUTF(); + long scheduledEventId = in.readLong(); + int attempt = in.readInt(); + return new NexusTaskToken(namespace, workflowId, runId, scheduledEventId, attempt); + } catch (IOException e) { + throw Status.INVALID_ARGUMENT + .withCause(e) + .withDescription(e.getMessage()) + .asRuntimeException(); + } + } +} diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/RequestContext.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/RequestContext.java index a07c031be4..704220faee 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/RequestContext.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/RequestContext.java @@ -124,6 +124,7 @@ public int getChange() { // If an eager dispatch was performed, it should be reset to null private WorkflowTask workflowTaskForMatching; private final List activityTasks = new ArrayList<>(); + private final List nexusTasks = new ArrayList<>(); private final List timers = new ArrayList<>(); private long workflowCompletedAtEventId = -1; private boolean needWorkflowTask; @@ -157,6 
+158,7 @@ public int getChange() { void add(RequestContext ctx) { this.activityTasks.addAll(ctx.getActivityTasks()); + this.nexusTasks.addAll(ctx.getNexusTasks()); this.timers.addAll(ctx.getTimers()); this.events.addAll(ctx.getEvents()); } @@ -252,6 +254,10 @@ void addActivityTask(ActivityTask activityTask) { this.activityTasks.add(activityTask); } + void addNexusTask(TestWorkflowStore.NexusTask nexusTask) { + this.nexusTasks.add(nexusTask); + } + /** * @return cancellation handle */ @@ -269,6 +275,10 @@ List getActivityTasks() { return activityTasks; } + List getNexusTasks() { + return nexusTasks; + } + List getEvents() { return events; } diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java index c64059b255..c11b57aae8 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/StateMachines.java @@ -44,25 +44,13 @@ import static io.temporal.internal.testservice.StateMachines.State.TIMED_OUT; import com.google.common.base.Preconditions; -import com.google.protobuf.Any; -import com.google.protobuf.Duration; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.Timestamp; +import com.google.protobuf.*; import com.google.protobuf.util.Durations; import com.google.protobuf.util.Timestamps; import io.grpc.Status; import io.grpc.StatusRuntimeException; -import io.temporal.api.command.v1.CancelTimerCommandAttributes; -import io.temporal.api.command.v1.CancelWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.CompleteWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.ContinueAsNewWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.FailWorkflowExecutionCommandAttributes; -import 
io.temporal.api.command.v1.RequestCancelActivityTaskCommandAttributes; -import io.temporal.api.command.v1.RequestCancelExternalWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.ScheduleActivityTaskCommandAttributes; -import io.temporal.api.command.v1.SignalExternalWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.StartChildWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.StartTimerCommandAttributes; +import io.temporal.api.command.v1.*; +import io.temporal.api.common.v1.Payload; import io.temporal.api.common.v1.Payloads; import io.temporal.api.common.v1.RetryPolicy; import io.temporal.api.common.v1.WorkflowExecution; @@ -70,8 +58,13 @@ import io.temporal.api.errordetails.v1.QueryFailedFailure; import io.temporal.api.failure.v1.ApplicationFailureInfo; import io.temporal.api.failure.v1.Failure; +import io.temporal.api.failure.v1.NexusOperationFailureInfo; import io.temporal.api.failure.v1.TimeoutFailureInfo; import io.temporal.api.history.v1.*; +import io.temporal.api.nexus.v1.Endpoint; +import io.temporal.api.nexus.v1.StartOperationRequest; +import io.temporal.api.nexus.v1.StartOperationResponse; +import io.temporal.api.nexus.v1.UnsuccessfulOperationError; import io.temporal.api.protocol.v1.Message; import io.temporal.api.query.v1.WorkflowQueryResult; import io.temporal.api.taskqueue.v1.StickyExecutionAttributes; @@ -88,6 +81,7 @@ import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ForkJoinPool; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -346,6 +340,51 @@ public int getAttempt() { } } + static final class NexusOperationData { + + String operationId; + Endpoint endpoint; + NexusOperationScheduledEventAttributes scheduledEvent; + TestWorkflowStore.NexusTask nexusTask; + + long scheduledEventId = NO_EVENT_ID; + // TODO(pj): consider refactoring cancellation into its own state 
machine as part of async work + boolean cancelRequested = false; + + TestServiceRetryState retryState; + long lastAttemptCompleteTime; + Duration nextBackoffInterval; + long nextAttemptScheduleTime; + String identity; + + public NexusOperationData(Endpoint endpoint) { + this.operationId = UUID.randomUUID().toString(); + this.endpoint = endpoint; + } + + public int getAttempt() { + return retryState != null ? retryState.getAttempt() : 1; + } + + @Override + public String toString() { + return "NexusOperationData{" + + ", nexusEndpoint=" + + endpoint + + ", scheduledEvent=" + + scheduledEvent + + ", nexusTask=" + + nexusTask + + ", scheduledEventId=" + + scheduledEventId + + ", retryState=" + + retryState + + ", nextBackoffInterval=" + + nextBackoffInterval + + '}'; + } + } + static final class SignalExternalData { long initiatedEventId = NO_EVENT_ID; public SignalExternalWorkflowExecutionInitiatedEventAttributes initiatedEvent; @@ -568,6 +607,43 @@ public static StateMachine newUpdateWorkflowExecuti .add(STARTED, COMPLETE, COMPLETED, StateMachines::completeUpdate); } + public static StateMachine newNexusOperation(Endpoint endpoint) { + return new StateMachine<>(new NexusOperationData(endpoint)) + .add(NONE, INITIATE, INITIATED, StateMachines::scheduleNexusOperation) + .add(INITIATED, START, STARTED, StateMachines::startNexusOperation) + .add(INITIATED, TIME_OUT, TIMED_OUT, StateMachines::timeoutNexusOperation) + .add( + INITIATED, + REQUEST_CANCELLATION, + CANCELLATION_REQUESTED, + StateMachines::requestCancelNexusOperation) + // Transitions directly from INITIATED to COMPLETE for sync completions + .add(INITIATED, COMPLETE, COMPLETED, StateMachines::completeNexusOperation) + .add(STARTED, COMPLETE, COMPLETED, StateMachines::completeNexusOperation) + .add(CANCELLATION_REQUESTED, COMPLETE, COMPLETED, StateMachines::completeNexusOperation) + // Transitions to initiated in case of a retry + .add(STARTED, FAIL, new State[] {FAILED, INITIATED}, 
StateMachines::failNexusOperation) + // Transitions to initiated in case of a retry + .add( + STARTED, + TIME_OUT, + new State[] {TIMED_OUT, INITIATED}, + StateMachines::timeoutNexusOperation) + .add( + STARTED, + REQUEST_CANCELLATION, + CANCELLATION_REQUESTED, + StateMachines::requestCancelNexusOperation) + .add( + CANCELLATION_REQUESTED, + CANCEL, + CANCELED, + StateMachines::reportNexusOperationCancellation) + .add(CANCELLATION_REQUESTED, COMPLETE, COMPLETED, StateMachines::completeNexusOperation) + .add(CANCELLATION_REQUESTED, TIME_OUT, TIMED_OUT, StateMachines::timeoutNexusOperation) + .add(CANCELLATION_REQUESTED, FAIL, FAILED, StateMachines::failNexusOperation); + } + public static StateMachine newTimerStateMachine() { return new StateMachine<>(new TimerData()) .add(NONE, START, STARTED, StateMachines::startTimer) @@ -591,6 +667,301 @@ public static StateMachine newCancelExternalStateMachine() { private static void noop(RequestContext ctx, T data, A a, long notUsed) {} + private static void scheduleNexusOperation( + RequestContext ctx, + NexusOperationData data, + ScheduleNexusOperationCommandAttributes attr, + long workflowTaskCompletedId) { + RetryPolicy retryPolicy = getDefaultNexusOperationRetryPolicy(); + Duration expirationInterval = attr.getScheduleToCloseTimeout(); + Timestamp expirationTime = Timestamps.add(ctx.currentTime(), expirationInterval); + TestServiceRetryState retryState = new TestServiceRetryState(retryPolicy, expirationTime); + + NexusOperationScheduledEventAttributes.Builder a = + NexusOperationScheduledEventAttributes.newBuilder() + .setEndpoint(attr.getEndpoint()) + .setEndpointId(data.endpoint.getId()) + .setService(attr.getService()) + .setOperation(attr.getOperation()) + .setInput(attr.getInput()) + .setScheduleToCloseTimeout(attr.getScheduleToCloseTimeout()) + .putAllNexusHeader(attr.getNexusHeaderMap()) + .setRequestId(UUID.randomUUID().toString()) + .setWorkflowTaskCompletedEventId(workflowTaskCompletedId); + + 
data.scheduledEvent = a.build(); + HistoryEvent event = + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_SCHEDULED) + .setNexusOperationScheduledEventAttributes(a) + .build(); + + long scheduledEventId = ctx.addEvent(event); + NexusTaskToken taskToken = + new NexusTaskToken(ctx.getExecutionId(), scheduledEventId, data.getAttempt()); + + PollNexusTaskQueueResponse.Builder pollResponse = + PollNexusTaskQueueResponse.newBuilder() + .setTaskToken(taskToken.toBytes()) + .setRequest( + io.temporal.api.nexus.v1.Request.newBuilder() + .setScheduledTime(ctx.currentTime()) + .putAllHeader(attr.getNexusHeaderMap()) + .setStartOperation( + StartOperationRequest.newBuilder() + .setService(attr.getService()) + .setOperation(attr.getOperation()) + .setPayload(attr.getInput()) + .setCallback(""))); // TODO(pj): support async operations + + TaskQueueId taskQueueId = + new TaskQueueId( + ctx.getNamespace(), data.endpoint.getSpec().getTarget().getWorker().getTaskQueue()); + TestWorkflowStore.NexusTask task = new TestWorkflowStore.NexusTask(taskQueueId, pollResponse); + + // Test server only supports worker targets, so just push directly to Nexus task queue without + // invoking Nexus client. 
+ ctx.addNexusTask(task); + ctx.onCommit( + historySize -> { + data.scheduledEventId = scheduledEventId; + data.nexusTask = task; + data.retryState = retryState; + }); + } + + private static void startNexusOperation( + RequestContext ctx, + NexusOperationData data, + RespondNexusTaskCompletedRequest request, + long notUsed) { + // TODO(pj): support async operations + } + + private static void completeNexusOperation( + RequestContext ctx, NexusOperationData data, Object request, long notUsed) { + if (request instanceof RespondNexusTaskCompletedRequest) { + handleSyncStartOperation( + ctx, + data, + ((RespondNexusTaskCompletedRequest) request).getResponse().getStartOperation()); + } else { + // TODO(pj): support async completion + throw new IllegalArgumentException("Unknown request: " + request); + } + } + + private static void handleSyncStartOperation( + RequestContext ctx, NexusOperationData data, StartOperationResponse response) { + if (response.hasSyncSuccess()) { + handleSyncSuccess(ctx, data, response.getSyncSuccess()); + } else if (response.hasOperationError()) { + handleUnsuccessfulOperationError(ctx, data, response.getOperationError()); + } else { + throw new IllegalArgumentException( + "Unable to process StartOperationResponse. 
Expected SyncSuccess or OperationError."); + } + } + + private static void handleSyncSuccess( + RequestContext ctx, NexusOperationData data, StartOperationResponse.Sync response) { + ctx.addEvent( + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_COMPLETED) + .setNexusOperationCompletedEventAttributes( + NexusOperationCompletedEventAttributes.newBuilder() + .setRequestId(data.scheduledEvent.getRequestId()) + .setScheduledEventId(data.scheduledEventId) + .setResult(response.getPayload())) + .build()); + } + + private static void handleUnsuccessfulOperationError( + RequestContext ctx, NexusOperationData data, UnsuccessfulOperationError err) { + + Failure f = + Failure.newBuilder() + .setMessage("nexus operation completed unsuccessfully") + .setNexusOperationExecutionFailureInfo( + NexusOperationFailureInfo.newBuilder() + .setEndpoint(data.endpoint.getSpec().getName()) + .setService(data.scheduledEvent.getService()) + .setOperation(data.scheduledEvent.getOperation()) + .setOperationId(data.operationId) + .setScheduledEventId(data.scheduledEventId)) + .setCause(nexusFailureToApplicationFailure(err.getFailure())) + .build(); + + HistoryEvent event; + if (data.cancelRequested) { + event = + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_CANCELED) + .setNexusOperationCanceledEventAttributes( + NexusOperationCanceledEventAttributes.newBuilder() + .setRequestId(data.scheduledEvent.getRequestId()) + .setScheduledEventId(data.scheduledEventId) + .setFailure(f)) + .build(); + } else { + event = + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_FAILED) + .setNexusOperationFailedEventAttributes( + NexusOperationFailedEventAttributes.newBuilder() + .setRequestId(data.scheduledEvent.getRequestId()) + .setScheduledEventId(data.scheduledEventId) + .setFailure(f)) + .build(); + } + + ctx.addEvent(event); + } + + private static State timeoutNexusOperation( + RequestContext ctx, 
NexusOperationData data, TimeoutType timeoutType, long notUsed) { + if (timeoutType != TimeoutType.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE) { + throw new IllegalArgumentException( + "Timeout type not supported for Nexus operations: " + timeoutType); + } + + Optional previousFailure = data.retryState.getPreviousRunFailure(); + + // chaining with the previous run failure if we are preparing the final failure + Failure failure = newTimeoutFailure(timeoutType, Optional.empty(), previousFailure); + + // not chaining with the previous run failure if we are preparing the failure to be stored + // for the next iteration + Optional lastFailure = + Optional.of(newTimeoutFailure(timeoutType, Optional.empty(), Optional.empty())); + RetryState retryState = attemptNexusOperationRetry(ctx, lastFailure, data); + if (retryState == RetryState.RETRY_STATE_IN_PROGRESS) { + return INITIATED; + } + + ctx.addEvent( + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_TIMED_OUT) + .setNexusOperationTimedOutEventAttributes( + NexusOperationTimedOutEventAttributes.newBuilder() + .setRequestId(data.scheduledEvent.getRequestId()) + .setScheduledEventId(data.scheduledEventId) + .setFailure(failure)) + .build()); + + return TIMED_OUT; + } + + private static State failNexusOperation( + RequestContext ctx, + NexusOperationData data, + RespondNexusTaskFailedRequest request, + long notUsed) { + if (!request.hasError()) { + throw new IllegalArgumentException( + "Nexus handler error not set on RespondNexusTaskFailedRequest"); + } + + Failure failure = nexusFailureToApplicationFailure(request.getError().getFailure()); + RetryState retryState = attemptNexusOperationRetry(ctx, Optional.of(failure), data); + if (retryState == RetryState.RETRY_STATE_IN_PROGRESS) { + return INITIATED; + } + + ctx.addEvent( + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_FAILED) + .setNexusOperationFailedEventAttributes( + NexusOperationFailedEventAttributes.newBuilder() + 
.setRequestId(data.scheduledEvent.getRequestId()) + .setScheduledEventId(data.scheduledEventId) + .setFailure(failure)) + .build()); + return FAILED; + } + + private static RetryState attemptNexusOperationRetry( + RequestContext ctx, Optional failure, NexusOperationData data) { + Optional info = failure.map(Failure::getApplicationFailureInfo); + Optional nextRetryDelay = Optional.empty(); + if (info.isPresent()) { + if (info.get().getNonRetryable()) { + return RetryState.RETRY_STATE_NON_RETRYABLE_FAILURE; + } + if (info.get().hasNextRetryDelay()) { + nextRetryDelay = + Optional.of(ProtobufTimeUtils.toJavaDuration(info.get().getNextRetryDelay())); + } + } + + TestServiceRetryState nextAttempt = data.retryState.getNextAttempt(failure); + TestServiceRetryState.BackoffInterval backoffInterval = + data.retryState.getBackoffIntervalInSeconds( + info.map(ApplicationFailureInfo::getType), ctx.currentTime(), nextRetryDelay); + if (backoffInterval.getRetryState() == RetryState.RETRY_STATE_IN_PROGRESS) { + data.nextBackoffInterval = ProtobufTimeUtils.toProtoDuration(backoffInterval.getInterval()); + PollNexusTaskQueueResponse.Builder task = data.nexusTask.getTask(); + ctx.onCommit( + (historySize) -> { + data.retryState = nextAttempt; + data.nextAttemptScheduleTime = ctx.currentTime().getSeconds(); + }); + } else { + data.nextBackoffInterval = Durations.ZERO; + } + return backoffInterval.getRetryState(); + } + + private static Failure nexusFailureToApplicationFailure( + io.temporal.api.nexus.v1.Failure failure) { + return Failure.newBuilder() + .setMessage(failure.getMessage()) + .setApplicationFailureInfo( + ApplicationFailureInfo.newBuilder() + .setType("NexusOperationFailure") + .setNonRetryable(true) + .setDetails( + Payloads.newBuilder() + .addPayloads( + Payload.newBuilder() + .putAllMetadata( + failure.getMetadataMap().entrySet().stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, + e -> ByteString.copyFromUtf8(e.getValue())))) + 
.setData(failure.getDetails())))) + .build(); + } + + private static void requestCancelNexusOperation( + RequestContext ctx, + NexusOperationData data, + RequestCancelNexusOperationCommandAttributes attr, + long workflowTaskCompletedId) { + data.cancelRequested = true; + ctx.addEvent( + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_CANCEL_REQUESTED) + .setNexusOperationCancelRequestedEventAttributes( + NexusOperationCancelRequestedEventAttributes.newBuilder() + .setScheduledEventId(attr.getScheduledEventId()) + .setWorkflowTaskCompletedEventId(workflowTaskCompletedId)) + .build()); + } + + private static void reportNexusOperationCancellation( + RequestContext ctx, NexusOperationData data, Object request, long notUsed) { + ctx.addEvent( + HistoryEvent.newBuilder() + .setEventType(EventType.EVENT_TYPE_NEXUS_OPERATION_CANCELED) + .setNexusOperationCanceledEventAttributes( + NexusOperationCanceledEventAttributes.newBuilder() + .setScheduledEventId(data.scheduledEventId) + .setRequestId(data.scheduledEvent.getRequestId())) + .build()); + } + private static void timeoutChildWorkflow( RequestContext ctx, ChildWorkflowData data, RetryState retryState, long notUsed) { StartChildWorkflowExecutionInitiatedEventAttributes ie = data.initiatedEvent; @@ -2070,4 +2441,16 @@ static RetryPolicy ensureDefaultFieldsForActivityRetryPolicy(RetryPolicy origina : originalPolicy.getMaximumAttempts()) .build(); } + + static RetryPolicy getDefaultNexusOperationRetryPolicy() { + return RetryPolicy.newBuilder() + .addAllNonRetryableErrorTypes( + Arrays.asList( + "BAD_REQUEST", "INVALID_ARGUMENT", "NOT_FOUND", "DEADLINE_EXCEEDED", "CANCELLED")) + .setInitialInterval(Durations.fromSeconds(1)) + .setMaximumInterval(Durations.fromSeconds(10)) + .setBackoffCoefficient(2.0) + .setMaximumAttempts(10) + .build(); + } } diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java 
b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java index dcb9b9c23e..40083597a6 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStore.java @@ -35,6 +35,8 @@ public interface TestNexusEndpointStore extends Closeable { Endpoint getEndpoint(String id); + Endpoint getEndpointByName(String name); + List listEndpoints(long pageSize, byte[] nextPageToken, String name); void validateEndpointSpec(EndpointSpec spec); diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java index dbaa2ddf31..1b91ecbe79 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestNexusEndpointStoreImpl.java @@ -24,6 +24,8 @@ import io.temporal.api.nexus.v1.Endpoint; import io.temporal.api.nexus.v1.EndpointSpec; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -35,25 +37,26 @@ */ public class TestNexusEndpointStoreImpl implements TestNexusEndpointStore { - private static final Pattern ENDPOINT_NAME_REGEX = Pattern.compile("^[a-zA-Z_][a-zA-Z0-9_]*$"); + private static final Pattern ENDPOINT_NAME_REGEX = + Pattern.compile("^[a-zA-Z][a-zA-Z0-9\\-]*[a-zA-Z0-9]$"); - private final SortedMap endpoints = new ConcurrentSkipListMap<>(); - private final Set endpointNames = new HashSet<>(); + private final SortedMap endpointsById = new ConcurrentSkipListMap<>(); + private final ConcurrentMap endpointsByName = new ConcurrentHashMap<>(); @Override 
public Endpoint createEndpoint(EndpointSpec spec) { validateEndpointSpec(spec); - if (!endpointNames.add(spec.getName())) { + String id = UUID.randomUUID().toString(); + Endpoint endpoint = Endpoint.newBuilder().setId(id).setVersion(1).setSpec(spec).build(); + + if (endpointsByName.putIfAbsent(spec.getName(), endpoint) != null) { throw Status.ALREADY_EXISTS .withDescription("Nexus endpoint already registered with name: " + spec.getName()) .asRuntimeException(); } - String id = UUID.randomUUID().toString(); - Endpoint endpoint = Endpoint.newBuilder().setId(id).setVersion(1).setSpec(spec).build(); - - if (endpoints.putIfAbsent(id, endpoint) != null) { + if (endpointsById.putIfAbsent(id, endpoint) != null) { // This should never happen in practice throw Status.ALREADY_EXISTS .withDescription("Nexus endpoint already exists with ID: " + id) @@ -67,7 +70,7 @@ public Endpoint createEndpoint(EndpointSpec spec) { public Endpoint updateEndpoint(String id, long version, EndpointSpec spec) { validateEndpointSpec(spec); - Endpoint prev = endpoints.get(id); + Endpoint prev = endpointsById.get(id); if (prev == null) { throw Status.NOT_FOUND @@ -86,7 +89,10 @@ public Endpoint updateEndpoint(String id, long version, EndpointSpec spec) { .asRuntimeException(); } - if (!prev.getSpec().getName().equals(spec.getName()) && !endpointNames.add(spec.getName())) { + Endpoint updated = Endpoint.newBuilder(prev).setVersion(version + 1).setSpec(spec).build(); + + if (!prev.getSpec().getName().equals(spec.getName()) + && endpointsByName.putIfAbsent(spec.getName(), updated) != null) { throw Status.ALREADY_EXISTS .withDescription( "Error updating Nexus endpoint: " @@ -94,18 +100,16 @@ public Endpoint updateEndpoint(String id, long version, EndpointSpec spec) { + spec.getName()) .asRuntimeException(); } else { - endpointNames.remove(prev.getSpec().getName()); + endpointsByName.remove(prev.getSpec().getName()); } - Endpoint updated = Endpoint.newBuilder(prev).setVersion(version + 
1).setSpec(spec).build(); - - endpoints.put(id, updated); + endpointsById.put(id, updated); return updated; } @Override public void deleteEndpoint(String id, long version) { - Endpoint existing = endpoints.get(id); + Endpoint existing = endpointsById.get(id); if (existing == null) { throw Status.NOT_FOUND @@ -124,12 +128,13 @@ public void deleteEndpoint(String id, long version) { .asRuntimeException(); } - endpoints.remove(id); + endpointsById.remove(id); + endpointsByName.remove(existing.getSpec().getName()); } @Override public Endpoint getEndpoint(String id) { - Endpoint endpoint = endpoints.get(id); + Endpoint endpoint = endpointsById.get(id); if (endpoint == null) { throw Status.NOT_FOUND .withDescription("Could not find Nexus endpoint with ID: " + id) @@ -138,22 +143,33 @@ public Endpoint getEndpoint(String id) { return endpoint; } + @Override + public Endpoint getEndpointByName(String name) { + Endpoint endpoint = endpointsByName.get(name); + if (endpoint == null) { + throw Status.NOT_FOUND + .withDescription("Could not find Nexus endpoint with name: " + name) + .asRuntimeException(); + } + return endpoint; + } + @Override public List listEndpoints(long pageSize, byte[] nextPageToken, String name) { if (name != null && !name.isEmpty()) { - return endpoints.values().stream() + return endpointsById.values().stream() .filter(ep -> ep.getSpec().getName().equals(name)) .limit(1) .collect(Collectors.toList()); } if (nextPageToken.length > 0) { - return endpoints.tailMap(new String(nextPageToken)).values().stream() + return endpointsById.tailMap(new String(nextPageToken)).values().stream() .skip(1) .limit(pageSize) .collect(Collectors.toList()); } - return endpoints.values().stream().limit(pageSize).collect(Collectors.toList()); + return endpointsById.values().stream().limit(pageSize).collect(Collectors.toList()); } @Override diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java 
b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java index 1ade1ff7ce..275b52b4b5 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestServicesStarter.java @@ -51,7 +51,11 @@ public TestServicesStarter(boolean lockTimeSkipping, long initialTimeMillis) { this.testService = new TestService(this.workflowStore, this.selfAdvancingTimer, lockTimeSkipping); this.workflowService = - new TestWorkflowService(this.workflowStore, this.visibilityStore, this.selfAdvancingTimer); + new TestWorkflowService( + this.workflowStore, + this.visibilityStore, + this.nexusEndpointStore, + this.selfAdvancingTimer); this.services = Arrays.asList(this.operatorService, this.testService, this.workflowService); } diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableState.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableState.java index 5d0be867b7..315d4d473d 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableState.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableState.java @@ -111,6 +111,12 @@ void cancelActivityTask( void cancelActivityTaskById(String id, RespondActivityTaskCanceledByIdRequest canceledRequest); + void startNexusTask(long scheduledEventId, RespondNexusTaskCompletedRequest request); + + void completeNexusTask(long scheduledEventId, RespondNexusTaskCompletedRequest request); + + void failNexusTask(long scheduledEventId, RespondNexusTaskFailedRequest request); + QueryWorkflowResponse query(QueryWorkflowRequest queryRequest, long deadline); UpdateWorkflowExecutionResponse updateWorkflowExecution( diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java 
b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java index 757e344257..d678320283 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowMutableStateImpl.java @@ -22,11 +22,7 @@ import static io.temporal.api.enums.v1.UpdateWorkflowExecutionLifecycleStage.*; import static io.temporal.internal.testservice.CronUtils.getBackoffInterval; -import static io.temporal.internal.testservice.StateMachines.DEFAULT_WORKFLOW_EXECUTION_TIMEOUT_MILLISECONDS; -import static io.temporal.internal.testservice.StateMachines.DEFAULT_WORKFLOW_TASK_TIMEOUT_MILLISECONDS; -import static io.temporal.internal.testservice.StateMachines.MAX_WORKFLOW_TASK_TIMEOUT_MILLISECONDS; -import static io.temporal.internal.testservice.StateMachines.NO_EVENT_ID; -import static io.temporal.internal.testservice.StateMachines.newActivityStateMachine; +import static io.temporal.internal.testservice.StateMachines.*; import static io.temporal.internal.testservice.TestServiceRetryState.validateAndOverrideRetryPolicy; import com.google.common.base.Preconditions; @@ -39,21 +35,7 @@ import io.grpc.Deadline; import io.grpc.Status; import io.grpc.StatusRuntimeException; -import io.temporal.api.command.v1.CancelTimerCommandAttributes; -import io.temporal.api.command.v1.CancelWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.Command; -import io.temporal.api.command.v1.CompleteWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.ContinueAsNewWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.FailWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.ProtocolMessageCommandAttributes; -import io.temporal.api.command.v1.RecordMarkerCommandAttributes; -import io.temporal.api.command.v1.RequestCancelActivityTaskCommandAttributes; -import 
io.temporal.api.command.v1.RequestCancelExternalWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.ScheduleActivityTaskCommandAttributes; -import io.temporal.api.command.v1.SignalExternalWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.StartChildWorkflowExecutionCommandAttributes; -import io.temporal.api.command.v1.StartTimerCommandAttributes; -import io.temporal.api.command.v1.UpsertWorkflowSearchAttributesCommandAttributes; +import io.temporal.api.command.v1.*; import io.temporal.api.common.v1.Payloads; import io.temporal.api.common.v1.RetryPolicy; import io.temporal.api.common.v1.WorkflowExecution; @@ -61,28 +43,14 @@ import io.temporal.api.errordetails.v1.QueryFailedFailure; import io.temporal.api.failure.v1.ApplicationFailureInfo; import io.temporal.api.failure.v1.Failure; -import io.temporal.api.history.v1.ActivityTaskScheduledEventAttributes; -import io.temporal.api.history.v1.ChildWorkflowExecutionCanceledEventAttributes; -import io.temporal.api.history.v1.ChildWorkflowExecutionCompletedEventAttributes; -import io.temporal.api.history.v1.ChildWorkflowExecutionFailedEventAttributes; -import io.temporal.api.history.v1.ChildWorkflowExecutionStartedEventAttributes; -import io.temporal.api.history.v1.ChildWorkflowExecutionTimedOutEventAttributes; -import io.temporal.api.history.v1.ExternalWorkflowExecutionCancelRequestedEventAttributes; -import io.temporal.api.history.v1.HistoryEvent; -import io.temporal.api.history.v1.MarkerRecordedEventAttributes; -import io.temporal.api.history.v1.StartChildWorkflowExecutionFailedEventAttributes; -import io.temporal.api.history.v1.UpsertWorkflowSearchAttributesEventAttributes; -import io.temporal.api.history.v1.WorkflowExecutionContinuedAsNewEventAttributes; -import io.temporal.api.history.v1.WorkflowExecutionSignaledEventAttributes; +import io.temporal.api.history.v1.*; +import io.temporal.api.nexus.v1.Endpoint; import io.temporal.api.protocol.v1.Message; import 
io.temporal.api.query.v1.QueryRejected; import io.temporal.api.query.v1.WorkflowQueryResult; import io.temporal.api.taskqueue.v1.StickyExecutionAttributes; import io.temporal.api.update.v1.*; -import io.temporal.api.workflow.v1.PendingActivityInfo; -import io.temporal.api.workflow.v1.PendingChildExecutionInfo; -import io.temporal.api.workflow.v1.WorkflowExecutionConfig; -import io.temporal.api.workflow.v1.WorkflowExecutionInfo; +import io.temporal.api.workflow.v1.*; import io.temporal.api.workflowservice.v1.*; import io.temporal.common.converter.DefaultDataConverter; import io.temporal.failure.ServerFailure; @@ -137,6 +105,7 @@ private interface UpdateProcedure { private final OptionalLong parentChildInitiatedEventId; private final TestWorkflowStore store; private final TestVisibilityStore visibilityStore; + private final TestNexusEndpointStore nexusEndpointStore; private final TestWorkflowService service; private final CommandVerifier commandVerifier; @@ -145,6 +114,7 @@ private interface UpdateProcedure { private final Map> activities = new HashMap<>(); private final Map activityById = new HashMap<>(); private final Map> childWorkflows = new HashMap<>(); + private final Map> nexusOperations = new HashMap<>(); private final Map> timers = new HashMap<>(); private final Map> externalSignals = new HashMap<>(); private final Map> externalCancellations = @@ -176,11 +146,13 @@ private interface UpdateProcedure { TestWorkflowService service, TestWorkflowStore store, TestVisibilityStore visibilityStore, + TestNexusEndpointStore nexusEndpointStore, SelfAdvancingTimer selfAdvancingTimer) { this.store = store; this.visibilityStore = visibilityStore; + this.nexusEndpointStore = nexusEndpointStore; this.service = service; - this.commandVerifier = new CommandVerifier(visibilityStore); + this.commandVerifier = new CommandVerifier(visibilityStore, nexusEndpointStore); startRequest = overrideStartWorkflowExecutionRequest(startRequest); this.startRequest = startRequest; 
this.executionId = @@ -690,6 +662,14 @@ private void processCommand( identity, workflowTaskCompletedId); break; + case COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION: + processScheduleNexusOperation( + ctx, d.getScheduleNexusOperationCommandAttributes(), workflowTaskCompletedId); + break; + case COMMAND_TYPE_REQUEST_CANCEL_NEXUS_OPERATION: + processRequestCancelNexusOperation( + ctx, d.getRequestCancelNexusOperationCommandAttributes(), workflowTaskCompletedId); + break; default: throw Status.INVALID_ARGUMENT .withDescription("Unknown command type: " + d.getCommandType() + " for " + d) @@ -726,6 +706,68 @@ private void processMessage( } } + private void processScheduleNexusOperation( + RequestContext ctx, + ScheduleNexusOperationCommandAttributes attr, + long workflowTaskCompletedId) { + attr = validateScheduleNexusOperation(attr); + Endpoint endpoint = nexusEndpointStore.getEndpointByName(attr.getEndpoint()); + StateMachine operationStateMachine = + newNexusOperation(endpoint); + long scheduleEventId = ctx.getNextEventId(); + nexusOperations.put(scheduleEventId, operationStateMachine); + + operationStateMachine.action(Action.INITIATE, ctx, attr, workflowTaskCompletedId); + ctx.addTimer( + ProtobufTimeUtils.toJavaDuration( + operationStateMachine.getData().scheduledEvent.getScheduleToCloseTimeout()), + () -> + timeoutNexusOperation( + scheduleEventId, + TimeoutType.TIMEOUT_TYPE_SCHEDULE_TO_CLOSE, + operationStateMachine.getData().getAttempt()), + "NexusOperation ScheduleToCloseTimeout"); + ctx.lockTimer("processScheduleNexusOperation"); + } + + private ScheduleNexusOperationCommandAttributes validateScheduleNexusOperation( + ScheduleNexusOperationCommandAttributes attr) { + ScheduleNexusOperationCommandAttributes.Builder result = + ScheduleNexusOperationCommandAttributes.newBuilder(attr); + + com.google.protobuf.Duration workflowRunTimeout = this.startRequest.getWorkflowRunTimeout(); + + if (Durations.compare(attr.getScheduleToCloseTimeout(), Durations.ZERO) <= 0 + || 
Durations.compare(attr.getScheduleToCloseTimeout(), workflowRunTimeout) > 0) { + result.setScheduleToCloseTimeout(workflowRunTimeout); + } + + return result.build(); + } + + private void processRequestCancelNexusOperation( + RequestContext ctx, + RequestCancelNexusOperationCommandAttributes attr, + long workflowTaskCompletedId) { + long scheduleEventId = attr.getScheduledEventId(); + StateMachine operation = nexusOperations.get(scheduleEventId); + if (operation == null) { + throw Status.INVALID_ARGUMENT + .withDescription("Nexus operation not found for scheduleEventId=" + scheduleEventId) + .asRuntimeException(); + } + + State before = operation.getState(); + operation.action(Action.REQUEST_CANCELLATION, ctx, attr, workflowTaskCompletedId); + if (before == State.INITIATED) { + // request is null here, because it's caused not by a separate cancel request, but by a + // command + operation.action(Action.CANCEL, ctx, null, 0); + // nexusOperations.remove(scheduleEventId); // TODO(pj): server doesn't currently remove + ctx.setNeedWorkflowTask(true); + } + } + private void processRequestCancelExternalWorkflowExecution( RequestContext ctx, RequestCancelExternalWorkflowExecutionCommandAttributes attr, @@ -2006,6 +2048,132 @@ private void timeoutActivity(long scheduledEventId, TimeoutType timeoutType, int } } + @Override + public void startNexusTask(long scheduledEventId, RespondNexusTaskCompletedRequest request) { + update( + ctx -> { + StateMachine operation = getPendingNexusOperation(scheduledEventId); + operation.action(StateMachines.Action.START, ctx, request, 0); + operation.getData().identity = request.getIdentity(); + }); + } + + @Override + public void completeNexusTask(long scheduledEventId, RespondNexusTaskCompletedRequest request) { + update( + ctx -> { + StateMachine operation = getPendingNexusOperation(scheduledEventId); + throwIfOperationTokenDoesntMatch(request.getTaskToken(), operation.getData()); + if (request.getResponse().hasCancelOperation()) { + 
operation.action(Action.CANCEL, ctx, request, 0); + } else { + operation.action(StateMachines.Action.COMPLETE, ctx, request, 0); + } + // nexusOperations.remove(scheduledEventId); // TODO(pj): server currently does not delete + scheduleWorkflowTask(ctx); + ctx.unlockTimer("completeNexusTask"); + }); + } + + @Override + public void failNexusTask(long scheduledEventId, RespondNexusTaskFailedRequest request) { + update( + ctx -> { + StateMachine operation = getPendingNexusOperation(scheduledEventId); + throwIfOperationTokenDoesntMatch(request.getTaskToken(), operation.getData()); + operation.action(StateMachines.Action.FAIL, ctx, request, 0); + if (isTerminalState(operation.getState())) { + // nexusOperations.remove(scheduledEventId); // TODO(pj): server currently does not + // delete + scheduleWorkflowTask(ctx); + } else { + addNexusOperationRetryTimer(ctx, operation); + } + // Allow time skipping when waiting for retry + ctx.unlockTimer("failNexusTask"); + }); + } + + private void timeoutNexusOperation( + long scheduledEventId, TimeoutType timeoutType, int timeoutAttempt) { + boolean unlockTimer = true; + try { + update( + ctx -> { + StateMachine operation = getPendingNexusOperation(scheduledEventId); + int attempt = operation.getData().getAttempt(); + if (timeoutAttempt != attempt + || (operation.getState() != State.INITIATED + && operation.getState() != State.STARTED)) { + throw Status.NOT_FOUND.withDescription("Timer fired earlier").asRuntimeException(); + } + operation.action(StateMachines.Action.TIME_OUT, ctx, timeoutType, 0); + if (isTerminalState(operation.getState())) { + // nexusOperations.remove(scheduledEventId); // TODO(pj): server + // currently does not delete + scheduleWorkflowTask(ctx); + } else { + addNexusOperationRetryTimer(ctx, operation); + } + }); + } catch (StatusRuntimeException e) { + // NOT_FOUND is expected as timers are not removed + if (e.getStatus().getCode() != Status.Code.NOT_FOUND) { + log.error("Failure trying to add task for a 
Nexus operation retry", e); + } + unlockTimer = false; + } catch (Exception e) { + // Cannot fail to timer threads + log.error("Failure trying to timeout a Nexus operation", e); + } finally { + if (unlockTimer) { + timerService.unlockTimeSkipping("timeoutNexusOperation: " + scheduledEventId); + } + } + } + + private void addNexusOperationRetryTimer( + RequestContext ctx, StateMachine operation) { + NexusOperationData data = operation.getData(); + int attempt = data.getAttempt(); + Duration nextDelay = ProtobufTimeUtils.toJavaDuration(data.nextBackoffInterval); + data.nextAttemptScheduleTime = clock.getAsLong() + nextDelay.toMillis(); + ctx.addTimer( + nextDelay, + () -> { + // Timers are not removed, so skip if it is not for this attempt. + if (operation.getState() != State.INITIATED && data.getAttempt() != attempt) { + return; + } + + LockHandle lockHandle = + timerService.lockTimeSkipping( + "nexusOperationRetryTimer " + operation.getData().operationId); + boolean unlockTimer = false; + + try { + // TODO this lock is getting released somewhere on the operation completion. + // We should rework it to pass the lockHandle downstream and use it for the release. 
+ update(ctx1 -> ctx1.addNexusTask(data.nexusTask)); + } catch (StatusRuntimeException e) { + // NOT_FOUND is expected as timers are not removed + if (e.getStatus().getCode() != Status.Code.NOT_FOUND) { + log.error("Failure trying to add task for a Nexus operation retry", e); + } + unlockTimer = true; + } catch (Exception e) { + log.error("Failure trying to add task for a Nexus operation retry", e); + unlockTimer = true; + } finally { + if (unlockTimer) { + // Allow time skipping when waiting for an operation retry + lockHandle.unlock("nexusOperationRetryTimer " + operation.getData().operationId); + } + } + }, + "Nexus Operation Retry"); + } + // TODO(maxim): Add workflow retry on run timeout private void timeoutWorkflow() { lock.lock(); @@ -2738,6 +2906,12 @@ private DescribeWorkflowExecutionResponse describeWorkflowExecutionInsideLock() .map(TestWorkflowMutableStateImpl::constructPendingActivityInfo) .collect(Collectors.toList()); + List pendingNexusOperations = + this.nexusOperations.values().stream() + .filter(sm -> !isTerminalState(sm.getState())) + .map(TestWorkflowMutableStateImpl::constructPendingNexusOperationInfo) + .collect(Collectors.toList()); + List pendingChildren = this.childWorkflows.values().stream() .filter(sm -> !isTerminalState(sm.getState())) @@ -2748,6 +2922,7 @@ private DescribeWorkflowExecutionResponse describeWorkflowExecutionInsideLock() .setExecutionConfig(executionConfig) .setWorkflowExecutionInfo(executionInfo) .addAllPendingActivities(pendingActivities) + .addAllPendingNexusOperations(pendingNexusOperations) .addAllPendingChildren(pendingChildren) .build(); } @@ -2855,6 +3030,45 @@ private static void populatePendingActivityInfoFromRetryData( builder.setMaximumAttempts(retryPolicy.getMaximumAttempts()); } + private static PendingNexusOperationInfo constructPendingNexusOperationInfo( + StateMachine sm) { + NexusOperationData data = sm.getData(); + PendingNexusOperationInfo.Builder builder = + PendingNexusOperationInfo.newBuilder() + 
.setEndpoint(data.scheduledEvent.getEndpoint()) + .setService(data.scheduledEvent.getService()) + .setOperation(data.scheduledEvent.getOperation()) + .setOperationId(data.operationId) + .setScheduledEventId(data.scheduledEventId) + .setScheduleToCloseTimeout(data.scheduledEvent.getScheduleToCloseTimeout()) + .setState(convertNexusOperationState(sm.getState(), data)) + .setAttempt(data.getAttempt()) + .setLastAttemptCompleteTime(Timestamps.fromMillis(data.lastAttemptCompleteTime)) + .setNextAttemptScheduleTime(Timestamps.fromMillis(data.nextAttemptScheduleTime)); + + data.retryState.getPreviousRunFailure().ifPresent(builder::setLastAttemptFailure); + + // TODO(pj): support cancellation info + + return builder.build(); + } + + private static PendingNexusOperationState convertNexusOperationState( + State state, NexusOperationData data) { + // Terminal states have already been filtered out, so only handle pending states. + if (data.getAttempt() > 1) { + return PendingNexusOperationState.PENDING_NEXUS_OPERATION_STATE_BACKING_OFF; + } + switch (state) { + case INITIATED: + return PendingNexusOperationState.PENDING_NEXUS_OPERATION_STATE_SCHEDULED; + case STARTED: + return PendingNexusOperationState.PENDING_NEXUS_OPERATION_STATE_STARTED; + default: + return PendingNexusOperationState.PENDING_NEXUS_OPERATION_STATE_UNSPECIFIED; + } + } + private static void populateWorkflowExecutionInfoFromHistory( WorkflowExecutionInfo.Builder executionInfo, List fullHistory) { getStartEvent(fullHistory) @@ -2980,6 +3194,16 @@ private StateMachine getPendingActivityByScheduledEventId( return activity; } + private StateMachine getPendingNexusOperation(long scheduledEventId) { + StateMachine operation = nexusOperations.get(scheduledEventId); + if (operation == null) { + throw Status.NOT_FOUND + .withDescription("unknown Nexus operation with scheduledEventId: " + scheduledEventId) + .asRuntimeException(); + } + return operation; + } + private StateMachine getChildWorkflow(long 
initiatedEventId) { StateMachine child = childWorkflows.get(initiatedEventId); if (child == null) { @@ -3019,6 +3243,19 @@ private void throwIfTaskTokenDoesntMatch(ByteString taskToken, ActivityTaskData } } + private void throwIfOperationTokenDoesntMatch(ByteString taskToken, NexusOperationData data) { + if (!taskToken.isEmpty()) { + NexusTaskToken deserialized = NexusTaskToken.fromBytes(taskToken); + if (deserialized.getAttempt() != data.getAttempt() + || deserialized.getScheduledEventId() != data.scheduledEventId) { + throw Status.NOT_FOUND + .withDescription( + "invalid Nexus operationId or operation already timed out or workflow is completed") + .asRuntimeException(); + } + } + } + private boolean isTerminalState(State workflowState) { return workflowState == State.COMPLETED || workflowState == State.TIMED_OUT diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java index 710c4f1e31..5271299a8e 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowService.java @@ -80,6 +80,7 @@ public final class TestWorkflowService extends WorkflowServiceGrpc.WorkflowServi private final TestWorkflowStore store; private final TestVisibilityStore visibilityStore; + private final TestNexusEndpointStore nexusEndpointStore; private final SelfAdvancingTimer selfAdvancingTimer; private final ScheduledExecutorService backgroundScheduler = @@ -92,9 +93,11 @@ public final class TestWorkflowService extends WorkflowServiceGrpc.WorkflowServi TestWorkflowService( TestWorkflowStore store, TestVisibilityStore visibilityStore, + TestNexusEndpointStore nexusEndpointStore, SelfAdvancingTimer selfAdvancingTimer) { this.store = store; this.visibilityStore = visibilityStore; + this.nexusEndpointStore = nexusEndpointStore; 
this.selfAdvancingTimer = selfAdvancingTimer; this.outOfProcessServer = null; this.inProcessServer = null; @@ -355,6 +358,7 @@ private StartWorkflowExecutionResponse startWorkflowExecutionNoRunningCheckLocke this, store, visibilityStore, + nexusEndpointStore, selfAdvancingTimer); WorkflowExecution execution = mutableState.getExecutionId().getExecution(); ExecutionId executionId = new ExecutionId(namespace, execution); @@ -740,6 +744,70 @@ public void respondActivityTaskCanceledById( } } + @Override + public void pollNexusTaskQueue( + PollNexusTaskQueueRequest request, + StreamObserver responseObserver) { + try (Context.CancellableContext ctx = deadlineCtx(getLongPollDeadline())) { + + PollNexusTaskQueueResponse.Builder task; + try { + task = pollTaskQueue(ctx, store.pollNexusTaskQueue(request)); + } catch (ExecutionException e) { + responseObserver.onError(e); + return; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + responseObserver.onNext(PollNexusTaskQueueResponse.getDefaultInstance()); + responseObserver.onCompleted(); + return; + } catch (CancellationException e) { + responseObserver.onNext(PollNexusTaskQueueResponse.getDefaultInstance()); + responseObserver.onCompleted(); + return; + } + + responseObserver.onNext(task.build()); + responseObserver.onCompleted(); + } + } + + @Override + public void respondNexusTaskCompleted( + RespondNexusTaskCompletedRequest request, + StreamObserver responseObserver) { + try { + NexusTaskToken taskToken = NexusTaskToken.fromBytes(request.getTaskToken()); + TestWorkflowMutableState mutableState = getMutableState(taskToken.getExecutionId()); + if (request.getResponse().hasStartOperation() + && request.getResponse().getStartOperation().hasAsyncSuccess()) { + // Start event is only recorded for async success + mutableState.startNexusTask(taskToken.getScheduledEventId(), request); + } else { + mutableState.completeNexusTask(taskToken.getScheduledEventId(), request); + } + 
responseObserver.onNext(RespondNexusTaskCompletedResponse.getDefaultInstance()); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } + } + + @Override + public void respondNexusTaskFailed( + RespondNexusTaskFailedRequest request, + StreamObserver responseObserver) { + try { + NexusTaskToken taskToken = NexusTaskToken.fromBytes(request.getTaskToken()); + TestWorkflowMutableState mutableState = getMutableState(taskToken.getExecutionId()); + mutableState.failNexusTask(taskToken.getScheduledEventId(), request); + responseObserver.onNext(RespondNexusTaskFailedResponse.getDefaultInstance()); + responseObserver.onCompleted(); + } catch (StatusRuntimeException e) { + handleStatusRuntimeException(e, responseObserver); + } + } + @Override public void requestCancelWorkflowExecution( RequestCancelWorkflowExecutionRequest cancelRequest, @@ -1329,6 +1397,7 @@ private TestWorkflowService(long initialTimeMillis, boolean startInProcessServer new SelfAdvancingTimerImpl(initialTimeMillis, Clock.systemDefaultZone()); store = new TestWorkflowStoreImpl(this.selfAdvancingTimer); visibilityStore = new TestVisibilityStoreImpl(); + nexusEndpointStore = new TestNexusEndpointStoreImpl(); outOfProcessServer = null; if (startInProcessServer) { this.inProcessServer = new InProcessGRPCServer(Collections.singletonList(this)); @@ -1366,6 +1435,7 @@ private TestWorkflowService(boolean isOutOfProc, int port) { this.selfAdvancingTimer = new SelfAdvancingTimerImpl(0, Clock.systemDefaultZone()); store = new TestWorkflowStoreImpl(selfAdvancingTimer); visibilityStore = new TestVisibilityStoreImpl(); + nexusEndpointStore = new TestNexusEndpointStoreImpl(); try { ServerBuilder serverBuilder = Grpc.newServerBuilderForPort(port, InsecureServerCredentials.create()); diff --git a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStore.java 
b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStore.java index c6eb5d807f..72bdab863f 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStore.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStore.java @@ -23,12 +23,7 @@ import com.google.protobuf.Timestamp; import io.grpc.Deadline; import io.temporal.api.workflow.v1.WorkflowExecutionInfo; -import io.temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest; -import io.temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse; -import io.temporal.api.workflowservice.v1.PollActivityTaskQueueRequest; -import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponse; -import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueRequest; -import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse; +import io.temporal.api.workflowservice.v1.*; import java.time.Duration; import java.util.List; import java.util.Objects; @@ -136,6 +131,24 @@ public PollActivityTaskQueueResponse.Builder getTask() { } } + class NexusTask { + private final TaskQueueId taskQueueId; + private final PollNexusTaskQueueResponse.Builder task; + + public NexusTask(TaskQueueId taskQueueId, PollNexusTaskQueueResponse.Builder task) { + this.taskQueueId = taskQueueId; + this.task = task; + } + + public TaskQueueId getTaskQueueId() { + return taskQueueId; + } + + public PollNexusTaskQueueResponse.Builder getTask() { + return task; + } + } + Timestamp currentTime(); long save(RequestContext requestContext); @@ -156,6 +169,9 @@ Future pollWorkflowTaskQueue( Future pollActivityTaskQueue( PollActivityTaskQueueRequest pollRequest); + Future pollNexusTaskQueue( + PollNexusTaskQueueRequest pollRequest); + void sendQueryTask( ExecutionId executionId, TaskQueueId taskQueue, PollWorkflowTaskQueueResponse.Builder task); diff --git 
a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStoreImpl.java b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStoreImpl.java index d2dbf28078..ab8b9d591d 100644 --- a/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStoreImpl.java +++ b/temporal-test-server/src/main/java/io/temporal/internal/testservice/TestWorkflowStoreImpl.java @@ -34,12 +34,7 @@ import io.temporal.api.history.v1.HistoryEvent; import io.temporal.api.taskqueue.v1.StickyExecutionAttributes; import io.temporal.api.workflow.v1.WorkflowExecutionInfo; -import io.temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryRequest; -import io.temporal.api.workflowservice.v1.GetWorkflowExecutionHistoryResponse; -import io.temporal.api.workflowservice.v1.PollActivityTaskQueueRequest; -import io.temporal.api.workflowservice.v1.PollActivityTaskQueueResponse; -import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueRequest; -import io.temporal.api.workflowservice.v1.PollWorkflowTaskQueueResponse; +import io.temporal.api.workflowservice.v1.*; import io.temporal.common.WorkflowExecutionHistory; import io.temporal.failure.ApplicationFailure; import io.temporal.internal.common.WorkflowExecutionUtils; @@ -71,6 +66,8 @@ class TestWorkflowStoreImpl implements TestWorkflowStore { activityTaskQueues = new HashMap<>(); private final Map> workflowTaskQueues = new HashMap<>(); + private final Map> nexusTaskQueues = + new HashMap<>(); private final SelfAdvancingTimer selfAdvancingTimer; private static class HistoryStore { @@ -233,6 +230,15 @@ public long save(RequestContext ctx) { } } + List nexusTasks = ctx.getNexusTasks(); + if (nexusTasks != null) { + for (NexusTask nexusTask : nexusTasks) { + TaskQueue nexusTaskQueue = + getNexusTaskQueueQueue(nexusTask.getTaskQueueId()); + nexusTaskQueue.add(nexusTask.getTask()); + } + } + List timers = ctx.getTimers(); if (timers != null) { for (Timer t : timers) { @@ -304,6 
+310,22 @@ private TaskQueue getWorkflowTaskQueueQue } } + private TaskQueue getNexusTaskQueueQueue( + TaskQueueId taskQueueId) { + lock.lock(); + try { + TaskQueue nexusTaskQueue = + nexusTaskQueues.get(taskQueueId); + if (nexusTaskQueue == null) { + nexusTaskQueue = new TaskQueue<>(); + nexusTaskQueues.put(taskQueueId, nexusTaskQueue); + } + return nexusTaskQueue; + } finally { + lock.unlock(); + } + } + @Override public Future pollWorkflowTaskQueue( PollWorkflowTaskQueueRequest pollRequest) { @@ -320,6 +342,14 @@ public Future pollActivityTaskQueue( return getActivityTaskQueueQueue(taskQueueId).poll(); } + @Override + public Future pollNexusTaskQueue( + PollNexusTaskQueueRequest pollRequest) { + final TaskQueueId taskQueueId = + new TaskQueueId(pollRequest.getNamespace(), pollRequest.getTaskQueue().getName()); + return getNexusTaskQueueQueue(taskQueueId).poll(); + } + @Override public void sendQueryTask( ExecutionId executionId, TaskQueueId taskQueue, PollWorkflowTaskQueueResponse.Builder task) { diff --git a/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java b/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java index 0a80308e8a..1af4ad3463 100644 --- a/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java +++ b/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusEndpointTest.java @@ -56,7 +56,7 @@ public void checkExternal() { @Test public void testValidateEndpointSpec() { // Create and Update use same validation logic, so just test once - EndpointSpec.Builder specBuilder = getTestEndpointSpecBuilder("valid_name_01"); + EndpointSpec.Builder specBuilder = getTestEndpointSpecBuilder("valid-name-01"); // Valid Endpoint testEndpoint = createTestEndpoint(specBuilder); @@ -77,11 +77,11 @@ public void testValidateEndpointSpec() { assertEquals( "Nexus endpoint name (" + specBuilder.getName() - + ") does not match expected pattern: 
^[a-zA-Z_][a-zA-Z0-9_]*$", + + ") does not match expected pattern: ^[a-zA-Z][a-zA-Z0-9\\-]*[a-zA-Z0-9]$", ex.getStatus().getDescription()); // Missing target - specBuilder.setName("valid_name_02"); + specBuilder.setName("valid-name-02"); specBuilder.clearTarget(); ex = assertThrows(StatusRuntimeException.class, () -> createTestEndpoint(specBuilder)); assertEquals(Status.Code.INVALID_ARGUMENT, ex.getStatus().getCode()); @@ -100,7 +100,7 @@ public void testValidateEndpointSpec() { @Test public void testCreate() { - EndpointSpec.Builder specBuilder = getTestEndpointSpecBuilder("valid_create_test_endpoint"); + EndpointSpec.Builder specBuilder = getTestEndpointSpecBuilder("valid-create-test-endpoint"); // Valid create Endpoint testEndpoint = createTestEndpoint(specBuilder); @@ -119,7 +119,7 @@ public void testCreate() { @Test public void testUpdate() { // Setup - Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("update_test_endpoint")); + Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("update-test-endpoint")); assertEquals(1, testEndpoint.getVersion()); EndpointSpec updatedSpec = EndpointSpec.newBuilder(testEndpoint.getSpec()) @@ -172,7 +172,7 @@ public void testUpdate() { ex.getStatus().getDescription()); // Updated name already registered - EndpointSpec.Builder otherSpec = getTestEndpointSpecBuilder("other_test_endpoint"); + EndpointSpec.Builder otherSpec = getTestEndpointSpecBuilder("other-test-endpoint"); createTestEndpoint(otherSpec); ex = assertThrows( @@ -214,7 +214,7 @@ public void testUpdate() { @Test public void testDelete() { // Setup - Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("delete_test_endpoint")); + Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("delete-test-endpoint")); assertEquals(1, testEndpoint.getVersion()); // Not found @@ -276,7 +276,7 @@ public void testDelete() { @Test public void testGet() { // Setup - Endpoint testEndpoint = 
createTestEndpoint(getTestEndpointSpecBuilder("get_test_endpoint")); + Endpoint testEndpoint = createTestEndpoint(getTestEndpointSpecBuilder("get-test-endpoint")); assertEquals(1, testEndpoint.getVersion()); // Not found @@ -311,7 +311,7 @@ public void testList() { // Setup List testEndpoints = new ArrayList<>(3); for (int i = 0; i < 3; i++) { - testEndpoints.add(createTestEndpoint(getTestEndpointSpecBuilder("list_test_endpoint_" + i))); + testEndpoints.add(createTestEndpoint(getTestEndpointSpecBuilder("list-test-endpoint-" + i))); } testEndpoints.sort(Comparator.comparing(Endpoint::getId)); @@ -322,7 +322,7 @@ public void testList() { .getOperatorServiceStubs() .blockingStub() .listNexusEndpoints( - ListNexusEndpointsRequest.newBuilder().setName("some_missing_name").build()); + ListNexusEndpointsRequest.newBuilder().setName("some-missing-name").build()); assertEquals(0, resp.getEndpointsCount()); // List with filter for existing name diff --git a/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusWorkflowTest.java b/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusWorkflowTest.java new file mode 100644 index 0000000000..90d83d5ec1 --- /dev/null +++ b/temporal-test-server/src/test/java/io/temporal/testserver/functional/NexusWorkflowTest.java @@ -0,0 +1,507 @@ +/* + * Copyright (C) 2022 Temporal Technologies, Inc. All Rights Reserved. + * + * Copyright (C) 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Modifications copyright (C) 2017 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this material except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.temporal.testserver.functional; + +import static org.junit.Assume.assumeFalse; + +import com.google.protobuf.ByteString; +import com.google.protobuf.util.Durations; +import io.temporal.api.command.v1.Command; +import io.temporal.api.command.v1.CompleteWorkflowExecutionCommandAttributes; +import io.temporal.api.command.v1.RequestCancelNexusOperationCommandAttributes; +import io.temporal.api.command.v1.ScheduleNexusOperationCommandAttributes; +import io.temporal.api.common.v1.Payload; +import io.temporal.api.common.v1.Payloads; +import io.temporal.api.common.v1.WorkflowExecution; +import io.temporal.api.enums.v1.CommandType; +import io.temporal.api.enums.v1.EventType; +import io.temporal.api.enums.v1.TaskQueueKind; +import io.temporal.api.history.v1.HistoryEvent; +import io.temporal.api.nexus.v1.*; +import io.temporal.api.operatorservice.v1.CreateNexusEndpointRequest; +import io.temporal.api.taskqueue.v1.TaskQueue; +import io.temporal.api.workflowservice.v1.*; +import io.temporal.client.WorkflowOptions; +import io.temporal.client.WorkflowStub; +import io.temporal.testing.internal.SDKTestWorkflowRule; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +public class NexusWorkflowTest { + @Rule + public SDKTestWorkflowRule testWorkflowRule = + SDKTestWorkflowRule.newBuilder().setDoNotStart(true).build(); + + @Before + public void checkExternal() { + // 
TODO: remove this skip once 1.25.0 is officially released and + // https://github.com/temporalio/sdk-java/issues/2165 is resolved + assumeFalse( + "Nexus APIs are not supported for server versions < 1.25.0", + testWorkflowRule.isUseExternalService()); + } + + @Test + public void testNexusOperationSyncCompletion() { + Endpoint testEndpoint = createEndpoint("test-sync-completion-endpoint"); + CompletableFuture nexusPoller = CompletableFuture.runAsync(pollAndCompleteNexusTask()); + + try { + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + WorkflowStub stub = + testWorkflowRule + .getWorkflowClient() + .newUntypedWorkflowStub("TestNexusOperationSyncCompletionWorkflow", options); + WorkflowExecution execution = stub.start(); + + // Get first WFT and respond with ScheduleNexusOperation command + PollWorkflowTaskQueueResponse pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowTaskQueue( + PollWorkflowTaskQueueRequest.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .setIdentity("test") + .build()); + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .respondWorkflowTaskCompleted( + RespondWorkflowTaskCompletedRequest.newBuilder() + .setIdentity("test") + .setTaskToken(pollResp.getTaskToken()) + .addCommands( + Command.newBuilder() + .setCommandType(CommandType.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION) + .setScheduleNexusOperationCommandAttributes( + ScheduleNexusOperationCommandAttributes.newBuilder() + .setEndpoint(testEndpoint.getSpec().getName()) + .setService("service") + .setOperation("operation") + .setInput( + Payload.newBuilder() + .setData(ByteString.copyFromUtf8("input")))) + .build()) + .build()); + + // Wait for Nexus operation 
result to be recorded + nexusPoller.get(1, TimeUnit.SECONDS); + + pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowTaskQueue( + PollWorkflowTaskQueueRequest.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .setIdentity("test") + .build()); + Assert.assertTrue( + pollResp.getHistory().getEventsList().stream() + .anyMatch( + event -> event.getEventType() == EventType.EVENT_TYPE_NEXUS_OPERATION_COMPLETED)); + List events = + testWorkflowRule.getHistoryEvents( + execution.getWorkflowId(), EventType.EVENT_TYPE_NEXUS_OPERATION_COMPLETED); + Assert.assertEquals(1, events.size()); + HistoryEvent completedEvent = events.get(0); + + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .respondWorkflowTaskCompleted( + RespondWorkflowTaskCompletedRequest.newBuilder() + .setIdentity("test") + .setTaskToken(pollResp.getTaskToken()) + .addCommands( + Command.newBuilder() + .setCommandType(CommandType.COMMAND_TYPE_COMPLETE_WORKFLOW_EXECUTION) + .setCompleteWorkflowExecutionCommandAttributes( + CompleteWorkflowExecutionCommandAttributes.newBuilder() + .setResult( + Payloads.newBuilder() + .addPayloads( + completedEvent + .getNexusOperationCompletedEventAttributes() + .getResult())))) + .build()); + + String result = stub.getResult(String.class); + Assert.assertEquals(result, "input"); + } catch (Exception e) { + System.out.println(e.getMessage()); + } finally { + nexusPoller.cancel(true); + } + } + + @Test + public void testNexusOperationCancelBeforeStart() { + Endpoint testEndpoint = createEndpoint("test-sync-cancel-before-start-endpoint"); + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + WorkflowStub stub = + testWorkflowRule + 
.getWorkflowClient() + .newUntypedWorkflowStub("TestNexusOperationSyncCompletionWorkflow", options); + WorkflowExecution execution = stub.start(); + + // Get first WFT and respond with ScheduleNexusOperation command + PollWorkflowTaskQueueResponse pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowTaskQueue( + PollWorkflowTaskQueueRequest.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .setIdentity("test") + .build()); + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .respondWorkflowTaskCompleted( + RespondWorkflowTaskCompletedRequest.newBuilder() + .setIdentity("test") + .setTaskToken(pollResp.getTaskToken()) + .setForceCreateNewWorkflowTask(true) + .addCommands( + Command.newBuilder() + .setCommandType(CommandType.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION) + .setScheduleNexusOperationCommandAttributes( + ScheduleNexusOperationCommandAttributes.newBuilder() + .setEndpoint(testEndpoint.getSpec().getName()) + .setService("service") + .setOperation("operation") + .setInput( + Payload.newBuilder().setData(ByteString.copyFromUtf8("input")))) + .build()) + .build()); + + // Poll for new WFT and respond with RequestCancelNexusOperation command + pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowTaskQueue( + PollWorkflowTaskQueueRequest.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .setIdentity("test") + .build()); + + List events = + testWorkflowRule.getHistoryEvents( + execution.getWorkflowId(), EventType.EVENT_TYPE_NEXUS_OPERATION_SCHEDULED); + 
Assert.assertEquals(1, events.size()); + HistoryEvent scheduledEvent = events.get(0); + + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .respondWorkflowTaskCompleted( + RespondWorkflowTaskCompletedRequest.newBuilder() + .setIdentity("test") + .setTaskToken(pollResp.getTaskToken()) + .setForceCreateNewWorkflowTask(true) + .addCommands( + Command.newBuilder() + .setCommandType(CommandType.COMMAND_TYPE_REQUEST_CANCEL_NEXUS_OPERATION) + .setRequestCancelNexusOperationCommandAttributes( + RequestCancelNexusOperationCommandAttributes.newBuilder() + .setScheduledEventId(scheduledEvent.getEventId())) + .build()) + .build()); + + events = + testWorkflowRule.getHistoryEvents( + execution.getWorkflowId(), EventType.EVENT_TYPE_NEXUS_OPERATION_CANCEL_REQUESTED); + Assert.assertEquals(1, events.size()); + events = + testWorkflowRule.getHistoryEvents( + execution.getWorkflowId(), EventType.EVENT_TYPE_NEXUS_OPERATION_CANCELED); + Assert.assertEquals(1, events.size()); + } + + @Test + public void testNexusOperationTimeout() { + Endpoint testEndpoint = createEndpoint("test-timeout-endpoint"); + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + WorkflowStub stub = + testWorkflowRule + .getWorkflowClient() + .newUntypedWorkflowStub("TestNexusOperationSyncCompletionWorkflow", options); + WorkflowExecution execution = stub.start(); + + // Get first WFT and respond with ScheduleNexusOperation command + PollWorkflowTaskQueueResponse pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowTaskQueue( + PollWorkflowTaskQueueRequest.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .setIdentity("test") + .build()); + testWorkflowRule + .getWorkflowClient() + 
.getWorkflowServiceStubs() + .blockingStub() + .respondWorkflowTaskCompleted( + RespondWorkflowTaskCompletedRequest.newBuilder() + .setIdentity("test") + .setTaskToken(pollResp.getTaskToken()) + .addCommands( + Command.newBuilder() + .setCommandType(CommandType.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION) + .setScheduleNexusOperationCommandAttributes( + ScheduleNexusOperationCommandAttributes.newBuilder() + .setScheduleToCloseTimeout(Durations.fromSeconds(1)) + .setEndpoint(testEndpoint.getSpec().getName()) + .setService("service") + .setOperation("operation") + .setInput( + Payload.newBuilder().setData(ByteString.copyFromUtf8("input")))) + .build()) + .build()); + + List events = + testWorkflowRule.getHistoryEvents( + execution.getWorkflowId(), EventType.EVENT_TYPE_NEXUS_OPERATION_SCHEDULED); + Assert.assertEquals(1, events.size()); + + // Poll to wait for new task after operation times out + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowTaskQueue( + PollWorkflowTaskQueueRequest.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .setIdentity("test") + .build()); + + testWorkflowRule.assertHistoryEvent( + execution.getWorkflowId(), EventType.EVENT_TYPE_NEXUS_OPERATION_TIMED_OUT); + } + + @Test + public void testRespondNexusTaskFailed() { + Endpoint testEndpoint = createEndpoint("test-respond-failed-endpoint"); + CompletableFuture nexusPoller = CompletableFuture.runAsync(pollAndFailNexusTask()); + + try { + WorkflowOptions options = + WorkflowOptions.newBuilder().setTaskQueue(testWorkflowRule.getTaskQueue()).build(); + WorkflowStub stub = + testWorkflowRule + .getWorkflowClient() + .newUntypedWorkflowStub("TestNexusOperationSyncCompletionWorkflow", options); + WorkflowExecution execution = stub.start(); + + // Get first WFT and respond with 
ScheduleNexusOperation command + PollWorkflowTaskQueueResponse pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollWorkflowTaskQueue( + PollWorkflowTaskQueueRequest.newBuilder() + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .setIdentity("test") + .build()); + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .respondWorkflowTaskCompleted( + RespondWorkflowTaskCompletedRequest.newBuilder() + .setIdentity("test") + .setTaskToken(pollResp.getTaskToken()) + .addCommands( + Command.newBuilder() + .setCommandType(CommandType.COMMAND_TYPE_SCHEDULE_NEXUS_OPERATION) + .setScheduleNexusOperationCommandAttributes( + ScheduleNexusOperationCommandAttributes.newBuilder() + .setScheduleToCloseTimeout(Durations.fromSeconds(1)) + .setEndpoint(testEndpoint.getSpec().getName()) + .setService("service") + .setOperation("operation") + .setInput( + Payload.newBuilder() + .setData(ByteString.copyFromUtf8("input")))) + .build()) + .build()); + + // Wait for Nexus operation error to be recorded + nexusPoller.get(1, TimeUnit.SECONDS); + + testWorkflowRule.assertHistoryEvent( + execution.getWorkflowId(), EventType.EVENT_TYPE_NEXUS_OPERATION_FAILED); + } catch (Exception e) { + System.out.println(e.getMessage()); + } finally { + nexusPoller.cancel(true); + } + } + + private Runnable pollAndCompleteNexusTask() { + return () -> { + PollNexusTaskQueueResponse pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollNexusTaskQueue( + PollNexusTaskQueueRequest.newBuilder() + .setIdentity(UUID.randomUUID().toString()) + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + 
.setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .build()); + + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .respondNexusTaskCompleted( + RespondNexusTaskCompletedRequest.newBuilder() + .setIdentity(UUID.randomUUID().toString()) + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskToken(pollResp.getTaskToken()) + .setResponse( + Response.newBuilder() + .setStartOperation( + StartOperationResponse.newBuilder() + .setSyncSuccess( + StartOperationResponse.Sync.newBuilder() + .setPayload( + pollResp + .getRequest() + .getStartOperation() + .getPayload())))) + .build()); + }; + } + + private Runnable pollAndFailNexusTask() { + return () -> { + PollNexusTaskQueueResponse pollResp = + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .pollNexusTaskQueue( + PollNexusTaskQueueRequest.newBuilder() + .setIdentity(UUID.randomUUID().toString()) + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue( + TaskQueue.newBuilder() + .setName(testWorkflowRule.getTaskQueue()) + .setKind(TaskQueueKind.TASK_QUEUE_KIND_NORMAL)) + .build()); + + testWorkflowRule + .getWorkflowClient() + .getWorkflowServiceStubs() + .blockingStub() + .respondNexusTaskFailed( + RespondNexusTaskFailedRequest.newBuilder() + .setIdentity(UUID.randomUUID().toString()) + .setNamespace(testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskToken(pollResp.getTaskToken()) + .setError( + HandlerError.newBuilder() + .setErrorType("BAD_REQUEST") + .setFailure(Failure.newBuilder().setMessage("deliberate error"))) + .build()); + }; + } + + private Endpoint createEndpoint(String name) { + return testWorkflowRule + .getTestEnvironment() + .getOperatorServiceStubs() + .blockingStub() + .createNexusEndpoint( + CreateNexusEndpointRequest.newBuilder() + .setSpec( + EndpointSpec.newBuilder() + .setName(name) + .setDescription( + 
Payload.newBuilder().setData(ByteString.copyFromUtf8("test endpoint"))) + .setTarget( + EndpointTarget.newBuilder() + .setWorker( + EndpointTarget.Worker.newBuilder() + .setNamespace( + testWorkflowRule.getTestEnvironment().getNamespace()) + .setTaskQueue(testWorkflowRule.getTaskQueue())))) + .build()) + .getEndpoint(); + } +} From b9eeda0df25f9ad4da16491470df4be404bfc421 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Thu, 15 Aug 2024 13:00:43 -0700 Subject: [PATCH 21/25] Improve test time and flaky schedule test (#2183) Try running CI on 16 core runners --- .github/workflows/ci.yml | 4 ++-- .github/workflows/coverage.yml | 2 +- .../temporal/client/schedules/ScheduleTest.java | 15 ++++++++------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b531cc7431..c2c7fbf626 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ on: jobs: unit_test_edge: name: Unit test with in-memory test service [Edge] - runs-on: ubuntu-latest + runs-on: ubuntu-latest-16-cores timeout-minutes: 30 steps: - name: Checkout repo @@ -47,7 +47,7 @@ jobs: unit_test_jdk8: name: Unit test with docker service [JDK8] - runs-on: ubuntu-latest + runs-on: ubuntu-latest-16-cores timeout-minutes: 30 steps: - name: Checkout repo diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 499f1f235d..aecfa5b62d 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -6,7 +6,7 @@ on: jobs: code-coverage: - runs-on: ubuntu-latest + runs-on: ubuntu-latest-16-cores steps: - name: Checkout uses: actions/checkout@v4 diff --git a/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java b/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java index 1c37c579a0..6eb2067e36 100644 --- a/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java +++ 
b/temporal-sdk/src/test/java/io/temporal/client/schedules/ScheduleTest.java @@ -260,14 +260,15 @@ public void triggerScheduleNoPolicy() { @Test(timeout = 30000) public void backfillSchedules() { - // assumeTrue("skipping for test server", SDKTestWorkflowRule.useExternalService); - Instant now = Instant.now(); + Instant backfillTime = Instant.ofEpochSecond(100000); ScheduleClient client = createScheduleClient(); // Create schedule ScheduleOptions options = ScheduleOptions.newBuilder() .setBackfills( - Arrays.asList(new ScheduleBackfill(now.minusMillis(20500), now.minusMillis(10000)))) + Arrays.asList( + new ScheduleBackfill( + backfillTime.minusMillis(20500), backfillTime.minusMillis(10000)))) .build(); String scheduleId = UUID.randomUUID().toString(); Schedule schedule = @@ -283,8 +284,8 @@ public void backfillSchedules() { handle.backfill( Arrays.asList( - new ScheduleBackfill(now.minusMillis(5500), now.minusMillis(2500)), - new ScheduleBackfill(now.minusMillis(2500), now))); + new ScheduleBackfill(backfillTime.minusMillis(5500), backfillTime.minusMillis(2500)), + new ScheduleBackfill(backfillTime.minusMillis(2500), backfillTime))); waitForActions(handle, 15); // Cleanup schedule handle.delete(); @@ -292,8 +293,8 @@ public void backfillSchedules() { try { handle.backfill( Arrays.asList( - new ScheduleBackfill(now.minusMillis(5500), now.minusMillis(2500)), - new ScheduleBackfill(now.minusMillis(2500), now))); + new ScheduleBackfill(backfillTime.minusMillis(5500), backfillTime.minusMillis(2500)), + new ScheduleBackfill(backfillTime.minusMillis(2500), backfillTime))); Assert.fail(); } catch (ScheduleException e) { } From 1d668c68c89ffebef56d183981a401ac09d3d000 Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Thu, 15 Aug 2024 13:36:40 -0700 Subject: [PATCH 22/25] Activity slot test flake (#2186) Don't use metrics here and instead rely on wrapper for counts --- .../worker/WorkflowSlotsSmallSizeTests.java | 58 ++++--------------- 
.../testUtils/CountingSlotSupplier.java | 5 ++ 2 files changed, 16 insertions(+), 47 deletions(-) diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java index ba39fd335b..e70d1bfba0 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java @@ -22,8 +22,6 @@ import static org.junit.Assert.assertEquals; -import com.uber.m3.tally.RootScopeBuilder; -import com.uber.m3.tally.Scope; import com.uber.m3.util.ImmutableMap; import io.temporal.activity.ActivityInterface; import io.temporal.activity.ActivityMethod; @@ -32,10 +30,8 @@ import io.temporal.client.WorkflowClient; import io.temporal.client.WorkflowOptions; import io.temporal.common.RetryOptions; -import io.temporal.common.reporter.TestStatsReporter; import io.temporal.testUtils.CountingSlotSupplier; import io.temporal.testing.internal.SDKTestWorkflowRule; -import io.temporal.worker.MetricsType; import io.temporal.worker.WorkerOptions; import io.temporal.worker.tuning.ActivitySlotInfo; import io.temporal.worker.tuning.CompositeTuner; @@ -66,13 +62,9 @@ public class WorkflowSlotsSmallSizeTests { new CountingSlotSupplier<>(MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE); private final CountingSlotSupplier localActivitySlotSupplier = new CountingSlotSupplier<>(MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); - private final TestStatsReporter reporter = new TestStatsReporter(); static Semaphore parallelSemRunning = new Semaphore(0); static Semaphore parallelSemBlocked = new Semaphore(0); - Scope metricsScope = - new RootScopeBuilder().reporter(reporter).reportEvery(com.uber.m3.util.Duration.ofMillis(1)); - @Parameterized.Parameter public boolean activitiesAreLocal; @Parameterized.Parameters() @@ -91,7 +83,6 @@ public static Object[] data() { 
activityTaskSlotSupplier, localActivitySlotSupplier)) .build()) - .setMetricsScope(metricsScope) .setActivityImplementations(new TestActivitySemaphoreImpl()) .setWorkflowTypes(ParallelActivities.class) .setDoNotStart(true) @@ -99,7 +90,6 @@ public static Object[] data() { @Before public void setup() { - reporter.flush(); parallelSemRunning = new Semaphore(0); parallelSemBlocked = new Semaphore(0); } @@ -116,24 +106,9 @@ public void tearDown() { localActivitySlotSupplier.releasedCount.get()); } - private void assertWorkerSlotCount(int worker, int activity, int localActivity) { - try { - // There can be a delay in metrics emission, another option if this - // is too flaky is to poll the metrics. - Thread.sleep(100); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - reporter.assertGauge( - MetricsType.WORKER_TASK_SLOTS_AVAILABLE, getWorkerTags("WorkflowWorker"), worker); - // All slots should be available - reporter.assertGauge( - MetricsType.WORKER_TASK_SLOTS_AVAILABLE, getWorkerTags("ActivityWorker"), activity); - // All slots should be available - reporter.assertGauge( - MetricsType.WORKER_TASK_SLOTS_AVAILABLE, - getWorkerTags("LocalActivityWorker"), - localActivity); + private void assertCurrentUsedCount(int activity, int localActivity) { + assertEquals(activity, activityTaskSlotSupplier.currentUsedSet.size()); + assertEquals(localActivity, localActivitySlotSupplier.currentUsedSet.size()); } @WorkflowInterface @@ -219,14 +194,11 @@ private void assertIntraWFTSlotCount(int allowedToRun) { int runningLAs = activitiesAreLocal ? allowedToRun : 0; int runningAs = activitiesAreLocal ? 0 : allowedToRun; int runningWFTs = activitiesAreLocal ? 
1 : 0; - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE - runningWFTs, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE - runningAs, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE - runningLAs); + assertCurrentUsedCount(runningAs, runningLAs); } @Test - public void TestLocalActivitySlotAtLimit() throws InterruptedException { + public void TestActivitySlotAtLimit() throws InterruptedException { testWorkflowRule.getTestEnvironment().start(); WorkflowClient client = testWorkflowRule.getWorkflowClient(); TestWorkflow workflow = @@ -244,14 +216,11 @@ public void TestLocalActivitySlotAtLimit() throws InterruptedException { } workflow.workflow(true); // All slots should be available - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + assertCurrentUsedCount(0, 0); } @Test - public void TestLocalActivityShutdownWhileWaitingOnSlot() throws InterruptedException { + public void TestActivityShutdownWhileWaitingOnSlot() throws InterruptedException { testWorkflowRule.getTestEnvironment().start(); WorkflowClient client = testWorkflowRule.getWorkflowClient(); TestWorkflow workflow = @@ -267,14 +236,12 @@ public void TestLocalActivityShutdownWhileWaitingOnSlot() throws InterruptedExce parallelSemBlocked.release(2); testWorkflowRule.getTestEnvironment().getWorkerFactory().awaitTermination(3, TimeUnit.SECONDS); // All slots should be available - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + // Used count here is actually -2 since the slots weren't marked used + assertCurrentUsedCount(0, 0); } @Test - public void TestLocalActivitySlotHitsCapacity() throws InterruptedException { + public void TestActivitySlotHitsCapacity() throws InterruptedException { testWorkflowRule.getTestEnvironment().start(); WorkflowClient client = 
testWorkflowRule.getWorkflowClient(); TestWorkflow workflow = @@ -301,9 +268,6 @@ public void TestLocalActivitySlotHitsCapacity() throws InterruptedException { parallelSemBlocked.release(100); workflow.workflow(true); // All slots should be available - assertWorkerSlotCount( - MAX_CONCURRENT_WORKFLOW_TASK_EXECUTION_SIZE, - MAX_CONCURRENT_ACTIVITY_EXECUTION_SIZE, - MAX_CONCURRENT_LOCAL_ACTIVITY_EXECUTION_SIZE); + assertCurrentUsedCount(0, 0); } } diff --git a/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java b/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java index e7a6bdbbd9..a9d0045e05 100644 --- a/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java +++ b/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java @@ -22,12 +22,15 @@ import io.temporal.worker.tuning.*; import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; public class CountingSlotSupplier extends FixedSizeSlotSupplier { public final AtomicInteger reservedCount = new AtomicInteger(); public final AtomicInteger releasedCount = new AtomicInteger(); public final AtomicInteger usedCount = new AtomicInteger(); + public final ConcurrentHashMap.KeySetView currentUsedSet = + ConcurrentHashMap.newKeySet(); public CountingSlotSupplier(int numSlots) { super(numSlots); @@ -52,12 +55,14 @@ public Optional tryReserveSlot(SlotReserveContext ctx) { @Override public void markSlotUsed(SlotMarkUsedContext ctx) { usedCount.incrementAndGet(); + currentUsedSet.add(ctx.getSlotPermit()); super.markSlotUsed(ctx); } @Override public void releaseSlot(SlotReleaseContext ctx) { super.releaseSlot(ctx); + currentUsedSet.remove(ctx.getSlotPermit()); releasedCount.incrementAndGet(); } } From ccfb368b8f34baafcfc90015f5c94930863aabd8 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Thu, 15 Aug 2024 14:19:49 -0700 Subject: [PATCH 23/25] Concurrent poll request lock test fix 
(#2187) Hopefully make concurrentPollRequestLockTest less flakey --- .../internal/worker/WorkflowWorkerTest.java | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java index 2c97e29534..9da6741f5e 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowWorkerTest.java @@ -40,6 +40,7 @@ import io.temporal.internal.replay.ReplayWorkflowFactory; import io.temporal.internal.replay.ReplayWorkflowTaskHandler; import io.temporal.serviceclient.WorkflowServiceStubs; +import io.temporal.testUtils.Eventually; import io.temporal.testUtils.HistoryUtils; import io.temporal.worker.MetricsType; import io.temporal.worker.tuning.FixedSizeSlotSupplier; @@ -192,27 +193,32 @@ public void concurrentPollRequestLockTest() throws Exception { pollTaskQueueLatch.await(); // Wait until the worker handles at least one WFT handleTaskLatch.await(); - // Sleep to allow metrics to be published - Thread.sleep(100); - // Since all polls have the same runID only one should get through, the other two should be - // blocked - assertEquals(runLockManager.totalLocks(), 1); // Verify 3 slots have been used - reporter.assertGauge( - MetricsType.WORKER_TASK_SLOTS_AVAILABLE, - ImmutableMap.of("worker_type", "WorkflowWorker"), - 97.0); + Eventually.assertEventually( + Duration.ofSeconds(10), + () -> { + // Since all polls have the same runID only one should get through, the other two should + // be + // blocked + assertEquals(runLockManager.totalLocks(), 1); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + ImmutableMap.of("worker_type", "WorkflowWorker"), + 97.0); + }); // Wait for the worker to respond, by this time the other blocked tasks should have timed out respondTaskLatch.await(); - // 
Sleep to allow metrics to be published - Thread.sleep(100); - // No task should have the lock anymore - assertEquals(runLockManager.totalLocks(), 0); // All slots should be available - reporter.assertGauge( - MetricsType.WORKER_TASK_SLOTS_AVAILABLE, - ImmutableMap.of("worker_type", "WorkflowWorker"), - 100.0); + Eventually.assertEventually( + Duration.ofSeconds(10), + () -> { + // No task should have the lock anymore + assertEquals(runLockManager.totalLocks(), 0); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_AVAILABLE, + ImmutableMap.of("worker_type", "WorkflowWorker"), + 100.0); + }); // Cleanup worker.shutdown(new ShutdownManager(), false).get(); // Verify we only handled two tasks From 33bfef06345d2329385d92ac8dcbad1a122485dd Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Thu, 15 Aug 2024 14:52:46 -0700 Subject: [PATCH 24/25] Add to graal bindings (#2185) --- .../temporal-test-server/jni-config.json | 35 ++ .../predefined-classes-config.json | 8 + .../temporal-test-server/reflect-config.json | 421 +++++++++++++++++- .../temporal-test-server/resource-config.json | 6 +- .../serialization-config.json | 10 +- 5 files changed, 472 insertions(+), 8 deletions(-) create mode 100644 temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/predefined-classes-config.json diff --git a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/jni-config.json b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/jni-config.json index 0d4f101c7a..4cbc26349c 100644 --- a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/jni-config.json +++ b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/jni-config.json @@ -1,2 +1,37 @@ [ +{ + "name":"io.temporal.testserver.TestServer", + "methods":[{"name":"main","parameterTypes":["java.lang.String[]"] }] +}, +{ + 
"name":"java.lang.Boolean", + "methods":[{"name":"getBoolean","parameterTypes":["java.lang.String"] }] +}, +{ + "name":"java.lang.String", + "methods":[ + {"name":"lastIndexOf","parameterTypes":["int"] }, + {"name":"substring","parameterTypes":["int"] } + ] +}, +{ + "name":"java.lang.System", + "methods":[ + {"name":"getProperty","parameterTypes":["java.lang.String"] }, + {"name":"setProperty","parameterTypes":["java.lang.String","java.lang.String"] } + ] +}, +{ + "name":"sun.management.VMManagementImpl", + "fields":[ + {"name":"compTimeMonitoringSupport"}, + {"name":"currentThreadCpuTimeSupport"}, + {"name":"objectMonitorUsageSupport"}, + {"name":"otherThreadCpuTimeSupport"}, + {"name":"remoteDiagnosticCommandsSupport"}, + {"name":"synchronizerUsageSupport"}, + {"name":"threadAllocatedMemorySupport"}, + {"name":"threadContentionMonitoringSupport"} + ] +} ] diff --git a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/predefined-classes-config.json b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/predefined-classes-config.json new file mode 100644 index 0000000000..0e79b2c5d8 --- /dev/null +++ b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/predefined-classes-config.json @@ -0,0 +1,8 @@ +[ + { + "type":"agent-extracted", + "classes":[ + ] + } +] + diff --git a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/reflect-config.json b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/reflect-config.json index aeff22a724..10e47998f2 100644 --- a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/reflect-config.json +++ b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/reflect-config.json @@ -1,12 +1,80 @@ [ +{ + 
"name":"com.google.common.util.concurrent.AbstractFuture", + "fields":[ + {"name":"listeners"}, + {"name":"value"}, + {"name":"waiters"} + ] +}, +{ + "name":"com.google.common.util.concurrent.AbstractFuture$Waiter", + "fields":[ + {"name":"next"}, + {"name":"thread"} + ] +}, +{ + "name":"com.google.protobuf.ExtensionRegistry", + "methods":[{"name":"getEmptyRegistry","parameterTypes":[] }] +}, +{ + "name":"com.google.protobuf.Timestamp", + "methods":[ + {"name":"getNanos","parameterTypes":[] }, + {"name":"getSeconds","parameterTypes":[] } + ] +}, +{ + "name":"com.google.protobuf.Timestamp$Builder", + "methods":[ + {"name":"clearNanos","parameterTypes":[] }, + {"name":"clearSeconds","parameterTypes":[] }, + {"name":"getNanos","parameterTypes":[] }, + {"name":"getSeconds","parameterTypes":[] }, + {"name":"setNanos","parameterTypes":["int"] }, + {"name":"setSeconds","parameterTypes":["long"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.grpc.netty.AbstractNettyHandler", + "methods":[ + {"name":"channelActive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"exceptionCaught","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Throwable"] } + ] +}, { "name":"io.grpc.netty.shaded.io.grpc.netty.NettyServer$1" }, { "name":"io.grpc.netty.shaded.io.grpc.netty.NettyServerHandler", "methods":[ - {"name":"channelInactive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, - {"name":"close","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"channelInactive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"close","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + 
{"name":"write","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.grpc.netty.ProtocolNegotiators$GrpcNegotiationHandler", + "methods":[{"name":"userEventTriggered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }] +}, +{ + "name":"io.grpc.netty.shaded.io.grpc.netty.ProtocolNegotiators$ProtocolNegotiationHandler", + "methods":[{"name":"userEventTriggered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }] +}, +{ + "name":"io.grpc.netty.shaded.io.grpc.netty.ProtocolNegotiators$WaitUntilActiveHandler", + "methods":[{"name":"channelActive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }] +}, +{ + "name":"io.grpc.netty.shaded.io.grpc.netty.WriteBufferingAndExceptionHandler", + "methods":[ + {"name":"channelInactive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelRead","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + {"name":"close","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"connect","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.net.SocketAddress","java.net.SocketAddress","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"exceptionCaught","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Throwable"] }, + {"name":"flush","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, {"name":"write","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] } 
] }, @@ -16,12 +84,359 @@ { "name":"io.grpc.netty.shaded.io.netty.bootstrap.ServerBootstrap$ServerBootstrapAcceptor", "methods":[ - {"name":"channelRead","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + {"name":"channelRead","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + {"name":"exceptionCaught","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Throwable"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.buffer.AbstractByteBufAllocator", + "queryAllDeclaredMethods":true +}, +{ + "name":"io.grpc.netty.shaded.io.netty.buffer.AbstractReferenceCountedByteBuf", + "fields":[{"name":"refCnt"}] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.ChannelDuplexHandler", + "methods":[ + {"name":"bind","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.net.SocketAddress","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"close","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"connect","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.net.SocketAddress","java.net.SocketAddress","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"deregister","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"disconnect","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"flush","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"read","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + 
{"name":"write","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.ChannelInboundHandlerAdapter", + "methods":[ + {"name":"channelActive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelInactive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelRead","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + {"name":"channelReadComplete","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelRegistered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelUnregistered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelWritabilityChanged","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"exceptionCaught","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Throwable"] }, + {"name":"userEventTriggered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.ChannelInitializer", + "methods":[ + {"name":"channelRegistered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, {"name":"exceptionCaught","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Throwable"] } ] }, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.DefaultChannelPipeline$HeadContext", + "methods":[ + {"name":"bind","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.net.SocketAddress","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + 
{"name":"channelActive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelInactive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelRead","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + {"name":"channelReadComplete","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelRegistered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelUnregistered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelWritabilityChanged","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"close","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"connect","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.net.SocketAddress","java.net.SocketAddress","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"deregister","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"disconnect","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"exceptionCaught","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Throwable"] }, + {"name":"flush","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"read","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"userEventTriggered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + 
{"name":"write","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.DefaultChannelPipeline$TailContext", + "methods":[ + {"name":"channelActive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelInactive","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelRead","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + {"name":"channelReadComplete","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelRegistered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelUnregistered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelWritabilityChanged","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"exceptionCaught","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Throwable"] }, + {"name":"userEventTriggered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.DefaultFileRegion" +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.epoll.Epoll", + "methods":[ + {"name":"isAvailable","parameterTypes":[] }, + {"name":"unavailabilityCause","parameterTypes":[] } + ] +}, { "name":"io.grpc.netty.shaded.io.netty.channel.epoll.EpollServerSocketChannel", "methods":[{"name":"","parameterTypes":[] }] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.epoll.NativeDatagramPacketArray$NativeDatagramPacket" +}, +{ + "name":"io.grpc.netty.shaded.io.netty.channel.unix.PeerCredentials" +}, +{ + 
"name":"io.grpc.netty.shaded.io.netty.handler.codec.ByteToMessageDecoder", + "methods":[ + {"name":"channelRead","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] }, + {"name":"userEventTriggered","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.lang.Object"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.handler.codec.http2.Http2ConnectionHandler", + "methods":[ + {"name":"bind","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.net.SocketAddress","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"channelReadComplete","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"channelWritabilityChanged","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"connect","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","java.net.SocketAddress","java.net.SocketAddress","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"deregister","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"disconnect","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext","io.grpc.netty.shaded.io.netty.channel.ChannelPromise"] }, + {"name":"flush","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] }, + {"name":"read","parameterTypes":["io.grpc.netty.shaded.io.netty.channel.ChannelHandlerContext"] } + ] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.AbstractReferenceCounted", + "fields":[{"name":"refCnt"}] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.ReferenceCountUtil", + "queryAllDeclaredMethods":true +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.internal.shaded.org.jctools.queues.BaseMpscLinkedArrayQueueColdProducerFields", + 
"fields":[{"name":"producerLimit"}] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.internal.shaded.org.jctools.queues.BaseMpscLinkedArrayQueueConsumerFields", + "fields":[{"name":"consumerIndex"}] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.internal.shaded.org.jctools.queues.BaseMpscLinkedArrayQueueProducerFields", + "fields":[{"name":"producerIndex"}] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.internal.shaded.org.jctools.queues.MpscArrayQueueConsumerIndexField", + "fields":[{"name":"consumerIndex"}] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.internal.shaded.org.jctools.queues.MpscArrayQueueProducerIndexField", + "fields":[{"name":"producerIndex"}] +}, +{ + "name":"io.grpc.netty.shaded.io.netty.util.internal.shaded.org.jctools.queues.MpscArrayQueueProducerLimitField", + "fields":[{"name":"producerLimit"}] +}, +{ + "name":"io.temporal.api.update.v1.Acceptance", + "methods":[ + {"name":"getAcceptedRequest","parameterTypes":[] }, + {"name":"getAcceptedRequestMessageId","parameterTypes":[] }, + {"name":"getAcceptedRequestMessageIdBytes","parameterTypes":[] }, + {"name":"getAcceptedRequestSequencingEventId","parameterTypes":[] }, + {"name":"getDefaultInstance","parameterTypes":[] }, + {"name":"hasAcceptedRequest","parameterTypes":[] } + ] +}, +{ + "name":"io.temporal.api.update.v1.Acceptance$Builder", + "methods":[ + {"name":"clearAcceptedRequest","parameterTypes":[] }, + {"name":"clearAcceptedRequestMessageId","parameterTypes":[] }, + {"name":"clearAcceptedRequestSequencingEventId","parameterTypes":[] }, + {"name":"getAcceptedRequest","parameterTypes":[] }, + {"name":"getAcceptedRequestBuilder","parameterTypes":[] }, + {"name":"getAcceptedRequestMessageId","parameterTypes":[] }, + {"name":"getAcceptedRequestMessageIdBytes","parameterTypes":[] }, + {"name":"getAcceptedRequestSequencingEventId","parameterTypes":[] }, + {"name":"hasAcceptedRequest","parameterTypes":[] }, + 
{"name":"setAcceptedRequest","parameterTypes":["io.temporal.api.update.v1.Request"] }, + {"name":"setAcceptedRequestMessageId","parameterTypes":["java.lang.String"] }, + {"name":"setAcceptedRequestMessageIdBytes","parameterTypes":["com.google.protobuf.ByteString"] }, + {"name":"setAcceptedRequestSequencingEventId","parameterTypes":["long"] } + ] +}, +{ + "name":"io.temporal.api.update.v1.Input", + "methods":[{"name":"newBuilder","parameterTypes":[] }] +}, +{ + "name":"io.temporal.api.update.v1.Meta", + "methods":[{"name":"newBuilder","parameterTypes":[] }] +}, +{ + "name":"io.temporal.api.update.v1.Outcome", + "methods":[{"name":"newBuilder","parameterTypes":[] }] +}, +{ + "name":"io.temporal.api.update.v1.Request", + "methods":[ + {"name":"getInput","parameterTypes":[] }, + {"name":"getMeta","parameterTypes":[] }, + {"name":"hasInput","parameterTypes":[] }, + {"name":"hasMeta","parameterTypes":[] }, + {"name":"newBuilder","parameterTypes":[] } + ] +}, +{ + "name":"io.temporal.api.update.v1.Request$Builder", + "methods":[ + {"name":"clearInput","parameterTypes":[] }, + {"name":"clearMeta","parameterTypes":[] }, + {"name":"getInput","parameterTypes":[] }, + {"name":"getInputBuilder","parameterTypes":[] }, + {"name":"getMeta","parameterTypes":[] }, + {"name":"getMetaBuilder","parameterTypes":[] }, + {"name":"hasInput","parameterTypes":[] }, + {"name":"hasMeta","parameterTypes":[] }, + {"name":"setInput","parameterTypes":["io.temporal.api.update.v1.Input"] }, + {"name":"setMeta","parameterTypes":["io.temporal.api.update.v1.Meta"] } + ] +}, +{ + "name":"io.temporal.api.update.v1.Response", + "methods":[ + {"name":"getDefaultInstance","parameterTypes":[] }, + {"name":"getMeta","parameterTypes":[] }, + {"name":"getOutcome","parameterTypes":[] }, + {"name":"hasMeta","parameterTypes":[] }, + {"name":"hasOutcome","parameterTypes":[] } + ] +}, +{ + "name":"io.temporal.api.update.v1.Response$Builder", + "methods":[ + {"name":"clearMeta","parameterTypes":[] }, + 
{"name":"clearOutcome","parameterTypes":[] }, + {"name":"getMeta","parameterTypes":[] }, + {"name":"getMetaBuilder","parameterTypes":[] }, + {"name":"getOutcome","parameterTypes":[] }, + {"name":"getOutcomeBuilder","parameterTypes":[] }, + {"name":"hasMeta","parameterTypes":[] }, + {"name":"hasOutcome","parameterTypes":[] }, + {"name":"setMeta","parameterTypes":["io.temporal.api.update.v1.Meta"] }, + {"name":"setOutcome","parameterTypes":["io.temporal.api.update.v1.Outcome"] } + ] +}, +{ + "name":"java.io.FileDescriptor" +}, +{ + "name":"java.lang.ProcessHandle", + "methods":[ + {"name":"current","parameterTypes":[] }, + {"name":"pid","parameterTypes":[] } + ] +}, +{ + "name":"java.lang.management.ManagementFactory", + "methods":[{"name":"getRuntimeMXBean","parameterTypes":[] }] +}, +{ + "name":"java.lang.management.RuntimeMXBean", + "methods":[{"name":"getInputArguments","parameterTypes":[] }] +}, +{ + "name":"java.nio.Bits", + "fields":[{"name":"UNALIGNED"}] +}, +{ + "name":"java.nio.Buffer", + "fields":[{"name":"address"}] +}, +{ + "name":"java.nio.ByteBuffer", + "methods":[{"name":"alignedSlice","parameterTypes":["int"] }] +}, +{ + "name":"java.nio.DirectByteBuffer", + "methods":[{"name":"","parameterTypes":["long","int"] }] +}, +{ + "name":"java.nio.channels.FileChannel" +}, +{ + "name":"java.nio.channels.spi.SelectorProvider", + "methods":[ + {"name":"openServerSocketChannel","parameterTypes":["java.net.ProtocolFamily"] }, + {"name":"openSocketChannel","parameterTypes":["java.net.ProtocolFamily"] } + ] +}, +{ + "name":"java.security.SecureRandomParameters" +}, +{ + "name":"java.util.concurrent.atomic.LongAdder", + "queryAllPublicConstructors":true, + "methods":[ + {"name":"","parameterTypes":[] }, + {"name":"add","parameterTypes":["long"] }, + {"name":"sum","parameterTypes":[] } + ] +}, +{ + "name":"jdk.internal.misc.Unsafe", + "methods":[{"name":"getUnsafe","parameterTypes":[] }] +}, +{ + "name":"sun.misc.Unsafe", + "allDeclaredFields":true, + "methods":[ + 
{"name":"arrayBaseOffset","parameterTypes":["java.lang.Class"] }, + {"name":"arrayIndexScale","parameterTypes":["java.lang.Class"] }, + {"name":"copyMemory","parameterTypes":["long","long","long"] }, + {"name":"copyMemory","parameterTypes":["java.lang.Object","long","java.lang.Object","long","long"] }, + {"name":"getAndAddLong","parameterTypes":["java.lang.Object","long","long"] }, + {"name":"getAndSetObject","parameterTypes":["java.lang.Object","long","java.lang.Object"] }, + {"name":"getBoolean","parameterTypes":["java.lang.Object","long"] }, + {"name":"getByte","parameterTypes":["long"] }, + {"name":"getByte","parameterTypes":["java.lang.Object","long"] }, + {"name":"getDouble","parameterTypes":["java.lang.Object","long"] }, + {"name":"getFloat","parameterTypes":["java.lang.Object","long"] }, + {"name":"getInt","parameterTypes":["long"] }, + {"name":"getInt","parameterTypes":["java.lang.Object","long"] }, + {"name":"getLong","parameterTypes":["long"] }, + {"name":"getLong","parameterTypes":["java.lang.Object","long"] }, + {"name":"getObject","parameterTypes":["java.lang.Object","long"] }, + {"name":"invokeCleaner","parameterTypes":["java.nio.ByteBuffer"] }, + {"name":"objectFieldOffset","parameterTypes":["java.lang.reflect.Field"] }, + {"name":"putBoolean","parameterTypes":["java.lang.Object","long","boolean"] }, + {"name":"putByte","parameterTypes":["long","byte"] }, + {"name":"putByte","parameterTypes":["java.lang.Object","long","byte"] }, + {"name":"putDouble","parameterTypes":["java.lang.Object","long","double"] }, + {"name":"putFloat","parameterTypes":["java.lang.Object","long","float"] }, + {"name":"putInt","parameterTypes":["long","int"] }, + {"name":"putInt","parameterTypes":["java.lang.Object","long","int"] }, + {"name":"putLong","parameterTypes":["long","long"] }, + {"name":"putLong","parameterTypes":["java.lang.Object","long","long"] }, + {"name":"putObject","parameterTypes":["java.lang.Object","long","java.lang.Object"] }, + 
{"name":"storeFence","parameterTypes":[] } + ] +}, +{ + "name":"sun.nio.ch.SelectorImpl", + "fields":[ + {"name":"publicSelectedKeys"}, + {"name":"selectedKeys"} + ] +}, +{ + "name":"sun.security.provider.NativePRNG", + "methods":[{"name":"","parameterTypes":[] }] +}, +{ + "name":"sun.security.provider.SHA", + "methods":[{"name":"","parameterTypes":[] }] } ] diff --git a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/resource-config.json b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/resource-config.json index 3740a83e01..eb21848e21 100644 --- a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/resource-config.json +++ b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/resource-config.json @@ -1,7 +1,7 @@ { "resources":{ - "includes":[ - {"pattern":"\\QMETA-INF/services/io.grpc.ServerProvider\\E"} - ]}, + "includes":[{ + "pattern":"\\QMETA-INF/services/io.grpc.ServerProvider\\E" + }]}, "bundles":[] } diff --git a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/serialization-config.json b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/serialization-config.json index 0d4f101c7a..f3d7e06e33 100644 --- a/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/serialization-config.json +++ b/temporal-test-server/src/main/resources/META-INF/native-image/io.temporal/temporal-test-server/serialization-config.json @@ -1,2 +1,8 @@ -[ -] +{ + "types":[ + ], + "lambdaCapturingTypes":[ + ], + "proxies":[ + ] +} From 7229a455036904c69a5886f6b959335b5fcefce8 Mon Sep 17 00:00:00 2001 From: Quinn Klassen Date: Thu, 15 Aug 2024 15:21:53 -0700 Subject: [PATCH 25/25] Release v1.25.0 (#2188) Release v1.25.0 --- releases/v1.25.0 | 58 
++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 releases/v1.25.0 diff --git a/releases/v1.25.0 b/releases/v1.25.0 new file mode 100644 index 0000000000..816b3b6c34 --- /dev/null +++ b/releases/v1.25.0 @@ -0,0 +1,58 @@ +# Highlights + +## Slot Auto-Tuning (Preview) + +Added `WorkerTuner` - this wraps the previously added `SlotSupplier` classes to bring worker tuning options together under one class passed to `WorkerOptions`. + * Added `ResourceBasedTuner` and `ResourceBasedSlotSupplier` - these classes can be used to tune the worker's slot count based on available memory and CPU resources. + Set a memory/CPU target and the worker will automatically try to reach those target usage levels. + * Added `CompositeTuner` - this allows you to combine different kinds of `SlotSuppliers` to implement `WorkerTuner`. + +Please give the resource-based tuning a try and let us know how it works for you (feedback on community Slack is welcome)! +The easiest thing to do is instantiate and pass a `ResourceBasedTuner` to `WorkerOptions`. +It'll work best if the worker is the only thing on your host using significant resources. +Try setting the thresholds to a reasonable value like 0.8, and make sure that your `JVM -Xmx` value is set appropriately. + +Note: Custom Slot Supplier is currently considered experimental and not intended for production use. + +## Handler Warnings + +Previously, if you had a signal handler or update handler that was not complete when the workflow was marked complete, +it'd silently be ignored/abandoned. Now you will get a warning. Users are encouraged to add +`Workflow.await(() -> Workflow.isEveryHandlerFinished())` to the bottom of their workflow to ensure they +have no outstanding handlers. If the previous no-warn situation is preferred, the signal/update annotation has a setting +to just abandon.
+ +# Changeset + +2024-06-25 - 659fee5c - Switch checkout in prepare-release.yml to v3 (#2126) +2024-06-27 - abd9f2d1 - Point feature repo back to main (#2130) +2024-07-08 - 0f903343 - Release v1.24.1 (#2140) +2024-07-08 - 46b239d4 - Revert configurable slot provider (#2134) +2024-07-08 - 99585c14 - Change build_native_images mac runner to macos-13 (#2135) +2024-07-09 - 3b26db7b - Make sure workflow_failed is incremented on NonDeterministicException (#2141) +2024-07-17 - eb7d9eed - Release v1.24.2 (#2147) +2024-07-19 - 0ba6188e - Experimental cloud operations client (#2146) +2024-07-23 - b95322f1 - Reintroduce slot supplier & add many tests (#2143) +2024-07-24 - eabd51fb - Ensure identity copied to Builder from source WorkerOptions (#2151) +2024-07-30 - 1acafa39 - Ensure shutdown of LA slot queue isn't swallowed (#2161) +2024-07-30 - 6b39e447 - Align Update API across test server and real server (#2153) +2024-07-30 - b92c97d3 - Workflow-friendly concurrency primitives (#2133) +2024-07-30 - bbf2de7a - Move workflow update polling inside of interceptor (#2159) +2024-07-30 - e5c08a19 - Enable next retry delay test for server (#2129) +2024-07-30 - f7c7341f - Fix transition in LA when handling canceled child wf (#2156) +2024-07-31 - 27a1fc25 - Add support for query in listSchedules (#2163) +2024-08-01 - 5d22bb5d - Add getCurrentUpdateInfo (#2158) +2024-08-05 - 48711683 - Test server Nexus endpoint operator apis (#2162) +2024-08-06 - 531d3cb2 - Wrap GRPC::CANCELED and DEADLINE_EXCEEDED in new exception type (#2172) +2024-08-06 - 98b2e78e - Disallow continue as new in update handlers (#2167) +2024-08-08 - 59c485e9 - Filter out third party protos (#2174) +2024-08-08 - e0851f05 - Build omes worker image in CI (#2171) +2024-08-11 - e2d2608b - Warn on dangling handlers and add method to help await on all handlers. 
(#2144) +2024-08-12 - a885812e - Add support for updating schedule search attributes (#2168) +2024-08-14 - a5d6e604 - Fix isEveryHandlerFinished doc string (#2182) +2024-08-15 - 1d668c68 - Activity slot test flake (#2186) +2024-08-15 - 33bfef06 - Add to graal bindings (#2185) +2024-08-15 - abc53233 - Implement test server support for sync Nexus operation commands (#2176) +2024-08-15 - b9eeda0d - Improve test time and flaky schedule test (#2183) +2024-08-15 - ccfb368b - Concurrent poll request lock test fix (#2187) +