diff --git a/apps/docs/content/docs/de/api-reference/getting-started.mdx b/apps/docs/content/docs/de/api-reference/getting-started.mdx index fa9fad0baa3..25c8cfdbf2e 100644 --- a/apps/docs/content/docs/de/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/de/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/en/api-reference/getting-started.mdx b/apps/docs/content/docs/en/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/en/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/en/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/en/api-reference/python.mdx b/apps/docs/content/docs/en/api-reference/python.mdx index 903bac51f1c..d70bb50e3aa 100644 --- a/apps/docs/content/docs/en/api-reference/python.mdx +++ b/apps/docs/content/docs/en/api-reference/python.mdx @@ -80,7 +80,7 @@ result = client.execute_workflow( **Returns:** `WorkflowExecutionResult | AsyncExecutionResult` -When `async_execution=True`, returns immediately with a task ID for polling. Otherwise, waits for completion. +When `async_execution=True`, returns immediately with a `job_id` and `status_url` for polling. Otherwise, waits for completion. ##### get_workflow_status() @@ -117,20 +117,20 @@ if is_ready: Get the status of an async job execution. 
```python -status = client.get_job_status("task-id-from-async-execution") +status = client.get_job_status("job-id-from-async-execution") print("Status:", status["status"]) # 'queued', 'processing', 'completed', 'failed' if status["status"] == "completed": print("Output:", status["output"]) ``` **Parameters:** -- `task_id` (str): The task ID returned from async execution +- `task_id` (str): The job ID returned from async execution **Returns:** `Dict[str, Any]` **Response fields:** - `success` (bool): Whether the request was successful -- `taskId` (str): The task ID +- `taskId` (str): The job ID - `status` (str): One of `'queued'`, `'processing'`, `'completed'`, `'failed'`, `'cancelled'` - `metadata` (dict): Contains `startedAt`, `completedAt`, and `duration` - `output` (any, optional): The workflow output (when completed) @@ -270,10 +270,11 @@ class WorkflowExecutionResult: @dataclass class AsyncExecutionResult: success: bool - task_id: str - status: str # 'queued' - created_at: str - links: Dict[str, str] # e.g., {"status": "/api/jobs/{taskId}"} + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True ``` ### WorkflowStatus @@ -493,17 +494,17 @@ def execute_async(): ) # Check if result is an async execution - if hasattr(result, 'task_id'): - print(f"Task ID: {result.task_id}") - print(f"Status endpoint: {result.links['status']}") + if hasattr(result, 'job_id'): + print(f"Job ID: {result.job_id}") + print(f"Status endpoint: {result.status_url}") # Poll for completion - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) while status["status"] in ["queued", "processing"]: print(f"Current status: {status['status']}") time.sleep(2) # Wait 2 seconds - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) if status["status"] == "completed": print("Workflow completed!") @@ -764,7 +765,7 @@ import { FAQ } from '@/components/ui/faq' ` -When `async: true`, returns immediately with a task ID for polling. Otherwise, waits for completion. +When `async: true`, returns immediately with a `jobId` and `statusUrl` for polling. Otherwise, waits for completion. ##### getWorkflowStatus() @@ -131,7 +131,7 @@ if (isReady) { Get the status of an async job execution. 
```typescript -const status = await client.getJobStatus('task-id-from-async-execution'); +const status = await client.getJobStatus('job-id-from-async-execution'); console.log('Status:', status.status); // 'queued', 'processing', 'completed', 'failed' if (status.status === 'completed') { console.log('Output:', status.output); @@ -139,13 +139,13 @@ if (status.status === 'completed') { ``` **Parameters:** -- `taskId` (string): The task ID returned from async execution +- `jobId` (string): The job ID returned from async execution **Returns:** `Promise` **Response fields:** - `success` (boolean): Whether the request was successful -- `taskId` (string): The task ID +- `taskId` (string): The job ID - `status` (string): One of `'queued'`, `'processing'`, `'completed'`, `'failed'`, `'cancelled'` - `metadata` (object): Contains `startedAt`, `completedAt`, and `duration` - `output` (any, optional): The workflow output (when completed) @@ -278,12 +278,11 @@ interface WorkflowExecutionResult { ```typescript interface AsyncExecutionResult { success: boolean; - taskId: string; - status: 'queued'; - createdAt: string; - links: { - status: string; // e.g., "/api/jobs/{taskId}" - }; + jobId: string; + statusUrl: string; + executionId?: string; + message: string; + async: true; } ``` @@ -767,17 +766,17 @@ async function executeAsync() { }); // Check if result is an async execution - if ('taskId' in result) { - console.log('Task ID:', result.taskId); - console.log('Status endpoint:', result.links.status); + if ('jobId' in result) { + console.log('Job ID:', result.jobId); + console.log('Status endpoint:', result.statusUrl); // Poll for completion - let status = await client.getJobStatus(result.taskId); + let status = await client.getJobStatus(result.jobId); while (status.status === 'queued' || status.status === 'processing') { console.log('Current status:', status.status); await new Promise(resolve => setTimeout(resolve, 2000)); // Wait 2 seconds - status = await client.getJobStatus(result.taskId); + status = await client.getJobStatus(result.jobId); } if (status.status === 'completed') { @@ -1022,7 +1021,7 @@ import { FAQ } from '@/components/ui/faq' `. Larger values are stored in execution storage and passed around as small references until code explicitly reads them. + +File outputs are metadata-first by default. Referencing ``, ``, or similar metadata does not hydrate file contents. In JavaScript functions without imports, a direct base64 reference like `` is automatically rewritten to a lazy server-side read so the base64 string does not cross the Function request body. + +You can also call the helper explicitly: + +```javascript +const file = ; +const base64 = await sim.files.readBase64(file); +``` + +`sim.files.readBase64(file)`, `sim.files.readText(file)`, `sim.files.readBase64Chunk(file, { offset, length })`, and `sim.files.readTextChunk(file, { offset, length })` read from server-side execution storage under memory caps. `sim.values.read(ref)` can explicitly read a large execution value reference. These helpers are available only in JavaScript functions without imports. JavaScript with imports, Python, and shell do not support these lazy helpers yet. + +Very large full reads can still fail by design; use chunk helpers or return a file when you need to handle more data. 
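+
+For example, this is a minimal sketch of reading a large execution value reference explicitly. The `<api.output.items>` reference is hypothetical and stands in for any block output that was offloaded to execution storage:
+
+```javascript
+// Hypothetical reference to a prior block's oversized output.
+const ref = <api.output.items>;
+
+// Explicitly hydrate the value under the server-side memory caps.
+const items = await sim.values.read(ref);
+
+return { count: items.length, first: items[0] };
+```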
+ +Use text chunks for text-like files such as logs, CSV, JSONL, and markdown: + +```javascript +const file = ; +const firstMegabyte = await sim.files.readTextChunk(file, { + offset: 0, + length: 1024 * 1024, +}); + +return firstMegabyte.split('\n').slice(0, 10); +``` + +Use base64 chunks for binary files such as images, PDFs, audio, archives, or APIs that expect base64 input: + +```javascript +const file = ; +const firstMegabyteBase64 = await sim.files.readBase64Chunk(file, { + offset: 0, + length: 1024 * 1024, +}); + +return { name: file.name, chunk: firstMegabyteBase64 }; +``` + +Chunk `offset` and `length` are byte-based. For Unicode text, a chunk can split a multi-byte character at the boundary; use text chunks for approximate text processing and prefer smaller structured references when exact parsing matters. + +Avoid passing a full large object into a Function block when you only need one field. For example, prefer `` over `` when the API response is large. If a JavaScript Function without imports references a large execution value, Sim automatically reads it through `sim.values.read(...)` at runtime under memory caps. + +For large generated data, write the result to a file or table with `outputPath`, `outputSandboxPath`, or `outputTable` instead of returning the entire payload inline. + - **Keep functions focused**: Write functions that do one thing well to improve maintainability and debugging - **Handle errors gracefully**: Use try/catch blocks to handle potential errors and provide meaningful error messages - **Test edge cases**: Ensure your code handles unusual inputs, null values, and boundary conditions correctly diff --git a/apps/docs/content/docs/en/blocks/parallel.mdx b/apps/docs/content/docs/en/blocks/parallel.mdx index f3207d901bc..24fccc7ebf3 100644 --- a/apps/docs/content/docs/en/blocks/parallel.mdx +++ b/apps/docs/content/docs/en/blocks/parallel.mdx @@ -34,6 +34,7 @@ Choose between two types of parallel execution: Use this when you need to run the same operation multiple times concurrently. + If the total count is larger than the batch size, Sim runs the work in serial batches while preserving the original result order. ``` Example: Run 5 parallel instances @@ -57,7 +58,7 @@ Choose between two types of parallel execution: /> - Each instance processes one item from the collection simultaneously. + Each instance processes one item from the collection. Large collections run in serial batches while preserving each item's original index. ``` Example: Process ["task1", "task2", "task3"] in parallel @@ -140,6 +141,12 @@ const allResults = ; // Returns: [result1, result2, result3, ...] ``` +For large result sets, reference only the entry or field you need, such as ``. Sim keeps aggregate results indexable by storing oversized entries in execution storage and hydrating them only when an indexed server-side path is explicitly referenced. + +### Batch Size + +Parallel blocks run up to 20 branches at a time by default. Increase the total count or collection size to process more work; Sim will execute the next batch after the current batch finishes. You can lower the batch size to reduce concurrency for rate-limited APIs. 
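+
+For intuition, the batching behavior is roughly equivalent to this plain-JavaScript sketch (not Sim's implementation): at most `batchSize` branches run at once, the next batch starts only after the current one settles, and each result keeps its original index.
+
+```javascript
+async function runInBatches(items, batchSize, worker) {
+  const results = new Array(items.length);
+  for (let offset = 0; offset < items.length; offset += batchSize) {
+    // Launch one batch concurrently; wait for it before starting the next.
+    const batch = items.slice(offset, offset + batchSize);
+    const settled = await Promise.all(batch.map((item, i) => worker(item, offset + i)));
+    // Each result lands at its original index, so order is preserved.
+    settled.forEach((value, i) => {
+      results[offset + i] = value;
+    });
+  }
+  return results;
+}
+```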
+ ### Instance Isolation Each parallel instance runs independently: @@ -157,7 +164,7 @@ Each parallel instance runs independently: While parallel execution is faster, be mindful of: - API rate limits when making concurrent requests - Memory usage with large datasets - - Maximum of 20 concurrent instances to prevent resource exhaustion + - Maximum of 20 concurrent instances per batch to prevent resource exhaustion ## Parallel vs Loop @@ -186,6 +193,9 @@ Understanding when to use each:
  • Collection: Array or object to distribute (collection-based)
  •
+
  •
+ Batch size: Number of branches to run concurrently, from 1 to 20
+
  •
diff --git a/apps/docs/content/docs/en/execution/api-deployment.mdx b/apps/docs/content/docs/en/execution/api-deployment.mdx
index b74a886271c..b7f1de3fbf9 100644
--- a/apps/docs/content/docs/en/execution/api-deployment.mdx
+++ b/apps/docs/content/docs/en/execution/api-deployment.mdx
@@ -215,6 +215,25 @@ while (true) {
+#### Oversized outputs
+
+Workflow execution responses are capped by platform request and response limits. When an internal output, log field, streamed field, or async status payload contains a value that is too large to inline, Sim may replace that nested value with a versioned reference:
+
+```json
+{
+  "__simLargeValueRef": true,
+  "version": 1,
+  "id": "lv_abc123DEF456",
+  "kind": "array",
+  "size": 12582912,
+  "key": "execution/workspace-id/workflow-id/exec_xyz/large-value-lv_abc123DEF456.json",
+  "executionId": "exec_xyz",
+  "preview": { "length": 25000 }
+}
+```
+
+The `version` field is part of the external API contract. Treat the reference as an opaque placeholder for a value that could not be safely embedded in the response. `id`, `key`, and `executionId` are not fetch URLs; `key` points to execution-scoped server storage. Use `selectedOutputs` to request a smaller nested field, reduce the data passed between blocks, or return the data from a Response block when your workflow intentionally owns the HTTP response body. File outputs are metadata-first; request `.base64` only when you need inline file content. JavaScript Function blocks can explicitly read large files or value refs with the `sim.files` and `sim.values` helpers under memory caps.
+
 ### Asynchronous

 For long-running workflows, async mode returns a job ID immediately so you don't need to hold the connection open. Add the `X-Execution-Mode: async` header to your request. The API returns HTTP 202 with a job ID and status URL. Poll the status URL until the job completes.
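+
+A minimal polling sketch, assuming only the headers and response shape shown above (`YOUR_API_KEY` and `{workflowId}` are placeholders):
+
+```javascript
+const response = await fetch('https://www.sim.ai/api/workflows/{workflowId}/execute', {
+  method: 'POST',
+  headers: {
+    'X-API-Key': 'YOUR_API_KEY',
+    'X-Execution-Mode': 'async',
+    'Content-Type': 'application/json',
+  },
+  body: JSON.stringify({ inputs: {} }),
+});
+
+const { statusUrl } = await response.json(); // HTTP 202 with jobId and statusUrl
+
+let job;
+do {
+  await new Promise((resolve) => setTimeout(resolve, 2000)); // poll every 2 seconds
+  const statusResponse = await fetch(statusUrl, {
+    headers: { 'X-API-Key': 'YOUR_API_KEY' },
+  });
+  job = await statusResponse.json();
+} while (job.status === 'queued' || job.status === 'processing');
+
+console.log(job.status, job.output);
+```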
diff --git a/apps/docs/content/docs/es/api-reference/getting-started.mdx b/apps/docs/content/docs/es/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/es/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/es/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/fr/api-reference/getting-started.mdx b/apps/docs/content/docs/fr/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/fr/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/fr/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/ja/api-reference/getting-started.mdx b/apps/docs/content/docs/ja/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/ja/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/ja/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/zh/api-reference/getting-started.mdx b/apps/docs/content/docs/zh/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/zh/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/zh/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: 
```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/zh/api-reference/python.mdx b/apps/docs/content/docs/zh/api-reference/python.mdx index c44973c8660..608942d1baf 100644 --- a/apps/docs/content/docs/zh/api-reference/python.mdx +++ b/apps/docs/content/docs/zh/api-reference/python.mdx @@ -117,20 +117,20 @@ if is_ready: 获取异步任务执行的状态。 ```python -status = client.get_job_status("task-id-from-async-execution") +status = client.get_job_status("job-id-from-async-execution") print("Status:", status["status"]) # 'queued', 'processing', 'completed', 'failed' if status["status"] == "completed": print("Output:", status["output"]) ``` **参数:** -- `task_id` (str): 异步执行返回的任务 ID +- `job_id` (str): 异步执行返回的作业 ID **返回值:** `Dict[str, Any]` **响应字段:** - `success` (bool): 请求是否成功 -- `taskId` (str): 任务 ID +- `taskId` (str): 作业 ID - `status` (str): 可能的值包括 `'queued'`, `'processing'`, `'completed'`, `'failed'`, `'cancelled'` - `metadata` (dict): 包含 `startedAt`, `completedAt` 和 `duration` - `output` (any, optional): 工作流输出(完成时) @@ -271,10 +271,11 @@ class WorkflowExecutionResult: @dataclass class AsyncExecutionResult: success: bool - task_id: str - status: str # 'queued' - created_at: str - links: Dict[str, str] # e.g., {"status": "/api/jobs/{taskId}"} + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True ``` ### WorkflowStatus @@ -494,17 +495,17 @@ def execute_async(): ) # Check if result is an async execution - if hasattr(result, 'task_id'): - print(f"Task ID: {result.task_id}") - print(f"Status endpoint: {result.links['status']}") + if hasattr(result, 'job_id'): + print(f"Job ID: {result.job_id}") + print(f"Status endpoint: {result.status_url}") # Poll for completion - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) while status["status"] in ["queued", "processing"]: print(f"Current status: {status['status']}") time.sleep(2) # Wait 2 seconds - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) if status["status"] == "completed": print("Workflow completed!") diff --git a/apps/docs/content/docs/zh/api-reference/typescript.mdx b/apps/docs/content/docs/zh/api-reference/typescript.mdx index 0f038db92dd..fac3bdffb73 100644 --- a/apps/docs/content/docs/zh/api-reference/typescript.mdx +++ b/apps/docs/content/docs/zh/api-reference/typescript.mdx @@ -138,7 +138,7 @@ if (isReady) { 获取异步任务执行的状态。 ```typescript -const status = await client.getJobStatus('task-id-from-async-execution'); +const status = await client.getJobStatus('job-id-from-async-execution'); console.log('Status:', status.status); // 'queued', 'processing', 'completed', 'failed' if (status.status === 'completed') { console.log('Output:', status.output); @@ -146,13 +146,13 @@ if (status.status === 'completed') { ``` **参数:** -- `taskId`(字符串):异步执行返回的任务 ID +- `jobId`(字符串):异步执行返回的作业 ID **返回值:** `Promise` **响应字段:** - `success`(布尔值):请求是否成功 -- `taskId`(字符串):任务 ID +- `taskId`(字符串):作业 ID - `status`(字符串):以下之一 `'queued'`、`'processing'`、`'completed'`、`'failed'`、`'cancelled'` - 
`metadata`(对象):包含 `startedAt`、`completedAt` 和 `duration` - `output`(任意类型,可选):工作流输出(完成时) @@ -286,12 +286,11 @@ interface WorkflowExecutionResult { ```typescript interface AsyncExecutionResult { success: boolean; - taskId: string; - status: 'queued'; - createdAt: string; - links: { - status: string; // e.g., "/api/jobs/{taskId}" - }; + jobId: string; + statusUrl: string; + executionId?: string; + message: string; + async: true; } ``` @@ -797,17 +796,17 @@ async function executeAsync() { }); // Check if result is an async execution - if ('taskId' in result) { - console.log('Task ID:', result.taskId); - console.log('Status endpoint:', result.links.status); + if ('jobId' in result) { + console.log('Job ID:', result.jobId); + console.log('Status endpoint:', result.statusUrl); // Poll for completion - let status = await client.getJobStatus(result.taskId); + let status = await client.getJobStatus(result.jobId); while (status.status === 'queued' || status.status === 'processing') { console.log('Current status:', status.status); await new Promise(resolve => setTimeout(resolve, 2000)); // Wait 2 seconds - status = await client.getJobStatus(result.taskId); + status = await client.getJobStatus(result.jobId); } if (status.status === 'completed') { diff --git a/apps/realtime/src/database/operations.ts b/apps/realtime/src/database/operations.ts index 14fa8639eaf..38a98b14bb3 100644 --- a/apps/realtime/src/database/operations.ts +++ b/apps/realtime/src/database/operations.ts @@ -40,6 +40,7 @@ const db = socketDb const DEFAULT_LOOP_ITERATIONS = 5 const DEFAULT_PARALLEL_COUNT = 5 +const DEFAULT_PARALLEL_BATCH_SIZE = 20 /** Minimal block shape needed for protection and descendant checks */ interface DbBlockRef { @@ -740,8 +741,9 @@ async function handleBlocksOperationTx( workflowId, type: 'parallel', config: { - parallelType: 'fixed', + parallelType: 'count', count: DEFAULT_PARALLEL_COUNT, + batchSize: DEFAULT_PARALLEL_BATCH_SIZE, nodes: [], }, }) @@ -1620,11 +1622,23 @@ async function handleSubflowOperationTx( logger.debug(`Updating subflow ${payload.id} with config:`, payload.config) - // Update the subflow configuration + // Read-modify-write merge so partial config payloads never wipe other fields + // (e.g. 
an iteration-only update from one client should not drop batchSize set by another) + const existingSubflow = await tx + .select({ config: workflowSubflows.config }) + .from(workflowSubflows) + .where( + and(eq(workflowSubflows.id, payload.id), eq(workflowSubflows.workflowId, workflowId)) + ) + .limit(1) + + const existingConfig = (existingSubflow[0]?.config as Record) || {} + const mergedConfig = { ...existingConfig, ...payload.config } + const updateResult = await tx .update(workflowSubflows) .set({ - config: payload.config, + config: mergedConfig, updatedAt: new Date(), }) .where( @@ -1677,27 +1691,35 @@ async function handleSubflowOperationTx( }) .where(and(eq(workflowBlocks.id, payload.id), eq(workflowBlocks.workflowId, workflowId))) } else if (payload.type === 'parallel') { - // Update the parallel block's data properties - const blockData = { - ...payload.config, - width: 500, - height: 300, - type: 'subflowNode', - } - - // Include count if provided - if (payload.config.count !== undefined) { - blockData.count = payload.config.count - } + const existingBlock = await tx + .select({ data: workflowBlocks.data }) + .from(workflowBlocks) + .where(and(eq(workflowBlocks.id, payload.id), eq(workflowBlocks.workflowId, workflowId))) + .limit(1) - // Include collection if provided - if (payload.config.distribution !== undefined) { - blockData.collection = payload.config.distribution - } + const existingData = (existingBlock[0]?.data as any) || {} - // Include parallelType if provided - if (payload.config.parallelType !== undefined) { - blockData.parallelType = payload.config.parallelType + const blockData: any = { + ...existingData, + type: 'subflowNode', + width: existingData.width ?? 500, + height: existingData.height ?? 300, + count: + payload.config.count !== undefined + ? payload.config.count + : (existingData.count ?? DEFAULT_PARALLEL_COUNT), + parallelType: + payload.config.parallelType !== undefined + ? payload.config.parallelType + : (existingData.parallelType ?? 'count'), + collection: + payload.config.distribution !== undefined + ? payload.config.distribution + : (existingData.collection ?? ''), + batchSize: + payload.config.batchSize !== undefined + ? payload.config.batchSize + : (existingData.batchSize ?? 
DEFAULT_PARALLEL_BATCH_SIZE), } await tx diff --git a/apps/sim/app/api/chat/[identifier]/route.ts b/apps/sim/app/api/chat/[identifier]/route.ts index a6dff447355..f35d950a21c 100644 --- a/apps/sim/app/api/chat/[identifier]/route.ts +++ b/apps/sim/app/api/chat/[identifier]/route.ts @@ -274,6 +274,9 @@ export const POST = withRouteHandler( workflowTriggerType: 'chat', }, executionId, + workspaceId, + workflowId: deployment.workflowId, + userId: workspaceOwnerId, executeFn: async ({ onStream, onBlockComplete, abortSignal }) => executeWorkflow( workflowForExecution, diff --git a/apps/sim/app/api/form/[identifier]/route.ts b/apps/sim/app/api/form/[identifier]/route.ts index b91c6ef932a..d5ed51c4af7 100644 --- a/apps/sim/app/api/form/[identifier]/route.ts +++ b/apps/sim/app/api/form/[identifier]/route.ts @@ -227,6 +227,9 @@ export const POST = withRouteHandler( workflowTriggerType: 'api', }, executionId, + workspaceId, + workflowId: deployment.workflowId, + userId: workspaceOwnerId, executeFn: async ({ onStream, onBlockComplete, abortSignal }) => executeWorkflow( workflowForExecution, diff --git a/apps/sim/app/api/function/execute/route.ts b/apps/sim/app/api/function/execute/route.ts index fcfda730c4b..b7137a383dc 100644 --- a/apps/sim/app/api/function/execute/route.ts +++ b/apps/sim/app/api/function/execute/route.ts @@ -12,8 +12,18 @@ import { isE2bEnabled } from '@/lib/core/config/feature-flags' import { generateRequestId } from '@/lib/core/utils/request' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { executeInE2B, executeShellInE2B } from '@/lib/execution/e2b' -import { executeInIsolatedVM } from '@/lib/execution/isolated-vm' +import { executeInIsolatedVM, type IsolatedVMBrokerHandler } from '@/lib/execution/isolated-vm' import { CodeLanguage, DEFAULT_CODE_LANGUAGE, isValidCodeLanguage } from '@/lib/execution/languages' +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' +import { + MAX_FUNCTION_INLINE_BYTES, + MAX_INLINE_MATERIALIZATION_BYTES, + readLargeValueRefFromStorage, + readUserFileContent, + unavailableLargeValueError, +} from '@/lib/execution/payloads/materialization.server' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' import { uploadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { getWorkflowById } from '@/lib/workflows/utils' import { escapeRegExp, normalizeName, REFERENCE } from '@/executor/constants' @@ -684,6 +694,125 @@ function serializeForShellEnv(value: unknown, nullValue = ''): string { } } +interface FunctionRouteExecutionContext { + workflowId?: string + workspaceId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean + userId?: string + requestId: string +} + +function asRecord(value: unknown): Record { + return value && typeof value === 'object' && !Array.isArray(value) + ? (value as Record) + : {} +} + +function getPositiveNumber(value: unknown): number | undefined { + if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) { + return undefined + } + return value +} + +function clampInlineBytes(value: unknown, limit = MAX_FUNCTION_INLINE_BYTES): number { + const requested = getPositiveNumber(value) + return Math.min(requested ?? 
limit, limit) +} + +function getBrokerFileArgs(args: unknown): { + file: unknown + maxBytes: number + offset?: number + length?: number +} { + const record = asRecord(args) + const options = asRecord(record.options) + return { + file: record.file, + maxBytes: clampInlineBytes(options.maxBytes), + offset: getPositiveNumber(options.offset), + length: getPositiveNumber(options.length), + } +} + +function createFunctionRuntimeBrokers( + context: FunctionRouteExecutionContext +): Record { + const base = { + requestId: context.requestId, + workflowId: context.workflowId, + workspaceId: context.workspaceId, + executionId: context.executionId, + largeValueExecutionIds: context.largeValueExecutionIds, + allowLargeValueWorkflowScope: context.allowLargeValueWorkflowScope, + userId: context.userId, + logger, + } + + const readFile = async (args: unknown, encoding: 'base64' | 'text', chunked = false) => { + const fileArgs = getBrokerFileArgs(args) + return readUserFileContent(fileArgs.file, { + ...base, + encoding, + maxBytes: fileArgs.maxBytes, + chunked, + offset: chunked ? fileArgs.offset : undefined, + length: chunked ? fileArgs.length : undefined, + }) + } + + return { + 'sim.files.readBase64': (args) => readFile(args, 'base64'), + 'sim.files.readText': (args) => readFile(args, 'text'), + 'sim.files.readBase64Chunk': (args) => readFile(args, 'base64', true), + 'sim.files.readTextChunk': (args) => readFile(args, 'text', true), + 'sim.values.read': async (args) => { + const record = asRecord(args) + const options = asRecord(record.options) + const ref = record.ref + if (!isLargeValueRef(ref)) { + throw new Error('Expected a large execution value reference.') + } + if (!context.executionId) { + throw new Error('Large execution values require an execution context.') + } + const value = await readLargeValueRefFromStorage(ref, { + ...base, + maxBytes: clampInlineBytes(options.maxBytes, MAX_INLINE_MATERIALIZATION_BYTES), + }) + if (value === undefined) { + throw unavailableLargeValueError(ref) + } + return value + }, + } +} + +async function compactFunctionRouteBody( + body: T, + context: FunctionRouteExecutionContext +): Promise { + return compactExecutionPayload(body, { + workflowId: context.workflowId, + workspaceId: context.workspaceId, + executionId: context.executionId, + userId: context.userId, + preserveRoot: true, + requireDurable: Boolean(context.workspaceId && context.workflowId && context.executionId), + }) +} + +async function functionJsonResponse( + body: T, + context: FunctionRouteExecutionContext, + init?: ResponseInit +) { + return NextResponse.json(await compactFunctionRouteBody(body, context), init) +} + async function maybeExportSandboxFileToWorkspace(args: { authUserId: string workflowId?: string @@ -792,6 +921,7 @@ export const POST = withRouteHandler(async (req: NextRequest) => { let userCodeStartLine = 3 // Default value for error reporting let resolvedCode = '' // Store resolved code for error reporting let sourceCodeForErrors: string | undefined + let routeContext: FunctionRouteExecutionContext | undefined try { const auth = await checkInternalAuth(req) @@ -823,6 +953,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => { workflowVariables = {}, contextVariables: preResolvedContextVariables = {}, workflowId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope = false, workspaceId, isCustomTool = false, _sandboxFiles, @@ -837,9 +970,20 @@ export const POST = withRouteHandler(async (req: NextRequest) => { paramsCount: 
Object.keys(executionParams).length, timeout, workflowId, + executionId, isCustomTool, }) + routeContext = { + workflowId, + workspaceId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, + userId: auth.userId, + requestId, + } + const lang = isValidCodeLanguage(language) ? language : DEFAULT_CODE_LANGUAGE let contextVariables: Record = {} @@ -927,12 +1071,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { }) if (shellError) { - return NextResponse.json( + return functionJsonResponse( { success: false, error: shellError, output: { result: null, stdout: cleanStdout(shellStdout), executionTime }, }, + routeContext, { status: 500 } ) } @@ -953,10 +1098,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { if (fileExportResponse) return fileExportResponse } - return NextResponse.json({ - success: true, - output: { result: shellResult ?? null, stdout: cleanStdout(shellStdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: shellResult ?? null, stdout: cleanStdout(shellStdout), executionTime }, + }, + routeContext + ) } if (lang === CodeLanguage.Python && !isE2bEnabled) { @@ -1054,12 +1202,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { errorDisplayCode, prologueLineCount + importLineCount ) - return NextResponse.json( + return functionJsonResponse( { success: false, error: formattedError, output: { result: null, stdout: cleanedOutput, executionTime }, }, + routeContext, { status: 500 } ) } @@ -1080,10 +1229,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { if (fileExportResponse) return fileExportResponse } - return NextResponse.json({ - success: true, - output: { result: e2bResult ?? null, stdout: cleanStdout(stdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: e2bResult ?? null, stdout: cleanStdout(stdout), executionTime }, + }, + routeContext + ) } let prologueLineCount = 0 @@ -1137,12 +1289,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { errorDisplayCode, prologueLineCount ) - return NextResponse.json( + return functionJsonResponse( { success: false, error: formattedError, output: { result: null, stdout: cleanedOutput, executionTime }, }, + routeContext, { status: 500 } ) } @@ -1163,10 +1316,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { if (fileExportResponse) return fileExportResponse } - return NextResponse.json({ - success: true, - output: { result: e2bResult ?? null, stdout: cleanStdout(stdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: e2bResult ?? 
null, stdout: cleanStdout(stdout), executionTime }, + }, + routeContext + ) } const executionMethod = 'isolated-vm' @@ -1194,16 +1350,19 @@ export const POST = withRouteHandler(async (req: NextRequest) => { prependedLineCount = paramKeys.length } - const isolatedResult = await executeInIsolatedVM({ - code: codeToExecute, - params: executionParams, - envVars, - contextVariables, - timeoutMs: timeout, - requestId, - ownerKey: `user:${auth.userId}`, - ownerWeight: 1, - }) + const isolatedResult = await executeInIsolatedVM( + { + code: codeToExecute, + params: executionParams, + envVars, + contextVariables, + timeoutMs: timeout, + requestId, + ownerKey: `user:${auth.userId}`, + ownerWeight: 1, + }, + { brokers: createFunctionRuntimeBrokers(routeContext) } + ) const executionTime = Date.now() - startTime @@ -1255,7 +1414,7 @@ export const POST = withRouteHandler(async (req: NextRequest) => { errorType: enhancedError.name, }) - return NextResponse.json( + return functionJsonResponse( { success: false, error: userFriendlyErrorMessage, @@ -1272,6 +1431,7 @@ export const POST = withRouteHandler(async (req: NextRequest) => { stack: enhancedError.stack, }, }, + routeContext, { status: isSystemError ? 500 : 422 } ) } @@ -1281,12 +1441,51 @@ export const POST = withRouteHandler(async (req: NextRequest) => { executionTime, }) - return NextResponse.json({ - success: true, - output: { result: isolatedResult.result, stdout: cleanStdout(stdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: isolatedResult.result, stdout: cleanStdout(stdout), executionTime }, + }, + routeContext + ) } catch (error: any) { const executionTime = Date.now() - startTime + if (isExecutionResourceLimitError(error)) { + logger.warn(`[${requestId}] Function execution exceeded resource limits`, { + resource: error.resource, + attemptedBytes: error.attemptedBytes, + limitBytes: error.limitBytes, + executionTime, + }) + if (routeContext) { + return functionJsonResponse( + { + success: false, + error: error.message, + output: { + result: null, + stdout: cleanStdout(stdout), + executionTime, + }, + }, + routeContext, + { status: error.statusCode } + ) + } + return NextResponse.json( + { + success: false, + error: error.message, + output: { + result: null, + stdout: cleanStdout(stdout), + executionTime, + }, + }, + { status: error.statusCode } + ) + } + logger.error(`[${requestId}] Function execution failed`, { error: error.message || 'Unknown error', stack: error.stack, @@ -1328,6 +1527,10 @@ export const POST = withRouteHandler(async (req: NextRequest) => { }, } + if (routeContext) { + return functionJsonResponse(errorResponse, routeContext, { status: 500 }) + } + return NextResponse.json(errorResponse, { status: 500 }) } }) diff --git a/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts b/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts index ff70c6f1898..47f2f381168 100644 --- a/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts +++ b/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts @@ -180,6 +180,10 @@ export const POST = withRouteHandler( timeoutMs: preprocessResult.executionTimeout?.sync, }, executionId: enqueueResult.resumeExecutionId, + workspaceId: workflow.workspaceId || undefined, + workflowId, + userId: enqueueResult.userId, + allowLargeValueWorkflowScope: true, executeFn: async ({ onStream, onBlockComplete, abortSignal }) => PauseResumeManager.startResumeExecution({ ...resumeArgs, diff --git 
a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index b0f0a0b1d4d..9d042cea756 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -36,6 +36,7 @@ import { registerManualExecutionAborter, unregisterManualExecutionAborter, } from '@/lib/execution/manual-cancellation' +import { compactBlockLogs, compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { preprocessExecution } from '@/lib/execution/preprocessing' import { LoggingSession } from '@/lib/logs/execution/logging-session' import { @@ -65,7 +66,7 @@ import type { IterationContext, SerializableExecutionState, } from '@/executor/execution/types' -import type { NormalizedBlockOutput, StreamingExecution } from '@/executor/types' +import type { BlockLog, NormalizedBlockOutput, StreamingExecution } from '@/executor/types' import { getExecutionErrorStatus, hasExecutionResult } from '@/executor/utils/errors' import { Serializer } from '@/serializer' import { CORE_TRIGGER_TYPES, type CoreTriggerType } from '@/stores/logs/filters/types' @@ -75,6 +76,20 @@ const logger = createLogger('WorkflowExecuteAPI') export const runtime = 'nodejs' export const dynamic = 'force-dynamic' +async function compactRoutePayload( + value: T, + context: { + workspaceId?: string + workflowId?: string + executionId?: string + userId?: string + preserveUserFileBase64?: boolean + preserveRoot?: boolean + } +): Promise { + return compactExecutionPayload(value, { ...context, requireDurable: true }) +} + function resolveOutputIds( selectedOutputs: string[] | undefined, blocks: Record @@ -719,6 +734,14 @@ async function handleExecutePost( }) await handlePostExecutionPauseState({ result, workflowId, executionId, loggingSession }) + const compactResultOutput = await compactRoutePayload(result.output, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) if ( result.status === 'cancelled' && @@ -734,7 +757,7 @@ async function handleExecutePost( return NextResponse.json( { success: false, - output: result.output, + output: compactResultOutput, error: timeoutErrorMessage, metadata: result.metadata ? { @@ -751,21 +774,32 @@ async function handleExecutePost( const outputWithBase64 = includeFileBase64 ? ((await hydrateUserFilesWithBase64(result.output, { requestId, + workspaceId, + workflowId, executionId, + allowLargeValueWorkflowScope: Boolean(resolvedRunFromBlock?.sourceSnapshot), + userId: actorUserId, maxBytes: base64MaxBytes, })) as NormalizedBlockOutput) : result.output - const resultWithBase64 = { ...result, output: outputWithBase64 } - - if (auth.authType !== AuthType.INTERNAL_JWT && workflowHasResponseBlock(resultWithBase64)) { - return createHttpResponseFromBlock(resultWithBase64) + if (auth.authType !== AuthType.INTERNAL_JWT && workflowHasResponseBlock(result)) { + return createHttpResponseFromBlock({ ...result, output: outputWithBase64 }) } + const compactOutput = await compactRoutePayload(outputWithBase64, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) + const filteredResult = { success: result.success, executionId, - output: outputWithBase64, + output: compactOutput, error: result.error, metadata: result.metadata ? { @@ -784,11 +818,21 @@ async function handleExecutePost( const executionResult = hasExecutionResult(error) ? 
error.executionResult : undefined const status = getExecutionErrorStatus(error) + const compactErrorOutput = executionResult?.output + ? await compactRoutePayload(executionResult.output, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) + : undefined return NextResponse.json( { success: false, - output: executionResult?.output, + output: compactErrorOutput, error: executionResult?.error || errorMessage || 'Execution failed', metadata: executionResult?.metadata ? { @@ -838,6 +882,10 @@ async function handleExecutePost( timeoutMs: preprocessResult.executionTimeout?.sync, }, executionId, + workspaceId, + workflowId, + userId: actorUserId, + allowLargeValueWorkflowScope: Boolean(resolvedRunFromBlock?.sourceSnapshot), executeFn: async ({ onStream, onBlockComplete, abortSignal }) => executeWorkflow( streamWorkflow, @@ -856,6 +904,8 @@ async function handleExecutePost( base64MaxBytes, abortSignal, executionMode: 'stream', + stopAfterBlockId, + runFromBlock: resolvedRunFromBlock, }, executionId ), @@ -872,7 +922,12 @@ async function handleExecutePost( let isStreamClosed = false let isManualAbortRegistered = false - const eventWriter = createExecutionEventWriter(executionId) + const eventWriter = createExecutionEventWriter(executionId, { + workspaceId, + workflowId, + userId: actorUserId, + preserveUserFileBase64: includeFileBase64, + }) const metaInitialized = await initializeExecutionStreamMeta(executionId, { userId: actorUserId, workflowId, @@ -898,16 +953,18 @@ async function handleExecutePost( terminalStatus?: TerminalExecutionStreamStatus ) => { const isBuffered = event.type !== 'stream:chunk' && event.type !== 'stream:done' + let eventToSend = event if (isBuffered) { const entry = terminalStatus ? await eventWriter.writeTerminal(event, terminalStatus) : await eventWriter.write(event) - event.eventId = entry.eventId + eventToSend = entry.event + eventToSend.eventId = entry.eventId terminalEventPublished ||= Boolean(terminalStatus) } if (!isStreamClosed) { try { - controller.enqueue(encodeSSEEvent(event)) + controller.enqueue(encodeSSEEvent(eventToSend)) } catch { isStreamClosed = true } @@ -971,7 +1028,26 @@ async function handleExecutePost( iterationContext?: IterationContext, childWorkflowContext?: ChildWorkflowContext ) => { - const hasError = callbackData.output?.error + const compactCallbackData = { + ...callbackData, + input: await compactRoutePayload(callbackData.input, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }), + output: await compactRoutePayload(callbackData.output, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }), + } + const hasError = compactCallbackData.output?.error const childWorkflowData = childWorkflowContext ? 
{ childWorkflowBlockId: childWorkflowContext.parentBlockId, @@ -988,7 +1064,7 @@ async function handleExecutePost( blockId, blockName, blockType, - error: callbackData.output.error, + error: compactCallbackData.output.error, }) await sendEvent({ type: 'block:error', @@ -999,12 +1075,12 @@ async function handleExecutePost( blockId, blockName, blockType, - input: callbackData.input, - error: callbackData.output.error, - durationMs: callbackData.executionTime || 0, - startedAt: callbackData.startedAt, - executionOrder: callbackData.executionOrder, - endedAt: callbackData.endedAt, + input: compactCallbackData.input, + error: compactCallbackData.output.error, + durationMs: compactCallbackData.executionTime || 0, + startedAt: compactCallbackData.startedAt, + executionOrder: compactCallbackData.executionOrder, + endedAt: compactCallbackData.endedAt, ...(iterationContext && { iterationCurrent: iterationContext.iterationCurrent, iterationTotal: iterationContext.iterationTotal, @@ -1033,12 +1109,12 @@ async function handleExecutePost( blockId, blockName, blockType, - input: callbackData.input, - output: callbackData.output, - durationMs: callbackData.executionTime || 0, - startedAt: callbackData.startedAt, - executionOrder: callbackData.executionOrder, - endedAt: callbackData.endedAt, + input: compactCallbackData.input, + output: compactCallbackData.output, + durationMs: compactCallbackData.executionTime || 0, + startedAt: compactCallbackData.startedAt, + executionOrder: compactCallbackData.executionOrder, + endedAt: compactCallbackData.endedAt, ...(iterationContext && { iterationCurrent: iterationContext.iterationCurrent, iterationTotal: iterationContext.iterationTotal, @@ -1172,6 +1248,20 @@ async function handleExecutePost( await handlePostExecutionPauseState({ result, workflowId, executionId, loggingSession }) + /** + * Compact block logs once and reuse across cancelled/timeout/paused/complete + * SSE events. Walks all block logs and durably serializes large values to + * object storage, so doing it twice would double the latency and storage + * load on the happy path. + */ + const compactedBlockLogs = await compactBlockLogs(result.logs, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + requireDurable: true, + }) + if (result.status === 'cancelled') { if (timeoutController.isTimedOut() && timeoutController.timeoutMs) { const timeoutErrorMessage = getTimeoutErrorMessage(null, timeoutController.timeoutMs) @@ -1191,7 +1281,7 @@ async function handleExecutePost( data: { error: timeoutErrorMessage, duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactedBlockLogs, }, }, 'error' @@ -1208,7 +1298,7 @@ async function handleExecutePost( workflowId, data: { duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactedBlockLogs, }, }, 'cancelled' @@ -1220,10 +1310,22 @@ async function handleExecutePost( const sseOutput = includeFileBase64 ? 
await hydrateUserFilesWithBase64(result.output, { requestId, + workspaceId, + workflowId, executionId, + allowLargeValueWorkflowScope: Boolean(resolvedRunFromBlock?.sourceSnapshot), + userId: actorUserId, maxBytes: base64MaxBytes, }) : result.output + const compactSseOutput = await compactRoutePayload(sseOutput, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) if (result.status === 'paused') { finalMetaStatus = 'complete' @@ -1234,11 +1336,11 @@ async function handleExecutePost( executionId, workflowId, data: { - output: sseOutput, + output: compactSseOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || startTime.toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactedBlockLogs, }, }, 'complete' @@ -1253,11 +1355,11 @@ async function handleExecutePost( workflowId, data: { success: result.success, - output: sseOutput, + output: compactSseOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || startTime.toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactedBlockLogs, }, }, 'complete' @@ -1274,6 +1376,22 @@ async function handleExecutePost( reqLogger.error(`SSE execution failed: ${errorMessage}`, { isTimeout }) const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + let compactErrorLogs: BlockLog[] | undefined + try { + compactErrorLogs = executionResult?.logs + ? await compactBlockLogs(executionResult.logs, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + requireDurable: true, + }) + : undefined + } catch (compactionError) { + reqLogger.warn('Failed to compact SSE error logs, omitting oversized error details', { + error: toError(compactionError).message, + }) + } finalMetaStatus = 'error' await sendEvent( @@ -1285,7 +1403,7 @@ async function handleExecutePost( data: { error: executionResult?.error || errorMessage, duration: executionResult?.metadata?.duration || 0, - finalBlockLogs: executionResult?.logs, + finalBlockLogs: compactErrorLogs, }, }, 'error' diff --git a/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts b/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts index 02fab158465..92f32a26f7d 100644 --- a/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts +++ b/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts @@ -55,14 +55,19 @@ async function completePausedCancellationWithRetry( async function ensurePausedCancellationEventPublished( executionId: string, - workflowId: string + workflowId: string, + context: { workspaceId?: string; userId?: string } = {} ): Promise { const metaState = await readExecutionMetaState(executionId) if (metaState.status === 'found' && metaState.meta.status === 'cancelled') { return true } - const writer = createExecutionEventWriter(executionId) + const writer = createExecutionEventWriter(executionId, { + workspaceId: context.workspaceId, + workflowId, + userId: context.userId, + }) try { await writer.writeTerminal( { @@ -195,7 +200,11 @@ export const POST = withRouteHandler( if (pausedCancellationStarted) { pausedCancellationPublished = await ensurePausedCancellationEventPublished( executionId, - workflowId + workflowId, + { + workspaceId: workflowAuthorization.workflow?.workspaceId ?? 
undefined, + userId: auth.userId, + } ) pausedCancellationPublishFailed = !pausedCancellationPublished if (pausedCancellationPublished) { @@ -205,14 +214,22 @@ export const POST = withRouteHandler( if (pendingPausedCancellation === 'cancelled') { pausedCancellationPublished = await ensurePausedCancellationEventPublished( executionId, - workflowId + workflowId, + { + workspaceId: workflowAuthorization.workflow?.workspaceId ?? undefined, + userId: auth.userId, + } ) pausedCancellationPublishFailed = !pausedCancellationPublished pausedCancelled = pausedCancellationPublished } else if (pendingPausedCancellation === 'cancelling') { pausedCancellationPublished = await ensurePausedCancellationEventPublished( executionId, - workflowId + workflowId, + { + workspaceId: workflowAuthorization.workflow?.workspaceId ?? undefined, + userId: auth.userId, + } ) pausedCancellationPublishFailed = !pausedCancellationPublished if (pausedCancellationPublished) { diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx index 5fa9abe78fb..4805266b950 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx @@ -53,6 +53,7 @@ export function SubflowEditor({ isCountMode, isConditionMode, inputValue, + batchSizeValue, editorValue, typeOptions, showTagDropdown, @@ -60,7 +61,9 @@ export function SubflowEditor({ editorContainerRef, handleSubflowTypeChange, handleSubflowIterationsChange, - handleSubflowIterationsSave, + handleSubflowIterationsBlur, + handleParallelBatchSizeChange, + handleParallelBatchSizeBlur, handleSubflowEditorChange, handleSubflowTagSelect, highlightWithReferences, @@ -80,6 +83,7 @@ export function SubflowEditor({ activeSearchTarget.canonicalSubBlockId === fieldId) const isTypeHighlighted = isSearchHighlighted(WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.type) const isConfigHighlighted = isSearchHighlighted(configSearchFieldId) + const isBatchSizeHighlighted = isSearchHighlighted(WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize) return (
    @@ -149,13 +153,12 @@ export function SubflowEditor({ type='text' value={inputValue} onChange={handleSubflowIterationsChange} - onBlur={handleSubflowIterationsSave} - onKeyDown={(e) => e.key === 'Enter' && handleSubflowIterationsSave()} + onBlur={handleSubflowIterationsBlur} disabled={!userCanEdit} className='mb-1' />
-            Enter a number between 1 and {subflowConfig.maxIterations}
+            Enter a whole number greater than 0.
    ) : ( @@ -197,6 +200,33 @@ export function SubflowEditor({ )} + + {currentBlock.type === 'parallel' && ( +
+
+
+
+            Run 1 to 20 parallel branches at a time.
+
+
    + )} diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts index 08428f5d17c..915e7fb77dd 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts @@ -29,7 +29,6 @@ const SUBFLOW_CONFIG = { }, typeKey: 'loopType' as const, storeKey: 'loops' as const, - maxIterations: 1000, configKeys: { iterations: 'iterations' as const, items: 'forEachItems' as const, @@ -40,7 +39,6 @@ const SUBFLOW_CONFIG = { typeLabels: { count: 'Parallel Count', collection: 'Parallel Each' }, typeKey: 'parallelType' as const, storeKey: 'parallels' as const, - maxIterations: 20, configKeys: { iterations: 'count' as const, items: 'distribution' as const, @@ -61,9 +59,17 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId const textareaRef = useRef(null) const editorContainerRef = useRef(null) - const [tempInputValue, setTempInputValue] = useState(null) const [showTagDropdown, setShowTagDropdown] = useState(false) const [cursorPosition, setCursorPosition] = useState(0) + /** + * In-flight string buffers for the numeric inputs. These let the user + * temporarily clear or mid-type the field (e.g. backspace to empty before + * typing a new value) without React snapping the value back from the store. + * Persistence still happens on every keystroke that parses to a number; + * the buffer is cleared on blur so the input rebinds to the store value. + */ + const [iterationsBuffer, setIterationsBuffer] = useState(null) + const [batchSizeBuffer, setBatchSizeBuffer] = useState(null) const isSubflow = currentBlock && (currentBlock.type === 'loop' || currentBlock.type === 'parallel') @@ -97,6 +103,7 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId const { collaborativeUpdateLoopType, collaborativeUpdateParallelType, + collaborativeUpdateParallelBatchSize, collaborativeUpdateIterationCount, collaborativeUpdateIterationCollection, } = useCollaborativeWorkflow() @@ -218,47 +225,54 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId ) /** - * Handle iterations input change + * Persist iterations on every keystroke that parses to a number. The + * visible string is buffered so transient states (empty, "0", partial typing) + * render correctly without snapping back to the persisted value. 
*/ const handleSubflowIterationsChange = useCallback( (e: React.ChangeEvent) => { - if (!subflowConfig) return + if (!currentBlockId || !isSubflow || !subflowConfig || !currentBlock) return const sanitizedValue = e.target.value.replace(/[^0-9]/g, '') + setIterationsBuffer(sanitizedValue) const numValue = Number.parseInt(sanitizedValue) - - if (!Number.isNaN(numValue)) { - setTempInputValue(Math.min(subflowConfig.maxIterations, numValue).toString()) - } else { - setTempInputValue(sanitizedValue) - } + if (Number.isNaN(numValue)) return + collaborativeUpdateIterationCount( + currentBlockId, + currentBlock.type as 'loop' | 'parallel', + Math.max(1, numValue) + ) }, - [subflowConfig] + [currentBlockId, isSubflow, subflowConfig, currentBlock, collaborativeUpdateIterationCount] ) /** - * Save iterations value + * Clears the iterations buffer on blur so the field re-binds to the + * canonical store value (e.g. if the user left it empty, it snaps back + * to the last persisted count). */ - const handleSubflowIterationsSave = useCallback(() => { - if (!currentBlockId || !isSubflow || !subflowConfig || !currentBlock) return - const value = Number.parseInt(tempInputValue ?? '5') + const handleSubflowIterationsBlur = useCallback(() => { + setIterationsBuffer(null) + }, []) - if (!Number.isNaN(value)) { - const newValue = Math.min(subflowConfig.maxIterations, Math.max(1, value)) - collaborativeUpdateIterationCount( - currentBlockId, - currentBlock.type as 'loop' | 'parallel', - newValue - ) - } - setTempInputValue(null) - }, [ - tempInputValue, - currentBlockId, - isSubflow, - subflowConfig, - currentBlock, - collaborativeUpdateIterationCount, - ]) + /** + * Persist parallel batch size on every keystroke that parses to a number, + * clamped to 1..20. Buffered the same way as iterations. + */ + const handleParallelBatchSizeChange = useCallback( + (e: React.ChangeEvent) => { + if (!currentBlockId || currentBlock?.type !== 'parallel') return + const sanitizedValue = e.target.value.replace(/[^0-9]/g, '') + setBatchSizeBuffer(sanitizedValue) + const numValue = Number.parseInt(sanitizedValue) + if (Number.isNaN(numValue)) return + collaborativeUpdateParallelBatchSize(currentBlockId, Math.min(20, Math.max(1, numValue))) + }, + [currentBlockId, currentBlock, collaborativeUpdateParallelBatchSize] + ) + + const handleParallelBatchSizeBlur = useCallback(() => { + setBatchSizeBuffer(null) + }, []) /** * Handle editor value change (collection/condition) @@ -342,11 +356,16 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId : '' const iterations = configIterations + const parallelBatchSize = + isSubflow && currentBlock?.type === 'parallel' + ? ((nodeConfig as any)?.batchSize ?? (blockData as any)?.batchSize ?? 20) + : 20 const collectionString = typeof configCollection === 'string' ? configCollection : JSON.stringify(configCollection) || '' const conditionString = typeof configCondition === 'string' ? configCondition : '' - const inputValue = tempInputValue ?? iterations.toString() + const inputValue = iterationsBuffer ?? iterations.toString() + const batchSizeValue = batchSizeBuffer ?? parallelBatchSize.toString() const editorValue = isConditionMode ? 
conditionString : collectionString // Type options for combobox @@ -366,6 +385,7 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId isCountMode, isConditionMode, inputValue, + batchSizeValue, editorValue, typeOptions, showTagDropdown, @@ -376,7 +396,9 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId // Handlers handleSubflowTypeChange, handleSubflowIterationsChange, - handleSubflowIterationsSave, + handleSubflowIterationsBlur, + handleParallelBatchSizeChange, + handleParallelBatchSizeBlur, handleSubflowEditorChange, handleSubflowTagSelect, highlightWithReferences, diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx index 6cc83299279..3144de9d5e4 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx @@ -14,6 +14,8 @@ import { import { List, type RowComponentProps, useListRef } from 'react-window' import { Badge, ChevronDown } from '@/components/emcn' import { cn } from '@/lib/core/utils/cn' +import { isUserFileDisplayMetadata } from '@/lib/core/utils/user-file' +import { isLargeValueRef, type LargeValueRef } from '@/lib/execution/payloads/large-value-ref' type ValueType = 'null' | 'undefined' | 'array' | 'string' | 'number' | 'boolean' | 'object' type BadgeVariant = 'green' | 'blue' | 'orange' | 'purple' | 'gray' | 'red' @@ -74,6 +76,19 @@ const STYLES = { } as const const EMPTY_MATCH_INDICES: number[] = [] +const USER_FILE_BASE64_PLACEHOLDER = '[TRUNCATED]' + +function formatLargeValueSize(bytes: number): string { + return `${(bytes / (1024 * 1024)).toFixed(1)} MB` +} + +function getLargeValueDisplayValue(ref: LargeValueRef): unknown { + return ref.preview ?? `[Large value: ${formatLargeValueSize(ref.size)}]` +} + +function getDisplayValue(value: unknown): unknown { + return isLargeValueRef(value) ? getLargeValueDisplayValue(value) : value +} function getTypeLabel(value: unknown): ValueType { if (value === null) return 'null' @@ -109,23 +124,39 @@ function extractErrorMessage(data: unknown): string { } function buildEntries(value: unknown, basePath: string): NodeEntry[] { - if (Array.isArray(value)) { - return value.map((item, i) => ({ key: String(i), value: item, path: `${basePath}[${i}]` })) + const displayValue = getDisplayValue(value) + + if (Array.isArray(displayValue)) { + return displayValue.map((item, i) => ({ + key: String(i), + value: item, + path: `${basePath}[${i}]`, + })) } - return Object.entries(value as Record).map(([k, v]) => ({ + const entries = Object.entries(displayValue as Record).map(([k, v]) => ({ key: k, value: v, path: `${basePath}.${k}`, })) + if (isUserFileDisplayMetadata(displayValue) && !('base64' in displayValue)) { + entries.push({ + key: 'base64', + value: USER_FILE_BASE64_PLACEHOLDER, + path: `${basePath}.base64`, + }) + } + return entries } function getCollapsedSummary(value: unknown): string | null { - if (Array.isArray(value)) { - const len = value.length + const displayValue = getDisplayValue(value) + + if (Array.isArray(displayValue)) { + const len = displayValue.length return `${len} item${len !== 1 ? 
's' : ''}` } - if (typeof value === 'object' && value !== null) { - const count = Object.keys(value).length + if (typeof displayValue === 'object' && displayValue !== null) { + const count = buildEntries(displayValue, '').length return `${count} key${count !== 1 ? 's' : ''}` } return null @@ -133,10 +164,11 @@ function getCollapsedSummary(value: unknown): string | null { function computeInitialPaths(data: unknown, isError: boolean): Set { if (isError) return new Set(['root.error']) - if (!data || typeof data !== 'object') return new Set() - const entries = Array.isArray(data) - ? data.map((_, i) => `root[${i}]`) - : Object.keys(data).map((k) => `root.${k}`) + const displayData = getDisplayValue(data) + if (!displayData || typeof displayData !== 'object') return new Set() + const entries = Array.isArray(displayData) + ? displayData.map((_, i) => `root[${i}]`) + : Object.keys(displayData).map((k) => `root.${k}`) return new Set(entries) } @@ -184,13 +216,14 @@ function collectAllMatchPaths(data: unknown, query: string, basePath: string, de if (!query || depth > CONFIG.MAX_SEARCH_DEPTH) return [] const matches: string[] = [] + const displayData = getDisplayValue(data) - if (isPrimitive(data)) { - addPrimitiveMatches(data, `${basePath}.value`, query, matches) + if (isPrimitive(displayData)) { + addPrimitiveMatches(displayData, `${basePath}.value`, query, matches) return matches } - for (const entry of buildEntries(data, basePath)) { + for (const entry of buildEntries(displayData, basePath)) { if (isPrimitive(entry.value)) { addPrimitiveMatches(entry.value, entry.path, query, matches) } else { @@ -317,9 +350,10 @@ const StructuredNode = memo(function StructuredNode({ isError = false, }: StructuredNodeProps) { const searchContext = useContext(SearchContext) - const type = getTypeLabel(value) - const isPrimitiveValue = isPrimitive(value) - const isEmptyValue = !isPrimitiveValue && isEmpty(value) + const displayValue = getDisplayValue(value) + const type = getTypeLabel(displayValue) + const isPrimitiveValue = isPrimitive(displayValue) + const isEmptyValue = !isPrimitiveValue && isEmpty(displayValue) const isExpanded = expandedPaths.has(path) const handleToggle = useCallback(() => onToggle(path), [onToggle, path]) @@ -335,17 +369,17 @@ const StructuredNode = memo(function StructuredNode({ ) const childEntries = useMemo( - () => (isPrimitiveValue || isEmptyValue ? [] : buildEntries(value, path)), - [value, isPrimitiveValue, isEmptyValue, path] + () => (isPrimitiveValue || isEmptyValue ? [] : buildEntries(displayValue, path)), + [displayValue, isPrimitiveValue, isEmptyValue, path] ) const collapsedSummary = useMemo( - () => (isPrimitiveValue ? null : getCollapsedSummary(value)), - [value, isPrimitiveValue] + () => (isPrimitiveValue ? null : getCollapsedSummary(displayValue)), + [displayValue, isPrimitiveValue] ) const badgeVariant = isError ? 'red' : BADGE_VARIANTS[type] - const valueText = isPrimitiveValue ? formatPrimitive(value) : '' + const valueText = isPrimitiveValue ? formatPrimitive(displayValue) : '' const matchIndices = searchContext?.pathToMatchIndices.get(path) ?? 
EMPTY_MATCH_INDICES return ( @@ -472,16 +506,17 @@ function flattenTree( } function processNode(key: string, value: unknown, path: string, depth: number): void { - const valueType = getTypeLabel(value) - const isPrimitiveValue = isPrimitive(value) - const isEmptyValue = !isPrimitiveValue && isEmpty(value) + const displayValue = getDisplayValue(value) + const valueType = getTypeLabel(displayValue) + const isPrimitiveValue = isPrimitive(displayValue) + const isEmptyValue = !isPrimitiveValue && isEmpty(displayValue) const isExpanded = expandedPaths.has(path) - const collapsedSummary = isPrimitiveValue ? null : getCollapsedSummary(value) + const collapsedSummary = isPrimitiveValue ? null : getCollapsedSummary(displayValue) rows.push({ path, key, - value, + value: displayValue, depth, type: 'header', valueType, @@ -497,42 +532,43 @@ function flattenTree( rows.push({ path: `${path}.value`, key: '', - value, + value: displayValue, depth: depth + 1, type: 'value', valueType, isExpanded: false, isError: false, collapsedSummary: null, - displayText: formatPrimitive(value), + displayText: formatPrimitive(displayValue), matchIndices: pathToMatchIndices.get(path) ?? [], }) } else if (isEmptyValue) { rows.push({ path: `${path}.empty`, key: '', - value, + value: displayValue, depth: depth + 1, type: 'empty', valueType, isExpanded: false, isError: false, collapsedSummary: null, - displayText: Array.isArray(value) ? '[]' : '{}', + displayText: Array.isArray(displayValue) ? '[]' : '{}', matchIndices: [], }) } else { - for (const entry of buildEntries(value, path)) { + for (const entry of buildEntries(displayValue, path)) { processNode(entry.key, entry.value, entry.path, depth + 1) } } } } - if (isPrimitive(data)) { - processNode('value', data, 'root.value', 0) - } else if (data && typeof data === 'object') { - for (const entry of buildEntries(data, 'root')) { + const displayData = getDisplayValue(data) + if (isPrimitive(displayData)) { + processNode('value', displayData, 'root.value', 0) + } else if (displayData && typeof displayData === 'object') { + for (const entry of buildEntries(displayData, 'root')) { processNode(entry.key, entry.value, entry.path, 0) } } @@ -549,22 +585,24 @@ function countVisibleRows(data: unknown, expandedPaths: Set, isError: bo let count = 0 function countNode(value: unknown, path: string): void { + const displayValue = getDisplayValue(value) count++ if (!expandedPaths.has(path)) return - if (isPrimitive(value) || isEmpty(value)) { + if (isPrimitive(displayValue) || isEmpty(displayValue)) { count++ } else { - for (const entry of buildEntries(value, path)) { + for (const entry of buildEntries(displayValue, path)) { countNode(entry.value, entry.path) } } } - if (isPrimitive(data)) { - countNode(data, 'root.value') - } else if (data && typeof data === 'object') { - for (const entry of buildEntries(data, 'root')) { + const displayData = getDisplayValue(data) + if (isPrimitive(displayData)) { + countNode(displayData, 'root.value') + } else if (displayData && typeof displayData === 'object') { + for (const entry of buildEntries(displayData, 'root')) { countNode(entry.value, entry.path) } } @@ -782,8 +820,9 @@ export const StructuredOutput = memo(function StructuredOutput({ }, []) const rootEntries = useMemo(() => { - if (isPrimitive(data)) return [{ key: 'value', value: data, path: 'root.value' }] - return buildEntries(data, 'root') + const displayData = getDisplayValue(data) + if (isPrimitive(displayData)) return [{ key: 'value', value: displayData, path: 'root.value' }] + return 
buildEntries(displayData, 'root') }, [data]) const searchContextValue = useMemo(() => { diff --git a/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx b/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx index 735408dae4b..e9d82206878 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx @@ -572,14 +572,12 @@ const SUBFLOW_CONFIG = { while: 'While Loop', doWhile: 'Do While Loop', }, - maxIterations: 1000, }, parallel: { typeLabels: { count: 'Parallel Count', collection: 'Parallel Each', }, - maxIterations: 20, }, } as const @@ -685,7 +683,7 @@ function SubflowConfigDisplay({ block, loop, parallel }: SubflowConfigDisplayPro className='mb-1' />
    - Enter a number between 1 and {config.maxIterations} + Enter a whole number greater than 0.
    ) : ( diff --git a/apps/sim/background/webhook-execution.ts b/apps/sim/background/webhook-execution.ts index bfd515695a3..1753813d849 100644 --- a/apps/sim/background/webhook-execution.ts +++ b/apps/sim/background/webhook-execution.ts @@ -578,7 +578,7 @@ async function executeWebhookJobInternal( snapshot, callbacks: {}, loggingSession, - includeFileBase64: true, + includeFileBase64: false, base64MaxBytes: undefined, abortSignal: timeoutController.signal, }) diff --git a/apps/sim/executor/constants.ts b/apps/sim/executor/constants.ts index feb962fe9ce..71fa8dea3f1 100644 --- a/apps/sim/executor/constants.ts +++ b/apps/sim/executor/constants.ts @@ -67,6 +67,7 @@ export const EDGE = { LOOP_CONTINUE: 'loop_continue', LOOP_CONTINUE_ALT: 'loop-continue-source', LOOP_EXIT: 'loop_exit', + PARALLEL_CONTINUE: 'parallel_continue', PARALLEL_EXIT: 'parallel_exit', ERROR: 'error', SOURCE: 'source', @@ -158,8 +159,7 @@ export const DEFAULTS = { BLOCK_TYPE: 'unknown', BLOCK_TITLE: 'Untitled Block', WORKFLOW_NAME: 'Workflow', - MAX_LOOP_ITERATIONS: 1000, - MAX_FOREACH_ITEMS: 1000, + DEFAULT_LOOP_ITERATIONS: 1000, MAX_PARALLEL_BRANCHES: 20, MAX_NESTING_DEPTH: 10, /** Maximum child workflow depth for propagating SSE callbacks (block:started, block:completed). */ diff --git a/apps/sim/executor/execution/block-executor.ts b/apps/sim/executor/execution/block-executor.ts index f1506bb33fc..a8fd36c63a7 100644 --- a/apps/sim/executor/execution/block-executor.ts +++ b/apps/sim/executor/execution/block-executor.ts @@ -3,6 +3,7 @@ import { toError } from '@sim/utils/errors' import { redactApiKeys } from '@/lib/core/security/redaction' import { normalizeStringArray } from '@/lib/core/utils/arrays' import { getBaseUrl } from '@/lib/core/utils/urls' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { containsUserFileWithMetadata, hydrateUserFilesWithBase64, @@ -126,7 +127,12 @@ export class BlockExecutor { resolvedInputs: fnInputs, displayInputs, contextVariables, - } = this.resolver.resolveInputsForFunctionBlock(ctx, node.id, block.config.params, block) + } = await this.resolver.resolveInputsForFunctionBlock( + ctx, + node.id, + block.config.params, + block + ) resolvedInputs = { ...fnInputs, [FUNCTION_BLOCK_CONTEXT_VARS_KEY]: contextVariables, @@ -136,7 +142,7 @@ export class BlockExecutor { } inputsForLog = displayInputs } else { - resolvedInputs = this.resolver.resolveInputs(ctx, node.id, block.config.params, block) + resolvedInputs = await this.resolver.resolveInputs(ctx, node.id, block.config.params, block) inputsForLog = resolvedInputs } @@ -189,14 +195,28 @@ export class BlockExecutor { normalizedOutput = this.normalizeOutput(output) } - if (containsUserFileWithMetadata(normalizedOutput)) { + if (ctx.includeFileBase64 === true && containsUserFileWithMetadata(normalizedOutput)) { normalizedOutput = (await hydrateUserFilesWithBase64(normalizedOutput, { requestId: ctx.metadata.requestId, + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, executionId: ctx.executionId, + largeValueExecutionIds: ctx.largeValueExecutionIds, + allowLargeValueWorkflowScope: ctx.allowLargeValueWorkflowScope, + userId: ctx.userId, maxBytes: ctx.base64MaxBytes, })) as NormalizedBlockOutput } + normalizedOutput = (await compactExecutionPayload(normalizedOutput, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + preserveUserFileBase64: ctx.includeFileBase64 === true, + requireDurable: true, + })) as NormalizedBlockOutput + 
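// Editor's sketch (illustration, not part of this diff): after compaction,
// oversized block outputs travel as LargeValueRef markers carrying a byte
// `size` and an optional `preview` — the same fields the terminal panel's
// getLargeValueDisplayValue reads above. Downstream code can branch on the
// marker instead of rehydrating the full value:
//
//   import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref'
//
//   function describeOutput(value: unknown): string {
//     if (isLargeValueRef(value)) {
//       // Mirror the panel's display fallback for spilled values.
//       const mb = (value.size / (1024 * 1024)).toFixed(1)
//       return `[Large value: ${mb} MB]`
//     }
//     return JSON.stringify(value)
//   }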
const endedAt = new Date().toISOString() const duration = performance.now() - startTime diff --git a/apps/sim/executor/execution/edge-manager.ts b/apps/sim/executor/execution/edge-manager.ts index 7bedea3a5a2..63a0748c8c6 100644 --- a/apps/sim/executor/execution/edge-manager.ts +++ b/apps/sim/executor/execution/edge-manager.ts @@ -230,6 +230,10 @@ export class EdgeManager { return handle === EDGE.PARALLEL_EXIT } + if (output.selectedRoute === EDGE.PARALLEL_CONTINUE) { + return false + } + if (!handle) { return true } diff --git a/apps/sim/executor/execution/executor.ts b/apps/sim/executor/execution/executor.ts index a141e017fb1..4866ebeba81 100644 --- a/apps/sim/executor/execution/executor.ts +++ b/apps/sim/executor/execution/executor.ts @@ -18,6 +18,7 @@ import { LoopOrchestrator } from '@/executor/orchestrators/loop' import { NodeExecutionOrchestrator } from '@/executor/orchestrators/node' import { ParallelOrchestrator } from '@/executor/orchestrators/parallel' import type { BlockState, ExecutionContext, ExecutionResult } from '@/executor/types' +import { ParallelExpander } from '@/executor/utils/parallel-expansion' import { computeExecutionSets, type RunFromBlockContext, @@ -34,6 +35,7 @@ import { extractParallelIdFromSentinel, } from '@/executor/utils/subflow-utils' import { VariableResolver } from '@/executor/variables/resolver' +import { navigatePathAsync } from '@/executor/variables/resolvers/reference-async.server' import type { SerializedWorkflow } from '@/serializer/types' import type { SubflowType } from '@/stores/workflows/workflow/types' @@ -78,6 +80,8 @@ export class DAGExecutor { triggerBlockId, savedIncomingEdges, }) + this.restoreSnapshotParallelBatches(dag, this.contextExtensions.snapshotState) + this.restoreSavedIncomingEdges(dag, savedIncomingEdges) const { context, state } = this.createExecutionContext(workflowId, triggerBlockId) context.subflowParentMap = this.buildSubflowParentMap(dag) @@ -212,8 +216,45 @@ export class DAGExecutor { return await engine.run() } + private restoreSavedIncomingEdges(dag: DAG, savedIncomingEdges?: Record): void { + if (!savedIncomingEdges) return + + for (const [nodeId, incomingEdges] of Object.entries(savedIncomingEdges)) { + const node = dag.nodes.get(nodeId) + if (node) { + node.incomingEdges = new Set(incomingEdges) + } + } + } + + private restoreSnapshotParallelBatches( + dag: DAG, + snapshotState?: SerializableExecutionState + ): void { + if (!snapshotState?.parallelExecutions) return + + const expander = new ParallelExpander() + for (const [parallelId, scope] of Object.entries(snapshotState.parallelExecutions)) { + const currentBatchSize = Number(scope.currentBatchSize ?? 0) + if (!Number.isFinite(currentBatchSize) || currentBatchSize <= 0) continue + + const currentBatchStart = Number(scope.currentBatchStart ?? 0) + const totalBranches = Number(scope.totalBranches ?? currentBatchStart + currentBatchSize) + const items = Array.isArray(scope.items) + ? 
scope.items.slice(currentBatchStart, currentBatchStart + currentBatchSize) + : undefined + + expander.expandParallel(dag, parallelId, currentBatchSize, items, { + branchIndexOffset: currentBatchStart, + totalBranches, + }) + } + } + private buildExecutionPipeline(context: ExecutionContext, dag: DAG, state: ExecutionState) { - const resolver = new VariableResolver(this.workflow, this.workflowVariables, state) + const resolver = new VariableResolver(this.workflow, this.workflowVariables, state, { + navigatePathAsync, + }) const allHandlers = createBlockHandlers() const blockExecutor = new BlockExecutor(allHandlers, resolver, this.contextExtensions, state) const edgeManager = new EdgeManager(dag) @@ -271,6 +312,8 @@ export class DAGExecutor { workflowId, workspaceId: this.contextExtensions.workspaceId, executionId: this.contextExtensions.executionId, + largeValueExecutionIds: this.contextExtensions.largeValueExecutionIds, + allowLargeValueWorkflowScope: this.contextExtensions.allowLargeValueWorkflowScope, userId: this.contextExtensions.userId, isDeployedContext: this.contextExtensions.isDeployedContext, enforceCredentialAccess: this.contextExtensions.enforceCredentialAccess, @@ -317,10 +360,18 @@ export class DAGExecutor { branchOutputs: scope.branchOutputs ? new Map(Object.entries(scope.branchOutputs).map(([k, v]) => [Number(k), v])) : new Map(), + accumulatedOutputs: scope.accumulatedOutputs + ? new Map( + Object.entries(scope.accumulatedOutputs).map(([k, v]) => [Number(k), v]) + ) + : new Map(), }, ]) ) : new Map(), + parallelBlockMapping: snapshotState?.parallelBlockMapping + ? new Map(Object.entries(snapshotState.parallelBlockMapping)) + : new Map(), executedBlocks: state.getExecutedBlocks(), activeExecutionPath: snapshotState?.activeExecutionPath ? 
new Set(snapshotState.activeExecutionPath) diff --git a/apps/sim/executor/execution/snapshot-serializer.test.ts b/apps/sim/executor/execution/snapshot-serializer.test.ts new file mode 100644 index 00000000000..9aa273d2bbd --- /dev/null +++ b/apps/sim/executor/execution/snapshot-serializer.test.ts @@ -0,0 +1,70 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { serializePauseSnapshot } from '@/executor/execution/snapshot-serializer' +import type { ExecutionContext } from '@/executor/types' + +function createContext(overrides: Partial = {}): ExecutionContext { + return { + workflowId: 'workflow-1', + workspaceId: 'workspace-1', + executionId: 'execution-1', + userId: 'user-1', + blockStates: new Map(), + executedBlocks: new Set(), + blockLogs: [], + metadata: { + requestId: 'request-1', + executionId: 'execution-1', + workflowId: 'workflow-1', + workspaceId: 'workspace-1', + userId: 'user-1', + triggerType: 'manual', + useDraftState: true, + startTime: '2026-01-01T00:00:00.000Z', + }, + environmentVariables: {}, + decisions: { + router: new Map(), + condition: new Map(), + }, + completedLoops: new Set(), + activeExecutionPath: new Set(), + ...overrides, + } as ExecutionContext +} + +describe('serializePauseSnapshot', () => { + it('serializes batched parallel accumulated outputs for cross-process resume', () => { + const context = createContext({ + parallelExecutions: new Map([ + [ + 'parallel-1', + { + parallelId: 'parallel-1', + totalBranches: 3, + branchOutputs: new Map([[2, [{ output: 'current-batch' }]]]), + accumulatedOutputs: new Map([ + [0, [{ output: 'batch-0' }]], + [1, [{ output: 'batch-1' }]], + ]), + }, + ], + ]), + }) + + const snapshot = serializePauseSnapshot(context, ['next-block']) + const serialized = JSON.parse(snapshot.snapshot) + + expect(serialized.state.parallelExecutions?.['parallel-1']).toMatchObject({ + branchOutputs: { + 2: [{ output: 'current-batch' }], + }, + accumulatedOutputs: { + 0: [{ output: 'batch-0' }], + 1: [{ output: 'batch-1' }], + }, + }) + }) +}) diff --git a/apps/sim/executor/execution/snapshot-serializer.ts b/apps/sim/executor/execution/snapshot-serializer.ts index 76c2a3dba5f..fbac5b893c2 100644 --- a/apps/sim/executor/execution/snapshot-serializer.ts +++ b/apps/sim/executor/execution/snapshot-serializer.ts @@ -35,16 +35,19 @@ function serializeParallelExecutions( if (!parallelExecutions) return undefined const result: Record = {} for (const [parallelId, scope] of parallelExecutions.entries()) { - let branchOutputs: any - if (scope.branchOutputs instanceof Map) { - branchOutputs = Object.fromEntries(scope.branchOutputs) - } else { - branchOutputs = scope.branchOutputs ?? {} - } + const branchOutputs = + scope.branchOutputs instanceof Map + ? Object.fromEntries(scope.branchOutputs) + : (scope.branchOutputs ?? {}) + const accumulatedOutputs = + scope.accumulatedOutputs instanceof Map + ? Object.fromEntries(scope.accumulatedOutputs) + : (scope.accumulatedOutputs ?? 
{}) result[parallelId] = { ...scope, branchOutputs, + accumulatedOutputs, } } return result diff --git a/apps/sim/executor/execution/state.ts b/apps/sim/executor/execution/state.ts index f9a664ca309..eefe09338c2 100644 --- a/apps/sim/executor/execution/state.ts +++ b/apps/sim/executor/execution/state.ts @@ -21,6 +21,10 @@ export interface LoopScope { export interface ParallelScope { parallelId: string totalBranches: number + batchSize?: number + currentBatchStart?: number + currentBatchSize?: number + accumulatedOutputs?: Map branchOutputs: Map items?: any[] /** Error message if parallel validation failed (e.g., exceeded max branches) */ diff --git a/apps/sim/executor/execution/types.ts b/apps/sim/executor/execution/types.ts index 1ed3db20a32..0180d9e0ad7 100644 --- a/apps/sim/executor/execution/types.ts +++ b/apps/sim/executor/execution/types.ts @@ -35,6 +35,8 @@ export interface ExecutionMetadata { parallels?: Record deploymentVersionId?: string } + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean callChain?: string[] correlation?: AsyncExecutionCorrelation executionMode?: 'sync' | 'stream' | 'async' @@ -143,6 +145,8 @@ export interface ExecutionCallbacks { export interface ContextExtensions { workspaceId?: string executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string stream?: boolean selectedOutputs?: string[] diff --git a/apps/sim/executor/handlers/function/function-handler.test.ts b/apps/sim/executor/handlers/function/function-handler.test.ts index b288940850d..aafd49faea5 100644 --- a/apps/sim/executor/handlers/function/function-handler.test.ts +++ b/apps/sim/executor/handlers/function/function-handler.test.ts @@ -81,6 +81,7 @@ describe('FunctionBlockHandler', () => { _context: { workflowId: mockContext.workflowId, workspaceId: mockContext.workspaceId, + executionId: mockContext.executionId, userId: mockContext.userId, isDeployedContext: mockContext.isDeployedContext, enforceCredentialAccess: mockContext.enforceCredentialAccess, @@ -121,6 +122,7 @@ describe('FunctionBlockHandler', () => { _context: { workflowId: mockContext.workflowId, workspaceId: mockContext.workspaceId, + executionId: mockContext.executionId, userId: mockContext.userId, isDeployedContext: mockContext.isDeployedContext, enforceCredentialAccess: mockContext.enforceCredentialAccess, @@ -154,6 +156,7 @@ describe('FunctionBlockHandler', () => { _context: { workflowId: mockContext.workflowId, workspaceId: mockContext.workspaceId, + executionId: mockContext.executionId, userId: mockContext.userId, isDeployedContext: mockContext.isDeployedContext, enforceCredentialAccess: mockContext.enforceCredentialAccess, diff --git a/apps/sim/executor/handlers/function/function-handler.ts b/apps/sim/executor/handlers/function/function-handler.ts index 53fa8b4451b..ec08996ba5b 100644 --- a/apps/sim/executor/handlers/function/function-handler.ts +++ b/apps/sim/executor/handlers/function/function-handler.ts @@ -49,7 +49,7 @@ export class FunctionBlockHandler implements BlockHandler { readCodeContent(inputs[FUNCTION_BLOCK_DISPLAY_CODE_KEY]) ?? 
readCodeContent((block.config?.params as Record | undefined)?.code) - const { blockData, blockNameMapping, blockOutputSchemas } = collectBlockData(ctx) + const { blockNameMapping, blockOutputSchemas } = collectBlockData(ctx) const contextVariables = normalizeRecord(inputs[FUNCTION_BLOCK_CONTEXT_VARS_KEY]) @@ -60,13 +60,16 @@ export class FunctionBlockHandler implements BlockHandler { timeout: inputs.timeout || DEFAULT_EXECUTION_TIMEOUT_MS, envVars: normalizeStringRecord(ctx.environmentVariables), workflowVariables: normalizeWorkflowVariables(ctx.workflowVariables), - blockData, + blockData: {}, blockNameMapping, blockOutputSchemas, contextVariables, _context: { workflowId: ctx.workflowId, workspaceId: ctx.workspaceId, + executionId: ctx.executionId, + largeValueExecutionIds: ctx.largeValueExecutionIds, + allowLargeValueWorkflowScope: ctx.allowLargeValueWorkflowScope, userId: ctx.userId, isDeployedContext: ctx.isDeployedContext, enforceCredentialAccess: ctx.enforceCredentialAccess, diff --git a/apps/sim/executor/orchestrators/loop.ts b/apps/sim/executor/orchestrators/loop.ts index 1c089ac3cb3..2087bf09c45 100644 --- a/apps/sim/executor/orchestrators/loop.ts +++ b/apps/sim/executor/orchestrators/loop.ts @@ -3,6 +3,8 @@ import { toError } from '@sim/utils/errors' import { generateRequestId } from '@/lib/core/utils/request' import { isExecutionCancelled, isRedisCancellationEnabled } from '@/lib/execution/cancellation' import { executeInIsolatedVM } from '@/lib/execution/isolated-vm' +import { compactSubflowResults } from '@/lib/execution/payloads/serializer' +import { isLikelyReferenceSegment } from '@/lib/workflows/sanitization/references' import { buildLoopIndexCondition, DEFAULTS, EDGE, PARALLEL } from '@/executor/constants' import type { DAG } from '@/executor/dag/builder' import type { EdgeManager } from '@/executor/execution/edge-manager' @@ -10,7 +12,7 @@ import type { LoopScope } from '@/executor/execution/state' import type { BlockStateController, ContextExtensions } from '@/executor/execution/types' import type { ExecutionContext, NormalizedBlockOutput } from '@/executor/types' import type { LoopConfigWithNodes } from '@/executor/types/loop' -import { replaceValidReferences } from '@/executor/utils/reference-validation' +import { createReferencePattern } from '@/executor/utils/reference-validation' import { addSubflowErrorLog, buildParallelSentinelEndId, @@ -20,8 +22,7 @@ import { emitEmptySubflowEvents, emitSubflowSuccessEvents, extractBaseBlockId, - resolveArrayInput, - validateMaxCount, + resolveArrayInputAsync, } from '@/executor/utils/subflow-utils' import type { VariableResolver } from '@/executor/variables/resolver' import type { SerializedLoop } from '@/serializer/types' @@ -30,13 +31,31 @@ const logger = createLogger('LoopOrchestrator') const LOOP_CONDITION_TIMEOUT_MS = 5000 +async function replaceLoopConditionReferences( + condition: string, + replacer: (match: string) => Promise +): Promise { + const pattern = createReferencePattern() + let cursor = 0 + let result = '' + for (const match of condition.matchAll(pattern)) { + const fullMatch = match[0] + const index = match.index ?? 0 + result += condition.slice(cursor, index) + result += isLikelyReferenceSegment(fullMatch) ? 
await replacer(fullMatch) : fullMatch + cursor = index + fullMatch.length + } + return result + condition.slice(cursor) +} + export type LoopRoute = typeof EDGE.LOOP_CONTINUE | typeof EDGE.LOOP_EXIT export interface LoopContinuationResult { shouldContinue: boolean shouldExit: boolean selectedRoute: LoopRoute - aggregatedResults?: NormalizedBlockOutput[][] + aggregatedResults?: unknown + totalIterations?: number } export class LoopOrchestrator { @@ -87,25 +106,7 @@ export class LoopOrchestrator { switch (loopType) { case 'for': { scope.loopType = 'for' - const requestedIterations = loopConfig.iterations || DEFAULTS.MAX_LOOP_ITERATIONS - - const iterationError = validateMaxCount( - requestedIterations, - DEFAULTS.MAX_LOOP_ITERATIONS, - 'For loop iterations' - ) - if (iterationError) { - logger.error(iterationError, { loopId, requestedIterations }) - await this.addLoopErrorLog(ctx, loopId, loopType, iterationError, { - iterations: requestedIterations, - }) - scope.maxIterations = 0 - scope.validationError = iterationError - scope.condition = buildLoopIndexCondition(0) - ctx.loopExecutions?.set(loopId, scope) - throw new Error(iterationError) - } - + const requestedIterations = loopConfig.iterations || DEFAULTS.DEFAULT_LOOP_ITERATIONS scope.maxIterations = requestedIterations scope.condition = buildLoopIndexCondition(scope.maxIterations) break @@ -133,7 +134,7 @@ export class LoopOrchestrator { } let items: any[] try { - items = resolveArrayInput(ctx, loopConfig.forEachItems, this.resolver) + items = await resolveArrayInputAsync(ctx, loopConfig.forEachItems, this.resolver) } catch (error) { const errorMessage = `ForEach loop resolution failed: ${toError(error).message}` logger.error(errorMessage, { loopId, forEachItems: loopConfig.forEachItems }) @@ -148,25 +149,6 @@ export class LoopOrchestrator { throw new Error(errorMessage) } - const sizeError = validateMaxCount( - items.length, - DEFAULTS.MAX_FOREACH_ITEMS, - 'ForEach loop collection size' - ) - if (sizeError) { - logger.error(sizeError, { loopId, collectionSize: items.length }) - await this.addLoopErrorLog(ctx, loopId, loopType, sizeError, { - forEachItems: loopConfig.forEachItems, - collectionSize: items.length, - }) - scope.items = [] - scope.maxIterations = 0 - scope.validationError = sizeError - scope.condition = buildLoopIndexCondition(0) - ctx.loopExecutions?.set(loopId, scope) - throw new Error(sizeError) - } - scope.items = items scope.maxIterations = items.length scope.item = items[0] @@ -184,25 +166,7 @@ export class LoopOrchestrator { if (loopConfig.doWhileCondition) { scope.condition = loopConfig.doWhileCondition } else { - const requestedIterations = loopConfig.iterations || DEFAULTS.MAX_LOOP_ITERATIONS - - const iterationError = validateMaxCount( - requestedIterations, - DEFAULTS.MAX_LOOP_ITERATIONS, - 'Do-While loop iterations' - ) - if (iterationError) { - logger.error(iterationError, { loopId, requestedIterations }) - await this.addLoopErrorLog(ctx, loopId, loopType, iterationError, { - iterations: requestedIterations, - }) - scope.maxIterations = 0 - scope.validationError = iterationError - scope.condition = buildLoopIndexCondition(0) - ctx.loopExecutions?.set(loopId, scope) - throw new Error(iterationError) - } - + const requestedIterations = loopConfig.iterations || DEFAULTS.DEFAULT_LOOP_ITERATIONS scope.maxIterations = requestedIterations scope.condition = buildLoopIndexCondition(scope.maxIterations) } @@ -313,8 +277,17 @@ export class LoopOrchestrator { scope: LoopScope ): Promise { const results = 
scope.allIterationOutputs - const output = { results } + const totalIterations = results.length + const compactedResults = await compactSubflowResults(results, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + }) + const output = { results: compactedResults } this.state.setBlockOutput(loopId, output, DEFAULTS.EXECUTION_TIME) + scope.allIterationOutputs = [] await emitSubflowSuccessEvents(ctx, loopId, 'loop', output, this.contextExtensions) @@ -322,7 +295,8 @@ export class LoopOrchestrator { shouldContinue: false, shouldExit: true, selectedRoute: EDGE.LOOP_EXIT, - aggregatedResults: results, + aggregatedResults: output.results, + totalIterations, } } @@ -680,8 +654,8 @@ export class LoopOrchestrator { workflowVariables: ctx.workflowVariables, }) - const evaluatedCondition = replaceValidReferences(condition, (match) => { - const resolved = this.resolver.resolveSingleReference(ctx, '', match, scope) + const evaluatedCondition = await replaceLoopConditionReferences(condition, async (match) => { + const resolved = await this.resolver.resolveSingleReference(ctx, '', match, scope) logger.debug('Resolved variable reference in loop condition', { reference: match, resolvedValue: resolved, diff --git a/apps/sim/executor/orchestrators/node.ts b/apps/sim/executor/orchestrators/node.ts index 9844e93fb57..4db656c3254 100644 --- a/apps/sim/executor/orchestrators/node.ts +++ b/apps/sim/executor/orchestrators/node.ts @@ -1,4 +1,5 @@ import { createLogger } from '@sim/logger' +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' import { EDGE } from '@/executor/constants' import type { DAG, DAGNode } from '@/executor/dag/builder' import type { BlockExecutor } from '@/executor/execution/block-executor' @@ -10,6 +11,20 @@ import { extractBaseBlockId } from '@/executor/utils/subflow-utils' const logger = createLogger('NodeExecutionOrchestrator') +function getResultCount(value: unknown): number { + if (isLargeValueRef(value)) { + const preview = value.preview + if ( + preview && + typeof preview === 'object' && + typeof (preview as Record).length === 'number' + ) { + return (preview as { length: number }).length + } + } + return Array.isArray(value) ? value.length : 0 +} + export interface NodeExecutionResult { nodeId: string output: NormalizedBlockOutput @@ -130,7 +145,9 @@ export class NodeExecutionOrchestrator { shouldContinue: false, shouldExit: true, selectedRoute: continuationResult.selectedRoute, - totalIterations: continuationResult.aggregatedResults?.length || 0, + totalIterations: + continuationResult.totalIterations ?? 
+ getResultCount(continuationResult.aggregatedResults), } } @@ -174,6 +191,14 @@ export class NodeExecutionOrchestrator { if (sentinelType === 'end') { const result = await this.parallelOrchestrator.aggregateParallelResults(ctx, parallelId) + if (!result.allBranchesComplete) { + return { + results: [], + sentinelEnd: true, + selectedRoute: EDGE.PARALLEL_CONTINUE, + totalBranches: result.totalBranches, + } + } return { results: result.results || [], sentinelEnd: true, @@ -258,6 +283,14 @@ export class NodeExecutionOrchestrator { this.loopOrchestrator.restoreLoopEdges(loopId) } } + + if ( + node.metadata.isParallelSentinel && + node.metadata.sentinelType === 'end' && + output.selectedRoute === EDGE.PARALLEL_CONTINUE + ) { + this.state.deleteBlockState(node.id) + } } private findParallelIdForNode(nodeId: string): string | undefined { diff --git a/apps/sim/executor/orchestrators/parallel.test.ts b/apps/sim/executor/orchestrators/parallel.test.ts index f0262b92e9e..96aa1ae684d 100644 --- a/apps/sim/executor/orchestrators/parallel.test.ts +++ b/apps/sim/executor/orchestrators/parallel.test.ts @@ -6,6 +6,15 @@ import type { DAG } from '@/executor/dag/builder' import type { BlockStateWriter, ContextExtensions } from '@/executor/execution/types' import { ParallelOrchestrator } from '@/executor/orchestrators/parallel' import type { ExecutionContext } from '@/executor/types' +import { buildBranchNodeId } from '@/executor/utils/subflow-utils' + +const { mockCompactSubflowResults } = vi.hoisted(() => ({ + mockCompactSubflowResults: vi.fn(async (results: unknown) => results), +})) + +vi.mock('@/lib/execution/payloads/serializer', () => ({ + compactSubflowResults: mockCompactSubflowResults, +})) function createDag(): DAG { return { @@ -75,6 +84,7 @@ function createContext(overrides: Partial = {}): ExecutionCont describe('ParallelOrchestrator', () => { beforeEach(() => { vi.clearAllMocks() + mockCompactSubflowResults.mockImplementation(async (results: unknown) => results) }) it('awaits empty-subflow lifecycle callbacks before returning the empty scope', async () => { @@ -99,9 +109,8 @@ describe('ParallelOrchestrator', () => { const ctx = createContext() const initializePromise = orchestrator.initializeParallelScope(ctx, 'parallel-1') - await Promise.resolve() + await vi.waitFor(() => expect(onBlockStart).toHaveBeenCalledTimes(1)) - expect(onBlockStart).toHaveBeenCalledTimes(1) expect(onBlockComplete).not.toHaveBeenCalled() releaseStart?.() @@ -130,4 +139,171 @@ describe('ParallelOrchestrator', () => { isEmpty: true, }) }) + + it('records resumed later-batch outputs under restored global branch indexes', () => { + const dag = createDag() + dag.nodes.set('task-1', { + id: 'task-1', + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { branchIndex: 0 }, + }) + const orchestrator = new ParallelOrchestrator(dag, createState(), null, {}) + const ctx = createContext({ + parallelBlockMapping: new Map([ + ['task-1', { originalBlockId: 'task', parallelId: 'parallel-1', iterationIndex: 20 }], + ]), + parallelExecutions: new Map([ + [ + 'parallel-1', + { + parallelId: 'parallel-1', + totalBranches: 25, + currentBatchStart: 20, + currentBatchSize: 5, + accumulatedOutputs: new Map([[0, [{ output: 'previous' }]]]), + branchOutputs: new Map(), + }, + ], + ]), + }) + + orchestrator.handleParallelBranchCompletion(ctx, 
'parallel-1', 'task-1', { output: 'resumed' }) + + const scope = ctx.parallelExecutions?.get('parallel-1') + expect(scope?.branchOutputs.get(20)).toEqual([{ output: 'resumed' }]) + expect(scope?.branchOutputs.has(0)).toBe(false) + }) + + it('resets only incoming batch branch state when scheduling later batches', async () => { + const dag = createDag() + const incomingBranchId = buildBranchNodeId('task-1', 0) + const previousBranchId = buildBranchNodeId('task-1', 1) + dag.nodes.set(incomingBranchId, { + id: incomingBranchId, + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { parallelId: 'parallel-1', isParallelBranch: true, branchIndex: 0 }, + }) + dag.nodes.set(previousBranchId, { + id: previousBranchId, + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { parallelId: 'parallel-1', isParallelBranch: true, branchIndex: 1 }, + }) + const state = createState() + const orchestrator = new ParallelOrchestrator(dag, state, null, {}) + + await ( + orchestrator as unknown as { + scheduleNextBatch( + ctx: ExecutionContext, + scope: NonNullable extends Map< + string, + infer Scope + > + ? Scope + : never, + nextBatchStart: number + ): Promise + } + ).scheduleNextBatch( + createContext(), + { + parallelId: 'parallel-1', + totalBranches: 3, + batchSize: 1, + currentBatchStart: 0, + currentBatchSize: 2, + accumulatedOutputs: new Map([[1, [{ output: 'previous' }]]]), + branchOutputs: new Map(), + }, + 2 + ) + + expect(state.deleteBlockState).toHaveBeenCalledWith(incomingBranchId) + expect(state.deleteBlockState).not.toHaveBeenCalledWith(previousBranchId) + expect(state.unmarkExecuted).toHaveBeenCalledWith(incomingBranchId) + expect(state.unmarkExecuted).not.toHaveBeenCalledWith(previousBranchId) + }) + + it('compacts accumulated outputs before scheduling later batches', async () => { + const dag = createDag() + const templateBranchId = buildBranchNodeId('task-1', 0) + dag.nodes.set(templateBranchId, { + id: templateBranchId, + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { parallelId: 'parallel-1', isParallelBranch: true, branchIndex: 0 }, + }) + const orchestrator = new ParallelOrchestrator(dag, createState(), null, {}) + const previousOutputs = [{ output: 'previous' }] + const incomingOutputs = [{ output: 'incoming' }] + const compactedPrevious = [{ output: 'compacted-previous' }] + const compactedIncoming = [{ output: 'compacted-incoming' }] + mockCompactSubflowResults.mockResolvedValueOnce([compactedPrevious, compactedIncoming]) + const scope = { + parallelId: 'parallel-1', + totalBranches: 3, + batchSize: 1, + currentBatchStart: 0, + currentBatchSize: 2, + accumulatedOutputs: new Map([[0, previousOutputs]]), + branchOutputs: new Map([[1, incomingOutputs]]), + } + const ctx = createContext({ + parallelExecutions: new Map([['parallel-1', scope]]), + }) + + const result = await orchestrator.aggregateParallelResults(ctx, 'parallel-1') + + expect(result).toMatchObject({ 
allBranchesComplete: false, completedBranches: 2 }) + expect(mockCompactSubflowResults).toHaveBeenCalledWith( + [previousOutputs, incomingOutputs], + expect.objectContaining({ + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + requireDurable: true, + }) + ) + expect(scope.accumulatedOutputs.get(0)).toBe(compactedPrevious) + expect(scope.accumulatedOutputs.get(1)).toBe(compactedIncoming) + }) }) diff --git a/apps/sim/executor/orchestrators/parallel.ts b/apps/sim/executor/orchestrators/parallel.ts index 7cc10abbee7..aa9d0ad8c6e 100644 --- a/apps/sim/executor/orchestrators/parallel.ts +++ b/apps/sim/executor/orchestrators/parallel.ts @@ -1,24 +1,25 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' +import { compactSubflowResults } from '@/lib/execution/payloads/serializer' import { DEFAULTS } from '@/executor/constants' import type { DAG } from '@/executor/dag/builder' import type { ParallelScope } from '@/executor/execution/state' import type { BlockStateWriter, ContextExtensions } from '@/executor/execution/types' import type { ExecutionContext, NormalizedBlockOutput } from '@/executor/types' import type { ParallelConfigWithNodes } from '@/executor/types/parallel' -import { ParallelExpander } from '@/executor/utils/parallel-expansion' +import { type ClonedSubflowInfo, ParallelExpander } from '@/executor/utils/parallel-expansion' import { addSubflowErrorLog, emitEmptySubflowEvents, emitSubflowSuccessEvents, extractBranchIndex, - resolveArrayInput, - validateMaxCount, + resolveArrayInputAsync, } from '@/executor/utils/subflow-utils' import type { VariableResolver } from '@/executor/variables/resolver' import type { SerializedParallel } from '@/serializer/types' const logger = createLogger('ParallelOrchestrator') +const DEFAULT_PARALLEL_BATCH_SIZE = 20 export interface ParallelBranchMetadata { branchIndex: number @@ -29,7 +30,7 @@ export interface ParallelBranchMetadata { export interface ParallelAggregationResult { allBranchesComplete: boolean - results?: NormalizedBlockOutput[][] + results?: unknown completedBranches?: number totalBranches?: number } @@ -64,7 +65,7 @@ export class ParallelOrchestrator { let isEmpty = false try { - const resolved = this.resolveBranchCount(ctx, parallelConfig, parallelId) + const resolved = await this.resolveBranchCount(ctx, parallelConfig, parallelId) branchCount = resolved.branchCount items = resolved.items isEmpty = resolved.isEmpty ?? 
false @@ -81,21 +82,6 @@ export class ParallelOrchestrator { throw new Error(errorMessage) } - const branchError = validateMaxCount( - branchCount, - DEFAULTS.MAX_PARALLEL_BRANCHES, - 'Parallel branch count' - ) - if (branchError) { - logger.error(branchError, { parallelId, branchCount }) - await this.addParallelErrorLog(ctx, parallelId, branchError, { - distribution: parallelConfig.distribution, - branchCount, - }) - this.setErrorScope(ctx, parallelId, branchError) - throw new Error(branchError) - } - if (isEmpty || branchCount === 0) { const scope: ParallelScope = { parallelId, @@ -122,60 +108,27 @@ export class ParallelOrchestrator { return scope } - const { entryNodes, clonedSubflows } = this.expander.expandParallel( + const batchSize = this.resolveBatchSize(parallelConfig.batchSize) + const currentBatchSize = Math.min(batchSize, branchCount) + const batchItems = items?.slice(0, currentBatchSize) + const { entryNodes, clonedSubflows, allBranchNodes } = this.expander.expandParallel( this.dag, parallelId, - branchCount, - items + currentBatchSize, + batchItems, + { branchIndexOffset: 0, totalBranches: branchCount } ) - // Register cloned subflows in the parent map so iteration context resolves correctly. - // Build a per-branch clone map so nested clones point to the cloned parent, not the original. - if (clonedSubflows.length > 0 && ctx.subflowParentMap) { - const branchCloneMaps = new Map>() - for (const clone of clonedSubflows) { - let map = branchCloneMaps.get(clone.outerBranchIndex) - if (!map) { - map = new Map() - branchCloneMaps.set(clone.outerBranchIndex, map) - } - map.set(clone.originalId, clone.clonedId) - } - - for (const clone of clonedSubflows) { - const originalEntry = ctx.subflowParentMap.get(clone.originalId) - if (originalEntry) { - const cloneMap = branchCloneMaps.get(clone.outerBranchIndex) - const clonedParentId = cloneMap?.get(originalEntry.parentId) - if (clonedParentId) { - // Parent was also cloned — this is the original (branch 0) inside the cloned parent - ctx.subflowParentMap.set(clone.clonedId, { - parentId: clonedParentId, - parentType: originalEntry.parentType, - branchIndex: 0, - }) - } else { - // Parent was not cloned — direct child of the expanding parallel - ctx.subflowParentMap.set(clone.clonedId, { - parentId: parallelId, - parentType: 'parallel', - branchIndex: clone.outerBranchIndex, - }) - } - } else { - // Not in parent map — direct child of the expanding parallel - ctx.subflowParentMap.set(clone.clonedId, { - parentId: parallelId, - parentType: 'parallel', - branchIndex: clone.outerBranchIndex, - }) - } - } - } + this.registerClonedSubflows(ctx, parallelId, clonedSubflows) + this.registerBranchMappings(ctx, parallelId, allBranchNodes) const scope: ParallelScope = { parallelId, totalBranches: branchCount, + batchSize, + currentBatchStart: 0, + currentBatchSize, + accumulatedOutputs: new Map(), branchOutputs: new Map(), items, } @@ -196,6 +149,8 @@ export class ParallelOrchestrator { logger.info('Parallel scope initialized', { parallelId, branchCount, + batchSize, + currentBatchSize, entryNodeCount: entryNodes.length, newEntryNodes: newEntryNodes.length, }) @@ -203,16 +158,16 @@ export class ParallelOrchestrator { return scope } - private resolveBranchCount( + private async resolveBranchCount( ctx: ExecutionContext, config: SerializedParallel, parallelId: string - ): { branchCount: number; items?: any[]; isEmpty?: boolean } { + ): Promise<{ branchCount: number; items?: any[]; isEmpty?: boolean }> { if (config.parallelType === 'count') { return { 
branchCount: config.count ?? 1 } } - const items = this.resolveDistributionItems(ctx, config) + const items = await this.resolveDistributionItems(ctx, config) if (items.length === 0) { logger.info('Parallel has empty distribution, skipping parallel body', { parallelId }) return { branchCount: 0, items: [], isEmpty: true } @@ -251,7 +206,10 @@ export class ParallelOrchestrator { ctx.parallelExecutions.set(parallelId, scope) } - private resolveDistributionItems(ctx: ExecutionContext, config: SerializedParallel): any[] { + private async resolveDistributionItems( + ctx: ExecutionContext, + config: SerializedParallel + ): Promise { if ( config.distribution === undefined || config.distribution === null || @@ -261,7 +219,63 @@ export class ParallelOrchestrator { 'Parallel collection distribution is empty. Provide an array or a reference that resolves to a collection.' ) } - return resolveArrayInput(ctx, config.distribution, this.resolver) + return resolveArrayInputAsync(ctx, config.distribution, this.resolver) + } + + private resolveBatchSize(batchSize: unknown): number { + const parsed = + typeof batchSize === 'number' ? batchSize : Number.parseInt(String(batchSize), 10) + if (Number.isNaN(parsed)) { + return DEFAULT_PARALLEL_BATCH_SIZE + } + return Math.max(1, Math.min(DEFAULTS.MAX_PARALLEL_BRANCHES, parsed)) + } + + private registerClonedSubflows( + ctx: ExecutionContext, + parallelId: string, + clonedSubflows: ClonedSubflowInfo[] + ): void { + if (clonedSubflows.length === 0 || !ctx.subflowParentMap) { + return + } + + const branchCloneMaps = new Map>() + for (const clone of clonedSubflows) { + let map = branchCloneMaps.get(clone.outerBranchIndex) + if (!map) { + map = new Map() + branchCloneMaps.set(clone.outerBranchIndex, map) + } + map.set(clone.originalId, clone.clonedId) + } + + for (const clone of clonedSubflows) { + const originalEntry = ctx.subflowParentMap.get(clone.originalId) + if (originalEntry) { + const cloneMap = branchCloneMaps.get(clone.outerBranchIndex) + const clonedParentId = cloneMap?.get(originalEntry.parentId) + if (clonedParentId) { + ctx.subflowParentMap.set(clone.clonedId, { + parentId: clonedParentId, + parentType: originalEntry.parentType, + branchIndex: 0, + }) + } else { + ctx.subflowParentMap.set(clone.clonedId, { + parentId: parallelId, + parentType: 'parallel', + branchIndex: clone.outerBranchIndex, + }) + } + } else { + ctx.subflowParentMap.set(clone.clonedId, { + parentId: parallelId, + parentType: 'parallel', + branchIndex: clone.outerBranchIndex, + }) + } + } } /** @@ -282,7 +296,11 @@ export class ParallelOrchestrator { return } - const branchIndex = extractBranchIndex(nodeId) + const mappedBranch = ctx.parallelBlockMapping?.get(nodeId) + const branchIndex = + mappedBranch?.parallelId === parallelId + ? mappedBranch.iterationIndex + : (this.dag.nodes.get(nodeId)?.metadata.branchIndex ?? extractBranchIndex(nodeId)) if (branchIndex === null) { logger.warn('Could not extract branch index from node ID', { nodeId }) return @@ -304,33 +322,162 @@ export class ParallelOrchestrator { return { allBranchesComplete: false } } + const accumulatedOutputs = + scope.accumulatedOutputs ?? new Map() + for (const [branchIndex, outputs] of scope.branchOutputs.entries()) { + accumulatedOutputs.set(branchIndex, outputs) + } + scope.accumulatedOutputs = accumulatedOutputs + scope.branchOutputs = new Map() + + const nextBatchStart = + (scope.currentBatchStart ?? 0) + (scope.currentBatchSize ?? 
scope.totalBranches) + if (nextBatchStart < scope.totalBranches) { + /** + * Compact accumulated outputs before scheduling the next batch. Each + * block output is already individually compacted by `block-executor`, but + * many below-threshold branch results can still exceed the aggregate + * threshold over time. Re-running the existing subflow compactor over the + * accumulated entries forces aggregate-size spills while existing + * LargeValueRefs stay stable. + */ + if (accumulatedOutputs.size > 0) { + const accumulatedBranchIndexes = Array.from(accumulatedOutputs.keys()).sort((a, b) => a - b) + const accumulatedResults = accumulatedBranchIndexes.map( + (idx) => accumulatedOutputs.get(idx) ?? [] + ) + const compactedAccumulated = await compactSubflowResults(accumulatedResults, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + }) + accumulatedBranchIndexes.forEach((branchIdx, position) => { + accumulatedOutputs.set(branchIdx, compactedAccumulated[position]) + }) + } + await this.scheduleNextBatch(ctx, scope, nextBatchStart) + return { + allBranchesComplete: false, + completedBranches: accumulatedOutputs.size, + totalBranches: scope.totalBranches, + } + } + const results: NormalizedBlockOutput[][] = [] for (let i = 0; i < scope.totalBranches; i++) { - const branchOutputs = scope.branchOutputs.get(i) + const branchOutputs = accumulatedOutputs.get(i) if (!branchOutputs) { logger.warn('Missing branch output during parallel aggregation', { parallelId, branch: i }) } results.push(branchOutputs ?? []) } - const output = { results } + const compactedResults = await compactSubflowResults(results, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + }) + const output = { results: compactedResults } this.state.setBlockOutput(parallelId, output) + scope.accumulatedOutputs = new Map() await emitSubflowSuccessEvents(ctx, parallelId, 'parallel', output, this.contextExtensions) return { allBranchesComplete: true, - results, + results: output.results, completedBranches: scope.totalBranches, totalBranches: scope.totalBranches, } } + + private async scheduleNextBatch( + ctx: ExecutionContext, + scope: ParallelScope, + nextBatchStart: number + ): Promise { + const batchSize = scope.batchSize ?? 
DEFAULT_PARALLEL_BATCH_SIZE + const remaining = scope.totalBranches - nextBatchStart + const currentBatchSize = Math.min(batchSize, remaining) + const batchItems = scope.items?.slice(nextBatchStart, nextBatchStart + currentBatchSize) + + const { entryNodes, clonedSubflows, allBranchNodes } = this.expander.expandParallel( + this.dag, + scope.parallelId, + currentBatchSize, + batchItems, + { branchIndexOffset: nextBatchStart, totalBranches: scope.totalBranches } + ) + + this.registerClonedSubflows(ctx, scope.parallelId, clonedSubflows) + this.registerBranchMappings(ctx, scope.parallelId, allBranchNodes) + this.resetBatchExecutionState(allBranchNodes) + + scope.currentBatchStart = nextBatchStart + scope.currentBatchSize = currentBatchSize + + if (!ctx.pendingDynamicNodes) { + ctx.pendingDynamicNodes = [] + } + ctx.pendingDynamicNodes.push(...entryNodes) + + logger.info('Scheduled next parallel batch', { + parallelId: scope.parallelId, + nextBatchStart, + currentBatchSize, + totalBranches: scope.totalBranches, + }) + } + + private resetBatchExecutionState(branchNodeIds: string[]): void { + for (const nodeId of branchNodeIds) { + const node = this.dag.nodes.get(nodeId) + if (!node?.metadata.isParallelBranch) { + continue + } + this.state.unmarkExecuted(nodeId) + this.state.deleteBlockState(nodeId) + } + } + + private registerBranchMappings( + ctx: ExecutionContext, + parallelId: string, + branchNodeIds: string[] + ): void { + if (branchNodeIds.length === 0) { + return + } + + if (!ctx.parallelBlockMapping) { + ctx.parallelBlockMapping = new Map() + } + + for (const nodeId of branchNodeIds) { + const node = this.dag.nodes.get(nodeId) + const branchIndex = node?.metadata.branchIndex ?? extractBranchIndex(nodeId) + if (branchIndex === null || branchIndex === undefined) { + continue + } + + ctx.parallelBlockMapping.set(nodeId, { + originalBlockId: node?.metadata.originalBlockId ?? nodeId, + parallelId, + iterationIndex: branchIndex, + }) + } + } + extractBranchMetadata(nodeId: string): ParallelBranchMetadata | null { const node = this.dag.nodes.get(nodeId) if (!node?.metadata.isParallelBranch) { return null } - const branchIndex = extractBranchIndex(nodeId) + const branchIndex = node.metadata.branchIndex ?? 
extractBranchIndex(nodeId) if (branchIndex === null) { return null } diff --git a/apps/sim/executor/types.ts b/apps/sim/executor/types.ts index 2d48bb5a98f..d2569706085 100644 --- a/apps/sim/executor/types.ts +++ b/apps/sim/executor/types.ts @@ -290,6 +290,8 @@ export interface ExecutionContext { workflowId: string workspaceId?: string executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string isDeployedContext?: boolean enforceCredentialAccess?: boolean @@ -344,6 +346,10 @@ export interface ExecutionContext { { parallelId: string totalBranches: number + batchSize?: number + currentBatchStart?: number + currentBatchSize?: number + accumulatedOutputs?: Map branchOutputs: Map parallelType?: 'count' | 'collection' items?: any[] diff --git a/apps/sim/executor/utils/block-reference.ts b/apps/sim/executor/utils/block-reference.ts index edf909a6d3b..082a9339782 100644 --- a/apps/sim/executor/utils/block-reference.ts +++ b/apps/sim/executor/utils/block-reference.ts @@ -1,6 +1,10 @@ import { USER_FILE_ACCESSIBLE_PROPERTIES } from '@/lib/workflows/types' import { normalizeName } from '@/executor/constants' -import { navigatePath } from '@/executor/variables/resolvers/reference' +import { + type AsyncPathNavigator, + navigatePath, + type ResolutionContext, +} from '@/executor/variables/resolvers/reference' /** * A single schema node encountered while walking an `OutputSchema`. Captures @@ -204,7 +208,11 @@ function getSchemaFieldNames(schema: OutputSchema | undefined): string[] { export function resolveBlockReference( blockName: string, pathParts: string[], - context: BlockReferenceContext + context: BlockReferenceContext, + options: { + allowLargeValueRefs?: boolean + executionContext?: ResolutionContext['executionContext'] + } = {} ): BlockReferenceResult | undefined { const normalizedName = normalizeName(blockName) const blockId = context.blockNameMapping[normalizedName] @@ -227,7 +235,42 @@ export function resolveBlockReference( return { value: blockOutput, blockId } } - const value = navigatePath(blockOutput, pathParts) + const value = navigatePath(blockOutput, pathParts, options) + + const schema = context.blockOutputSchemas?.[blockId] + if (value === undefined && schema) { + if (!isPathInSchema(schema, pathParts)) { + throw new InvalidFieldError(blockName, pathParts.join('.'), getSchemaFieldNames(schema)) + } + } + + return { value, blockId } +} + +export async function resolveBlockReferenceAsync( + blockName: string, + pathParts: string[], + context: BlockReferenceContext, + resolutionContext: ResolutionContext, + navigatePathAsync: AsyncPathNavigator +): Promise { + const normalizedName = normalizeName(blockName) + const blockId = context.blockNameMapping[normalizedName] + + if (!blockId) { + return undefined + } + + const blockOutput = context.blockData[blockId] + if (blockOutput === undefined) { + return { value: undefined, blockId } + } + + if (pathParts.length === 0) { + return { value: blockOutput, blockId } + } + + const value = await navigatePathAsync(blockOutput, pathParts, resolutionContext) const schema = context.blockOutputSchemas?.[blockId] if (value === undefined && schema) { diff --git a/apps/sim/executor/utils/output-filter.ts b/apps/sim/executor/utils/output-filter.ts index 5da00faba53..95c3cab5397 100644 --- a/apps/sim/executor/utils/output-filter.ts +++ b/apps/sim/executor/utils/output-filter.ts @@ -1,3 +1,4 @@ +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' import { filterHiddenOutputKeys } 
from '@/lib/logs/execution/trace-spans/trace-spans' import { getBlock } from '@/blocks' import { isHiddenFromDisplay } from '@/blocks/types' @@ -27,6 +28,9 @@ export function filterOutputForLog( if (typeof output !== 'object' || output === null || Array.isArray(output)) { return output as NormalizedBlockOutput } + if (isLargeValueRef(output)) { + return output as NormalizedBlockOutput + } const blockConfig = blockType ? getBlock(blockType) : undefined const filtered: NormalizedBlockOutput = {} const additionalHiddenKeys = options?.additionalHiddenKeys ?? [] diff --git a/apps/sim/executor/utils/parallel-expansion.test.ts b/apps/sim/executor/utils/parallel-expansion.test.ts index bcb2fbeb5c5..67f0e865aef 100644 --- a/apps/sim/executor/utils/parallel-expansion.test.ts +++ b/apps/sim/executor/utils/parallel-expansion.test.ts @@ -207,6 +207,65 @@ describe('Nested parallel expansion + edge resolution', () => { expect(readyAfterClonedInnerEnd).toContain(outerEndId) }) + it('uses global branch indexes for nested subflow clones in later batches', () => { + const outerParallelId = 'outer-parallel' + const innerParallelId = 'inner-parallel' + const functionId = 'func-1' + + const workflow: SerializedWorkflow = { + version: '1', + blocks: [ + createBlock('start', BlockType.STARTER), + createBlock(outerParallelId, BlockType.PARALLEL), + createBlock(innerParallelId, BlockType.PARALLEL), + createBlock(functionId, BlockType.FUNCTION), + ], + connections: [ + { source: 'start', target: outerParallelId }, + { + source: outerParallelId, + target: innerParallelId, + sourceHandle: 'parallel-start-source', + }, + { + source: innerParallelId, + target: functionId, + sourceHandle: 'parallel-start-source', + }, + ], + loops: {}, + parallels: { + [innerParallelId]: { + id: innerParallelId, + nodes: [functionId], + count: 1, + parallelType: 'count', + }, + [outerParallelId]: { + id: outerParallelId, + nodes: [innerParallelId], + count: 4, + parallelType: 'count', + }, + }, + } + + const builder = new DAGBuilder() + const dag = builder.build(workflow) + const expander = new ParallelExpander() + const result = expander.expandParallel(dag, outerParallelId, 2, undefined, { + branchIndexOffset: 2, + totalBranches: 4, + }) + + expect(result.entryNodes).not.toContain(buildParallelSentinelStartId(innerParallelId)) + expect(result.clonedSubflows.map((clone) => clone.outerBranchIndex)).toEqual([2, 3]) + expect(result.clonedSubflows.map((clone) => clone.clonedId)).toEqual([ + `${innerParallelId}__obranch-2`, + `${innerParallelId}__obranch-3`, + ]) + }) + it('3-level nesting: pre-expansion clone IDs do not collide with runtime expansion', () => { const p1 = 'p1' const p2 = 'p2' @@ -251,7 +310,7 @@ describe('Nested parallel expansion + edge resolution', () => { // P3 should also be cloned (inside P2__obranch-1) with a __clone prefix const p3Clone = p1Result.clonedSubflows.find((c) => c.originalId === p3)! 
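    // The clone segment asserted below is now a deterministic 24-hex-char sha256
    // digest (keyed on parent clone ID, original ID, and outer branch index)
    // rather than a monotonic counter, so pause/resume rebuilds identical IDs.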
expect(p3Clone).toBeDefined() - expect(p3Clone.clonedId).toMatch(/^p3__clone\d+__obranch-1$/) + expect(p3Clone.clonedId).toMatch(/^p3__clone[0-9a-f]{24}__obranch-1$/) expect(stripCloneSuffixes(p3Clone.clonedId)).toBe('p3') // Step 2: Expand P2 (original, branch 0 of P1) — this creates P3__obranch-1 at runtime diff --git a/apps/sim/executor/utils/parallel-expansion.ts b/apps/sim/executor/utils/parallel-expansion.ts index 6d59af91c8d..f98c9f49e57 100644 --- a/apps/sim/executor/utils/parallel-expansion.ts +++ b/apps/sim/executor/utils/parallel-expansion.ts @@ -1,4 +1,5 @@ import { createLogger } from '@sim/logger' +import { sha256Hex } from '@sim/security/hash' import { EDGE } from '@/executor/constants' import type { DAG, DAGNode } from '@/executor/dag/builder' import type { SerializedBlock } from '@/serializer/types' @@ -29,14 +30,12 @@ export interface ExpansionResult { } export class ParallelExpander { - /** Monotonically increasing counter for generating unique pre-expansion clone IDs. */ - private cloneSeq = 0 - expandParallel( dag: DAG, parallelId: string, branchCount: number, - distributionItems?: any[] + distributionItems?: any[], + options: { branchIndexOffset?: number; totalBranches?: number } = {} ): ExpansionResult { const config = dag.parallelConfigs.get(parallelId) if (!config) { @@ -64,6 +63,8 @@ export class ParallelExpander { const regularSet = new Set(regularBlocks) const allBranchNodes: string[] = [] + const branchIndexOffset = options.branchIndexOffset ?? 0 + const branchTotal = options.totalBranches ?? branchCount for (const blockId of regularBlocks) { const templateId = buildBranchNodeId(blockId, 0) @@ -76,10 +77,16 @@ export class ParallelExpander { for (let i = 0; i < branchCount; i++) { const branchNodeId = buildBranchNodeId(blockId, i) + const globalBranchIndex = branchIndexOffset + i allBranchNodes.push(branchNodeId) if (i === 0) { - this.updateBranchMetadata(templateNode, i, branchCount, distributionItems?.[i]) + this.updateBranchMetadata( + templateNode, + globalBranchIndex, + branchTotal, + distributionItems?.[i] + ) continue } @@ -87,7 +94,8 @@ export class ParallelExpander { templateNode, blockId, i, - branchCount, + globalBranchIndex, + branchTotal, distributionItems?.[i] ) dag.nodes.set(branchNodeId, branchNode) @@ -114,20 +122,22 @@ export class ParallelExpander { ? 
buildParallelSentinelEndId(subflowId) : buildSentinelEndId(subflowId) - // Branch 0 uses original nodes - if (dag.nodes.has(startId)) entryNodes.push(startId) - if (dag.nodes.has(endId)) terminalNodes.push(endId) + for (let i = 0; i < branchCount; i++) { + const globalBranchIndex = branchIndexOffset + i + if (globalBranchIndex === 0) { + if (dag.nodes.has(startId)) entryNodes.push(startId) + if (dag.nodes.has(endId)) terminalNodes.push(endId) + continue + } - // Branches 1..N clone the entire subflow graph (recursively for deep nesting) - for (let i = 1; i < branchCount; i++) { - const cloned = this.cloneNestedSubflow(dag, subflowId, i, clonedSubflows) + const cloned = this.cloneNestedSubflow(dag, subflowId, globalBranchIndex, clonedSubflows) entryNodes.push(cloned.startId) terminalNodes.push(cloned.endId) clonedSubflows.push({ clonedId: cloned.clonedId, originalId: subflowId, - outerBranchIndex: i, + outerBranchIndex: globalBranchIndex, }) } } @@ -161,11 +171,12 @@ export class ParallelExpander { private cloneTemplateNode( template: DAGNode, originalBlockId: string, + localBranchIndex: number, branchIndex: number, branchTotal: number, distributionItem?: any ): DAGNode { - const branchNodeId = buildBranchNodeId(originalBlockId, branchIndex) + const branchNodeId = buildBranchNodeId(originalBlockId, localBranchIndex) const blockClone: SerializedBlock = { ...template.block, id: branchNodeId, @@ -201,7 +212,11 @@ export class ParallelExpander { const baseTargetId = extractBaseBlockId(edge.target) if (!blocksSet.has(baseTargetId)) continue - for (let i = 1; i < branchCount; i++) { + // Include branch 0 so per-batch re-expansion restores the template's + // incoming-edge bookkeeping that earlier batches consumed during + // edge processing. Without this, identifyBoundaryNodes mis-classifies + // chained children as entry nodes after the first batch. + for (let i = 0; i < branchCount; i++) { const sourceNodeId = buildBranchNodeId(blockId, i) const targetNodeId = buildBranchNodeId(baseTargetId, i) const sourceNode = dag.nodes.get(sourceNodeId) @@ -278,14 +293,20 @@ export class ParallelExpander { /** * Generates a unique clone ID for pre-expansion cloning. * - * Pre-expansion clones use `{originalId}__clone{N}__obranch-{branchIndex}` instead + * Pre-expansion clones use `{originalId}__clone{digest}__obranch-{branchIndex}` instead * of the plain `{originalId}__obranch-{branchIndex}` used by runtime expansion. - * The `__clone{N}` segment (from a monotonic counter) prevents naming collisions - * when the original (branch-0) subflow later expands at runtime and creates - * `{child}__obranch-{branchIndex}`. + * The clone segment prevents naming collisions when the original (branch-0) + * subflow later expands at runtime and creates `{child}__obranch-{branchIndex}`. + * Keeping it deterministic lets pause/resume rebuild the same active branch IDs. */ - private buildPreCloneId(originalId: string, outerBranchIndex: number): string { - return `${originalId}__clone${this.cloneSeq++}__obranch-${outerBranchIndex}` + private buildPreCloneIdForParent( + originalId: string, + outerBranchIndex: number, + parentCloneId: string + ): string { + const input = `${parentCloneId}:${originalId}:${outerBranchIndex}` + const digest = sha256Hex(input).slice(0, 24) + return `${originalId}__clone${digest}__obranch-${outerBranchIndex}` } /** @@ -293,8 +314,8 @@ export class ParallelExpander { * * The top-level subflow gets a standard `__obranch-{N}` clone ID (needed by * `findEffectiveContainerId` at runtime). 
All deeper children — both containers - * and regular blocks — receive unique `__clone{N}__obranch-{M}` IDs via - * {@link buildPreCloneId} to avoid collisions with runtime expansion. + * and regular blocks — receive deterministic `__clone{N}__obranch-{M}` IDs to + * avoid collisions with runtime expansion. */ private cloneNestedSubflow( dag: DAG, @@ -357,7 +378,7 @@ export class ParallelExpander { const isNestedLoop = dag.loopConfigs.has(blockId) if (isNestedParallel || isNestedLoop) { - const nestedClonedId = this.buildPreCloneId(blockId, outerBranchIndex) + const nestedClonedId = this.buildPreCloneIdForParent(blockId, outerBranchIndex, clonedId) clonedBlockIds.push(nestedClonedId) const innerResult = this.cloneSubflowGraph( @@ -377,7 +398,7 @@ export class ParallelExpander { outerBranchIndex, }) } else { - const clonedBlockId = this.buildPreCloneId(blockId, outerBranchIndex) + const clonedBlockId = this.buildPreCloneIdForParent(blockId, outerBranchIndex, clonedId) clonedBlockIds.push(clonedBlockId) if (isParallel) { diff --git a/apps/sim/executor/utils/subflow-utils.test.ts b/apps/sim/executor/utils/subflow-utils.test.ts index 18f7e2097d8..478319d6ca9 100644 --- a/apps/sim/executor/utils/subflow-utils.test.ts +++ b/apps/sim/executor/utils/subflow-utils.test.ts @@ -4,83 +4,99 @@ import { describe, expect, it, vi } from 'vitest' import type { ExecutionContext } from '@/executor/types' import type { VariableResolver } from '@/executor/variables/resolver' -import { resolveArrayInput } from './subflow-utils' +import { findEffectiveContainerId, resolveArrayInputAsync } from './subflow-utils' -describe('resolveArrayInput', () => { +describe('resolveArrayInputAsync', () => { const fakeCtx = {} as unknown as ExecutionContext - it('returns arrays as-is', () => { - expect(resolveArrayInput(fakeCtx, [1, 2, 3], null)).toEqual([1, 2, 3]) + it('returns arrays as-is', async () => { + await expect(resolveArrayInputAsync(fakeCtx, [1, 2, 3], null)).resolves.toEqual([1, 2, 3]) }) - it('converts plain objects to entries', () => { - expect(resolveArrayInput(fakeCtx, { a: 1, b: 2 }, null)).toEqual([ + it('converts plain objects to entries', async () => { + await expect(resolveArrayInputAsync(fakeCtx, { a: 1, b: 2 }, null)).resolves.toEqual([ ['a', 1], ['b', 2], ]) }) - it('returns empty array when a pure reference resolves to null (skipped block)', () => { + it('returns empty array when a pure reference resolves to null (skipped block)', async () => { // `resolveSingleReference` returns `null` for a reference that points at a // block that exists in the workflow but did not execute on this path. // A loop/parallel over such a reference should run zero iterations rather // than fail the workflow. 
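    // The resolver API is now async end-to-end, so the stub below has to return
    // a Promise via mockResolvedValue instead of mockReturnValue.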
const resolver = { - resolveSingleReference: vi.fn().mockReturnValue(null), + resolveSingleReference: vi.fn().mockResolvedValue(null), } as unknown as VariableResolver - const result = resolveArrayInput(fakeCtx, '', resolver) + const result = await resolveArrayInputAsync(fakeCtx, '', resolver) expect(result).toEqual([]) expect(resolver.resolveSingleReference).toHaveBeenCalled() }) - it('returns the array from a pure reference that resolved to an array', () => { + it('returns the array from a pure reference that resolved to an array', async () => { const resolver = { - resolveSingleReference: vi.fn().mockReturnValue([1, 2, 3]), + resolveSingleReference: vi.fn().mockResolvedValue([1, 2, 3]), } as unknown as VariableResolver - expect(resolveArrayInput(fakeCtx, '', resolver)).toEqual([1, 2, 3]) + await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).resolves.toEqual([ + 1, 2, 3, + ]) }) - it('converts resolved objects to entries', () => { + it('converts resolved objects to entries', async () => { const resolver = { - resolveSingleReference: vi.fn().mockReturnValue({ x: 1, y: 2 }), + resolveSingleReference: vi.fn().mockResolvedValue({ x: 1, y: 2 }), } as unknown as VariableResolver - expect(resolveArrayInput(fakeCtx, '', resolver)).toEqual([ + await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).resolves.toEqual([ ['x', 1], ['y', 2], ]) }) - it('throws when a pure reference resolves to a non-array, non-object, non-null value', () => { + it('throws when a pure reference resolves to a non-array, non-object, non-null value', async () => { const resolver = { - resolveSingleReference: vi.fn().mockReturnValue(42), + resolveSingleReference: vi.fn().mockResolvedValue(42), } as unknown as VariableResolver - expect(() => resolveArrayInput(fakeCtx, '', resolver)).toThrow( + await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).rejects.toThrow( /did not resolve to an array or object/ ) }) - it('throws when a pure reference resolves to undefined (unknown block)', () => { + it('throws when a pure reference resolves to undefined (unknown block)', async () => { // `undefined` means the reference could not be matched to any block at // all (typo / deleted block). This must still fail loudly. 
const resolver = { - resolveSingleReference: vi.fn().mockReturnValue(undefined), + resolveSingleReference: vi.fn().mockResolvedValue(undefined), } as unknown as VariableResolver - expect(() => resolveArrayInput(fakeCtx, '', resolver)).toThrow( + await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).rejects.toThrow( /did not resolve to an array or object/ ) }) - it('parses a JSON array string', () => { - expect(resolveArrayInput(fakeCtx, '[1, 2, 3]', null)).toEqual([1, 2, 3]) + it('parses a JSON array string', async () => { + await expect(resolveArrayInputAsync(fakeCtx, '[1, 2, 3]', null)).resolves.toEqual([1, 2, 3]) + }) + + it('throws on a string that is neither a reference nor valid JSON array/object', async () => { + await expect(resolveArrayInputAsync(fakeCtx, 'not json', null)).rejects.toThrow() }) +}) + +describe('findEffectiveContainerId', () => { + it('finds pre-cloned nested subflow IDs with clone sequence suffixes', () => { + const executionMap = new Map([ + ['inner-parallel', {}], + ['inner-parallel__obranch-2', {}], + ['inner-parallel__clone3__obranch-2', {}], + ]) - it('throws on a string that is neither a reference nor valid JSON array/object', () => { - expect(() => resolveArrayInput(fakeCtx, 'not json', null)).toThrow() + expect( + findEffectiveContainerId('inner-parallel', 'leaf__clone7__obranch-2₍0₎', executionMap) + ).toBe('inner-parallel__clone3__obranch-2') }) }) diff --git a/apps/sim/executor/utils/subflow-utils.ts b/apps/sim/executor/utils/subflow-utils.ts index 01765360787..7f363365627 100644 --- a/apps/sim/executor/utils/subflow-utils.ts +++ b/apps/sim/executor/utils/subflow-utils.ts @@ -96,7 +96,7 @@ export function isBranchNodeId(nodeId: string): boolean { const OUTER_BRANCH_PATTERN = /__obranch-(\d+)/ const OUTER_BRANCH_STRIP_PATTERN = /__obranch-\d+/g -const CLONE_SEQ_STRIP_PATTERN = /__clone\d+/g +const CLONE_DIGEST_STRIP_PATTERN = /__clone[0-9a-f]+/gi /** * Extracts the outer branch index from a cloned subflow ID. @@ -114,7 +114,7 @@ export function extractOuterBranchIndex(clonedId: string): number | undefined { */ export function stripCloneSuffixes(nodeId: string): string { return extractBaseBlockId( - nodeId.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_SEQ_STRIP_PATTERN, '') + nodeId.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_DIGEST_STRIP_PATTERN, '') ) } @@ -130,7 +130,7 @@ export function buildClonedSubflowId(originalId: string, branchIndex: number): s * returning the original workflow-level subflow ID. */ export function stripOuterBranchSuffix(id: string): string { - return id.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_SEQ_STRIP_PATTERN, '') + return id.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_DIGEST_STRIP_PATTERN, '') } /** @@ -154,10 +154,30 @@ export function findEffectiveContainerId( // and cloned variants coexist in the map; the clone is the correct scope. 
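  // Example (this is what the findEffectiveContainerId test above asserts): a
  // node ID like 'leaf__clone7__obranch-2₍0₎' carries a __clone segment, so the
  // pre-cloned scope 'inner-parallel__clone3__obranch-2' wins over the plain
  // 'inner-parallel__obranch-2' entry when both coexist in the map.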
const match = currentNodeId.match(OUTER_BRANCH_PATTERN) if (match) { - const candidateId = buildClonedSubflowId(originalId, Number.parseInt(match[1], 10)) + const branchIndex = Number.parseInt(match[1], 10) + const cloneSuffix = `__obranch-${branchIndex}` + if (currentNodeId.includes('__clone')) { + for (const scopeId of executionMap.keys()) { + if ( + scopeId.includes('__clone') && + scopeId.endsWith(cloneSuffix) && + stripOuterBranchSuffix(scopeId) === originalId + ) { + return scopeId + } + } + } + + const candidateId = buildClonedSubflowId(originalId, branchIndex) if (executionMap.has(candidateId)) { return candidateId } + + for (const scopeId of executionMap.keys()) { + if (scopeId.endsWith(cloneSuffix) && stripOuterBranchSuffix(scopeId) === originalId) { + return scopeId + } + } } // Return original ID — for branch-0 (non-cloned) or when scope is missing. @@ -179,26 +199,14 @@ export function normalizeNodeId(nodeId: string): string { } /** - * Validates that a count doesn't exceed a maximum limit. - * Returns an error message if validation fails, undefined otherwise. - */ -export function validateMaxCount(count: number, max: number, itemType: string): string | undefined { - if (count > max) { - return `${itemType} (${count}) exceeds maximum allowed (${max}). Execution blocked.` - } - return undefined -} - -/** - * Resolves array input at runtime. Handles arrays, objects, references, and JSON strings. - * Used by both loop forEach and parallel distribution resolution. - * Throws an error if resolution fails. + * Async variant used by execution paths that may need durable large-value or + * explicit UserFile.base64 materialization while resolving collection inputs. */ -export function resolveArrayInput( +export async function resolveArrayInputAsync( ctx: ExecutionContext, items: any, resolver: VariableResolver | null -): any[] { +): Promise { if (Array.isArray(items)) { return items } @@ -210,7 +218,7 @@ export function resolveArrayInput( if (typeof items === 'string') { if (items.startsWith(REFERENCE.START) && items.endsWith(REFERENCE.END) && resolver) { try { - const resolved = resolver.resolveSingleReference(ctx, '', items) + const resolved = await resolver.resolveSingleReference(ctx, '', items) if (Array.isArray(resolved)) { return resolved } @@ -249,7 +257,7 @@ export function resolveArrayInput( if (resolver) { try { - const resolved = resolver.resolveInputs(ctx, 'subflow_items', { items }).items + const resolved = (await resolver.resolveInputs(ctx, 'subflow_items', { items })).items if (Array.isArray(resolved)) { return resolved } @@ -408,7 +416,7 @@ export async function emitSubflowSuccessEvents( ctx: ExecutionContext, blockId: string, blockType: 'loop' | 'parallel', - output: { results: any[] }, + output: { results: unknown }, contextExtensions: ContextExtensions | null ): Promise { const now = new Date().toISOString() diff --git a/apps/sim/executor/variables/resolver.test.ts b/apps/sim/executor/variables/resolver.test.ts index 9fe0e6273fd..e92eac9439e 100644 --- a/apps/sim/executor/variables/resolver.test.ts +++ b/apps/sim/executor/variables/resolver.test.ts @@ -18,6 +18,7 @@ function createBlock(id: string, name: string, type: string, params = {}): Seria outputs: { result: 'string', items: 'json', + file: 'file', }, enabled: true, } @@ -39,6 +40,16 @@ function createResolver(language = 'javascript') { state.setBlockOutput('producer', { result: 'hello world', items: ['a', 'b'], + file: { + id: 'file-1', + name: 'image.png', + url: 'https://example.com/image.png', + key: 
'execution/workspace-1/workflow-1/execution-1/image.png', + context: 'execution', + size: 12 * 1024 * 1024, + type: 'image/png', + base64: 'large-inline-base64', + }, }) const ctx = { blockStates: state.getBlockStates(), @@ -61,18 +72,18 @@ function createResolver(language = 'javascript') { } describe('VariableResolver function block inputs', () => { - it('returns empty inputs when params are missing', () => { + it('returns empty inputs when params are missing', async () => { const { block, ctx, resolver } = createResolver() - const result = resolver.resolveInputsForFunctionBlock(ctx, 'function', undefined, block) + const result = await resolver.resolveInputsForFunctionBlock(ctx, 'function', undefined, block) expect(result).toEqual({ resolvedInputs: {}, displayInputs: {}, contextVariables: {} }) }) - it('resolves JavaScript block references through globalThis context variables', () => { + it('resolves JavaScript block references through globalThis context variables', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return ' }, @@ -84,10 +95,362 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('resolves Python block references through globals lookup', () => { + it('resolves named loop result bracket paths in function code', async () => { + const loopBlock = createBlock('loop-1', 'Loop 1', 'loop') + const functionBlock = createBlock('function', 'Function', BlockType.FUNCTION, { + language: 'javascript', + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [loopBlock, functionBlock], + connections: [], + loops: { 'loop-1': { nodes: ['producer'] } }, + parallels: {}, + } + const state = new ExecutionState() + state.setBlockOutput('loop-1', { + results: [[{ id: 'a' }], [{ id: 'b' }]], + }) + const ctx = { + blockStates: state.getBlockStates(), + blockLogs: [], + environmentVariables: {}, + workflowVariables: {}, + decisions: { router: new Map(), condition: new Map() }, + loopExecutions: new Map(), + executedBlocks: new Set(), + activeExecutionPath: new Set(), + completedLoops: new Set(), + metadata: {}, + } as ExecutionContext + const resolver = new VariableResolver(workflow, {}, state) + + const result = await resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return ' }, + functionBlock + ) + + expect(result.resolvedInputs.code).toBe('return globalThis["__blockRef_0"]') + expect(result.displayInputs.code).toBe('return "b"') + expect(result.contextVariables).toEqual({ __blockRef_0: 'b' }) + }) + + it('rewrites JavaScript file base64 references to lazy runtime reads', async () => { + const { block, ctx, resolver } = createResolver('javascript') + + const result = await resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'const base64 = ;\nreturn base64' }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'const base64 = (await sim.files.readBase64(globalThis["__blockRef_0"]));\nreturn base64' + ) + expect(result.displayInputs.code).toBe('const base64 = ;\nreturn base64') + expect(result.contextVariables.__blockRef_0).toMatchObject({ + id: 'file-1', + name: 'image.png', + }) + expect(result.contextVariables.__blockRef_0).not.toHaveProperty('base64') + }) + + it('wraps lazy JavaScript file base64 reads before member access', async () => { + const { block, ctx, 
resolver } = createResolver('javascript') + + const result = await resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return .length' }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'return (await sim.files.readBase64(globalThis["__blockRef_0"])).length' + ) + }) + + it('uses existing inline base64 for keyless files instead of lazy storage reads', async () => { + const { block, ctx, resolver } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + file: { + id: 'file-keyless', + name: 'inline.txt', + key: '', + url: 'https://example.com/inline.txt', + size: 5, + type: 'text/plain', + base64: 'aGVsbG8=', + }, + }) + + const keylessResolver = new VariableResolver( + { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + }, + {}, + state + ) + + const result = await keylessResolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return ' }, + block + ) + + expect(result.resolvedInputs.code).toBe('return globalThis["__blockRef_0"]') + expect(result.contextVariables.__blockRef_0).toBe('aGVsbG8=') + }) + + it('rewrites JavaScript large value refs to lazy runtime reads', async () => { + const { block, ctx, resolver } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + const result = await largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: 'return ' }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'return (await sim.values.read(globalThis["__blockRef_0"]))' + ) + expect(result.contextVariables.__blockRef_0).toMatchObject({ + __simLargeValueRef: true, + id: 'lv_ABCDEFGHIJKL', + }) + }) + + it('fails whole large value refs for Function runtimes without lazy helpers', async () => { + const { block, ctx } = createResolver('python') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + await expect( + largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: 'return ' }, + block + ) + ).rejects.toThrow('This execution value is too large to inline') + }) + + it('fails whole large value refs for JavaScript with imports', async () => { + const { block, ctx } = createResolver('javascript') + const state = new 
ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + await expect( + largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: "import x from 'x'\nreturn " }, + block + ) + ).rejects.toThrow('This execution value is too large to inline') + }) + + it('keeps JavaScript lazy helpers enabled when import appears in comments or strings', async () => { + const { block, ctx } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + const result = await largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { + code: "/** @import { Foo } from 'foo' */\nconst text = \"import bar from 'bar'\"\nreturn ", + }, + block + ) + + expect(result.resolvedInputs.code).toBe( + '/** @import { Foo } from \'foo\' */\nconst text = "import bar from \'bar\'"\nreturn (await sim.values.read(globalThis["__blockRef_0"]))' + ) + }) + + it('keeps JavaScript lazy helpers enabled for dynamic import expressions', async () => { + const { block, ctx } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + const result = await largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: "const mod = import('foo')\nreturn " }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'const mod = import(\'foo\')\nreturn (await sim.values.read(globalThis["__blockRef_0"]))' + ) + }) + + it('fails nested large value refs for Function runtimes without lazy helpers', async () => { + const { block, ctx } = createResolver('python') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + rows: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'array', + size: 12 * 1024 
* 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + await expect( + largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: 'return ' }, + block + ) + ).rejects.toThrow('This execution value is too large to inline') + }) + + it('resolves Python block references through globals lookup', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return ' }, @@ -99,10 +462,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks JavaScript string literals around quoted block references', () => { + it('breaks JavaScript string literals around quoted block references', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "const rawEmail = '';\nreturn rawEmail" }, @@ -116,10 +479,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('uses template interpolation for JavaScript template literal block references', () => { + it('uses template interpolation for JavaScript template literal block references', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return `value: `' }, @@ -134,10 +497,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('keeps JavaScript block references inside template expressions executable', () => { + it('keeps JavaScript block references inside template expressions executable', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', // biome-ignore lint/suspicious/noTemplateCurlyInString: intentional — asserting template literal is preserved @@ -152,10 +515,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('ignores JavaScript comment quotes before later block references', () => { + it('ignores JavaScript comment quotes before later block references', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "// don't confuse quote tracking\nreturn " }, @@ -169,10 +532,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks Python string literals around 
quoted block references', () => { + it('breaks Python string literals around quoted block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "raw_email = ''\nreturn raw_email" }, @@ -186,10 +549,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks Python triple-double-quoted strings around block references', () => { + it('breaks Python triple-double-quoted strings around block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'prompt = """\nSummary: \n"""\nreturn prompt' }, @@ -205,10 +568,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('ignores escaped triple-double quotes before later Python block references', () => { + it('ignores escaped triple-double quotes before later Python block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'prompt = """Escaped delimiter: \\"\\"\\"\nSummary: \n"""' }, @@ -224,10 +587,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks Python triple-single-quoted strings around block references', () => { + it('breaks Python triple-single-quoted strings around block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "prompt = '''\nSummary: \n'''\nreturn prompt" }, @@ -243,10 +606,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('ignores Python comment quotes before later block references', () => { + it('ignores Python comment quotes before later block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "# don't confuse quote tracking\nreturn " }, @@ -260,10 +623,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('uses separate Python context variables for repeated mutable references', () => { + it('uses separate Python context variables for repeated mutable references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'a = \nb = \nreturn b' }, @@ -282,10 +645,10 @@ describe('VariableResolver function block inputs', () => { }) }) - it('uses shell-safe expansions for block references', () => { + it('uses shell-safe expansions for block references', async () => { const { block, ctx, resolver } = createResolver('shell') - const result = 
resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'echo suffix && echo ""' }, @@ -302,10 +665,10 @@ describe('VariableResolver function block inputs', () => { }) }) - it('ignores shell comment quotes when formatting later block references', () => { + it('ignores shell comment quotes when formatting later block references', async () => { const { block, ctx, resolver } = createResolver('shell') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "# don't confuse quote tracking\necho " }, diff --git a/apps/sim/executor/variables/resolver.ts b/apps/sim/executor/variables/resolver.ts index c0ab54d23d9..80bb193fde4 100644 --- a/apps/sim/executor/variables/resolver.ts +++ b/apps/sim/executor/variables/resolver.ts @@ -1,14 +1,23 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' -import { BlockType } from '@/executor/constants' +import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' +import { + assertNoLargeValueRefs, + containsLargeValueRef, + getLargeValueMaterializationError, + isLargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { isLikelyReferenceSegment } from '@/lib/workflows/sanitization/references' +import { BlockType, parseReferencePath, REFERENCE } from '@/executor/constants' import type { ExecutionState, LoopScope } from '@/executor/execution/state' import type { ExecutionContext } from '@/executor/types' -import { createEnvVarPattern, replaceValidReferences } from '@/executor/utils/reference-validation' +import { createEnvVarPattern, createReferencePattern } from '@/executor/utils/reference-validation' import { BlockResolver } from '@/executor/variables/resolvers/block' import { EnvResolver } from '@/executor/variables/resolvers/env' import { LoopResolver } from '@/executor/variables/resolvers/loop' import { ParallelResolver } from '@/executor/variables/resolvers/parallel' import { + type AsyncPathNavigator, RESOLVED_EMPTY, type ResolutionContext, type Resolver, @@ -23,6 +32,42 @@ export const FUNCTION_BLOCK_DISPLAY_CODE_KEY = '_runtimeDisplayCode' const logger = createLogger('VariableResolver') +async function replaceValidReferencesAsync( + template: string, + replacer: (match: string, index: number, template: string) => Promise +): Promise { + const pattern = createReferencePattern() + let cursor = 0 + let result = '' + for (const match of template.matchAll(pattern)) { + const fullMatch = match[0] + const index = match.index ?? 0 + result += template.slice(cursor, index) + result += isLikelyReferenceSegment(fullMatch) + ? await replacer(fullMatch, index, template) + : fullMatch + cursor = index + fullMatch.length + } + return result + template.slice(cursor) +} + +async function replaceEnvVarsAsync( + template: string, + replacer: (match: string) => Promise +): Promise { + const pattern = createEnvVarPattern() + let cursor = 0 + let result = '' + for (const match of template.matchAll(pattern)) { + const fullMatch = match[0] + const index = match.index ?? 
0 + result += template.slice(cursor, index) + result += await replacer(fullMatch) + cursor = index + fullMatch.length + } + return result + template.slice(cursor) +} + type ShellQuoteContext = 'single' | 'double' | null type CodeStringQuoteContext = ShellQuoteContext | 'triple-single' | 'triple-double' | 'template' type CodeScanMode = @@ -43,12 +88,13 @@ export class VariableResolver { constructor( workflow: SerializedWorkflow, workflowVariables: Record, - private state: ExecutionState + private state: ExecutionState, + options: { navigatePathAsync?: AsyncPathNavigator } = {} ) { - this.blockResolver = new BlockResolver(workflow) + this.blockResolver = new BlockResolver(workflow, options.navigatePathAsync) this.resolvers = [ - new LoopResolver(workflow), - new ParallelResolver(workflow), + new LoopResolver(workflow, options.navigatePathAsync), + new ParallelResolver(workflow, options.navigatePathAsync), new WorkflowResolver(workflowVariables), new EnvResolver(), this.blockResolver, @@ -64,16 +110,16 @@ export class VariableResolver { * should inject contextVariables into the function execution request body so the * isolated VM can access them as global variables. */ - resolveInputsForFunctionBlock( + async resolveInputsForFunctionBlock( ctx: ExecutionContext, currentNodeId: string, params: Record | null | undefined, block: SerializedBlock - ): { + ): Promise<{ resolvedInputs: Record displayInputs: Record contextVariables: Record - } { + }> { const contextVariables: Record = {} const resolved: Record = {} const display: Record = {} @@ -85,7 +131,7 @@ export class VariableResolver { for (const [key, value] of Object.entries(params)) { if (key === 'code') { if (typeof value === 'string') { - const code = this.resolveCodeWithContextVars( + const code = await this.resolveCodeWithContextVars( ctx, currentNodeId, value, @@ -100,7 +146,7 @@ export class VariableResolver { const displayItems: any[] = [] for (const item of value) { if (item && typeof item === 'object' && typeof item.content === 'string') { - const code = this.resolveCodeWithContextVars( + const code = await this.resolveCodeWithContextVars( ctx, currentNodeId, item.content, @@ -124,11 +170,11 @@ export class VariableResolver { resolved[key] = resolvedItems display[key] = displayItems } else { - resolved[key] = this.resolveValue(ctx, currentNodeId, value, undefined, block) + resolved[key] = await this.resolveValue(ctx, currentNodeId, value, undefined, block) display[key] = resolved[key] } } else { - resolved[key] = this.resolveValue(ctx, currentNodeId, value, undefined, block) + resolved[key] = await this.resolveValue(ctx, currentNodeId, value, undefined, block) display[key] = resolved[key] } } @@ -136,12 +182,12 @@ export class VariableResolver { return { resolvedInputs: resolved, displayInputs: display, contextVariables } } - resolveInputs( + async resolveInputs( ctx: ExecutionContext, currentNodeId: string, params: Record, block?: SerializedBlock - ): Record { + ): Promise> { if (!params) { return {} } @@ -152,15 +198,21 @@ export class VariableResolver { try { const parsed = JSON.parse(params.conditions) if (Array.isArray(parsed)) { - resolved.conditions = parsed.map((cond: any) => ({ - ...cond, - value: - typeof cond.value === 'string' - ? this.resolveTemplateWithoutConditionFormatting(ctx, currentNodeId, cond.value) - : cond.value, - })) + resolved.conditions = await Promise.all( + parsed.map(async (cond: any) => ({ + ...cond, + value: + typeof cond.value === 'string' + ? 
await this.resolveTemplateWithoutConditionFormatting( + ctx, + currentNodeId, + cond.value + ) + : cond.value, + })) + ) } else { - resolved.conditions = this.resolveValue( + resolved.conditions = await this.resolveValue( ctx, currentNodeId, params.conditions, @@ -173,7 +225,7 @@ export class VariableResolver { error: parseError, conditions: params.conditions, }) - resolved.conditions = this.resolveValue( + resolved.conditions = await this.resolveValue( ctx, currentNodeId, params.conditions, @@ -187,17 +239,17 @@ export class VariableResolver { if (isConditionBlock && key === 'conditions') { continue } - resolved[key] = this.resolveValue(ctx, currentNodeId, value, undefined, block) + resolved[key] = await this.resolveValue(ctx, currentNodeId, value, undefined, block) } return resolved } - resolveSingleReference( + async resolveSingleReference( ctx: ExecutionContext, currentNodeId: string, reference: string, loopScope?: LoopScope - ): any { + ): Promise { if (typeof reference === 'string') { const trimmed = reference.trim() if (/^<[^<>]+>$/.test(trimmed)) { @@ -208,7 +260,7 @@ export class VariableResolver { loopScope, } - const result = this.resolveReference(trimmed, resolutionContext) + const result = await this.resolveReference(trimmed, resolutionContext) if (result === RESOLVED_EMPTY) { return null } @@ -219,29 +271,31 @@ export class VariableResolver { return this.resolveValue(ctx, currentNodeId, reference, loopScope) } - private resolveValue( + private async resolveValue( ctx: ExecutionContext, currentNodeId: string, value: any, loopScope?: LoopScope, block?: SerializedBlock - ): any { + ): Promise { if (value === null || value === undefined) { return value } if (Array.isArray(value)) { - return value.map((v) => this.resolveValue(ctx, currentNodeId, v, loopScope, block)) + return Promise.all( + value.map((v) => this.resolveValue(ctx, currentNodeId, v, loopScope, block)) + ) } if (typeof value === 'object') { - return Object.entries(value).reduce( - (acc, [key, val]) => ({ - ...acc, - [key]: this.resolveValue(ctx, currentNodeId, val, loopScope, block), - }), - {} + const entries = await Promise.all( + Object.entries(value).map(async ([key, val]) => [ + key, + await this.resolveValue(ctx, currentNodeId, val, loopScope, block), + ]) ) + return Object.fromEntries(entries) } if (typeof value === 'string') { @@ -256,19 +310,20 @@ export class VariableResolver { * items, workflow variables, env vars) are still inlined as literals so they remain * available without any extra passing mechanism. 
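   *
   * For example, `return <producer.result>` in a JavaScript function block
   * becomes `return globalThis["__blockRef_0"]`, with the resolved value shipped
   * out-of-band as `contextVariables = { __blockRef_0: 'hello world' }` (see the
   * globalThis tests in resolver.test.ts).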
*/ - private resolveCodeWithContextVars( + private async resolveCodeWithContextVars( ctx: ExecutionContext, currentNodeId: string, template: string, loopScope: LoopScope | undefined, block: SerializedBlock, contextVarAccumulator: Record - ): { resolvedCode: string; displayCode: string } { + ): Promise<{ resolvedCode: string; displayCode: string }> { const resolutionContext: ResolutionContext = { executionContext: ctx, executionState: this.state, currentNodeId, loopScope, + allowLargeValueRefs: true, } const language = (block.config?.params as Record | undefined)?.language as @@ -279,14 +334,27 @@ export class VariableResolver { let displayResult = '' let displayCursor = 0 - let result = replaceValidReferences(template, (match, index) => { + let result = await replaceValidReferencesAsync(template, async (match, index) => { if (replacementError) return match displayResult += template.slice(displayCursor, index) displayCursor = index + match.length try { if (this.blockResolver.canResolve(match)) { - const resolved = this.resolveReference(match, resolutionContext) + const lazyBase64 = await this.resolveLazyFileBase64Reference( + match, + resolutionContext, + language, + template, + index, + contextVarAccumulator + ) + if (lazyBase64) { + displayResult += lazyBase64.display + return lazyBase64.replacement + } + + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { displayResult += match return match @@ -298,13 +366,33 @@ export class VariableResolver { // with language-specific runtime access to that stored value. const varName = `__blockRef_${Object.keys(contextVarAccumulator).length}` contextVarAccumulator[varName] = effectiveValue - const replacement = this.formatContextVariableReference( - varName, - language, - template, - index, - effectiveValue - ) + let replacement: string + if (isLargeValueRef(effectiveValue)) { + const lazyReplacement = this.formatLazyLargeValueReference( + varName, + language, + template, + index + ) + if (!lazyReplacement) { + throw getLargeValueMaterializationError(effectiveValue) + } + replacement = lazyReplacement + } else if ( + containsLargeValueRef(effectiveValue) && + !this.canUseJavaScriptRuntimeHelpers(language, template) + ) { + assertNoLargeValueRefs(effectiveValue) + throw new Error('This execution value is too large to inline.') + } else { + replacement = this.formatContextVariableReference( + varName, + language, + template, + index, + effectiveValue + ) + } displayResult += this.formatDisplayValueForCodeContext( effectiveValue, language, @@ -314,7 +402,7 @@ export class VariableResolver { return replacement } - const resolved = this.resolveReference(match, resolutionContext) + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { displayResult += match return match @@ -322,6 +410,35 @@ export class VariableResolver { const effectiveValue = resolved === RESOLVED_EMPTY ? 
null : resolved + if (isLargeValueRef(effectiveValue)) { + const varName = `__blockRef_${Object.keys(contextVarAccumulator).length}` + contextVarAccumulator[varName] = effectiveValue + const lazyReplacement = this.formatLazyLargeValueReference( + varName, + language, + template, + index + ) + if (lazyReplacement) { + displayResult += this.formatDisplayValueForCodeContext( + effectiveValue, + language, + template, + index + ) + return lazyReplacement + } + throw getLargeValueMaterializationError(effectiveValue) + } + + if ( + containsLargeValueRef(effectiveValue) && + !this.canUseJavaScriptRuntimeHelpers(language, template) + ) { + assertNoLargeValueRefs(effectiveValue) + throw new Error('This execution value is too large to inline.') + } + // Non-block reference (loop, parallel, workflow, env): embed as literal const replacement = this.blockResolver.formatValueForBlock( effectiveValue, @@ -342,18 +459,241 @@ export class VariableResolver { throw replacementError } - result = result.replace(createEnvVarPattern(), (match) => { - const resolved = this.resolveReference(match, resolutionContext) + result = await replaceEnvVarsAsync(result, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) - displayResult = displayResult.replace(createEnvVarPattern(), (match) => { - const resolved = this.resolveReference(match, resolutionContext) + displayResult = await replaceEnvVarsAsync(displayResult, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) return { resolvedCode: result, displayCode: displayResult } } + private async resolveLazyFileBase64Reference( + reference: string, + context: ResolutionContext, + language: string | undefined, + template: string, + matchIndex: number, + contextVarAccumulator: Record + ): Promise<{ replacement: string; display: string } | null> { + if (!this.canUseJavaScriptRuntimeHelpers(language, template)) { + return null + } + + const parts = parseReferencePath(reference) + if (parts.length < 3 || parts.at(-1) !== 'base64') { + return null + } + + const fileReference = `${REFERENCE.START}${parts.slice(0, -1).join(REFERENCE.PATH_DELIMITER)}${REFERENCE.END}` + const file = await this.resolveReference(fileReference, context) + if (!isUserFileWithMetadata(file)) { + return null + } + if (!file.key) { + return null + } + + const varName = `__blockRef_${Object.keys(contextVarAccumulator).length}` + const { base64: _base64, ...fileMetadata } = file + contextVarAccumulator[varName] = fileMetadata + const fileExpression = `globalThis[${JSON.stringify(varName)}]` + const lazyExpression = `(await sim.files.readBase64(${fileExpression}))` + + return { + replacement: this.formatJavaScriptAsyncExpression(lazyExpression, template, matchIndex), + display: reference, + } + } + + private formatLazyLargeValueReference( + varName: string, + language: string | undefined, + template: string, + matchIndex: number + ): string | null { + if (!this.canUseJavaScriptRuntimeHelpers(language, template)) { + return null + } + + const expression = `(await sim.values.read(globalThis[${JSON.stringify(varName)}]))` + return this.formatJavaScriptAsyncExpression(expression, template, matchIndex, { + stringifyInStringContext: true, + }) + } + + private formatJavaScriptAsyncExpression( + expression: string, + template: string, + matchIndex: number, + options: { stringifyInStringContext?: boolean } = {} + ): string { + 
const quoteContext = this.getCodeStringQuoteContext(template, matchIndex, 'javascript') + const stringExpression = options.stringifyInStringContext + ? `JSON.stringify(${expression})` + : expression + + if (quoteContext === 'template') { + return `\${${stringExpression}}` + } + if (quoteContext === 'single' || quoteContext === 'double') { + const quote = this.getCodeStringQuoteToken(quoteContext) + return `${quote} + ${stringExpression} + ${quote}` + } + return expression + } + + private canUseJavaScriptRuntimeHelpers(language: string | undefined, template: string): boolean { + if (language !== 'javascript') { + return false + } + return !this.hasJavaScriptModuleDependencySyntax(template) + } + + private hasJavaScriptModuleDependencySyntax(template: string): boolean { + const modes: CodeScanMode[] = [{ type: 'normal' }] + + for (let i = 0; i < template.length; i++) { + const char = template[i] + const next = template[i + 1] + const mode = modes[modes.length - 1] + + if (mode.type === 'line-comment') { + if (char === '\n') modes.pop() + continue + } + + if (mode.type === 'block-comment') { + if (char === '*' && next === '/') { + modes.pop() + i++ + } + continue + } + + if (mode.type === 'single' || mode.type === 'double') { + const quote = mode.type === 'single' ? "'" : '"' + if (char === '\\') { + i++ + continue + } + if (char === quote || char === '\n') modes.pop() + continue + } + + if (mode.type === 'template') { + if (char === '\\') { + i++ + continue + } + if (char === '`') { + modes.pop() + continue + } + if (char === '$' && next === '{') { + modes.push({ type: 'template-expression', depth: 1 }) + i++ + } + continue + } + + const isCodeMode = mode.type === 'normal' || mode.type === 'template-expression' + if (!isCodeMode) continue + + if (char === '/' && next === '/') { + modes.push({ type: 'line-comment' }) + i++ + continue + } + if (char === '/' && next === '*') { + modes.push({ type: 'block-comment' }) + i++ + continue + } + if (char === "'") { + modes.push({ type: 'single' }) + continue + } + if (char === '"') { + modes.push({ type: 'double' }) + continue + } + if (char === '`') { + modes.push({ type: 'template' }) + continue + } + + if (mode.type === 'template-expression') { + if (char === '{') { + mode.depth += 1 + continue + } + if (char === '}') { + mode.depth -= 1 + if (mode.depth === 0) modes.pop() + continue + } + } + + if (this.startsWithStaticImport(template, i) || this.startsWithRequireCall(template, i)) { + return true + } + } + + return false + } + + private startsWithStaticImport(template: string, index: number): boolean { + if (!this.matchesKeywordAt(template, index, 'import')) { + return false + } + const nextIndex = this.skipWhitespace(template, index + 'import'.length) + if (nextIndex === index + 'import'.length) { + return false + } + return template[nextIndex] !== '(' + } + + private startsWithRequireCall(template: string, index: number): boolean { + if (!this.matchesKeywordAt(template, index, 'require')) { + return false + } + const openParenIndex = this.skipWhitespace(template, index + 'require'.length) + if (template[openParenIndex] !== '(') { + return false + } + const argumentIndex = this.skipWhitespace(template, openParenIndex + 1) + return ( + template[argumentIndex] === "'" || + template[argumentIndex] === '"' || + template[argumentIndex] === '`' + ) + } + + private matchesKeywordAt(template: string, index: number, keyword: string): boolean { + if (!template.startsWith(keyword, index)) { + return false + } + const before = index > 0 ? 
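    // Annotation: identifier-boundary check — with keyword 'import', 'reimport x'
    // fails because the preceding 'e' is an identifier character, while 'import x'
    // or ';import x' passes; the same test is applied to the character after.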
template[index - 1] : '' + const after = template[index + keyword.length] ?? '' + return !this.isJavaScriptIdentifierChar(before) && !this.isJavaScriptIdentifierChar(after) + } + + private skipWhitespace(template: string, index: number): number { + let cursor = index + while (cursor < template.length && /\s/.test(template[cursor])) { + cursor++ + } + return cursor + } + + private isJavaScriptIdentifierChar(char: string): boolean { + return /[A-Za-z0-9_$]/.test(char) + } + private formatContextVariableReference( varName: string, language: string | undefined, @@ -669,13 +1009,13 @@ export class VariableResolver { return previous === undefined || /\s|[;&|()<>]/.test(previous) } - private resolveTemplate( + private async resolveTemplate( ctx: ExecutionContext, currentNodeId: string, template: string, loopScope?: LoopScope, block?: SerializedBlock - ): string { + ): Promise { const resolutionContext: ResolutionContext = { executionContext: ctx, executionState: this.state, @@ -693,11 +1033,11 @@ export class VariableResolver { | undefined) : undefined - let result = replaceValidReferences(template, (match) => { + let result = await replaceValidReferencesAsync(template, async (match) => { if (replacementError) return match try { - const resolved = this.resolveReference(match, resolutionContext) + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { return match } @@ -720,19 +1060,19 @@ export class VariableResolver { throw replacementError } - result = result.replace(createEnvVarPattern(), (match) => { - const resolved = this.resolveReference(match, resolutionContext) + result = await replaceEnvVarsAsync(result, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) return result } - private resolveTemplateWithoutConditionFormatting( + private async resolveTemplateWithoutConditionFormatting( ctx: ExecutionContext, currentNodeId: string, template: string, loopScope?: LoopScope - ): string { + ): Promise { const resolutionContext: ResolutionContext = { executionContext: ctx, executionState: this.state, @@ -742,11 +1082,11 @@ export class VariableResolver { let replacementError: Error | null = null - let result = replaceValidReferences(template, (match) => { + let result = await replaceValidReferencesAsync(template, async (match) => { if (replacementError) return match try { - const resolved = this.resolveReference(match, resolutionContext) + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { return match } @@ -779,17 +1119,19 @@ export class VariableResolver { throw replacementError } - result = result.replace(createEnvVarPattern(), (match) => { - const resolved = this.resolveReference(match, resolutionContext) + result = await replaceEnvVarsAsync(result, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) return result } - private resolveReference(reference: string, context: ResolutionContext): any { + private async resolveReference(reference: string, context: ResolutionContext): Promise { for (const resolver of this.resolvers) { if (resolver.canResolve(reference)) { - const result = resolver.resolve(reference, context) + const result = resolver.resolveAsync + ? 
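        // Annotation: resolveAsync is the optional capability added in this change;
        // resolvers that may hydrate persisted large values (block, loop, parallel)
        // implement it, and everything else falls back to the synchronous resolve().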
await resolver.resolveAsync(reference, context) + : resolver.resolve(reference, context) return result } } diff --git a/apps/sim/executor/variables/resolvers/block.test.ts b/apps/sim/executor/variables/resolvers/block.test.ts index 5b9ed37fc3b..a739dc28734 100644 --- a/apps/sim/executor/variables/resolvers/block.test.ts +++ b/apps/sim/executor/variables/resolvers/block.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it, vi } from 'vitest' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { ExecutionState } from '@/executor/execution/state' import { BlockResolver } from './block' import { RESOLVED_EMPTY, type ResolutionContext } from './reference' @@ -174,6 +175,9 @@ function createTestContext( return { executionContext: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', blockStates: contextBlockStates ?? new Map(), }, executionState: state, @@ -247,6 +251,45 @@ describe('BlockResolver', () => { expect(resolver.resolve('', ctx)).toBe('alice@test.com') }) + it('should resolve nested scalar paths inside compacted block references', async () => { + const workflow = createTestWorkflow([{ id: 'source' }]) + const resolver = new BlockResolver(workflow) + const compacted = await compactExecutionPayload( + { + user: { profile: { name: 'Alice' } }, + items: Array.from({ length: 100 }, (_, index) => ({ id: index })), + }, + { + thresholdBytes: 64, + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } + ) + const ctx = createTestContext('current', { source: compacted }) + + expect(resolver.resolve('', ctx)).toBe('Alice') + expect(resolver.resolve('', ctx)).toBe(1) + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') + }) + + it('should reject full container references that contain compacted children', async () => { + const workflow = createTestWorkflow([{ id: 'source' }]) + const resolver = new BlockResolver(workflow) + const compacted = await compactExecutionPayload( + { + metadata: { id: 'event-1' }, + attachment: { body: 'x'.repeat(2048) }, + }, + { thresholdBytes: 256, preserveRoot: true } + ) + const ctx = createTestContext('current', { source: compacted }) + + expect(resolver.resolve('', ctx)).toBe('event-1') + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') + }) + it.concurrent('should resolve array index in path', () => { const workflow = createTestWorkflow([{ id: 'source' }]) const resolver = new BlockResolver(workflow) diff --git a/apps/sim/executor/variables/resolvers/block.ts b/apps/sim/executor/variables/resolvers/block.ts index e1a5be03f7a..7a03093e6a1 100644 --- a/apps/sim/executor/variables/resolvers/block.ts +++ b/apps/sim/executor/variables/resolvers/block.ts @@ -1,3 +1,4 @@ +import { assertNoLargeValueRefs } from '@/lib/execution/payloads/large-value-ref' import { isReference, normalizeName, @@ -9,9 +10,11 @@ import { InvalidFieldError, type OutputSchema, resolveBlockReference, + resolveBlockReferenceAsync, } from '@/executor/utils/block-reference' import { formatLiteralForCode } from '@/executor/utils/code-formatting' import { + type AsyncPathNavigator, navigatePath, RESOLVED_EMPTY, type ResolutionContext, @@ -23,7 +26,10 @@ export class BlockResolver implements Resolver { private nameToBlockId: Map private blockById: Map - constructor(private workflow: SerializedWorkflow) { + constructor( + private workflow: SerializedWorkflow, + private 
navigatePathAsync?: AsyncPathNavigator + ) { this.nameToBlockId = new Map() this.blockById = new Map() for (const block of workflow.blocks) { @@ -75,17 +81,97 @@ export class BlockResolver implements Resolver { } try { - const result = resolveBlockReference(blockName, pathParts, { + const result = resolveBlockReference( + blockName, + pathParts, + { + blockNameMapping: Object.fromEntries(this.nameToBlockId), + blockData, + blockOutputSchemas, + }, + { + allowLargeValueRefs: context.allowLargeValueRefs, + executionContext: context.executionContext, + } + )! + + if (result.value !== undefined) { + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(result.value) + } + return result.value + } + + const backwardsCompat = this.handleBackwardsCompatSync(block, output, pathParts) + if (backwardsCompat !== undefined) { + return backwardsCompat + } + + return RESOLVED_EMPTY + } catch (error) { + if (error instanceof InvalidFieldError) { + const fallback = this.handleBackwardsCompatSync(block, output, pathParts) + if (fallback !== undefined) { + return fallback + } + } + throw error + } + } + + async resolveAsync(reference: string, context: ResolutionContext): Promise { + if (!this.navigatePathAsync) { + return this.resolve(reference, context) + } + const parts = parseReferencePath(reference) + if (parts.length === 0) { + return undefined + } + const [blockName, ...pathParts] = parts + + const blockId = this.findBlockIdByName(blockName) + if (!blockId) { + return undefined + } + + const block = this.blockById.get(blockId)! + const output = this.getBlockOutput(blockId, context) + + const blockData: Record = {} + const blockOutputSchemas: Record = {} + + if (output !== undefined) { + blockData[blockId] = output + } + + const outputSchema = getBlockSchema(block) + + if (outputSchema && Object.keys(outputSchema).length > 0) { + blockOutputSchemas[blockId] = outputSchema + } + + try { + const blockReferenceContext = { blockNameMapping: Object.fromEntries(this.nameToBlockId), blockData, blockOutputSchemas, - })! + } + const result = (await resolveBlockReferenceAsync( + blockName, + pathParts, + blockReferenceContext, + context, + this.navigatePathAsync + ))! 
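      // Annotation: as in the sync path above, the non-null assertion relies on
      // resolveBlockReferenceAsync returning a result object for known blocks.
      // The assertNoLargeValueRefs() guard below keeps raw large-value refs
      // (small marker objects standing in for payloads persisted out of band;
      // exact shape lives in large-value-ref.ts) from leaking to callers that
      // did not opt in via context.allowLargeValueRefs.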
       if (result.value !== undefined) {
+        if (!context.allowLargeValueRefs) {
+          assertNoLargeValueRefs(result.value)
+        }
         return result.value
       }
 
-      const backwardsCompat = this.handleBackwardsCompat(block, output, pathParts)
+      const backwardsCompat = await this.handleBackwardsCompat(block, output, pathParts, context)
       if (backwardsCompat !== undefined) {
         return backwardsCompat
       }
@@ -93,7 +179,7 @@
       return RESOLVED_EMPTY
     } catch (error) {
       if (error instanceof InvalidFieldError) {
-        const fallback = this.handleBackwardsCompat(block, output, pathParts)
+        const fallback = await this.handleBackwardsCompat(block, output, pathParts, context)
         if (fallback !== undefined) {
           return fallback
         }
@@ -102,7 +188,7 @@
     }
   }
 
-  private handleBackwardsCompat(
+  private handleBackwardsCompatSync(
     block: SerializedBlock,
     output: unknown,
     pathParts: string[]
@@ -126,6 +212,56 @@
       }
     }
 
+    const outputRecord = output as Record<string, any> | undefined
+    if (
+      (block.metadata?.id === 'workflow' || block.metadata?.id === 'workflow_input') &&
+      pathParts[0] === 'result' &&
+      pathParts[1] === 'response' &&
+      outputRecord?.result !== undefined &&
+      typeof outputRecord.result === 'object' &&
+      outputRecord.result !== null &&
+      (outputRecord.result as Record<string, unknown>)?.response === undefined
+    ) {
+      const adjustedPathParts = ['result', ...pathParts.slice(2)]
+      const fallbackResult = navigatePath(output, adjustedPathParts)
+      if (fallbackResult !== undefined) {
+        return fallbackResult
+      }
+    }
+
+    return undefined
+  }
+
+  private async handleBackwardsCompat(
+    block: SerializedBlock,
+    output: unknown,
+    pathParts: string[],
+    context: ResolutionContext
+  ): Promise<any> {
+    const navigatePathAsync = this.navigatePathAsync
+    if (!navigatePathAsync) {
+      return this.handleBackwardsCompatSync(block, output, pathParts)
+    }
+
+    if (output === undefined || pathParts.length === 0) {
+      return undefined
+    }
+
+    if (
+      block.metadata?.id === 'response' &&
+      pathParts[0] === 'response' &&
+      (output as Record<string, unknown>)?.response === undefined
+    ) {
+      const adjustedPathParts = pathParts.slice(1)
+      if (adjustedPathParts.length === 0) {
+        return output
+      }
+      const fallbackResult = await navigatePathAsync(output, adjustedPathParts, context)
+      if (fallbackResult !== undefined) {
+        return fallbackResult
+      }
+    }
+
     const isWorkflowBlock =
       block.metadata?.id === 'workflow' || block.metadata?.id === 'workflow_input'
     const outputRecord = output as Record<string, Record<string, unknown> | undefined>
@@ -136,7 +272,7 @@
       outputRecord?.result?.response === undefined
     ) {
       const adjustedPathParts = ['result', ...pathParts.slice(2)]
-      const fallbackResult = navigatePath(output, adjustedPathParts)
+      const fallbackResult = await navigatePathAsync(output, adjustedPathParts, context)
       if (fallbackResult !== undefined) {
         return fallbackResult
       }
diff --git a/apps/sim/executor/variables/resolvers/loop.test.ts b/apps/sim/executor/variables/resolvers/loop.test.ts
index 3d3b643b516..48576ffc67a 100644
--- a/apps/sim/executor/variables/resolvers/loop.test.ts
+++ b/apps/sim/executor/variables/resolvers/loop.test.ts
@@ -1,4 +1,5 @@
 import { describe, expect, it } from 'vitest'
+import { compactExecutionPayload } from '@/lib/execution/payloads/serializer'
 import type { LoopScope } from '@/executor/execution/state'
 import { InvalidFieldError } from '@/executor/utils/block-reference'
 import { LoopResolver } from './loop'
@@ -61,6 +62,9 @@ function
createTestContext( ): ResolutionContext { return { executionContext: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', loopExecutions: loopExecutions ?? new Map(), }, executionState: { @@ -232,6 +236,9 @@ describe('LoopResolver', () => { const ctx = createTestContext('block-1', loopScope) expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow( + 'Available fields: index' + ) }) it.concurrent('should handle iteration index 0 correctly', () => { @@ -361,7 +368,7 @@ describe('LoopResolver', () => { expect(resolver.resolve('', ctx)).toBe(4) }) - it.concurrent('should return undefined for index when block is outside the loop', () => { + it.concurrent('should throw for contextual fields when block is outside the loop', () => { const workflow = createTestWorkflow({ 'loop-1': { nodes: ['block-1'] } }, [ { id: 'loop-1', name: 'Loop 1' }, ]) @@ -370,7 +377,8 @@ describe('LoopResolver', () => { const loopExecutions = new Map([['loop-1', loopScope]]) const ctx = createTestContext('block-outside', undefined, loopExecutions) - expect(resolver.resolve('', ctx)).toBeUndefined() + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') }) it.concurrent('should resolve result from anywhere after loop completes', () => { @@ -399,6 +407,30 @@ describe('LoopResolver', () => { expect(resolver.resolve('', ctx)).toEqual([{ response: 'a' }]) expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + }) + + it('should resolve nested paths inside compacted result references', async () => { + const workflow = createTestWorkflow({ 'loop-1': { nodes: ['block-1'] } }, [ + { id: 'loop-1', name: 'Loop 1' }, + ]) + const resolver = new LoopResolver(workflow) + const compacted = await compactExecutionPayload( + { results: [[{ response: 'a' }], [{ response: 'b', payload: 'x'.repeat(2048) }]] }, + { + thresholdBytes: 256, + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } + ) + const ctx = createTestContext('block-outside', undefined, new Map(), { + 'loop-1': compacted, + }) + + expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') }) it.concurrent('should resolve forEach properties via named reference', () => { @@ -427,6 +459,20 @@ describe('LoopResolver', () => { const ctx = createTestContext('block-1', undefined, loopExecutions) expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: index') + }) + + it.concurrent('should list only results for unknown fields outside a named loop', () => { + const workflow = createTestWorkflow({ 'loop-1': { nodes: ['block-1'] } }, [ + { id: 'loop-1', name: 'Loop 1' }, + ]) + const resolver = new LoopResolver(workflow) + const loopScope = createLoopScope({ iteration: 0 }) + const loopExecutions = new Map([['loop-1', loopScope]]) + const ctx = createTestContext('block-outside', undefined, loopExecutions) + + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') }) it.concurrent('should not resolve named ref when no matching block exists', () => { diff --git a/apps/sim/executor/variables/resolvers/loop.ts b/apps/sim/executor/variables/resolvers/loop.ts 
index 8df57668825..3b0a3e1b611 100644 --- a/apps/sim/executor/variables/resolvers/loop.ts +++ b/apps/sim/executor/variables/resolvers/loop.ts @@ -1,4 +1,5 @@ import { createLogger } from '@sim/logger' +import { assertNoLargeValueRefs } from '@/lib/execution/payloads/large-value-ref' import { isReference, normalizeName, parseReferencePath, REFERENCE } from '@/executor/constants' import { InvalidFieldError } from '@/executor/utils/block-reference' import { @@ -7,18 +8,26 @@ import { stripOuterBranchSuffix, } from '@/executor/utils/subflow-utils' import { + type AsyncPathNavigator, navigatePath, type ResolutionContext, type Resolver, + splitLeadingBracketPath, } from '@/executor/variables/resolvers/reference' import type { SerializedWorkflow } from '@/serializer/types' const logger = createLogger('LoopResolver') +const LOOP_OUTPUT_FIELDS = ['results'] as const +const LOOP_CONTEXT_FIELDS = ['index'] as const +const FOR_EACH_LOOP_CONTEXT_FIELDS = ['index', 'currentItem', 'items'] as const export class LoopResolver implements Resolver { private loopNameToId: Map - constructor(private workflow: SerializedWorkflow) { + constructor( + private workflow: SerializedWorkflow, + private navigatePathAsync?: AsyncPathNavigator + ) { this.loopNameToId = new Map() for (const block of workflow.blocks) { if (workflow.loops[block.id] && block.metadata?.name) { @@ -43,6 +52,27 @@ export class LoopResolver implements Resolver { } resolve(reference: string, context: ResolutionContext): any { + return this.resolveInternal(reference, context, false) + } + + async resolveAsync(reference: string, context: ResolutionContext): Promise { + if (!this.navigatePathAsync) { + return this.resolve(reference, context) + } + return this.resolveInternal(reference, context, true) + } + + private async resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: true + ): Promise + private resolveInternal(reference: string, context: ResolutionContext, useAsyncPath: false): any + private resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: boolean + ): any | Promise { const parts = parseReferencePath(reference) if (parts.length === 0) { logger.warn('Invalid loop reference', { reference }) @@ -76,34 +106,32 @@ export class LoopResolver implements Resolver { } if (rest.length > 0) { - const property = rest[0] + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rest[0]) if (LoopResolver.OUTPUT_PROPERTIES.has(property)) { if (!targetLoopId) { return undefined } - return this.resolveOutput(targetLoopId, rest.slice(1), context) + return useAsyncPath + ? this.resolveOutputAsync(targetLoopId, [...bracketPathParts, ...rest.slice(1)], context) + : this.resolveOutput(targetLoopId, [...bracketPathParts, ...rest.slice(1)], context) } + const isContextual = + isGenericRef || + (targetLoopId !== undefined && + this.isBlockInLoopOrDescendant(context.currentNodeId, targetLoopId)) + if (!LoopResolver.KNOWN_PROPERTIES.has(property)) { - const isForEach = targetLoopId - ? this.isForEachLoop(targetLoopId) - : context.loopScope?.items !== undefined - const availableFields = isForEach - ? 
['index', 'currentItem', 'items', 'result'] - : ['index', 'result'] - throw new InvalidFieldError(firstPart, property, availableFields) + throw new InvalidFieldError( + firstPart, + rest[0], + this.getAvailableFields(targetLoopId, context) + ) } - if (!isGenericRef && targetLoopId) { - if (!this.isBlockInLoopOrDescendant(context.currentNodeId, targetLoopId)) { - logger.warn('Block is not inside the referenced loop', { - reference, - blockId: context.currentNodeId, - loopId: targetLoopId, - }) - return undefined - } + if (!isContextual) { + throw new InvalidFieldError(firstPart, rest[0], [...LOOP_OUTPUT_FIELDS]) } } @@ -130,7 +158,9 @@ export class LoopResolver implements Resolver { return obj } - const [property, ...pathParts] = rest + const [rawProperty, ...remainingPathParts] = rest + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rawProperty) + const pathParts = [...bracketPathParts, ...remainingPathParts] let value: any switch (property) { @@ -148,7 +178,9 @@ export class LoopResolver implements Resolver { } if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return useAsyncPath && this.navigatePathAsync + ? this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) } return value @@ -161,7 +193,31 @@ export class LoopResolver implements Resolver { } const value = (output as Record).results if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) + } + return value + } + + private async resolveOutputAsync( + loopId: string, + pathParts: string[], + context: ResolutionContext + ): Promise { + const output = context.executionState.getBlockOutput(loopId) + if (!output || typeof output !== 'object') { + return undefined + } + const value = (output as Record).results + if (pathParts.length > 0) { + return this.navigatePathAsync + ? this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) } return value } @@ -234,4 +290,22 @@ export class LoopResolver implements Resolver { const loopConfig = this.workflow.loops?.[originalId] return loopConfig?.loopType === 'forEach' } + + private getAvailableFields( + targetLoopId: string | undefined, + context: ResolutionContext + ): string[] { + const isContextual = + targetLoopId === undefined || + this.isBlockInLoopOrDescendant(context.currentNodeId, targetLoopId) + + if (!isContextual) { + return [...LOOP_OUTPUT_FIELDS] + } + + const isForEach = targetLoopId + ? this.isForEachLoop(targetLoopId) + : context.loopScope?.items !== undefined + return isForEach ? 
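      // Annotation: forEach loops advertise index/currentItem/items in the error
      // message; plain loops advertise index only, and the non-contextual case
      // above already returned ['results'].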
[...FOR_EACH_LOOP_CONTEXT_FIELDS] : [...LOOP_CONTEXT_FIELDS] + } } diff --git a/apps/sim/executor/variables/resolvers/parallel.test.ts b/apps/sim/executor/variables/resolvers/parallel.test.ts index cec6294f391..3d4764acd4a 100644 --- a/apps/sim/executor/variables/resolvers/parallel.test.ts +++ b/apps/sim/executor/variables/resolvers/parallel.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from 'vitest' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { InvalidFieldError } from '@/executor/utils/block-reference' import { ParallelResolver } from './parallel' import type { ResolutionContext } from './reference' @@ -76,11 +77,16 @@ function createParallelScope(items: any[]) { function createTestContext( currentNodeId: string, parallelExecutions?: Map, - blockOutputs?: Record + blockOutputs?: Record, + parallelBlockMapping?: Map ): ResolutionContext { return { executionContext: { + workflowId: 'workflow-1', + workspaceId: 'workspace-1', + executionId: 'execution-1', parallelExecutions: parallelExecutions ?? new Map(), + parallelBlockMapping, }, executionState: { getBlockOutput: (id: string) => blockOutputs?.[id], @@ -158,6 +164,34 @@ describe('ParallelResolver', () => { expect(resolver.resolve('', createTestContext('block-1₍2₎'))).toBe(2) }) + it.concurrent('uses runtime branch mapping for batched local branch node IDs', () => { + const workflow = createTestWorkflow({ + 'parallel-1': { nodes: ['block-1'], distribution: ['a', 'b', 'c', 'd'] }, + }) + const resolver = new ParallelResolver(workflow) + const parallelScope = createParallelScope(['a', 'b', 'c', 'd']) + const parallelExecutions = new Map([['parallel-1', parallelScope]]) + const parallelBlockMapping = new Map([ + [ + 'block-1₍0₎', + { + originalBlockId: 'block-1', + parallelId: 'parallel-1', + iterationIndex: 2, + }, + ], + ]) + const ctx = createTestContext( + 'block-1₍0₎', + parallelExecutions, + undefined, + parallelBlockMapping + ) + + expect(resolver.resolve('', ctx)).toBe(2) + expect(resolver.resolve('', ctx)).toBe('c') + }) + it.concurrent('should return undefined when branch index cannot be extracted', () => { const workflow = createTestWorkflow({ 'parallel-1': { nodes: ['block-1'], distribution: ['a', 'b'] }, @@ -313,6 +347,9 @@ describe('ParallelResolver', () => { const ctx = createTestContext('block-1₍0₎') expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow( + 'Available fields: index' + ) }) it.concurrent('should return undefined when block is not in any parallel', () => { @@ -428,6 +465,31 @@ describe('ParallelResolver', () => { expect(resolver.resolve('', ctx)).toEqual([{ response: 'a' }]) expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + }) + + it('should resolve nested paths inside compacted result references', async () => { + const workflow = createTestWorkflow( + { 'parallel-1': { nodes: ['block-1'], distribution: ['a', 'b'] } }, + [{ id: 'parallel-1', name: 'Parallel 1' }] + ) + const resolver = new ParallelResolver(workflow) + const compacted = await compactExecutionPayload( + { results: [[{ response: 'a' }], [{ response: 'b', payload: 'x'.repeat(2048) }]] }, + { + thresholdBytes: 256, + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } + ) + const ctx = createTestContext('block-outside', new Map(), { + 'parallel-1': compacted, + }) + + expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + 
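    // Annotation (assumed semantics, consistent with the assertions here):
    // compactExecutionPayload() replaces values over thresholdBytes with
    // large-value refs, so scalar paths into the compacted results still
    // resolve inline while the oversized entry itself throws just below.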
expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') }) it.concurrent('should resolve result with empty currentNodeId', () => { @@ -489,6 +551,29 @@ describe('ParallelResolver', () => { const ctx = createTestContext('block-1₍0₎') expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow( + 'Available fields: index, currentItem, items' + ) + }) + + it.concurrent('should list only results for contextual fields outside a named parallel', () => { + const workflow = createTestWorkflow( + { + 'parallel-1': { + nodes: ['block-1'], + distribution: ['a'], + parallelType: 'collection', + }, + }, + [{ id: 'parallel-1', name: 'Parallel 1' }] + ) + const resolver = new ParallelResolver(workflow) + const ctx = createTestContext('block-outside', new Map()) + + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') }) it.concurrent('should not resolve named ref when no matching block exists', () => { diff --git a/apps/sim/executor/variables/resolvers/parallel.ts b/apps/sim/executor/variables/resolvers/parallel.ts index 7afeedece97..538fc69780d 100644 --- a/apps/sim/executor/variables/resolvers/parallel.ts +++ b/apps/sim/executor/variables/resolvers/parallel.ts @@ -1,25 +1,35 @@ import { createLogger } from '@sim/logger' +import { assertNoLargeValueRefs } from '@/lib/execution/payloads/large-value-ref' import { isReference, normalizeName, parseReferencePath, REFERENCE } from '@/executor/constants' import { InvalidFieldError } from '@/executor/utils/block-reference' import { extractBranchIndex, + extractOuterBranchIndex, findEffectiveContainerId, stripCloneSuffixes, stripOuterBranchSuffix, } from '@/executor/utils/subflow-utils' import { + type AsyncPathNavigator, navigatePath, type ResolutionContext, type Resolver, + splitLeadingBracketPath, } from '@/executor/variables/resolvers/reference' import type { SerializedParallel, SerializedWorkflow } from '@/serializer/types' const logger = createLogger('ParallelResolver') +const PARALLEL_OUTPUT_FIELDS = ['results'] as const +const PARALLEL_CONTEXT_FIELDS = ['index'] as const +const COLLECTION_PARALLEL_CONTEXT_FIELDS = ['index', 'currentItem', 'items'] as const export class ParallelResolver implements Resolver { private parallelNameToId: Map - constructor(private workflow: SerializedWorkflow) { + constructor( + private workflow: SerializedWorkflow, + private navigatePathAsync?: AsyncPathNavigator + ) { this.parallelNameToId = new Map() for (const block of workflow.blocks) { if (workflow.parallels?.[block.id] && block.metadata?.name) { @@ -44,6 +54,27 @@ export class ParallelResolver implements Resolver { } resolve(reference: string, context: ResolutionContext): any { + return this.resolveInternal(reference, context, false) + } + + async resolveAsync(reference: string, context: ResolutionContext): Promise { + if (!this.navigatePathAsync) { + return this.resolve(reference, context) + } + return this.resolveInternal(reference, context, true) + } + + private async resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: true + ): Promise + private resolveInternal(reference: string, context: ResolutionContext, useAsyncPath: false): any + private resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: 
boolean + ): any | Promise { const parts = parseReferencePath(reference) if (parts.length === 0) { logger.warn('Invalid parallel reference', { reference }) @@ -74,8 +105,17 @@ export class ParallelResolver implements Resolver { ) } - if (rest.length > 0 && ParallelResolver.OUTPUT_PROPERTIES.has(rest[0])) { - return this.resolveOutput(targetParallelId, rest.slice(1), context) + if (rest.length > 0) { + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rest[0]) + if (ParallelResolver.OUTPUT_PROPERTIES.has(property)) { + return useAsyncPath + ? this.resolveOutputAsync( + targetParallelId, + [...bracketPathParts, ...rest.slice(1)], + context + ) + : this.resolveOutput(targetParallelId, [...bracketPathParts, ...rest.slice(1)], context) + } } // Look up config using the original (non-cloned) ID @@ -86,18 +126,14 @@ export class ParallelResolver implements Resolver { return undefined } - if (!isGenericRef) { - if (!this.isBlockInParallelOrDescendant(context.currentNodeId, originalParallelId)) { - logger.warn('Block is not inside the referenced parallel', { - reference, - blockId: context.currentNodeId, - parallelId: targetParallelId, - }) - return undefined - } + const isContextual = + isGenericRef || this.isBlockInParallelOrDescendant(context.currentNodeId, originalParallelId) + + if (rest.length > 0 && !isContextual) { + throw new InvalidFieldError(firstPart, rest[0], [...PARALLEL_OUTPUT_FIELDS]) } - const branchIndex = extractBranchIndex(context.currentNodeId) + const branchIndex = this.resolveBranchIndex(targetParallelId, context) if (branchIndex === null) { return undefined } @@ -116,15 +152,12 @@ export class ParallelResolver implements Resolver { return result } - const property = rest[0] - const pathParts = rest.slice(1) + const [rawProperty, ...remainingPathParts] = rest + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rawProperty) + const pathParts = [...bracketPathParts, ...remainingPathParts] if (!ParallelResolver.KNOWN_PROPERTIES.has(property)) { - const isCollection = parallelConfig.parallelType === 'collection' - const availableFields = isCollection - ? ['index', 'currentItem', 'items', 'result'] - : ['index', 'result'] - throw new InvalidFieldError(firstPart, property, availableFields) + throw new InvalidFieldError(firstPart, rawProperty, this.getAvailableFields(parallelConfig)) } let value: unknown @@ -142,12 +175,28 @@ export class ParallelResolver implements Resolver { } if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return useAsyncPath && this.navigatePathAsync + ? 
this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) } return value } + private resolveBranchIndex(targetParallelId: string, context: ResolutionContext): number | null { + const mapping = context.executionContext.parallelBlockMapping?.get(context.currentNodeId) + if (mapping?.parallelId === targetParallelId) { + return mapping.iterationIndex + } + + const outerBranchIndex = extractOuterBranchIndex(context.currentNodeId) + if (outerBranchIndex !== undefined) { + return outerBranchIndex + } + + return extractBranchIndex(context.currentNodeId) + } + private findInnermostParallelForBlock(blockId: string): string | undefined { const baseId = stripCloneSuffixes(blockId) const parallels = this.workflow.parallels @@ -234,7 +283,31 @@ export class ParallelResolver implements Resolver { } const value = (output as Record).results if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) + } + return value + } + + private async resolveOutputAsync( + parallelId: string, + pathParts: string[], + context: ResolutionContext + ): Promise { + const output = context.executionState.getBlockOutput(parallelId) + if (!output || typeof output !== 'object') { + return undefined + } + const value = (output as Record).results + if (pathParts.length > 0) { + return this.navigatePathAsync + ? this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) } return value } @@ -278,4 +351,10 @@ export class ParallelResolver implements Resolver { return [] } + + private getAvailableFields(parallelConfig: SerializedParallel): string[] { + return parallelConfig.parallelType === 'collection' + ? 
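      // Annotation: mirrors LoopResolver.getAvailableFields — collection parallels
      // expose index/currentItem/items; count parallels expose index only.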
[...COLLECTION_PARALLEL_CONTEXT_FIELDS] + : [...PARALLEL_CONTEXT_FIELDS] + } } diff --git a/apps/sim/executor/variables/resolvers/reference-async.server.ts b/apps/sim/executor/variables/resolvers/reference-async.server.ts new file mode 100644 index 00000000000..78dca4a3712 --- /dev/null +++ b/apps/sim/executor/variables/resolvers/reference-async.server.ts @@ -0,0 +1,120 @@ +import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' +import { + assertNoLargeValueRefs, + getLargeValueMaterializationError, + isLargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { materializeLargeValueRef } from '@/lib/execution/payloads/store' +import { hydrateUserFileWithBase64 } from '@/lib/uploads/utils/user-file-base64.server' +import type { ResolutionContext } from '@/executor/variables/resolvers/reference' + +async function materializeLargeValueRefOrThrow( + value: unknown, + context: ResolutionContext +): Promise { + if (!isLargeValueRef(value)) { + return value + } + const materialized = await materializeLargeValueRef(value, { + workspaceId: context.executionContext.workspaceId, + workflowId: context.executionContext.workflowId, + executionId: context.executionContext.executionId, + largeValueExecutionIds: context.executionContext.largeValueExecutionIds, + allowLargeValueWorkflowScope: context.executionContext.allowLargeValueWorkflowScope, + userId: context.executionContext.userId, + }) + if (materialized === undefined) { + throw getLargeValueMaterializationError(value) + } + return materialized +} + +async function hydrateExplicitBase64( + file: unknown, + context: ResolutionContext +): Promise { + if (!isUserFileWithMetadata(file)) { + return undefined + } + const hydrated = await hydrateUserFileWithBase64(file, { + requestId: context.executionContext.metadata.requestId, + workspaceId: context.executionContext.workspaceId, + workflowId: context.executionContext.workflowId, + executionId: context.executionContext.executionId, + largeValueExecutionIds: context.executionContext.largeValueExecutionIds, + allowLargeValueWorkflowScope: context.executionContext.allowLargeValueWorkflowScope, + userId: context.executionContext.userId, + maxBytes: context.executionContext.base64MaxBytes, + }) + if (!hydrated.base64) { + throw new Error( + `Base64 content for ${file.name} is unavailable or exceeds the configured inline limit.` + ) + } + return hydrated.base64 +} + +/** + * Server-side path navigation used during execution. It can hydrate persisted + * large values and UserFile.base64 only when the requested path explicitly asks + * for base64. + */ +export async function navigatePathAsync( + obj: any, + path: string[], + context: ResolutionContext +): Promise { + let current = obj + for (const part of path) { + current = await materializeLargeValueRefOrThrow(current, context) + + if (current === null || current === undefined) { + return undefined + } + + if (part === 'base64') { + const base64 = await hydrateExplicitBase64(current, context) + if (base64 !== undefined) { + current = base64 + continue + } + } + + const arrayMatch = part.match(/^([^[]+)(\[.+)$/) + if (arrayMatch) { + const [, prop, bracketsPart] = arrayMatch + current = + typeof current === 'object' && current !== null + ? 
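// Annotation: every property/index hop below re-checks for a large-value ref,
// so refs nested inside an already-materialized container are hydrated too.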
(current as Record<string, unknown>)[prop]
+          : undefined
+      current = await materializeLargeValueRefOrThrow(current, context)
+      if (current === undefined || current === null) {
+        return undefined
+      }
+
+      const indices = bracketsPart.match(/\[(\d+)\]/g)
+      if (indices) {
+        for (const indexMatch of indices) {
+          current = await materializeLargeValueRefOrThrow(current, context)
+          if (current === null || current === undefined) {
+            return undefined
+          }
+          const idx = Number.parseInt(indexMatch.slice(1, -1), 10)
+          current = Array.isArray(current) ? current[idx] : undefined
+        }
+      }
+    } else if (/^\d+$/.test(part)) {
+      const index = Number.parseInt(part, 10)
+      current = Array.isArray(current) ? current[index] : undefined
+    } else {
+      current =
+        typeof current === 'object' && current !== null
+          ? (current as Record<string, unknown>)[part]
+          : undefined
+    }
+  }
+  if (!context.allowLargeValueRefs) {
+    assertNoLargeValueRefs(current)
+  }
+  return current
+}
diff --git a/apps/sim/executor/variables/resolvers/reference.ts b/apps/sim/executor/variables/resolvers/reference.ts
index 35d32272739..70a49a4d11b 100644
--- a/apps/sim/executor/variables/resolvers/reference.ts
+++ b/apps/sim/executor/variables/resolvers/reference.ts
@@ -1,3 +1,5 @@
+import { materializeLargeValueRefSyncOrThrow } from '@/lib/execution/payloads/cache'
+import { assertNoLargeValueRefs, isLargeValueRef } from '@/lib/execution/payloads/large-value-ref'
 import type { ExecutionState, LoopScope } from '@/executor/execution/state'
 import type { ExecutionContext } from '@/executor/types'
 export interface ResolutionContext {
@@ -5,13 +7,21 @@
   executionState: ExecutionState
   currentNodeId: string
   loopScope?: LoopScope
+  allowLargeValueRefs?: boolean
 }
 export interface Resolver {
   canResolve(reference: string): boolean
   resolve(reference: string, context: ResolutionContext): any
+  resolveAsync?(reference: string, context: ResolutionContext): Promise<any>
 }
+export type AsyncPathNavigator = (
+  obj: any,
+  path: string[],
+  context: ResolutionContext
+) => Promise<any>
+
 /**
  * Sentinel value indicating a reference was resolved to a known block
  * that produced no output (e.g., the block exists in the workflow but
@@ -20,6 +30,19 @@ export interface Resolver {
  */
 export const RESOLVED_EMPTY = Symbol('RESOLVED_EMPTY')
 
+export function splitLeadingBracketPath(part: string): { property: string; pathParts: string[] } {
+  const bracketMatch = part.match(/^([^[]+)((?:\[\d+\])+)$/)
+  if (!bracketMatch) {
+    return { property: part, pathParts: [] }
+  }
+
+  const indices = bracketMatch[2].match(/\[(\d+)\]/g) ?? []
+  return {
+    property: bracketMatch[1],
+    pathParts: indices.map((indexMatch) => indexMatch.slice(1, -1)),
+  }
+}
+
 /**
  * Navigate through nested object properties using a path array.
  * Supports dot notation and array indices.
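 *
 * Added in this change: each step of the sync walk below first materializes any
 * large-value ref it encounters (materializeLargeValueRefSyncOrThrow, backed by
 * the payload cache), so e.g. (outputWithRef being a hypothetical block output
 * containing a compacted array) navigatePath(outputWithRef,
 * ['items', '0', 'name'], { executionContext }) hydrates the ref before
 * indexing into it.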
@@ -28,9 +51,17 @@ export const RESOLVED_EMPTY = Symbol('RESOLVED_EMPTY') * navigatePath({a: {b: {c: 1}}}, ['a', 'b', 'c']) => 1 * navigatePath({items: [{name: 'test'}]}, ['items', '0', 'name']) => 'test' */ -export function navigatePath(obj: any, path: string[]): any { +export function navigatePath( + obj: any, + path: string[], + options: { allowLargeValueRefs?: boolean; executionContext?: ExecutionContext } = {} +): any { let current = obj for (const part of path) { + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current, options.executionContext) + } + if (current === null || current === undefined) { return undefined } @@ -42,6 +73,9 @@ export function navigatePath(obj: any, path: string[]): any { typeof current === 'object' && current !== null ? (current as Record)[prop] : undefined + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current, options.executionContext) + } if (current === undefined || current === null) { return undefined } @@ -52,6 +86,9 @@ export function navigatePath(obj: any, path: string[]): any { if (current === null || current === undefined) { return undefined } + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current, options.executionContext) + } const idx = Number.parseInt(indexMatch.slice(1, -1), 10) current = Array.isArray(current) ? current[idx] : undefined } @@ -66,5 +103,8 @@ export function navigatePath(obj: any, path: string[]): any { : undefined } } + if (!options.allowLargeValueRefs) { + assertNoLargeValueRefs(current) + } return current } diff --git a/apps/sim/executor/variables/resolvers/workflow.ts b/apps/sim/executor/variables/resolvers/workflow.ts index f11612e2ee2..ad2c667949e 100644 --- a/apps/sim/executor/variables/resolvers/workflow.ts +++ b/apps/sim/executor/variables/resolvers/workflow.ts @@ -57,7 +57,7 @@ export class WorkflowResolver implements Resolver { // If there are additional path parts, navigate deeper if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return navigatePath(value, pathParts, { executionContext: context.executionContext }) } return value diff --git a/apps/sim/hooks/use-collaborative-workflow.ts b/apps/sim/hooks/use-collaborative-workflow.ts index 10585e1f8a9..a4b58338886 100644 --- a/apps/sim/hooks/use-collaborative-workflow.ts +++ b/apps/sim/hooks/use-collaborative-workflow.ts @@ -337,6 +337,9 @@ export function useCollaborativeWorkflow() { if (config.count !== undefined) { useWorkflowStore.getState().updateParallelCount(payload.id, config.count) } + if (config.batchSize !== undefined) { + useWorkflowStore.getState().updateParallelBatchSize(payload.id, config.batchSize) + } if (config.distribution !== undefined) { useWorkflowStore .getState() @@ -1728,6 +1731,7 @@ export function useCollaborativeWorkflow() { let newCount = currentBlock.data?.count || 5 let newDistribution = currentBlock.data?.collection || '' + const batchSize = currentBlock.data?.batchSize || 20 if (parallelType === 'count') { newDistribution = '' @@ -1742,6 +1746,7 @@ export function useCollaborativeWorkflow() { count: newCount, distribution: newDistribution, parallelType, + batchSize, } executeQueuedOperation( @@ -1752,6 +1757,7 @@ export function useCollaborativeWorkflow() { useWorkflowStore.getState().updateParallelType(parallelId, parallelType) useWorkflowStore.getState().updateParallelCount(parallelId, newCount) useWorkflowStore.getState().updateParallelCollection(parallelId, newDistribution) + 
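          // Annotation: batchSize now travels with every parallel config update so
          // the store stays consistent whichever field triggered the operation; the
          // 1-20 clamp itself is applied in collaborativeUpdateParallelBatchSize below.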
useWorkflowStore.getState().updateParallelBatchSize(parallelId, batchSize) } ) }, @@ -1768,41 +1774,52 @@ export function useCollaborativeWorkflow() { .filter((b) => b.data?.parentId === nodeId) .map((b) => b.id) + const clampedCount = Math.max(1, count) + if (iterationType === 'loop') { const currentLoopType = currentBlock.data?.loopType || 'for' - const currentCollection = currentBlock.data?.collection || '' + const existingLoop = useWorkflowStore.getState().loops[nodeId] + const nextForEachItems = existingLoop?.forEachItems ?? currentBlock.data?.collection ?? '' + const nextWhileCondition = + existingLoop?.whileCondition ?? currentBlock.data?.whileCondition ?? '' + const nextDoWhileCondition = + existingLoop?.doWhileCondition ?? currentBlock.data?.doWhileCondition ?? '' const config = { id: nodeId, nodes: childNodes, - iterations: Math.max(1, Math.min(1000, count)), // Clamp between 1-1000 for loops + iterations: clampedCount, loopType: currentLoopType, - forEachItems: currentCollection, + forEachItems: nextForEachItems, + whileCondition: nextWhileCondition, + doWhileCondition: nextDoWhileCondition, } executeQueuedOperation( SUBFLOW_OPERATIONS.UPDATE, OPERATION_TARGETS.SUBFLOW, { id: nodeId, type: 'loop', config }, - () => useWorkflowStore.getState().updateLoopCount(nodeId, count) + () => useWorkflowStore.getState().updateLoopCount(nodeId, clampedCount) ) } else { const currentDistribution = currentBlock.data?.collection || '' const currentParallelType = currentBlock.data?.parallelType || 'count' + const batchSize = currentBlock.data?.batchSize || 20 const config = { id: nodeId, nodes: childNodes, - count: Math.max(1, Math.min(20, count)), // Clamp between 1-20 for parallels + count: clampedCount, distribution: currentDistribution, parallelType: currentParallelType, + batchSize, } executeQueuedOperation( SUBFLOW_OPERATIONS.UPDATE, OPERATION_TARGETS.SUBFLOW, { id: nodeId, type: 'parallel', config }, - () => useWorkflowStore.getState().updateParallelCount(nodeId, count) + () => useWorkflowStore.getState().updateParallelCount(nodeId, clampedCount) ) } }, @@ -1860,6 +1877,7 @@ export function useCollaborativeWorkflow() { } else { const currentCount = currentBlock.data?.count || 5 const currentParallelType = currentBlock.data?.parallelType || 'count' + const batchSize = currentBlock.data?.batchSize || 20 const config = { id: nodeId, @@ -1867,6 +1885,7 @@ export function useCollaborativeWorkflow() { count: currentCount, distribution: collection, parallelType: currentParallelType, + batchSize, } executeQueuedOperation( @@ -1880,6 +1899,38 @@ export function useCollaborativeWorkflow() { [executeQueuedOperation] ) + const collaborativeUpdateParallelBatchSize = useCallback( + (parallelId: string, batchSize: number) => { + const currentBlock = useWorkflowStore.getState().blocks[parallelId] + if (!currentBlock || currentBlock.type !== 'parallel') return + + const childNodes = Object.values(useWorkflowStore.getState().blocks) + .filter((b) => b.data?.parentId === parallelId) + .map((b) => b.id) + const currentCount = currentBlock.data?.count || 5 + const currentDistribution = currentBlock.data?.collection || '' + const currentParallelType = currentBlock.data?.parallelType || 'count' + const clampedBatchSize = Math.max(1, Math.min(20, batchSize)) + + const config = { + id: parallelId, + nodes: childNodes, + count: currentCount, + distribution: currentDistribution, + parallelType: currentParallelType, + batchSize: clampedBatchSize, + } + + executeQueuedOperation( + SUBFLOW_OPERATIONS.UPDATE, + 
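        // Annotation: the queued operation carries the full parallel config (count,
        // distribution, parallelType, clamped batchSize) so collaborators receive a
        // complete snapshot rather than a partial patch; the callback below is the
        // local optimistic update.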
OPERATION_TARGETS.SUBFLOW, + { id: parallelId, type: 'parallel', config }, + () => useWorkflowStore.getState().updateParallelBatchSize(parallelId, clampedBatchSize) + ) + }, + [executeQueuedOperation] + ) + const collaborativeUpdateVariable = useCallback( (variableId: string, field: 'name' | 'value' | 'type', value: any) => { executeQueuedOperation( @@ -2137,6 +2188,7 @@ export function useCollaborativeWorkflow() { // Collaborative loop/parallel operations collaborativeUpdateLoopType, collaborativeUpdateParallelType, + collaborativeUpdateParallelBatchSize, // Unified iteration operations collaborativeUpdateIterationCount, diff --git a/apps/sim/hooks/use-undo-redo.ts b/apps/sim/hooks/use-undo-redo.ts index 86fe2a6bcce..025c087de0e 100644 --- a/apps/sim/hooks/use-undo-redo.ts +++ b/apps/sim/hooks/use-undo-redo.ts @@ -617,7 +617,9 @@ export function useUndoRedo() { const currentCount = currentBlock.data?.count || 5 const currentParallelType = currentBlock.data?.parallelType || 'count' const currentDistribution = currentBlock.data?.collection || '' + const currentBatchSize = currentBlock.data?.batchSize || 20 const nextCount = Number.parseInt(String(update.after), 10) + const nextBatchSize = Number.parseInt(String(update.after), 10) const config = { id: update.blockId, nodes: childNodes, @@ -630,6 +632,10 @@ export function useUndoRedo() { ? update.after : currentDistribution, parallelType: currentParallelType, + batchSize: + update.fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize + ? nextBatchSize + : currentBatchSize, } addToQueue({ @@ -650,6 +656,13 @@ export function useUndoRedo() { return } + if (update.fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize) { + if (!Number.isNaN(nextBatchSize)) { + useWorkflowStore.getState().updateParallelBatchSize(update.blockId, nextBatchSize) + } + return + } + useWorkflowStore.getState().updateParallelCollection(update.blockId, String(update.after)) }, [activeWorkflowId, addToQueue, userId] diff --git a/apps/sim/lib/api/contracts/execution-payloads.ts b/apps/sim/lib/api/contracts/execution-payloads.ts new file mode 100644 index 00000000000..485918dc4ac --- /dev/null +++ b/apps/sim/lib/api/contracts/execution-payloads.ts @@ -0,0 +1,31 @@ +import { z } from 'zod' +import { + isLargeValueStorageKey, + LARGE_VALUE_KINDS, + LARGE_VALUE_REF_MARKER, + LARGE_VALUE_REF_VERSION, +} from '@/lib/execution/payloads/large-value-ref' + +export const largeValueRefSchema = z + .object({ + [LARGE_VALUE_REF_MARKER]: z.literal(true), + version: z.literal(LARGE_VALUE_REF_VERSION), + id: z.string().regex(/^lv_[A-Za-z0-9_-]{12}$/, 'Invalid large value reference ID'), + kind: z.enum(LARGE_VALUE_KINDS), + size: z.number().int().positive(), + key: z.string().optional(), + executionId: z.string().optional(), + preview: z.unknown().optional(), + }) + .strict() + .superRefine((value, ctx) => { + if (value.key && !isLargeValueStorageKey(value.key, value.id, value.executionId)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ['key'], + message: 'Large value reference key must point to execution-scoped server storage', + }) + } + }) + +export type LargeValueRefResponse = z.output diff --git a/apps/sim/lib/api/contracts/hotspots.ts b/apps/sim/lib/api/contracts/hotspots.ts index 099170bc8be..4a3b92d371d 100644 --- a/apps/sim/lib/api/contracts/hotspots.ts +++ b/apps/sim/lib/api/contracts/hotspots.ts @@ -102,6 +102,9 @@ export const functionExecuteContract = defineRouteContract({ workflowVariables: unknownRecordSchema.optional().default({}), contextVariables: 
unknownRecordSchema.optional().default({}), workflowId: z.string().optional(), + executionId: z.string().optional(), + largeValueExecutionIds: z.array(z.string()).optional(), + allowLargeValueWorkflowScope: z.boolean().optional(), workspaceId: z.string().optional(), userId: z.string().optional(), isCustomTool: z.boolean().optional().default(false), diff --git a/apps/sim/lib/api/contracts/index.ts b/apps/sim/lib/api/contracts/index.ts index 062c01a5156..c8a079637d2 100644 --- a/apps/sim/lib/api/contracts/index.ts +++ b/apps/sim/lib/api/contracts/index.ts @@ -11,6 +11,7 @@ export * from './credential-sets' export * from './credentials' export * from './demo-requests' export * from './environment' +export * from './execution-payloads' export * from './file-uploads' export * from './folders' export * from './hotspots' diff --git a/apps/sim/lib/api/contracts/workflows.ts b/apps/sim/lib/api/contracts/workflows.ts index af55e5ef708..46e5095c933 100644 --- a/apps/sim/lib/api/contracts/workflows.ts +++ b/apps/sim/lib/api/contracts/workflows.ts @@ -20,6 +20,7 @@ const workflowBlockDataSchema = z.object({ whileCondition: z.string().optional(), doWhileCondition: z.string().optional(), parallelType: z.enum(['collection', 'count']).optional(), + batchSize: z.number().optional(), type: z.string().optional(), canonicalModes: z.record(z.string(), z.enum(['basic', 'advanced'])).optional(), }) @@ -90,6 +91,7 @@ const workflowParallelSchema = z.object({ .optional(), count: z.number().optional(), parallelType: z.enum(['count', 'collection']).optional(), + batchSize: z.number().optional(), enabled: z.boolean().optional(), locked: z.boolean().optional(), }) diff --git a/apps/sim/lib/core/utils/response-format.ts b/apps/sim/lib/core/utils/response-format.ts index 7223f17e0fa..7512cc50b59 100644 --- a/apps/sim/lib/core/utils/response-format.ts +++ b/apps/sim/lib/core/utils/response-format.ts @@ -1,4 +1,6 @@ import { createLogger } from '@sim/logger' +import { materializeLargeValueRefSyncOrThrow } from '@/lib/execution/payloads/cache' +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' const logger = createLogger('ResponseFormatUtils') @@ -196,6 +198,10 @@ function traverseObjectPathInternal(obj: any, path: string): any { const parts = path.split('.') for (const part of parts) { + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current) + } + if (current?.[part] !== undefined) { current = current[part] } else { @@ -203,6 +209,10 @@ function traverseObjectPathInternal(obj: any, path: string): any { } } + if (isLargeValueRef(current)) { + return current + } + return current } diff --git a/apps/sim/lib/core/utils/user-file.ts b/apps/sim/lib/core/utils/user-file.ts index 0069eb4fbae..deee12cbf04 100644 --- a/apps/sim/lib/core/utils/user-file.ts +++ b/apps/sim/lib/core/utils/user-file.ts @@ -42,6 +42,27 @@ export function isUserFileWithMetadata(value: unknown): value is UserFile { return typeof candidate.size === 'number' && typeof candidate.type === 'string' } +/** + * Checks if a value matches the display-safe UserFile metadata shape after internal fields are stripped. + */ +export function isUserFileDisplayMetadata(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return false + } + + const candidate = value as Record + const url = typeof candidate.url === 'string' ? 
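  // Annotation: a hypothetical value like { id: 'file_abc', name: 'a.txt',
  // url: '/api/files/serve/abc', size: 3, type: 'text/plain' } passes the checks
  // below; metadata with an empty url, or lacking both the 'file_' id prefix and
  // the '/api/files/serve/' url marker, is rejected.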
candidate.url : '' + + return ( + typeof candidate.id === 'string' && + typeof candidate.name === 'string' && + url.length > 0 && + typeof candidate.size === 'number' && + typeof candidate.type === 'string' && + (candidate.id.startsWith('file_') || url.includes('/api/files/serve/')) + ) +} + /** * Filters a UserFile object to only include display fields. * Used for both UI display and log sanitization. diff --git a/apps/sim/lib/execution/event-buffer.test.ts b/apps/sim/lib/execution/event-buffer.test.ts index 7e03ab8954d..da7de9404ca 100644 --- a/apps/sim/lib/execution/event-buffer.test.ts +++ b/apps/sim/lib/execution/event-buffer.test.ts @@ -57,7 +57,7 @@ describe('execution event buffer', () => { mockRedis.zremrangebyrank.mockResolvedValue(0) mockRedis.eval.mockImplementation( async ( - _script: string, + script: string, _keyCount: number, _eventsKey: string, _seqKey: string, @@ -68,6 +68,12 @@ describe('execution event buffer', () => { terminalStatus: string, ...args: (string | number)[] ) => { + if (script.includes('execution_redis_bytes')) { + return [1, 'ok', 0, 0] + } + if (script.includes('DECRBY')) { + return 1 + } for (let i = 0; i < args.length; i += 2) { persistedEntries.push(JSON.parse(args[i + 1] as string) as ExecutionEventEntry) } @@ -152,7 +158,10 @@ describe('execution event buffer', () => { () => Promise.resolve(), ] - mockRedis.eval.mockImplementation(async (_script: string, ...args: unknown[]) => { + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + if (script.includes('execution_redis_bytes')) { + return [1, 'ok', 0, 0] + } const batchEntries: ExecutionEventEntry[] = [] const zaddArgs = args.slice(8) as (string | number)[] for (let i = 0; i < zaddArgs.length; i += 2) { @@ -237,7 +246,10 @@ describe('execution event buffer', () => { it('flushes replay events after a recovered final replay flush without terminal meta', async () => { mockRedis.incrby.mockResolvedValue(100) let flushAttempt = 0 - mockRedis.eval.mockImplementation(async (_script: string, ...args: unknown[]) => { + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + if (script.includes('execution_redis_bytes')) { + return [1, 'ok', 0, 0] + } const zaddArgs = args.slice(8) as (string | number)[] if (flushAttempt > 0) { for (let i = 0; i < zaddArgs.length; i += 2) { @@ -287,6 +299,57 @@ describe('execution event buffer', () => { expect(mockRedis.hset).toHaveBeenCalledWith('meta', { status: 'complete' }) }) + it('surfaces execution memory limit errors when the Redis budget is exceeded', async () => { + mockRedis.incrby.mockResolvedValue(100) + mockRedis.eval.mockImplementationOnce(async () => [ + 0, + 'execution_redis_bytes', + 64 * 1024 * 1024, + ]) + + const writer = createExecutionEventWriter('exec-1') + + await expect(writer.writeTerminal(makeEvent('terminal'), 'complete')).rejects.toThrow( + 'Execution memory limit exceeded' + ) + expect(persistedEntries).toEqual([]) + }) + + it('preserves requested UserFile base64 when buffering terminal events', async () => { + mockRedis.incrby.mockResolvedValue(100) + const base64 = Buffer.from('hello').toString('base64') + const writer = createExecutionEventWriter('exec-1', { preserveUserFileBase64: true }) + + await writer.writeTerminal( + { + type: 'execution:completed', + timestamp: new Date().toISOString(), + executionId: 'exec-1', + workflowId: 'wf-1', + data: { + success: true, + duration: 1, + output: { + file: { + id: 'file-1', + name: 'small.txt', + size: 5, + type: 'text/plain', + context: 
'execution', + base64, + }, + }, + }, + }, + 'complete' + ) + + const eventData = persistedEntries[0].event.data as { + output: { file: { base64?: string } } + } + expect(eventData.output.file.base64).toBe(base64) + }) + it('retries active meta initialization before giving up', async () => { mockRedis.hset.mockRejectedValueOnce(new Error('meta write failed')).mockResolvedValueOnce(1) diff --git a/apps/sim/lib/execution/event-buffer.ts b/apps/sim/lib/execution/event-buffer.ts index 02f5d750b18..81aebee8170 100644 --- a/apps/sim/lib/execution/event-buffer.ts +++ b/apps/sim/lib/execution/event-buffer.ts @@ -2,6 +2,15 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' import { env } from '@/lib/core/config/env' import { getRedisClient } from '@/lib/core/config/redis' +import { LARGE_VALUE_THRESHOLD_BYTES } from '@/lib/execution/payloads/large-value-ref' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import type { LargeValueStoreContext } from '@/lib/execution/payloads/store' +import { + type ExecutionRedisBudgetReservation, + releaseExecutionRedisBytes, + reserveExecutionRedisBytes, +} from '@/lib/execution/redis-budget.server' +import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' import type { ExecutionEvent } from '@/lib/workflows/executor/execution-events' const logger = createLogger('ExecutionEventBuffer') @@ -11,6 +20,7 @@ const TTL_SECONDS = 60 * 60 // 1 hour const EVENT_LIMIT = 1000 const RESERVE_BATCH = 100 const FLUSH_INTERVAL_MS = 15 +const FLUSH_MAX_RETRY_INTERVAL_MS = 1000 const FLUSH_MAX_BATCH = 200 const MAX_PENDING_EVENTS = 1000 const ACTIVE_META_ATTEMPTS = 3 @@ -53,6 +63,54 @@ function isExecutionStreamStatus(value: string | undefined): value is ExecutionS return value === 'active' || value === 'complete' || value === 'error' || value === 'cancelled' } +function getJsonSize(value: unknown): number | null { + try { + return Buffer.byteLength(JSON.stringify(value), 'utf8') + } catch { + return null + } +} + +function getExecutionEventEntryJson(entry: ExecutionEventEntry): string { + return JSON.stringify(entry) +} + +function trimFinalBlockLogsForEventData(data: unknown): unknown { + if (!data || typeof data !== 'object' || Array.isArray(data)) return data + + const record = data as Record + const finalBlockLogs = record.finalBlockLogs + if (!Array.isArray(finalBlockLogs)) return data + const originalSize = getJsonSize(data) + if (originalSize !== null && originalSize <= LARGE_VALUE_THRESHOLD_BYTES) return data + + const total = finalBlockLogs.length + let logs = finalBlockLogs + let trimmed: Record = { + ...record, + finalBlockLogs: logs, + finalBlockLogsTruncated: true, + finalBlockLogsTotal: total, + } + + while (logs.length > 0) { + const size = getJsonSize(trimmed) + if (size !== null && size <= LARGE_VALUE_THRESHOLD_BYTES) { + return trimmed + } + + logs = logs.length === 1 ? 
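+      // Drop the older half on each pass (slice keeps the tail of the array);
+      // a single still-oversized log is dropped entirely rather than split.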
[] : logs.slice(Math.ceil(logs.length / 2)) + trimmed = { + ...record, + finalBlockLogs: logs, + finalBlockLogsTruncated: true, + finalBlockLogsTotal: total, + } + } + + return trimmed +} + export interface ExecutionStreamMeta { status: ExecutionStreamStatus userId?: string @@ -97,6 +155,37 @@ export interface ExecutionEventWriter { close: () => Promise } +export interface ExecutionEventWriterContext extends LargeValueStoreContext { + requireDurablePayloads?: boolean + preserveUserFileBase64?: boolean +} + +async function compactEventForBuffer( + event: ExecutionEvent, + context: ExecutionEventWriterContext = {} +): Promise { + if (!('data' in event)) { + return event + } + + const compactedData = await compactExecutionPayload(event.data, { + ...context, + executionId: context.executionId ?? event.executionId, + requireDurable: context.requireDurablePayloads, + preserveUserFileBase64: context.preserveUserFileBase64, + preserveRoot: true, + }) + const eventData = trimFinalBlockLogsForEventData(compactedData) + const eventDataSize = getJsonSize(eventData) + if (eventDataSize !== null && eventDataSize > LARGE_VALUE_THRESHOLD_BYTES) { + throw new Error( + `Execution event data remains too large after compaction (${eventDataSize} bytes)` + ) + } + + return { ...event, data: eventData } as ExecutionEvent +} + const memoryExecutionStreams = new Map() function canUseMemoryEventBuffer(): boolean { @@ -169,13 +258,17 @@ function readMemoryEvents(executionId: string, afterEventId: number): ExecutionE } } -function createMemoryExecutionEventWriter(executionId: string): ExecutionEventWriter { +function createMemoryExecutionEventWriter( + executionId: string, + context: ExecutionEventWriterContext = {} +): ExecutionEventWriter { const writeMemoryEvent = async (event: ExecutionEvent) => { const stream = getMemoryStream(executionId) + const compactEvent = await compactEventForBuffer(event, context) const entry = { eventId: stream.nextEventId++, executionId, - event, + event: compactEvent, } stream.events.push(entry) if (stream.events.length > EVENT_LIMIT) { @@ -450,12 +543,15 @@ export async function readExecutionEventsState( } } -export function createExecutionEventWriter(executionId: string): ExecutionEventWriter { +export function createExecutionEventWriter( + executionId: string, + context: ExecutionEventWriterContext = {} +): ExecutionEventWriter { const redis = getRedisClient() if (!redis) { if (canUseMemoryEventBuffer()) { logger.info('createExecutionEventWriter: using in-memory event buffer', { executionId }) - return createMemoryExecutionEventWriter(executionId) + return createMemoryExecutionEventWriter(executionId, context) } logger.warn( 'createExecutionEventWriter: Redis client unavailable, events will not be buffered', @@ -477,13 +573,23 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW let nextEventId = 0 let maxReservedId = 0 let flushTimer: ReturnType | null = null + let consecutiveFlushFailures = 0 - const scheduleFlush = () => { + const getFlushDelayMs = () => { + if (consecutiveFlushFailures === 0) return FLUSH_INTERVAL_MS + const backoff = Math.min( + FLUSH_INTERVAL_MS * 2 ** Math.min(consecutiveFlushFailures, 6), + FLUSH_MAX_RETRY_INTERVAL_MS + ) + return backoff + Math.floor(Math.random() * FLUSH_INTERVAL_MS) + } + + const scheduleFlush = (delayMs = FLUSH_INTERVAL_MS) => { if (flushTimer) return flushTimer = setTimeout(() => { flushTimer = null void flushPending() - }, FLUSH_INTERVAL_MS) + }, delayMs) } const reserveIds = async (minCount: number) => 
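+  // Reserves event IDs in batches of RESERVE_BATCH; the unit tests stub
+  // redis.incrby for this reservation, so one round-trip covers many events.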
{ @@ -506,12 +612,27 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW if (pending.length === 0) return true const batch = pending pending = [] + let reservedBudget: ExecutionRedisBudgetReservation | null = null + let budgetReserved = false try { const key = getEventsKey(executionId) const zaddArgs: (string | number)[] = [] + let batchBytes = 0 for (const entry of batch) { - zaddArgs.push(entry.eventId, JSON.stringify(entry)) + const entryJson = getExecutionEventEntryJson(entry) + batchBytes += Buffer.byteLength(entryJson, 'utf8') + zaddArgs.push(entry.eventId, entryJson) + } + reservedBudget = { + executionId, + userId: context.userId, + category: 'event_buffer', + operation: terminalStatus ? 'write_terminal_events' : 'write_events', + bytes: batchBytes, + logger, } + await reserveExecutionRedisBytes(redis, reservedBudget) + budgetReserved = true await redis.eval( FLUSH_EVENTS_SCRIPT, 3, @@ -524,11 +645,21 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW terminalStatus ?? '', ...zaddArgs ) + consecutiveFlushFailures = 0 return true } catch (error) { + if (budgetReserved && reservedBudget) { + await releaseExecutionRedisBytes(redis, reservedBudget) + } + if (isExecutionResourceLimitError(error)) { + pending = batch.concat(pending) + throw error + } + consecutiveFlushFailures += 1 logger.warn('Failed to flush execution events', { executionId, batchSize: batch.length, + consecutiveFailures: consecutiveFlushFailures, error: toError(error).message, stack: error instanceof Error ? error.stack : undefined, }) @@ -566,7 +697,7 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW flushPromise = null } if (!ok) { - if (scheduleOnFailure && pending.length > 0) scheduleFlush() + if (scheduleOnFailure && pending.length > 0) scheduleFlush(getFlushDelayMs()) return false } } @@ -577,7 +708,12 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW await reserveIds(1) } const eventId = nextEventId++ - const entry: ExecutionEventEntry = { eventId, executionId, event } + const compactEvent = await compactEventForBuffer(event, { + ...context, + executionId, + requireDurablePayloads: true, + }) + const entry: ExecutionEventEntry = { eventId, executionId, event: compactEvent } pending.push(entry) if (pending.length >= FLUSH_MAX_BATCH) { await flushPending() @@ -618,7 +754,12 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW await reserveIds(1) } const eventId = nextEventId++ - const entry: ExecutionEventEntry = { eventId, executionId, event } + const compactEvent = await compactEventForBuffer(event, { + ...context, + executionId, + requireDurablePayloads: true, + }) + const entry: ExecutionEventEntry = { eventId, executionId, event: compactEvent } pending.push(entry) const ok = await flushPending(false, status) if (!ok) { diff --git a/apps/sim/lib/execution/isolated-vm-worker.cjs b/apps/sim/lib/execution/isolated-vm-worker.cjs index 18828eebc60..a924beb8dfe 100644 --- a/apps/sim/lib/execution/isolated-vm-worker.cjs +++ b/apps/sim/lib/execution/isolated-vm-worker.cjs @@ -27,6 +27,21 @@ const SANDBOX_BUNDLE_FILES = { const bundleSourceCache = new Map() const activeIsolates = new Map() +/** + * Sends an IPC request and reports only actual delivery failures. + * Node queues messages under backpressure, so the boolean return value is not + * a failure signal. 
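+ *
+ * Illustrative call (the error-settling helper named here is hypothetical):
+ *
+ * @example
+ *   sendIpcRequest({ type: 'fetch', fetchId, requestId, url, optionsJson }, (err) => {
+ *     settlePendingFetch(fetchId, err) // runs only if delivery actually failed
+ *   })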
+ */ +function sendIpcRequest(message, onError) { + try { + process.send(message, (err) => { + if (err) onError(err) + }) + } catch (error) { + onError(error instanceof Error ? error : new Error(String(error))) + } +} + function getBundleSource(bundleName) { const cached = bundleSourceCache.get(bundleName) if (cached) return cached @@ -180,6 +195,7 @@ async function executeCode(request, executionId) { let logCallback = null let errorCallback = null let fetchCallback = null + let brokerCallback = null const externalCopies = [] try { @@ -232,17 +248,50 @@ async function executeCode(request, executionId) { } }, FETCH_TIMEOUT_MS) pendingFetches.set(fetchId, { resolve, timeout }) - if (process.send && process.connected) { - process.send({ type: 'fetch', fetchId, requestId, url, optionsJson }) - } else { + if (!process.send || !process.connected) { clearTimeout(timeout) pendingFetches.delete(fetchId) resolve(JSON.stringify({ error: 'Parent process disconnected' })) + return } + sendIpcRequest({ type: 'fetch', fetchId, requestId, url, optionsJson }, (err) => { + const pending = pendingFetches.get(fetchId) + if (!pending) return + clearTimeout(pending.timeout) + pendingFetches.delete(fetchId) + pending.resolve(JSON.stringify({ error: `Fetch IPC send failed: ${err.message}` })) + }) }) }) await jail.set('__fetchRef', fetchCallback) + brokerCallback = new ivm.Reference(async (brokerName, argsJson) => { + return new Promise((resolve) => { + const brokerId = ++brokerIdCounter + const timeout = setTimeout(() => { + if (pendingBrokerCalls.has(brokerId)) { + pendingBrokerCalls.delete(brokerId) + resolve(JSON.stringify({ error: `Broker "${brokerName}" timed out` })) + } + }, BROKER_TIMEOUT_MS) + pendingBrokerCalls.set(brokerId, { resolve, timeout, executionId }) + if (!process.send || !process.connected) { + clearTimeout(timeout) + pendingBrokerCalls.delete(brokerId) + resolve(JSON.stringify({ error: 'Parent process disconnected' })) + return + } + sendIpcRequest({ type: 'broker', brokerId, executionId, brokerName, argsJson }, (err) => { + const pending = pendingBrokerCalls.get(brokerId) + if (!pending) return + clearTimeout(pending.timeout) + pendingBrokerCalls.delete(brokerId) + pending.resolve(JSON.stringify({ error: `Broker IPC send failed: ${err.message}` })) + }) + }) + }) + await jail.set('__brokerRef', brokerCallback) + const bootstrap = ` // Set up console object const console = { @@ -299,10 +348,57 @@ async function executeCode(request, executionId) { }; } + const sim = (() => { + const broker = __brokerRef; + async function callSimBroker(name, args) { + let argsJson; + try { + argsJson = args === undefined ? undefined : JSON.stringify(args); + } catch { + throw new Error('sim helper arguments must be JSON-serializable'); + } + if (argsJson && argsJson.length > ${MAX_FETCH_OPTIONS_JSON_CHARS}) { + throw new Error('sim helper arguments exceed maximum payload size'); + } + const responseJson = await broker.apply(undefined, [name, argsJson], { result: { promise: true } }); + let response; + try { + response = JSON.parse(responseJson); + } catch { + throw new Error('Invalid sim helper response'); + } + if (typeof response.error === 'string') { + throw new Error(response.error || 'Sim helper call failed'); + } + return response.resultJson === undefined || response.resultJson === null + ? 
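+      // From sandboxed user code these helpers read like (sketch):
+      //   const text = await sim.files.readText(file);
+      //   const value = await sim.values.read(ref);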
null + : JSON.parse(response.resultJson); + } + + return Object.freeze({ + files: Object.freeze({ + readBase64: (file, options) => callSimBroker('sim.files.readBase64', { file, options }), + readText: (file, options) => callSimBroker('sim.files.readText', { file, options }), + readBase64Chunk: (file, options) => callSimBroker('sim.files.readBase64Chunk', { file, options }), + readTextChunk: (file, options) => callSimBroker('sim.files.readTextChunk', { file, options }), + }), + values: Object.freeze({ + read: (ref, options) => callSimBroker('sim.values.read', { ref, options }), + }), + }); + })(); + Object.defineProperty(global, 'sim', { + value: sim, + writable: false, + configurable: false, + enumerable: true + }); + // Prevent access to dangerous globals with stronger protection const undefined_globals = [ 'Isolate', 'Context', 'Script', 'Module', 'Callback', 'Reference', - 'ExternalCopy', 'process', 'require', 'module', 'exports', '__dirname', '__filename' + 'ExternalCopy', 'process', 'require', 'module', 'exports', '__dirname', '__filename', + '__brokerRef', '__broker', '__callSimBroker' ]; for (const name of undefined_globals) { try { @@ -439,6 +535,7 @@ async function executeCode(request, executionId) { bootstrapScript, ...externalCopies, fetchCallback, + brokerCallback, errorCallback, logCallback, context, @@ -662,13 +759,19 @@ async function executeTask(request, executionId) { } }, BROKER_TIMEOUT_MS) pendingBrokerCalls.set(brokerId, { resolve, timeout, executionId }) - if (process.send && process.connected) { - process.send({ type: 'broker', brokerId, executionId, brokerName, argsJson }) - } else { + if (!process.send || !process.connected) { clearTimeout(timeout) pendingBrokerCalls.delete(brokerId) resolve(JSON.stringify({ error: 'Parent process disconnected' })) + return } + sendIpcRequest({ type: 'broker', brokerId, executionId, brokerName, argsJson }, (err) => { + const pending = pendingBrokerCalls.get(brokerId) + if (!pending) return + clearTimeout(pending.timeout) + pendingBrokerCalls.delete(brokerId) + pending.resolve(JSON.stringify({ error: `Broker IPC send failed: ${err.message}` })) + }) }) }) releaseables.push(brokerRef) diff --git a/apps/sim/lib/execution/payloads/cache.ts b/apps/sim/lib/execution/payloads/cache.ts new file mode 100644 index 00000000000..507a8dd4ccc --- /dev/null +++ b/apps/sim/lib/execution/payloads/cache.ts @@ -0,0 +1,169 @@ +import { + getLargeValueMaterializationError, + isLargeValueRef, + type LargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' + +const FALLBACK_TTL_MS = 15 * 60 * 1000 +const MAX_IN_MEMORY_BYTES = 256 * 1024 * 1024 + +interface LargeValueCacheScope { + workspaceId?: string + workflowId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean +} + +const inMemoryValues = new Map< + string, + { + value: unknown + size: number + expiresAt: number + scope?: LargeValueCacheScope + recoverable: boolean + } +>() +let inMemoryBytes = 0 + +export function clearLargeValueCacheForTests(): void { + inMemoryValues.clear() + inMemoryBytes = 0 +} + +function cleanupExpiredValues(now = Date.now()): void { + for (const [id, entry] of inMemoryValues.entries()) { + if (entry.expiresAt <= now) { + inMemoryValues.delete(id) + inMemoryBytes -= entry.size + } + } +} + +export function cacheLargeValue( + id: string, + value: unknown, + size: number, + scope?: LargeValueCacheScope, + options: { recoverable?: boolean } = {} +): boolean { + if (size > MAX_IN_MEMORY_BYTES) { + return false + } 
+ + cleanupExpiredValues() + + const existing = inMemoryValues.get(id) + if (existing) { + inMemoryValues.delete(id) + inMemoryBytes -= existing.size + } + + while (inMemoryBytes + size > MAX_IN_MEMORY_BYTES && inMemoryValues.size > 0) { + const oldestRecoverableId = Array.from(inMemoryValues.entries()).find( + ([, entry]) => entry.recoverable + )?.[0] + if (!oldestRecoverableId) break + const oldest = inMemoryValues.get(oldestRecoverableId) + inMemoryValues.delete(oldestRecoverableId) + inMemoryBytes -= oldest?.size ?? 0 + } + + if (inMemoryBytes + size > MAX_IN_MEMORY_BYTES) { + if (existing) { + inMemoryValues.set(id, existing) + inMemoryBytes += existing.size + } + return false + } + + inMemoryValues.set(id, { + value, + size, + scope, + recoverable: options.recoverable ?? false, + expiresAt: Date.now() + FALLBACK_TTL_MS, + }) + inMemoryBytes += size + return true +} + +function scopeMatchesRef( + ref: LargeValueRef, + cachedScope: LargeValueCacheScope | undefined, + callerScope?: LargeValueCacheScope +): boolean { + if (!cachedScope?.executionId) { + return false + } + if (ref.executionId && ref.executionId !== cachedScope.executionId) { + return false + } + if (!callerScope) { + return Boolean(ref.key) && (!ref.executionId || ref.executionId === cachedScope.executionId) + } + + const allowedExecutionIds = new Set([ + callerScope.executionId, + ...(callerScope.largeValueExecutionIds ?? []), + ]) + const workflowScopeAllowed = + callerScope.allowLargeValueWorkflowScope && + callerScope.workspaceId === cachedScope.workspaceId && + callerScope.workflowId === cachedScope.workflowId + + return allowedExecutionIds.has(cachedScope.executionId) || Boolean(workflowScopeAllowed) +} + +export function materializeLargeValueRefSync( + ref: LargeValueRef, + callerScope?: LargeValueCacheScope +): unknown { + cleanupExpiredValues() + const cached = inMemoryValues.get(ref.id) + if (!cached || !scopeMatchesRef(ref, cached.scope, callerScope)) { + return undefined + } + return cached.value +} + +export function materializeLargeValueRefSyncOrThrow( + ref: LargeValueRef, + callerScope?: LargeValueCacheScope +): unknown { + const materialized = materializeLargeValueRefSync(ref, callerScope) + if (materialized === undefined) { + throw getLargeValueMaterializationError(ref) + } + return materialized +} + +export function materializeLargeValueRefsSync( + value: unknown, + seen = new WeakSet() +): unknown { + if (isLargeValueRef(value)) { + return materializeLargeValueRefsSync(materializeLargeValueRefSyncOrThrow(value), seen) + } + + if (!value || typeof value !== 'object') { + return value + } + + if (seen.has(value)) { + return value + } + seen.add(value) + + if (Array.isArray(value)) { + return value.map((item) => materializeLargeValueRefsSync(item, seen)) + } + + return Object.fromEntries( + Object.entries(value).map(([key, entryValue]) => [ + key, + materializeLargeValueRefsSync(entryValue, seen), + ]) + ) +} diff --git a/apps/sim/lib/execution/payloads/hydration.ts b/apps/sim/lib/execution/payloads/hydration.ts new file mode 100644 index 00000000000..bfc825280ae --- /dev/null +++ b/apps/sim/lib/execution/payloads/hydration.ts @@ -0,0 +1,35 @@ +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' +import { + type LargeValueStoreContext, + materializeLargeValueRef, +} from '@/lib/execution/payloads/store' + +export async function warmLargeValueRefs( + value: unknown, + context: LargeValueStoreContext = {}, + seen = new WeakSet() +): Promise { + if (!value || typeof value !== 'object') 
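+  // Primitives cannot contain nested refs, so there is nothing to warm here.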
{
+    return
+  }
+
+  if (isLargeValueRef(value)) {
+    const materialized = await materializeLargeValueRef(value, context)
+    await warmLargeValueRefs(materialized, context, seen)
+    return
+  }
+
+  if (seen.has(value)) {
+    return
+  }
+  seen.add(value)
+
+  if (Array.isArray(value)) {
+    await Promise.all(value.map((item) => warmLargeValueRefs(item, context, seen)))
+    return
+  }
+
+  await Promise.all(
+    Object.values(value).map((entryValue) => warmLargeValueRefs(entryValue, context, seen))
+  )
+}
diff --git a/apps/sim/lib/execution/payloads/large-value-ref.ts b/apps/sim/lib/execution/payloads/large-value-ref.ts
new file mode 100644
index 00000000000..d770f6ed37d
--- /dev/null
+++ b/apps/sim/lib/execution/payloads/large-value-ref.ts
@@ -0,0 +1,97 @@
+export const LARGE_VALUE_REF_MARKER = '__simLargeValueRef'
+
+export const LARGE_VALUE_THRESHOLD_BYTES = 8 * 1024 * 1024
+export const LARGE_VALUE_REF_VERSION = 1
+
+export const LARGE_VALUE_KINDS = ['array', 'object', 'string', 'json'] as const
+
+export type LargeValueKind = (typeof LARGE_VALUE_KINDS)[number]
+
+export interface LargeValueRef {
+  [LARGE_VALUE_REF_MARKER]: true
+  version: typeof LARGE_VALUE_REF_VERSION
+  id: string
+  kind: LargeValueKind
+  size: number
+  key?: string
+  executionId?: string
+  preview?: unknown
+}
+
+const LARGE_VALUE_ID_PATTERN = /^lv_[A-Za-z0-9_-]{12}$/
+
+export function isLargeValueStorageKey(key: string, id: string, executionId?: string): boolean {
+  if (!key.startsWith('execution/')) return false
+  if (!key.endsWith(`/large-value-${id}.json`)) return false
+  if (executionId && !key.includes(`/${executionId}/`)) return false
+  return true
+}
+
+export function isLargeValueRef(value: unknown): value is LargeValueRef {
+  if (!value || typeof value !== 'object') return false
+
+  const candidate = value as Record<string, unknown>
+  const id = candidate.id
+  const key = candidate.key
+  const executionId = candidate.executionId
+
+  return (
+    candidate[LARGE_VALUE_REF_MARKER] === true &&
+    candidate.version === LARGE_VALUE_REF_VERSION &&
+    typeof id === 'string' &&
+    LARGE_VALUE_ID_PATTERN.test(id) &&
+    typeof candidate.kind === 'string' &&
+    (LARGE_VALUE_KINDS as readonly string[]).includes(candidate.kind) &&
+    typeof candidate.size === 'number' &&
+    Number.isFinite(candidate.size) &&
+    candidate.size > 0 &&
+    (executionId === undefined || typeof executionId === 'string') &&
+    (key === undefined ||
+      (typeof key === 'string' &&
+        isLargeValueStorageKey(key, id, executionId as string | undefined)))
+  )
+}
+
+export function containsLargeValueRef(
+  value: unknown,
+  seen = new WeakSet()
+): LargeValueRef | null {
+  if (!value || typeof value !== 'object') return null
+  if (isLargeValueRef(value)) return value
+  if (seen.has(value)) return null
+
+  seen.add(value)
+
+  if (Array.isArray(value)) {
+    for (const item of value) {
+      const ref = containsLargeValueRef(item, seen)
+      if (ref) return ref
+    }
+    return null
+  }
+
+  for (const entryValue of Object.values(value)) {
+    const ref = containsLargeValueRef(entryValue, seen)
+    if (ref) return ref
+  }
+
+  return null
+}
+
+export function getLargeValueMaterializationError(ref: LargeValueRef): Error {
+  return new Error(
+    `This execution value is too large to inline (${formatLargeValueSize(ref.size)}). 
Select a nested field or reduce the amount of data passed between blocks.` + ) +} + +function formatLargeValueSize(bytes: number): string { + const megabytes = bytes / (1024 * 1024) + return `${megabytes.toFixed(1)} MB` +} + +export function assertNoLargeValueRefs(value: unknown): void { + const ref = containsLargeValueRef(value) + if (ref) { + throw getLargeValueMaterializationError(ref) + } +} diff --git a/apps/sim/lib/execution/payloads/materialization.server.ts b/apps/sim/lib/execution/payloads/materialization.server.ts new file mode 100644 index 00000000000..5e337e35914 --- /dev/null +++ b/apps/sim/lib/execution/payloads/materialization.server.ts @@ -0,0 +1,294 @@ +import { createLogger, type Logger } from '@sim/logger' +import { toError } from '@sim/utils/errors' +import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' +import { + getLargeValueMaterializationError, + isLargeValueRef, + isLargeValueStorageKey, + type LargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { ExecutionResourceLimitError } from '@/lib/execution/resource-errors' +import type { StorageContext } from '@/lib/uploads' +import { bufferToBase64, inferContextFromKey } from '@/lib/uploads/utils/file-utils' +import { downloadFileFromStorage } from '@/lib/uploads/utils/file-utils.server' +import type { UserFile } from '@/executor/types' + +const logger = createLogger('ExecutionPayloadMaterialization') + +export const MAX_DURABLE_LARGE_VALUE_BYTES = 64 * 1024 * 1024 +export const MAX_INLINE_MATERIALIZATION_BYTES = 16 * 1024 * 1024 +export const MAX_FUNCTION_FILE_BYTES = 64 * 1024 * 1024 +export const MAX_FUNCTION_INLINE_BYTES = 10 * 1024 * 1024 + +export interface ExecutionMaterializationContext { + workflowId?: string + workspaceId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean + userId?: string + requestId?: string + logger?: Logger +} + +export interface MaterializeLargeValueOptions extends ExecutionMaterializationContext { + maxBytes?: number +} + +export interface ReadUserFileContentOptions extends ExecutionMaterializationContext { + maxBytes?: number + maxSourceBytes?: number + offset?: number + length?: number + chunked?: boolean + encoding: 'base64' | 'text' +} + +function getLogger(options: ExecutionMaterializationContext): Logger { + return options.logger ?? logger +} + +export function assertDurableLargeValueSize(size: number): void { + if (size > MAX_DURABLE_LARGE_VALUE_BYTES) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: size, + limitBytes: MAX_DURABLE_LARGE_VALUE_BYTES, + }) + } +} + +export function assertInlineMaterializationSize(size: number, maxBytes?: number): void { + const limit = maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES + if (size > limit) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: size, + limitBytes: limit, + }) + } +} + +export function isValidLargeValueKey(ref: LargeValueRef): boolean { + return Boolean(ref.key && isLargeValueStorageKey(ref.key, ref.id, ref.executionId)) +} + +export function assertLargeValueRefAccess( + ref: LargeValueRef, + context: ExecutionMaterializationContext +): void { + if (!context.executionId) { + throw new Error('Large execution value requires an execution context.') + } + const allowedExecutionIds = new Set([ + context.executionId, + ...(context.largeValueExecutionIds ?? []), + ]) + + const parts = ref.key?.split('/') ?? 
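+  // Durable keys look like execution/<workspaceId>/<workflowId>/<executionId>/large-value-<id>.json,
+  // so the destructuring below recovers the scope the value was written under.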
[] + const [, workspaceId, workflowId, executionId] = parts + + if (!ref.key) { + if (ref.executionId && !allowedExecutionIds.has(ref.executionId)) { + throw new Error('Large execution value is not available in this execution.') + } + return + } + if (!context.workspaceId || !context.workflowId) { + throw new Error('Large execution value requires workspace and workflow context.') + } + const workflowScopeAllowed = + context.allowLargeValueWorkflowScope && + context.workspaceId === workspaceId && + context.workflowId === workflowId + if (ref.executionId && !allowedExecutionIds.has(ref.executionId) && !workflowScopeAllowed) { + throw new Error('Large execution value is not available in this execution.') + } + if (!allowedExecutionIds.has(executionId) && !workflowScopeAllowed) { + throw new Error('Large execution value is not available in this execution.') + } + if (context.workspaceId && workspaceId !== context.workspaceId) { + throw new Error('Large execution value is not available in this execution.') + } + if (context.workflowId && workflowId !== context.workflowId) { + throw new Error('Large execution value is not available in this execution.') + } +} + +export async function readLargeValueRefFromStorage( + ref: LargeValueRef, + options: MaterializeLargeValueOptions = {} +): Promise { + const log = getLogger(options) + if (!isLargeValueRef(ref) || !ref.key || !isValidLargeValueKey(ref)) { + return undefined + } + + assertLargeValueRefAccess(ref, options) + assertInlineMaterializationSize(ref.size, options.maxBytes) + + try { + const { StorageService } = await import('@/lib/uploads') + const buffer = await StorageService.downloadFile({ + key: ref.key, + context: 'execution', + }) + if (buffer.length > (options.maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES)) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: buffer.length, + limitBytes: options.maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES, + }) + } + return JSON.parse(buffer.toString('utf8')) + } catch (error) { + if (error instanceof ExecutionResourceLimitError) { + throw error + } + log.warn('Failed to materialize persisted large execution value', { + id: ref.id, + key: ref.key, + error: toError(error).message, + }) + return undefined + } +} + +function normalizeRange(buffer: Buffer, options: ReadUserFileContentOptions): Buffer { + const offset = Math.max(0, Math.floor(options.offset ?? 0)) + const maxLength = options.maxBytes ?? MAX_FUNCTION_INLINE_BYTES + const requestedLength = options.length === undefined ? maxLength : Math.floor(options.length) + const length = Math.max(0, Math.min(requestedLength, maxLength)) + return buffer.subarray(offset, offset + length) +} + +function getExecutionKeyParts(key: string): + | { + workspaceId: string + workflowId: string + executionId: string + } + | undefined { + const parts = key.split('/') + if (parts[0] !== 'execution' || parts.length < 5) { + return undefined + } + + return { + workspaceId: parts[1], + workflowId: parts[2], + executionId: parts[3], + } +} + +function assertExecutionFileScope(key: string, options: ExecutionMaterializationContext): void { + const parts = getExecutionKeyParts(key) + if (!parts) { + throw new Error('File is not available in this execution.') + } + + const allowedExecutionIds = new Set([ + options.executionId, + ...(options.largeValueExecutionIds ?? 
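+    // Presumably populated for resumed or child executions, so refs written by
+    // an allowed earlier execution of the same run stay readable.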
[]), + ]) + const workflowScopeAllowed = + options.allowLargeValueWorkflowScope && + options.workspaceId === parts.workspaceId && + options.workflowId === parts.workflowId + if ( + !options.executionId || + (!allowedExecutionIds.has(parts.executionId) && !workflowScopeAllowed) + ) { + throw new Error('File is not available in this execution.') + } + + if (options.workspaceId && parts.workspaceId !== options.workspaceId) { + throw new Error('File is not available in this execution.') + } + + if (options.workflowId && parts.workflowId !== options.workflowId) { + throw new Error('File is not available in this execution.') + } +} + +function getVerifiedStorageContext(file: UserFile): StorageContext { + if (!file.key) { + throw new Error('File content requires a storage key.') + } + + const inferredContext = inferContextFromKey(file.key) + if (file.context && file.context !== inferredContext) { + throw new Error('File context does not match its storage key.') + } + + return inferredContext +} + +export async function assertUserFileContentAccess( + file: UserFile, + options: ExecutionMaterializationContext +): Promise { + const context = getVerifiedStorageContext(file) + + if (context === 'execution') { + assertExecutionFileScope(file.key, options) + } + + if (!options.userId) { + throw new Error('File access requires an authenticated user.') + } + + const { verifyFileAccess } = await import('@/app/api/files/authorization') + const hasAccess = await verifyFileAccess(file.key, options.userId, undefined, context, false) + if (!hasAccess) { + throw new Error('File is not available in this execution.') + } +} + +export async function readUserFileContent( + file: unknown, + options: ReadUserFileContentOptions +): Promise { + if (!isUserFileWithMetadata(file)) { + throw new Error('Expected a file object with metadata.') + } + + await assertUserFileContentAccess(file, options) + + const maxSourceBytes = options.maxSourceBytes ?? MAX_FUNCTION_FILE_BYTES + if (Number.isFinite(file.size) && file.size > maxSourceBytes) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: file.size, + limitBytes: maxSourceBytes, + }) + } + + let buffer: Buffer | null = null + const log = getLogger(options) + const requestId = options.requestId ?? 'unknown' + + buffer = await downloadFileFromStorage(file, requestId, log) + + if (!buffer) { + throw new Error(`File content for ${file.name} is unavailable.`) + } + if (buffer.length > maxSourceBytes) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: buffer.length, + limitBytes: maxSourceBytes, + }) + } + + const shouldSlice = + options.chunked || options.offset !== undefined || options.length !== undefined + const selected = shouldSlice ? normalizeRange(buffer, options) : buffer + assertInlineMaterializationSize(selected.length, options.maxBytes ?? MAX_FUNCTION_INLINE_BYTES) + + return options.encoding === 'base64' ? 
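+  // 'base64' keeps binary content intact (e.g. for the sandbox chunk helpers);
+  // 'text' assumes the underlying bytes are UTF-8.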
bufferToBase64(selected) : selected.toString('utf8') +} + +export function unavailableLargeValueError(ref: LargeValueRef): Error { + return getLargeValueMaterializationError(ref) +} diff --git a/apps/sim/lib/execution/payloads/serializer.test.ts b/apps/sim/lib/execution/payloads/serializer.test.ts new file mode 100644 index 00000000000..453c1637ece --- /dev/null +++ b/apps/sim/lib/execution/payloads/serializer.test.ts @@ -0,0 +1,129 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { + getLargeValueMaterializationError, + isLargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import type { UserFile } from '@/executor/types' +import { navigatePath } from '@/executor/variables/resolvers/reference' + +const TEST_EXECUTION_CONTEXT = { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', +} + +describe('compactExecutionPayload', () => { + it('keeps small JSON payloads inline', async () => { + const value = { result: { id: 'event-1', text: 'hello' } } + + await expect(compactExecutionPayload(value, { thresholdBytes: 1024 })).resolves.toEqual(value) + }) + + it('strips UserFile base64 by default while preserving metadata', async () => { + const file: UserFile = { + id: 'file-1', + name: 'large.txt', + url: 'https://example.com/file', + size: 11 * 1024 * 1024, + type: 'text/plain', + key: 'execution/workflow/execution/large.txt', + context: 'execution', + base64: 'Zm9v', + } + + const compacted = await compactExecutionPayload( + { event: { files: [file] } }, + { thresholdBytes: 1024 } + ) + + expect(compacted).toEqual({ + event: { + files: [ + { + id: 'file-1', + name: 'large.txt', + url: 'https://example.com/file', + size: 11 * 1024 * 1024, + type: 'text/plain', + key: 'execution/workflow/execution/large.txt', + context: 'execution', + }, + ], + }, + }) + }) + + it('stores oversized arrays as refs and allows nested path navigation in-process', async () => { + const results = Array.from({ length: 100 }, (_, index) => [{ event: { id: `event-${index}` } }]) + const compacted = await compactExecutionPayload( + { results }, + { thresholdBytes: 256, ...TEST_EXECUTION_CONTEXT } + ) + + expect(isLargeValueRef(compacted.results)).toBe(true) + expect( + navigatePath(compacted, ['results', '1', '0', 'event', 'id'], { + executionContext: TEST_EXECUTION_CONTEXT, + }) + ).toBe('event-1') + }) + + it('does not double-spill existing refs', async () => { + const compacted = await compactExecutionPayload( + { results: [[{ payload: 'x'.repeat(2048) }]] }, + { thresholdBytes: 256 } + ) + + const compactedAgain = await compactExecutionPayload(compacted, { thresholdBytes: 256 }) + + expect(compactedAgain).toEqual(compacted) + }) + + it('rejects durable compaction when storage context is incomplete', async () => { + await expect( + compactExecutionPayload( + { payload: 'x'.repeat(2048) }, + { thresholdBytes: 256, requireDurable: true } + ) + ).rejects.toThrow('Cannot persist large execution value') + }) + + it('does not treat loosely marker-shaped user data as a large-value ref', () => { + expect( + isLargeValueRef({ + __simLargeValueRef: true, + id: 'user-supplied', + }) + ).toBe(false) + }) + + it('rejects ref-shaped user data with non-execution storage keys', () => { + expect( + isLargeValueRef({ + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 1024, + key: 
'https://example.com/large-value-lv_ABCDEFGHIJKL.json',
+      })
+    ).toBe(false)
+  })
+
+  it('omits opaque ref IDs from user-facing materialization errors', () => {
+    const error = getLargeValueMaterializationError({
+      __simLargeValueRef: true,
+      version: 1,
+      id: 'lv_CQcekP8gSJI5',
+      kind: 'string',
+      size: 23_259_101,
+    })
+
+    expect(error.message).toContain('This execution value is too large to inline (22.2 MB)')
+    expect(error.message).not.toContain('lv_CQcekP8gSJI5')
+  })
+})
diff --git a/apps/sim/lib/execution/payloads/serializer.ts b/apps/sim/lib/execution/payloads/serializer.ts
new file mode 100644
index 00000000000..d892b2a3226
--- /dev/null
+++ b/apps/sim/lib/execution/payloads/serializer.ts
@@ -0,0 +1,162 @@
+import { isUserFileWithMetadata } from '@/lib/core/utils/user-file'
+import {
+  isLargeValueRef,
+  LARGE_VALUE_THRESHOLD_BYTES,
+} from '@/lib/execution/payloads/large-value-ref'
+import { type LargeValueStoreContext, storeLargeValue } from '@/lib/execution/payloads/store'
+import type { BlockLog } from '@/executor/types'
+
+export interface CompactExecutionPayloadOptions extends LargeValueStoreContext {
+  thresholdBytes?: number
+  preserveUserFileBase64?: boolean
+  preserveRoot?: boolean
+}
+
+interface CompactState {
+  seen: WeakSet<object>
+}
+
+function getJsonAndSize(value: unknown): { json: string; size: number } | null {
+  try {
+    const json = JSON.stringify(value)
+    if (json === undefined) {
+      return null
+    }
+    return {
+      json,
+      size: Buffer.byteLength(json, 'utf8'),
+    }
+  } catch {
+    return null
+  }
+}
+
+function stripUserFileBase64<T extends { base64?: unknown }>(value: T): Omit<T, 'base64'> {
+  const { base64: _base64, ...rest } = value
+  return rest
+}
+
+async function compactValue(
+  value: unknown,
+  options: CompactExecutionPayloadOptions,
+  state: CompactState,
+  depth = 0
+): Promise<unknown> {
+  if (!value || typeof value !== 'object') {
+    const measured = getJsonAndSize(value)
+    if (measured && measured.size > (options.thresholdBytes ?? LARGE_VALUE_THRESHOLD_BYTES)) {
+      return options.preserveRoot && depth === 0
+        ? value
+        : storeLargeValue(value, measured.json, measured.size, options)
+    }
+    return value
+  }
+
+  if (isLargeValueRef(value)) {
+    return value
+  }
+
+  if (isUserFileWithMetadata(value) && !options.preserveUserFileBase64) {
+    return stripUserFileBase64(value)
+  }
+
+  if (state.seen.has(value)) {
+    return value
+  }
+  state.seen.add(value)
+
+  const compacted = Array.isArray(value)
+    ? await Promise.all(value.map((item) => compactValue(item, options, state, depth + 1)))
+    : Object.fromEntries(
+        await Promise.all(
+          Object.entries(value).map(async ([key, entryValue]) => [
+            key,
+            key === 'finalBlockLogs' && Array.isArray(entryValue)
+              ? await compactBlockLogs(entryValue as BlockLog[], options)
+              : await compactValue(entryValue, options, state, depth + 1),
+          ])
+        )
+      )
+
+  const measured = getJsonAndSize(compacted)
+  if (measured && measured.size > (options.thresholdBytes ?? LARGE_VALUE_THRESHOLD_BYTES)) {
+    return options.preserveRoot && depth === 0
+      ? compacted
+      : storeLargeValue(compacted, measured.json, measured.size, options)
+  }
+
+  return compacted
+}
+
+async function forceStoreValue(
+  value: unknown,
+  options: CompactExecutionPayloadOptions
+): Promise<unknown> {
+  if (isLargeValueRef(value)) {
+    return value
+  }
+  const measured = getJsonAndSize(value)
+  if (!measured) {
+    return value
+  }
+  return storeLargeValue(value, measured.json, measured.size, options)
+}
+
+export async function compactExecutionPayload<T>(
+  value: T,
+  options: CompactExecutionPayloadOptions = {}
+): Promise<T> {
+  return (await compactValue(value, options, { seen: new WeakSet() })) as T
+}
+
+/**
+ * Compacts subflow result aggregates while preserving indexable `results`.
+ */
+export async function compactSubflowResults<T>(
+  results: T[],
+  options: CompactExecutionPayloadOptions = {}
+): Promise<T[]> {
+  const entryOptions = { ...options, preserveRoot: false }
+  let compactedResults = (await Promise.all(
+    results.map((result) => compactExecutionPayload(result, entryOptions))
+  )) as T[]
+
+  const aggregate = getJsonAndSize({ results: compactedResults })
+  if (aggregate && aggregate.size <= (options.thresholdBytes ?? LARGE_VALUE_THRESHOLD_BYTES)) {
+    return compactedResults
+  }
+
+  compactedResults = (await Promise.all(
+    compactedResults.map((result) => forceStoreValue(result, options))
+  )) as T[]
+
+  return compactedResults
+}
+
+export async function compactBlockLogs(
+  logs: BlockLog[] | undefined,
+  options: CompactExecutionPayloadOptions = {}
+): Promise<BlockLog[] | undefined> {
+  if (!logs) {
+    return logs
+  }
+
+  return Promise.all(
+    logs.map(async (log) => {
+      const compactedLog = { ...log }
+      if ('input' in compactedLog) {
+        compactedLog.input = await compactExecutionPayload(compactedLog.input, options)
+      }
+      if ('output' in compactedLog) {
+        compactedLog.output = await compactExecutionPayload(compactedLog.output, options)
+      }
+      if ('childTraceSpans' in compactedLog) {
+        compactedLog.childTraceSpans = await compactExecutionPayload(
          compactedLog.childTraceSpans,
          options
        )
+      }
+      return compactedLog
+    })
+  )
+}
diff --git a/apps/sim/lib/execution/payloads/store.test.ts b/apps/sim/lib/execution/payloads/store.test.ts
new file mode 100644
index 00000000000..d9c052654ef
--- /dev/null
+++ b/apps/sim/lib/execution/payloads/store.test.ts
@@ -0,0 +1,417 @@
+/**
+ * @vitest-environment node
+ */
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+import {
+  cacheLargeValue,
+  clearLargeValueCacheForTests,
+  materializeLargeValueRefSync,
+} from '@/lib/execution/payloads/cache'
+import {
+  MAX_DURABLE_LARGE_VALUE_BYTES,
+  readLargeValueRefFromStorage,
+  readUserFileContent,
+} from '@/lib/execution/payloads/materialization.server'
+import { materializeLargeValueRef, storeLargeValue } from '@/lib/execution/payloads/store'
+import { EXECUTION_RESOURCE_LIMIT_CODE } from '@/lib/execution/resource-errors'
+
+const { mockDownloadFile, mockUploadFile, mockVerifyFileAccess } = vi.hoisted(() => ({
+  mockDownloadFile: vi.fn(),
+  mockUploadFile: vi.fn(),
+  mockVerifyFileAccess: vi.fn(),
+}))
+
+vi.mock('@/lib/uploads', () => ({
+  StorageService: {
+    uploadFile: mockUploadFile,
+    downloadFile: mockDownloadFile,
+  },
+}))
+
+vi.mock('@/app/api/files/authorization', () => ({
+  verifyFileAccess: mockVerifyFileAccess,
+}))
+
+describe('large execution payload store', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    clearLargeValueCacheForTests()
+    mockUploadFile.mockImplementation(async ({ customKey }) => ({ key: customKey }))
+    mockVerifyFileAccess.mockResolvedValue(true)
+  })
+
it('stores oversized JSON in execution object storage and returns a small ref', async () => { + const value = { payload: 'x'.repeat(2048) } + const json = JSON.stringify(value) + + const ref = await storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + requireDurable: true, + }) + + expect(ref).toMatchObject({ + __simLargeValueRef: true, + version: 1, + kind: 'object', + size: Buffer.byteLength(json, 'utf8'), + executionId: 'execution-1', + }) + expect(ref.key).toBe(`execution/workspace-1/workflow-1/execution-1/large-value-${ref.id}.json`) + expect(mockUploadFile).toHaveBeenCalledWith( + expect.objectContaining({ + contentType: 'application/json', + context: 'execution', + preserveKey: true, + customKey: ref.key, + }) + ) + }) + + it('fails durable writes before producing refs when execution context is missing', async () => { + const value = { payload: 'x'.repeat(2048) } + const json = JSON.stringify(value) + + await expect( + storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { requireDurable: true }) + ).rejects.toThrow('Cannot persist large execution value') + + expect(mockUploadFile).not.toHaveBeenCalled() + }) + + it('fails durable writes when storage upload fails', async () => { + const value = { payload: 'x'.repeat(2048) } + const json = JSON.stringify(value) + mockUploadFile.mockRejectedValueOnce(new Error('storage down')) + + await expect( + storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + requireDurable: true, + }) + ).rejects.toThrow('Failed to persist large execution value: storage down') + }) + + it('materializes object-storage refs through the server helper', async () => { + mockDownloadFile.mockResolvedValueOnce(Buffer.from(JSON.stringify({ ok: true }), 'utf8')) + + await expect( + materializeLargeValueRef( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { + workspaceId: 'workflow-1', + workflowId: 'workflow-2', + executionId: 'execution-1', + } + ) + ).resolves.toEqual({ ok: true }) + }) + + it('bounds durable large-value writes', async () => { + const size = MAX_DURABLE_LARGE_VALUE_BYTES + 1 + + await expect( + storeLargeValue('x', JSON.stringify('x'), size, { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + requireDurable: true, + }) + ).rejects.toMatchObject({ code: EXECUTION_RESOURCE_LIMIT_CODE }) + }) + + it('bounds explicit server-side materialization', async () => { + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 2048, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { + workspaceId: 'workflow-1', + workflowId: 'workflow-2', + executionId: 'execution-1', + maxBytes: 1024, + } + ) + ).rejects.toMatchObject({ code: EXECUTION_RESOURCE_LIMIT_CODE }) + }) + + it('does not materialize durable refs without caller execution context', async () => { + await expect( + materializeLargeValueRef({ + __simLargeValueRef: true, + version: 1, + id: 'lv_NOCTXVALUE12', + kind: 'object', + size: 11, + key: 
'execution/workflow-1/workflow-2/execution-1/large-value-lv_NOCTXVALUE12.json', + executionId: 'execution-1', + }) + ).resolves.toBeUndefined() + + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('checks caller execution context before returning cached large values', async () => { + const value = { payload: 'cached' } + const json = JSON.stringify(value) + const ref = await storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + requireDurable: true, + }) + + await expect( + materializeLargeValueRef(ref, { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'other-execution', + userId: 'user-1', + }) + ).rejects.toThrow('Large execution value is not available in this execution.') + }) + + it('rejects durable refs whose key does not match caller execution context', async () => { + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { workspaceId: 'workflow-1', workflowId: 'workflow-2', executionId: 'other-execution' } + ) + ).rejects.toThrow('Large execution value is not available in this execution.') + + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('allows prior-execution durable refs only when workflow-scoped reads are explicitly enabled', async () => { + mockDownloadFile.mockResolvedValueOnce(Buffer.from(JSON.stringify({ ok: true }), 'utf8')) + + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workspace-1/workflow-1/source-execution/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'source-execution', + }, + { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'resume-execution', + allowLargeValueWorkflowScope: true, + } + ) + ).resolves.toEqual({ ok: true }) + }) + + it('does not materialize forged keyless refs from another cached execution', () => { + cacheLargeValue('lv_FORGEDCACHE1', { secret: true }, 16, { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'source-execution', + }) + + const forged = { + __simLargeValueRef: true, + version: 1, + id: 'lv_FORGEDCACHE1', + kind: 'object', + size: 16, + executionId: 'other-execution', + } as const + + expect( + materializeLargeValueRefSync(forged, { + workspaceId: 'workspace-2', + workflowId: 'workflow-2', + executionId: 'other-execution', + }) + ).toBeUndefined() + }) + + it('does not evict unrecoverable in-memory refs for recoverable cache entries', () => { + const scope = { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } + const unrecoverableId = 'lv_UNRECOVER001' + const unrecoverableRef = { + __simLargeValueRef: true, + version: 1, + id: unrecoverableId, + kind: 'object', + size: 200 * 1024 * 1024, + executionId: scope.executionId, + } as const + + expect(cacheLargeValue(unrecoverableId, { retained: true }, unrecoverableRef.size, scope)).toBe( + true + ) + expect( + cacheLargeValue('lv_RECOVER00001', { recoverable: true }, 70 * 1024 * 1024, scope, { + recoverable: true, + }) + ).toBe(false) + expect(materializeLargeValueRefSync(unrecoverableRef, scope)).toEqual({ retained: true }) + }) + + it('rejects durable refs when caller omits workspace and workflow 
context', async () => { + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { executionId: 'execution-1' } + ) + ).rejects.toThrow('Large execution value requires workspace and workflow context.') + + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('rejects execution files with forged public contexts before storage download', async () => { + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'secret.txt', + url: '/api/files/serve/execution/workspace-1/workflow-1/execution-1/secret.txt', + key: 'execution/workspace-1/workflow-1/execution-1/secret.txt', + context: 'profile-pictures', + size: 32, + type: 'text/plain', + }, + { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + encoding: 'text', + } + ) + ).rejects.toThrow('File context does not match its storage key.') + + expect(mockVerifyFileAccess).not.toHaveBeenCalled() + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('rejects URL-only file objects instead of reading internal URLs directly', async () => { + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'secret.txt', + url: '/api/files/serve/execution/workspace-1/workflow-1/execution-1/secret.txt?context=execution', + key: '', + size: 32, + type: 'text/plain', + }, + { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + encoding: 'text', + } + ) + ).rejects.toThrow('File content requires a storage key.') + + expect(mockVerifyFileAccess).not.toHaveBeenCalled() + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('throws instead of truncating non-chunked file reads over the inline cap', async () => { + const workspaceId = '11111111-1111-4111-8111-111111111111' + const workflowId = '22222222-2222-4222-8222-222222222222' + const executionId = '33333333-3333-4333-8333-333333333333' + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello world', 'utf8')) + + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'hello.txt', + url: `/api/files/serve/execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + key: `execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + context: 'execution', + size: 11, + type: 'text/plain', + }, + { + workspaceId, + workflowId, + executionId, + userId: 'user-1', + encoding: 'text', + maxBytes: 5, + } + ) + ).rejects.toMatchObject({ code: EXECUTION_RESOURCE_LIMIT_CODE }) + }) + + it('allows explicit chunked file reads to slice within the inline cap', async () => { + const workspaceId = '11111111-1111-4111-8111-111111111111' + const workflowId = '22222222-2222-4222-8222-222222222222' + const executionId = '33333333-3333-4333-8333-333333333333' + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello world', 'utf8')) + + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'hello.txt', + url: `/api/files/serve/execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + key: `execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + context: 'execution', + size: 11, + type: 'text/plain', + }, + { + workspaceId, + workflowId, + executionId, + userId: 'user-1', + encoding: 'text', + maxBytes: 5, + chunked: true, + } + ) + ).resolves.toBe('hello') + }) +}) diff --git 
a/apps/sim/lib/execution/payloads/store.ts b/apps/sim/lib/execution/payloads/store.ts new file mode 100644 index 00000000000..2256813b941 --- /dev/null +++ b/apps/sim/lib/execution/payloads/store.ts @@ -0,0 +1,177 @@ +import { createLogger } from '@sim/logger' +import { toError } from '@sim/utils/errors' +import { generateShortId } from '@sim/utils/id' +import { cacheLargeValue, materializeLargeValueRefSync } from '@/lib/execution/payloads/cache' +import { + LARGE_VALUE_REF_VERSION, + type LargeValueKind, + type LargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { + assertDurableLargeValueSize, + assertLargeValueRefAccess, + isValidLargeValueKey, + readLargeValueRefFromStorage, +} from '@/lib/execution/payloads/materialization.server' +import { generateExecutionFileKey } from '@/lib/uploads/contexts/execution/utils' + +const logger = createLogger('LargeExecutionPayloadStore') + +export interface LargeValueStoreContext { + workspaceId?: string + workflowId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean + userId?: string + requireDurable?: boolean +} + +function getKind(value: unknown): LargeValueKind { + if (typeof value === 'string') return 'string' + if (Array.isArray(value)) return 'array' + if (value && typeof value === 'object') return 'object' + return 'json' +} + +function getPreview(value: unknown): unknown { + if (typeof value === 'string') { + return value.length > 256 ? `${value.slice(0, 256)}...` : value + } + if (Array.isArray(value)) { + return { length: value.length } + } + if (value && typeof value === 'object') { + return { keys: Object.keys(value).slice(0, 20) } + } + return value +} + +async function persistValue( + id: string, + json: string, + context: LargeValueStoreContext +): Promise { + const { workspaceId, workflowId, executionId, userId } = context + if (!workspaceId || !workflowId || !executionId) { + if (context.requireDurable) { + throw new Error( + 'Cannot persist large execution value without workspace, workflow, and execution IDs' + ) + } + return undefined + } + + const key = generateExecutionFileKey( + { workspaceId, workflowId, executionId }, + `large-value-${id}.json` + ) + + try { + const { StorageService } = await import('@/lib/uploads') + const fileInfo = await StorageService.uploadFile({ + file: Buffer.from(json, 'utf8'), + fileName: key, + contentType: 'application/json', + context: 'execution', + preserveKey: true, + customKey: key, + metadata: { + originalName: `large-value-${id}.json`, + uploadedAt: new Date().toISOString(), + purpose: 'execution-large-value', + workspaceId, + ...(userId ? 
{ userId } : {}), + }, + }) + return fileInfo.key + } catch (error) { + if (context.requireDurable) { + throw new Error(`Failed to persist large execution value: ${toError(error).message}`) + } + logger.warn('Failed to persist large execution value, keeping in memory only', { + id, + error: toError(error).message, + }) + return undefined + } +} + +export async function storeLargeValue( + value: unknown, + json: string, + size: number, + context: LargeValueStoreContext +): Promise { + assertDurableLargeValueSize(size) + const id = `lv_${generateShortId(12)}` + const key = await persistValue(id, json, context) + const cached = cacheLargeValue(id, value, size, context, { recoverable: Boolean(key) }) + if (!key && !cached) { + throw new Error('Cannot retain large execution value without durable storage') + } + + return { + __simLargeValueRef: true, + version: LARGE_VALUE_REF_VERSION, + id, + kind: getKind(value), + size, + key, + executionId: context.executionId, + preview: getPreview(value), + } +} + +export async function materializeLargeValueRef( + ref: LargeValueRef, + context?: LargeValueStoreContext +): Promise { + if (!context?.executionId) { + return undefined + } + + assertLargeValueRefAccess(ref, context) + + const cached = materializeLargeValueRefSync(ref, context) + if (cached !== undefined) { + return cached + } + + if (!ref.key || !isValidLargeValueKey(ref)) { + return undefined + } + + try { + const value = await readLargeValueRefFromStorage(ref, { + workspaceId: context.workspaceId, + workflowId: context.workflowId, + executionId: context.executionId, + largeValueExecutionIds: context.largeValueExecutionIds, + allowLargeValueWorkflowScope: context.allowLargeValueWorkflowScope, + userId: context.userId, + maxBytes: ref.size, + }) + if (value === undefined) { + return undefined + } + cacheLargeValue( + ref.id, + value, + ref.size, + { + ...context, + executionId: ref.executionId ?? 
context.executionId, + }, + { recoverable: true } + ) + return value + } catch (error) { + logger.warn('Failed to materialize persisted large execution value', { + id: ref.id, + key: ref.key, + error, + }) + return undefined + } +} diff --git a/apps/sim/lib/execution/redis-budget.server.ts b/apps/sim/lib/execution/redis-budget.server.ts new file mode 100644 index 00000000000..1e78199029e --- /dev/null +++ b/apps/sim/lib/execution/redis-budget.server.ts @@ -0,0 +1,136 @@ +import { createLogger, type Logger } from '@sim/logger' +import { toError } from '@sim/utils/errors' +import type { getRedisClient } from '@/lib/core/config/redis' +import { ExecutionResourceLimitError } from '@/lib/execution/resource-errors' + +type RedisClient = NonNullable> + +const logger = createLogger('ExecutionRedisBudget') +const REDIS_BUDGET_PREFIX = 'execution:redis-budget:' +const MAX_SINGLE_REDIS_WRITE_BYTES = 8 * 1024 * 1024 +const MAX_EXECUTION_REDIS_BYTES = 64 * 1024 * 1024 +const MAX_USER_REDIS_BYTES = 256 * 1024 * 1024 +const REDIS_BUDGET_TTL_SECONDS = 60 * 60 + +const RESERVE_REDIS_BYTES_SCRIPT = ` +local bytes = tonumber(ARGV[1]) +local execution_limit = tonumber(ARGV[2]) +local user_limit = tonumber(ARGV[3]) +local ttl_seconds = tonumber(ARGV[4]) +local execution_current = tonumber(redis.call('GET', KEYS[1]) or '0') +if execution_limit > 0 and execution_current + bytes > execution_limit then + return {0, 'execution_redis_bytes', execution_current} +end +local user_current = 0 +if #KEYS >= 2 then + user_current = tonumber(redis.call('GET', KEYS[2]) or '0') + if user_limit > 0 and user_current + bytes > user_limit then + return {0, 'user_redis_bytes', user_current} + end +end +redis.call('INCRBY', KEYS[1], bytes) +redis.call('EXPIRE', KEYS[1], ttl_seconds) +if #KEYS >= 2 then + redis.call('INCRBY', KEYS[2], bytes) + redis.call('EXPIRE', KEYS[2], ttl_seconds) +end +return {1, 'ok', execution_current + bytes, user_current + bytes} +` + +const RELEASE_REDIS_BYTES_SCRIPT = ` +local bytes = tonumber(ARGV[1]) +for i = 1, #KEYS do + local next_value = redis.call('DECRBY', KEYS[i], bytes) + if next_value <= 0 then + redis.call('DEL', KEYS[i]) + end +end +return 1 +` + +export type ExecutionRedisBudgetCategory = 'event_buffer' | 'base64_cache' + +export interface ExecutionRedisBudgetReservation { + executionId: string + userId?: string + category: ExecutionRedisBudgetCategory + bytes: number + operation: string + logger?: Logger +} + +export function getExecutionRedisBudgetLimits() { + return { + maxSingleWriteBytes: MAX_SINGLE_REDIS_WRITE_BYTES, + maxExecutionBytes: MAX_EXECUTION_REDIS_BYTES, + maxUserBytes: MAX_USER_REDIS_BYTES, + ttlSeconds: REDIS_BUDGET_TTL_SECONDS, + } +} + +function getBudgetKeys(reservation: ExecutionRedisBudgetReservation): string[] { + const keys = [`${REDIS_BUDGET_PREFIX}execution:${reservation.executionId}`] + if (reservation.userId) { + keys.push(`${REDIS_BUDGET_PREFIX}user:${reservation.userId}`) + } + return keys +} + +export async function reserveExecutionRedisBytes( + redis: RedisClient, + reservation: ExecutionRedisBudgetReservation +): Promise { + if (reservation.bytes <= 0) return + + const limits = getExecutionRedisBudgetLimits() + if (reservation.bytes > limits.maxSingleWriteBytes) { + throw new ExecutionResourceLimitError({ + resource: 'redis_key_bytes', + attemptedBytes: reservation.bytes, + limitBytes: limits.maxSingleWriteBytes, + }) + } + + const keys = getBudgetKeys(reservation) + const result = (await redis.eval( + RESERVE_REDIS_BYTES_SCRIPT, + keys.length, + ...keys, + 
reservation.bytes, + limits.maxExecutionBytes, + limits.maxUserBytes, + limits.ttlSeconds + )) as [number, string, number | string | null] + + const [allowed, resource, current] = result + if (allowed === 1) return + + throw new ExecutionResourceLimitError({ + resource: resource === 'user_redis_bytes' ? 'user_redis_bytes' : 'execution_redis_bytes', + attemptedBytes: reservation.bytes, + currentBytes: Number(current ?? 0), + limitBytes: resource === 'user_redis_bytes' ? limits.maxUserBytes : limits.maxExecutionBytes, + }) +} + +export async function releaseExecutionRedisBytes( + redis: RedisClient, + reservation: ExecutionRedisBudgetReservation +): Promise { + if (reservation.bytes <= 0) return + + try { + const keys = getBudgetKeys(reservation) + await redis.eval(RELEASE_REDIS_BYTES_SCRIPT, keys.length, ...keys, reservation.bytes) + } catch (error) { + const log = reservation.logger ?? logger + log.warn('Failed to release execution Redis budget reservation', { + executionId: reservation.executionId, + userId: reservation.userId, + category: reservation.category, + operation: reservation.operation, + bytes: reservation.bytes, + error: toError(error).message, + }) + } +} diff --git a/apps/sim/lib/execution/resource-errors.ts b/apps/sim/lib/execution/resource-errors.ts new file mode 100644 index 00000000000..3cd2f61bad9 --- /dev/null +++ b/apps/sim/lib/execution/resource-errors.ts @@ -0,0 +1,45 @@ +export const EXECUTION_RESOURCE_LIMIT_CODE = 'execution_resource_limit_exceeded' as const + +export type ExecutionResourceLimitResource = + | 'redis_key_bytes' + | 'execution_redis_bytes' + | 'user_redis_bytes' + | 'execution_payload_bytes' + +export interface ExecutionResourceLimitDetails { + resource: ExecutionResourceLimitResource + attemptedBytes: number + limitBytes: number + currentBytes?: number + statusCode?: number +} + +export class ExecutionResourceLimitError extends Error { + readonly code = EXECUTION_RESOURCE_LIMIT_CODE + readonly statusCode: number + readonly resource: ExecutionResourceLimitResource + readonly attemptedBytes: number + readonly limitBytes: number + readonly currentBytes?: number + + constructor(details: ExecutionResourceLimitDetails) { + super('Execution memory limit exceeded. Reduce payload size and try again.') + this.name = 'ExecutionResourceLimitError' + this.resource = details.resource + this.attemptedBytes = details.attemptedBytes + this.limitBytes = details.limitBytes + this.currentBytes = details.currentBytes + this.statusCode = details.statusCode ?? (details.resource === 'user_redis_bytes' ? 
429 : 413) + } +} + +export function isExecutionResourceLimitError( + error: unknown +): error is ExecutionResourceLimitError { + return ( + error instanceof ExecutionResourceLimitError || + (typeof error === 'object' && + error !== null && + (error as { code?: unknown }).code === EXECUTION_RESOURCE_LIMIT_CODE) + ) +} diff --git a/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts b/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts index 6c237668c73..4665b6fc228 100644 --- a/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts @@ -114,7 +114,6 @@ export async function uploadExecutionFile( url: presignedUrl, key: fileInfo.key, context: 'execution', - base64: fileBuffer.toString('base64'), } logger.info(`Successfully uploaded execution file: ${fileName} (${fileBuffer.length} bytes)`, { diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts new file mode 100644 index 00000000000..b65d0795a06 --- /dev/null +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts @@ -0,0 +1,117 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { hydrateUserFilesWithBase64 } from '@/lib/uploads/utils/user-file-base64.server' +import type { UserFile } from '@/executor/types' + +const { mockDownloadFile, mockVerifyFileAccess } = vi.hoisted(() => ({ + mockDownloadFile: vi.fn(), + mockVerifyFileAccess: vi.fn(), +})) + +vi.mock('@/lib/core/config/redis', () => ({ + getRedisClient: () => null, +})) + +vi.mock('@/lib/uploads', () => ({ + StorageService: { + downloadFile: mockDownloadFile, + }, +})) + +vi.mock('@/lib/uploads/contexts/execution/execution-file-manager', () => ({ + downloadExecutionFile: mockDownloadFile, +})) + +vi.mock('@/lib/uploads/utils/file-utils.server', () => ({ + downloadFileFromStorage: mockDownloadFile, +})) + +vi.mock('@/app/api/files/authorization', () => ({ + verifyFileAccess: mockVerifyFileAccess, +})) + +describe('hydrateUserFilesWithBase64', () => { + beforeEach(() => { + vi.clearAllMocks() + mockVerifyFileAccess.mockResolvedValue(true) + }) + + it('strips existing base64 when it exceeds maxBytes', async () => { + const file: UserFile = { + id: 'file-1', + name: 'large.txt', + key: 'execution/workspace/workflow/execution/large.txt', + url: 'https://example.com/large.txt', + size: 5, + type: 'text/plain', + context: 'execution', + base64: Buffer.from('hello').toString('base64'), + } + + const hydrated = await hydrateUserFilesWithBase64({ file }, { maxBytes: 1 }) + + expect(hydrated.file).not.toHaveProperty('base64') + }) + + it('keeps existing base64 when it is within maxBytes', async () => { + const base64 = Buffer.from('hello').toString('base64') + const file: UserFile = { + id: 'file-1', + name: 'small.txt', + key: 'execution/workspace/workflow/execution/small.txt', + url: 'https://example.com/small.txt', + size: 5, + type: 'text/plain', + context: 'execution', + base64, + } + + const hydrated = await hydrateUserFilesWithBase64({ file }, { maxBytes: 10 }) + + expect(hydrated.file.base64).toBe(base64) + }) + + it('does not hydrate URL-only internal file objects', async () => { + const file: UserFile = { + id: 'file-1', + name: 'private.txt', + key: '', + url: '/api/files/serve/execution/workspace/workflow/execution/private.txt?context=execution', + size: 5, + type: 'text/plain', + } + + const hydrated = await 
hydrateUserFilesWithBase64({ file }, { maxBytes: 10, userId: 'user-1' }) + + expect(hydrated.file).not.toHaveProperty('base64') + }) + + it('hydrates prior-execution files when workflow-scoped reads are enabled', async () => { + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello', 'utf8')) + const file: UserFile = { + id: 'file-1', + name: 'prior.txt', + key: 'execution/workspace/workflow/source-execution/prior.txt', + url: '/api/files/serve/execution/workspace/workflow/source-execution/prior.txt?context=execution', + size: 5, + type: 'text/plain', + context: 'execution', + } + + const hydrated = await hydrateUserFilesWithBase64( + { file }, + { + workspaceId: 'workspace', + workflowId: 'workflow', + executionId: 'resume-execution', + allowLargeValueWorkflowScope: true, + userId: 'user-1', + maxBytes: 10, + } + ) + + expect(hydrated.file.base64).toBe(Buffer.from('hello').toString('base64')) + }) +}) diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index 3aa2f219eb1..299490b18e9 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -1,14 +1,24 @@ import type { Logger } from '@sim/logger' import { createLogger } from '@sim/logger' import { getRedisClient } from '@/lib/core/config/redis' -import { getMaxExecutionTimeout } from '@/lib/core/execution-limits' import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' -import { bufferToBase64 } from '@/lib/uploads/utils/file-utils' -import { downloadFileFromStorage, downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server' +import { LARGE_VALUE_THRESHOLD_BYTES } from '@/lib/execution/payloads/large-value-ref' +import { + assertUserFileContentAccess, + readUserFileContent, +} from '@/lib/execution/payloads/materialization.server' +import { + type ExecutionRedisBudgetReservation, + releaseExecutionRedisBytes, + reserveExecutionRedisBytes, +} from '@/lib/execution/redis-budget.server' +import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' import type { UserFile } from '@/executor/types' -const DEFAULT_MAX_BASE64_BYTES = 10 * 1024 * 1024 -const DEFAULT_TIMEOUT_MS = getMaxExecutionTimeout() +const INLINE_BASE64_JSON_OVERHEAD_BYTES = 512 * 1024 +const DEFAULT_MAX_BASE64_BYTES = Math.floor( + (LARGE_VALUE_THRESHOLD_BYTES - INLINE_BASE64_JSON_OVERHEAD_BYTES) * 0.75 +) const DEFAULT_CACHE_TTL_SECONDS = 300 const REDIS_KEY_PREFIX = 'user-file:base64:' @@ -25,7 +35,12 @@ interface HydrationState { export interface Base64HydrationOptions { requestId?: string + workspaceId?: string + workflowId?: string executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean + userId?: string logger?: Logger maxBytes?: number allowUnknownSize?: boolean @@ -78,10 +93,31 @@ function createBase64Cache(options: Base64HydrationOptions, logger: Logger): Bas } }, async set(file: UserFile, value: string, ttlSeconds: number) { + const budgetReservation: ExecutionRedisBudgetReservation | null = executionId + ? 
{ + executionId, + userId: options.userId, + category: 'base64_cache', + operation: 'set_base64_cache', + bytes: Buffer.byteLength(value, 'utf8'), + logger, + } + : null + let budgetReserved = false try { const key = getFullCacheKey(executionId, file) + if (budgetReservation) { + await reserveExecutionRedisBytes(redis, budgetReservation) + budgetReserved = true + } await redis.set(key, value, 'EX', ttlSeconds) } catch (error) { + if (budgetReserved && budgetReservation) { + await releaseExecutionRedisBytes(redis, budgetReservation) + } + if (isExecutionResourceLimitError(error)) { + throw error + } logger.warn(`[${options.requestId}] Redis set failed, skipping cache`, error) } }, @@ -118,18 +154,31 @@ function getFullCacheKey(executionId: string | undefined, file: UserFile): strin return `${REDIS_KEY_PREFIX}${fileKey}` } +function stripBase64(file: UserFile): UserFile { + const { base64: _base64, ...rest } = file + return rest +} + async function resolveBase64( file: UserFile, options: Base64HydrationOptions, logger: Logger ): Promise { + const requestedMaxBytes = options.maxBytes ?? DEFAULT_MAX_BASE64_BYTES + const maxBytes = Math.min(requestedMaxBytes, DEFAULT_MAX_BASE64_BYTES) + if (file.base64) { + const base64Bytes = Buffer.byteLength(file.base64, 'base64') + if (base64Bytes > maxBytes) { + logger.warn( + `[${options.requestId}] Skipping existing base64 for ${file.name} (decoded ${base64Bytes} exceeds ${maxBytes})` + ) + return null + } return file.base64 } - const maxBytes = options.maxBytes ?? DEFAULT_MAX_BASE64_BYTES const allowUnknownSize = options.allowUnknownSize ?? false - const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS const hasStableStorageKey = Boolean(file.key) if (Number.isFinite(file.size) && file.size > maxBytes) { @@ -148,40 +197,24 @@ async function resolveBase64( return null } - let buffer: Buffer | null = null const requestId = options.requestId ?? 
'unknown' - - if (file.key) { - try { - buffer = await downloadFileFromStorage(file, requestId, logger) - } catch (error) { - logger.warn( - `[${requestId}] Failed to download ${file.name} from storage, trying URL fallback`, - error - ) - } - } - - if (!buffer && file.url) { - try { - buffer = await downloadFileFromUrl(file.url, timeoutMs) - } catch (error) { - logger.warn(`[${requestId}] Failed to download ${file.name} from URL`, error) - } - } - - if (!buffer) { - return null - } - - if (buffer.length > maxBytes) { - logger.warn( - `[${options.requestId}] Skipping base64 for ${file.name} (downloaded ${buffer.length} exceeds ${maxBytes})` - ) + try { + return await readUserFileContent(file, { + requestId, + workspaceId: options.workspaceId, + workflowId: options.workflowId, + executionId: options.executionId, + largeValueExecutionIds: options.largeValueExecutionIds, + allowLargeValueWorkflowScope: options.allowLargeValueWorkflowScope, + userId: options.userId, + encoding: 'base64', + maxBytes, + maxSourceBytes: maxBytes, + }) + } catch (error) { + logger.warn(`[${requestId}] Failed to hydrate base64 for ${file.name}`, error) return null } - - return bufferToBase64(buffer) } async function hydrateUserFile( @@ -190,14 +223,39 @@ async function hydrateUserFile( state: HydrationState, logger: Logger ): Promise { + if (!file.base64) { + try { + await assertUserFileContentAccess(file, { + requestId: options.requestId, + workspaceId: options.workspaceId, + workflowId: options.workflowId, + executionId: options.executionId, + largeValueExecutionIds: options.largeValueExecutionIds, + allowLargeValueWorkflowScope: options.allowLargeValueWorkflowScope, + userId: options.userId, + logger, + }) + } catch (error) { + logger.warn(`[${options.requestId ?? 'unknown'}] Skipping unauthorized file base64`, error) + return stripBase64(file) + } + } + const cached = await state.cache.get(file) if (cached) { + const maxBytes = Math.min( + options.maxBytes ?? DEFAULT_MAX_BASE64_BYTES, + DEFAULT_MAX_BASE64_BYTES + ) + if (Buffer.byteLength(cached, 'base64') > maxBytes) { + return stripBase64(file) + } return { ...file, base64: cached } } const base64 = await resolveBase64(file, options, logger) if (!base64) { - return file + return stripBase64(file) } await state.cache.set(file, base64, state.cacheTtlSeconds) @@ -253,6 +311,18 @@ export async function hydrateUserFilesWithBase64( return (await hydrateValue(value, options, state, logger)) as T } +/** + * Hydrates a single UserFile object when a resolver explicitly asks for base64. 
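+ * Applies the same access checks, size caps, and per-execution caching as the
+ * bulk hydrateUserFilesWithBase64 traversal, but for a single file.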
+ */ +export async function hydrateUserFileWithBase64( + file: UserFile, + options: Base64HydrationOptions +): Promise { + const logger = getHydrationLogger(options) + const state = createHydrationState(options, logger) + return hydrateUserFile(file, options, state, logger) +} + function isPlainObject(value: unknown): value is Record { if (!value || typeof value !== 'object') { return false diff --git a/apps/sim/lib/workflows/executor/execution-core.ts b/apps/sim/lib/workflows/executor/execution-core.ts index 22b58c5e707..c099ce3151b 100644 --- a/apps/sim/lib/workflows/executor/execution-core.ts +++ b/apps/sim/lib/workflows/executor/execution-core.ts @@ -10,6 +10,7 @@ import { z } from 'zod' import { isPlainRecord } from '@/lib/core/utils/records' import { getPersonalAndWorkspaceEnv } from '@/lib/environment/utils' import { clearExecutionCancellation } from '@/lib/execution/cancellation' +import { warmLargeValueRefs } from '@/lib/execution/payloads/hydration' import type { LoggingSession } from '@/lib/logs/execution/logging-session' import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' import { @@ -552,10 +553,20 @@ export async function executeWorkflowCore( return persistencePromise } + const largeValueExecutionIds = Array.from( + new Set([executionId, ...(metadata.largeValueExecutionIds ?? [])].filter(Boolean)) + ) + const allowLargeValueWorkflowScope = + metadata.allowLargeValueWorkflowScope === true || + metadata.resumeFromSnapshot === true || + Boolean(runFromBlock?.sourceSnapshot) + const contextExtensions: ContextExtensions = { stream: !!onStream, selectedOutputs, executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, workspaceId: providedWorkspaceId, userId, isDeployedContext: !metadata.isClientSession, @@ -582,6 +593,27 @@ export async function executeWorkflowCore( callChain: metadata.callChain, } + if (snapshot.state) { + await warmLargeValueRefs(snapshot.state, { + workspaceId: providedWorkspaceId, + workflowId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, + userId, + }) + } + if (runFromBlock?.sourceSnapshot) { + await warmLargeValueRefs(runFromBlock.sourceSnapshot, { + workspaceId: providedWorkspaceId, + workflowId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, + userId, + }) + } + for (const variable of Object.values(workflowVariables)) { if ( isPlainRecord(variable) && diff --git a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts index e4e74a0f982..b41764a0ebd 100644 --- a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts +++ b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts @@ -13,6 +13,7 @@ import { resetExecutionStreamBuffer, type TerminalExecutionStreamStatus, } from '@/lib/execution/event-buffer' +import { compactBlockLogs, compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { preprocessExecution } from '@/lib/execution/preprocessing' import { LoggingSession } from '@/lib/logs/execution/logging-session' import { executeWorkflowCore } from '@/lib/workflows/executor/execution-core' @@ -25,6 +26,7 @@ import type { SerializableExecutionState, } from '@/executor/execution/types' import type { + BlockLog, ExecutionResult, PauseKind, PausePoint, @@ -980,7 +982,12 @@ export class PauseResumeManager { throw new Error(RUN_BUFFER_UNAVAILABLE_ERROR) } - const eventWriter = createExecutionEventWriter(resumeExecutionId) + const eventWriter = 
createExecutionEventWriter(resumeExecutionId, { + workspaceId: metadata.workspaceId, + workflowId, + userId: metadata.userId, + preserveUserFileBase64: true, + }) const metaInitialized = await initializeExecutionStreamMeta(resumeExecutionId, { userId: metadata.userId, workflowId, @@ -1197,6 +1204,23 @@ export class PauseResumeManager { } } + const compactResultLogs = await compactBlockLogs(result.logs, { + workspaceId: baseSnapshot.metadata.workspaceId, + workflowId, + executionId: resumeExecutionId, + userId: metadata.userId, + requireDurable: true, + }) + const compactResultOutput = await compactExecutionPayload(result.output, { + workspaceId: baseSnapshot.metadata.workspaceId, + workflowId, + executionId: resumeExecutionId, + userId: metadata.userId, + preserveUserFileBase64: true, + preserveRoot: true, + requireDurable: true, + }) + if ( result.status === 'cancelled' && timeoutController?.isTimedOut() && @@ -1219,7 +1243,7 @@ export class PauseResumeManager { data: { error: timeoutErrorMessage, duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'error' @@ -1234,7 +1258,7 @@ export class PauseResumeManager { workflowId, data: { duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'cancelled' @@ -1248,11 +1272,11 @@ export class PauseResumeManager { executionId: resumeExecutionId, workflowId, data: { - output: result.output, + output: compactResultOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || new Date().toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'complete' @@ -1267,11 +1291,11 @@ export class PauseResumeManager { workflowId, data: { success: result.success, - output: result.output, + output: compactResultOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || new Date().toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'complete' @@ -1280,6 +1304,23 @@ export class PauseResumeManager { } catch (execError) { executionError = execError const execErrorResult = hasExecutionResult(execError) ? execError.executionResult : undefined + let compactErrorLogs: BlockLog[] | undefined + try { + compactErrorLogs = execErrorResult?.logs + ? 
await compactBlockLogs(execErrorResult.logs, { + workspaceId: baseSnapshot.metadata.workspaceId, + workflowId, + executionId: resumeExecutionId, + userId: metadata.userId, + requireDurable: true, + }) + : undefined + } catch (compactionError) { + logger.warn('Failed to compact resume error logs, omitting oversized error details', { + resumeExecutionId, + error: toError(compactionError).message, + }) + } finalMetaStatus = 'error' await writeBufferedEvent( { @@ -1290,7 +1331,7 @@ export class PauseResumeManager { data: { error: toError(execError).message, duration: 0, - finalBlockLogs: execErrorResult?.logs, + finalBlockLogs: compactErrorLogs, }, }, 'error' diff --git a/apps/sim/lib/workflows/persistence/utils.test.ts b/apps/sim/lib/workflows/persistence/utils.test.ts index 82997c4f518..e6b9dbb086a 100644 --- a/apps/sim/lib/workflows/persistence/utils.test.ts +++ b/apps/sim/lib/workflows/persistence/utils.test.ts @@ -179,6 +179,7 @@ const mockBlocksFromDb = [ name: 'Parallel Container', position: { x: 600, y: 50 }, height: 250, + count: 3, data: { width: 500, height: 300, parallelType: 'count', count: 3 }, }), mockWorkflowId @@ -225,7 +226,10 @@ const mockSubflowsFromDb = [ config: { id: 'parallel-1', nodes: ['block-3'], + count: 5, distribution: ['item1', 'item2'], + parallelType: 'count', + batchSize: 1, }, }, ] @@ -260,7 +264,8 @@ const mockWorkflowState = createWorkflowState({ name: 'Parallel Container', position: { x: 600, y: 50 }, height: 250, - data: { width: 500, height: 300, parallelType: 'count', count: 3 }, + count: 3, + data: { width: 500, height: 300, parallelType: 'count', count: 3, batchSize: 1 }, }), 'block-3': createApiBlock({ id: 'block-3', @@ -292,6 +297,8 @@ const mockWorkflowState = createWorkflowState({ id: 'parallel-1', nodes: ['block-3'], distribution: ['item1', 'item2'], + parallelType: 'count', + batchSize: 1, }, }, }) @@ -418,8 +425,16 @@ describe('Database Helpers', () => { count: 5, distribution: ['item1', 'item2'], parallelType: 'count', + batchSize: 1, enabled: true, }) + expect(result?.blocks['parallel-1'].data).toEqual( + expect.objectContaining({ + count: 5, + parallelType: 'count', + batchSize: 1, + }) + ) }) it('should return null when no blocks are found', async () => { @@ -709,6 +724,20 @@ describe('Database Helpers', () => { workflowId: mockWorkflowId, type: 'loop', }) + expect(capturedSubflowInserts).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + id: 'parallel-1', + workflowId: mockWorkflowId, + type: 'parallel', + config: expect.objectContaining({ + count: 3, + parallelType: 'count', + batchSize: 1, + }), + }), + ]) + ) }) it('should regenerate missing loop and parallel definitions from block data', async () => { @@ -748,7 +777,11 @@ describe('Database Helpers', () => { expect(capturedSubflowInserts).toEqual( expect.arrayContaining([ expect.objectContaining({ id: 'loop-1', type: 'loop' }), - expect.objectContaining({ id: 'parallel-1', type: 'parallel' }), + expect.objectContaining({ + id: 'parallel-1', + type: 'parallel', + config: expect.objectContaining({ batchSize: 1 }), + }), ]) ) }) diff --git a/apps/sim/lib/workflows/search-replace/replacements.test.ts b/apps/sim/lib/workflows/search-replace/replacements.test.ts index 8dd90198605..3ea5963acaa 100644 --- a/apps/sim/lib/workflows/search-replace/replacements.test.ts +++ b/apps/sim/lib/workflows/search-replace/replacements.test.ts @@ -1201,6 +1201,13 @@ describe('buildWorkflowSearchReplacePlan', () => { expect(countPlan.conflicts).toEqual([]) 
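+      // batchSize is now an editable subflow field, so the plan is expected to
+      // include a subflowBatchSize update for the parallel block as well.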
expect(countPlan.subflowUpdates).toEqual([ + expect.objectContaining({ + blockId: 'parallel-1', + blockType: 'parallel', + fieldId: WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize, + previousValue: '20', + nextValue: 3, + }), expect.objectContaining({ blockId: 'parallel-1', blockType: 'parallel', @@ -1569,8 +1576,8 @@ describe('buildWorkflowSearchReplacePlan', () => { expect(plan.subflowUpdates).toEqual([]) expect(plan.conflicts).toEqual([ { - matchId: matches[0].id, - reason: 'Subflow iteration count must be between 1 and 20', + matchId: 'subflow-text:parallel-1:subflowBatchSize:0:0', + reason: 'Parallel batch size must be between 1 and 20', }, ]) }) diff --git a/apps/sim/lib/workflows/search-replace/subflow-fields.ts b/apps/sim/lib/workflows/search-replace/subflow-fields.ts index c87b982efb2..6f46d9039e8 100644 --- a/apps/sim/lib/workflows/search-replace/subflow-fields.ts +++ b/apps/sim/lib/workflows/search-replace/subflow-fields.ts @@ -5,6 +5,7 @@ export const WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS = { iterations: 'subflowIterations', items: 'subflowItems', condition: 'subflowCondition', + batchSize: 'subflowBatchSize', } as const export type WorkflowSearchSubflowFieldId = @@ -18,6 +19,7 @@ interface WorkflowSearchSubflowBlock { loopType?: string parallelType?: string count?: unknown + batchSize?: unknown collection?: unknown whileCondition?: unknown doWhileCondition?: unknown @@ -113,6 +115,14 @@ export function getWorkflowSearchSubflowFields( editable: true, valueKind: parallelType === 'count' ? 'number' : 'text', }, + { + id: WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize, + title: 'Parallel Batch Size', + type: 'short-input', + value: String(block.data?.batchSize ?? 20), + editable: true, + valueKind: 'number', + }, ] } @@ -146,7 +156,10 @@ export function parseWorkflowSearchSubflowReplacement({ }): | { success: true; value: WorkflowSearchSubflowEditableValue } | { success: false; reason: string } { - if (fieldId !== WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.iterations) { + if ( + fieldId !== WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.iterations && + fieldId !== WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize + ) { return { success: true, value: replacement } } @@ -156,11 +169,17 @@ export function parseWorkflowSearchSubflowReplacement({ } const count = Number.parseInt(trimmed, 10) - const max = blockType === 'parallel' ? 20 : 1000 - if (count < 1 || count > max) { + const maxBatchSize = 20 + if ( + count < 1 || + (fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize && count > maxBatchSize) + ) { return { success: false, - reason: `Subflow iteration count must be between 1 and ${max}`, + reason: + fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize + ? 
`Parallel batch size must be between 1 and ${maxBatchSize}` + : 'Subflow iteration count must be greater than 0', } } diff --git a/apps/sim/lib/workflows/streaming/streaming.ts b/apps/sim/lib/workflows/streaming/streaming.ts index d4f881e78c6..3336f17a9c2 100644 --- a/apps/sim/lib/workflows/streaming/streaming.ts +++ b/apps/sim/lib/workflows/streaming/streaming.ts @@ -6,6 +6,7 @@ import { traverseObjectPath, } from '@/lib/core/utils/response-format' import { encodeSSE } from '@/lib/core/utils/sse' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' import { processStreamingBlockLogs } from '@/lib/tokenization' import { @@ -45,6 +46,11 @@ export interface StreamingResponseOptions { requestId: string streamConfig: StreamingConfig executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean + workspaceId?: string + workflowId?: string + userId?: string executeFn: StreamingExecutorFn } @@ -78,8 +84,18 @@ async function buildMinimalResult( completedBlockIds: Set, requestId: string, includeFileBase64: boolean, - base64MaxBytes: number | undefined + base64MaxBytes: number | undefined, + executionId?: string, + context: Pick = {} ): Promise<{ success: boolean; error?: string; output: Record }> { + const durableContext = { + workspaceId: context.workspaceId, + workflowId: context.workflowId, + executionId, + userId: context.userId, + requireDurable: Boolean(context.workspaceId && context.workflowId && executionId), + } + const minimalResult = { success: result.success, error: result.error, @@ -88,12 +104,20 @@ async function buildMinimalResult( if (result.status === 'paused') { minimalResult.output = result.output || {} - return minimalResult + return compactExecutionPayload(minimalResult, { + ...durableContext, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }) } if (!selectedOutputs?.length) { minimalResult.output = result.output || {} - return minimalResult + return compactExecutionPayload(minimalResult, { + ...durableContext, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }) } if (!result.output || !result.logs) { @@ -138,7 +162,11 @@ async function buildMinimalResult( ;(minimalResult.output[blockId] as Record)[path] = value } - return minimalResult + return compactExecutionPayload(minimalResult, { + ...durableContext, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }) } function updateLogsWithStreamedContent( @@ -191,6 +219,13 @@ export async function createStreamingResponse( options: StreamingResponseOptions ): Promise { const { requestId, streamConfig, executionId, executeFn } = options + const durableContext = { + workspaceId: options.workspaceId, + workflowId: options.workflowId, + executionId, + userId: options.userId, + requireDurable: Boolean(options.workspaceId && options.workflowId && executionId), + } const timeoutController = createTimeoutAbortController(streamConfig.timeoutMs) return new ReadableStream({ @@ -281,14 +316,23 @@ export async function createStreamingResponse( const hydratedOutput = includeFileBase64 ? 
await hydrateUserFilesWithBase64(outputValue, { requestId, + workspaceId: options.workspaceId, + workflowId: options.workflowId, executionId, + largeValueExecutionIds: options.largeValueExecutionIds, + allowLargeValueWorkflowScope: options.allowLargeValueWorkflowScope, + userId: options.userId, maxBytes: base64MaxBytes, }) : outputValue + const compactHydratedOutput = await compactExecutionPayload(hydratedOutput, { + ...durableContext, + preserveUserFileBase64: includeFileBase64, + }) const formattedOutput = - typeof hydratedOutput === 'string' - ? hydratedOutput - : JSON.stringify(hydratedOutput, null, 2) + typeof compactHydratedOutput === 'string' + ? compactHydratedOutput + : JSON.stringify(compactHydratedOutput, null, 2) sendChunk(blockId, formattedOutput) } } @@ -336,7 +380,13 @@ export async function createStreamingResponse( state.completedBlockIds, requestId, streamConfig.includeFileBase64 ?? true, - streamConfig.base64MaxBytes + streamConfig.base64MaxBytes, + executionId, + { + workspaceId: options.workspaceId, + workflowId: options.workflowId, + userId: options.userId, + } ) controller.enqueue( diff --git a/apps/sim/lib/workflows/utils.ts b/apps/sim/lib/workflows/utils.ts index 318d6249d6a..30afa6d81d3 100644 --- a/apps/sim/lib/workflows/utils.ts +++ b/apps/sim/lib/workflows/utils.ts @@ -6,6 +6,7 @@ import { authorizeWorkflowByWorkspacePermission } from '@sim/workflow-authz' import { and, asc, eq, inArray, isNull, max, min, sql } from 'drizzle-orm' import { NextResponse } from 'next/server' import { getSession } from '@/lib/auth' +import { materializeLargeValueRefsSync } from '@/lib/execution/payloads/cache' import { getNextWorkflowColor } from '@/lib/workflows/colors' import { buildDefaultWorkflowArtifacts } from '@/lib/workflows/defaults' import { saveWorkflowToNormalizedTables } from '@/lib/workflows/persistence/utils' @@ -319,13 +320,14 @@ export const createHttpResponseFromBlock = ( executionResult: Pick ): NextResponse => { const { data = {}, status = 200, headers = {} } = executionResult.output + const responseData = materializeLargeValueRefsSync(data) const responseHeaders = new Headers({ 'Content-Type': 'application/json', ...headers, }) - return NextResponse.json(data, { + return NextResponse.json(responseData, { status: status, headers: responseHeaders, }) diff --git a/apps/sim/proxy.ts b/apps/sim/proxy.ts index 5a2e2796269..ed642956360 100644 --- a/apps/sim/proxy.ts +++ b/apps/sim/proxy.ts @@ -203,6 +203,6 @@ export const config = { '/signup', '/invite/:path*', // Match invitation routes // Catch-all for other pages, excluding static assets and public directories - '/((?!_next/static|_next/image|ingest|favicon.ico|logo/|static/|footer/|social/|enterprise/|favicon/|twitter/|robots.txt|sitemap.xml).*)', + '/((?!api/|api$|_next/static|_next/image|ingest|favicon.ico|logo/|static/|footer/|social/|enterprise/|favicon/|twitter/|robots.txt|sitemap.xml).*)', ], } diff --git a/apps/sim/serializer/types.ts b/apps/sim/serializer/types.ts index 8192014a4ae..8d7bc56e4ed 100644 --- a/apps/sim/serializer/types.ts +++ b/apps/sim/serializer/types.ts @@ -58,4 +58,5 @@ export interface SerializedParallel { distribution?: any[] | Record | string // Items to distribute or expression to evaluate count?: number // Number of parallel executions for count-based parallel parallelType?: 'count' | 'collection' // Explicit parallel type to avoid inference bugs + batchSize?: number // Maximum number of branches to run concurrently per batch } diff --git 
a/apps/sim/stores/workflows/workflow/store.test.ts b/apps/sim/stores/workflows/workflow/store.test.ts index dc24da784e6..720fee128b8 100644 --- a/apps/sim/stores/workflows/workflow/store.test.ts +++ b/apps/sim/stores/workflows/workflow/store.test.ts @@ -500,7 +500,7 @@ describe('workflow store', () => { expect(state.loops.loop1.forEachItems).toBe('["item1", "item2", "item3"]') }) - it('should clamp loop count between 1 and 1000', () => { + it('should allow loop counts above 1000 and clamp only to at least 1', () => { const { updateLoopCount } = useWorkflowStore.getState() addBlock( @@ -517,7 +517,7 @@ describe('workflow store', () => { updateLoopCount('loop1', 1500) let state = useWorkflowStore.getState() - expect(state.blocks.loop1?.data?.count).toBe(1000) + expect(state.blocks.loop1?.data?.count).toBe(1500) updateLoopCount('loop1', 0) state = useWorkflowStore.getState() @@ -576,7 +576,7 @@ describe('workflow store', () => { expect(parsedDistribution).toHaveLength(3) }) - it('should clamp parallel count between 1 and 20', () => { + it('should allow parallel counts above 1000 and clamp only to at least 1', () => { const { updateParallelCount } = useWorkflowStore.getState() addBlock( @@ -592,13 +592,46 @@ describe('workflow store', () => { updateParallelCount('parallel1', 100) let state = useWorkflowStore.getState() - expect(state.blocks.parallel1?.data?.count).toBe(20) + expect(state.blocks.parallel1?.data?.count).toBe(100) + + updateParallelCount('parallel1', 1001) + state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.count).toBe(1001) updateParallelCount('parallel1', 0) state = useWorkflowStore.getState() expect(state.blocks.parallel1?.data?.count).toBe(1) }) + it('should clamp parallel batch size between 1 and 20', () => { + const { updateParallelBatchSize } = useWorkflowStore.getState() + + addBlock( + 'parallel1', + 'parallel', + 'Test Parallel', + { x: 0, y: 0 }, + { + count: 5, + batchSize: 20, + collection: '', + } + ) + + updateParallelBatchSize('parallel1', 7) + let state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.batchSize).toBe(7) + expect(state.parallels.parallel1.batchSize).toBe(7) + + updateParallelBatchSize('parallel1', 50) + state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.batchSize).toBe(20) + + updateParallelBatchSize('parallel1', 0) + state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.batchSize).toBe(1) + }) + it('should regenerate parallels when updateParallelType is called', () => { const { updateParallelType } = useWorkflowStore.getState() diff --git a/apps/sim/stores/workflows/workflow/store.ts b/apps/sim/stores/workflows/workflow/store.ts index 888bf069bef..e6fd406b80e 100644 --- a/apps/sim/stores/workflows/workflow/store.ts +++ b/apps/sim/stores/workflows/workflow/store.ts @@ -26,6 +26,7 @@ import type { WorkflowStore, } from '@/stores/workflows/workflow/types' import { + clampParallelBatchSize, findAllDescendantNodes, generateLoopBlocks, generateParallelBlocks, @@ -995,7 +996,7 @@ export const useWorkflowStore = create()( ...block, data: { ...block.data, - count: Math.max(1, Math.min(1000, count)), // Clamp between 1-1000 + count: Math.max(1, count), }, }, } @@ -1163,7 +1164,7 @@ export const useWorkflowStore = create()( ...block, data: { ...block.data, - count: Math.max(1, Math.min(20, count)), // Clamp between 1-20 + count: Math.max(1, count), }, }, } @@ -1180,6 +1181,32 @@ export const useWorkflowStore = create()( // Note: Socket.IO handles real-time sync 
automatically }, + updateParallelBatchSize: (parallelId: string, batchSize: number) => { + const block = get().blocks[parallelId] + if (!block || block.type !== 'parallel') return + + const newBlocks = { + ...get().blocks, + [parallelId]: { + ...block, + data: { + ...block.data, + batchSize: clampParallelBatchSize(batchSize), + }, + }, + } + + const newState = { + blocks: newBlocks, + edges: [...get().edges], + loops: { ...get().loops }, + parallels: generateParallelBlocks(newBlocks), + } + + set(newState) + get().updateLastSaved() + }, + updateParallelCollection: (parallelId: string, collection: string) => { const block = get().blocks[parallelId] if (!block || block.type !== 'parallel') return diff --git a/apps/sim/stores/workflows/workflow/types.ts b/apps/sim/stores/workflows/workflow/types.ts index c209cfd0eef..1f32f318764 100644 --- a/apps/sim/stores/workflows/workflow/types.ts +++ b/apps/sim/stores/workflows/workflow/types.ts @@ -84,6 +84,7 @@ export interface WorkflowActions { setLoopWhileCondition: (loopId: string, condition: string) => void setLoopDoWhileCondition: (loopId: string, condition: string) => void updateParallelCount: (parallelId: string, count: number) => void + updateParallelBatchSize: (parallelId: string, batchSize: number) => void updateParallelCollection: (parallelId: string, collection: string) => void updateParallelType: (parallelId: string, parallelType: 'count' | 'collection') => void generateLoopBlocks: () => Record diff --git a/apps/sim/stores/workflows/workflow/utils.ts b/apps/sim/stores/workflows/workflow/utils.ts index 26c2f642a85..a7077dc0903 100644 --- a/apps/sim/stores/workflows/workflow/utils.ts +++ b/apps/sim/stores/workflows/workflow/utils.ts @@ -6,6 +6,16 @@ import type { Edge } from 'reactflow' import type { BlockState, Loop, Parallel } from '@/stores/workflows/workflow/types' const DEFAULT_LOOP_ITERATIONS = 5 +const DEFAULT_PARALLEL_BATCH_SIZE = 20 +const MAX_PARALLEL_BATCH_SIZE = 20 + +export function clampParallelBatchSize(batchSize: unknown): number { + const parsed = typeof batchSize === 'number' ? batchSize : Number.parseInt(String(batchSize), 10) + if (Number.isNaN(parsed)) { + return DEFAULT_PARALLEL_BATCH_SIZE + } + return Math.max(1, Math.min(MAX_PARALLEL_BATCH_SIZE, parsed)) +} /** * Check if adding an edge would create a cycle in the graph. @@ -111,6 +121,7 @@ export function convertParallelBlockToParallel( validatedParallelType === 'collection' ? 
parallelBlock.data?.collection || '' : undefined const count = parallelBlock.data?.count || 5 + const batchSize = clampParallelBatchSize(parallelBlock.data?.batchSize) return { id: parallelBlockId, @@ -118,6 +129,7 @@ export function convertParallelBlockToParallel( distribution, count, parallelType: validatedParallelType, + batchSize, enabled: parallelBlock.enabled, } } diff --git a/apps/sim/tools/function/execute.test.ts b/apps/sim/tools/function/execute.test.ts index 73eb21de9e6..b174634e57f 100644 --- a/apps/sim/tools/function/execute.test.ts +++ b/apps/sim/tools/function/execute.test.ts @@ -66,6 +66,7 @@ describe('Function Execute Tool', () => { outputTable: undefined, timeout: 5000, workflowId: undefined, + executionId: undefined, workspaceId: undefined, userId: undefined, }) @@ -101,6 +102,7 @@ describe('Function Execute Tool', () => { outputSandboxPath: undefined, outputTable: undefined, workflowId: undefined, + executionId: undefined, workspaceId: undefined, userId: undefined, }) @@ -128,6 +130,7 @@ describe('Function Execute Tool', () => { outputSandboxPath: undefined, outputTable: undefined, workflowId: undefined, + executionId: undefined, workspaceId: undefined, userId: undefined, }) diff --git a/apps/sim/tools/function/execute.ts b/apps/sim/tools/function/execute.ts index 4d096ce7cf4..6821131b30a 100644 --- a/apps/sim/tools/function/execute.ts +++ b/apps/sim/tools/function/execute.ts @@ -137,6 +137,9 @@ export const functionExecuteTool: ToolConfig _context?: { workflowId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string workspaceId?: string } diff --git a/packages/python-sdk/README.md b/packages/python-sdk/README.md index e193e951c13..2690f635a17 100644 --- a/packages/python-sdk/README.md +++ b/packages/python-sdk/README.md @@ -115,17 +115,17 @@ result = client.execute_workflow_sync("workflow-id", {"data": "some input"}, tim **Returns:** `WorkflowExecutionResult` -##### get_job_status(task_id) +##### get_job_status(job_id) Get the status of an async job. 
```python -status = client.get_job_status("task-id-from-async-execution") +status = client.get_job_status("job-id-from-async-execution") print("Job status:", status) ``` **Parameters:** -- `task_id` (str): The task ID returned from async execution +- `job_id` (str): The job ID returned from async execution **Returns:** `dict` @@ -248,10 +248,11 @@ class SimStudioError(Exception): @dataclass class AsyncExecutionResult: success: bool - task_id: str - status: str # 'queued' - created_at: str - links: Dict[str, str] + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True ``` ### RateLimitInfo diff --git a/packages/python-sdk/simstudio/__init__.py b/packages/python-sdk/simstudio/__init__.py index ec242338ec5..0e2609e2f26 100644 --- a/packages/python-sdk/simstudio/__init__.py +++ b/packages/python-sdk/simstudio/__init__.py @@ -49,10 +49,11 @@ class WorkflowStatus: class AsyncExecutionResult: """Result of an async workflow execution.""" success: bool - task_id: str - status: str # 'queued' - created_at: str - links: Dict[str, str] + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True @dataclass @@ -237,13 +238,14 @@ def execute_workflow( result_data = response.json() # Check if this is an async execution response (202 status) - if response.status_code == 202 and 'taskId' in result_data: + if response.status_code == 202 and 'jobId' in result_data: return AsyncExecutionResult( success=result_data.get('success', True), - task_id=result_data['taskId'], - status=result_data.get('status', 'queued'), - created_at=result_data.get('createdAt', ''), - links=result_data.get('links', {}) + job_id=result_data['jobId'], + status_url=result_data['statusUrl'], + execution_id=result_data.get('executionId'), + message=result_data.get('message', ''), + async_execution=result_data.get('async', True) ) return WorkflowExecutionResult( @@ -374,12 +376,12 @@ def close(self) -> None: """Close the underlying HTTP session.""" self._session.close() - def get_job_status(self, task_id: str) -> Dict[str, Any]: + def get_job_status(self, job_id: str) -> Dict[str, Any]: """ Get the status of an async job. 
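+        Polls the /api/jobs/{job_id} endpoint on the configured base URL.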
Args: - task_id: The task ID returned from async execution + job_id: The job ID returned from async execution Returns: Dictionary containing the job status @@ -387,7 +389,7 @@ def get_job_status(self, task_id: str) -> Dict[str, Any]: Raises: SimStudioError: If getting the status fails """ - url = f"{self.base_url}/api/jobs/{task_id}" + url = f"{self.base_url}/api/jobs/{job_id}" try: response = self._session.get(url) diff --git a/packages/python-sdk/tests/test_client.py b/packages/python-sdk/tests/test_client.py index 8dfdee99b61..814ad7610ef 100644 --- a/packages/python-sdk/tests/test_client.py +++ b/packages/python-sdk/tests/test_client.py @@ -95,17 +95,18 @@ def test_context_manager(mock_close): @patch('simstudio.requests.Session.post') -def test_async_execution_returns_task_id(mock_post): +def test_async_execution_returns_job_id(mock_post): """Test async execution returns AsyncExecutionResult.""" mock_response = Mock() mock_response.ok = True mock_response.status_code = 202 mock_response.json.return_value = { "success": True, - "taskId": "task-123", - "status": "queued", - "createdAt": "2024-01-01T00:00:00Z", - "links": {"status": "/api/jobs/task-123"} + "jobId": "job-123", + "statusUrl": "https://test.sim.ai/api/jobs/job-123", + "executionId": "execution-123", + "message": "Workflow execution started", + "async": True } mock_response.headers.get.return_value = None mock_post.return_value = mock_response @@ -118,9 +119,10 @@ def test_async_execution_returns_task_id(mock_post): ) assert result.success is True - assert result.task_id == "task-123" - assert result.status == "queued" - assert result.links["status"] == "/api/jobs/task-123" + assert result.job_id == "job-123" + assert result.status_url == "https://test.sim.ai/api/jobs/job-123" + assert result.execution_id == "execution-123" + assert result.async_execution is True call_args = mock_post.call_args assert call_args[1]["headers"]["X-Execution-Mode"] == "async" diff --git a/packages/ts-sdk/README.md b/packages/ts-sdk/README.md index 44d21d0c9ed..0ce547f6e51 100644 --- a/packages/ts-sdk/README.md +++ b/packages/ts-sdk/README.md @@ -125,17 +125,17 @@ const result = await client.executeWorkflowSync('workflow-id', { data: 'some inp **Returns:** `Promise` -##### getJobStatus(taskId) +##### getJobStatus(jobId) Get the status of an async job. ```typescript -const status = await client.getJobStatus('task-id-from-async-execution'); +const status = await client.getJobStatus('job-id-from-async-execution'); console.log('Job status:', status); ``` **Parameters:** -- `taskId` (string): The task ID returned from async execution +- `jobId` (string): The job ID returned from async execution **Returns:** `Promise` @@ -226,6 +226,24 @@ interface WorkflowExecutionResult { } ``` +### LargeValueRef + +Oversized execution values may be returned as a versioned reference inside `output`, `logs`, streaming events, or async job status responses. +The `key` field is an opaque execution-scoped server storage pointer, not a client-readable download URL. 
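+Clients can use the `preview`, `kind`, and `size` fields to render a placeholder without materializing the full value.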
+ +```typescript +interface LargeValueRef { + __simLargeValueRef: true; + version: 1; + id: string; + kind: 'array' | 'object' | 'string' | 'json'; + size: number; + key?: string; + executionId?: string; + preview?: unknown; +} +``` + ### WorkflowStatus ```typescript @@ -250,12 +268,11 @@ class SimStudioError extends Error { ```typescript interface AsyncExecutionResult { success: boolean; - taskId: string; - status: 'queued'; - createdAt: string; - links: { - status: string; - }; + jobId: string; + statusUrl: string; + executionId?: string; + message: string; + async: true; } ``` diff --git a/packages/ts-sdk/src/index.ts b/packages/ts-sdk/src/index.ts index 31f7a34f263..ffed7ca1e7b 100644 --- a/packages/ts-sdk/src/index.ts +++ b/packages/ts-sdk/src/index.ts @@ -5,6 +5,18 @@ export interface SimStudioConfig { baseUrl?: string } +export interface LargeValueRef { + __simLargeValueRef: true + version: 1 + id: string + kind: 'array' | 'object' | 'string' | 'json' + size: number + /** Opaque execution-scoped server storage key. This is not a download URL. */ + key?: string + executionId?: string + preview?: unknown +} + export interface WorkflowExecutionResult { success: boolean output?: any diff --git a/packages/workflow-persistence/src/load.ts b/packages/workflow-persistence/src/load.ts index 3f6f8d2de39..288e9217e8a 100644 --- a/packages/workflow-persistence/src/load.ts +++ b/packages/workflow-persistence/src/load.ts @@ -4,6 +4,7 @@ import type { BlockState, Loop, Parallel } from '@sim/workflow-types/workflow' import { SUBFLOW_TYPES } from '@sim/workflow-types/workflow' import { and, eq, isNull } from 'drizzle-orm' import type { Edge } from 'reactflow' +import { clampParallelBatchSize } from './subflow-helpers' import type { DbOrTx, NormalizedWorkflowData } from './types' const logger = createLogger('WorkflowPersistenceLoad') @@ -141,9 +142,24 @@ export async function loadWorkflowFromNormalizedTablesRaw( (config as Parallel).parallelType === 'collection' ? (config as Parallel).parallelType : 'count', + batchSize: clampParallelBatchSize((config as Parallel).batchSize), enabled: blocksMap[subflow.id]?.enabled ?? true, } parallels[subflow.id] = parallel + + if (blocksMap[subflow.id]) { + const block = blocksMap[subflow.id] + blocksMap[subflow.id] = { + ...block, + data: { + ...block.data, + count: parallel.count, + collection: parallel.distribution ?? block.data?.collection ?? '', + parallelType: parallel.parallelType, + batchSize: parallel.batchSize, + }, + } + } } else { logger.warn(`Unknown subflow type: ${subflow.type} for subflow ${subflow.id}`) } diff --git a/packages/workflow-persistence/src/subflow-helpers.ts b/packages/workflow-persistence/src/subflow-helpers.ts index b0f552f1977..cf0c92b370b 100644 --- a/packages/workflow-persistence/src/subflow-helpers.ts +++ b/packages/workflow-persistence/src/subflow-helpers.ts @@ -1,6 +1,16 @@ import type { BlockState, Loop, Parallel } from '@sim/workflow-types/workflow' const DEFAULT_LOOP_ITERATIONS = 5 +const DEFAULT_PARALLEL_BATCH_SIZE = 20 +const MAX_PARALLEL_BATCH_SIZE = 20 + +export function clampParallelBatchSize(batchSize: unknown): number { + const parsed = typeof batchSize === 'number' ? 
batchSize : Number.parseInt(String(batchSize), 10) + if (Number.isNaN(parsed)) { + return DEFAULT_PARALLEL_BATCH_SIZE + } + return Math.max(1, Math.min(MAX_PARALLEL_BATCH_SIZE, parsed)) +} export function findChildNodes(containerId: string, blocks: Record): string[] { return Object.values(blocks) @@ -50,6 +60,7 @@ export function convertParallelBlockToParallel( validatedParallelType === 'collection' ? parallelBlock.data?.collection || '' : undefined const count = parallelBlock.data?.count || 5 + const batchSize = clampParallelBatchSize(parallelBlock.data?.batchSize) return { id: parallelBlockId, @@ -57,6 +68,7 @@ export function convertParallelBlockToParallel( distribution, count, parallelType: validatedParallelType, + batchSize, enabled: parallelBlock.enabled, } } diff --git a/packages/workflow-types/src/workflow.ts b/packages/workflow-types/src/workflow.ts index 006bd2ccab6..06b9692ddba 100644 --- a/packages/workflow-types/src/workflow.ts +++ b/packages/workflow-types/src/workflow.ts @@ -25,6 +25,7 @@ export interface ParallelConfig { nodes: string[] distribution?: unknown[] | Record | string parallelType?: 'count' | 'collection' + batchSize?: number } export interface Subflow { @@ -52,6 +53,7 @@ export interface BlockData { whileCondition?: string doWhileCondition?: string parallelType?: 'collection' | 'count' + batchSize?: number type?: string canonicalModes?: Record } @@ -178,6 +180,7 @@ export interface Parallel { distribution?: any[] | Record | string count?: number parallelType?: 'count' | 'collection' + batchSize?: number enabled: boolean locked?: boolean }
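
Two short consumer-side sketches follow. First, detecting `LargeValueRef` placeholders when walking `output`, `logs`, or job-status payloads: the type is restated locally so the block is self-contained, and `collectLargeValueRefs` is a hypothetical helper, not an SDK export.

```typescript
// Shape from packages/ts-sdk/src/index.ts, restated locally so the sketch is
// self-contained; in real code, import LargeValueRef from the SDK instead.
interface LargeValueRef {
  __simLargeValueRef: true
  version: 1
  id: string
  kind: 'array' | 'object' | 'string' | 'json'
  size: number
  key?: string // opaque execution-scoped storage key, not a download URL
  executionId?: string
  preview?: unknown
}

// Narrow an unknown value to the LargeValueRef placeholder shape.
function isLargeValueRef(value: unknown): value is LargeValueRef {
  return (
    typeof value === 'object' &&
    value !== null &&
    (value as { __simLargeValueRef?: unknown }).__simLargeValueRef === true &&
    (value as { version?: unknown }).version === 1
  )
}

// Recursively list every placeholder in an execution output, with its JSON path.
function collectLargeValueRefs(value: unknown, path = '$'): string[] {
  if (isLargeValueRef(value)) {
    return [`${path}: ${value.kind}, ${value.size} bytes (id=${value.id})`]
  }
  if (Array.isArray(value)) {
    return value.flatMap((item, i) => collectLargeValueRefs(item, `${path}[${i}]`))
  }
  if (value && typeof value === 'object') {
    return Object.entries(value).flatMap(([k, v]) => collectLargeValueRefs(v, `${path}.${k}`))
  }
  return []
}
```

A client polling the job-status endpoint could log `collectLargeValueRefs(status.output)` and fall back to each ref's `preview`, since `key` is not fetchable from the client.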
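
Second, the reserve/release pair in `redis-budget.server.ts` is meant to bracket sized Redis writes the way the guarded base64 cache `set` above does. A minimal sketch of that call pattern, assuming an ioredis-style client whose `set` accepts `'EX'` and a TTL; `writeCachedPayload` is a hypothetical caller, not part of the patch:

```typescript
import {
  type ExecutionRedisBudgetReservation,
  releaseExecutionRedisBytes,
  reserveExecutionRedisBytes,
} from '@/lib/execution/redis-budget.server'

// Hypothetical caller: only write the payload if the per-execution and per-user
// byte budgets allow it, and hand the reserved bytes back if the write fails.
async function writeCachedPayload(
  redis: Parameters<typeof reserveExecutionRedisBytes>[0],
  executionId: string,
  userId: string,
  key: string,
  value: string
): Promise<void> {
  const reservation: ExecutionRedisBudgetReservation = {
    executionId,
    userId,
    category: 'base64_cache',
    operation: 'write_cached_payload',
    bytes: Buffer.byteLength(value, 'utf8'),
  }

  // Throws ExecutionResourceLimitError (413, or 429 for the user-level budget)
  // when a budget would be exceeded.
  await reserveExecutionRedisBytes(redis, reservation)
  try {
    await redis.set(key, value, 'EX', 300)
  } catch (error) {
    // The write never landed, so release the reservation before rethrowing.
    await releaseExecutionRedisBytes(redis, reservation)
    throw error
  }
}
```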