fix(executor): address code review feedback on retry logic

- Scope retries to LLM blocks only (agent, evaluator, router) to avoid duplicate side effects on non-idempotent blocks like HTTP, email, webhook
- Treat statusless errors as non-retryable by default — only retry known network errors (ECONNRESET, ETIMEDOUT, etc.), not JS runtime errors
- Respect the Retry-After header for 503 responses in addition to 429 (RFC 7231 §7.1.3)
- Add afterEach vi.useRealTimers() to prevent fake timers leaking between tests
simstudioai · Rabba-Meghana · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
commit 70ce97fdf02cc77d087b3ac9a19f883ecc128c6b
diff --git a/apps/sim/executor/execution/block-executor.ts b/apps/sim/executor/execution/block-executor.ts
@@ -121,11 +121,19 @@ export class BlockExecutor {
     cleanupSelfReference?.()
 
     try {
-      const output = await withRetry(
-        () => handler.executeWithNode
-          ? handler.executeWithNode(ctx, block, resolvedInputs, nodeMetadata)
-          : handler.execute(ctx, block, resolvedInputs)
-      )
+      const isLLMBlock = isAgentBlockType(block.metadata?.id) ||
+        block.metadata?.id === BlockType.EVALUATOR ||
+        block.metadata?.id === BlockType.ROUTER ||
+        block.metadata?.id === BlockType.ROUTER_V2
+      const output = isLLMBlock
+        ? await withRetry(
+            () => handler.executeWithNode
+              ? handler.executeWithNode(ctx, block, resolvedInputs, nodeMetadata)
+              : handler.execute(ctx, block, resolvedInputs)
+          )
+        : await (handler.executeWithNode
+            ? handler.executeWithNode(ctx, block, resolvedInputs, nodeMetadata)
+            : handler.execute(ctx, block, resolvedInputs))
 
       const isStreamingExecution =
         output && typeof output === 'object' && 'stream' in output && 'execution' in output

diff --git a/apps/sim/executor/utils/retry.test.ts b/apps/sim/executor/utils/retry.test.ts
@@ -6,6 +6,10 @@ describe('withRetry', () => {
     vi.useFakeTimers()
   })
 
+  afterEach(() => {
+    vi.useRealTimers()
+  })
+
   it('returns result immediately on success', async () => {
     const fn = vi.fn().mockResolvedValue('ok')
     const result = await withRetry(fn)

diff --git a/apps/sim/executor/utils/retry.ts b/apps/sim/executor/utils/retry.ts
@@ -36,10 +36,24 @@ function parseRetryAfterHeader(headers: Headers): number | null {
   return null
 }
 
+/**
+ * Returns true only when `error` is an Error instance whose `code` appears in
+ * `networkErrorCodes` or whose `name` appears in `networkErrorNames`. A plain
+ * TypeError/RangeError matches neither list, so it is reported as false.
+ */
+function isNetworkError(error: unknown): boolean {
+  if (!(error instanceof Error)) return false // non-Error throwables never count as network errors
+  const networkErrorCodes = ['ECONNRESET', 'ECONNREFUSED', 'ETIMEDOUT', 'ENOTFOUND', 'EPIPE']
+  const code = (error as any)?.code // `code` is not declared on Error, hence the cast
+  if (code && networkErrorCodes.includes(code)) return true
+  const networkErrorNames = ['FetchError', 'NetworkError', 'AbortError'] // NOTE(review): AbortError can also mean deliberate cancellation — confirm retrying it is intended
+  return networkErrorNames.includes(error.name) // no matching code: fall back to the error name
+}
+
 /**
  * Wraps an async function with retry logic using exponential backoff and jitter.
- * Respects Retry-After headers from LLM providers on 429 responses.
- * Only retries on transient errors (429, 503, 529) — never on user errors (4xx).
+ * Respects Retry-After headers from LLM providers on 429 and 503 responses.
+ * Only retries on known transient errors — never on JS runtime errors or user errors (4xx).
  */
 export async function withRetry<T>(
   fn: () => Promise<T>,
@@ -63,17 +77,21 @@ export async function withRetry<T>(
       const status = (error as any)?.status ?? (error as any)?.statusCode ?? null
       const responseHeaders: Headers | null = (error as any)?.headers ?? null
 
+      // Only retry known transient HTTP codes or recognised network errors
+      // Statusless JS errors (TypeError, RangeError, etc.) are NOT retried
       const isRetryable =
-        status === null ||
-        retryableStatusCodes.includes(status)
+        status !== null
+          ? retryableStatusCodes.includes(status)
+          : isNetworkError(error)
 
       if (!isRetryable) {
         logger.warn('Non-retryable error, aborting retry loop', { status, attempt })
         throw error
       }
 
+      // Respect Retry-After header for both 429 and 503 (RFC 7231 §7.1.3)
       let delayMs: number
-      if (responseHeaders && status === 429) {
+      if (responseHeaders && (status === 429 || status === 503)) {
         const retryAfterMs = parseRetryAfterHeader(responseHeaders)
         delayMs = retryAfterMs ?? calculateBackoffDelay(attempt, initialDelayMs, maxDelayMs)
       } else {