workflows.py (forked from temporalio/samples-python)

from datetime import timedelta

from temporalio import workflow
from temporalio.common import RetryPolicy, WorkflowIDReusePolicy
from temporalio.exceptions import ActivityError, ApplicationError

from batch_iterator.activities import get_records
from batch_iterator.shared import (
    GetRecordsInput,
    GetRecordsOutput,
    ProcessBatchInput,
    SingleRecord,
)
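
# For orientation, a sketch of the imported helpers as this file uses them.
# These shapes are inferred from the call sites below, not copied from
# batch_iterator.shared or batch_iterator.activities, so treat the exact
# definitions as assumptions:
#
#     @dataclass
#     class SingleRecord:
#         id: int
#
#     @dataclass
#     class ProcessBatchInput:
#         page_size: int
#         record_count: int
#
#     @dataclass
#     class GetRecordsInput:
#         page_size: int
#         offset: int
#
#     @dataclass
#     class GetRecordsOutput:
#         records: list[SingleRecord]
#
#     @activity.defn
#     async def get_records(input: GetRecordsInput) -> GetRecordsOutput: ...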

@workflow.defn(failure_exception_types=[ValueError, TypeError, RuntimeError])
class RecordProcessorWorkflow:
    """Child workflow with fault isolation per record."""

    @workflow.run
    async def run(self, record: SingleRecord) -> None:
        """Process a record with deterministic timing."""
        # Type validation for serialization safety: depending on the data
        # converter, the payload may arrive as a plain dict.
        if isinstance(record, dict):
            try:
                record = SingleRecord(**record)
            except (TypeError, ValueError) as e:
                workflow.logger.error(f"Invalid record parameter: {e}")
                raise ApplicationError(f"Invalid record parameter: {e}")
        elif not isinstance(record, SingleRecord):
            raise ApplicationError(f"Expected SingleRecord, got {type(record)}")

        # workflow.random() is deterministic and replay-safe.
        sleep_duration = workflow.random().randint(1, 30)
        await workflow.sleep(sleep_duration)
        workflow.logger.info(f"Processed {record} (took {sleep_duration}s)")

        # Notify the parent workflow (if any) that this record is done.
        parent = workflow.info().parent
        if parent:
            handle = workflow.get_external_workflow_handle(parent.workflow_id)
            await handle.signal("child_completed", record.id)
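
# Design note: the parent below tracks completion via the "child_completed"
# signal instead of awaiting child handles. Combined with
# ParentClosePolicy.ABANDON, this decouples children from any single parent
# run, which is what lets the pattern survive continue-as-new. The trade-off
# is that a child that fails before signaling leaves its record id pending,
# so production code would likely pair this with a timeout or failure signal.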

@workflow.defn(failure_exception_types=[ValueError, TypeError, RuntimeError])
class IteratorBatchWorkflow:
    """Batch processing with child workflows and continue-as-new."""

    def __init__(self) -> None:
        self._current_offset = 0
        self._current_batch_size = 0
        self._children_started = 0
        self._completed_children = set()
        self._pending_children = set()

    @workflow.run
    async def run(
        self, input: ProcessBatchInput, offset: int = 0, total_processed: int = 0
    ) -> int:
        """Process one page of records, then continue-as-new for the next."""
        # Type validation for serialization safety: the input may arrive as a
        # dict, or as a single list argument from an older continue-as-new
        # call that packed all three parameters into one list.
        try:
            if isinstance(input, dict):
                input = ProcessBatchInput(**input)
            elif isinstance(input, list) and len(input) > 0:
                actual_input = input[0]
                offset = input[1] if len(input) > 1 else offset
                total_processed = input[2] if len(input) > 2 else total_processed
                if isinstance(actual_input, dict):
                    input = ProcessBatchInput(**actual_input)
                else:
                    input = actual_input
            elif not isinstance(input, ProcessBatchInput):
                raise TypeError(f"Expected ProcessBatchInput, got {type(input)}")
            # Validate parameter types
            if not isinstance(offset, int):
                raise TypeError(f"Expected int for offset, got {type(offset)}")
            if not isinstance(total_processed, int):
                raise TypeError(
                    f"Expected int for total_processed, got {type(total_processed)}"
                )
        except (TypeError, ValueError) as e:
            workflow.logger.error(f"Invalid input parameters: {e}")
            raise ApplicationError(f"Invalid input parameters: {e}")

        self._current_offset = offset
        workflow.set_query_handler("state", self._get_state)
        workflow.set_signal_handler("child_completed", self._handle_child_completion)

        # Guard against invalid pagination parameters.
        if input.page_size <= 0:
            raise ApplicationError("Page size must be positive")
        if input.record_count <= 0:
            raise ApplicationError("Record count must be positive")
        if offset < 0:
            raise ApplicationError("Offset cannot be negative")
        if offset >= input.record_count:
            raise ApplicationError(
                f"Offset {offset} cannot be >= record count {input.record_count}"
            )
        if input.page_size > input.record_count:
            workflow.logger.info(
                f"Page size ({input.page_size}) is larger than record count "
                f"({input.record_count}); the batch will simply contain fewer records"
            )

        workflow.logger.info(
            f"Processing batch: offset={offset}, page_size={input.page_size}"
        )

        get_records_input = GetRecordsInput(page_size=input.page_size, offset=offset)
        try:
            records_output: GetRecordsOutput = await workflow.execute_activity(
                get_records,
                get_records_input,
                start_to_close_timeout=timedelta(seconds=30),
                # Overall cap across all retry attempts
                schedule_to_close_timeout=timedelta(minutes=5),
                retry_policy=RetryPolicy(
                    initial_interval=timedelta(milliseconds=500),  # Start with 500ms
                    backoff_coefficient=1.5,  # More gradual backoff
                    maximum_interval=timedelta(seconds=30),  # Cap at 30 seconds
                    maximum_attempts=5,  # More retry attempts for robustness
                    # Don't retry validation errors
                    non_retryable_error_types=["ValueError"],
                ),
            )
        except ActivityError as e:
            workflow.logger.error(f"Failed to get records after retries: {e}")
            raise

        records = records_output.records
        if not records:
            workflow.logger.info(
                f"No more records to process; completing workflow with total processed: {total_processed}"
            )
            return total_processed

        workflow.logger.info(
            f"Processing {len(records)} records in parallel (batch starting at offset {offset})"
        )
        self._current_batch_size = len(records)
        self._children_started = len(records)

        # Start child workflows in parallel - each record gets fault isolation
        for record in records:
            child_id = f"{workflow.info().workflow_id}/{record.id}"
            await workflow.start_child_workflow(
                RecordProcessorWorkflow.run,
                record,
                id=child_id,
                id_reuse_policy=WorkflowIDReusePolicy.ALLOW_DUPLICATE,
                # ABANDON lets children outlive this run across continue-as-new
                parent_close_policy=workflow.ParentClosePolicy.ABANDON,
                execution_timeout=timedelta(minutes=10),
            )
            self._pending_children.add(record.id)

        # Wait for all child workflows to complete via signals
        await workflow.wait_condition(
            lambda: len(self._completed_children) >= len(records)
        )

        current_batch_processed = len(records)
        new_total_processed = total_processed + current_batch_processed
        workflow.logger.info(
            f"Completed processing {current_batch_processed} records "
            f"(batch {offset // input.page_size + 1}), total: {new_total_processed}"
        )

        new_offset = offset + len(records)
        if new_offset < input.record_count:
            # Continue-as-new to prevent workflow history from growing unbounded
            await workflow.wait_condition(lambda: workflow.all_handlers_finished())
            workflow.logger.info(
                f"Continuing as new with offset {new_offset}, total processed: {new_total_processed}"
            )
            # Pass the arguments via args= so the next run receives them as
            # three separate parameters rather than a single list.
            workflow.continue_as_new(args=[input, new_offset, new_total_processed])

        # Wait for any remaining handlers before completion
        await workflow.wait_condition(lambda: workflow.all_handlers_finished())
        return new_total_processed

    def _handle_child_completion(self, record_id: int) -> None:
        """Handle the completion signal from a child workflow."""
        if record_id in self._pending_children:
            self._pending_children.remove(record_id)
            self._completed_children.add(record_id)

    def _get_state(self) -> dict:
        """Query handler for monitoring workflow state."""
        return {
            "current_offset": self._current_offset,
            "current_batch_size": self._current_batch_size,
            "children_started": self._children_started,
            "completed_children": len(self._completed_children),
            "pending_children": len(self._pending_children),
        }
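
# A minimal sketch of how a client might start and observe this workflow.
# The server address, task queue, workflow id, and input values here are
# illustrative assumptions, not taken from the sample:
#
#     from temporalio.client import Client
#
#     async def main() -> None:
#         client = await Client.connect("localhost:7233")
#         handle = await client.start_workflow(
#             IteratorBatchWorkflow.run,
#             args=[ProcessBatchInput(page_size=10, record_count=90)],
#             id="batch-iterator-workflow",
#             task_queue="batch-iterator-task-queue",
#         )
#         # The "state" query exposes batch progress while children run.
#         print(await handle.query("state"))
#         # result() follows the continue-as-new chain to the final run.
#         print(f"Processed {await handle.result()} records")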