yield requests from generator

Pulkit0110 · Pulkit0110 · commit fb9c976ced58 · 2026-01-19T07:13:05.000Z
diff --git a/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py b/google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py
@@ -359,13 +359,12 @@ async def append(
         attempt_count = 0
 
         def send_and_recv_generator(
-            requests: List[BidiWriteObjectRequest],
+            requests_generator,
             state: dict[str, _WriteState],
             metadata: Optional[List[Tuple[str, str]]] = None,
         ):
             async def generator():
                 nonlocal attempt_count
-                nonlocal requests
                 attempt_count += 1
                 resp = None
                 async with self._lock:
@@ -402,16 +401,33 @@ async def generator():
                         write_state.bytes_sent = write_state.persisted_size
                         write_state.bytes_since_last_flush = 0
 
-                        requests = strategy.generate_requests(state)
-
-                    num_requests = len(requests)
-                    for i, chunk_req in enumerate(requests):
-                        if i == num_requests - 1:
-                            chunk_req.state_lookup = True
-                            chunk_req.flush = True
+                    # Process requests from the generator
+                    # Strategy handles state_lookup and flush on the last request,
+                    # so we just stream requests directly
+                    for chunk_req in requests_generator:
+                        # Check if this is an open/state-lookup request (no checksummed_data)
+                        if chunk_req.state_lookup and not chunk_req.checksummed_data:
+                            # This is an open request - send it and get response
+                            await self.write_obj_stream.send(chunk_req)
+                            resp = await self.write_obj_stream.recv()
+                            
+                            # Update state from open response
+                            if resp:
+                                if resp.persisted_size is not None:
+                                    self.persisted_size = resp.persisted_size
+                                    write_state.persisted_size = resp.persisted_size
+                                    self.offset = self.persisted_size
+                                if resp.write_handle:
+                                    self.write_handle = resp.write_handle
+                                    write_state.write_handle = resp.write_handle
+                            continue
+                        
+                        # This is a data request - send it
                         await self.write_obj_stream.send(chunk_req)
 
+                    # Get final response from the last request (which has state_lookup=True)
                     resp = await self.write_obj_stream.recv()
+                    
                     if resp:
                         if resp.persisted_size is not None:
                             self.persisted_size = resp.persisted_size
diff --git a/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py b/google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py
@@ -377,7 +377,7 @@ async def download_ranges(
         attempt_count = 0
 
         def send_ranges_and_get_bytes(
-            requests: List[_storage_v2.ReadRange],
+            requests_generator,
             state: Dict[str, Any],
             metadata: Optional[List[Tuple[str, str]]] = None,
         ):
@@ -387,7 +387,7 @@ async def generator():
 
                 if attempt_count > 1:
                     logger.info(
-                        f"Resuming download (attempt {attempt_count - 1}) for {len(requests)} ranges."
+                        f"Resuming download (attempt {attempt_count - 1})."
                     )
 
                 async with lock:
@@ -436,17 +436,28 @@ async def generator():
                         )
                         self._is_stream_open = True
 
-                    pending_read_ids = {r.read_id for r in requests}
+                    # Stream requests directly without materializing
+                    pending_read_ids = set()
+                    current_batch = []
+
+                    for read_range in requests_generator:
+                        pending_read_ids.add(read_range.read_id)
+                        current_batch.append(read_range)
+                        
+                        # Send batch when it reaches max size
+                        if len(current_batch) >= _MAX_READ_RANGES_PER_BIDI_READ_REQUEST:
+                            await self.read_obj_str.send(
+                                _storage_v2.BidiReadObjectRequest(read_ranges=current_batch)
+                            )
+                            current_batch = []
 
-                    # Send Requests
-                    for i in range(
-                        0, len(requests), _MAX_READ_RANGES_PER_BIDI_READ_REQUEST
-                    ):
-                        batch = requests[i : i + _MAX_READ_RANGES_PER_BIDI_READ_REQUEST]
+                    # Send remaining partial batch
+                    if current_batch:
                         await self.read_obj_str.send(
-                            _storage_v2.BidiReadObjectRequest(read_ranges=batch)
+                            _storage_v2.BidiReadObjectRequest(read_ranges=current_batch)
                         )
 
+                    # Receive responses
                     while pending_read_ids:
                         response = await self.read_obj_str.recv()
                         if response is None:
diff --git a/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/base_strategy.py
@@ -27,16 +27,20 @@ class _BaseResumptionStrategy(abc.ABC):
     """
 
     @abc.abstractmethod
-    def generate_requests(self, state: Any) -> Iterable[Any]:
-        """Generates the next batch of requests based on the current state.
+    def generate_requests(self, state: Any):
+        """Generates requests based on the current state as a generator.
 
         This method is called at the beginning of each retry attempt. It should
-        inspect the provided state object and generate the appropriate list of
-        request protos to send to the server. For example, a read strategy
-        would use this to implement "Smarter Resumption" by creating smaller
-        `ReadRange` requests for partially downloaded ranges. For bidi-writes,
-        it will set the `write_offset` field to the persisted size received
-        from the server in the next request.
+        inspect the provided state object and yield request protos to send to
+        the server. For example, a read strategy would use this to implement
+        "Smarter Resumption" by creating smaller `ReadRange` requests for
+        partially downloaded ranges. For bidi-writes, it will set the
+        `write_offset` field to the persisted size received from the server
+        in the next request.
+
+        This is a generator that yields requests incrementally rather than
+        returning them all at once, allowing for better memory efficiency
+        and on-demand generation.
 
         :type state: Any
         :param state: An object containing all the state needed for the
diff --git a/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py b/google/cloud/storage/_experimental/asyncio/retry/bidi_stream_retry_manager.py
@@ -50,8 +50,8 @@ async def execute(self, initial_state: Any, retry_policy):
         state = initial_state
 
         async def attempt():
-            requests = self._strategy.generate_requests(state)
-            stream = self._send_and_recv(requests, state)
+            requests_generator = self._strategy.generate_requests(state)
+            stream = self._send_and_recv(requests_generator, state)
             try:
                 async for response in stream:
                     self._strategy.update_state_from_response(response, state)
diff --git a/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py
@@ -49,14 +49,16 @@ def __init__(
 class _ReadResumptionStrategy(_BaseResumptionStrategy):
     """The concrete resumption strategy for bidi reads."""
 
-    def generate_requests(self, state: Dict[str, Any]) -> List[storage_v2.ReadRange]:
+    def generate_requests(self, state: Dict[str, Any]):
         """Generates new ReadRange requests for all incomplete downloads.
 
+        This is a generator that yields requests one at a time for incomplete
+        downloads, allowing for better memory efficiency and incremental processing.
+
         :type state: dict
         :param state: A dictionary mapping a read_id to its corresponding
                   _DownloadState object.
         """
-        pending_requests = []
         download_states: Dict[int, _DownloadState] = state["download_states"]
 
         for read_id, read_state in download_states.items():
@@ -74,8 +76,7 @@ def generate_requests(self, state: Dict[str, Any]) -> List[storage_v2.ReadRange]
                     read_length=new_length,
                     read_id=read_id,
                 )
-                pending_requests.append(new_request)
-        return pending_requests
+                yield new_request
 
     def update_state_from_response(
         self, response: storage_v2.BidiReadObjectResponse, state: Dict[str, Any]
diff --git a/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py b/google/cloud/storage/_experimental/asyncio/retry/writes_resumption_strategy.py
@@ -65,23 +65,38 @@ class _WriteResumptionStrategy(_BaseResumptionStrategy):
 
     def generate_requests(
         self, state: Dict[str, Any]
-    ) -> List[storage_type.BidiWriteObjectRequest]:
+    ):
         """Generates BidiWriteObjectRequests to resume or continue the upload.
 
-        This method is not applicable for `open` methods.
+        This method is a generator that yields requests one at a time,
+        allowing for incremental sending and better memory efficiency.
+        
+        On retry/redirect, yields a state_lookup request first to get the current
+        persisted state from the server before sending data requests.
+        
+        The last data request is always yielded with state_lookup=True and flush=True
+        to ensure the server persists the final data and returns the updated state.
         """
         write_state: _WriteState = state["write_state"]
 
-        requests = []
+        # If this is a retry/redirect, yield a state lookup request first
+        # This allows the sender to get current persisted_size before proceeding
+        if write_state.routing_token or write_state.bytes_sent > write_state.persisted_size:
+            # Yield an open/state-lookup request with no data
+            yield storage_type.BidiWriteObjectRequest(state_lookup=True)
+
         # The buffer should already be seeked to the correct position (persisted_size)
         # by the `recover_state_on_failure` method before this is called.
         while not write_state.is_finalized:
             chunk = write_state.user_buffer.read(write_state.chunk_size)
 
-            # End of File detection
             if not chunk:
                 break
 
+            # Peek to see if this is the last chunk. NOTE: io.BufferedReader has peek(),
+            # but io.BytesIO does not, so the fallback treats every BytesIO chunk as last.
+            is_last_chunk = not getattr(write_state.user_buffer, "peek", lambda n: b"")(1)
+
             checksummed_data = storage_type.ChecksummedData(content=chunk)
             checksum = google_crc32c.Checksum(chunk)
             checksummed_data.crc32c = int.from_bytes(checksum.digest(), "big")
@@ -102,8 +117,11 @@ def generate_requests(
                 # reset counter after marking flush
                 write_state.bytes_since_last_flush = 0
 
-            requests.append(request)
-        return requests
+            if is_last_chunk:
+                request.flush = True
+                request.state_lookup = True
+
+            yield request
 
     def update_state_from_response(
         self, response: storage_type.BidiWriteObjectResponse, state: Dict[str, Any]
diff --git a/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py b/tests/unit/asyncio/retry/test_bidi_stream_retry_manager.py
@@ -139,8 +139,10 @@ async def mock_send_and_recv(*args, **kwargs):
     @pytest.mark.asyncio
     async def test_execute_fails_immediately_on_non_retriable_error(self):
         mock_strategy = mock.AsyncMock(spec=base_strategy._BaseResumptionStrategy)
+        mock_strategy.generate_requests.return_value = iter([])
 
-        async def mock_send_and_recv(*args, **kwargs):
+        async def mock_send_and_recv(strategy, state, **kwargs):
+            strategy.generate_requests(state)
             if False:
                 yield
             raise exceptions.PermissionDenied("Auth error")
diff --git a/tests/unit/asyncio/retry/test_reads_resumption_strategy.py b/tests/unit/asyncio/retry/test_reads_resumption_strategy.py
@@ -109,7 +109,7 @@ def test_generate_requests_single_incomplete(self):
         read_state = self._add_download(_READ_ID, offset=0, length=100)
         read_state.bytes_written = 20
 
-        requests = self.strategy.generate_requests(self.state)
+        requests = list(self.strategy.generate_requests(self.state))
 
         self.assertEqual(len(requests), 1)
         self.assertEqual(requests[0].read_offset, 20)
@@ -124,7 +124,7 @@ def test_generate_requests_multiple_incomplete(self):
 
         self._add_download(read_id2, offset=200, length=100)
 
-        requests = self.strategy.generate_requests(self.state)
+        requests = list(self.strategy.generate_requests(self.state))
 
         self.assertEqual(len(requests), 2)
         requests.sort(key=lambda r: r.read_id)
@@ -145,7 +145,7 @@ def test_generate_requests_read_to_end_resumption(self):
         read_state = self._add_download(_READ_ID, offset=0, length=0)
         read_state.bytes_written = 500
 
-        requests = self.strategy.generate_requests(self.state)
+        requests = list(self.strategy.generate_requests(self.state))
 
         self.assertEqual(len(requests), 1)
         self.assertEqual(requests[0].read_offset, 500)
@@ -156,7 +156,7 @@ def test_generate_requests_with_complete(self):
         read_state = self._add_download(_READ_ID)
         read_state.is_complete = True
 
-        requests = self.strategy.generate_requests(self.state)
+        requests = list(self.strategy.generate_requests(self.state))
         self.assertEqual(len(requests), 0)
 
     def test_generate_requests_multiple_mixed_states(self):
@@ -170,7 +170,7 @@ def test_generate_requests_multiple_mixed_states(self):
         s3 = self._add_download(3, offset=200, length=100)
         s3.bytes_written = 0
 
-        requests = self.strategy.generate_requests(self.state)
+        requests = list(self.strategy.generate_requests(self.state))
 
         self.assertEqual(len(requests), 2)
         requests.sort(key=lambda r: r.read_id)
@@ -180,7 +180,7 @@ def test_generate_requests_multiple_mixed_states(self):
 
     def test_generate_requests_empty_state(self):
         """Test generating requests with an empty state."""
-        requests = self.strategy.generate_requests(self.state)
+        requests = list(self.strategy.generate_requests(self.state))
         self.assertEqual(len(requests), 0)
 
     # --- Update State and response processing Tests ---
diff --git a/tests/unit/asyncio/retry/test_writes_resumption_strategy.py b/tests/unit/asyncio/retry/test_writes_resumption_strategy.py
@@ -48,7 +48,7 @@ def test_generate_requests_initial_chunking(self, strategy):
         write_state = _WriteState(chunk_size=3, user_buffer=mock_buffer)
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         # Expected: 4 requests (3, 3, 3, 1)
         assert len(requests) == 4
@@ -85,7 +85,7 @@ def test_generate_requests_resumption(self, strategy):
 
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         # Since 4 bytes are done, we expect remaining 6 bytes: [4 bytes, 2 bytes]
         assert len(requests) == 2
@@ -104,7 +104,7 @@ def test_generate_requests_empty_file(self, strategy):
         write_state = _WriteState(chunk_size=4, user_buffer=mock_buffer)
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         assert len(requests) == 0
 
@@ -115,7 +115,7 @@ def test_generate_requests_checksum_verification(self, strategy):
         write_state = _WriteState(chunk_size=10, user_buffer=mock_buffer)
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         expected_crc = google_crc32c.Checksum(chunk_data).digest()
         expected_int = int.from_bytes(expected_crc, "big")
@@ -130,7 +130,7 @@ def test_generate_requests_flush_logic_exact_interval(self, strategy):
         )
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         # Request index 1 (4 bytes total) should have flush=True
         assert requests[0].flush is False
@@ -155,7 +155,7 @@ def test_generate_requests_flush_logic_none_interval(self, strategy):
         )
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         for req in requests:
             assert req.flush is False
@@ -169,7 +169,7 @@ def test_generate_requests_flush_logic_data_less_than_interval(self, strategy):
         )
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         # Total 5 bytes < 10 bytes interval
         for req in requests:
@@ -184,7 +184,7 @@ def test_generate_requests_honors_finalized_state(self, strategy):
         write_state.is_finalized = True
         state = {"write_state": write_state}
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
         assert len(requests) == 0
 
     @pytest.mark.asyncio
@@ -217,7 +217,7 @@ async def test_generate_requests_after_failure_and_recovery(self, strategy):
         # 2. bytes_sent should track persisted_size (4)
         assert write_state.bytes_sent == 4
 
-        requests = strategy.generate_requests(state)
+        requests = list(strategy.generate_requests(state))
 
         # Remaining data from offset 4 to 16 (12 bytes total)
         # Chunks: [4-8], [8-12], [12-16]