diff --git a/src/crawlee/crawlers/_basic/_basic_crawler.py b/src/crawlee/crawlers/_basic/_basic_crawler.py index 973e0ad430..7106c13a56 100644 --- a/src/crawlee/crawlers/_basic/_basic_crawler.py +++ b/src/crawlee/crawlers/_basic/_basic_crawler.py @@ -1654,11 +1654,9 @@ async def _crawler_state_task(self) -> None: current_state = self.statistics.state if ( - failed_requests := ( - current_state.requests_failed - (self._previous_crawler_state or current_state).requests_failed - ) - > 0 - ): + failed_requests := current_state.requests_failed + - (self._previous_crawler_state or current_state).requests_failed + ) > 0: message = f'Experiencing problems, {failed_requests} failed requests since last status update.' else: request_manager = await self.get_request_manager() diff --git a/tests/unit/crawlers/_basic/test_basic_crawler.py b/tests/unit/crawlers/_basic/test_basic_crawler.py index 10322a3006..f358ac4a9d 100644 --- a/tests/unit/crawlers/_basic/test_basic_crawler.py +++ b/tests/unit/crawlers/_basic/test_basic_crawler.py @@ -29,7 +29,7 @@ from crawlee.events import Event, EventCrawlerStatusData, LocalEventManager from crawlee.request_loaders import RequestList, RequestManagerTandem from crawlee.sessions import Session, SessionPool -from crawlee.statistics import FinalStatistics +from crawlee.statistics import FinalStatistics, StatisticsState from crawlee.storage_clients import FileSystemStorageClient, MemoryStorageClient from crawlee.storages import Dataset, KeyValueStore, RequestQueue @@ -40,7 +40,6 @@ from yarl import URL from crawlee._types import JsonSerializable - from crawlee.statistics import StatisticsState async def test_processes_requests_from_explicit_queue() -> None: @@ -1736,6 +1735,27 @@ def listener(event_data: EventCrawlerStatusData) -> None: assert status_message_listener.called +async def test_status_message_reports_failed_request_count() -> None: + """The 'Experiencing problems' status message reports the count of new failures since the last update.""" + captured_messages: list[str] = [] + + async def status_callback( + state: StatisticsState, previous_state: StatisticsState | None, message: str + ) -> str | None: + captured_messages.append(message) + return None + + crawler = BasicCrawler(status_message_callback=status_callback) + crawler._previous_crawler_state = StatisticsState(requests_failed=2) + crawler._statistics = Mock(state=StatisticsState(requests_failed=5)) + + async with service_locator.get_event_manager(): + await crawler._crawler_state_task() + + problem_messages = [m for m in captured_messages if m.startswith('Experiencing problems')] + assert problem_messages == ['Experiencing problems, 3 failed requests since last status update.'] + + @pytest.mark.parametrize( ('queue_name', 'queue_alias', 'by_id'), [