Skip to content

Commit 40170a6

Browse files
authored
fix: Report integer count in 'Experiencing problems' status log (#1860)
## Summary

The walrus operator (`:=`) at `_basic_crawler.py:1656` binds less tightly than `>`, so `failed_requests := X - Y > 0` was parsed as `failed_requests := ((X - Y) > 0)` per [PEP 572](https://peps.python.org/pep-0572/). As a result, `failed_requests` was assigned a `bool` instead of the integer difference, and the periodic status log emitted:

```
Experiencing problems, True failed requests since last status update.
```

instead of the actual count.

## Fix

Move the parentheses so `:=` captures the integer subtraction and `> 0` sits outside.

## Regression test

Added `test_status_message_reports_failed_request_count`, which directly invokes `_crawler_state_task` with a mocked `_statistics.state` and a separate `_previous_crawler_state`, asserting the rendered message contains the integer count `3`, not `True`.
1 parent 3c594fe commit 40170a6

2 files changed

Lines changed: 25 additions & 7 deletions

File tree

src/crawlee/crawlers/_basic/_basic_crawler.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1654,11 +1654,9 @@ async def _crawler_state_task(self) -> None:
16541654
current_state = self.statistics.state
16551655

16561656
if (
1657-
failed_requests := (
1658-
current_state.requests_failed - (self._previous_crawler_state or current_state).requests_failed
1659-
)
1660-
> 0
1661-
):
1657+
failed_requests := current_state.requests_failed
1658+
- (self._previous_crawler_state or current_state).requests_failed
1659+
) > 0:
16621660
message = f'Experiencing problems, {failed_requests} failed requests since last status update.'
16631661
else:
16641662
request_manager = await self.get_request_manager()

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
from crawlee.events import Event, EventCrawlerStatusData, LocalEventManager
3030
from crawlee.request_loaders import RequestList, RequestManagerTandem
3131
from crawlee.sessions import Session, SessionPool
32-
from crawlee.statistics import FinalStatistics
32+
from crawlee.statistics import FinalStatistics, StatisticsState
3333
from crawlee.storage_clients import FileSystemStorageClient, MemoryStorageClient
3434
from crawlee.storages import Dataset, KeyValueStore, RequestQueue
3535

@@ -40,7 +40,6 @@
4040
from yarl import URL
4141

4242
from crawlee._types import JsonSerializable
43-
from crawlee.statistics import StatisticsState
4443

4544

4645
async def test_processes_requests_from_explicit_queue() -> None:
@@ -1736,6 +1735,27 @@ def listener(event_data: EventCrawlerStatusData) -> None:
17361735
assert status_message_listener.called
17371736

17381737

1738+
async def test_status_message_reports_failed_request_count() -> None:
1739+
"""The 'Experiencing problems' status message reports the count of new failures since the last update."""
1740+
captured_messages: list[str] = []
1741+
1742+
async def status_callback(
1743+
state: StatisticsState, previous_state: StatisticsState | None, message: str
1744+
) -> str | None:
1745+
captured_messages.append(message)
1746+
return None
1747+
1748+
crawler = BasicCrawler(status_message_callback=status_callback)
1749+
crawler._previous_crawler_state = StatisticsState(requests_failed=2)
1750+
crawler._statistics = Mock(state=StatisticsState(requests_failed=5))
1751+
1752+
async with service_locator.get_event_manager():
1753+
await crawler._crawler_state_task()
1754+
1755+
problem_messages = [m for m in captured_messages if m.startswith('Experiencing problems')]
1756+
assert problem_messages == ['Experiencing problems, 3 failed requests since last status update.']
1757+
1758+
17391759
@pytest.mark.parametrize(
17401760
('queue_name', 'queue_alias', 'by_id'),
17411761
[

0 commit comments

Comments
 (0)