Skip to content

Commit 0163bd7

Browse files
committed
Merge branch 'release/v0.7.1'
2 parents bde1bba + 26bad79 commit 0163bd7

18 files changed

Lines changed: 717 additions & 493 deletions

README.md

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -523,15 +523,18 @@ async def test_news_crawl():
523523
- **🧠 Adaptive Crawling**: Your crawler now learns and adapts to website patterns automatically:
524524
```python
525525
config = AdaptiveConfig(
526-
confidence_threshold=0.7,
527-
max_history=100,
528-
learning_rate=0.2
526+
confidence_threshold=0.7, # Min confidence to stop crawling
527+
max_depth=5, # Maximum crawl depth
528+
max_pages=20, # Maximum number of pages to crawl
529+
strategy="statistical"
529530
)
530531

531-
result = await crawler.arun(
532-
"https://news.example.com",
533-
config=CrawlerRunConfig(adaptive_config=config)
534-
)
532+
async with AsyncWebCrawler() as crawler:
533+
adaptive_crawler = AdaptiveCrawler(crawler, config)
534+
state = await adaptive_crawler.digest(
535+
start_url="https://news.example.com",
536+
query="latest news content"
537+
)
535538
# Crawler learns patterns and improves extraction over time
536539
```
537540

crawl4ai/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
from .async_webcrawler import AsyncWebCrawler, CacheMode
55
# MODIFIED: Add SeedingConfig and VirtualScrollConfig here
6-
from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig, ProxyConfig, GeolocationConfig, SeedingConfig, VirtualScrollConfig
6+
from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig, LLMConfig, ProxyConfig, GeolocationConfig, SeedingConfig, VirtualScrollConfig, LinkPreviewConfig
77

88
from .content_scraping_strategy import (
99
ContentScrapingStrategy,
@@ -173,6 +173,7 @@
173173
"CompilationResult",
174174
"ValidationResult",
175175
"ErrorDetail",
176+
"LinkPreviewConfig"
176177
]
177178

178179

crawl4ai/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# crawl4ai/__version__.py
22

33
# This is the version that will be used for stable releases
4-
__version__ = "0.7.0"
4+
__version__ = "0.7.1"
55

66
# For nightly builds, this gets set during build process
77
__nightly_version__ = None

crawl4ai/browser_manager.py

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -14,23 +14,8 @@
1414
from .js_snippet import load_js_script
1515
from .config import DOWNLOAD_PAGE_TIMEOUT
1616
from .async_configs import BrowserConfig, CrawlerRunConfig
17-
from playwright_stealth import StealthConfig
1817
from .utils import get_chromium_path
1918

20-
stealth_config = StealthConfig(
21-
webdriver=True,
22-
chrome_app=True,
23-
chrome_csi=True,
24-
chrome_load_times=True,
25-
chrome_runtime=True,
26-
navigator_languages=True,
27-
navigator_plugins=True,
28-
navigator_permissions=True,
29-
webgl_vendor=True,
30-
outerdimensions=True,
31-
navigator_hardware_concurrency=True,
32-
media_codecs=True,
33-
)
3419

3520
BROWSER_DISABLE_OPTIONS = [
3621
"--disable-background-networking",

crawl4ai/content_scraping_strategy.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1145,10 +1145,10 @@ def _process_element(
11451145
link_data["intrinsic_score"] = intrinsic_score
11461146
except Exception:
11471147
# Fail gracefully - assign default score
1148-
link_data["intrinsic_score"] = float('inf')
1148+
link_data["intrinsic_score"] = 0
11491149
else:
11501150
# No scoring enabled - assign default score 0 (all links equal priority)
1151-
link_data["intrinsic_score"] = float('inf')
1151+
link_data["intrinsic_score"] = 0
11521152

11531153
is_external = is_external_url(normalized_href, base_domain)
11541154
if is_external:

0 commit comments

Comments (0)