diff --git a/cognee/tasks/web_scraper/config.py b/cognee/tasks/web_scraper/config.py
index 4b54c6470..e28f412e5 100644
--- a/cognee/tasks/web_scraper/config.py
+++ b/cognee/tasks/web_scraper/config.py
@@ -1,12 +1,13 @@
 from pydantic import BaseModel, Field
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Literal
 import os


 class TavilyConfig(BaseModel):
     api_key: str = os.getenv("TAVILY_API_KEY")
-    extract_depth: str = "basic"
-    timeout: Optional[int] = Field(default=None, ge=1, le=60)
+    extract_depth: Literal["basic", "advanced"] = "basic"
+    proxies: Optional[Dict[str, str]] = None
+    timeout: Optional[int] = Field(default=10, ge=1, le=60)


 class SoupCrawlerConfig(BaseModel):
diff --git a/cognee/tasks/web_scraper/utils.py b/cognee/tasks/web_scraper/utils.py
index f22a18aa5..1a300a37b 100644
--- a/cognee/tasks/web_scraper/utils.py
+++ b/cognee/tasks/web_scraper/utils.py
@@ -105,8 +105,16 @@ async def fetch_with_tavily(
             "Failed to import tavily, make sure to install using pip install tavily-python>=0.7.0"
         )
         raise
-    client = AsyncTavilyClient(api_key=tavily_config.api_key if tavily_config else None)
-    results = await client.extract(urls, format="text")
+    client = AsyncTavilyClient(
+        api_key=tavily_config.api_key if tavily_config else None,
+        proxies=tavily_config.proxies if tavily_config else None,
+    )
+    results = await client.extract(
+        urls,
+        format="text",
+        extract_depth=tavily_config.extract_depth if tavily_config else "basic",
+        timeout=tavily_config.timeout if tavily_config else 10,
+    )
     for failed_result in results.get("failed_results", []):
         logger.warning(f"Failed to fetch {failed_result}")
     return_results = {}