From 572c8ebce745d4cd675eb522c6e500cf04102591 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Thu, 16 Oct 2025 18:26:50 +0100 Subject: [PATCH] refactor: use pydantic models for tavily and beautifulsoup configs instead of dicts --- .../loaders/external/web_url_loader.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cognee/infrastructure/loaders/external/web_url_loader.py b/cognee/infrastructure/loaders/external/web_url_loader.py index 38bca2523..f9fce47a9 100644 --- a/cognee/infrastructure/loaders/external/web_url_loader.py +++ b/cognee/infrastructure/loaders/external/web_url_loader.py @@ -3,6 +3,7 @@ from typing import List from cognee.modules.ingestion.exceptions.exceptions import IngestionError from cognee.modules.ingestion import save_data_to_file +from cognee.tasks.web_scraper.config import TavilyConfig, SoupCrawlerConfig class WebUrlLoader(LoaderInterface): @@ -77,8 +78,11 @@ class WebUrlLoader(LoaderInterface): from cognee.context_global_variables import tavily_config, soup_crawler_config from cognee.tasks.web_scraper import fetch_page_content - _tavily_config = web_url_loader_config.get("tavily_config") - _soup_config = web_url_loader_config.get("soup_config") + tavily_dict = web_url_loader_config.get("tavily_config") + _tavily_config = TavilyConfig(**tavily_dict) if tavily_dict else None + + soup_dict = web_url_loader_config.get("soup_config") + _soup_config = SoupCrawlerConfig(**soup_dict) if soup_dict else None # Set global configs for downstream access tavily_config.set(_tavily_config) @@ -109,4 +113,6 @@ class WebUrlLoader(LoaderInterface): except IngestionError: raise except Exception as e: - raise IngestionError(message=f"Error ingesting webpage from URL {file_path}: {str(e)}") + raise IngestionError( + message=f"Error ingesting webpage from URL {file_path}: {str(e)}" + ) from e