From a3fbbdf8eb03ac419c3bdfdce2d7865f9f81a66c Mon Sep 17 00:00:00 2001 From: Geoff-Robin Date: Wed, 8 Oct 2025 14:58:02 +0530 Subject: [PATCH] Solved nitpick comments --- cognee/tasks/web_scraper/web_scraper_task.py | 30 +++++++++----------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/cognee/tasks/web_scraper/web_scraper_task.py b/cognee/tasks/web_scraper/web_scraper_task.py index 570cf80c3..a77e732cb 100644 --- a/cognee/tasks/web_scraper/web_scraper_task.py +++ b/cognee/tasks/web_scraper/web_scraper_task.py @@ -11,8 +11,6 @@ from datetime import datetime from typing import Union, List from urllib.parse import urlparse from uuid import uuid5, NAMESPACE_OID -import asyncio -from apscheduler.triggers.cron import CronTrigger from cognee.infrastructure.databases.graph import get_graph_engine from cognee.shared.logging_utils import get_logger @@ -25,9 +23,10 @@ from .config import SoupCrawlerConfig, TavilyConfig from .utils import fetch_page_content try: - from apscheduler.schedulers.background import BackgroundScheduler + from apscheduler.triggers.cron import CronTrigger + from apscheduler.schedulers.asyncio import AsyncIOScheduler - scheduler = BackgroundScheduler() + scheduler = AsyncIOScheduler() except ImportError: raise ImportError("Please install apscheduler by pip install APScheduler>=3.10") @@ -75,17 +74,16 @@ async def cron_web_scraper_task( raise ValueError(f"Invalid cron string '{schedule}': {e}") scheduler.add_job( - lambda: asyncio.run( - web_scraper_task( - url=url, - schedule=schedule, - extraction_rules=extraction_rules, - tavily_api_key=tavily_api_key, - soup_crawler_config=soup_crawler_config, - tavily_config=tavily_config, - job_name=job_name, - ) - ), + web_scraper_task, + kwargs={ + "url": url, + "schedule": schedule, + "extraction_rules": extraction_rules, + "tavily_api_key": tavily_api_key, + "soup_crawler_config": soup_crawler_config, + "tavily_config": tavily_config, + "job_name": job_name, + }, trigger=trigger, id=job_name, name=job_name or f"WebScraper_{uuid5(NAMESPACE_OID, name=job_name)}", @@ -249,7 +247,7 @@ async def web_scraper_task( f"Webpage: {parsed_url.path.lstrip('/') or 'Home'}\n" f"URL: {page_url}\n" f"Scraped at: {now.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Content: {content_str}" + f"Content: {content_str}\n" f"Content type: text\n" f"Page size: {len(content_str)} bytes\n" f"Status code: 200"