Removed print statement logging, switched to cognee's built-in logger, and updated docstrings.

Geoff-Robin 2025-10-08 14:06:13 +05:30
parent 49858c5416
commit ea33854d49


@@ -96,7 +96,7 @@ async def cron_web_scraper_task(
         return
     # If no schedule, run immediately
-    print(f"[{datetime.now()}] Running web scraper task immediately...")
+    logger.info(f"[{datetime.now()}] Running web scraper task immediately...")
     return await web_scraper_task(
         url=url,
         schedule=schedule,
@@ -118,10 +118,10 @@ async def web_scraper_task(
     tavily_config: TavilyConfig = None,
     job_name: str = None,
 ):
-    """Scrape URLs and store data points in a Kuzu graph database.
+    """Scrape URLs and store data points in a Graph database.

     This function scrapes content from the provided URLs, creates or updates WebPage,
-    WebSite, and ScrapingJob data points, and stores them in a Kuzu graph database.
+    WebSite, and ScrapingJob data points, and stores them in a Graph database.
     Each data point includes a description field summarizing its attributes. It creates
     'is_scraping' (ScrapingJob to WebSite) and 'is_part_of' (WebPage to WebSite)
     relationships, preserving existing edges during node updates.
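For context, a hypothetical call site based only on the parameters visible in this hunk (url, schedule, tavily_config, job_name); the full signature, defaults, and the module that web_scraper_task lives in are not shown in this diff:

```python
import asyncio

# Hypothetical values; only the parameter names are taken from the hunk above.
async def main() -> None:
    await web_scraper_task(
        url="https://example.com",
        schedule=None,              # no schedule: scrape once, immediately
        tavily_config=None,         # optional TavilyConfig for content extraction
        job_name="example_scrape",  # identifies the ScrapingJob data point
    )


asyncio.run(main())
```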