From 66022751e959b293472d0793e011bada0edf521c Mon Sep 17 00:00:00 2001 From: Geoff-Robin Date: Wed, 8 Oct 2025 16:51:03 +0530 Subject: [PATCH] Addressed code rabbit comment on shortening content --- cognee/tasks/web_scraper/models.py | 2 +- cognee/tasks/web_scraper/web_scraper_task.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cognee/tasks/web_scraper/models.py b/cognee/tasks/web_scraper/models.py index 297aebb4f..aed2db0ed 100644 --- a/cognee/tasks/web_scraper/models.py +++ b/cognee/tasks/web_scraper/models.py @@ -16,7 +16,7 @@ class WebPage(DataPoint): page_size: int extraction_rules: Dict[str, Any] # CSS selectors, XPath rules used description: str - metadata: dict = {"index_fields": ["name", "description"]} + metadata: dict = {"index_fields": ["name", "description", "content"]} class WebSite(DataPoint): diff --git a/cognee/tasks/web_scraper/web_scraper_task.py b/cognee/tasks/web_scraper/web_scraper_task.py index 52b73b5b5..52154c6ef 100644 --- a/cognee/tasks/web_scraper/web_scraper_task.py +++ b/cognee/tasks/web_scraper/web_scraper_task.py @@ -250,13 +250,13 @@ async def web_scraper_task( # Create WebPage content_str = content if isinstance(content, str) else str(content) content_hash = hashlib.sha256(content_str.encode("utf-8")).hexdigest() - + content_preview = content_str[:500] + ("..." if len(content_str) > 500 else "") # Create description for WebPage webpage_description = ( f"Webpage: {parsed_url.path.lstrip('/') or 'Home'}\n" f"URL: {page_url}\n" f"Scraped at: {now.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Content: {content_str}\n" + f"Content: {content_preview}\n" f"Content type: text\n" f"Page size: {len(content_str)} bytes\n" f"Status code: 200"