Addressed CodeRabbit review comment on shortening content

This commit is contained in:
Geoff-Robin 2025-10-08 16:51:03 +05:30
parent 599ef4a141
commit 66022751e9
2 changed files with 3 additions and 3 deletions

View file

@ -16,7 +16,7 @@ class WebPage(DataPoint):
page_size: int
extraction_rules: Dict[str, Any] # CSS selectors, XPath rules used
description: str
metadata: dict = {"index_fields": ["name", "description"]}
metadata: dict = {"index_fields": ["name", "description", "content"]}
class WebSite(DataPoint):

View file

@ -250,13 +250,13 @@ async def web_scraper_task(
# Create WebPage
content_str = content if isinstance(content, str) else str(content)
content_hash = hashlib.sha256(content_str.encode("utf-8")).hexdigest()
content_preview = content_str[:500] + ("..." if len(content_str) > 500 else "")
# Create description for WebPage
webpage_description = (
f"Webpage: {parsed_url.path.lstrip('/') or 'Home'}\n"
f"URL: {page_url}\n"
f"Scraped at: {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
f"Content: {content_str}\n"
f"Content: {content_preview}\n"
f"Content type: text\n"
f"Page size: {len(content_str)} bytes\n"
f"Status code: 200"