From c2aa95521c2f98fe1a8ff501652503958b4b8ac6 Mon Sep 17 00:00:00 2001
From: Geoff-Robin <jeffrobin132004@gmail.com>
Date: Sun, 5 Oct 2025 20:00:19 +0530
Subject: [PATCH] Remove structured argument from the BeautifulSoup web scraper

---
 cognee/tasks/web_scraper/bs4_crawler.py | 17 +++++------------
 cognee/tasks/web_scraper/config.py      |  1 -
 cognee/tasks/web_scraper/utils.py       |  1 -
 3 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/cognee/tasks/web_scraper/bs4_crawler.py b/cognee/tasks/web_scraper/bs4_crawler.py
index 514567317..19631a9fa 100644
--- a/cognee/tasks/web_scraper/bs4_crawler.py
+++ b/cognee/tasks/web_scraper/bs4_crawler.py
@@ -262,11 +262,10 @@ class BeautifulSoupCrawler:
         use_playwright: bool = False,
         playwright_js_wait: float = 0.8,
         join_all_matches: bool = False,
-        structured: bool = False,  # return structured output instead of concatenated string
-    ) -> Dict[str, Union[str, Dict[str, str]]]:
+    ) -> Dict[str, str]:
         """
         Fetch one or more URLs and extract text using BeautifulSoup (or lxml xpath).
-        Returns: dict[url] -> concatenated string OR structured dict depending on `structured`.
+        Returns: dict[url] -> concatenated string of extracted content.
         """
         if isinstance(urls, str):
             urls = [urls]
@@ -284,7 +283,7 @@ class BeautifulSoupCrawler:
                 allowed = await self._is_url_allowed(url)
                 if not allowed:
                     logger.warning(f"URL disallowed by robots.txt: {url}")
-                    return url, "" if not structured else {}
+                    return url, ""
 
                 # fetch (rendered or not)
                 if use_playwright:
@@ -294,12 +293,6 @@ class BeautifulSoupCrawler:
                 else:
                     html = await self._fetch_httpx(url)
 
-                if structured:
-                    return url, {
-                        field: self._extract_with_bs4(html, rule)
-                        for field, rule in normalized_rules.items()
-                    }
-
                 pieces = []
                 for field, rule in normalized_rules.items():
                     text = self._extract_with_bs4(html, rule)
@@ -314,8 +307,8 @@ class BeautifulSoupCrawler:
             try:
                 url, text = await coro
             except Exception as e:
-                results[url] = {} if structured else ""
+                results[url] = ""
                 logger.error(f"Error processing {url}: {e}")
                 continue
             results[url] = text
-        return results
+        return results
\ No newline at end of file
diff --git a/cognee/tasks/web_scraper/config.py b/cognee/tasks/web_scraper/config.py
index 505cef1df..4819800ab 100644
--- a/cognee/tasks/web_scraper/config.py
+++ b/cognee/tasks/web_scraper/config.py
@@ -21,4 +21,3 @@ class SoupCrawlerConfig(BaseModel):
     use_playwright: bool = False
     playwright_js_wait: float = 0.8
     join_all_matches: bool = False
-    structured: bool = False
diff --git a/cognee/tasks/web_scraper/utils.py b/cognee/tasks/web_scraper/utils.py
index d6b57d615..c1b6ecb53 100644
--- a/cognee/tasks/web_scraper/utils.py
+++ b/cognee/tasks/web_scraper/utils.py
@@ -54,7 +54,6 @@ async def fetch_page_content(
                 use_playwright=soup_crawler_config.use_playwright,
                 playwright_js_wait=soup_crawler_config.playwright_js_wait,
                 join_all_matches=soup_crawler_config.join_all_matches,
-                structured=soup_crawler_config.structured,
             )
             return results
         except Exception as e: