Feature/cog 3407 fixing integration test in ci (#1810)
<!-- .github/pull_request_template.md --> ## Description This PR fixes the web crawler integration test failure in our CI. ## Type of Change <!-- Please check the relevant option --> - [x] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [ ] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) <!-- Add screenshots or videos to help explain your changes --> ## Pre-submission Checklist <!-- Please check all boxes that apply before submitting your PR --> - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
de6b842d02
commit
fe55071849
3 changed files with 15 additions and 29 deletions
|
|
@ -5,7 +5,7 @@ from cognee.tasks.web_scraper import DefaultUrlCrawler
|
|||
@pytest.mark.asyncio
|
||||
async def test_fetch():
|
||||
crawler = DefaultUrlCrawler()
|
||||
url = "https://en.wikipedia.org/wiki/Large_language_model"
|
||||
url = "https://httpbin.org/html"
|
||||
results = await crawler.fetch_urls(url)
|
||||
assert len(results) == 1
|
||||
assert isinstance(results, dict)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ skip_in_ci = pytest.mark.skipif(
|
|||
@skip_in_ci
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch():
|
||||
url = "https://en.wikipedia.org/wiki/Large_language_model"
|
||||
url = "https://httpbin.org/html"
|
||||
results = await fetch_with_tavily(url)
|
||||
assert isinstance(results, dict)
|
||||
assert len(results) == 1
|
||||
|
|
|
|||
|
|
@ -14,9 +14,7 @@ async def test_url_saves_as_html_file():
|
|||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
try:
|
||||
original_file_path = await save_data_item_to_storage(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model"
|
||||
)
|
||||
original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
|
||||
file_path = get_data_file_path(original_file_path)
|
||||
assert file_path.endswith(".html")
|
||||
file = Path(file_path)
|
||||
|
|
@ -44,9 +42,7 @@ async def test_saved_html_is_valid():
|
|||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
try:
|
||||
original_file_path = await save_data_item_to_storage(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model"
|
||||
)
|
||||
original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
|
||||
file_path = get_data_file_path(original_file_path)
|
||||
content = Path(file_path).read_text()
|
||||
|
||||
|
|
@ -72,7 +68,7 @@ async def test_add_url():
|
|||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
await cognee.add("https://en.wikipedia.org/wiki/Large_language_model")
|
||||
await cognee.add("https://httpbin.org/html")
|
||||
|
||||
|
||||
skip_in_ci = pytest.mark.skipif(
|
||||
|
|
@ -88,7 +84,7 @@ async def test_add_url_with_tavily():
|
|||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
await cognee.add("https://en.wikipedia.org/wiki/Large_language_model")
|
||||
await cognee.add("https://httpbin.org/html")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -98,7 +94,7 @@ async def test_add_url_without_incremental_loading():
|
|||
|
||||
try:
|
||||
await cognee.add(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||
"https://httpbin.org/html",
|
||||
incremental_loading=False,
|
||||
)
|
||||
except Exception as e:
|
||||
|
|
@ -112,7 +108,7 @@ async def test_add_url_with_incremental_loading():
|
|||
|
||||
try:
|
||||
await cognee.add(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||
"https://httpbin.org/html",
|
||||
incremental_loading=True,
|
||||
)
|
||||
except Exception as e:
|
||||
|
|
@ -125,7 +121,7 @@ async def test_add_url_can_define_preferred_loader_as_list_of_str():
|
|||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
await cognee.add(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||
"https://httpbin.org/html",
|
||||
preferred_loaders=["beautiful_soup_loader"],
|
||||
)
|
||||
|
||||
|
|
@ -144,7 +140,7 @@ async def test_add_url_with_extraction_rules():
|
|||
|
||||
try:
|
||||
await cognee.add(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||
"https://httpbin.org/html",
|
||||
preferred_loaders={"beautiful_soup_loader": {"extraction_rules": extraction_rules}},
|
||||
)
|
||||
except Exception as e:
|
||||
|
|
@ -163,9 +159,7 @@ async def test_loader_is_none_by_default():
|
|||
}
|
||||
|
||||
try:
|
||||
original_file_path = await save_data_item_to_storage(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model"
|
||||
)
|
||||
original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
|
||||
file_path = get_data_file_path(original_file_path)
|
||||
assert file_path.endswith(".html")
|
||||
file = Path(file_path)
|
||||
|
|
@ -196,9 +190,7 @@ async def test_beautiful_soup_loader_is_selected_loader_if_preferred_loader_prov
|
|||
}
|
||||
|
||||
try:
|
||||
original_file_path = await save_data_item_to_storage(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model"
|
||||
)
|
||||
original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
|
||||
file_path = get_data_file_path(original_file_path)
|
||||
assert file_path.endswith(".html")
|
||||
file = Path(file_path)
|
||||
|
|
@ -225,9 +217,7 @@ async def test_beautiful_soup_loader_works_with_and_without_arguments():
|
|||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
try:
|
||||
original_file_path = await save_data_item_to_storage(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model"
|
||||
)
|
||||
original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
|
||||
file_path = get_data_file_path(original_file_path)
|
||||
assert file_path.endswith(".html")
|
||||
file = Path(file_path)
|
||||
|
|
@ -263,9 +253,7 @@ async def test_beautiful_soup_loader_successfully_loads_file_if_required_args_pr
|
|||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
try:
|
||||
original_file_path = await save_data_item_to_storage(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model"
|
||||
)
|
||||
original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
|
||||
file_path = get_data_file_path(original_file_path)
|
||||
assert file_path.endswith(".html")
|
||||
file = Path(file_path)
|
||||
|
|
@ -302,9 +290,7 @@ async def test_beautiful_soup_loads_file_successfully():
|
|||
}
|
||||
|
||||
try:
|
||||
original_file_path = await save_data_item_to_storage(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model"
|
||||
)
|
||||
original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
|
||||
file_path = get_data_file_path(original_file_path)
|
||||
assert file_path.endswith(".html")
|
||||
original_file = Path(file_path)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue