fix: Make excluded paths use absolute paths
This commit is contained in:
parent
fd4deee27c
commit
4159846bb3
3 changed files with 29 additions and 8 deletions
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import pathlib
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
from cognee.shared.logging_utils import get_logger, setup_logging
|
||||
from cognee.modules.observability.get_observe import get_observe
|
||||
|
||||
|
|
@ -28,7 +29,12 @@ logger = get_logger("code_graph_pipeline")
|
|||
|
||||
|
||||
@observe
|
||||
async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=None):
|
||||
async def run_code_graph_pipeline(
|
||||
repo_path,
|
||||
include_docs=False,
|
||||
excluded_paths: Optional[list[str]] = None,
|
||||
supported_languages: Optional[list[str]] = None,
|
||||
):
|
||||
import cognee
|
||||
from cognee.low_level import setup
|
||||
|
||||
|
|
@ -40,8 +46,6 @@ async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=
|
|||
user = await get_default_user()
|
||||
detailed_extraction = True
|
||||
|
||||
# Multi-language support: allow passing supported_languages
|
||||
supported_languages = None # defer to task defaults
|
||||
tasks = [
|
||||
Task(
|
||||
get_repo_file_dependencies,
|
||||
|
|
@ -95,7 +99,7 @@ async def run_code_graph_pipeline(repo_path, include_docs=False, excluded_paths=
|
|||
if __name__ == "__main__":
|
||||
|
||||
async def main():
|
||||
async for run_status in run_code_graph_pipeline("/Users/igorilic/Desktop/cognee/examples"):
|
||||
async for run_status in run_code_graph_pipeline("REPO_PATH"):
|
||||
print(f"{run_status.pipeline_run_id}: {run_status.status}")
|
||||
|
||||
file_path = os.path.join(
|
||||
|
|
|
|||
|
|
@ -94,7 +94,15 @@ class CodeRetriever(BaseRetriever):
|
|||
{"id": res.id, "score": res.score, "payload": res.payload}
|
||||
)
|
||||
|
||||
existing_collection = []
|
||||
for collection in self.classes_and_functions_collections:
|
||||
if await vector_engine.has_collection(collection):
|
||||
existing_collection.append(collection)
|
||||
|
||||
if not existing_collection:
|
||||
raise RuntimeError("No collection found for code retriever")
|
||||
|
||||
for collection in existing_collection:
|
||||
logger.debug(f"Searching {collection} collection with general query")
|
||||
search_results_code = await vector_engine.search(
|
||||
collection, query, limit=self.top_k
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import asyncio
|
||||
import math
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Set
|
||||
from typing import AsyncGenerator, Optional, List
|
||||
from uuid import NAMESPACE_OID, uuid5
|
||||
|
|
@ -78,15 +79,22 @@ async def get_source_code_files(
|
|||
if lang is None:
|
||||
continue
|
||||
# Exclude tests, common build/venv directories and files provided in exclude_paths
|
||||
excluded_dirs = EXCLUDED_DIRS | set(excluded_paths or [])
|
||||
root_parts = set(os.path.normpath(root).split(os.sep))
|
||||
excluded_dirs = EXCLUDED_DIRS
|
||||
excluded_paths = {Path(p).resolve() for p in (excluded_paths or [])} # full paths
|
||||
|
||||
root_path = Path(root).resolve()
|
||||
root_parts = set(root_path.parts) # same as before
|
||||
base_name, _ext = os.path.splitext(file)
|
||||
if (
|
||||
base_name.startswith("test_")
|
||||
or base_name.endswith("_test") # catches Go's *_test.go and similar
|
||||
or base_name.endswith("_test")
|
||||
or ".test." in file
|
||||
or ".spec." in file
|
||||
or (excluded_dirs & root_parts)
|
||||
or (excluded_dirs & root_parts) # name match
|
||||
or any(
|
||||
root_path.is_relative_to(p) # full-path match
|
||||
for p in excluded_paths
|
||||
)
|
||||
):
|
||||
continue
|
||||
file_path = os.path.abspath(os.path.join(root, file))
|
||||
|
|
@ -164,6 +172,7 @@ async def get_repo_file_dependencies(
|
|||
"go": [".go"],
|
||||
"rust": [".rs"],
|
||||
"cpp": [".cpp", ".c", ".h", ".hpp"],
|
||||
"c": [".c", ".h"],
|
||||
}
|
||||
if supported_languages is not None:
|
||||
language_config = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue