From e46c0c4f6c4bdd2acf1ff64bce9c147602038fba Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 17:37:19 +0100 Subject: [PATCH 1/6] CodeRabbit config --- .coderabbit.yaml | 154 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 .coderabbit.yaml diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 000000000..7240d9edf --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,154 @@ +# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json +# .coderabbit.yaml +language: en +early_access: false +enable_free_tier: true +reviews: + enabled: true + profile: chill + instructions: >- + # Code Review Instructions + + - Ensure the code follows best practices and coding standards. + - For **Python** code, follow + [PEP 20](https://www.python.org/dev/peps/pep-0020/) and + [CEP-8](https://gist.github.com/reactive-firewall/b7ee98df9e636a51806e62ef9c4ab161) + standards. + + # Documentation Review Instructions + - Verify that documentation and comments are clear and comprehensive. + - Verify that documentation and comments are free of spelling mistakes. + + # Test Code Review Instructions + - Ensure that test code is automated, comprehensive, and follows testing best practices. + - Verify that all critical functionality is covered by tests. + - Ensure that test code follows + [CEP-8](https://gist.github.com/reactive-firewall/d840ee9990e65f302ce2a8d78ebe73f6) + + # Misc. + - Confirm that the code meets the project's requirements and objectives. + - Confirm that copyright years are up-to date whenever a file is changed. 
+ request_changes_workflow: true + high_level_summary: true + high_level_summary_placeholder: '@coderabbitai summary' + auto_title_placeholder: '@coderabbitai' + review_status: true + poem: false + collapse_walkthrough: false + sequence_diagrams: false + changed_files_summary: true + path_filters: ['!*.xc*/**', '!node_modules/**', '!dist/**', '!build/**', '!.git/**', '!venv/**', '!__pycache__/**'] + path_instructions: + - path: README.md + instructions: >- + 1. Consider the file 'README.md' the overview/introduction of the project. + Also consider the 'README.md' file the first place to look for project documentation. + + 2. When reviewing the file 'README.md' it should be linted with help + from the tools `markdownlint` and `languagetool`, pointing out any issues. + + 3. You may assume the file 'README.md' will contain GitHub flavor Markdown. + - path: '**/*.py' + instructions: >- + When reviewing Python code for this project: + + 1. Prioritize portability over clarity, especially when dealing with cross-Python compatibility. However, with the priority in mind, do still consider improvements to clarity when relevant. + + 2. As a general guideline, consider the code style advocated in the PEP 8 standard (excluding the use of spaces for indentation) and evaluate suggested changes for code style compliance. + + 3. As a style convention, consider the code style advocated in [CEP-8](https://gist.github.com/reactive-firewall/b7ee98df9e636a51806e62ef9c4ab161) and evaluate suggested changes for code style compliance. + + 4. As a general guideline, try to provide any relevant, official, and supporting documentation links to any tool's suggestions in review comments. This guideline is important for posterity. + + 5. As a general rule, undocumented function definitions and class definitions in the project's Python code are assumed incomplete. Please consider suggesting a short summary of the code for any of these incomplete definitions as docstrings when reviewing. 
+ - path: cognee/tests/* + instructions: >- + When reviewing test code: + + 1. Prioritize portability over clarity, especially when dealing with cross-Python compatibility. However, with the priority in mind, do still consider improvements to clarity when relevant. + + 2. As a general guideline, consider the code style advocated in the PEP 8 standard (excluding the use of spaces for indentation) and evaluate suggested changes for code style compliance. + + 3. As a style convention, consider the code style advocated in [CEP-8](https://gist.github.com/reactive-firewall/b7ee98df9e636a51806e62ef9c4ab161) and evaluate suggested changes for code style compliance, pointing out any violations discovered. + + 4. As a general guideline, try to provide any relevant, official, and supporting documentation links to any tool's suggestions in review comments. This guideline is important for posterity. + + 5. As a project rule, Python source files with names prefixed by the string "test_" and located in the project's "tests" directory are the project's unit-testing code. It is safe, albeit a heuristic, to assume these are considered part of the project's minimal acceptance testing unless a justifying exception to this assumption is documented. + + 6. As a project rule, any files without extensions and with names prefixed by either the string "check_" or the string "test_", and located in the project's "tests" directory, are the project's non-unit test code. "Non-unit test" in this context refers to any type of testing other than unit testing, such as (but not limited to) functional testing, style linting, regression testing, etc. It can also be assumed that non-unit testing code is usually written as Bash shell scripts. + - path: requirements.txt + instructions: >- + * The project's own Python dependencies are recorded in 'requirements.txt' for production code. 
+ + * The project's testing-specific Python dependencies are recorded in 'tests/requirements.txt' and are used for testing the project. + + * The project's documentation-specific Python dependencies are recorded in 'docs/requirements.txt' and are used only for generating Python-focused documentation for the project. 'docs/requirements.txt' may be absent if not applicable. + + Consider these 'requirements.txt' files the records of truth regarding project dependencies. + - path: .github/** + instructions: >- + * When the project is hosted on GitHub: All GitHub-specific configurations, templates, and tools should be found in the '.github' directory tree. + + * 'actionlint' erroneously generates false positives when dealing with GitHub's `${{ ... }}` syntax in conditionals. + + * 'actionlint' erroneously generates incorrect solutions when suggesting the removal of valid `${{ ... }}` syntax. + abort_on_close: true + auto_review: + enabled: true + auto_incremental_review: true + ignore_title_keywords: [] + labels: [] + drafts: false + base_branches: + - dev + - main + tools: + languagetool: + enabled: true + language: en-US + configuration: + level: picky + mother_tongue: en + dictionary: + - 'reactive-firewall' + - 'CEP-9' + - 'CEP-8' + - 'CEP-7' + - 'CEP-5' + - 'Shellscript' + - 'bash' + disabled_rules: + - EN_QUOTES + - CONSECUTIVE_SPACES + enabled_rules: + - STYLE + - EN_CONTRACTION_SPELLING + - EN_WORD_COHERENCY + - IT_IS_OBVIOUS + - TWELFTH_OF_NEVER + - OXFORD_SPELLING + - PASSIVE_VOICE + shellcheck: + enabled: true + ruff: + enabled: true + configuration: + extend_select: + - E # Pycodestyle errors (style issues) + - F # PyFlakes codes (logical errors) + - W # Pycodestyle warnings + - N # PEP 8 naming conventions + ignore: + - W191 + - W391 + - E117 + - D208 + line_length: 100 + dummy_variable_rgx: '^(_.*|junk|extra)$' # Variables starting with '_' or named 'junk' or 'extra' are considered dummy variables + markdownlint: + enabled: true + yamllint: + enabled: 
true + configuration_file: ".yamllint.conf" +chat: + auto_reply: true \ No newline at end of file From ff20f021cc998966bdec1bc2b2c7a3cc4bd0b85a Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 17:49:56 +0100 Subject: [PATCH 2/6] fix comments --- .coderabbit.yaml | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 7240d9edf..ffd7f1563 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -103,31 +103,6 @@ reviews: - dev - main tools: - languagetool: - enabled: true - language: en-US - configuration: - level: picky - mother_tongue: en - dictionary: - - 'reactive-firewall' - - 'CEP-9' - - 'CEP-8' - - 'CEP-7' - - 'CEP-5' - - 'Shellscript' - - 'bash' - disabled_rules: - - EN_QUOTES - - CONSECUTIVE_SPACES - enabled_rules: - - STYLE - - EN_CONTRACTION_SPELLING - - EN_WORD_COHERENCY - - IT_IS_OBVIOUS - - TWELFTH_OF_NEVER - - OXFORD_SPELLING - - PASSIVE_VOICE shellcheck: enabled: true ruff: @@ -148,7 +123,6 @@ reviews: markdownlint: enabled: true yamllint: - enabled: true - configuration_file: ".yamllint.conf" + enabled: true chat: auto_reply: true \ No newline at end of file From 0f8cec64d51580c29665ebe94089e32fc9639f3c Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 17:51:32 +0100 Subject: [PATCH 3/6] fix comments --- .coderabbit.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index ffd7f1563..6373cd2f3 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -123,6 +123,6 @@ reviews: markdownlint: enabled: true yamllint: - enabled: true + enabled: true chat: auto_reply: true \ No newline at end of file From 39c6eba571474c8cb9e64a4d12de5cef20400080 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Tue, 25 Nov 2025 18:09:43 +0100 Subject: [PATCH 4/6] coderabbit fix --- .coderabbit.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 
6373cd2f3..0cad35c84 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -3,8 +3,7 @@ language: en early_access: false enable_free_tier: true -reviews: - enabled: true +reviews: profile: chill instructions: >- # Code Review Instructions @@ -28,7 +27,7 @@ reviews: # Misc. - Confirm that the code meets the project's requirements and objectives. - Confirm that copyright years are up-to date whenever a file is changed. - request_changes_workflow: true + request_changes_workflow: false high_level_summary: true high_level_summary_placeholder: '@coderabbitai summary' auto_title_placeholder: '@coderabbitai' From 7c9a78abeac8f1ea0c2f8ff2d0726f31fe420018 Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Mon, 1 Dec 2025 14:39:45 +0100 Subject: [PATCH 5/6] CI: Smaller embedding model for Ollama test --- .github/workflows/test_ollama.yml | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/.github/workflows/test_ollama.yml b/.github/workflows/test_ollama.yml index 686545c70..cd1bf0aaf 100644 --- a/.github/workflows/test_ollama.yml +++ b/.github/workflows/test_ollama.yml @@ -7,13 +7,8 @@ jobs: run_ollama_test: - # needs 16 Gb RAM for phi4 + runs-on: buildjet-4vcpu-ubuntu-2204 -# services: -# ollama: -# image: ollama/ollama -# ports: -# - 11434:11434 steps: - name: Checkout repository @@ -28,14 +23,6 @@ jobs: run: | uv add torch -# - name: Install ollama -# run: curl -fsSL https://ollama.com/install.sh | sh -# - name: Run ollama -# run: | -# ollama serve --openai & -# ollama pull llama3.2 & -# ollama pull avr/sfr-embedding-mistral:latest - - name: Start Ollama container run: | docker run -d --name ollama -p 11434:11434 ollama/ollama @@ -60,15 +47,15 @@ jobs: - name: Pull required Ollama models run: | - curl -X POST http://localhost:11434/api/pull -d '{"name": "phi4"}' - curl -X POST http://localhost:11434/api/pull -d '{"name": "avr/sfr-embedding-mistral:latest"}' + curl -X POST http://localhost:11434/api/pull -d '{"name": 
"phi3:mini"}' + curl -X POST http://localhost:11434/api/pull -d '{"name": "nomic-embed-text"}' - name: Call ollama API run: | curl -X POST http://localhost:11434/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "phi4", + "model": "phi3:mini", "stream": false, "messages": [ { "role": "system", "content": "You are a helpful assistant." }, @@ -78,7 +65,7 @@ jobs: curl -X POST http://127.0.0.1:11434/api/embed \ -H "Content-Type: application/json" \ -d '{ - "model": "avr/sfr-embedding-mistral:latest", + "model": "nomic-embed-text", "input": "This is a test sentence to generate an embedding." }' @@ -95,10 +82,10 @@ jobs: LLM_PROVIDER: "ollama" LLM_API_KEY: "ollama" LLM_ENDPOINT: "http://localhost:11434/v1/" - LLM_MODEL: "phi4" + LLM_MODEL: "phi3:mini" EMBEDDING_PROVIDER: "ollama" - EMBEDDING_MODEL: "avr/sfr-embedding-mistral:latest" + EMBEDDING_MODEL: "nomic-embed-text" EMBEDDING_ENDPOINT: "http://localhost:11434/api/embed" - EMBEDDING_DIMENSIONS: "4096" - HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral" + EMBEDDING_DIMENSIONS: "768" + HUGGINGFACE_TOKENIZER: "nomic-ai/nomic-embed-text-v1" run: uv run python ./examples/python/simple_example.py From ba9ca465749ca3c32b8a03d24780a542e2abe65e Mon Sep 17 00:00:00 2001 From: Pavel Zorin Date: Mon, 1 Dec 2025 15:21:58 +0100 Subject: [PATCH 6/6] Increase the machine size --- .github/workflows/test_ollama.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test_ollama.yml b/.github/workflows/test_ollama.yml index cd1bf0aaf..7696de474 100644 --- a/.github/workflows/test_ollama.yml +++ b/.github/workflows/test_ollama.yml @@ -7,8 +7,8 @@ jobs: run_ollama_test: - - runs-on: buildjet-4vcpu-ubuntu-2204 + # needs 32 Gb RAM for phi4 in a container + runs-on: buildjet-8vcpu-ubuntu-2204 steps: - name: Checkout repository @@ -47,15 +47,15 @@ jobs: - name: Pull required Ollama models run: | - curl -X POST http://localhost:11434/api/pull -d 
'{"name": "phi3:mini"}' - curl -X POST http://localhost:11434/api/pull -d '{"name": "nomic-embed-text"}' + curl -X POST http://localhost:11434/api/pull -d '{"name": "phi4"}' + curl -X POST http://localhost:11434/api/pull -d '{"name": "avr/sfr-embedding-mistral:latest"}' - name: Call ollama API run: | curl -X POST http://localhost:11434/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "phi3:mini", + "model": "phi4", "stream": false, "messages": [ { "role": "system", "content": "You are a helpful assistant." }, @@ -65,7 +65,7 @@ jobs: curl -X POST http://127.0.0.1:11434/api/embed \ -H "Content-Type: application/json" \ -d '{ - "model": "nomic-embed-text", + "model": "avr/sfr-embedding-mistral:latest", "input": "This is a test sentence to generate an embedding." }' @@ -82,10 +82,10 @@ jobs: LLM_PROVIDER: "ollama" LLM_API_KEY: "ollama" LLM_ENDPOINT: "http://localhost:11434/v1/" - LLM_MODEL: "phi3:mini" + LLM_MODEL: "phi4" EMBEDDING_PROVIDER: "ollama" - EMBEDDING_MODEL: "nomic-embed-text" + EMBEDDING_MODEL: "avr/sfr-embedding-mistral:latest" EMBEDDING_ENDPOINT: "http://localhost:11434/api/embed" - EMBEDDING_DIMENSIONS: "768" - HUGGINGFACE_TOKENIZER: "nomic-ai/nomic-embed-text-v1" + EMBEDDING_DIMENSIONS: "4096" + HUGGINGFACE_TOKENIZER: "Salesforce/SFR-Embedding-Mistral" run: uv run python ./examples/python/simple_example.py