Merge branch 'main' of github.com:topoteretes/cognee into COG-334-structure-routing

commit ddf495266b

30 changed files with 1661 additions and 1329 deletions
81  .github/workflows/auto-comment.yml  (vendored, new file)

@@ -0,0 +1,81 @@
+name: Issue and PR Auto Comments
+on:
+  issues:
+    types:
+      - opened
+      - closed
+      - assigned
+  pull_request_target:
+    types:
+      - opened
+      - closed
+
+permissions:
+  contents: read
+
+jobs:
+  auto-comment:
+    permissions:
+      issues: write
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      # configuration for auto-comment actions
+      - name: Configure Auto Comments
+        uses: wow-actions/auto-comment@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
+          issuesOpened: |
+            👋 @{{ author }}
+
+            Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.
+
+            To help us address your issue efficiently, please ensure you have provided:
+            - A clear description of the problem
+            - Steps to reproduce (if applicable)
+            - Expected vs actual behavior
+            - Any relevant screenshots or error messages
+
+            Our team typically responds within 2-3 business days.
+
+          issuesClosed: |
+            ✅ @{{ author }}
+
+            This issue has been closed. If you have any further questions or if the issue resurfaces,
+            please feel free to:
+            - Add a comment to this thread
+            - Open a new issue with reference to this one
+
+            Thank you for helping us improve!
+
+          pullRequestOpened: |
+            👍 @{{ author }}
+
+            Thank you for your pull request and contributing to our community!
+
+            Please ensure you have:
+            - [ ] Followed our contributing guidelines
+            - [ ] Added/updated tests (if applicable)
+            - [ ] Updated documentation (if applicable)
+            - [ ] Added a descriptive PR title
+
+            Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.
+
+      # Separate action for merged PRs
+      - name: Handle Merged Pull Requests
+        if: github.event.pull_request.merged == true
+        uses: actions-cool/pr-welcome@v1.2.1
+        with:
+          token: ${{ secrets.GH_TOKEN }}
+          comment: |
+            🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉
+
+            Your pull request has been merged successfully. Thank you for your valuable contribution!
+
+            We appreciate the time and effort you've put into improving our project.
+            Your changes will be included in our next release.
+
+            Keep up the great work! 💪
+          emoji: 'rocket'
+          pr-emoji: '+1, heart, rocket'
6  .github/workflows/docker_compose.yml  (vendored)

@@ -20,10 +20,16 @@ jobs:
       uses: docker/setup-buildx-action@v3

+    - name: Build Docker images
+      env:
+        ENVIRONMENT: dev
+        ENV: dev
+      run: |
+        docker compose -f docker-compose.yml build

     - name: Run Docker Compose
       env:
         ENVIRONMENT: dev
         ENV: dev
       run: |
         docker compose -f docker-compose.yml up -d
61  .github/workflows/test_notebook.yml  (vendored, new file)

@@ -0,0 +1,61 @@
+name: test | notebook
+
+on:
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  RUNTIME__LOG_LEVEL: ERROR
+
+jobs:
+  get_docs_changes:
+    name: docs changes
+    uses: ./.github/workflows/get_docs_changes.yml
+
+  run_notebook_test:
+    name: test
+    needs: get_docs_changes
+    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Check out
+        uses: actions/checkout@master
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11.x'
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1.3.2
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Install dependencies
+        run: |
+          poetry install --no-interaction
+          poetry add jupyter --no-interaction
+
+      - name: Execute Jupyter Notebook
+        env:
+          ENV: 'dev'
+          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
+          GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
+        run: |
+          poetry run jupyter nbconvert \
+            --to notebook \
+            --execute notebooks/cognee_demo.ipynb \
+            --output executed_notebook.ipynb \
+            --ExecutePreprocessor.timeout=1200
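For reference, the final CI step above does roughly what the following Python sketch does with nbconvert's programmatic API; this is an illustration of the CLI invocation, not part of the workflow, and assumes nbconvert's `ExecutePreprocessor` is available.

```python
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

# Roughly equivalent to `jupyter nbconvert --to notebook --execute ...`:
nb = nbformat.read("notebooks/cognee_demo.ipynb", as_version=4)
ExecutePreprocessor(timeout=1200).preprocess(nb)  # run all cells, 20-minute cap
nbformat.write(nb, "executed_notebook.ipynb")
```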
Dockerfile

@@ -25,7 +25,7 @@ RUN pip install poetry
 RUN poetry config virtualenvs.create false

 # Install the dependencies
-RUN poetry install --no-root --no-dev
+RUN poetry install --all-extras --no-root --no-dev


 # Set the PYTHONPATH environment variable to include the /app directory
59  README.md

@@ -29,6 +29,10 @@ If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord
 pip install cognee
 ```

+### With pip with PostgreSQL support
+```bash
+pip install cognee[postgres]
+```

 ### With poetry

@@ -36,6 +40,11 @@ pip install cognee
 poetry add cognee
 ```

+### With poetry with PostgreSQL support
+
+```bash
+poetry add cognee -E postgres
+```

 ## 💻 Basic Usage

@@ -50,7 +59,7 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY"
 or
 ```
 import cognee
-cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
+cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")
 ```
 You can also set the variables by creating .env file, here is our <a href="https://github.com/topoteretes/cognee/blob/main/.env.template">template.</a>
 To use different LLM providers, for more info check out our <a href="https://topoteretes.github.io/cognee">documentation</a>

@@ -73,26 +82,54 @@ docker-compose up
 ```
 Then navigate to localhost:3000

+If you want to use the UI with PostgreSQL through docker-compose make sure to set the following values in the .env file:
+```
+DB_PROVIDER=postgres
+
+DB_HOST=postgres
+DB_PORT=5432
+
+DB_NAME=cognee_db
+DB_USERNAME=cognee
+DB_PASSWORD=cognee
+```
+
 ### Simple example

-Run the default cognee pipeline:
+First, copy `.env.template` to `.env` and add your OpenAI API key to the LLM_API_KEY field.
+
+Optionally, set `VECTOR_DB_PROVIDER="lancedb"` in `.env` to simplify setup.

-```
+This script will run the default pipeline:
+
+```python
 import cognee
+import asyncio
+from cognee.api.v1.search import SearchType

-text = """Natural language processing (NLP) is an interdisciplinary
-subfield of computer science and information retrieval"""
 async def main():
-    await cognee.add(text) # Add a new piece of information
+    await cognee.prune.prune_data() # Reset cognee data
+    await cognee.prune.prune_system(metadata=True) # Reset cognee system state
+
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """

-    await cognee.cognify() # Use LLMs and cognee to create a knowledge graph
+    await cognee.add(text) # Add text to cognee
+    await cognee.cognify() # Use LLMs and cognee to create knowledge graph

-    search_results = await cognee.search("INSIGHTS", {'query': 'NLP'}) # Query cognee for the insights
+    search_results = await cognee.search( # Search cognee for insights
+        SearchType.INSIGHTS,
+        {'query': 'Tell me about NLP'}
+    )

-    for result in search_results:
-        do_something_with_result(result)
+    for result_text in search_results: # Display results
+        print(result_text)

 asyncio.run(main())
 ```

+A version of this example is here: `examples/python/simple_example.py`
+
 ### Create your own memory store
@@ -5,6 +5,7 @@ from cognee.modules.cognify.config import get_cognify_config
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.databases.graph.config import get_graph_config
+from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.databases.relational import get_relational_config
 from cognee.infrastructure.files.storage import LocalStorage

@@ -55,19 +56,36 @@ class config():
     graph_config.graph_database_provider = graph_database_provider

     @staticmethod
-    def llm_provider(llm_provider: str):
-        graph_config = get_graph_config()
-        graph_config.llm_provider = llm_provider
+    def set_llm_provider(llm_provider: str):
+        llm_config = get_llm_config()
+        llm_config.llm_provider = llm_provider

     @staticmethod
-    def llm_endpoint(llm_endpoint: str):
-        graph_config = get_graph_config()
-        graph_config.llm_endpoint = llm_endpoint
+    def set_llm_endpoint(llm_endpoint: str):
+        llm_config = get_llm_config()
+        llm_config.llm_endpoint = llm_endpoint

     @staticmethod
-    def llm_model(llm_model: str):
-        graph_config = get_graph_config()
-        graph_config.llm_model = llm_model
+    def set_llm_model(llm_model: str):
+        llm_config = get_llm_config()
+        llm_config.llm_model = llm_model
+
+    @staticmethod
+    def set_llm_api_key(llm_api_key: str):
+        llm_config = get_llm_config()
+        llm_config.llm_api_key = llm_api_key
+
+    @staticmethod
+    def set_llm_config(config_dict: dict):
+        """
+        Updates the llm config with values from config_dict.
+        """
+        llm_config = get_llm_config()
+        for key, value in config_dict.items():
+            if hasattr(llm_config, key):
+                object.__setattr__(llm_config, key, value)
+            else:
+                raise AttributeError(f"'{key}' is not a valid attribute of the config.")

     @staticmethod
     def set_chunk_strategy(chunk_strategy: object):

@@ -137,5 +155,5 @@ class config():
         if "username" not in graphistry_config or "password" not in graphistry_config:
             raise ValueError("graphistry_config dictionary must contain 'username' and 'password' keys.")

-        base_config.graphistry_username = graphistry_config.username
-        base_config.graphistry_password = graphistry_config.password
+        base_config.graphistry_username = graphistry_config.get("username")
+        base_config.graphistry_password = graphistry_config.get("password")
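The renamed setters above write to the LLM config rather than the graph config, and `set_llm_api_key` / `set_llm_config` are new. A minimal usage sketch, assuming the hunks above are applied (the provider, model, and endpoint values mirror ones used elsewhere in this commit and are illustrative):

```python
import cognee

cognee.config.set_llm_provider("ollama")
cognee.config.set_llm_model("mistral:instruct")       # illustrative model name
cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")

# Bulk update; unknown keys raise AttributeError per set_llm_config above.
cognee.config.set_llm_config({
    "llm_endpoint": "http://localhost:11434/v1",
})
```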
@@ -1,25 +0,0 @@
-from typing import List, Optional
-from fastembed import TextEmbedding
-from cognee.root_dir import get_absolute_path
-from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
-
-class FastembedEmbeddingEngine(EmbeddingEngine):
-    embedding_model: str
-    embedding_dimensions: int
-
-    def __init__(
-        self,
-        embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5",
-        embedding_dimensions: Optional[int] = 1024,
-    ):
-        self.embedding_model = embedding_model
-        self.embedding_dimensions = embedding_dimensions
-
-    async def embed_text(self, text: List[str]) -> List[float]:
-        embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
-        embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
-
-        return embeddings_list
-
-    def get_vector_size(self) -> int:
-        return self.embedding_dimensions
@@ -164,7 +164,10 @@ class LanceDBAdapter(VectorDBInterface):
     async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
         connection = await self.get_connection()
         collection = await connection.open_table(collection_name)
-        results = await collection.delete(f"id IN {tuple(data_point_ids)}")
+        if len(data_point_ids) == 1:
+            results = await collection.delete(f"id = '{data_point_ids[0]}'")
+        else:
+            results = await collection.delete(f"id IN {tuple(data_point_ids)}")
         return results

     async def prune(self):
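The new single-id branch works around a plain Python/SQL mismatch: `tuple()` of a one-element list renders with a trailing comma, which is not valid inside a SQL `IN` list. A standalone sketch of the filter strings each branch produces (no LanceDB needed):

```python
# Filter strings produced by each branch of delete_data_points:
ids = ["a1", "b2"]
print(f"id IN {tuple(ids)}")   # id IN ('a1', 'b2')  -- well-formed IN list

ids = ["a1"]
print(f"id IN {tuple(ids)}")   # id IN ('a1',)       -- trailing comma, malformed SQL
print(f"id = '{ids[0]}'")      # id = 'a1'           -- equality form used instead
```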
@@ -1,7 +1,8 @@
 from typing import BinaryIO
 from pypdf import PdfReader
+import filetype

-def extract_text_from_file(file: BinaryIO, file_type) -> str:
+def extract_text_from_file(file: BinaryIO, file_type: filetype.Type) -> str:
     """Extract text from a file"""
     if file_type.extension == "pdf":
         reader = PdfReader(stream = file)
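For context, `filetype.Type` is the object the filetype package's detection functions return; it carries the `extension` attribute checked above. A small sketch of how a caller would obtain one (the file name is illustrative):

```python
import filetype

# filetype.guess accepts a path, bytes, or file-like object.
kind = filetype.guess("report.pdf")  # returns a filetype.Type, or None if unknown
if kind is not None:
    print(kind.extension)  # "pdf"
    print(kind.mime)       # "application/pdf"
```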
@@ -1,5 +0,0 @@
-import os
-
-def get_file_size(file_path: str):
-    """Get the size of a file"""
-    return os.path.getsize(file_path)
@@ -1,4 +1,3 @@
-import dsp
 import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.primitives.example import Example
@@ -1,4 +1,3 @@
-import dsp
 import dspy
 from dspy.teleprompt import BootstrapFewShot
 from dspy.primitives.example import Example
@@ -5,7 +5,7 @@ from .models.Task import Task

 class PipelineConfig(BaseModel):
     batch_count: int = 10
-    description: Optional[str]
+    description: Optional[str] = None

 class Pipeline():
     id: UUID = uuid4()
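The added default matters under Pydantic v2: an `Optional[...]` annotation no longer implies `= None`, so without the explicit default `description` is a required field. A minimal sketch of the difference, assuming Pydantic v2:

```python
from typing import Optional
from pydantic import BaseModel, ValidationError

class WithoutDefault(BaseModel):
    description: Optional[str]          # nullable, but still required

class WithDefault(BaseModel):
    description: Optional[str] = None   # truly optional

try:
    WithoutDefault()
except ValidationError as error:
    print(error)         # 1 validation error ... Field required

print(WithDefault())     # description=None
```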
@@ -1,8 +1,8 @@
-from typing import Any, Callable, Generator
+from typing import Any, Callable, Generator, List
 import asyncio
 from ..tasks.Task import Task

-def run_tasks_parallel(tasks: [Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
+def run_tasks_parallel(tasks: List[Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
     async def parallel_run(*args, **kwargs):
         parallel_tasks = [asyncio.create_task(task.run(*args, **kwargs)) for task in tasks]
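The old annotation `[Task]` is a one-element list literal, not a type; the interpreter accepts it (annotations are ordinary expressions) but static checkers reject it. `List[Task]`, or `list[Task]` on Python 3.9+, is the correct spelling:

```python
from typing import List

class Task: ...

# Wrong: the annotation evaluates to the *value* [Task], a list containing a class.
def run_wrong(tasks: [Task]) -> None: ...

# Right: a parameterized generic type that checkers understand.
def run_right(tasks: List[Task]) -> None: ...  # or list[Task] on 3.9+
```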
@@ -18,7 +18,7 @@ class Directory(BaseModel):
     directories: List['Directory'] = []

 # Allows recursive Directory Model
-Directory.update_forward_refs()
+Directory.model_rebuild()

 class RepositoryProperties(BaseModel):
     custom_properties: Optional[Dict[str, Any]] = None
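`model_rebuild()` is Pydantic v2's name for what v1 called `update_forward_refs()`; the `parse_obj` → `model_validate` renames in later hunks are part of the same migration. A compact sketch of the self-referencing-model pattern, assuming Pydantic v2:

```python
from typing import List
from pydantic import BaseModel

class Directory(BaseModel):
    name: str
    directories: List["Directory"] = []

# Resolve the "Directory" forward reference (v1: Directory.update_forward_refs()).
Directory.model_rebuild()

# v1's parse_obj is v2's model_validate.
root = Directory.model_validate({"name": "src", "directories": [{"name": "api"}]})
print(root.directories[0].name)  # api
```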
@@ -6,15 +6,15 @@ class BaseClass(BaseModel):
     name: str
     type: Literal["Class"] = "Class"
     description: str
-    constructor_parameters: Optional[List[str]]
+    constructor_parameters: Optional[List[str]] = None

 class Class(BaseModel):
     id: str
     name: str
     type: Literal["Class"] = "Class"
     description: str
-    constructor_parameters: Optional[List[str]]
-    from_class: Optional[BaseClass]
+    constructor_parameters: Optional[List[str]] = None
+    from_class: Optional[BaseClass] = None

 class ClassInstance(BaseModel):
     id: str

@@ -28,7 +28,7 @@ class Function(BaseModel):
     name: str
     type: Literal["Function"] = "Function"
     description: str
-    parameters: Optional[List[str]]
+    parameters: Optional[List[str]] = None
     return_type: str
     is_static: Optional[bool] = False

@@ -38,7 +38,7 @@ class Variable(BaseModel):
     type: Literal["Variable"] = "Variable"
     description: str
     is_static: Optional[bool] = False
-    default_value: Optional[str]
+    default_value: Optional[str] = None

 class Operator(BaseModel):
     id: str
@@ -21,7 +21,6 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
     for chunk_index, chunk in enumerate(data_chunks):
         chunk_classification = chunk_classifications[chunk_index]
         classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
-        classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))

         for classification_subclass in chunk_classification.label.subclass:
             classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))

@@ -39,7 +38,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
     if await vector_engine.has_collection(collection_name):
         existing_data_points = await vector_engine.retrieve(
             collection_name,
-            list(set(classification_data_points)),
+            [str(classification_data) for classification_data in list(set(classification_data_points))],
         ) if len(classification_data_points) > 0 else []

         existing_points_map = {point.id: True for point in existing_data_points}

@@ -60,7 +59,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
         data_points.append(
             DataPoint[Keyword](
                 id=str(classification_type_id),
-                payload=Keyword.parse_obj({
+                payload=Keyword.model_validate({
                     "uuid": str(classification_type_id),
                     "text": classification_type_label,
                     "chunk_id": str(data_chunk.chunk_id),

@@ -99,7 +98,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
         data_points.append(
             DataPoint[Keyword](
                 id=str(classification_subtype_id),
-                payload=Keyword.parse_obj({
+                payload=Keyword.model_validate({
                     "uuid": str(classification_subtype_id),
                     "text": classification_subtype_label,
                     "chunk_id": str(data_chunk.chunk_id),
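The retrieve fix converts ids to strings because `uuid5()` returns `UUID` objects while, judging by the change, the vector engine expects string keys. The ids themselves are deterministic, which is also why the duplicate append removed above was redundant:

```python
from uuid import NAMESPACE_OID, uuid5

label = "news"
point_id = uuid5(NAMESPACE_OID, label)           # same label always yields the same UUID
print(uuid5(NAMESPACE_OID, label) == point_id)   # True
print(str(point_id))                             # string form passed to retrieve
```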
@@ -56,7 +56,7 @@ class OntologyEngine:
         for item in items:
             flat_list.extend(await self.recursive_flatten(item, parent_id))
     elif isinstance(items, dict):
-        model = NodeModel.parse_obj(items)
+        model = NodeModel.model_validate(items)
         flat_list.append(await self.flatten_model(model, parent_id))
         for child in model.children:
             flat_list.extend(await self.recursive_flatten(child, model.node_id))
@@ -12,7 +12,7 @@ class NodeModel(BaseModel):
     default_relationship: Optional[RelationshipModel] = None
     children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)

-NodeModel.update_forward_refs()
+NodeModel.model_rebuild()


 class OntologyNode(BaseModel):
@@ -11,7 +11,7 @@ async def save_chunks_to_store(data_chunks: list[DocumentChunk], collection_name

     # Remove and unlink existing chunks
     if await vector_engine.has_collection(collection_name):
-        existing_chunks = [DocumentChunk.parse_obj(chunk.payload) for chunk in (await vector_engine.retrieve(
+        existing_chunks = [DocumentChunk.model_validate(chunk.payload) for chunk in (await vector_engine.retrieve(
             collection_name,
             [str(chunk.chunk_id) for chunk in data_chunks],
         ))]
@@ -49,7 +49,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")
     for result in search_results:
         print(f"{result}\n")

@@ -53,7 +53,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")
     for result in search_results:
         print(f"{result}\n")

@@ -54,7 +54,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")
     for result in search_results:
         print(f"{result}\n")

@@ -52,7 +52,7 @@ async def main():

     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."
-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")
     for result in search_results:
         print(f"{result}\n")
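All four test scripts had the same stray backslash: `\E` is not a recognized escape sequence, so Python keeps the backslash literally (and newer interpreters warn about it). A quick check of both strings:

```python
print("\n\Extracted summaries are:\n")  # prints: \Extracted ... (literal backslash)
print("\nExtracted summaries are:\n")   # prints the intended header
```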
@@ -14,9 +14,11 @@ Check available configuration options:
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.infrastructure.databases.relational import get_relational_config
+from cognee.infrastructure.llm.config import get_llm_config

 print(get_vectordb_config().to_dict())
 print(get_graph_config().to_dict())
 print(get_relational_config().to_dict())
+print(get_llm_config().to_dict())

 ```

@@ -29,8 +31,7 @@ GRAPH_DATABASE_PROVIDER = 'lancedb'
 Otherwise, you can set the configuration yourself:

 ```python
-
-cognee.config.llm_provider = 'ollama'
+cognee.config.set_llm_provider('ollama')
 ```

 ## 🚀 Getting Started with Local Models

@@ -52,15 +53,14 @@ LLM_PROVIDER = 'ollama'
 Otherwise, you can set the configuration for the model:

 ```bash
-cognee.config.llm_provider = 'ollama'
+cognee.config.set_llm_provider('ollama')

 ```
 You can also set the HOST and model name:

 ```bash
-
-cognee.config.llm_endpoint = "http://localhost:11434/v1"
-cognee.config.llm_model = "mistral:instruct"
+cognee.config.set_llm_endpoint("http://localhost:11434/v1")
+cognee.config.set_llm_model("mistral:instruct")
 ```

@@ -73,7 +73,7 @@ LLM_PROVIDER = 'custom'
 Otherwise, you can set the configuration for the model:

 ```bash
-cognee.config.llm_provider = 'custom'
+cognee.config.set_llm_provider('custom')

 ```
 You can also set the HOST and model name:
39  examples/python/simple_example.py  (new file)

@@ -0,0 +1,39 @@
+import cognee
+import asyncio
+from cognee.api.v1.search import SearchType
+
+# Prerequisites:
+# 1. Copy `.env.template` and rename it to `.env`.
+# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field:
+#    LLM_API_KEY = "your_key_here"
+# 3. (Optional) To minimize setup effort, set `VECTOR_DB_PROVIDER="lancedb"` in `.env`.
+
+async def main():
+    # Create a clean slate for cognee -- reset data and system state
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    # cognee knowledge graph will be created based on this text
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """
+
+    # Add the text, and make it available for cognify
+    await cognee.add(text)
+
+    # Use LLMs and cognee to create knowledge graph
+    await cognee.cognify()
+
+    # Query cognee for insights on the added text
+    search_results = await cognee.search(
+        SearchType.INSIGHTS,
+        {'query': 'Tell me about NLP'}
+    )
+
+    # Display search results
+    for result_text in search_results:
+        print(result_text)
+
+if __name__ == '__main__':
+    asyncio.run(main())
0  log.txt  (new empty file)
@@ -537,10 +537,14 @@
     "import os\n",
     "\n",
     "# # Setting environment variables\n",
-    "os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
-    "os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
+    "if \"GRAPHISTRY_USERNAME\" not in os.environ: \n",
+    "    os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
     "\n",
-    "os.environ[\"LLM_API_KEY\"] = \"\"\n",
+    "if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n",
+    "    os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
+    "\n",
+    "if \"LLM_API_KEY\" not in os.environ:\n",
+    "    os.environ[\"LLM_API_KEY\"] = \"\"\n",
     "\n",
     "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" # \"neo4j\" or \"networkx\"\n",
     "# Not needed if using networkx\n",

@@ -577,6 +581,7 @@
     "\n",
     "import cognee\n",
     "\n",
+    "await cognee.prune.prune_data()\n",
     "await cognee.prune.prune_system(metadata=True)"
     ]
   },

@@ -639,7 +644,8 @@
     "    chunks_into_graph, \\\n",
     "    source_documents_to_chunks, \\\n",
     "    check_permissions_on_documents, \\\n",
-    "    classify_documents\n",
+    "    classify_documents, \\\n",
+    "    chunk_naive_llm_classifier\n",
     "from cognee.tasks.summarization import summarize_text\n",
     "\n",
     "async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",

@@ -667,6 +673,10 @@
     "            summarization_model = cognee_config.summarization_model,\n",
     "            collection_name = \"summaries\",\n",
     "        ),\n",
+    "        Task(\n",
+    "            chunk_naive_llm_classifier,\n",
+    "            classification_model = cognee_config.classification_model,\n",
+    "        ),\n",
     "        Task(chunk_remove_disconnected), # Remove the obsolete document chunks.\n",
     "    ]\n",
     "\n",

@@ -876,7 +886,7 @@
   ],
   "metadata": {
     "kernelspec": {
-      "display_name": "cognee-bGi0WgSG-py3.9",
+      "display_name": ".venv",
       "language": "python",
       "name": "python3"
     },

@@ -890,7 +900,7 @@
     "name": "python",
     "nbconvert_exporter": "python",
     "pygments_lexer": "ipython3",
-    "version": "3.9.5"
+    "version": "3.9.6"
   }
 },
 "nbformat": 4,
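The guarded assignments in the first notebook hunk keep credentials that are already exported in the environment instead of unconditionally blanking them, as the old cells did. The same idiom can be written with `os.environ.setdefault`:

```python
import os

# Only sets a variable when it is not already present -- equivalent to the
# `if "..." not in os.environ:` guards in the notebook cells.
os.environ.setdefault("GRAPHISTRY_USERNAME", "")
os.environ.setdefault("GRAPHISTRY_PASSWORD", "")
os.environ.setdefault("LLM_API_KEY", "")
```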
2548  poetry.lock  (generated)
File diff suppressed because it is too large.
pyproject.toml

@@ -19,53 +19,51 @@ classifiers = [

 [tool.poetry.dependencies]
 python = ">=3.9.0,<3.12"
-openai = "1.27.0"
+openai = "1.52.0"
 pydantic = "2.8.2"
 python-dotenv = "1.0.1"
 fastapi = "^0.109.2"
 uvicorn = "0.22.0"
 requests = "2.32.3"
 aiohttp = "3.10.10"
 typing_extensions = "4.12.2"
 dspy = "2.5.25"
 nest_asyncio = "1.6.0"
 numpy = "1.26.4"
 datasets = "3.1.0"
 falkordb = "1.0.9"
 boto3 = "^1.26.125"
 botocore="^1.35.54"
 gunicorn = "^20.1.0"
 sqlalchemy = "2.0.35"
-instructor = "1.3.5"
+instructor = "1.6.3"
 networkx = "^3.2.1"
-debugpy = "1.8.2"
 pyarrow = "15.0.0"
-pylint = "^3.0.3"
 aiosqlite = "^0.20.0"
 pandas = "2.0.3"
 greenlet = "^3.0.3"
-ruff = "^0.2.2"
 filetype = "^1.2.0"
 nltk = "^3.8.1"
 dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
 overrides = "^7.7.0"
 aiofiles = "^23.2.1"
 qdrant-client = "^1.9.0"
 graphistry = "^0.33.5"
-tenacity = "^8.2.3"
+tenacity = "^9.0.0"
 weaviate-client = "4.6.7"
 scikit-learn = "^1.5.0"
-fastembed = "0.2.7"
 pypdf = "^4.1.0"
 neo4j = "^5.20.0"
 jinja2 = "^3.1.3"
 matplotlib = "^3.8.3"
 structlog = "^24.1.0"
 tiktoken = "0.7.0"
 langchain_text_splitters = "0.3.2"
 langsmith = "0.1.139"
 langdetect = "1.0.9"
 posthog = "^3.5.0"
 lancedb = "0.8.0"
-litellm = "1.38.10"
+litellm = "1.49.1"
 groq = "0.8.0"
 tantivy = "^0.22.0"
 tokenizers ="0.15.2"
 transformers ="4.39.0"
 python-multipart = "^0.0.9"
 langfuse = "^2.32.0"
 protobuf = "<5.0.0"
 pydantic-settings = "^2.2.1"
 anthropic = "^0.26.1"
 pdfplumber = "^0.11.1"
 sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
 fastapi-users = { version = "*", extras = ["sqlalchemy"] }
 asyncpg = "^0.29.0"

@@ -88,6 +86,11 @@ pytest-asyncio = "^0.21.1"
 coverage = "^7.3.2"
 mypy = "^1.7.1"
 notebook = "^7.1.1"
 deptry = "^0.20.0"
+debugpy = "1.8.2"
+pylint = "^3.0.3"
+ruff = "^0.2.2"
+tweepy = "4.14.0"

 [tool.poetry.group.docs.dependencies]
 mkdocs-material = "^9.5.42"