Cog 337 llama index support (#186)
* feat: Add support for LlamaIndex Document type Added support for LlamaIndex Document type Feature #COG-337 * docs: Add Jupyter Notebook for cognee with llama index document type Added Jupyter notebook which demonstrates cognee with LlamaIndex document type usage Docs #COG-337 * feat: Add metadata migration from LlamaIndex document type Allow usage of metadata from LlamaIndex documents Feature #COG-337 * refactor: Change llama index migration function name Change name of llama index function Refactor #COG-337 * chore: Add llama index core dependency Downgrade needed on tenacity and instructor modules to support llama index Chore #COG-337 * Feature: Add ingest_data_with_metadata task Added task that will have access to metadata if data is provided from different data ingestion tools Feature #COG-337 * docs: Add description on why specific type checking is done Explained why specific type checking is used instead of isinstance, as isinstance returns True for child classes as well Docs #COG-337 * fix: Add missing parameter to function call Added missing parameter to function call Fix #COG-337 * refactor: Move storing of data from async to sync function Moved data storing from async to sync Refactor #COG-337 * refactor: Pretend ingest_data was changed instead of having two tasks Refactor so ingest_data file was modified instead of having two ingest tasks Refactor #COG-337 * refactor: Use old name for data ingestion with metadata Merged new and old data ingestion tasks into one Refactor #COG-337 * refactor: Return ingest_data and save_data_to_storage Tasks Returned ingest_data and save_data_to_storage tasks Refactor #COG-337 * refactor: Return previous ingestion Tasks to add function Returned previous ingestion tasks to add function Refactor #COG-337 * fix: Remove dict and use string for search query Remove dictionary and use string for query in notebook and simple example Fix COG-337 * refactor: Add changes requested in pull request Added the following changes that were
requested in pull request: Added synchronize label, Made uniform syntax in if statement in workflow, fixed instructor dependency, added llama-index to be optional Refactor COG-337 * fix: Resolve issue with llama-index being mandatory Resolve issue with llama-index being mandatory to run cognee Fix COG-337 * fix: Add install of llama-index to notebook Removed additional references to llama-index from core cognee lib. Added llama-index-core install from notebook Fix COG-337 ---------
This commit is contained in:
parent
a63490b916
commit
d30adb53f3
16 changed files with 588 additions and 37 deletions
BIN
.DS_Store
vendored
BIN
.DS_Store
vendored
Binary file not shown.
63
.github/workflows/test_cognee_llama_index_notebook.yml
vendored
Normal file
63
.github/workflows/test_cognee_llama_index_notebook.yml
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
name: test | llama index notebook
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
types: [labeled, synchronize]
|
||||
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUNTIME__LOG_LEVEL: ERROR
|
||||
|
||||
jobs:
|
||||
get_docs_changes:
|
||||
name: docs changes
|
||||
uses: ./.github/workflows/get_docs_changes.yml
|
||||
|
||||
run_notebook_test:
|
||||
name: test
|
||||
needs: get_docs_changes
|
||||
# `github.event.label` is only present on `labeled` events, so checking it skips every `synchronize` run; inspect the labels currently on the pull request instead.
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && contains(github.event.pull_request.labels.*.name, 'run-checks')
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@master
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Install Poetry
|
||||
uses: snok/install-poetry@v1.3.2
|
||||
with:
|
||||
virtualenvs-create: true
|
||||
virtualenvs-in-project: true
|
||||
installer-parallel: true
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --no-interaction --all-extras --no-root
|
||||
poetry add jupyter --no-interaction
|
||||
|
||||
- name: Execute Jupyter Notebook
|
||||
env:
|
||||
ENV: 'dev'
|
||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
|
||||
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
||||
run: |
|
||||
poetry run jupyter nbconvert \
|
||||
--to notebook \
|
||||
--execute notebooks/cognee_llama_index.ipynb \
|
||||
--output executed_notebook.ipynb \
|
||||
--ExecutePreprocessor.timeout=1200
|
||||
BIN
cognee/.DS_Store
vendored
BIN
cognee/.DS_Store
vendored
Binary file not shown.
|
|
@ -21,4 +21,4 @@ async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_nam
|
|||
pipeline = run_tasks(tasks, data, "add_pipeline")
|
||||
|
||||
async for result in pipeline:
|
||||
print(result)
|
||||
print(result)
|
||||
|
|
@ -33,4 +33,4 @@ async def check_permission_on_documents(user: User, permission_type: str, docume
|
|||
has_permissions = all(document_id in resource_ids for document_id in document_ids)
|
||||
|
||||
if not has_permissions:
|
||||
raise PermissionDeniedException(f"User {user.username} does not have {permission_type} permission on documents")
|
||||
raise PermissionDeniedException(f"User {user.email} does not have {permission_type} permission on documents")
|
||||
|
|
|
|||
|
|
@ -1,2 +1,4 @@
|
|||
from .ingest_data import ingest_data
|
||||
from .save_data_to_storage import save_data_to_storage
|
||||
from .save_data_item_to_storage import save_data_item_to_storage
|
||||
from .save_data_item_with_metadata_to_storage import save_data_item_with_metadata_to_storage
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import cognee.modules.ingestion as ingestion
|
|||
|
||||
from cognee.shared.utils import send_telemetry
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.infrastructure.databases.relational import get_relational_config, get_relational_engine
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.data.methods import create_dataset
|
||||
from cognee.modules.users.permissions.methods import give_permission_on_document
|
||||
from .get_dlt_destination import get_dlt_destination
|
||||
|
|
|
|||
92
cognee/tasks/ingestion/ingest_data_with_metadata.py
Normal file
92
cognee/tasks/ingestion/ingest_data_with_metadata.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
import dlt
|
||||
import cognee.modules.ingestion as ingestion
|
||||
from typing import Any
|
||||
from cognee.shared.utils import send_telemetry
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
from cognee.modules.data.methods import create_dataset
|
||||
from cognee.modules.users.permissions.methods import give_permission_on_document
|
||||
from .get_dlt_destination import get_dlt_destination
|
||||
from .save_data_item_with_metadata_to_storage import save_data_item_with_metadata_to_storage
|
||||
|
||||
async def ingest_data_with_metadata(data: Any, dataset_name: str, user: User):
    """Store data items, register them in the relational database and load their metadata with dlt.

    Args:
        data: A single item or a list of items. Each item is passed to
            ``save_data_item_with_metadata_to_storage``, which decides how it is persisted.
        dataset_name: Name of the dataset the items are attached to; also used as
            the dlt ``dataset_name``.
        user: Owner of the data. Receives "read" and "write" permission on every item.

    Returns:
        Whatever ``pipeline.run`` returns for the ``file_metadata`` table load.
    """
    destination = get_dlt_destination()

    pipeline = dlt.pipeline(
        pipeline_name = "file_load_from_filesystem",
        destination = destination,
    )

    @dlt.resource(standalone = True, merge_key = "id")
    async def data_resources(data: Any, user: User):
        # Kept as local imports like the original code, but executed once per run
        # instead of once per data item.
        from sqlalchemy import select
        from cognee.modules.data.models import Data

        if not isinstance(data, list):
            # Convert data to a list as we work with lists further down.
            data = [data]

        # Process data
        for data_item in data:
            file_path = save_data_item_with_metadata_to_storage(data_item, dataset_name)

            # Classify the stored file. Only a leading "file://" scheme is stripped, so a
            # path that merely contains that text is left intact. The handle is released
            # as soon as the metadata has been read: nothing below touches the file, and
            # keeping it open would hold it across the awaits and the generator's yield.
            with open(file_path.removeprefix("file://"), mode = "rb") as file:
                classified_data = ingestion.classify(file)
                data_id = ingestion.identify(classified_data)
                file_metadata = classified_data.get_metadata()

            # Column values shared by the "update existing row" and "create row" paths.
            columns = {
                "name": file_metadata["name"],
                "raw_data_location": file_metadata["file_path"],
                "extension": file_metadata["extension"],
                "mime_type": file_metadata["mime_type"],
            }

            db_engine = get_relational_engine()

            async with db_engine.get_async_session() as session:
                dataset = await create_dataset(dataset_name, user.id, session)

                data_point = (await session.execute(
                    select(Data).filter(Data.id == data_id)
                )).scalar_one_or_none()

                if data_point is not None:
                    # NOTE(review): an existing row is refreshed but not appended to
                    # `dataset.data` - confirm that re-adding known data to another
                    # dataset is handled elsewhere.
                    for column, value in columns.items():
                        setattr(data_point, column, value)

                    await session.merge(data_point)
                    await session.commit()
                else:
                    data_point = Data(id = data_id, **columns)

                    dataset.data.append(data_point)
                    await session.commit()

            # Row loaded by dlt into the `file_metadata` table (merged on "id").
            yield {
                "id": data_id,
                "name": file_metadata["name"],
                "file_path": file_metadata["file_path"],
                "extension": file_metadata["extension"],
                "mime_type": file_metadata["mime_type"],
            }

            await give_permission_on_document(user, data_id, "read")
            await give_permission_on_document(user, data_id, "write")

    send_telemetry("cognee.add EXECUTION STARTED", user_id = user.id)
    run_info = pipeline.run(
        data_resources(data, user),
        table_name = "file_metadata",
        dataset_name = dataset_name,
        write_disposition = "merge",
    )
    send_telemetry("cognee.add EXECUTION COMPLETED", user_id = user.id)

    return run_info
|
||||
20
cognee/tasks/ingestion/save_data_item_to_storage.py
Normal file
20
cognee/tasks/ingestion/save_data_item_to_storage.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from typing import Union, BinaryIO
|
||||
from cognee.modules.ingestion import save_data_to_file
|
||||
|
||||
def save_data_item_to_storage(data_item: Union[BinaryIO, str], dataset_name: str) -> str:
    """Persist one data item and return the path of the file that holds it.

    Args:
        data_item: One of
            * an upload object exposing ``file`` and ``filename`` attributes,
            * a string starting with ``file://`` or ``/``, treated as the path of an existing file,
            * any other string, treated as raw text to be written to a new file.
        dataset_name: Dataset name forwarded to ``save_data_to_file``.

    Returns:
        Path of the file. For path inputs this is the input with a leading
        ``file://`` scheme removed.

    Raises:
        ValueError: If ``data_item`` is none of the supported kinds.
    """
    # data is a file object coming from upload.
    if hasattr(data_item, "file"):
        return save_data_to_file(data_item.file, dataset_name, filename=data_item.filename)

    if isinstance(data_item, str):
        # data is a file path
        if data_item.startswith(("file://", "/")):
            # Strip only the leading scheme; `replace` would also delete any later
            # "file://" occurrence inside the path.
            return data_item.removeprefix("file://")

        # data is text
        return save_data_to_file(data_item, dataset_name)

    raise ValueError(f"Data type not supported: {type(data_item)}")
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
from typing import Union, BinaryIO, Any
|
||||
from cognee.modules.ingestion import save_data_to_file
|
||||
|
||||
def save_data_item_with_metadata_to_storage(data_item: Union[BinaryIO, str, Any], dataset_name: str) -> str:
    """Persist one data item, including LlamaIndex documents, and return its file path.

    Args:
        data_item: One of
            * a LlamaIndex ``Document`` (or subclass), handled by ``get_data_from_llama_index``,
            * an upload object exposing ``file`` and ``filename`` attributes,
            * a string starting with ``file://`` or ``/``, treated as the path of an existing file,
            * any other string, treated as raw text to be written to a new file.
            ``Any`` is accepted because the llama_index types are optional.
        dataset_name: Dataset name forwarded to the storage helpers.

    Returns:
        Path of the file. For path inputs this is the input with a leading
        ``file://`` scheme removed.

    Raises:
        ValueError: If ``data_item`` is none of the supported kinds.
    """
    # llama_index is an optional dependency: import it lazily and tolerate its absence
    # so plain strings and uploads keep working when the extra is not installed.
    try:
        from llama_index.core import Document
    except ImportError:
        Document = None

    # Check if data is of type Document or any of its subclasses
    if Document is not None and isinstance(data_item, Document):
        # Imported here because transform_data itself imports llama_index at module level.
        from .transform_data import get_data_from_llama_index

        return get_data_from_llama_index(data_item, dataset_name)

    # data is a file object coming from upload.
    if hasattr(data_item, "file"):
        return save_data_to_file(data_item.file, dataset_name, filename=data_item.filename)

    if isinstance(data_item, str):
        # data is a file path
        if data_item.startswith(("file://", "/")):
            # Strip only the leading scheme; `replace` would also delete any later
            # "file://" occurrence inside the path.
            return data_item.removeprefix("file://")

        # data is text
        return save_data_to_file(data_item, dataset_name)

    raise ValueError(f"Data type not supported: {type(data_item)}")
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
from typing import Union, BinaryIO
|
||||
from cognee.modules.ingestion import save_data_to_file
|
||||
from cognee.tasks.ingestion.save_data_item_to_storage import save_data_item_to_storage
|
||||
|
||||
def save_data_to_storage(data: Union[BinaryIO, str], dataset_name) -> list[str]:
|
||||
if not isinstance(data, list):
|
||||
|
|
@ -9,19 +9,7 @@ def save_data_to_storage(data: Union[BinaryIO, str], dataset_name) -> list[str]:
|
|||
file_paths = []
|
||||
|
||||
for data_item in data:
|
||||
# data is a file object coming from upload.
|
||||
if hasattr(data_item, "file"):
|
||||
file_path = save_data_to_file(data_item.file, dataset_name, filename = data_item.filename)
|
||||
file_paths.append(file_path)
|
||||
|
||||
if isinstance(data_item, str):
|
||||
# data is a file path
|
||||
if data_item.startswith("file://") or data_item.startswith("/"):
|
||||
file_paths.append(data_item.replace("file://", ""))
|
||||
|
||||
# data is text
|
||||
else:
|
||||
file_path = save_data_to_file(data_item, dataset_name)
|
||||
file_paths.append(file_path)
|
||||
file_path = save_data_item_to_storage(data_item, dataset_name)
|
||||
file_paths.append(file_path)
|
||||
|
||||
return file_paths
|
||||
|
|
|
|||
18
cognee/tasks/ingestion/transform_data.py
Normal file
18
cognee/tasks/ingestion/transform_data.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
from llama_index.core import Document
|
||||
from llama_index.core.schema import ImageDocument
|
||||
from cognee.modules.ingestion import save_data_to_file
|
||||
from typing import Union
|
||||
|
||||
def get_data_from_llama_index(data_point: Union[Document, ImageDocument], dataset_name: str) -> str:
    """Return a file path holding the content of a LlamaIndex document.

    Args:
        data_point: A LlamaIndex ``Document`` or ``ImageDocument`` (subclasses included).
        dataset_name: Dataset name forwarded to ``save_data_to_file`` when the
            document text has to be written to a new file.

    Returns:
        For an ``ImageDocument``: its ``image_path`` when set. For other documents:
        the ``file_path`` metadata entry when present. In both cases, when no path
        is available, the path of a new file containing the document text.

    Raises:
        ValueError: If ``data_point`` is not a LlamaIndex document.
    """
    # ImageDocument is a child class of Document, so it has to be tested first.
    # The previous exact-type comparison returned None for every other Document
    # subclass, although the caller accepts those via isinstance.
    if isinstance(data_point, ImageDocument):
        if data_point.image_path is None:
            return save_data_to_file(data_point.text, dataset_name)
        return data_point.image_path

    if isinstance(data_point, Document):
        file_path = data_point.metadata.get("file_path")
        if file_path is None:
            file_path = save_data_to_file(data_point.text, dataset_name)
        return file_path

    raise ValueError(f"Data type not supported: {type(data_point)}")
|
||||
|
|
@ -27,8 +27,7 @@ async def main():
|
|||
|
||||
# Query cognee for insights on the added text
|
||||
search_results = await cognee.search(
|
||||
SearchType.INSIGHTS,
|
||||
{'query': 'Tell me about NLP'}
|
||||
SearchType.INSIGHTS, query='Tell me about NLP'
|
||||
)
|
||||
|
||||
# Display search results
|
||||
|
|
|
|||
229
notebooks/cognee_llama_index.ipynb
Normal file
229
notebooks/cognee_llama_index.ipynb
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cognee GraphRAG with LlamaIndex Documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install llama-index-core"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Data\n",
|
||||
"\n",
|
||||
"We will use a sample news article dataset retrieved from Diffbot, which Tomaz has conveniently made available on GitHub for easy access.\n",
|
||||
"\n",
|
||||
"The dataset contains 2,500 samples; for ease of experimentation, we will use 5 of these samples, which include the `title` and `text` of news articles."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from llama_index.core import Document\n",
|
||||
"\n",
|
||||
"news = pd.read_csv(\n",
|
||||
" \"https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/news_articles.csv\"\n",
|
||||
")[:5]\n",
|
||||
"\n",
|
||||
"news.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prepare documents as required by LlamaIndex"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"documents = [\n",
|
||||
" Document(text=f\"{row['title']}: {row['text']}\")\n",
|
||||
" for i, row in news.iterrows()\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set environment variables"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Setting environment variables\n",
|
||||
"if \"GRAPHISTRY_USERNAME\" not in os.environ: \n",
|
||||
" os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
|
||||
"\n",
|
||||
"if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n",
|
||||
" os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
|
||||
"\n",
|
||||
"if \"LLM_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
"# \"neo4j\" or \"networkx\"\n",
|
||||
"os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" \n",
|
||||
"# Not needed if using networkx\n",
|
||||
"#GRAPH_DATABASE_URL=\"\"\n",
|
||||
"#GRAPH_DATABASE_USERNAME=\"\"\n",
|
||||
"#GRAPH_DATABASE_PASSWORD=\"\"\n",
|
||||
"\n",
|
||||
"# \"qdrant\", \"weaviate\" or \"lancedb\"\n",
|
||||
"os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" \n",
|
||||
"# Not needed if using \"lancedb\"\n",
|
||||
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
|
||||
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
|
||||
"\n",
|
||||
"# Database provider\n",
|
||||
"os.environ[\"DB_PROVIDER\"]=\"sqlite\" # or \"postgres\"\n",
|
||||
"\n",
|
||||
"# Database name\n",
|
||||
"os.environ[\"DB_NAME\"]=\"cognee_db\"\n",
|
||||
"\n",
|
||||
"# Postgres specific parameters (Only if Postgres is run)\n",
|
||||
"# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
|
||||
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
|
||||
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
|
||||
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run Cognee with LlamaIndex Documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Union, BinaryIO\n",
|
||||
"\n",
|
||||
"from cognee.infrastructure.databases.vector.pgvector import create_db_and_tables as create_pgvector_db_and_tables\n",
|
||||
"from cognee.infrastructure.databases.relational import create_db_and_tables as create_relational_db_and_tables\n",
|
||||
"from cognee.infrastructure.databases.graph import get_graph_engine\n",
|
||||
"from cognee.shared.utils import render_graph\n",
|
||||
"from cognee.modules.users.models import User\n",
|
||||
"from cognee.modules.users.methods import get_default_user\n",
|
||||
"from cognee.tasks.ingestion.ingest_data_with_metadata import ingest_data_with_metadata\n",
|
||||
"import cognee\n",
|
||||
"\n",
|
||||
"# Create a clean slate for cognee -- reset data and system state\n",
|
||||
"await cognee.prune.prune_data()\n",
|
||||
"await cognee.prune.prune_system(metadata=True)\n",
|
||||
"\n",
|
||||
"# Add the LlamaIndex documents, and make it available for cognify\n",
|
||||
"async def add(data: Union[BinaryIO, list[BinaryIO], str, list[str]], dataset_name: str = \"main_dataset\", user: User = None):\n",
|
||||
" await create_relational_db_and_tables()\n",
|
||||
" await create_pgvector_db_and_tables()\n",
|
||||
"\n",
|
||||
" if user is None:\n",
|
||||
" user = await get_default_user()\n",
|
||||
"\n",
|
||||
" await ingest_data_with_metadata(data, dataset_name, user)\n",
|
||||
"\n",
|
||||
"await add(documents)\n",
|
||||
"\n",
|
||||
"# Use LLMs and cognee to create knowledge graph\n",
|
||||
"await cognee.cognify()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query Cognee for summaries related to data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cognee import SearchType\n",
|
||||
"\n",
|
||||
"# Query cognee for summaries\n",
|
||||
"search_results = await cognee.search(\n",
|
||||
" SearchType.SUMMARIES, query=\"What are the main news discussed in the document?\"\n",
|
||||
")\n",
|
||||
"# Display search results\n",
|
||||
"print(\"\\n Summary of main news discussed:\\n\")\n",
|
||||
"print(search_results[0][\"text\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Render Knowledge Graph generated from provided data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import graphistry\n",
|
||||
"\n",
|
||||
"# Get graph\n",
|
||||
"graphistry.login(username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\"))\n",
|
||||
"graph_engine = await get_graph_engine()\n",
|
||||
"\n",
|
||||
"graph_url = await render_graph(graph_engine.graph)\n",
|
||||
"print(graph_url)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
138
poetry.lock
generated
138
poetry.lock
generated
|
|
@ -1125,6 +1125,21 @@ files = [
|
|||
docs = ["ipython", "matplotlib", "numpydoc", "sphinx"]
|
||||
tests = ["pytest", "pytest-cov", "pytest-xdist"]
|
||||
|
||||
[[package]]
|
||||
name = "dataclasses-json"
|
||||
version = "0.6.7"
|
||||
description = "Easily serialize dataclasses to and from JSON."
|
||||
optional = true
|
||||
python-versions = "<4.0,>=3.7"
|
||||
files = [
|
||||
{file = "dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a"},
|
||||
{file = "dataclasses_json-0.6.7.tar.gz", hash = "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
marshmallow = ">=3.18.0,<4.0.0"
|
||||
typing-inspect = ">=0.4.0,<1"
|
||||
|
||||
[[package]]
|
||||
name = "datasets"
|
||||
version = "3.1.0"
|
||||
|
|
@ -1220,6 +1235,23 @@ files = [
|
|||
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deprecated"
|
||||
version = "1.2.14"
|
||||
description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
|
||||
optional = true
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
files = [
|
||||
{file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"},
|
||||
{file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
wrapt = ">=1.10,<2"
|
||||
|
||||
[package.extras]
|
||||
dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"]
|
||||
|
||||
[[package]]
|
||||
name = "deprecation"
|
||||
version = "2.1.0"
|
||||
|
|
@ -1275,6 +1307,17 @@ files = [
|
|||
graph = ["objgraph (>=1.7.2)"]
|
||||
profile = ["gprof2dot (>=2022.7.29)"]
|
||||
|
||||
[[package]]
|
||||
name = "dirtyjson"
|
||||
version = "1.0.8"
|
||||
description = "JSON decoder for Python that can extract data from the muck"
|
||||
optional = true
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"},
|
||||
{file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "distro"
|
||||
version = "1.9.0"
|
||||
|
|
@ -2396,37 +2439,35 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "instructor"
|
||||
version = "1.6.3"
|
||||
version = "1.5.2"
|
||||
description = "structured outputs for llm"
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.9"
|
||||
files = [
|
||||
{file = "instructor-1.6.3-py3-none-any.whl", hash = "sha256:a8f973fea621c0188009b65a3429a526c24aeb249fc24100b605ea496e92d622"},
|
||||
{file = "instructor-1.6.3.tar.gz", hash = "sha256:399cd90e30b5bc7cbd47acd7399c9c4e84926a96c20c8b5d00c5a04b41ed41ab"},
|
||||
{file = "instructor-1.5.2-py3-none-any.whl", hash = "sha256:da25abbf1ab792fb094992f1d9ce593e26fe458cb1f9a8e7ebf9d68f3f2c757a"},
|
||||
{file = "instructor-1.5.2.tar.gz", hash = "sha256:fdd5ccbca21b4c558a24e9ba12c84afd907e65153a39d035f47c25800011a977"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.9.1,<4.0.0"
|
||||
docstring-parser = ">=0.16,<0.17"
|
||||
jinja2 = ">=3.1.4,<4.0.0"
|
||||
jiter = ">=0.5.0,<0.6.0"
|
||||
openai = ">=1.52.0,<2.0.0"
|
||||
openai = ">=1.45.0,<2.0.0"
|
||||
pydantic = ">=2.8.0,<3.0.0"
|
||||
pydantic-core = ">=2.18.0,<3.0.0"
|
||||
rich = ">=13.7.0,<14.0.0"
|
||||
tenacity = ">=9.0.0,<10.0.0"
|
||||
tenacity = ">=8.4.1,<9.0.0"
|
||||
typer = ">=0.9.0,<1.0.0"
|
||||
|
||||
[package.extras]
|
||||
anthropic = ["anthropic (>=0.36.2,<0.37.0)", "xmltodict (>=0.13.0,<0.14.0)"]
|
||||
anthropic = ["anthropic (>=0.34.0,<0.35.0)", "xmltodict (>=0.13.0,<0.14.0)"]
|
||||
cerebras-cloud-sdk = ["cerebras_cloud_sdk (>=1.5.0,<2.0.0)"]
|
||||
cohere = ["cohere (>=5.1.8,<6.0.0)"]
|
||||
fireworks-ai = ["fireworks-ai (>=0.15.4,<0.16.0)"]
|
||||
google-generativeai = ["google-generativeai (>=0.8.2,<0.9.0)"]
|
||||
groq = ["groq (>=0.4.2,<0.5.0)"]
|
||||
litellm = ["litellm (>=1.35.31,<2.0.0)"]
|
||||
mistralai = ["mistralai (>=1.0.3,<2.0.0)"]
|
||||
test-docs = ["anthropic (>=0.36.2,<0.37.0)", "cohere (>=5.1.8,<6.0.0)", "diskcache (>=5.6.3,<6.0.0)", "fastapi (>=0.109.2,<0.110.0)", "groq (>=0.4.2,<0.5.0)", "litellm (>=1.35.31,<2.0.0)", "mistralai (>=1.0.3,<2.0.0)", "pandas (>=2.2.0,<3.0.0)", "pydantic_extra_types (>=2.6.0,<3.0.0)", "redis (>=5.0.1,<6.0.0)", "tabulate (>=0.9.0,<0.10.0)"]
|
||||
test-docs = ["anthropic (>=0.34.0,<0.35.0)", "cohere (>=5.1.8,<6.0.0)", "diskcache (>=5.6.3,<6.0.0)", "fastapi (>=0.109.2,<0.110.0)", "groq (>=0.4.2,<0.5.0)", "litellm (>=1.35.31,<2.0.0)", "mistralai (>=1.0.3,<2.0.0)", "pandas (>=2.2.0,<3.0.0)", "pydantic_extra_types (>=2.6.0,<3.0.0)", "redis (>=5.0.1,<6.0.0)", "tabulate (>=0.9.0,<0.10.0)"]
|
||||
vertexai = ["google-cloud-aiplatform (>=1.53.0,<2.0.0)", "jsonref (>=1.1.0,<2.0.0)"]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3246,6 +3287,40 @@ tokenizers = "*"
|
|||
extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"]
|
||||
proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "llama-index-core"
|
||||
version = "0.11.22"
|
||||
description = "Interface between LLMs and your data"
|
||||
optional = true
|
||||
python-versions = "<4.0,>=3.8.1"
|
||||
files = [
|
||||
{file = "llama_index_core-0.11.22-py3-none-any.whl", hash = "sha256:5c59d95dec9bb0727f25b03de89392c69076b2e4aaa6acbd8773de1f07502e9e"},
|
||||
{file = "llama_index_core-0.11.22.tar.gz", hash = "sha256:ddc30b9c873495de40ad8278d0c894ba09f32f6aa7fc638012b1b22b74c32553"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.8.6,<4.0.0"
|
||||
dataclasses-json = "*"
|
||||
deprecated = ">=1.2.9.3"
|
||||
dirtyjson = ">=1.0.8,<2.0.0"
|
||||
fsspec = ">=2023.5.0"
|
||||
httpx = "*"
|
||||
nest-asyncio = ">=1.5.8,<2.0.0"
|
||||
networkx = ">=3.0"
|
||||
nltk = ">3.8.1"
|
||||
numpy = "<2.0.0"
|
||||
pillow = ">=9.0.0"
|
||||
pydantic = ">=2.7.0,<3.0.0"
|
||||
PyYAML = ">=6.0.1"
|
||||
requests = ">=2.31.0"
|
||||
SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]}
|
||||
tenacity = ">=8.2.0,<8.4.0 || >8.4.0,<9.0.0"
|
||||
tiktoken = ">=0.3.3"
|
||||
tqdm = ">=4.66.1,<5.0.0"
|
||||
typing-extensions = ">=4.5.0"
|
||||
typing-inspect = ">=0.8.0"
|
||||
wrapt = "*"
|
||||
|
||||
[[package]]
|
||||
name = "makefun"
|
||||
version = "1.15.6"
|
||||
|
|
@ -3388,6 +3463,25 @@ files = [
|
|||
{file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "marshmallow"
|
||||
version = "3.23.1"
|
||||
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "marshmallow-3.23.1-py3-none-any.whl", hash = "sha256:fece2eb2c941180ea1b7fcbd4a83c51bfdd50093fdd3ad2585ee5e1df2508491"},
|
||||
{file = "marshmallow-3.23.1.tar.gz", hash = "sha256:3a8dfda6edd8dcdbf216c0ede1d1e78d230a6dc9c5a088f58c4083b974a0d468"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
packaging = ">=17.0"
|
||||
|
||||
[package.extras]
|
||||
dev = ["marshmallow[tests]", "pre-commit (>=3.5,<5.0)", "tox"]
|
||||
docs = ["alabaster (==1.0.0)", "autodocsumm (==0.2.14)", "sphinx (==8.1.3)", "sphinx-issues (==5.0.0)", "sphinx-version-warning (==1.1.2)"]
|
||||
tests = ["pytest", "simplejson"]
|
||||
|
||||
[[package]]
|
||||
name = "matplotlib"
|
||||
version = "3.9.2"
|
||||
|
|
@ -4885,7 +4979,7 @@ test = ["pytest", "pytest-xdist", "setuptools"]
|
|||
name = "psycopg2"
|
||||
version = "2.9.10"
|
||||
description = "psycopg2 - Python-PostgreSQL Database Adapter"
|
||||
optional = false
|
||||
optional = true
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "psycopg2-2.9.10-cp310-cp310-win32.whl", hash = "sha256:5df2b672140f95adb453af93a7d669d7a7bf0a56bcd26f1502329166f4a61716"},
|
||||
|
|
@ -6551,13 +6645,13 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7
|
|||
|
||||
[[package]]
|
||||
name = "tenacity"
|
||||
version = "9.0.0"
|
||||
version = "8.5.0"
|
||||
description = "Retry code until it succeeds"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"},
|
||||
{file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"},
|
||||
{file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"},
|
||||
{file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
|
|
@ -6946,6 +7040,21 @@ files = [
|
|||
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-inspect"
|
||||
version = "0.9.0"
|
||||
description = "Runtime inspection utilities for typing module."
|
||||
optional = true
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"},
|
||||
{file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
mypy-extensions = ">=0.3.0"
|
||||
typing-extensions = ">=3.7.4"
|
||||
|
||||
[[package]]
|
||||
name = "tzdata"
|
||||
version = "2024.2"
|
||||
|
|
@ -7513,6 +7622,7 @@ type = ["pytest-mypy"]
|
|||
[extras]
|
||||
cli = []
|
||||
filesystem = ["botocore"]
|
||||
llama-index = ["llama-index-core"]
|
||||
neo4j = ["neo4j"]
|
||||
notebook = []
|
||||
postgres = ["asyncpg", "pgvector", "psycopg2"]
|
||||
|
|
@ -7522,4 +7632,4 @@ weaviate = ["weaviate-client"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.9.0,<3.12"
|
||||
content-hash = "57a154a7bbdd990e0fbe2313fa24c412dad98e47b9cd05e41bf378a3f597713f"
|
||||
content-hash = "f5874af8364839dd2a362b6b3209c4aae108f30dcc27be43d0d07f7b28160eda"
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ boto3 = "^1.26.125"
|
|||
botocore="^1.35.54"
|
||||
gunicorn = "^20.1.0"
|
||||
sqlalchemy = "2.0.35"
|
||||
instructor = "1.6.3"
|
||||
instructor = "1.5.2"
|
||||
networkx = "^3.2.1"
|
||||
aiosqlite = "^0.20.0"
|
||||
pandas = "2.0.3"
|
||||
|
|
@ -45,7 +45,7 @@ dlt = {extras = ["sqlalchemy"], version = "^1.3.0"}
|
|||
aiofiles = "^23.2.1"
|
||||
qdrant-client = "^1.9.0"
|
||||
graphistry = "^0.33.5"
|
||||
tenacity = "^9.0.0"
|
||||
tenacity = "^8.4.1"
|
||||
weaviate-client = "4.6.7"
|
||||
scikit-learn = "^1.5.0"
|
||||
pypdf = "^4.1.0"
|
||||
|
|
@ -68,7 +68,8 @@ fastapi-users = {version = "*", extras = ["sqlalchemy"]}
|
|||
alembic = "^1.13.3"
|
||||
asyncpg = "^0.29.0"
|
||||
pgvector = "^0.3.5"
|
||||
psycopg2 = "^2.9.10"
|
||||
psycopg2 = {version = "^2.9.10", optional = true}
|
||||
llama-index-core = {version = "^0.11.22", optional = true}
|
||||
|
||||
[tool.poetry.extras]
|
||||
filesystem = ["s3fs", "botocore"]
|
||||
|
|
@ -78,6 +79,7 @@ qdrant = ["qdrant-client"]
|
|||
neo4j = ["neo4j"]
|
||||
postgres = ["psycopg2", "pgvector", "asyncpg"]
|
||||
notebook = ["ipykernel", "overrides", "ipywidgets", "jupyterlab", "jupyterlab_widgets", "jupyterlab-server", "jupyterlab-git"]
|
||||
llama-index = ["llama-index-core"]
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue