Merge remote-tracking branch 'origin/main' into feat/COG-184-add-falkordb

Commit 62022a85f3
9 changed files with 1477 additions and 1430 deletions
.github/workflows/mkdocs.yml (vendored): 19 changes
@@ -17,22 +17,21 @@ permissions:
  contents: write

jobs:
  # get_docs_changes:
  #   name: run docs changes
  #   uses: ./.github/workflows/get_docs_changes.yml
  get_docs_changes:
    name: run docs changes
    uses: ./.github/workflows/get_docs_changes.yml
  deploy:
    runs-on: ubuntu-latest
    # needs: get_docs_changes
    needs: get_docs_changes
    if: needs.get_docs_changes.outputs.changes_outside_docs == 'false'

    steps:
      - name: Checkout code
        uses: actions/checkout@master

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.1

      - name: Use output
        run: echo "The stage is finished"

@@ -41,7 +40,6 @@ jobs:
        with:
          python-version: '3.11.x'

      - name: Install APT packages
        run: |
          sudo apt-get update &&

@@ -49,9 +47,10 @@ jobs:
      - name: Install via Poetry
        run: poetry install --with dev,docs
        env:
          GH_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}

      - name: Build and deploy MkDocs
        run: poetry run mkdocs gh-deploy --force
        run: poetry run mkdocs gh-deploy --force
        env:
          DOCS_SEGMENT_KEY: ${{ secrets.DOCS_SEGMENT_KEY }}
.github/workflows/test_notebook.yml (vendored, new file): 61 changes
@@ -0,0 +1,61 @@
name: test | notebook

on:
  pull_request:
    branches:
      - main
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  RUNTIME__LOG_LEVEL: ERROR

jobs:
  get_docs_changes:
    name: docs changes
    uses: ./.github/workflows/get_docs_changes.yml

  run_notebook_test:
    name: test
    needs: get_docs_changes
    if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash
    steps:
      - name: Check out
        uses: actions/checkout@master

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11.x'

      - name: Install Poetry
        uses: snok/install-poetry@v1.3.2
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      - name: Install dependencies
        run: |
          poetry install --no-interaction
          poetry add jupyter --no-interaction

      - name: Execute Jupyter Notebook
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
          GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
        run: |
          poetry run jupyter nbconvert \
            --to notebook \
            --execute notebooks/cognee_demo.ipynb \
            --output executed_notebook.ipynb \
            --ExecutePreprocessor.timeout=1200
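The "Execute Jupyter Notebook" step above shells out to nbconvert; for local debugging, roughly the same run can be reproduced in Python with nbclient, the library nbconvert uses for execution. This is only a sketch under that assumption; the path, output name, and timeout mirror the workflow flags.

import nbformat
from nbclient import NotebookClient

# Load the notebook the CI job executes.
nb = nbformat.read("notebooks/cognee_demo.ipynb", as_version=4)

# 1200 seconds mirrors --ExecutePreprocessor.timeout=1200 from the workflow.
client = NotebookClient(nb, timeout=1200)
client.execute()

# Same output name the workflow passes to --output.
nbformat.write(nb, "executed_notebook.ipynb")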
@@ -200,7 +200,7 @@ Cognee supports a variety of tools and services for different operations:

## Demo

Check out our demo notebook [here](https://github.com/topoteretes/cognee/blob/main/notebooks/cognee%20-%20Get%20Started.ipynb)
Check out our demo notebook [here](https://github.com/topoteretes/cognee/blob/main/notebooks/cognee_demo.ipynb)
@@ -1,7 +1,9 @@
from os import path
from typing import AsyncGenerator
from uuid import UUID
from typing import Optional
from typing import AsyncGenerator, List
from contextlib import asynccontextmanager
from sqlalchemy import text, select
from sqlalchemy import text, select, MetaData, Table
from sqlalchemy.orm import joinedload
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
@@ -50,11 +52,14 @@ class SQLAlchemyAdapter():
            await connection.execute(text(f"CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} ({', '.join(fields_query_parts)});"))
            await connection.close()

    async def delete_table(self, table_name: str):
    async def delete_table(self, table_name: str, schema_name: Optional[str] = "public"):
        async with self.engine.begin() as connection:
            await connection.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE;"))
            await connection.close()
            if self.engine.dialect.name == "sqlite":
                # SQLite doesn't support schema namespaces or the CASCADE keyword.
                # However, a foreign key constraint can be defined with ON DELETE CASCADE during table creation.
                await connection.execute(text(f"DROP TABLE IF EXISTS {table_name};"))
            else:
                await connection.execute(text(f"DROP TABLE IF EXISTS {schema_name}.{table_name} CASCADE;"))

    async def insert_data(self, schema_name: str, table_name: str, data: list[dict]):
        columns = ", ".join(data[0].keys())
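As the new comment notes, SQLite has no DROP TABLE ... CASCADE, so cascading cleanup there has to come from foreign keys declared when the tables are created. A minimal illustrative SQLAlchemy definition follows; the table and column names are hypothetical, not taken from this repository.

from sqlalchemy import MetaData, Table, Column, Integer, ForeignKey

metadata = MetaData()

parent = Table("parent", metadata, Column("id", Integer, primary_key=True))
child = Table(
    "child",
    metadata,
    Column("id", Integer, primary_key=True),
    # Rows in "child" are removed when their referenced parent row is deleted.
    Column("parent_id", Integer, ForeignKey("parent.id", ondelete="CASCADE")),
)
# Note: SQLite enforces this only when the connection runs PRAGMA foreign_keys=ON.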
@@ -65,6 +70,55 @@ class SQLAlchemyAdapter():
            await connection.execute(insert_query, data)
            await connection.close()

    async def get_schema_list(self) -> List[str]:
        """
        Return a list of all schema names in the database.
        """
        if self.engine.dialect.name == "postgresql":
            async with self.engine.begin() as connection:
                result = await connection.execute(
                    text("""
                        SELECT schema_name FROM information_schema.schemata
                        WHERE schema_name NOT IN ('pg_catalog', 'pg_toast', 'information_schema');
                    """)
                )
                return [schema[0] for schema in result.fetchall()]
        return []

    async def delete_data_by_id(self, table_name: str, data_id: UUID, schema_name: Optional[str] = "public"):
        """
        Delete data in the given table based on id. The table must have an id column.
        """
        async with self.get_async_session() as session:
            TableModel = await self.get_table(table_name, schema_name)
            await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
            await session.commit()

    async def get_table(self, table_name: str, schema_name: Optional[str] = "public") -> Table:
        """
        Dynamically load a table using the given table name and schema name.
        """
        async with self.engine.begin() as connection:
            if self.engine.dialect.name == "sqlite":
                # Load the schema information into the MetaData object.
                await connection.run_sync(Base.metadata.reflect)
                if table_name in Base.metadata.tables:
                    return Base.metadata.tables[table_name]
                else:
                    raise ValueError(f"Table '{table_name}' not found.")
            else:
                # Create a MetaData instance to load table information.
                metadata = MetaData()
                # Load table information from the schema into MetaData.
                await connection.run_sync(metadata.reflect, schema=schema_name)
                # Define the full table name.
                full_table_name = f"{schema_name}.{table_name}"
                # Check if the table is in the list of tables for the given schema.
                if full_table_name in metadata.tables:
                    return metadata.tables[full_table_name]
                raise ValueError(f"Table '{full_table_name}' not found.")

    async def get_data(self, table_name: str, filters: dict = None):
        async with self.engine.begin() as connection:
            query = f"SELECT * FROM {table_name}"
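For orientation, a minimal usage sketch of the three helpers added above. The adapter instance, table name, and row id are hypothetical; only the method signatures come from this diff.

from uuid import UUID

async def remove_row(db, row_id: UUID):
    # Reflect the table from the default "public" schema and inspect it.
    table = await db.get_table("data", schema_name="public")
    print(f"Reflected '{table.name}' with columns {list(table.c.keys())}")

    # Delete one row by primary key; delete_data_by_id commits the session itself.
    await db.delete_data_by_id("data", row_id, schema_name="public")

    # Non-empty only on PostgreSQL; other dialects return an empty list.
    print(await db.get_schema_list())

# e.g. asyncio.run(remove_row(adapter, UUID("00000000-0000-0000-0000-000000000000")))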
@@ -119,12 +173,17 @@ class SQLAlchemyAdapter():
                self.db_path = None
            else:
                async with self.engine.begin() as connection:
                    # Load the schema information into the MetaData object.
                    await connection.run_sync(Base.metadata.reflect)
                    for table in Base.metadata.sorted_tables:
                        drop_table_query = text(f"DROP TABLE IF EXISTS {table.name} CASCADE")
                        await connection.execute(drop_table_query)

                    schema_list = await self.get_schema_list()
                    # Create a MetaData instance to load table information.
                    metadata = MetaData()
                    # Drop all tables from all schemas.
                    for schema_name in schema_list:
                        # Load the schema information into the MetaData object.
                        await connection.run_sync(metadata.reflect, schema=schema_name)
                        for table in metadata.sorted_tables:
                            drop_table_query = text(f"DROP TABLE IF EXISTS {schema_name}.{table.name} CASCADE")
                            await connection.execute(drop_table_query)
                        metadata.clear()
        except Exception as e:
            print(f"Error deleting database: {e}")
docs/overrides/partials/integrations/analytics/segment.html (new file): 15 changes
@@ -0,0 +1,15 @@
<script>
  var segmentKey = "{{ config.extra.analytics.key }}"

  /* Wait for page to load and application to mount */
  document.addEventListener("DOMContentLoaded", function() {
    try {
      !function(){var i="analytics",analytics=window[i]=window[i]||[];if(!analytics.initialize)if(analytics.invoked)window.console&&console.error&&console.error("Segment snippet included twice.");else{analytics.invoked=!0;analytics.methods=["trackSubmit","trackClick","trackLink","trackForm","pageview","identify","reset","group","track","ready","alias","debug","page","screen","once","off","on","addSourceMiddleware","addIntegrationMiddleware","setAnonymousId","addDestinationMiddleware","register"];analytics.factory=function(e){return function(){if(window[i].initialized)return window[i][e].apply(window[i],arguments);var n=Array.prototype.slice.call(arguments);if(["track","screen","alias","group","page","identify"].indexOf(e)>-1){var c=document.querySelector("link[rel='canonical']");n.push({__t:"bpc",c:c&&c.getAttribute("href")||void 0,p:location.pathname,u:location.href,s:location.search,t:document.title,r:document.referrer})}n.unshift(e);analytics.push(n);return analytics}};for(var n=0;n<analytics.methods.length;n++){var key=analytics.methods[n];analytics[key]=analytics.factory(key)}analytics.load=function(key,n){var t=document.createElement("script");t.type="text/javascript";t.async=!0;t.setAttribute("data-global-segment-analytics-key",i);t.src="https://cdn.segment.com/analytics.js/v1/" + key + "/analytics.min.js";var r=document.getElementsByTagName("script")[0];r.parentNode.insertBefore(t,r);analytics._loadOptions=n};analytics._writeKey=segmentKey;;analytics.SNIPPET_VERSION="5.2.0";
      analytics.load(segmentKey);
      analytics.page();
      }}();
    } catch (error) {
      console.error("Failed to load Segment analytics", error);
    }
  });
</script>
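For context: the segmentKey template variable above is filled from config.extra.analytics.key, which the mkdocs.yml change later in this diff sets to !ENV DOCS_SEGMENT_KEY, and the mkdocs workflow exports DOCS_SEGMENT_KEY from a repository secret, so the write key never has to live in the repository.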
mkdocs.yml: 30 changes
@@ -63,6 +63,11 @@ theme:
    code: Roboto Mono
  custom_dir: docs/overrides

extra:
  analytics:
    provider: segment
    key: !ENV DOCS_SEGMENT_KEY

extra_css:
  - stylesheets/extra.css
@@ -120,25 +125,24 @@ nav:
      - Add data: 'data_ingestion.md'
      - Create LLM enriched data store: 'templates.md'
      - Explore data: 'search.md'
  # - SDK:
  #   - Overview: 'sdk_overview.md'
  - Configuration: 'configuration.md'
  - What is cognee:
    - Introduction: 'conceptual_overview.md'
  - API reference: 'api_reference.md'
  - Blog:
    - "blog/index.md"

plugins:
  - mkdocs-jupyter:
      ignore_h1_titles: true
      execute: false
  - social
  - search:
      separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
  - minify:
      minify_html: true
      minify_js: true
      minify_css: true
      htmlmin_opts:
        remove_comments: true
      cache_safe: true
  - mkdocstrings:
      handlers:
        python:
@@ -146,13 +150,3 @@ plugins:
        members_order: alphabetical
        allow_inspection: true
        show_bases: true
  - blog:
      enabled: !ENV CI
      blog_dir: "blog"
      blog_toc: true
      post_dir: blog/posts
      post_date_format: yyyy/MM/dd
      post_url_format: "{date}/{slug}"
      authors_file: "{blog}/.authors.yml"
@@ -537,10 +537,14 @@
    "import os\n",
    "\n",
    "# # Setting environment variables\n",
    "os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
    "os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
    "if \"GRAPHISTRY_USERNAME\" not in os.environ: \n",
    "    os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n",
    "\n",
    "os.environ[\"LLM_API_KEY\"] = \"\"\n",
    "if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n",
    "    os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n",
    "\n",
    "if \"LLM_API_KEY\" not in os.environ:\n",
    "    os.environ[\"LLM_API_KEY\"] = \"\"\n",
    "\n",
    "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" # \"neo4j\" or \"networkx\"\n",
    "# Not needed if using networkx\n",
@@ -577,6 +581,7 @@
    "\n",
    "import cognee\n",
    "\n",
    "await cognee.prune.prune_data()\n",
    "await cognee.prune.prune_system(metadata=True)"
  ]
},
@@ -639,7 +644,8 @@
    "    chunks_into_graph, \\\n",
    "    source_documents_to_chunks, \\\n",
    "    check_permissions_on_documents, \\\n",
    "    classify_documents\n",
    "    classify_documents, \\\n",
    "    chunk_naive_llm_classifier\n",
    "from cognee.tasks.summarization import summarize_text\n",
    "\n",
    "async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
@@ -667,6 +673,10 @@
    "            summarization_model = cognee_config.summarization_model,\n",
    "            collection_name = \"summaries\",\n",
    "        ),\n",
    "        Task(\n",
    "            chunk_naive_llm_classifier,\n",
    "            classification_model = cognee_config.classification_model,\n",
    "        ),\n",
    "        Task(chunk_remove_disconnected), # Remove the obsolete document chunks.\n",
    "    ]\n",
    "\n",
@@ -876,7 +886,7 @@
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "cognee-bGi0WgSG-py3.9",
      "display_name": ".venv",
      "language": "python",
      "name": "python3"
    },
@@ -890,7 +900,7 @@
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.5"
      "version": "3.9.6"
    }
  },
  "nbformat": 4,
poetry.lock (generated): 2660 changes. File diff suppressed because it is too large.
@@ -91,20 +91,9 @@ mypy = "^1.7.1"
notebook = "^7.1.1"

[tool.poetry.group.docs.dependencies]
mkdocs = "^1.4.3"
mkdocs-material = {extras = ["imaging"], version = "^9.5.9"}
mkdocstrings = "^0.22.0"
mkdocstrings-python = "^1.1.2"
pytest-examples = "^0.0.10"
mkdocs-jupyter = "^0.24.6"
mkdocs-material = "^9.5.42"
mkdocs-minify-plugin = "^0.8.0"
mkdocs-redirects = "^1.2.1"

[tool.poetry.group.test-docs.dependencies]
fastapi = "^0.109.2"
diskcache = "^5.6.3"
pandas = "2.0.3"
tabulate = "^0.9.0"
mkdocstrings = {extras = ["python"], version = "^0.26.2"}

[tool.ruff] # https://beta.ruff.rs/docs/
line-length = 100