From 4ef56a654d48405a708e2a8e6d2bbf4a4be5c731 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Mon, 7 Oct 2024 20:16:38 +0900 Subject: [PATCH 1/3] docs: update quickstart.md (#143) enviroment -> environment --- docs/quickstart.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index d662c9282..5005ad85a 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -14,7 +14,7 @@ Navigate to cognee folder and run docker compose up postgres ``` -Add your LLM API key to the enviroment variables +Add your LLM API key to the environment variables ``` import os @@ -54,4 +54,4 @@ for result_text in search_results[0]: ``` In the example above, we add a piece of information to cognee, use LLMs to create a GraphRAG, and then query cognee for the knowledge. -cognee is composable and you can build your own cognee pipelines using our [templates.](templates.md) \ No newline at end of file +cognee is composable and you can build your own cognee pipelines using our [templates.](templates.md) From dc187a81d70e4964667516493643b25a7856bf4e Mon Sep 17 00:00:00 2001 From: Boris Date: Mon, 7 Oct 2024 14:41:35 +0200 Subject: [PATCH 2/3] feat: migrate search to tasks (#144) * fix: don't return anything on health endpoint * feat: add alembic migrations * feat: align search types with the data we store and migrate search to tasks --- .gitignore | 1 - README.md | 29 +- alembic.ini | 117 ++++++ alembic/README | 1 + alembic/env.py | 106 ++++++ alembic/script.py.mako | 26 ++ .../8057ae7329c2_initial_migration.py | 26 ++ .../src/ui/Partials/SearchView/SearchView.tsx | 92 ++++- cognee/__init__.py | 2 +- cognee/api/client.py | 38 +- cognee/api/v1/cognify/cognify_v2.py | 10 +- cognee/api/v1/config/config.py | 13 +- cognee/api/v1/search/__init__.py | 2 +- .../v1/search/{search.py => search.legacy.py} | 0 cognee/api/v1/search/search_v2.py | 56 +++ .../databases/graph/neo4j_driver/adapter.py | 56 ++- .../databases/graph/networkx/adapter.py | 57 ++- .../sqlalchemy/SqlAlchemyAdapter.py | 18 +- .../{data => }/chunking/TextChunker.py | 2 +- cognee/modules/chunking/__init__.py | 2 + .../models}/DocumentChunk.py | 0 .../document_types/AudioDocument.py | 2 +- .../document_types/ImageDocument.py | 2 +- .../processing/document_types/PdfDocument.py | 2 +- .../processing/document_types/TextDocument.py | 2 +- .../modules/data/processing/has_new_chunks.py | 2 +- cognee/modules/search/graph/search_summary.py | 2 +- cognee/tasks/__init__.py | 4 +- .../tasks/chunk_extract_summary/__init__.py | 0 .../chunk_naive_llm_classifier.py | 2 +- .../chunk_remove_disconnected.py | 10 +- .../chunk_update_check/chunk_update_check.py | 2 +- cognee/tasks/chunking/__init__.py | 1 + cognee/tasks/chunking/chunk_by_paragraph.py | 2 - cognee/tasks/chunking/chunk_by_sentence.py | 2 - cognee/tasks/chunking/chunking_registry.py | 10 - cognee/tasks/chunking/query_chunks.py | 17 + cognee/tasks/chunks_into_graph/__init__.py | 0 cognee/tasks/graph/__init__.py | 2 + .../chunks_into_graph.py | 4 +- cognee/tasks/graph/query_graph_connections.py | 62 +++ .../save_chunks_to_store.py | 4 +- .../tasks/storage/save_to_vector_storage.py | 42 +++ cognee/tasks/summarization/__init__.py | 2 + .../models/TextSummary.py | 0 cognee/tasks/summarization/query_summaries.py | 17 + .../summarize_text.py} | 6 +- cognee/tests/test_library.py | 25 +- cognee/tests/test_neo4j.py | 17 +- cognee/tests/test_qdrant.py | 17 +- cognee/tests/test_weaviate.py | 17 +- docs/api_reference.md | 43 ++- docs/blog/.authors.yml | 6 - docs/blog/index.md | 39 -- .../architecture.png | Bin 146695 -> 0 bytes .../Untitled 1.png | Bin 160762 -> 0 bytes .../Untitled.png | Bin 766888 -> 0 bytes .../carbon_(19).png | Bin 303758 -> 0 bytes .../carbon_(20).png | Bin 476761 -> 0 bytes .../carbon_(21).png | Bin 492268 -> 0 bytes .../carbon_(22).png | Bin 768160 -> 0 bytes .../carbon_(23).png | Bin 736885 -> 0 bytes .../infographic_(2).png | Bin 1172260 -> 0 bytes .../Dashboard_example.png | Bin 1387191 -> 0 bytes .../How_cognee_works.png | Bin 324836 -> 0 bytes .../Untitled 1.png | Bin 700333 -> 0 bytes .../Untitled 2.png | Bin 646252 -> 0 bytes .../Untitled 3.png | Bin 710513 -> 0 bytes .../Untitled.png | Bin 106792 -> 0 bytes .../carbon_(5).png | Bin 411176 -> 0 bytes .../carbon_(6).png | Bin 348854 -> 0 bytes .../carbon_(9).png | Bin 229134 -> 0 bytes .../infographic_(2) 1.png | Bin 1068538 -> 0 bytes .../infographic_(2) 2.png | Bin 1068510 -> 0 bytes .../infographic_(2).png | Bin 1172260 -> 0 bytes docs/blog/posts/Shiny_new_LLMOps.md | 89 ----- docs/blog/posts/Shiny_new_LLMOps/Untitled.png | Bin 263664 -> 0 bytes .../posts/Shiny_new_LLMOps/llm_problems.png | Bin 57384 -> 0 bytes docs/blog/posts/cognee-library-release.md | 111 ------ docs/blog/posts/cognee-new-website.md | 17 - docs/blog/posts/cognee-v0.1.4.md | 57 --- docs/blog/posts/from-demo-to-production-1.md | 352 ------------------ docs/blog/posts/from-demo-to-production-2.md | 183 --------- docs/blog/posts/from-demo-to-production-3.md | 193 ---------- docs/blog/posts/from-demo-to-production-4.md | 120 ------ .../blog/posts/llmops-and-knowledge-graphs.md | 15 - docs/blog/rag/rag_explained.md | 15 - docs/quickstart.md | 40 +- docs/search.md | 48 +-- entrypoint.sh | 1 + evals/simple_rag_vs_cognee_eval.py | 5 +- notebooks/cognee - Get Started.ipynb | 11 +- notebooks/cognee_demo_1.5.ipynb | 16 +- poetry.lock | 119 +++--- pyproject.toml | 3 +- 95 files changed, 887 insertions(+), 1527 deletions(-) create mode 100644 alembic.ini create mode 100644 alembic/README create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 alembic/versions/8057ae7329c2_initial_migration.py rename cognee/api/v1/search/{search.py => search.legacy.py} (100%) create mode 100644 cognee/api/v1/search/search_v2.py rename cognee/modules/{data => }/chunking/TextChunker.py (97%) create mode 100644 cognee/modules/chunking/__init__.py rename cognee/modules/{data/processing/chunk_types => chunking/models}/DocumentChunk.py (100%) delete mode 100644 cognee/tasks/chunk_extract_summary/__init__.py delete mode 100644 cognee/tasks/chunking/chunking_registry.py create mode 100644 cognee/tasks/chunking/query_chunks.py delete mode 100644 cognee/tasks/chunks_into_graph/__init__.py create mode 100644 cognee/tasks/graph/__init__.py rename cognee/tasks/{chunks_into_graph => graph}/chunks_into_graph.py (98%) create mode 100644 cognee/tasks/graph/query_graph_connections.py create mode 100644 cognee/tasks/storage/save_to_vector_storage.py create mode 100644 cognee/tasks/summarization/__init__.py rename cognee/tasks/{chunk_extract_summary => summarization}/models/TextSummary.py (100%) create mode 100644 cognee/tasks/summarization/query_summaries.py rename cognee/tasks/{chunk_extract_summary/chunk_extract_summary.py => summarization/summarize_text.py} (78%) delete mode 100644 docs/blog/.authors.yml delete mode 100644 docs/blog/index.md delete mode 100644 docs/blog/posts/Cognee - library release 157322a0aa8346ebbbf8d81943b4ca4f/architecture.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/Untitled 1.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/Untitled.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/carbon_(19).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/carbon_(20).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/carbon_(21).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/carbon_(22).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/carbon_(23).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 2 towards 98ad7b915139478992c4c4386b5e5886/infographic_(2).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 3 towards e62946c272bf412584b12fbbf92d35b0/Dashboard_example.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate Level 4 towards fe90ff40e56e44c4a49f1492d360173c/How_cognee_works.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/Untitled 1.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/Untitled 2.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/Untitled 3.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/Untitled.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/carbon_(5).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/carbon_(6).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/carbon_(9).png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/infographic_(2) 1.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/infographic_(2) 2.png delete mode 100644 docs/blog/posts/Going beyond Langchain + Weaviate and towards a pr 7351d77a1eba40aab4394c24bef3a278/infographic_(2).png delete mode 100644 docs/blog/posts/Shiny_new_LLMOps.md delete mode 100644 docs/blog/posts/Shiny_new_LLMOps/Untitled.png delete mode 100644 docs/blog/posts/Shiny_new_LLMOps/llm_problems.png delete mode 100644 docs/blog/posts/cognee-library-release.md delete mode 100644 docs/blog/posts/cognee-new-website.md delete mode 100644 docs/blog/posts/cognee-v0.1.4.md delete mode 100644 docs/blog/posts/from-demo-to-production-1.md delete mode 100644 docs/blog/posts/from-demo-to-production-2.md delete mode 100644 docs/blog/posts/from-demo-to-production-3.md delete mode 100644 docs/blog/posts/from-demo-to-production-4.md delete mode 100644 docs/blog/posts/llmops-and-knowledge-graphs.md delete mode 100644 docs/blog/rag/rag_explained.md diff --git a/.gitignore b/.gitignore index 468fc3e80..438f466f9 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,6 @@ __pycache__/ *.py[cod] *$py.class -notebooks/ full_run.ipynb evals/ diff --git a/README.md b/README.md index bf883ec16..64d88756a 100644 --- a/README.md +++ b/README.md @@ -65,16 +65,21 @@ You can use different LLM providers, for more info check out our =3.9 or backports.zoneinfo library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +# version_path_separator = newline +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = %(SQLALCHEMY_DATABASE_URI)s + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 000000000..e0d0858f2 --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration with an async dbapi. \ No newline at end of file diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 000000000..fe501d6cd --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,106 @@ +import asyncio +from logging.config import fileConfig + +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import async_engine_from_config + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +from cognee.infrastructure.databases.relational import Base +target_metadata = Base.metadata + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """In this scenario we need to create an Engine + and associate a connection with the context. + """ + + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + + asyncio.run(run_async_migrations()) + + +from cognee.infrastructure.databases.relational import get_relational_engine, get_relational_config + +db_engine = get_relational_engine() + +if db_engine.engine.dialect.name == "sqlite": + from cognee.infrastructure.files.storage import LocalStorage + db_config = get_relational_config() + LocalStorage.ensure_directory_exists(db_config.db_path) + +config.set_section_option( + config.config_ini_section, + "SQLALCHEMY_DATABASE_URI", + db_engine.db_uri, +) + + +if context.is_offline_mode(): + print("OFFLINE MODE") + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 000000000..fbc4b07dc --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,26 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/8057ae7329c2_initial_migration.py b/alembic/versions/8057ae7329c2_initial_migration.py new file mode 100644 index 000000000..d7d45d0ac --- /dev/null +++ b/alembic/versions/8057ae7329c2_initial_migration.py @@ -0,0 +1,26 @@ +"""Initial migration + +Revision ID: 8057ae7329c2 +Revises: +Create Date: 2024-10-02 12:55:20.989372 + +""" +from typing import Sequence, Union +from sqlalchemy.util import await_only +from cognee.infrastructure.databases.relational import get_relational_engine + +# revision identifiers, used by Alembic. +revision: str = "8057ae7329c2" +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + db_engine = get_relational_engine() + await_only(db_engine.create_database()) + + +def downgrade() -> None: + db_engine = get_relational_engine() + await_only(db_engine.delete_database()) diff --git a/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx b/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx index d2bafefb7..b20beb5b9 100644 --- a/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx +++ b/cognee-frontend/src/ui/Partials/SearchView/SearchView.tsx @@ -25,23 +25,40 @@ export default function SearchView() { }, []); const searchOptions = [{ - value: 'SIMILARITY', - label: 'Look for similar graph nodes', + value: 'INSIGHTS', + label: 'Query insights from documents', }, { - value: 'SUMMARY', - label: 'Get a summary related to query', + value: 'SUMMARIES', + label: 'Query document summaries', }, { - value: 'ADJACENT', - label: 'Look for graph node\'s neighbors', - }, { - value: 'TRAVERSE', - label: 'Traverse through the graph and get knowledge', + value: 'CHUNKS', + label: 'Query document chunks', }]; const [searchType, setSearchType] = useState(searchOptions[0]); + const scrollToBottom = useCallback(() => { + setTimeout(() => { + const messagesContainerElement = document.getElementById('messages'); + if (messagesContainerElement) { + const messagesElements = messagesContainerElement.children[0]; + + if (messagesElements) { + messagesContainerElement.scrollTo({ + top: messagesElements.scrollHeight, + behavior: 'smooth', + }); + } + } + }, 300); + }, []); + const handleSearchSubmit = useCallback((event: React.FormEvent) => { event.preventDefault(); + if (inputValue.trim() === '') { + return; + } + setMessages((currentMessages) => [ ...currentMessages, { @@ -51,16 +68,18 @@ export default function SearchView() { }, ]); + scrollToBottom(); + + const searchTypeValue = searchType.value; + fetch('/v1/search', { method: 'POST', headers: { 'Content-Type': 'application/json', }, body: JSON.stringify({ - query_params: { - query: inputValue, - searchType: searchType.value, - }, + query: inputValue, + searchType: searchTypeValue, }), }) .then((response) => response.json()) @@ -70,12 +89,14 @@ export default function SearchView() { { id: v4(), user: 'system', - text: systemMessage, + text: convertToSearchTypeOutput(systemMessage, searchTypeValue), }, ]); setInputValue(''); + + scrollToBottom(); }) - }, [inputValue, searchType]); + }, [inputValue, scrollToBottom, searchType.value]); const { value: isInputExpanded, @@ -83,6 +104,12 @@ export default function SearchView() { setFalse: contractInput, } = useBoolean(false); + const handleSubmitOnEnter = (event: React.KeyboardEvent) => { + if (event.key === 'Enter' && !event.shiftKey) { + handleSearchSubmit(event as unknown as React.FormEvent); + } + }; + return ( @@ -90,7 +117,7 @@ export default function SearchView() { options={searchOptions} onChange={setSearchType} /> -
+
{messages.map((message) => (
-