Merge pull request #45 from topoteretes/feat/COG-24-add-qdrant

 Add qdrant support and implement new architecture
This commit is contained in:
Vasilije 2024-03-13 18:05:58 +01:00 committed by GitHub
commit f8192c9909
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
193 changed files with 15483 additions and 3360 deletions

6
.dlt/config.toml Normal file
View file

@ -0,0 +1,6 @@
# put your configuration values here
[runtime]
log_level = "WARNING" # the system log level of dlt
# use the dlthub_telemetry setting to enable/disable anonymous usage data reporting, see https://dlthub.com/docs/telemetry
dlthub_telemetry = false

View file

@ -7,7 +7,7 @@ POSTGRES_PASSWORD = bla
POSTGRES_DB = bubu
POSTGRES_HOST = localhost
POSTGRES_HOST_DOCKER = postgres
COG_ARCH_DIR = cognitive_architecture
COG_ARCH_DIR = cognee
GRAPH_DB_URL =
GRAPH_DB_PW =
GRAPH_DB_USER =

41
.github/workflows/mkdocs.yml vendored Normal file
View file

@ -0,0 +1,41 @@
name: Deploy MkDocs
on:
push:
permissions:
contents: write
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Install Poetry
uses: snok/install-poetry@v1.3.1
- name: Use output
run: echo "The stage is finished"
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install APT packages
run: |
sudo apt-get update &&
sudo apt-get install pngquant
- name: Install via Poetry
run: poetry install --with dev,docs
env:
GH_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
- name: Build and deploy MkDocs
run: poetry run mkdocs gh-deploy --force

4
.gitignore vendored
View file

@ -163,3 +163,7 @@ cython_debug/
#.idea/
.vscode/
database/data/
cognee/data/
.DS_Store

638
.pylintrc Normal file
View file

@ -0,0 +1,638 @@
[MAIN]
# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no
# Clear in-memory caches upon conclusion of linting. Useful if running pylint
# in a server-like mode.
clear-cache-post-run=no
# Load and enable all available extensions. Use --list-extensions to see a list
# all available extensions.
#enable-all-extensions=
# In error mode, messages with a category besides ERROR or FATAL are
# suppressed, and no reports are done by default. Error mode is compatible with
# disabling specific errors.
#errors-only=
# Always return a 0 (non-error) status code, even if lint errors are found.
# This is primarily useful in continuous integration scripts.
#exit-zero=
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-allow-list=
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
# for backward compatibility.)
extension-pkg-whitelist=
# Return non-zero exit code if any of these messages/categories are detected,
# even if score is above --fail-under value. Syntax same as enable. Messages
# specified are enabled, while categories only check already-enabled messages.
fail-on=
# Specify a score threshold under which the program will exit with error.
fail-under=10
# Interpret the stdin as a python script, whose filename needs to be passed as
# the module_or_package argument.
#from-stdin=
# Files or directories to be skipped. They should be base names, not paths.
ignore=CVS
# Add files or directories matching the regular expressions patterns to the
# ignore-list. The regex matches against paths and can be in Posix or Windows
# format. Because '\\' represents the directory delimiter on Windows systems,
# it can't be used as an escape character.
ignore-paths=
# Files or directories matching the regular expression patterns are skipped.
# The regex matches against base names, not paths. The default value ignores
# Emacs file locks
ignore-patterns=^\.#
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use, and will cap the count on Windows to
# avoid hangs.
jobs=1
# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=
# Pickle collected data for later comparisons.
persistent=yes
# Minimum Python version to use for version dependent checks. Will default to
# the version used to run pylint.
py-version=3.12
# Discover python modules and packages in the file system subtree.
recursive=no
# Add paths to the list of the source roots. Supports globbing patterns. The
# source root is an absolute path or a path relative to the current working
# directory used to determine a package namespace for modules located under the
# source root.
source-roots=
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
# In verbose mode, extra non-checker-related info will be displayed.
#verbose=
[BASIC]
# Naming style matching correct argument names.
argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
# naming-style. If left empty, argument names will be checked with the set
# naming style.
#argument-rgx=
# Naming style matching correct attribute names.
attr-naming-style=snake_case
# Regular expression matching correct attribute names. Overrides attr-naming-
# style. If left empty, attribute names will be checked with the set naming
# style.
#attr-rgx=
# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
bar,
baz,
toto,
tutu,
tata
# Bad variable names regexes, separated by a comma. If names match any regex,
# they will always be refused
bad-names-rgxs=
# Naming style matching correct class attribute names.
class-attribute-naming-style=any
# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style. If left empty, class attribute names will be checked
# with the set naming style.
#class-attribute-rgx=
# Naming style matching correct class constant names.
class-const-naming-style=UPPER_CASE
# Regular expression matching correct class constant names. Overrides class-
# const-naming-style. If left empty, class constant names will be checked with
# the set naming style.
#class-const-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
# Regular expression matching correct class names. Overrides class-naming-
# style. If left empty, class names will be checked with the set naming style.
#class-rgx=
# Naming style matching correct constant names.
const-naming-style=UPPER_CASE
# Regular expression matching correct constant names. Overrides const-naming-
# style. If left empty, constant names will be checked with the set naming
# style.
#const-rgx=
# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1
# Naming style matching correct function names.
function-naming-style=snake_case
# Regular expression matching correct function names. Overrides function-
# naming-style. If left empty, function names will be checked with the set
# naming style.
#function-rgx=
# Good variable names which should always be accepted, separated by a comma.
good-names=i,
j,
k,
ex,
Run,
_
# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted
good-names-rgxs=
# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
# Naming style matching correct inline iteration names.
inlinevar-naming-style=any
# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style. If left empty, inline iteration names will be checked
# with the set naming style.
#inlinevar-rgx=
# Naming style matching correct method names.
method-naming-style=snake_case
# Regular expression matching correct method names. Overrides method-naming-
# style. If left empty, method names will be checked with the set naming style.
#method-rgx=
# Naming style matching correct module names.
module-naming-style=snake_case
# Regular expression matching correct module names. Overrides module-naming-
# style. If left empty, module names will be checked with the set naming style.
#module-rgx=
# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=
# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_
# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty
# Regular expression matching correct type alias names. If left empty, type
# alias names will be checked with the set naming style.
#typealias-rgx=
# Regular expression matching correct type variable names. If left empty, type
# variable names will be checked with the set naming style.
#typevar-rgx=
# Naming style matching correct variable names.
variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
# naming-style. If left empty, variable names will be checked with the set
# naming style.
#variable-rgx=
[CLASSES]
# Warn about protected attribute access inside special methods
check-protected-access-in-special-methods=no
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
__new__,
setUp,
asyncSetUp,
__post_init__
# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls
# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=mcs
[DESIGN]
# List of regular expressions of class ancestor names to ignore when counting
# public methods (see R0903)
exclude-too-few-public-methods=
# List of qualified class names to ignore when counting class parents (see
# R0901)
ignored-parents=
# Maximum number of arguments for function / method.
max-args=5
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Maximum number of boolean expressions in an if statement (see R0916).
max-bool-expr=5
# Maximum number of branch for function / method body.
max-branches=12
# Maximum number of locals for function / method body.
max-locals=15
# Maximum number of parents for a class (see R0901).
max-parents=7
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
# Maximum number of return / yield for function / method body.
max-returns=6
# Maximum number of statements in function / method body.
max-statements=50
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
[EXCEPTIONS]
# Exceptions that will emit a warning when caught.
overgeneral-exceptions=builtins.BaseException,builtins.Exception
[FORMAT]
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
indent-string=' '
# Maximum number of characters on a single line.
max-line-length=100
# Maximum number of lines in a module.
max-module-lines=1000
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no
[IMPORTS]
# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=
# Allow explicit reexports by alias from a package __init__.
allow-reexport-from-package=no
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=
# Output a graph (.gv or any supported image format) of external dependencies
# to the given file (report RP0402 must not be disabled).
ext-import-graph=
# Output a graph (.gv or any supported image format) of all (i.e. internal and
# external) dependencies to the given file (report RP0402 must not be
# disabled).
import-graph=
# Output a graph (.gv or any supported image format) of internal dependencies
# to the given file (report RP0402 must not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=enchant
# Couples of modules and preferred modules, separated by a comma.
preferred-modules=
[LOGGING]
# The type of string formatting that logging methods do. `old` means using %
# formatting, `new` is for `{}` formatting.
logging-format-style=old
# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging
[MESSAGES CONTROL]
# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
# UNDEFINED.
confidence=HIGH,
CONTROL_FLOW,
INFERENCE,
INFERENCE_FAILURE,
UNDEFINED
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=raw-checker-failed,
bad-inline-option,
locally-disabled,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
use-implicit-booleaness-not-comparison-to-string,
use-implicit-booleaness-not-comparison-to-zero,
missing-module-docstring,
missing-function-docstring,
missing-class-docstring
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=
[METHOD_ARGS]
# List of qualified names (i.e., library.method) which require a timeout
# parameter e.g. 'requests.api.get,requests.api.post'
timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,
XXX,
TODO
# Regular expression of note tags to take in consideration.
notes-rgx=
[REFACTORING]
# Maximum number of nested blocks for function / method body
max-nested-blocks=5
# Complete name of functions that never returns. When checking for
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit,argparse.parse_error
[REPORTS]
# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'fatal', 'error', 'warning', 'refactor',
# 'convention', and 'info' which contain the number of messages in each
# category, as well as 'statement' which is the total number of statements
# analyzed. This score is used by the global evaluation report (RP0004).
evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
msg-template=
# Set the output format. Available formats are: text, parseable, colorized,
# json2 (improved json format), json (old json format) and msvs (visual
# studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
#output-format=
# Tells whether to display a full report or only the messages.
reports=no
# Activate the evaluation score.
score=yes
[SIMILARITIES]
# Comments are removed from the similarity computation
ignore-comments=yes
# Docstrings are removed from the similarity computation
ignore-docstrings=yes
# Imports are removed from the similarity computation
ignore-imports=yes
# Signatures are removed from the similarity computation
ignore-signatures=yes
# Minimum lines number of a similarity.
min-similarity-lines=4
[SPELLING]
# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4
# Spelling dictionary name. No available dictionaries : You need to install
# both the python package and the system dependency for enchant to work.
spelling-dict=
# List of comma separated words that should be considered directives if they
# appear at the beginning of a comment and should not be checked.
spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains the private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
spelling-store-unknown-words=no
[STRING]
# This flag controls whether inconsistent-quotes generates a warning when the
# character used as a quote delimiter is used inconsistently within a module.
check-quote-consistency=yes
# This flag controls whether the implicit-str-concat should generate a warning
# on implicit string concatenation in sequences defined over several lines.
check-str-concat-over-line-jumps=no
[TYPECHECK]
# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=
# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes
# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes
# List of symbolic message names to ignore for Mixin members.
ignored-checks-for-mixins=no-member,
not-async-context-manager,
not-context-manager,
attribute-defined-outside-init
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes
# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1
# The total number of similar names that should be taken in consideration when
# showing a hint for a missing member.
missing-member-max-choices=1
# Regex pattern to define which classes are considered mixins.
mixin-class-rgx=.*[Mm]ixin
# List of decorators that change the signature of a decorated function.
signature-mutators=
[VARIABLES]
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes
# List of names allowed to shadow builtins
allowed-redefined-builtins=
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
_cb
# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
# Argument names that match this expression will be ignored.
ignored-argument-names=_.*|^ignored_|^unused_
# Tells whether we should check for unused import in __init__ files.
init-import=no
# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io

1
.python-version Normal file
View file

@ -0,0 +1 @@
3.10.13

4711
Demo_graph.ipynb Normal file

File diff suppressed because one or more lines are too long

View file

@ -17,7 +17,6 @@ COPY pyproject.toml poetry.lock /app/
# Install the dependencies
RUN poetry config virtualenvs.create false && \
poetry lock --no-update && \
poetry install --no-root --no-dev
RUN apt-get update -q && \
@ -44,7 +43,7 @@ WORKDIR /app
# Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app
COPY cognitive_architecture/ /app/cognitive_architecture
COPY cognee/ /app/cognee
COPY main.py /app
COPY api.py /app

154
README.md
View file

@ -1,6 +1,7 @@
# cognee
AI Memory - Cognitive Architecture
Make data processing for LLMs easy
<p>
<a href="https://cognee.ai" target="_blank">
@ -9,7 +10,7 @@ AI Memory - Cognitive Architecture
</p>
<p>
<i>Open-source framework for building AI Memory, extending the limits of cognitive architecture, designed for accuracy, transparency, and control.</i>
<i>Open-source framework for creating knowledge graphs and data models for LLMs.</i>
</p>
<p>
@ -27,60 +28,115 @@ AI Memory - Cognitive Architecture
</a>
</p>
[//]: # (<p>)
[//]: # ( <b>Share cognee Repository</b>)
[//]: # (</p>)
[//]: # (<p>)
[//]: # ( <a href="https://twitter.com/intent/tweet?text=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.&url=https://github.com/topoteretes/cognee&hashtags=AGI,Autonomics,Cognee,future" target="_blank">)
[//]: # ( <img src="https://img.shields.io/twitter/follow/_promethAI?label=Share Repo on Twitter&style=social" alt="Follow Cognee"/>)
[//]: # ( </a>)
[//]: # ( <a href="https://t.me/share/url?text=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.&url=https://github.com/topoteretes/cognee" target="_blank">)
[//]: # ( <img src="https://img.shields.io/twitter/url?label=Telegram&logo=Telegram&style=social&url=https://github.com/topoteretes/cognee" alt="Share on Telegram"/>)
[//]: # ( </a>)
[//]: # ( <a href="https://api.whatsapp.com/send?text=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let's%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.%20https://github.com/topoteretes/cognee" target="_blank">)
[//]: # ( <img src="https://img.shields.io/twitter/url?label=whatsapp&logo=whatsapp&style=social&url=https://github.com/topoteretes/cognee" />)
[//]: # ( </a>)
[//]: # ( <a href="https://www.reddit.com/submit?url=https://github.com/topoteretes/cognee&title=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let's%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.)
[//]: # ( " target="_blank">)
[//]: # ( <img src="https://img.shields.io/twitter/url?label=Reddit&logo=Reddit&style=social&url=https://github.com/topoteretes/cognee" alt="Share on Reddit"/>)
[//]: # ( </a>)
[//]: # ( <a href="mailto:?subject=Check%20this%20GitHub%20repository%20out.&body=Cognee%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.%3A%0Ahttps://github.com/topoteretes/cognee" target="_blank">)
[//]: # ( <img src="https://img.shields.io/twitter/url?label=Gmail&logo=Gmail&style=social&url=https://github.com/topoteretes/cognee"/>)
[//]: # ( </a>)
[//]: # ( <a href="https://www.buymeacoffee.com/promethAI" target="_blank">)
[//]: # ( <img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="23" width="100" style="border-radius:1px">)
[//]: # ( </a>)
[//]: # (</p>)
[//]: # ()
[//]: # (<hr>)
[//]: # ()
[//]: # ([Star us on Github!]&#40;https://www.github.com/topoteretes/cognee&#41;)
[//]: # ()
[//]: # (<a href="https://www.cognee.ai" target="_blank">Cognee</a> runs in iterations, from POC towards production-ready code.)
## 🚀 It's alive
<p>
<b>Share cognee Repository</b>
</p>
<p>
<a href="https://twitter.com/intent/tweet?text=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.&url=https://github.com/topoteretes/cognee&hashtags=AGI,Autonomics,Cognee,future" target="_blank">
<img src="https://img.shields.io/twitter/follow/_promethAI?label=Share Repo on Twitter&style=social" alt="Follow Cognee"/>
</a>
<a href="https://t.me/share/url?text=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.&url=https://github.com/topoteretes/cognee" target="_blank">
<img src="https://img.shields.io/twitter/url?label=Telegram&logo=Telegram&style=social&url=https://github.com/topoteretes/cognee" alt="Share on Telegram"/>
</a>
<a href="https://api.whatsapp.com/send?text=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let's%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.%20https://github.com/topoteretes/cognee" target="_blank">
<img src="https://img.shields.io/twitter/url?label=whatsapp&logo=whatsapp&style=social&url=https://github.com/topoteretes/cognee" />
</a>
<a href="https://www.reddit.com/submit?url=https://github.com/topoteretes/cognee&title=Check%20this%20GitHub%20repository%20out.%20Cognee%20-%20Let's%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.
" target="_blank">
<img src="https://img.shields.io/twitter/url?label=Reddit&logo=Reddit&style=social&url=https://github.com/topoteretes/cognee" alt="Share on Reddit"/>
</a>
<a href="mailto:?subject=Check%20this%20GitHub%20repository%20out.&body=Cognee%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.%3A%0Ahttps://github.com/topoteretes/cognee" target="_blank">
<img src="https://img.shields.io/twitter/url?label=Gmail&logo=Gmail&style=social&url=https://github.com/topoteretes/cognee"/>
</a>
<a href="https://www.buymeacoffee.com/promethAI" target="_blank">
<img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="23" width="100" style="border-radius:1px">
</a>
</p>
<hr>
[Star us on Github!](https://www.github.com/topoteretes/cognee)
Jump into AI memory architecture, inspired by human cognitive processes, using Python.
<a href="https://www.cognee.ai" target="_blank">Cognee</a> runs in iterations, from POC towards production-ready code.
To read more about the approach and details on cognitive architecture, see the blog post: <a href="https://topoteretes.notion.site/Going-beyond-Langchain-Weaviate-and-towards-a-production-ready-modern-data-platform-7351d77a1eba40aab4394c24bef3a278?pvs=4" target="_blank">AI Applications and RAGs - Cognitive Architecture, Testability, Production Ready Apps</a>
Try it yourself on WhatsApp with one of our <a href="https://keepi.ai">partners</a> by typing `/save {content you want to save}` followed by `/query {knowledge you saved previously}`
</p>
## 📦 Installation
With pip:
```bash
pip install cognee
```
With poetry:
```bash
poetry add cognee
```
## Getting started
## 💻 Usage
- Add a new piece of information to storage
```
import cognee
cognee.add(data_path, file_name)
```
To run cognee you need to have <a href="https://docs.docker.com/get-docker" target="_blank">Docker</a> installed on your machine.
- Use LLMs and cognee to create graphs
```
cognee.cognify(file_name)
```
Run <a href="https://www.cognee.ai" target="_blank">Cognee</a> in a couple of steps:
- Run `cp .env.template .env` in your terminal and set all the environment variables
- Run `docker compose up` in order to start graph and relational databases
- Run `docker compose up cognee` in order to start Cognee
- Render the graph after adding your Graphistry credentials to .env
<!-- Send API requests add-memory, user-query-to-graph, document-to-graph-db, user-query-processor to the localhost:8000 -->
```
graph_url = await render_graph(graph, graph_type = "networkx")
print(graph_url)
```
## Debugging
To run Cognee with debugger attached you need to build the Cognee image with the `DEBUG` flag set to true.
- Query the graph for a piece of information
- `docker compose build cognee --no-cache --build-arg DEBUG=true`
- `docker compose up cognee`
```
query_params = {
SearchType.SIMILARITY: {'query': 'your search query here'}
}
cognee.search(graph, query_params)
```
## Demo
@ -97,14 +153,6 @@ Our framework for the OpenAI, Graph (Neo4j) and Vector (Weaviate) databases intr
- Document Topology: Structure and store documents in public and private domains.
- Personalized Context: Provide a context object to the LLM for a better response.
</br>
![Image](assets/architecture.png)
## Current Focus
### Integration with keepi.ai and other apps
- Cognee uses the Neo4j graph database to map user data into a graph structure consisting of semantic, episodic, and procedural memory.
- Stores data and files through the WhatsApp chatbot <a href="https://keepi.ai">keepi.ai</a>
- Uses the graph to answer user queries and store new information in the graph.

80
api.py
View file

@ -13,19 +13,19 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
from cognitive_architecture.config import Config
from cognee.config import Config
config = Config()
config.load()
from typing import Dict, Any
from typing import Dict, Any, List
from fastapi import FastAPI, BackgroundTasks, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from cognitive_architecture.database.relationaldb.database import AsyncSessionLocal
from cognitive_architecture.database.relationaldb.database_crud import session_scope
from cognitive_architecture.vectorstore_manager import Memory
from cognee.database.relationaldb.database import AsyncSessionLocal
from cognee.database.relationaldb.database_crud import session_scope
from cognee.vectorstore_manager import Memory
from main import add_documents_to_graph_db, user_context_enrichment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
@ -57,6 +57,54 @@ def health_check():
class Payload(BaseModel):
payload: Dict[str, Any]
from cognee.api.v1.memory.create_memory import MemoryType
class CreateMemoryPayload(BaseModel):
user_id: str
memory_name: str
memory_type: MemoryType
@app.post("/create-memory", response_model=dict)
async def create_memory(payload: CreateMemoryPayload):
from cognee.api.v1.memory.create_memory import create_memory as create_memory_v1, MemoryException
try:
await create_memory_v1(
payload.user_id,
payload.memory_name,
payload.memory_type or MemoryType.VECTOR,
)
except MemoryException as error:
return JSONResponse(
status_code = 409,
content = { "error": error.message }
)
return JSONResponse(
status_code = 200,
content = { "memory_name": payload.memory_name }
)
class RememberPayload(BaseModel):
user_id: str
memory_name: str
payload: List[str]
@app.post("/remember", response_model=dict)
async def remember(payload: RememberPayload):
from cognee.api.v1.memory.remember import remember as remember_v1
await remember_v1(
payload.user_id,
payload.memory_name,
payload.payload
)
return JSONResponse(
status_code = 200,
content = { "message": "ok" }
)
@app.post("/add-memory", response_model=dict)
async def add_memory(
@ -83,10 +131,10 @@ async def add_memory(
content = None
output = await load_documents_to_vectorstore(
session,
decoded_payload["user_id"],
content=content,
loader_settings=settings_for_loader,
session = session,
content = content,
user_id = decoded_payload["user_id"],
loader_settings = settings_for_loader,
)
return JSONResponse(content={"response": output}, status_code=200)
@ -114,10 +162,10 @@ async def add_memory(
loader_settings = {"format": "PDF", "source": "DEVICE", "path": [".data"]}
output = await load_documents_to_vectorstore(
session,
user_id=user_id,
content=content,
loader_settings=loader_settings,
user_id = user_id,
content = content,
session = session,
loader_settings = loader_settings,
)
return JSONResponse(content={"response": output}, status_code=200)
@ -216,7 +264,7 @@ async def user_query_classfier(payload: Payload):
# Execute the query - replace this with the actual execution method
async with session_scope(session=AsyncSessionLocal()) as session:
from cognitive_architecture.classifiers.classify_user_input import (
from cognee.classifiers.classify_user_input import (
classify_user_query,
)
@ -244,7 +292,7 @@ async def drop_db(payload: Payload):
else:
pass
from cognitive_architecture.database.create_database import (
from cognee.database.create_database import (
drop_database,
create_admin_engine,
)
@ -262,7 +310,7 @@ async def drop_db(payload: Payload):
else:
pass
from cognitive_architecture.database.create_database import (
from cognee.database.create_database import (
create_database,
create_admin_engine,
)

571
cognee - Get Started.ipynb Normal file

File diff suppressed because one or more lines are too long

118
cognee.ipynb Normal file
View file

@ -0,0 +1,118 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "c4d5a399",
"metadata": {},
"outputs": [],
"source": [
"from os import listdir, path\n",
"from uuid import uuid5, UUID\n",
"from cognee import add\n",
"\n",
"data_path = path.abspath(\".data\")\n",
"\n",
"results = await add(data_path, \"izmene\")\n",
"for result in results:\n",
" print(result)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47edce91",
"metadata": {},
"outputs": [],
"source": [
"import duckdb\n",
"from cognee.root_dir import get_absolute_path\n",
"\n",
"dataset_name = \"pdf_files\"\n",
"\n",
"db_path = get_absolute_path(\"./data/cognee\")\n",
"db_location = db_path + \"/cognee.duckdb\"\n",
"print(db_location)\n",
"\n",
"db = duckdb.connect(db_location)\n",
"\n",
"tables = db.sql(\"SELECT DISTINCT schema_name FROM duckdb_tables();\").df()\n",
"print(list(filter(lambda table_name: table_name.endswith('staging') is False, tables.to_dict()[\"schema_name\"].values())))\n",
"\n",
"# izmene = db.sql(f\"SELECT * FROM izmene.file_metadata;\")\n",
"\n",
"# print(izmene)\n",
"\n",
"# pravilnik = db.sql(f\"SELECT * FROM pravilnik.file_metadata;\")\n",
"\n",
"# print(pravilnik)\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "607bf624",
"metadata": {},
"outputs": [],
"source": [
"from os import path, listdir\n",
"from cognee import cognify, list_datasets\n",
"from cognee.utils import render_graph\n",
"\n",
"print(list_datasets())\n",
"\n",
"graph = await cognify(\"izmene\")\n",
"\n",
"graph_url = await render_graph(graph, graph_type = \"networkx\")\n",
"print(graph_url)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6edb9cac",
"metadata": {},
"outputs": [],
"source": [
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client\n",
"from cognee.shared.data_models import GraphDBType\n",
"from cognee import search, SearchType\n",
"\n",
"graph_client = get_graph_client(GraphDBType.NETWORKX)\n",
"await graph_client.load_graph_from_file()\n",
"\n",
"graph = graph_client.graph\n",
"\n",
"query_params = {\n",
" SearchType.SIMILARITY: {'query': 'nacin vrsenja kontrole tehnicke dokumentacije'}\n",
"}\n",
"\n",
"results = await search(graph, query_params)\n",
"print(results)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

4
cognee/__init__.py Normal file
View file

@ -0,0 +1,4 @@
from .api.v1.add.add import add
from .api.v1.cognify.cognify import cognify
from .api.v1.list_datasets.list_datasets import list_datasets
from .api.v1.search.search import search, SearchType

View file

@ -0,0 +1 @@
from .add import add

91
cognee/api/v1/add/add.py Normal file
View file

@ -0,0 +1,91 @@
from typing import List, Union
from os import path, listdir
import asyncio
import dlt
import duckdb
from unstructured.cleaners.core import clean
from cognee.root_dir import get_absolute_path
import cognee.modules.ingestion as ingestion
from cognee.infrastructure.files import get_file_metadata
from cognee.infrastructure.files.storage import LocalStorage
async def add(file_paths: Union[str, List[str]], dataset_name: str = None):
if isinstance(file_paths, str):
# Directory path provided, we need to extract the file paths and dataset name
def list_dir_files(root_dir_path: str, parent_dir: str = "root"):
datasets = {}
for file_or_dir in listdir(root_dir_path):
if path.isdir(path.join(root_dir_path, file_or_dir)):
dataset_name = file_or_dir if parent_dir == "root" else parent_dir + "." + file_or_dir
dataset_name = clean(dataset_name.replace(" ", "_"))
nested_datasets = list_dir_files(path.join(root_dir_path, file_or_dir), dataset_name)
for dataset in nested_datasets:
datasets[dataset] = nested_datasets[dataset]
else:
if parent_dir not in datasets:
datasets[parent_dir] = []
datasets[parent_dir].append(path.join(root_dir_path, file_or_dir))
return datasets
datasets = list_dir_files(file_paths)
results = []
for key in datasets:
if dataset_name is not None and not key.startswith(dataset_name):
continue
results.append(add(datasets[key], dataset_name = key))
return await asyncio.gather(*results)
db_path = get_absolute_path("./data/cognee")
db_location = f"{db_path}/cognee.duckdb"
LocalStorage.ensure_directory_exists(db_path)
db = duckdb.connect(db_location)
destination = dlt.destinations.duckdb(
credentials = db,
)
pipeline = dlt.pipeline(
pipeline_name = "file_load_from_filesystem",
destination = destination,
)
@dlt.resource(standalone = True, merge_key = "id")
def data_resources(file_paths: str):
for file_path in file_paths:
with open(file_path.replace("file://", ""), mode = "rb") as file:
classified_data = ingestion.classify(file)
data_id = ingestion.identify(classified_data)
file_metadata = get_file_metadata(classified_data.get_data())
yield {
"id": data_id,
"name": file_metadata["name"],
"file_path": file_metadata["file_path"],
"extension": file_metadata["extension"],
"mime_type": file_metadata["mime_type"],
"keywords": "|".join(file_metadata["keywords"]),
}
run_info = pipeline.run(
data_resources(file_paths),
table_name = "file_metadata",
dataset_name = dataset_name,
write_disposition = "merge",
)
return run_info

View file

@ -0,0 +1,53 @@
import asyncio
from uuid import UUID, uuid4
from typing import Union, BinaryIO, List
import cognee.modules.ingestion as ingestion
from cognee.infrastructure import infrastructure_config
class DatasetException(Exception):
message: str
def __init__(self, message: str):
self.message = message
async def add_standalone(
data: Union[str, BinaryIO, List[Union[str, BinaryIO]]],
dataset_id: UUID = uuid4(),
dataset_name: str = None
):
db_engine = infrastructure_config.get_config()["database_engine"]
if db_engine.is_db_done is not True:
await db_engine.ensure_tables()
if not data:
raise DatasetException("Data must be provided to cognee.add(data: str)")
if isinstance(data, list):
promises = []
for data_item in data:
promises.append(add_standalone(data_item, dataset_id, dataset_name))
results = await asyncio.gather(*promises)
return results
if is_data_path(data):
with open(data.replace("file://", ""), "rb") as file:
return await add_standalone(file, dataset_id, dataset_name)
classified_data = ingestion.classify(data)
data_id = ingestion.identify(classified_data)
await ingestion.save(dataset_id, dataset_name, data_id, classified_data)
return dataset_id
# await ingestion.vectorize(dataset_id, dataset_name, data_id, classified_data)
def is_data_path(data: str) -> bool:
return False if not isinstance(data, str) else data.startswith("file://")

View file

@ -0,0 +1,21 @@
from typing import List
from enum import Enum
from cognee.modules.users.memory import create_information_points, is_existing_memory
class MemoryType(Enum):
GRAPH = "GRAPH"
VECTOR = "VECTOR"
RELATIONAL = "RELATIONAL"
class MemoryException(Exception):
message: str
def __init__(self, message: str):
self.message = message
async def remember(user_id: str, memory_name: str, payload: List[str]):
if await is_existing_memory(memory_name) is False:
raise MemoryException(f"Memory with the name \"{memory_name}\" doesn't exist.")
await create_information_points(memory_name, payload)

View file

@ -0,0 +1,164 @@
import asyncio
# import logging
from typing import List
from qdrant_client import models
import instructor
from openai import OpenAI
from unstructured.cleaners.core import clean
from unstructured.partition.pdf import partition_pdf
from cognee.infrastructure.databases.vector.qdrant.QDrantAdapter import CollectionConfig
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.graph.add_classification_nodes import add_classification_nodes
from cognee.modules.cognify.graph.add_node_connections import add_node_connection, graph_ready_output, \
connect_nodes_in_graph, extract_node_descriptions
from cognee.modules.cognify.graph.add_propositions import append_to_graph
from cognee.modules.cognify.llm.add_node_connection_embeddings import process_items
from cognee.modules.cognify.vector.batch_search import adapted_qdrant_batch_search
from cognee.modules.cognify.vector.add_propositions import add_propositions
from cognee.config import Config
from cognee.modules.cognify.llm.classify_content import classify_into_categories
from cognee.modules.cognify.llm.content_to_cog_layers import content_to_cog_layers
from cognee.modules.cognify.llm.generate_graph import generate_graph
from cognee.shared.data_models import DefaultContentPrediction, KnowledgeGraph, DefaultCognitiveLayer
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.vector.get_vector_database import get_vector_database
from cognee.infrastructure.databases.relational import DuckDBAdapter
from cognee.modules.cognify.graph.add_document_node import add_document_node
from cognee.modules.cognify.graph.initialize_graph import initialize_graph
config = Config()
config.load()
aclient = instructor.patch(OpenAI())
USER_ID = "default_user"
async def cognify(dataset_name: str = "root"):
"""This function is responsible for the cognitive processing of the content."""
db = DuckDBAdapter()
files_metadata = db.get_files_metadata(dataset_name)
awaitables = []
await initialize_graph(USER_ID)
for file_metadata in files_metadata:
with open(file_metadata["file_path"], "rb") as file:
elements = partition_pdf(file = file, strategy = "fast")
text = "\n".join(map(lambda element: clean(element.text), elements))
awaitables.append(process_text(text, file_metadata))
graphs = await asyncio.gather(*awaitables)
return graphs[0]
async def process_text(input_text: str, file_metadata: dict):
print(f"Processing document ({file_metadata['id']})")
classified_categories = []
try:
# Classify the content into categories
classified_categories = await classify_into_categories(
input_text,
"classify_content.txt",
DefaultContentPrediction
)
file_metadata["categories"] = list(map(lambda category: category["layer_name"], classified_categories))
except Exception as e:
print(e)
raise e
await add_document_node(f"DefaultGraphModel:{USER_ID}", file_metadata)
print(f"Document ({file_metadata['id']}) categorized: {file_metadata['categories']}")
cognitive_layers = await content_to_cog_layers(
"generate_cog_layers.txt",
classified_categories[0],
response_model = DefaultCognitiveLayer
)
cognitive_layers = [layer_subgroup.name for layer_subgroup in cognitive_layers.cognitive_layers]
async def generate_graph_per_layer(text_input: str, layers: List[str], response_model: KnowledgeGraph = KnowledgeGraph):
generate_graphs_awaitables = [generate_graph(text_input, "generate_graph_prompt.txt", {"layer": layer}, response_model) for layer in
layers]
return await asyncio.gather(*generate_graphs_awaitables)
# Run the async function for each set of cognitive layers
layer_graphs = await generate_graph_per_layer(input_text, cognitive_layers)
# print(layer_graphs)
print(f"Document ({file_metadata['id']}) layer graphs created")
# G = await create_semantic_graph(graph_model_instance)
await add_classification_nodes(f"DOCUMENT:{file_metadata['id']}", classified_categories[0])
unique_layer_uuids = await append_to_graph(layer_graphs, classified_categories[0])
print(f"Document ({file_metadata['id']}) layers connected")
graph_client = get_graph_client(GraphDBType.NETWORKX)
await graph_client.load_graph_from_file()
graph = graph_client.graph
# # Extract the node descriptions
node_descriptions = await extract_node_descriptions(graph.nodes(data = True))
# print(node_descriptions)
unique_layer_uuids = set(node["layer_decomposition_uuid"] for node in node_descriptions)
collection_config = CollectionConfig(
vector_config = {
"content": models.VectorParams(
distance = models.Distance.COSINE,
size = 3072
)
},
)
try:
for layer in unique_layer_uuids:
db = get_vector_database()
await db.create_collection(layer, collection_config)
except Exception as e:
print(e)
await add_propositions(node_descriptions)
grouped_data = await add_node_connection(node_descriptions)
# print("we are here, grouped_data", grouped_data)
llm_client = get_llm_client()
relationship_dict = await process_items(grouped_data, unique_layer_uuids, llm_client)
# print("we are here", relationship_dict[0])
results = await adapted_qdrant_batch_search(relationship_dict, db)
# print(results)
relationship_d = graph_ready_output(results)
# print(relationship_d)
connect_nodes_in_graph(graph, relationship_d)
print(f"Document ({file_metadata['id']}) processed")
return graph
if __name__ == "__main__":
asyncio.run(cognify("""In the nicest possible way, Britons have always been a bit silly about animals. “Keeping pets, for the English, is not so much a leisure activity as it is an entire way of life,” wrote the anthropologist Kate Fox in Watching the English, nearly 20 years ago. Our dogs, in particular, have been an acceptable outlet for emotions and impulses we otherwise keep strictly controlled our latent desire to be demonstratively affectionate, to be silly and chat to strangers. If this seems like an exaggeration, consider the different reactions youd get if you struck up a conversation with someone in a park with a dog, versus someone on the train.
Indeed, British society has been set up to accommodate these four-legged ambassadors. In the UK unlike Australia, say, or New Zealand dogs are not just permitted on public transport but often openly encouraged. Many pubs and shops display waggish signs, reading, Dogs welcome, people tolerated, and have treat jars on their counters. The other day, as I was waiting outside a cafe with a friends dog, the barista urged me to bring her inside.
For years, Britons non-partisan passion for animals has been consistent amid dwindling common ground. But lately, rather than bringing out the best in us, our relationship with dogs is increasingly revealing us at our worst and our supposed best friends are paying the price.
As with so many latent traits in the national psyche, it all came unleashed with the pandemic, when many people thought they might as well make the most of all that time at home and in local parks with a dog. Between 2019 and 2022, the number of pet dogs in the UK rose from about nine million to 13 million. But theres long been a seasonal surge around this time of year, substantial enough for the Dogs Trust charity to coin its famous slogan back in 1978: A dog is for life, not just for Christmas.
"""))

View file

@ -0,0 +1,6 @@
from cognee.infrastructure.databases.relational import DuckDBAdapter
def list_datasets():
db = DuckDBAdapter()
return db.get_datasets()

View file

@ -0,0 +1,63 @@
""" This module contains the search function that is used to search for nodes in the graph."""
import asyncio
from enum import Enum, auto
from typing import Dict, Any, Callable, List
from cognee.modules.search.graph.search_adjacent import search_adjacent
from cognee.modules.search.vector.search_similarity import search_similarity
from cognee.modules.search.graph.search_categories import search_categories
from cognee.modules.search.graph.search_neighbour import search_neighbour
class SearchType(Enum):
ADJACENT = auto()
SIMILARITY = auto()
CATEGORIES = auto()
NEIGHBOR = auto()
async def search(graph, query_params: Dict[SearchType, Dict[str, Any]]) -> List:
search_functions: Dict[SearchType, Callable] = {
SearchType.ADJACENT: search_adjacent,
SearchType.SIMILARITY: search_similarity,
SearchType.CATEGORIES: search_categories,
SearchType.NEIGHBOR: search_neighbour,
}
results = []
# Create a list to hold all the coroutine objects
search_tasks = []
for search_type, params in query_params.items():
search_func = search_functions.get(search_type)
if search_func:
# Schedule the coroutine for execution and store the task
full_params = {**params, 'graph': graph}
task = search_func(**full_params)
search_tasks.append(task)
# Use asyncio.gather to run all scheduled tasks concurrently
search_results = await asyncio.gather(*search_tasks)
# Update the results set with the results from all tasks
for search_result in search_results:
results.append(search_result)
return results
# if __name__ == "__main__":
# import asyncio
# query_params = {
# SearchType.SIMILARITY: {'query': 'your search query here'}
# }
# async def main():
# graph_client = get_graph_client(GraphDBType.NETWORKX)
# await graph_client.load_graph_from_file()
# graph = graph_client.graph
# results = await search(graph, query_params)
# print(results)
# asyncio.run(main())

View file

@ -1,26 +1,24 @@
""" This module contains the classifiers for the documents. """
import json
import logging
from langchain.prompts import ChatPromptTemplate
import json
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
async def classify_documents(query: str, document_id: str, content: str):
"""Classify the documents based on the query and content."""
document_context = content
logging.info("This is the document context", document_context)
logging.info("This is the document context %s", document_context)
llm = ChatOpenAI(temperature=0, model=config.model)
prompt_classify = ChatPromptTemplate.from_template(

View file

@ -1,8 +1,9 @@
""" This module contains the function to classify a summary of a document. """
import json
import logging
from langchain.prompts import ChatPromptTemplate
import json
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
@ -12,11 +13,6 @@ config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
async def classify_summary(query, document_summaries):
"""Classify the documents based on the query and content."""
llm = ChatOpenAI(temperature=0, model=config.model)

View file

@ -1,16 +1,13 @@
""" This module contains the classifiers for the documents. """
import json
import logging
from langchain.prompts import ChatPromptTemplate
import json
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
@ -20,7 +17,7 @@ async def classify_user_input(query, input_type):
""" Classify the user input based on the query and input type."""
llm = ChatOpenAI(temperature=0, model=config.model)
prompt_classify = ChatPromptTemplate.from_template(
"""You are a classifier.
"""You are a classifier.
Determine with a True or False if the following input: {query},
is relevant for the following memory category: {input_type}"""
)
@ -52,4 +49,4 @@ async def classify_user_input(query, input_type):
logging.info("Relevant summary is %s", arguments_dict.get("DocumentSummary", None))
InputClassification = arguments_dict.get("InputClassification", None)
logging.info("This is the classification %s", InputClassification)
return InputClassification
return InputClassification

View file

@ -1,19 +1,19 @@
""" This module contains the function to classify the user query. """
from langchain.prompts import ChatPromptTemplate
import json
from langchain.prompts import ChatPromptTemplate
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from ..config import Config
from ..database.vectordb.loaders.loaders import _document_loader
config = Config()
config.load()
OPENAI_API_KEY = config.openai_key
async def classify_user_query(query, context, document_types):
"""Classify the user query based on the context and document types."""
llm = ChatOpenAI(temperature=0, model=config.model)

View file

@ -27,24 +27,31 @@ class Config:
)
)
db_path = Path(__file__).resolve().parent / "database/data"
db_path = str(Path(__file__).resolve().parent / "data/system")
vectordb: str = os.getenv("VECTORDB", "weaviate")
qdrant_path: str = os.getenv("QDRANT_PATH")
qdrant_url: str = os.getenv("QDRANT_URL")
qdrant_api_key: str = os.getenv("QDRANT_API_KEY")
db_type: str = os.getenv("DB_TYPE", "sqlite")
db_name: str = os.getenv("DB_NAME", "cognee.db")
db_name: str = os.getenv("DB_NAME", "cognee.sqlite")
db_host: str = os.getenv("DB_HOST", "localhost")
db_port: str = os.getenv("DB_PORT", "5432")
db_user: str = os.getenv("DB_USER", "cognee")
db_password: str = os.getenv("DB_PASSWORD", "cognee")
sqlalchemy_logging: bool = os.getenv("SQLALCHEMY_LOGGING", True)
graph_name = os.getenv("GRAPH_NAME", "cognee_graph.pkl")
graph_filename = os.getenv("GRAPH_NAME", "cognee_graph.pkl")
# Model parameters
model: str = "gpt-4-1106-preview"
model: str = "gpt-4-0125-preview"
# model: str = "gpt-3.5-turbo"
model_endpoint: str = "openai"
openai_key: Optional[str] = os.getenv("OPENAI_API_KEY")
openai_temperature: float = float(os.getenv("OPENAI_TEMPERATURE", 0.0))
graphistry_username = os.getenv("GRAPHISTRY_USERNAME")
graphistry_password = os.getenv("GRAPHISTRY_PASSWORD")
# Embedding parameters
embedding_model: str = "openai"
embedding_dim: int = 1536

View file

@ -1,13 +1,11 @@
"""This module provides functionalities for creating and managing databases."""
import os
from contextlib import asynccontextmanager
from sqlalchemy import create_engine, text
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine
from config import Config
from database.relationaldb.database import Base, get_sqlalchemy_database_url
from database.relationaldb.models import memory, metadatas, operation, sessions, user, docs
from cognee.config import Config
from cognee.database.relationaldb.database import Base, get_sqlalchemy_database_url
globalConfig = Config()
@ -58,10 +56,8 @@ class DatabaseManager:
async def create_tables(self):
"""Create tables based on the SQLAlchemy Base metadata."""
try:
async with self.engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
async with self.engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
except Exception as e:
print(e)
raise e

View file

@ -1,4 +1,3 @@
import logging
import os
from neo4j import AsyncSession
@ -6,32 +5,23 @@ from neo4j.exceptions import Neo4jError
print(os.getcwd())
import networkx as nx
from langchain.graphs import Neo4jGraph
import os
import openai
import instructor
from openai import OpenAI
from openai import AsyncOpenAI
import pickle
from abc import ABC, abstractmethod
# Adds response_model to ChatCompletion
# Allows the return of Pydantic model rather than raw JSON
from pydantic import BaseModel, Field
from typing import List, Dict, Optional
from ...utils import (
format_dict,
append_uuid_to_variable_names,
create_edge_variable_mapping,
create_node_variable_mapping,
get_unsumarized_vector_db_namespace,
)
from ...llm.queries import generate_summary, generate_graph
from cognee.infrastructure.llm.openai.queries import generate_summary, generate_graph
import logging
from neo4j import AsyncGraphDatabase
from contextlib import asynccontextmanager
@ -45,11 +35,8 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
from ...config import Config
from ...shared.data_models import (
Node,
Edge,
KnowledgeGraph,
GraphQLQuery,
MemorySummary,
)
config = Config()

View file

@ -1,6 +1,6 @@
import pickle
from pathlib import Path
from cognitive_architecture.config import Config
from cognee.config import Config
import networkx as nx
config = Config()
config = config.load()

View file

@ -3,7 +3,7 @@ from pathlib import Path
# from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from sqlalchemy.orm import declarative_base
from cognitive_architecture.config import Config
from cognee.config import Config
globalConfig = Config()

View file

@ -2,11 +2,8 @@
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean
from sqlalchemy.orm import relationship
import os
import sys
from ..database import Base
class DocsModel(Base):
""" Docs model"""
__tablename__ = "docs"

View file

@ -1,6 +1,6 @@
""" This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
from datetime import datetime
from sqlalchemy import Column, String, DateTime, ForeignKey
from sqlalchemy import Column, String, DateTime, ForeignKey, UUID
from sqlalchemy.orm import relationship
from ..database import Base
@ -9,7 +9,7 @@ class MemoryModel(Base):
""" Memory model"""
__tablename__ = "memories"
id = Column(String, primary_key=True)
id = Column(UUID, primary_key=True)
user_id = Column(String, ForeignKey("users.id"), index=True)
operation_id = Column(String, ForeignKey("operations.id"), index=True)
memory_name = Column(String, nullable=True)

View file

@ -16,12 +16,12 @@ from langchain.retrievers import WeaviateHybridSearchRetriever
from weaviate.gql.get import HybridFusion
from cognitive_architecture.database.relationaldb.models.sessions import Session
from cognitive_architecture.database.relationaldb.models.metadatas import MetaDatas
from cognitive_architecture.database.relationaldb.models.operation import Operation
from cognitive_architecture.database.relationaldb.models.docs import DocsModel
from cognee.database.relationaldb.models.sessions import Session
from cognee.database.relationaldb.models.metadatas import MetaDatas
from cognee.database.relationaldb.models.operation import Operation
from cognee.database.relationaldb.models.docs import DocsModel
from sqlalchemy.orm import sessionmaker
from cognitive_architecture.database.relationaldb.database import engine
from cognee.database.relationaldb.database import engine
from typing import Optional
import time
@ -31,7 +31,7 @@ tracemalloc.start()
from datetime import datetime
from langchain.embeddings.openai import OpenAIEmbeddings
from cognitive_architecture.database.vectordb.vectordb import (
from cognee.database.vectordb.vectordb import (
PineconeVectorDB,
WeaviateVectorDB,
LanceDB,
@ -41,7 +41,7 @@ import uuid
import weaviate
from marshmallow import Schema, fields
import json
from cognitive_architecture.database.vectordb.vector_db_type import VectorDBType
from cognee.database.vectordb.vector_db_type import VectorDBType
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

View file

@ -2,7 +2,7 @@
import re
import logging
from cognitive_architecture.database.vectordb.chunkers.chunk_strategy import ChunkStrategy
from cognee.database.vectordb.chunkers.chunk_strategy import ChunkStrategy
from langchain.text_splitter import RecursiveCharacterTextSplitter

View file

@ -3,8 +3,8 @@ import fitz
import os
import sys
from cognitive_architecture.database.vectordb.chunkers.chunkers import chunk_data
from cognitive_architecture.shared.language_processing import (
from cognee.database.vectordb.chunkers.chunkers import chunk_data
from cognee.shared.language_processing import (
translate_text,
detect_language,
)
@ -148,7 +148,7 @@ async def _document_loader(observation: str, loader_settings: dict):
# file_content += page.get_text()
# pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
# chunk_overlap=chunk_overlap)
# from cognitive_architecture.shared.language_processing import translate_text,detect_language
# from cognee.shared.language_processing import translate_text,detect_language
#
# if detect_language(pages) != "en":
# logging.info("Current Directory 3")
@ -169,7 +169,7 @@ async def _document_loader(observation: str, loader_settings: dict):
# pages = chunk_data(chunk_strategy=loader_strategy, source_data=file_content, chunk_size=chunk_size,
# chunk_overlap=chunk_overlap)
#
# from cognitive_architecture.shared.language_processing import translate_text, detect_language
# from cognee.shared.language_processing import translate_text, detect_language
#
# if detect_language(pages) != "en":
# logging.info("Current Directory 3")
@ -196,7 +196,7 @@ async def _document_loader(observation: str, loader_settings: dict):
# pages = chunk_data(chunk_strategy=loader_strategy, source_data=str(documents), chunk_size=chunk_size,
# chunk_overlap=chunk_overlap)
# logging.info("Documents: %s", documents)
# from cognitive_architecture.shared.language_processing import translate_text, detect_language
# from cognee.shared.language_processing import translate_text, detect_language
#
# if detect_language(pages) != "en":
# logging.info("Current Directory 3")

View file

@ -3,7 +3,7 @@ import logging
from langchain.text_splitter import RecursiveCharacterTextSplitter
from marshmallow import Schema, fields
from cognitive_architecture.database.vectordb.loaders.loaders import _document_loader
from cognee.database.vectordb.loaders.loaders import _document_loader
# Add the parent directory to sys.path
@ -186,7 +186,7 @@ class WeaviateVectorDB(VectorDB):
)
else:
chunk_count = 0
from cognitive_architecture.database.vectordb.chunkers.chunkers import (
from cognee.database.vectordb.chunkers.chunkers import (
chunk_data,
)

View file

@ -0,0 +1,21 @@
from cognee.config import Config
from .databases.relational import SqliteEngine, DatabaseEngine
config = Config()
config.load()
class InfrastructureConfig():
    """Process-wide holder for shared infrastructure services.

    The database engine is created lazily on first access and can be
    swapped out at runtime via set_config (e.g. in tests).
    """
    database_engine: DatabaseEngine = None

    def get_config(self) -> dict:
        """Return the active configuration, building the default engine on first use."""
        if self.database_engine is None:
            self.database_engine = SqliteEngine(config.db_path, config.db_name)

        return dict(database_engine = self.database_engine)

    def set_config(self, new_config: dict):
        """Install the engine supplied by the caller as the active one."""
        self.database_engine = new_config["database_engine"]

# Shared singleton used across the package.
infrastructure_config = InfrastructureConfig()

View file

@ -0,0 +1 @@
from .InfrastructureConfig import infrastructure_config

View file

@ -0,0 +1,4 @@
from .models.Data import Data
from .models.Dataset import Dataset
from .models.DatasetData import DatasetData
from .add_data_to_dataset import add_data_to_dataset

View file

@ -0,0 +1,45 @@
import logging
from . import Dataset, Data
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.relational import DatabaseEngine
from cognee.infrastructure.files import remove_file_from_storage
logger = logging.getLogger(__name__)
async def add_data_to_dataset(dataset: Dataset, data: Data):
    """Upsert a piece of data into a dataset.

    Four cases are handled, keyed on whether the dataset row and the data
    row already exist in the relational store:
      * both exist: refresh the stored raw-data location (deleting the old
        file first) and, when a new name was supplied, rename the dataset;
      * dataset exists, data is new: attach the new data row to it;
      * data exists, dataset is new: re-home the existing data row into the
        freshly created dataset;
      * both new: create the dataset with the data attached.
    """
    db_engine: DatabaseEngine = infrastructure_config.get_config()["database_engine"]

    # query_entity matches on the entity's primary key, so a truthy scalar()
    # means a row with the same id is already stored.
    existing_dataset = (await db_engine.query_entity(dataset)).scalar()
    existing_data = (await db_engine.query_entity(data)).scalar()

    if existing_dataset:
        if existing_data:
            # The incoming payload replaces the stored file; drop the old one
            # before pointing the row at the new location.
            await remove_old_raw_data(existing_data.raw_data_location)

            def update_raw_data():
                existing_data.raw_data_location = data.raw_data_location

            await db_engine.update(update_raw_data)

            # NOTE(review): existing_dataset was fetched by dataset.id, so the
            # id comparison looks always-true here — confirm intent.
            if existing_dataset.id == dataset.id and dataset.name is not None:
                def update_name(): existing_dataset.name = dataset.name
                await db_engine.update(update_name)
        else:
            await db_engine.update(lambda: existing_dataset.data.append(data))
    else:
        if existing_data:
            await remove_old_raw_data(existing_data.raw_data_location)
            existing_data.raw_data_location = data.raw_data_location
            await db_engine.update(lambda: dataset.data.append(existing_data))
        else:
            await db_engine.update(lambda: dataset.data.append(data))

        await db_engine.create(dataset)
async def remove_old_raw_data(data_location: str):
    """Delete a previously stored raw data file, tolerating failures.

    Removal is best-effort: a missing or locked file must not abort the
    caller's dataset update, so any error is logged and swallowed.
    """
    try:
        await remove_file_from_storage(data_location)
    except Exception:
        # logger.exception records the stack trace, which logger.error dropped;
        # the message itself is unchanged.
        logger.exception("Failed to remove old raw data file: %s", data_location)

View file

@ -0,0 +1,23 @@
from typing import List
from datetime import datetime, timezone
from sqlalchemy.orm import relationship, MappedColumn, Mapped
from sqlalchemy import Column, String, DateTime, UUID, Text, JSON
from cognee.infrastructure.databases.relational import ModelBase
from .DatasetData import DatasetData
class Data(ModelBase):
    """Relational model for a single piece of ingested raw data."""
    __tablename__ = "data"

    id = Column(UUID, primary_key = True)
    name = Column(String, nullable = True)
    description = Column(Text, nullable = True)
    raw_data_location = Column(String)
    meta_data: Mapped[dict] = MappedColumn(type_ = JSON) # metadata is reserved word
    # Fix: pass a callable so the timestamp is evaluated per row at
    # INSERT/UPDATE time; `datetime.now(timezone.utc)` as a bare value is
    # evaluated once at import and stamps every row with module-load time.
    created_at = Column(DateTime, default = lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime, onupdate = lambda: datetime.now(timezone.utc))

    # Many-to-many link to Dataset through the dataset_data association table.
    datasets: Mapped[List["Dataset"]] = relationship(
        secondary = DatasetData.__tablename__,
        back_populates = "data"
    )

View file

@ -0,0 +1,21 @@
from typing import List
from datetime import datetime, timezone
from sqlalchemy.orm import relationship, Mapped
from sqlalchemy import Column, Text, DateTime, UUID
from cognee.infrastructure.databases.relational import ModelBase
from .DatasetData import DatasetData
class Dataset(ModelBase):
    """Relational model for a named collection of data rows."""
    __tablename__ = "dataset"

    id = Column(UUID, primary_key = True)
    name = Column(Text)
    description = Column(Text, nullable = True)
    # Fix: pass a callable so the timestamp is evaluated per row at
    # INSERT/UPDATE time; the bare `datetime.now(timezone.utc)` was evaluated
    # once at import and froze every row's timestamp at module-load time.
    created_at = Column(DateTime, default = lambda: datetime.now(timezone.utc))
    updated_at = Column(DateTime, onupdate = lambda: datetime.now(timezone.utc))

    # Many-to-many link to Data through the dataset_data association table.
    data: Mapped[List["Data"]] = relationship(
        secondary = DatasetData.__tablename__,
        back_populates = "datasets"
    )

View file

@ -0,0 +1,14 @@
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, UUID, ForeignKey
from cognee.infrastructure.databases.relational import ModelBase
class DatasetData(ModelBase):
    """Association table linking datasets and data rows (many-to-many)."""
    __tablename__ = "dataset_data"

    # Fix: pass the callable itself — `uuid4()` generated a single UUID at
    # import time and reused it as the default for every row.
    id = Column(UUID, primary_key = True, default = uuid4)
    # Fix: callable default, evaluated per row instead of once at import.
    created_at = Column(DateTime, default = lambda: datetime.now(timezone.utc))
    dataset_id = Column("dataset", UUID, ForeignKey("dataset.id"), primary_key = True)
    data_id = Column("data", UUID, ForeignKey("data.id"), primary_key = True)

View file

@ -0,0 +1,23 @@
"""Factory function to get the appropriate graph client based on the graph type."""
from cognee.config import Config
from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import GraphDBType
from .graph_db_interface import GraphDBInterface
from .networkx.adapter import NetworXAdapter
# from .neo4j.adapter import Neo4jAdapter
config = Config()
config.load()
def get_graph_client(graph_type: GraphDBType, graph_filename: str=None) -> GraphDBInterface :
    """Factory function to get the appropriate graph client based on the graph type."""
    # Fall back to the configured default graph file when none was given.
    if graph_filename is None:
        graph_filename = get_absolute_path(f"./data/cognee/{config.graph_filename}")

    # Guard-clause dispatch on the requested backend.
    if graph_type == GraphDBType.NETWORKX:
        return NetworXAdapter(filename = graph_filename)

    if graph_type == GraphDBType.NEO4J:
        # A Neo4j adapter exists only as a stub so far.
        raise NotImplementedError("Neo4j adapter is not implemented yet.")

    raise ValueError("Unsupported graph database type.")

View file

@ -0,0 +1,102 @@
from typing import List
from abc import abstractmethod
from typing import Protocol
class GraphDBInterface(Protocol):
    """ Save and Load Graphs """
    # Return the underlying graph object managed by the adapter.
    @abstractmethod
    async def graph(self):
        raise NotImplementedError

    # Persist the whole graph to disk; implementations fall back to their
    # configured default path when file_path is None.
    @abstractmethod
    async def save_graph_to_file(
        self,
        file_path: str = None
    ): raise NotImplementedError

    # Load a previously saved graph; file_path None means the default path.
    @abstractmethod
    async def load_graph_from_file(
        self,
        file_path: str = None
    ): raise NotImplementedError

    # Remove the persisted graph file from disk.
    @abstractmethod
    async def delete_graph_from_file(
        self,
        path: str = None
    ): raise NotImplementedError

    """ CRUD operations on graph nodes """
    # Add a node identified by `id`; extra keyword args become node properties.
    @abstractmethod
    async def add_node(
        self,
        id: str,
        **kwargs
    ): raise NotImplementedError

    # Delete the node with the given id, if present.
    @abstractmethod
    async def delete_node(
        self,
        id: str
    ): raise NotImplementedError

    """ CRUD operations on graph edges """
    # Add an edge between two node ids; extra keyword args become edge properties.
    @abstractmethod
    async def add_edge(
        self,
        from_node: str,
        to_node: str,
        **kwargs
    ): raise NotImplementedError
# @abstractmethod
# async def create_vector_index(
# self,
# collection_name: str,
# vector_index_config: object
# ): raise NotImplementedError
# @abstractmethod
# async def create_data_index(
# self,
# collection_name: str,
# vector_index_config: object
# ): raise NotImplementedError
# """ Data points """
# @abstractmethod
# async def create_data_points(
# self,
# collection_name: str,
# data_points: List[any]
# ): raise NotImplementedError
# @abstractmethod
# async def get_data_point(
# self,
# collection_name: str,
# data_point_id: str
# ): raise NotImplementedError
# @abstractmethod
# async def update_data_point(
# self,
# collection_name: str,
# data_point_id: str,
# payload: object
# ): raise NotImplementedError
# @abstractmethod
# async def delete_data_point(
# self,
# collection_name: str,
# data_point_id: str
# ): raise NotImplementedError

View file

@ -0,0 +1,7 @@
# Fix: use the package-absolute import path; the sibling NetworkX adapter
# imports this interface as cognee.infrastructure.databases.graph...,
# while the bare "databases.graph" prefix is not importable from here.
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface

class Neo4jDB(GraphDBInterface):
    # Placeholder: the Neo4j adapter is not implemented yet — the factory in
    # get_graph_client raises NotImplementedError for GraphDBType.NEO4J.
    pass

View file

@ -0,0 +1,177 @@
"""Adapter for NetworkX graph database."""
import json
import os
import pickle
from datetime import datetime
from typing import Optional, Dict, Any
import aiofiles.os
import aiofiles
import networkx as nx
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
import logging
class NetworXAdapter(GraphDBInterface):
    """NetworkX-backed implementation of GraphDBInterface.

    Keeps an in-memory nx.MultiDiGraph and mirrors every mutation to a
    node-link JSON file on disk.
    """
    _instance = None  # Class variable to store the singleton instance

    def __new__(cls, *args, **kwargs):
        # Singleton: every construction returns the same adapter instance.
        if cls._instance is None:
            cls._instance = super(NetworXAdapter, cls).__new__(cls)
        return cls._instance

    def __init__(self, filename="cognee_graph.pkl"):
        # NOTE(review): __init__ still runs on every NetworXAdapter(...) call
        # even though __new__ returns the singleton, so each construction
        # resets the filename and discards the in-memory graph — confirm
        # this is intended.
        self.filename = filename
        # NOTE(review): this attribute shadows the `async def graph` method
        # below on instances, making that coroutine unreachable; callers are
        # apparently expected to read `adapter.graph` as a plain attribute
        # (see the commented-out usage below).
        self.graph = nx.MultiDiGraph()

    async def graph(self):
        return self.graph
        # G = await client.load_graph_from_file()
        # if G is None:
        #     G = client.graph # Directly access the graph attribute without calling it
        # return G

    async def add_node(self, id: str, **kwargs) -> None:
        """Asynchronously add a node to the graph if it doesn't already exist, with given properties."""
        if not self.graph.has_node(id):
            self.graph.add_node(id, **kwargs)
            # Persist immediately so the on-disk copy never lags memory.
            await self.save_graph_to_file(self.filename)

    async def add_edge(self, from_node: str, to_node: str, **kwargs ) -> None:
        """Asynchronously add an edge between two nodes with optional properties."""
        # properties = properties or {}
        self.graph.add_edge(from_node, to_node, **kwargs)
        await self.save_graph_to_file(self.filename)

    async def delete_node(self, id: str) -> None:
        """Asynchronously delete a node from the graph if it exists."""
        if self.graph.has_node(id):
            self.graph.remove_node(id)
            await self.save_graph_to_file(self.filename)

    async def save_graph_to_file(self, file_path: str=None) -> None:
        """Asynchronously save the graph to a file in JSON format."""
        if not file_path:
            file_path = self.filename
        # node_link_data serializes nodes/edges plus their attributes.
        graph_data = nx.readwrite.json_graph.node_link_data(self.graph)
        async with aiofiles.open(file_path, 'w') as file:
            await file.write(json.dumps(graph_data))

    async def load_graph_from_file(self, file_path: str = None):
        """Asynchronously load the graph from a file in JSON format."""
        if not file_path:
            file_path = self.filename
        try:
            if os.path.exists(file_path):
                async with aiofiles.open(file_path, 'r') as file:
                    graph_data = json.loads(await file.read())
                    self.graph = nx.readwrite.json_graph.node_link_graph(graph_data)
                    return self.graph
            else:
                # Log that the file does not exist and an empty graph is initialized
                logging.warning(f"File {file_path} not found. Initializing an empty graph.")
                self.graph = nx.MultiDiGraph()  # Use MultiDiGraph to keep it consistent with __init__
                return self.graph
        except Exception as e:
            logging.error(f"Failed to load graph from {file_path}: {e}")
            # Consider initializing an empty graph in case of error
            self.graph = nx.MultiDiGraph()
            return self.graph

    async def delete_graph_from_file(self, path: str = None):
        """Asynchronously delete the graph file from the filesystem."""
        if path is None:
            path = self.filename  # Assuming self.filename is defined elsewhere and holds the default graph file path
        try:
            await aiofiles.os.remove(path)  # Asynchronously remove the file
            logging.info("Graph deleted successfully.")
        except Exception as e:
            logging.error(f"Failed to delete graph: {e}")
# async def create(self, user_id, custom_user_properties=None, required_layers=None, default_fields=None, existing_graph=None):
# """Asynchronously create or update a user content graph based on given parameters."""
# # Assume required_layers is a dictionary-like object; use more robust validation in production
# category_name = required_layers['data_type']
# subgroup_names = [required_layers['layer_name']]
#
# # Construct the additional_categories structure
# additional_categories = {category_name: subgroup_names}
#
# # Define default fields for all nodes if not provided
# if default_fields is None:
# default_fields = {
# 'created_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
# 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# }
#
# # Merge custom user properties with default fields; custom properties take precedence
# user_properties = {**default_fields, **(custom_user_properties or {})}
#
# # Default content categories and update with any additional categories provided
# content_categories = {
# "Temporal": ["Historical events", "Schedules and timelines"],
# "Positional": ["Geographical locations", "Spatial data"],
# "Propositions": ["Hypotheses and theories", "Claims and arguments"],
# "Personalization": ["User preferences", "User information"]
# }
#
# content_categories = {
# "Temporal": ["Historical events", "Schedules and timelines"],
# "Positional": ["Geographical locations", "Spatial data"],
# "Propositions": ["Hypotheses and theories", "Claims and arguments"],
# "Personalization": ["User preferences", "User information"]
# }
#
# # Update content categories with any additional categories provided
# if additional_categories:
# content_categories.update(additional_categories)
#
# G = existing_graph if existing_graph else self.graph
#
# # Check if the user node already exists, if not, add the user node with properties
# if not G.has_node(user_id):
# G.add_node(user_id, **user_properties)
#
# # Add or update content category nodes and their edges
# for category, subclasses in content_categories.items():
# category_properties = {**default_fields, 'type': 'category'}
#
# # Add or update the category node
# if not G.has_node(category):
# G.add_node(category, **category_properties)
# G.add_edge(user_id, category, relationship='created')
#
# # Add or update subclass nodes and their edges
# for subclass in subclasses:
# # Using both category and subclass names to ensure uniqueness within categories
# subclass_node_id = f"{category}:{subclass}"
#
# # Check if subclass node exists before adding, based on node content
# if not any(subclass == data.get('content') for _, data in G.nodes(data=True)):
# subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
# G.add_node(subclass_node_id, **subclass_properties)
# G.add_edge(category, subclass_node_id, relationship='includes')
#
# return G
# content_categories.update(additional_categories)
#
# # Ensure the user node exists with properties
# self.graph.add_node(user_id, **user_properties, exist=True)
#
# # Add or update content category nodes and their edges
# for category, subclasses in content_categories.items():
# category_properties = {**default_fields, 'type': 'category'}
# self.graph.add_node(category, **category_properties, exist=True)
# self.graph.add_edge(user_id, category, relationship='created')
#
# # Add or update subclass nodes and their edges
# for subclass in subclasses:
# subclass_node_id = f"{category}:{subclass}"
# subclass_properties = {**default_fields, 'type': 'subclass', 'content': subclass}
# self.graph.add_node(subclass_node_id, **subclass_properties, exist=True)
# self.graph.add_edge(category, subclass_node_id, relationship='includes')
#
# # Save the graph asynchronously after modifications
# # await self.save_graph()
#
# return self.graph

View file

@ -0,0 +1,23 @@
from typing import Protocol
class DatabaseEngine(Protocol):
    """Structural interface for relational database engines.

    Implementations (e.g. SqliteEngine) provide database/table lifecycle
    management plus a minimal persistence helper.
    """
    # Create the database and all model tables if they do not exist yet.
    async def ensure_tables(self):
        pass

    # True when the named database already exists on disk.
    def database_exists(self, db_name: str) -> bool:
        pass

    # Create an empty database with the given name.
    def create_database(self, db_name: str):
        pass

    # Remove the named database.
    def drop_database(self, db_name: str):
        pass

    # True when a table with this name exists in the database.
    async def table_exists(self, table_name: str) -> bool:
        pass

    # Create all tables registered on the declarative base.
    async def create_tables(self):
        pass

    # Persist a new entity.
    async def create(self, data):
        pass

View file

@ -0,0 +1,3 @@
from sqlalchemy.orm import declarative_base

# Shared declarative base: every relational model inherits from this so one
# MetaData object knows all tables (consumed by SqliteEngine.create_tables).
ModelBase = declarative_base()

View file

@ -0,0 +1,4 @@
from .ModelBase import ModelBase
from .DatabaseEngine import DatabaseEngine
from .sqlite.SqliteEngine import SqliteEngine
from .duckdb.DuckDBAdapter import DuckDBAdapter

View file

@ -0,0 +1,22 @@
import duckdb
from cognee.root_dir import get_absolute_path
class DuckDBAdapter():
    """Thin read-only client over the local DuckDB file."""

    def __init__(self):
        # The database lives alongside the rest of the local cognee data.
        base_path = get_absolute_path("./data/cognee")
        self.db_client = duckdb.connect(base_path + "/cognee.duckdb")

    def get_datasets(self):
        """List dataset schemas, excluding dlt staging schemas."""
        schema_info = self.db_client.sql(
            "SELECT DISTINCT schema_name FROM duckdb_tables();"
        ).to_df().to_dict("list")

        return [
            schema_name
            for schema_name in schema_info["schema_name"]
            if not schema_name.endswith('staging')
        ]

    def get_files_metadata(self, dataset_name: str):
        """Return one metadata record per file ingested into the dataset."""
        query_result = self.db_client.sql(f"SELECT * FROM {dataset_name}.file_metadata;")
        return query_result.to_df().to_dict("records")

View file

@ -0,0 +1,74 @@
import uuid
from pathlib import Path
from sqlalchemy import select
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from cognee.config import Config
# from ..relational_db_interface import RelationalDBInterface
from cognee.database.relationaldb.models.memory import MemoryModel
config = Config()
config.load()
class RelationalDBAdapter():
    """Async SQLAlchemy gateway for the relational store.

    Builds a connection URL from the loaded Config and exposes a session
    factory plus memory-table helpers.
    """
    session_maker: async_sessionmaker[AsyncSession]

    def __init__(self):
        engine = create_async_engine(
            self.get_database_url(),
            pool_recycle = 3600,
            echo = config.sqlalchemy_logging,
        )

        self.create_session = async_sessionmaker[AsyncSession](
            bind = engine,
            class_ = AsyncSession,
            expire_on_commit = False,
        )

    def get_database_url(
        self,
        # Defaults are read from config at class-definition time, so the
        # module-level config.load() above must run before this class is defined.
        db_type = config.db_type,
        db_name = config.db_name,
        db_path = config.db_path,
        user = config.db_user,
        password = config.db_password,
        host = config.db_host,
        port = config.db_port,
    ):
        """Build the SQLAlchemy connection URL for the configured backend.

        Raises ValueError for unsupported backends or missing PostgreSQL
        credentials.
        """
        if db_type == "sqlite":
            db_path = (Path(db_path) / db_name).absolute()
            return f"sqlite+aiosqlite:///{db_path}" # SQLite uses file path
        elif db_type == "duckdb":
            db_path = (Path(db_path) / db_name).absolute()
            # NOTE(review): "duckdb+aiosqlite" looks wrong — aiosqlite is the
            # SQLite async driver, not a DuckDB dialect; confirm intended URL.
            return f"duckdb+aiosqlite:///{db_path}"
        elif db_type == "postgresql":
            # Ensure optional parameters are handled gracefully
            port_str = f":{port}" if port else ""
            password_str = f":{password}" if password else ""
            if not all([user, host]):
                raise ValueError("User and host are required for PostgreSQL connections.")
            return f"postgresql+asyncpg://{user}{password_str}@{host}{port_str}/{db_name}"
        else:
            raise ValueError(f"Unsupported database type: {db_type}")

    async def add_memory(self, user_id: str, memory_name: str):
        """Insert a new memory row for the given user.

        NOTE(review): this returns session.add(...), which is always None;
        callers needing the key should use the generated memory_id instead.
        """
        memory_id = uuid.uuid4()
        async with self.create_session() as session:
            async with session.begin():
                return session.add(MemoryModel(
                    id = memory_id,
                    user_id = user_id,
                    memory_name = memory_name,
                ))

    async def get_memory_by_name(self, memory_name: str):
        """Return the id of the memory with the given name, or None if absent."""
        async with self.create_session() as session:
            async with session.begin():
                result = await session.execute(
                    select(MemoryModel.id)
                    .where(MemoryModel.memory_name == memory_name)
                )
                memory = result.scalars().one_or_none()
                return memory

View file

@ -0,0 +1,26 @@
# from datetime import datetime, timezone
# from sqlalchemy.orm import relationship
# # from sqlalchemy.orm import DeclarativeBase
# from sqlalchemy import Column, String, DateTime, ForeignKey
# from cognee.database.relationaldb.database import Base
# class MemoryModel(Base):
# __tablename__ = "memories_v1"
# id = Column(String, primary_key = True)
# user_id = Column(String, ForeignKey("users.id"), index = True)
# memory_name = Column(String, nullable = True)
# memory_category = Column(String, nullable = True)
# created_at = Column(DateTime, default = datetime.now(timezone.utc))
# updated_at = Column(DateTime, onupdate = datetime.now(timezone.utc))
# methods_list = Column(String, nullable = True)
# attributes_list = Column(String, nullable = True)
# user = relationship("User", back_populates="memories")
# metadatas = relationship(
# "MetaDatas", back_populates="memory", cascade="all, delete-orphan"
# )
# def __repr__(self):
# return f"<Memory(id={self.id}, user_id={self.user_id}, created_at={self.created_at}, updated_at={self.updated_at})>"

View file

@ -0,0 +1,26 @@
from abc import abstractmethod
from typing import Protocol, TypeVar, Type, List
# Generic placeholder for whatever row/entity type an implementation stores.
RowDataType = TypeVar('RowDataType')

class RelationalDBInterface(Protocol):
    """Generic CRUD contract for relational database adapters."""
    # Create a new database file/catalog at the given path.
    @abstractmethod
    async def create_database(self, database_name: str, database_path: str): raise NotImplementedError

    # Create a table from an implementation-specific configuration object.
    @abstractmethod
    async def create_table(self, table_name: str, table_config: object): raise NotImplementedError

    # Insert a single row.
    @abstractmethod
    async def add_row(self, table_name: str, row_data: Type[RowDataType]): raise NotImplementedError

    # Insert multiple rows in one call.
    @abstractmethod
    async def add_rows(self, table_name: str, rows_data: List[Type[RowDataType]]): raise NotImplementedError

    # Fetch a row by its id.
    @abstractmethod
    async def get_row(self, table_name: str, row_id: str): raise NotImplementedError

    # Replace the contents of the row with the given id.
    @abstractmethod
    async def update_row(self, table_name: str, row_id: str, row_data: Type[RowDataType]): raise NotImplementedError

    # Delete the row with the given id.
    @abstractmethod
    async def delete_row(self, table_name: str, row_id: str): raise NotImplementedError

View file

@ -0,0 +1,82 @@
import os
import asyncio
from typing import Callable
from sqlalchemy.inspection import inspect
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncEngine, AsyncSession, async_scoped_session
from sqlalchemy.future import select
from cognee.infrastructure.files.storage.LocalStorage import LocalStorage
from ..DatabaseEngine import DatabaseEngine
from ..ModelBase import ModelBase
from ..utils import with_rollback
class SqliteEngine(DatabaseEngine):
    """SQLite-backed implementation of the DatabaseEngine protocol.

    Wraps an async SQLAlchemy engine over a file-based SQLite database and
    provides minimal create/query/update helpers for the data layer.
    """
    db_path: str = None       # directory holding the database file
    db_name: str = None       # database file name
    engine: AsyncEngine = None
    session_maker: Callable[[], async_scoped_session[AsyncSession]] = None
    is_db_done: bool = False  # set once ensure_tables() has completed

    def __init__(self, db_path: str, db_name: str):
        self.db_path = db_path
        self.db_name = db_name
        self.db_location = db_path + "/" + db_name

        self.engine = create_async_engine(
            f"sqlite+aiosqlite:///{self.db_location}",
            pool_recycle = 3600,
            echo = False
        )

        # A fresh task-scoped session per call keeps concurrent tasks isolated.
        self.session_maker = lambda: async_scoped_session(
            async_sessionmaker(
                bind = self.engine,
                class_ = AsyncSession
            ),
            scopefunc = asyncio.current_task
        )

    async def ensure_tables(self):
        """Create the database file and all model tables if missing."""
        if not self.database_exists(self.db_name):
            self.create_database(self.db_name)
        await self.create_tables()
        self.is_db_done = True
        return True

    def database_exists(self, db_name: str) -> bool:
        """True when the named database file exists under db_path."""
        return os.path.exists(self.db_path + "/" + db_name)

    def create_database(self, db_name: str):
        """Touch an empty database file; SQLite initializes it on first connect."""
        LocalStorage.ensure_directory_exists(self.db_path)
        with open(self.db_path + "/" + db_name, mode = "w+", encoding = "utf-8") as file:
            file.write("")

    def drop_database(self, db_name: str):
        # Fix: honor the db_name argument instead of always removing
        # self.db_location — the two only coincide for the engine's own db.
        os.remove(self.db_path + "/" + db_name)

    async def table_exists(self, table_name: str) -> bool:
        """Check whether a table exists.

        Fix: sqlalchemy.inspect() does not accept an AsyncEngine; inspection
        must run on a sync connection via run_sync.
        """
        async with self.engine.connect() as connection:
            return await connection.run_sync(
                lambda sync_connection: inspect(sync_connection).has_table(table_name)
            )

    async def create_tables(self):
        """Create every table registered on the shared declarative base."""
        async with self.engine.begin() as connection:
            return await connection.run_sync(ModelBase.metadata.create_all)

    async def create(self, data):
        """Persist a new entity inside a rolled-back-on-error session scope."""
        async with with_rollback(self.session_maker()) as session:
            session.add(data)

    async def query(self, query_term):
        """Execute an arbitrary SQLAlchemy statement and return its result."""
        async with with_rollback(self.session_maker()) as session:
            return await session.execute(query_term)

    async def query_entity(self, entity):
        """Fetch the stored row matching the given entity's primary key."""
        async with with_rollback(self.session_maker()) as session:
            return await session.execute(
                select(type(entity))
                .where(type(entity).id == entity.id)
            )

    async def update(self, data_update_fn):
        """Run a mutation callback inside a committed session scope."""
        async with with_rollback(self.session_maker()):
            data_update_fn()

View file

@ -0,0 +1 @@
from .with_rollback import with_rollback

View file

@ -0,0 +1,18 @@
import logging
from contextlib import asynccontextmanager
from sqlalchemy.ext.asyncio import async_scoped_session
logger = logging.getLogger(__name__)

@asynccontextmanager
async def with_rollback(session: async_scoped_session):
    """Provide a transactional scope around a series of operations.

    Yields the session, commits on success and rolls back on any error,
    re-raising the original exception. Fix: the scoped session is now
    disposed via remove() in a finally block — previously remove() ran
    only on the success path, leaking the session registry on errors.
    """
    try:
        yield session
        await session.commit()
    except Exception as exception:
        await session.rollback()
        logger.error("Session rolled back due to: %s", str(exception))
        # Bare raise preserves the original traceback.
        raise
    finally:
        await session.remove()

Some files were not shown because too many files have changed in this diff Show more