feat: add create-memory and remember API endpoints
Add the ability to create a new vector memory and store text data points in it using OpenAI embeddings.
parent a6b9c8a5bf, commit 769d6b5080
40 changed files with 2685 additions and 4926 deletions
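For reference, a minimal sketch of how the two new endpoints could be exercised once the API is running on localhost:8000 (as described in the README changes below). The user id and memory name are placeholder values:

```python
import requests

# Create a new vector memory (duplicate names are rejected with a 409).
response = requests.post(
    "http://localhost:8000/create-memory",
    json={
        "user_id": "user-123",       # placeholder
        "memory_name": "my-memory",  # placeholder
        "memory_type": "VECTOR",     # one of GRAPH, VECTOR, RELATIONAL
    },
    timeout=30,
)
print(response.status_code, response.json())

# Store text data points in the memory just created.
response = requests.post(
    "http://localhost:8000/remember",
    json={
        "user_id": "user-123",
        "memory_name": "my-memory",
        "payload": ["Cognee is a memory layer for AI apps."],
    },
    timeout=30,
)
print(response.status_code, response.json())
```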
.gitignore (vendored, 1 change)
@@ -163,3 +163,4 @@ cython_debug/
 #.idea/
 .vscode/
+database/data/
.pylintrc (new file, 638 lines)
@@ -0,0 +1,638 @@
[MAIN]

# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no

# Clear in-memory caches upon conclusion of linting. Useful if running pylint
# in a server-like mode.
clear-cache-post-run=no

# Load and enable all available extensions. Use --list-extensions to see a list
# all available extensions.
#enable-all-extensions=

# In error mode, messages with a category besides ERROR or FATAL are
# suppressed, and no reports are done by default. Error mode is compatible with
# disabling specific errors.
#errors-only=

# Always return a 0 (non-error) status code, even if lint errors are found.
# This is primarily useful in continuous integration scripts.
#exit-zero=

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-allow-list=

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
# for backward compatibility.)
extension-pkg-whitelist=

# Return non-zero exit code if any of these messages/categories are detected,
# even if score is above --fail-under value. Syntax same as enable. Messages
# specified are enabled, while categories only check already-enabled messages.
fail-on=

# Specify a score threshold under which the program will exit with error.
fail-under=10

# Interpret the stdin as a python script, whose filename needs to be passed as
# the module_or_package argument.
#from-stdin=

# Files or directories to be skipped. They should be base names, not paths.
ignore=CVS

# Add files or directories matching the regular expressions patterns to the
# ignore-list. The regex matches against paths and can be in Posix or Windows
# format. Because '\\' represents the directory delimiter on Windows systems,
# it can't be used as an escape character.
ignore-paths=

# Files or directories matching the regular expression patterns are skipped.
# The regex matches against base names, not paths. The default value ignores
# Emacs file locks
ignore-patterns=^\.#

# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=

# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=

# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use, and will cap the count on Windows to
# avoid hangs.
jobs=1

# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100

# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=

# Pickle collected data for later comparisons.
persistent=yes

# Minimum Python version to use for version dependent checks. Will default to
# the version used to run pylint.
py-version=3.12

# Discover python modules and packages in the file system subtree.
recursive=no

# Add paths to the list of the source roots. Supports globbing patterns. The
# source root is an absolute path or a path relative to the current working
# directory used to determine a package namespace for modules located under the
# source root.
source-roots=

# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes

# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no

# In verbose mode, extra non-checker-related info will be displayed.
#verbose=


[BASIC]

# Naming style matching correct argument names.
argument-naming-style=snake_case

# Regular expression matching correct argument names. Overrides argument-
# naming-style. If left empty, argument names will be checked with the set
# naming style.
#argument-rgx=

# Naming style matching correct attribute names.
attr-naming-style=snake_case

# Regular expression matching correct attribute names. Overrides attr-naming-
# style. If left empty, attribute names will be checked with the set naming
# style.
#attr-rgx=

# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
          bar,
          baz,
          toto,
          tutu,
          tata

# Bad variable names regexes, separated by a comma. If names match any regex,
# they will always be refused
bad-names-rgxs=

# Naming style matching correct class attribute names.
class-attribute-naming-style=any

# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style. If left empty, class attribute names will be checked
# with the set naming style.
#class-attribute-rgx=

# Naming style matching correct class constant names.
class-const-naming-style=UPPER_CASE

# Regular expression matching correct class constant names. Overrides class-
# const-naming-style. If left empty, class constant names will be checked with
# the set naming style.
#class-const-rgx=

# Naming style matching correct class names.
class-naming-style=PascalCase

# Regular expression matching correct class names. Overrides class-naming-
# style. If left empty, class names will be checked with the set naming style.
#class-rgx=

# Naming style matching correct constant names.
const-naming-style=UPPER_CASE

# Regular expression matching correct constant names. Overrides const-naming-
# style. If left empty, constant names will be checked with the set naming
# style.
#const-rgx=

# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1

# Naming style matching correct function names.
function-naming-style=snake_case

# Regular expression matching correct function names. Overrides function-
# naming-style. If left empty, function names will be checked with the set
# naming style.
#function-rgx=

# Good variable names which should always be accepted, separated by a comma.
good-names=i,
           j,
           k,
           ex,
           Run,
           _

# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted
good-names-rgxs=

# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no

# Naming style matching correct inline iteration names.
inlinevar-naming-style=any

# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style. If left empty, inline iteration names will be checked
# with the set naming style.
#inlinevar-rgx=

# Naming style matching correct method names.
method-naming-style=snake_case

# Regular expression matching correct method names. Overrides method-naming-
# style. If left empty, method names will be checked with the set naming style.
#method-rgx=

# Naming style matching correct module names.
module-naming-style=snake_case

# Regular expression matching correct module names. Overrides module-naming-
# style. If left empty, module names will be checked with the set naming style.
#module-rgx=

# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=

# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_

# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty

# Regular expression matching correct type alias names. If left empty, type
# alias names will be checked with the set naming style.
#typealias-rgx=

# Regular expression matching correct type variable names. If left empty, type
# variable names will be checked with the set naming style.
#typevar-rgx=

# Naming style matching correct variable names.
variable-naming-style=snake_case

# Regular expression matching correct variable names. Overrides variable-
# naming-style. If left empty, variable names will be checked with the set
# naming style.
#variable-rgx=


[CLASSES]

# Warn about protected attribute access inside special methods
check-protected-access-in-special-methods=no

# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
                      __new__,
                      setUp,
                      asyncSetUp,
                      __post_init__

# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit

# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls

# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=mcs


[DESIGN]

# List of regular expressions of class ancestor names to ignore when counting
# public methods (see R0903)
exclude-too-few-public-methods=

# List of qualified class names to ignore when counting class parents (see
# R0901)
ignored-parents=

# Maximum number of arguments for function / method.
max-args=5

# Maximum number of attributes for a class (see R0902).
max-attributes=7

# Maximum number of boolean expressions in an if statement (see R0916).
max-bool-expr=5

# Maximum number of branch for function / method body.
max-branches=12

# Maximum number of locals for function / method body.
max-locals=15

# Maximum number of parents for a class (see R0901).
max-parents=7

# Maximum number of public methods for a class (see R0904).
max-public-methods=20

# Maximum number of return / yield for function / method body.
max-returns=6

# Maximum number of statements in function / method body.
max-statements=50

# Minimum number of public methods for a class (see R0903).
min-public-methods=2


[EXCEPTIONS]

# Exceptions that will emit a warning when caught.
overgeneral-exceptions=builtins.BaseException,builtins.Exception


[FORMAT]

# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=

# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$

# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4

# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
# tab).
indent-string='    '

# Maximum number of characters on a single line.
max-line-length=100

# Maximum number of lines in a module.
max-module-lines=1000

# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no

# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no


[IMPORTS]

# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=

# Allow explicit reexports by alias from a package __init__.
allow-reexport-from-package=no

# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no

# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=

# Output a graph (.gv or any supported image format) of external dependencies
# to the given file (report RP0402 must not be disabled).
ext-import-graph=

# Output a graph (.gv or any supported image format) of all (i.e. internal and
# external) dependencies to the given file (report RP0402 must not be
# disabled).
import-graph=

# Output a graph (.gv or any supported image format) of internal dependencies
# to the given file (report RP0402 must not be disabled).
int-import-graph=

# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=

# Force import order to recognize a module as part of a third party library.
known-third-party=enchant

# Couples of modules and preferred modules, separated by a comma.
preferred-modules=


[LOGGING]

# The type of string formatting that logging methods do. `old` means using %
# formatting, `new` is for `{}` formatting.
logging-format-style=old

# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging


[MESSAGES CONTROL]

# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
# UNDEFINED.
confidence=HIGH,
           CONTROL_FLOW,
           INFERENCE,
           INFERENCE_FAILURE,
           UNDEFINED

# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=raw-checker-failed,
        bad-inline-option,
        locally-disabled,
        file-ignored,
        suppressed-message,
        useless-suppression,
        deprecated-pragma,
        use-symbolic-message-instead,
        use-implicit-booleaness-not-comparison-to-string,
        use-implicit-booleaness-not-comparison-to-zero,
        missing-module-docstring,
        missing-function-docstring,
        missing-class-docstring

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=


[METHOD_ARGS]

# List of qualified names (i.e., library.method) which require a timeout
# parameter e.g. 'requests.api.get,requests.api.post'
timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request


[MISCELLANEOUS]

# List of note tags to take in consideration, separated by a comma.
notes=FIXME,
      XXX,
      TODO

# Regular expression of note tags to take in consideration.
notes-rgx=


[REFACTORING]

# Maximum number of nested blocks for function / method body
max-nested-blocks=5

# Complete name of functions that never returns. When checking for
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit,argparse.parse_error


[REPORTS]

# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'fatal', 'error', 'warning', 'refactor',
# 'convention', and 'info' which contain the number of messages in each
# category, as well as 'statement' which is the total number of statements
# analyzed. This score is used by the global evaluation report (RP0004).
evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))

# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
msg-template=

# Set the output format. Available formats are: text, parseable, colorized,
# json2 (improved json format), json (old json format) and msvs (visual
# studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
#output-format=

# Tells whether to display a full report or only the messages.
reports=no

# Activate the evaluation score.
score=yes


[SIMILARITIES]

# Comments are removed from the similarity computation
ignore-comments=yes

# Docstrings are removed from the similarity computation
ignore-docstrings=yes

# Imports are removed from the similarity computation
ignore-imports=yes

# Signatures are removed from the similarity computation
ignore-signatures=yes

# Minimum lines number of a similarity.
min-similarity-lines=4


[SPELLING]

# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4

# Spelling dictionary name. No available dictionaries : You need to install
# both the python package and the system dependency for enchant to work.
spelling-dict=

# List of comma separated words that should be considered directives if they
# appear at the beginning of a comment and should not be checked.
spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:

# List of comma separated words that should not be checked.
spelling-ignore-words=

# A path to a file that contains the private dictionary; one word per line.
spelling-private-dict-file=

# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
spelling-store-unknown-words=no


[STRING]

# This flag controls whether inconsistent-quotes generates a warning when the
# character used as a quote delimiter is used inconsistently within a module.
check-quote-consistency=yes

# This flag controls whether the implicit-str-concat should generate a warning
# on implicit string concatenation in sequences defined over several lines.
check-str-concat-over-line-jumps=no


[TYPECHECK]

# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager

# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=

# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes

# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes

# List of symbolic message names to ignore for Mixin members.
ignored-checks-for-mixins=no-member,
                          not-async-context-manager,
                          not-context-manager,
                          attribute-defined-outside-init

# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace

# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes

# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1

# The total number of similar names that should be taken in consideration when
# showing a hint for a missing member.
missing-member-max-choices=1

# Regex pattern to define which classes are considered mixins.
mixin-class-rgx=.*[Mm]ixin

# List of decorators that change the signature of a decorated function.
signature-mutators=


[VARIABLES]

# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=

# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes

# List of names allowed to shadow builtins
allowed-redefined-builtins=

# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
          _cb

# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_

# Argument names that match this expression will be ignored.
ignored-argument-names=_.*|^ignored_|^unused_

# Tells whether we should check for unused import in __init__ files.
init-import=no

# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
README.md (16 changes)
@@ -67,6 +67,8 @@ Try it yourself on Whatsapp with one of our <a href="https://keepi.ai">partners</a>
 
 ## Getting started
 
+### Run with Docker
+
 To run cognee you need to have <a href="https://docs.docker.com/get-docker" target="_blank">Docker</a> installed on your machine.
 
 Run <a href="https://www.cognee.ai" target="_blank">Cognee</a> in a couple of steps:
@@ -74,15 +76,23 @@ Run <a href="https://www.cognee.ai" target="_blank">Cognee</a> in a couple of steps:
 - Run `docker compose up` in order to start graph and relational databases
 - Run `docker compose up cognee` in order to start Cognee
 
 <!-- Send API requests add-memory, user-query-to-graph, document-to-graph-db, user-query-processor to the localhost:8000 -->
 
-## Debugging
+#### Debugging
 To run Cognee with the debugger attached you need to build the Cognee image with the `DEBUG` flag set to true.
 
 - `docker compose build cognee --no-cache --build-arg DEBUG=true`
 - `docker compose up cognee`
 
+
+### Run without Docker
+- Run `PYTHONPATH=. python cognitive_architecture/setup_database.py` to set up the database
+- Run `python -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=127.0.0.1:8000 --log-level debug api:app`
+
+#### Debugging
+- Run `python -m debugpy --wait-for-client --listen localhost:5678 -m gunicorn -w 1 -k uvicorn.workers.UvicornWorker -t 30000 --bind=127.0.0.1:8000 --log-level debug api:app`
+- Attach the debugger
 
 <!-- Send API requests add-memory, user-query-to-graph, document-to-graph-db, user-query-processor to the localhost:8000 -->
 
 ## Demo
 
 [<img src="https://i3.ytimg.com/vi/yjParvJVgPI/maxresdefault.jpg" width="100%">](https://www.youtube.com/watch?v=yjParvJVgPI "Learn about cognee: 55")
api.py (56 changes)
@@ -18,7 +18,7 @@ from cognitive_architecture.config import Config
 config = Config()
 config.load()
 
-from typing import Dict, Any
+from typing import Dict, Any, List
 from fastapi import FastAPI, BackgroundTasks, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
@@ -57,6 +57,54 @@ def health_check():
 class Payload(BaseModel):
     payload: Dict[str, Any]
 
+from cognitive_architecture.api.v1.memory.create_memory import MemoryType
+
+class CreateMemoryPayload(BaseModel):
+    user_id: str
+    memory_name: str
+    memory_type: MemoryType
+
+@app.post("/create-memory", response_model=dict)
+async def create_memory(payload: CreateMemoryPayload):
+    from cognitive_architecture.api.v1.memory.create_memory import create_memory as create_memory_v1, MemoryException
+
+    try:
+        await create_memory_v1(
+            payload.user_id,
+            payload.memory_name,
+            payload.memory_type or MemoryType.VECTOR,
+        )
+    except MemoryException as error:
+        return JSONResponse(
+            status_code = 409,
+            content = { "error": error.message }
+        )
+
+    return JSONResponse(
+        status_code = 200,
+        content = { "memory_name": payload.memory_name }
+    )
+
+
+class RememberPayload(BaseModel):
+    user_id: str
+    memory_name: str
+    payload: List[str]
+
+@app.post("/remember", response_model=dict)
+async def remember(payload: RememberPayload):
+    from cognitive_architecture.api.v1.memory.remember import remember as remember_v1
+
+    await remember_v1(
+        payload.user_id,
+        payload.memory_name,
+        payload.payload
+    )
+
+    return JSONResponse(
+        status_code = 200,
+        content = { "message": "ok" }
+    )
+
 @app.post("/add-memory", response_model=dict)
 async def add_memory(
@@ -83,9 +131,9 @@ async def add_memory(
         content = None
 
     output = await load_documents_to_vectorstore(
-        session,
-        decoded_payload["user_id"],
+        session = session,
+        content = content,
+        user_id = decoded_payload["user_id"],
         loader_settings = settings_for_loader,
     )
     return JSONResponse(content={"response": output}, status_code=200)
@@ -114,9 +162,9 @@ async def add_memory(
     loader_settings = {"format": "PDF", "source": "DEVICE", "path": [".data"]}
 
     output = await load_documents_to_vectorstore(
-        session,
         user_id = user_id,
         content = content,
+        session = session,
         loader_settings = loader_settings,
     )
     return JSONResponse(content={"response": output}, status_code=200)
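The duplicate-name conflict path above can be exercised with FastAPI's test client. This is a hedged sketch: it assumes `api.py` is importable from the repository root and that the backing vector and relational stores are reachable:

```python
from fastapi.testclient import TestClient
from api import app  # assumes api.py is on PYTHONPATH

client = TestClient(app)

payload = {"user_id": "user-123", "memory_name": "unique-name", "memory_type": "VECTOR"}

first = client.post("/create-memory", json=payload)
assert first.status_code == 200

# Creating the same memory again raises MemoryException server-side,
# which the endpoint maps to a 409 with the error message in the body.
second = client.post("/create-memory", json=payload)
assert second.status_code == 409
assert "already exists" in second.json()["error"]
```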
cognitive_architecture/api/v1/memory/create_memory.py (new file, 32 lines)
@@ -0,0 +1,32 @@
from enum import Enum
from qdrant_client.models import Distance, VectorParams
from cognitive_architecture.modules.memory.vector import create_vector_memory
from cognitive_architecture.modules.users.memory import is_existing_memory, register_memory_for_user
from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig

class MemoryType(Enum):
    GRAPH = "GRAPH"
    VECTOR = "VECTOR"
    RELATIONAL = "RELATIONAL"

class MemoryException(Exception):
    message: str

    def __init__(self, message: str):
        self.message = message


async def create_memory(user_id: str, memory_name: str, memory_type: MemoryType):
    if await is_existing_memory(memory_name):
        raise MemoryException(f'Memory with the name "{memory_name}" already exists. Memory names must be unique.')

    match memory_type:
        case MemoryType.VECTOR:
            await create_vector_memory(memory_name, CollectionConfig(
                vector_config = VectorParams(
                    size = 1536,
                    distance = Distance.DOT,
                )
            ))

    await register_memory_for_user(user_id, memory_name)
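A minimal sketch of calling `create_memory` directly, assuming the package is importable and a Qdrant instance is configured; the 1536-dimensional vector size above matches OpenAI's text-embedding-ada-002 embeddings referenced in the commit message:

```python
import asyncio
from cognitive_architecture.api.v1.memory.create_memory import (
    MemoryException,
    MemoryType,
    create_memory,
)

async def main():
    try:
        # "user-123" and "my-memory" are placeholder values.
        await create_memory("user-123", "my-memory", MemoryType.VECTOR)
    except MemoryException as error:
        print(error.message)  # e.g. duplicate memory name

asyncio.run(main())
```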
cognitive_architecture/api/v1/memory/remember.py (new file, 21 lines)
@@ -0,0 +1,21 @@
from typing import List
from enum import Enum
from cognitive_architecture.modules.users.memory import create_information_points, is_existing_memory

class MemoryType(Enum):
    GRAPH = "GRAPH"
    VECTOR = "VECTOR"
    RELATIONAL = "RELATIONAL"

class MemoryException(Exception):
    message: str

    def __init__(self, message: str):
        self.message = message


async def remember(user_id: str, memory_name: str, payload: List[str]):
    if await is_existing_memory(memory_name) is False:
        raise MemoryException(f"Memory with the name \"{memory_name}\" doesn't exist.")

    await create_information_points(memory_name, payload)
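And a matching sketch for `remember`, which stores the given strings as data points in an existing memory (same importability assumptions as above):

```python
import asyncio
from cognitive_architecture.api.v1.memory.remember import MemoryException, remember

async def main():
    try:
        await remember("user-123", "my-memory", [
            "First fact to store.",
            "Second fact to store.",
        ])
    except MemoryException as error:
        print(error.message)  # the memory must be created first

asyncio.run(main())
```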
@@ -30,8 +30,11 @@ class Config:
     db_path = Path(__file__).resolve().parent / "database/data"
 
     vectordb: str = os.getenv("VECTORDB", "weaviate")
+    qdrant_path: str = os.getenv("QDRANT_PATH")
+    qdrant_url: str = os.getenv("QDRANT_URL")
+    qdrant_api_key: str = os.getenv("QDRANT_API_KEY")
     db_type: str = os.getenv("DB_TYPE", "sqlite")
-    db_name: str = os.getenv("DB_NAME", "cognee.db")
+    db_name: str = os.getenv("DB_NAME", "cognee.sqlite")
     db_host: str = os.getenv("DB_HOST", "localhost")
     db_port: str = os.getenv("DB_PORT", "5432")
     db_user: str = os.getenv("DB_USER", "cognee")
Binary file not shown.
@@ -2,11 +2,8 @@
 from datetime import datetime
 from sqlalchemy import Column, String, DateTime, ForeignKey, Boolean
 from sqlalchemy.orm import relationship
-import os
-import sys
 from ..database import Base
 
 
 class DocsModel(Base):
     """ Docs model"""
     __tablename__ = "docs"
@@ -1,6 +1,6 @@
 """ This module contains the MemoryModel class, which is a SQLAlchemy model for the memory table in the relational database. """
 from datetime import datetime
-from sqlalchemy import Column, String, DateTime, ForeignKey
+from sqlalchemy import Column, String, DateTime, ForeignKey, UUID
 from sqlalchemy.orm import relationship
 from ..database import Base
 
@@ -9,7 +9,7 @@ class MemoryModel(Base):
     """ Memory model"""
     __tablename__ = "memories"
 
-    id = Column(String, primary_key=True)
+    id = Column(UUID, primary_key=True)
     user_id = Column(String, ForeignKey("users.id"), index=True)
     operation_id = Column(String, ForeignKey("operations.id"), index=True)
     memory_name = Column(String, nullable=True)
@@ -0,0 +1,74 @@
import uuid
from pathlib import Path
from sqlalchemy import select
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from cognitive_architecture.config import Config
# from ..relational_db_interface import RelationalDBInterface
from cognitive_architecture.database.relationaldb.models.memory import MemoryModel

config = Config()
config.load()

class RelationalDBAdapter():
    session_maker: async_sessionmaker[AsyncSession]

    def __init__(self):
        engine = create_async_engine(
            self.get_database_url(),
            pool_recycle = 3600,
            echo = config.sqlalchemy_logging,
        )
        self.create_session = async_sessionmaker[AsyncSession](
            bind = engine,
            class_ = AsyncSession,
            expire_on_commit = False,
        )

    def get_database_url(
        self,
        db_type = config.db_type,
        db_name = config.db_name,
        db_path = config.db_path,
        user = config.db_user,
        password = config.db_password,
        host = config.db_host,
        port = config.db_port,
    ):
        if db_type == "sqlite":
            db_path = (Path(db_path) / db_name).absolute()
            return f"sqlite+aiosqlite:///{db_path}" # SQLite uses a file path
        elif db_type == "duckdb":
            db_path = (Path(db_path) / db_name).absolute()
            return f"duckdb+aiosqlite:///{db_path}"
        elif db_type == "postgresql":
            # Ensure optional parameters are handled gracefully
            port_str = f":{port}" if port else ""
            password_str = f":{password}" if password else ""
            if not all([user, host]):
                raise ValueError("User and host are required for PostgreSQL connections.")
            return f"postgresql+asyncpg://{user}{password_str}@{host}{port_str}/{db_name}"
        else:
            raise ValueError(f"Unsupported database type: {db_type}")

    async def add_memory(self, user_id: str, memory_name: str):
        memory_id = uuid.uuid4()

        async with self.create_session() as session:
            async with session.begin():
                return session.add(MemoryModel(
                    id = memory_id,
                    user_id = user_id,
                    memory_name = memory_name,
                ))

    async def get_memory_by_name(self, memory_name: str):
        async with self.create_session() as session:
            async with session.begin():
                result = await session.execute(
                    select(MemoryModel.id)
                    .where(MemoryModel.memory_name == memory_name)
                )

                memory = result.scalars().one_or_none()

                return memory
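For illustration, a hedged sketch of the connection URLs `get_database_url` produces. The defaults are bound from `Config` at definition time, so with `DB_TYPE=sqlite` and `DB_NAME=cognee.sqlite` the no-argument call resolves to a file under `database/data/`; the PostgreSQL credentials below are placeholders:

```python
adapter = RelationalDBAdapter()

print(adapter.get_database_url())
# sqlite+aiosqlite:///<absolute path>/database/data/cognee.sqlite

# Keyword overrides for a PostgreSQL connection (placeholder credentials):
print(adapter.get_database_url(
    db_type = "postgresql",
    db_name = "cognee",
    user = "cognee",
    password = "secret",
    host = "localhost",
    port = "5432",
))
# postgresql+asyncpg://cognee:secret@localhost:5432/cognee
```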
@@ -0,0 +1,26 @@
# from datetime import datetime, timezone
# from sqlalchemy.orm import relationship
# # from sqlalchemy.orm import DeclarativeBase
# from sqlalchemy import Column, String, DateTime, ForeignKey
# from cognitive_architecture.database.relationaldb.database import Base


# class MemoryModel(Base):
#     __tablename__ = "memories_v1"

#     id = Column(String, primary_key = True)
#     user_id = Column(String, ForeignKey("users.id"), index = True)
#     memory_name = Column(String, nullable = True)
#     memory_category = Column(String, nullable = True)
#     created_at = Column(DateTime, default = datetime.now(timezone.utc))
#     updated_at = Column(DateTime, onupdate = datetime.now(timezone.utc))
#     methods_list = Column(String, nullable = True)
#     attributes_list = Column(String, nullable = True)

#     user = relationship("User", back_populates="memories")
#     metadatas = relationship(
#         "MetaDatas", back_populates="memory", cascade="all, delete-orphan"
#     )

#     def __repr__(self):
#         return f"<Memory(id={self.id}, user_id={self.user_id}, created_at={self.created_at}, updated_at={self.updated_at})>"
@@ -0,0 +1,4 @@
from .general.adapter import RelationalDBAdapter

def get_database():
    return RelationalDBAdapter()
@@ -0,0 +1,26 @@
from abc import abstractmethod
from typing import Protocol, TypeVar, Type, List

RowDataType = TypeVar('RowDataType')

class RelationalDBInterface(Protocol):
    @abstractmethod
    async def create_database(self, database_name: str, database_path: str): raise NotImplementedError

    @abstractmethod
    async def create_table(self, table_name: str, table_config: object): raise NotImplementedError

    @abstractmethod
    async def add_row(self, table_name: str, row_data: Type[RowDataType]): raise NotImplementedError

    @abstractmethod
    async def add_rows(self, table_name: str, rows_data: List[Type[RowDataType]]): raise NotImplementedError

    @abstractmethod
    async def get_row(self, table_name: str, row_id: str): raise NotImplementedError

    @abstractmethod
    async def update_row(self, table_name: str, row_id: str, row_data: Type[RowDataType]): raise NotImplementedError

    @abstractmethod
    async def delete_row(self, table_name: str, row_id: str): raise NotImplementedError
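Because `RelationalDBInterface` is a `typing.Protocol`, adapters don't have to inherit from it; anything with matching async method signatures conforms structurally. A minimal, hypothetical in-memory example:

```python
from typing import List

class InMemoryAdapter:
    """Hypothetical adapter; structurally satisfies RelationalDBInterface."""

    def __init__(self):
        self.tables: dict = {}

    async def create_database(self, database_name: str, database_path: str):
        self.tables = {}

    async def create_table(self, table_name: str, table_config: object):
        self.tables[table_name] = {}

    async def add_row(self, table_name: str, row_data):
        # Assumes rows expose an `id` attribute, as the SQLAlchemy models here do.
        self.tables[table_name][str(row_data.id)] = row_data

    async def add_rows(self, table_name: str, rows_data: List):
        for row in rows_data:
            await self.add_row(table_name, row)

    async def get_row(self, table_name: str, row_id: str):
        return self.tables[table_name].get(row_id)

    async def update_row(self, table_name: str, row_id: str, row_data):
        self.tables[table_name][row_id] = row_data

    async def delete_row(self, table_name: str, row_id: str):
        self.tables[table_name].pop(row_id, None)
```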
@@ -0,0 +1,8 @@
from cognitive_architecture.config import Config
from .qdrant import QDrantAdapter

config = Config()
config.load()

def get_vector_database():
    return QDrantAdapter(config.qdrant_path, config.qdrant_url, config.qdrant_api_key)
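A hedged usage sketch: because `Config` is loaded when this module is imported, the Qdrant environment variables must be set before the import. The module path below is a guess inferred from the imports in create_memory.py:

```python
import os

# Must happen before the import below, since config.load() runs at import time.
os.environ["QDRANT_PATH"] = "./qdrant-data"  # embedded, file-backed Qdrant

# Hypothetical module path, inferred from the imports in create_memory.py.
from cognitive_architecture.infrastructure.databases.vector import get_vector_database

vector_db = get_vector_database()
```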
@@ -0,0 +1,95 @@
import typing
from pydantic import BaseModel
from qdrant_client import AsyncQdrantClient, models
from databases.vector.vector_db_interface import VectorDBInterface

class VectorConfig(BaseModel, extra='forbid'):
    size: int
    distance: str
    on_disk: bool

class CollectionConfig(BaseModel, extra='forbid'):
    vector_config: VectorConfig
    hnsw_config: models.HnswConfig
    optimizers_config: models.OptimizersConfig
    quantization_config: models.QuantizationConfig

class LanceDBAdapter(VectorDBInterface):
    def __init__(self, lancedb_url, lancedb_api_key):
        self.lancedb_url = lancedb_url
        self.lancedb_api_key = lancedb_api_key

    def get_lancedb_client(self) -> AsyncQdrantClient:
        return AsyncQdrantClient(
            url = self.lancedb_url,
            api_key = self.lancedb_api_key,
            location = ':memory:'
        )

    async def create_collection(
        self,
        collection_name: str,
        collection_config: CollectionConfig
    ):
        client = self.get_lancedb_client()

        return await client.create_collection(
            collection_name = collection_name,
            vectors_config = collection_config.vector_config,
            hnsw_config = collection_config.hnsw_config,
            optimizers_config = collection_config.optimizers_config,
            quantization_config = collection_config.quantization_config
        )

    async def create_data_points(self, collection_name: str, data_points: typing.List[any]):
        client = self.get_lancedb_client()

        async def create_data_point(data):
            return {
                'vector': {},
                'payload': data
            }

        return await client.upload_points(
            collection_name = collection_name,
            points = map(create_data_point, data_points)
        )


# class LanceDB(VectorDB):
#     def __init__(self, *args, **kwargs):
#         super().__init__(*args, **kwargs)
#         self.db = self.init_lancedb()

#     def init_lancedb(self):
#         # Initialize LanceDB connection
#         # Adjust the URI as needed for your LanceDB setup
#         uri = "s3://my-bucket/lancedb" if self.namespace else "~/.lancedb"
#         db = lancedb.connect(uri, api_key=os.getenv("LANCEDB_API_KEY"))
#         return db

#     def create_table(
#         self,
#         name: str,
#         schema: Optional[pa.Schema] = None,
#         data: Optional[pd.DataFrame] = None,
#     ):
#         # Create a table in LanceDB. If schema is not provided, it will be inferred from the data.
#         if data is not None and schema is None:
#             schema = pa.Schema.from_pandas(data)
#         table = self.db.create_table(name, schema=schema)
#         if data is not None:
#             table.add(data.to_dict("records"))
#         return table

#     def add_memories(self, table_name: str, data: pd.DataFrame):
#         # Add data to an existing table in LanceDB
#         table = self.db.open_table(table_name)
#         table.add(data.to_dict("records"))

#     def fetch_memories(
#         self, table_name: str, query_vector: List[float], top_k: int = 10
#     ):
#         # Perform a vector search in the specified table
#         table = self.db.open_table(table_name)
#         results = table.search(query_vector).limit(top_k).to_pandas()
#         return results
@@ -0,0 +1,8 @@
class PineconeVectorDB(VectorDB):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.init_pinecone(self.index_name)

    def init_pinecone(self, index_name):
        # Pinecone initialization logic
        pass
@@ -0,0 +1 @@
from .adapter import QDrantAdapter
@@ -1,13 +1,61 @@
-from vector.vector_db_interface import VectorDBInterface
-from qdrant_client import AsyncQdrantClient
+from typing import List, Optional
+from pydantic import BaseModel, Field
+from qdrant_client import AsyncQdrantClient, models
+from ..vector_db_interface import VectorDBInterface
+
+class CollectionConfig(BaseModel, extra = "forbid"):
+    vector_config: models.VectorParams = Field(..., description="Vector configuration")
+    hnsw_config: Optional[models.HnswConfig] = Field(default = None, description="HNSW vector index configuration")
+    optimizers_config: Optional[models.OptimizersConfig] = Field(default = None, description="Optimizers configuration")
+    quantization_config: Optional[models.QuantizationConfig] = Field(default = None, description="Quantization configuration")
 
 class QDrantAdapter(VectorDBInterface):
-    def __init__(self, qdrant_url, qdrant_api_key):
-        self.qdrant_client = AsyncQdrantClient(qdrant_url, qdrant_api_key)
+    qdrant_url: str = None
+    qdrant_path: str = None
+    qdrant_api_key: str = None
+
+    def __init__(self, qdrant_path, qdrant_url, qdrant_api_key):
+        if qdrant_path is not None:
+            self.qdrant_path = qdrant_path
+        else:
+            self.qdrant_url = qdrant_url
+
+        self.qdrant_api_key = qdrant_api_key
+
+    def get_qdrant_client(self) -> AsyncQdrantClient:
+        if self.qdrant_path is not None:
+            return AsyncQdrantClient(
+                path = self.qdrant_path,
+            )
+        elif self.qdrant_url is not None:
+            return AsyncQdrantClient(
+                url = self.qdrant_url,
+                api_key = self.qdrant_api_key,
+            )
+
+        return AsyncQdrantClient(
+            location = ":memory:"
+        )
 
     async def create_collection(
         self,
         collection_name: str,
-        collection_config: object
+        collection_config: CollectionConfig,
     ):
-        return await self.qdrant_client.create_collection(collection_name, collection_config)
+        client = self.get_qdrant_client()
+
+        return await client.create_collection(
+            collection_name = collection_name,
+            vectors_config = collection_config.vector_config,
+            hnsw_config = collection_config.hnsw_config,
+            optimizers_config = collection_config.optimizers_config,
+            quantization_config = collection_config.quantization_config
+        )
+
+    async def create_data_points(self, collection_name: str, data_points: List[any]):
+        client = self.get_qdrant_client()
+
+        return await client.upload_points(
+            collection_name = collection_name,
+            points = data_points
+        )
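A sketch of the three client modes the adapter now supports; the URL and API key are placeholders, and `CollectionConfig` mirrors the 1536-dimensional setup used in create_memory.py:

```python
import asyncio
from qdrant_client.models import Distance, VectorParams

async def main():
    # 1. Embedded, file-backed Qdrant (qdrant_path takes precedence over qdrant_url).
    local = QDrantAdapter("./qdrant-data", None, None)

    # 2. Hosted Qdrant (placeholder URL and API key).
    remote = QDrantAdapter(None, "https://qdrant.example.com", "my-api-key")

    # 3. Neither configured: falls back to an in-memory client.
    ephemeral = QDrantAdapter(None, None, None)

    config = CollectionConfig(
        vector_config = VectorParams(size = 1536, distance = Distance.DOT),
    )
    await ephemeral.create_collection("my-memory", config)

asyncio.run(main())
```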
@@ -1,3 +1,4 @@
+from typing import List
 from abc import abstractmethod
 from typing import Protocol
 
@@ -10,59 +11,59 @@ class VectorDBInterface(Protocol):
         collection_config: object
     ): raise NotImplementedError
 
-    @abstractmethod
-    async def update_collection(
-        self,
-        collection_name: str,
-        collection_config: object
-    ): raise NotImplementedError
+    # @abstractmethod
+    # async def update_collection(
+    #     self,
+    #     collection_name: str,
+    #     collection_config: object
+    # ): raise NotImplementedError
 
-    @abstractmethod
-    async def delete_collection(
-        self,
-        collection_name: str
-    ): raise NotImplementedError
+    # @abstractmethod
+    # async def delete_collection(
+    #     self,
+    #     collection_name: str
+    # ): raise NotImplementedError
 
-    @abstractmethod
-    async def create_vector_index(
-        self,
-        collection_name: str,
-        vector_index_config: object
-    ): raise NotImplementedError
+    # @abstractmethod
+    # async def create_vector_index(
+    #     self,
+    #     collection_name: str,
+    #     vector_index_config: object
+    # ): raise NotImplementedError
 
-    @abstractmethod
-    async def create_data_index(
-        self,
-        collection_name: str,
-        vector_index_config: object
-    ): raise NotImplementedError
+    # @abstractmethod
+    # async def create_data_index(
+    #     self,
+    #     collection_name: str,
+    #     vector_index_config: object
+    # ): raise NotImplementedError
 
     """ Data points """
     @abstractmethod
-    async def create_data_point(
+    async def create_data_points(
         self,
         collection_name: str,
-        payload: object
+        data_points: List[any]
     ): raise NotImplementedError
 
-    @abstractmethod
-    async def get_data_point(
-        self,
-        collection_name: str,
-        data_point_id: str
-    ): raise NotImplementedError
+    # @abstractmethod
+    # async def get_data_point(
+    #     self,
+    #     collection_name: str,
+    #     data_point_id: str
+    # ): raise NotImplementedError
 
-    @abstractmethod
-    async def update_data_point(
-        self,
-        collection_name: str,
-        data_point_id: str,
-        payload: object
-    ): raise NotImplementedError
+    # @abstractmethod
+    # async def update_data_point(
+    #     self,
+    #     collection_name: str,
+    #     data_point_id: str,
+    #     payload: object
+    # ): raise NotImplementedError
 
-    @abstractmethod
-    async def delete_data_point(
-        self,
-        collection_name: str,
-        data_point_id: str
-    ): raise NotImplementedError
+    # @abstractmethod
+    # async def delete_data_point(
+    #     self,
+    #     collection_name: str,
+    #     data_point_id: str
+    # ): raise NotImplementedError
@ -0,0 +1,417 @@
|
|||
from weaviate.gql.get import HybridFusion
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.retrievers import WeaviateHybridSearchRetriever, ParentDocumentRetriever
|
||||
from databases.vector.vector_db_interface import VectorDBInterface
|
||||
# from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from cognitive_architecture.database.vectordb.loaders.loaders import _document_loader
|
||||
|
||||
class WeaviateVectorDB(VectorDBInterface):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.init_weaviate(embeddings=self.embeddings, namespace=self.namespace)
|
||||
|
||||
def init_weaviate(
|
||||
self,
|
||||
embeddings=OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY", "")),
|
||||
namespace=None,
|
||||
retriever_type="",
|
||||
):
|
||||
# Weaviate initialization logic
|
||||
auth_config = weaviate.auth.AuthApiKey(
|
||||
api_key=os.environ.get("WEAVIATE_API_KEY")
|
||||
)
|
||||
client = weaviate.Client(
|
||||
url=os.environ.get("WEAVIATE_URL"),
|
||||
auth_client_secret=auth_config,
|
||||
additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")},
|
||||
)
|
||||
|
||||
if retriever_type == "single_document_context":
|
||||
retriever = WeaviateHybridSearchRetriever(
|
||||
client=client,
|
||||
index_name=namespace,
|
||||
text_key="text",
|
||||
attributes=[],
|
||||
embedding=embeddings,
|
||||
create_schema_if_missing=True,
|
||||
)
|
||||
return retriever
|
||||
elif retriever_type == "multi_document_context":
|
||||
retriever = WeaviateHybridSearchRetriever(
|
||||
client=client,
|
||||
index_name=namespace,
|
||||
text_key="text",
|
||||
attributes=[],
|
||||
embedding=embeddings,
|
||||
create_schema_if_missing=True,
|
||||
)
|
||||
return retriever
|
||||
else:
|
||||
return client
|
||||
# child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
|
||||
# store = InMemoryStore()
|
||||
# retriever = ParentDocumentRetriever(
|
||||
# vectorstore=vectorstore,
|
||||
# docstore=store,
|
||||
# child_splitter=child_splitter,
|
||||
# )
|
||||
|
||||
from marshmallow import Schema, fields
|
||||
|
||||
def create_document_structure(observation, params, metadata_schema_class=None):
|
||||
"""
|
||||
Create and validate a document structure with optional custom fields.
|
||||
|
||||
:param observation: Content of the document.
|
||||
:param params: Metadata information.
|
||||
:param metadata_schema_class: Custom metadata schema class (optional).
|
||||
:return: A list containing the validated document data.
|
||||
"""
|
||||
document_data = {"metadata": params, "page_content": observation}
|
||||
|
||||
def get_document_schema():
|
||||
class DynamicDocumentSchema(Schema):
|
||||
metadata = fields.Nested(metadata_schema_class, required=True)
|
||||
page_content = fields.Str(required=True)
|
||||
|
||||
return DynamicDocumentSchema
|
||||
|
||||
# Validate and deserialize, defaulting to "1.0" if not provided
|
||||
CurrentDocumentSchema = get_document_schema()
|
||||
loaded_document = CurrentDocumentSchema().load(document_data)
|
||||
return [loaded_document]
|
||||
|
||||
def _stuct(self, observation, params, metadata_schema_class=None):
|
||||
"""Utility function to create the document structure with optional custom fields."""
|
||||
# Construct document data
|
||||
document_data = {"metadata": params, "page_content": observation}
|
||||
|
||||
def get_document_schema():
|
||||
class DynamicDocumentSchema(Schema):
|
||||
metadata = fields.Nested(metadata_schema_class, required=True)
|
||||
page_content = fields.Str(required=True)
|
||||
|
||||
return DynamicDocumentSchema
|
||||
|
||||
# Validate and deserialize # Default to "1.0" if not provided
|
||||
CurrentDocumentSchema = get_document_schema()
|
||||
loaded_document = CurrentDocumentSchema().load(document_data)
|
||||
return [loaded_document]
|
||||
|
||||
async def add_memories(
|
||||
self,
|
||||
observation,
|
||||
loader_settings=None,
|
||||
params=None,
|
||||
namespace=None,
|
||||
metadata_schema_class=None,
|
||||
embeddings="hybrid",
|
||||
):
|
||||
# Update Weaviate memories here
|
||||
if namespace is None:
|
||||
namespace = self.namespace
|
||||
params["user_id"] = self.user_id
|
||||
logging.info("User id is %s", self.user_id)
|
||||
retriever = self.init_weaviate(
|
||||
embeddings=OpenAIEmbeddings(),
|
||||
namespace=namespace,
|
||||
retriever_type="single_document_context",
|
||||
)
|
||||
if loader_settings:
|
||||
# Assuming _document_loader returns a list of documents
|
||||
documents = await _document_loader(observation, loader_settings)
|
||||
logging.info("here are the docs %s", str(documents))
|
||||
chunk_count = 0
|
||||
for doc_list in documents:
|
||||
for doc in doc_list:
|
||||
chunk_count += 1
|
||||
params["chunk_count"] = doc.metadata.get("chunk_count", "None")
|
||||
logging.info(
|
||||
"Loading document with provided loader settings %s", str(doc)
|
||||
)
|
||||
params["source"] = doc.metadata.get("source", "None")
|
||||
logging.info("Params are %s", str(params))
|
||||
retriever.add_documents(
|
||||
[Document(metadata=params, page_content=doc.page_content)]
|
||||
)
|
||||
else:
|
||||
chunk_count = 0
|
||||
from cognitive_architecture.database.vectordb.chunkers.chunkers import (
|
||||
chunk_data,
|
||||
)
|
||||
|
||||
documents = [
|
||||
chunk_data(
|
||||
chunk_strategy="VANILLA",
|
||||
source_data=observation,
|
||||
chunk_size=300,
|
||||
chunk_overlap=20,
|
||||
)
|
||||
]
|
||||
for doc in documents[0]:
|
||||
chunk_count += 1
|
||||
params["chunk_order"] = chunk_count
|
||||
params["source"] = "User loaded"
|
||||
logging.info(
|
||||
"Loading document with default loader settings %s", str(doc)
|
||||
)
|
||||
logging.info("Params are %s", str(params))
|
||||
retriever.add_documents(
|
||||
[Document(metadata=params, page_content=doc.page_content)]
|
||||
)
|
||||
|
||||
async def fetch_memories(
    self,
    observation: str,
    namespace: str = None,
    search_type: str = "hybrid",
    params=None,
    **kwargs,
):
    """
    Fetch documents from Weaviate.

    Parameters:
    - observation (str): User query.
    - namespace (str, optional): Type of memory accessed.
    - search_type (str, optional): Type of search ('text', 'hybrid', 'bm25',
      'summary', 'summary_filter_by_object_name', 'generate',
      'generate_grouped'). Defaults to 'hybrid'.
    - params: Used as the doc_id filter by 'summary_filter_by_object_name'.
    - **kwargs: Additional parameters for flexibility.

    Returns:
    List of documents matching the query, or an empty list in case of error.

    Example:
    fetch_memories(observation="some query", search_type="text", additional_param="value")
    """
    client = self.init_weaviate(namespace=self.namespace)
    if search_type is None:
        search_type = "hybrid"

    if not namespace:
        namespace = self.namespace

    logging.info("Query on namespace %s", namespace)

    params_user_id = {
        "path": ["user_id"],
        "operator": "Like",
        "valueText": self.user_id,
    }

    def list_objects_of_class(class_name, schema):
        return [
            prop["name"]
            for class_obj in schema["classes"]
            if class_obj["class"] == class_name
            for prop in class_obj["properties"]
        ]

    base_query = (
        client.query.get(
            namespace, list_objects_of_class(namespace, client.schema.get())
        )
        .with_additional(
            ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", "distance"]
        )
        .with_where(params_user_id)
        .with_limit(10)
    )

    n_of_observations = kwargs.get("n_of_observations", 2)

    if search_type == "text":
        query_output = (
            base_query.with_near_text({"concepts": [observation]})
            .with_autocut(n_of_observations)
            .do()
        )
    elif search_type == "hybrid":
        query_output = (
            base_query.with_hybrid(
                query=observation, fusion_type=HybridFusion.RELATIVE_SCORE
            )
            .with_autocut(n_of_observations)
            .do()
        )
    elif search_type == "bm25":
        query_output = (
            base_query.with_bm25(query=observation)
            .with_autocut(n_of_observations)
            .do()
        )
    elif search_type == "summary":
        # Restrict to this user's first 30 chunks and return them in bulk.
        filter_object = {
            "operator": "And",
            "operands": [
                {
                    "path": ["user_id"],
                    "operator": "Equal",
                    "valueText": self.user_id,
                },
                {
                    "path": ["chunk_order"],
                    "operator": "LessThan",
                    "valueNumber": 30,
                },
            ],
        }
        base_query = (
            client.query.get(
                namespace,
                list_objects_of_class(namespace, client.schema.get()),
            )
            .with_additional(
                ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", "distance"]
            )
            .with_where(filter_object)
            .with_limit(30)
        )
        query_output = (
            base_query
            # .with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE)
            .do()
        )
    elif search_type == "summary_filter_by_object_name":
        # Restrict to this user's chunks of a single document (params = doc_id).
        filter_object = {
            "operator": "And",
            "operands": [
                {
                    "path": ["user_id"],
                    "operator": "Equal",
                    "valueText": self.user_id,
                },
                {
                    "path": ["doc_id"],
                    "operator": "Equal",
                    "valueText": params,
                },
            ],
        }
        base_query = (
            client.query.get(
                namespace,
                list_objects_of_class(namespace, client.schema.get()),
            )
            .with_additional(
                ["id", "creationTimeUnix", "lastUpdateTimeUnix", "score", "distance"]
            )
            .with_where(filter_object)
            .with_limit(30)
            .with_hybrid(query=observation, fusion_type=HybridFusion.RELATIVE_SCORE)
        )
        return base_query.do()
    elif search_type == "generate":
        generate_prompt = kwargs.get("generate_prompt", "")
        query_output = (
            base_query.with_generate(single_prompt=generate_prompt or observation)
            .with_near_text({"concepts": [observation]})
            .with_autocut(n_of_observations)
            .do()
        )
    elif search_type == "generate_grouped":
        generate_prompt = kwargs.get("generate_prompt", "")
        query_output = (
            base_query.with_generate(grouped_task=generate_prompt or observation)
            .with_near_text({"concepts": [observation]})
            .with_autocut(n_of_observations)
            .do()
        )
    else:
        logging.error(f"Invalid search_type: {search_type}")
        return []
    # TODO: restore error handling so failures return [] as the docstring promises:
    # try: ... except Exception as e:
    #     logging.error(f"Error executing query: {str(e)}")
    #     return []

    return query_output
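For orientation, a sketch of the GraphQL-style query the weaviate-client v3 builder above resolves to; this assumes a running Weaviate instance, and the class, properties, and query text are illustrative:

# Hypothetical standalone equivalent of the hybrid branch:
results = (
    client.query.get("SemanticMemory", ["page_content", "user_id"])
    .with_hybrid(query="coffee preferences")
    .with_where({"path": ["user_id"], "operator": "Like", "valueText": "676"})
    .with_limit(10)
    .do()
)
docs = results["data"]["Get"]["SemanticMemory"]  # list of matching objects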
async def delete_memories(self, namespace: str, params: dict = None):
    if namespace is None:
        namespace = self.namespace
    client = self.init_weaviate(namespace=namespace)
    if params:
        where_filter = {
            "path": ["id"],
            "operator": "Equal",
            "valueText": params.get("id", None),
        }
        return client.batch.delete_objects(
            class_name=namespace,
            # Same `where` filter as in the GraphQL API
            where=where_filter,
        )
    else:
        # Delete all objects carrying the default version tag.
        return client.batch.delete_objects(
            class_name=namespace,
            where={
                "path": ["version"],
                "operator": "Equal",
                "valueText": "1.0",
            },
        )
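A minimal call sketch; the uuid is made up, and note that without params the bulk branch removes every object whose version is "1.0":

# Hypothetical usage:
await memory.delete_memories(
    "SemanticMemory", params={"id": "9f0c5f8e-0000-4000-8000-000000000000"}
)
await memory.delete_memories("SemanticMemory")  # bulk path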
async def count_memories(self, namespace: str = None, params: dict = None) -> int:
    """
    Count memories in a Weaviate database.

    Args:
        namespace (str, optional): The Weaviate namespace to count memories in.
            If not provided, uses the default namespace.

    Returns:
        int: The number of memories in the specified namespace, or 0 on error.
    """
    if namespace is None:
        namespace = self.namespace

    client = self.init_weaviate(namespace=namespace)

    try:
        response = client.query.aggregate(namespace).with_meta_count().do()
        # The aggregate response nests the count under data -> Aggregate -> <class>.
        return response["data"]["Aggregate"][namespace][0]["meta"]["count"]
    except Exception as e:
        logging.error(f"Error counting memories: {str(e)}")
        return 0
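A quick usage sketch; the namespace name is illustrative:

# Hypothetical: count stored memories for one namespace.
total = await memory.count_memories(namespace="SemanticMemory")
logging.info("Stored memories: %d", total)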
def update_memories(self, observation, namespace: str, params: dict = None):
    client = self.init_weaviate(namespace=self.namespace)

    client.data_object.update(
        data_object={
            # "text": observation,
            "user_id": str(self.user_id),
            "version": params.get("version") or "",
            "agreement_id": params.get("agreement_id") or "",
            "privacy_policy": params.get("privacy_policy") or "",
            "terms_of_service": params.get("terms_of_service") or "",
            "format": params.get("format") or "",
            "schema_version": params.get("schema_version") or "",
            "checksum": params.get("checksum") or "",
            "owner": params.get("owner") or "",
            "license": params.get("license") or "",
            "validity_start": params.get("validity_start") or "",
            "validity_end": params.get("validity_end") or "",
            # **source_metadata,
        },
        class_name=namespace,
        uuid=params.get("id", None),
        consistency_level=weaviate.data.replication.ConsistencyLevel.ALL,  # default is QUORUM
    )
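Worth noting: in the v3 client, data_object.update performs a merge-style PATCH, so a call like the following sketch only touches the listed consent fields; the uuid and values are made up:

# Hypothetical: bump the agreement metadata on one object.
memory.update_memories(
    observation=None,
    namespace="SemanticMemory",
    params={"id": "9f0c5f8e-1111-2222-3333-444455556666", "version": "1.1"},
)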
0
cognitive_architecture/modules/__init__.py
Normal file
0
cognitive_architecture/modules/memory/__init__.py
Normal file
311
cognitive_architecture/modules/memory/memory-legacy.py
Normal file
@ -0,0 +1,311 @@
class Memory:
    def __init__(
        self,
        user_id: str = "676",
        session=None,
        index_name: str = None,
        db_type: str = globalConfig.vectordb,
        namespace: str = None,
        memory_id: str = None,
        memory_class=None,
        job_id: str = None,
    ) -> None:
        self.load_environment_variables()
        self.memory_id = memory_id
        self.user_id = user_id
        self.session = session
        self.index_name = index_name
        self.db_type = db_type
        self.namespace = namespace
        self.memory_instances = []
        self.memory_class = memory_class
        self.job_id = job_id
        # self.memory_class = DynamicBaseMemory(
        #     "Memory", user_id, str(self.memory_id), index_name, db_type, namespace
        # )

    def load_environment_variables(self) -> None:
        self.OPENAI_TEMPERATURE = globalConfig.openai_temperature
        self.OPENAI_API_KEY = globalConfig.openai_key
    @classmethod
    async def create_memory(
        cls,
        user_id: str,
        session,
        job_id: str = None,
        memory_label: str = None,
        **kwargs,
    ):
        """
        Class method that acts as a factory for creating Memory instances.
        It performs the necessary DB checks or updates before instance creation.
        """
        existing_user = await cls.check_existing_user(user_id, session)
        logging.info(f"Existing user: {existing_user}")

        if existing_user:
            # Handle the existing-user scenario.
            memory_id = await cls.check_existing_memory(user_id, memory_label, session)
            if memory_id is None:
                memory_id = await cls.handle_new_memory(
                    user_id=user_id,
                    session=session,
                    job_id=job_id,
                    memory_name=memory_label,
                )
            logging.info(
                f"Existing user {user_id} found in the DB. Memory ID: {memory_id}"
            )
        else:
            # Handle the new-user scenario.
            await cls.handle_new_user(user_id, session)

            memory_id = await cls.handle_new_memory(
                user_id=user_id,
                session=session,
                job_id=job_id,
                memory_name=memory_label,
            )
            logging.info(
                f"New user {user_id} created in the DB. Memory ID: {memory_id}"
            )

        memory_class = DynamicBaseMemory(
            memory_label,
            user_id,
            str(memory_id),
            index_name=memory_label,
            db_type=globalConfig.vectordb,
            **kwargs,
        )

        return cls(
            user_id=user_id,
            session=session,
            memory_id=memory_id,
            job_id=job_id,
            memory_class=memory_class,
            **kwargs,
        )
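A hedged usage sketch of the factory; it assumes an async SQLAlchemy session is already available, and the session variable shown is illustrative:

# Hypothetical usage inside an async context:
memory = await Memory.create_memory(
    user_id="676",
    session=async_session,          # an AsyncSession from your sessionmaker
    memory_label="SemanticMemory",
)
print(memory.memory_id)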
    async def list_memory_classes(self):
        """
        List all available memory classes on this memory instance.
        """
        # Filter attributes that end with '_class'.
        return [attr for attr in dir(self) if attr.endswith("_class")]

    @staticmethod
    async def check_existing_user(user_id: str, session):
        """Check if a user exists in the DB and return it."""
        result = await session.execute(select(User).where(User.id == user_id))
        return result.scalar_one_or_none()

    @staticmethod
    async def check_existing_memory(user_id: str, memory_label: str, session):
        """Check if a user memory exists in the DB and return its id. Filters by user and label."""
        try:
            result = await session.execute(
                select(MemoryModel.id)
                .where(MemoryModel.user_id == user_id)
                .filter_by(memory_name=memory_label)
                .order_by(MemoryModel.created_at)
            )
            return result.scalar_one_or_none()
        except Exception as e:
            logging.error(f"An error occurred: {str(e)}")
            return None
    @staticmethod
    async def handle_new_user(user_id: str, session):
        """
        Handle new user creation in the database.

        Args:
            user_id (str): The unique identifier for the new user.
            session: The database session for the operation.

        Returns:
            str: A success message or an error message.
        """
        try:
            new_user = User(id=user_id)
            await add_entity(session, new_user)
            return "User creation successful."
        except Exception as e:
            return f"Error creating user: {str(e)}"

    @staticmethod
    async def handle_new_memory(
        user_id: str,
        session,
        job_id: str = None,
        memory_name: str = None,
        memory_category: str = "PUBLIC",
    ):
        """
        Handle new memory creation associated with a user.

        Args:
            user_id (str): The user's unique identifier.
            session: The database session for the operation.
            job_id (str, optional): The identifier of the associated job, if any.
            memory_name (str, optional): The name of the memory.
            memory_category (str, optional): Visibility category. Defaults to "PUBLIC".

        Returns:
            str: The unique memory ID if successful, or an error message.
        """
        try:
            memory_id = str(uuid.uuid4())
            logging.info("Job id %s", job_id)
            memory = MemoryModel(
                id=memory_id,
                user_id=user_id,
                operation_id=job_id,
                memory_name=memory_name,
                memory_category=memory_category,
                methods_list=str(["Memory", "SemanticMemory", "EpisodicMemory"]),
                attributes_list=str(
                    [
                        "user_id",
                        "index_name",
                        "db_type",
                        "knowledge_source",
                        "knowledge_type",
                        "memory_id",
                        "long_term_memory",
                        "short_term_memory",
                        "namespace",
                    ]
                ),
            )
            await add_entity(session, memory)
            return memory_id
        except Exception as e:
            return f"Error creating memory: {str(e)}"
    async def add_memory_instance(self, memory_class_name: str):
        """Add a new memory instance to the memory_instances list."""
        instance = DynamicBaseMemory(
            memory_class_name,
            self.user_id,
            self.memory_id,
            self.index_name,
            self.db_type,
            self.namespace,
        )
        logging.info("The following instance was defined: %s", instance)
        self.memory_instances.append(instance)

    async def query_method(self):
        """Fetch the stored methods_list for this memory from the DB."""
        methods_list = await self.session.execute(
            select(MemoryModel.methods_list).where(MemoryModel.id == self.memory_id)
        )
        return methods_list.scalar_one_or_none()
    async def manage_memory_attributes(self, existing_user):
        """Manage memory attributes based on whether the user already exists."""
        if existing_user:
            logging.info(
                f"ID before query: {self.memory_id}, type: {type(self.memory_id)}"
            )
            # attributes_list = await self.session.query(MemoryModel.attributes_list).filter_by(id=self.memory_id[0]).scalar()
            attributes_list = await self.query_method()
            logging.info(f"Attributes list: {attributes_list}")
            if attributes_list is not None:
                attributes_list = ast.literal_eval(attributes_list)
                await self.handle_attributes(attributes_list)
            else:
                logging.warning("attributes_list is None!")
        else:
            attributes_list = [
                "user_id",
                "index_name",
                "db_type",
                "knowledge_source",
                "knowledge_type",
                "memory_id",
                "long_term_memory",
                "short_term_memory",
                "namespace",
            ]
            await self.handle_attributes(attributes_list)

    async def handle_attributes(self, attributes_list):
        """Add each attribute in the list to the memory class."""
        for attr in attributes_list:
            await self.memory_class.add_attribute(attr)
    async def manage_memory_methods(self, existing_user):
        """
        Manage memory methods based on whether the user already exists.
        """
        if existing_user:
            # Fetch existing methods from the database.
            # methods_list = await self.session.query(MemoryModel.methods_list).filter_by(id=self.memory_id).scalar()
            methods_list = await self.session.execute(
                select(MemoryModel.methods_list).where(
                    MemoryModel.id == self.memory_id[0]
                )
            )
            methods_list = methods_list.scalar_one_or_none()
            methods_list = ast.literal_eval(methods_list)
        else:
            # Define default methods for a new user.
            methods_list = [
                "async_create_long_term_memory",
                "async_init",
                "add_memories",
                "fetch_memories",
                "delete_memories",
                "async_create_short_term_memory",
                "_create_buffer_context",
                "_get_task_list",
                "_run_main_buffer",
                "_available_operations",
                "_provide_feedback",
            ]
        # Apply methods to memory instances.
        for class_instance in self.memory_instances:
            for method in methods_list:
                class_instance.add_method(method)
    async def dynamic_method_call(
        self, dynamic_base_memory_instance, method_name: str, *args, **kwargs
    ):
        """Invoke a registered method on a dynamic memory instance by name."""
        if method_name in dynamic_base_memory_instance.methods:
            method = getattr(dynamic_base_memory_instance, method_name, None)
            if method:
                return await method(*args, **kwargs)
        raise AttributeError(
            f"{dynamic_base_memory_instance.name} object has no attribute {method_name}"
        )
    async def add_dynamic_memory_class(self, class_name: str, namespace: str):
        """Create a DynamicBaseMemory instance and attach it as an attribute."""
        logging.info("Here is the memory id %s", self.memory_id[0])
        new_memory_class = DynamicBaseMemory(
            class_name,
            self.user_id,
            self.memory_id[0],
            self.index_name,
            self.db_type,
            namespace,
        )
        setattr(self, f"{class_name.lower()}_class", new_memory_class)
        return new_memory_class

    async def add_attribute_to_class(self, class_instance, attribute_name: str):
        # TODO: persist this per user and load it under the memory id.
        await class_instance.add_attribute(attribute_name)

    async def add_method_to_class(self, class_instance, method_name: str):
        # TODO: persist this per user and load it under the memory id.
        await class_instance.add_method(method_name)
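A short sketch of how the dynamic class registration and name-based dispatch compose; it assumes an initialized memory instance, and the class name and query string are illustrative:

# Hypothetical: register a class, then call one of its registered methods by name.
semantic = await memory.add_dynamic_memory_class("SemanticMemory", "SEMANTICMEMORY")
await memory.add_method_to_class(semantic, "fetch_memories")
result = await memory.dynamic_method_call(
    semantic, "fetch_memories", observation="what does the user drink?"
)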
1
cognitive_architecture/modules/memory/vector/__init__.py
Normal file
@ -0,0 +1 @@
from .create_vector_memory import create_vector_memory
7
cognitive_architecture/modules/memory/vector/create_vector_memory.py
Normal file
@ -0,0 +1,7 @@
from cognitive_architecture.infrastructure.databases.vector.qdrant.adapter import CollectionConfig
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database


async def create_vector_memory(memory_name: str, collection_config: CollectionConfig):
    vector_db = get_vector_database()

    return await vector_db.create_collection(memory_name, collection_config)
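A hedged usage sketch, assuming the Qdrant-backed adapter returned by get_vector_database; the CollectionConfig construction below is illustrative, since its fields live in the project's qdrant adapter rather than in this hunk:

from qdrant_client.models import VectorParams, Distance

# Hypothetical config: 1536-dim cosine vectors to match text-embedding-ada-002.
config = CollectionConfig(
    vector_config=VectorParams(size=1536, distance=Distance.COSINE)
)
await create_vector_memory("UserMemory", config)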
0
cognitive_architecture/modules/users/__init__.py
Normal file
3
cognitive_architecture/modules/users/memory/__init__.py
Normal file
@ -0,0 +1,3 @@
from .is_existing_memory import is_existing_memory
from .register_memory_for_user import register_memory_for_user
from .create_information_points import create_information_points
23
cognitive_architecture/modules/users/memory/create_information_points.py
Normal file
@ -0,0 +1,23 @@
import uuid
from typing import List
from qdrant_client.models import PointStruct
from cognitive_architecture.infrastructure.databases.vector.get_vector_database import get_vector_database
from cognitive_architecture.openai_tools import async_get_embedding_with_backoff


async def create_information_points(memory_name: str, payload: List[str]):
    vector_db = get_vector_database()

    data_points = []
    for point in map(create_data_point, payload):
        data_points.append(await point)

    return await vector_db.create_data_points(memory_name, data_points)


async def create_data_point(data: str) -> PointStruct:
    return PointStruct(
        id=str(uuid.uuid4()),
        vector=await async_get_embedding_with_backoff(data),
        payload={
            "raw": data,
        },
    )
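A minimal call sketch; it assumes the memory/collection already exists and OPENAI_API_KEY is set, and the collection name and texts are illustrative:

# Hypothetical: embed and store two text snippets as points in "UserMemory".
await create_information_points(
    "UserMemory",
    ["The user lives in Berlin.", "The user prefers dark roast coffee."],
)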
6
cognitive_architecture/modules/users/memory/is_existing_memory.py
Normal file
@ -0,0 +1,6 @@
from cognitive_architecture.infrastructure.databases.relational.get_database import get_database


async def is_existing_memory(memory_name: str):
    memory = await get_database().get_memory_by_name(memory_name)

    return memory is not None
4
cognitive_architecture/modules/users/memory/register_memory_for_user.py
Normal file
@ -0,0 +1,4 @@
from cognitive_architecture.infrastructure.databases.relational.get_database import get_database


def register_memory_for_user(user_id: str, memory_name: str):
    return get_database().add_memory(user_id, memory_name)
@ -3,24 +3,21 @@ import asyncio
 import random
 import os
 import time

+import openai
+
 HOST = os.getenv("OPENAI_API_BASE")
 HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion

-import openai
-
 if HOST is not None:
     openai.api_base = HOST


 def retry_with_exponential_backoff(
     func,
     initial_delay: float = 1,
     exponential_base: float = 2,
     jitter: bool = True,
     max_retries: int = 20,
-    errors: tuple = (openai.error.RateLimitError,),
+    errors: tuple = (openai.RateLimitError,),
 ):
     """Retry a function with exponential backoff."""
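The decorator body between the hunks is unchanged by this commit; for orientation, a simplified sketch of the retry loop it wraps around func, with names following the signature above (not the verbatim implementation):

# Simplified backoff core: grow the delay on each rate-limit error.
num_retries, delay = 0, initial_delay
while True:
    try:
        return func(*args, **kwargs)
    except errors:
        num_retries += 1
        if num_retries > max_retries:
            raise Exception(f"Maximum number of retries ({max_retries}) exceeded.")
        delay *= exponential_base * (1 + jitter * random.random())
        time.sleep(delay)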
@ -35,7 +32,7 @@ def retry_with_exponential_backoff(
             return func(*args, **kwargs)

         # Retry on specified errors
-        except errors as e:
+        except errors:
             # Increment retries
             num_retries += 1

@ -61,7 +58,7 @@ def retry_with_exponential_backoff(
 @retry_with_exponential_backoff
 def completions_with_backoff(**kwargs):
     # Local model
-    return openai.ChatCompletion.create(**kwargs)
+    return openai.chat.completions.create(**kwargs)


 def aretry_with_exponential_backoff(
@ -70,7 +67,7 @@ def aretry_with_exponential_backoff(
     exponential_base: float = 2,
     jitter: bool = True,
     max_retries: int = 20,
-    errors: tuple = (openai.error.RateLimitError,),
+    errors: tuple = (openai.RateLimitError,),
 ):
     """Retry a function with exponential backoff."""
@ -111,13 +108,19 @@ def aretry_with_exponential_backoff(
 @aretry_with_exponential_backoff
 async def acompletions_with_backoff(**kwargs):
-    return await openai.ChatCompletion.acreate(**kwargs)
+    return await openai.chat.completions.acreate(**kwargs)


 @aretry_with_exponential_backoff
 async def acreate_embedding_with_backoff(**kwargs):
     """Wrapper around Embedding.acreate w/ backoff"""
-    return await openai.Embedding.acreate(**kwargs)
+    client = openai.AsyncOpenAI(
+        # This is the default and can be omitted
+        api_key=os.environ.get("OPENAI_API_KEY"),
+    )
+
+    return await client.embeddings.create(**kwargs)


 async def async_get_embedding_with_backoff(text, model="text-embedding-ada-002"):
@ -125,17 +128,17 @@ async def async_get_embedding_with_backoff(text, model="text-embedding-ada-002")
     It specifies defaults + handles rate-limiting + is async"""
     text = text.replace("\n", " ")
     response = await acreate_embedding_with_backoff(input=[text], model=model)
-    embedding = response["data"][0]["embedding"]
+    embedding = response.data[0].embedding
     return embedding


 @retry_with_exponential_backoff
 def create_embedding_with_backoff(**kwargs):
-    return openai.Embedding.create(**kwargs)
+    return openai.embeddings.create(**kwargs)


 def get_embedding_with_backoff(text, model="text-embedding-ada-002"):
     text = text.replace("\n", " ")
     response = create_embedding_with_backoff(input=[text], model=model)
-    embedding = response["data"][0]["embedding"]
+    embedding = response.data[0].embedding
     return embedding
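A quick call sketch of the migrated embedding helpers; it assumes OPENAI_API_KEY is set, and the sample text is illustrative:

# Hypothetical: both paths return a plain list of floats.
vec = get_embedding_with_backoff("dark roast coffee")
# or, inside async code:
# vec = await async_get_embedding_with_backoff("dark roast coffee")
print(len(vec))  # 1536 for text-embedding-ada-002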
@ -5,11 +5,11 @@ logger = logging.getLogger(__name__)

 async def main():
     """Runs as part of the startup Docker scripts to create the database and tables."""
-    from config import Config
+    from cognitive_architecture.config import Config
     config = Config()
     config.load()

-    from database.database_manager import DatabaseManager
+    from cognitive_architecture.database.database_manager import DatabaseManager

     db_manager = DatabaseManager()
     database_name = config.db_name
@ -7,7 +7,7 @@ echo "Environment: $ENVIRONMENT"
 if [ "$ENVIRONMENT" != "local" ]; then
     echo "Running fetch_secret.py"

-    python cognitive_architecture/fetch_secret.py
+    PYTHONPATH=. python cognitive_architecture/fetch_secret.py

     if [ $? -ne 0 ]; then
         echo "Error: fetch_secret.py failed"

@ -19,7 +19,7 @@ fi

 echo "Creating database..."

-python cognitive_architecture/setup_database.py
+PYTHONPATH=. python cognitive_architecture/setup_database.py
 if [ $? -ne 0 ]; then
     echo "Error: setup_database.py failed"
     exit 1
5595
poetry.lock
generated
File diff suppressed because it is too large
@ -20,50 +20,30 @@ classifiers = [
 python = "^3.10"
 langchain = "^0.0.338"

 nltk = "3.8.1"
 openai = "1.3.3"
 pinecone-client = "2.2.2"
 python-dotenv = "1.0.0"
 pyyaml = "6.0"
 fastapi = "0.104.1"
 uvicorn = "0.22.0"
 pexpect = "^4.8.0"
 boto3 = "^1.26.125"
 gptcache = "^0.1.22"
 gunicorn = "^20.1.0"
 tiktoken = "^0.4.0"
 spacy = "^3.5.3"
 python-jose = "^3.3.0"
 pypdf = "^3.12.0"
 fastjsonschema = "^2.18.0"
 marvin = "^1.3.0"
 dlt = { version = "^0.3.8", extras = ["duckdb"] }
 weaviate-client = "4.4b1"
 python-multipart = "^0.0.6"
-pymupdf = "^1.23.3"
 psycopg2 = "^2.9.8"
 llama-index = "^0.8.39.post2"
 llama-hub = "^0.0.34"
 sqlalchemy = "^2.0.21"
 asyncpg = "^0.28.0"
 dash = "^2.14.0"
 unstructured = {extras = ["pdf"], version = "^0.10.23"}
 sentence-transformers = "2.2.2"
 torch = "2.0.*"
 pdf2image = "^1.16.3"
 instructor = "^0.3.4"
 networkx = "^3.2.1"
 graphviz = "^0.20.1"
-greenlet = "^3.0.1"
 neo4j = "^5.14.1"
 grpcio = "^1.60.0"
 langdetect = "^1.0.9"
 iso639 = "^0.1.4"
 debugpy = "^1.8.0"
 lancedb = "^0.5.5"
 pyarrow = "^15.0.0"
 pylint = "^3.0.3"
-aiosqlite = "^0.19.0"
+qdrant-client = "^1.7.3"
+aiosqlite = "^0.20.0"
+pymupdf = "^1.23.25"
+pandas = "^2.2.1"
+greenlet = "^3.0.3"

 [build-system]
 requires = ["poetry-core"]