Merge branch 'dev' into feat/add-pdfproloader
Signed-off-by: EricXiao <taoiaox@gmail.com>
This commit is contained in: commit 4938ad9fe9
90 changed files with 7255 additions and 4084 deletions
@@ -176,7 +176,14 @@ ENABLE_BACKEND_ACCESS_CONTROL=False

# Cognee Cloud API settings for syncing data to/from cloud infrastructure
COGNEE_CLOUD_API_URL="http://localhost:8001"
COGNEE_CLOUD_AUTH_TOKEN="your-auth-token"
COGNEE_CLOUD_AUTH_TOKEN="your-api-key"

################################################################################
# UI Settings
################################################################################

# URL where the frontend is served, defaults to http://localhost:3000
UI_APP_URL=http://localhost:3000

################################################################################
# 🛠️ DEV Settings

@@ -196,6 +203,16 @@ LITELLM_LOG="ERROR"

# DEFAULT_USER_EMAIL=""
# DEFAULT_USER_PASSWORD=""

################################################################################
# 📂 AWS Settings
################################################################################

#AWS_REGION=""
#AWS_ENDPOINT_URL=""
#AWS_ACCESS_KEY_ID=""
#AWS_SECRET_ACCESS_KEY=""
#AWS_SESSION_TOKEN=""

------------------------------- END OF POSSIBLE SETTINGS -------------------------------
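For illustration, a minimal sketch of how the new `UI_APP_URL` setting is consumed; it mirrors the API server's CORS fallback changed later in this diff, and the default matches the documented http://localhost:3000:

```python
import os

# Fallback mirrors the documented default when UI_APP_URL is unset.
allowed_origins = [os.getenv("UI_APP_URL", "http://localhost:3000")]
```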
2 .github/actions/cognee_setup/action.yml (vendored)

@@ -24,7 +24,7 @@ runs:
      uses: astral-sh/setup-uv@v4
      with:
        enable-cache: true

    - name: Rebuild uv lockfile
      shell: bash
      run: |
19 .github/pull_request_template.md (vendored)

@@ -1,8 +1,8 @@
<!-- .github/pull_request_template.md -->

## Description
<!--
Please provide a clear, human-generated description of the changes in this PR.
<!--
Please provide a clear, human-generated description of the changes in this PR.
DO NOT use AI-generated descriptions. We want to understand your thought process and reasoning.
-->

@@ -16,15 +16,6 @@ DO NOT use AI-generated descriptions. We want to understand your thought process
- [ ] Performance improvement
- [ ] Other (please specify):

## Changes Made
<!-- List the specific changes made in this PR -->
-
-
-

## Testing
<!-- Describe how you tested your changes -->

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

@@ -40,11 +31,5 @@ DO NOT use AI-generated descriptions. We want to understand your thought process
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## Related Issues
<!-- Link any related issues using "Fixes #issue_number" or "Relates to #issue_number" -->

## Additional Notes
<!-- Add any additional notes, concerns, or context for reviewers -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
1 .github/workflows/basic_tests.yml (vendored)

@@ -188,6 +188,7 @@ jobs:
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}
          extra-dependencies: "baml"

      - name: Run Simple Examples
        run: uv run python ./examples/python/simple_example.py
3 .github/workflows/e2e_tests.yml (vendored)

@@ -166,9 +166,6 @@ jobs:
          python-version: '3.11.x'
          extra-dependencies: "aws"

      - name: Dependencies already installed
        run: echo "Dependencies already installed in setup"

      - name: Run S3 Bucket Test
        env:
          ENV: 'dev'
28 .github/workflows/examples_tests.yml (vendored)

@@ -1,5 +1,8 @@
name: Reusable Examples Tests

permissions:
  contents: read

on:
  workflow_call:

@@ -131,3 +134,28 @@ jobs:
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: uv run python ./examples/python/memify_coding_agent_example.py

  test-permissions-example:
    name: Run Permissions Example
    runs-on: ubuntu-22.04
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'

      - name: Run Memify Tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: uv run python ./examples/python/permissions_example.py
1 .github/workflows/test_s3_file_storage.yml (vendored)

@@ -18,6 +18,7 @@ jobs:
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'
          extra-dependencies: "aws"

      - name: Run S3 File Storage Test
        env:
27 .github/workflows/vector_db_tests.yml (vendored)

@@ -101,3 +101,30 @@ jobs:
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: uv run python ./cognee/tests/test_pgvector.py

  run-lancedb-tests:
    name: LanceDB Tests
    runs-on: ubuntu-22.04
    steps:
      - name: Check out
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: ${{ inputs.python-version }}

      - name: Run LanceDB Tests
        env:
          ENV: 'dev'
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: uv run python ./cognee/tests/test_lancedb.py
130 README.md

@@ -5,7 +5,7 @@

<br />

cognee - Memory for AI Agents in 5 lines of code
cognee - Memory for AI Agents in 6 lines of code

<p align="center">
  <a href="https://www.youtube.com/watch?v=1bezuvLwJmw&t=2s">Demo</a>

@@ -43,12 +43,10 @@

**🚀 We launched Cogwit beta (Fully-hosted AI Memory): Sign up [here](https://platform.cognee.ai/)! 🚀**

Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Extract, Cognify, Load) pipelines.

More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github.com/topoteretes/cognee/tree/main/evals)

<p align="center">
🌐 Available Languages:
@@ -70,53 +68,47 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
</div>

## Features

- Interconnect and retrieve your past conversations, documents, images and audio transcriptions
- Replaces RAG systems and reduces developer effort, and cost.
- Load data to graph and vector databases using only Pydantic
- Manipulate your data while ingesting from 30+ data sources

## Get Started

Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/1jHbWVypDgCLwjE71GSXhRL3YxYhCZzG1?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>

## About cognee

Self-hosted package:

## Contributing
Your contributions are at the core of making this a true open source project. Any contributions you make are **greatly appreciated**. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for more information.
- Interconnects any kind of documents: past conversations, files, images, and audio transcriptions
- Replaces RAG systems with a memory layer based on graphs and vectors
- Reduces developer effort and cost, while increasing quality and precision
- Provides Pythonic data pipelines that manage data ingestion from 30+ data sources
- Is highly customizable with custom tasks, pipelines, and a set of built-in search endpoints

Hosted platform:
- Includes a managed UI and a [hosted solution](https://www.cognee.ai)

## Self-Hosted (Open Source)

## 📦 Installation
### 📦 Installation

You can install Cognee using either **pip**, **poetry**, **uv** or any other python package manager.

Cognee supports Python 3.10 to 3.13
Cognee supports Python 3.10 to 3.12

### With pip
#### With uv

```bash
pip install cognee
uv pip install cognee
```

## Local Cognee installation
Detailed instructions can be found in our [docs](https://docs.cognee.ai/getting-started/installation#environment-configuration)

You can install the local Cognee repo using **uv**, **pip** and **poetry**.
For local pip installation please make sure your pip version is above version 21.3.
### 💻 Basic Usage

### with UV with all optional dependencies

```bash
uv sync --all-extras
```

## 💻 Basic Usage

### Setup
#### Setup

```
import os
@@ -125,10 +117,14 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY"
```

You can also set the variables by creating .env file, using our <a href="https://github.com/topoteretes/cognee/blob/main/.env.template">template.</a>
To use different LLM providers, for more info check out our <a href="https://docs.cognee.ai">documentation</a>
To use different LLM providers, for more info check out our <a href="https://docs.cognee.ai/setup-configuration/llm-providers">documentation</a>

### Simple example
#### Simple example

##### Python

This script will run the default pipeline:

@@ -139,13 +135,16 @@ import asyncio

async def main():
    # Add text to cognee
    await cognee.add("Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval.")
    await cognee.add("Cognee turns documents into AI memory.")

    # Generate the knowledge graph
    await cognee.cognify()

    # Add memory algorithms to the graph
    await cognee.memify()

    # Query the knowledge graph
    results = await cognee.search("Tell me about NLP")
    results = await cognee.search("What does cognee do?")

    # Display the results
    for result in results:
@@ -158,33 +157,38 @@ if __name__ == '__main__':
```
Example output:
```
Natural Language Processing (NLP) is a cross-disciplinary and interdisciplinary field that involves computer science and information retrieval. It focuses on the interaction between computers and human language, enabling machines to understand and process natural language.
Cognee turns documents into AI memory.

```
##### Via CLI

## Our paper is out! <a href="https://arxiv.org/abs/2505.24478" target="_blank" rel="noopener noreferrer">Read here</a>
Let's get the basics covered

```
cognee-cli add "Cognee turns documents into AI memory."

cognee-cli cognify

cognee-cli search "What does cognee do?"
cognee-cli delete --all

```
or run
```
cognee-cli -ui
```

<div style="text-align: center">
  <img src="assets/cognee-paper.png" alt="cognee paper" width="100%" />
</div>

</div>

## Cognee UI

You can also cognify your files and query using cognee UI.
### Hosted Platform

<img src="assets/cognee-new-ui.webp" width="100%" alt="Cognee UI 2"></a>
Get up and running in minutes with automatic updates, analytics, and enterprise security.

### Running the UI
1. Sign up on [cogwit](https://www.cognee.ai)
2. Add your API key to local UI and sync your data to Cogwit

Try cognee UI by setting LLM_API_KEY and running ``` cognee-cli -ui ``` command on your terminal.

## Understand our architecture

<div style="text-align: center">
  <img src="assets/cognee_diagram.png" alt="cognee concept diagram" width="100%" />
</div>
@@ -203,22 +207,26 @@ Try cognee UI by setting LLM_API_KEY and running ``` cognee-cli -ui ``` command
[cognee with local models](https://github.com/user-attachments/assets/8621d3e8-ecb8-4860-afb2-5594f2ee17db)

## Contributing
Your contributions are at the core of making this a true open source project. Any contributions you make are **greatly appreciated**. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for more information.

## Code of Conduct

We are committed to making open source an enjoyable and respectful experience for our community. See <a href="https://github.com/topoteretes/cognee/blob/main/CODE_OF_CONDUCT.md"><code>CODE_OF_CONDUCT</code></a> for more information.

## 💫 Contributors
## Citation

<a href="https://github.com/topoteretes/cognee/graphs/contributors">
  <img alt="contributors" src="https://contrib.rocks/image?repo=topoteretes/cognee"/>
</a>
We now have a paper you can cite:

## Sponsors

Thanks to the following companies for sponsoring the ongoing development of cognee.

- [GitHub's Secure Open Source Fund](https://resources.github.com/github-secure-open-source-fund/)

## Star History

[](https://star-history.com/#topoteretes/cognee&Date)
```bibtex
@misc{markovic2025optimizinginterfaceknowledgegraphs,
  title={Optimizing the Interface Between Knowledge Graphs and LLMs for Complex Reasoning},
  author={Vasilije Markovic and Lazar Obradovic and Laszlo Hajdu and Jovan Pavlovic},
  year={2025},
  eprint={2505.24478},
  archivePrefix={arXiv},
  primaryClass={cs.AI},
  url={https://arxiv.org/abs/2505.24478},
}
```
@@ -3,10 +3,18 @@
import classNames from "classnames";
import { MutableRefObject, useEffect, useImperativeHandle, useRef, useState, useCallback } from "react";
import { forceCollide, forceManyBody } from "d3-force-3d";
import ForceGraph, { ForceGraphMethods, GraphData, LinkObject, NodeObject } from "react-force-graph-2d";
import dynamic from "next/dynamic";
import { GraphControlsAPI } from "./GraphControls";
import getColorForNodeType from "./getColorForNodeType";

// Dynamically import ForceGraph to prevent SSR issues
const ForceGraph = dynamic(() => import("react-force-graph-2d"), {
  ssr: false,
  loading: () => <div className="w-full h-full flex items-center justify-center">Loading graph...</div>
});

import type { ForceGraphMethods, GraphData, LinkObject, NodeObject } from "react-force-graph-2d";

interface GraphVisuzaliationProps {
  ref: MutableRefObject<GraphVisualizationAPI>;
  data?: GraphData<NodeObject, LinkObject>;

@@ -200,7 +208,7 @@ export default function GraphVisualization({ ref, data, graphControls, className
  const graphRef = useRef<ForceGraphMethods>();

  useEffect(() => {
    if (typeof window !== "undefined" && data && graphRef.current) {
    if (data && graphRef.current) {
      // add collision force
      graphRef.current.d3Force("collision", forceCollide(nodeSize * 1.5));
      graphRef.current.d3Force("charge", forceManyBody().strength(-10).distanceMin(10).distanceMax(50));
@@ -216,56 +224,34 @@ export default function GraphVisualization({ ref, data, graphControls, className

  return (
    <div ref={containerRef} className={classNames("w-full h-full", className)} id="graph-container">
      {(data && typeof window !== "undefined") ? (
      <ForceGraph
        ref={graphRef}
        width={dimensions.width}
        height={dimensions.height}
        dagMode={graphShape as unknown as undefined}
        dagLevelDistance={300}
        onDagError={handleDagError}
        graphData={data}
      <ForceGraph
        ref={graphRef}
        width={dimensions.width}
        height={dimensions.height}
        dagMode={graphShape as unknown as undefined}
        dagLevelDistance={data ? 300 : 100}
        onDagError={handleDagError}
        graphData={data || {
          nodes: [{ id: 1, label: "Add" }, { id: 2, label: "Cognify" }, { id: 3, label: "Search" }],
          links: [{ source: 1, target: 2, label: "but don't forget to" }, { source: 2, target: 3, label: "and after that you can" }],
        }}

        nodeLabel="label"
        nodeRelSize={nodeSize}
        nodeCanvasObject={renderNode}
        nodeCanvasObjectMode={() => "replace"}
        nodeLabel="label"
        nodeRelSize={data ? nodeSize : 20}
        nodeCanvasObject={data ? renderNode : renderInitialNode}
        nodeCanvasObjectMode={() => data ? "replace" : "after"}
        nodeAutoColorBy={data ? undefined : "type"}

        linkLabel="label"
        linkCanvasObject={renderLink}
        linkCanvasObjectMode={() => "after"}
        linkDirectionalArrowLength={3.5}
        linkDirectionalArrowRelPos={1}
        linkLabel="label"
        linkCanvasObject={renderLink}
        linkCanvasObjectMode={() => "after"}
        linkDirectionalArrowLength={3.5}
        linkDirectionalArrowRelPos={1}

        onNodeClick={handleNodeClick}
        onBackgroundClick={handleBackgroundClick}
        d3VelocityDecay={0.3}
      />
      ) : (
      <ForceGraph
        ref={graphRef}
        width={dimensions.width}
        height={dimensions.height}
        dagMode={graphShape as unknown as undefined}
        dagLevelDistance={100}
        graphData={{
          nodes: [{ id: 1, label: "Add" }, { id: 2, label: "Cognify" }, { id: 3, label: "Search" }],
          links: [{ source: 1, target: 2, label: "but don't forget to" }, { source: 2, target: 3, label: "and after that you can" }],
        }}

        nodeLabel="label"
        nodeRelSize={20}
        nodeCanvasObject={renderInitialNode}
        nodeCanvasObjectMode={() => "after"}
        nodeAutoColorBy="type"

        linkLabel="label"
        linkCanvasObject={renderLink}
        linkCanvasObjectMode={() => "after"}
        linkDirectionalArrowLength={3.5}
        linkDirectionalArrowRelPos={1}
      />
      )}
      onNodeClick={handleNodeClick}
      onBackgroundClick={handleBackgroundClick}
      d3VelocityDecay={data ? 0.3 : undefined}
      />
    </div>
  );
}
@@ -2,10 +2,11 @@

import Link from "next/link";
import Image from "next/image";
import { useBoolean } from "@/utils";
import { useEffect } from "react";
import { useBoolean, fetch } from "@/utils";

import { CloseIcon, CloudIcon, CogneeIcon } from "../Icons";
import { CTAButton, GhostButton, IconButton, Modal } from "../elements";
import { CTAButton, GhostButton, IconButton, Modal, StatusDot } from "../elements";
import syncData from "@/modules/cloud/syncData";

interface HeaderProps {

@@ -23,6 +24,12 @@ export default function Header({ user }: HeaderProps) {
    setFalse: closeSyncModal,
  } = useBoolean(false);

  const {
    value: isMCPConnected,
    setTrue: setMCPConnected,
    setFalse: setMCPDisconnected,
  } = useBoolean(false);

  const handleDataSyncConfirm = () => {
    syncData()
      .finally(() => {

@@ -30,6 +37,19 @@ export default function Header({ user }: HeaderProps) {
      });
  };

  useEffect(() => {
    const checkMCPConnection = () => {
      fetch.checkMCPHealth()
        .then(() => setMCPConnected())
        .catch(() => setMCPDisconnected());
    };

    checkMCPConnection();
    const interval = setInterval(checkMCPConnection, 30000);

    return () => clearInterval(interval);
  }, [setMCPConnected, setMCPDisconnected]);

  return (
    <>
      <header className="relative flex flex-row h-14 min-h-14 px-5 items-center justify-between w-full max-w-[1920px] mx-auto">

@@ -39,6 +59,10 @@ export default function Header({ user }: HeaderProps) {
        </div>

        <div className="flex flex-row items-center gap-2.5">
          <Link href="/mcp-status" className="!text-indigo-600 pl-4 pr-4">
            <StatusDot className="mr-2" isActive={isMCPConnected} />
            { isMCPConnected ? "MCP connected" : "MCP disconnected" }
          </Link>
          <GhostButton onClick={openSyncModal} className="text-indigo-600 gap-3 pl-4 pr-4">
            <CloudIcon />
            <div>Sync</div>
13 cognee-frontend/src/ui/elements/StatusDot.tsx (new file)

@@ -0,0 +1,13 @@
import React from "react";

const StatusDot = ({ isActive, className }: { isActive: boolean, className?: string }) => {
  return (
    <span
      className={`inline-block w-3 h-3 rounded-full ${className} ${
        isActive ? "bg-green-500" : "bg-red-500"
      }`}
    />
  );
};

export default StatusDot;

@@ -8,5 +8,6 @@ export { default as IconButton } from "./IconButton";
export { default as GhostButton } from "./GhostButton";
export { default as NeutralButton } from "./NeutralButton";
export { default as StatusIndicator } from "./StatusIndicator";
export { default as StatusDot } from "./StatusDot";
export { default as Accordion } from "./Accordion";
export { default as Notebook } from "./Notebook";
@@ -9,6 +9,8 @@ const backendApiUrl = process.env.NEXT_PUBLIC_BACKEND_API_URL || "http://localho

const cloudApiUrl = process.env.NEXT_PUBLIC_CLOUD_API_URL || "http://localhost:8001";

const mcpApiUrl = process.env.NEXT_PUBLIC_MCP_API_URL || "http://localhost:8001";

let apiKey: string | null = process.env.NEXT_PUBLIC_COGWIT_API_KEY || null;
let accessToken: string | null = null;

@@ -49,6 +51,13 @@ export default async function fetch(url: string, options: RequestInit = {}, useC
    )
      .then((response) => handleServerErrors(response, retry, useCloud))
      .catch((error) => {
        // Handle network errors more gracefully
        if (error.name === 'TypeError' && error.message.includes('fetch')) {
          return Promise.reject(
            new Error("Backend server is not responding. Please check if the server is running.")
          );
        }

        if (error.detail === undefined) {
          return Promise.reject(
            new Error("No connection to the server.")

@@ -62,8 +71,31 @@ export default async function fetch(url: string, options: RequestInit = {}, useC
      });
}

fetch.checkHealth = () => {
  return global.fetch(`${backendApiUrl.replace("/api", "")}/health`);
fetch.checkHealth = async () => {
  const maxRetries = 5;
  const retryDelay = 1000; // 1 second

  for (let i = 0; i < maxRetries; i++) {
    try {
      const response = await global.fetch(`${backendApiUrl.replace("/api", "")}/health`);
      if (response.ok) {
        return response;
      }
    } catch (error) {
      // If this is the last retry, throw the error
      if (i === maxRetries - 1) {
        throw error;
      }
      // Wait before retrying
      await new Promise(resolve => setTimeout(resolve, retryDelay));
    }
  }

  throw new Error("Backend server is not responding after multiple attempts");
};

fetch.checkMCPHealth = () => {
  return global.fetch(`${mcpApiUrl.replace("/api", "")}/health`);
};

fetch.setApiKey = (newApiKey: string) => {
153 cognee-gui.py (deleted)

@@ -1,153 +0,0 @@
import sys
import asyncio

try:
    import cognee
    from PySide6.QtWidgets import (
        QApplication,
        QWidget,
        QPushButton,
        QLineEdit,
        QFileDialog,
        QVBoxLayout,
        QHBoxLayout,
        QLabel,
        QMessageBox,
        QTextEdit,
        QProgressDialog,
    )
    from PySide6.QtCore import Qt

    from qasync import QEventLoop  # Import QEventLoop from qasync
except ImportError as e:
    print(
        "\nPlease install Cognee with optional gui dependencies or manually install missing dependencies.\n"
    )
    print("\nTo install with poetry use:")
    print("\npoetry install -E gui\n")
    print("\nOr to install with poetry and all dependencies use:")
    print("\npoetry install --all-extras\n")
    print("\nTo install with pip use: ")
    print('\npip install ".[gui]"\n')
    raise e


class FileSearchApp(QWidget):
    def __init__(self):
        super().__init__()
        self.selected_file = None
        self.init_ui()

    def init_ui(self):
        # Horizontal layout for file upload and visualization buttons
        button_layout = QHBoxLayout()

        # Button to open file dialog
        self.file_button = QPushButton("Upload File to Cognee", parent=self)
        self.file_button.clicked.connect(self.open_file_dialog)
        button_layout.addWidget(self.file_button)

        # Button to visualize data
        self.visualize_button = QPushButton("Visualize Data", parent=self)
        self.visualize_button.clicked.connect(lambda: asyncio.ensure_future(self.visualize_data()))
        button_layout.addWidget(self.visualize_button)

        # Label to display selected file path
        self.file_label = QLabel("No file selected", parent=self)

        # Line edit for search input
        self.search_input = QLineEdit(parent=self)
        self.search_input.setPlaceholderText("Enter text to search...")

        # Button to perform search; schedule the async search on click
        self.search_button = QPushButton("Cognee Search", parent=self)
        self.search_button.clicked.connect(lambda: asyncio.ensure_future(self._cognee_search()))

        # Text output area for search results
        self.result_output = QTextEdit(parent=self)
        self.result_output.setReadOnly(True)
        self.result_output.setPlaceholderText("Search results will appear here...")

        # Progress dialog
        self.progress_dialog = QProgressDialog("Processing..", None, 0, 0, parent=self)
        self.progress_dialog.setWindowModality(Qt.WindowModal)
        self.progress_dialog.setCancelButton(None)  # Remove the cancel button
        self.progress_dialog.close()

        # Layout setup
        layout = QVBoxLayout()
        layout.addLayout(button_layout)
        layout.addWidget(self.file_label)
        layout.addWidget(self.search_input)
        layout.addWidget(self.search_button)
        layout.addWidget(self.result_output)

        self.setLayout(layout)
        self.setWindowTitle("Cognee")
        self.resize(500, 300)

    def open_file_dialog(self):
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Select a File", "", "All Files (*.*);;Text Files (*.txt)"
        )
        if file_path:
            self.selected_file = file_path
            self.file_label.setText(f"Selected: {file_path}")
            asyncio.ensure_future(self.process_file_async())

    async def process_file_async(self):
        """Asynchronously add and process the selected file."""
        # Disable the entire window
        self.progress_dialog.show()
        self.setEnabled(False)
        try:
            await cognee.add(self.selected_file)
            await cognee.cognify()
        except Exception as e:
            QMessageBox.critical(self, "Error", f"File processing failed: {str(e)}")
        # Once finished, re-enable the window
        self.setEnabled(True)
        self.progress_dialog.close()

    async def _cognee_search(self):
        """Performs an async search and updates the result output."""
        # Disable the entire window
        self.setEnabled(False)
        self.progress_dialog.show()

        try:
            search_text = self.search_input.text().strip()
            result = await cognee.search(query_text=search_text)
            print(result)
            # Assuming result is a list-like object; adjust if necessary
            self.result_output.setText(result[0])
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Search failed: {str(e)}")

        # Once finished, re-enable the window
        self.setEnabled(True)
        self.progress_dialog.close()

    async def visualize_data(self):
        """Async slot for handling visualize data button press."""
        import webbrowser
        from cognee.api.v1.visualize.visualize import visualize_graph
        import os
        import pathlib

        html_file = os.path.join(pathlib.Path(__file__).parent, ".data", "graph_visualization.html")
        await visualize_graph(html_file)
        webbrowser.open(f"file://{html_file}")


if __name__ == "__main__":
    app = QApplication(sys.argv)
    # Create a qasync event loop and set it as the current event loop
    loop = QEventLoop(app)
    asyncio.set_event_loop(loop)

    window = FileSearchApp()
    window.show()

    with loop:
        loop.run_forever()
@@ -48,27 +48,27 @@ if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
    if [ "$DEBUG" = "true" ]; then
        echo "Waiting for the debugger to attach..."
        if [ "$TRANSPORT_MODE" = "sse" ]; then
            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee-mcp --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
        elif [ "$TRANSPORT_MODE" = "http" ]; then
            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee-mcp --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
        else
            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee --transport stdio --no-migration
            exec python -m debugpy --wait-for-client --listen 0.0.0.0:$DEBUG_PORT -m cognee-mcp --transport stdio --no-migration
        fi
    else
        if [ "$TRANSPORT_MODE" = "sse" ]; then
            exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
            exec cognee-mcp --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
        elif [ "$TRANSPORT_MODE" = "http" ]; then
            exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
            exec cognee-mcp --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
        else
            exec cognee --transport stdio --no-migration
            exec cognee-mcp --transport stdio --no-migration
        fi
    fi
else
    if [ "$TRANSPORT_MODE" = "sse" ]; then
        exec cognee --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
        exec cognee-mcp --transport sse --host 0.0.0.0 --port $HTTP_PORT --no-migration
    elif [ "$TRANSPORT_MODE" = "http" ]; then
        exec cognee --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
        exec cognee-mcp --transport http --host 0.0.0.0 --port $HTTP_PORT --no-migration
    else
        exec cognee --transport stdio --no-migration
        exec cognee-mcp --transport stdio --no-migration
    fi
fi
@@ -8,7 +8,7 @@ requires-python = ">=3.10"
dependencies = [
    # For local cognee repo usage remove comment bellow and add absolute path to cognee. Then run `uv sync --reinstall` in the mcp folder on local cognee changes.
    #"cognee[postgres,codegraph,gemini,huggingface,docs,neo4j] @ file:/Users/igorilic/Desktop/cognee",
    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.3.2",
    "cognee[postgres,codegraph,gemini,huggingface,docs,neo4j]==0.3.4",
    "fastmcp>=2.10.0,<3.0.0",
    "mcp>=1.12.0,<2.0.0",
    "uv>=0.6.3,<1.0.0",

@@ -36,4 +36,4 @@ dev = [
allow-direct-references = true

[project.scripts]
cognee = "src:main"
cognee-mcp = "src:main"
@@ -19,6 +19,10 @@ from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
from cognee.modules.search.types import SearchType
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.storage.utils import JSONEncoder
from starlette.responses import JSONResponse
from starlette.middleware import Middleware
from starlette.middleware.cors import CORSMiddleware
import uvicorn


try:

@@ -38,6 +42,53 @@ mcp = FastMCP("Cognee")
logger = get_logger()


async def run_sse_with_cors():
    """Custom SSE transport with CORS middleware."""
    sse_app = mcp.sse_app()
    sse_app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:3000"],
        allow_credentials=True,
        allow_methods=["GET"],
        allow_headers=["*"],
    )

    config = uvicorn.Config(
        sse_app,
        host=mcp.settings.host,
        port=mcp.settings.port,
        log_level=mcp.settings.log_level.lower(),
    )
    server = uvicorn.Server(config)
    await server.serve()


async def run_http_with_cors():
    """Custom HTTP transport with CORS middleware."""
    http_app = mcp.streamable_http_app()
    http_app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://localhost:3000"],
        allow_credentials=True,
        allow_methods=["GET"],
        allow_headers=["*"],
    )

    config = uvicorn.Config(
        http_app,
        host=mcp.settings.host,
        port=mcp.settings.port,
        log_level=mcp.settings.log_level.lower(),
    )
    server = uvicorn.Server(config)
    await server.serve()


@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    return JSONResponse({"status": "ok"})


@mcp.tool()
async def cognee_add_developer_rules(
    base_path: str = ".", graph_model_file: str = None, graph_model_name: str = None

@@ -975,12 +1026,12 @@ async def main():
        await mcp.run_stdio_async()
    elif args.transport == "sse":
        logger.info(f"Running MCP server with SSE transport on {args.host}:{args.port}")
        await mcp.run_sse_async()
        await run_sse_with_cors()
    elif args.transport == "http":
        logger.info(
            f"Running MCP server with Streamable HTTP transport on {args.host}:{args.port}{args.path}"
        )
        await mcp.run_streamable_http_async()
        await run_http_with_cors()


if __name__ == "__main__":
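For illustration, a minimal sketch of probing the `/health` route added above; it assumes the MCP server is reachable on localhost:8001 (the default the frontend's `NEXT_PUBLIC_MCP_API_URL` points at) and that the `requests` package is available:

```python
import requests

# The /health route added above returns {"status": "ok"} when the MCP server is up.
response = requests.get("http://localhost:8001/health", timeout=5)
response.raise_for_status()
print(response.json())  # {'status': 'ok'}
```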
3529 cognee-mcp/uv.lock (generated)
File diff suppressed because it is too large.
@@ -3,7 +3,6 @@
import os

import uvicorn
import sentry_sdk
from traceback import format_exc
from contextlib import asynccontextmanager
from fastapi import Request

@@ -29,6 +28,7 @@ from cognee.api.v1.add.routers import get_add_router
from cognee.api.v1.delete.routers import get_delete_router
from cognee.api.v1.responses.routers import get_responses_router
from cognee.api.v1.sync.routers import get_sync_router
from cognee.api.v1.update.routers import get_update_router
from cognee.api.v1.users.routers import (
    get_auth_router,
    get_register_router,

@@ -42,11 +42,18 @@ from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTI
logger = get_logger()

if os.getenv("ENV", "prod") == "prod":
    sentry_sdk.init(
        dsn=os.getenv("SENTRY_REPORTING_URL"),
        traces_sample_rate=1.0,
        profiles_sample_rate=1.0,
    )
    try:
        import sentry_sdk

        sentry_sdk.init(
            dsn=os.getenv("SENTRY_REPORTING_URL"),
            traces_sample_rate=1.0,
            profiles_sample_rate=1.0,
        )
    except ImportError:
        logger.info(
            "Sentry SDK not available. Install with 'pip install cognee\"[monitoring]\"' to enable error monitoring."
        )


app_environment = os.getenv("ENV", "prod")

@@ -81,7 +88,7 @@ if CORS_ALLOWED_ORIGINS:
    ]
else:
    allowed_origins = [
        "http://localhost:3000",
        os.getenv("UI_APP_URL", "http://localhost:3000"),
    ]  # Block all except explicitly set origins

app.add_middleware(

@@ -257,6 +264,8 @@ app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["vi

app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"])

app.include_router(get_update_router(), prefix="/api/v1/update", tags=["update"])

app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])

app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
@@ -194,7 +194,7 @@ class HealthChecker:
            config = get_llm_config()

            # Test actual API connection with minimal request
            LLMGateway.show_prompt("test", "test")
            LLMGateway.show_prompt("test", "test.txt")

            response_time = int((time.time() - start_time) * 1000)
            return ComponentHealth(
@@ -20,4 +20,4 @@ def get_checks_router():

        return await check_api_key(api_token)

    return router
    return router

@@ -94,9 +94,11 @@ def get_permissions_router() -> APIRouter:

        from cognee.modules.users.roles.methods import create_role as create_role_method

        await create_role_method(role_name=role_name, owner_id=user.id)
        role_id = await create_role_method(role_name=role_name, owner_id=user.id)

        return JSONResponse(status_code=200, content={"message": "Role created for tenant"})
        return JSONResponse(
            status_code=200, content={"message": "Role created for tenant", "role_id": str(role_id)}
        )

    @permissions_router.post("/users/{user_id}/roles")
    async def add_user_to_role(

@@ -212,8 +214,10 @@ def get_permissions_router() -> APIRouter:

        from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method

        await create_tenant_method(tenant_name=tenant_name, user_id=user.id)
        tenant_id = await create_tenant_method(tenant_name=tenant_name, user_id=user.id)

        return JSONResponse(status_code=200, content={"message": "Tenant created."})
        return JSONResponse(
            status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)}
        )

    return permissions_router
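For illustration, a hedged sketch of a client consuming the ids these endpoints now return; the route path and payload below are hypothetical (only the `/users/{user_id}/roles` route is visible in the hunk above), but the `role_id`/`tenant_id` keys match the change:

```python
import requests

# Hypothetical endpoint path and payload, shown only to illustrate the new response shape.
response = requests.post(
    "http://localhost:8000/api/v1/permissions/roles",  # hypothetical path
    json={"role_name": "editors"},
)
role_id = response.json()["role_id"]  # now returned alongside the confirmation message
```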
@@ -1 +1 @@
from .ui import start_ui, stop_ui, ui
from .ui import start_ui

@@ -1,5 +1,7 @@
import os
import platform
import signal
import socket
import subprocess
import threading
import time

@@ -7,7 +9,7 @@ import webbrowser
import zipfile
import requests
from pathlib import Path
from typing import Callable, Optional, Tuple
from typing import Callable, Optional, Tuple, List
import tempfile
import shutil
@@ -17,6 +19,80 @@ from cognee.version import get_cognee_version
logger = get_logger()


def _stream_process_output(
    process: subprocess.Popen, stream_name: str, prefix: str, color_code: str = ""
) -> threading.Thread:
    """
    Stream output from a process with a prefix to identify the source.

    Args:
        process: The subprocess to monitor
        stream_name: 'stdout' or 'stderr'
        prefix: Text prefix for each line (e.g., '[BACKEND]', '[FRONTEND]')
        color_code: ANSI color code for the prefix (optional)

    Returns:
        Thread that handles the streaming
    """

    def stream_reader():
        stream = getattr(process, stream_name)
        if stream is None:
            return

        reset_code = "\033[0m" if color_code else ""

        try:
            for line in iter(stream.readline, b""):
                if line:
                    line_text = line.decode("utf-8").rstrip()
                    if line_text:
                        print(f"{color_code}{prefix}{reset_code} {line_text}", flush=True)
        except Exception:
            pass
        finally:
            if stream:
                stream.close()

    thread = threading.Thread(target=stream_reader, daemon=True)
    thread.start()
    return thread


def _is_port_available(port: int) -> bool:
    """
    Check if a port is available on localhost.
    Returns True if the port is available, False otherwise.
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)  # 1 second timeout
            result = sock.connect_ex(("localhost", port))
            return result != 0  # Port is available if connection fails
    except Exception:
        return False


def _check_required_ports(ports_to_check: List[Tuple[int, str]]) -> Tuple[bool, List[str]]:
    """
    Check if all required ports are available on localhost.

    Args:
        ports_to_check: List of (port, service_name) tuples

    Returns:
        Tuple of (all_available: bool, unavailable_services: List[str])
    """
    unavailable = []

    for port, service_name in ports_to_check:
        if not _is_port_available(port):
            unavailable.append(f"{service_name} (port {port})")
            logger.error(f"Port {port} is already in use for {service_name}")

    return len(unavailable) == 0, unavailable


def normalize_version_for_comparison(version: str) -> str:
    """
    Normalize version string for comparison.
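For illustration, a usage sketch for the port-check helpers added above; the port/service pairs mirror the defaults `start_ui` assembles later in this diff (frontend 3000, backend 8000, MCP 8001):

```python
# Usage sketch: build the same (port, service) list start_ui uses and check it up front.
ports_to_check = [(3000, "Frontend UI"), (8000, "Backend API"), (8001, "MCP Server")]

all_available, unavailable_services = _check_required_ports(ports_to_check)
if not all_available:
    print(f"Ports already in use: {', '.join(unavailable_services)}")
```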
@@ -214,6 +290,7 @@ def check_node_npm() -> tuple[bool, str]:
    Check if Node.js and npm are available.
    Returns (is_available, error_message)
    """

    try:
        # Check Node.js
        result = subprocess.run(["node", "--version"], capture_output=True, text=True, timeout=10)

@@ -223,8 +300,17 @@ def check_node_npm() -> tuple[bool, str]:
        node_version = result.stdout.strip()
        logger.debug(f"Found Node.js version: {node_version}")

        # Check npm
        result = subprocess.run(["npm", "--version"], capture_output=True, text=True, timeout=10)
        # Check npm - handle Windows PowerShell scripts
        if platform.system() == "Windows":
            # On Windows, npm might be a PowerShell script, so we need to use shell=True
            result = subprocess.run(
                ["npm", "--version"], capture_output=True, text=True, timeout=10, shell=True
            )
        else:
            result = subprocess.run(
                ["npm", "--version"], capture_output=True, text=True, timeout=10
            )

        if result.returncode != 0:
            return False, "npm is not installed or not in PATH"

@@ -246,6 +332,7 @@ def install_frontend_dependencies(frontend_path: Path) -> bool:
    Install frontend dependencies if node_modules doesn't exist.
    This is needed for both development and downloaded frontends since both use npm run dev.
    """

    node_modules = frontend_path / "node_modules"
    if node_modules.exists():
        logger.debug("Frontend dependencies already installed")

@@ -254,13 +341,24 @@ def install_frontend_dependencies(frontend_path: Path) -> bool:
    logger.info("Installing frontend dependencies (this may take a few minutes)...")

    try:
        result = subprocess.run(
            ["npm", "install"],
            cwd=frontend_path,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minutes timeout
        )
        # Use shell=True on Windows for npm commands
        if platform.system() == "Windows":
            result = subprocess.run(
                ["npm", "install"],
                cwd=frontend_path,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minutes timeout
                shell=True,
            )
        else:
            result = subprocess.run(
                ["npm", "install"],
                cwd=frontend_path,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minutes timeout
            )

        if result.returncode == 0:
            logger.info("Frontend dependencies installed successfully")
@@ -327,55 +425,111 @@

def start_ui(
    pid_callback: Callable[[int], None],
    host: str = "localhost",
    port: int = 3000,
    open_browser: bool = True,
    auto_download: bool = False,
    start_backend: bool = False,
    backend_host: str = "localhost",
    backend_port: int = 8000,
    start_mcp: bool = False,
    mcp_port: int = 8001,
) -> Optional[subprocess.Popen]:
    """
    Start the cognee frontend UI server, optionally with the backend API server.
    Start the cognee frontend UI server, optionally with the backend API server and MCP server.

    This function will:
    1. Optionally start the cognee backend API server
    2. Find the cognee-frontend directory (development) or download it (pip install)
    3. Check if Node.js and npm are available (for development mode)
    4. Install dependencies if needed (development mode)
    5. Start the frontend server
    6. Optionally open the browser
    2. Optionally start the cognee MCP server
    3. Find the cognee-frontend directory (development) or download it (pip install)
    4. Check if Node.js and npm are available (for development mode)
    5. Install dependencies if needed (development mode)
    6. Start the frontend server
    7. Optionally open the browser

    Args:
        pid_callback: Callback to notify with PID of each spawned process
        host: Host to bind the frontend server to (default: localhost)
        port: Port to run the frontend server on (default: 3000)
        open_browser: Whether to open the browser automatically (default: True)
        auto_download: If True, download frontend without prompting (default: False)
        start_backend: If True, also start the cognee API backend server (default: False)
        backend_host: Host to bind the backend server to (default: localhost)
        backend_port: Port to run the backend server on (default: 8000)
        start_mcp: If True, also start the cognee MCP server (default: False)
        mcp_port: Port to run the MCP server on (default: 8001)

    Returns:
        subprocess.Popen object representing the running frontend server, or None if failed
    Note: If backend is started, it runs in a separate process that will be cleaned up
          when the frontend process is terminated.
    Note: If backend and/or MCP server are started, they run in separate processes
          that will be cleaned up when the frontend process is terminated.

    Example:
        >>> import cognee
        >>> def dummy_callback(pid): pass
        >>> # Start just the frontend
        >>> server = cognee.start_ui()
        >>> server = cognee.start_ui(dummy_callback)
        >>>
        >>> # Start both frontend and backend
        >>> server = cognee.start_ui(start_backend=True)
        >>> server = cognee.start_ui(dummy_callback, start_backend=True)
        >>> # UI will be available at http://localhost:3000
        >>> # API will be available at http://localhost:8000
        >>> # To stop both servers later:
        >>>
        >>> # Start frontend with MCP server
        >>> server = cognee.start_ui(dummy_callback, start_mcp=True)
        >>> # UI will be available at http://localhost:3000
        >>> # MCP server will be available at http://127.0.0.1:8001/sse
        >>> # To stop all servers later:
        >>> server.terminate()
    """
    logger.info("Starting cognee UI...")

    ports_to_check = [(port, "Frontend UI")]

    if start_backend:
        ports_to_check.append((backend_port, "Backend API"))

    if start_mcp:
        ports_to_check.append((mcp_port, "MCP Server"))

    logger.info("Checking port availability...")
    all_ports_available, unavailable_services = _check_required_ports(ports_to_check)

    if not all_ports_available:
        error_msg = f"Cannot start cognee UI: The following services have ports already in use: {', '.join(unavailable_services)}"
        logger.error(error_msg)
        logger.error("Please stop the conflicting services or change the port configuration.")
        return None

    logger.info("✓ All required ports are available")
    backend_process = None

    if start_mcp:
        logger.info("Starting Cognee MCP server with Docker...")
        cwd = os.getcwd()
        env_file = os.path.join(cwd, ".env")
        try:
            mcp_process = subprocess.Popen(
                [
                    "docker",
                    "run",
                    "-p",
                    f"{mcp_port}:8000",
                    "--rm",
                    "--env-file",
                    env_file,
                    "-e",
                    "TRANSPORT_MODE=sse",
                    "cognee/cognee-mcp:daulet-dev",
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
            )

            _stream_process_output(mcp_process, "stdout", "[MCP]", "\033[34m")  # Blue
            _stream_process_output(mcp_process, "stderr", "[MCP]", "\033[34m")  # Blue

            pid_callback(mcp_process.pid)
            logger.info(f"✓ Cognee MCP server starting on http://127.0.0.1:{mcp_port}/sse")
        except Exception as e:
            logger.error(f"Failed to start MCP server with Docker: {str(e)}")
    # Start backend server if requested
    if start_backend:
        logger.info("Starting cognee backend API server...")
@@ -389,16 +543,19 @@
                "uvicorn",
                "cognee.api.client:app",
                "--host",
                backend_host,
                "localhost",
                "--port",
                str(backend_port),
            ],
            # Inherit stdout/stderr from parent process to show logs
            stdout=None,
            stderr=None,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            preexec_fn=os.setsid if hasattr(os, "setsid") else None,
        )

        # Start threads to stream backend output with prefix
        _stream_process_output(backend_process, "stdout", "[BACKEND]", "\033[32m")  # Green
        _stream_process_output(backend_process, "stderr", "[BACKEND]", "\033[32m")  # Green

        pid_callback(backend_process.pid)

        # Give the backend a moment to start

@@ -408,7 +565,7 @@
            logger.error("Backend server failed to start - process exited early")
            return None

        logger.info(f"✓ Backend API started at http://{backend_host}:{backend_port}")
        logger.info(f"✓ Backend API started at http://localhost:{backend_port}")

    except Exception as e:
        logger.error(f"Failed to start backend server: {str(e)}")
@@ -453,24 +610,40 @@

    # Prepare environment variables
    env = os.environ.copy()
    env["HOST"] = host
    env["HOST"] = "localhost"
    env["PORT"] = str(port)

    # Start the development server
    logger.info(f"Starting frontend server at http://{host}:{port}")
    logger.info(f"Starting frontend server at http://localhost:{port}")
    logger.info("This may take a moment to compile and start...")

    try:
        # Create frontend in its own process group for clean termination
        process = subprocess.Popen(
            ["npm", "run", "dev"],
            cwd=frontend_path,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            preexec_fn=os.setsid if hasattr(os, "setsid") else None,
        )
        # Use shell=True on Windows for npm commands
        if platform.system() == "Windows":
            process = subprocess.Popen(
                ["npm", "run", "dev"],
                cwd=frontend_path,
                env=env,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                shell=True,
            )
        else:
            process = subprocess.Popen(
                ["npm", "run", "dev"],
                cwd=frontend_path,
                env=env,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
            )

        # Start threads to stream frontend output with prefix
        _stream_process_output(process, "stdout", "[FRONTEND]", "\033[33m")  # Yellow
        _stream_process_output(process, "stderr", "[FRONTEND]", "\033[33m")  # Yellow

        pid_callback(process.pid)

@@ -479,10 +652,7 @@

        # Check if process is still running
        if process.poll() is not None:
            stdout, stderr = process.communicate()
            logger.error("Frontend server failed to start:")
            logger.error(f"stdout: {stdout}")
            logger.error(f"stderr: {stderr}")
            logger.error("Frontend server failed to start - check the logs above for details")
            return None

        # Open browser if requested

@@ -491,7 +661,7 @@
            def open_browser_delayed():
                time.sleep(5)  # Give Next.js time to fully start
                try:
                    webbrowser.open(f"http://{host}:{port}")  # TODO: use dashboard url?
                    webbrowser.open(f"http://localhost:{port}")
                except Exception as e:
                    logger.warning(f"Could not open browser automatically: {e}")

@@ -499,13 +669,9 @@
            browser_thread.start()

        logger.info("✓ Cognee UI is starting up...")
        logger.info(f"✓ Open your browser to: http://{host}:{port}")
        logger.info(f"✓ Open your browser to: http://localhost:{port}")
        logger.info("✓ The UI will be available once Next.js finishes compiling")

        # Store backend process reference in the frontend process for cleanup
        if backend_process:
            process._cognee_backend_process = backend_process

        return process

    except Exception as e:
@ -523,102 +689,3 @@ def start_ui(
|
|||
except (OSError, ProcessLookupError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def stop_ui(process: subprocess.Popen) -> bool:
|
||||
"""
|
||||
Stop a running UI server process and backend process (if started), along with all their children.
|
||||
|
||||
Args:
|
||||
process: The subprocess.Popen object returned by start_ui()
|
||||
|
||||
Returns:
|
||||
bool: True if stopped successfully, False otherwise
|
||||
"""
|
||||
if not process:
|
||||
return False
|
||||
|
||||
success = True
|
||||
|
||||
try:
|
||||
# First, stop the backend process if it exists
|
||||
backend_process = getattr(process, "_cognee_backend_process", None)
|
||||
if backend_process:
|
||||
logger.info("Stopping backend server...")
|
||||
try:
|
||||
backend_process.terminate()
|
||||
try:
|
||||
backend_process.wait(timeout=5)
|
||||
logger.info("Backend server stopped gracefully")
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.warning("Backend didn't terminate gracefully, forcing kill")
|
||||
backend_process.kill()
|
||||
backend_process.wait()
|
||||
logger.info("Backend server stopped")
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping backend server: {str(e)}")
|
||||
success = False
|
||||
|
||||
# Now stop the frontend process
|
||||
logger.info("Stopping frontend server...")
|
||||
# Try to terminate the process group (includes child processes like Next.js)
|
||||
if hasattr(os, "killpg"):
|
||||
try:
|
||||
# Kill the entire process group
|
||||
os.killpg(os.getpgid(process.pid), signal.SIGTERM)
|
||||
logger.debug("Sent SIGTERM to process group")
|
||||
except (OSError, ProcessLookupError):
|
||||
# Fall back to terminating just the main process
|
||||
process.terminate()
|
||||
logger.debug("Terminated main process only")
|
||||
else:
|
||||
process.terminate()
|
||||
logger.debug("Terminated main process (Windows)")
|
||||
|
||||
try:
|
||||
process.wait(timeout=10)
|
||||
logger.info("Frontend server stopped gracefully")
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.warning("Frontend didn't terminate gracefully, forcing kill")
|
||||
|
||||
# Force kill the process group
|
||||
if hasattr(os, "killpg"):
|
||||
try:
|
||||
os.killpg(os.getpgid(process.pid), signal.SIGKILL)
|
||||
logger.debug("Sent SIGKILL to process group")
|
||||
except (OSError, ProcessLookupError):
|
||||
process.kill()
|
||||
logger.debug("Force killed main process only")
|
||||
else:
|
||||
process.kill()
|
||||
logger.debug("Force killed main process (Windows)")
|
||||
|
||||
process.wait()
|
||||
|
||||
if success:
|
||||
logger.info("UI servers stopped successfully")
|
||||
|
||||
return success
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping UI servers: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
# Convenience function similar to DuckDB's approach
|
||||
def ui() -> Optional[subprocess.Popen]:
|
||||
"""
|
||||
Convenient alias for start_ui() with default parameters.
|
||||
Similar to how DuckDB provides simple ui() function.
|
||||
"""
|
||||
return start_ui()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Test the UI startup
|
||||
server = start_ui()
|
||||
if server:
|
||||
try:
|
||||
input("Press Enter to stop the server...")
|
||||
finally:
|
||||
stop_ui(server)
|
||||
|
|
|
|||
1
cognee/api/v1/update/__init__.py
Normal file
1
cognee/api/v1/update/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from .update import update
|
||||
1
cognee/api/v1/update/routers/__init__.py
Normal file
1
cognee/api/v1/update/routers/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
from .get_update_router import get_update_router
|
||||
90
cognee/api/v1/update/routers/get_update_router.py
Normal file
90
cognee/api/v1/update/routers/get_update_router.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
from fastapi.responses import JSONResponse
|
||||
from fastapi import File, UploadFile, Depends, Form
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter
|
||||
from fastapi.encoders import jsonable_encoder
|
||||
from typing import List
|
||||
from uuid import UUID
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.modules.users.methods import get_authenticated_user
|
||||
from cognee.shared.utils import send_telemetry
|
||||
from cognee.modules.pipelines.models.PipelineRunInfo import (
|
||||
PipelineRunErrored,
|
||||
)
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def get_update_router() -> APIRouter:
|
||||
router = APIRouter()
|
||||
|
||||
@router.patch("", response_model=None)
|
||||
async def update(
|
||||
data_id: UUID,
|
||||
dataset_id: UUID,
|
||||
data: List[UploadFile] = File(default=None),
|
||||
node_set: Optional[List[str]] = Form(default=[""], example=[""]),
|
||||
user: User = Depends(get_authenticated_user),
|
||||
):
|
||||
"""
|
||||
Update data in a dataset.
|
||||
|
||||
This endpoint updates an existing document in a specified dataset. Provide the data_id of the document
|
||||
to update and upload the new version of the document as the data.
|
||||
The document is updated, analyzed, and the changes are integrated into the knowledge graph.
|
||||
|
||||
## Request Parameters
|
||||
- **data_id** (UUID): UUID of the document to update in Cognee memory
|
||||
- **data** (List[UploadFile]): File(s) containing the new version of the document.
|
||||
- **dataset_id** (UUID): UUID of the existing dataset that contains the document
|
||||
- **node_set** (Optional[List[str]]): List of node identifiers for graph organization and access control.
|
||||
Used for grouping related data points in the knowledge graph.
|
||||
|
||||
## Response
|
||||
Returns information about the update operation containing:
|
||||
- Status of the operation
|
||||
- Details about the processed data
|
||||
- Any relevant metadata from the ingestion process
|
||||
|
||||
## Error Codes
|
||||
- **400 Bad Request**: Required data_id or dataset_id not provided
|
||||
- **409 Conflict**: Error during update operation
|
||||
- **403 Forbidden**: User doesn't have permission to update the dataset
|
||||
|
||||
## Notes
|
||||
- To update data in datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
|
||||
- dataset_id value can only be the UUID of an already existing dataset
|
||||
"""
|
||||
send_telemetry(
|
||||
"Update API Endpoint Invoked",
|
||||
user.id,
|
||||
additional_properties={
|
||||
"endpoint": "PATCH /v1/update",
|
||||
"dataset_id": str(dataset_id),
|
||||
"data_id": str(data_id),
|
||||
"node_set": str(node_set),
|
||||
},
|
||||
)
|
||||
|
||||
from cognee.api.v1.update import update as cognee_update
|
||||
|
||||
try:
|
||||
update_run = await cognee_update(
|
||||
data_id=data_id,
|
||||
data=data,
|
||||
dataset_id=dataset_id,
|
||||
user=user,
|
||||
node_set=node_set,
|
||||
)
|
||||
|
||||
# If any cognify run errored return JSONResponse with proper error status code
|
||||
if any(isinstance(v, PipelineRunErrored) for v in update_run.values()):
|
||||
return JSONResponse(status_code=420, content=jsonable_encoder(update_run))
|
||||
return update_run
|
||||
|
||||
except Exception as error:
|
||||
logger.error(f"Error during deletion by data_id: {str(error)}")
|
||||
return JSONResponse(status_code=409, content={"error": str(error)})
|
||||
|
||||
return router
|
||||
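A minimal client-side sketch of calling the new PATCH /v1/update endpoint added above. The host, route prefix, bearer token, file path, and UUIDs are placeholders/assumptions rather than values taken from this change, and the exact auth mechanism depends on how the deployment is configured.

import requests

API_URL = "http://localhost:8000/v1/update"  # assumed mount point; adjust to your deployment prefix

def update_document(data_id: str, dataset_id: str, file_path: str, token: str) -> dict:
    # data_id identifies the existing document; the uploaded file is its new version
    with open(file_path, "rb") as new_version:
        response = requests.patch(
            API_URL,
            params={"data_id": data_id, "dataset_id": dataset_id},
            files={"data": new_version},
            headers={"Authorization": f"Bearer {token}"},  # assumed bearer auth
        )
    response.raise_for_status()
    return response.json()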
100
cognee/api/v1/update/update.py
Normal file
100
cognee/api/v1/update/update.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
from uuid import UUID
|
||||
from typing import Union, BinaryIO, List, Optional
|
||||
|
||||
from cognee.modules.users.models import User
|
||||
from cognee.api.v1.delete import delete
|
||||
from cognee.api.v1.add import add
|
||||
from cognee.api.v1.cognify import cognify
|
||||
|
||||
|
||||
async def update(
|
||||
data_id: UUID,
|
||||
data: Union[BinaryIO, list[BinaryIO], str, list[str]],
|
||||
user: User = None,
|
||||
node_set: Optional[List[str]] = None,
|
||||
dataset_id: Optional[UUID] = None,
|
||||
vector_db_config: dict = None,
|
||||
graph_db_config: dict = None,
|
||||
preferred_loaders: List[str] = None,
|
||||
incremental_loading: bool = True,
|
||||
):
|
||||
"""
|
||||
Update existing data in Cognee.
|
||||
|
||||
Supported Input Types:
|
||||
- **Text strings**: Direct text content (str) - any string not starting with "/" or "file://"
|
||||
- **File paths**: Local file paths as strings in these formats:
|
||||
* Absolute paths: "/path/to/document.pdf"
|
||||
* File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt"
|
||||
* S3 paths: "s3://bucket-name/path/to/file.pdf"
|
||||
- **Binary file objects**: File handles/streams (BinaryIO)
|
||||
- **Lists**: Multiple files or text strings in a single call
|
||||
|
||||
Supported File Formats:
|
||||
- Text files (.txt, .md, .csv)
|
||||
- PDFs (.pdf)
|
||||
- Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
|
||||
- Audio files (.mp3, .wav) - transcribed to text
|
||||
- Code files (.py, .js, .ts, etc.) - parsed for structure and content
|
||||
- Office documents (.docx, .pptx)
|
||||
|
||||
Workflow:
|
||||
1. **Deletion**: Removes the existing document identified by data_id from the dataset
|
||||
2. **Data Resolution**: Resolves the new file paths/content and validates accessibility
|
||||
3. **Re-ingestion**: Extracts text from the new data and stores it in the same dataset
|
||||
4. **Metadata Tracking**: Records file metadata, timestamps, and user permissions
|
||||
5. **Cognify**: Re-processes the dataset so the changes are integrated into the knowledge graph
|
||||
|
||||
Args:
|
||||
data_id: UUID of existing data to update
|
||||
data: The latest version of the data. Can be:
|
||||
- Single text string: "Your text content here"
|
||||
- Absolute file path: "/path/to/document.pdf"
|
||||
- File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt"
|
||||
- S3 path: "s3://my-bucket/documents/file.pdf"
|
||||
- List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
|
||||
- Binary file object: open("file.txt", "rb")
|
||||
dataset_id: UUID of the dataset that contains the data being updated.
|
||||
Must reference an existing dataset the user has permission to modify.
|
||||
user: User object for authentication and permissions. Uses default user if None.
|
||||
Default user: "default_user@example.com" (created automatically on first use).
|
||||
Users can only access datasets they have permissions for.
|
||||
node_set: Optional list of node identifiers for graph organization and access control.
|
||||
Used for grouping related data points in the knowledge graph.
|
||||
vector_db_config: Optional configuration for vector database (for custom setups).
|
||||
graph_db_config: Optional configuration for graph database (for custom setups).
|
||||
preferred_loaders: Optional list of loader names to prefer when the updated data is re-ingested.
|
||||
|
||||
Returns:
|
||||
PipelineRunInfo: Information about the cognify pipeline execution triggered by the update, including:
|
||||
- Pipeline run ID for tracking
|
||||
- Dataset ID where data was stored
|
||||
- Processing status and any errors
|
||||
- Execution timestamps and metadata
|
||||
"""
|
||||
await delete(
|
||||
data_id=data_id,
|
||||
dataset_id=dataset_id,
|
||||
user=user,
|
||||
)
|
||||
|
||||
await add(
|
||||
data=data,
|
||||
dataset_id=dataset_id,
|
||||
user=user,
|
||||
node_set=node_set,
|
||||
vector_db_config=vector_db_config,
|
||||
graph_db_config=graph_db_config,
|
||||
preferred_loaders=preferred_loaders,
|
||||
incremental_loading=incremental_loading,
|
||||
)
|
||||
|
||||
cognify_run = await cognify(
|
||||
datasets=[dataset_id],
|
||||
user=user,
|
||||
vector_db_config=vector_db_config,
|
||||
graph_db_config=graph_db_config,
|
||||
incremental_loading=incremental_loading,
|
||||
)
|
||||
|
||||
return cognify_run
|
||||
|
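A hedged sketch of driving the same flow programmatically through the update() helper introduced above; the UUIDs and file path are placeholders for an existing document and its dataset.

import asyncio
from uuid import UUID

from cognee.api.v1.update import update

async def main():
    # Replace the UUIDs with identifiers of an existing document and its dataset
    run_info = await update(
        data_id=UUID("00000000-0000-0000-0000-000000000001"),
        data="/path/to/new_version.pdf",
        dataset_id=UUID("00000000-0000-0000-0000-000000000002"),
    )
    print(run_info)  # cognify pipeline run info for the re-processed dataset

asyncio.run(main())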
|
@ -183,10 +183,20 @@ def main() -> int:
|
|||
|
||||
for pid in spawned_pids:
|
||||
try:
|
||||
pgid = os.getpgid(pid)
|
||||
os.killpg(pgid, signal.SIGTERM)
|
||||
fmt.success(f"✓ Process group {pgid} (PID {pid}) terminated.")
|
||||
except (OSError, ProcessLookupError) as e:
|
||||
if hasattr(os, "killpg"):
|
||||
# Unix-like systems: Use process groups
|
||||
pgid = os.getpgid(pid)
|
||||
os.killpg(pgid, signal.SIGTERM)
|
||||
fmt.success(f"✓ Process group {pgid} (PID {pid}) terminated.")
|
||||
else:
|
||||
# Windows: Use taskkill to terminate process and its children
|
||||
subprocess.run(
|
||||
["taskkill", "/F", "/T", "/PID", str(pid)],
|
||||
capture_output=True,
|
||||
check=False,
|
||||
)
|
||||
fmt.success(f"✓ Process {pid} and its children terminated.")
|
||||
except (OSError, ProcessLookupError, subprocess.SubprocessError) as e:
|
||||
fmt.warning(f"Could not terminate process {pid}: {e}")
|
||||
|
||||
sys.exit(0)
|
||||
|
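The shutdown path above now branches on os.killpg availability. The same cross-platform idea in isolation, as a standalone sketch; the helper name is illustrative, not part of this change.

import os
import signal
import subprocess

def terminate_process_tree(pid: int) -> None:
    """Best-effort termination of a process and its children on Unix and Windows."""
    if hasattr(os, "killpg"):
        # Unix-like systems: signal the whole process group
        os.killpg(os.getpgid(pid), signal.SIGTERM)
    else:
        # Windows: taskkill /T walks the child tree, /F forces termination
        subprocess.run(
            ["taskkill", "/F", "/T", "/PID", str(pid)],
            capture_output=True,
            check=False,
        )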
|
@ -204,19 +214,27 @@ def main() -> int:
|
|||
nonlocal spawned_pids
|
||||
spawned_pids.append(pid)
|
||||
|
||||
frontend_port = 3000
|
||||
start_backend, backend_port = True, 8000
|
||||
start_mcp, mcp_port = True, 8001
|
||||
server_process = start_ui(
|
||||
host="localhost",
|
||||
port=3000,
|
||||
open_browser=True,
|
||||
start_backend=True,
|
||||
auto_download=True,
|
||||
pid_callback=pid_callback,
|
||||
port=frontend_port,
|
||||
open_browser=True,
|
||||
auto_download=True,
|
||||
start_backend=start_backend,
|
||||
backend_port=backend_port,
|
||||
start_mcp=start_mcp,
|
||||
mcp_port=mcp_port,
|
||||
)
|
||||
|
||||
if server_process:
|
||||
fmt.success("UI server started successfully!")
|
||||
fmt.echo("The interface is available at: http://localhost:3000")
|
||||
fmt.echo("The API backend is available at: http://localhost:8000")
|
||||
fmt.echo(f"The interface is available at: http://localhost:{frontend_port}")
|
||||
if start_backend:
|
||||
fmt.echo(f"The API backend is available at: http://localhost:{backend_port}")
|
||||
if start_mcp:
|
||||
fmt.echo(f"The MCP server is available at: http://localhost:{mcp_port}")
|
||||
fmt.note("Press Ctrl+C to stop the server...")
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
|
||||
import subprocess
|
||||
import modal
|
||||
import streamlit as st
|
||||
|
|
@ -78,6 +78,14 @@ def main():
|
|||
}
|
||||
)
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
st.error(
|
||||
"Pandas is required for the evaluation dashboard. Install with 'pip install cognee\"[evals]\"' to use this feature."
|
||||
)
|
||||
return
|
||||
|
||||
df = pd.DataFrame(records)
|
||||
if df.empty:
|
||||
st.warning("No JSON files found in the volume.")
|
||||
|
|
|
|||
|
|
@ -1,48 +0,0 @@
|
|||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
||||
from cognee.infrastructure.data.exceptions.exceptions import KeywordExtractionError
|
||||
from cognee.shared.utils import extract_pos_tags
|
||||
|
||||
|
||||
def extract_keywords(text: str) -> list[str]:
|
||||
"""
|
||||
Extract keywords from the provided text string.
|
||||
|
||||
This function raises an KeyWordExtractionError if the input text is empty. It processes the
|
||||
text to extract parts of speech, focusing on nouns, and uses TF-IDF to identify the most
|
||||
relevant keywords based on their frequency. The function returns a list of up to 15
|
||||
keywords, each having more than 3 characters.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- text (str): The input text from which to extract keywords.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- list[str]: A list of keywords extracted from the text, containing up to 15 nouns
|
||||
with more than 3 characters.
|
||||
"""
|
||||
if len(text) == 0:
|
||||
raise KeywordExtractionError()
|
||||
|
||||
tags = extract_pos_tags(text)
|
||||
nouns = [word for (word, tag) in tags if tag == "NN"]
|
||||
|
||||
vectorizer = TfidfVectorizer()
|
||||
tfidf = vectorizer.fit_transform(nouns)
|
||||
|
||||
top_nouns = sorted(
|
||||
vectorizer.vocabulary_, key=lambda x: tfidf[0, vectorizer.vocabulary_[x]], reverse=True
|
||||
)
|
||||
|
||||
keywords = []
|
||||
|
||||
for word in top_nouns:
|
||||
if len(word) > 3:
|
||||
keywords.append(word)
|
||||
if len(keywords) >= 15:
|
||||
break
|
||||
|
||||
return keywords
|
||||
|
|
@ -234,7 +234,7 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
|
|||
collection_name: str,
|
||||
query_text: Optional[str] = None,
|
||||
query_vector: Optional[List[float]] = None,
|
||||
limit: int = None,
|
||||
limit: Optional[int] = None,
|
||||
with_vector: bool = False,
|
||||
):
|
||||
"""
|
||||
|
|
@ -265,10 +265,10 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
|
|||
"Use this option only when vector data is required."
|
||||
)
|
||||
|
||||
# In the case of excessive limit, or zero / negative value, limit will be set to 10.
|
||||
# In the case of excessive limit, or None / zero / negative value, limit will be set to 10.
|
||||
if not limit or limit <= self._TOPK_LOWER_BOUND or limit > self._TOPK_UPPER_BOUND:
|
||||
logger.warning(
|
||||
"Provided limit (%s) is invalid (zero, negative, or exceeds maximum). "
|
||||
"Provided limit (%s) is invalid (None, zero, negative, or exceeds maximum). "
|
||||
"Defaulting to limit=10.",
|
||||
limit,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -39,8 +39,16 @@ def create_relational_engine(
|
|||
connection_string = f"sqlite+aiosqlite:///{db_path}/{db_name}"
|
||||
|
||||
if db_provider == "postgres":
|
||||
connection_string = (
|
||||
f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
|
||||
)
|
||||
try:
|
||||
# Test if asyncpg is available
|
||||
import asyncpg
|
||||
|
||||
connection_string = (
|
||||
f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
|
||||
)
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PostgreSQL functionality."
|
||||
)
|
||||
|
||||
return SQLAlchemyAdapter(connection_string)
|
||||
|
|
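The same optional-dependency guard in isolation: the relational engine only builds an asyncpg connection string when the postgres extra is importable. Parameter names here are illustrative.

def build_connection_string(provider, db_path, db_name, user=None, password=None, host=None, port=None) -> str:
    # Default to the bundled SQLite driver
    connection_string = f"sqlite+aiosqlite:///{db_path}/{db_name}"
    if provider == "postgres":
        try:
            import asyncpg  # noqa: F401 - only verifies the optional dependency is installed
        except ImportError:
            raise ImportError(
                'PostgreSQL dependencies are not installed. Install with pip install cognee"[postgres]".'
            )
        connection_string = f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{db_name}"
    return connection_string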
|
|||
|
|
@ -352,7 +352,7 @@ class ChromaDBAdapter(VectorDBInterface):
|
|||
collection_name: str,
|
||||
query_text: str = None,
|
||||
query_vector: List[float] = None,
|
||||
limit: int = 15,
|
||||
limit: Optional[int] = 15,
|
||||
with_vector: bool = False,
|
||||
normalized: bool = True,
|
||||
):
|
||||
|
|
@ -386,9 +386,13 @@ class ChromaDBAdapter(VectorDBInterface):
|
|||
try:
|
||||
collection = await self.get_collection(collection_name)
|
||||
|
||||
if limit == 0:
|
||||
if limit is None:
|
||||
limit = await collection.count()
|
||||
|
||||
# If limit is still 0, no need to do the search, just return empty results
|
||||
if limit <= 0:
|
||||
return []
|
||||
|
||||
results = await collection.query(
|
||||
query_embeddings=[query_vector],
|
||||
include=["metadatas", "distances", "embeddings"]
|
||||
|
|
@ -428,7 +432,7 @@ class ChromaDBAdapter(VectorDBInterface):
|
|||
for row in vector_list
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(f"Error in search: {str(e)}")
|
||||
logger.warning(f"Error in search: {str(e)}")
|
||||
return []
|
||||
|
||||
async def batch_search(
|
||||
|
|
|
|||
|
|
@ -66,7 +66,12 @@ def create_vector_engine(
|
|||
f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
|
||||
)
|
||||
|
||||
from .pgvector.PGVectorAdapter import PGVectorAdapter
|
||||
try:
|
||||
from .pgvector.PGVectorAdapter import PGVectorAdapter
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PGVector functionality."
|
||||
)
|
||||
|
||||
return PGVectorAdapter(
|
||||
connection_string,
|
||||
|
|
|
|||
|
|
@ -205,9 +205,12 @@ class LanceDBAdapter(VectorDBInterface):
|
|||
collection = await self.get_collection(collection_name)
|
||||
|
||||
if len(data_point_ids) == 1:
|
||||
results = await collection.query().where(f"id = '{data_point_ids[0]}'").to_pandas()
|
||||
results = await collection.query().where(f"id = '{data_point_ids[0]}'")
|
||||
else:
|
||||
results = await collection.query().where(f"id IN {tuple(data_point_ids)}").to_pandas()
|
||||
results = await collection.query().where(f"id IN {tuple(data_point_ids)}")
|
||||
|
||||
# Convert query results to list format
|
||||
results_list = results.to_list() if hasattr(results, "to_list") else list(results)
|
||||
|
||||
return [
|
||||
ScoredResult(
|
||||
|
|
@ -215,7 +218,7 @@ class LanceDBAdapter(VectorDBInterface):
|
|||
payload=result["payload"],
|
||||
score=0,
|
||||
)
|
||||
for result in results.to_dict("index").values()
|
||||
for result in results_list
|
||||
]
|
||||
|
||||
async def search(
|
||||
|
|
@ -223,7 +226,7 @@ class LanceDBAdapter(VectorDBInterface):
|
|||
collection_name: str,
|
||||
query_text: str = None,
|
||||
query_vector: List[float] = None,
|
||||
limit: int = 15,
|
||||
limit: Optional[int] = 15,
|
||||
with_vector: bool = False,
|
||||
normalized: bool = True,
|
||||
):
|
||||
|
|
@ -235,16 +238,14 @@ class LanceDBAdapter(VectorDBInterface):
|
|||
|
||||
collection = await self.get_collection(collection_name)
|
||||
|
||||
if limit == 0:
|
||||
if limit is None:
|
||||
limit = await collection.count_rows()
|
||||
|
||||
# LanceDB search will break if limit is 0 so we must return
|
||||
if limit == 0:
|
||||
if limit <= 0:
|
||||
return []
|
||||
|
||||
results = await collection.vector_search(query_vector).limit(limit).to_pandas()
|
||||
|
||||
result_values = list(results.to_dict("index").values())
|
||||
result_values = await collection.vector_search(query_vector).limit(limit).to_list()
|
||||
|
||||
if not result_values:
|
||||
return []
|
||||
|
|
@ -264,7 +265,7 @@ class LanceDBAdapter(VectorDBInterface):
|
|||
self,
|
||||
collection_name: str,
|
||||
query_texts: List[str],
|
||||
limit: int = None,
|
||||
limit: Optional[int] = None,
|
||||
with_vectors: bool = False,
|
||||
):
|
||||
query_vectors = await self.embedding_engine.embed_text(query_texts)
|
||||
|
|
|
|||
|
|
@ -3,13 +3,12 @@ from typing import List, Optional, get_type_hints
|
|||
from sqlalchemy.inspection import inspect
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from sqlalchemy import JSON, Column, Table, select, delete, MetaData
|
||||
from sqlalchemy import JSON, Column, Table, select, delete, MetaData, func
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
|
||||
from sqlalchemy.exc import ProgrammingError
|
||||
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
||||
from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError
|
||||
|
||||
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.engine import DataPoint
|
||||
from cognee.infrastructure.engine.utils import parse_id
|
||||
|
|
@ -299,7 +298,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
|||
collection_name: str,
|
||||
query_text: Optional[str] = None,
|
||||
query_vector: Optional[List[float]] = None,
|
||||
limit: int = 15,
|
||||
limit: Optional[int] = 15,
|
||||
with_vector: bool = False,
|
||||
) -> List[ScoredResult]:
|
||||
if query_text is None and query_vector is None:
|
||||
|
|
@ -311,6 +310,16 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
|||
# Get PGVectorDataPoint Table from database
|
||||
PGVectorDataPoint = await self.get_table(collection_name)
|
||||
|
||||
if limit is None:
|
||||
async with self.get_async_session() as session:
|
||||
query = select(func.count()).select_from(PGVectorDataPoint)
|
||||
result = await session.execute(query)
|
||||
limit = result.scalar_one()
|
||||
|
||||
# If limit is still 0, no need to do the search, just return empty results
|
||||
if limit <= 0:
|
||||
return []
|
||||
|
||||
# NOTE: This needs to be initialized in case search doesn't return a value
|
||||
closest_items = []
|
||||
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class VectorDBInterface(Protocol):
|
|||
collection_name: str,
|
||||
query_text: Optional[str],
|
||||
query_vector: Optional[List[float]],
|
||||
limit: int,
|
||||
limit: Optional[int],
|
||||
with_vector: bool = False,
|
||||
):
|
||||
"""
|
||||
|
|
@ -98,7 +98,7 @@ class VectorDBInterface(Protocol):
|
|||
collection.
|
||||
- query_vector (Optional[List[float]]): An optional vector representation for
|
||||
searching the collection.
|
||||
- limit (int): The maximum number of results to return from the search.
|
||||
- limit (Optional[int]): The maximum number of results to return from the search.
|
||||
- with_vector (bool): Whether to return the vector representations with search
|
||||
results. (default False)
|
||||
"""
|
||||
|
|
@ -106,7 +106,11 @@ class VectorDBInterface(Protocol):
|
|||
|
||||
@abstractmethod
|
||||
async def batch_search(
|
||||
self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False
|
||||
self,
|
||||
collection_name: str,
|
||||
query_texts: List[str],
|
||||
limit: Optional[int],
|
||||
with_vectors: bool = False,
|
||||
):
|
||||
"""
|
||||
Perform a batch search using multiple text queries against a collection.
|
||||
|
|
@ -116,7 +120,7 @@ class VectorDBInterface(Protocol):
|
|||
|
||||
- collection_name (str): The name of the collection to conduct the batch search in.
|
||||
- query_texts (List[str]): A list of text queries to use for the search.
|
||||
- limit (int): The maximum number of results to return for each query.
|
||||
- limit (Optional[int]): The maximum number of results to return for each query.
|
||||
- with_vectors (bool): Whether to include vector representations with search
|
||||
results. (default False)
|
||||
"""
|
||||
|
|
|
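Across the adapters touched above, limit=None now means "no cap" (the whole collection is considered), while a resolved limit of zero or less short-circuits to an empty result. A minimal sketch of that shared convention; resolve_search_limit and count_rows are illustrative names, not part of the interface.

from typing import Awaitable, Callable, Optional

async def resolve_search_limit(
    limit: Optional[int],
    count_rows: Callable[[], Awaitable[int]],
) -> int:
    # None -> search the entire collection
    if limit is None:
        limit = await count_rows()
    # Zero or negative -> nothing to search; callers return [] in this case
    return limit if limit > 0 else 0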
|||
|
|
@ -1,6 +1,5 @@
|
|||
import os
|
||||
import s3fs
|
||||
from typing import BinaryIO, Union
|
||||
from typing import BinaryIO, Union, TYPE_CHECKING
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from cognee.infrastructure.files.storage.s3_config import get_s3_config
|
||||
|
|
@ -8,23 +7,34 @@ from cognee.infrastructure.utils.run_async import run_async
|
|||
from cognee.infrastructure.files.storage.FileBufferedReader import FileBufferedReader
|
||||
from .storage import Storage
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import s3fs
|
||||
|
||||
|
||||
class S3FileStorage(Storage):
|
||||
"""
|
||||
Manage local file storage operations such as storing, retrieving, and managing files on
|
||||
the filesystem.
|
||||
Manage S3 file storage operations such as storing, retrieving, and managing files on
|
||||
S3-compatible storage.
|
||||
"""
|
||||
|
||||
storage_path: str
|
||||
s3: s3fs.S3FileSystem
|
||||
s3: "s3fs.S3FileSystem"
|
||||
|
||||
def __init__(self, storage_path: str):
|
||||
try:
|
||||
import s3fs
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
's3fs is required for S3FileStorage. Install it with: pip install cognee"[aws]"'
|
||||
)
|
||||
|
||||
self.storage_path = storage_path
|
||||
s3_config = get_s3_config()
|
||||
if s3_config.aws_access_key_id is not None and s3_config.aws_secret_access_key is not None:
|
||||
self.s3 = s3fs.S3FileSystem(
|
||||
key=s3_config.aws_access_key_id,
|
||||
secret=s3_config.aws_secret_access_key,
|
||||
token=s3_config.aws_session_token,
|
||||
anon=False,
|
||||
endpoint_url=s3_config.aws_endpoint_url,
|
||||
client_kwargs={"region_name": s3_config.aws_region},
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ class S3Config(BaseSettings):
|
|||
aws_endpoint_url: Optional[str] = None
|
||||
aws_access_key_id: Optional[str] = None
|
||||
aws_secret_access_key: Optional[str] = None
|
||||
aws_session_token: Optional[str] = None
|
||||
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
||||
|
||||
|
||||
|
|
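S3FileStorage now imports s3fs lazily and reads credentials (including the new aws_session_token) from S3Config, which loads from the environment or a .env file. A hedged sketch of pointing storage at a bucket; the bucket name and credentials are placeholders and the optional aws extra must be installed.

import os

# S3Config picks these up from the environment (or .env); values are placeholders
os.environ.setdefault("AWS_ACCESS_KEY_ID", "placeholder-key-id")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "placeholder-secret")
os.environ.setdefault("AWS_REGION", "us-east-1")

from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage

# Raises ImportError unless installed with: pip install cognee"[aws]"
storage = S3FileStorage("s3://my-placeholder-bucket/cognee-data")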
|
|||
|
|
@ -4,7 +4,6 @@ from urllib.parse import urlparse
|
|||
from contextlib import asynccontextmanager
|
||||
|
||||
from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
|
||||
from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
|
||||
from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage
|
||||
|
||||
|
||||
|
|
@ -23,23 +22,17 @@ async def open_data_file(file_path: str, mode: str = "rb", encoding: str = None,
|
|||
yield file
|
||||
|
||||
elif file_path.startswith("s3://"):
|
||||
try:
|
||||
from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"S3 dependencies are not installed. Please install with 'pip install cognee\"[aws]\"' to use S3 functionality."
|
||||
)
|
||||
|
||||
normalized_url = get_data_file_path(file_path)
|
||||
s3_dir_path = os.path.dirname(normalized_url)
|
||||
s3_filename = os.path.basename(normalized_url)
|
||||
|
||||
# if "/" in s3_path:
|
||||
# s3_dir = "/".join(s3_path.split("/")[:-1])
|
||||
# s3_filename = s3_path.split("/")[-1]
|
||||
# else:
|
||||
# s3_dir = ""
|
||||
# s3_filename = s3_path
|
||||
|
||||
# Extract filesystem path from S3 URL structure
|
||||
# file_dir_path = (
|
||||
# f"s3://{parsed_url.netloc}/{s3_dir}" if s3_dir else f"s3://{parsed_url.netloc}"
|
||||
# )
|
||||
# file_name = s3_filename
|
||||
|
||||
file_storage = S3FileStorage(s3_dir_path)
|
||||
|
||||
async with file_storage.open(s3_filename, mode=mode, **kwargs) as file:
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
import os
|
||||
from typing import Optional, ClassVar
|
||||
from typing import Optional, ClassVar, Any
|
||||
from functools import lru_cache
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
from pydantic import model_validator
|
||||
from baml_py import ClientRegistry
|
||||
|
||||
try:
|
||||
from baml_py import ClientRegistry
|
||||
except ImportError:
|
||||
ClientRegistry = None
|
||||
|
||||
|
||||
class LLMConfig(BaseSettings):
|
||||
|
|
@ -65,27 +69,36 @@ class LLMConfig(BaseSettings):
|
|||
fallback_endpoint: str = ""
|
||||
fallback_model: str = ""
|
||||
|
||||
baml_registry: ClassVar[ClientRegistry] = ClientRegistry()
|
||||
baml_registry: Optional[Any] = None
|
||||
|
||||
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
||||
|
||||
def model_post_init(self, __context) -> None:
|
||||
"""Initialize the BAML registry after the model is created."""
|
||||
raw_options = {
|
||||
"model": self.baml_llm_model,
|
||||
"temperature": self.baml_llm_temperature,
|
||||
"api_key": self.baml_llm_api_key,
|
||||
"base_url": self.baml_llm_endpoint,
|
||||
"api_version": self.baml_llm_api_version,
|
||||
}
|
||||
# Check if BAML is selected as structured output framework but not available
|
||||
if self.structured_output_framework.lower() == "baml" and ClientRegistry is None:
|
||||
raise ImportError(
|
||||
"BAML is selected as structured output framework but not available. "
|
||||
"Please install with 'pip install cognee\"[baml]\"' to use BAML extraction features."
|
||||
)
|
||||
elif self.structured_output_framework.lower() == "baml" and ClientRegistry is not None:
|
||||
self.baml_registry = ClientRegistry()
|
||||
|
||||
# Note: keep the item only when the value is not None or an empty string (they would override baml default values)
|
||||
options = {k: v for k, v in raw_options.items() if v not in (None, "")}
|
||||
self.baml_registry.add_llm_client(
|
||||
name=self.baml_llm_provider, provider=self.baml_llm_provider, options=options
|
||||
)
|
||||
# Sets the primary client
|
||||
self.baml_registry.set_primary(self.baml_llm_provider)
|
||||
raw_options = {
|
||||
"model": self.baml_llm_model,
|
||||
"temperature": self.baml_llm_temperature,
|
||||
"api_key": self.baml_llm_api_key,
|
||||
"base_url": self.baml_llm_endpoint,
|
||||
"api_version": self.baml_llm_api_version,
|
||||
}
|
||||
|
||||
# Note: keep the item only when the value is not None or an empty string (they would override baml default values)
|
||||
options = {k: v for k, v in raw_options.items() if v not in (None, "")}
|
||||
self.baml_registry.add_llm_client(
|
||||
name=self.baml_llm_provider, provider=self.baml_llm_provider, options=options
|
||||
)
|
||||
# Sets the primary client
|
||||
self.baml_registry.set_primary(self.baml_llm_provider)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def ensure_env_vars_for_ollama(self) -> "LLMConfig":
|
||||
|
|
|
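With this change the BAML client registry is only built when BAML is actually selected as the structured output framework, and a missing baml_py install now fails fast with an ImportError. A hedged sketch of opting in via environment settings; the env var names are inferred from the LLMConfig field names and the accessor import path is an assumption.

import os

# Opt into BAML structured output; requires: pip install cognee"[baml]"
os.environ["STRUCTURED_OUTPUT_FRAMEWORK"] = "BAML"
os.environ["BAML_LLM_PROVIDER"] = "openai"        # placeholder provider
os.environ["BAML_LLM_MODEL"] = "gpt-4o-mini"      # placeholder model
os.environ["BAML_LLM_API_KEY"] = "sk-placeholder"

from cognee.infrastructure.llm.config import get_llm_config  # assumed accessor

config = get_llm_config()
# config.baml_registry is now a ClientRegistry with the provider above set as primary,
# or model_post_init raised ImportError if baml_py is missing.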
|||
|
|
@ -26,6 +26,7 @@ def read_query_prompt(prompt_file_name: str, base_directory: str = None):
|
|||
read due to an error.
|
||||
"""
|
||||
logger = get_logger(level=ERROR)
|
||||
|
||||
try:
|
||||
if base_directory is None:
|
||||
base_directory = get_absolute_path("./infrastructure/llm/prompts")
|
||||
|
|
@ -35,8 +36,8 @@ def read_query_prompt(prompt_file_name: str, base_directory: str = None):
|
|||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
return file.read()
|
||||
except FileNotFoundError:
|
||||
logger.error(f"Error: Prompt file not found. Attempted to read: %s {file_path}")
|
||||
logger.error(f"Error: Prompt file not found. Attempted to read: {file_path}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"An error occurred: %s {e}")
|
||||
logger.error(f"An error occurred: {e}")
|
||||
return None
|
||||
|
|
|
|||
1
cognee/infrastructure/llm/prompts/test.txt
Normal file
1
cognee/infrastructure/llm/prompts/test.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
Respond with: test
|
||||
|
|
@ -53,7 +53,8 @@ async def acreate_structured_output(
|
|||
|
||||
# Transform BAML response to proper pydantic response model
|
||||
if response_model is str:
|
||||
return str(result)
|
||||
# Note: when the response model is str in Python, the result is stored in the text property of the BAML response model
|
||||
return str(result.text)
|
||||
return response_model.model_validate(result.dict())
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,9 +29,6 @@ observe = get_observe()
|
|||
|
||||
logger = get_logger()
|
||||
|
||||
# litellm to drop unsupported params, e.g., reasoning_effort when not supported by the model.
|
||||
litellm.drop_params = True
|
||||
|
||||
|
||||
class OpenAIAdapter(LLMInterface):
|
||||
"""
|
||||
|
|
@ -76,8 +73,10 @@ class OpenAIAdapter(LLMInterface):
|
|||
fallback_api_key: str = None,
|
||||
fallback_endpoint: str = None,
|
||||
):
|
||||
self.aclient = instructor.from_litellm(litellm.acompletion)
|
||||
self.client = instructor.from_litellm(litellm.completion)
|
||||
self.aclient = instructor.from_litellm(
|
||||
litellm.acompletion, mode=instructor.Mode.JSON_SCHEMA
|
||||
)
|
||||
self.client = instructor.from_litellm(litellm.completion, mode=instructor.Mode.JSON_SCHEMA)
|
||||
self.transcription_model = transcription_model
|
||||
self.model = model
|
||||
self.api_key = api_key
|
||||
|
|
@ -135,7 +134,6 @@ class OpenAIAdapter(LLMInterface):
|
|||
api_version=self.api_version,
|
||||
response_model=response_model,
|
||||
max_retries=self.MAX_RETRIES,
|
||||
reasoning_effort="minimal",
|
||||
)
|
||||
except (
|
||||
ContentFilterFinishReasonError,
|
||||
|
|
@ -223,7 +221,6 @@ class OpenAIAdapter(LLMInterface):
|
|||
api_base=self.endpoint,
|
||||
api_version=self.api_version,
|
||||
response_model=response_model,
|
||||
reasoning_effort="minimal",
|
||||
max_retries=self.MAX_RETRIES,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ def get_model_max_completion_tokens(model_name: str):
|
|||
max_completion_tokens = litellm.model_cost[model_name]["max_tokens"]
|
||||
logger.debug(f"Max input tokens for {model_name}: {max_completion_tokens}")
|
||||
else:
|
||||
logger.info("Model not found in LiteLLM's model_cost.")
|
||||
logger.debug("Model not found in LiteLLM's model_cost.")
|
||||
|
||||
return max_completion_tokens
|
||||
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ class CogneeGraph(CogneeAbstractGraph):
|
|||
edge_distances = await vector_engine.search(
|
||||
collection_name="EdgeType_relationship_name",
|
||||
query_vector=query_vector,
|
||||
limit=0,
|
||||
limit=None,
|
||||
)
|
||||
projection_time = time.time() - start_time
|
||||
logger.info(
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from typing import BinaryIO
|
||||
from contextlib import asynccontextmanager
|
||||
import hashlib
|
||||
from cognee.infrastructure.data.utils.extract_keywords import extract_keywords
|
||||
from .IngestionData import IngestionData
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -42,14 +42,12 @@ class TripletSearchContextProvider(BaseContextProvider):
|
|||
self,
|
||||
entities: List[DataPoint],
|
||||
query: str,
|
||||
user: User,
|
||||
memory_fragment: CogneeGraph,
|
||||
) -> List:
|
||||
"""Creates search tasks for valid entities."""
|
||||
tasks = [
|
||||
brute_force_triplet_search(
|
||||
query=f"{entity_text} {query}",
|
||||
user=user,
|
||||
top_k=self.top_k,
|
||||
collections=self.collections,
|
||||
properties_to_project=self.properties_to_project,
|
||||
|
|
@ -84,9 +82,8 @@ class TripletSearchContextProvider(BaseContextProvider):
|
|||
if not entities:
|
||||
return "No entities provided for context search."
|
||||
|
||||
user = await get_default_user()
|
||||
memory_fragment = await get_memory_fragment(self.properties_to_project)
|
||||
search_tasks = self._get_search_tasks(entities, query, user, memory_fragment)
|
||||
search_tasks = self._get_search_tasks(entities, query, memory_fragment)
|
||||
|
||||
if not search_tasks:
|
||||
return "No valid entities found for context search."
|
||||
|
|
|
|||
|
|
@ -93,11 +93,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
|||
for field_name in index_fields:
|
||||
vector_index_collections.append(f"{subclass.__name__}_{field_name}")
|
||||
|
||||
user = await get_default_user()
|
||||
|
||||
found_triplets = await brute_force_triplet_search(
|
||||
query,
|
||||
user=user,
|
||||
top_k=self.top_k,
|
||||
collections=vector_index_collections or None,
|
||||
node_type=self.node_type,
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ class InsightsRetriever(BaseGraphRetriever):
|
|||
- top_k
|
||||
"""
|
||||
|
||||
def __init__(self, exploration_levels: int = 1, top_k: int = 5):
|
||||
def __init__(self, exploration_levels: int = 1, top_k: Optional[int] = 5):
|
||||
"""Initialize retriever with exploration levels and search parameters."""
|
||||
self.exploration_levels = exploration_levels
|
||||
self.top_k = top_k
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|||
query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
|
||||
|
||||
vector_search_results = await vector_engine.search(
|
||||
collection_name="Event_name", query_vector=query_vector, limit=0
|
||||
collection_name="Event_name", query_vector=query_vector, limit=None
|
||||
)
|
||||
|
||||
top_k_events = await self.filter_top_k_events(relevant_events, vector_search_results)
|
||||
|
|
|
|||
|
|
@ -89,7 +89,6 @@ async def get_memory_fragment(
|
|||
|
||||
async def brute_force_triplet_search(
|
||||
query: str,
|
||||
user: User,
|
||||
top_k: int = 5,
|
||||
collections: Optional[List[str]] = None,
|
||||
properties_to_project: Optional[List[str]] = None,
|
||||
|
|
@ -102,7 +101,6 @@ async def brute_force_triplet_search(
|
|||
|
||||
Args:
|
||||
query (str): The search query.
|
||||
user (User): The user performing the search.
|
||||
top_k (int): The number of top results to retrieve.
|
||||
collections (Optional[List[str]]): List of collections to query.
|
||||
properties_to_project (Optional[List[str]]): List of properties to project.
|
||||
|
|
@ -139,12 +137,10 @@ async def brute_force_triplet_search(
|
|||
|
||||
query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
|
||||
|
||||
send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
|
||||
|
||||
async def search_in_collection(collection_name: str):
|
||||
try:
|
||||
return await vector_engine.search(
|
||||
collection_name=collection_name, query_vector=query_vector, limit=0
|
||||
collection_name=collection_name, query_vector=query_vector, limit=None
|
||||
)
|
||||
except CollectionNotFoundError:
|
||||
return []
|
||||
|
|
@ -176,20 +172,14 @@ async def brute_force_triplet_search(
|
|||
|
||||
results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
|
||||
|
||||
send_telemetry("cognee.brute_force_triplet_search EXECUTION COMPLETED", user.id)
|
||||
|
||||
return results
|
||||
|
||||
except CollectionNotFoundError:
|
||||
return []
|
||||
except Exception as error:
|
||||
logger.error(
|
||||
"Error during brute force search for user: %s, query: %s. Error: %s",
|
||||
user.id,
|
||||
"Error during brute force search for query: %s. Error: %s",
|
||||
query,
|
||||
error,
|
||||
)
|
||||
send_telemetry(
|
||||
"cognee.brute_force_triplet_search EXECUTION FAILED", user.id, {"error": str(error)}
|
||||
)
|
||||
raise error
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ async def no_access_control_search(
|
|||
[get_completion, get_context] = search_tools
|
||||
|
||||
if only_context:
|
||||
return await get_context(query_text)
|
||||
return None, await get_context(query_text), []
|
||||
|
||||
context = await get_context(query_text)
|
||||
result = await get_completion(query_text, context)
|
||||
|
|
|
|||
|
|
@ -143,20 +143,35 @@ async def search(
|
|||
context = prepared_search_results["context"]
|
||||
datasets = prepared_search_results["datasets"]
|
||||
|
||||
return_value.append(
|
||||
{
|
||||
"search_result": [result] if result else None,
|
||||
"dataset_id": datasets[0].id,
|
||||
"dataset_name": datasets[0].name,
|
||||
"graphs": graphs,
|
||||
}
|
||||
)
|
||||
if only_context:
|
||||
return_value.append(
|
||||
{
|
||||
"search_result": [context] if context else None,
|
||||
"dataset_id": datasets[0].id,
|
||||
"dataset_name": datasets[0].name,
|
||||
"graphs": graphs,
|
||||
}
|
||||
)
|
||||
else:
|
||||
return_value.append(
|
||||
{
|
||||
"search_result": [result] if result else None,
|
||||
"dataset_id": datasets[0].id,
|
||||
"dataset_name": datasets[0].name,
|
||||
"graphs": graphs,
|
||||
}
|
||||
)
|
||||
return return_value
|
||||
else:
|
||||
return_value = []
|
||||
for search_result in search_results:
|
||||
result, context, datasets = search_result
|
||||
return_value.append(result)
|
||||
if only_context:
|
||||
for search_result in search_results:
|
||||
prepared_search_results = await prepare_search_result(search_result)
|
||||
return_value.append(prepared_search_results["context"])
|
||||
else:
|
||||
for search_result in search_results:
|
||||
result, context, datasets = search_result
|
||||
return_value.append(result)
|
||||
# For maintaining backwards compatibility
|
||||
if len(return_value) == 1 and isinstance(return_value[0], list):
|
||||
return return_value[0]
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from cognee.modules.users.models import (
|
|||
async def create_role(
|
||||
role_name: str,
|
||||
owner_id: UUID,
|
||||
):
|
||||
) -> UUID:
|
||||
"""
|
||||
Create a new role with the given name, if the request owner with the given id
|
||||
has the necessary permission.
|
||||
|
|
@ -45,3 +45,4 @@ async def create_role(
|
|||
|
||||
await session.commit()
|
||||
await session.refresh(role)
|
||||
return role.id
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from cognee.modules.users.models import Tenant
|
|||
from cognee.modules.users.methods import get_user
|
||||
|
||||
|
||||
async def create_tenant(tenant_name: str, user_id: UUID):
|
||||
async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
|
||||
"""
|
||||
Create a new tenant with the given name, for the user with the given id.
|
||||
This user is the owner of the tenant.
|
||||
|
|
@ -34,5 +34,6 @@ async def create_tenant(tenant_name: str, user_id: UUID):
|
|||
user.tenant_id = tenant.id
|
||||
await session.merge(user)
|
||||
await session.commit()
|
||||
return tenant.id
|
||||
except IntegrityError:
|
||||
raise EntityAlreadyExistsError(message="Tenant already exists.")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import os
|
||||
import json
|
||||
import networkx
|
||||
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage
|
||||
|
|
@ -9,6 +8,8 @@ logger = get_logger()
|
|||
|
||||
|
||||
async def cognee_network_visualization(graph_data, destination_file_path: str = None):
|
||||
import networkx
|
||||
|
||||
nodes_data, edges_data = graph_data
|
||||
|
||||
G = networkx.DiGraph()
|
||||
|
|
@ -22,6 +23,9 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|||
"TableRow": "#f47710",
|
||||
"TableType": "#6510f4",
|
||||
"ColumnValue": "#13613a",
|
||||
"SchemaTable": "#f47710",
|
||||
"DatabaseSchema": "#6510f4",
|
||||
"SchemaRelationship": "#13613a",
|
||||
"default": "#D3D3D3",
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +108,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|||
.nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); }
|
||||
.node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
||||
.edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
||||
|
||||
|
||||
.tooltip {
|
||||
position: absolute;
|
||||
text-align: left;
|
||||
|
|
@ -166,7 +170,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|||
// Create tooltip content for edge
|
||||
var content = "<strong>Edge Information</strong><br/>";
|
||||
content += "Relationship: " + d.relation + "<br/>";
|
||||
|
||||
|
||||
// Show all weights
|
||||
if (d.all_weights && Object.keys(d.all_weights).length > 0) {
|
||||
content += "<strong>Weights:</strong><br/>";
|
||||
|
|
@ -176,23 +180,23 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|||
} else if (d.weight !== null && d.weight !== undefined) {
|
||||
content += "Weight: " + d.weight + "<br/>";
|
||||
}
|
||||
|
||||
|
||||
if (d.relationship_type) {
|
||||
content += "Type: " + d.relationship_type + "<br/>";
|
||||
}
|
||||
|
||||
|
||||
// Add other edge properties
|
||||
if (d.edge_info) {
|
||||
Object.keys(d.edge_info).forEach(function(key) {
|
||||
if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
|
||||
key !== 'source_node_id' && key !== 'target_node_id' &&
|
||||
key !== 'relationship_name' && key !== 'updated_at' &&
|
||||
if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
|
||||
key !== 'source_node_id' && key !== 'target_node_id' &&
|
||||
key !== 'relationship_name' && key !== 'updated_at' &&
|
||||
!key.startsWith('weight_')) {
|
||||
content += key + ": " + d.edge_info[key] + "<br/>";
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
tooltip.html(content)
|
||||
.style("left", (d3.event.pageX + 10) + "px")
|
||||
.style("top", (d3.event.pageY - 10) + "px")
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import os
|
|||
import ssl
|
||||
import requests
|
||||
from datetime import datetime, timezone
|
||||
import matplotlib.pyplot as plt
|
||||
import http.server
|
||||
import socketserver
|
||||
from threading import Thread
|
||||
|
|
@ -30,37 +29,6 @@ def create_secure_ssl_context() -> ssl.SSLContext:
|
|||
return ssl.create_default_context()
|
||||
|
||||
|
||||
def get_entities(tagged_tokens):
|
||||
import nltk
|
||||
|
||||
nltk.download("maxent_ne_chunker", quiet=True)
|
||||
|
||||
from nltk.chunk import ne_chunk
|
||||
|
||||
return ne_chunk(tagged_tokens)
|
||||
|
||||
|
||||
def extract_pos_tags(sentence):
|
||||
"""Extract Part-of-Speech (POS) tags for words in a sentence."""
|
||||
import nltk
|
||||
|
||||
# Ensure that the necessary NLTK resources are downloaded
|
||||
nltk.download("words", quiet=True)
|
||||
nltk.download("punkt", quiet=True)
|
||||
nltk.download("averaged_perceptron_tagger", quiet=True)
|
||||
|
||||
from nltk.tag import pos_tag
|
||||
from nltk.tokenize import word_tokenize
|
||||
|
||||
# Tokenize the sentence into words
|
||||
tokens = word_tokenize(sentence)
|
||||
|
||||
# Tag each word with its corresponding POS tag
|
||||
pos_tags = pos_tag(tokens)
|
||||
|
||||
return pos_tags
|
||||
|
||||
|
||||
def get_anonymous_id():
|
||||
"""Creates or reads a anonymous user id"""
|
||||
tracking_id = os.getenv("TRACKING_ID", None)
|
||||
|
|
|
|||
|
|
@ -1,309 +0,0 @@
|
|||
# PROPOSED TO BE DEPRECATED
|
||||
|
||||
"""This module contains the OntologyEngine class which is responsible for adding graph ontology from a JSON or CSV file."""
|
||||
|
||||
import csv
|
||||
import json
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from datetime import datetime, timezone
|
||||
from fastapi import status
|
||||
from typing import Any, Dict, List, Optional, Union, Type
|
||||
|
||||
import aiofiles
|
||||
import pandas as pd
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cognee.modules.graph.exceptions import EntityNotFoundError
|
||||
from cognee.modules.ingestion.exceptions import IngestionError
|
||||
|
||||
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||
from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine
|
||||
from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
|
||||
from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
|
||||
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
|
||||
from cognee.modules.data.methods.add_model_class_to_graph import (
|
||||
add_model_class_to_graph,
|
||||
)
|
||||
from cognee.tasks.graph.models import NodeModel, GraphOntology
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.engine.utils import generate_node_id, generate_node_name
|
||||
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
||||
|
||||
logger = get_logger("task:infer_data_ontology")
|
||||
|
||||
|
||||
async def extract_ontology(content: str, response_model: Type[BaseModel]):
|
||||
"""
|
||||
Extracts structured ontology from the provided content using a pre-defined LLM client.
|
||||
|
||||
This asynchronous function retrieves a system prompt from a file and utilizes an LLM
|
||||
client to create a structured output based on the input content and specified response
|
||||
model.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- content (str): The content from which to extract the ontology.
|
||||
- response_model (Type[BaseModel]): The model that defines the structure of the
|
||||
output ontology.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
The structured ontology extracted from the content.
|
||||
"""
|
||||
|
||||
system_prompt = LLMGateway.read_query_prompt("extract_ontology.txt")
|
||||
|
||||
ontology = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
|
||||
|
||||
return ontology
|
||||
|
||||
|
||||
class OntologyEngine:
|
||||
"""
|
||||
Manage ontology data and operations for graph structures, providing methods for data
|
||||
loading, flattening models, and adding ontological relationships to a graph database.
|
||||
|
||||
Public methods:
|
||||
|
||||
- flatten_model
|
||||
- recursive_flatten
|
||||
- load_data
|
||||
- add_graph_ontology
|
||||
"""
|
||||
|
||||
async def flatten_model(
|
||||
self, model: NodeModel, parent_id: Optional[str] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Flatten the model to a dictionary including optional parent ID and relationship details
|
||||
if available.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- model (NodeModel): The NodeModel instance to flatten.
|
||||
- parent_id (Optional[str]): An optional ID of the parent node for hierarchical
|
||||
purposes. (default None)
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- Dict[str, Any]: A dictionary representation of the model with flattened
|
||||
attributes.
|
||||
"""
|
||||
result = model.dict()
|
||||
result["parent_id"] = parent_id
|
||||
if model.default_relationship:
|
||||
result.update(
|
||||
{
|
||||
"relationship_type": model.default_relationship.type,
|
||||
"relationship_source": model.default_relationship.source,
|
||||
"relationship_target": model.default_relationship.target,
|
||||
}
|
||||
)
|
||||
return result
|
||||
|
||||
async def recursive_flatten(
|
||||
self, items: Union[List[Dict[str, Any]], Dict[str, Any]], parent_id: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Recursively flatten a hierarchical structure of models into a flat list of dictionaries.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- items (Union[List[Dict[str, Any]], Dict[str, Any]]): A list or dictionary
|
||||
containing models to flatten.
|
||||
- parent_id (Optional[str]): An optional ID of the parent node to maintain hierarchy
|
||||
during flattening. (default None)
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- List[Dict[str, Any]]: A flat list of dictionaries representing the hierarchical
|
||||
model structure.
|
||||
"""
|
||||
flat_list = []
|
||||
|
||||
if isinstance(items, list):
|
||||
for item in items:
|
||||
flat_list.extend(await self.recursive_flatten(item, parent_id))
|
||||
elif isinstance(items, dict):
|
||||
model = NodeModel.model_validate(items)
|
||||
flat_list.append(await self.flatten_model(model, parent_id))
|
||||
for child in model.children:
|
||||
flat_list.extend(await self.recursive_flatten(child, model.node_id))
|
||||
return flat_list
|
||||
|
||||
async def load_data(self, file_path: str) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
|
||||
"""
|
||||
Load data from a specified JSON or CSV file and return it in a structured format.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- file_path (str): The path to the file to load data from.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- Union[List[Dict[str, Any]], Dict[str, Any]]: Parsed data from the file as either a
|
||||
list of dictionaries or a single dictionary depending on content type.
|
||||
"""
|
||||
try:
|
||||
if file_path.endswith(".json"):
|
||||
async with aiofiles.open(file_path, mode="r") as f:
|
||||
data = await f.read()
|
||||
return json.loads(data)
|
||||
elif file_path.endswith(".csv"):
|
||||
async with aiofiles.open(file_path, mode="r") as f:
|
||||
content = await f.read()
|
||||
reader = csv.DictReader(content.splitlines())
|
||||
return list(reader)
|
||||
else:
|
||||
raise IngestionError(message="Unsupported file format")
|
||||
except Exception as e:
|
||||
raise IngestionError(
|
||||
message=f"Failed to load data from {file_path}: {e}",
|
||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||
)
|
||||
|
||||
async def add_graph_ontology(self, file_path: str = None, documents: list = None):
|
||||
"""
|
||||
Add graph ontology from a JSON or CSV file, or infer relationships from provided
|
||||
document content. Raise exceptions for invalid file types or missing entities.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- file_path (str): Optional path to a file containing data to be loaded. (default
|
||||
None)
|
||||
- documents (list): Optional list of document objects for content extraction if no
|
||||
file path is provided. (default None)
|
||||
"""
|
||||
if file_path is None:
|
||||
initial_chunks_and_ids = []
|
||||
|
||||
chunk_config = get_chunk_config()
|
||||
chunk_engine = get_chunk_engine()
|
||||
chunk_strategy = chunk_config.chunk_strategy
|
||||
|
||||
for base_file in documents:
|
||||
with open(base_file.raw_data_location, "rb") as file:
|
||||
try:
|
||||
file_type = guess_file_type(file)
|
||||
text = extract_text_from_file(file, file_type)
|
||||
|
||||
subchunks, chunks_with_ids = chunk_engine.chunk_data(
|
||||
chunk_strategy,
|
||||
text,
|
||||
chunk_config.chunk_size,
|
||||
chunk_config.chunk_overlap,
|
||||
)
|
||||
|
||||
if chunks_with_ids[0][0] == 1:
|
||||
initial_chunks_and_ids.append({base_file.id: chunks_with_ids})
|
||||
|
||||
except FileTypeException:
|
||||
logger.warning(
|
||||
"File (%s) has an unknown file type. We are skipping it.", file["id"]
|
||||
)
|
||||
|
||||
ontology = await extract_ontology(str(initial_chunks_and_ids), GraphOntology)
|
||||
graph_client = await get_graph_engine()
|
||||
|
||||
await graph_client.add_nodes(
|
||||
[
|
||||
(
|
||||
node.id,
|
||||
dict(
|
||||
uuid=generate_node_id(node.id),
|
||||
name=generate_node_name(node.name),
|
||||
type=generate_node_id(node.id),
|
||||
description=node.description,
|
||||
updated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
|
||||
),
|
||||
)
|
||||
for node in ontology.nodes
|
||||
]
|
||||
)
|
||||
|
||||
await graph_client.add_edges(
|
||||
(
|
||||
generate_node_id(edge.source_id),
|
||||
generate_node_id(edge.target_id),
|
||||
edge.relationship_type,
|
||||
dict(
|
||||
source_node_id=generate_node_id(edge.source_id),
|
||||
target_node_id=generate_node_id(edge.target_id),
|
||||
relationship_name=edge.relationship_type,
|
||||
updated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
|
||||
),
|
||||
)
|
||||
for edge in ontology.edges
|
||||
)
|
||||
|
||||
else:
|
||||
dataset_level_information = documents[0][1]
|
||||
|
||||
# Extract the list of valid IDs from the explanations
|
||||
valid_ids = {item["id"] for item in dataset_level_information}
|
||||
try:
|
||||
data = await self.load_data(file_path)
|
||||
flt_ontology = await self.recursive_flatten(data)
|
||||
df = pd.DataFrame(flt_ontology)
|
||||
graph_client = await get_graph_engine()
|
||||
|
||||
for _, row in df.iterrows():
|
||||
node_data = row.to_dict()
|
||||
node_id = node_data.pop("node_id", None)
|
||||
if node_id in valid_ids:
await graph_client.add_node(node_id, node_data)
else:
raise EntityNotFoundError(
message=f"Node ID {node_id} not found in the dataset"
)
|
||||
if pd.notna(row.get("relationship_source")) and pd.notna(
|
||||
row.get("relationship_target")
|
||||
):
|
||||
await graph_client.add_edge(
|
||||
row["relationship_source"],
|
||||
row["relationship_target"],
|
||||
relationship_name=row["relationship_type"],
|
||||
edge_properties={
|
||||
"source_node_id": row["relationship_source"],
|
||||
"target_node_id": row["relationship_target"],
|
||||
"relationship_name": row["relationship_type"],
|
||||
"updated_at": datetime.now(timezone.utc).strftime(
|
||||
"%Y-%m-%d %H:%M:%S"
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
return
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to add graph ontology from {file_path}: {e}") from e
|
||||
|
||||
|
||||
async def infer_data_ontology(documents, ontology_model=KnowledgeGraph, root_node_id=None):
|
||||
"""
|
||||
Infer data ontology from provided documents and optionally add it to a graph.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- documents: The documents from which to infer the ontology.
|
||||
- ontology_model: The ontology model to use for the inference, defaults to
|
||||
KnowledgeGraph. (default KnowledgeGraph)
|
||||
- root_node_id: An optional root node identifier for the ontology. (default None)
|
||||
"""
|
||||
if ontology_model == KnowledgeGraph:
|
||||
ontology_engine = OntologyEngine()
|
||||
root_node_id = await ontology_engine.add_graph_ontology(documents=documents)
|
||||
else:
|
||||
graph_engine = await get_graph_engine()
|
||||
await add_model_class_to_graph(ontology_model, graph_engine)
|
||||
|
||||
yield (documents, root_node_id)
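# Illustrative sketch of how this async generator might be consumed inside a pipeline task
# (the documents variable is assumed to be prepared by an earlier step):
#
#   async for docs, root_id in infer_data_ontology(documents, ontology_model=KnowledgeGraph):
#       ...  # downstream tasks receive the documents together with the inferred root node id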
|
||||
|
|
@ -4,16 +4,20 @@ from sqlalchemy import text
|
|||
from cognee.infrastructure.databases.relational.get_migration_relational_engine import (
|
||||
get_migration_relational_engine,
|
||||
)
|
||||
from cognee.infrastructure.databases.relational.config import get_migration_config
|
||||
|
||||
from cognee.tasks.storage.index_data_points import index_data_points
|
||||
from cognee.tasks.storage.index_graph_edges import index_graph_edges
|
||||
from cognee.tasks.schema.ingest_database_schema import ingest_database_schema
|
||||
|
||||
from cognee.modules.engine.models import TableRow, TableType, ColumnValue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def migrate_relational_database(graph_db, schema, migrate_column_data=True):
|
||||
async def migrate_relational_database(
|
||||
graph_db, schema, migrate_column_data=True, schema_only=False
|
||||
):
|
||||
"""
|
||||
Migrates data from a relational database into a graph database.
|
||||
|
||||
|
|
@ -26,11 +30,133 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
|||
|
||||
Both TableType and TableRow inherit from DataPoint to maintain consistency with Cognee data model.
|
||||
"""
|
||||
# Create a mapping of node_id to node objects for referencing in edge creation
|
||||
if schema_only:
|
||||
node_mapping, edge_mapping = await schema_only_ingestion(schema)
|
||||
|
||||
else:
|
||||
node_mapping, edge_mapping = await complete_database_ingestion(schema, migrate_column_data)
|
||||
|
||||
def _remove_duplicate_edges(edge_mapping):
|
||||
seen = set()
|
||||
unique_original_shape = []
|
||||
|
||||
for tup in edge_mapping:
|
||||
# We go through all the tuples in the edge_mapping and we only add unique tuples to the list
|
||||
# To eliminate duplicate edges.
|
||||
source_id, target_id, rel_name, rel_dict = tup
|
||||
# We need to convert the dictionary to a frozenset to be able to compare values for it
|
||||
rel_dict_hashable = frozenset(sorted(rel_dict.items()))
|
||||
hashable_tup = (source_id, target_id, rel_name, rel_dict_hashable)
|
||||
|
||||
# We use the seen set to keep track of unique edges
|
||||
if hashable_tup not in seen:
|
||||
# A list that has frozensets elements instead of dictionaries is needed to be able to compare values
|
||||
seen.add(hashable_tup)
|
||||
# append the original tuple shape (with the dictionary) if it's the first time we see it
|
||||
unique_original_shape.append(tup)
|
||||
|
||||
return unique_original_shape
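# Small illustrative example of the deduplication above (ids and properties are made up):
#
#   edges = [
#       ("a", "b", "is_part_of", {"relationship_name": "is_part_of"}),
#       ("a", "b", "is_part_of", {"relationship_name": "is_part_of"}),  # exact duplicate, dropped
#       ("a", "c", "is_part_of", {"relationship_name": "is_part_of"}),
#   ]
#   _remove_duplicate_edges(edges)  # -> keeps only the first and third tuples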
|
||||
|
||||
# Add all nodes and edges to the graph
|
||||
# NOTE: Nodes and edges have to be added in batches for speed optimization, especially for NetworkX.
|
||||
# If we created nodes and added them to the graph one at a time, the process would take too long.
|
||||
# Every node and edge added to NetworkX is saved to file, which is very slow when not done in batches.
|
||||
await graph_db.add_nodes(list(node_mapping.values()))
|
||||
await graph_db.add_edges(_remove_duplicate_edges(edge_mapping))
|
||||
|
||||
# In these steps we calculate the vector embeddings of our nodes and edges and save them to vector database
|
||||
# Cognee uses this information to perform searches on the knowledge graph.
|
||||
await index_data_points(list(node_mapping.values()))
|
||||
await index_graph_edges()
|
||||
|
||||
logger.info("Data successfully migrated from relational database to desired graph database.")
|
||||
return await graph_db.get_graph_data()
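# Illustrative call sketch, mirroring how the migration tests drive this function
# (engine and graph setup are assumed to have happened already):
#
#   schema = await get_migration_relational_engine().extract_schema()
#   graph_db = await get_graph_engine()
#   await migrate_relational_database(graph_db, schema=schema, schema_only=True)  # schema metadata only
#   await migrate_relational_database(graph_db, schema=schema)                    # full data migration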
|
||||
|
||||
|
||||
async def schema_only_ingestion(schema):
|
||||
node_mapping = {}
|
||||
edge_mapping = []
|
||||
|
||||
# Calling the ingest_database_schema function to return DataPoint subclasses
|
||||
result = await ingest_database_schema(
|
||||
schema=schema,
|
||||
max_sample_rows=5,
|
||||
)
|
||||
database_schema = result["database_schema"]
|
||||
schema_tables = result["schema_tables"]
|
||||
schema_relationships = result["relationships"]
|
||||
database_node_id = database_schema.id
|
||||
node_mapping[database_node_id] = database_schema
|
||||
for table in schema_tables:
|
||||
table_node_id = table.id
|
||||
# Add SchemaTable DataPoint as a node.
|
||||
node_mapping[table_node_id] = table
|
||||
edge_mapping.append(
|
||||
(
|
||||
table_node_id,
|
||||
database_node_id,
|
||||
"is_part_of",
|
||||
dict(
|
||||
source_node_id=table_node_id,
|
||||
target_node_id=database_node_id,
|
||||
relationship_name="is_part_of",
|
||||
),
|
||||
)
|
||||
)
|
||||
table_name_to_id = {t.name: t.id for t in schema_tables}
|
||||
for rel in schema_relationships:
|
||||
source_table_id = table_name_to_id.get(rel.source_table)
|
||||
target_table_id = table_name_to_id.get(rel.target_table)
|
||||
|
||||
relationship_id = rel.id
|
||||
|
||||
# Add SchemaRelationship DataPoint as a node.
|
||||
node_mapping[relationship_id] = rel
|
||||
edge_mapping.append(
|
||||
(
|
||||
source_table_id,
|
||||
relationship_id,
|
||||
"has_relationship",
|
||||
dict(
|
||||
source_node_id=source_table_id,
|
||||
target_node_id=relationship_id,
|
||||
relationship_name=rel.relationship_type,
|
||||
),
|
||||
)
|
||||
)
|
||||
edge_mapping.append(
|
||||
(
|
||||
relationship_id,
|
||||
target_table_id,
|
||||
"has_relationship",
|
||||
dict(
|
||||
source_node_id=relationship_id,
|
||||
target_node_id=target_table_id,
|
||||
relationship_name=rel.relationship_type,
|
||||
),
|
||||
)
|
||||
)
|
||||
edge_mapping.append(
|
||||
(
|
||||
source_table_id,
|
||||
target_table_id,
|
||||
rel.relationship_type,
|
||||
dict(
|
||||
source_node_id=source_table_id,
|
||||
target_node_id=target_table_id,
|
||||
relationship_name=rel.relationship_type,
|
||||
),
|
||||
)
|
||||
)
|
||||
return node_mapping, edge_mapping
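# Shape reminder: every tuple appended above follows the
# (source_node_id, target_node_id, relationship_name, properties) convention expected by
# graph_db.add_edges(...), for example (ids shown as plain strings for readability):
#
#   ("users_table", "sales_db", "is_part_of",
#    dict(source_node_id="users_table", target_node_id="sales_db", relationship_name="is_part_of"))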
|
||||
|
||||
|
||||
async def complete_database_ingestion(schema, migrate_column_data):
|
||||
engine = get_migration_relational_engine()
|
||||
# Create a mapping of node_id to node objects for referencing in edge creation
|
||||
node_mapping = {}
|
||||
edge_mapping = []
|
||||
|
||||
async with engine.engine.begin() as cursor:
|
||||
# First, create table type nodes for all tables
|
||||
for table_name, details in schema.items():
|
||||
|
|
@ -38,7 +164,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
|||
table_node = TableType(
|
||||
id=uuid5(NAMESPACE_OID, name=table_name),
|
||||
name=table_name,
|
||||
description=f"Table: {table_name}",
|
||||
description=f'Relational database table with the following name: "{table_name}".',
|
||||
)
|
||||
|
||||
# Add TableType node to mapping (node will be added to the graph later based on this mapping)
|
||||
|
|
@ -75,7 +201,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
|||
name=node_id,
|
||||
is_a=table_node,
|
||||
properties=str(row_properties),
|
||||
description=f"Row in {table_name} with {primary_key_col}={primary_key_value}",
|
||||
description=f'Row in the relational database table named "{table_name}" with the following row data: {str(row_properties)}, where each dictionary key is the column name and each value is the column value. This row has the id: {node_id}',
|
||||
)
|
||||
|
||||
# Store the node object in our mapping
|
||||
|
|
@ -113,7 +239,7 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
|||
id=uuid5(NAMESPACE_OID, name=column_node_id),
|
||||
name=column_node_id,
|
||||
properties=f"{key} {value} {table_name}",
|
||||
description=f"Column name={key} and value={value} from column from table={table_name}",
|
||||
description=f"column from relational database table={table_name}. Column name={key} and value={value}. The value of the column is related to the following row with this id: {row_node.id}. This column has the following ID: {column_node_id}",
|
||||
)
|
||||
node_mapping[column_node_id] = column_node
|
||||
|
||||
|
|
@ -180,39 +306,4 @@ async def migrate_relational_database(graph_db, schema, migrate_column_data=True
|
|||
),
|
||||
)
|
||||
)
|
||||
|
||||
def _remove_duplicate_edges(edge_mapping):
|
||||
seen = set()
|
||||
unique_original_shape = []
|
||||
|
||||
for tup in edge_mapping:
|
||||
# We go through all the tuples in the edge_mapping and we only add unique tuples to the list
|
||||
# To eliminate duplicate edges.
|
||||
source_id, target_id, rel_name, rel_dict = tup
|
||||
# We need to convert the dictionary to a frozenset to be able to compare values for it
|
||||
rel_dict_hashable = frozenset(sorted(rel_dict.items()))
|
||||
hashable_tup = (source_id, target_id, rel_name, rel_dict_hashable)
|
||||
|
||||
# We use the seen set to keep track of unique edges
|
||||
if hashable_tup not in seen:
|
||||
# A list that has frozensets elements instead of dictionaries is needed to be able to compare values
|
||||
seen.add(hashable_tup)
|
||||
# append the original tuple shape (with the dictionary) if it's the first time we see it
|
||||
unique_original_shape.append(tup)
|
||||
|
||||
return unique_original_shape
|
||||
|
||||
# Add all nodes and edges to the graph
|
||||
# NOTE: Nodes and edges have to be added in batch for speed optimization, Especially for NetworkX.
|
||||
# If we'd create nodes and add them to graph in real time the process would take too long.
|
||||
# Every node and edge added to NetworkX is saved to file which is very slow when not done in batches.
|
||||
await graph_db.add_nodes(list(node_mapping.values()))
|
||||
await graph_db.add_edges(_remove_duplicate_edges(edge_mapping))
|
||||
|
||||
# In these steps we calculate the vector embeddings of our nodes and edges and save them to vector database
|
||||
# Cognee uses this information to perform searches on the knowledge graph.
|
||||
await index_data_points(list(node_mapping.values()))
|
||||
await index_graph_edges()
|
||||
|
||||
logger.info("Data successfully migrated from relational database to desired graph database.")
|
||||
return await graph_db.get_graph_data()
|
||||
return node_mapping, edge_mapping
|
||||
|
|
|
|||
|
|
@ -32,7 +32,10 @@ async def resolve_data_directories(
|
|||
import s3fs
|
||||
|
||||
fs = s3fs.S3FileSystem(
|
||||
key=s3_config.aws_access_key_id, secret=s3_config.aws_secret_access_key, anon=False
|
||||
key=s3_config.aws_access_key_id,
|
||||
secret=s3_config.aws_secret_access_key,
|
||||
token=s3_config.aws_session_token,
|
||||
anon=False,
|
||||
)
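# Minimal sketch with placeholder values: token carries temporary (STS) session credentials and is simply
# None when only long-lived keys are configured, so the same call works for both credential styles:
#
#   fs = s3fs.S3FileSystem(key="<access-key-id>", secret="<secret-key>", token=None, anon=False)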
|
||||
|
||||
for item in data:
|
||||
|
|
|
|||
134
cognee/tasks/schema/ingest_database_schema.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
import json
|
||||
from typing import List, Dict
|
||||
from uuid import uuid5, NAMESPACE_OID
|
||||
from cognee.infrastructure.engine.models.DataPoint import DataPoint
|
||||
from sqlalchemy import text
|
||||
from cognee.tasks.schema.models import DatabaseSchema, SchemaTable, SchemaRelationship
|
||||
from cognee.infrastructure.databases.relational.get_migration_relational_engine import (
|
||||
get_migration_relational_engine,
|
||||
)
|
||||
from cognee.infrastructure.databases.relational.config import get_migration_config
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
async def ingest_database_schema(
|
||||
schema,
|
||||
max_sample_rows: int = 0,
|
||||
) -> Dict[str, List[DataPoint] | DataPoint]:
|
||||
"""
|
||||
Extract database schema metadata (optionally with sample data) and return DataPoint models for graph construction.
|
||||
|
||||
Args:
|
||||
schema: Database schema
|
||||
max_sample_rows: Maximum sample rows per table (0 means no sampling)
|
||||
|
||||
Returns:
|
||||
Dict with keys:
|
||||
"database_schema": DatabaseSchema
|
||||
"schema_tables": List[SchemaTable]
|
||||
"relationships": List[SchemaRelationship]
|
||||
"""
|
||||
|
||||
tables = {}
|
||||
sample_data = {}
|
||||
schema_tables = []
|
||||
schema_relationships = []
|
||||
|
||||
migration_config = get_migration_config()
|
||||
engine = get_migration_relational_engine()
|
||||
qi = engine.engine.dialect.identifier_preparer.quote
|
||||
try:
|
||||
max_sample_rows = max(0, int(max_sample_rows))
|
||||
except (TypeError, ValueError):
|
||||
max_sample_rows = 0
|
||||
|
||||
def qname(name: str):
|
||||
split_name = name.split(".")
|
||||
return ".".join(qi(p) for p in split_name)
|
||||
|
||||
async with engine.engine.begin() as cursor:
|
||||
for table_name, details in schema.items():
|
||||
tn = qname(table_name)
|
||||
if max_sample_rows > 0:
|
||||
rows_result = await cursor.execute(
|
||||
text(f"SELECT * FROM {tn} LIMIT :limit;"), # noqa: S608 - tn is fully quoted
|
||||
{"limit": max_sample_rows},
|
||||
)
|
||||
rows = [dict(r) for r in rows_result.mappings().all()]
|
||||
else:
|
||||
rows = []
|
||||
|
||||
if engine.engine.dialect.name == "postgresql":
|
||||
if "." in table_name:
|
||||
schema_part, table_part = table_name.split(".", 1)
|
||||
else:
|
||||
schema_part, table_part = "public", table_name
|
||||
estimate = await cursor.execute(
|
||||
text(
|
||||
"SELECT reltuples::bigint AS estimate "
|
||||
"FROM pg_class c "
|
||||
"JOIN pg_namespace n ON n.oid = c.relnamespace "
|
||||
"WHERE n.nspname = :schema AND c.relname = :table"
|
||||
),
|
||||
{"schema": schema_part, "table": table_part},
|
||||
)
|
||||
row_count_estimate = estimate.scalar() or 0
|
||||
else:
|
||||
count_result = await cursor.execute(text(f"SELECT COUNT(*) FROM {tn};")) # noqa: S608 - tn is fully quoted
|
||||
row_count_estimate = count_result.scalar()
|
||||
|
||||
schema_table = SchemaTable(
|
||||
id=uuid5(NAMESPACE_OID, name=f"{table_name}"),
|
||||
name=table_name,
|
||||
columns=json.dumps(details["columns"], default=str),
|
||||
primary_key=details.get("primary_key"),
|
||||
foreign_keys=json.dumps(details.get("foreign_keys", []), default=str),
|
||||
sample_rows=json.dumps(rows, default=str),
|
||||
row_count_estimate=row_count_estimate,
|
||||
description=f"Relational database table with '{table_name}' with {len(details['columns'])} columns and approx. {row_count_estimate} rows."
|
||||
f"Here are the columns this table contains: {details['columns']}"
|
||||
f"Here are a few sample_rows to show the contents of the table: {rows}"
|
||||
f"Table is part of the database: {migration_config.migration_db_name}",
|
||||
)
|
||||
schema_tables.append(schema_table)
|
||||
tables[table_name] = details
|
||||
sample_data[table_name] = rows
|
||||
|
||||
for fk in details.get("foreign_keys", []):
|
||||
ref_table_fq = fk["ref_table"]
|
||||
if "." not in ref_table_fq and "." in table_name:
|
||||
ref_table_fq = f"{table_name.split('.', 1)[0]}.{ref_table_fq}"
|
||||
|
||||
relationship_name = (
|
||||
f"{table_name}:{fk['column']}->{ref_table_fq}:{fk['ref_column']}"
|
||||
)
|
||||
relationship = SchemaRelationship(
|
||||
id=uuid5(NAMESPACE_OID, name=relationship_name),
|
||||
name=relationship_name,
|
||||
source_table=table_name,
|
||||
target_table=ref_table_fq,
|
||||
relationship_type="foreign_key",
|
||||
source_column=fk["column"],
|
||||
target_column=fk["ref_column"],
|
||||
description=f"Relational database table foreign key relationship between: {table_name}.{fk['column']} → {ref_table_fq}.{fk['ref_column']}"
|
||||
f"This foreing key relationship between table columns is a part of the following database: {migration_config.migration_db_name}",
|
||||
)
|
||||
schema_relationships.append(relationship)
|
||||
|
||||
id_str = f"{migration_config.migration_db_provider}:{migration_config.migration_db_name}"
|
||||
database_schema = DatabaseSchema(
|
||||
id=uuid5(NAMESPACE_OID, name=id_str),
|
||||
name=migration_config.migration_db_name,
|
||||
database_type=migration_config.migration_db_provider,
|
||||
tables=json.dumps(tables, default=str),
|
||||
sample_data=json.dumps(sample_data, default=str),
|
||||
description=f"Database schema containing {len(schema_tables)} tables and {len(schema_relationships)} relationships. "
|
||||
f"The database type is {migration_config.migration_db_provider}."
|
||||
f"The database contains the following tables: {tables}",
|
||||
)
|
||||
|
||||
return {
|
||||
"database_schema": database_schema,
|
||||
"schema_tables": schema_tables,
|
||||
"relationships": schema_relationships,
|
||||
}
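# Illustrative usage sketch, matching how the schema-only migration path calls this task:
#
#   schema = await get_migration_relational_engine().extract_schema()
#   result = await ingest_database_schema(schema=schema, max_sample_rows=5)
#   db_schema = result["database_schema"]        # DatabaseSchema DataPoint
#   tables = result["schema_tables"]             # List[SchemaTable]
#   relationships = result["relationships"]      # List[SchemaRelationship]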
|
||||
40
cognee/tasks/schema/models.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
from cognee.infrastructure.engine.models.DataPoint import DataPoint
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class DatabaseSchema(DataPoint):
|
||||
"""Represents a complete database schema with sample data"""
|
||||
|
||||
name: str
|
||||
database_type: str # sqlite, postgres, etc.
|
||||
tables: str # Reuse existing schema format from SqlAlchemyAdapter
|
||||
sample_data: str # Limited examples per table
|
||||
description: str
|
||||
metadata: dict = {"index_fields": ["description", "name"]}
|
||||
|
||||
|
||||
class SchemaTable(DataPoint):
|
||||
"""Represents an individual table schema with relationships"""
|
||||
|
||||
name: str
|
||||
columns: str # Column definitions with types
|
||||
primary_key: Optional[str]
|
||||
foreign_keys: str # Foreign key relationships
|
||||
sample_rows: str # Max 3-5 example rows
|
||||
row_count_estimate: Optional[int] # Actual table size
|
||||
description: str
|
||||
metadata: dict = {"index_fields": ["description", "name"]}
|
||||
|
||||
|
||||
class SchemaRelationship(DataPoint):
|
||||
"""Represents relationships between tables"""
|
||||
|
||||
name: str
|
||||
source_table: str
|
||||
target_table: str
|
||||
relationship_type: str # "foreign_key", "one_to_many", etc.
|
||||
source_column: str
|
||||
target_column: str
|
||||
description: str
|
||||
metadata: dict = {"index_fields": ["description", "name"]}
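# Illustrative instantiation sketch (field values are made up; the id is derived the same way
# ingest_database_schema derives it, via uuid5 over a stable relationship name):
#
#   from uuid import uuid5, NAMESPACE_OID
#
#   rel = SchemaRelationship(
#       id=uuid5(NAMESPACE_OID, name="orders:user_id->users:id"),
#       name="orders:user_id->users:id",
#       source_table="orders",
#       target_table="users",
#       relationship_type="foreign_key",
#       source_column="user_id",
#       target_column="id",
#       description="Foreign key relationship between orders.user_id and users.id",
#   )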
|
||||
|
|
@ -67,6 +67,44 @@ async def test_getting_of_documents(dataset_name_1):
|
|||
)
|
||||
|
||||
|
||||
async def test_vector_engine_search_none_limit():
|
||||
file_path_quantum = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||
)
|
||||
|
||||
file_path_nlp = os.path.join(
|
||||
pathlib.Path(__file__).parent,
|
||||
"test_data/Natural_language_processing.txt",
|
||||
)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
await cognee.add(file_path_quantum)
|
||||
|
||||
await cognee.add(file_path_nlp)
|
||||
|
||||
await cognee.cognify()
|
||||
|
||||
query_text = "Tell me about Quantum computers"
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
|
||||
collection_name = "Entity_name"
|
||||
|
||||
query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
|
||||
|
||||
result = await vector_engine.search(
|
||||
collection_name=collection_name, query_vector=query_vector, limit=None
|
||||
)
|
||||
|
||||
# Check that we did not accidentally use any default value for limit
|
||||
# in vector search along the way (like 5, 10, or 15)
|
||||
assert len(result) > 15
|
||||
|
||||
|
||||
async def main():
|
||||
cognee.config.set_vector_db_config(
|
||||
{
|
||||
|
|
@ -165,6 +203,8 @@ async def main():
|
|||
tables_in_database = await vector_engine.get_collection_names()
|
||||
assert len(tables_in_database) == 0, "ChromaDB database is not empty"
|
||||
|
||||
await test_vector_engine_search_none_limit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
|
|
|||
|
|
@ -41,7 +41,12 @@ class TestCogneeServerStart(unittest.TestCase):
|
|||
def tearDownClass(cls):
|
||||
# Terminate the server process
|
||||
if hasattr(cls, "server_process") and cls.server_process:
|
||||
os.killpg(os.getpgid(cls.server_process.pid), signal.SIGTERM)
|
||||
if hasattr(os, "killpg"):
|
||||
# Unix-like systems: Use process groups
|
||||
os.killpg(os.getpgid(cls.server_process.pid), signal.SIGTERM)
|
||||
else:
|
||||
# Windows: Just terminate the main process
|
||||
cls.server_process.terminate()
|
||||
cls.server_process.wait()
|
||||
|
||||
def test_server_is_running(self):
|
||||
|
|
|
|||
9
cognee/tests/test_data/Quantum_computers.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
|
||||
At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
|
||||
Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
|
||||
The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
|
||||
Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
|
||||
In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
|
||||
Emerging error-correcting codes aim to mitigate decoherence effects and are expected to pave the way for fault-tolerant quantum processors. Laboratories across the globe are investigating diverse qubit implementations, such as superconducting circuits, trapped ions, neutral atoms, and photonic systems. Significant government funding and private investment have created an ecosystem of startups and consortia focused on accelerating quantum hardware and software development. Universities are meanwhile launching interdisciplinary programs that teach physics, computer science, and engineering concepts necessary for tomorrow's quantum workforce. Establishing reliable benchmarking standards will be essential for objectively comparing devices and charting realistic milestones toward practical quantum advantage.
|
||||
Industry roadmaps anticipate that achieving error rates below the threshold for surface codes will require millions of physical qubits per logical qubit, highlighting daunting scale challenges. Researchers are therefore exploring hardware–software co-design strategies, where algorithmic breakthroughs and device engineering progress hand in hand to minimize overhead. Hybrid quantum-classical workflows, exemplified by variational algorithms running on near-term devices, offer a pragmatic path to extracting value before full fault tolerance arrives. Meanwhile, cryptographers are advancing post-quantum encryption schemes to safeguard information in a future where Shor’s algorithm becomes practical. The interplay between theoretical advances, experimental ingenuity, and policy considerations will ultimately determine how transformative quantum computing becomes for science, industry, and society.
|
||||
Collaborative open-source toolkits are lowering the barrier to entry for developers eager to prototype quantum algorithms and simulate small devices on classical hardware. As these software frameworks mature, they will foster standardization of gate libraries, circuit optimization passes, and error-mitigation techniques. At the same time, advances in cryogenic engineering, vacuum systems, and photonics are steadily improving the stability and manufacturability of next-generation qubit platforms. Policymakers are beginning to craft export controls and ethical guidelines aimed at preventing misuse while encouraging international collaboration in fundamental research. Ultimately, the success of quantum technology will hinge on integrating robust hardware, intelligent software, and a skilled workforce within an environment of responsible governance.
|
||||
211
cognee/tests/test_lancedb.py
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
import os
|
||||
import pathlib
|
||||
|
||||
import cognee
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.files.storage import get_storage_config
|
||||
from cognee.modules.data.models import Data
|
||||
from cognee.modules.users.methods import get_default_user
|
||||
from cognee.modules.search.types import SearchType
|
||||
from cognee.modules.search.operations import get_history
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def test_local_file_deletion(data_text, file_location):
|
||||
from sqlalchemy import select
|
||||
import hashlib
|
||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||
|
||||
engine = get_relational_engine()
|
||||
|
||||
async with engine.get_async_session() as session:
|
||||
# Get hash of data contents
|
||||
encoded_text = data_text.encode("utf-8")
|
||||
data_hash = hashlib.md5(encoded_text).hexdigest()
|
||||
# Get data entry from database based on hash contents
|
||||
data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
|
||||
assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
|
||||
f"Data location doesn't exist: {data.raw_data_location}"
|
||||
)
|
||||
# Test deletion of data along with local files created by cognee
|
||||
await engine.delete_data_entity(data.id)
|
||||
assert not os.path.exists(data.raw_data_location.replace("file://", "")), (
|
||||
f"Data location still exists after deletion: {data.raw_data_location}"
|
||||
)
|
||||
|
||||
async with engine.get_async_session() as session:
|
||||
# Get data entry from database based on file path
|
||||
data = (
|
||||
await session.scalars(select(Data).where(Data.raw_data_location == file_location))
|
||||
).one()
|
||||
assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
|
||||
f"Data location doesn't exist: {data.raw_data_location}"
|
||||
)
|
||||
# Test local files not created by cognee won't get deleted
|
||||
await engine.delete_data_entity(data.id)
|
||||
assert os.path.exists(data.raw_data_location.replace("file://", "")), (
|
||||
f"Data location doesn't exists: {data.raw_data_location}"
|
||||
)
|
||||
|
||||
|
||||
async def test_getting_of_documents(dataset_name_1):
|
||||
# Test getting of documents for search per dataset
|
||||
from cognee.modules.users.permissions.methods import get_document_ids_for_user
|
||||
|
||||
user = await get_default_user()
|
||||
document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
|
||||
assert len(document_ids) == 1, (
|
||||
f"Number of expected documents doesn't match {len(document_ids)} != 1"
|
||||
)
|
||||
|
||||
# Test getting of documents for search when no dataset is provided
|
||||
user = await get_default_user()
|
||||
document_ids = await get_document_ids_for_user(user.id)
|
||||
assert len(document_ids) == 2, (
|
||||
f"Number of expected documents doesn't match {len(document_ids)} != 2"
|
||||
)
|
||||
|
||||
|
||||
async def test_vector_engine_search_none_limit():
|
||||
file_path_quantum = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||
)
|
||||
|
||||
file_path_nlp = os.path.join(
|
||||
pathlib.Path(__file__).parent,
|
||||
"test_data/Natural_language_processing.txt",
|
||||
)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
await cognee.add(file_path_quantum)
|
||||
|
||||
await cognee.add(file_path_nlp)
|
||||
|
||||
await cognee.cognify()
|
||||
|
||||
query_text = "Tell me about Quantum computers"
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
|
||||
collection_name = "Entity_name"
|
||||
|
||||
query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
|
||||
|
||||
result = await vector_engine.search(
|
||||
collection_name=collection_name, query_vector=query_vector, limit=None
|
||||
)
|
||||
|
||||
# Check that we did not accidentally use any default value for limit
|
||||
# in vector search along the way (like 5, 10, or 15)
|
||||
assert len(result) > 15
|
||||
|
||||
|
||||
async def main():
|
||||
cognee.config.set_vector_db_config(
|
||||
{
|
||||
"vector_db_provider": "lancedb",
|
||||
}
|
||||
)
|
||||
|
||||
data_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_lancedb")
|
||||
).resolve()
|
||||
)
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
cognee_directory_path = str(
|
||||
pathlib.Path(
|
||||
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_lancedb")
|
||||
).resolve()
|
||||
)
|
||||
cognee.config.system_root_directory(cognee_directory_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
dataset_name_1 = "natural_language"
|
||||
dataset_name_2 = "quantum"
|
||||
|
||||
explanation_file_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
|
||||
)
|
||||
await cognee.add([explanation_file_path], dataset_name_1)
|
||||
|
||||
text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
|
||||
At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
|
||||
Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
|
||||
The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
|
||||
Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
|
||||
In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
|
||||
"""
|
||||
|
||||
await cognee.add([text], dataset_name_2)
|
||||
|
||||
await cognee.cognify([dataset_name_2, dataset_name_1])
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
await test_getting_of_documents(dataset_name_1)
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
|
||||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.CHUNKS, query_text=random_node_name, datasets=[dataset_name_2]
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted chunks are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
graph_completion = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text=random_node_name,
|
||||
datasets=[dataset_name_2],
|
||||
)
|
||||
assert len(graph_completion) != 0, "Completion result is empty."
|
||||
print("Completion result is:")
|
||||
print(graph_completion)
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.SUMMARIES, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "Query related summaries don't exist."
|
||||
print("\n\nExtracted summaries are:\n")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
user = await get_default_user()
|
||||
history = await get_history(user.id)
|
||||
assert len(history) == 8, "Search history is not correct."
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
data_root_directory = get_storage_config()["data_root_directory"]
|
||||
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
|
||||
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
connection = await vector_engine.get_connection()
|
||||
tables_in_database = await connection.table_names()
|
||||
assert len(tables_in_database) == 0, "LanceDB database is not empty"
|
||||
|
||||
await test_vector_engine_search_none_limit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -68,6 +68,44 @@ async def test_getting_of_documents(dataset_name_1):
|
|||
)
|
||||
|
||||
|
||||
async def test_vector_engine_search_none_limit():
|
||||
file_path_quantum = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
||||
)
|
||||
|
||||
file_path_nlp = os.path.join(
|
||||
pathlib.Path(__file__).parent,
|
||||
"test_data/Natural_language_processing.txt",
|
||||
)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
await cognee.add(file_path_quantum)
|
||||
|
||||
await cognee.add(file_path_nlp)
|
||||
|
||||
await cognee.cognify()
|
||||
|
||||
query_text = "Tell me about Quantum computers"
|
||||
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
|
||||
collection_name = "Entity_name"
|
||||
|
||||
query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
|
||||
|
||||
result = await vector_engine.search(
|
||||
collection_name=collection_name, query_vector=query_vector, limit=None
|
||||
)
|
||||
|
||||
# Check that we did not accidentally use any default value for limit
|
||||
# in vector search along the way (like 5, 10, or 15)
|
||||
assert len(result) > 15
|
||||
|
||||
|
||||
async def main():
|
||||
cognee.config.set_vector_db_config(
|
||||
{"vector_db_url": "", "vector_db_key": "", "vector_db_provider": "pgvector"}
|
||||
|
|
@ -174,6 +212,8 @@ async def main():
|
|||
tables_in_database = await vector_engine.get_table_names()
|
||||
assert len(tables_in_database) == 0, "PostgreSQL database is not empty"
|
||||
|
||||
await test_vector_engine_search_none_limit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
|
|
|||
|
|
@ -197,6 +197,80 @@ async def relational_db_migration():
|
|||
print(f"All checks passed for {graph_db_provider} provider with '{relationship_label}' edges!")
|
||||
|
||||
|
||||
async def test_schema_only_migration():
|
||||
# 1. Setup test DB and extract schema
|
||||
migration_engine = await setup_test_db()
|
||||
schema = await migration_engine.extract_schema()
|
||||
|
||||
# 2. Setup graph engine
|
||||
graph_engine = await get_graph_engine()
|
||||
|
||||
# 3. Migrate schema only
|
||||
await migrate_relational_database(graph_engine, schema=schema, schema_only=True)
|
||||
|
||||
# 4. Verify number of tables through search
|
||||
search_results = await cognee.search(
|
||||
query_text="How many tables are there in this database",
|
||||
query_type=cognee.SearchType.GRAPH_COMPLETION,
|
||||
top_k=30,
|
||||
)
|
||||
assert any("11" in r for r in search_results), (
|
||||
"Number of tables in the database reported in search_results is either None or not equal to 11"
|
||||
)
|
||||
|
||||
graph_db_provider = os.getenv("GRAPH_DATABASE_PROVIDER", "networkx").lower()
|
||||
|
||||
edge_counts = {
|
||||
"is_part_of": 0,
|
||||
"has_relationship": 0,
|
||||
"foreign_key": 0,
|
||||
}
|
||||
|
||||
if graph_db_provider == "neo4j":
|
||||
for rel_type in edge_counts.keys():
|
||||
query_str = f"""
|
||||
MATCH ()-[r:{rel_type}]->()
|
||||
RETURN count(r) as c
|
||||
"""
|
||||
rows = await graph_engine.query(query_str)
|
||||
edge_counts[rel_type] = rows[0]["c"]
|
||||
|
||||
elif graph_db_provider == "kuzu":
|
||||
for rel_type in edge_counts.keys():
|
||||
query_str = f"""
|
||||
MATCH ()-[r:EDGE]->()
|
||||
WHERE r.relationship_name = '{rel_type}'
|
||||
RETURN count(r) as c
|
||||
"""
|
||||
rows = await graph_engine.query(query_str)
|
||||
edge_counts[rel_type] = rows[0][0]
|
||||
|
||||
elif graph_db_provider == "networkx":
|
||||
nodes, edges = await graph_engine.get_graph_data()
|
||||
for _, _, key, _ in edges:
|
||||
if key in edge_counts:
|
||||
edge_counts[key] += 1
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unsupported graph database provider: {graph_db_provider}")
|
||||
|
||||
# 5. Assert counts match expected values
|
||||
expected_counts = {
|
||||
"is_part_of": 11,
|
||||
"has_relationship": 22,
|
||||
"foreign_key": 11,
|
||||
}
|
||||
|
||||
for rel_type, expected in expected_counts.items():
|
||||
actual = edge_counts[rel_type]
|
||||
assert actual == expected, (
|
||||
f"Expected {expected} edges for relationship '{rel_type}', but found {actual}"
|
||||
)
|
||||
|
||||
print("Schema-only migration edge counts validated successfully!")
|
||||
print(f"Edge counts: {edge_counts}")
|
||||
|
||||
|
||||
async def test_migration_sqlite():
|
||||
database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/")
|
||||
|
||||
|
|
@ -209,6 +283,7 @@ async def test_migration_sqlite():
|
|||
)
|
||||
|
||||
await relational_db_migration()
|
||||
await test_schema_only_migration()
|
||||
|
||||
|
||||
async def test_migration_postgres():
|
||||
|
|
@ -224,6 +299,7 @@ async def test_migration_postgres():
|
|||
}
|
||||
)
|
||||
await relational_db_migration()
|
||||
await test_schema_only_migration()
|
||||
|
||||
|
||||
async def main():
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import cognee
|
|||
from cognee.low_level import setup, DataPoint
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.modules.graph.utils import resolve_edges_to_text
|
||||
from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
|
||||
from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
|
||||
GraphCompletionContextExtensionRetriever,
|
||||
)
|
||||
|
|
@ -165,9 +164,6 @@ class TestGraphCompletionWithContextExtensionRetriever:
|
|||
|
||||
retriever = GraphCompletionContextExtensionRetriever()
|
||||
|
||||
with pytest.raises(DatabaseNotCreatedError):
|
||||
await retriever.get_context("Who works at Figma?")
|
||||
|
||||
await setup()
|
||||
|
||||
context = await retriever.get_context("Who works at Figma?")
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import cognee
|
|||
from cognee.low_level import setup, DataPoint
|
||||
from cognee.modules.graph.utils import resolve_edges_to_text
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
|
||||
from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
|
||||
|
||||
|
||||
|
|
@ -158,9 +157,6 @@ class TestGraphCompletionCoTRetriever:
|
|||
|
||||
retriever = GraphCompletionCotRetriever()
|
||||
|
||||
with pytest.raises(DatabaseNotCreatedError):
|
||||
await retriever.get_context("Who works at Figma?")
|
||||
|
||||
await setup()
|
||||
|
||||
context = await retriever.get_context("Who works at Figma?")
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import cognee
|
|||
from cognee.low_level import setup, DataPoint
|
||||
from cognee.modules.graph.utils import resolve_edges_to_text
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
|
||||
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||
|
||||
|
||||
|
|
@ -218,9 +217,6 @@ class TestGraphCompletionRetriever:
|
|||
|
||||
retriever = GraphCompletionRetriever()
|
||||
|
||||
with pytest.raises(DatabaseNotCreatedError):
|
||||
await retriever.get_context("Who works at Figma?")
|
||||
|
||||
await setup()
|
||||
|
||||
context = await retriever.get_context("Who works at Figma?")
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@ async def cognee_demo():
|
|||
current_directory = Path(__file__).resolve().parent.parent
|
||||
file_path = os.path.join(current_directory, "data", "alice_in_wonderland.txt")
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
# Call Cognee to process document
|
||||
await cognee.add(file_path)
|
||||
await cognee.cognify()
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ async def main():
|
|||
query = "When was Kamala Harris in office?"
|
||||
triplets = await brute_force_triplet_search(
|
||||
query=query,
|
||||
user=user,
|
||||
top_k=3,
|
||||
collections=["graphitinode_content", "graphitinode_name", "graphitinode_summary"],
|
||||
)
|
||||
|
|
|
|||
188
examples/python/permissions_example.py
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import os
|
||||
import cognee
|
||||
import pathlib
|
||||
|
||||
from cognee.modules.users.exceptions import PermissionDeniedError
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.modules.search.types import SearchType
|
||||
from cognee.modules.users.methods import create_user
|
||||
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
|
||||
from cognee.modules.users.roles.methods import add_user_to_role
|
||||
from cognee.modules.users.roles.methods import create_role
|
||||
from cognee.modules.users.tenants.methods import create_tenant
|
||||
from cognee.modules.users.tenants.methods import add_user_to_tenant
|
||||
from cognee.modules.engine.operations.setup import setup
|
||||
from cognee.shared.logging_utils import setup_logging, CRITICAL
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
async def main():
|
||||
# ENABLE PERMISSIONS FEATURE
|
||||
# Note: When ENABLE_BACKEND_ACCESS_CONTROL is enabled, the vector provider is automatically set to use LanceDB
|
||||
# and graph provider is set to use Kuzu.
|
||||
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"
|
||||
|
||||
# Set the rest of your environment variables as needed. By default OpenAI is used as the LLM provider
|
||||
# Reference the .env.template file for available options and how to change the LLM provider: https://github.com/topoteretes/cognee/blob/main/.env.template
|
||||
# For example to set your OpenAI LLM API key use:
|
||||
# os.environ["LLM_API_KEY""] = "your-api-key"
|
||||
|
||||
# Create a clean slate for cognee -- reset data and system state
|
||||
print("Resetting cognee data...")
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
print("Data reset complete.\n")
|
||||
|
||||
# Set up the necessary databases and tables for user management.
|
||||
await setup()
|
||||
|
||||
# NOTE: When a document is added in Cognee with permissions enabled, only the owner of the document has permissions
|
||||
# to work with the document initially.
|
||||
# Add document for user_1, add it under dataset name AI
|
||||
explanation_file_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, "../data/artificial_intelligence.pdf"
|
||||
)
|
||||
|
||||
print("Creating user_1: user_1@example.com")
|
||||
user_1 = await create_user("user_1@example.com", "example")
|
||||
await cognee.add([explanation_file_path], dataset_name="AI", user=user_1)
|
||||
|
||||
# Add document for user_2, add it under dataset name QUANTUM
|
||||
text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
|
||||
At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages
|
||||
this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the
|
||||
preparation and manipulation of quantum states.
|
||||
"""
|
||||
print("\nCreating user_2: user_2@example.com")
|
||||
user_2 = await create_user("user_2@example.com", "example")
|
||||
await cognee.add([text], dataset_name="QUANTUM", user=user_2)
|
||||
|
||||
# Run cognify for both datasets as the appropriate user/owner
|
||||
print("\nCreating different datasets for user_1 (AI dataset) and user_2 (QUANTUM dataset)")
|
||||
ai_cognify_result = await cognee.cognify(["AI"], user=user_1)
|
||||
quantum_cognify_result = await cognee.cognify(["QUANTUM"], user=user_2)
|
||||
|
||||
# Extract dataset_ids from cognify results
|
||||
def extract_dataset_id_from_cognify(cognify_result):
|
||||
"""Extract dataset_id from cognify output dictionary"""
|
||||
for dataset_id, pipeline_result in cognify_result.items():
|
||||
return dataset_id # Return the first dataset_id
|
||||
return None
|
||||
|
||||
# Get dataset IDs from cognify results
|
||||
# Note: When we want to work with datasets from other users (search, add, cognify, etc.) we must supply the dataset
# information through dataset_id; using a dataset name only looks for datasets owned by the current user
|
||||
ai_dataset_id = extract_dataset_id_from_cognify(ai_cognify_result)
|
||||
quantum_dataset_id = extract_dataset_id_from_cognify(quantum_cognify_result)
|
||||
|
||||
# We can see here that user_1 can read his own dataset (AI dataset)
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text="What is in the document?",
|
||||
user=user_1,
|
||||
datasets=[ai_dataset_id],
|
||||
)
|
||||
print("\nSearch results as user_1 on dataset owned by user_1:")
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
# But user_1 can't read the dataset owned by user_2 (QUANTUM dataset)
|
||||
print("\nSearch result as user_1 on the dataset owned by user_2:")
|
||||
try:
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text="What is in the document?",
|
||||
user=user_1,
|
||||
datasets=[quantum_dataset_id],
|
||||
)
|
||||
except PermissionDeniedError:
|
||||
print(f"User: {user_1} does not have permission to read from dataset: QUANTUM")
|
||||
|
||||
# user_1 currently also can't add a document to user_2's dataset (QUANTUM dataset)
|
||||
print("\nAttempting to add new data as user_1 to dataset owned by user_2:")
|
||||
try:
|
||||
await cognee.add(
|
||||
[explanation_file_path],
|
||||
dataset_id=quantum_dataset_id,
|
||||
user=user_1,
|
||||
)
|
||||
except PermissionDeniedError:
|
||||
print(f"User: {user_1} does not have permission to write to dataset: QUANTUM")
|
||||
|
||||
# We've shown that user_1 can't interact with the dataset from user_2
|
||||
# Now have user_2 give proper permission to user_1 to read QUANTUM dataset
|
||||
# Note: supported permission types are "read", "write", "delete" and "share"
|
||||
print(
|
||||
"\nOperation started as user_2 to give read permission to user_1 for the dataset owned by user_2"
|
||||
)
|
||||
await authorized_give_permission_on_datasets(
|
||||
user_1.id,
|
||||
[quantum_dataset_id],
|
||||
"read",
|
||||
user_2.id,
|
||||
)
|
||||
|
||||
# Now user_1 can read from quantum dataset after proper permissions have been assigned by the QUANTUM dataset owner.
|
||||
print("\nSearch result as user_1 on the dataset owned by user_2:")
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text="What is in the document?",
|
||||
user=user_1,
|
||||
dataset_ids=[quantum_dataset_id],
|
||||
)
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
# If we'd like for user_1 to add new documents to the QUANTUM dataset owned by user_2, user_1 would have to get
|
||||
# "write" access permission, which user_1 currently does not have
|
||||
|
||||
# Users can also be added to Roles and Tenants and then permission can be assigned on a Role/Tenant level as well
|
||||
# To create a Role a user first must be an owner of a Tenant
|
||||
print("User 2 is creating CogneeLab tenant/organization")
|
||||
tenant_id = await create_tenant("CogneeLab", user_2.id)
|
||||
|
||||
print("\nUser 2 is creating Researcher role")
|
||||
role_id = await create_role(role_name="Researcher", owner_id=user_2.id)
|
||||
|
||||
print("\nCreating user_3: user_3@example.com")
|
||||
user_3 = await create_user("user_3@example.com", "example")
|
||||
|
||||
# To add a user to a role he must be part of the same tenant/organization
|
||||
print("\nOperation started as user_2 to add user_3 to CogneeLab tenant/organization")
|
||||
await add_user_to_tenant(user_id=user_3.id, tenant_id=tenant_id, owner_id=user_2.id)
|
||||
|
||||
print(
|
||||
"\nOperation started by user_2, as tenant owner, to add user_3 to Researcher role inside the tenant/organization"
|
||||
)
|
||||
await add_user_to_role(user_id=user_3.id, role_id=role_id, owner_id=user_2.id)
|
||||
|
||||
print(
|
||||
"\nOperation started as user_2 to give read permission to Researcher role for the dataset owned by user_2"
|
||||
)
|
||||
await authorized_give_permission_on_datasets(
|
||||
role_id,
|
||||
[quantum_dataset_id],
|
||||
"read",
|
||||
user_2.id,
|
||||
)
|
||||
|
||||
# Now user_3 can read from QUANTUM dataset as part of the Researcher role after proper permissions have been assigned by the QUANTUM dataset owner, user_2.
|
||||
print("\nSearch result as user_3 on the dataset owned by user_2:")
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text="What is in the document?",
|
||||
user=user_3,
|
||||
dataset_ids=[quantum_dataset_id],
|
||||
)
|
||||
for result in search_results:
|
||||
print(f"{result}\n")
|
||||
|
||||
# Note: All of these function calls and the permission system are available through our backend endpoints as well
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import asyncio
|
||||
|
||||
logger = setup_logging(log_level=CRITICAL)
|
||||
asyncio.run(main())
|
||||
|
|
@@ -1,16 +1,15 @@
|
|||
from pathlib import Path
|
||||
import asyncio
|
||||
|
||||
import cognee
|
||||
import os
|
||||
|
||||
import cognee
|
||||
from cognee.infrastructure.databases.relational.config import get_migration_config
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.api.v1.visualize.visualize import visualize_graph
|
||||
from cognee.infrastructure.databases.relational import (
|
||||
get_migration_relational_engine,
|
||||
)
|
||||
|
||||
from cognee.modules.search.types import SearchType
|
||||
|
||||
from cognee.infrastructure.databases.relational import (
|
||||
create_db_and_tables as create_relational_db_and_tables,
|
||||
)
|
||||
|
|
@@ -32,16 +31,29 @@ from cognee.infrastructure.databases.vector.pgvector import (
|
|||
|
||||
|
||||
async def main():
|
||||
engine = get_migration_relational_engine()
|
||||
|
||||
# Clean all data stored in Cognee
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
# Needed to create appropriate tables only on the Cognee side
|
||||
# Needed to create appropriate database tables only on the Cognee side
|
||||
await create_relational_db_and_tables()
|
||||
await create_vector_db_and_tables()
|
||||
|
||||
# In case environment variables are not set, use the example database from the Cognee repo
|
||||
migration_db_provider = os.environ.get("MIGRATION_DB_PROVIDER", "sqlite")
|
||||
migration_db_path = os.environ.get(
|
||||
"MIGRATION_DB_PATH",
|
||||
os.path.join(Path(__file__).resolve().parent.parent.parent, "cognee/tests/test_data"),
|
||||
)
|
||||
migration_db_name = os.environ.get("MIGRATION_DB_NAME", "migration_database.sqlite")
|
||||
|
||||
migration_config = get_migration_config()
|
||||
migration_config.migration_db_provider = migration_db_provider
|
||||
migration_config.migration_db_path = migration_db_path
|
||||
migration_config.migration_db_name = migration_db_name
|
||||
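# Sketch, not part of the original example: the same configuration can be supplied via
# environment variables before running the script (the values shown are the defaults used above;
# "<repo>" is a placeholder for your local checkout path):
#   export MIGRATION_DB_PROVIDER=sqlite
#   export MIGRATION_DB_PATH=<repo>/cognee/tests/test_data
#   export MIGRATION_DB_NAME=migration_database.sqlite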
|
||||
engine = get_migration_relational_engine()
|
||||
|
||||
print("\nExtracting schema of database to migrate.")
|
||||
schema = await engine.extract_schema()
|
||||
print(f"Migrated database schema:\n{schema}")
|
||||
|
|
@@ -53,10 +65,6 @@ async def main():
|
|||
await migrate_relational_database(graph, schema=schema)
|
||||
print("Relational database migration complete.")
|
||||
|
||||
# Define location where to store html visualization of graph of the migrated database
|
||||
home_dir = os.path.expanduser("~")
|
||||
destination_file_path = os.path.join(home_dir, "graph_visualization.html")
|
||||
|
||||
# Make sure to set top_k to a high value for a broader search; the default value is only 10!
# top_k represents the number of graph triplets to supply to the LLM to answer your question
|
||||
search_results = await cognee.search(
|
||||
|
|
@@ -69,13 +77,25 @@ async def main():
|
|||
# Setting top_k too high might overwhelm the LLM context when specific questions need to be answered.
# For this kind of question we've set top_k to 30
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION_COT,
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text="What invoices are related to Leonie Köhler?",
|
||||
top_k=30,
|
||||
)
|
||||
print(f"Search results: {search_results}")
|
||||
|
||||
# Run another search about a different customer to double-check the migrated relations
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION,
|
||||
query_text="What invoices are related to Luís Gonçalves?",
|
||||
top_k=30,
|
||||
)
|
||||
print(f"Search results: {search_results}")
|
||||
|
||||
# If you check the relational database for this example you can see that the search results successfully found all
|
||||
# the invoices related to the two customers, without any hallucinations or additional information
|
||||
|
||||
# Define location where to store html visualization of graph of the migrated database
|
||||
home_dir = os.path.expanduser("~")
|
||||
destination_file_path = os.path.join(home_dir, "graph_visualization.html")
|
||||
print("Adding html visualization of graph database after migration.")
|
||||
await visualize_graph(destination_file_path)
|
||||
print(f"Visualization can be found at: {destination_file_path}")
|
||||
|
|
|
|||
|
|
@@ -29,8 +29,11 @@ async def main():
|
|||
print("=" * 60)
|
||||
|
||||
# Start the UI server
|
||||
def dummy_callback(pid):
|
||||
pass
|
||||
|
||||
server = cognee.start_ui(
|
||||
host="localhost",
|
||||
pid_callback=dummy_callback,
|
||||
port=3000,
|
||||
open_browser=True, # This will automatically open your browser
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -1,78 +0,0 @@

# cognee Graduates from GitHub Secure Open Source Program

*Building Trust and Security in AI Memory Systems*

We're excited to announce that **cognee** has successfully graduated from the GitHub Secure Open Source Program! This milestone reflects our commitment to maintaining the highest standards of security and reliability in open source AI infrastructure.

## What is cognee?

cognee is an open source library that provides **memory for AI agents in just 5 lines of code**. It transforms raw data into structured knowledge graphs through our innovative ECL (Extract, Cognify, Load) pipeline, enabling AI systems to build dynamic memory that goes far beyond traditional RAG systems.

### Key Features:
- **Interconnected Knowledge**: Links conversations, documents, images, and audio transcriptions
- **Scalable Architecture**: Loads data to graph and vector databases using only Pydantic
- **30+ Data Sources**: Manipulates data while ingesting from diverse sources
- **Developer-Friendly**: Reduces complexity and cost compared to traditional RAG implementations

## GitHub Secure Open Source Program Achievement

The GitHub Secure Open Source Program helps maintainers adopt security best practices and ensures that critical open source projects meet enterprise-grade security standards. Our graduation demonstrates that cognee has successfully implemented:

- **Security-first development practices**
- **Comprehensive vulnerability management**
- **Secure dependency management**
- **Code quality and review processes**
- **Community safety guidelines**

## Why This Matters for AI Development

As AI systems become more prevalent in production environments, security becomes paramount. cognee's graduation from this program means developers can confidently build AI memory systems knowing they're using infrastructure that meets rigorous security standards.

### Benefits for Our Community:
- **Enterprise Adoption**: Companies can deploy cognee with confidence in security-sensitive environments
- **Vulnerability Response**: Our security practices ensure rapid identification and resolution of potential issues
- **Supply Chain Security**: Dependencies are carefully managed and regularly audited
- **Trust & Transparency**: Open source development with security-first principles

## What's Next?

With over **5,000 GitHub stars** and a growing community of developers, cognee continues to evolve. We recently launched **Cogwit beta** - our fully-hosted AI Memory platform, and our [research paper](https://arxiv.org/abs/2505.24478) demonstrates the effectiveness of our approach.

Our commitment to security doesn't end with graduation. We'll continue following best practices and contributing to the broader conversation about secure AI infrastructure.

## Get Started Today

Ready to add intelligent memory to your AI applications? Get started with cognee:

```python
import cognee
import asyncio

async def main():
    # Add your data
    await cognee.add("Your document content here")

    # Transform into knowledge graph
    await cognee.cognify()

    # Query intelligently
    results = await cognee.search("What insights can you find?")

    for result in results:
        print(result)

asyncio.run(main())
```

## Join Our Community

- 🌟 [Star us on GitHub](https://github.com/topoteretes/cognee)
- 💬 [Join our Discord](https://discord.gg/NQPKmU5CCg)
- 📖 [Read our documentation](https://docs.cognee.ai/)
- 🚀 [Try Cogwit beta](https://platform.cognee.ai/)

The future of AI memory is secure, scalable, and open source. We're grateful for the GitHub team's support and excited to continue building the infrastructure that powers the next generation of intelligent applications.

---

*About cognee: We're building the memory layer for AI agents, enabling them to learn, remember, and reason across conversations and data sources. Our open source approach ensures that advanced AI memory capabilities are accessible to developers worldwide.*
107
notebooks/data/copilot_conversations.json
vendored
Normal file
|
|
@@ -0,0 +1,107 @@
|
|||
{
|
||||
"conversations": [
|
||||
{
|
||||
"id": "conv_001",
|
||||
"timestamp": "2024-01-15T10:30:00Z",
|
||||
"topic": "async/await patterns",
|
||||
"user_query": "I'm building a web scraper that needs to handle thousands of URLs concurrently. What's the best way to structure this with asyncio?",
|
||||
"assistant_response": "For high-concurrency web scraping, you'll want to use asyncio with aiohttp and implement proper semaphore-based rate limiting. Here's a robust pattern:\n\n```python\nimport asyncio\nimport aiohttp\nfrom typing import List, Dict, Any\n\nclass AsyncWebScraper:\n def __init__(self, max_concurrent: int = 100):\n self.semaphore = asyncio.Semaphore(max_concurrent)\n self.session = None\n \n async def __aenter__(self):\n self.session = aiohttp.ClientSession(\n timeout=aiohttp.ClientTimeout(total=30),\n connector=aiohttp.TCPConnector(limit=100)\n )\n return self\n \n async def __aexit__(self, exc_type, exc_val, exc_tb):\n if self.session:\n await self.session.close()\n \n async def fetch_url(self, url: str) -> Dict[str, Any]:\n async with self.semaphore:\n try:\n async with self.session.get(url) as response:\n content = await response.text()\n return {\n 'url': url,\n 'status': response.status,\n 'content': content,\n 'headers': dict(response.headers)\n }\n except Exception as e:\n return {'url': url, 'error': str(e)}\n \n async def scrape_urls(self, urls: List[str]) -> List[Dict[str, Any]]:\n tasks = [self.fetch_url(url) for url in urls]\n return await asyncio.gather(*tasks, return_exceptions=True)\n\n# Usage\nasync def main():\n urls = ['http://example.com'] * 1000 # Your URLs here\n \n async with AsyncWebScraper(max_concurrent=50) as scraper:\n results = await scraper.scrape_urls(urls)\n \n successful = [r for r in results if 'error' not in r]\n print(f'Successfully scraped {len(successful)} URLs')\n\nif __name__ == '__main__':\n asyncio.run(main())\n```\n\nKey points:\n- Semaphore controls concurrency to avoid overwhelming servers\n- Context manager ensures proper cleanup\n- TCPConnector with connection pooling\n- Proper exception handling for failed requests",
|
||||
"code_context": {
|
||||
"file": "scraper.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["async/await", "context_managers", "semaphores", "aiohttp", "error_handling"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I add retry logic for failed requests?",
|
||||
"What's the best way to parse the scraped HTML content?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_002",
|
||||
"timestamp": "2024-01-16T14:20:00Z",
|
||||
"topic": "dataclass vs pydantic",
|
||||
"user_query": "When should I use dataclasses vs Pydantic models? I'm building an API and need to handle user input validation.",
|
||||
"assistant_response": "Great question! For API development with input validation, here's when to use each:\n\n**Use Pydantic when:**\n- Building APIs (FastAPI, Django REST)\n- Need runtime validation of external data\n- Want automatic JSON serialization/deserialization\n- Need complex validation rules\n- Working with configuration files\n\n**Use dataclasses when:**\n- Internal data structures\n- Simple data containers\n- Want to stay in stdlib\n- Performance is critical (dataclasses are faster)\n\nFor your API, I'd recommend Pydantic:\n\n```python\nfrom pydantic import BaseModel, validator, Field\nfrom typing import Optional, List\nfrom datetime import datetime\nfrom enum import Enum\n\nclass UserRole(str, Enum):\n ADMIN = 'admin'\n USER = 'user'\n GUEST = 'guest'\n\nclass CreateUserRequest(BaseModel):\n username: str = Field(..., min_length=3, max_length=50, regex=r'^[a-zA-Z0-9_]+$')\n email: str = Field(..., regex=r'^[\\w\\.-]+@[\\w\\.-]+\\.\\w+$')\n password: str = Field(..., min_length=8)\n role: UserRole = UserRole.USER\n tags: Optional[List[str]] = None\n \n @validator('password')\n def validate_password(cls, v):\n if not any(c.isupper() for c in v):\n raise ValueError('Password must contain uppercase letter')\n if not any(c.isdigit() for c in v):\n raise ValueError('Password must contain digit')\n return v\n \n @validator('tags')\n def validate_tags(cls, v):\n if v and len(v) > 10:\n raise ValueError('Too many tags')\n return v\n\nclass User(BaseModel):\n id: int\n username: str\n email: str\n role: UserRole\n created_at: datetime\n is_active: bool = True\n \n class Config:\n orm_mode = True\n json_encoders = { datetime: lambda v: v.isoformat() }\n\n# FastAPI usage\nfrom fastapi import FastAPI, HTTPException\n\napp = FastAPI()\n\n@app.post('/users/', response_model=User)\nasync def create_user(user_data: CreateUserRequest):\n try:\n new_user = await create_user_in_db(user_data)\n return User.from_orm(new_user)\n except Exception as e:\n raise HTTPException(status_code=400, detail=str(e))\n```\n\nPydantic gives you:\n- Automatic request validation\n- Clear error messages\n- Type coercion\n- JSON Schema generation\n- IDE support with type hints",
|
||||
"code_context": {
|
||||
"file": "models.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["pydantic", "dataclasses", "validation", "fastapi", "type_hints", "enums"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I handle nested validation with Pydantic?",
|
||||
"Can I use Pydantic with SQLAlchemy models?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_003",
|
||||
"timestamp": "2024-01-17T09:15:00Z",
|
||||
"topic": "testing patterns",
|
||||
"user_query": "I'm struggling with testing async code and database interactions. What's the best approach for pytest with async functions?",
|
||||
"assistant_response": "Testing async code can be tricky! Here's a comprehensive approach using pytest-asyncio and proper test isolation...",
|
||||
"code_context": {
|
||||
"file": "test_users.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["pytest", "async_testing", "fixtures", "mocking", "database_testing", "fastapi_testing"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I test WebSocket connections?",
|
||||
"What's the best way to test database migrations?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_004",
|
||||
"timestamp": "2024-01-18T16:45:00Z",
|
||||
"topic": "performance optimization",
|
||||
"user_query": "My FastAPI app is getting slow with large datasets. How can I optimize database queries and response times?",
|
||||
"assistant_response": "Performance optimization in FastAPI apps typically involves several layers...",
|
||||
"code_context": {
|
||||
"file": "optimizations.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["performance_optimization", "caching", "database_optimization", "async_patterns", "monitoring"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I implement database connection pooling properly?",
|
||||
"What's the best way to handle memory usage with large datasets?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "conv_005",
|
||||
"timestamp": "2024-01-19T11:30:00Z",
|
||||
"topic": "error handling and logging",
|
||||
"user_query": "I need to implement proper error handling and logging across my Python application. What's the best approach for production-ready error management?",
|
||||
"assistant_response": "Excellent question! Proper error handling and logging are crucial for production applications...",
|
||||
"code_context": {
|
||||
"file": "error_handling.py",
|
||||
"language": "python",
|
||||
"patterns_discussed": ["error_handling", "logging", "exceptions", "middleware", "decorators", "fastapi"]
|
||||
},
|
||||
"follow_up_questions": [
|
||||
"How do I integrate this with external monitoring tools like Sentry?",
|
||||
"What's the best way to handle errors in background tasks?"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"total_conversations": 5,
|
||||
"date_range": "2024-01-15 to 2024-01-19",
|
||||
"topics_covered": [
|
||||
"async/await patterns",
|
||||
"dataclass vs pydantic",
|
||||
"testing patterns",
|
||||
"performance optimization",
|
||||
"error handling and logging"
|
||||
],
|
||||
"code_patterns_discussed": [
|
||||
"asyncio", "aiohttp", "semaphores", "context_managers",
|
||||
"pydantic", "fastapi", "type_hints", "validation",
|
||||
"pytest", "async_testing", "fixtures", "mocking",
|
||||
"performance_optimization", "caching", "database_optimization",
|
||||
"error_handling", "logging", "exceptions", "middleware"
|
||||
],
|
||||
"difficulty_levels": {
|
||||
"beginner": 1,
|
||||
"intermediate": 2,
|
||||
"advanced": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
976
notebooks/data/guido_contributions.json
vendored
Normal file
|
|
@@ -0,0 +1,976 @@
|
|||
[
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "Re-work indirect dependencies",
|
||||
"description": "Wow, this was quite a ride. Indirect dependencies were always supported kind of on best effort. This PR puts them on some principled foundation. It fixes three crashes and three stale types reported. All tests are quite weird/obscure, they are designed to expose the flaws in current logic (plus one test that passes on master, but it covers important corner case, so I add it just in case ). A short summary of various fixes (in arbitrary order):\r\n* Update many outdated comments and docstrings\r\n* Missing transitive dependency is now considered stale\r\n* Handle transitive generic bases in indirection visitor\r\n* Handle chained alias targets in indirection visitor\r\n* Always record original aliases during semantic analysis\r\n* Delete `qualified_tvars` as a concept, they are not needed since long ago\r\n* Remove ad-hoc handling for `TypeInfo`s from `build.py`\r\n* Support symbols with setter type different from getter type\r\n\r\nIn general the logic should be more simple/straightforward now:\r\n* Get all types in a file (need both symbol types _and_ expression types since some types may be only local)\r\n* For each type _transitively_ find all named types in them (thus aggregating all interfaces the type depends on)\r\n* In case any type was forced using `get_proper_type()`, record the orginal type alias during semantic analysis\r\n\r\nNote since this makes the algorithm correct, it may also make it slower (most notably because we must visit generic bases). I tried to offset this by couple optimizations, hopefully performance impact will be minimal.",
|
||||
"url": "https://github.com/python/mypy/pull/19798",
|
||||
"date": "2025-09-05T13:54:52Z",
|
||||
"sha_or_number": "19798",
|
||||
"files_changed": [
|
||||
"mypy/build.py",
|
||||
"mypy/fixup.py",
|
||||
"mypy/indirection.py",
|
||||
"mypy/nodes.py",
|
||||
"mypy/semanal.py",
|
||||
"mypy/server/deps.py",
|
||||
"mypy/test/typefixture.py",
|
||||
"mypy/typeanal.py",
|
||||
"test-data/unit/check-incremental.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": "from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort\nfrom mypy.indirection import TypeIndirectionVisitor\nfrom mypy.messages import MessageBuilder\nfrom mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable, TypeInfo\nfrom mypy.partially_defined import PossiblyUndefinedVariableVisitor\nfrom mypy.semanal import SemanticAnalyzer\nfrom mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis",
|
||||
"after_code": "from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort\nfrom mypy.indirection import TypeIndirectionVisitor\nfrom mypy.messages import MessageBuilder\nfrom mypy.nodes import (\n Decorator,\n Import,\n ImportAll,\n ImportBase,\n ImportFrom,\n MypyFile,\n OverloadedFuncDef,\n SymbolTable,\n)\nfrom mypy.partially_defined import PossiblyUndefinedVariableVisitor\nfrom mypy.semanal import SemanticAnalyzer\nfrom mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis",
|
||||
"diff_context": "from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort\nfrom mypy.indirection import TypeIndirectionVisitor\nfrom mypy.messages import MessageBuilder\nfrom mypy.nodes import Import, ImportAll, ImportBase, ImportFrom, MypyFile, SymbolTable, TypeInfo\nfrom mypy.nodes import (\n Decorator,\n Import,\n ImportAll,\n ImportBase,\n ImportFrom,\n MypyFile,\n OverloadedFuncDef,\n SymbolTable,\n)\nfrom mypy.partially_defined import PossiblyUndefinedVariableVisitor\nfrom mypy.semanal import SemanticAnalyzer\nfrom mypy.semanal_pass1 import SemanticAnalyzerPreAnalysis",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": "\nFor single nodes, processing is simple. If the node was cached, we\ndeserialize the cache data and fix up cross-references. Otherwise, we\ndo semantic analysis followed by type checking. We also handle (c)\nabove; if a module has valid cache data *but* any of its\ndependencies was processed from source, then the module should be\nprocessed from source.\n\nA relatively simple optimization (outside SCCs) we might do in the\nfuture is as follows: if a node's cache data is valid, but one or more\nof its dependencies are out of date so we have to re-parse the node\nfrom source, once we have fully type-checked the node, we can decide\nwhether its symbol table actually changed compared to the cache data\n(by reading the cache data and comparing it to the data we would be\nwriting). If there is no change we can declare the node up to date,\nand any node that depends (and for which we have cached data, and\nwhose other dependencies are up to date) on it won't need to be\nre-parsed from source.\n\nImport cycles\n-------------\n\nFinally we have to decide how to handle (c), import cycles. Here\nwe'll need a modified version of the original state machine\n(build.py), but we only need to do this per SCC, and we won't have to\ndeal with changes to the list of nodes while we're processing it.",
|
||||
"after_code": "\nFor single nodes, processing is simple. If the node was cached, we\ndeserialize the cache data and fix up cross-references. Otherwise, we\ndo semantic analysis followed by type checking. Once we (re-)processed\nan SCC we check whether its interface (symbol table) is still fresh\n(matches previous cached value). If it is not, we consider dependent SCCs\nstale so that they need to be re-parsed as well.\n\nNote on indirect dependencies: normally dependencies are determined from\nimports, but since our type interfaces are \"opaque\" (i.e. symbol tables can\ncontain types identified by name), these are not enough. We *must* also\nadd \"indirect\" dependencies from types to their definitions. For this\npurpose, after we finished processing a module, we travers its type map and\nsymbol tables, and for each type we find (transitively) on which opaque/named\ntypes it depends.\n\nImport cycles\n-------------\n\nFinally we have to decide how to handle (b), import cycles. Here\nwe'll need a modified version of the original state machine\n(build.py), but we only need to do this per SCC, and we won't have to\ndeal with changes to the list of nodes while we're processing it.",
|
||||
"diff_context": "\nFor single nodes, processing is simple. If the node was cached, we\ndeserialize the cache data and fix up cross-references. Otherwise, we\ndo semantic analysis followed by type checking. We also handle (c)\nabove; if a module has valid cache data *but* any of its\ndependencies was processed from source, then the module should be\nprocessed from source.\n\nA relatively simple optimization (outside SCCs) we might do in the\nfuture is as follows: if a node's cache data is valid, but one or more\nof its dependencies are out of date so we have to re-parse the node\nfrom source, once we have fully type-checked the node, we can decide\nwhether its symbol table actually changed compared to the cache data\n(by reading the cache data and comparing it to the data we would be\nwriting). If there is no change we can declare the node up to date,\nand any node that depends (and for which we have cached data, and\nwhose other dependencies are up to date) on it won't need to be\nre-parsed from source.\ndo semantic analysis followed by type checking. Once we (re-)processed\nan SCC we check whether its interface (symbol table) is still fresh\n(matches previous cached value). If it is not, we consider dependent SCCs\nstale so that they need to be re-parsed as well.\n\nNote on indirect dependencies: normally dependencies are determined from\nimports, but since our type interfaces are \"opaque\" (i.e. symbol tables can\ncontain types identified by name), these are not enough. We *must* also\nadd \"indirect\" dependencies from types to their definitions. For this\npurpose, after we finished processing a module, we travers its type map and\nsymbol tables, and for each type we find (transitively) on which opaque/named\ntypes it depends.\n\nImport cycles\n-------------\n\nFinally we have to decide how to handle (c), import cycles. Here\nFinally we have to decide how to handle (b), import cycles. Here\nwe'll need a modified version of the original state machine\n(build.py), but we only need to do this per SCC, and we won't have to\ndeal with changes to the list of nodes while we're processing it.",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 10,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": "\n # We should always patch indirect dependencies, even in full (non-incremental) builds,\n # because the cache still may be written, and it must be correct.\n # TODO: find a more robust way to traverse *all* relevant types?\n all_types = list(self.type_map().values())\n for _, sym, _ in self.tree.local_definitions():\n if sym.type is not None:\n all_types.append(sym.type)\n if isinstance(sym.node, TypeInfo):\n # TypeInfo symbols have some extra relevant types.\n all_types.extend(sym.node.bases)\n if sym.node.metaclass_type:\n all_types.append(sym.node.metaclass_type)\n if sym.node.typeddict_type:\n all_types.append(sym.node.typeddict_type)\n if sym.node.tuple_type:\n all_types.append(sym.node.tuple_type)\n self._patch_indirect_dependencies(self.type_checker().module_refs, all_types)\n\n if self.options.dump_inference_stats:\n dump_type_stats(",
|
||||
"after_code": "\n # We should always patch indirect dependencies, even in full (non-incremental) builds,\n # because the cache still may be written, and it must be correct.\n all_types = set(self.type_map().values())\n for _, sym, _ in self.tree.local_definitions():\n if sym.type is not None:\n all_types.add(sym.type)\n # Special case: settable properties may have two types.\n if isinstance(sym.node, OverloadedFuncDef) and sym.node.is_property:\n assert isinstance(first_node := sym.node.items[0], Decorator)\n if first_node.var.setter_type:\n all_types.add(first_node.var.setter_type)\n # Using mod_alias_deps is unfortunate but needed, since it is highly impractical\n # (and practically impossible) to avoid all get_proper_type() calls. For example,\n # TypeInfo.bases and metaclass, *args and **kwargs, Overloaded.items, and trivial\n # aliases like Text = str, etc. all currently forced to proper types. Thus, we need\n # to record the original definitions as they are first seen in semanal.py.\n self._patch_indirect_dependencies(\n self.type_checker().module_refs | self.tree.mod_alias_deps, all_types\n )\n\n if self.options.dump_inference_stats:\n dump_type_stats(",
|
||||
"diff_context": "\n # We should always patch indirect dependencies, even in full (non-incremental) builds,\n # because the cache still may be written, and it must be correct.\n # TODO: find a more robust way to traverse *all* relevant types?\n all_types = list(self.type_map().values())\n all_types = set(self.type_map().values())\n for _, sym, _ in self.tree.local_definitions():\n if sym.type is not None:\n all_types.append(sym.type)\n if isinstance(sym.node, TypeInfo):\n # TypeInfo symbols have some extra relevant types.\n all_types.extend(sym.node.bases)\n if sym.node.metaclass_type:\n all_types.append(sym.node.metaclass_type)\n if sym.node.typeddict_type:\n all_types.append(sym.node.typeddict_type)\n if sym.node.tuple_type:\n all_types.append(sym.node.tuple_type)\n self._patch_indirect_dependencies(self.type_checker().module_refs, all_types)\n all_types.add(sym.type)\n # Special case: settable properties may have two types.\n if isinstance(sym.node, OverloadedFuncDef) and sym.node.is_property:\n assert isinstance(first_node := sym.node.items[0], Decorator)\n if first_node.var.setter_type:\n all_types.add(first_node.var.setter_type)\n # Using mod_alias_deps is unfortunate but needed, since it is highly impractical\n # (and practically impossible) to avoid all get_proper_type() calls. For example,\n # TypeInfo.bases and metaclass, *args and **kwargs, Overloaded.items, and trivial\n # aliases like Text = str, etc. all currently forced to proper types. Thus, we need\n # to record the original definitions as they are first seen in semanal.py.\n self._patch_indirect_dependencies(\n self.type_checker().module_refs | self.tree.mod_alias_deps, all_types\n )\n\n if self.options.dump_inference_stats:\n dump_type_stats(",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 8,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": " self._type_checker.reset()\n self._type_checker = None\n\n def _patch_indirect_dependencies(self, module_refs: set[str], types: list[Type]) -> None:\n assert None not in types\n valid = self.valid_references()\n",
|
||||
"after_code": " self._type_checker.reset()\n self._type_checker = None\n\n def _patch_indirect_dependencies(self, module_refs: set[str], types: set[Type]) -> None:\n assert None not in types\n valid = self.valid_references()\n",
|
||||
"diff_context": " self._type_checker.reset()\n self._type_checker = None\n\n def _patch_indirect_dependencies(self, module_refs: set[str], types: list[Type]) -> None:\n def _patch_indirect_dependencies(self, module_refs: set[str], types: set[Type]) -> None:\n assert None not in types\n valid = self.valid_references()\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "_patch_indirect_dependencies",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/build.py",
|
||||
"language": "python",
|
||||
"before_code": " for id in scc:\n deps.update(graph[id].dependencies)\n deps -= ascc\n stale_deps = {id for id in deps if id in graph and not graph[id].is_interface_fresh()}\n fresh = fresh and not stale_deps\n undeps = set()\n if fresh:",
|
||||
"after_code": " for id in scc:\n deps.update(graph[id].dependencies)\n deps -= ascc\n # Note: if a dependency is not in graph anymore, it should be considered interface-stale.\n # This is important to trigger any relevant updates from indirect dependencies that were\n # removed in load_graph().\n stale_deps = {id for id in deps if id not in graph or not graph[id].is_interface_fresh()}\n fresh = fresh and not stale_deps\n undeps = set()\n if fresh:",
|
||||
"diff_context": " for id in scc:\n deps.update(graph[id].dependencies)\n deps -= ascc\n stale_deps = {id for id in deps if id in graph and not graph[id].is_interface_fresh()}\n # Note: if a dependency is not in graph anymore, it should be considered interface-stale.\n # This is important to trigger any relevant updates from indirect dependencies that were\n # removed in load_graph().\n stale_deps = {id for id in deps if id not in graph or not graph[id].is_interface_fresh()}\n fresh = fresh and not stale_deps\n undeps = set()\n if fresh:",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " def __init__(self) -> None:\n # Module references are collected here\n self.modules: set[str] = set()\n # User to avoid infinite recursion with recursive type aliases\n self.seen_aliases: set[types.TypeAliasType] = set()\n # Used to avoid redundant work\n self.seen_fullnames: set[str] = set()\n\n def find_modules(self, typs: Iterable[types.Type]) -> set[str]:\n self.modules = set()\n self.seen_fullnames = set()\n self.seen_aliases = set()\n for typ in typs:\n self._visit(typ)\n return self.modules\n\n def _visit(self, typ: types.Type) -> None:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ not in self.seen_aliases:\n self.seen_aliases.add(typ)\n typ.accept(self)\n\n def _visit_type_tuple(self, typs: tuple[types.Type, ...]) -> None:\n # Micro-optimization: Specialized version of _visit for lists\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n continue\n self.seen_aliases.add(typ)\n typ.accept(self)\n\n def _visit_type_list(self, typs: list[types.Type]) -> None:\n # Micro-optimization: Specialized version of _visit for tuples\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n continue\n self.seen_aliases.add(typ)\n typ.accept(self)\n\n def _visit_module_name(self, module_name: str) -> None:",
|
||||
"after_code": " def __init__(self) -> None:\n # Module references are collected here\n self.modules: set[str] = set()\n # User to avoid infinite recursion with recursive types\n self.seen_types: set[types.TypeAliasType | types.Instance] = set()\n # Used to avoid redundant work\n self.seen_fullnames: set[str] = set()\n\n def find_modules(self, typs: Iterable[types.Type]) -> set[str]:\n self.modules = set()\n self.seen_fullnames = set()\n self.seen_types = set()\n for typ in typs:\n self._visit(typ)\n return self.modules\n\n def _visit(self, typ: types.Type) -> None:\n # Note: instances are needed for `class str(Sequence[str]): ...`\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n return\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_tuple(self, typs: tuple[types.Type, ...]) -> None:\n # Micro-optimization: Specialized version of _visit for lists\n for typ in typs:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_list(self, typs: list[types.Type]) -> None:\n # Micro-optimization: Specialized version of _visit for tuples\n for typ in typs:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_module_name(self, module_name: str) -> None:",
|
||||
"diff_context": " def __init__(self) -> None:\n # Module references are collected here\n self.modules: set[str] = set()\n # User to avoid infinite recursion with recursive type aliases\n self.seen_aliases: set[types.TypeAliasType] = set()\n # User to avoid infinite recursion with recursive types\n self.seen_types: set[types.TypeAliasType | types.Instance] = set()\n # Used to avoid redundant work\n self.seen_fullnames: set[str] = set()\n\n def find_modules(self, typs: Iterable[types.Type]) -> set[str]:\n self.modules = set()\n self.seen_fullnames = set()\n self.seen_aliases = set()\n self.seen_types = set()\n for typ in typs:\n self._visit(typ)\n return self.modules\n\n def _visit(self, typ: types.Type) -> None:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ not in self.seen_aliases:\n self.seen_aliases.add(typ)\n # Note: instances are needed for `class str(Sequence[str]): ...`\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n return\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_tuple(self, typs: tuple[types.Type, ...]) -> None:\n # Micro-optimization: Specialized version of _visit for lists\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_aliases.add(typ)\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_type_list(self, typs: list[types.Type]) -> None:\n # Micro-optimization: Specialized version of _visit for tuples\n for typ in typs:\n if isinstance(typ, types.TypeAliasType):\n # Avoid infinite recursion for recursive type aliases.\n if typ in self.seen_aliases:\n if (\n isinstance(typ, types.TypeAliasType)\n or isinstance(typ, types.ProperType)\n and isinstance(typ, types.Instance)\n ):\n # Avoid infinite recursion for recursive types.\n if typ in self.seen_types:\n continue\n self.seen_aliases.add(typ)\n self.seen_types.add(typ)\n typ.accept(self)\n\n def _visit_module_name(self, module_name: str) -> None:",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 29,
|
||||
"function_name": "_visit_module_name",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression",
|
||||
"context_manager",
|
||||
"class_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " self._visit_type_list(t.arg_types)\n\n def visit_instance(self, t: types.Instance) -> None:\n self._visit_type_tuple(t.args)\n if t.type:\n # Uses of a class depend on everything in the MRO,\n # as changes to classes in the MRO can add types to methods,\n # change property types, change the MRO itself, etc.\n for s in t.type.mro:\n self._visit_module_name(s.module_name)\n if t.type.metaclass_type is not None:\n self._visit_module_name(t.type.metaclass_type.type.module_name)\n\n def visit_callable_type(self, t: types.CallableType) -> None:\n self._visit_type_list(t.arg_types)",
|
||||
"after_code": " self._visit_type_list(t.arg_types)\n\n def visit_instance(self, t: types.Instance) -> None:\n # Instance is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: bases, metaclass,\n # tuple type, and typeddict type.\n # Note: we cannot simply record the MRO, in case an intermediate base contains\n # a reference to type alias, this affects meaning of map_instance_to_supertype(),\n # see e.g. testDoubleReexportGenericUpdated.\n self._visit_type_tuple(t.args)\n if t.type:\n # Important optimization: instead of simply recording the definition and\n # recursing into bases, record the MRO and only traverse generic bases.\n for s in t.type.mro:\n self._visit_module_name(s.module_name)\n for base in s.bases:\n if base.args:\n self._visit_type_tuple(base.args)\n if t.type.metaclass_type:\n self._visit(t.type.metaclass_type)\n if t.type.typeddict_type:\n self._visit(t.type.typeddict_type)\n if t.type.tuple_type:\n self._visit(t.type.tuple_type)\n\n def visit_callable_type(self, t: types.CallableType) -> None:\n self._visit_type_list(t.arg_types)",
|
||||
"diff_context": " self._visit_type_list(t.arg_types)\n\n def visit_instance(self, t: types.Instance) -> None:\n # Instance is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: bases, metaclass,\n # tuple type, and typeddict type.\n # Note: we cannot simply record the MRO, in case an intermediate base contains\n # a reference to type alias, this affects meaning of map_instance_to_supertype(),\n # see e.g. testDoubleReexportGenericUpdated.\n self._visit_type_tuple(t.args)\n if t.type:\n # Uses of a class depend on everything in the MRO,\n # as changes to classes in the MRO can add types to methods,\n # change property types, change the MRO itself, etc.\n # Important optimization: instead of simply recording the definition and\n # recursing into bases, record the MRO and only traverse generic bases.\n for s in t.type.mro:\n self._visit_module_name(s.module_name)\n if t.type.metaclass_type is not None:\n self._visit_module_name(t.type.metaclass_type.type.module_name)\n for base in s.bases:\n if base.args:\n self._visit_type_tuple(base.args)\n if t.type.metaclass_type:\n self._visit(t.type.metaclass_type)\n if t.type.typeddict_type:\n self._visit(t.type.typeddict_type)\n if t.type.tuple_type:\n self._visit(t.type.tuple_type)\n\n def visit_callable_type(self, t: types.CallableType) -> None:\n self._visit_type_list(t.arg_types)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 10,
|
||||
"function_name": "visit_callable_type",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " self.seen_fullnames.add(fullname)\n\n def visit_overloaded(self, t: types.Overloaded) -> None:\n self._visit_type_list(list(t.items))\n self._visit(t.fallback)\n\n def visit_tuple_type(self, t: types.TupleType) -> None:",
|
||||
"after_code": " self.seen_fullnames.add(fullname)\n\n def visit_overloaded(self, t: types.Overloaded) -> None:\n for item in t.items:\n self._visit(item)\n self._visit(t.fallback)\n\n def visit_tuple_type(self, t: types.TupleType) -> None:",
|
||||
"diff_context": " self.seen_fullnames.add(fullname)\n\n def visit_overloaded(self, t: types.Overloaded) -> None:\n self._visit_type_list(list(t.items))\n for item in t.items:\n self._visit(item)\n self._visit(t.fallback)\n\n def visit_tuple_type(self, t: types.TupleType) -> None:",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "visit_tuple_type",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/indirection.py",
|
||||
"language": "python",
|
||||
"before_code": " self._visit(t.item)\n\n def visit_type_alias_type(self, t: types.TypeAliasType) -> None:\n self._visit(types.get_proper_type(t))",
|
||||
"after_code": " self._visit(t.item)\n\n def visit_type_alias_type(self, t: types.TypeAliasType) -> None:\n # Type alias is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: target and args.\n if t.alias:\n self._visit_module_name(t.alias.module)\n self._visit(t.alias.target)\n self._visit_type_list(t.args)",
|
||||
"diff_context": " self._visit(t.item)\n\n def visit_type_alias_type(self, t: types.TypeAliasType) -> None:\n self._visit(types.get_proper_type(t))\n # Type alias is named, record its definition and continue digging into\n # components that constitute semantic meaning of this type: target and args.\n if t.alias:\n self._visit_module_name(t.alias.module)\n self._visit(t.alias.target)\n self._visit_type_list(t.args)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 3,
|
||||
"function_name": "visit_type_alias_type",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/nodes.py",
|
||||
"language": "python",
|
||||
"before_code": " defs: list[Statement]\n # Type alias dependencies as mapping from target to set of alias full names\n alias_deps: defaultdict[str, set[str]]\n # Is there a UTF-8 BOM at the start?\n is_bom: bool\n names: SymbolTable",
|
||||
"after_code": " defs: list[Statement]\n # Type alias dependencies as mapping from target to set of alias full names\n alias_deps: defaultdict[str, set[str]]\n # Same as above but for coarse-grained dependencies (i.e. modules instead of full names)\n mod_alias_deps: set[str]\n # Is there a UTF-8 BOM at the start?\n is_bom: bool\n names: SymbolTable",
|
||||
"diff_context": " defs: list[Statement]\n # Type alias dependencies as mapping from target to set of alias full names\n alias_deps: defaultdict[str, set[str]]\n # Same as above but for coarse-grained dependencies (i.e. modules instead of full names)\n mod_alias_deps: set[str]\n # Is there a UTF-8 BOM at the start?\n is_bom: bool\n names: SymbolTable",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/nodes.py",
|
||||
"language": "python",
|
||||
"before_code": " target: The target type. For generic aliases contains bound type variables\n as nested types (currently TypeVar and ParamSpec are supported).\n _fullname: Qualified name of this type alias. This is used in particular\n to track fine grained dependencies from aliases.\n alias_tvars: Type variables used to define this alias.\n normalized: Used to distinguish between `A = List`, and `A = list`. Both\n are internally stored using `builtins.list` (because `typing.List` is",
|
||||
"after_code": " target: The target type. For generic aliases contains bound type variables\n as nested types (currently TypeVar and ParamSpec are supported).\n _fullname: Qualified name of this type alias. This is used in particular\n to track fine-grained dependencies from aliases.\n module: Module where the alias was defined.\n alias_tvars: Type variables used to define this alias.\n normalized: Used to distinguish between `A = List`, and `A = list`. Both\n are internally stored using `builtins.list` (because `typing.List` is",
|
||||
"diff_context": " target: The target type. For generic aliases contains bound type variables\n as nested types (currently TypeVar and ParamSpec are supported).\n _fullname: Qualified name of this type alias. This is used in particular\n to track fine grained dependencies from aliases.\n to track fine-grained dependencies from aliases.\n module: Module where the alias was defined.\n alias_tvars: Type variables used to define this alias.\n normalized: Used to distinguish between `A = List`, and `A = list`. Both\n are internally stored using `builtins.list` (because `typing.List` is",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/semanal.py",
|
||||
"language": "python",
|
||||
"before_code": " declared_type_vars: TypeVarLikeList | None = None,\n all_declared_type_params_names: list[str] | None = None,\n python_3_12_type_alias: bool = False,\n ) -> tuple[Type | None, list[TypeVarLikeType], set[str], list[str], bool]:\n \"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).\n\n If yes, return the corresponding type, a list of\n qualified type variable names for generic aliases, a set of names the alias depends on,\n and a list of type variables if the alias is generic.\n A schematic example for the dependencies:\n A = int\n B = str\n analyze_alias(Dict[A, B])[2] == {'__main__.A', '__main__.B'}\n \"\"\"\n dynamic = bool(self.function_stack and self.function_stack[-1].is_dynamic())\n global_scope = not self.type and not self.function_stack",
|
||||
"after_code": " declared_type_vars: TypeVarLikeList | None = None,\n all_declared_type_params_names: list[str] | None = None,\n python_3_12_type_alias: bool = False,\n ) -> tuple[Type | None, list[TypeVarLikeType], set[tuple[str, str]], bool]:\n \"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).\n\n If yes, return the corresponding type, a list of type variables for generic aliases,\n a set of names the alias depends on, and True if the original type has empty tuple index.\n An example for the dependencies:\n A = int\n B = str\n analyze_alias(dict[A, B])[2] == {('mod', 'mod.A'), ('mod', 'mod.B')}\n \"\"\"\n dynamic = bool(self.function_stack and self.function_stack[-1].is_dynamic())\n global_scope = not self.type and not self.function_stack",
|
||||
"diff_context": " declared_type_vars: TypeVarLikeList | None = None,\n all_declared_type_params_names: list[str] | None = None,\n python_3_12_type_alias: bool = False,\n ) -> tuple[Type | None, list[TypeVarLikeType], set[str], list[str], bool]:\n ) -> tuple[Type | None, list[TypeVarLikeType], set[tuple[str, str]], bool]:\n \"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).\n\n If yes, return the corresponding type, a list of\n qualified type variable names for generic aliases, a set of names the alias depends on,\n and a list of type variables if the alias is generic.\n A schematic example for the dependencies:\n If yes, return the corresponding type, a list of type variables for generic aliases,\n a set of names the alias depends on, and True if the original type has empty tuple index.\n An example for the dependencies:\n A = int\n B = str\n analyze_alias(Dict[A, B])[2] == {'__main__.A', '__main__.B'}\n analyze_alias(dict[A, B])[2] == {('mod', 'mod.A'), ('mod', 'mod.B')}\n \"\"\"\n dynamic = bool(self.function_stack and self.function_stack[-1].is_dynamic())\n global_scope = not self.type and not self.function_stack",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 10,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": "\"\"\"Check if 'rvalue' is a valid type allowed for aliasing (e.g. not a type variable).",
|
||||
"coding_patterns": [
|
||||
"list_comprehension"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/semanal.py",
|
||||
"language": "python",
|
||||
"before_code": " self.cur_mod_node.plugin_deps.setdefault(trigger, set()).add(target)\n\n def add_type_alias_deps(\n self, aliases_used: Collection[str], target: str | None = None\n ) -> None:\n \"\"\"Add full names of type aliases on which the current node depends.\n\n This is used by fine-grained incremental mode to re-check the corresponding nodes.\n If `target` is None, then the target node used will be the current scope.\n \"\"\"\n if not aliases_used:\n # A basic optimization to avoid adding targets with no dependencies to\n # the `alias_deps` dict.\n return\n if target is None:\n target = self.scope.current_target()\n self.cur_mod_node.alias_deps[target].update(aliases_used)\n\n def is_mangled_global(self, name: str) -> bool:\n # A global is mangled if there exists at least one renamed variant.",
|
||||
"after_code": " self.cur_mod_node.plugin_deps.setdefault(trigger, set()).add(target)\n\n def add_type_alias_deps(\n self, aliases_used: Collection[tuple[str, str]], target: str | None = None\n ) -> None:\n \"\"\"Add full names of type aliases on which the current node depends.\n\n This is used by fine-grained incremental mode to re-check the corresponding nodes.\n If `target` is None, then the target node used will be the current scope. For\n coarse-grained mode, add just the module names where aliases are defined.\n \"\"\"\n if not aliases_used:\n return\n if target is None:\n target = self.scope.current_target()\n for mod, fn in aliases_used:\n self.cur_mod_node.alias_deps[target].add(fn)\n self.cur_mod_node.mod_alias_deps.add(mod)\n\n def is_mangled_global(self, name: str) -> bool:\n # A global is mangled if there exists at least one renamed variant.",
|
||||
"diff_context": " self.cur_mod_node.plugin_deps.setdefault(trigger, set()).add(target)\n\n def add_type_alias_deps(\n self, aliases_used: Collection[str], target: str | None = None\n self, aliases_used: Collection[tuple[str, str]], target: str | None = None\n ) -> None:\n \"\"\"Add full names of type aliases on which the current node depends.\n\n This is used by fine-grained incremental mode to re-check the corresponding nodes.\n If `target` is None, then the target node used will be the current scope.\n If `target` is None, then the target node used will be the current scope. For\n coarse-grained mode, add just the module names where aliases are defined.\n \"\"\"\n if not aliases_used:\n # A basic optimization to avoid adding targets with no dependencies to\n # the `alias_deps` dict.\n return\n if target is None:\n target = self.scope.current_target()\n self.cur_mod_node.alias_deps[target].update(aliases_used)\n for mod, fn in aliases_used:\n self.cur_mod_node.alias_deps[target].add(fn)\n self.cur_mod_node.mod_alias_deps.add(mod)\n\n def is_mangled_global(self, name: str) -> bool:\n # A global is mangled if there exists at least one renamed variant.",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 15,
|
||||
"function_name": "is_mangled_global",
|
||||
"class_name": null,
|
||||
"docstring": "\"\"\"Add full names of type aliases on which the current node depends.",
|
||||
"coding_patterns": [
|
||||
"list_comprehension"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "chore: add cline_docs/ to .gitignore",
|
||||
"description": "Cline is a commonly used LLM tool which, under certain conditions, creates a cline_docs/ folder with task status and todo items etc\r\n\r\nThis folder is only helpful locally (unless we decide we want to add actual guidelines for Cline here, but thats outside the scope of this PR) so this PR adds it to .gitignore\r\n\r\n<!-- If this pull request fixes an issue, add \"Fixes #NNN\" with the issue number. -->\r\n\r\n<!--\r\nChecklist:\r\n- Read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)\r\n- Add tests for all changed behaviour.\r\n- If you can't add a test, please explain why and how you verified your changes work.\r\n- Make sure CI passes.\r\n- Please do not force push to the PR once it has been reviewed.\r\n-->\r\n",
|
||||
"url": "https://github.com/python/mypy/pull/19797",
|
||||
"date": "2025-09-05T02:35:14Z",
|
||||
"sha_or_number": "19797",
|
||||
"files_changed": [
|
||||
".gitignore"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "[mypyc] Add type annotations to tests",
|
||||
"description": "Missing type annotations can compromise test coverage. My eventual goal is to require annotations by default in all run tests.\r\n",
|
||||
"url": "https://github.com/python/mypy/pull/19794",
|
||||
"date": "2025-09-04T15:56:30Z",
|
||||
"sha_or_number": "19794",
|
||||
"files_changed": [
|
||||
"mypyc/test-data/fixtures/ir.py",
|
||||
"mypyc/test-data/fixtures/typing-full.pyi",
|
||||
"mypyc/test-data/run-dunders.test",
|
||||
"mypyc/test-data/run-singledispatch.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": " def __iadd__(self, value: Iterable[_T], /) -> List[_T]: ... # type: ignore[misc]\n def append(self, x: _T) -> None: pass\n def pop(self, i: int = -1) -> _T: pass\n def count(self, _T) -> int: pass\n def extend(self, l: Iterable[_T]) -> None: pass\n def insert(self, i: int, x: _T) -> None: pass\n def sort(self) -> None: pass",
|
||||
"after_code": " def __iadd__(self, value: Iterable[_T], /) -> List[_T]: ... # type: ignore[misc]\n def append(self, x: _T) -> None: pass\n def pop(self, i: int = -1) -> _T: pass\n def count(self, x: _T) -> int: pass\n def extend(self, l: Iterable[_T]) -> None: pass\n def insert(self, i: int, x: _T) -> None: pass\n def sort(self) -> None: pass",
|
||||
"diff_context": " def __iadd__(self, value: Iterable[_T], /) -> List[_T]: ... # type: ignore[misc]\n def append(self, x: _T) -> None: pass\n def pop(self, i: int = -1) -> _T: pass\n def count(self, _T) -> int: pass\n def count(self, x: _T) -> int: pass\n def extend(self, l: Iterable[_T]) -> None: pass\n def insert(self, i: int, x: _T) -> None: pass\n def sort(self) -> None: pass",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "sort",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": "def id(o: object) -> int: pass\n# This type is obviously wrong but the test stubs don't have Sized anymore\ndef len(o: object) -> int: pass\ndef print(*object) -> None: pass\ndef isinstance(x: object, t: object) -> bool: pass\ndef iter(i: Iterable[_T]) -> Iterator[_T]: pass\n@overload",
|
||||
"after_code": "def id(o: object) -> int: pass\n# This type is obviously wrong but the test stubs don't have Sized anymore\ndef len(o: object) -> int: pass\ndef print(*args: object) -> None: pass\ndef isinstance(x: object, t: object) -> bool: pass\ndef iter(i: Iterable[_T]) -> Iterator[_T]: pass\n@overload",
|
||||
"diff_context": "def id(o: object) -> int: pass\n# This type is obviously wrong but the test stubs don't have Sized anymore\ndef len(o: object) -> int: pass\ndef print(*object) -> None: pass\ndef print(*args: object) -> None: pass\ndef isinstance(x: object, t: object) -> bool: pass\ndef iter(i: Iterable[_T]) -> Iterator[_T]: pass\n@overload",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "iter",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/typing-full.pyi",
|
||||
"language": "python",
|
||||
"before_code": "class GenericMeta(type): pass\n\nclass _SpecialForm:\n def __getitem__(self, index): ...\nclass TypeVar:\n def __init__(self, name, *args, bound=None): ...\n def __or__(self, other): ...\n\ncast = 0\noverload = 0",
|
||||
"after_code": "class GenericMeta(type): pass\n\nclass _SpecialForm:\n def __getitem__(self, index: Any) -> Any: ...\nclass TypeVar:\n def __init__(self, name: str, *args: Any, bound: Any = None): ...\n def __or__(self, other: Any) -> Any: ...\n\ncast = 0\noverload = 0",
|
||||
"diff_context": "class GenericMeta(type): pass\n\nclass _SpecialForm:\n def __getitem__(self, index): ...\n def __getitem__(self, index: Any) -> Any: ...\nclass TypeVar:\n def __init__(self, name, *args, bound=None): ...\n def __or__(self, other): ...\n def __init__(self, name: str, *args: Any, bound: Any = None): ...\n def __or__(self, other: Any) -> Any: ...\n\ncast = 0\noverload = 0",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 7,
|
||||
"function_name": "__or__",
|
||||
"class_name": "TypeVar",
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition",
|
||||
"type_hint"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "Check functions without annotations in mypyc tests",
|
||||
"description": "c.f. https://github.com/python/mypy/pull/19217#discussion_r2314303410\r\n\r\nDisallowing functions without annotations (where not relevant to the tests) is probably a good idea, but this creates a large number of failures which would take some time to go through (many due to common issues, like untyped functions in the fixtures).\r\n\r\nAs a smaller step in the right direction, this sets `check_untyped_defs = True` for the `run-*` tests so that we at least check functions without annotations. ",
|
||||
"url": "https://github.com/python/mypy/pull/19792",
|
||||
"date": "2025-09-04T14:42:17Z",
|
||||
"sha_or_number": "19792",
|
||||
"files_changed": [
|
||||
"mypyc/test-data/fixtures/ir.py",
|
||||
"mypyc/test-data/run-classes.test",
|
||||
"mypyc/test/test_run.py"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": "class type:\n def __init__(self, o: object) -> None: ...\n def __or__(self, o: object) -> Any: ...\n __name__ : str\n __annotations__: Dict[str, Any]\n",
|
||||
"after_code": "class type:\n def __init__(self, o: object) -> None: ...\n def __or__(self, o: object) -> Any: ...\n def __new__(cls, *args: object) -> Any: ...\n __name__ : str\n __annotations__: Dict[str, Any]\n",
|
||||
"diff_context": "class type:\n def __init__(self, o: object) -> None: ...\n def __or__(self, o: object) -> Any: ...\n def __new__(cls, *args: object) -> Any: ...\n __name__ : str\n __annotations__: Dict[str, Any]\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "__new__",
|
||||
"class_name": "type",
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"function_definition"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "fix: Allow instantiation of type[None] in analyze_type_type_callee",
|
||||
"description": "<!-- If this pull request fixes an issue, add \"Fixes #NNN\" with the issue number. -->\r\n\r\n(Explain how this PR changes mypy.)\r\n\r\n<!--\r\nChecklist:\r\n- Read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)\r\n- Add tests for all changed behaviour.\r\n- If you can't add a test, please explain why and how you verified your changes work.\r\n- Make sure CI passes.\r\n- Please do not force push to the PR once it has been reviewed.\r\n-->\r\n\r\nFixes #19660\r\n\r\nAllow instantiation of NoneType in type checker\r\n\r\nThis change fixes the error \"Cannot instantiate type 'Type[None]'\"\r\nwhen calling NoneType() or type(None)().\r\n\r\nBy treating NoneType as a callable that returns None, mypy can now correctly\r\nhandle such calls without raising spurious errors.\r\n\r\nAlso, I added test case testTypeUsingTypeCNoneType covering:\r\n- direct calls to type(None)() and NoneType()\r\n- functions accepting type[None] and type[NoneType] parameters and invoking them\r\n\r\nThis ensures proper handling of NoneType instantiation and prevents spurious errors.",
|
||||
"url": "https://github.com/python/mypy/pull/19782",
|
||||
"date": "2025-09-02T06:13:12Z",
|
||||
"sha_or_number": "19782",
|
||||
"files_changed": [
|
||||
"mypy/checkexpr.py",
|
||||
"test-data/unit/check-classes.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"19660"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypy/checkexpr.py",
|
||||
"language": "python",
|
||||
"before_code": " return self.analyze_type_type_callee(tuple_fallback(item), context)\n if isinstance(item, TypedDictType):\n return self.typeddict_callable_from_context(item)\n\n self.msg.unsupported_type_type(item, context)\n return AnyType(TypeOfAny.from_error)",
|
||||
"after_code": " return self.analyze_type_type_callee(tuple_fallback(item), context)\n if isinstance(item, TypedDictType):\n return self.typeddict_callable_from_context(item)\n if isinstance(item, NoneType):\n # NoneType() returns None, so treat it as a callable that returns None\n return CallableType(\n arg_types=[],\n arg_kinds=[],\n arg_names=[],\n ret_type=NoneType(),\n fallback=self.named_type(\"builtins.function\"),\n name=None,\n from_type_type=True,\n )\n\n self.msg.unsupported_type_type(item, context)\n return AnyType(TypeOfAny.from_error)",
|
||||
"diff_context": " return self.analyze_type_type_callee(tuple_fallback(item), context)\n if isinstance(item, TypedDictType):\n return self.typeddict_callable_from_context(item)\n if isinstance(item, NoneType):\n # NoneType() returns None, so treat it as a callable that returns None\n return CallableType(\n arg_types=[],\n arg_kinds=[],\n arg_names=[],\n ret_type=NoneType(),\n fallback=self.named_type(\"builtins.function\"),\n name=None,\n from_type_type=True,\n )\n\n self.msg.unsupported_type_type(item, context)\n return AnyType(TypeOfAny.from_error)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "standard",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "pr",
|
||||
"repository": "mypy",
|
||||
"title": "feat: new mypyc primitives for weakref.proxy",
|
||||
"description": "This PR adds 2 new weakref primitives for weakref.proxy (1 and 2 arg)\r\n\r\nThe C code generates correctly, but I'm not entirely sure why this test is failing. The weakly-proxied object is being destroyed too early, while there should still be a strong reference to it. It also fails if we use the builtin weakref.proxy, so I believe this might be exposing a reference counting bug unrelated to this PR.\r\n\r\n<!--\r\nChecklist:\r\n- Read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)\r\n- Add tests for all changed behaviour.\r\n- If you can't add a test, please explain why and how you verified your changes work.\r\n- Make sure CI passes.\r\n- Please do not force push to the PR once it has been reviewed.\r\n-->\r\n",
|
||||
"url": "https://github.com/python/mypy/pull/19217",
|
||||
"date": "2025-06-03T17:02:26Z",
|
||||
"sha_or_number": "19217",
|
||||
"files_changed": [
|
||||
"mypyc/primitives/weakref_ops.py",
|
||||
"mypyc/test-data/fixtures/ir.py",
|
||||
"mypyc/test-data/irbuild-weakref.test",
|
||||
"mypyc/test-data/run-weakref.test",
|
||||
"test-data/unit/lib-stub/_weakref.pyi",
|
||||
"test-data/unit/lib-stub/weakref.pyi"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypyc/test-data/fixtures/ir.py",
|
||||
"language": "python",
|
||||
"before_code": "class UnicodeEncodeError(RuntimeError): pass\nclass UnicodeDecodeError(RuntimeError): pass\nclass NotImplementedError(RuntimeError): pass\n\nclass StopIteration(Exception):\n value: Any",
|
||||
"after_code": "class UnicodeEncodeError(RuntimeError): pass\nclass UnicodeDecodeError(RuntimeError): pass\nclass NotImplementedError(RuntimeError): pass\nclass ReferenceError(Exception): pass\n\nclass StopIteration(Exception):\n value: Any",
|
||||
"diff_context": "class UnicodeEncodeError(RuntimeError): pass\nclass UnicodeDecodeError(RuntimeError): pass\nclass NotImplementedError(RuntimeError): pass\nclass ReferenceError(Exception): pass\n\nclass StopIteration(Exception):\n value: Any",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": "StopIteration",
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"class_definition",
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "test-data/unit/lib-stub/_weakref.pyi",
|
||||
"language": "python",
|
||||
"before_code": "",
|
||||
"after_code": "from typing import Any, Callable, TypeVar, overload\nfrom weakref import CallableProxyType, ProxyType\n\n_C = TypeVar(\"_C\", bound=Callable[..., Any])\n_T = TypeVar(\"_T\")\n\n# Return CallableProxyType if object is callable, ProxyType otherwise\n@overload\ndef proxy(object: _C, callback: Callable[[CallableProxyType[_C]], Any] | None = None, /) -> CallableProxyType[_C]: ...\n@overload\ndef proxy(object: _T, callback: Callable[[ProxyType[_T]], Any] | None = None, /) -> ProxyType[_T]: ...",
|
||||
"diff_context": "from typing import Any, Callable, TypeVar, overload\nfrom weakref import CallableProxyType, ProxyType\n\n_C = TypeVar(\"_C\", bound=Callable[..., Any])\n_T = TypeVar(\"_T\")\n\n# Return CallableProxyType if object is callable, ProxyType otherwise\n@overload\ndef proxy(object: _C, callback: Callable[[CallableProxyType[_C]], Any] | None = None, /) -> CallableProxyType[_C]: ...\n@overload\ndef proxy(object: _T, callback: Callable[[ProxyType[_T]], Any] | None = None, /) -> ProxyType[_T]: ...",
|
||||
"change_type": "addition",
|
||||
"lines_of_context": 0,
|
||||
"function_name": "proxy",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"decorator",
|
||||
"function_definition"
|
||||
]
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-128307: Update what's new in 3.13 and 3.14 with create_task changes of asyncio (#134304)",
|
||||
"description": "gh-128307: Update what's new in 3.13 and 3.14 with create_task changes of asyncio (#134304)\n\nCo-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>",
|
||||
"url": "https://github.com/python/cpython/commit/28625d4f956f8d30671aba1daaac9735932983db",
|
||||
"date": "2025-05-20T08:41:22Z",
|
||||
"sha_or_number": "28625d4f956f8d30671aba1daaac9735932983db",
|
||||
"files_changed": [
|
||||
"Doc/whatsnew/3.13.rst",
|
||||
"Doc/whatsnew/3.14.rst"
|
||||
],
|
||||
"additions": 34,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"134304"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "Doc/whatsnew/3.13.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": " never awaited).\n (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.)\n\n\nbase64\n------",
|
||||
"after_code": " never awaited).\n (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.)\n\n* The function and methods named ``create_task`` have received a new\n ``**kwargs`` argument that is passed through to the task constructor.\n This change was accidentally added in 3.13.3,\n and broke the API contract for custom task factories.\n Several third-party task factories implemented workarounds for this.\n In 3.13.4 and later releases the old factory contract is honored\n once again (until 3.14).\n To keep the workarounds working, the extra ``**kwargs`` argument still\n allows passing additional keyword arguments to :class:`~asyncio.Task`\n and to custom task factories.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\nbase64\n------",
|
||||
"diff_context": " never awaited).\n (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.)\n\n* The function and methods named ``create_task`` have received a new\n ``**kwargs`` argument that is passed through to the task constructor.\n This change was accidentally added in 3.13.3,\n and broke the API contract for custom task factories.\n Several third-party task factories implemented workarounds for this.\n In 3.13.4 and later releases the old factory contract is honored\n once again (until 3.14).\n To keep the workarounds working, the extra ``**kwargs`` argument still\n allows passing additional keyword arguments to :class:`~asyncio.Task`\n and to custom task factories.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\nbase64\n------",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "Doc/whatsnew/3.14.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": " (Contributed by Semyon Moroz in :gh:`133367`.)\n\n\nbdb\n---\n",
|
||||
"after_code": " (Contributed by Semyon Moroz in :gh:`133367`.)\n\n\nasyncio\n-------\n\n* The function and methods named :func:`!create_task` now take an arbitrary\n list of keyword arguments. All keyword arguments are passed to the\n :class:`~asyncio.Task` constructor or the custom task factory.\n (See :meth:`~asyncio.loop.set_task_factory` for details.)\n The ``name`` and ``context`` keyword arguments are no longer special;\n the name should now be set using the ``name`` keyword argument of the factory,\n and ``context`` may be ``None``.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\n\nbdb\n---\n",
|
||||
"diff_context": " (Contributed by Semyon Moroz in :gh:`133367`.)\n\n\nasyncio\n-------\n\n* The function and methods named :func:`!create_task` now take an arbitrary\n list of keyword arguments. All keyword arguments are passed to the\n :class:`~asyncio.Task` constructor or the custom task factory.\n (See :meth:`~asyncio.loop.set_task_factory` for details.)\n The ``name`` and ``context`` keyword arguments are no longer special;\n the name should now be set using the ``name`` keyword argument of the factory,\n and ``context`` may be ``None``.\n\n This affects the following function and methods:\n :meth:`asyncio.create_task`,\n :meth:`asyncio.loop.create_task`,\n :meth:`asyncio.TaskGroup.create_task`.\n (Contributed by Thomas Grainger in :gh:`128307`.)\n\n\nbdb\n---\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "Update CODEOWNERS (#126005)",
|
||||
"description": "Update CODEOWNERS (#126005)",
|
||||
"url": "https://github.com/python/cpython/commit/905eddceb2d61da9087f0d303aa7e4a405d2261a",
|
||||
"date": "2024-10-26T15:24:51Z",
|
||||
"sha_or_number": "905eddceb2d61da9087f0d303aa7e4a405d2261a",
|
||||
"files_changed": [
|
||||
".github/CODEOWNERS"
|
||||
],
|
||||
"additions": 2,
|
||||
"deletions": 2,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"126005"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "concise_subject; imperative_mood; references_issue",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "Withdraw most of my ownership in favor of Mark (#119611)",
|
||||
"description": "Withdraw most of my ownership in favor of Mark (#119611)",
|
||||
"url": "https://github.com/python/cpython/commit/3ff06ebec4e8b466f76078aa9c97cea2093d52ab",
|
||||
"date": "2024-05-27T18:07:16Z",
|
||||
"sha_or_number": "3ff06ebec4e8b466f76078aa9c97cea2093d52ab",
|
||||
"files_changed": [
|
||||
".github/CODEOWNERS"
|
||||
],
|
||||
"additions": 6,
|
||||
"deletions": 6,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"119611"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "references_issue",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-117549: Don't use designated initializers in headers (#118580)",
|
||||
"description": "gh-117549: Don't use designated initializers in headers (#118580)\n\nThe designated initializer syntax in static inline functions in pycore_backoff.h\r\ncauses problems for C++ or MSVC users who aren't yet using C++20.\r\nWhile internal, pycore_backoff.h is included (indirectly, via pycore_code.h)\r\nby some key 3rd party software that does so for speed.",
|
||||
"url": "https://github.com/python/cpython/commit/40cc809902304f60c6e1c933191dd4d64e570e28",
|
||||
"date": "2024-05-05T19:28:55Z",
|
||||
"sha_or_number": "40cc809902304f60c6e1c933191dd4d64e570e28",
|
||||
"files_changed": [
|
||||
"Include/internal/pycore_backoff.h",
|
||||
"Misc/NEWS.d/next/Core and Builtins/2024-05-05-12-04-02.gh-issue-117549.kITawD.rst"
|
||||
],
|
||||
"additions": 12,
|
||||
"deletions": 2,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"118580"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "Misc/NEWS.d/next/Core and Builtins/2024-05-05-12-04-02.gh-issue-117549.kITawD.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": "",
|
||||
"after_code": "Don't use designated initializer syntax in inline functions in internal\nheaders. They cause problems for C++ or MSVC users who aren't yet using the\nlatest C++ standard (C++20). While internal, pycore_backoff.h, is included\n(indirectly, via pycore_code.h) by some key 3rd party software that does so\nfor speed.",
|
||||
"diff_context": "Don't use designated initializer syntax in inline functions in internal\nheaders. They cause problems for C++ or MSVC users who aren't yet using the\nlatest C++ standard (C++20). While internal, pycore_backoff.h, is included\n(indirectly, via pycore_code.h) by some key 3rd party software that does so\nfor speed.",
|
||||
"change_type": "addition",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-74929: Rudimentary docs for PEP 667 (#118581)",
|
||||
"description": "gh-74929: Rudimentary docs for PEP 667 (#118581)\n\nThis is *not* sufficient for the final 3.13 release, but it will do for beta 1:\r\n\r\n- What's new entry\r\n- Updated changelog entry (news blurb)\r\n- Mention the proxy for f_globals in the datamodel and Python frame object docs\r\n\r\nThis doesn't have any C API details (what's new refers to the PEP).",
|
||||
"url": "https://github.com/python/cpython/commit/9c13d9e37a194f574b8591da634bf98419786448",
|
||||
"date": "2024-05-05T15:31:26Z",
|
||||
"sha_or_number": "9c13d9e37a194f574b8591da634bf98419786448",
|
||||
"files_changed": [
|
||||
"Doc/c-api/frame.rst",
|
||||
"Doc/reference/datamodel.rst",
|
||||
"Doc/whatsnew/3.13.rst",
|
||||
"Misc/NEWS.d/next/Core and Builtins/2024-04-27-21-44-40.gh-issue-74929.C2nESp.rst"
|
||||
],
|
||||
"additions": 22,
|
||||
"deletions": 3,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"118581"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "Doc/c-api/frame.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": "\n.. c:function:: PyObject* PyFrame_GetLocals(PyFrameObject *frame)\n\n Get the *frame*'s :attr:`~frame.f_locals` attribute (:class:`dict`).\n\n Return a :term:`strong reference`.\n\n .. versionadded:: 3.11\n\n\n.. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame)\n",
|
||||
"after_code": "\n.. c:function:: PyObject* PyFrame_GetLocals(PyFrameObject *frame)\n\n Get the *frame*'s :attr:`~frame.f_locals` attribute.\n If the frame refers to a function or comprehension, this returns\n a write-through proxy object that allows modifying the locals.\n In all other cases (classes, modules) it returns the :class:`dict`\n representing the frame locals directly.\n\n Return a :term:`strong reference`.\n\n .. versionadded:: 3.11\n\n .. versionchanged:: 3.13\n Return a proxy object for functions and comprehensions.\n\n\n.. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame)\n",
|
||||
"diff_context": "\n.. c:function:: PyObject* PyFrame_GetLocals(PyFrameObject *frame)\n\n Get the *frame*'s :attr:`~frame.f_locals` attribute (:class:`dict`).\n Get the *frame*'s :attr:`~frame.f_locals` attribute.\n If the frame refers to a function or comprehension, this returns\n a write-through proxy object that allows modifying the locals.\n In all other cases (classes, modules) it returns the :class:`dict`\n representing the frame locals directly.\n\n Return a :term:`strong reference`.\n\n .. versionadded:: 3.11\n\n .. versionchanged:: 3.13\n Return a proxy object for functions and comprehensions.\n\n\n.. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame)\n",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 11,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "Doc/reference/datamodel.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": "\n * - .. attribute:: frame.f_locals\n - The dictionary used by the frame to look up\n :ref:`local variables <naming>`\n\n * - .. attribute:: frame.f_globals\n - The dictionary used by the frame to look up",
|
||||
"after_code": "\n * - .. attribute:: frame.f_locals\n - The dictionary used by the frame to look up\n :ref:`local variables <naming>`.\n If the frame refers to a function or comprehension,\n this may return a write-through proxy object.\n\n .. versionchanged:: 3.13\n Return a proxy for functions and comprehensions.\n\n * - .. attribute:: frame.f_globals\n - The dictionary used by the frame to look up",
|
||||
"diff_context": "\n * - .. attribute:: frame.f_locals\n - The dictionary used by the frame to look up\n :ref:`local variables <naming>`\n :ref:`local variables <naming>`.\n If the frame refers to a function or comprehension,\n this may return a write-through proxy object.\n\n .. versionchanged:: 3.13\n Return a proxy for functions and comprehensions.\n\n * - .. attribute:: frame.f_globals\n - The dictionary used by the frame to look up",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "Doc/whatsnew/3.13.rst",
|
||||
"language": "restructuredtext",
|
||||
"before_code": " Performance improvements are modest -- we expect to be improving this\n over the next few releases.\n\nNew typing features:\n\n* :pep:`696`: Type parameters (:data:`typing.TypeVar`, :data:`typing.ParamSpec`,",
|
||||
"after_code": " Performance improvements are modest -- we expect to be improving this\n over the next few releases.\n\n* :pep:`667`: :attr:`FrameType.f_locals <frame.f_locals>` when used in\n a function now returns a write-through proxy to the frame's locals,\n rather than a ``dict``. See the PEP for corresponding C API changes\n and deprecations.\n\nNew typing features:\n\n* :pep:`696`: Type parameters (:data:`typing.TypeVar`, :data:`typing.ParamSpec`,",
|
||||
"diff_context": " Performance improvements are modest -- we expect to be improving this\n over the next few releases.\n\n* :pep:`667`: :attr:`FrameType.f_locals <frame.f_locals>` when used in\n a function now returns a write-through proxy to the frame's locals,\n rather than a ``dict``. See the PEP for corresponding C API changes\n and deprecations.\n\nNew typing features:\n\n* :pep:`696`: Type parameters (:data:`typing.TypeVar`, :data:`typing.ParamSpec`,",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "cpython",
|
||||
"title": "gh-118335: Rename --experimental-interpreter on Windows to --experimental-jit-interpreter (#118497)",
|
||||
"description": "gh-118335: Rename --experimental-interpreter on Windows to --experimental-jit-interpreter (#118497)\n\nAlso fix docs for this in whatsnew.",
|
||||
"url": "https://github.com/python/cpython/commit/a37b0932285b5e883b13a46ff2a32f15d7339894",
|
||||
"date": "2024-05-02T00:48:34Z",
|
||||
"sha_or_number": "a37b0932285b5e883b13a46ff2a32f15d7339894",
|
||||
"files_changed": [
|
||||
"Doc/whatsnew/3.13.rst",
|
||||
"PCbuild/build.bat"
|
||||
],
|
||||
"additions": 5,
|
||||
"deletions": 4,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"118497"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Support TypeGuard (PEP 647) (#9865)",
|
||||
"description": "Support TypeGuard (PEP 647) (#9865)\n\nPEP 647 is still in draft mode, but it is likely to be accepted, and this helps solve some real issues.",
|
||||
"url": "https://github.com/python/mypy/commit/fffbe88fc54807c8b10ac40456522ad2faf8d350",
|
||||
"date": "2021-01-18T18:13:36Z",
|
||||
"sha_or_number": "fffbe88fc54807c8b10ac40456522ad2faf8d350",
|
||||
"files_changed": [
|
||||
"mypy/checker.py",
|
||||
"mypy/checkexpr.py",
|
||||
"mypy/constraints.py",
|
||||
"mypy/expandtype.py",
|
||||
"mypy/fixup.py",
|
||||
"mypy/nodes.py",
|
||||
"mypy/test/testcheck.py",
|
||||
"mypy/typeanal.py",
|
||||
"mypy/types.py",
|
||||
"test-data/unit/check-python38.test",
|
||||
"test-data/unit/check-serialize.test",
|
||||
"test-data/unit/check-typeguard.test",
|
||||
"test-data/unit/lib-stub/typing_extensions.pyi"
|
||||
],
|
||||
"additions": 408,
|
||||
"deletions": 9,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9865"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": "mypy/checker.py",
|
||||
"language": "python",
|
||||
"before_code": " if literal(expr) == LITERAL_TYPE:\n vartype = type_map[expr]\n return self.conditional_callable_type_map(expr, vartype)\n elif isinstance(node, ComparisonExpr):\n # Step 1: Obtain the types of each operand and whether or not we can\n # narrow their types. (For example, we shouldn't try narrowing the",
|
||||
"after_code": " if literal(expr) == LITERAL_TYPE:\n vartype = type_map[expr]\n return self.conditional_callable_type_map(expr, vartype)\n elif isinstance(node.callee, RefExpr):\n if node.callee.type_guard is not None:\n # TODO: Follow keyword args or *args, **kwargs\n if node.arg_kinds[0] != nodes.ARG_POS:\n self.fail(\"Type guard requires positional argument\", node)\n return {}, {}\n if literal(expr) == LITERAL_TYPE:\n return {expr: TypeGuardType(node.callee.type_guard)}, {}\n elif isinstance(node, ComparisonExpr):\n # Step 1: Obtain the types of each operand and whether or not we can\n # narrow their types. (For example, we shouldn't try narrowing the",
|
||||
"diff_context": " if literal(expr) == LITERAL_TYPE:\n vartype = type_map[expr]\n return self.conditional_callable_type_map(expr, vartype)\n elif isinstance(node.callee, RefExpr):\n if node.callee.type_guard is not None:\n # TODO: Follow keyword args or *args, **kwargs\n if node.arg_kinds[0] != nodes.ARG_POS:\n self.fail(\"Type guard requires positional argument\", node)\n return {}, {}\n if literal(expr) == LITERAL_TYPE:\n return {expr: TypeGuardType(node.callee.type_guard)}, {}\n elif isinstance(node, ComparisonExpr):\n # Step 1: Obtain the types of each operand and whether or not we can\n # narrow their types. (For example, we shouldn't try narrowing the",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/checkexpr.py",
|
||||
"language": "python",
|
||||
"before_code": " ret_type=self.object_type(),\n fallback=self.named_type('builtins.function'))\n callee_type = get_proper_type(self.accept(e.callee, type_context, always_allow_any=True))\n if (self.chk.options.disallow_untyped_calls and\n self.chk.in_checked_function() and\n isinstance(callee_type, CallableType)",
|
||||
"after_code": " ret_type=self.object_type(),\n fallback=self.named_type('builtins.function'))\n callee_type = get_proper_type(self.accept(e.callee, type_context, always_allow_any=True))\n if (isinstance(e.callee, RefExpr)\n and isinstance(callee_type, CallableType)\n and callee_type.type_guard is not None):\n # Cache it for find_isinstance_check()\n e.callee.type_guard = callee_type.type_guard\n if (self.chk.options.disallow_untyped_calls and\n self.chk.in_checked_function() and\n isinstance(callee_type, CallableType)",
|
||||
"diff_context": " ret_type=self.object_type(),\n fallback=self.named_type('builtins.function'))\n callee_type = get_proper_type(self.accept(e.callee, type_context, always_allow_any=True))\n if (isinstance(e.callee, RefExpr)\n and isinstance(callee_type, CallableType)\n and callee_type.type_guard is not None):\n # Cache it for find_isinstance_check()\n e.callee.type_guard = callee_type.type_guard\n if (self.chk.options.disallow_untyped_calls and\n self.chk.in_checked_function() and\n isinstance(callee_type, CallableType)",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"generator_expression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/checkexpr.py",
|
||||
"language": "python",
|
||||
"before_code": " \"\"\"\n if literal(expr) >= LITERAL_TYPE:\n restriction = self.chk.binder.get(expr)\n # If the current node is deferred, some variables may get Any types that they\n # otherwise wouldn't have. We don't want to narrow down these since it may\n # produce invalid inferred Optional[Any] types, at least.",
|
||||
"after_code": " \"\"\"\n if literal(expr) >= LITERAL_TYPE:\n restriction = self.chk.binder.get(expr)\n # Ignore the error about using get_proper_type().\n if isinstance(restriction, TypeGuardType): # type: ignore[misc]\n # A type guard forces the new type even if it doesn't overlap the old.\n return restriction.type_guard\n # If the current node is deferred, some variables may get Any types that they\n # otherwise wouldn't have. We don't want to narrow down these since it may\n # produce invalid inferred Optional[Any] types, at least.",
|
||||
"diff_context": " \"\"\"\n if literal(expr) >= LITERAL_TYPE:\n restriction = self.chk.binder.get(expr)\n # Ignore the error about using get_proper_type().\n if isinstance(restriction, TypeGuardType): # type: ignore[misc]\n # A type guard forces the new type even if it doesn't overlap the old.\n return restriction.type_guard\n # If the current node is deferred, some variables may get Any types that they\n # otherwise wouldn't have. We don't want to narrow down these since it may\n # produce invalid inferred Optional[Any] types, at least.",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": "\"\"\"",
|
||||
"coding_patterns": [
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/constraints.py",
|
||||
"language": "python",
|
||||
"before_code": " for t, a in zip(template.arg_types, cactual.arg_types):\n # Negate direction due to function argument type contravariance.\n res.extend(infer_constraints(t, a, neg_op(self.direction)))\n res.extend(infer_constraints(template.ret_type, cactual.ret_type,\n self.direction))\n return res\n elif isinstance(self.actual, AnyType):",
|
||||
"after_code": " for t, a in zip(template.arg_types, cactual.arg_types):\n # Negate direction due to function argument type contravariance.\n res.extend(infer_constraints(t, a, neg_op(self.direction)))\n template_ret_type, cactual_ret_type = template.ret_type, cactual.ret_type\n if template.type_guard is not None:\n template_ret_type = template.type_guard\n if cactual.type_guard is not None:\n cactual_ret_type = cactual.type_guard\n res.extend(infer_constraints(template_ret_type, cactual_ret_type,\n self.direction))\n return res\n elif isinstance(self.actual, AnyType):",
|
||||
"diff_context": " for t, a in zip(template.arg_types, cactual.arg_types):\n # Negate direction due to function argument type contravariance.\n res.extend(infer_constraints(t, a, neg_op(self.direction)))\n res.extend(infer_constraints(template.ret_type, cactual.ret_type,\n template_ret_type, cactual_ret_type = template.ret_type, cactual.ret_type\n if template.type_guard is not None:\n template_ret_type = template.type_guard\n if cactual.type_guard is not None:\n cactual_ret_type = cactual.type_guard\n res.extend(infer_constraints(template_ret_type, cactual_ret_type,\n self.direction))\n return res\n elif isinstance(self.actual, AnyType):",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": [
|
||||
"type_hint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/expandtype.py",
|
||||
"language": "python",
|
||||
"before_code": "\n def visit_callable_type(self, t: CallableType) -> Type:\n return t.copy_modified(arg_types=self.expand_types(t.arg_types),\n ret_type=t.ret_type.accept(self))\n\n def visit_overloaded(self, t: Overloaded) -> Type:\n items = [] # type: List[CallableType]",
|
||||
"after_code": "\n def visit_callable_type(self, t: CallableType) -> Type:\n return t.copy_modified(arg_types=self.expand_types(t.arg_types),\n ret_type=t.ret_type.accept(self),\n type_guard=(t.type_guard.accept(self)\n if t.type_guard is not None else None))\n\n def visit_overloaded(self, t: Overloaded) -> Type:\n items = [] # type: List[CallableType]",
|
||||
"diff_context": "\n def visit_callable_type(self, t: CallableType) -> Type:\n return t.copy_modified(arg_types=self.expand_types(t.arg_types),\n ret_type=t.ret_type.accept(self))\n ret_type=t.ret_type.accept(self),\n type_guard=(t.type_guard.accept(self)\n if t.type_guard is not None else None))\n\n def visit_overloaded(self, t: Overloaded) -> Type:\n items = [] # type: List[CallableType]",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "visit_overloaded",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": "mypy/fixup.py",
|
||||
"language": "python",
|
||||
"before_code": " for arg in ct.bound_args:\n if arg:\n arg.accept(self)\n\n def visit_overloaded(self, t: Overloaded) -> None:\n for ct in t.items():",
|
||||
"after_code": " for arg in ct.bound_args:\n if arg:\n arg.accept(self)\n if ct.type_guard is not None:\n ct.type_guard.accept(self)\n\n def visit_overloaded(self, t: Overloaded) -> None:\n for ct in t.items():",
|
||||
"diff_context": " for arg in ct.bound_args:\n if arg:\n arg.accept(self)\n if ct.type_guard is not None:\n ct.type_guard.accept(self)\n\n def visit_overloaded(self, t: Overloaded) -> None:\n for ct in t.items():",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 6,
|
||||
"function_name": "visit_overloaded",
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Add a separate issue form to report crashes (#9549)",
|
||||
"description": "Add a separate issue form to report crashes (#9549)",
|
||||
"url": "https://github.com/python/mypy/commit/cca6e2fdc874b7538bd1d2ef70daab687b2a0363",
|
||||
"date": "2020-10-08T22:30:06Z",
|
||||
"sha_or_number": "cca6e2fdc874b7538bd1d2ef70daab687b2a0363",
|
||||
"files_changed": [
|
||||
".github/ISSUE_TEMPLATE/crash.md"
|
||||
],
|
||||
"additions": 41,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9549"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/crash.md",
|
||||
"language": "markdown",
|
||||
"before_code": "",
|
||||
"after_code": "---\nname: Crash Report\nabout: Crash (traceback or \"INTERNAL ERROR\")\nlabels: \"crash\"\n---\n\n<!--\n Use this form only if mypy reports an \"INTERNAL ERROR\" and/or gives a traceback.\n Please include the traceback and all other messages below (use `mypy --show-traceback`).\n-->\n\n**Crash Report**\n\n(Tell us what happened.)\n\n**Traceback**\n\n```\n(Insert traceback and other messages from mypy here -- use `--show-traceback`.)\n```\n\n**To Reproduce**\n\n(Write what you did to reproduce the crash. Full source code is\nappreciated. We also very much appreciate it if you try to narrow the\nsource down to a small stand-alone example.)\n\n**Your Environment**\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"diff_context": "---\nname: Crash Report\nabout: Crash (traceback or \"INTERNAL ERROR\")\nlabels: \"crash\"\n---\n\n<!--\n Use this form only if mypy reports an \"INTERNAL ERROR\" and/or gives a traceback.\n Please include the traceback and all other messages below (use `mypy --show-traceback`).\n-->\n\n**Crash Report**\n\n(Tell us what happened.)\n\n**Traceback**\n\n```\n(Insert traceback and other messages from mypy here -- use `--show-traceback`.)\n```\n\n**To Reproduce**\n\n(Write what you did to reproduce the crash. Full source code is\nappreciated. We also very much appreciate it if you try to narrow the\nsource down to a small stand-alone example.)\n\n**Your Environment**\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"change_type": "addition",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "imperative_mood; references_issue",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Make the new bug templates less markup-heavy (#9438)",
|
||||
"description": "Make the new bug templates less markup-heavy (#9438)\n\n- Remove emoji\r\n- Instead of `## H2 headings` just use `**bold**`\r\n- Add link to docs\r\n- Add suggestion for new users not to file a bug",
|
||||
"url": "https://github.com/python/mypy/commit/6f07cb6a2e02446b909846f99817f674675e826e",
|
||||
"date": "2020-09-11T18:35:59Z",
|
||||
"sha_or_number": "6f07cb6a2e02446b909846f99817f674675e826e",
|
||||
"files_changed": [
|
||||
".github/ISSUE_TEMPLATE/bug.md",
|
||||
".github/ISSUE_TEMPLATE/documentation.md",
|
||||
".github/ISSUE_TEMPLATE/feature.md",
|
||||
".github/ISSUE_TEMPLATE/question.md"
|
||||
],
|
||||
"additions": 24,
|
||||
"deletions": 18,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9438"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/bug.md",
|
||||
"language": "markdown",
|
||||
"before_code": "---\nname: 🐛 Bug Report\nabout: Submit a bug report\nlabels: \"bug\"\n---\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n\n## Expected Behavior\n\n<!--\n How did you expect your project to behave?",
|
||||
"after_code": "---\nname: Bug Report\nabout: Submit a bug report\nlabels: \"bug\"\n---\n\n<!--\n If you're new to mypy and you're not sure whether what you're experiencing is a mypy bug, please see the \"Question and Help\" form\n instead.\n-->\n\n**Bug Report**\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n(A clear and concise description of what the bug is.)\n\n**To Reproduce**\n\n(Write your steps here:)\n\n1. Step 1...\n2. Step 2...\n3. Step 3...\n\n**Expected Behavior**\n\n<!--\n How did you expect your project to behave?",
|
||||
"diff_context": "---\nname: 🐛 Bug Report\nname: Bug Report\nabout: Submit a bug report\nlabels: \"bug\"\n---\n\n<!--\n If you're new to mypy and you're not sure whether what you're experiencing is a mypy bug, please see the \"Question and Help\" form\n instead.\n-->\n\n**Bug Report**\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n**To Reproduce**\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n2. Step 2...\n3. Step 3...\n\n## Expected Behavior\n**Expected Behavior**\n\n<!--\n How did you expect your project to behave?",
|
||||
"change_type": "modification",
|
||||
"lines_of_context": 20,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Add MYPY_CONFIG_FILE_DIR to environment when config file is read (2nd try) (#9414)",
|
||||
"description": "Add MYPY_CONFIG_FILE_DIR to environment when config file is read (2nd try) (#9414)\n\n(This fixes the mistake I introduced in the previous version.)\r\n\r\nResubmit of #9403.\r\n\r\nFixes #7968.\r\n\r\nCo-authored-by: aghast <aghast@aghast.dev>",
|
||||
"url": "https://github.com/python/mypy/commit/9d038469d80e36057c77e0a8a18831f829778f9d",
|
||||
"date": "2020-09-04T20:55:14Z",
|
||||
"sha_or_number": "9d038469d80e36057c77e0a8a18831f829778f9d",
|
||||
"files_changed": [
|
||||
"mypy/config_parser.py",
|
||||
"mypy/test/testcmdline.py",
|
||||
"test-data/unit/envvars.test"
|
||||
],
|
||||
"additions": 15,
|
||||
"deletions": 0,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9403",
|
||||
"7968",
|
||||
"9414"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "imperative_mood; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Revert \"Add MYPY_CONFIG_FILE_DIR to environment when config file is read (#9403)\"",
|
||||
"description": "Revert \"Add MYPY_CONFIG_FILE_DIR to environment when config file is read (#9403)\"\n\nReason: This broke CI.\n\nThis reverts commit 652aca96609c876c47ca7eaa68d67ac1e36f4215.",
|
||||
"url": "https://github.com/python/mypy/commit/57d3473ae906fe945953b874d3dcb66efb2710ca",
|
||||
"date": "2020-09-04T02:45:27Z",
|
||||
"sha_or_number": "57d3473ae906fe945953b874d3dcb66efb2710ca",
|
||||
"files_changed": [
|
||||
"mypy/config_parser.py",
|
||||
"mypy/test/testcmdline.py",
|
||||
"test-data/unit/envvars.test"
|
||||
],
|
||||
"additions": 0,
|
||||
"deletions": 15,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9403"
|
||||
],
|
||||
"code_samples": [],
|
||||
"commit_message_style": "references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
},
|
||||
{
|
||||
"type": "commit",
|
||||
"repository": "mypy",
|
||||
"title": "Revert issue template (#9345) -- it doesn't work",
|
||||
"description": "Revert issue template (#9345) -- it doesn't work\n\nThis reverts commit 18c84e0f6906cfb315c367aa35550a4727cb57f8.",
|
||||
"url": "https://github.com/python/mypy/commit/42a522089c6b418727e143c181128e902acf0908",
|
||||
"date": "2020-08-27T22:21:28Z",
|
||||
"sha_or_number": "42a522089c6b418727e143c181128e902acf0908",
|
||||
"files_changed": [
|
||||
".github/ISSUE_TEMPLATE/bug.md",
|
||||
".github/ISSUE_TEMPLATE/documentation.md",
|
||||
".github/ISSUE_TEMPLATE/feature.md",
|
||||
".github/ISSUE_TEMPLATE/question.md",
|
||||
".github/PULL_REQUEST_TEMPLATE.md",
|
||||
"ISSUE_TEMPLATE.md"
|
||||
],
|
||||
"additions": 20,
|
||||
"deletions": 110,
|
||||
"labels": [],
|
||||
"related_issues": [
|
||||
"9345"
|
||||
],
|
||||
"code_samples": [
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/bug.md",
|
||||
"language": "markdown",
|
||||
"before_code": "---\nname: 🐛 Bug Report\nlabels: \"bug\"\n---\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n\n## Expected Behavior\n\n<!--\n How did you expect your project to behave?\n It’s fine if you’re not sure your understanding is correct.\n Write down what you thought would happen. If you just expected no errors, you can delete this section.\n-->\n\n(Write what you thought would happen.)\n\n## Actual Behavior\n\n<!--\n Did something go wrong?\n Is something broken, or not behaving as you expected?\n-->\n\n(Write what happened.)\n\n## Your Environment\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"after_code": "",
|
||||
"diff_context": "---\nname: 🐛 Bug Report\nlabels: \"bug\"\n---\n\n<!--\nNote: If the problem you are reporting is about a specific library function, then the typeshed tracker is better suited\nfor this report: https://github.com/python/typeshed/issues\n-->\n\n## 🐛 Bug Report\n\n(A clear and concise description of what the bug is.)\n\n## To Reproduce\n\n(Write your steps here:)\n\n1. Step 1...\n1. Step 2...\n1. Step 3...\n\n## Expected Behavior\n\n<!--\n How did you expect your project to behave?\n It’s fine if you’re not sure your understanding is correct.\n Write down what you thought would happen. If you just expected no errors, you can delete this section.\n-->\n\n(Write what you thought would happen.)\n\n## Actual Behavior\n\n<!--\n Did something go wrong?\n Is something broken, or not behaving as you expected?\n-->\n\n(Write what happened.)\n\n## Your Environment\n\n<!-- Include as many relevant details about the environment you experienced the bug in -->\n\n- Mypy version used:\n- Mypy command-line flags:\n- Mypy configuration options from `mypy.ini` (and other config files):\n- Python version used:\n- Operating system and version:\n\n<!--\nYou can freely edit this text, please remove all the lines\nyou believe are unnecessary.\n-->",
|
||||
"change_type": "deletion",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": ".github/ISSUE_TEMPLATE/feature.md",
|
||||
"language": "markdown",
|
||||
"before_code": "---\nname: 🚀 Feature\nlabels: \"feature\"\n---\n\n## 🚀 Feature\n\n(A clear and concise description of your feature proposal.)\n\n## Pitch\n\n(Please explain why this feature should be implemented and how it would be used. Add examples, if applicable.)",
|
||||
"after_code": "",
|
||||
"diff_context": "---\nname: 🚀 Feature\nlabels: \"feature\"\n---\n\n## 🚀 Feature\n\n(A clear and concise description of your feature proposal.)\n\n## Pitch\n\n(Please explain why this feature should be implemented and how it would be used. Add examples, if applicable.)",
|
||||
"change_type": "deletion",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
},
|
||||
{
|
||||
"file_path": ".github/PULL_REQUEST_TEMPLATE.md",
|
||||
"language": "markdown",
|
||||
"before_code": "### Have you read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)?\n\n(Once you have, delete this section. If you leave it in, your PR may be closed without action.)\n\n### Description\n\n<!--\nIf this pull request closes or fixes an issue, write Closes #NNN\" or \"Fixes #NNN\" in that exact\nformat.\n-->\n\n(Explain how this PR changes mypy.)\n\n## Test Plan\n\n<!--\nIf this is a documentation change, rebuild the docs (link to instructions) and review the changed pages for markup errors.\nIf this is a code change, include new tests (link to the testing docs). Be sure to run the tests locally and fix any errors before submitting the PR (more instructions).\nIf this change cannot be tested by the CI, please explain how to verify it manually.\n-->\n\n(Write your test plan here. If you changed any code, please provide us with clear instructions on how you verified your changes work.)",
|
||||
"after_code": "",
|
||||
"diff_context": "### Have you read the [Contributing Guidelines](https://github.com/python/mypy/blob/master/CONTRIBUTING.md)?\n\n(Once you have, delete this section. If you leave it in, your PR may be closed without action.)\n\n### Description\n\n<!--\nIf this pull request closes or fixes an issue, write Closes #NNN\" or \"Fixes #NNN\" in that exact\nformat.\n-->\n\n(Explain how this PR changes mypy.)\n\n## Test Plan\n\n<!--\nIf this is a documentation change, rebuild the docs (link to instructions) and review the changed pages for markup errors.\nIf this is a code change, include new tests (link to the testing docs). Be sure to run the tests locally and fix any errors before submitting the PR (more instructions).\nIf this change cannot be tested by the CI, please explain how to verify it manually.\n-->\n\n(Write your test plan here. If you changed any code, please provide us with clear instructions on how you verified your changes work.)",
|
||||
"change_type": "deletion",
|
||||
"lines_of_context": 0,
|
||||
"function_name": null,
|
||||
"class_name": null,
|
||||
"docstring": null,
|
||||
"coding_patterns": []
|
||||
}
|
||||
],
|
||||
"commit_message_style": "concise_subject; references_issue; has_body",
|
||||
"python_version": null,
|
||||
"pep_status": null
|
||||
}
|
||||
]
|
||||
79
notebooks/data/my_developer_rules.md
vendored
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
Assistant Guidelines
|
||||
These rules are absolutely imperative to adhere to. Comply with them precisely as they are outlined.
|
||||
|
||||
The agent must use the sequential thinking MCP tool to work out problems.
|
||||
|
||||
Core Behavior Guidelines
|
||||
|
||||
Respond only to explicit requests. Do not add files, code, tests, or comments unless asked.
|
||||
|
||||
Follow instructions precisely. No assumptions or speculative additions.
|
||||
|
||||
Use provided context accurately.
|
||||
|
||||
Avoid extra output. No debugging logs or test harnesses unless requested.
|
||||
|
||||
Produce clean, optimized code when code is requested. Respect existing style.
|
||||
|
||||
Deliver complete, standalone solutions. No placeholders.
|
||||
|
||||
Limit file creation. Only create new files when necessary.
|
||||
|
||||
If you modify the model in a user's code, you must confirm with the user and never be sneaky. Always tell the user exactly what you are doing.
|
||||
|
||||
Communication & Delivery
|
||||
|
||||
9. Don't explain unless asked. Do not expose reasoning in outputs.
|
||||
10. If unsure, say "I don't know." Avoid hallucinated content.
|
||||
11. Maintain consistency across sessions. Refer to project memory and documentation.
|
||||
12. Respect privacy and permissions. Never leak or infer secure data.
|
||||
13. Prioritize targeted edits over full rewrites.
|
||||
14. Optimize incrementally. Avoid unnecessary overhauls.
|
||||
|
||||
Spec.md Requirement
|
||||
|
||||
You must maintain a file named Spec.md. This file acts as the single source of truth for the project.
|
||||
|
||||
Rules:
|
||||
|
||||
Before starting any implementation, check if Spec.md already exists.
|
||||
|
||||
If it does not exist, create one using the template provided below.
|
||||
|
||||
Always update Spec.md before and after any major change.
|
||||
|
||||
Use the contents of Spec.md to guide logic, structure, and implementation decisions.
|
||||
|
||||
When updating a section, condense previous content to keep the document concise.
|
||||
|
||||
Spec.md Starter Template (Plain Text Format)
|
||||
|
||||
Title: Spec.md – Project Specification
|
||||
|
||||
Section: Purpose
|
||||
Describe the main goal of this feature, tool, or system.
|
||||
|
||||
Section: Core Functionality
|
||||
List the key features, expected behaviors, and common use cases.
|
||||
|
||||
Section: Architecture Overview
|
||||
Summarize the technical setup, frameworks used, and main modules or services.
|
||||
|
||||
Section: Input and Output Contracts
|
||||
List all inputs and outputs in a table-like format:
|
||||
|
||||
Input: describe the input data, its format, and where it comes from.
|
||||
|
||||
Output: describe the output data, its format, and its destination.
|
||||
|
||||
Section: Edge Cases and Constraints
|
||||
List known limitations, special scenarios, and fallback behaviors.
|
||||
|
||||
Section: File and Module Map
|
||||
List all important files or modules and describe what each one is responsible for.
|
||||
|
||||
Section: Open Questions or TODOs
|
||||
Create a checklist of unresolved decisions, logic that needs clarification, or tasks that are still pending.
|
||||
|
||||
Section: Last Updated
|
||||
Include the most recent update date and who made the update.
|
||||
74
notebooks/data/pep_style_guide.md
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# PEP 8 Style Guide: Essentials
|
||||
|
||||
## Code Layout
|
||||
- Indentation: 4 spaces per level
|
||||
- Line length: 79 for code (88/100 acceptable by team), 72 for comments/docstrings
|
||||
- Blank lines: 2 around top-level defs/classes, 1 between methods
|
||||
|
||||
```python
|
||||
# Hanging indent for long calls
|
||||
foo = long_function_name(
|
||||
var_one, var_two,
|
||||
var_three, var_four,
|
||||
)
|
||||
```
|
||||
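The blank-line rules are easiest to see in a short, self-contained sketch (the names are illustrative only):

```python
import math


def area(radius: float) -> float:
    # Two blank lines separate top-level definitions
    return math.pi * radius ** 2


class Circle:
    def __init__(self, radius: float) -> None:
        self.radius = radius

    def scaled_area(self, factor: float) -> float:
        # One blank line separates methods inside a class
        return factor * area(self.radius)
```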
|
||||
## Imports
|
||||
- One import per line
|
||||
- Group: stdlib, third-party, local
|
||||
- Prefer absolute imports; avoid wildcard imports
|
||||
|
||||
```python
|
||||
import os
|
||||
import sys
|
||||
from subprocess import Popen, PIPE
|
||||
|
||||
import requests
|
||||
|
||||
from myproject.models import User
|
||||
```
|
||||
|
||||
## Whitespace
|
||||
- No space inside brackets or before commas/semicolons
|
||||
- Spaces around binary operators
|
||||
|
||||
```python
|
||||
x = 1
|
||||
hypot2 = x * x + y * y
|
||||
```
|
||||
|
||||
## Naming
|
||||
- snake_case: functions, variables
|
||||
- PascalCase: classes
|
||||
- SCREAMING_SNAKE_CASE: constants
|
||||
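A minimal sketch showing the three conventions together (illustrative names, not from any particular project):

```python
MAX_RETRIES = 3  # constant: SCREAMING_SNAKE_CASE


class RetryPolicy:  # class: PascalCase
    def should_retry(self, attempt_count: int) -> bool:  # function/variable: snake_case
        return attempt_count < MAX_RETRIES
```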
|
||||
## Comments & Docstrings
|
||||
- Use complete sentences; keep up to date
|
||||
- Triple-double quotes for public modules, classes, functions
|
||||
```python
|
||||
def f(x: int) -> int:
|
||||
"""Return x doubled."""
|
||||
return x * 2
|
||||
```
|
||||
|
||||
## Type Hints
|
||||
- Space after colon; arrow for returns
|
||||
```python
|
||||
def munge(s: str) -> str: ...
|
||||
```
|
||||
|
||||
## Tooling
|
||||
- Black, isort, Flake8 (or Ruff) to automate style
|
||||
- Example pyproject.toml excerpt:
|
||||
```toml
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
```
|
||||
|
||||
## Common Violations
|
||||
- E501: line too long -> break with parentheses
|
||||
- E225: missing whitespace around operator
|
||||
- E402: module import not at top of file
|
||||
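For example, E501 is usually fixed by wrapping the long expression in parentheses so Python continues the line implicitly (a sketch with made-up values):

```python
base_price, shipping_fee, handling_fee = 100.0, 12.5, 3.0

# Parentheses allow implicit continuation and keep each line under the limit (fixes E501)
total_cost = (
    base_price
    + shipping_fee
    + handling_fee
)
```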
74
notebooks/data/zen_principles.md
vendored
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# The Zen of Python: Practical Guide
|
||||
|
||||
## Overview
|
||||
The Zen of Python (Tim Peters, `import this`) captures Python's philosophy. Use these principles as a checklist during design, coding, and reviews.
|
||||
|
||||
## Key Principles With Guidance
|
||||
|
||||
### 1. Beautiful is better than ugly
|
||||
Prefer descriptive names, clear structure, and consistent formatting.
|
||||
|
||||
### 2. Explicit is better than implicit
|
||||
Be clear about behavior, imports, and types.
|
||||
```python
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
def get_future_date(days_ahead: int) -> datetime:
|
||||
return datetime.now() + timedelta(days=days_ahead)
|
||||
```
|
||||
|
||||
### 3. Simple is better than complex
|
||||
Choose straightforward solutions first.
|
||||
|
||||
### 4. Complex is better than complicated
|
||||
When complexity is needed, organize it with clear abstractions.
|
||||
|
||||
### 5. Flat is better than nested
|
||||
Use early returns to reduce indentation.
|
||||
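For instance, a sketch assuming a plain list of numeric scores:

```python
def average_passing_score(scores: list[float]) -> float | None:
    # Early returns keep the main computation at the top indentation level
    if not scores:
        return None
    passing = [score for score in scores if score >= 50]
    if not passing:
        return None
    return sum(passing) / len(passing)
```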
|
||||
### 6. Sparse is better than dense
|
||||
Give code room to breathe with whitespace.
|
||||
|
||||
### 7. Readability counts
|
||||
Optimize for human readers; add docstrings for nontrivial code.
|
||||
|
||||
### 8. Special cases aren't special enough to break the rules
|
||||
Stay consistent; exceptions should be rare and justified.
|
||||
|
||||
### 9. Although practicality beats purity
|
||||
Prefer practical solutions that teams can maintain.
|
||||
|
||||
### 10. Errors should never pass silently
|
||||
Handle exceptions explicitly; log with context.
|
||||
|
||||
### 11. Unless explicitly silenced
|
||||
Silence only specific, acceptable errors and document why.
|
||||
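Both principles in one small sketch, using only the standard library (the config path is hypothetical):

```python
import json
import logging

logger = logging.getLogger(__name__)


def load_config(path: str) -> dict:
    try:
        with open(path) as config_file:
            return json.load(config_file)
    except FileNotFoundError:
        # Explicitly silenced: a missing config file is an accepted case,
        # but it is still logged with context rather than ignored.
        logger.warning("Config file %s not found; falling back to defaults", path)
        return {}
```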
|
||||
### 12. In the face of ambiguity, refuse the temptation to guess
|
||||
Require explicit inputs and behavior.
|
||||
|
||||
### 13. There should be one obvious way to do it
|
||||
Prefer standard library patterns and idioms.
|
||||
|
||||
### 14. Although that way may not be obvious at first
|
||||
Learn Python idioms; embrace clarity over novelty.
|
||||
|
||||
### 15. Now is better than never; 16. Never is often better than right now
|
||||
Iterate, but don't rush broken code.
|
||||
|
||||
### 17/18. Hard to explain is bad; easy to explain is good
|
||||
Prefer designs you can explain simply.
|
||||
|
||||
### 19. Namespaces are one honking great idea
|
||||
Use modules/packages to separate concerns; avoid wildcard imports.
|
||||
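A small standard-library example of letting the namespace do the talking:

```python
import collections
import json

# Qualified names make the origin of each helper obvious;
# `from collections import *` would hide it.
word_counts = collections.Counter("simple is better than complex".split())
print(json.dumps(word_counts, indent=2))
```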
|
||||
## Modern Python Tie-ins
|
||||
- Type hints reinforce explicitness
|
||||
- Context managers enforce safe resource handling
|
||||
- Dataclasses improve readability for data containers
|
||||
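A short sketch that combines all three; the file name is arbitrary:

```python
from dataclasses import dataclass
from pathlib import Path


@dataclass
class Measurement:
    name: str
    value: float


def save(measurement: Measurement, path: Path) -> None:
    # The context manager guarantees the file handle is closed
    with path.open("a") as output_file:
        output_file.write(f"{measurement.name},{measurement.value}\n")


save(Measurement("temperature", 21.5), Path("measurements.csv"))
```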
|
||||
## Quick Review Checklist
|
||||
- Is it readable and explicit?
|
||||
- Is this the simplest working solution?
|
||||
- Are errors explicit and logged?
|
||||
- Are modules/namespaces used appropriately?
|
||||
584
notebooks/tutorial.ipynb
vendored
Normal file
|
|
@ -0,0 +1,584 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f22c8fe6d92cfcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using Cognee with Python Development Data\n",
|
||||
"\n",
|
||||
"Unite authoritative Python practice (Guido van Rossum's own contributions!), normative guidance (Zen/PEP 8), and your lived context (rules + conversations) into one *AI memory* that produces answers that are relevant, explainable, and consistent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fe69acbf9ab1a22b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## What You'll Learn\n",
|
||||
"\n",
|
||||
"In this comprehensive tutorial, you'll discover how to transform scattered development data into an intelligent knowledge system that enhances your coding workflow. By the end, you'll have:\n",
|
||||
"\n",
|
||||
"- **Connected disparate data sources** (Guido's CPython contributions, mypy development, PEP discussions, your Python projects) into a unified AI memory graph\n",
|
||||
"- **Built an memory layer** that understands Python design philosophy, best practice coding patterns, and your preferences and experience\n",
|
||||
"- **Learn how to use intelligent search capabilities** that combine the diverse context\n",
|
||||
"\n",
|
||||
"This tutorial demonstrates the power of **knowledge graphs** and **retrieval-augmented generation (RAG)** for software development, showing you how to build systems that learn from Python's creator and improve your own Python development."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b03b59c064213dd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cognee and its core operations\n",
|
||||
"\n",
|
||||
"Before we dive in, let's understand the core Cognee operations we'll be working with:\n",
|
||||
"\n",
|
||||
"- **`cognee.add()`** - Ingests raw data (files, text, APIs) into the system\n",
|
||||
"- **`cognee.cognify()`** - Processes and structures data into a knowledge graph using AI\n",
|
||||
"- **`cognee.search()`** - Queries the knowledge graph with natural language or Cypher\n",
|
||||
"- **`cognee.memify()`** - Cognee's \"secret sauce\" that infers implicit connections and rules from your data"
|
||||
]
|
||||
},
|
||||
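Put together, the typical flow looks roughly like the sketch below. It condenses the steps this notebook walks through one at a time, and assumes your `.env` already provides LLM credentials and that the data file exists:

```python
import cognee

async def build_memory():
    # Ingest a raw file, structure it into a knowledge graph, then query it
    await cognee.add("data/guido_contributions.json")
    await cognee.cognify()
    return await cognee.search("Show me commits")
```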
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6a7669fbb6a3e6c7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data used in this tutorial\n",
|
||||
"\n",
|
||||
"Cognee can ingest many types of sources. In this tutorial, we use a small, concrete set of files that cover different perspectives:\n",
|
||||
"\n",
|
||||
"- **`guido_contributions.json` — Authoritative exemplars.** Real PRs and commits from Guido van Rossum (mypy, CPython). These show how Python’s creator solved problems and provide concrete anchors for patterns.\n",
|
||||
"- **`pep_style_guide.md` — Norms.** Encodes community style and typing conventions (PEP 8 and related). Ensures that search results and inferred rules align with widely accepted standards.\n",
|
||||
"- **`zen_principles.md` — Philosophy.** The Zen of Python. Grounds design trade‑offs (simplicity, explicitness, readability) beyond syntax or mechanics.\n",
|
||||
"- **`my_developer_rules.md` — Local constraints.** Your house rules, conventions, and project‑specific requirements (scope, privacy, Spec.md). Keeps recommendations relevant to your actual workflow.\n",
|
||||
"- **`copilot_conversations.json` — Personal history.** Transcripts of real assistant conversations, including your questions, code snippets, and discussion topics. Captures “how you code” and connects it to “how Guido codes.”"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2a5dac2c6fdc7ca7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Preliminaries\n",
|
||||
"\n",
|
||||
"Cognee relies heavily on async functions.\n",
|
||||
"We need `nest_asyncio` so `await` works in this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "20cb02b49e3c53e2",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:35:00.836706Z",
|
||||
"start_time": "2025-09-07T14:35:00.832646Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nest_asyncio\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "30e66c894fb4cfd5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To strike the balanace between speed, cost, anc quality, we recommend using OpenAI's `4o-mini` model; make sure your `.env` file contains this line:\n",
|
||||
"\n",
|
||||
"```LLM_MODEL=\"gpt-4o-mini\"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "45e1caaec20c9518",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will do a quick import check."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9386ecb596860399",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:35:03.910260Z",
|
||||
"start_time": "2025-09-07T14:35:00.938966Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cognee\n",
|
||||
"import os\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"print('🔍 Quick Cognee Import Check')\n",
|
||||
"print('=' * 30)\n",
|
||||
"print(f'📍 Cognee location: {cognee.__file__}')\n",
|
||||
"print(f'📁 Package directory: {os.path.dirname(cognee.__file__)}')\n",
|
||||
"\n",
|
||||
"# Check if it's local or installed\n",
|
||||
"current_dir = Path.cwd()\n",
|
||||
"cognee_path = Path(cognee.__file__)\n",
|
||||
"if current_dir in cognee_path.parents:\n",
|
||||
" print('🏠 Status: LOCAL DEVELOPMENT VERSION')\n",
|
||||
"else:\n",
|
||||
" print('📦 Status: INSTALLED PACKAGE')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "76895c6570d1a4dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And just to be safe, we will make sure that the path contains the root directory, so Python can find everything it needs to run the notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "19e74e6b691020db",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:35:03.921217Z",
|
||||
"start_time": "2025-09-07T14:35:03.918659Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"from pathlib import Path\n",
|
||||
"notebook_dir = Path.cwd()\n",
|
||||
"if notebook_dir.name == 'notebooks':\n",
|
||||
" project_root = notebook_dir.parent\n",
|
||||
"else:\n",
|
||||
" project_root = Path.cwd()\n",
|
||||
"\n",
|
||||
"# Add project root to the beginning of sys.path\n",
|
||||
"project_root_str = str(project_root.absolute())\n",
|
||||
"if project_root_str not in sys.path:\n",
|
||||
" sys.path.insert(0, project_root_str)\n",
|
||||
"\n",
|
||||
"print(f\"📁 Project root: {project_root_str}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "af584b935cbdc8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, we will begin with a clean slate, by removing any previous Cognee data:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dd47383aa9519465",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:35:06.194073Z",
|
||||
"start_time": "2025-09-07T14:35:03.929446Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"await cognee.prune.prune_data()\n",
|
||||
"await cognee.prune.prune_system(metadata=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93c9783037715026",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### First data ingestion: Exploring Guido's Python Contributions\n",
|
||||
"\n",
|
||||
"We'll begin with a document that contains detailed PRs and commits from Guido van Rossum's work on mypy and CPython, showing real-world examples of Python's creator solving type system and language design challenges.\n",
|
||||
"\n",
|
||||
"We'll use Cognee's `add()` and `cognify()` functions to ingest this data and build a knowledge graph that connects Guido's development patterns with Python best practices."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b8743ed520b4de37",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:39:53.511862Z",
|
||||
"start_time": "2025-09-07T14:35:06.228778Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cognee\n",
|
||||
"\n",
|
||||
"result = await cognee.add(\n",
|
||||
" os.path.abspath(\"data/guido_contributions.json\"),\n",
|
||||
" node_set=[\"guido_data\"]\n",
|
||||
")\n",
|
||||
"await cognee.cognify(temporal_cognify=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f08b362cbf12b398",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:39:53.561679Z",
|
||||
"start_time": "2025-09-07T14:39:53.559528Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = await cognee.search(\"Show me commits\")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "10d582d02ead905e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### What's just happened?\n",
|
||||
"The `search()` function uses natural language to query a knowledge graph containing Guido's development history.\n",
|
||||
"Unlike traditional databases, Cognee understands the relationships between commits, language features, design decisions, and evolution over time.\n",
|
||||
"\n",
|
||||
"Cognee also allows you to visualize the graphs created:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1fb068f422bda6cf",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:39:53.688017Z",
|
||||
"start_time": "2025-09-07T14:39:53.598467Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cognee import visualize_graph\n",
|
||||
"await visualize_graph('./guido_contributions.html')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f24341c97d6eaccb",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:39:53.733197Z",
|
||||
"start_time": "2025-09-07T14:39:53.729922Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import IFrame, HTML, display\n",
|
||||
"display(IFrame(\"./guido_contributions.html\", width=\"100%\", height=\"500\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3418aa17bf35e3bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Why visualization matters:** Knowledge graphs reveal hidden patterns in data. In this case, patterins in Guido's contributions to Python's development. The interactive visualization shows how different projects (CPython, mypy, PEPs), features, and time periods connect - insights that show Python's thoughtful evolution.\n",
|
||||
"\n",
|
||||
"Take a moment to explore the graph. Notice how:\n",
|
||||
"\n",
|
||||
"- CPython core development clusters around 2020\n",
|
||||
"- Mypy contributions focus on fixtures and run classes\n",
|
||||
"- PEP discussions mention Thomas Grainiger and Adam Turner\n",
|
||||
"- Time-based connections show how ideas evolved into features\n",
|
||||
"\n",
|
||||
"*Note: You can open the visualization on your browser using the html file from path provided above.*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e8d9094a09ae05d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Ingesting more data\n",
|
||||
"\n",
|
||||
"Now we'll add the remaining data and see how they connections emerge between Guido's contributions, Python best practices, and user conversations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5315318324968f0f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:40:48.589875Z",
|
||||
"start_time": "2025-09-07T14:39:53.785054Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"await cognee.add(os.path.abspath(\"data/copilot_conversations.json\"), node_set=[\"developer_data\"])\n",
|
||||
"await cognee.add(os.path.abspath(\"data/my_developer_rules.md\"), node_set=[\"developer_data\"])\n",
|
||||
"await cognee.add(os.path.abspath(\"data/zen_principles.md\"), node_set=[\"principles_data\"])\n",
|
||||
"await cognee.add(os.path.abspath(\"data/pep_style_guide.md\"), node_set=[\"principles_data\"])\n",
|
||||
"\n",
|
||||
"await cognee.cognify(temporal_cognify=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "98b69c45db2fca3",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:40:51.305617Z",
|
||||
"start_time": "2025-09-07T14:40:48.605622Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = await cognee.search(\n",
|
||||
" \"What Python type hinting challenges did I face, and how does Guido approach similar problems in mypy?\",\n",
|
||||
" query_type=cognee.SearchType.GRAPH_COMPLETION\n",
|
||||
")\n",
|
||||
"print(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c49c4c252036fa1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You'll see that cognee has connected your Python development challenges with Guido's approaches, revealing patterns like:\n",
|
||||
"\n",
|
||||
"- \"Type hint implementation failed due to circular imports - similar to issue Guido solved in mypy PR #1234\"\n",
|
||||
"- \"Performance bottleneck in list comprehension matches pattern Guido optimized in CPython commit abc123\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a1f4606bfed8fc45",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Memify\n",
|
||||
"\n",
|
||||
"Let's now introduce the memory functions. These algorithms run on top of your semantic layer, connecting the dots and improving the search.\n",
|
||||
"\n",
|
||||
"Memify is customizable and can use any transformation you'd like to write."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "20234960f7566b15",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:43:42.390990Z",
|
||||
"start_time": "2025-09-07T14:40:51.321301Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"await cognee.memify()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "58d3ccec16f67c24",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**What `memify()` does for Python:** This advanced function uses AI to:\n",
|
||||
"\n",
|
||||
"- **Infer rule patterns** from your code (e.g., \"When implementing iterators, always follow the protocol Guido established\")\n",
|
||||
"- **Connect design philosophy to practice** (e.g., linking \"explicit is better than implicit\" to your type hinting decisions)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a304033f9f0f5dcf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's see how the system has connected your Python development patterns with established best practices:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "518fa9b17a604657",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:43:44.837614Z",
|
||||
"start_time": "2025-09-07T14:43:42.465351Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Search for connections between your async patterns and Python philosophy\n",
|
||||
"results = await cognee.search(\n",
|
||||
" query_text= \"How does my AsyncWebScraper implementation align with Python's design principles?\",\n",
|
||||
" query_type=cognee.SearchType.GRAPH_COMPLETION\n",
|
||||
")\n",
|
||||
"print(\"Python Pattern Analysis:\", results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2c77c1582ab9fc32",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Nodeset filtering\n",
|
||||
"\n",
|
||||
"You may have noticed that we added different documents to different datasets. This allows us to narrow our retrieval at search time.\n",
|
||||
"\n",
|
||||
"By constraining searches to particular node sets, you can ensure that responses draw from appropriate sources. Questions about style guidelines can be directed specifically to PEP documents and design principles, while implementation questions can focus on actual code examples and developer experiences."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c64036c03abe41e7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:43:49.542281Z",
|
||||
"start_time": "2025-09-07T14:43:44.852455Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cognee.modules.engine.models.node_set import NodeSet\n",
|
||||
"results = await cognee.search(\n",
|
||||
" query_text= \"How should variables be named?\",\n",
|
||||
" query_type=cognee.SearchType.GRAPH_COMPLETION,\n",
|
||||
" node_type=NodeSet,\n",
|
||||
" node_name=['principles_data']\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c641b8b7e50dd2ae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Temporal graphs\n",
|
||||
"\n",
|
||||
"The `temporal_cognify` option enabled during graph construction provides powerful capabilities for understanding how Python development has evolved over time. This temporal awareness allows queries that explore trends, identify periods of intense development activity, or understand how specific features emerged and matured.\n",
|
||||
"\n",
|
||||
"Temporal queries can reveal insights about development velocity, the relationship between different features introduced in similar timeframes, or how implementation approaches have changed as the language has evolved. This historical perspective provides valuable context for understanding current best practices and anticipating future directions.\n",
|
||||
"\n",
|
||||
"For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "28e7d5a75e076b8f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:43:55.278031Z",
|
||||
"start_time": "2025-09-07T14:43:49.555704Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = await cognee.search(\n",
|
||||
" query_text = \"What can we learn from Guido's contributions in 2025?\",\n",
|
||||
" query_type=cognee.SearchType.TEMPORAL\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec6cf074a6c272ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Feedback loops\n",
|
||||
"\n",
|
||||
"cognee supports continuous improvement through a feedback mechanism that captures the utility and relevance of search results based on actualy user messages. This creates a learning system that adapts to your specific needs and preferences over time.\n",
|
||||
"\n",
|
||||
"When search interactions are saved, you can store the results use SearchType.FEEDBACK to provide feedback about the last_k answer from the system. This feedback becomes part of the graph itself, helping the system provide increasingly relevant results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "67dec85a658aad76",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:43:58.472950Z",
|
||||
"start_time": "2025-09-07T14:43:55.288993Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"answer = await cognee.search(\n",
|
||||
" query_type=cognee.SearchType.GRAPH_COMPLETION,\n",
|
||||
" query_text=\"What is the most zen thing about Python?\",\n",
|
||||
" save_interaction=True, # This enables feedback later\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(answer)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2f64d90e6dadaefb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This enables giving feedback, that itself can also be stored in the graph and will be included in future searches:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "870149ea58fd109c",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-09-07T14:44:00.503091Z",
|
||||
"start_time": "2025-09-07T14:43:58.480893Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"feedback = await cognee.search(\n",
|
||||
" query_type=cognee.SearchType.FEEDBACK,\n",
|
||||
" query_text=\"Last result was useful, I like code that complies with best practices.\",\n",
|
||||
" last_k=1,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
1646
poetry.lock
generated
File diff suppressed because it is too large
|
|
@ -20,49 +20,38 @@ classifiers = [
|
|||
"Operating System :: Microsoft :: Windows",
|
||||
]
|
||||
dependencies = [
|
||||
"openai>=1.80.1,<2.0.0",
|
||||
"openai>=1.80.1",
|
||||
"python-dotenv>=1.0.1,<2.0.0",
|
||||
"pydantic>=2.10.5,<3.0.0",
|
||||
"pydantic-settings>=2.2.1,<3",
|
||||
"typing_extensions>=4.12.2,<5.0.0",
|
||||
"nltk>=3.9.1,<4.0.0",
|
||||
"numpy>=1.26.4, <=4.0.0",
|
||||
"pandas>=2.2.2,<3.0.0",
|
||||
# Note: New s3fs and boto3 versions don't work well together
|
||||
# Always use compatible fixed versions of these two dependencies
|
||||
"s3fs[boto3]==2025.3.2",
|
||||
"sqlalchemy>=2.0.39,<3.0.0",
|
||||
"aiosqlite>=0.20.0,<1.0.0",
|
||||
"tiktoken>=0.8.0,<1.0.0",
|
||||
"litellm>=1.71.0, <2.0.0",
|
||||
"litellm>=1.76.0",
|
||||
"instructor>=1.9.1,<2.0.0",
|
||||
"langfuse>=2.32.0,<3",
|
||||
"filetype>=1.2.0,<2.0.0",
|
||||
"aiohttp>=3.11.14,<4.0.0",
|
||||
"aiofiles>=23.2.1,<24.0.0",
|
||||
"rdflib>=7.1.4,<7.2.0",
|
||||
"pypdf>=4.1.0,<7.0.0",
|
||||
"jinja2>=3.1.3,<4",
|
||||
"matplotlib>=3.8.3,<4",
|
||||
"networkx>=3.4.2,<4",
|
||||
"lancedb>=0.24.0,<1.0.0",
|
||||
"nbformat>=5.7.0,<6.0.0",
|
||||
"alembic>=1.13.3,<2",
|
||||
"pre-commit>=4.0.1,<5",
|
||||
"scikit-learn>=1.6.1,<2",
|
||||
"limits>=4.4.1,<5",
|
||||
"fastapi>=0.115.7,<1.0.0",
|
||||
"python-multipart>=0.0.20,<1.0.0",
|
||||
"fastapi-users[sqlalchemy]>=14.0.1,<15.0.0",
|
||||
"dlt[sqlalchemy]>=1.9.0,<2",
|
||||
"sentry-sdk[fastapi]>=2.9.0,<3",
|
||||
"structlog>=25.2.0,<26",
|
||||
"baml-py (==0.206.0)",
|
||||
"pympler>=1.1,<2.0.0",
|
||||
"onnxruntime>=1.0.0,<2.0.0",
|
||||
"pylance>=0.22.0,<1.0.0",
|
||||
"onnxruntime<=1.22.1",
|
||||
"pylance>=0.22.0,<=0.36.0",
|
||||
"kuzu (==0.11.0)",
|
||||
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
|
||||
"fastembed<=0.6.0",
|
||||
"networkx>=3.4.2,<4",
|
||||
"uvicorn>=0.34.0,<1.0.0",
|
||||
"gunicorn>=20.1.0,<24",
|
||||
"websockets>=15.0.1,<16.0.0"
|
||||
|
|
@ -70,6 +59,7 @@ dependencies = [
|
|||
|
||||
[project.optional-dependencies]
|
||||
api=[]
|
||||
|
||||
distributed = [
|
||||
"modal>=1.0.5,<2.0.0",
|
||||
]
|
||||
|
|
@ -115,25 +105,28 @@ codegraph = [
|
|||
evals = [
|
||||
"plotly>=6.0.0,<7",
|
||||
"gdown>=5.2.0,<6",
|
||||
"pandas>=2.2.2,<3.0.0",
|
||||
"matplotlib>=3.8.3,<4",
|
||||
"scikit-learn>=1.6.1,<2",
|
||||
]
|
||||
gui = [
|
||||
"pyside6>=6.8.3,<7",
|
||||
"qasync>=0.27.1,<0.28",
|
||||
]
|
||||
|
||||
graphiti = ["graphiti-core>=0.7.0,<0.8"]
|
||||
# Note: New s3fs and boto3 versions don't work well together
|
||||
# Always use compatible fixed versions of these two dependencies
|
||||
aws = ["s3fs[boto3]==2025.3.2"]
|
||||
dlt = ["dlt[sqlalchemy]>=1.9.0,<2"]
|
||||
baml = ["baml-py (==0.206.0)"]
|
||||
dev = [
|
||||
"pytest>=7.4.0,<8",
|
||||
"pytest-cov>=6.1.1,<7.0.0",
|
||||
"pytest-asyncio>=0.21.1,<0.22",
|
||||
"coverage>=7.3.2,<8",
|
||||
"mypy>=1.7.1,<2",
|
||||
"pre-commit>=4.0.1,<5",
|
||||
"notebook>=7.1.0,<8",
|
||||
"deptry>=0.20.0,<0.21",
|
||||
"pylint>=3.0.3,<4",
|
||||
"ruff>=0.9.2,<1.0.0",
|
||||
"ruff>=0.9.2,<=0.13.1",
|
||||
"tweepy>=4.14.0,<5.0.0",
|
||||
"gitpython>=3.1.43,<4",
|
||||
"mkdocs-material>=9.5.42,<10",
|
||||
|
|
@ -142,11 +135,14 @@ dev = [
|
|||
]
|
||||
debug = ["debugpy>=1.8.9,<2.0.0"]
|
||||
|
||||
monitoring = ["sentry-sdk[fastapi]>=2.9.0,<3", "langfuse>=2.32.0,<3"]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://www.cognee.ai"
|
||||
Repository = "https://github.com/topoteretes/cognee"
|
||||
|
||||
[project.scripts]
|
||||
cognee = "cognee.cli._cognee:main"
|
||||
cognee-cli = "cognee.cli._cognee:main"
|
||||
|
||||
[build-system]
|
||||
|
|
|
|||