feat: add initial cognee frontend

Boris Arzentar 2024-05-17 13:42:14 +02:00
parent a5966a5dc0
commit 1ac28f4cb8
53 changed files with 5451 additions and 86 deletions

1
.gitignore vendored

@@ -124,6 +124,7 @@ celerybeat.pid
# Environments
.env
.env.local
.venv
env/
venv/

3
cognee-frontend/.eslintrc.json Normal file

@@ -0,0 +1,3 @@
{
"extends": "next/core-web-vitals"
}

36
cognee-frontend/.gitignore vendored Normal file

@@ -0,0 +1,36 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts

36
cognee-frontend/README.md Normal file

@@ -0,0 +1,36 @@
This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).
## Getting Started
First, run the development server:
```bash
npm run dev
# or
yarn dev
# or
pnpm dev
# or
bun dev
```
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font.
## Learn More
To learn more about Next.js, take a look at the following resources:
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome!
## Deploy on Vercel
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details.

4
cognee-frontend/next.config.mjs Normal file

@@ -0,0 +1,4 @@
/** @type {import('next').NextConfig} */
const nextConfig = {};
export default nextConfig;

3983
cognee-frontend/package-lock.json generated Normal file

File diff suppressed because it is too large

28
cognee-frontend/package.json Normal file

@@ -0,0 +1,28 @@
{
"name": "cognee-frontend",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"classnames": "^2.5.1",
"next": "14.2.3",
"ohmy-ui": "^0.0.1",
"react": "^18",
"react-dom": "^18",
"uuid": "^9.0.1"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"@types/uuid": "^9.0.8",
"eslint": "^8",
"eslint-config-next": "14.2.3",
"typescript": "^5"
}
}

1
cognee-frontend/public/next.svg Normal file

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 394 80"><path fill="#000" d="M262 0h68.5v12.7h-27.2v66.6h-13.6V12.7H262V0ZM149 0v12.7H94v20.4h44.3v12.6H94v21h55v12.6H80.5V0h68.7zm34.3 0h-17.8l63.8 79.4h17.9l-32-39.7 32-39.6h-17.9l-23 28.6-23-28.6zm18.3 56.7-9-11-27.1 33.7h17.8l18.3-22.7z"/><path fill="#000" d="M81 79.3 17 0H0v79.3h13.6V17l50.2 62.3H81Zm252.6-.4c-1 0-1.8-.4-2.5-1s-1.1-1.6-1.1-2.6.3-1.8 1-2.5 1.6-1 2.6-1 1.8.3 2.5 1a3.4 3.4 0 0 1 .6 4.3 3.7 3.7 0 0 1-3 1.8zm23.2-33.5h6v23.3c0 2.1-.4 4-1.3 5.5a9.1 9.1 0 0 1-3.8 3.5c-1.6.8-3.5 1.3-5.7 1.3-2 0-3.7-.4-5.3-1s-2.8-1.8-3.7-3.2c-.9-1.3-1.4-3-1.4-5h6c.1.8.3 1.6.7 2.2s1 1.2 1.6 1.5c.7.4 1.5.5 2.4.5 1 0 1.8-.2 2.4-.6a4 4 0 0 0 1.6-1.8c.3-.8.5-1.8.5-3V45.5zm30.9 9.1a4.4 4.4 0 0 0-2-3.3 7.5 7.5 0 0 0-4.3-1.1c-1.3 0-2.4.2-3.3.5-.9.4-1.6 1-2 1.6a3.5 3.5 0 0 0-.3 4c.3.5.7.9 1.3 1.2l1.8 1 2 .5 3.2.8c1.3.3 2.5.7 3.7 1.2a13 13 0 0 1 3.2 1.8 8.1 8.1 0 0 1 3 6.5c0 2-.5 3.7-1.5 5.1a10 10 0 0 1-4.4 3.5c-1.8.8-4.1 1.2-6.8 1.2-2.6 0-4.9-.4-6.8-1.2-2-.8-3.4-2-4.5-3.5a10 10 0 0 1-1.7-5.6h6a5 5 0 0 0 3.5 4.6c1 .4 2.2.6 3.4.6 1.3 0 2.5-.2 3.5-.6 1-.4 1.8-1 2.4-1.7a4 4 0 0 0 .8-2.4c0-.9-.2-1.6-.7-2.2a11 11 0 0 0-2.1-1.4l-3.2-1-3.8-1c-2.8-.7-5-1.7-6.6-3.2a7.2 7.2 0 0 1-2.4-5.7 8 8 0 0 1 1.7-5 10 10 0 0 1 4.3-3.5c2-.8 4-1.2 6.4-1.2 2.3 0 4.4.4 6.2 1.2 1.8.8 3.2 2 4.3 3.4 1 1.4 1.5 3 1.5 5h-5.8z"/></svg>


1
cognee-frontend/public/vercel.svg Normal file

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 283 64"><path fill="black" d="M141 16c-11 0-19 7-19 18s9 18 20 18c7 0 13-3 16-7l-7-5c-2 3-6 4-9 4-5 0-9-3-10-7h28v-3c0-11-8-18-19-18zm-9 15c1-4 4-7 9-7s8 3 9 7h-18zm117-15c-11 0-19 7-19 18s9 18 20 18c6 0 12-3 16-7l-8-5c-2 3-5 4-8 4-5 0-9-3-11-7h28l1-3c0-11-8-18-19-18zm-10 15c2-4 5-7 10-7s8 3 9 7h-19zm-39 3c0 6 4 10 10 10 4 0 7-2 9-5l8 5c-3 5-9 8-17 8-11 0-19-7-19-18s8-18 19-18c8 0 14 3 17 8l-8 5c-2-3-5-5-9-5-6 0-10 4-10 10zm83-29v46h-9V5h9zM37 0l37 64H0L37 0zm92 5-27 48L74 5h10l18 30 17-30h10zm59 12v10l-3-1c-6 0-10 4-10 10v15h-9V17h9v9c0-5 6-9 13-9z"/></svg>


cognee-frontend/src/app/favicon.ico Normal file

Binary file not shown (25 KiB).

107
cognee-frontend/src/app/globals.css Normal file

@@ -0,0 +1,107 @@
:root {
--max-width: 1100px;
--border-radius: 12px;
--font-mono: ui-monospace, Menlo, Monaco, "Cascadia Mono", "Segoe UI Mono",
"Roboto Mono", "Oxygen Mono", "Ubuntu Monospace", "Source Code Pro",
"Fira Mono", "Droid Sans Mono", "Courier New", monospace;
--foreground-rgb: 0, 0, 0;
--background-start-rgb: 214, 219, 220;
--background-end-rgb: 255, 255, 255;
--primary-glow: conic-gradient(
from 180deg at 50% 50%,
#16abff33 0deg,
#0885ff33 55deg,
#54d6ff33 120deg,
#0071ff33 160deg,
transparent 360deg
);
--secondary-glow: radial-gradient(
rgba(255, 255, 255, 1),
rgba(255, 255, 255, 0)
);
--tile-start-rgb: 239, 245, 249;
--tile-end-rgb: 228, 232, 233;
--tile-border: conic-gradient(
#00000080,
#00000040,
#00000030,
#00000020,
#00000010,
#00000010,
#00000080
);
--callout-rgb: 238, 240, 241;
--callout-border-rgb: 172, 175, 176;
--card-rgb: 180, 185, 188;
--card-border-rgb: 131, 134, 135;
}
@media (prefers-color-scheme: dark) {
:root {
--foreground-rgb: 255, 255, 255;
--background-start-rgb: 0, 0, 0;
--background-end-rgb: 0, 0, 0;
--primary-glow: radial-gradient(rgba(1, 65, 255, 0.4), rgba(1, 65, 255, 0));
--secondary-glow: linear-gradient(
to bottom right,
rgba(1, 65, 255, 0),
rgba(1, 65, 255, 0),
rgba(1, 65, 255, 0.3)
);
--tile-start-rgb: 2, 13, 46;
--tile-end-rgb: 2, 5, 19;
--tile-border: conic-gradient(
#ffffff80,
#ffffff40,
#ffffff30,
#ffffff20,
#ffffff10,
#ffffff10,
#ffffff80
);
--callout-rgb: 20, 20, 20;
--callout-border-rgb: 108, 108, 108;
--card-rgb: 100, 100, 100;
--card-border-rgb: 200, 200, 200;
}
}
* {
box-sizing: border-box;
padding: 0;
margin: 0;
}
html,
body {
max-width: 100vw;
overflow-x: hidden;
}
body {
color: rgb(var(--foreground-rgb));
background: linear-gradient(
to bottom,
transparent,
rgb(var(--background-end-rgb))
)
rgb(var(--background-start-rgb));
}
a {
color: inherit;
text-decoration: none;
}
@media (prefers-color-scheme: dark) {
html {
color-scheme: dark;
}
}

22
cognee-frontend/src/app/layout.tsx Normal file

@@ -0,0 +1,22 @@
import type { Metadata } from "next";
import { Inter } from "next/font/google";
import "./globals.css";
const inter = Inter({ subsets: ["latin"] });
export const metadata: Metadata = {
title: "Create Next App",
description: "Generated by create next app",
};
export default function RootLayout({
children,
}: Readonly<{
children: React.ReactNode;
}>) {
return (
<html lang="en">
<body className={inter.className}>{children}</body>
</html>
);
}

51
cognee-frontend/src/app/page.module.css Normal file

@@ -0,0 +1,51 @@
.main {
display: flex;
flex-direction: row;
padding: 32px;
min-height: 100vh;
gap: 32px;
}
.main.noData {
flex-direction: column;
}
.datasetsView {
width: 50%;
transition: width 0.3s ease-in-out;
padding-right: 8px;
border-right: 2px solid white;
}
.openDatasetData {
width: 30%;
}
.dataView {
width: 70%;
animation: grow-width 0.3s ease-in-out;
}
@keyframes grow-width {
0% {
width: 50%;
}
100% {
width: 70%;
}
}
.noDataWizardContainer {
width: 100%;
margin-top: 96px;
}
.wizardDataset {
border: 2px solid white;
border-radius: var(--border-radius);
padding: 24px;
min-width: 350px;
}
.fileSize {
display: block;
}

303
cognee-frontend/src/app/page.tsx Normal file

@@ -0,0 +1,303 @@
'use client';
import { Fragment, useCallback, useEffect, useState } from 'react';
import styles from "./page.module.css";
import { CTAButton, H1, Notification, NotificationContainer, Stack, Text, UploadInput, useBoolean, useNotifications } from 'ohmy-ui';
import useDatasets from '@/modules/ingestion/useDatasets';
import DataView, { Data } from '@/modules/ingestion/DataView';
import DatasetsView from '@/modules/ingestion/DatasetsView';
import classNames from 'classnames';
import { TextLogo, LoadingIndicator } from '@/modules/app';
import { IFrameView } from '@/ui';
export default function Home() {
const {
datasets,
refreshDatasets,
} = useDatasets();
const [datasetData, setDatasetData] = useState<Data[]>([]);
const [selectedDataset, setSelectedDataset] = useState<string | null>(null);
const {
value: isWizardShown,
setFalse: hideWizard,
} = useBoolean(true);
const [wizardStep, setWizardStep] = useState<'add' | 'upload' | 'cognify' | 'explore'>('add');
const [wizardData, setWizardData] = useState<File[] | null>(null);
// useEffect(() => {
// if (datasets.length > 0) {
// hideWizard();
// }
// }, [datasets, hideWizard]);
useEffect(() => {
refreshDatasets();
}, [refreshDatasets]);
const openDatasetData = (dataset: { id: string }) => {
fetch(`http://localhost:8000/datasets/${dataset.id}/data`)
.then((response) => response.json())
.then(setDatasetData)
.then(() => setSelectedDataset(dataset.id));
};
const closeDatasetData = () => {
setDatasetData([]);
setSelectedDataset(null);
};
const { notifications, showNotification } = useNotifications();
const handleDataAdd = useCallback((dataset: { id: string }, files: File[]) => {
const formData = new FormData();
formData.append('datasetId', dataset.id);
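// Note: only the first of the selected files is uploaded per request.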
const file = files[0];
formData.append('data', file, file.name);
return fetch('http://localhost:8000/add', {
method: 'POST',
body: formData,
})
.then(() => {
showNotification("Data added successfully.", 5000);
openDatasetData(dataset);
});
}, [showNotification])
const addWizardData = useCallback((files: File[]) => {
setWizardData(files);
setWizardStep('upload');
}, []);
const {
value: isUploadRunning,
setTrue: disableUploadRun,
setFalse: enableUploadRun,
} = useBoolean(false);
const uploadWizardData = useCallback(() => {
disableUploadRun()
handleDataAdd({ id: 'main' }, wizardData!)
.then(() => {
setWizardStep('cognify')
})
.finally(() => enableUploadRun());
}, [disableUploadRun, enableUploadRun, handleDataAdd, wizardData]);
const cognifyDataset = useCallback((dataset: { id: string }) => {
showNotification(`Cognification started for dataset "${dataset.id}".`, 5000);
return fetch('http://localhost:8000/cognify', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
datasets: [dataset.id],
}),
})
.then(() => {
showNotification(`Dataset "${dataset.id}" cognified.`, 5000);
})
.catch((error) => {
console.error(error);
});
}, [showNotification]);
const {
value: isCognifyRunning,
setTrue: disableCognifyRun,
setFalse: enableCognifyRun,
} = useBoolean(false);
const cognifyWizardData = useCallback(() => {
disableCognifyRun();
cognifyDataset({ id: 'main' })
.then(() => {
setWizardStep('explore');
})
.finally(() => enableCognifyRun());
}, [cognifyDataset, disableCognifyRun, enableCognifyRun]);
const deleteDataset = useCallback((dataset: { id: string }) => {
fetch(`http://localhost:8000/datasets/${dataset.id}`, {
method: 'DELETE',
})
.then(() => {
showNotification(`Dataset "${dataset.id}" deleted.`, 5000);
refreshDatasets();
})
}, [refreshDatasets, showNotification]);
interface ExplorationWindowProps {
url: string;
title: string;
}
const [explorationWindowProps, setExplorationWindowProps] = useState<ExplorationWindowProps | null>(null);
const {
value: isExplorationWindowShown,
setTrue: showExplorationWindow,
setFalse: hideExplorationWindow,
} = useBoolean(false);
const openExplorationWindow = useCallback((explorationWindowProps: ExplorationWindowProps) => {
setExplorationWindowProps(explorationWindowProps);
showExplorationWindow();
}, [showExplorationWindow]);
const exploreDataset = useCallback((dataset: { id: string }) => {
fetch(`http://localhost:8000/datasets/${dataset.id}/graph`)
.then((response) => response.text())
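// The graph endpoint returns a JSON-encoded string, so strip the quotes before using it as a URL.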
.then((text) => text.replace(/"/g, ''))
.then((graphUrl: string) => {
openExplorationWindow({
url: graphUrl,
title: dataset.id,
});
});
}, [openExplorationWindow]);
const exploreWizardData = useCallback(() => {
exploreDataset({ id: 'main' });
}, [exploreDataset]);
const closeWizard = useCallback(() => {
hideExplorationWindow();
hideWizard();
}, [hideExplorationWindow, hideWizard]);
if (isWizardShown) {
return (
<main className={classNames(styles.main, styles.noData)}>
<TextLogo />
<Stack gap="4" orientation="vertical" align="center/center" className={styles.noDataWizardContainer}>
<H1>Add Knowledge</H1>
<Stack gap="4" orientation="vertical" align="center/center">
{wizardStep === 'upload' && wizardData && (
<div className={styles.wizardDataset}>
{wizardData.map((file, index) => (
<Fragment key={index}>
<Text bold>{file.name}</Text>
<Text className={styles.fileSize} size="small">
{getBiggestUnitSize(file.size)}
</Text>
</Fragment>
))}
</div>
)}
{(wizardStep === 'add' || wizardStep === 'upload') && (
<Text>No data in the system. Let&apos;s add your data.</Text>
)}
{wizardStep === 'cognify' && (
<Text>Process data and make it explorable.</Text>
)}
{wizardStep === 'add' && (
<UploadInput onChange={addWizardData}>
<Text>Add data</Text>
</UploadInput>
)}
{wizardStep === 'upload' && (
<CTAButton disabled={isUploadRunning} onClick={uploadWizardData}>
<Stack gap="2" orientation="horizontal" align="center/center">
<Text>Upload</Text>
{isUploadRunning && (
<LoadingIndicator />
)}
</Stack>
</CTAButton>
)}
{wizardStep === 'cognify' && (
<>
{isCognifyRunning && (
<Text>Processing may take a minute, depending on data size.</Text>
)}
<CTAButton disabled={isCognifyRunning} onClick={cognifyWizardData}>
<Stack gap="2" orientation="horizontal" align="center/center">
<Text>Cognify</Text>
{isCognifyRunning && (
<LoadingIndicator />
)}
</Stack>
</CTAButton>
</>
)}
{wizardStep === 'explore' && (
<>
{!isExplorationWindowShown && (
<CTAButton onClick={exploreWizardData}>
<Text>Start exploring the data</Text>
</CTAButton>
)}
{isExplorationWindowShown && (
<IFrameView
src={explorationWindowProps!.url}
title={explorationWindowProps!.title}
onClose={closeWizard}
/>
)}
</>
)}
</Stack>
</Stack>
</main>
);
}
return (
<main className={styles.main}>
<div className={classNames(styles.datasetsView, {
[styles.openDatasetData]: datasetData.length > 0,
})}>
<DatasetsView
datasets={datasets}
onDataAdd={handleDataAdd}
onDatasetClick={openDatasetData}
onDatasetCognify={cognifyDataset}
onDatasetDelete={deleteDataset}
onDatasetExplore={exploreDataset}
/>
{isExplorationWindowShown && (
<IFrameView
src={explorationWindowProps!.url}
title={explorationWindowProps!.title}
onClose={hideExplorationWindow}
/>
)}
</div>
{datasetData.length > 0 && selectedDataset && (
<div className={styles.dataView}>
<DataView
data={datasetData}
datasetId={selectedDataset}
onClose={closeDatasetData}
onDataAdd={handleDataAdd}
/>
</div>
)}
<NotificationContainer gap="1" bottom right>
{notifications.map((notification, index) => (
<Notification
key={notification.id}
isOpen={notification.isOpen}
style={{ top: `${index * 60}px` }}
expireIn={notification.expireIn}
onClose={notification.delete}
>
<Text>{notification.message}</Text>
</Notification>
))}
</NotificationContainer>
</main>
);
}
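// Converts a byte count to the largest fitting unit (B, KB, MB, GB).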
function getBiggestUnitSize(sizeInBytes: number): string {
const units = ['B', 'KB', 'MB', 'GB'];
let i = 0;
while (sizeInBytes >= 1024 && i < units.length - 1) {
sizeInBytes /= 1024;
i++;
}
return `${sizeInBytes.toFixed(2)} ${units[i]}`;
}

19
cognee-frontend/src/modules/app/Loading/LoadingIndicator.module.css Normal file

@@ -0,0 +1,19 @@
.loadingIndicator {
width: 16px;
height: 16px;
border-radius: 50%;
border: 2px solid white;
border-top-color: transparent;
border-bottom-color: transparent;
animation: spin 2s linear infinite;
}
@keyframes spin {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}

5
cognee-frontend/src/modules/app/Loading/LoadingIndicator.tsx Normal file

@@ -0,0 +1,5 @@
import styles from './LoadingIndicator.module.css';
export default function LoadingIndicator() {
return <div className={styles.loadingIndicator} />
}

1
cognee-frontend/src/modules/app/Loading/index.ts Normal file

@@ -0,0 +1 @@
export { default as LoadingIndicator } from './LoadingIndicator';

13
cognee-frontend/src/modules/app/Logo/Logo.tsx Normal file

@@ -0,0 +1,13 @@
export default function Logo({ width = 29, height = 32, className = '' }) {
return (
<svg width={width} height={height} viewBox="0 0 29 32" fill="none" xmlns="http://www.w3.org/2000/svg" className={className}>
<path d="M15.3691 31.0517C13.0426 30.9916 10.7881 30.2321 8.89943 28.8722C6.69031 27.2555 4.876 25.1595 3.59261 22.7415L3.56616 22.6957C2.14681 20.3193 1.27502 17.6565 1.01422 14.9008C0.811029 12.541 1.29935 10.1731 2.41954 8.08614C3.63274 5.92912 5.41754 4.14864 7.5775 2.94069C9.65642 1.6311 12.063 0.935339 14.52 0.933546C16.8401 0.992115 19.0886 1.74958 20.9713 3.10686C23.1849 4.73118 25.0019 6.83563 26.2861 9.26236L26.3126 9.30818C27.7245 11.677 28.591 14.3306 28.849 17.0762C29.0537 19.4354 28.5744 21.8036 27.4686 23.8976C26.2597 26.0637 24.4714 27.8503 22.3042 29.0573C20.2272 30.3637 17.8228 31.0552 15.3691 31.0517ZM21.918 24.2438C22.6157 23.6083 23.1845 22.8444 23.5932 21.9938C24.2654 20.5264 24.4947 18.8948 24.2529 17.299C23.951 15.3261 23.27 13.4303 22.2473 11.7164L22.219 11.6674C21.2879 9.90905 20.021 8.35045 18.4898 7.07987C17.2707 6.06818 15.7705 5.45503 14.1917 5.32326C13.2645 5.26502 12.3349 5.39081 11.4565 5.69339L21.918 24.2438ZM6.28845 10.0042C5.6116 11.4657 5.37963 13.094 5.62144 14.6864C5.92664 16.6685 6.61266 18.5729 7.64165 20.2943L7.6681 20.3401C8.59695 22.0928 9.86088 23.6461 11.3882 24.9118C12.61 25.9214 14.1095 26.5364 15.6883 26.6756C16.6098 26.7374 17.5343 26.6166 18.409 26.3203L7.95451 7.77844C7.26261 8.4076 6.69714 9.16303 6.28845 10.0042Z" fill="url(#paint0_linear_28_77)"/>
<defs>
<linearGradient id="paint0_linear_28_77" x1="21" y1="-1.5" x2="4.55895" y2="29.4033" gradientUnits="userSpaceOnUse">
<stop stopColor="#FF3CAC"/>
<stop offset="1" stopColor="#5200FF"/>
</linearGradient>
</defs>
</svg>
);
}

cognee-frontend/src/modules/app/Logo/TextLogo.tsx Normal file

File diff suppressed because one or more lines are too long

2
cognee-frontend/src/modules/app/Logo/index.ts Normal file

@@ -0,0 +1,2 @@
export { default as Logo } from './Logo';
export { default as TextLogo } from './TextLogo';

3
cognee-frontend/src/modules/app/index.ts Normal file

@@ -0,0 +1,3 @@
export { default as Logo } from './Logo/Logo';
export { default as TextLogo } from './Logo/TextLogo';
export { default as LoadingIndicator } from './Loading/LoadingIndicator';

23
cognee-frontend/src/modules/ingestion/DataView/DataView.module.css Normal file

@@ -0,0 +1,23 @@
.tableContainer {
overflow: auto;
padding-bottom: 32px;
min-height: 300px;
}
.datasetMenu {
background-color: var(--global-background-default);
border-radius: var(--border-radius);
padding: 4px;
}
.dataTable {
border-collapse: collapse;
}
.dataTable td, .dataTable th {
vertical-align: top;
padding: 8px;
border: 1px solid white;
margin: 0;
white-space: nowrap;
}

122
cognee-frontend/src/modules/ingestion/DataView/DataView.tsx Normal file

@@ -0,0 +1,122 @@
import { useCallback, useState } from 'react';
import {
DropdownMenu,
GhostButton,
Stack,
Text,
UploadInput,
CloseIcon,
} from "ohmy-ui";
import styles from "./DataView.module.css";
import RawDataPreview from './RawDataPreview';
export interface Data {
id: string;
name: string;
filePath: string;
mimeType: string;
keywords: string[];
}
interface DatasetLike {
id: string;
}
interface DataViewProps {
data: Data[];
datasetId: string;
onClose: () => void;
onDataAdd: (dataset: DatasetLike, files: File[]) => void;
}
export default function DataView({ datasetId, data, onClose, onDataAdd }: DataViewProps) {
const handleDataDelete = () => {};
const [rawData, setRawData] = useState<ArrayBuffer | null>(null);
const [selectedData, setSelectedData] = useState<Data | null>(null);
const showRawData = useCallback((dataItem: Data) => {
setSelectedData(dataItem);
fetch(`http://localhost:8000/datasets/${datasetId}/data/${dataItem.id}/raw`)
.then((response) => response.arrayBuffer())
.then(setRawData);
}, [datasetId]);
const resetDataPreview = useCallback(() => {
setSelectedData(null);
setRawData(null);
}, []);
const handleDataAdd = (files: File[]) => {
onDataAdd({ id: datasetId }, files);
}
return (
<Stack orientation="vertical" gap="4">
<Stack gap="2" orientation="horizontal" align="/end">
<UploadInput onChange={handleDataAdd}>
<Text>Add data</Text>
</UploadInput>
<GhostButton onClick={onClose}>
<CloseIcon />
</GhostButton>
</Stack>
{rawData && selectedData && (
<RawDataPreview
fileName={selectedData.name}
rawData={rawData}
onClose={resetDataPreview}
/>
)}
<div className={styles.tableContainer}>
<table className={styles.dataTable}>
<thead>
<tr>
<th>Actions</th>
<th>ID</th>
<th>Name</th>
<th>File path</th>
<th>MIME type</th>
<th>Keywords</th>
</tr>
</thead>
<tbody>
{data.map((dataItem) => (
<tr key={dataItem.id}>
<td>
<Stack orientation="horizontal" gap="2" align="center">
<DropdownMenu position="right">
<Stack gap="1" className={styles.datasetMenu} orientation="vertical">
<GhostButton onClick={() => showRawData(dataItem)}>
<Text>View raw data</Text>
</GhostButton>
{/* <NegativeButton onClick={handleDataDelete}>
<Text>Delete</Text>
</NegativeButton> */}
</Stack>
</DropdownMenu>
</Stack>
</td>
<td>
<Text>{dataItem.id}</Text>
</td>
<td>
<Text>{dataItem.name}</Text>
</td>
<td>
<Text>{dataItem.filePath}</Text>
</td>
<td>
<Text>{dataItem.mimeType}</Text>
</td>
<td>
<Text>{dataItem.keywords.join(", ")}</Text>
</td>
</tr>
))}
</tbody>
</table>
</div>
</Stack>
);
}

34
cognee-frontend/src/modules/ingestion/DataView/RawDataPreview.tsx Normal file

@@ -0,0 +1,34 @@
import { IFrameView } from '@/ui';
interface RawDataPreviewProps {
fileName: string;
rawData: ArrayBuffer;
onClose: () => void;
}
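// A data URL carries no filename by default; splicing a "headers=filename%3D<name>"
// segment in after the MIME type lets viewers that honor it show the real file name.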
const file_header = ';headers=filename%3D';
export default function RawDataPreview({ fileName, rawData, onClose }: RawDataPreviewProps) {
const src = `data:application/pdf;base64,${arrayBufferToBase64(rawData)}`.replace(';', file_header + encodeURIComponent(fileName) + ';');
return (
<IFrameView
src={src}
title={fileName}
onClose={onClose}
/>
);
}
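// Base64-encode the fetched bytes so they can be inlined into the data: URL above.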
function arrayBufferToBase64(buffer: ArrayBuffer): string {
let binary = '';
const bytes = new Uint8Array(buffer);
const len = bytes.byteLength;
for (let i = 0; i < len; i++) {
binary += String.fromCharCode(bytes[i]);
}
return window.btoa(binary);
}

1
cognee-frontend/src/modules/ingestion/DataView/index.ts Normal file

@@ -0,0 +1 @@
export { default, type Data } from './DataView';

6
cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.module.css Normal file

@@ -0,0 +1,6 @@
.datasetMenu {
background-color: var(--global-background-default);
border-radius: var(--border-radius);
padding: 4px;
}

88
cognee-frontend/src/modules/ingestion/DatasetsView/DatasetsView.tsx Normal file

@@ -0,0 +1,88 @@
import { DropdownMenu, GhostButton, Stack, Text, UploadInput, CTAButton } from "ohmy-ui";
import styles from "./DatasetsView.module.css";
import StatusIcon from './StatusIcon';
interface Dataset {
id: string;
name: string;
status: string;
}
const DatasetItem = GhostButton.mixClassName()("div")
interface DatasetsViewProps {
datasets: Dataset[];
onDataAdd: (dataset: Dataset, files: File[]) => void;
onDatasetClick: (dataset: Dataset) => void;
onDatasetDelete: (dataset: Dataset) => void;
onDatasetCognify: (dataset: Dataset) => void;
onDatasetExplore: (dataset: Dataset) => void;
}
export default function DatasetsView({
datasets,
onDatasetClick,
onDataAdd,
onDatasetCognify,
onDatasetDelete,
onDatasetExplore,
}: DatasetsViewProps) {
const handleCognifyDataset = (event: React.MouseEvent<HTMLButtonElement>, dataset: Dataset) => {
event.stopPropagation();
onDatasetCognify(dataset);
}
// const handleDatasetDelete = (event: React.MouseEvent<HTMLButtonElement>, dataset: Dataset) => {
// event.stopPropagation();
// onDatasetDelete(dataset);
// }
const handleExploreDataset = (event: React.MouseEvent<HTMLButtonElement>, dataset: Dataset) => {
event.stopPropagation();
onDatasetExplore(dataset);
}
const handleDataAdd = (dataset: Dataset, files: File[]) => {
onDataAdd(dataset, files);
}
return (
<Stack orientation="vertical" gap="4">
{datasets.map((dataset) => (
<DatasetItem key={dataset.id} onClick={() => onDatasetClick(dataset)}>
<Stack orientation="horizontal" gap="between" align="start/center">
<Text>{dataset.name}</Text>
<Stack orientation="horizontal" gap="2" align="center">
<StatusIcon status={dataset.status} />
<DropdownMenu>
<Stack gap="1" className={styles.datasetMenu} orientation="vertical">
{dataset.status === 'DATASET_PROCESSING_FINISHED' ? (
<CTAButton
onClick={(event: React.MouseEvent<HTMLButtonElement>) => handleExploreDataset(event, dataset)}
>
<Text>Explore</Text>
</CTAButton>
) : (
<CTAButton
onClick={(event: React.MouseEvent<HTMLButtonElement>) => handleCognifyDataset(event, dataset)}
>
<Text>Cognify</Text>
</CTAButton>
)}
<UploadInput as={GhostButton} onChange={(files: File[]) => handleDataAdd(dataset, files)}>
<Text>Add data</Text>
</UploadInput>
{/* <NegativeButton
onClick={(event: React.MouseEvent<HTMLButtonElement>) => handleDatasetDelete(event, dataset)}
>
<Text>Delete</Text>
</NegativeButton> */}
</Stack>
</DropdownMenu>
</Stack>
</Stack>
</DatasetItem>
))}
</Stack>
);
}

15
cognee-frontend/src/modules/ingestion/DatasetsView/StatusIcon.tsx Normal file

@@ -0,0 +1,15 @@
export default function StatusIcon({ status }: { status: 'DATASET_PROCESSING_FINISHED' | string }) {
const isSuccess = status === 'DATASET_PROCESSING_FINISHED';
return (
<div
style={{
width: '16px',
height: '16px',
borderRadius: '4px',
background: isSuccess ? '#53ff24' : '#ff5024',
}}
title={isSuccess ? 'Dataset cognified' : 'Cognify data in order to explore it'}
/>
);
}

1
cognee-frontend/src/modules/ingestion/DatasetsView/index.ts Normal file

@@ -0,0 +1 @@
export { default } from './DatasetsView';

32
cognee-frontend/src/modules/ingestion/useData.ts Normal file

@@ -0,0 +1,32 @@
import { v4 } from 'uuid';
import { useCallback, useState } from 'react';
export interface DataFile {
id: string;
name: string;
file: File;
}
const useData = () => {
const [data, setNewData] = useState<DataFile[]>([]);
const addData = useCallback((files: File[]) => {
setNewData(
files.map((file) => ({
id: v4(),
name: file.name,
file,
}))
);
}, []);
const removeData = useCallback((dataToRemove: DataFile) => {
setNewData((data) =>
data ? data.filter((data) => data.file !== dataToRemove.file) : []
);
}, []);
return { data, addData, removeData };
};
export default useData;

80
cognee-frontend/src/modules/ingestion/useDatasets.ts Normal file

@@ -0,0 +1,80 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import { v4 } from 'uuid';
import { DataFile } from './useData';
export interface Dataset {
id: string;
name: string;
data: DataFile[];
status: string;
}
function useDatasets() {
const [datasets, setDatasets] = useState<Dataset[]>([]);
const statusTimeout = useRef<any>(null);
const fetchDatasetStatuses = useCallback((datasets: Dataset[]) => {
fetch(`http://localhost:8000/datasets/status?dataset=${datasets.map(d => d.id).join('&dataset=')}`)
.then((response) => response.json())
.then((statuses) => setDatasets(
(datasets) => (
datasets.map((dataset) => ({
...dataset,
status: statuses[dataset.id]
}))
)));
}, []);
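// Poll dataset statuses every 5 seconds; each run clears any pending timer and reschedules itself.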
const checkDatasetStatuses = useCallback((datasets: Dataset[]) => {
fetchDatasetStatuses(datasets);
if (statusTimeout.current !== null) {
clearTimeout(statusTimeout.current);
}
statusTimeout.current = setTimeout(() => {
checkDatasetStatuses(datasets);
}, 5000);
}, [fetchDatasetStatuses]);
useEffect(() => {
return () => {
if (statusTimeout.current !== null) {
clearTimeout(statusTimeout.current);
statusTimeout.current = null;
}
};
}, []);
const addDataset = useCallback((datasetName: string) => {
setDatasets((datasets) => [
...datasets,
{
id: v4(),
name: datasetName,
data: [],
status: 'DATASET_INITIALIZED',
}
]);
}, []);
const removeDataset = useCallback((datasetId: string) => {
setDatasets((datasets) =>
datasets.filter((dataset) => dataset.id !== datasetId)
);
}, []);
const fetchDatasets = useCallback(() => {
fetch('http://localhost:8000/datasets')
.then((response) => response.json())
.then((datasets) => datasets.map((dataset: string) => ({ id: dataset, name: dataset })))
.then((datasets) => {
setDatasets(datasets);
checkDatasetStatuses(datasets);
});
}, [checkDatasetStatuses]);
return { datasets, addDataset, removeDataset, refreshDatasets: fetchDatasets };
};
export default useDatasets;

12
cognee-frontend/src/ui/IFrameView/IFrameView.module.css Normal file

@@ -0,0 +1,12 @@
.iFrameViewContainer {
position: absolute;
top: 0;
bottom: 0;
right: 0;
left: 30%;
background: var(--global-background-default);
border-radius: var(--border-radius);
border: 1px solid white;
z-index: 10;
}

28
cognee-frontend/src/ui/IFrameView/IFrameView.tsx Normal file

@@ -0,0 +1,28 @@
import { CloseIcon, GhostButton, Spacer, Stack, Text } from 'ohmy-ui';
import styles from './IFrameView.module.css';
interface IFrameViewProps {
src: string;
title: string;
onClose: () => void;
}
export default function IFrameView({ title, src, onClose }: IFrameViewProps) {
return (
<div className={styles.iFrameViewContainer}>
<Stack gap="between" align="center/" orientation="horizontal">
<Spacer horizontal="2">
<Text>{title}</Text>
</Spacer>
<GhostButton onClick={onClose}>
<CloseIcon />
</GhostButton>
</Stack>
<iframe
src={src}
width="100%"
height="100%"
/>
</div>
);
}

1
cognee-frontend/src/ui/index.ts Normal file

@@ -0,0 +1 @@
export { default as IFrameView } from './IFrameView/IFrameView';

26
cognee-frontend/tsconfig.json Normal file

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./src/*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules"]
}


@@ -1,30 +1,42 @@
""" FastAPI server for the Cognee API. """
import os
from uuid import UUID
import aiohttp
import uvicorn
import logging
# Set up logging
logging.basicConfig(
level=logging.INFO, # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
format="%(asctime)s [%(levelname)s] %(message)s", # Set the log message format
)
logger = logging.getLogger(__name__)
from cognee.config import Config
config = Config()
config.load()
from typing import Dict, Any, List, Union, BinaryIO
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from typing import Dict, Any, List, Union, Annotated
from fastapi import FastAPI, HTTPException, Form, File, UploadFile, Query
from fastapi.responses import JSONResponse, FileResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
app = FastAPI(debug=True)
origins = [
"http://localhost:3000",
"http://localhost:3001",
]
app.add_middleware(
CORSMiddleware,
allow_origins = origins,
allow_credentials = True,
allow_methods = ["OPTIONS", "GET", "POST", "DELETE"],
allow_headers = ["*"],
)
#
# from auth.cognito.JWTBearer import JWTBearer
# from auth.auth import jwks
@@ -52,30 +64,101 @@ class Payload(BaseModel):
payload: Dict[str, Any]
@app.get("/datasets", response_model=list)
async def get_datasets():
from cognee import datasets
return datasets.list_datasets()
@app.delete("/datasets/{dataset_id}", response_model=dict)
async def delete_dataset(dataset_id: str):
from cognee import datasets
datasets.delete_dataset(dataset_id)
return JSONResponse(
status_code = 200,
content = "OK",
)
@app.get("/datasets/{dataset_id}/graph", response_model=list)
async def get_dataset_graph(dataset_id: str):
from cognee import utils
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
graph_engine = infrastructure_config.get_config("graph_engine")
graph_client = await get_graph_client(graph_engine)
graph_url = await utils.render_graph(graph_client.graph)
return JSONResponse(
status_code = 200,
content = str(graph_url),
)
@app.get("/datasets/{dataset_id}/data", response_model=list)
async def get_dataset_data(dataset_id: str):
from cognee import datasets
dataset_data = datasets.list_data(dataset_id)
if dataset_data is None:
raise HTTPException(status_code = 404, detail = f"Dataset ({dataset_id}) not found.")
return [dict(
id = data["id"],
name = f"{data['name']}.{data['extension']}",
keywords = data["keywords"].split("|"),
filePath = data["file_path"],
mimeType = data["mime_type"],
) for data in dataset_data]
@app.get("/datasets/status", response_model=dict)
async def get_dataset_status(datasets: Annotated[list, Query(alias = "dataset")] = None):
from cognee import datasets as cognee_datasets
datasets_statuses = cognee_datasets.get_status(datasets)
return JSONResponse(
status_code = 200,
content = datasets_statuses
)
@app.get("/datasets/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
async def get_raw_data(dataset_id: str, data_id: str):
from cognee import datasets
dataset_data = datasets.list_data(dataset_id)
if dataset_data is None:
raise HTTPException(status_code = 404, detail = f"Dataset ({dataset_id}) not found.")
data = [data for data in dataset_data if data["id"] == data_id][0]
return data["file_path"]
class AddPayload(BaseModel):
data: Union[str, BinaryIO, List[Union[str, BinaryIO]]]
dataset_id: UUID
dataset_name: str
data: Union[str, UploadFile, List[Union[str, UploadFile]]]
dataset_id: str
class Config:
arbitrary_types_allowed = True # This is required to allow the use of Union
class CognifyPayload(BaseModel):
datasets: Union[str, List[str]]
class SearchPayload(BaseModel):
query_params: Dict[str, Any]
@app.post("/add", response_model=dict)
async def add(payload: AddPayload):
async def add(
datasetId: str = Form(...),
data: List[UploadFile] = File(...),
):
""" This endpoint is responsible for adding data to the graph."""
from v1.add.add import add
from cognee import add as cognee_add
try:
data = payload.data
if isinstance(data, str) and data.startswith('http'):
if 'github' in data:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub
repo_name = data.split('/')[-1].replace('.git', '')
os.system(f'git clone {data} .data/{repo_name}')
await add(
repo_name = data.split("/")[-1].replace(".git", "")
os.system(f"git clone {data} .data/{repo_name}")
await cognee_add(
"data://.data/",
f"{repo_name}",
)
@@ -85,16 +168,20 @@ async def add(payload: AddPayload):
async with session.get(data) as resp:
if resp.status == 200:
file_data = await resp.read()
with open(f'.data/{data.split("/")[-1]}', 'wb') as f:
with open(f".data/{data.split('/')[-1]}", "wb") as f:
f.write(file_data)
await add(
f"data://.data/",
await cognee_add(
"data://.data/",
f"{data.split('/')[-1]}",
)
else:
await add(
payload.data,
payload.dataset_name,
await cognee_add(
data,
datasetId,
)
return JSONResponse(
status_code = 200,
content = "OK"
)
except Exception as error:
return JSONResponse(
@@ -102,13 +189,21 @@ async def add(payload: AddPayload):
content = { "error": str(error) }
)
class CognifyPayload(BaseModel):
datasets: list[str]
@app.post("/cognify", response_model=dict)
async def cognify(payload: CognifyPayload):
""" This endpoint is responsible for the cognitive processing of the content."""
from v1.cognify.cognify import cognify
from cognee import cognify as cognee_cognify
try:
await cognify(payload.datasets)
await cognee_cognify(payload.datasets)
return JSONResponse(
status_code = 200,
content = "OK"
)
except Exception as error:
return JSONResponse(
status_code = 409,
@@ -116,14 +211,17 @@ async def cognify(payload: CognifyPayload):
)
class SearchPayload(BaseModel):
query_params: Dict[str, Any]
@app.post("/search", response_model=dict)
async def search(payload: SearchPayload):
""" This endpoint is responsible for searching for nodes in the graph."""
from v1.search.search import search
from cognee import search as cognee_search
try:
search_type = 'SIMILARITY'
await search(search_type, payload.query_params)
search_type = "SIMILARITY"
await cognee_search(search_type, payload.query_params)
except Exception as error:
return JSONResponse(
status_code = 409,
@@ -141,6 +239,13 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000):
"""
try:
logger.info(f"Starting server at {host}:{port}")
from cognee import config
data_directory_path = os.path.abspath(".data_storage")
config.data_root_directory(data_directory_path)
cognee_directory_path = os.path.abspath(".cognee_system")
config.system_root_directory(cognee_directory_path)
uvicorn.run(app, host=host, port=port)
except Exception as e:
logger.exception(f"Failed to start server: {e}")


@@ -1,4 +1,4 @@
from typing import List, Union
from typing import List, Union, BinaryIO
from os import path
import asyncio
import dlt
@@ -10,39 +10,40 @@ from cognee.modules.discovery import discover_directory_datasets
from cognee.utils import send_telemetry
async def add(data_path: Union[str, List[str]], dataset_name: str = None):
if isinstance(data_path, str):
# data_path is a data directory path
if "data://" in data_path:
return await add_data_directory(data_path.replace("data://", ""), dataset_name)
# data_path is a file path
if "file://" in data_path:
return await add([data_path], dataset_name)
# data_path is a text
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
if isinstance(data, str):
# data is a data directory path
if "data://" in data:
return await add_data_directory(data.replace("data://", ""), dataset_name)
# data is a file path
if "file://" in data:
return await add([data], dataset_name)
# data is a text
else:
file_path = save_text_to_file(data_path, dataset_name)
file_path = save_data_to_file(data, dataset_name)
return await add([file_path], dataset_name)
# data_path is a list of file paths or texts
if hasattr(data, "file"):
file_path = save_data_to_file(data.file, dataset_name, filename = data.filename)
return await add([file_path], dataset_name)
# data is a list of file paths or texts
file_paths = []
texts = []
for file_path in data_path:
if file_path.startswith("/") or file_path.startswith("file://"):
file_paths.append(file_path)
else:
texts.append(file_path)
awaitables = []
if len(texts) > 0:
for text in texts:
file_paths.append(save_text_to_file(text, dataset_name))
for data_item in data:
if hasattr(data_item, "file"):
file_paths.append(save_data_to_file(data_item, dataset_name, filename = data_item.filename))
elif isinstance(data_item, str) and (
data_item.startswith("/") or data_item.startswith("file://")
):
file_paths.append(data_item)
elif isinstance(data_item, str):
file_paths.append(save_data_to_file(data_item, dataset_name))
if len(file_paths) > 0:
awaitables.append(add_files(file_paths, dataset_name))
return await add_files(file_paths, dataset_name)
return await asyncio.gather(*awaitables)
return []
async def add_files(file_paths: List[str], dataset_name: str):
infra_config = infrastructure_config.get_config()
@@ -118,16 +119,17 @@ async def add_data_directory(data_path: str, dataset_name: str = None):
return await asyncio.gather(*results)
def save_text_to_file(text: str, dataset_name: str):
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
data_directory_path = infrastructure_config.get_config()["data_root_directory"]
classified_data = ingestion.classify(text)
data_id = ingestion.identify(classified_data)
classified_data = ingestion.classify(data, filename)
# data_id = ingestion.identify(classified_data)
storage_path = data_directory_path + "/" + dataset_name.replace(".", "/")
LocalStorage.ensure_directory_exists(storage_path)
text_file_name = data_id + ".txt"
LocalStorage(storage_path).store(text_file_name, classified_data.get_data())
file_metadata = classified_data.get_metadata()
file_name = file_metadata["name"]
LocalStorage(storage_path).store(file_name, classified_data.get_data())
return "file://" + storage_path + "/" + text_file_name
return "file://" + storage_path + "/" + file_name


@@ -2,9 +2,9 @@ import asyncio
from uuid import uuid4
from typing import List, Union
import logging
import instructor
# import instructor
import nltk
from openai import OpenAI
# from openai import OpenAI
from nltk.corpus import stopwords
from cognee.config import Config
from cognee.modules.cognify.graph.add_data_chunks import add_data_chunks
@@ -26,12 +26,13 @@ from cognee.modules.data.get_content_summary import get_content_summary
from cognee.modules.data.get_cognitive_layers import get_cognitive_layers
from cognee.modules.data.get_layer_graphs import get_layer_graphs
from cognee.utils import send_telemetry
from cognee.modules.tasks import create_task_status_table, update_task_status
config = Config()
config.load()
aclient = instructor.patch(OpenAI())
# aclient = instructor.patch(OpenAI())
USER_ID = "default_user"
@@ -42,6 +43,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
# Has to be loaded in advance, multithreading doesn't work without it.
nltk.download("stopwords", quiet=True)
stopwords.ensure_loaded()
create_task_status_table()
graph_db_type = infrastructure_config.get_config()["graph_engine"]
@@ -80,6 +82,8 @@ async def cognify(datasets: Union[str, List[str]] = None):
chunk_strategy = infrastructure_config.get_config()["chunk_strategy"]
for (dataset_name, files) in dataset_files:
update_task_status(dataset_name, "DATASET_PROCESSING_STARTED")
for file_metadata in files:
with open(file_metadata["file_path"], "rb") as file:
try:
@@ -88,6 +92,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
parent_node_id = f"DefaultGraphModel__{USER_ID}",
document_metadata = file_metadata,
)
update_task_status(document_id, "DOCUMENT_PROCESSING_STARTED")
file_type = guess_file_type(file)
text = extract_text_from_file(file, file_type)
@@ -97,15 +102,39 @@ async def cognify(datasets: Union[str, List[str]] = None):
data_chunks[dataset_name] = []
for subchunk in subchunks:
data_chunks[dataset_name].append(dict(document_id = document_id, chunk_id = str(uuid4()), text = subchunk))
data_chunks[dataset_name].append(dict(
document_id = document_id,
chunk_id = str(uuid4()),
text = subchunk,
))
except FileTypeException:
logger.warning("File (%s) has an unknown file type. We are skipping it.", file_metadata["id"])
added_chunks: list[tuple[str, str, dict]] = await add_data_chunks(data_chunks)
await asyncio.gather(
*[process_text(chunk["document_id"], chunk["chunk_id"], chunk["collection"], chunk["text"]) for chunk in added_chunks]
)
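# Group the chunks by document so each document's processing status can be updated independently.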
chunks_by_document = {}
for chunk in added_chunks:
if chunk["document_id"] not in chunks_by_document:
chunks_by_document[chunk["document_id"]] = []
chunks_by_document[chunk["document_id"]].append(chunk)
for document_id, chunks in chunks_by_document.items():
try:
await asyncio.gather(
*[process_text(
chunk["document_id"],
chunk["chunk_id"],
chunk["collection"],
chunk["text"],
) for chunk in chunks]
)
update_task_status(document_id, "DOCUMENT_PROCESSING_FINISHED")
except Exception as e:
logger.exception(e)
update_task_status(document_id, "DOCUMENT_PROCESSING_FAILED")
update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED")
return graph_client.graph


@@ -1,3 +1,4 @@
from duckdb import CatalogException
from cognee.modules.discovery import discover_directory_datasets
from cognee.infrastructure import infrastructure_config
@@ -12,6 +13,27 @@ class datasets():
return list(discover_directory_datasets(directory_path).keys())
@staticmethod
def query_data(dataset_name: str):
def list_data(dataset_name: str):
db = infrastructure_config.get_config("database_engine")
return db.get_files_metadata(dataset_name)
try:
return db.get_files_metadata(dataset_name)
except CatalogException:
return None
@staticmethod
def get_status(dataset_ids: list[str]) -> dict:
db = infrastructure_config.get_config("database_engine")
try:
return db.get_data("cognee_task_status", {
"data_id": dataset_ids
})
except CatalogException:
return {}
@staticmethod
def delete_dataset(dataset_id: str):
db = infrastructure_config.get_config("database_engine")
try:
return db.delete_table(dataset_id)
except CatalogException:
return {}


@@ -21,6 +21,49 @@ class DuckDBAdapter():
with self.get_connection() as connection:
return connection.sql(f"SELECT id, name, file_path, extension, mime_type, keywords FROM {dataset_name}.file_metadata;").to_df().to_dict("records")
def create_table(self, table_name: str, table_config: list[dict]):
fields_query_parts = []
for table_config_item in table_config:
fields_query_parts.append(f"{table_config_item['name']} {table_config_item['type']}")
with self.get_connection() as connection:
query = f"CREATE TABLE IF NOT EXISTS {table_name} ({', '.join(fields_query_parts)});"
connection.execute(query)
def delete_table(self, table_name: str):
with self.get_connection() as connection:
query = f"DROP TABLE IF EXISTS {table_name};"
connection.execute(query)
def insert_data(self, table_name: str, data: list[dict]):
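# Render one "(value, ...)" tuple per row; string values are quoted for SQL.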
def get_values(data_entry: list):
return ", ".join([f"'{value}'" if isinstance(value, str) else str(value) for value in data_entry])
columns = ", ".join(data[0].keys())
values = ", ".join([f"({get_values(data_entry.values())})" for data_entry in data])
with self.get_connection() as connection:
query = f"INSERT INTO {table_name} ({columns}) VALUES {values};"
connection.execute(query)
def get_data(self, table_name: str, filters: dict = None):
with self.get_connection() as connection:
def get_values(values: list):
return ", ".join([f"'{value}'" for value in values])
def get_filters(filters: dict):
return " AND ".join([
f"{key} IN ({get_values(value)})" if isinstance(value, list)
else f"{key} = '{value}'" for (key, value) in filters.items()
])
query = f"SELECT * FROM {table_name}" + (";" if filters is None else f" WHERE {get_filters(filters)};")
results = connection.sql(query).to_df().to_dict("records")
return {
result["data_id"]: result["status"] for result in results
}
def load_cognify_data(self, data):
with self.get_connection() as connection:
@@ -29,7 +72,7 @@ class DuckDBAdapter():
CREATE TABLE IF NOT EXISTS cognify (
document_id STRING,
layer_id STRING,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT NULL,
processed BOOLEAN DEFAULT FALSE,
document_id_target STRING NULL


@@ -19,12 +19,17 @@ class LocalStorage(Storage):
mode = "w" if isinstance(data, str) else "wb",
encoding = "utf-8" if isinstance(data, str) else None
) as f:
f.write(data if isinstance(data, str) else data.read())
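# Uploaded streams may already have been read, so rewind before copying to store the whole file.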
if hasattr(data, "read"):
data.seek(0)
f.write(data.read())
else:
f.write(data)
def retrieve(self, file_path: str, mode: str = "rb"):
full_file_path = self.storage_path + "/" + file_path
with open(full_file_path, mode = mode) as f:
f.seek(0)
return f.read()
@staticmethod


@@ -19,7 +19,7 @@ def get_file_metadata(file: BinaryIO) -> FileMetadata:
keywords = extract_keywords(file_text)
file_path = file.name
file_name = file_path.split("/")[-1].split(".")[0]
file_name = file_path.split("/")[-1].split(".")[0] if file_path else None
return FileMetadata(
name = file_name,


@@ -22,6 +22,21 @@ txt_file_type = TxtFileType()
filetype.add_type(txt_file_type)
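# Match "PDF-" anywhere in the sniffed buffer (not only at offset zero) so PDFs with leading bytes are still detected.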
class CustomPdfMatcher(filetype.Type):
MIME = "application/pdf"
EXTENSION = "pdf"
def __init__(self):
super(CustomPdfMatcher, self).__init__(mime = CustomPdfMatcher.MIME, extension = CustomPdfMatcher.EXTENSION)
def match(self, buf):
return b"PDF-" in buf
custom_pdf_matcher = CustomPdfMatcher()
filetype.add_type(custom_pdf_matcher)
def guess_file_type(file: BinaryIO) -> filetype.Type:
file_type = filetype.guess(file)


@@ -1,13 +1,16 @@
from io import BufferedReader
from typing import Union, BinaryIO
from .exceptions import IngestionException
from .data_types import create_text_data, create_binary_data
from .data_types import TextData, BinaryData
def classify(data: Union[str, BinaryIO]):
def classify(data: Union[str, BinaryIO], filename: str = None):
if isinstance(data, str):
return create_text_data(data)
return TextData(data)
if isinstance(data, BufferedReader):
return create_binary_data(data)
return BinaryData(data)
raise IngestionException(f"Type of data sent to cognee.add(data_path: string | List[string]) not supported: {type(data)}")
if hasattr(data, "file"):
return BinaryData(data.file, filename)
raise IngestionException(f"Type of data sent to classify(data: Union[str, BinaryIO]) not supported: {type(data)}")


@@ -6,10 +6,12 @@ def create_binary_data(data: BinaryIO):
return BinaryData(data)
class BinaryData(IngestionData):
name: str = None
data: BinaryIO = None
metadata: FileMetadata = None
def __init__(self, data: BinaryIO):
def __init__(self, data: BinaryIO, name: str = None):
self.name = name
self.data = data
def get_identifier(self):
@@ -26,5 +28,8 @@ class BinaryData(IngestionData):
if self.metadata is None:
self.metadata = get_file_metadata(self.data)
if self.metadata["name"] is None:
self.metadata["name"] = self.name
def get_data(self):
return self.data

2
cognee/modules/tasks/__init__.py Normal file

@@ -0,0 +1,2 @@
from .update_task_status import update_task_status
from .create_task_status_table import create_task_status_table

10
cognee/modules/tasks/create_task_status_table.py Normal file

@@ -0,0 +1,10 @@
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
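# Creates the table that tracks dataset- and document-level processing states, later read via /datasets/status.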
def create_task_status_table():
db_engine = infrastructure_config.get_config("database_engine")
db_engine.create_table("cognee_task_status", [
dict(name = "data_id", type = "STRING"),
dict(name = "status", type = "STRING"),
dict(name = "created_at", type = "TIMESTAMP DEFAULT CURRENT_TIMESTAMP"),
])

5
cognee/modules/tasks/update_task_status.py Normal file

@@ -0,0 +1,5 @@
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
def update_task_status(data_id: str, status: str):
db_engine = infrastructure_config.get_config("database_engine")
db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)])

1
cognee/utils.py

@@ -212,6 +212,7 @@ async def render_graph(graph, include_nodes=False, include_color=False, include_
# Visualization
url = plotter.plot(render=False, as_files=True, memoize=False)
print(f"Graph is visualized at: {url}")
return url
def sanitize_df(df):


@@ -262,7 +262,7 @@
"\n",
"for dataset in datasets:\n",
" print(dataset)\n",
" data_from_dataset = cognee.datasets.query_data(dataset)\n",
" data_from_dataset = cognee.datasets.list_data(dataset)\n",
" for file_info in data_from_dataset:\n",
" print(file_info) \n"
]


@@ -66,7 +66,7 @@
"\n",
"print(cognee.datasets.list_datasets())\n",
"\n",
"train_dataset = cognee.datasets.query_data(\"short_stories\")\n",
"train_dataset = cognee.datasets.list_data(\"short_stories\")\n",
"print(len(train_dataset))"
]
},

16
poetry.lock generated

@@ -5085,6 +5085,20 @@ files = [
{file = "python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd"},
]
[[package]]
name = "python-multipart"
version = "0.0.9"
description = "A streaming multipart parser for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "python_multipart-0.0.9-py3-none-any.whl", hash = "sha256:97ca7b8ea7b05f977dc3849c3ba99d51689822fab725c3703af7c866a0c2b215"},
{file = "python_multipart-0.0.9.tar.gz", hash = "sha256:03f54688c663f1b7977105f021043b0793151e4cb1c1a9d4a11fc13d622c4026"},
]
[package.extras]
dev = ["atomicwrites (==1.4.1)", "attrs (==23.2.0)", "coverage (==7.4.1)", "hatch", "invoke (==2.2.0)", "more-itertools (==10.2.0)", "pbr (==6.0.0)", "pluggy (==1.4.0)", "py (==1.11.0)", "pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.2.0)", "pyyaml (==6.0.1)", "ruff (==0.2.1)"]
[[package]]
name = "pytz"
version = "2024.1"
@@ -7155,4 +7169,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
content-hash = "a374c7915e291ac7e68fe13c2d340c2f534decaa37f407286dae18a3c94bd759"
content-hash = "cbbd369a966c9484516f344966ccf53dee0b74d9ba186870cb14559d9e75ec9b"

1
pyproject.toml

@@ -61,6 +61,7 @@ dspy-ai = "2.4.3"
posthog = "^3.5.0"
lancedb = "^0.6.10"
tantivy = "^0.21.0"
python-multipart = "^0.0.9"
[tool.poetry.extras]