Merge pull request #145 from topoteretes/COG-382-refactor-demo-notebook
Cog 382 refactor demo notebook
This commit is contained in:
commit
3322891132
6 changed files with 741 additions and 5711 deletions
|
|
@ -22,7 +22,7 @@ We build for developers who need a reliable, production-ready data layer for AI
|
|||
## What is cognee?
|
||||
|
||||
cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you ability to interconnect and retrieve past conversations, documents, audio transcriptions, while also reducing hallucinations, developer effort and cost.
|
||||
Try it in a Google collab <a href="https://colab.research.google.com/drive/1jayZ5JRwDaUGFvCw9UZySBG-iB9gpYfu?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a>
|
||||
Try it in a Google collab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a>
|
||||
|
||||
If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community
|
||||
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
740
notebooks/cognee_demo.ipynb
Normal file
740
notebooks/cognee_demo.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,437 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "df16431d0f48b006",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:48.519686Z",
|
||||
"start_time": "2024-09-20T14:02:48.515589Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_position = \"\"\"Senior Data Scientist (Machine Learning)\n",
|
||||
"\n",
|
||||
"Company: TechNova Solutions\n",
|
||||
"Location: San Francisco, CA\n",
|
||||
"\n",
|
||||
"Job Description:\n",
|
||||
"\n",
|
||||
"TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights.\n",
|
||||
"\n",
|
||||
"Responsibilities:\n",
|
||||
"\n",
|
||||
"Develop and implement advanced machine learning algorithms and models.\n",
|
||||
"Analyze large, complex datasets to extract meaningful patterns and insights.\n",
|
||||
"Collaborate with cross-functional teams to integrate predictive models into products.\n",
|
||||
"Stay updated with the latest advancements in machine learning and data science.\n",
|
||||
"Mentor junior data scientists and provide technical guidance.\n",
|
||||
"Qualifications:\n",
|
||||
"\n",
|
||||
"Master’s or Ph.D. in Data Science, Computer Science, Statistics, or a related field.\n",
|
||||
"5+ years of experience in data science and machine learning.\n",
|
||||
"Proficient in Python, R, and SQL.\n",
|
||||
"Experience with deep learning frameworks (e.g., TensorFlow, PyTorch).\n",
|
||||
"Strong problem-solving skills and attention to detail.\n",
|
||||
"Candidate CVs\n",
|
||||
"\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "9086abf3af077ab4",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:49.120838Z",
|
||||
"start_time": "2024-09-20T14:02:49.118294Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_1 = \"\"\"\n",
|
||||
"CV 1: Relevant\n",
|
||||
"Name: Dr. Emily Carter\n",
|
||||
"Contact Information:\n",
|
||||
"\n",
|
||||
"Email: emily.carter@example.com\n",
|
||||
"Phone: (555) 123-4567\n",
|
||||
"Summary:\n",
|
||||
"\n",
|
||||
"Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.\n",
|
||||
"\n",
|
||||
"Education:\n",
|
||||
"\n",
|
||||
"Ph.D. in Computer Science, Stanford University (2014)\n",
|
||||
"B.S. in Mathematics, University of California, Berkeley (2010)\n",
|
||||
"Experience:\n",
|
||||
"\n",
|
||||
"Senior Data Scientist, InnovateAI Labs (2016 – Present)\n",
|
||||
"Led a team in developing machine learning models for natural language processing applications.\n",
|
||||
"Implemented deep learning algorithms that improved prediction accuracy by 25%.\n",
|
||||
"Collaborated with cross-functional teams to integrate models into cloud-based platforms.\n",
|
||||
"Data Scientist, DataWave Analytics (2014 – 2016)\n",
|
||||
"Developed predictive models for customer segmentation and churn analysis.\n",
|
||||
"Analyzed large datasets using Hadoop and Spark frameworks.\n",
|
||||
"Skills:\n",
|
||||
"\n",
|
||||
"Programming Languages: Python, R, SQL\n",
|
||||
"Machine Learning: TensorFlow, Keras, Scikit-Learn\n",
|
||||
"Big Data Technologies: Hadoop, Spark\n",
|
||||
"Data Visualization: Tableau, Matplotlib\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a9de0cc07f798b7f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:49.675003Z",
|
||||
"start_time": "2024-09-20T14:02:49.671615Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_2 = \"\"\"\n",
|
||||
"CV 2: Relevant\n",
|
||||
"Name: Michael Rodriguez\n",
|
||||
"Contact Information:\n",
|
||||
"\n",
|
||||
"Email: michael.rodriguez@example.com\n",
|
||||
"Phone: (555) 234-5678\n",
|
||||
"Summary:\n",
|
||||
"\n",
|
||||
"Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.\n",
|
||||
"\n",
|
||||
"Education:\n",
|
||||
"\n",
|
||||
"M.S. in Data Science, Carnegie Mellon University (2013)\n",
|
||||
"B.S. in Computer Science, University of Michigan (2011)\n",
|
||||
"Experience:\n",
|
||||
"\n",
|
||||
"Senior Data Scientist, Alpha Analytics (2017 – Present)\n",
|
||||
"Developed machine learning models to optimize marketing strategies.\n",
|
||||
"Reduced customer acquisition cost by 15% through predictive modeling.\n",
|
||||
"Data Scientist, TechInsights (2013 – 2017)\n",
|
||||
"Analyzed user behavior data to improve product features.\n",
|
||||
"Implemented A/B testing frameworks to evaluate product changes.\n",
|
||||
"Skills:\n",
|
||||
"\n",
|
||||
"Programming Languages: Python, Java, SQL\n",
|
||||
"Machine Learning: Scikit-Learn, XGBoost\n",
|
||||
"Data Visualization: Seaborn, Plotly\n",
|
||||
"Databases: MySQL, MongoDB\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "185ff1c102d06111",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:50.286828Z",
|
||||
"start_time": "2024-09-20T14:02:50.284369Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_3 = \"\"\"\n",
|
||||
"CV 3: Relevant\n",
|
||||
"Name: Sarah Nguyen\n",
|
||||
"Contact Information:\n",
|
||||
"\n",
|
||||
"Email: sarah.nguyen@example.com\n",
|
||||
"Phone: (555) 345-6789\n",
|
||||
"Summary:\n",
|
||||
"\n",
|
||||
"Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.\n",
|
||||
"\n",
|
||||
"Education:\n",
|
||||
"\n",
|
||||
"M.S. in Statistics, University of Washington (2014)\n",
|
||||
"B.S. in Applied Mathematics, University of Texas at Austin (2012)\n",
|
||||
"Experience:\n",
|
||||
"\n",
|
||||
"Data Scientist, QuantumTech (2016 – Present)\n",
|
||||
"Designed and implemented machine learning algorithms for financial forecasting.\n",
|
||||
"Improved model efficiency by 20% through algorithm optimization.\n",
|
||||
"Junior Data Scientist, DataCore Solutions (2014 – 2016)\n",
|
||||
"Assisted in developing predictive models for supply chain optimization.\n",
|
||||
"Conducted data cleaning and preprocessing on large datasets.\n",
|
||||
"Skills:\n",
|
||||
"\n",
|
||||
"Programming Languages: Python, R\n",
|
||||
"Machine Learning Frameworks: PyTorch, Scikit-Learn\n",
|
||||
"Statistical Analysis: SAS, SPSS\n",
|
||||
"Cloud Platforms: AWS, Azure\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d55ce4c58f8efb67",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:50.950343Z",
|
||||
"start_time": "2024-09-20T14:02:50.946378Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_4 = \"\"\"\n",
|
||||
"CV 4: Not Relevant\n",
|
||||
"Name: David Thompson\n",
|
||||
"Contact Information:\n",
|
||||
"\n",
|
||||
"Email: david.thompson@example.com\n",
|
||||
"Phone: (555) 456-7890\n",
|
||||
"Summary:\n",
|
||||
"\n",
|
||||
"Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.\n",
|
||||
"\n",
|
||||
"Education:\n",
|
||||
"\n",
|
||||
"B.F.A. in Graphic Design, Rhode Island School of Design (2012)\n",
|
||||
"Experience:\n",
|
||||
"\n",
|
||||
"Senior Graphic Designer, CreativeWorks Agency (2015 – Present)\n",
|
||||
"Led design projects for clients in various industries.\n",
|
||||
"Created branding materials that increased client engagement by 30%.\n",
|
||||
"Graphic Designer, Visual Innovations (2012 – 2015)\n",
|
||||
"Designed marketing collateral, including brochures, logos, and websites.\n",
|
||||
"Collaborated with the marketing team to develop cohesive brand strategies.\n",
|
||||
"Skills:\n",
|
||||
"\n",
|
||||
"Design Software: Adobe Photoshop, Illustrator, InDesign\n",
|
||||
"Web Design: HTML, CSS\n",
|
||||
"Specialties: Branding and Identity, Typography\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ca4ecc32721ad332",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:51.548191Z",
|
||||
"start_time": "2024-09-20T14:02:51.545520Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_5 = \"\"\"\n",
|
||||
"CV 5: Not Relevant\n",
|
||||
"Name: Jessica Miller\n",
|
||||
"Contact Information:\n",
|
||||
"\n",
|
||||
"Email: jessica.miller@example.com\n",
|
||||
"Phone: (555) 567-8901\n",
|
||||
"Summary:\n",
|
||||
"\n",
|
||||
"Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.\n",
|
||||
"\n",
|
||||
"Education:\n",
|
||||
"\n",
|
||||
"B.A. in Business Administration, University of Southern California (2010)\n",
|
||||
"Experience:\n",
|
||||
"\n",
|
||||
"Sales Manager, Global Enterprises (2015 – Present)\n",
|
||||
"Managed a sales team of 15 members, achieving a 20% increase in annual revenue.\n",
|
||||
"Developed sales strategies that expanded customer base by 25%.\n",
|
||||
"Sales Representative, Market Leaders Inc. (2010 – 2015)\n",
|
||||
"Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.\n",
|
||||
"Skills:\n",
|
||||
"\n",
|
||||
"Sales Strategy and Planning\n",
|
||||
"Team Leadership and Development\n",
|
||||
"CRM Software: Salesforce, Zoho\n",
|
||||
"Negotiation and Relationship Building\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "904df61ba484a8e5",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:54.243987Z",
|
||||
"start_time": "2024-09-20T14:02:52.498195Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cognee\n",
|
||||
"\n",
|
||||
"await cognee.add([job_1, job_2, job_3, job_4, job_5, job_position], \"example\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "7c431fdef4921ae0",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:57.925667Z",
|
||||
"start_time": "2024-09-20T14:02:57.922353Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cognee.shared.data_models import KnowledgeGraph\n",
|
||||
"from cognee.modules.data.models import Dataset, Data\n",
|
||||
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
|
||||
"from cognee.modules.pipelines.tasks.Task import Task\n",
|
||||
"from cognee.modules.pipelines import run_tasks\n",
|
||||
"from cognee.modules.users.models import User\n",
|
||||
"from cognee.tasks import chunk_remove_disconnected, \\\n",
|
||||
" infer_data_ontology, \\\n",
|
||||
" save_chunks_to_store, \\\n",
|
||||
" chunk_update_check, \\\n",
|
||||
" chunks_into_graph, \\\n",
|
||||
" source_documents_to_chunks, \\\n",
|
||||
" check_permissions_on_documents, \\\n",
|
||||
" classify_documents\n",
|
||||
"\n",
|
||||
"async def run_cognify_pipeline(dataset: Dataset, user: User = None):\n",
|
||||
" data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
|
||||
"\n",
|
||||
" try:\n",
|
||||
"\n",
|
||||
" root_node_id = None\n",
|
||||
"\n",
|
||||
" tasks = [\n",
|
||||
" Task(classify_documents),\n",
|
||||
" Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
|
||||
" Task(infer_data_ontology, root_node_id = root_node_id, ontology_model = KnowledgeGraph),\n",
|
||||
" Task(source_documents_to_chunks, chunk_size = 800, parent_node_id = root_node_id), # Classify documents and save them as a nodes in graph db, extract text chunks based on the document type\n",
|
||||
" Task(chunks_into_graph, graph_model = KnowledgeGraph, collection_name = \"entities\", task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks and attach it to chunk nodes\n",
|
||||
" Task(chunk_update_check, collection_name = \"chunks\"), # Find all affected chunks, so we don't process unchanged chunks\n",
|
||||
" Task(\n",
|
||||
" save_chunks_to_store,\n",
|
||||
" collection_name = \"chunks\",\n",
|
||||
" ), \n",
|
||||
" Task(chunk_remove_disconnected), # Remove the obsolete document chunks.\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" pipeline = run_tasks(tasks, data_documents)\n",
|
||||
"\n",
|
||||
" async for result in pipeline:\n",
|
||||
" print(result)\n",
|
||||
" except Exception as error:\n",
|
||||
" raise error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0a91b99c6215e09",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-09-20T14:02:58.905774Z",
|
||||
"start_time": "2024-09-20T14:02:58.625915Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cognee.modules.users.methods import get_default_user\n",
|
||||
"from cognee.modules.data.methods import get_datasets_by_name\n",
|
||||
"\n",
|
||||
"user = await get_default_user()\n",
|
||||
"\n",
|
||||
"datasets = await get_datasets_by_name([\"example\"], user.id)\n",
|
||||
"\n",
|
||||
"await run_cognify_pipeline(datasets[0], user)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "080389e5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from cognee.shared.utils import render_graph\n",
|
||||
"from cognee.infrastructure.databases.graph import get_graph_engine\n",
|
||||
"import graphistry\n",
|
||||
"\n",
|
||||
"# # Setting an environment variable\n",
|
||||
"# os.environ[\"GRAPHISTRY_USERNAME\"] = placeholder\n",
|
||||
"# os.environ[\"GRAPHISTRY_PASSWORD\"] = placeholder\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"graphistry.login(username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\"))\n",
|
||||
"\n",
|
||||
"graph_engine = await get_graph_engine()\n",
|
||||
"\n",
|
||||
"graph_url = await render_graph(graph_engine.graph)\n",
|
||||
"print(graph_url)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e5e7dfc8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"async def search(\n",
|
||||
" vector_engine,\n",
|
||||
" collection_name: str,\n",
|
||||
" query_text: str = None,\n",
|
||||
"):\n",
|
||||
" query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]\n",
|
||||
"\n",
|
||||
" connection = await vector_engine.get_connection()\n",
|
||||
" collection = await connection.open_table(collection_name)\n",
|
||||
"\n",
|
||||
" results = await collection.vector_search(query_vector).limit(10).to_pandas()\n",
|
||||
"\n",
|
||||
" result_values = list(results.to_dict(\"index\").values())\n",
|
||||
"\n",
|
||||
" return [dict(\n",
|
||||
" id = str(result[\"id\"]),\n",
|
||||
" payload = result[\"payload\"],\n",
|
||||
" score = result[\"_distance\"],\n",
|
||||
" ) for result in result_values]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from cognee.infrastructure.databases.vector import get_vector_engine\n",
|
||||
"\n",
|
||||
"vector_engine = get_vector_engine()\n",
|
||||
"results = await search(vector_engine, \"entities\", \"sarah.nguyen@example.com\")\n",
|
||||
"for result in results:\n",
|
||||
" print(result)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
|
@ -1,191 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38135bf7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from os import path\n",
|
||||
"import cognee\n",
|
||||
"import dspy\n",
|
||||
"from cognee.modules.cognify.dataset import HotPotQA\n",
|
||||
"\n",
|
||||
"data_directory_path = path.abspath(\"../.data\")\n",
|
||||
"cognee.config.data_root_directory(data_directory_path)\n",
|
||||
"\n",
|
||||
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
||||
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
||||
"\n",
|
||||
"await cognee.prune.prune_system()\n",
|
||||
"\n",
|
||||
"colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = \"http://20.102.90.50:2017/wiki17_abstracts\")\n",
|
||||
"dspy.configure(rm = colbertv2_wiki17_abstracts)\n",
|
||||
"\n",
|
||||
"# dataset = HotPotQA(\n",
|
||||
"# train_seed = 1,\n",
|
||||
"# train_size = 10,\n",
|
||||
"# eval_seed = 2023,\n",
|
||||
"# dev_size = 0,\n",
|
||||
"# test_size = 0,\n",
|
||||
"# keep_details = True,\n",
|
||||
"# )\n",
|
||||
"\n",
|
||||
"# texts_to_add = []\n",
|
||||
"\n",
|
||||
"# for train_case in dataset.train:\n",
|
||||
"# train_case_text = \"\\r\\n\".join(\" \".join(context_sentences) for context_sentences in train_case.get(\"context\")[\"sentences\"])\n",
|
||||
"\n",
|
||||
"# texts_to_add.append(train_case_text)\n",
|
||||
"\n",
|
||||
"dataset_name = \"short_stories\"\n",
|
||||
"await cognee.add(\"data://\" + data_directory_path, dataset_name)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "44603a2a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-30T16:53:16.917678Z",
|
||||
"start_time": "2024-04-30T16:53:14.700232Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from os import path\n",
|
||||
"import cognee\n",
|
||||
"\n",
|
||||
"data_directory_path = path.abspath(\"../.data\")\n",
|
||||
"cognee.config.data_root_directory(data_directory_path)\n",
|
||||
"\n",
|
||||
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
||||
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
||||
"\n",
|
||||
"print(cognee.datasets.list_datasets())\n",
|
||||
"\n",
|
||||
"train_dataset = cognee.datasets.list_data(\"short_stories\")\n",
|
||||
"print(len(train_dataset))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "65bfaf09",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-30T16:55:30.886217Z",
|
||||
"start_time": "2024-04-30T16:53:19.164943Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from os import path\n",
|
||||
"import logging\n",
|
||||
"import cognee\n",
|
||||
"\n",
|
||||
"logging.basicConfig(level = logging.INFO)\n",
|
||||
"\n",
|
||||
"data_directory_path = path.abspath(\"../.data\")\n",
|
||||
"cognee.config.data_root_directory(data_directory_path)\n",
|
||||
"\n",
|
||||
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
||||
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
||||
"\n",
|
||||
"await cognee.prune.prune_system()\n",
|
||||
"\n",
|
||||
"await cognee.cognify(\"short_stories\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a514cf38",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-04-30T16:55:39.819971Z",
|
||||
"start_time": "2024-04-30T16:55:35.627964Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import graphistry\n",
|
||||
"from cognee.shared.utils import render_graph\n",
|
||||
"from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine, GraphDBType\n",
|
||||
"from cognee.base_config import get_base_config\n",
|
||||
"\n",
|
||||
"config = get_base_config()\n",
|
||||
"\n",
|
||||
"graphistry.register(\n",
|
||||
" api = 3,\n",
|
||||
" username = config.graphistry_username,\n",
|
||||
" password = config.graphistry_password\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"graph_client = await get_graph_engine()\n",
|
||||
"graph = graph_client.graph\n",
|
||||
"\n",
|
||||
"await render_graph(graph)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e916c484",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from os import path\n",
|
||||
"import cognee\n",
|
||||
"from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine, GraphDBType\n",
|
||||
"from cognee.modules.search.vector.search_similarity import search_similarity\n",
|
||||
"\n",
|
||||
"data_directory_path = path.abspath(\"../.data\")\n",
|
||||
"cognee.config.data_root_directory(data_directory_path)\n",
|
||||
"\n",
|
||||
"cognee_directory_path = path.abspath(\"../.cognee_system\")\n",
|
||||
"cognee.config.system_root_directory(cognee_directory_path)\n",
|
||||
"\n",
|
||||
"graph_client = await get_graph_engine()\n",
|
||||
"graph = graph_client.graph\n",
|
||||
"\n",
|
||||
"results = await search_similarity(\"Who are French girls?\", graph)\n",
|
||||
"\n",
|
||||
"for result in results:\n",
|
||||
" print(\"French girls\" in result)\n",
|
||||
" print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e5e44018878d383f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue