feature: Adds doctype handling to delete (audio, image, unstructured) (#1239)
<!-- .github/pull_request_template.md -->

## Description

feature: Adds doctype handling to delete (audio, image, unstructured)

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
0c42d19505
commit
fd9aaf57b1
3 changed files with 35 additions and 11 deletions
|
|
@ -1550,7 +1550,7 @@ class KuzuAdapter(GraphDBInterface):
|
|||
"""
|
||||
query = """
|
||||
MATCH (doc:Node)
|
||||
WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument') AND doc.id = $data_id
|
||||
WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument' OR doc.type = 'AudioDocument' OR doc.type = 'ImageDocument' OR doc.type = 'UnstructuredDocument') AND doc.id = $data_id
|
||||
|
||||
OPTIONAL MATCH (doc)<-[e1:EDGE]-(chunk:Node)
|
||||
WHERE e1.relationship_name = 'is_part_of' AND chunk.type = 'DocumentChunk'
|
||||
|
|
@ -1561,7 +1561,7 @@ class KuzuAdapter(GraphDBInterface):
|
|||
MATCH (entity)<-[e3:EDGE]-(otherChunk:Node)-[e4:EDGE]->(otherDoc:Node)
|
||||
WHERE e3.relationship_name = 'contains'
|
||||
AND e4.relationship_name = 'is_part_of'
|
||||
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
|
||||
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
|
||||
AND otherDoc.id <> doc.id
|
||||
}
|
||||
|
||||
|
|
@ -1577,7 +1577,7 @@ class KuzuAdapter(GraphDBInterface):
|
|||
AND e9.relationship_name = 'is_part_of'
|
||||
AND otherEntity.type = 'Entity'
|
||||
AND otherChunk.type = 'DocumentChunk'
|
||||
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
|
||||
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
|
||||
AND otherDoc.id <> doc.id
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1270,21 +1270,21 @@ class Neo4jAdapter(GraphDBInterface):
|
|||
"""
|
||||
query = """
|
||||
MATCH (doc)
|
||||
WHERE (doc:TextDocument OR doc:PdfDocument)
|
||||
WHERE (doc:TextDocument OR doc:PdfDocument OR doc:UnstructuredDocument OR doc:AudioDocument or doc:ImageDocument)
|
||||
AND doc.id = $data_id
|
||||
|
||||
OPTIONAL MATCH (doc)<-[:is_part_of]-(chunk:DocumentChunk)
|
||||
OPTIONAL MATCH (chunk)-[:contains]->(entity:Entity)
|
||||
WHERE NOT EXISTS {
|
||||
MATCH (entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
|
||||
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
|
||||
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
|
||||
AND otherDoc.id <> doc.id
|
||||
}
|
||||
OPTIONAL MATCH (chunk)<-[:made_from]-(made_node:TextSummary)
|
||||
OPTIONAL MATCH (entity)-[:is_a]->(type:EntityType)
|
||||
WHERE NOT EXISTS {
|
||||
MATCH (type)<-[:is_a]-(otherEntity:Entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
|
||||
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
|
||||
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
|
||||
AND otherDoc.id <> doc.id
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,15 +12,21 @@ async def main():
|
|||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
first_file = os.path.join(
|
||||
pdf_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
|
||||
)
|
||||
|
||||
second_file = os.path.join(
|
||||
txt_document = os.path.join(
|
||||
pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
|
||||
)
|
||||
|
||||
third_content = """
|
||||
audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
|
||||
|
||||
image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
|
||||
|
||||
unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")
|
||||
|
||||
text_document_as_literal = """
|
||||
1. Audi
|
||||
Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.
|
||||
|
||||
|
|
@ -42,7 +48,16 @@ async def main():
|
|||
################### HARD DELETE
|
||||
|
||||
# Add documents and get dataset information
|
||||
add_result = await cognee.add([first_file, second_file, third_content])
|
||||
add_result = await cognee.add(
|
||||
[
|
||||
pdf_document,
|
||||
txt_document,
|
||||
text_document_as_literal,
|
||||
unstructured_document,
|
||||
audio_document,
|
||||
image_document,
|
||||
]
|
||||
)
|
||||
dataset_id = add_result.dataset_id
|
||||
|
||||
await cognee.cognify()
|
||||
|
|
@ -68,7 +83,16 @@ async def main():
|
|||
################### SOFT DELETE
|
||||
|
||||
# Add documents and get dataset information
|
||||
add_result = await cognee.add([first_file, second_file, third_content])
|
||||
add_result = await cognee.add(
|
||||
[
|
||||
pdf_document,
|
||||
txt_document,
|
||||
text_document_as_literal,
|
||||
unstructured_document,
|
||||
audio_document,
|
||||
image_document,
|
||||
]
|
||||
)
|
||||
dataset_id = add_result.dataset_id
|
||||
|
||||
await cognee.cognify()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue