fixes to delete
This commit is contained in:
parent
9d423f5e16
commit
bf191ae6d0
1 changed file with 19 additions and 3 deletions
|
|
@@ -16,9 +16,12 @@ class TextData(IngestionData):
|
||||||
self.data = data
|
self.data = data
|
||||||
|
|
||||||
def get_identifier(self):
|
def get_identifier(self):
|
||||||
keywords = extract_keywords(self.data)
|
import hashlib
|
||||||
|
|
||||||
return "text/plain" + "_" + "|".join(keywords)
|
content_bytes = self.data.encode("utf-8")
|
||||||
|
content_hash = hashlib.md5(content_bytes).hexdigest()
|
||||||
|
|
||||||
|
return "text/plain" + "_" + content_hash
|
||||||
|
|
||||||
def get_metadata(self):
|
def get_metadata(self):
|
||||||
self.ensure_metadata()
|
self.ensure_metadata()
|
||||||
|
|
@@ -27,7 +30,20 @@ class TextData(IngestionData):
|
||||||
|
|
||||||
def ensure_metadata(self):
|
def ensure_metadata(self):
|
||||||
if self.metadata is None:
|
if self.metadata is None:
|
||||||
self.metadata = {}
|
import hashlib
|
||||||
|
|
||||||
|
keywords = extract_keywords(self.data)
|
||||||
|
content_bytes = self.data.encode("utf-8")
|
||||||
|
content_hash = hashlib.md5(content_bytes).hexdigest()
|
||||||
|
|
||||||
|
self.metadata = {
|
||||||
|
"keywords": keywords,
|
||||||
|
"content_hash": content_hash,
|
||||||
|
"content_type": "text/plain",
|
||||||
|
"mime_type": "text/plain",
|
||||||
|
"extension": "txt",
|
||||||
|
"file_size": len(content_bytes),
|
||||||
|
}
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def get_data(self):
|
async def get_data(self):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue