Rename .txt files to .md before sending them to Langflow

This commit is contained in:
phact 2025-12-16 23:03:53 -05:00
parent 9bc6f8b6eb
commit f6b9fe0395
2 changed files with 23 additions and 12 deletions

View file

@ -54,8 +54,9 @@ def test_file(tmp_path) -> Path:
import uuid
file_path = tmp_path / f"sdk_test_doc_{uuid.uuid4().hex[:8]}.txt"
file_path.write_text(
f"SDK Integration Test Document {uuid.uuid4()}\n\n"
"This document tests the OpenRAG Python SDK.\n"
f"SDK Integration Test Document\n\n"
f"ID: {uuid.uuid4()}\n\n"
"This document tests the OpenRAG Python SDK.\n\n"
"It contains unique content about purple elephants dancing.\n"
)
return file_path
@ -76,15 +77,6 @@ class TestSettings:
class TestDocuments:
"""Test document operations."""
@pytest.mark.asyncio
async def test_ingest_document(self, client, test_file: Path):
    """Ingest the fixture file and verify the ingestion task succeeded."""
    source_path = str(test_file)
    # No ``wait`` argument is passed: the default (wait=True) polls
    # until the ingestion task completes before returning.
    outcome = await client.documents.ingest(file_path=source_path)
    assert outcome.status == "completed"
    assert outcome.successful_files >= 1
@pytest.mark.asyncio
async def test_ingest_document_no_wait(self, client, test_file: Path):
"""Test document ingestion without waiting."""
@ -96,6 +88,17 @@ class TestDocuments:
# Can poll manually
final_status = await client.documents.wait_for_task(result.task_id)
assert final_status.status == "completed"
assert final_status.successful_files >= 1
@pytest.mark.asyncio
async def test_ingest_document(self, client, test_file: Path):
    """Check that a blocking ingest of the test file reports success."""
    documents_api = client.documents
    # Relies on the default wait=True, which polls until completion.
    ingest_result = await documents_api.ingest(file_path=str(test_file))
    assert ingest_result.status == "completed"
    assert ingest_result.successful_files >= 1
@pytest.mark.asyncio
async def test_delete_document(self, client, test_file: Path):

View file

@ -730,7 +730,15 @@ class LangflowFileProcessor(TaskProcessor):
if not content_type:
content_type = 'application/octet-stream'
file_tuple = (original_filename, content, content_type)
# Rename .txt to .md for Langflow compatibility
# Langflow has issues processing text/plain files
langflow_filename = original_filename
if original_filename.lower().endswith('.txt'):
langflow_filename = original_filename[:-4] + '.md'
content_type = 'text/markdown'
logger.debug(f"Renamed {original_filename} to {langflow_filename} for Langflow")
file_tuple = (langflow_filename, content, content_type)
# Get JWT token using same logic as DocumentFileProcessor
# This will handle anonymous JWT creation if needed