Adapt chunk_by_paragraph_test.py
This commit is contained in:
parent
f8e5b529c3
commit
1b4a7e4fdc
1 changed file with 13 additions and 13 deletions
|
|
@@ -3,13 +3,13 @@ from cognee.tasks.chunks import chunk_by_paragraph
|
||||||
GROUND_TRUTH = {
|
GROUND_TRUTH = {
|
||||||
"whole_text": [
|
"whole_text": [
|
||||||
{
|
{
|
||||||
"text": "This is example text. It contains multiple sentences.",
|
"text": "This is example text. It contains multiple sentences.\n",
|
||||||
"word_count": 8,
|
"word_count": 9,
|
||||||
"cut_type": "paragraph_end",
|
"cut_type": "paragraph_end",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"text": "This is a second paragraph. First two paragraphs are whole.",
|
"text": "This is a second paragraph. First two paragraphs are whole.\n",
|
||||||
"word_count": 10,
|
"word_count": 11,
|
||||||
"cut_type": "paragraph_end",
|
"cut_type": "paragraph_end",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@@ -20,30 +20,30 @@ GROUND_TRUTH = {
|
||||||
],
|
],
|
||||||
"cut_text": [
|
"cut_text": [
|
||||||
{
|
{
|
||||||
"text": "This is example text. It contains multiple sentences.",
|
"text": "This is example text. It contains multiple sentences.\n",
|
||||||
"word_count": 8,
|
"word_count": 9,
|
||||||
"cut_type": "paragraph_end",
|
"cut_type": "paragraph_end",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"text": "This is a second paragraph. First two paragraphs are whole.",
|
"text": "This is a second paragraph. First two paragraphs are whole.\n",
|
||||||
"word_count": 10,
|
"word_count": 11,
|
||||||
"cut_type": "paragraph_end",
|
"cut_type": "paragraph_end",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"text": "Third paragraph is cut and is missing the dot at the end",
|
"text": "Third paragraph is cut and is missing the dot at the end",
|
||||||
"word_count": 12,
|
"word_count": 12,
|
||||||
"cut_type": "sentence_cut",
|
"cut_type": "word",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
INPUT_TEXT = {
|
INPUT_TEXT = {
|
||||||
"whole_text": """This is example text. It contains multiple sentences.
|
"whole_text": """This is example text. It contains multiple sentences.
|
||||||
This is a second paragraph. First two paragraphs are whole.
|
This is a second paragraph. First two paragraphs are whole.
|
||||||
Third paragraph is a bit longer and is finished with a dot.""",
|
Third paragraph is a bit longer and is finished with a dot.""",
|
||||||
"cut_text": """This is example text. It contains multiple sentences.
|
"cut_text": """This is example text. It contains multiple sentences.
|
||||||
This is a second paragraph. First two paragraphs are whole.
|
This is a second paragraph. First two paragraphs are whole.
|
||||||
Third paragraph is cut and is missing the dot at the end""",
|
Third paragraph is cut and is missing the dot at the end""",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue