update summary character limit (#1073)

* update

* update tests
Author: Preston Rasmussen
Date: 2025-11-18 17:16:02 -05:00 (committed by GitHub)
parent 8b7ad6f84c
commit d2654003ff
10 changed files with 91 additions and 87 deletions


@@ -40,8 +40,8 @@ from graphiti_core.nodes import EpisodeType

 # Configure logging
 logging.basicConfig(
     level=INFO,
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    datefmt="%Y-%m-%d %H:%M:%S",
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
 )
 logger = logging.getLogger(__name__)
@@ -49,20 +49,20 @@ load_dotenv()

 # Neo4j connection parameters
 # Make sure Neo4j Desktop is running with a local DBMS started
-neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
-neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
-neo4j_password = os.environ.get("NEO4J_PASSWORD", "password")
+neo4j_uri = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
+neo4j_user = os.environ.get('NEO4J_USER', 'neo4j')
+neo4j_password = os.environ.get('NEO4J_PASSWORD', 'password')

 # Azure OpenAI connection parameters
-azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
-azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
-azure_deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4.1")
+azure_endpoint = os.environ.get('AZURE_OPENAI_ENDPOINT')
+azure_api_key = os.environ.get('AZURE_OPENAI_API_KEY')
+azure_deployment = os.environ.get('AZURE_OPENAI_DEPLOYMENT', 'gpt-4.1')
 azure_embedding_deployment = os.environ.get(
-    "AZURE_OPENAI_EMBEDDING_DEPLOYMENT", "text-embedding-3-small"
+    'AZURE_OPENAI_EMBEDDING_DEPLOYMENT', 'text-embedding-3-small'
 )

 if not azure_endpoint or not azure_api_key:
-    raise ValueError("AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY must be set")
+    raise ValueError('AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY must be set')


 async def main():
@@ -76,7 +76,7 @@ async def main():

     # Initialize Azure OpenAI client
     azure_client = AsyncOpenAI(
-        base_url=f"{azure_endpoint}/openai/v1/",
+        base_url=f'{azure_endpoint}/openai/v1/',
         api_key=azure_api_key,
     )
@@ -112,40 +112,40 @@ async def main():
         # Episodes list containing both text and JSON episodes
         episodes = [
             {
-                "content": "Kamala Harris is the Attorney General of California. She was previously "
-                "the district attorney for San Francisco.",
-                "type": EpisodeType.text,
-                "description": "podcast transcript",
+                'content': 'Kamala Harris is the Attorney General of California. She was previously '
+                'the district attorney for San Francisco.',
+                'type': EpisodeType.text,
+                'description': 'podcast transcript',
             },
             {
-                "content": "As AG, Harris was in office from January 3, 2011 – January 3, 2017",
-                "type": EpisodeType.text,
-                "description": "podcast transcript",
+                'content': 'As AG, Harris was in office from January 3, 2011 – January 3, 2017',
+                'type': EpisodeType.text,
+                'description': 'podcast transcript',
             },
             {
-                "content": {
-                    "name": "Gavin Newsom",
-                    "position": "Governor",
-                    "state": "California",
-                    "previous_role": "Lieutenant Governor",
-                    "previous_location": "San Francisco",
+                'content': {
+                    'name': 'Gavin Newsom',
+                    'position': 'Governor',
+                    'state': 'California',
+                    'previous_role': 'Lieutenant Governor',
+                    'previous_location': 'San Francisco',
                 },
-                "type": EpisodeType.json,
-                "description": "podcast metadata",
+                'type': EpisodeType.json,
+                'description': 'podcast metadata',
             },
         ]

         # Add episodes to the graph
         for i, episode in enumerate(episodes):
             await graphiti.add_episode(
-                name=f"California Politics {i}",
+                name=f'California Politics {i}',
                 episode_body=(
-                    episode["content"]
-                    if isinstance(episode["content"], str)
-                    else json.dumps(episode["content"])
+                    episode['content']
+                    if isinstance(episode['content'], str)
+                    else json.dumps(episode['content'])
                 ),
-                source=episode["type"],
-                source_description=episode["description"],
+                source=episode['type'],
+                source_description=episode['description'],
                 reference_time=datetime.now(timezone.utc),
             )
             print(f'Added episode: California Politics {i} ({episode["type"].value})')
@@ -161,18 +161,18 @@ async def main():

         # Perform a hybrid search combining semantic similarity and BM25 retrieval
         print("\nSearching for: 'Who was the California Attorney General?'")
-        results = await graphiti.search("Who was the California Attorney General?")
+        results = await graphiti.search('Who was the California Attorney General?')

         # Print search results
-        print("\nSearch Results:")
+        print('\nSearch Results:')
         for result in results:
-            print(f"UUID: {result.uuid}")
-            print(f"Fact: {result.fact}")
-            if hasattr(result, "valid_at") and result.valid_at:
-                print(f"Valid from: {result.valid_at}")
-            if hasattr(result, "invalid_at") and result.invalid_at:
-                print(f"Valid until: {result.invalid_at}")
-            print("---")
+            print(f'UUID: {result.uuid}')
+            print(f'Fact: {result.fact}')
+            if hasattr(result, 'valid_at') and result.valid_at:
+                print(f'Valid from: {result.valid_at}')
+            if hasattr(result, 'invalid_at') and result.invalid_at:
+                print(f'Valid until: {result.invalid_at}')
+            print('---')

         #################################################
         # CENTER NODE SEARCH
@@ -187,26 +187,26 @@ async def main():
             # Get the source node UUID from the top result
             center_node_uuid = results[0].source_node_uuid
-            print("\nReranking search results based on graph distance:")
-            print(f"Using center node UUID: {center_node_uuid}")
+            print('\nReranking search results based on graph distance:')
+            print(f'Using center node UUID: {center_node_uuid}')

             reranked_results = await graphiti.search(
-                "Who was the California Attorney General?",
+                'Who was the California Attorney General?',
                 center_node_uuid=center_node_uuid,
             )

             # Print reranked search results
-            print("\nReranked Search Results:")
+            print('\nReranked Search Results:')
             for result in reranked_results:
-                print(f"UUID: {result.uuid}")
-                print(f"Fact: {result.fact}")
-                if hasattr(result, "valid_at") and result.valid_at:
-                    print(f"Valid from: {result.valid_at}")
-                if hasattr(result, "invalid_at") and result.invalid_at:
-                    print(f"Valid until: {result.invalid_at}")
-                print("---")
+                print(f'UUID: {result.uuid}')
+                print(f'Fact: {result.fact}')
+                if hasattr(result, 'valid_at') and result.valid_at:
+                    print(f'Valid from: {result.valid_at}')
+                if hasattr(result, 'invalid_at') and result.invalid_at:
+                    print(f'Valid until: {result.invalid_at}')
+                print('---')
         else:
-            print("No results found in the initial search to use as center node.")
+            print('No results found in the initial search to use as center node.')

     finally:
         #################################################
@@ -218,8 +218,8 @@ async def main():
         # Close the connection
         await graphiti.close()
-        print("\nConnection closed")
+        print('\nConnection closed')


-if __name__ == "__main__":
+if __name__ == '__main__':
     asyncio.run(main())


@@ -106,7 +106,7 @@ class Neo4jDriver(GraphDriver):
                 for query in index_queries
             ]
         )

     async def health_check(self) -> None:
         """Check Neo4j connectivity by running the driver's verify_connectivity method."""
         try:

@@ -33,7 +33,7 @@ class AzureOpenAIEmbedderClient(EmbedderClient):
     def __init__(
         self,
         azure_client: AsyncAzureOpenAI | AsyncOpenAI,
-        model: str = "text-embedding-3-small",
+        model: str = 'text-embedding-3-small',
     ):
         self.azure_client = azure_client
         self.model = model
@@ -44,22 +44,18 @@ class AzureOpenAIEmbedderClient(EmbedderClient):
             # Handle different input types
             if isinstance(input_data, str):
                 text_input = [input_data]
-            elif isinstance(input_data, list) and all(
-                isinstance(item, str) for item in input_data
-            ):
+            elif isinstance(input_data, list) and all(isinstance(item, str) for item in input_data):
                 text_input = input_data
             else:
                 # Convert to string list for other types
                 text_input = [str(input_data)]

-            response = await self.azure_client.embeddings.create(
-                model=self.model, input=text_input
-            )
+            response = await self.azure_client.embeddings.create(model=self.model, input=text_input)

             # Return the first embedding as a list of floats
             return response.data[0].embedding
         except Exception as e:
-            logger.error(f"Error in Azure OpenAI embedding: {e}")
+            logger.error(f'Error in Azure OpenAI embedding: {e}')
             raise

     async def create_batch(self, input_data_list: list[str]) -> list[list[float]]:
@@ -71,5 +67,5 @@ class AzureOpenAIEmbedderClient(EmbedderClient):
             return [embedding.embedding for embedding in response.data]
         except Exception as e:
-            logger.error(f"Error in Azure OpenAI batch embedding: {e}")
+            logger.error(f'Error in Azure OpenAI batch embedding: {e}')
             raise


@@ -66,21 +66,21 @@ class AzureOpenAILLMClient(BaseOpenAIClient):
         """Create a structured completion using Azure OpenAI's responses.parse API."""
         supports_reasoning = self._supports_reasoning_features(model)
         request_kwargs = {
-            "model": model,
-            "input": messages,
-            "max_output_tokens": max_tokens,
-            "text_format": response_model,  # type: ignore
+            'model': model,
+            'input': messages,
+            'max_output_tokens': max_tokens,
+            'text_format': response_model,  # type: ignore
         }

         temperature_value = temperature if not supports_reasoning else None
         if temperature_value is not None:
-            request_kwargs["temperature"] = temperature_value
+            request_kwargs['temperature'] = temperature_value

         if supports_reasoning and reasoning:
-            request_kwargs["reasoning"] = {"effort": reasoning}  # type: ignore
+            request_kwargs['reasoning'] = {'effort': reasoning}  # type: ignore

         if supports_reasoning and verbosity:
-            request_kwargs["text"] = {"verbosity": verbosity}  # type: ignore
+            request_kwargs['text'] = {'verbosity': verbosity}  # type: ignore

         return await self.client.responses.parse(**request_kwargs)
@@ -96,20 +96,20 @@ class AzureOpenAILLMClient(BaseOpenAIClient):
         supports_reasoning = self._supports_reasoning_features(model)
         request_kwargs = {
-            "model": model,
-            "messages": messages,
-            "max_tokens": max_tokens,
-            "response_format": {"type": "json_object"},
+            'model': model,
+            'messages': messages,
+            'max_tokens': max_tokens,
+            'response_format': {'type': 'json_object'},
         }

         temperature_value = temperature if not supports_reasoning else None
         if temperature_value is not None:
-            request_kwargs["temperature"] = temperature_value
+            request_kwargs['temperature'] = temperature_value

         return await self.client.chat.completions.create(**request_kwargs)

     @staticmethod
     def _supports_reasoning_features(model: str) -> bool:
         """Return True when the Azure model supports reasoning/verbosity options."""
-        reasoning_prefixes = ("o1", "o3", "gpt-5")
+        reasoning_prefixes = ('o1', 'o3', 'gpt-5')
         return model.startswith(reasoning_prefixes)


@@ -166,13 +166,17 @@ class BaseOpenAIClient(LLMClient):
         except openai.RateLimitError as e:
             raise RateLimitError from e
         except openai.AuthenticationError as e:
-            logger.error(f'OpenAI Authentication Error: {e}. Please verify your API key is correct.')
+            logger.error(
+                f'OpenAI Authentication Error: {e}. Please verify your API key is correct.'
+            )
             raise
         except Exception as e:
             # Provide more context for connection errors
             error_msg = str(e)
             if 'Connection error' in error_msg or 'connection' in error_msg.lower():
-                logger.error(f'Connection error communicating with OpenAI API. Please check your network connection and API key. Error: {e}')
+                logger.error(
+                    f'Connection error communicating with OpenAI API. Please check your network connection and API key. Error: {e}'
+                )
             else:
                 logger.error(f'Error in generating LLM response: {e}')
             raise


@@ -74,7 +74,9 @@ class OpenAIClient(BaseOpenAIClient):
     ):
         """Create a structured completion using OpenAI's beta parse API."""
         # Reasoning models (gpt-5 family) don't support temperature
-        is_reasoning_model = model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
+        is_reasoning_model = (
+            model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
+        )

         response = await self.client.responses.parse(
             model=model,
@@ -100,7 +102,9 @@ class OpenAIClient(BaseOpenAIClient):
     ):
         """Create a regular completion with JSON format."""
         # Reasoning models (gpt-5 family) don't support temperature
-        is_reasoning_model = model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
+        is_reasoning_model = (
+            model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
+        )

         return await self.client.chat.completions.create(
             model=model,


@@ -17,7 +17,7 @@ limitations under the License.
 import re

 # Maximum length for entity/node summaries
-MAX_SUMMARY_CHARS = 250
+MAX_SUMMARY_CHARS = 500


 def truncate_at_sentence(text: str, max_chars: int) -> str:
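
This hunk is the change the commit is named for: entity/node summaries may now run to 500 characters before truncation instead of 250. A minimal usage sketch, not part of the diff — the import path graphiti_core.utils.text_utils is an assumption, while MAX_SUMMARY_CHARS and truncate_at_sentence are the names shown above:

# Hypothetical sketch: exercise the new 500-character summary cap.
# The module path below is assumed; adjust if the helper lives elsewhere.
from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS, truncate_at_sentence

summary = 'Kamala Harris served as Attorney General of California. ' * 12  # ~684 chars
truncated = truncate_at_sentence(summary, MAX_SUMMARY_CHARS)

# Before this commit the cap was 250; now roughly twice as much of the
# summary survives, and as the helper's name suggests the cut still
# lands on a sentence boundary rather than mid-sentence.
print(len(truncated))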


@@ -1,7 +1,7 @@
 [project]
 name = "graphiti-core"
 description = "A temporal graph building library"
-version = "0.24.0"
+version = "0.24.1"
 authors = [
     { name = "Paul Paliychuk", email = "paul@getzep.com" },
     { name = "Preston Rasmussen", email = "preston@getzep.com" },


@@ -87,7 +87,7 @@ def test_truncate_at_sentence_strips_trailing_whitespace():

 def test_max_summary_chars_constant():
     """Test that MAX_SUMMARY_CHARS is set to expected value."""
-    assert MAX_SUMMARY_CHARS == 250
+    assert MAX_SUMMARY_CHARS == 500


 def test_truncate_at_sentence_realistic_summary():

uv.lock (generated)

@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.10, <4"
 resolution-markers = [
     "python_full_version >= '3.14'",
@@ -808,7 +808,7 @@ wheels = [

 [[package]]
 name = "graphiti-core"
-version = "0.24.0"
+version = "0.24.1"
 source = { editable = "." }
 dependencies = [
     { name = "diskcache" },