diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index b3ab4148..7319f7fc 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -235,6 +235,8 @@ class GoogleVertexClient(GoogleAIClient): ) except json.decoder.JSONDecodeError: + if candidate.finish_reason == "MAX_TOKENS": + raise ValueError(f"Could not parse response data from LLM: exceeded max token limit") # Inner thoughts are the content by default inner_thoughts = response_message.text