feat: parallel tool calling in model settings [LET-6239] (#6262)

* parallel tool calling in model settings
* configs for send message sdk v1
* change models for all tests

---------

Co-authored-by: Ari Webb <ari@letta.com>
2025-11-20 13:00:31 -08:00
parent 2d1667a97c
commit d417870537
7 changed files with 28 additions and 26 deletions
--- a/.github/workflows/send-message-integration-tests.yml
+++ b/.github/workflows/send-message-integration-tests.yml
@@ -37,12 +37,10 @@ jobs:
          "matrix": {
            "config_file": [
              "openai-gpt-4o-mini.json",
+              "openai-gpt-4.1.json",
+              "openai-gpt-5.json",
              "claude-4-5-sonnet.json",
-              "claude-4-sonnet-extended.json",
-              "claude-3-7-sonnet-extended.json",
-              "gemini-1.5-pro.json",
              "gemini-2.5-pro.json",
-              "gemini-2.5-flash.json"
            ]
          }
        }
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -23748,8 +23748,7 @@
              }
            ],
            "title": "Parallel Tool Calls",
-            "description": "Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
-            "default": false,
+            "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
            "deprecated": true
          }
        },
@@ -27948,8 +27947,7 @@
              }
            ],
            "title": "Parallel Tool Calls",
-            "description": "Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
-            "default": false,
+            "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
            "deprecated": true
          },
          "deployment_id": {
@@ -28703,8 +28701,9 @@
              }
            ],
            "title": "Parallel Tool Calls",
-            "description": "If set to True, enables parallel tool calling. Defaults to False.",
-            "default": false
+            "description": "Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
+            "default": false,
+            "deprecated": true
          }
        },
        "type": "object",
@@ -36493,8 +36492,7 @@
              }
            ],
            "title": "Parallel Tool Calls",
-            "description": "Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
-            "default": false,
+            "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
            "deprecated": true
          },
          "response_format": {
@@ -37937,8 +37935,7 @@
              }
            ],
            "title": "Parallel Tool Calls",
-            "description": "Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
-            "default": false,
+            "description": "Deprecated: Use `model_settings` to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
            "deprecated": true
          },
          "id": {
--- a/letta/schemas/agent.py
+++ b/letta/schemas/agent.py
@@ -315,8 +315,8 @@ class CreateAgent(BaseModel, validate_assignment=True):  #
        deprecated=True,
    )
    parallel_tool_calls: Optional[bool] = Field(
-        False,
-        description="Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
+        None,
+        description="Deprecated: Use `model_settings` to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
        deprecated=True,
    )

@@ -441,8 +441,8 @@ class UpdateAgent(BaseModel):
    )
    embedding_config: Optional[EmbeddingConfig] = Field(None, description="The embedding configuration used by the agent.")
    parallel_tool_calls: Optional[bool] = Field(
-        False,
-        description="Deprecated: Use `model` field to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
+        None,
+        description="Deprecated: Use `model_settings` to configure parallel tool calls instead. If set to True, enables parallel tool calling.",
        deprecated=True,
    )
    response_format: Optional[ResponseFormatUnion] = Field(
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -90,7 +90,11 @@ class LLMConfig(BaseModel):

    # FIXME hack to silence pydantic protected namespace warning
    model_config = ConfigDict(protected_namespaces=())
-    parallel_tool_calls: Optional[bool] = Field(False, description="If set to True, enables parallel tool calling. Defaults to False.")
+    parallel_tool_calls: Optional[bool] = Field(
+        False,
+        description="Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
+        deprecated=True,
+    )

    @model_validator(mode="before")
    @classmethod
--- a/letta/schemas/model.py
+++ b/letta/schemas/model.py
@@ -354,6 +354,7 @@ class GroqModelSettings(ModelSettings):
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
+            "parallel_tool_calls": self.parallel_tool_calls,
        }


--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -461,6 +461,11 @@ class SyncServer(object):
                    f"LLM config handle {request.llm_config.handle} does not match request handle {request.model}"
                )

+        # update with model_settings
+        if request.model_settings is not None:
+            update_llm_config_params = request.model_settings._to_legacy_config_params()
+            request.llm_config = request.llm_config.model_copy(update=update_llm_config_params)
+
        # Copy parallel_tool_calls from request to llm_config if provided
        if request.parallel_tool_calls is not None:
            request.llm_config.parallel_tool_calls = request.parallel_tool_calls
--- a/tests/sdk_v1/integration/integration_test_send_message_v2.py
+++ b/tests/sdk_v1/integration/integration_test_send_message_v2.py
@@ -644,17 +644,14 @@ async def test_parallel_tool_calls(
    if provider_type in ["google_ai", "google_vertex"]:
        pytest.skip("Gemini models are flaky for this test so we disable them for now")

-    # # Update model_settings to enable parallel tool calling
-    # modified_model_settings = model_settings.copy()
-    # modified_model_settings["parallel_tool_calls"] = True
+    # Update model_settings to enable parallel tool calling
+    modified_model_settings = model_settings.copy()
+    modified_model_settings["parallel_tool_calls"] = True

-    # IMPORTANT: Set parallel_tool_calls at BOTH the agent level and in model_settings
-    # Even though the agent-level parameter is deprecated, it may still be needed
    agent_state = await client.agents.update(
        agent_id=agent_state.id,
        model=model_handle,
-        model_settings=model_settings,
-        parallel_tool_calls=True,  # Set at agent level as well
+        model_settings=modified_model_settings,
    )

    if send_type == "step":