feat: migrate integration tests to use model_settings (#6125)

* migrate integration tests to use model_settings

* updated configs

* update v2 tests

* revert files

* add some configs and fix tests

* fix conversation_search

---------

Co-authored-by: Ari Webb <ari@letta.com>
Authored by Sarah Wooders on 2025-11-19 11:21:09 -08:00; committed by Caren Thomas
parent f9b3978460
commit ce3add4f55
23 changed files with 675 additions and 398 deletions

File diff suppressed because it is too large.


@@ -16,8 +16,6 @@ from letta_client.types import AgentState, MessageCreateParam, ToolReturnMessage
 from letta_client.types.agents import AssistantMessage, ReasoningMessage, Run, ToolCallMessage, UserMessage
 from letta_client.types.agents.letta_streaming_response import LettaPing, LettaStopReason, LettaUsageStatistics
-from letta.schemas.llm_config import LLMConfig
 logger = logging.getLogger(__name__)
@@ -35,17 +33,17 @@ all_configs = [
 ]
-def get_llm_config(filename: str, llm_config_dir: str = "tests/configs/llm_model_configs") -> LLMConfig:
-    filename = os.path.join(llm_config_dir, filename)
+def get_model_config(filename: str, model_settings_dir: str = "tests/sdk_v1/model_settings") -> Tuple[str, dict]:
+    """Load a model_settings file and return the handle and settings dict."""
+    filename = os.path.join(model_settings_dir, filename)
     with open(filename, "r") as f:
         config_data = json.load(f)
-    llm_config = LLMConfig(**config_data)
-    return llm_config
+    return config_data["handle"], config_data.get("model_settings", {})
 requested = os.getenv("LLM_CONFIG_FILE")
 filenames = [requested] if requested else all_configs
-TESTED_LLM_CONFIGS: List[LLMConfig] = [get_llm_config(fn) for fn in filenames]
+TESTED_MODEL_CONFIGS: List[Tuple[str, dict]] = [get_model_config(fn) for fn in filenames]
 def roll_dice(num_sides: int) -> int:
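For reference, a minimal sketch of what the new loader returns (the filename below is hypothetical; the JSON shape matches the model_settings files added later in this commit):

# Sketch, not part of the diff: get_model_config unpacks a model_settings JSON file.
# Given {"handle": "openai/gpt-4o-mini", "model_settings": {"provider_type": "openai", ...}}:
handle, settings = get_model_config("gpt-4o-mini.json")  # hypothetical filename
assert handle == "openai/gpt-4o-mini"
assert settings["provider_type"] == "openai"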
@@ -91,7 +89,8 @@ USER_MESSAGE_PARALLEL_TOOL_CALL: List[MessageCreateParam] = [
 def assert_greeting_response(
     messages: List[Any],
-    llm_config: LLMConfig,
+    model_handle: str,
+    model_settings: dict,
     streaming: bool = False,
     token_streaming: bool = False,
     from_db: bool = False,
@@ -106,7 +105,7 @@ def assert_greeting_response(
     ]
     expected_message_count_min, expected_message_count_max = get_expected_message_count_range(
-        llm_config, streaming=streaming, from_db=from_db
+        model_handle, model_settings, streaming=streaming, from_db=from_db
     )
     assert expected_message_count_min <= len(messages) <= expected_message_count_max
@@ -120,7 +119,7 @@ def assert_greeting_response(
     # Reasoning message if reasoning enabled
     otid_suffix = 0
     try:
-        if is_reasoner_model(llm_config):
+        if is_reasoner_model(model_handle, model_settings):
             assert isinstance(messages[index], ReasoningMessage)
             assert messages[index].otid and messages[index].otid[-1] == str(otid_suffix)
             index += 1
@@ -151,7 +150,8 @@ def assert_greeting_response(
 def assert_tool_call_response(
     messages: List[Any],
-    llm_config: LLMConfig,
+    model_handle: str,
+    model_settings: dict,
     streaming: bool = False,
     from_db: bool = False,
     with_cancellation: bool = False,
@@ -172,7 +172,7 @@ def assert_tool_call_response(
     if not with_cancellation:
         expected_message_count_min, expected_message_count_max = get_expected_message_count_range(
-            llm_config, tool_call=True, streaming=streaming, from_db=from_db
+            model_handle, model_settings, tool_call=True, streaming=streaming, from_db=from_db
         )
         assert expected_message_count_min <= len(messages) <= expected_message_count_max
@@ -190,7 +190,7 @@ def assert_tool_call_response(
     # Reasoning message if reasoning enabled
     otid_suffix = 0
     try:
-        if is_reasoner_model(llm_config):
+        if is_reasoner_model(model_handle, model_settings):
             assert isinstance(messages[index], ReasoningMessage)
             assert messages[index].otid and messages[index].otid[-1] == str(otid_suffix)
             index += 1
@@ -201,7 +201,7 @@ def assert_tool_call_response(
     # Special case for claude-sonnet-4-5-20250929 and opus-4.1 which can generate an extra AssistantMessage before tool call
     if (
-        (llm_config.model == "claude-sonnet-4-5-20250929" or llm_config.model.startswith("claude-opus-4-1"))
+        ("claude-sonnet-4-5-20250929" in model_handle or "claude-opus-4-1" in model_handle)
         and index < len(messages)
         and isinstance(messages[index], AssistantMessage)
     ):
@@ -235,7 +235,7 @@ def assert_tool_call_response(
     # Reasoning message if reasoning enabled
     otid_suffix = 0
     try:
-        if is_reasoner_model(llm_config):
+        if is_reasoner_model(model_handle, model_settings):
             assert isinstance(messages[index], ReasoningMessage)
             assert messages[index].otid and messages[index].otid[-1] == str(otid_suffix)
             index += 1
@@ -373,7 +373,7 @@ async def wait_for_run_completion(client: AsyncLetta, run_id: str, timeout: float
 def get_expected_message_count_range(
-    llm_config: LLMConfig, tool_call: bool = False, streaming: bool = False, from_db: bool = False
+    model_handle: str, model_settings: dict, tool_call: bool = False, streaming: bool = False, from_db: bool = False
 ) -> Tuple[int, int]:
     """
     Returns the expected range of number of messages for a given LLM configuration. Uses range to account for possible variations in the number of reasoning messages.
@@ -402,23 +402,26 @@ def get_expected_message_count_range(
     expected_message_count = 1
     expected_range = 0
-    if is_reasoner_model(llm_config):
+    if is_reasoner_model(model_handle, model_settings):
         # reasoning message
         expected_range += 1
         if tool_call:
             # check for sonnet 4.5 or opus 4.1 specifically
             is_sonnet_4_5_or_opus_4_1 = (
-                llm_config.model_endpoint_type == "anthropic"
-                and llm_config.enable_reasoner
-                and (llm_config.model.startswith("claude-sonnet-4-5") or llm_config.model.startswith("claude-opus-4-1"))
+                model_settings.get("provider_type") == "anthropic"
+                and model_settings.get("thinking", {}).get("type") == "enabled"
+                and ("claude-sonnet-4-5" in model_handle or "claude-opus-4-1" in model_handle)
             )
-            if is_sonnet_4_5_or_opus_4_1 or not LLMConfig.is_anthropic_reasoning_model(llm_config):
+            is_anthropic_reasoning = (
+                model_settings.get("provider_type") == "anthropic" and model_settings.get("thinking", {}).get("type") == "enabled"
+            )
+            if is_sonnet_4_5_or_opus_4_1 or not is_anthropic_reasoning:
                 # sonnet 4.5 and opus 4.1 return a reasoning message before the final assistant message
                 # so do the other native reasoning models
                 expected_range += 1
             # opus 4.1 generates an extra AssistantMessage before the tool call
-            if llm_config.model.startswith("claude-opus-4-1"):
+            if "claude-opus-4-1" in model_handle:
                 expected_range += 1
     if tool_call:
@@ -436,13 +439,34 @@ def get_expected_message_count_range(
     return expected_message_count, expected_message_count + expected_range
-def is_reasoner_model(llm_config: LLMConfig) -> bool:
-    return (
-        (LLMConfig.is_openai_reasoning_model(llm_config) and llm_config.reasoning_effort == "high")
-        or LLMConfig.is_anthropic_reasoning_model(llm_config)
-        or LLMConfig.is_google_vertex_reasoning_model(llm_config)
-        or LLMConfig.is_google_ai_reasoning_model(llm_config)
-    )
+def is_reasoner_model(model_handle: str, model_settings: dict) -> bool:
+    """Check if the model is a reasoning model based on its handle and settings."""
+    # OpenAI reasoning models with high reasoning effort
+    is_openai_reasoning = (
+        model_settings.get("provider_type") == "openai"
+        and (
+            "gpt-5" in model_handle
+            or "o1" in model_handle
+            or "o3" in model_handle
+            or "o4-mini" in model_handle
+            or "gpt-4.1" in model_handle
+        )
+        and model_settings.get("reasoning", {}).get("reasoning_effort") == "high"
+    )
+    # Anthropic models with thinking enabled
+    is_anthropic_reasoning = (
+        model_settings.get("provider_type") == "anthropic" and model_settings.get("thinking", {}).get("type") == "enabled"
+    )
+    # Google Vertex models with thinking config
+    is_google_vertex_reasoning = (
+        model_settings.get("provider_type") == "google_vertex" and model_settings.get("thinking_config", {}).get("include_thoughts") is True
+    )
+    # Google AI models with thinking config
+    is_google_ai_reasoning = (
+        model_settings.get("provider_type") == "google_ai" and model_settings.get("thinking_config", {}).get("include_thoughts") is True
+    )
+    return is_openai_reasoning or is_anthropic_reasoning or is_google_vertex_reasoning or is_google_ai_reasoning
 # ------------------------------
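To make the new heuristic concrete, a few illustrative calls (a sketch based on the function above and the config shapes added in this commit; not part of the diff):

# OpenAI o-series with high reasoning effort -> treated as a reasoner
is_reasoner_model("openai/o3", {"provider_type": "openai", "reasoning": {"reasoning_effort": "high"}})  # True
# Anthropic with extended thinking enabled -> treated as a reasoner
is_reasoner_model("anthropic/claude-sonnet-4-20250514", {"provider_type": "anthropic", "thinking": {"type": "enabled"}})  # True
# gpt-5 with minimal effort fails the "high" check -> not a reasoner here
is_reasoner_model("openai/gpt-5", {"provider_type": "openai", "reasoning": {"reasoning_effort": "minimal"}})  # False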
@@ -524,9 +548,9 @@ async def agent_state(client: AsyncLetta) -> AgentState:
 @pytest.mark.parametrize(
-    "llm_config",
-    TESTED_LLM_CONFIGS,
-    ids=[c.model for c in TESTED_LLM_CONFIGS],
+    "model_config",
+    TESTED_MODEL_CONFIGS,
+    ids=[handle for handle, _ in TESTED_MODEL_CONFIGS],
 )
 @pytest.mark.parametrize("send_type", ["step", "stream_steps", "stream_tokens", "stream_tokens_background", "async"])
 @pytest.mark.asyncio(loop_scope="function")
@@ -534,12 +558,13 @@ async def test_greeting(
     disable_e2b_api_key: Any,
     client: AsyncLetta,
     agent_state: AgentState,
-    llm_config: LLMConfig,
+    model_config: Tuple[str, dict],
     send_type: str,
 ) -> None:
+    model_handle, model_settings = model_config
     last_message_page = await client.agents.messages.list(agent_id=agent_state.id, limit=1)
     last_message = last_message_page.items[0] if last_message_page.items else None
-    agent_state = await client.agents.update(agent_id=agent_state.id, llm_config=llm_config)
+    agent_state = await client.agents.update(agent_id=agent_state.id, model=model_handle, model_settings=model_settings)
     if send_type == "step":
         response = await client.agents.messages.create(
@@ -573,19 +598,19 @@ async def test_greeting(
     run_id = runs.items[0].id if runs.items else None
     assert_greeting_response(
-        messages, streaming=("stream" in send_type), token_streaming=(send_type == "stream_tokens"), llm_config=llm_config
+        messages, model_handle, model_settings, streaming=("stream" in send_type), token_streaming=(send_type == "stream_tokens")
     )
     if "background" in send_type:
         response = await client.runs.messages.stream(run_id=run_id, starting_after=0)
         messages = await accumulate_chunks(response)
         assert_greeting_response(
-            messages, streaming=("stream" in send_type), token_streaming=(send_type == "stream_tokens"), llm_config=llm_config
+            messages, model_handle, model_settings, streaming=("stream" in send_type), token_streaming=(send_type == "stream_tokens")
         )
     messages_from_db_page = await client.agents.messages.list(agent_id=agent_state.id, after=last_message.id if last_message else None)
     messages_from_db = messages_from_db_page.items
-    assert_greeting_response(messages_from_db, from_db=True, llm_config=llm_config)
+    assert_greeting_response(messages_from_db, model_handle, model_settings, from_db=True)
     assert run_id is not None
     run = await client.runs.retrieve(run_id=run_id)
@@ -593,9 +618,9 @@
 @pytest.mark.parametrize(
-    "llm_config",
-    TESTED_LLM_CONFIGS,
-    ids=[c.model for c in TESTED_LLM_CONFIGS],
+    "model_config",
+    TESTED_MODEL_CONFIGS,
+    ids=[handle for handle, _ in TESTED_MODEL_CONFIGS],
 )
 @pytest.mark.parametrize("send_type", ["step", "stream_steps", "stream_tokens", "stream_tokens_background", "async"])
 @pytest.mark.asyncio(loop_scope="function")
@@ -603,28 +628,33 @@ async def test_parallel_tool_calls(
     disable_e2b_api_key: Any,
     client: AsyncLetta,
     agent_state: AgentState,
-    llm_config: LLMConfig,
+    model_config: Tuple[str, dict],
     send_type: str,
 ) -> None:
-    if llm_config.model_endpoint_type not in ["anthropic", "openai", "google_ai", "google_vertex"]:
+    model_handle, model_settings = model_config
+    provider_type = model_settings.get("provider_type", "")
+    if provider_type not in ["anthropic", "openai", "google_ai", "google_vertex"]:
         pytest.skip("Parallel tool calling test only applies to Anthropic, OpenAI, and Gemini models.")
-    if llm_config.model in ["gpt-5", "o3"]:
+    if "gpt-5" in model_handle or "o3" in model_handle:
         pytest.skip("GPT-5 takes too long to test, o3 is bad at this task.")
-    # change llm_config to support parallel tool calling
-    # Create a copy and modify it to ensure we're not modifying the original
-    modified_llm_config = llm_config.model_copy(deep=True)
-    modified_llm_config.parallel_tool_calls = True
-    # this test was flaking so set temperature to 0.0 to avoid randomness
-    modified_llm_config.temperature = 0.0
+    # Skip Gemini models due to issues with parallel tool calling
+    if provider_type in ["google_ai", "google_vertex"]:
+        pytest.skip("Gemini models are flaky for this test so we disable them for now")
-    # IMPORTANT: Set parallel_tool_calls at BOTH the agent level and llm_config level
-    # There are two different parallel_tool_calls fields that need to be set
+    # # Update model_settings to enable parallel tool calling
+    # modified_model_settings = model_settings.copy()
+    # modified_model_settings["parallel_tool_calls"] = True
+    # IMPORTANT: Set parallel_tool_calls at BOTH the agent level and in model_settings
+    # Even though the agent-level parameter is deprecated, it may still be needed
     agent_state = await client.agents.update(
         agent_id=agent_state.id,
-        llm_config=modified_llm_config,
-        parallel_tool_calls=True, # Set at agent level as well!
+        model=model_handle,
+        model_settings=model_settings,
+        parallel_tool_calls=True, # Set at agent level as well
     )
     if send_type == "step":
@@ -696,7 +726,7 @@ async def test_parallel_tool_calls(
     # IMPORTANT: Assert that parallel tool calling is actually working
     # This test should FAIL if parallel tool calling is not working properly
     assert is_parallel, (
-        f"Parallel tool calling is NOT working for {llm_config.model_endpoint_type}! "
+        f"Parallel tool calling is NOT working for {provider_type}! "
         f"Got {len(tool_call_messages)} ToolCallMessage(s) instead of 1 with 3 parallel calls. "
         f"When using letta_v1_agent with parallel_tool_calls=True, all tool calls should be in a single message."
     )
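For orientation, the check behind is_parallel presumably looks something like the sketch below (the tool_calls attribute name is an assumption, not taken from this diff):

# Sketch: with parallel tool calling working, all three dice rolls arrive
# in a single ToolCallMessage rather than as three sequential messages.
def _looks_parallel(tool_call_messages) -> bool:
    return len(tool_call_messages) == 1 and len(tool_call_messages[0].tool_calls) == 3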
@@ -773,9 +803,9 @@ async def test_parallel_tool_calls(
 @pytest.mark.parametrize(
-    "llm_config",
-    TESTED_LLM_CONFIGS,
-    ids=[c.model for c in TESTED_LLM_CONFIGS],
+    "model_config",
+    TESTED_MODEL_CONFIGS,
+    ids=[handle for handle, _ in TESTED_MODEL_CONFIGS],
 )
 @pytest.mark.parametrize(
     ["send_type", "cancellation"],
@@ -796,20 +826,22 @@ async def test_tool_call(
     disable_e2b_api_key: Any,
     client: AsyncLetta,
     agent_state: AgentState,
-    llm_config: LLMConfig,
+    model_config: Tuple[str, dict],
     send_type: str,
     cancellation: str,
 ) -> None:
+    model_handle, model_settings = model_config
     # Skip models with OTID mismatch issues between ToolCallMessage and ToolReturnMessage
-    if llm_config.model == "gpt-5" or llm_config.model == "claude-sonnet-4-5-20250929" or llm_config.model.startswith("claude-opus-4-1"):
-        pytest.skip(f"Skipping {llm_config.model} due to OTID chain issue - messages receive incorrect OTID suffixes")
+    if "gpt-5" in model_handle or "claude-sonnet-4-5-20250929" in model_handle or "claude-opus-4-1" in model_handle:
+        pytest.skip(f"Skipping {model_handle} due to OTID chain issue - messages receive incorrect OTID suffixes")
     last_message_page = await client.agents.messages.list(agent_id=agent_state.id, limit=1)
     last_message = last_message_page.items[0] if last_message_page.items else None
-    agent_state = await client.agents.update(agent_id=agent_state.id, llm_config=llm_config)
+    agent_state = await client.agents.update(agent_id=agent_state.id, model=model_handle, model_settings=model_settings)
     if cancellation == "with_cancellation":
-        delay = 5 if llm_config.model == "gpt-5" else 0.5 # increase delay for responses api
+        delay = 5 if "gpt-5" in model_handle else 0.5 # increase delay for responses api
         _cancellation_task = asyncio.create_task(cancel_run_after_delay(client, agent_state.id, delay=delay))
     if send_type == "step":
@@ -844,20 +876,24 @@ async def test_tool_call(
     run_id = runs.items[0].id if runs.items else None
     assert_tool_call_response(
-        messages, streaming=("stream" in send_type), llm_config=llm_config, with_cancellation=(cancellation == "with_cancellation")
+        messages, model_handle, model_settings, streaming=("stream" in send_type), with_cancellation=(cancellation == "with_cancellation")
     )
     if "background" in send_type:
         response = await client.runs.messages.stream(run_id=run_id, starting_after=0)
         messages = await accumulate_chunks(response)
         assert_tool_call_response(
-            messages, streaming=("stream" in send_type), llm_config=llm_config, with_cancellation=(cancellation == "with_cancellation")
+            messages,
+            model_handle,
+            model_settings,
+            streaming=("stream" in send_type),
+            with_cancellation=(cancellation == "with_cancellation"),
         )
     messages_from_db_page = await client.agents.messages.list(agent_id=agent_state.id, after=last_message.id if last_message else None)
     messages_from_db = messages_from_db_page.items
     assert_tool_call_response(
-        messages_from_db, from_db=True, llm_config=llm_config, with_cancellation=(cancellation == "with_cancellation")
+        messages_from_db, model_handle, model_settings, from_db=True, with_cancellation=(cancellation == "with_cancellation")
     )
     assert run_id is not None
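Taken together, the migrated pattern for any test in this file is roughly the following (a condensed sketch of the changes above, not an additional change in the commit):

@pytest.mark.parametrize("model_config", TESTED_MODEL_CONFIGS, ids=[handle for handle, _ in TESTED_MODEL_CONFIGS])
@pytest.mark.asyncio(loop_scope="function")
async def test_example(client: AsyncLetta, agent_state: AgentState, model_config: Tuple[str, dict]) -> None:
    model_handle, model_settings = model_config
    # Point the agent at the model under test via handle plus settings dict,
    # replacing the old agents.update(..., llm_config=LLMConfig(...)) call.
    agent_state = await client.agents.update(agent_id=agent_state.id, model=model_handle, model_settings=model_settings)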


@@ -0,0 +1,9 @@
{
"handle": "azure/gpt-4o-mini",
"model_settings": {
"provider_type": "azure",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false
}
}


@@ -0,0 +1,9 @@
{
"handle": "bedrock/arn:aws:bedrock:us-east-1:474668403324:inference-profile/us.anthropic.claude-sonnet-4-20250514-v1:0",
"model_settings": {
"provider_type": "bedrock",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false
}
}


@@ -0,0 +1,13 @@
{
"handle": "anthropic/claude-3-5-sonnet-20241022",
"model_settings": {
"provider_type": "anthropic",
"temperature": 1.0,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"thinking": {
"type": "enabled",
"budget_tokens": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "anthropic/claude-3-7-sonnet-20250219",
"model_settings": {
"provider_type": "anthropic",
"temperature": 1.0,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"thinking": {
"type": "enabled",
"budget_tokens": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "anthropic/claude-3-7-sonnet-20250219",
"model_settings": {
"provider_type": "anthropic",
"temperature": 1.0,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"thinking": {
"type": "enabled",
"budget_tokens": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "anthropic/claude-sonnet-4-5-20250929",
"model_settings": {
"provider_type": "anthropic",
"temperature": 1.0,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"thinking": {
"type": "enabled",
"budget_tokens": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "anthropic/claude-sonnet-4-20250514",
"model_settings": {
"provider_type": "anthropic",
"temperature": 1.0,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"thinking": {
"type": "enabled",
"budget_tokens": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "anthropic/claude-sonnet-4-20250514",
"model_settings": {
"provider_type": "anthropic",
"temperature": 1.0,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"thinking": {
"type": "enabled",
"budget_tokens": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "google_vertex/gemini-2.5-flash",
"model_settings": {
"provider_type": "google_vertex",
"temperature": 0.7,
"max_output_tokens": 65536,
"parallel_tool_calls": false,
"thinking_config": {
"include_thoughts": true,
"thinking_budget": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "google_vertex/gemini-2.5-pro",
"model_settings": {
"provider_type": "google_vertex",
"temperature": 0.7,
"max_output_tokens": 65536,
"parallel_tool_calls": false,
"thinking_config": {
"include_thoughts": true,
"thinking_budget": 1024
}
}
}


@@ -0,0 +1,13 @@
{
"handle": "google_ai/gemini-2.5-pro",
"model_settings": {
"provider_type": "google_ai",
"temperature": 0.7,
"max_output_tokens": 65536,
"parallel_tool_calls": false,
"thinking_config": {
"include_thoughts": true,
"thinking_budget": 1024
}
}
}
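Note how the Google configs interact with is_reasoner_model from the test diff above: include_thoughts is what marks them as reasoning models (illustrative snippet; the filename is assumed):

handle, settings = get_model_config("gemini-2.5-pro.json")  # hypothetical filename
assert is_reasoner_model(handle, settings)  # True: thinking_config.include_thoughts is true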


@@ -0,0 +1,9 @@
{
"handle": "groq/qwen/qwen3-32b",
"model_settings": {
"provider_type": "groq",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false
}
}


@@ -0,0 +1,9 @@
{
"handle": "ollama/qwen2.5:7b",
"model_settings": {
"provider_type": "openai",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false
}
}


@@ -0,0 +1,12 @@
{
"handle": "openai/gpt-4.1-2025-04-14",
"model_settings": {
"provider_type": "openai",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"reasoning": {
"reasoning_effort": "high"
}
}
}


@@ -0,0 +1,12 @@
{
"handle": "openai/gpt-4o-mini",
"model_settings": {
"provider_type": "openai",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"reasoning": {
"reasoning_effort": "minimal"
}
}
}


@@ -0,0 +1,11 @@
{
"handle": "openai/gpt-5",
"model_settings": {
"provider_type": "openai",
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"reasoning": {
"reasoning_effort": "minimal"
}
}
}


@@ -0,0 +1,12 @@
{
"handle": "openai/o1",
"model_settings": {
"provider_type": "openai",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"reasoning": {
"reasoning_effort": "high"
}
}
}


@@ -0,0 +1,12 @@
{
"handle": "openai/o3",
"model_settings": {
"provider_type": "openai",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"reasoning": {
"reasoning_effort": "high"
}
}
}


@@ -0,0 +1,12 @@
{
"handle": "openai/o4-mini",
"model_settings": {
"provider_type": "openai",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false,
"reasoning": {
"reasoning_effort": "high"
}
}
}


@@ -0,0 +1,9 @@
{
"handle": "together/Qwen/Qwen2.5-72B-Instruct-Turbo",
"model_settings": {
"provider_type": "together",
"temperature": 0.7,
"max_output_tokens": 4096,
"parallel_tool_calls": false
}
}
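A quick way to sanity-check the new config files locally (a hypothetical snippet, not included in the commit):

import json
import pathlib

# Every model_settings file must expose a handle plus a settings dict,
# since get_model_config unpacks exactly those two fields.
for path in pathlib.Path("tests/sdk_v1/model_settings").glob("*.json"):
    data = json.loads(path.read_text())
    assert "handle" in data, f"{path} is missing a handle"
    assert isinstance(data.get("model_settings", {}), dict), f"{path} has malformed model_settings"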