Testing & Troubleshooting

Learn how to test agents, planners, and tools, and troubleshoot common issues.

This guide covers testing strategies for Goa-AI agents, planners, and tools, along with solutions to the errors you are most likely to encounter at runtime.

Testing Agents

Testing with the In-Memory Engine

The in-memory engine is ideal for testing because it:

  • Requires no external dependencies (no Temporal)
  • Executes synchronously for predictable test behavior
  • Provides fast feedback during development

A minimal agent test looks like this:

func TestChatAgent(t *testing.T) {
    // Create runtime with in-memory engine (default)
    rt := runtime.New()
    ctx := context.Background()
    
    // Register agent with test planner
    err := chat.RegisterChatAgent(ctx, rt, chat.ChatAgentConfig{
        Planner: &TestPlanner{},
    })
    require.NoError(t, err)

    _, err = rt.CreateSession(ctx, "test-session")
    require.NoError(t, err)
    
    // Run agent
    client := chat.NewClient(rt)
    out, err := client.Run(
        ctx,
        "test-session",
        []*model.Message{{
            Role:  model.ConversationRoleUser,
            Parts: []model.Part{model.TextPart{Text: "Hello"}},
        }},
    )
    require.NoError(t, err)
    
    // Assert on output
    assert.NotEmpty(t, out.RunID)
    assert.NotNil(t, out.Final)
}
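
The TestPlanner passed to the config above can be any planner implementation. A minimal stub might look like the following; the planner interface is not spelled out in this guide, so the PlanOutput return type and its FinalResponse field are assumptions based on the other snippets here:

// TestPlanner is a minimal planner stub. NOTE: the return type and field
// names below are assumptions; adapt them to your generated planner interface.
type TestPlanner struct{}

func (p *TestPlanner) PlanStart(ctx context.Context, in *planner.PlanInput) (*planner.PlanOutput, error) {
    // Finish immediately with a canned assistant message and no tool calls.
    return &planner.PlanOutput{
        FinalResponse: &model.Message{
            Role:  model.ConversationRoleAssistant,
            Parts: []model.Part{model.TextPart{Text: "stub response"}},
        },
    }, nil
}

func (p *TestPlanner) PlanResume(ctx context.Context, in *planner.PlanInput) (*planner.PlanOutput, error) {
    // Resume behaves the same as start for this stub.
    return p.PlanStart(ctx, in)
}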

Testing Planners with Mock Model Clients

Isolate planner logic by mocking the model client:

type MockModelClient struct {
    responses []model.Message
    callCount int
}

func (m *MockModelClient) Complete(ctx context.Context, req *model.Request) (*model.Response, error) {
    if m.callCount >= len(m.responses) {
        return nil, fmt.Errorf("no more mock responses")
    }
    resp := &model.Response{
        Content: []model.Message{m.responses[m.callCount]},
    }
    m.callCount++
    return resp, nil
}

func (m *MockModelClient) Stream(ctx context.Context, req *model.Request) (model.Streamer, error) {
    if m.callCount >= len(m.responses) {
        return nil, fmt.Errorf("no more mock responses")
    }
    // Return a mock streamer for streaming tests. MockStreamer (not shown)
    // must implement model.Streamer.
    return &MockStreamer{response: m.responses[m.callCount]}, nil
}

func TestPlannerWithMockClient(t *testing.T) {
    mockClient := &MockModelClient{
        responses: []model.Message{
            {
                Role: model.ConversationRoleAssistant,
                Parts: []model.Part{
                    model.TextPart{Text: "I'll search for that."},
                    model.ToolUsePart{
                        ID:    "call-1",
                        Name:  "search",
                        Input: json.RawMessage(`{"query": "test"}`),
                    },
                },
            },
        },
    }
    
    // Name the planner variable so it does not shadow the planner package.
    p := &MyPlanner{client: mockClient}
    
    input := &planner.PlanInput{
        Messages: []*model.Message{{
            Role:  model.ConversationRoleUser,
            Parts: []model.Part{model.TextPart{Text: "Search for test"}},
        }},
    }
    
    result, err := p.PlanStart(context.Background(), input)
    require.NoError(t, err)
    
    // Assert planner returned tool calls
    assert.NotNil(t, result.ToolCalls)
    assert.Len(t, result.ToolCalls, 1)
    assert.Equal(t, "search", string(result.ToolCalls[0].Name))
}

Testing Tools in Isolation

Test tool executors independently from the agent:

func TestSearchToolExecutor(t *testing.T) {
    // Create executor with mock dependencies
    mockSearchService := &MockSearchService{
        results: []string{"doc1", "doc2", "doc3"},
    }
    executor := &SearchExecutor{searchService: mockSearchService}
    
    // Create test tool call
    meta := &runtime.ToolCallMeta{
        RunID:      "test-run",
        SessionID:  "test-session",
        TurnID:     "test-turn",
        ToolCallID: "call-1",
    }
    
    call := &planner.ToolRequest{
        Name:    specs.Search,
        Payload: json.RawMessage(`{"query": "test", "limit": 5}`),
    }
    
    // Execute tool
    result, err := executor.Execute(context.Background(), meta, call)
    require.NoError(t, err)
    require.NotNil(t, result.ToolResult)
    
    // Assert on result
    assert.Nil(t, result.ToolResult.Error)
    assert.NotNil(t, result.ToolResult.Result)
    
    // Unmarshal and verify typed result
    searchResult, ok := result.ToolResult.Result.(*specs.SearchResult)
    require.True(t, ok)
    assert.Len(t, searchResult.Documents, 3)
}

Testing Tool Validation and Retry Hints

Verify that tools return proper errors and hints for invalid input:

func TestToolValidationReturnsHint(t *testing.T) {
    executor := &SearchExecutor{}
    
    // Invalid payload - missing required field
    call := &planner.ToolRequest{
        Name:    specs.Search,
        Payload: json.RawMessage(`{"limit": 5}`), // missing "query"
    }
    
    result, err := executor.Execute(context.Background(), &runtime.ToolCallMeta{}, call)
    require.NoError(t, err) // Executor should not return error
    require.NotNil(t, result.ToolResult)
    
    // Should return ToolError with RetryHint
    assert.NotNil(t, result.ToolResult.Error)
    assert.NotNil(t, result.ToolResult.RetryHint)
    assert.Equal(t, planner.RetryReasonMissingFields, result.ToolResult.RetryHint.Reason)
    assert.Contains(t, result.ToolResult.RetryHint.MissingFields, "query")
}

Testing Agent Composition

Test agent-as-tool scenarios:

func TestAgentComposition(t *testing.T) {
    rt := runtime.New()
    ctx := context.Background()
    
    // Register provider agent
    err := planner.RegisterPlannerAgent(ctx, rt, planner.PlannerAgentConfig{
        Planner: &PlanningPlanner{},
    })
    require.NoError(t, err)
    
    // Register consumer agent that uses provider's tools
    err = orchestrator.RegisterOrchestratorAgent(ctx, rt, orchestrator.OrchestratorAgentConfig{
        Planner: &OrchestratorPlanner{},
    })
    require.NoError(t, err)

    _, err = rt.CreateSession(ctx, "test-session")
    require.NoError(t, err)
    
    // Run orchestrator - it should invoke planner agent as a tool
    client := orchestrator.NewClient(rt)
    out, err := client.Run(
        ctx,
        "test-session",
        []*model.Message{{
            Role:  model.ConversationRoleUser,
            Parts: []model.Part{model.TextPart{Text: "Create a plan for X"}},
        }},
    )
    require.NoError(t, err)
    
    // Verify child run was created
    assert.Greater(t, out.ChildrenCount, 0)
}

Troubleshooting

Common Errors

“registration closed” Error

Symptom:

error: registration closed: cannot register agent after runtime start

Cause: Attempting to register an agent after the runtime has started processing runs.

Solution: Register all agents before starting any runs:

rt := runtime.New()

// ✓ Register all agents first
chat.RegisterChatAgent(ctx, rt, chatConfig)
planner.RegisterPlannerAgent(ctx, rt, plannerConfig)

// ✓ Then create a session and start runs
client := chat.NewClient(rt)
if _, err := rt.CreateSession(ctx, "session-123"); err != nil {
    panic(err)
}
out, err := client.Run(ctx, "session-123", messages, opts...)

“missing session ID” Error

Symptom:

error: missing session ID: session ID is required for run

Cause: Starting a run without providing a session ID.

Solution: Always provide a session ID as the required positional argument:

// ✗ Wrong - no session ID
out, err := client.Run(ctx, "", messages)

// ✓ Correct - session ID provided
if _, err := rt.CreateSession(ctx, "session-123"); err != nil {
    panic(err)
}
out, err := client.Run(ctx, "session-123", messages)

Tip: For testing, use a fixed session ID. For production, generate unique session IDs per conversation.
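
For example, a unique per-conversation ID could be derived from a UUID; this sketch assumes the github.com/google/uuid package:

// Generate a unique session ID per conversation (assumes github.com/google/uuid).
sessionID := "conv-" + uuid.NewString()
if _, err := rt.CreateSession(ctx, sessionID); err != nil {
    panic(err)
}
out, err := client.Run(ctx, sessionID, messages)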

Policy Violation Errors

Symptom:

error: policy violation: max tool calls exceeded (10/10)

Cause: The agent exceeded the configured MaxToolCalls limit for budgeted tools. Tools declared Bookkeeping() do not count against this cap.

Solutions:

  1. Increase the limit if the use case legitimately requires more tool calls:
RunPolicy(func() {
    DefaultCaps(MaxToolCalls(20)) // Increase from default
})
  2. Improve planner efficiency to use fewer tool calls:

    • Batch operations where possible
    • Use more specific tool calls
    • Improve prompt engineering
  3. Check for infinite loops in planner logic that repeatedly calls the same tool.

  4. Exempt structured bookkeeping tools from the budget by declaring them Bookkeeping() in the DSL (see the sketch after this list). Status updates, progress markers, and terminal-commit tools typically belong in this category; once exempt, they never consume RemainingToolCalls and can always execute. Pair Bookkeeping() with TerminalRun() for a “commit this run” tool that is guaranteed to finalize even after the retrieval budget is exhausted.
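
A hypothetical DSL sketch of option 4; the Tool signature below mirrors the Agent and Attribute style used elsewhere in this guide and is an assumption, not the confirmed API:

// Hypothetical sketch: exempting bookkeeping tools from the tool-call budget.
Tool("report_status", "Report structured progress", func() {
    Bookkeeping() // never consumes RemainingToolCalls
})
Tool("commit_answer", "Commit the final answer", func() {
    Bookkeeping()
    TerminalRun() // can always execute and finalizes the run
})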

Symptom:

error: bookkeeping-only tool batch requires a terminal tool or terminal planner payload

Cause: The planner emitted only bookkeeping tools, but none of their results were eligible to drive another planner turn. By default, successful bookkeeping results stay hidden from future PlanResume turns, so the same turn must either resolve terminally, await input, or produce a planner-visible bookkeeping result.

Solutions:

  1. Finish in the same turn with TerminalRun(), FinalResponse, or FinalToolResult when the bookkeeping batch is already terminal.
  2. Pause explicitly with an await/pause handshake if the run is waiting for human or external input.
  3. Mark the bookkeeping result PlannerVisible() when it carries canonical state that the next planner turn must reason over, such as a structured progress snapshot (see the sketch after this list).
  4. Do not combine PlannerVisible() with TerminalRun(). Use TerminalRun() for atomic completion and PlannerVisible() for non-terminal bookkeeping that should resume planning.
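
A hypothetical sketch of option 3, reusing the assumed Tool signature from the previous sketch:

// Hypothetical sketch: a bookkeeping tool whose successful result is fed
// back to the next PlanResume turn instead of staying hidden.
Tool("record_progress", "Record a structured progress snapshot", func() {
    Bookkeeping()
    PlannerVisible() // non-terminal: planning resumes over this result
})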

Symptom:

error: policy violation: max consecutive failed tool calls exceeded (3/3)

Cause: Multiple consecutive tool calls failed.

Solutions:

  1. Fix the underlying tool errors - check tool executor logs
  2. Improve retry hints so the planner can self-correct
  3. Increase the limit if transient failures are expected:
RunPolicy(func() {
    DefaultCaps(MaxConsecutiveFailedToolCalls(5))
})

Symptom:

error: policy violation: time budget exceeded (2m0s)

Cause: The agent run exceeded the configured TimeBudget.

Solutions:

  1. Increase the budget for long-running operations:
RunPolicy(func() {
    TimeBudget("10m")
})
  2. Use Timing for fine-grained control:
RunPolicy(func() {
    Timing(func() {
        Budget("10m")  // Overall budget
        Plan("1m")     // Per-plan timeout
        Tools("2m")    // Per-tool timeout
    })
})
  3. Optimize tool execution to complete faster.

“unknown tool” Error

Symptom:

error: unknown tool: orchestrator.helpers.search

Cause: The planner requested a tool that isn’t registered.

Solutions:

  1. Verify toolset registration - ensure the toolset is registered with the agent:
Agent("chat", "Chat agent", func() {
    Use(HelpersToolset) // Make sure this is included
})
  2. Check tool name spelling - tool names are case-sensitive and use qualified names (see the snippet after this list).

  3. Regenerate code after DSL changes:

goa gen example.com/project/design
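
To check the exact registered names, list them at runtime with the same helper shown in “Inspect Tool Specs at Runtime” below:

for _, spec := range rt.ToolSpecsForAgent(chat.AgentID) {
    fmt.Println(spec.Name) // qualified, case-sensitive tool name
}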

“invalid payload” Error

Symptom:

error: invalid payload: json: cannot unmarshal string into Go struct field SearchPayload.limit of type int

Cause: The LLM provided a payload that doesn’t match the tool’s schema.

Solutions:

  1. Return a RetryHint from the executor so the planner can self-correct:
if err != nil {
    return runtime.Executed(&planner.ToolResult{
        Name:  call.Name,
        Error: planner.NewToolError("invalid payload"),
        RetryHint: &planner.RetryHint{
            Reason:       planner.RetryReasonInvalidArguments,
            Tool:         call.Name,
            ExampleInput: map[string]any{"query": "example", "limit": 10},
            Message:      "limit must be an integer",
        },
    }), nil
}
  2. Improve tool descriptions to clarify expected types.

  3. Add examples to the DSL:

Args(func() {
    Attribute("limit", Int, "Maximum results", func() {
        Example(10)
        Minimum(1)
        Maximum(100)
    })
})

Debugging Tips

Enable Debug Logging

import "goa.design/goa-ai/runtime/agent/runtime"

rt := runtime.New(
    runtime.WithLogger(slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
        Level: slog.LevelDebug,
    }))),
)

Subscribe to Events for Debugging

type DebugSink struct{}

func (s *DebugSink) Send(ctx context.Context, event stream.Event) error {
    fmt.Printf("[%s] %s run=%s session=%s payload=%v\n",
        time.Now().Format(time.RFC3339),
        event.Type(),
        event.RunID(),
        event.SessionID(),
        event.Payload(),
    )
    return nil
}

func (s *DebugSink) Close(ctx context.Context) error { return nil }

// Wire the sink into the runtime to observe all stream events.
rt := runtime.New(runtime.WithStream(&DebugSink{}))

Inspect Tool Specs at Runtime

// List all registered tools
for _, spec := range rt.ToolSpecsForAgent(chat.AgentID) {
    fmt.Printf("Tool: %s\n", spec.Name)
    fmt.Printf("  Description: %s\n", spec.Description)
    fmt.Printf("  Payload Schema: %s\n", spec.Payload.Schema)
}

Next Steps