Claude Agent SDK

The Claude Agent SDK integration captures traces from your Claude Agent SDK applications, including agent execution flow, tool calls, and LLM interactions.

Quickstart

Install Dependencies

uv add claude-agent-sdk judgeval python-dotenv

pip install claude-agent-sdk judgeval python-dotenv

The Claude Agent SDK requires Node.js to be installed. You'll also need to install the Claude Code CLI:

npm install -g @anthropic-ai/claude-code

Initialize Integration

setup.py

# The judgeval v1 client and the Claude Agent SDK integration hook.
from judgeval.v1 import Judgeval
from judgeval.v1.integrations.claude_agent_sdk import setup_claude_agent_sdk

# Build the client, then create a tracer for the named project.
judgeval = Judgeval()
tracer = judgeval.tracer.create(project_name="claude_agents_project")
# Register the integration only after the tracer exists, passing the tracer to it.
setup_claude_agent_sdk(tracer=tracer)

Always initialize the Tracer before calling setup_claude_agent_sdk() to ensure proper trace routing.

Add to Existing Code

Add the tracer setup lines (the two judgeval imports, the Tracer creation, and the setup_claude_agent_sdk() call) to your existing Claude Agent SDK application:

import dotenv
import asyncio

# Load variables from a .env file into the environment before the imports below run.
dotenv.load_dotenv()

from judgeval.tracer import Tracer
from judgeval.v1.integrations.claude_agent_sdk import setup_claude_agent_sdk

# Create the tracer first, then hand it to the integration setup call.
tracer = Tracer(project_name="claude-agents-app")
setup_claude_agent_sdk(tracer)

from claude_agent_sdk import ClaudeSDKClient, ClaudeAgentOptions

# Module-level options read by main(); only the model is set here.
options = ClaudeAgentOptions(
    model="claude-sonnet-4-20250514",
)

async def main():
    """Send one prompt through a ClaudeSDKClient session and print the reply text."""
    async with ClaudeSDKClient(options=options) as client:
        await client.query("Hello, how are you?")

        # Collect the text of every block in the response stream; messages
        # without `content` and blocks without `text` are ignored.
        chunks = []
        async for message in client.receive_response():
            if not hasattr(message, "content"):
                continue
            chunks.extend(
                block.text for block in message.content if hasattr(block, "text")
            )

        print("".join(chunks))

if __name__ == "__main__":
    asyncio.run(main())

All agent executions and tool calls are automatically traced.

Example: Agent with Tool Execution

tool_example.py

import dotenv
import asyncio
from typing import Any, Dict

# Load variables from a .env file into the environment before the imports below run.
dotenv.load_dotenv()

from judgeval.tracer import Tracer
from judgeval.v1.integrations.claude_agent_sdk import setup_claude_agent_sdk

# Create the tracer first, then hand it to the integration setup call.
tracer = Tracer(project_name="claude_tool_agent")
setup_claude_agent_sdk(tracer)

# Client and options classes, plus the helpers used below to define a tool
# and to wrap it in an MCP server.
from claude_agent_sdk import (
    ClaudeSDKClient,
    ClaudeAgentOptions,
    tool,
    create_sdk_mcp_server,
)

# Define a calculator tool
@tool(
    "calculator",
    "Calculates simple math expressions. Input should be a string like '2+2' or '10*5'.",
    {"expression": str},
)
async def calculator(args: Dict[str, Any]) -> Dict[str, Any]:
    """Evaluate a simple arithmetic expression and report it as a tool result.

    Args:
        args: Tool input. ``args["expression"]`` is the arithmetic string to
            evaluate (for example ``"2+2"``); a missing key is treated as ``""``.

    Returns:
        A dict whose ``"content"`` list holds one text block. On success the
        text is ``"<expression> = <result>"``. On failure the text starts with
        ``"Error:"`` and ``"is_error"`` is ``True``.
    """

    def error(message: str) -> Dict[str, Any]:
        # Every failure path returns the same result shape.
        return {
            "content": [{"type": "text", "text": f"Error: {message}"}],
            "is_error": True,
        }

    try:
        expression = args.get("expression", "")
        # Security: only allow basic math
        allowed_chars = set("0123456789+-*/(). ")
        if not all(c in allowed_chars for c in expression):
            return error("Invalid characters in expression")
        # "**" is spelled with whitelisted characters, but a chain such as
        # "9**9**9**9" would tie up CPU and memory, so reject it up front.
        if "**" in expression:
            return error("Exponentiation is not supported")

        # WARNING: eval() is used only to keep this example short. We don't
        # recommend using this code in production as it can lead to security
        # vulnerabilities.
        result = eval(expression, {"__builtins__": {}}, {})
        return {"content": [{"type": "text", "text": f"{expression} = {result}"}]}
    except Exception as e:
        # Report evaluation problems (syntax errors, division by zero, ...)
        # back to the agent as an error result instead of raising.
        return error(str(e))

# Wrap the calculator tool in an SDK MCP server named "math_tools";
# main() passes this object to ClaudeAgentOptions under the key "math".
calc_server = create_sdk_mcp_server(
    name="math_tools",
    version="1.0.0",
    tools=[calculator]
)

@tracer.observe(span_type="function")
async def main():
    """Ask a math question with the calculator tool configured and print the reply text."""
    # Register the MCP server under the key "math" and list its calculator
    # tool in allowed_tools.
    agent_options = ClaudeAgentOptions(
        model="claude-sonnet-4-20250514",
        system_prompt="You are a helpful assistant. Use the calculator tool for any math.",
        mcp_servers={"math": calc_server},
        allowed_tools=["mcp__math__calculator"],
        permission_mode="acceptEdits",
    )

    async with ClaudeSDKClient(options=agent_options) as client:
        await client.query("What is 15 multiplied by 23? Use the calculator tool.")

        # Collect the text of every block in the response stream; messages
        # without `content` and blocks without `text` are ignored.
        chunks = []
        async for message in client.receive_response():
            if not hasattr(message, "content"):
                continue
            chunks.extend(
                block.text for block in message.content if hasattr(block, "text")
            )

        print("".join(chunks))

if __name__ == "__main__":
    asyncio.run(main())

Standalone Query API

The Claude Agent SDK also provides a simpler standalone query() function for quick interactions:

standalone_query.py

import dotenv
import asyncio

# Load variables from a .env file into the environment before the imports below run.
dotenv.load_dotenv()

from judgeval.tracer import Tracer
from judgeval.v1.integrations.claude_agent_sdk import setup_claude_agent_sdk

# Create the tracer first, then hand it to the integration setup call.
tracer = Tracer(project_name="claude_standalone")
setup_claude_agent_sdk(tracer)

# Only the standalone query() function and the options class are needed here.
from claude_agent_sdk import query, ClaudeAgentOptions

async def main():
    """Run one prompt through the standalone query() function and print the reply text."""
    agent_options = ClaudeAgentOptions(
        model="claude-sonnet-4-20250514",
        system_prompt="You are a helpful assistant",
        permission_mode="acceptEdits",
    )

    # query() yields messages directly; gather the text of each block,
    # ignoring messages without `content` and blocks without `text`.
    chunks = []
    async for message in query(
        prompt="What is the square root of 144?",
        options=agent_options,
    ):
        if not hasattr(message, "content"):
            continue
        chunks.extend(
            block.text for block in message.content if hasattr(block, "text")
        )

    print("".join(chunks))

if __name__ == "__main__":
    asyncio.run(main())

Tracking Additional Operations: Use @tracer.observe() to track any function or method outside the Claude Agent SDK workflow. This is especially useful for monitoring utility functions, API calls, or other operations that are part of your overall application flow.

complete_example.py

from judgeval.tracer import Tracer
from judgeval.v1.integrations.claude_agent_sdk import setup_claude_agent_sdk
from claude_agent_sdk import ClaudeSDKClient, ClaudeAgentOptions
import asyncio

# Create the tracer first, then hand it to the integration setup call.
tracer = Tracer(project_name="my_agent")
setup_claude_agent_sdk(tracer)

@tracer.observe(span_type="function")
def preprocess_input(data: str) -> str:
    """Return `data` with the "Preprocessed: " prefix; decorated with @tracer.observe()."""
    return "Preprocessed: {}".format(data)

async def run_agent(user_input: str):
    """Preprocess `user_input`, send it to the agent, and return the reply text."""
    # Helper call, traced through its @tracer.observe decorator.
    prompt = preprocess_input(user_input)

    # Agent execution (automatically traced)
    agent_options = ClaudeAgentOptions(model="claude-sonnet-4-20250514")

    async with ClaudeSDKClient(options=agent_options) as client:
        await client.query(prompt)

        # Collect the text of every block in the response stream; messages
        # without `content` and blocks without `text` are ignored.
        chunks = []
        async for message in client.receive_response():
            if not hasattr(message, "content"):
                continue
            chunks.extend(
                block.text for block in message.content if hasattr(block, "text")
            )

        return "".join(chunks)

# Execute - both helper functions and agents are traced
result = asyncio.run(run_agent("Hello World"))
print(result)

Next Steps

Anthropic Integration

Learn more about integrating Anthropic Claude models with Judgment.

Agent Behavior Monitoring

Monitor your Claude agents in production with behavioral scoring.

Tracing Reference

Learn more about Judgment's tracing capabilities and advanced configuration.