Tracer
Complete reference for the Tracer Python SDK
Tracer API Reference
The Tracer is your primary interface for adding observability to your AI agents. It provides methods for tracing function execution, evaluating performance, and collecting comprehensive environment interaction data.
tracer.observe()
Decorator to trace function execution with detailed entry/exit information.
Parameters
func (Callable)
The function to decorate (automatically provided when used as a decorator).
name (str, default: None)
Optional custom name for the span (defaults to the function name).
Example: "custom_span_name"
span_type (str, default: "span")
Label for the span. Use "tool" for functions that should be tracked and exported as agent tools.
Example: "tool"
project_name (str, default: None)
Optional project name override.
Example: "my_project"
overwrite (bool, default: False)
Whether to overwrite existing traces.
Example: False
deep_tracing (bool, default: None)
Whether to enable deep tracing for this function and all nested calls. If None, uses the tracer's default setting.
Example: True
Example Code
from openai import OpenAI
from judgeval.common.tracer import Tracer

client = OpenAI()
tracer = Tracer(project_name='simple-agent', deep_tracing=False)

@tracer.observe(span_type="tool")
def search_web(query):
    return f"Results for: {query}"

@tracer.observe(span_type="retriever")
def get_database(query):
    return f"Database results for: {query}"

@tracer.observe(span_type="function")
def run_agent(user_query):
    # Use tools based on the query
    if "database" in user_query:
        info = get_database(user_query)
    else:
        info = search_web(user_query)
    prompt = f"Context: {info}, Question: {user_query}"
    # Generate response
    response = client.chat.completions.create(
        model="gpt-4.1",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content
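The name and deep_tracing parameters can also be set per function to override the defaults; a minimal sketch reusing the tracer above (the function and span name are hypothetical):

# A minimal sketch: custom span name plus a per-function deep-tracing override
@tracer.observe(name="weather_lookup", span_type="tool", deep_tracing=True)
def get_weather(city):
    return f"Weather for: {city}"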
tracer.observe_tools()
Automatically adds @observe(span_type='tool') to all methods in a class.
Parameters
cls (type)
The class to decorate (automatically provided when used as a decorator).
exclude_methods (List[str], default: ["__init__", "__new__", "__del__", "__str__", "__repr__"])
List of method names to skip decorating. Defaults to common magic methods.
Example: ["__init__", "private_method"]
include_private (bool, default: False)
Whether to decorate methods starting with an underscore.
Example: False
warn_on_double_decoration (bool, default: True)
Whether to print warnings when skipping already-decorated methods.
Example: True
Example Code
@tracer.observe_tools()
class SearchTool:
    def search_web(self, query):
        return f"Web results for: {query}"

    def search_docs(self, query):
        return f"Document results for: {query}"

    def _private_helper(self):
        # This won't be traced by default
        return "helper"

class MyAgent(SearchTool):
    @tracer.observe(span_type="function")
    def run_agent(self, user_query):
        # Use inherited tools
        if "docs" in user_query:
            info = self.search_docs(user_query)
        else:
            info = self.search_web(user_query)
        return f"Agent response based on: {info}"

# All public methods from SearchTool are automatically traced
agent = MyAgent()
result = agent.run_agent("Find web results")  # Both calls are traced
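Private methods can be opted in and specific methods skipped via the documented include_private and exclude_methods parameters; a minimal sketch (the class and method names are hypothetical):

# A minimal sketch: trace underscore-prefixed helpers, skip "search_docs"
@tracer.observe_tools(exclude_methods=["__init__", "search_docs"], include_private=True)
class InternalSearchTool:
    def search_docs(self, query):  # skipped via exclude_methods
        return f"Docs: {query}"

    def _rank_results(self, results):  # traced because include_private=True
        return sorted(results)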
wrap()
Wraps an API client to add tracing capabilities. Supports OpenAI, Together, Anthropic, and Google GenAI clients; patches both the '.create' method and Anthropic's '.stream' method using a wrapper class.
Parameters
client (Any)
API client to wrap (OpenAI, Anthropic, Together, Google GenAI).
Example: OpenAI()
trace_across_async_contexts (bool, default: False)
Whether to trace across async contexts.
Example: True
Example Code
from openai import OpenAI
from judgeval import wrap

client = OpenAI()
wrapped_client = wrap(client)

# All API calls are now automatically traced
response = wrapped_client.chat.completions.create(
    model="gpt-4.1",
    messages=[{"role": "user", "content": "Hello"}]
)

# Streaming calls are also traced
stream = wrapped_client.chat.completions.create(
    model="gpt-4.1",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True
)
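When agent code crosses async context boundaries, the documented trace_across_async_contexts flag can be enabled at wrap time; a minimal sketch:

# A minimal sketch: keep tracing across async context switches
wrapped_client = wrap(OpenAI(), trace_across_async_contexts=True)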
Evaluation & Logging
tracer.async_evaluate()
Runs quality evaluations on the current trace/span using specified scorers. You can provide either an Example object or individual evaluation parameters (input, actual_output, etc.).
Parameters
scorers (List[Union[APIJudgmentScorer, JudgevalScorer]])
List of evaluation scorers to run.
Example: [FaithfulnessScorer()]
example (Example, default: None)
Example object containing evaluation data.
input (str, default: None)
Input text to evaluate.
Example: "What is the capital of France?"
actual_output (Union[str, List[str]], default: None)
Actual output from your system.
Example: "Paris is the capital of France"
expected_output (Union[str, List[str]], default: None)
Expected/reference output.
Example: "Paris"
context (List[str], default: None)
Context information for the evaluation.
Example: ["France is a country in Europe"]
retrieval_context (List[str], default: None)
Retrieved documents for RAG evaluation.
tools_called (List[str], default: None)
Tools that were actually called.
Example: ["search", "calculate"]
expected_tools (List[str], default: None)
Tools that should have been called.
Example: ["search"]
additional_metadata (Dict[str, Any], default: None)
Additional metadata for the evaluation.
model (str, default: None)
Model name for evaluation.
Example: "gpt-4.1"
span_id (str, default: None)
Specific span ID to attach the evaluation to.
log_results (bool, default: True)
Whether to log results to the Judgment platform.
Example Code
from judgeval.scorers import FaithfulnessScorer
from judgeval.data import Example

question = "What is the capital of France?"
answer = "Paris is the capital of France"

# Create example object
example = Example(
    input=question,
    actual_output=answer,
    expected_output="Paris",
    context=["France is a country in Europe"]
)

# Evaluate using the Example
tracer.async_evaluate(
    scorers=[FaithfulnessScorer()],
    example=example
)
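As the description above notes, the same evaluation can instead be passed as individual parameters rather than an Example object; a minimal sketch:

# A minimal sketch: individual evaluation fields instead of an Example
tracer.async_evaluate(
    scorers=[FaithfulnessScorer()],
    input=question,
    actual_output=answer,
    expected_output="Paris",
    context=["France is a country in Europe"],
    model="gpt-4.1"
)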
tracer.log()
Log a message within the current span context.
Parameters
msg (str)
Message to log.
Example: "Starting web search"
label (str, default: "log")
Label/category for the log entry.
Example: "debug"
score (int, default: 1)
Numeric score associated with the log.
Example: 1
Example Code
def search_process(query):
    tracer.log("Starting search", label="info")
    try:
        results = perform_search(query)
        tracer.log(f"Found {len(results)} results", label="success", score=1)
        return results
    except Exception as e:
        tracer.log(f"Search failed: {e}", label="error", score=0)
        raise
Metadata & Organization
tracer.set_metadata()
Set metadata for the current trace.
Parameters
**kwargs (Any)
Key-value pairs to set as metadata for the current trace. Each keyword argument becomes a metadata field.
Example Code
def process_user_request(user_id, request):
    # Add metadata to the current trace
    tracer.set_metadata(
        user_id=user_id,
        environment="production",
        experiment_id="exp_456",
        version="1.2.3"
    )
    return handle_request(request)
tracer.set_customer_id()
Set the customer ID for the current trace.
Parameters
customer_id (str)
The customer ID to set.
Example: "customer_123"
Example Code
def handle_customer_request(customer_id, request):
    tracer.set_customer_id(customer_id)
    return process_request(request)
tracer.set_tags()
Set the tags for the current trace.
Parameters
tags (List[str])
List of tags to set.
Example: ["experiment", "production", "v2"]
Example Code
def experimental_feature(data):
    tracer.set_tags(["experiment", "feature_v2", "production"])
    return new_algorithm(data)
Advanced Features
tracer.identify()
Class decorator for multi-agent systems that assigns a unique identifier to an agent and enables tracking of its internal state variables. Essential for monitoring and debugging complex multi-agent workflows where multiple agents interact and you need to track each agent's behavior and state separately.
Parameters
identifier (str)
The identifier to associate with the decorated class. This is used as the instance name in traces.
Example: "user_agent"
track_state (bool, default: False)
Whether to automatically capture the state (attributes) of instances before and after function execution.
Example: True
track_attributes (List[str], default: None)
Optional list of specific attribute names to track. If None, all non-private attributes (those not starting with "_") are tracked when track_state=True.
Example: ["memory", "goals"]
field_mappings (Dict[str, str], default: None)
Optional dictionary mapping internal attribute names to display names in the captured state. For example, {"system_prompt": "instructions"} captures the 'instructions' attribute as 'system_prompt' in the state.
Example: {"system_prompt": "instructions"}
Example Code
@tracer.identify(identifier="name", track_state=True)
class Agent(AgentTools, AgentBase):
    """An AI agent."""
    def __init__(self, name):
        self.name = name
        self.function_map = {
            "func": self.function,
            # ... additional tool mappings
        }

    @tracer.observe(span_type="function")
    def process_request(self, user_request):
        """Process a user request using all available tools."""
        pass
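track_attributes and field_mappings narrow and rename what gets captured when track_state=True; a minimal sketch (the class and attribute names are hypothetical):

# A minimal sketch: capture only "memory" and "goals", and record the
# "instructions" attribute under the "system_prompt" key in the state
@tracer.identify(
    identifier="name",
    track_state=True,
    track_attributes=["memory", "goals"],
    field_mappings={"system_prompt": "instructions"},
)
class PlannerAgent:
    def __init__(self, name):
        self.name = name
        self.memory = []
        self.goals = []
        self.instructions = "You are a planning agent."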
Current Span Access
tracer.get_current_span()
Returns the current span object for direct access to span properties and methods, useful for debugging and inspection.
Available Span Properties
The current span object provides these properties for inspection and debugging:
Property | Type | Description
---|---|---
span_id | str | Unique identifier for this span
trace_id | str | ID of the parent trace
function | str | Name of the function being traced
span_type | str | Type of span ("span", "tool", "llm", "evaluation", "chain")
inputs | dict | Input parameters for this span
output | Any | Output/result of the span execution
duration | float | Execution time in seconds
depth | int | Nesting depth in the trace hierarchy
parent_span_id | Optional[str] | ID of the parent span (if nested)
agent_name | Optional[str] | Name of the agent executing this span
has_evaluation | bool | Whether this span has evaluation runs
evaluation_runs | List[EvaluationRun] | List of evaluations run on this span
usage | Optional[TraceUsage] | Token usage and cost information
error | Optional[Dict[str, Any]] | Error information if the span failed
state_before | Optional[dict] | Agent state before execution
state_after | Optional[dict] | Agent state after execution
Example Usage
@tracer.observe(span_type="tool")
def debug_tool(query):
span = tracer.get_current_span()
if span:
# Access span properties for debugging
print(f"🔧 Executing {span.function} (ID: {span.span_id})")
print(f"📊 Depth: {span.depth}, Type: {span.span_type}")
print(f"📥 Inputs: {span.inputs}")
# Check parent relationship
if span.parent_span_id:
print(f"👆 Parent span: {span.parent_span_id}")
# Monitor execution state
if span.agent_name:
print(f"🤖 Agent: {span.agent_name}")
result = perform_search(query)
# Check span after execution
if span:
print(f"📤 Output: {span.output}")
print(f"⏱️ Duration: {span.duration}s")
if span.has_evaluation:
print(f"✅ Has {len(span.evaluation_runs)} evaluations")
if span.error:
print(f"❌ Error: {span.error}")
return result
Getting Started
from judgeval import Tracer

# Initialize tracer
tracer = Tracer(
    api_key="your_api_key",
    project_name="my_agent_project"
)

# Basic function tracing
@tracer.observe(span_type="agent")
def my_agent(query):
    tracer.set_metadata(user_query=query)
    result = process_query(query)
    tracer.log("Processing completed", label="info")
    return result

# Auto-trace LLM calls
from openai import OpenAI
from judgeval import wrap

client = wrap(OpenAI())
response = client.chat.completions.create(...)  # Automatically traced