Tracer
Complete reference for the Tracer Python SDK
Tracer API Reference
The Tracer is your primary interface for adding observability to your AI agents. It provides methods for tracing function execution, evaluating performance, and collecting comprehensive environment interaction data.
Tracer()
Initialize a Tracer object.
Parameters
api_key
str
Optional
Recommended - set using the JUDGMENT_API_KEY
environment variable
organization_id
str
Optional
Recommended - set using the JUDGMENT_ORG_ID
environment variable
project_name
str
Default: "default_project"
Optional
Project name override
deep_tracing
bool
Default: False
Optional
Whether to enable deep tracing, which will trace all nested function calls without the need to decorate each function.
enable_monitoring
bool
Default: True
Optional
If you need to toggle monitoring on and off
enable_evaluations
bool
Default: True
Optional
If you need to toggle evaluations on and off for async_evaluate()
use_s3
bool
Default: False
Optional
Whether to use S3 for storage
s3_bucket_name
str
Default: "None"
Optional
Name of the S3 bucket to use
s3_aws_access_key_id
str
Default: "None"
Optional
AWS access key ID for S3
s3_region_name
str
Default: "None"
Optional
AWS region name for S3
trace_across_async_contexts
bool
Default: False
Optional
Whether to trace across async contexts
span_batch_size
int
Default: 50
Optional
Number of spans to batch before sending
span_flush_interval
float
Default: 1.0
Optional
Time in seconds between automatic flushes
span_num_workers
int
Default: 10
Optional
Number of worker threads for span processing
Example Code
from judgeval import Tracer
tracer = Tracer()
@tracer.observe()
Decorator that traces the execution of a function, creating a span that records its inputs, outputs, and duration. This is useful for capturing intermediate steps, tool calls, or decisions made by the agent.
Parameters
func
Callable
Required
The function to decorate (automatically provided when used as decorator)
name
str
Default: "None"
Optional
Optional custom name for the span (defaults to function name)
"custom_span_name"
span_type
str
Default: "span"
Optional
Label for the span. Use 'tool' for functions that should be tracked and exported as agent tools
"tool"
Example Code
from openai import OpenAI
from judgeval.common.tracer import Tracer
client = OpenAI()
tracer = Tracer(project_name='default_project', deep_tracing=False)
@tracer.observe(span_type="tool")
def search_web(query):
return f"Results for: {query}"
@tracer.observe(span_type="retriever")
def get_database(query):
return f"Database results for: {query}"
@tracer.observe(span_type="function")
def run_agent(user_query):
# Use tools based on query
if "database" in user_query:
info = get_database(user_query)
else:
info = search_web(user_query)
prompt = f"Context: {info}, Question: {user_query}"
# Generate response
response = client.chat.completions.create(
model="gpt-4.1",
messages=[{"role": "user", "content": prompt}]
)
return response.choices[0].message.content
@tracer.observe_tools()
Automatically adds @observe(span_type='tool')
to all methods in a class.
Parameters
cls
Class
Required
The class to decorate (automatically provided when used as decorator)
exclude_methods
List[str]
Default: ["__init__", "__new__", "__del__", "__str__", "__repr__"]
Optional
List of method names to skip decorating. Defaults to common magic methods
["__init__", "private_method"]
include_private
bool
Default: False
Optional
Whether to decorate methods starting with underscore. Defaults to False
False
warn_on_double_decoration
bool
Default: True
Optional
Whether to print warnings when skipping already-decorated methods. Defaults to True
True
deep_tracing
bool
Default: False
Optional
Whether to enable deep tracing for this function and all nested calls. If None, uses the tracer's default setting.
True
Example Code
@tracer.observe_tools()
class SearchTool:
def search_web(self, query):
return f"Web results for: {query}"
def search_docs(self, query):
return f"Document results for: {query}"
def _private_helper(self):
# This won't be traced by default
return "helper"
class MyAgent(SearchTool):
@tracer.observe(span_type="function")
def run_agent(self, user_query):
# Use inherited tools
if "docs" in user_query:
info = self.search_docs(user_query)
else:
info = self.search_web(user_query)
return f"Agent response based on: {info}"
# All public methods from SearchTool are automatically traced
agent = MyAgent()
result = agent.run_agent("Find web results") # Both calls are traced
wrap()
Wraps an API client to add tracing capabilities. Supports OpenAI, Together, Anthropic, and Google GenAI clients. Patches methods like .create
, Anthropic's .stream
, and OpenAI's .responses.create
and .beta.chat.completions.parse
methods using a wrapper class.
Parameters
client
Any
Required
API client to wrap (OpenAI, Anthropic, Together, Google GenAI, Groq)
OpenAI()
trace_across_async_contexts
bool
Default: False
Optional
Whether to trace across async contexts
True
Example Code
from openai import OpenAI
from judgeval import wrap
client = OpenAI()
wrapped_client = wrap(client)
# All API calls are now automatically traced
response = wrapped_client.chat.completions.create(
model="gpt-4.1",
messages=[{"role": "user", "content": "Hello"}]
)
# Streaming calls are also traced
stream = wrapped_client.chat.completions.create(
model="gpt-4.1",
messages=[{"role": "user", "content": "Hello"}],
stream=True
)
Evaluation & Logging
tracer.async_evaluate()
Runs quality evaluations on the current trace/span using specified scorers. You can provide either an Example object or individual evaluation parameters (input, actual_output, etc.).
Parameters
scorer
Union[APIScorerConfig, BaseScorer]
Required
An evaluation scorer to run
FaithfulnessScorer()
example
Example
Required
Example object containing evaluation data
sampling_rate
float
Default: 1
Optional
A float between 0 and 1 representing the chance the eval should be sampled.
0.75 # Eval occurs 75% of the time
Example Code
from judgeval.scorers import FaithfulnessScorer
from judgeval.data import Example
question = "What is the capital of France?"
answer = "Paris is the capital of France"
# Create example object
example = Example(
input=question,
actual_output=answer,
expected_output="Paris",
context=["France is a country in Europe"]
)
# Evaluate using Example
tracer.async_evaluate(
scorer=FaithfulnessScorer(),
example=example,
model="gpt-4.1",
sampling_rate=0.5
)
return answer
tracer.log()
Log a message with the current span context
Example Code
def search_process(query):
tracer.log("Starting search", label="info")
try:
results = perform_search(query)
tracer.log(f"Found {len(results)} results", label="success", score=1)
return results
except Exception as e:
tracer.log(f"Search failed: {e}", label="error", score=0)
raise
Metadata & Organization
tracer.update_metadata()
Set metadata for the current trace.
Parameters
metadata
dict
Required
Metadata as a dictionary for any miscellaneous information you want to track on your trace
Supported special keys:
- customer_id: ID of the customer using this trace
- tags: List of tags for this trace
- has_notification: Whether this trace has a notification
- name: Name of the trace
Any other keys will be stored as custom metadata.
Example Code
def process_user_request(user_id, request):
# Add metadata to the current trace
tracer.update_metadata({
"user_id": user_id,
"environment": "production",
"experiment_id": "exp_456",
"version": "1.2.3"
})
return handle_request(request)
tracer.set_reward_score()
Set the reward score for this trace to be used for RL or SFT.
Parameters
Example Code
def process_response(response):
# Calculate reward score
reward = calculate_reward(response)
tracer.set_reward_score(reward)
# Or set multiple reward scores
tracer.set_reward_score({
"helpfulness": 0.9,
"accuracy": 0.8,
"safety": 1.0
})
return response
tracer.set_customer_id()
Set the customer ID for the current trace.
Parameters
Example Code
def handle_customer_request(customer_id, request):
tracer.set_customer_id(customer_id)
return process_request(request)
tracer.set_tags()
Set the tags for the current trace.
Parameters
Example Code
def experimental_feature(data):
tracer.set_tags(["experiment", "feature_v2", "production"])
return new_algorithm(data)
Advanced Features
@tracer.agent()
Class decorator for multi-agent systems that assigns a unique identifier to each agent and enables tracking of its internal state variables. Essential for monitoring and debugging complex multi-agent workflows where multiple agents interact and you need to track each agent's behavior and state separately.
Parameters
identifier
str
Required
The identifier to associate with the decorated class. This will be used as the instance name in traces.
"user_agent"
track_state
bool
Default: False
Optional
Whether to automatically capture the state (attributes) of instances before and after function execution. Defaults to False
.
True
track_attributes
List[str]
Default: None
Optional
Optional list of specific attribute names to track. If None, all non-private attributes (not starting with '_') will be tracked when track_state=True.
["memory", "goals"]
field_mappings
Dict[str, str]
Default: None
Optional
Optional dictionary mapping internal attribute names to display names in the captured state.
For example: {"system_prompt": "instructions"}
will capture the instructions
attribute as system_prompt
in the state.
{"system_prompt": "instructions"}
Example Code
judgment = Tracer(project_name="default_project")
@judgment.agent(identifier="name", track_state=True)
class Agent(AgentTools, AgentBase):
"""An AI agent."""
def __init__(self, name):
self.name = name
self.function_map = {
"func": self.function,
...
}
@judgment.observe(span_type="function")
def process_request(self, user_request):
"""Process a user request using all available tools."""
pass
Current Span Access
tracer.get_current_span()
Returns the current span object for direct access to span properties and methods, useful for debugging and inspection.
Available Span Properties
The current span object provides these properties for inspection and debugging:
Property | Type | Description |
---|---|---|
span_id | str | Unique identifier for this span |
trace_id | str | ID of the parent trace |
function | str | Name of the function being traced |
span_type | str | Type of span ("span", "tool", "llm", "evaluation", "chain") |
inputs | dict | Input parameters for this span |
output | Any | Output/result of the span execution |
duration | float | Execution time in seconds |
depth | int | Nesting depth in the trace hierarchy |
parent_span_id | str | None | ID of the parent span (if nested) |
agent_name | str | None | Name of the agent executing this span |
has_evaluation | bool | Whether this span has evaluation runs |
evaluation_runs | List[EvaluationRun] | List of evaluations run on this span |
usage | TraceUsage | None | Token usage and cost information |
error | Dict[str, Any] | None | Error information if span failed |
state_before | dict | None | Agent state before execution |
state_after | dict | None | Agent state after execution |
Example Usage
@tracer.observe(span_type="tool")
def debug_tool(query):
span = tracer.get_current_span()
if span:
# Access span properties for debugging
print(f"🔧 Executing {span.function} (ID: {span.span_id})")
print(f"📊 Depth: {span.depth}, Type: {span.span_type}")
print(f"📥 Inputs: {span.inputs}")
# Check parent relationship
if span.parent_span_id:
print(f"👆 Parent span: {span.parent_span_id}")
# Monitor execution state
if span.agent_name:
print(f"🤖 Agent: {span.agent_name}")
result = perform_search(query)
# Check span after execution
if span:
print(f"📤 Output: {span.output}")
print(f"⏱️ Duration: {span.duration}s")
if span.has_evaluation:
print(f"✅ Has {len(span.evaluation_runs)} evaluations")
if span.error:
print(f"❌ Error: {span.error}")
return result
Getting Started
from judgeval import Tracer
# Initialize tracer
tracer = Tracer(
api_key="your_api_key",
project_name="default_project"
)
# Basic function tracing
@tracer.observe(span_type="agent")
def my_agent(query):
tracer.update_metadata({"user_query": query})
result = process_query(query)
tracer.log("Processing completed", label="info")
return result
# Auto-trace LLM calls
from openai import OpenAI
from judgeval import wrap
client = wrap(OpenAI())
response = client.chat.completions.create(...) # Automatically traced