Judgeval Python-v1 SDK

Tracer

Track agent behavior and evaluate performance in real-time with the Tracer class.

The Tracer class provides comprehensive observability for AI agents and LLM applications. It automatically captures execution traces, spans, and performance metrics while enabling real-time evaluation and monitoring through the Judgment platform.

from judgeval import Judgeval
from openai import OpenAI

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

openai = OpenAI()
openai = tracer.wrap(openai)

class QAAgent:
    """Minimal question-answering agent whose methods are traced by `tracer`."""

    def __init__(self, openai_client):
        # Expects a client already wrapped with tracer.wrap() so LLM calls are traced.
        self.client = openai_client

    @tracer.observe(span_type="tool")
    def process_query(self, query):
        """Send `query` to the chat model and return the formatted answer text."""
        response = self.client.chat.completions.create(
            model="gpt-5.2",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": f"I have a query: {query}"}]
        )
        return f"Response: {response.choices[0].message.content}"

    @tracer.agent()
    @tracer.observe(span_type="agent")
    def invoke_agent(self, query):
        """Agent entry point; delegates to the tool-traced process_query."""
        result = self.process_query(query)
        return result


if __name__ == "__main__":
    # Running invoke_agent produces a trace with agent, tool, and llm spans.
    agent = QAAgent(openai)
    print(agent.invoke_agent("What is the capital of the United States?"))

client.tracer.create()

Create a Tracer instance for monitoring and evaluation.

tracer = client.tracer.create(
    enable_evaluation: bool = True,
    enable_monitoring: bool = True,
    serializer: Callable[[Any], str] = safe_serialize,
    filter_tracer: Optional[FilterTracerCallback] = None,
    isolated: bool = False,
    resource_attributes: Optional[Dict[str, Any]] = None,
    initialize: bool = True,
    use_default_span_processor: bool = True
)

Parameters

enable_evaluation

:bool
Toggle evaluations for async_evaluate()
Default: True

enable_monitoring

:bool
Toggle monitoring
Default: True

serializer

:Callable[[Any], str]

Custom serializer function for converting objects to strings.

Default: safe_serialize

filter_tracer

:Optional[FilterTracerCallback]

Callback to filter which instrumentations should be traced.

Default: None

isolated

:bool

Run tracer in isolated mode (does not set global tracer provider).

Default: False

resource_attributes

:Optional[Dict[str, Any]]

Custom resource attributes to attach to the tracer.

Default: None

initialize

:bool

Whether to initialize the tracer immediately on creation.

Default: True

use_default_span_processor

:bool

Whether to use the default span processor for monitoring. When enabled along with enable_monitoring, automatically adds the JudgmentSpanProcessor.

Default: True

Example

tracer.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe()

Decorate a function to record an observation or output during a trace. This is useful for capturing intermediate steps, tool results, or decisions made by the agent.

Parameters

func (required)

:Callable

The function to decorate (automatically provided when used as decorator)

span_name

:str

Optional custom name for the span (defaults to function name)

Default: None

span_type

:str

Type of span to create. Available options:

  • "span": General span (default)
  • "tool": For functions that should be tracked and exported as agent tools
  • "function": For main functions or entry points
  • "retriever": For retrieval or database-lookup functions
  • "llm": For language model calls (automatically applied to wrapped clients)

LLM clients wrapped using tracer.wrap() automatically use the "llm" span type without needing manual decoration.

Default: "span"

Examples

from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe(span_type="function") 
def answer_question(question: str) -> str:
  """Return a canned answer; recorded as a "function" span."""
  answer = "The capital of the United States is Washington, D.C."
  return answer

@tracer.observe(span_type="tool") 
def process_request(question: str) -> str:
  """Traced "tool" span wrapping answer_question; the nesting shows in the trace."""
  answer = answer_question(question)
  return answer

if __name__ == "__main__":
  # Invoking the entry point emits the function span with its nested tool span.
  print(process_request("What is the capital of the United States?"))
from openai import OpenAI
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

openai = OpenAI()
openai = tracer.wrap(openai)

@tracer.observe(span_type="tool") 
def search_web(query):
  """Stub web-search tool; recorded as a "tool" span."""
  return f"Results for: {query}"

@tracer.observe(span_type="retriever") 
def get_database(query):
  """Stub database lookup; recorded as a "retriever" span."""
  return f"Database results for: {query}"

@tracer.observe(span_type="function") 
def run_agent(user_query):
  """Route the query to a tool, then generate an LLM response inside the trace."""
  # Use tools based on query
  if "database" in user_query:
    info = get_database(user_query)
  else:
    info = search_web(user_query)

  prompt = f"Context: {info}, Question: {user_query}"

  # Generate response; the wrapped client records this call as an "llm" span
  response = openai.chat.completions.create(
    model="gpt-5.2",
    messages=[{"role": "user", "content": prompt}]
  )
  return response.choices[0].message.content

tracer.wrap()

Wraps an API client to add tracing capabilities. Supports OpenAI, Together, Anthropic, and Google GenAI clients. Patches methods like .create, Anthropic's .stream, and OpenAI's .responses.create and .beta.chat.completions.parse methods using a wrapper class.

Parameters

client (required)

:Any

API client to wrap (OpenAI, Anthropic, Together, Google GenAI, Groq)

Example

wrapped_api_client.py
from openai import OpenAI
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

openai = OpenAI()
openai = tracer.wrap(openai)

# All API calls are now automatically traced
response = openai.chat.completions.create(
    model="gpt-5.2",
    messages=[{"role": "user", "content": "Hello"}]
)

# Streaming calls are also traced
stream = openai.chat.completions.create(
    model="gpt-5.2",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True
)

tracer.async_evaluate()

Runs quality evaluations on the current trace/span using specified scorers. You can provide either an Example object or individual evaluation parameters (input, actual_output, etc.).

Parameters

scorer (required)

:BaseScorer

An evaluation scorer to run. Access built-in scorers via client.scorers.built_in.

Example: client.scorers.built_in.faithfulness()

example (required)

:Example

Example object containing evaluation data. Create using Example.create().

Example

async_evaluate.py
from judgeval import Judgeval
from judgeval.v1.data.example import Example

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe(span_type="function")
def agent(question: str) -> str:
    """Answer the question and attach an async evaluation to the current span."""
    answer = "Paris is the capital of France"

    # Create example object holding the evaluation inputs
    example = Example.create(
        input=question,
        actual_output=answer,
    )

    # Evaluate using Example; runs against the span created by @tracer.observe
    tracer.async_evaluate(
        scorer=client.scorers.built_in.answer_relevancy(threshold=0.5),
        example=example,
    )

    return answer

if __name__ == "__main__":
    # Calling agent() creates the span the evaluation attaches to.
    print(agent("What is the capital of France?"))

tracer.async_trace_evaluate()

Runs quality evaluations on the current trace/span using trace-level scorers. This evaluates the entire trace tree rooted at the current span.

Parameters

scorer (required)

:BaseScorer

A trace-level scorer to run. Access trace prompt scorers via client.scorers.trace_prompt_scorer.

Example

async_trace_evaluate.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

# Get a trace scorer
trace_scorer = client.scorers.trace_prompt_scorer.get(name="sample_trace_scorer")

@tracer.observe(span_type="function")
def agent(question: str) -> str:
    """Answer the question; scores the whole trace rooted at this span."""
    answer = "Paris is the capital of France"

    # Evaluate the entire trace tree rooted at the current span
    tracer.async_trace_evaluate(scorer=trace_scorer)

    return answer

if __name__ == "__main__":
    # Calling agent() creates the root span the trace evaluation runs over.
    print(agent("What is the capital of France?"))

tracer.set_session_id()

You can specify a span and its children spans to be associated with a session_id, which enables further insights through our session view and our Behaviors dashboards. Note that in order for the trace to be associated with a session_id, you need to set this on the root span of the trace for it to show up on the monitoring tables.

Parameters

session_id (required)

:str
The session ID to set

Example

set_session_id.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe(span_type="function")
def process_request(session_id: str, query: str) -> str:
    """Root span of the trace; tags it (and its children) with the session ID."""
    tracer.set_session_id(session_id)
    # ... process the request
    return "Response"

tracer.set_customer_id()

You can specify a span and its children spans to be associated with a customer_id, which enables further insights through our usage dashboards. Note that in order for the trace to be associated with a customer_id, you need to set this on the root span of the trace for it to show up on the monitoring tables.

Parameters

customer_id (required)

:str
The customer ID to set

Example

customer_id.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe(span_type="function")
def process_request(user_id: str, query: str) -> str:
    """Root span of the trace; tags it (and its children) with the customer ID."""
    tracer.set_customer_id(user_id)
    # ... process the request
    return "Response"

tracer.override_project()

Override the project for the current trace. Use this when you want a single tracer to send spans to different projects depending on runtime context (e.g. tenant, environment, or feature flag). The override only applies to the current root span and its children.

Constraints:

  • Must be called within an active span context; otherwise the call is ignored.
  • Must be called on the root span of the trace (e.g. in the same function that starts the trace). Calling it on a child span has no effect and is ignored.

Parameters

project_name (required)

:str

The name of the project to send this trace to. Must resolve to a valid project in your organization.

Example

override_project.py
import os
from judgeval import Judgeval

env = os.getenv("ENVIRONMENT", "staging")  # "staging" or "production"

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe(span_type="function")
def handle_request(tenant: str, query: str) -> str:
    """Root span; override_project must be called here, not in a child span."""
    # Send this trace to an environment-specific project
    tracer.override_project(f"{env} - tenant_{tenant}")
    return "Response"

tracer.set_attribute()

Set custom attributes on the current span.

Parameters

key (required)

:str
The attribute key

value (required)

:Any
The attribute value (will be serialized if not a primitive)

Example

set_attribute.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe(span_type="function")
def process_request(query: str) -> str:
    """Attach custom key/value attributes to the current span."""
    tracer.set_attribute("query_length", len(query))
    tracer.set_attribute("query_type", "search")
    # ... process the request
    return "Response"

tracer.set_input() / tracer.set_output()

Manually set input and output data on the current span.

Example

set_input_output.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

@tracer.observe(span_type="function")
def process_request(query: str) -> str:
    """Manually record input and output payloads on the current span."""
    tracer.set_input({"query": query, "timestamp": "2024-01-01"})
    
    result = "Processed response"
    
    tracer.set_output({"response": result, "tokens": 100})
    return result

tracer.span()

Context manager for creating manual spans.

Example

manual_span.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

def process_request(query: str) -> str:
    """Wrap custom logic in a manual span using the context manager API."""
    with tracer.span("custom_operation") as span:
        # Custom logic here; perform_operation is assumed defined elsewhere
        result = perform_operation(query)
        span.set_attribute("custom_key", "custom_value")
    return result

tracer.shutdown()

Shutdown the tracer and flush all pending spans.

Parameters

timeout_millis

:int
Timeout in milliseconds for shutdown
Default: 30000

Example

shutdown.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

# ... your application logic ...

# Shutdown when done
tracer.shutdown()

tracer.force_flush()

Force flush all pending spans without shutting down.

Parameters

timeout_millis

:int
Timeout in milliseconds for flush
Default: 30000

Example

force_flush.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

# ... your application logic ...

# Force flush without shutdown
tracer.force_flush()