Judgeval Python SDK (v1)

PromptScorer

Evaluate agent behavior against a rubric that you define and iterate on in the Judgment platform.

A PromptScorer evaluates your LLM system against use-case-specific, natural-language rubrics. PromptScorers make it easy to prototype evaluation criteria: set up a new rubric, test it on a few examples in the scorer playground, then use it to evaluate your agents' behavior in production on real customer usage.

The example below retrieves a PromptScorer from the platform and scores each agent response (example-level evaluation) with tracer.async_evaluate():

from openai import OpenAI
from judgeval import Judgeval
from judgeval.v1.data.example import Example

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

openai = OpenAI()
openai = tracer.wrap(openai)

# Retrieve a PromptScorer created on the platform
scorer = client.scorers.prompt_scorer.get(name="PositivityScorer")

class QAAgent:
    def __init__(self, openai_client):
        self.client = openai_client

    @tracer.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(
            model="gpt-5.2",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": query}
            ]
        )
        return response.choices[0].message.content

    @tracer.agent()
    @tracer.observe(span_type="agent")
    def invoke_agent(self, query):
        result = self.process_query(query)
        tracer.async_evaluate(
            scorer=scorer,
            example=Example.create(input=query, actual_output=result),
        )
        return result

if __name__ == "__main__":
    agent = QAAgent(openai)
    print(agent.invoke_agent("What is the capital of the United States?"))

The example above scores individual examples. To score the entire trace instead, retrieve a TracePromptScorer and call tracer.async_trace_evaluate():

from openai import OpenAI
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

openai = OpenAI()
openai = tracer.wrap(openai)

# Retrieve a TracePromptScorer created on the platform
scorer = client.scorers.trace_prompt_scorer.get(name="PositivityScorer")

class QAAgent:
    def __init__(self, openai_client):
        self.client = openai_client

    @tracer.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(
            model="gpt-5.2",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": f"I have a query: {query}"}]
        )  # Automatically traced
        return f"Response: {response.choices[0].message.content}"

    @tracer.agent()
    @tracer.observe(span_type="agent")
    def invoke_agent(self, query):
        result = self.process_query(query)
        # Evaluate the entire trace rooted at this span
        tracer.async_trace_evaluate(scorer=scorer)
        return result


if __name__ == "__main__":
    agent = QAAgent(openai)
    print(agent.invoke_agent("What is the capital of the United States?"))

PromptScorers are created and managed on the Judgment platform. Use client.scorers.prompt_scorer.get() or client.scorers.trace_prompt_scorer.get() to retrieve them in your code.
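The two getters can be used together. A minimal sketch (the scorer names below are placeholders; per the Returns sections that follow, both getters return None when no scorer with that name exists):

retrieve_scorers.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")

# Example-level scorer (assumes a scorer named "PositivityScorer" exists on the platform)
example_scorer = client.scorers.prompt_scorer.get(name="PositivityScorer")

# Trace-level scorer (assumes a scorer named "TraceQualityScorer" exists on the platform)
trace_scorer = client.scorers.trace_prompt_scorer.get(name="TraceQualityScorer")

# get() returns None if the scorer is not found
if example_scorer is None or trace_scorer is None:
    raise RuntimeError("Scorer not found; create it on the Judgment platform first")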


client.scorers.prompt_scorer.get()

Retrieve a PromptScorer that was created on the Judgment platform.

Parameters

name: str (required)

The name of the PromptScorer you would like to retrieve

Returns

A PromptScorer instance, or None if not found.

Example

get_prompt_scorer.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

client.scorers.trace_prompt_scorer.get()

Retrieve a TracePromptScorer that was created on the Judgment platform. Trace prompt scorers evaluate entire traces rather than individual examples.

Parameters

name: str (required)

The name of the TracePromptScorer you would like to retrieve

Returns

A PromptScorer instance configured for trace evaluation, or None if not found.

Example

get_trace_prompt_scorer.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.trace_prompt_scorer.get(name="Test Trace Scorer")

set_threshold()

Update the threshold for your PromptScorer.

Parameters

threshold: float (required)

The new threshold you would like the PromptScorer to use (must be between 0 and 1)

Returns

None

Example

set_threshold.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

scorer.set_threshold(0.5)

set_prompt()

Update the prompt for your PromptScorer.

Parameters

prompt: str (required)

The new prompt you would like the PromptScorer to use

Returns

None

Example

set_prompt.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

scorer.set_prompt("Is the response helpful to the question? Question: {{input}}, response: {{actual_output}}")

set_options()

Update the options for your PromptScorer.

Parameters

options: dict (required)

The new options you would like the PromptScorer to use

Returns

None

Example

set_options.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

scorer.set_options({"Yes": 1, "No": 0})

set_description()

Update the description for your PromptScorer.

Parameters

description: str (required)

The new description for the PromptScorer

Returns

None

Example

set_description.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

scorer.set_description("Evaluates the positivity of responses")

get_threshold()

Retrieve the threshold for your PromptScorer.

Parameters

None

Returns

The threshold value for the PromptScorer (float)

Example

get_threshold.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

threshold = scorer.get_threshold()

get_prompt()

Retrieve the prompt for your PromptScorer.

Parameters

None

Returns

The prompt string for the PromptScorer (str)

Example

get_prompt.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

prompt = scorer.get_prompt()

get_options()

Retrieve the options for your PromptScorer.

Parameters

None

Returns

The options dictionary for the PromptScorer (dict or None)

Example

get_options.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

options = scorer.get_options()

get_description()

Retrieve the description for your PromptScorer.

Parameters

None

Returns

The description of the PromptScorer (str or None)

Example

get_description.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

description = scorer.get_description()

get_name()

Retrieve the name for your PromptScorer.

Parameters

None

Returns

The name of the PromptScorer (str)

Example

get_name.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

name = scorer.get_name()

get_scorer_config()

Retrieve the full configuration for your PromptScorer as a ScorerConfig object.

Parameters

None

Returns

A ScorerConfig object containing the scorer's configuration

Example

get_scorer_config.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

config = scorer.get_scorer_config()
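The getters are handy for logging a scorer's current state before you change it. A minimal sketch using only the calls documented above:

inspect_prompt_scorer.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")

# Print the individual fields...
print(scorer.get_name())
print(scorer.get_prompt())
print(scorer.get_threshold())
print(scorer.get_options())
print(scorer.get_description())

# ...or the full configuration in one object
print(scorer.get_scorer_config())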

Using with async_evaluate()

Use a PromptScorer with tracer.async_evaluate() for example-level evaluation:

async_evaluate_with_prompt_scorer.py
from judgeval import Judgeval
from judgeval.v1.data.example import Example

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

scorer = client.scorers.prompt_scorer.get(name="PositivityScorer")

@tracer.observe(span_type="function")
def agent(question: str) -> str:
    answer = "Paris is the capital of France"

    tracer.async_evaluate(
        scorer=scorer,
        example=Example.create(
            input=question,
            actual_output=answer,
        ),
    )

    return answer

if __name__ == "__main__":
    print(agent("What is the capital of France?"))

Using with async_trace_evaluate()

Use a TracePromptScorer with tracer.async_trace_evaluate() for trace-level evaluation:

async_trace_evaluate_with_prompt_scorer.py
from judgeval import Judgeval

client = Judgeval(project_name="default_project")
tracer = client.tracer.create()

scorer = client.scorers.trace_prompt_scorer.get(name="TraceQualityScorer")

@tracer.observe(span_type="function")
def agent(question: str) -> str:
    answer = "Paris is the capital of France"

    # Evaluates the entire trace tree rooted at this span
    tracer.async_trace_evaluate(scorer=scorer)

    return answer

if __name__ == "__main__":
    print(agent("What is the capital of France?"))