PromptScorer
Evaluate agent behavior based on a rubric you define and iterate on the platform.
A PromptScorer is a powerful tool for evaluating your LLM system against use-case-specific, natural-language rubrics. PromptScorers make it easy to prototype evaluation criteria: set up a new rubric, test it on a few examples in the scorer playground, then evaluate your agents' behavior in production on real customer usage.
from openai import OpenAI
from judgeval import Judgeval
from judgeval.v1.data.example import Example
client = Judgeval(project_name="default_project")
tracer = client.tracer.create()
openai = OpenAI()
openai = tracer.wrap(openai)
# Retrieve a PromptScorer created on the platform
scorer = client.scorers.prompt_scorer.get(name="PositivityScorer")
class QAAgent:
    def __init__(self, openai_client):
        self.client = openai_client

    @tracer.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(
            model="gpt-5.2",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": query},
            ],
        )
        return response.choices[0].message.content

    @tracer.agent()
    @tracer.observe(span_type="agent")
    def invoke_agent(self, query):
        result = self.process_query(query)
        tracer.async_evaluate(
            scorer=scorer,
            example=Example.create(input=query, actual_output=result),
        )
        return result
if __name__ == "__main__":
    agent = QAAgent(openai)
    print(agent.invoke_agent("What is the capital of the United States?"))

To score an entire trace instead of a single example, use a TracePromptScorer with async_trace_evaluate():

from openai import OpenAI
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
tracer = client.tracer.create()
openai = OpenAI()
openai = tracer.wrap(openai)
# Retrieve a TracePromptScorer created on the platform
scorer = client.scorers.trace_prompt_scorer.get(name="PositivityScorer")
class QAAgent:
    def __init__(self, openai_client):
        self.client = openai_client

    @tracer.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(  # Automatically traced
            model="gpt-5.2",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": f"I have a query: {query}"},
            ],
        )
        return f"Response: {response.choices[0].message.content}"

    @tracer.agent()
    @tracer.observe(span_type="agent")
    def invoke_agent(self, query):
        result = self.process_query(query)
        # Evaluate the entire trace rooted at this span
        tracer.async_trace_evaluate(scorer=scorer)
        return result

if __name__ == "__main__":
    agent = QAAgent(openai)
    print(agent.invoke_agent("What is the capital of the United States?"))

client.scorers.prompt_scorer.get()
Retrieve a PromptScorer that was created on the Judgment platform.
Parameters
name (required) : str
The name of the PromptScorer you would like to retrieve.
Returns
A PromptScorer instance, or None if not found.
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
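Because get() returns None when the name does not match an existing scorer, it can help to guard before using the result. A minimal sketch:

scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
if scorer is None:
    # The scorer must be created on the platform before it can be retrieved.
    raise ValueError("PromptScorer 'Test Scorer' not found")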

client.scorers.trace_prompt_scorer.get()
Retrieve a TracePromptScorer that was created on the Judgment platform. Trace prompt scorers evaluate entire traces rather than individual examples.
Parameters
name (required) : str
The name of the TracePromptScorer you would like to retrieve.
Returns
A PromptScorer instance configured for trace evaluation, or None if not found.
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.trace_prompt_scorer.get(name="Test Trace Scorer")

set_threshold()
Update the threshold for your PromptScorer.
Parameters
threshold (required) : float
The new threshold you would like the PromptScorer to use (must be between 0 and 1).
Returns
None
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
scorer.set_threshold(0.5)

set_prompt()
Update the prompt for your PromptScorer.
Parameters
prompt (required) : str
The new prompt you would like the PromptScorer to use.
Returns
None
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
scorer.set_prompt("Is the response helpful to the question? Question: {{input}}, response: {{actual_output}}")
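The {{input}} and {{actual_output}} placeholders correspond to fields of the Example being scored; the exact templating behavior is defined on the platform, so treat this pairing as a sketch:

from judgeval.v1.data.example import Example

# Assumption: {{input}} and {{actual_output}} are substituted from these fields.
scorer.set_prompt(
    "Is the response helpful to the question? "
    "Question: {{input}}, response: {{actual_output}}"
)
example = Example.create(
    input="What is the capital of France?",
    actual_output="Paris is the capital of France",
)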

set_options()
Update the options for your PromptScorer.
Parameters
options (required) : dict
The new options you would like the PromptScorer to use.
Returns
None
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
scorer.set_options({"Yes": 1, "No": 0})
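Options map the judge's categorical verdicts to numeric scores, so {"Yes": 1, "No": 0} turns a yes/no rubric into a 0-or-1 score. A sketch of a finer-grained, hypothetical rubric (assuming scores at or above the threshold count as passing):

# Hypothetical three-way rubric; each verdict maps to a score between 0 and 1.
scorer.set_options({"Helpful": 1.0, "Partially helpful": 0.5, "Unhelpful": 0.0})
scorer.set_threshold(0.5)  # Assumption: score >= threshold counts as passing.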

set_description()
Update the description for your PromptScorer.
Parameters
description (required) : str
The new description for the PromptScorer.
Returns
None
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
scorer.set_description("Evaluates the positivity of responses")

get_threshold()
Retrieve the threshold for your PromptScorer.
Parameters
None
Returns
The threshold value for the PromptScorer (float)
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
threshold = scorer.get_threshold()

get_prompt()
Retrieve the prompt for your PromptScorer.
Parameters
None
Returns
The prompt string for the PromptScorer (str)
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
prompt = scorer.get_prompt()

get_options()
Retrieve the options for your PromptScorer.
Parameters
None
Returns
The options dictionary for the PromptScorer (dict or None)
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
options = scorer.get_options()

get_description()
Retrieve the description for your PromptScorer.
Parameters
None
Returns
The description of the PromptScorer (str or None)
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
description = scorer.get_description()

get_name()
Retrieve the name for your PromptScorer.
Parameters
None
Returns
The name of the PromptScorer (str)
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
name = scorer.get_name()

get_scorer_config()
Retrieve the full configuration for your PromptScorer as a ScorerConfig object.
Parameters
None
Returns
A ScorerConfig object containing the scorer's configuration
Example
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
config = scorer.get_scorer_config()
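The getters above expose the same fields piecemeal; a minimal sketch that uses them to print a scorer's current settings:

from judgeval import Judgeval

client = Judgeval(project_name="default_project")
scorer = client.scorers.prompt_scorer.get(name="Test Scorer")
if scorer is not None:
    print(f"name:      {scorer.get_name()}")
    print(f"prompt:    {scorer.get_prompt()}")
    print(f"options:   {scorer.get_options()}")
    print(f"threshold: {scorer.get_threshold()}")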

Using with async_evaluate()
Use a PromptScorer with tracer.async_evaluate() for example-level evaluation:
from judgeval import Judgeval
from judgeval.v1.data.example import Example
client = Judgeval(project_name="default_project")
tracer = client.tracer.create()
scorer = client.scorers.prompt_scorer.get(name="PositivityScorer")
@tracer.observe(span_type="function")
def agent(question: str) -> str:
    answer = "Paris is the capital of France"
    tracer.async_evaluate(
        scorer=scorer,
        example=Example.create(
            input=question,
            actual_output=answer,
        ),
    )
    return answer

if __name__ == "__main__":
    print(agent("What is the capital of France?"))

Using with async_trace_evaluate()
Use a TracePromptScorer with tracer.async_trace_evaluate() for trace-level evaluation:
from judgeval import Judgeval
client = Judgeval(project_name="default_project")
tracer = client.tracer.create()
scorer = client.scorers.trace_prompt_scorer.get(name="TraceQualityScorer")
@tracer.observe(span_type="function")
def agent(question: str) -> str:
    answer = "Paris is the capital of France"
    # Evaluates the entire trace tree rooted at this span
    tracer.async_trace_evaluate(scorer=scorer)
    return answer
if __name__ == "__main__":
    print(agent("What is the capital of France?"))