PromptScorer
Evaluate agent behavior against a rubric you define and iterate on in the platform.
A PromptScorer is a powerful tool for evaluating your LLM system with use-case-specific, natural-language rubrics. Prompt scorers make it easy to prototype evaluation rubrics: set up new criteria, test them on a few examples in the scorer playground, then evaluate your agents' behavior in production on real customer usage.
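The example below shows a PromptScorer scoring a simple QA agent online: each response is evaluated as it is produced via judgment.async_evaluate(). The project name, scorer name, and model are illustrative placeholders: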
from openai import OpenAI
from judgeval.scorers import PromptScorer
from judgeval.tracer import Tracer, wrap
from judgeval.data import Example

# Initialize the tracer and auto-trace LLM calls
judgment = Tracer(project_name="default_project")
client = wrap(OpenAI())

# Create or retrieve a PromptScorer
scorer = PromptScorer.create(
    name="PositivityScorer",
    prompt="Is the response positive or negative? Response: {{actual_output}}",
    options={"positive": 1, "negative": 0}
)

class QAAgent:
    def __init__(self, client):
        self.client = client

    @judgment.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(
            model="gpt-5",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": query}
            ]
        )
        return response.choices[0].message.content

    @judgment.agent()
    @judgment.observe(span_type="agent")
    def invoke_agent(self, query):
        result = self.process_query(query)
        # Score this interaction online with the PromptScorer
        judgment.async_evaluate(
            scorer=scorer,
            example=Example(input=query, actual_output=result),
            model="gpt-5"
        )
        return result

if __name__ == "__main__":
    agent = QAAgent(client)
    print(agent.invoke_agent("What is the capital of the United States?"))
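A TracePromptScorer applies your rubric to an entire trace rather than a single example. Retrieve it by name and attach it to your agent's entry-point span with a TraceScorerConfig: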
from openai import OpenAI
from judgeval.scorers import TracePromptScorer
from judgeval.tracer import Tracer, wrap, TraceScorerConfig

# Initialize tracer
judgment = Tracer(
    project_name="default_project"
)

# Auto-trace LLM calls
client = wrap(OpenAI())

# Retrieve your TracePromptScorer, created either through the platform or with the create() class method
scorer = TracePromptScorer.get(
    name="PositivityScorer"
)

class QAAgent:
    def __init__(self, client):
        self.client = client

    @judgment.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(
            model="gpt-5",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": f"I have a query: {query}"}
            ]
        )  # Automatically traced
        return f"Response: {response.choices[0].message.content}"

    # Attach the trace scorer to the agent's entry-point span
    @judgment.agent()
    @judgment.observe(span_type="agent", scorer_config=TraceScorerConfig(scorer=scorer))
    def invoke_agent(self, query):
        result = self.process_query(query)
        return result

if __name__ == "__main__":
    agent = QAAgent(client)
    print(agent.invoke_agent("What is the capital of the United States?"))

Static Method
PromptScorer.create() | TracePromptScorer.create()
Initialize a PromptScorer or TracePromptScorer object.
Parameters
name - The name of the scorer to create.
prompt - The prompt used by the LLM judge to make an evaluation.
options - If specified, the LLM judge will pick one of the choices, and the score will be the value corresponding to that choice.
API key - Recommended: set using the JUDGMENT_API_KEY environment variable.
Organization ID - Recommended: set using the JUDGMENT_ORG_ID environment variable.
Returns
A PromptScorer or TracePromptScorer instance
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.create(
    name="Test Scorer",
    prompt="Is the response positive or negative? Response: {{actual_output}}",
    options={"positive": 1, "negative": 0}
)

Static Method
PromptScorer.get() | TracePromptScorer.get()
Retrieve a PromptScorer or TracePromptScorer object that has already been created for your organization.
Parameters
name - The name of the PromptScorer you would like to retrieve.
API key - Recommended: set using the JUDGMENT_API_KEY environment variable.
Organization ID - Recommended: set using the JUDGMENT_ORG_ID environment variable.
Returns
A PromptScorer or TracePromptScorer instance
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)

append_to_prompt()
Append text to the existing prompt of your PromptScorer.
Parameters
This string will be added to the existing prompt for the scorer.
Returns
None
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
scorer.append_to_prompt("Consider the overall tone, word choice, and emotional sentiment when making your determination.")

set_threshold()
Update the threshold for your PromptScorer
Parameters
The new threshold you would like the PromptScorer to use
Returns
None
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
scorer.set_threshold(0.5)

set_prompt()
Update the prompt for your PromptScorer
Parameters
The new prompt you would like the PromptScorer to use. Template variables in double braces, such as {{input}} and {{actual_output}}, correspond to fields of the Example being scored.
Returns
None
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
scorer.set_prompt("Is the response helpful to the question? Question: {{input}}, response: {{actual_output}}")

set_options()
Update the options for your PromptScorer
Parameters
The new options you would like the PromptScorer to use, as a mapping from choice to score.
Returns
None
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
scorer.set_options({"Yes": 1, "No": 0})

get_threshold()
Retrieve the threshold for your PromptScorer
Parameters
None
Returns
The threshold value for the PromptScorer (float)
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
threshold = scorer.get_threshold()

get_prompt()
Retrieve the prompt for your PromptScorer
Parameters
None
Returns
The prompt string for the PromptScorer (str)
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
prompt = scorer.get_prompt()

get_options()
Retrieve the options for your PromptScorer
Parameters
None
Returns
The options dictionary for the PromptScorer (dict)
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
options = scorer.get_options()

get_name()
Retrieve the name for your PromptScorer
Parameters
None
Returns
The name of the PromptScorer (str)
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
name = scorer.get_name()

get_config()
Retrieve the name, prompt, options, and threshold for your PromptScorer as a dictionary.
Parameters
None
Returns
Dictionary containing the name, prompt, options, and threshold for the PromptScorer (dict)
Example
from judgeval.scorers import PromptScorer
scorer = PromptScorer.get(
    name="Test Scorer"
)
config = scorer.get_config()
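Putting the accessors and mutators together: the sketch below retrieves an existing scorer, updates its prompt, options, and threshold, then reads back the resulting configuration (the scorer name is a placeholder):

from judgeval.scorers import PromptScorer

# Retrieve a scorer that already exists for your organization
scorer = PromptScorer.get(
    name="Test Scorer"
)

# Update the rubric, the choice-to-score mapping, and the pass threshold
scorer.set_prompt("Is the response helpful to the question? Question: {{input}}, response: {{actual_output}}")
scorer.set_options({"Yes": 1, "No": 0})
scorer.set_threshold(0.5)

# get_config() returns the name, prompt, options, and threshold as a dict
print(scorer.get_config())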