Judgeval Python SDK

PromptScorer

Evaluate agent behavior against rubrics you define and iterate on in the Judgment platform.

A PromptScorer is a powerful tool for evaluating your LLM system with use-case-specific, natural-language rubrics. PromptScorers make it easy to prototype evaluation rubrics: set up new criteria, test them on a few examples in the scorer playground, then evaluate your agents' behavior in production on real customer usage.

from openai import OpenAI
from judgeval.scorers import PromptScorer
from judgeval.tracer import Tracer, wrap
from judgeval.data import Example

judgment = Tracer(project_name="default_project")
client = wrap(OpenAI())

# Create or retrieve a PromptScorer
scorer = PromptScorer.create(
    name="PositivityScorer",
    prompt="Is the response positive or negative? Response: {{actual_output}}",
    options={"positive": 1, "negative": 0}
)

class QAAgent:
    def __init__(self, client):
        self.client = client

    @judgment.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(
            model="gpt-5",
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": query}
            ]
        )
        return response.choices[0].message.content

    @judgment.agent()
    @judgment.observe(span_type="agent")
    def invoke_agent(self, query):
        result = self.process_query(query)
        judgment.async_evaluate(
            scorer=scorer,
            example=Example(input=query, actual_output=result),
            model="gpt-5"
        )
        return result

if __name__ == "__main__":
    agent = QAAgent(client)
    print(agent.invoke_agent("What is the capital of the United States?"))

The TracePromptScorer variant evaluates at the trace level: instead of calling async_evaluate with an Example, you attach the scorer to an agent span with TraceScorerConfig, as in the following example.

from openai import OpenAI
from judgeval.scorers import TracePromptScorer
from judgeval.tracer import Tracer, wrap, TraceScorerConfig

# Initialize tracer
judgment = Tracer(
    project_name="default_project"
)

# Auto-trace LLM calls
client = wrap(OpenAI())

# Retrieve your TracePromptScorer, created either through the platform or with the "create" class method
scorer = TracePromptScorer.get(
    name="PositivityScorer"
)

class QAAgent:
    def __init__(self, client):
        self.client = client

    @judgment.observe(span_type="tool")
    def process_query(self, query):
        response = self.client.chat.completions.create(
            model="gpt-5",
            messages=[
                {"role": "system", "content": "You are a helpful assitant"},
                {"role": "user", "content": f"I have a query: {query}"}]
        )  # Automatically traced
        return f"Response: {response.choices[0].message.content}"

    # Attach the trace-level scorer to the agent's entry-point span
    @judgment.agent()
    @judgment.observe(span_type="agent", scorer_config=TraceScorerConfig(scorer=scorer))
    def invoke_agent(self, query):
        result = self.process_query(query)
        return result


if __name__ == "__main__":
    agent = QAAgent(client)
    print(agent.invoke_agent("What is the capital of the United States?"))

All PromptScorer methods automatically sync changes with the Judgment platform.
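
For example, a change made with one of the setter methods below is pushed to the platform and is visible the next time the scorer is retrieved. A minimal sketch, assuming a scorer named "Test Scorer" already exists in your organization:

from judgeval.scorers import PromptScorer

# Retrieve an existing scorer and update its threshold;
# the change is synced to the Judgment platform.
scorer = PromptScorer.get(name="Test Scorer")
scorer.set_threshold(0.7)

# A fresh retrieval reflects the updated configuration.
refreshed = PromptScorer.get(name="Test Scorer")
assert refreshed.get_threshold() == 0.7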


Static Method

PromptScorer.create() | TracePromptScorer.create()

Initialize a PromptScorer or TracePromptScorer object.

Parameters

name (required): str

The name of the PromptScorer

prompt (required): str

The prompt used by the LLM judge to make an evaluation

options: dict

If specified, the LLM judge will pick one of the given choices, and the score will be the value assigned to that choice

judgment_api_key: str

Recommended - set using the JUDGMENT_API_KEY environment variable

organization_id: str

Recommended - set using the JUDGMENT_ORG_ID environment variable

Returns

A PromptScorer or TracePromptScorer instance, depending on which class you call

Example

create_prompt_scorer.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.create(
    name="Test Scorer",
    prompt="Is the response positive or negative? Response: {{actual_output}}",
    options={"positive" : 1, "negative" : 0}
)

Static Method

PromptScorer.get() | TracePromptScorer.get()

Retrieve a PromptScorer or TracePromptScorer object that has already been created for your organization.

Parameters

name (required): str

The name of the PromptScorer you would like to retrieve

judgment_api_key: str

Recommended - set using the JUDGMENT_API_KEY environment variable

organization_id: str

Recommended - set using the JUDGMENT_ORG_ID environment variable

Returns

A PromptScorer or TracePromptScorer instance, depending on which class you call

Example

get_prompt_scorer.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

append_to_prompt()

Append text to the existing prompt for your PromptScorer

Parameters

prompt_addition (required): str

This string will be added to the existing prompt for the scorer.

Returns

None

Example

append_to_prompt.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

scorer.append_to_prompt("Consider the overall tone, word choice, and emotional sentiment when making your determination.")

set_threshold()

Update the threshold for your PromptScorer

Parameters

threshold (required): float

The new threshold you would like the PromptScorer to use

Returns

None

Example

set_threshold.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

scorer.set_threshold(0.5)

set_prompt()

Update the prompt for your PromptScorer

Parameters

prompt (required): str

The new prompt you would like the PromptScorer to use

Returns

None

Example

set_prompt.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

scorer.set_prompt("Is the response helpful to the question? Question: {{input}}, response: {{actual_output}}")

set_options()

Update the options for your PromptScorer

Parameters

options (required): dict

The new options you would like the PromptScorer to use

Returns

None

Example

set_options.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

scorer.set_options({"Yes" : 1, "No" : 0})

get_threshold()

Retrieve the threshold for your PromptScorer

Parameters

None

Returns

The threshold value for the PromptScorer (float)

Example

get_threshold.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

threshold = scorer.get_threshold()

get_prompt()

Retrieve the prompt for your PromptScorer

Parameters

None

Returns

The prompt string for the PromptScorer (str)

Example

get_prompt.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

prompt = scorer.get_prompt()

get_options()

Retrieve the options for your PromptScorer

Parameters

None

Returns

The options dictionary for the PromptScorer (dict)

Example

get_options.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

options = scorer.get_options()

get_name()

Retrieve the name for your PromptScorer

Parameters

None

Returns

The name of the PromptScorer (str)

Example

get_name.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

name = scorer.get_name()

get_config()

Retrieve the name, prompt, options, and threshold for your PromptScorer as a dictionary

Parameters

None

Returns

Dictionary containing the name, prompt, options, and threshold for the PromptScorer (dict)

Example

get_config.py
from judgeval.scorers import PromptScorer

scorer = PromptScorer.get(
    name="Test Scorer"
)

config = scorer.get_config()
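
The returned dictionary bundles the same fields exposed by the individual getters above. Illustrative output only, assuming the scorer was created as in the create() example (exact key names may differ):

print(config)
# {
#     "name": "Test Scorer",
#     "prompt": "Is the response positive or negative? Response: {{actual_output}}",
#     "options": {"positive": 1, "negative": 0},
#     "threshold": 0.5
# }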