The JudgmentClient is your primary interface for interacting with the Judgment platform. It provides methods for running evaluations, managing datasets, handling traces, and more.
assert_test (bool): Runs evaluations as unit tests, raising an exception if a score falls below the defined threshold. Example: True
Example Code
resolution.py
```python
from judgeval import JudgmentClient
from judgeval.data import Example
from judgeval.scorers.example_scorer import ExampleScorer

client = JudgmentClient()

class CustomerRequest(Example):
    request: str
    response: str

class ResolutionScorer(ExampleScorer):
    name: str = "Resolution Scorer"

    async def a_score_example(self, example: CustomerRequest):
        # Replace this logic with your own scoring logic
        if "package" in example.response:
            self.reason = "The response contains the word 'package'"
            return 1
        else:
            self.reason = "The response does not contain the word 'package'"
            return 0

example = CustomerRequest(request="Where is my package?", response="Your package will arrive tomorrow at 10:00 AM.")

res = client.run_evaluation(
    examples=[example],
    scorers=[ResolutionScorer()],
    project_name="default_project",
)

# Example with a failing test using assert_test=True
# This will raise an error because the response does not contain the word "package"
try:
    example = CustomerRequest(request="Where is my package?", response="Empty response.")
    client.run_evaluation(
        examples=[example],
        scorers=[ResolutionScorer()],
        project_name="default_project",
        assert_test=True,  # This will raise an error if any test fails
    )
except Exception as e:
    print(f"Test assertion failed: {e}")
```
Returns
A list of ScoringResult objects. See Return Types for detailed structure.
Each ScorerData object within scorers_data contains the results from an individual scorer:
| Attribute | Type | Description |
| --- | --- | --- |
| name | str | Name of the scorer |
| threshold | float | Threshold used for pass/fail determination |
| success | bool | Whether this scorer passed its threshold |
| score | Optional[float] | Numerical score from the scorer |
| reason | Optional[str] | Explanation for the score/decision |
| evaluation_model | Optional[Union[List[str], str]] | Model(s) used for evaluation |
| error | Optional[str] | Error message if scoring failed |
accessing_results.py
```python
# Example of accessing ScoringResult data
results = client.run_evaluation(examples, scorers)
for result in results:
    print(f"Overall success: {result.success}")
    print(f"Example input: {result.data_object.input}")
    for scorer_data in result.scorers_data:
        print(f"Scorer '{scorer_data.name}': {scorer_data.score} (threshold: {scorer_data.threshold})")
        if scorer_data.reason:
            print(f"Reason: {scorer_data.reason}")
```
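A common follow-up is to pull out only the failures for triage. A minimal sketch, assuming the results list from the example above and the ScorerData fields documented earlier:

```python
# Collect failing results and report why each scorer failed
failures = [r for r in results if not r.success]
for result in failures:
    for scorer_data in result.scorers_data:
        if scorer_data.error:
            print(f"Scorer '{scorer_data.name}' errored: {scorer_data.error}")
        elif not scorer_data.success:
            print(f"Scorer '{scorer_data.name}' failed: {scorer_data.score} (threshold: {scorer_data.threshold})")
```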
run_evaluation can also raise exceptions; catching JudgmentAPIError alongside the standard Python exceptions keeps platform-side failures distinct from local input problems:

error_handling.py
```python
from judgeval import JudgmentClient
from judgeval.data import Example
from judgeval.scorers.example_scorer import ExampleScorer
from judgeval.exceptions import JudgmentAPIError

client = JudgmentClient()

class CustomerRequest(Example):
    request: str
    response: str

example = CustomerRequest(request="Where is my package?", response="Your package will arrive tomorrow at 10:00 AM.")

class ResolutionScorer(ExampleScorer):
    name: str = "Resolution Scorer"

    async def a_score_example(self, example: CustomerRequest):
        # Replace this logic with your own scoring logic
        if "package" in example.response:
            self.reason = "The response contains the word 'package'"
            return 1
        else:
            self.reason = "The response does not contain the word 'package'"
            return 0

try:
    res = client.run_evaluation(
        examples=[example],
        scorers=[ResolutionScorer()],
        project_name="default_project",
    )
except JudgmentAPIError as e:
    print(f"API Error: {e}")
except ValueError as e:
    print(f"Invalid parameters: {e}")
except FileNotFoundError as e:
    print(f"File not found: {e}")
```
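Handling JudgmentAPIError separately from the built-in exceptions lets you distinguish failures on the Judgment platform side from local mistakes such as invalid parameters or a missing file.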