PythonEvaluation
HostedEvaluatorRunner
_build_payload()
def _build_payload(eval_id, project_id, eval_run_name, created_at, examples, scorers) -> ExampleEvaluationRun:
Parameters
eval_id
required:str
project_id
required:str
eval_run_name
required:str
created_at
required:str
examples
required:List[Example]
scorers
required:List[str]
Returns
ExampleEvaluationRun
_submit()
def _submit(console, project_id, eval_id, examples, scorers, payload, progress) -> int:
Parameters
console
required:Console
project_id
required:str
eval_id
required:str
examples
required:List[Example]
scorers
required:List[str]
payload
required:ExampleEvaluationRun
progress
required:Progress
Returns
int
run()
Execute an evaluation run and return results.
def run(examples, scorers, eval_run_name, assert_test=False, timeout_seconds=300) -> typing.List:
Parameters
examples
required:List[Example]
Examples to evaluate.
scorers
required:List[S]
Scorers to run (strings or Judge instances).
eval_run_name
required:str
Name for this evaluation run.
assert_test
:bool
When True, raises AssertionError if any scorer
fails its threshold.
Default:
False
timeout_seconds
:int
Maximum time to wait for results.
Default:
300
Returns
typing.List - A list of ScoringResult objects, one per example.