DatasetFactory
Create, retrieve, and list datasets in your project.
Access this via client.datasets -- you don't instantiate it directly.
# Create a dataset with initial examples
dataset = client.datasets.create(
name="golden-set",
examples=[
Example.create(input="What is AI?", expected_output="Artificial Intelligence"),
],
)
# Retrieve an existing dataset
dataset = client.datasets.get(name="golden-set")
# List all datasets
for info in client.datasets.list():
    print(info.name, info.entries)
__init__()
def __init__(client, project_id, project_name):
Parameters
client
required:JudgmentSyncClient
project_id
required:Optional[str]
project_name
required:str
get()
Fetch an existing dataset with all its examples loaded.
dataset = client.datasets.get(name="golden-set")
print(len(dataset))  # number of examples
def get(name) -> typing.Optional:
Parameters
name
required:str
The dataset name.
Returns
typing.Optional - A Dataset with examples populated, or None if the project
is not resolved.
create()
Create a new dataset, optionally with initial examples.
dataset = client.datasets.create(
name="qa-pairs",
examples=[
Example.create(input="What is 2+2?", expected_output="4"),
Example.create(input="Capital of France?", expected_output="Paris"),
],
)
def create(name, examples=[], overwrite=False, batch_size=100) -> typing.Optional:
Parameters
name
required:str
Name for the dataset (must be unique within the project
unless overwrite=True).
examples
optional:Iterable[Example]
Examples to upload immediately after creation.
Default: []
overwrite
optional:bool
Replace an existing dataset with the same name.
Default: False
batch_size
optional:int
Examples per upload batch.
Default: 100
Returns
typing.Optional - The new Dataset, or None if the project is not resolved.
list()
List all datasets in the project.
for info in client.datasets.list():
    print(f"{info.name}: {info.entries} examples")
def list() -> typing.Optional:
Returns
typing.Optional - A list of DatasetInfo summaries, or None if the project
is not resolved.