Python Datasets

DatasetFactory

Create, retrieve, and list datasets in your project.

Access this via `client.datasets` -- you don't instantiate it directly.

# Create a dataset with initial examples
dataset = client.datasets.create(
    name="golden-set",
    examples=[
        Example.create(input="What is AI?", expected_output="Artificial Intelligence"),
    ],
)

# Retrieve an existing dataset
dataset = client.datasets.get(name="golden-set")

# List all datasets
for info in client.datasets.list():
    print(info.name, info.entries)

__init__()

def __init__(client, project_id, project_name):

Parameters

client

required

:

JudgmentSyncClient

project_id

required

:

Optional[str]

project_name

required

:

str


get()

Fetch an existing dataset with all its examples loaded.

dataset = client.datasets.get(name="golden-set")
print(len(dataset))  # number of examples
def get(name) -> Optional[Dataset]:

Parameters

name

required

:

str

The dataset name.

Returns

Optional[Dataset] - A Dataset with examples populated, or None if the project is not resolved.


create()

Create a new dataset, optionally with initial examples.

dataset = client.datasets.create(
    name="qa-pairs",
    examples=[
        Example.create(input="What is 2+2?", expected_output="4"),
        Example.create(input="Capital of France?", expected_output="Paris"),
    ],
)
def create(name, examples=[], overwrite=False, batch_size=100) -> Optional[Dataset]:

Parameters

name

required

:

str

Name for the dataset (must be unique within the project unless overwrite=True).

examples

:

Iterable[Example]

Examples to upload immediately after creation.

Default:

[]

overwrite

:

bool

Replace an existing dataset with the same name.

Default:

False

batch_size

:

int

Examples per upload batch.

Default:

100

Returns

Optional[Dataset] - The new Dataset, or None if the project is not resolved.


list()

List all datasets in the project.

for info in client.datasets.list():
    print(f"{info.name}: {info.entries} examples")
def list() -> Optional[List[DatasetInfo]]:

Returns

Optional[List[DatasetInfo]] - A list of DatasetInfo summaries, or None if the project is not resolved.