Experiments

How to run experiments

Basics


source

create_experiment_columns

 create_experiment_columns (project_id, experiment_id, columns,
                            create_experiment_column_func)

source

Project.create_experiment

 Project.create_experiment (name:str, model:Type[ragas_experimental.model.
                            pydantic_model.ExtendedPydanticBaseModel])

*Create a new experiment.

Args:
    name: Name of the experiment
    model: Model class defining the experiment structure

Returns:
    Experiment: An experiment object for managing results*

import os

from ragas_experimental import Project  # assumed top-level export, matching BaseModel below
RAGAS_APP_TOKEN = "api-key"
RAGAS_API_BASE_URL = "https://api.dev.app.ragas.io"

os.environ["RAGAS_APP_TOKEN"] = RAGAS_APP_TOKEN
os.environ["RAGAS_API_BASE_URL"] = RAGAS_API_BASE_URL

PROJECT_ID = "919a4d42-aaf2-45cd-badd-152249788bfa"
p = Project(project_id=PROJECT_ID)
p
Project(name='yann-lecun-wisdom')
import typing as t

from ragas_experimental import BaseModel
import ragas_experimental.typing as rt  # column-type annotations; assumed import path

class TestModel(BaseModel):
    name: str
    description: str
    price: float
    url: t.Annotated[str, rt.Url()] = "https://www.google.com"
    tags: t.Annotated[t.Literal["test", "test2"], rt.Select(colors=["red", "blue"])] = "test"
experiment_id = "5d7752ab-17bf-46bc-a302-afe04ce1a763"
exp = p.create_experiment(name="test-exp", model=TestModel)
#exp = p.create_dataset(name="just name and desc 2", model=TestModel)

exp
Experiment(name=test-exp, model=TestModel)

source

Project.get_experiment_by_id

 Project.get_experiment_by_id (experiment_id:str, model:Type[ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel])

Get an existing experiment by ID.

exp.experiment_id
'effe0e10-916d-4530-b974-91d5115f5dc2'
p.get_experiment_by_id(exp.experiment_id, TestModel)
Experiment(name=test-exp, model=TestModel)

source

Project.get_experiment

 Project.get_experiment (experiment_name:str, model)

Get an existing experiment by name.

p.get_experiment("test-exp", TestModel)
Experiment(name=test-exp, model=TestModel)

Git Versioning for Experiments


source

find_git_root

 find_git_root (start_path:Union[str,pathlib.Path,NoneType]=None)

Find the root directory of a git repository by traversing up from the start path.

            Type   Default  Details
start_path  Union  None     starting path to search from
Returns     Path
find_git_root()
Path('/Users/jjmachan/workspace/eglabs/ragas_annotator')
git.Repo(find_git_root())
<git.repo.base.Repo '/Users/jjmachan/workspace/eglabs/ragas_annotator/.git'>
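The upward traversal is simple enough to sketch with the standard library alone. `find_git_root_sketch` below is a hypothetical stand-in for illustration, not the library function itself:

```python
from pathlib import Path
from typing import Optional, Union

def find_git_root_sketch(start_path: Union[str, Path, None] = None) -> Optional[Path]:
    # Walk upward from start_path (default: current directory) and return
    # the first ancestor containing a .git entry, or None if there is none.
    path = Path(start_path) if start_path is not None else Path.cwd()
    for candidate in (path, *path.parents):
        if (candidate / ".git").exists():
            return candidate
    return None  # not inside a git repository
```

The real function presumably raises or reports when no repository is found; the sketch just returns `None`.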

source

version_experiment

 version_experiment (experiment_name:str,
                     commit_message:Optional[str]=None,
                     repo_path:Union[str,pathlib.Path,NoneType]=None,
                     create_branch:bool=True, stage_all:bool=False)

Version control the current state of the codebase for an experiment.


source

cleanup_experiment_branches

 cleanup_experiment_branches (prefix:str='ragas/',
                              repo_path:Union[str,pathlib.Path,NoneType]=None,
                              interactive:bool=True, dry_run:bool=False)

Clean up git branches with the specified prefix.

cleanup_experiment_branches(dry_run=True)
No branches found with prefix 'ragas/'
[]
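The selection and dry-run behaviour can be illustrated without touching git at all. This hypothetical sketch operates on plain branch-name strings and mirrors the output shown above:

```python
from typing import List

def select_experiment_branches(branches: List[str], prefix: str = "ragas/",
                               dry_run: bool = False) -> List[str]:
    # Pick out branches carrying the experiment prefix.
    matches = [b for b in branches if b.startswith(prefix)]
    if not matches:
        print(f"No branches found with prefix '{prefix}'")
    elif dry_run:
        # Dry run: report what would be deleted, delete nothing.
        print(f"Would delete {len(matches)} branch(es): {matches}")
    return matches
```

The real function additionally drives the actual branch deletion (and, with `interactive=True`, a confirmation prompt) for each match.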

Experiment Wrapper


source

ExperimentProtocol

 ExperimentProtocol (*args, **kwargs)

*Base class for protocol classes.

Protocol classes are defined as::

class Proto(Protocol):
    def meth(self) -> int:
        ...

Such classes are primarily used with static type checkers that recognize structural subtyping (static duck-typing), for example::

class C:
    def meth(self) -> int:
        return 0

def func(x: Proto) -> int:
    return x.meth()

func(C())  # Passes static type check

See PEP 544 for details. Protocol classes decorated with @typing.runtime_checkable act as simple-minded runtime protocols that check only the presence of given attributes, ignoring their type signatures. Protocol classes can be generic, they are defined as::

class GenProto(Protocol[T]):
    def meth(self) -> T:
        ...*
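As a concrete, runnable instance of the structural typing described above (a generic example, unrelated to the ragas API):

```python
from typing import Protocol, runtime_checkable

@runtime_checkable
class Runner(Protocol):
    def run(self) -> str: ...

class MyExperiment:
    # No inheritance needed: matching the method shape is enough
    # for both static checkers and runtime_checkable isinstance().
    def run(self) -> str:
        return "done"
```

`isinstance(MyExperiment(), Runner)` is `True`, even though `MyExperiment` never subclasses `Runner`.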

source

Project.experiment

 Project.experiment (experiment_model, name_prefix:str='',
                     save_to_git:bool=True, stage_all:bool=True)

*Decorator for creating experiment functions without Langfuse integration.

Args:
    experiment_model: The model type to use for experiment results
    name_prefix: Optional prefix for experiment names

Returns:
    Decorator function that wraps experiment functions*

# create experimental test dataset
test_dataset = p.create_dataset(name="test dataset for experiment", model=TestModel)
test_dataset.append(TestModel(name="test item 1", description="test item 1 description", price=100))
test_dataset.append(TestModel(name="test item 2", description="test item 2 description", price=200))
test_dataset.append(TestModel(name="test item 3", description="test item 3 description", price=300))
# create experiment model
class TextExperimentModel(TestModel):
    response: str
    is_correct: t.Literal["yes", "no"]

# create a test experiment function
@p.experiment(TextExperimentModel, save_to_git=False, stage_all=True)
async def test_experiment(item: TestModel):
    return TextExperimentModel(**item.model_dump(), response="test response", is_correct="yes")
# run the experiment
await test_experiment.run_async(test_dataset)
Running experiment: 100%|██████████| 6/6 [00:01<00:00,  3.05it/s]
Experiment(name=xenodochial_dorsey, model=TextExperimentModel)
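Mechanically, the decorator pattern above boils down to attaching a `run_async` helper to the wrapped coroutine function. A hypothetical, stripped-down sketch (it ignores result storage, experiment naming, and git versioning, all of which the real decorator handles):

```python
import asyncio
from typing import Any, Awaitable, Callable, List

def experiment_sketch(result_model: type) -> Callable:
    # Simplified analogue of Project.experiment; result_model is unused
    # here but kept to mirror the documented signature.
    def decorator(func: Callable[[Any], Awaitable[Any]]):
        async def run_async(dataset: List[Any]) -> List[Any]:
            # Apply the wrapped function to every dataset item concurrently.
            return await asyncio.gather(*(func(item) for item in dataset))
        func.run_async = run_async
        return func
    return decorator

@experiment_sketch(dict)
async def double(item: int) -> int:
    return item * 2

# asyncio.run(double.run_async([1, 2, 3])) -> [2, 4, 6]
```

Note that `gather` preserves input order, which is why per-item results line up with the dataset rows.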

source

Project.langfuse_experiment

 Project.langfuse_experiment (experiment_model, name_prefix:str='',
                              save_to_git:bool=True, stage_all:bool=True)

*Decorator for creating experiment functions with Langfuse integration.

Args:
    experiment_model: The model type to use for experiment results
    name_prefix: Optional prefix for experiment names

Returns:
    Decorator function that wraps experiment functions with Langfuse observation*

import os
# import langfuse
from langfuse import Langfuse
langfuse = Langfuse(
  secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
  public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
  host="https://us.cloud.langfuse.com"
)
@p.langfuse_experiment(TextExperimentModel)
async def test_experiment(item: TestModel):
    return TextExperimentModel(**item.model_dump(), response="test response", is_correct="yes")
await test_experiment(test_dataset[0])
TextExperimentModel(name='test item 1', description='test item 1 description', price=100.0, url='https://www.google.com', tags='test', response='test response', is_correct='yes')
await test_experiment.run_async(test_dataset)
Running experiment: 100%|██████████| 6/6 [00:01<00:00,  4.01it/s]
Experiment(name=cool_matsumoto, model=TextExperimentModel)

Compare and Plot


source

Project.mlflow_experiment

 Project.mlflow_experiment (experiment_model, name_prefix:str='',
                            save_to_git:bool=True, stage_all:bool=True)

*Decorator for creating experiment functions with mlflow integration.

Args:
    experiment_model: The model type to use for experiment results
    name_prefix: Optional prefix for experiment names

Returns:
    Decorator function that wraps experiment functions with mlflow observation*


source

Project.compare_and_plot

 Project.compare_and_plot (experiment_names:List[str],
                           model:Type[ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel],
                           metric_names:List[str])

*Compare multiple experiments and generate a plot.

Args:
    experiment_names: List of experiment names to compare
    model: Model class defining the experiment structure
    metric_names: List of metric names to plot*

from ragas_experimental import BaseModel

class TestDataset(BaseModel):
    question: str
    citations: list[str]
    grading_notes: str
    

class ExperimentModel(TestDataset):
    response: str
    score: str
    score_reason: str
p.compare_and_plot(
    experiment_names=["xenodochial_hoare","confident_liskov"],
    model=ExperimentModel,
    metric_names=["score"]
)
Fetching experiments: 100%|██████████| 2/2 [00:05<00:00,  2.60s/it]
(Interactive Plotly comparison chart; not rendered in this static page.)
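Under the hood, a comparison like this reduces to aggregating a metric column per experiment before plotting. A hypothetical stdlib-only sketch of that aggregation step (the real method also fetches the experiments from the API and renders a Plotly chart):

```python
from collections import Counter
from typing import Dict, List

def tally_metric(experiments: Dict[str, List[dict]],
                 metric_name: str) -> Dict[str, Counter]:
    # Count categorical metric values (e.g. "pass"/"fail") per experiment,
    # assuming each experiment is a list of row dicts keyed by column name.
    return {name: Counter(str(row[metric_name]) for row in rows)
            for name, rows in experiments.items()}
```

With two experiments whose rows carry a `score` column, the result maps each experiment name to a value-count table, which is exactly the shape a grouped bar chart needs.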