Numeric Metric

Base class for all numeric metrics

source

NumericMetric

 NumericMetric (name:str,
                prompt:str|ragas_experimental.prompt.base.Prompt,
                llm:ragas_experimental.llm.llm.RagasLLM,
                range:Tuple[float,float])

Example usage

from ragas_experimental.llm import ragas_llm
from openai import OpenAI

llm = ragas_llm(provider="openai",model="gpt-4o",client=OpenAI())


my_metric = NumericMetric(
    name='helpfulness',
    llm=llm,
    prompt="Evaluate if given answer is helpful\n\n{response}",
    range=(0,10),
)

result = my_metric.score(response="this is my response")
result # gives a numeric score within the configured range, e.g. 0
result.reason #gives reasoning from llm
"The provided input lacks context or content to determine if it is helpful as it merely states 'this is my response' without any additional information."

Write custom numeric metric

from ragas_experimental.metric import MetricResult

@numeric_metric(llm=llm,
    prompt="Evaluate if given answer is helpful\n\n{response}",
    name='new_metric',range=(0,10))
def my_metric(llm,prompt,**kwargs):
    """Custom numeric metric: asks the LLM for a helpfulness score, then
    snaps it to the ends of the configured range (0 or 10).

    Called by the framework with the configured `llm` and `prompt`;
    remaining keyword args fill the prompt template (here: `response`).
    Returns a MetricResult carrying the score, the LLM's reasoning, and
    the input/output traces.
    """
    # NOTE(review): BaseModel is presumably pydantic.BaseModel — the example
    # omits the `from pydantic import BaseModel` import; confirm and add it.
    class response_model(BaseModel):
        output: int
        reason: str

    # Record the raw inputs/outputs so the result is auditable.
    traces = {}
    traces['input'] = kwargs
    response = llm.generate(prompt.format(**kwargs), response_model=response_model)
    traces['output'] = response.dict()

    total = response.output
    # Binarize the raw LLM score: anything below 1 maps to the bottom of the
    # range, everything else to the top.
    if total < 1:
        score = 0
    else:
        score = 10
    return MetricResult(result=score, reason=response.reason, traces=traces)

result = my_metric.score(response='my response') # result
result # 10
result.reason # the reason for the answer

result1 = my_metric.score(response='my response 1') # result
result2 = my_metric.score(response='my response 2') # result

result1 + result2 # results are addable and behave like floats (10 + 10)
20