# Example: score a response with a discrete (categorical) metric.
from ragas_experimental.llm import ragas_llm
# NOTE(review): DiscreteMetric was used below without an import in the
# original snippet — confirm the exact module path in ragas_experimental.
from ragas_experimental.metric import DiscreteMetric
from openai import OpenAI

# Wrap an OpenAI client in the ragas LLM interface.
llm = ragas_llm(provider="openai", model="gpt-4o", client=OpenAI())

# A discrete metric classifies the response into one of the given values.
my_metric = DiscreteMetric(
    llm=llm,
    name='helpfulness',
    prompt="Evaluate if given answer is helpful\n\n{response}",
    values=["low", "med", "high"],
)

# `score` formats the prompt with the keyword args and asks the LLM.
result = my_metric.score(response="this is my response")
print(result)         # gives "low"
print(result.reason)  # gives reasoning from llm
low
The response is incomplete and lacks any specific information. It cannot be evaluated for helpfulness without further context or content.
Write a custom discrete metric
# Example: define a custom discrete metric with the @discrete_metric decorator.
# NOTE(review): `discrete_metric`, `BaseModel` (pydantic), `t` (typing), and
# `llm` are assumed to be in scope from earlier cells of this document —
# confirm their imports when assembling a runnable script.
from ragas_experimental.metric.result import MetricResult


@discrete_metric(
    llm=llm,
    prompt="Evaluate if given answer is helpful\n\n{response}",
    name='new_metric',
    values=["low", "med", "high"],
)
def my_metric(llm, prompt, **kwargs):
    """Custom scorer: asks the LLM for boolean judgments and maps them to a label.

    The decorator supplies `llm` and `prompt`; `kwargs` hold the template
    fields (here `response`). Returns a MetricResult with score, reason,
    and input/output traces.
    """

    # Structured output schema the LLM must fill in.
    class response_model(BaseModel):
        output: t.List[bool]
        reason: str

    # Record inputs/outputs for traceability.
    traces = {}
    traces['input'] = kwargs
    response = llm.generate(prompt.format(**kwargs), response_model=response_model)
    traces['output'] = response.model_dump()

    # Any True judgment yields 'high'; all False yields 'low'.
    # NOTE(review): the 'med' value declared above is never produced here.
    total = sum(response.output)
    if total < 1:
        score = 'low'
    else:
        score = 'high'

    return MetricResult(result=score, reason=response.reason, traces=traces)


result = my_metric.score(response='my response')  # result
print(result)
print(result.reason)
low
The prompt 'my response' does not provide sufficient information or context for me to evaluate its helpfulness. An answer needs to be specific and provide insight or information relative to a clear question or context.