Class-based evaluators provide a structured way to create complex evaluators with initialization steps and state management.
They follow the standard Patronus evaluator interface while allowing more sophisticated implementations.
Key advantages of class-based evaluators:
- Encapsulate initialization steps like loading models
- Separate configuration from evaluation logic
import numpy as np
from transformers import BertTokenizer, BertModel

from patronus import StructuredEvaluator, EvaluationResult
from patronus.experiments import run_experiment


class BERTScore(StructuredEvaluator):
    """Evaluator scoring a task output by embedding similarity to a gold answer.

    Each text is tokenized and run through ``bert-base-uncased``; the last
    hidden state is averaged over dimension 1 to get one vector per text, and
    the score is the cosine similarity between the two vectors.

    Args:
        pass_threshold: Minimum score for the evaluation to pass.
    """

    def __init__(self, pass_threshold: float):
        self.pass_threshold = pass_threshold
        # Load the tokenizer and model once so every evaluate() call reuses them.
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        self.model = BertModel.from_pretrained("bert-base-uncased")

    def _embed(self, text: str) -> np.ndarray:
        """Return the mean-pooled last hidden state of *text* as a 1-D array."""
        toks = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        pooled = self.model(**toks).last_hidden_state.mean(dim=1)
        # Flatten to 1-D so the dot product below yields a scalar rather
        # than a nested single-element array.
        return pooled.detach().numpy().ravel()

    def evaluate(self, *, task_output: str, gold_answer: str, **kwargs) -> EvaluationResult:
        """Compare ``task_output`` with ``gold_answer`` by cosine similarity.

        Args:
            task_output: Text produced by the task under evaluation.
            gold_answer: Reference text to compare against.
            **kwargs: Accepted and ignored.

        Returns:
            An ``EvaluationResult`` whose ``score`` is the cosine similarity
            as a plain ``float`` and whose ``pass_`` is a plain ``bool``
            (``score >= pass_threshold``).
        """
        output_embeds = self._embed(task_output)
        gold_answer_embeds = self._embed(gold_answer)

        norms = np.linalg.norm(output_embeds) * np.linalg.norm(gold_answer_embeds)
        # float() turns the NumPy scalar into a Python float, which also makes
        # the threshold comparison below a Python bool.
        score = float(np.dot(output_embeds, gold_answer_embeds) / norms)

        return EvaluationResult(
            score=score,
            pass_=score >= self.pass_threshold,
            tags={"pass_threshold": str(self.pass_threshold)},
        )


bert_scorer = BERTScore(pass_threshold=0.6)

# Calling the evaluator function will automatically record the result to the platform.
result = bert_scorer.evaluate(task_output="What is the capital of France?", gold_answer="Paris")
result.pretty_print()
from patronus import init
from patronus.evals import RemoteEvaluator

# The SDK's init() is called before the evaluator is created.
init()

# NOTE(review): the two arguments look like an evaluator name and a criteria
# identifier — confirm against the RemoteEvaluator documentation.
hallucination_checker = RemoteEvaluator("lynx", "patronus:hallucination")

# The output states 200 tons while the context states 173 tons.
evaluation_inputs = {
    "task_input": "What's the largest animal?",
    "task_output": "The blue whale is the largest animal, weighing up to 200 tons.",
    "task_context": "The blue whale is the largest animal on Earth, weighing up to 173 tons.",
}

result = hallucination_checker.evaluate(**evaluation_inputs)
result.pretty_print()