Custom Evaluators Script

We recommend reading through the Custom Evaluators section of the documentation before using this script.

import requests

# Placeholder credential: replace with a real API key before running the script.
API_KEY = "INSERT_YOUR_API_KEY_HERE"

# Every sample asks the same question; the answers grow progressively longer
# and more technical from one sample to the next.
_SHARK_QUESTION = "What is a shark?"

_SHARK_ANSWERS = (
    "A shark is a type of fish that has cartilage instead of bones.",
    "Sharks are a group of elasmobranch fish characterized by a cartilaginous skeleton, five to seven gill slits on the sides of the head, and pectoral fins that are not fused to the head.",
    "Sharks are a group of elasmobranch fish characterized by a cartilaginous skeleton, five to seven gill slits on the sides of the head, and pectoral fins that are not fused to the head. Modern sharks are classified within the clade Selachimorpha (or Selachii) and are the sister group to the Batoidea (rays and kin). Some sources extend the term 'shark' as an informal category including extinct members of Chondrichthyes (cartilaginous fish) with a shark-like morphology, such as hybodonts. Shark-like chondrichthyans such as Cladoselache and Doliodus first appeared in the Devonian Period (419–359 million years), though some fossilized chondrichthyan-like scales are as old as the Late Ordovician (458–444 million years ago). The earliest confirmed modern sharks (selachimorphs) are known from the Early Jurassic around 200 million years ago, with the oldest known member being Agaleus, though records of true sharks may extend back as far as the Permian.",
)

# One input/output pair per answer, in the order listed above.
custom_samples = [
    {
        "evaluated_model_input": _SHARK_QUESTION,
        "evaluated_model_output": answer,
    }
    for answer in _SHARK_ANSWERS
]

# HTTP headers sent with every request: a JSON body plus the API key.
headers = {"Content-Type": "application/json"}
headers["X-API-KEY"] = API_KEY

# Submit each sample to the evaluate endpoint and print a PASS/FAIL line per
# evaluator result.

# Seconds to wait for the API before giving up. Without an explicit timeout,
# requests waits indefinitely and a stalled connection would hang the script.
REQUEST_TIMEOUT_SECONDS = 60

# The same two evaluators are applied to every sample, so build the list once
# rather than on every loop iteration.
evaluators = [
    {
        "evaluator": "custom-v1",
        # You'll need to create a profile called "simple-english" in your account
        "profile_name": "simple-english",
        "explain_strategy": "always",
    },
    {
        "evaluator": "custom-v1",
        "profile_name": "system:is-concise",
        "explain_strategy": "always",
    },
]

separator = "------------------------------------"

for sample in custom_samples:
    data = {
        "evaluators": evaluators,
        "evaluated_model_input": sample["evaluated_model_input"],
        "evaluated_model_output": sample["evaluated_model_output"],
        "app": "demo_custom",
    }
    response = requests.post(
        "https://api.patronus.ai/v1/evaluate",
        headers=headers,
        json=data,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Stop immediately on any 4xx/5xx response (e.g. a bad API key).
    response.raise_for_status()

    results = response.json()["results"]
    print(separator)
    print(f"Evaluated Model Input : {sample['evaluated_model_input']}")
    print(f"Evaluated Model Output: {sample['evaluated_model_output']}")
    print(separator)
    for result in results:
        evaluation_result = result.get("evaluation_result")
        if evaluation_result is None:
            # A result entry without an evaluation payload used to crash with
            # AttributeError on the next lookup; report it and move on.
            print(f"No evaluation result returned: {result}")
            print(separator)
            continue

        evaluator_id = evaluation_result.get("evaluator_id")
        passed = bool(evaluation_result["pass"])

        print(f"{evaluator_id}: {'PASS' if passed else 'FAIL'}")
        print(separator)

What’s Next