Trustworthy Agents with OpenAI Agents SDK and Cleanlab
Agentic AI applications — such as those built using the OpenAI Agents SDK — orchestrate tools and language models to carry out complex user tasks. But like any system built on LLMs, Agents can still hallucinate, occasionally generating incorrect or misleading responses that undermine user trust. This tutorial shows how to evaluate and score the trustworthiness of any OpenAI Agent’s responses in real-time using Cleanlab, and how to automatically block untrustworthy outputs from reaching your users.
Setup
# Install required packages
%pip install cleanlab-tlm openai-agents --quiet
# Set API keys
import os
os.environ["CLEANLAB_TLM_API_KEY"] = "<YOUR_CLEANLAB_TLM_API_KEY>" # Get your free API key from: https://tlm.cleanlab.ai/
os.environ["OPENAI_API_KEY"] = "<YOUR_OPENAI_API_KEY>" # Get API key from: https://platform.openai.com/signup
# Import required libraries
from cleanlab_tlm import TLM
from cleanlab_tlm.utils.chat import form_prompt_string, form_response_string_chat_completions_api
from agents import Agent, Runner, RunContextWrapper, function_tool, RunConfig, set_default_openai_api
from agents.models.chatcmpl_converter import Converter
from agents.models.interface import Model, ModelProvider
from agents.models.multi_provider import MultiProvider
# Use the Chat Completions API
set_default_openai_api("chat_completions")
Build an OpenAI Agent
For this tutorial, we’ll build a Customer Support AI Agent using the OpenAI Agents SDK. We have another tutorial on adding TLM into a version of this same Agent built with LangGraph.
Optional: Define Tools the Agent can call
from typing import Union, Optional
import re
import shutil
import openai
import sqlite3
import requests
import numpy as np
import pandas as pd
from dataclasses import dataclass
from datetime import date, datetime
@dataclass
class UserInfo:
passenger_id: str
response = requests.get(
"https://storage.googleapis.com/benchmarks-artifacts/travel-db/swiss_faq.md"
)
response.raise_for_status()
faq_text = response.text
docs = [{"page_content": txt} for txt in re.split(r"(?=\n##)", faq_text)]
class VectorStoreRetriever:
def __init__(self, docs: list, vectors: list, oai_client):
self._arr = np.array(vectors)
self._docs = docs
self._client = oai_client
@classmethod
def from_docs(cls, docs, oai_client):
embeddings = oai_client.embeddings.create(
model="text-embedding-3-small", input=[doc["page_content"] for doc in docs]
)
vectors = [emb.embedding for emb in embeddings.data]
return cls(docs, vectors, oai_client)
def query(self, query: str, k: int = 5) -> list[dict]:
embed = self._client.embeddings.create(
model="text-embedding-3-small", input=[query]
)
# "@" is just a matrix multiplication in python
scores = np.array(embed.data[0].embedding) @ self._arr.T
top_k_idx = np.argpartition(scores, -k)[-k:]
top_k_idx_sorted = top_k_idx[np.argsort(-scores[top_k_idx])]
return [
{**self._docs[idx], "similarity": scores[idx]} for idx in top_k_idx_sorted
]
retriever = VectorStoreRetriever.from_docs(docs, openai.Client())
@function_tool
def lookup_policy(query: str) -> str:
"""Consult the company policies to check whether certain options are permitted.
    Use this before making any flight changes or performing other 'write' events."""
docs = retriever.query(query, k=2)
return "\n\n".join([doc["page_content"] for doc in docs])
# Populate Database that Agent can access via Tool Call
db_url = "https://storage.googleapis.com/benchmarks-artifacts/travel-db/travel2.sqlite"
local_file = "travel2.sqlite"
# The backup lets us restart for each tutorial section
backup_file = "travel2.backup.sqlite"
overwrite = False
if overwrite or not os.path.exists(local_file):
response = requests.get(db_url)
response.raise_for_status() # Ensure the request was successful
with open(local_file, "wb") as f:
f.write(response.content)
# Backup - we will use this to "reset" our DB in each section
shutil.copy(local_file, backup_file)
# Convert the flights to present time for our tutorial
def update_dates(file):
shutil.copy(backup_file, file)
conn = sqlite3.connect(file)
cursor = conn.cursor()
tables = pd.read_sql(
"SELECT name FROM sqlite_master WHERE type='table';", conn
).name.tolist()
tdf = {}
for t in tables:
tdf[t] = pd.read_sql(f"SELECT * from {t}", conn)
example_time = pd.to_datetime(
tdf["flights"]["actual_departure"].replace("\\N", pd.NaT)
).max()
current_time = pd.to_datetime("now").tz_localize(example_time.tz)
time_diff = current_time - example_time
tdf["bookings"]["book_date"] = (
pd.to_datetime(tdf["bookings"]["book_date"].replace("\\N", pd.NaT), utc=True)
+ time_diff
)
datetime_columns = [
"scheduled_departure",
"scheduled_arrival",
"actual_departure",
"actual_arrival",
]
for column in datetime_columns:
tdf["flights"][column] = (
pd.to_datetime(tdf["flights"][column].replace("\\N", pd.NaT)) + time_diff
)
for table_name, df in tdf.items():
df.to_sql(table_name, conn, if_exists="replace", index=False)
del df
del tdf
conn.commit()
conn.close()
return file
db = update_dates(local_file)
@function_tool
def fetch_user_flight_information(wrapper: RunContextWrapper[UserInfo]) -> list[dict]:
"""Fetch all tickets for the user along with corresponding flight information and seat assignments.
Returns:
A list of dictionaries where each dictionary contains the ticket details,
associated flight details, and the seat assignments for each ticket belonging to the user.
"""
passenger_id = wrapper.context.passenger_id
if not passenger_id:
raise ValueError("No passenger ID configured.")
conn = sqlite3.connect(db)
cursor = conn.cursor()
query = """
SELECT
t.ticket_no, t.book_ref,
f.flight_id, f.flight_no, f.departure_airport, f.arrival_airport, f.scheduled_departure, f.scheduled_arrival,
bp.seat_no, tf.fare_conditions
FROM
tickets t
JOIN ticket_flights tf ON t.ticket_no = tf.ticket_no
JOIN flights f ON tf.flight_id = f.flight_id
JOIN boarding_passes bp ON bp.ticket_no = t.ticket_no AND bp.flight_id = f.flight_id
WHERE
t.passenger_id = ?
"""
cursor.execute(query, (passenger_id,))
rows = cursor.fetchall()
column_names = [column[0] for column in cursor.description]
results = [dict(zip(column_names, row)) for row in rows]
cursor.close()
conn.close()
return results
@function_tool
def search_flights(
departure_airport: Optional[str] = None,
arrival_airport: Optional[str] = None,
start_time: Optional[date | datetime] = None,
end_time: Optional[date | datetime] = None,
limit: int = 20,
) -> list[dict]:
"""Search for flights based on departure airport, arrival airport, and departure time range."""
conn = sqlite3.connect(db)
cursor = conn.cursor()
query = "SELECT * FROM flights WHERE 1 = 1"
params = []
if departure_airport:
query += " AND departure_airport = ?"
params.append(departure_airport)
if arrival_airport:
query += " AND arrival_airport = ?"
params.append(arrival_airport)
if start_time:
query += " AND scheduled_departure >= ?"
params.append(start_time)
if end_time:
query += " AND scheduled_departure <= ?"
params.append(end_time)
query += " LIMIT ?"
params.append(limit)
cursor.execute(query, params)
rows = cursor.fetchall()
column_names = [column[0] for column in cursor.description]
results = [dict(zip(column_names, row)) for row in rows]
cursor.close()
conn.close()
return results
@function_tool
def search_car_rentals(
location: Optional[str] = None,
name: Optional[str] = None,
price_tier: Optional[str] = None,
start_date: Optional[Union[datetime, date]] = None,
end_date: Optional[Union[datetime, date]] = None,
) -> list[dict]:
"""
Search for car rentals based on location, name, price tier, start date, and end date.
Args:
location (Optional[str]): The location of the car rental. Defaults to None.
name (Optional[str]): The name of the car rental company. Defaults to None.
price_tier (Optional[str]): The price tier of the car rental. Defaults to None.
start_date (Optional[Union[datetime, date]]): The start date of the car rental. Defaults to None.
end_date (Optional[Union[datetime, date]]): The end date of the car rental. Defaults to None.
Returns:
list[dict]: A list of car rental dictionaries matching the search criteria.
"""
conn = sqlite3.connect(db)
cursor = conn.cursor()
query = "SELECT * FROM car_rentals WHERE 1=1"
params = []
if location:
query += " AND location LIKE ?"
params.append(f"%{location}%")
if name:
query += " AND name LIKE ?"
params.append(f"%{name}%")
if price_tier:
query += " AND price_tier LIKE ?"
params.append(f"%{price_tier}%")
if start_date:
query += " AND start_date >= ?"
params.append(str(start_date))
if end_date:
query += " AND end_date <= ?"
params.append(str(end_date))
# This tool allows matching on price tier and dates even though data is limited
# which tests Agent behavior when relevant data might be missing.
cursor.execute(query, params)
results = cursor.fetchall()
conn.close()
return [
dict(zip([column[0] for column in cursor.description], row)) for row in results
]
@function_tool
def search_hotels(
location: Optional[str] = None,
name: Optional[str] = None,
price_tier: Optional[str] = None,
checkin_date: Optional[Union[datetime, date]] = None,
checkout_date: Optional[Union[datetime, date]] = None,
) -> list[dict]:
"""
Search for hotels based on location, name, price tier, check-in date, and check-out date.
Args:
location (Optional[str]): The location of the hotel. Defaults to None.
name (Optional[str]): The name of the hotel. Defaults to None.
price_tier (Optional[str]): The price tier of the hotel. Defaults to None. Examples: Midscale, Upper Midscale, Upscale, Luxury
checkin_date (Optional[Union[datetime, date]]): The check-in date of the hotel. Defaults to None.
checkout_date (Optional[Union[datetime, date]]): The check-out date of the hotel. Defaults to None.
Returns:
list[dict]: A list of hotel dictionaries matching the search criteria.
"""
conn = sqlite3.connect(db)
cursor = conn.cursor()
query = "SELECT * FROM hotels WHERE 1=1"
params = []
if location:
query += " AND location LIKE ?"
params.append(f"%{location}%")
if name:
query += " AND name LIKE ?"
params.append(f"%{name}%")
# For the sake of this tutorial, we will let you match on any dates and price tier.
cursor.execute(query, params)
results = cursor.fetchall()
conn.close()
return [
dict(zip([column[0] for column in cursor.description], row)) for row in results
]
@function_tool
def search_trip_recommendations(
location: Optional[str] = None,
name: Optional[str] = None,
keywords: Optional[str] = None,
) -> list[dict]:
"""
Search for trip recommendations based on location, name, and keywords.
Args:
location (Optional[str]): The location of the trip recommendation. Defaults to None.
name (Optional[str]): The name of the trip recommendation. Defaults to None.
keywords (Optional[str]): The keywords associated with the trip recommendation. Defaults to None.
Returns:
list[dict]: A list of trip recommendation dictionaries matching the search criteria.
"""
conn = sqlite3.connect(db)
cursor = conn.cursor()
query = "SELECT * FROM trip_recommendations WHERE 1=1"
params = []
if location:
query += " AND location LIKE ?"
params.append(f"%{location}%")
if name:
query += " AND name LIKE ?"
params.append(f"%{name}%")
if keywords:
keyword_list = keywords.split(",")
keyword_conditions = " OR ".join(["keywords LIKE ?" for _ in keyword_list])
query += f" AND ({keyword_conditions})"
params.extend([f"%{keyword.strip()}%" for keyword in keyword_list])
cursor.execute(query, params)
results = cursor.fetchall()
conn.close()
return [
dict(zip([column[0] for column in cursor.description], row)) for row in results
]
@function_tool
def get_travel_advisory(country: str) -> dict:
"""Returns a mock travel advisory for a country."""
return {
"level": "Level 2 – Exercise Increased Caution",
"notes": f"Travelers to {country} should be aware of petty crime and take precautions."
}
@function_tool
def get_discount_plan(name: str) -> str:
"""Returns details about a discount plan based on its name.
Valid names: "basic", "premium", "five", "student"
"""
return f"Returning details for the '{name}' discount plan:"
class InsuranceTerms:
def __init__(self, provider: str):
self.provider = provider
self.coverage = {
"GloboSure": {
"trip_delay": "Up to 500 credits after 6 hours",
"lost_baggage": "Up to 1200 credits with receipt proof",
"medical": "Emergency care covered up to 50,000 credits"
},
"NimbusCoverage": {
"cancellation": "Refunds up to 70% for non-weather issues",
"extreme_weather": "Full coverage with documentation"
}
}
@function_tool
def get_insurance_terms(provider: str) -> InsuranceTerms:
"""Returns a policy explanation object for a given (possibly obscure) travel insurance provider."""
return InsuranceTerms(provider)
Optional: Setup OpenAI Agent
import json
system_prompt = """You are a helpful customer support assistant for Swiss Airlines. Use the provided tools to search for flights, company policies, and other information to assist the user's queries. When searching, be persistent. Expand your query bounds if the first search returns no results. If a search comes up empty, expand your search before giving up.
Current time: {time}.""".format(time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
tools=[
fetch_user_flight_information,
search_flights,
lookup_policy,
search_car_rentals,
search_hotels,
search_trip_recommendations,
get_travel_advisory,
get_discount_plan,
get_insurance_terms,
]
user_info = UserInfo(passenger_id="3442 587242")
agent = Agent[UserInfo](
name="Customer support agent",
instructions=system_prompt,
tools=tools,
model="gpt-4o-mini"
)
max_length = 2000
def truncate(msg, max_length=max_length):
if msg is None:
return ""
msg = str(msg)
if len(msg) > max_length:
return msg[:max_length] + " ... (truncated)"
return msg
HEADER = {
"human_message": (None, "Human Message"),
"ai_message_header": ("ai_message", "AI Message"),
"tool_calls_header": ("tool_calls", "Tool Calls"),
"tool_message_header": ("tool_message", "Tool Message"),
}
def print_event(tag, printed, data=None, item=None, score=None, tool_calls_from_message=None):
if tag == "human_message":
if tag not in printed:
printed.add(tag)
print(f"\n{'='*10} Human Message {'='*10}")
print(data)
elif tag == "tool_call":
key, title = HEADER["tool_calls_header"]
if key not in printed:
printed.add(key)
print(f"\n{'='*10} {title} {'='*10}")
# If called from a message, print each call in the list
if tool_calls_from_message is not None:
for call in tool_calls_from_message:
func = call.get("function", {})
name = func.get("name", "UnknownFunction")
call_id = call.get("id", "unknown_id")
print(f"{name} ({call_id})\nArgs:")
try:
args = json.loads(func.get("arguments", "{}"))
except Exception:
args = {}
for k, v in args.items():
print(f" {k}: {v}")
elif item is not None:
call = item.raw_item
print(f"{call.name} ({call.call_id})\nArgs:")
for k, v in json.loads(call.arguments).items():
print(f" {k}: {truncate(v)}")
elif tag == "tool_output":
key, title = HEADER["tool_message_header"]
if key not in printed:
printed.add(key)
print(f"\n{'='*10} {title} {'='*10}")
print(truncate(data or item.raw_item.get("output", "")))
elif tag == "ai_message":
key, title = HEADER["ai_message_header"]
if key not in printed:
printed.add(key)
print(f"\n{'='*10} {title} {'='*10}")
print(data or item.raw_item.content[0].text)
elif tag == "tlm_score":
print(f"\n[TLM Score]: {score}")
def print_items(items):
printed = set()
messages = Converter.items_to_messages(items)
for msg in messages:
role = msg.get('role')
content = msg.get("content")
tool_calls = msg.get("tool_calls")
if role == "user":
print_event("human_message", printed, data=content)
elif role == "assistant":
if tool_calls:
print_event("tool_call", printed, tool_calls_from_message=tool_calls)
if content:
print_event("ai_message", printed, data=content)
async def print_stream_events(stream):
printed, cached_score = set(), None
event_map = {
"tool_called": "tool_call",
"tool_output": "tool_output",
"message_output_created": "ai_message"
}
async for e in stream.stream_events():
if e.type == "run_item_stream_event" and e.item and e.name in event_map:
print_event(event_map[e.name], printed, item=e.item)
if cached_score is not None:
print_event("tlm_score", printed, score=cached_score)
cached_score = None
elif getattr(e, "type", None) == "raw_response_event" and getattr(getattr(e, "data", None), "type", None) == "tlm_score_event":
cached_score = e.data.score
if cached_score is not None:
print_event("tlm_score", printed, score=cached_score)
print()
For educational purposes, we implement a stream_agent_response() method (based on the OpenAI streaming example) that prints everything the Agent produces and processes: user messages, Tool calls, Tool outputs, LLM responses, and so on.
async def stream_agent_response(agent, user_input):
input_items = [{"content": user_input, "role": "user"}]
print_items(input_items)
stream = Runner.run_streamed(agent,
input=input_items,
context=user_info)
await print_stream_events(stream)
Run the Agent
Let’s ask our Agent some queries.
user_input = "Can I get a refund if I cancel my flight with Swiss Airlines?"
await stream_agent_response(agent, user_input)
This example illustrates the Agent’s ability to call the appropriate Tool and craft an accurate, policy-based answer from the returned data. Let’s run some more queries, showcasing some that yield problematic Agent responses.
user_input = "What are the details of the 5 discount plan"
await stream_agent_response(agent, user_input)
In this case, incomplete results were returned from the Tool. Without complete details to draw on, the LLM hallucinated incorrect details about the discount plan, potentially misleading the user.
user_input = "What coverage does GloboSure give me for travel delays?"
await stream_agent_response(agent, user_input)
In this case, the Tool was implemented to return a raw Python object rather than its contents, so the LLM filled the gap by hallucinating incorrect travel-delay coverage details.
user_input = "Is there a health advisory in Basel"
await stream_agent_response(agent, user_input)
In this case, the Agent chose the right Tool, but it returned an advisory about crime rather than the requested health advisory for Basel. The Tool didn’t explicitly explain that it did not have the relevant information. The Agent didn’t notice the discrepancy and thus gave an incorrect response.
user_input = "What is my arrival time in their time zone?"
await stream_agent_response(agent, user_input)
In this case, the LLM mistakenly assumed that Basel (BSL) uses the UTC-4 time zone. In May, Basel actually uses Central European Summer Time (UTC+2), so the correct local arrival time should be six hours later than stated by the Agent.
user_input = "What are conspiracy theories around Swiss Airlines safety?"
await stream_agent_response(agent, user_input)
In this case, no appropriate Tool is available for this query. The Agent responds with a potentially hallucinated answer that could harm the airline’s reputation.
Trustworthy Agent
Now, let’s explore how Cleanlab’s Trustworthy Language Model (TLM) can help identify problematic responses from your OpenAI Agent in real-time. TLM provides a state-of-the-art trustworthiness score for each LLM-generated message, quantifying how likely a response is to be incorrect or flawed. To integrate TLM into your OpenAI Agent application, we provide a TLMModelProvider. By including TLMModelProvider in your run configuration (or wrapping your own ModelProvider), your Agent model will automatically use our wrapper, whether you specify the model argument as a string or as a Model object. Trust scores are then added to all responses, for both get_response (standard) and stream_response (streamed) outputs.
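As a quick preview (the TLMModelProvider class itself is defined in the optional section below, and fully runnable examples follow later in this tutorial), the only change to your existing Agent code is the run configuration:
# Preview sketch: assumes the TLMModelProvider class defined in the section below.
run_config = RunConfig(model_provider=TLMModelProvider(trust_threshold=0.9))  # wraps the default MultiProvider
# run_config = RunConfig(model_provider=TLMModelProvider(base_provider=my_provider))  # or wrap your own ModelProvider
result = await Runner.run(agent, "Hello!", context=user_info, run_config=run_config)  # also works with Runner.run_streamed(...)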
Optional: Classes to score the trustworthiness of LLM calls using TLM
Initialize the TLM client
tlm = TLM(options={"log": ["explanation"]}) # See Advanced Tutorial for additional TLM configuration options
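To see what a single scoring call returns before wiring TLM into the Agent, here is a quick illustrative check (the prompt/response pair is made up); it uses the same get_trustworthiness_score call and return fields that the wrapper classes below rely on.
# Illustrative standalone check (this prompt/response pair is made up)
res = tlm.get_trustworthiness_score(
    "What is the capital of Switzerland?",
    "The capital of Switzerland is Bern.",
)
print(res["trustworthiness_score"])           # float in [0, 1]; higher means more trustworthy
print(res.get("log", {}).get("explanation"))  # included because we set log=["explanation"] above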
class TLMScoreEvent:
"""Custom event for emitting the TLM trustworthiness score."""
def __init__(self, score):
self.type = "tlm_score_event"
self.score = score
class TLMGuardrailTripwireTriggered(Exception):
"""
Raised when the Trustworthy Language Model (TLM) trust score is below the configured threshold.
This exception should be caught at the Agent run level (e.g., using try/except around `Runner.run(agent, ...)`)
for custom handling of low-trust responses.
Attributes:
score (float): The TLM trust score for the response.
input_items (Any): The inputs provided to the LLM.
response_items (Any): The generated responses from the LLM.
explanation (str, optional): Optional explanation from TLM (set log=["explanation"] in TLM options).
"""
def __init__(self, score, input_items, response_items, explanation=None):
super().__init__(f"Trust score too low: {score}")
self.score = score
self.input_items = input_items
self.response_items = response_items
self.explanation = explanation
class TLMModel(Model):
"""
Model wrapper that augments the base class with Cleanlab's Trustworthy Language Model (TLM) trust scoring.
Adds trust scoring to every LLM response. Raises TLMGuardrailTripwireTriggered if the score is below threshold.
"""
def __init__(self, base_model, tlm, trust_threshold=0.9, score_tool_calls=True):
self.base_model = base_model
self.tlm = tlm
self.trust_threshold = trust_threshold
self.score_tool_calls = score_tool_calls
def _handle_trust_score(self, score, input, response_items, expl):
"""
Handles the trust score for each response.
By default, raises TLMGuardrailTripwireTriggered if the score is below the configured threshold.
TODO: Modify this method to customize how trust scores are handled—for example, logging, monitoring,
or applying different policies based on the score.
Tip: To avoid interrupting the Agent's execution, you can store low scores instead of raising an exception
and handle them later.
"""
if score < self.trust_threshold:
raise TLMGuardrailTripwireTriggered(score, input, response_items, expl)
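    # Alternative sketch of the tip above: record low scores instead of raising, so the Agent
    # run is never interrupted. `self.low_trust_events` is a hypothetical attribute you would
    # need to initialize (e.g., to an empty list) in __init__ if you adopt this policy.
    #
    # def _handle_trust_score(self, score, input, response_items, expl):
    #     if score < self.trust_threshold:
    #         self.low_trust_events.append({"score": score, "explanation": expl})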
def _score_with_tlm(self, system_instructions, input, tools, response_items):
"""
Prepare the prompt and compute the TLM trust score for the response.
Skips scoring Tool calls if configured.
"""
        # Convert the LLM response items into Chat Completions-style messages
        response_msgs = Converter.items_to_messages(response_items)
        # Skip scoring Tool calls if configured
        if not self.score_tool_calls and response_msgs[0].get("tool_calls"): return None
        # Prepare the prompt for TLM
        msgs = Converter.items_to_messages(input if isinstance(input, list) else [input])
        if system_instructions: msgs.insert(0, {"role": "system", "content": system_instructions})
        tlm_prompt = form_prompt_string(msgs, [Converter.tool_to_openai(t) for t in tools])
        # Score the response
        tlm_response = form_response_string_chat_completions_api(response_msgs[0])
tlm_res = self.tlm.get_trustworthiness_score(tlm_prompt, tlm_response)
score = tlm_res["trustworthiness_score"]
expl = tlm_res.get("log", {}).get("explanation")
# Handle trust score using a helper method
self._handle_trust_score(score, input, response_items, expl)
return score
async def get_response(self, system_instructions, input, model_settings, tools,
output_schema, handoffs, tracing, previous_response_id, prompt=None):
"""
Add trust scoring to Runner.run(agent, ...).
"""
response = await self.base_model.get_response(
system_instructions, input, model_settings, tools,
output_schema, handoffs, tracing, previous_response_id, prompt)
self._score_with_tlm(system_instructions, input, tools, response.to_input_items())
return response
async def stream_response(self, system_instructions, input, model_settings, tools,
output_schema, handoffs, tracing, previous_response_id, prompt=None):
"""
Add trust scoring to Runner.run_streamed(agent, ...).
"""
final = None
async for e in self.base_model.stream_response(
system_instructions, input, model_settings, tools,
output_schema, handoffs, tracing, previous_response_id, prompt):
yield e
if getattr(e, 'type', None) == "response.completed":
final = e.response
if final:
response_items = [item.model_dump(exclude_unset=True) for item in final.output]
score = self._score_with_tlm(system_instructions, input, tools, response_items)
if not score: return
yield TLMScoreEvent(score)
class TLMModelProvider(ModelProvider):
def __init__(self, base_provider=None, trust_threshold=0.9, score_tool_calls=True):
self.base_provider = base_provider or MultiProvider()
self.trust_threshold = trust_threshold
self.score_tool_calls = score_tool_calls
def get_model(self, name=None):
return TLMModel(self.base_provider.get_model(name), tlm, self.trust_threshold, self.score_tool_calls)
To wrap your model with TLM scoring, use the TLMModelProvider within your run_config. You can optionally define a custom trustworthiness scoring threshold and specify whether to apply trust scoring to Tool calls in addition to natural language messages.
async def stream_trustworthy_agent_response(agent, user_input):
input_items = [{"content": user_input, "role": "user"}]
print_items(input_items)
try:
stream = Runner.run_streamed(
agent,
            input=input_items,
context=user_info,
run_config=RunConfig(
model_provider=TLMModelProvider(trust_threshold=0.9, score_tool_calls=True)
) # New: Wrap your LLM model with TLM scoring
)
await print_stream_events(stream)
except TLMGuardrailTripwireTriggered as e: # Handling low trust scores will be covered later in this tutorial
# For now, we just print the untrustworthy response and the TLM score
print_items(e.response_items)
print("[TLM Score]:", e.score)
Now let’s run our Agent with automated trustworthiness scoring in place. In the traces below, the trustworthiness scores from TLM correspond to the previous Tool call/message in each conversation. The final AI Message is the response given to your user, whose correctness is vital.
user_input = "Can I get a refund if I cancel my flight with Swiss Airlines?"
await stream_trustworthy_agent_response(agent, user_input)
Upon review, we see that the Agent’s response was correct for this simple question. TLM computed a high trustworthiness score in real-time, letting our application automatically know it can serve this response to users with great confidence.
Let’s run the queries where our Agent responded incorrectly before.
user_input = "What are the details of the 5 discount plan"
await stream_trustworthy_agent_response(agent, user_input)
user_input = "What coverage does GloboSure give me for travel delays?"
await stream_trustworthy_agent_response(agent, user_input)
user_input = "Is there a health advisory in Basel"
await stream_trustworthy_agent_response(agent, user_input)
user_input = "What is my arrival time in their time zone?"
await stream_trustworthy_agent_response(agent, user_input)
user_input = "What are conspiracy theories around Swiss Airlines safety?"
await stream_trustworthy_agent_response(agent, user_input)
Upon review, we find that the Agent’s responses were problematic for the above queries. These responses received lower trustworthiness scores from TLM in real-time, allowing your application to automatically flag them before they are served to users.
You could still choose to show such responses to users, appending a caveat like: CAUTION: THIS RESPONSE HAS BEEN FLAGGED AS POTENTIALLY UNTRUSTWORTHY.
Alternatively, you could escalate this interaction to a human customer support representative, or return a canned fallback response in place of the Agent’s response. The next sections demonstrate how to implement different fallback strategies in your OpenAI Agent.
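For instance, escalating to a human is not demonstrated below, but a minimal sketch could look like the following (create_support_ticket is a hypothetical placeholder for your own ticketing or handoff mechanism; TLMModelProvider and TLMGuardrailTripwireTriggered are defined above).
# Hypothetical escalation sketch; `create_support_ticket` is a placeholder for your own system.
async def escalate_on_low_trust(agent, user_input):
    try:
        result = await Runner.run(
            agent, user_input, context=user_info,
            run_config=RunConfig(model_provider=TLMModelProvider(trust_threshold=0.9)),
        )
        return result.final_output
    except TLMGuardrailTripwireTriggered as e:
        ticket_id = create_support_ticket(user_input, flagged_response=e.response_items, score=e.score)
        return f"I've escalated your request to a human support agent (ticket {ticket_id})."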
Fallback Logic: Replacing Untrustworthy Responses
There are many options for handling low trust scores in your Agent. One option: whenever an LLM call has low trustworthiness, you can raise an exception and manage it similarly to OpenAI’s native Guardrails. In this case, such exceptions indicate the Agent may be going off the rails.
Below, we demonstrate a fallback strategy that halts the Agent when this exception is raised, and has the Agent return a pre-written abstention response (indicating it does not know how to handle this request).
You can easily swap this out for other fallback behaviors like:
- Escalate to a human
- Re-run the Agent with a modified prompt
- Re-generate the recent untrustworthy LLM output to have the Agent autonomously improve its response
async def stream_trustworthy_agent_response(agent, user_input):
input_items = [{"content": user_input, "role": "user"}]
print_items(input_items)
try:
stream = Runner.run_streamed(
agent,
input=input_items,
context=user_info,
run_config=RunConfig(
model_provider=TLMModelProvider(trust_threshold=0.9, score_tool_calls=True)
) # New: Wrap the model with TLM scoring
)
await print_stream_events(stream)
except TLMGuardrailTripwireTriggered:
print_items([{
"content": "Sorry, I cannot answer based on available information. Try rephrasing your question or providing more details.",
"role": "assistant"
}])
user_input = "What are the details of the 5 discount plan"
await stream_trustworthy_agent_response(agent, user_input)
In this case, the trust-based fallback prevented the Agent from hallucinating incorrect details about the “five” discount plan. The Agent now responds with a fallback message instead, which is far preferable to the incorrect response returned by the original Agent for this query.
Fallback Logic: Regenerate Untrustworthy LLM Outputs
Let’s demonstrate a different fallback strategy to have the Agent autonomously improve its responses. Here, we have the Agent re-generate its most recent LLM output whenever the trustworthiness score was low. In our re-generation step, the LLM prompt is modified to include the TLM explanation of why the LLM’s previous output was considered untrustworthy. When the LLM is able to use this feedback to generate more accurate outputs, this fallback strategy can automatically improve the overall Agent.
async def stream_trustworthy_agent_response(agent, user_input):
input_items = [{"content": user_input, "role": "user"}]
print_items(input_items)
while True:
try:
stream = Runner.run_streamed(
agent,
input=input_items,
context=user_info,
run_config=RunConfig(
model_provider=TLMModelProvider(trust_threshold=0.9, score_tool_calls=True)
) # New: Wrap the model with TLM scoring
)
await print_stream_events(stream)
except TLMGuardrailTripwireTriggered as e:
input_items = e.input_items
rewrite_prompt = (
f"Your previous response was flagged as untrustworthy with potential inaccuracies. "
f"Reason: {e.explanation}\n\n"
"# Instructions\n\n"
"Please provide a new response, ensuring it is accurate and trustworthy. "
"If you don't know how to respond accurately, simply express that in your new response.\n"
"Do not reference the earlier version of your response; it will not be shown to the user.\n"
)
last_item = e.response_items[-1]
new_input_items = [
*e.response_items,
*(
[{
"call_id": last_item.get("call_id"),
"output": "Tool call was flagged as untrustworthy, cancelled",
"type": "function_call_output"
}] if last_item.get("type") == "function_call" else []
),
{"content": rewrite_prompt, "role": "user"}
]
print_items(new_input_items)
input_items.extend(new_input_items)
continue
break
user_input = "What are the details of the 5 discount plan"
await stream_trustworthy_agent_response(agent, user_input)
In this case, the Agent now responds by acknowledging that it lacks specific details about the discount plan. This approach can offer a better experience for your Agent’s users.
Conclusion. Adding trust scoring to your Agent is easy and can automatically prevent incorrect responses from your Agent. When LLM outputs receive low trustworthiness scores, this tutorial showcased two fallback strategies: (1) halting the Agent and having it respond with a predefined fallback message, or (2) re-generating the previous LLM output, optionally using feedback from TLM’s explanation. In these cases, you could alternatively: escalate to a human, re-run the Agent from scratch with modified system instructions, or add whatever fallback strategy is best for your use-case.
Production-Ready Agents
For the Agents above, we printed internal messages and trustworthiness scores for educational purposes, but this is not how they would be used in real applications. In this section, we’ll show the minimal code needed to add trust scoring and handling in your production-ready Agent. Let’s start by reviewing the basic OpenAI Agents SDK code that retrieves the Agent’s final output.
Optional: Setup a Basic OpenAI Agent with a Single Tool Call
@function_tool
def get_discount_plan(name: str) -> str:
"""Returns details about a discount plan based on its name.
Valid names: "basic", "premium", "five", "student"
"""
return f"Returning details for the '{name}' discount plan:"
agent = Agent(name="Assistant",
model="gpt-4o-mini",
instructions="You are a customer support bot.",
tools=[get_discount_plan])
agent_response = ""
# Your basic OpenAI Agents SDK code to get the final output
async def get_agent_response(agent, user_input):
result = await Runner.run(agent, user_input)
return result.final_output
Let’s query this production-ready Agent.
user_input = "What are the details of the 5 discount plan"
agent_response = await get_agent_response(agent, user_input)
print(agent_response)
To integrate TLM trust scoring, simply add the TLMModelProvider
to your Agent run.
# Minimal changes for TLM integration
async def get_agent_response(agent, user_input):
result = await Runner.run(
agent,
input=user_input,
run_config=RunConfig(
model_provider=TLMModelProvider(trust_threshold=0.9, score_tool_calls=True)
) # New: Wrap the model with TLM scoring
)
return result.final_output
We are integrating TLM as a Guardrail, which, like other Guardrails in the OpenAI Agents SDK, raises an Exception when triggered. In our case, the TLM Guardrail triggers whenever the trust score of any LLM call falls below a predefined threshold.
try:
agent_response = await get_agent_response(agent, user_input)
except TLMGuardrailTripwireTriggered: # New: Handle TLM guardrail exception
    agent_response = "Sorry, I cannot answer based on available information. Try rephrasing your question or providing more details."
print(agent_response)
Other OpenAI Multi-Agent Design Patterns
Handoffs
Our trust scoring integration seamlessly supports any OpenAI Agent Pattern. For instance, consider OpenAI’s multi-agent customer support example, which involves agent handoffs. For more details on implementing handoffs, refer to the OpenAI Agents SDK documentation.
Optional: Setup OpenAI Agent (Multi-Agent Customer Support)
from __future__ import annotations as _annotations
import random
from pydantic import BaseModel
from agents import (
Agent,
RunContextWrapper,
Runner,
function_tool,
handoff,
)
from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX
class AirlineAgentContext(BaseModel):
passenger_name: str | None = None
confirmation_number: str | None = None
seat_number: str | None = None
flight_number: str | None = None
@function_tool(name_override="faq_lookup_tool", description_override="Lookup frequently asked questions.")
async def faq_lookup_tool(question: str) -> str:
if "bag" in question or "baggage" in question:
return "You are allowed to bring one bag on the plane. It must be under 50 pounds and 22 inches x 14 inches x 9 inches."
elif "seats" in question or "plane" in question:
return "There are 120 seats on the plane. 22 business class, 98 economy. Exit rows: 4, 16. Economy Plus: 5–8."
elif "wifi" in question:
return "We have free wifi on the plane, join Airline-Wifi"
return "I'm sorry, I don't know the answer to that question."
@function_tool
async def update_seat(context: RunContextWrapper[AirlineAgentContext], confirmation_number: str, new_seat: str) -> str:
context.context.confirmation_number = confirmation_number
context.context.seat_number = new_seat
assert context.context.flight_number is not None, "Flight number is required"
return f"Updated seat to {new_seat} for confirmation number {confirmation_number}"
async def on_seat_booking_handoff(context: RunContextWrapper[AirlineAgentContext]) -> None:
context.context.flight_number = f"FLT-{random.randint(100, 999)}"
faq_agent = Agent[AirlineAgentContext](
name="FAQ Agent",
handoff_description="A helpful agent that can answer questions about the airline.",
instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
You are an FAQ agent. If you are speaking to a customer, you probably were transferred to from the triage agent.
Use the following routine to support the customer:
1. Identify the customer's last question.
2. Use the FAQ tool to answer. Do NOT rely on your own knowledge.
3. If you can't answer, transfer back to the triage agent.""",
tools=[faq_lookup_tool],
)
seat_booking_agent = Agent[AirlineAgentContext](
name="Seat Booking Agent",
handoff_description="A helpful agent that can update a seat on a flight.",
instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
You are a seat booking agent. Use the following routine:
1. Ask for confirmation number.
2. Ask for desired seat number.
3. Use the seat update tool.
If off-topic, transfer back to triage agent.""",
tools=[update_seat],
)
triage_agent = Agent[AirlineAgentContext](
name="Triage Agent",
handoff_description="Routes customer requests to appropriate agents.",
instructions=(
f"{RECOMMENDED_PROMPT_PREFIX} "
"You are a helpful triage agent. Use your tools to route requests."
),
handoffs=[
faq_agent,
handoff(agent=seat_booking_agent, on_handoff=on_seat_booking_handoff),
],
)
faq_agent.handoffs.append(triage_agent)
seat_booking_agent.handoffs.append(triage_agent)
async def get_agent_response(agent, user_input):
result = await Runner.run(agent,
[{"content": user_input, "role": "user"}],
context=AirlineAgentContext(),
run_config=RunConfig(model_provider=TLMModelProvider(trust_threshold=0.9))) # New: Wrap the model with TLM scoring
return result.final_output
user_input = "What are the baggage restrictions?"
try:
agent_response = await get_agent_response(triage_agent, user_input)
except TLMGuardrailTripwireTriggered: # New: Handle TLM guardrail exception
agent_response = "Sorry, I cannot answer based on available information. Try rephrasing your question or providing more details."
print(agent_response)
user_input = "Tell me about recent plane crashes?"
try:
agent_response = await get_agent_response(triage_agent, user_input)
except TLMGuardrailTripwireTriggered: # New: Handle TLM guardrail exception
agent_response = "Sorry, I cannot answer based on available information. Try rephrasing your question or providing more details."
print(agent_response)
Agent-as-a-tool
Alternatively, OpenAI recommends using your Agent as a tool, allowing Agents to call other Agents without handing off to them. You can learn more about this pattern from the official OpenAI documentation. To add trust scoring to your Agent-as-a-tool, implement a function tool that runs your Agent with our TLMModelProvider, as demonstrated below.
from agents import default_tool_error_function
def custom_tool_error_function(ctx, error) -> str:
if isinstance(error, TLMGuardrailTripwireTriggered):
return f"Agent-as-a-tool returned an untrustworthy response. Please try again. Error: {str(error)}"
return default_tool_error_function(ctx, error) # For all other exceptions, use the default behavior
@function_tool(failure_error_function=custom_tool_error_function)
async def run_discount_agent(input: str) -> str:
"""A tool that runs the agent with custom configs"""
discount_agent = Agent(
name="discount_agent",
model="gpt-4o-mini",
instructions=(
"You are a customer service agent specializing in discount inquiries. "
"Respond helpfully and clearly. "
"Valid discount plan names: 'basic', 'premium', 'five', 'student'. "
"Details for the 'student' discount plan: Students with a valid student ID receive a 15% discount on all full-priced items. "
"Details for the 'five' discount plan: "
),
handoff_description="Handles discount-related questions.",
)
result = await Runner.run(
discount_agent,
input=input,
max_turns=1,
run_config=RunConfig(model_provider=TLMModelProvider(trust_threshold=0.9, score_tool_calls=True))
)
return str(result.final_output)
customer_support_agent = Agent(
name="Customer Support Agent",
model="gpt-4o-mini",
instructions=(
"You are a customer service agent. Always use your tools to handle specific requests. "
"Never answer directly; always use your tools."
),
tools=[run_discount_agent],
)
async def get_agent_response(agent, user_input):
result = await Runner.run(
agent,
[{"content": user_input, "role": "user"}],
run_config=RunConfig(model_provider=TLMModelProvider(trust_threshold=0.9, score_tool_calls=True))
)
return result.final_output
user_input = "I am a student, can I get a discount?"
try:
agent_response = await get_agent_response(customer_support_agent, user_input)
except TLMGuardrailTripwireTriggered: # New: Handle TLM guardrail exception
agent_response = "Sorry, I cannot answer based on available information. Try rephrasing your question or providing more details."
print(agent_response)
user_input = "What are the details of the 5 discount plan"
try:
agent_response = await get_agent_response(customer_support_agent, user_input)
except TLMGuardrailTripwireTriggered: # New: Handle TLM guardrail exception
agent_response = "Sorry, I cannot answer based on available information. Try rephrasing your question or providing more details."
print(agent_response)
Under the hood, TLM scoring intercepted the hallucinated response from the Agent-as-a-tool, preventing it from propagating to the main Agent’s final output.