Create eval
client.evals.create(EvalCreateParams { data_source_config, testing_criteria, metadata, name } body, RequestOptionsoptions?): EvalCreateResponse { id, created_at, data_source_config, 4 more }
POST/evals
Create the structure of an evaluation that can be used to test a model's performance. An evaluation is a set of testing criteria and the config for a data source, which dictates the schema of the data used in the evaluation. After creating an evaluation, you can run it on different models and model parameters. We support several types of graders and datasources. For more information, see the Evals guide.
Create eval
import OpenAI from "openai";
const openai = new OpenAI();
const evalObj = await openai.evals.create({
name: "Sentiment",
data_source_config: {
type: "stored_completions",
metadata: { usecase: "chatbot" }
},
testing_criteria: [
{
type: "label_model",
model: "o3-mini",
input: [
{ role: "developer", content: "Classify the sentiment of the following statement as one of 'positive', 'neutral', or 'negative'" },
{ role: "user", content: "Statement: {{item.input}}" }
],
passing_labels: ["positive"],
labels: ["positive", "neutral", "negative"],
name: "Example label grader"
}
]
});
console.log(evalObj);
{
"object": "eval",
"id": "eval_67b7fa9a81a88190ab4aa417e397ea21",
"data_source_config": {
"type": "stored_completions",
"metadata": {
"usecase": "chatbot"
},
"schema": {
"type": "object",
"properties": {
"item": {
"type": "object"
},
"sample": {
"type": "object"
}
},
"required": [
"item",
"sample"
]
},
"testing_criteria": [
{
"name": "Example label grader",
"type": "label_model",
"model": "o3-mini",
"input": [
{
"type": "message",
"role": "developer",
"content": {
"type": "input_text",
"text": "Classify the sentiment of the following statement as one of positive, neutral, or negative"
}
},
{
"type": "message",
"role": "user",
"content": {
"type": "input_text",
"text": "Statement: {{item.input}}"
}
}
],
"passing_labels": [
"positive"
],
"labels": [
"positive",
"neutral",
"negative"
]
}
],
"name": "Sentiment",
"created_at": 1740110490,
"metadata": {
"description": "An eval for sentiment analysis"
}
}
Returns Examples
{
"object": "eval",
"id": "eval_67b7fa9a81a88190ab4aa417e397ea21",
"data_source_config": {
"type": "stored_completions",
"metadata": {
"usecase": "chatbot"
},
"schema": {
"type": "object",
"properties": {
"item": {
"type": "object"
},
"sample": {
"type": "object"
}
},
"required": [
"item",
"sample"
]
},
"testing_criteria": [
{
"name": "Example label grader",
"type": "label_model",
"model": "o3-mini",
"input": [
{
"type": "message",
"role": "developer",
"content": {
"type": "input_text",
"text": "Classify the sentiment of the following statement as one of positive, neutral, or negative"
}
},
{
"type": "message",
"role": "user",
"content": {
"type": "input_text",
"text": "Statement: {{item.input}}"
}
}
],
"passing_labels": [
"positive"
],
"labels": [
"positive",
"neutral",
"negative"
]
}
],
"name": "Sentiment",
"created_at": 1740110490,
"metadata": {
"description": "An eval for sentiment analysis"
}
}