# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict

from .._types import SequenceNotStr
from .shared_params.metadata import Metadata
from .graders.grader_inputs_param import GraderInputsParam
from .graders.python_grader_param import PythonGraderParam
from .graders.score_model_grader_param import ScoreModelGraderParam
from .graders.string_check_grader_param import StringCheckGraderParam
from .responses.response_input_text_param import ResponseInputTextParam
from .graders.text_similarity_grader_param import TextSimilarityGraderParam
from .responses.response_input_audio_param import ResponseInputAudioParam

__all__ = [
    "EvalCreateParams",
    "DataSourceConfig",
    "DataSourceConfigCustom",
    "DataSourceConfigLogs",
    "DataSourceConfigStoredCompletions",
    "TestingCriterion",
    "TestingCriterionLabelModel",
    "TestingCriterionLabelModelInput",
    "TestingCriterionLabelModelInputSimpleInputMessage",
    "TestingCriterionLabelModelInputEvalItem",
    "TestingCriterionLabelModelInputEvalItemContent",
    "TestingCriterionLabelModelInputEvalItemContentOutputText",
    "TestingCriterionLabelModelInputEvalItemContentInputImage",
    "TestingCriterionTextSimilarity",
    "TestingCriterionPython",
    "TestingCriterionScoreModel",
]


class EvalCreateParams(TypedDict, total=False):
    data_source_config: Required[DataSourceConfig]
    """The configuration for the data source used for the evaluation runs.

    Dictates the schema of the data used in the evaluation.
    """

    testing_criteria: Required[Iterable[TestingCriterion]]
    """A list of graders for all eval runs in this group.

    Graders can reference variables in the data source using double curly braces
    notation, like `{{item.variable_name}}`. To reference the model's output, use
    the `sample` namespace (ie, `{{sample.output_text}}`).
    """

    metadata: Optional[Metadata]
    """Set of 16 key-value pairs that can be attached to an object.

    This can be useful for storing additional information about the object in a
    structured format, and querying for objects via API or the dashboard.

    Keys are strings with a maximum length of 64 characters. Values are strings
    with a maximum length of 512 characters.
    """

    name: str
    """The name of the evaluation."""


class DataSourceConfigCustom(TypedDict, total=False):
    """
    A CustomDataSourceConfig object that defines the schema for the data source
    used for the evaluation runs. This schema is used to define the shape of the
    data that will be:

    - Used to define your testing criteria and
    - What data is required when creating a run
    """

    item_schema: Required[Dict[str, object]]
    """The json schema for each row in the data source."""

    type: Required[Literal["custom"]]
    """The type of data source. Always `custom`."""

    include_sample_schema: bool
    """
    Whether the eval should expect you to populate the sample namespace (ie, by
    generating responses off of your data source)
    """


class DataSourceConfigLogs(TypedDict, total=False):
    """
    A data source config which specifies the metadata property of your logs query.
    This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
    """

    type: Required[Literal["logs"]]
    """The type of data source. Always `logs`."""

    metadata: Dict[str, object]
    """Metadata filters for the logs data source."""
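

# A minimal sketch, not part of the generated spec: constructing a custom data
# source config by hand. The `question`/`ideal_answer` row fields are hypothetical
# placeholders for whatever shape your eval data takes.
_example_custom_config: DataSourceConfigCustom = {
    "type": "custom",
    "item_schema": {
        "type": "object",
        "properties": {
            "question": {"type": "string"},
            "ideal_answer": {"type": "string"},
        },
        "required": ["question", "ideal_answer"],
    },
    # Set when runs will generate responses, so graders can reference
    # `{{sample.output_text}}` alongside `{{item.question}}` etc.
    "include_sample_schema": True,
}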


class DataSourceConfigStoredCompletions(TypedDict, total=False):
    """Deprecated in favor of LogsDataSourceConfig."""

    type: Required[Literal["stored_completions"]]
    """The type of data source. Always `stored_completions`."""

    metadata: Dict[str, object]
    """Metadata filters for the stored completions data source."""


DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]


class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
    content: Required[str]
    """The content of the message."""

    role: Required[str]
    """The role of the message (e.g. "system", "assistant", "user")."""


class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False):
    """A text output from the model."""

    text: Required[str]
    """The text output from the model."""

    type: Required[Literal["output_text"]]
    """The type of the output text. Always `output_text`."""


class TestingCriterionLabelModelInputEvalItemContentInputImage(TypedDict, total=False):
    """An image input block used within EvalItem content arrays."""

    image_url: Required[str]
    """The URL of the image input."""

    type: Required[Literal["input_image"]]
    """The type of the image input. Always `input_image`."""

    detail: str
    """The detail level of the image to be sent to the model.

    One of `high`, `low`, or `auto`. Defaults to `auto`.
    """


TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[
    str,
    ResponseInputTextParam,
    TestingCriterionLabelModelInputEvalItemContentOutputText,
    TestingCriterionLabelModelInputEvalItemContentInputImage,
    ResponseInputAudioParam,
    GraderInputsParam,
]


class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False):
    """
    A message input to the model with a role indicating instruction following
    hierarchy. Instructions given with the `developer` or `system` role take
    precedence over instructions given with the `user` role. Messages with the
    `assistant` role are presumed to have been generated by the model in previous
    interactions.
    """

    content: Required[TestingCriterionLabelModelInputEvalItemContent]
    """Inputs to the model - can contain template strings.

    Supports text, output text, input images, and input audio, either as a single
    item or an array of items.
    """

    role: Required[Literal["user", "assistant", "system", "developer"]]
    """The role of the message input.

    One of `user`, `assistant`, `system`, or `developer`.
    """

    type: Literal["message"]
    """The type of the message input. Always `message`."""


TestingCriterionLabelModelInput: TypeAlias = Union[
    TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem
]


class TestingCriterionLabelModel(TypedDict, total=False):
    """
    A LabelModelGrader object which uses a model to assign labels to each item in
    the evaluation.
    """

    input: Required[Iterable[TestingCriterionLabelModelInput]]
    """A list of chat messages forming the prompt or context.

    May include variable references to the `item` namespace, ie {{item.name}}.
    """

    labels: Required[SequenceNotStr[str]]
    """The labels to classify to each item in the evaluation."""

    model: Required[str]
    """The model to use for the evaluation. Must support structured outputs."""

    name: Required[str]
    """The name of the grader."""

    passing_labels: Required[SequenceNotStr[str]]
    """The labels that indicate a passing result.

    Must be a subset of labels.
    """

    type: Required[Literal["label_model"]]
    """The object type, which is always `label_model`."""
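

# A minimal sketch, not part of the generated spec: a label-model grader that
# classifies each sampled response. The model name, labels, and prompt text are
# illustrative assumptions; `{{sample.output_text}}` resolves to the model output
# being graded.
_example_label_grader: TestingCriterionLabelModel = {
    "type": "label_model",
    "name": "politeness",
    "model": "gpt-4o-mini",  # must support structured outputs
    "input": [
        {"role": "developer", "content": "Label the reply as polite or impolite."},
        {"role": "user", "content": "{{sample.output_text}}"},
    ],
    "labels": ["polite", "impolite"],
    "passing_labels": ["polite"],  # must be a subset of `labels`
}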


class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False):
    """A TextSimilarityGrader object which grades text based on similarity metrics."""

    pass_threshold: Required[float]
    """The threshold for the score."""


class TestingCriterionPython(PythonGraderParam, total=False):
    """A PythonGrader object that runs a python script on the input."""

    pass_threshold: float
    """The threshold for the score."""


class TestingCriterionScoreModel(ScoreModelGraderParam, total=False):
    """A ScoreModelGrader object that uses a model to assign a score to the input."""

    pass_threshold: float
    """The threshold for the score."""


TestingCriterion: TypeAlias = Union[
    TestingCriterionLabelModel,
    StringCheckGraderParam,
    TestingCriterionTextSimilarity,
    TestingCriterionPython,
    TestingCriterionScoreModel,
]
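

# A minimal end-to-end sketch (commented out because it performs a network call).
# It assumes an `OPENAI_API_KEY` in the environment and uses `client.evals.create`,
# the endpoint these params are generated for:
#
#     from openai import OpenAI
#
#     client = OpenAI()
#     evaluation = client.evals.create(
#         name="qa-politeness",
#         data_source_config=_example_custom_config,
#         testing_criteria=[_example_label_grader],
#     )
#     print(evaluation.id)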