# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict

from .._types import SequenceNotStr
from .shared_params.metadata import Metadata
from .graders.grader_inputs_param import GraderInputsParam
from .graders.python_grader_param import PythonGraderParam
from .graders.score_model_grader_param import ScoreModelGraderParam
from .graders.string_check_grader_param import StringCheckGraderParam
from .responses.response_input_text_param import ResponseInputTextParam
from .graders.text_similarity_grader_param import TextSimilarityGraderParam
from .responses.response_input_audio_param import ResponseInputAudioParam

__all__ = [
    "EvalCreateParams",
    "DataSourceConfig",
    "DataSourceConfigCustom",
    "DataSourceConfigLogs",
    "DataSourceConfigStoredCompletions",
    "TestingCriterion",
    "TestingCriterionLabelModel",
    "TestingCriterionLabelModelInput",
    "TestingCriterionLabelModelInputSimpleInputMessage",
    "TestingCriterionLabelModelInputEvalItem",
    "TestingCriterionLabelModelInputEvalItemContent",
    "TestingCriterionLabelModelInputEvalItemContentOutputText",
    "TestingCriterionLabelModelInputEvalItemContentInputImage",
    "TestingCriterionTextSimilarity",
    "TestingCriterionPython",
    "TestingCriterionScoreModel",
]


class EvalCreateParams(TypedDict, total=False):
    data_source_config: Required[DataSourceConfig]
    """The configuration for the data source used for the evaluation runs.

    Dictates the schema of the data used in the evaluation.
    """

    testing_criteria: Required[Iterable[TestingCriterion]]
    """A list of graders for all eval runs in this group.

    Graders can reference variables in the data source using double curly braces
    notation, like `{{item.variable_name}}`. To reference the model's output, use
    the `sample` namespace (i.e., `{{sample.output_text}}`).
    """

    metadata: Optional[Metadata]
    """Set of 16 key-value pairs that can be attached to an object.

    This can be useful for storing additional information about the object in a
    structured format, and querying for objects via API or the dashboard.

    Keys are strings with a maximum length of 64 characters. Values are strings
    with a maximum length of 512 characters.
    """

    name: str
    """The name of the evaluation."""


class DataSourceConfigCustom(TypedDict, total=False):
    """A CustomDataSourceConfig object that defines the schema for the data source
    used for the evaluation runs.

    This schema is used to define the shape of the data that will be:

    - used to define your testing criteria, and
    - required when creating a run.
    """

    item_schema: Required[Dict[str, object]]
    """The JSON schema for each row in the data source."""

    type: Required[Literal["custom"]]
    """The type of data source. Always `custom`."""

    include_sample_schema: bool
    """
    Whether the eval should expect you to populate the sample namespace (i.e., by
    generating responses off of your data source).
    """


class DataSourceConfigLogs(TypedDict, total=False):
    """A data source config which specifies the metadata property of your logs query.

    This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
    """

    type: Required[Literal["logs"]]
    """The type of data source. Always `logs`."""

    metadata: Dict[str, object]
    """Metadata filters for the logs data source."""


class DataSourceConfigStoredCompletions(TypedDict, total=False):
    """Deprecated in favor of LogsDataSourceConfig."""

    type: Required[Literal["stored_completions"]]
    """The type of data source. Always `stored_completions`."""

    metadata: Dict[str, object]
    """Metadata filters for the stored completions data source."""


DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]


class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
    content: Required[str]
    """The content of the message."""

    role: Required[str]
    """The role of the message (e.g. "system", "assistant", "user")."""


class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False):
    """A text output from the model."""

    text: Required[str]
    """The text output from the model."""

    type: Required[Literal["output_text"]]
    """The type of the output text. Always `output_text`."""


class TestingCriterionLabelModelInputEvalItemContentInputImage(TypedDict, total=False):
    """An image input block used within EvalItem content arrays."""

    image_url: Required[str]
    """The URL of the image input."""

    type: Required[Literal["input_image"]]
    """The type of the image input. Always `input_image`."""

    detail: str
    """The detail level of the image to be sent to the model.

    One of `high`, `low`, or `auto`. Defaults to `auto`.
    """


TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[
    str,
    ResponseInputTextParam,
    TestingCriterionLabelModelInputEvalItemContentOutputText,
    TestingCriterionLabelModelInputEvalItemContentInputImage,
    ResponseInputAudioParam,
    GraderInputsParam,
]


class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False):
    """A message input to the model with a role indicating instruction following
    hierarchy.

    Instructions given with the `developer` or `system` role take precedence over
    instructions given with the `user` role. Messages with the `assistant` role are
    presumed to have been generated by the model in previous interactions.
    """

    content: Required[TestingCriterionLabelModelInputEvalItemContent]
    """Inputs to the model - can contain template strings.

    Supports text, output text, input images, and input audio, either as a single
    item or an array of items.
    """

    role: Required[Literal["user", "assistant", "system", "developer"]]
    """The role of the message input.

    One of `user`, `assistant`, `system`, or `developer`.
    """

    type: Literal["message"]
    """The type of the message input. Always `message`."""


TestingCriterionLabelModelInput: TypeAlias = Union[
    TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem
]


class TestingCriterionLabelModel(TypedDict, total=False):
    """A LabelModelGrader object which uses a model to assign labels to each item
    in the evaluation.
    """

    input: Required[Iterable[TestingCriterionLabelModelInput]]
    """A list of chat messages forming the prompt or context.

    May include variable references to the `item` namespace, i.e. `{{item.name}}`.
    """

    labels: Required[SequenceNotStr[str]]
    """The labels to assign to each item in the evaluation."""

    model: Required[str]
    """The model to use for the evaluation. Must support structured outputs."""

    name: Required[str]
    """The name of the grader."""

    passing_labels: Required[SequenceNotStr[str]]
    """The labels that indicate a passing result. Must be a subset of `labels`."""

    type: Required[Literal["label_model"]]
    """The object type, which is always `label_model`."""


class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False):
    """A TextSimilarityGrader object which grades text based on similarity metrics."""

    pass_threshold: Required[float]
    """The threshold for the score."""


class TestingCriterionPython(PythonGraderParam, total=False):
    """A PythonGrader object that runs a Python script on the input."""

    pass_threshold: float
    """The threshold for the score."""


class TestingCriterionScoreModel(ScoreModelGraderParam, total=False):
    """A ScoreModelGrader object that uses a model to assign a score to the input."""

    pass_threshold: float
    """The threshold for the score."""


TestingCriterion: TypeAlias = Union[
    TestingCriterionLabelModel,
    StringCheckGraderParam,
    TestingCriterionTextSimilarity,
    TestingCriterionPython,
    TestingCriterionScoreModel,
]
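

# Illustrative sketch: assembling the example payloads above into an eval via
# the SDK. Guarded so importing this module never makes a network call; assumes
# an `OPENAI_API_KEY` in the environment.
if __name__ == "__main__":
    from openai import OpenAI

    client = OpenAI()
    evaluation = client.evals.create(
        name="qa-correctness",
        data_source_config=_example_custom_config,
        testing_criteria=[
            _example_label_model,
            _example_text_similarity,
            _example_python_grader,
        ],
    )
    print(evaluation.id)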