# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict

from .._types import SequenceNotStr
from .shared_params.metadata import Metadata
from .graders.grader_inputs_param import GraderInputsParam
from .graders.python_grader_param import PythonGraderParam
from .graders.score_model_grader_param import ScoreModelGraderParam
from .graders.string_check_grader_param import StringCheckGraderParam
from .responses.response_input_text_param import ResponseInputTextParam
from .graders.text_similarity_grader_param import TextSimilarityGraderParam
from .responses.response_input_audio_param import ResponseInputAudioParam

__all__ = [
    "EvalCreateParams",
    "DataSourceConfig",
    "DataSourceConfigCustom",
    "DataSourceConfigLogs",
    "DataSourceConfigStoredCompletions",
    "TestingCriterion",
    "TestingCriterionLabelModel",
    "TestingCriterionLabelModelInput",
    "TestingCriterionLabelModelInputSimpleInputMessage",
    "TestingCriterionLabelModelInputEvalItem",
    "TestingCriterionLabelModelInputEvalItemContent",
    "TestingCriterionLabelModelInputEvalItemContentOutputText",
    "TestingCriterionLabelModelInputEvalItemContentInputImage",
    "TestingCriterionTextSimilarity",
    "TestingCriterionPython",
    "TestingCriterionScoreModel",
]


class EvalCreateParams(TypedDict, total=False):
    data_source_config: Required[DataSourceConfig]
    """The configuration for the data source used for the evaluation runs.

    Dictates the schema of the data used in the evaluation.
    """

    testing_criteria: Required[Iterable[TestingCriterion]]
    """A list of graders for all eval runs in this group.

    Graders can reference variables in the data source using double curly braces
    notation, like `{{item.variable_name}}`. To reference the model's output, use
    the `sample` namespace (ie, `{{sample.output_text}}`).
    """

    metadata: Optional[Metadata]
    """Set of 16 key-value pairs that can be attached to an object.

    This can be useful for storing additional information about the object in a
    structured format, and querying for objects via API or the dashboard.

    Keys are strings with a maximum length of 64 characters. Values are strings
    with a maximum length of 512 characters.
    """

    name: str
    """The name of the evaluation."""


class DataSourceConfigCustom(TypedDict, total=False):
    """
    A CustomDataSourceConfig object that defines the schema for the data source
    used for the evaluation runs. This schema is used to define the shape of the
    data that will be:

    - Used to define your testing criteria and
    - What data is required when creating a run
    """

    item_schema: Required[Dict[str, object]]
    """The json schema for each row in the data source."""

    type: Required[Literal["custom"]]
    """The type of data source. Always `custom`."""

    include_sample_schema: bool
    """
    Whether the eval should expect you to populate the sample namespace (ie, by
    generating responses off of your data source)
    """


class DataSourceConfigLogs(TypedDict, total=False):
    """
    A data source config which specifies the metadata property of your logs query.
    This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
    """

    type: Required[Literal["logs"]]
    """The type of data source. Always `logs`."""

    metadata: Dict[str, object]
    """Metadata filters for the logs data source."""
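

# A minimal sketch, not part of the generated spec: constructing a custom data
# source config by hand. The `question`/`ideal_answer` row fields are hypothetical
# placeholders for whatever shape your eval data takes.
_example_custom_config: DataSourceConfigCustom = {
    "type": "custom",
    "item_schema": {
        "type": "object",
        "properties": {
            "question": {"type": "string"},
            "ideal_answer": {"type": "string"},
        },
        "required": ["question", "ideal_answer"],
    },
    # Set when runs will generate responses, so graders can reference
    # `{{sample.output_text}}` alongside `{{item.question}}` etc.
    "include_sample_schema": True,
}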


class DataSourceConfigStoredCompletions(TypedDict, total=False):
    """Deprecated in favor of LogsDataSourceConfig."""

    type: Required[Literal["stored_completions"]]
    """The type of data source. Always `stored_completions`."""

    metadata: Dict[str, object]
    """Metadata filters for the stored completions data source."""


DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]


class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
    content: Required[str]
    """The content of the message."""

    role: Required[str]
    """The role of the message (e.g. "system", "assistant", "user")."""


class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False):
    """A text output from the model."""

    text: Required[str]
    """The text output from the model."""

    type: Required[Literal["output_text"]]
    """The type of the output text. Always `output_text`."""


class TestingCriterionLabelModelInputEvalItemContentInputImage(TypedDict, total=False):
    """An image input block used within EvalItem content arrays."""

    image_url: Required[str]
    """The URL of the image input."""

    type: Required[Literal["input_image"]]
    """The type of the image input. Always `input_image`."""

    detail: str
    """The detail level of the image to be sent to the model.

    One of `high`, `low`, or `auto`. Defaults to `auto`.
    """


TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[
    str,
    ResponseInputTextParam,
    TestingCriterionLabelModelInputEvalItemContentOutputText,
    TestingCriterionLabelModelInputEvalItemContentInputImage,
    ResponseInputAudioParam,
    GraderInputsParam,
]


class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False):
    """
    A message input to the model with a role indicating instruction following
    hierarchy. Instructions given with the `developer` or `system` role take
    precedence over instructions given with the `user` role. Messages with the
    `assistant` role are presumed to have been generated by the model in previous
    interactions.
    """

    content: Required[TestingCriterionLabelModelInputEvalItemContent]
    """Inputs to the model - can contain template strings.

    Supports text, output text, input images, and input audio, either as a single
    item or an array of items.
    """

    role: Required[Literal["user", "assistant", "system", "developer"]]
    """The role of the message input.

    One of `user`, `assistant`, `system`, or `developer`.
    """

    type: Literal["message"]
    """The type of the message input. Always `message`."""


TestingCriterionLabelModelInput: TypeAlias = Union[
    TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem
]


class TestingCriterionLabelModel(TypedDict, total=False):
    """
    A LabelModelGrader object which uses a model to assign labels to each item in
    the evaluation.
    """

    input: Required[Iterable[TestingCriterionLabelModelInput]]
    """A list of chat messages forming the prompt or context.

    May include variable references to the `item` namespace, ie {{item.name}}.
    """

    labels: Required[SequenceNotStr[str]]
    """The labels to classify to each item in the evaluation."""

    model: Required[str]
    """The model to use for the evaluation. Must support structured outputs."""

    name: Required[str]
    """The name of the grader."""

    passing_labels: Required[SequenceNotStr[str]]
    """The labels that indicate a passing result.

    Must be a subset of labels.
    """

    type: Required[Literal["label_model"]]
    """The object type, which is always `label_model`."""
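

# A minimal sketch, not part of the generated spec: a label-model grader that
# classifies each sampled response. The model name, labels, and prompt text are
# illustrative assumptions; `{{sample.output_text}}` resolves to the model output
# being graded.
_example_label_grader: TestingCriterionLabelModel = {
    "type": "label_model",
    "name": "politeness",
    "model": "gpt-4o-mini",  # must support structured outputs
    "input": [
        {"role": "developer", "content": "Label the reply as polite or impolite."},
        {"role": "user", "content": "{{sample.output_text}}"},
    ],
    "labels": ["polite", "impolite"],
    "passing_labels": ["polite"],  # must be a subset of `labels`
}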


class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False):
    """A TextSimilarityGrader object which grades text based on similarity metrics."""

    pass_threshold: Required[float]
    """The threshold for the score."""


class TestingCriterionPython(PythonGraderParam, total=False):
    """A PythonGrader object that runs a python script on the input."""

    pass_threshold: float
    """The threshold for the score."""


class TestingCriterionScoreModel(ScoreModelGraderParam, total=False):
    """A ScoreModelGrader object that uses a model to assign a score to the input."""

    pass_threshold: float
    """The threshold for the score."""


TestingCriterion: TypeAlias = Union[
    TestingCriterionLabelModel,
    StringCheckGraderParam,
    TestingCriterionTextSimilarity,
    TestingCriterionPython,
    TestingCriterionScoreModel,
]
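

# A minimal end-to-end sketch (commented out because it performs a network call).
# It assumes an `OPENAI_API_KEY` in the environment and uses `client.evals.create`,
# the endpoint these params are generated for:
#
#     from openai import OpenAI
#
#     client = OpenAI()
#     evaluation = client.evals.create(
#         name="qa-politeness",
#         data_source_config=_example_custom_config,
#         testing_criteria=[_example_label_grader],
#     )
#     print(evaluation.id)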