# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, Union, Iterable, Optional
from typing_extensions import Literal, Required, TypeAlias, TypedDict

from .._types import SequenceNotStr
from .shared_params.metadata import Metadata
from .graders.grader_inputs_param import GraderInputsParam
from .graders.python_grader_param import PythonGraderParam
from .graders.score_model_grader_param import ScoreModelGraderParam
from .graders.string_check_grader_param import StringCheckGraderParam
from .responses.response_input_text_param import ResponseInputTextParam
from .graders.text_similarity_grader_param import TextSimilarityGraderParam
from .responses.response_input_audio_param import ResponseInputAudioParam

__all__ = [
    "EvalCreateParams",
    "DataSourceConfig",
    "DataSourceConfigCustom",
    "DataSourceConfigLogs",
    "DataSourceConfigStoredCompletions",
    "TestingCriterion",
    "TestingCriterionLabelModel",
    "TestingCriterionLabelModelInput",
    "TestingCriterionLabelModelInputSimpleInputMessage",
    "TestingCriterionLabelModelInputEvalItem",
    "TestingCriterionLabelModelInputEvalItemContent",
    "TestingCriterionLabelModelInputEvalItemContentOutputText",
    "TestingCriterionLabelModelInputEvalItemContentInputImage",
    "TestingCriterionTextSimilarity",
    "TestingCriterionPython",
    "TestingCriterionScoreModel",
]


class EvalCreateParams(TypedDict, total=False):
    data_source_config: Required[DataSourceConfig]
    """The configuration for the data source used for the evaluation runs.

    Dictates the schema of the data used in the evaluation.
    """

    testing_criteria: Required[Iterable[TestingCriterion]]
    """A list of graders for all eval runs in this group.

    Graders can reference variables in the data source using double curly braces
    notation, like `{{item.variable_name}}`. To reference the model's output, use
    the `sample` namespace (i.e., `{{sample.output_text}}`).
    """

    metadata: Optional[Metadata]
    """Set of 16 key-value pairs that can be attached to an object.

    This can be useful for storing additional information about the object in a
    structured format, and querying for objects via API or the dashboard.

    Keys are strings with a maximum length of 64 characters. Values are strings
    with a maximum length of 512 characters.
    """

    name: str
    """The name of the evaluation."""


class DataSourceConfigCustom(TypedDict, total=False):
    """A CustomDataSourceConfig object that defines the schema for the data source
    used for the evaluation runs.

    This schema is used to define the shape of the data that will be:

    - used to define your testing criteria, and
    - required when creating a run.
    """

    item_schema: Required[Dict[str, object]]
    """The JSON schema for each row in the data source."""

    type: Required[Literal["custom"]]
    """The type of data source. Always `custom`."""

    include_sample_schema: bool
    """
    Whether the eval should expect you to populate the sample namespace (i.e., by
    generating responses off of your data source).
    """


class DataSourceConfigLogs(TypedDict, total=False):
    """A data source config which specifies the metadata property of your logs query.

    This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
    """

    type: Required[Literal["logs"]]
    """The type of data source. Always `logs`."""

    metadata: Dict[str, object]
    """Metadata filters for the logs data source."""


class DataSourceConfigStoredCompletions(TypedDict, total=False):
    """Deprecated in favor of LogsDataSourceConfig."""

    type: Required[Literal["stored_completions"]]
    """The type of data source. Always `stored_completions`."""

    metadata: Dict[str, object]
    """Metadata filters for the stored completions data source."""


DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs, DataSourceConfigStoredCompletions]


class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
    content: Required[str]
    """The content of the message."""

    role: Required[str]
    """The role of the message (e.g. "system", "assistant", "user")."""


class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False):
    """A text output from the model."""

    text: Required[str]
    """The text output from the model."""

    type: Required[Literal["output_text"]]
    """The type of the output text. Always `output_text`."""


class TestingCriterionLabelModelInputEvalItemContentInputImage(TypedDict, total=False):
    """An image input block used within EvalItem content arrays."""

    image_url: Required[str]
    """The URL of the image input."""

    type: Required[Literal["input_image"]]
    """The type of the image input. Always `input_image`."""

    detail: str
    """The detail level of the image to be sent to the model.

    One of `high`, `low`, or `auto`. Defaults to `auto`.
    """


TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[
    str,
    ResponseInputTextParam,
    TestingCriterionLabelModelInputEvalItemContentOutputText,
    TestingCriterionLabelModelInputEvalItemContentInputImage,
    ResponseInputAudioParam,
    GraderInputsParam,
]


class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False):
    """A message input to the model with a role indicating instruction following
    hierarchy.

    Instructions given with the `developer` or `system` role take precedence over
    instructions given with the `user` role. Messages with the `assistant` role are
    presumed to have been generated by the model in previous interactions.
    """

    content: Required[TestingCriterionLabelModelInputEvalItemContent]
    """Inputs to the model - can contain template strings.

    Supports text, output text, input images, and input audio, either as a single
    item or an array of items.
    """

    role: Required[Literal["user", "assistant", "system", "developer"]]
    """The role of the message input.

    One of `user`, `assistant`, `system`, or `developer`.
    """

    type: Literal["message"]
    """The type of the message input. Always `message`."""


TestingCriterionLabelModelInput: TypeAlias = Union[
    TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem
]


class TestingCriterionLabelModel(TypedDict, total=False):
    """A LabelModelGrader object which uses a model to assign labels to each item
    in the evaluation.
    """

    input: Required[Iterable[TestingCriterionLabelModelInput]]
    """A list of chat messages forming the prompt or context.

    May include variable references to the `item` namespace, i.e. `{{item.name}}`.
    """

    labels: Required[SequenceNotStr[str]]
    """The labels to assign to each item in the evaluation."""

    model: Required[str]
    """The model to use for the evaluation. Must support structured outputs."""

    name: Required[str]
    """The name of the grader."""

    passing_labels: Required[SequenceNotStr[str]]
    """The labels that indicate a passing result. Must be a subset of `labels`."""

    type: Required[Literal["label_model"]]
    """The object type, which is always `label_model`."""


class TestingCriterionTextSimilarity(TextSimilarityGraderParam, total=False):
    """A TextSimilarityGrader object which grades text based on similarity metrics."""

    pass_threshold: Required[float]
    """The threshold for the score."""


class TestingCriterionPython(PythonGraderParam, total=False):
    """A PythonGrader object that runs a Python script on the input."""

    pass_threshold: float
    """The threshold for the score."""


class TestingCriterionScoreModel(ScoreModelGraderParam, total=False):
    """A ScoreModelGrader object that uses a model to assign a score to the input."""

    pass_threshold: float
    """The threshold for the score."""


TestingCriterion: TypeAlias = Union[
    TestingCriterionLabelModel,
    StringCheckGraderParam,
    TestingCriterionTextSimilarity,
    TestingCriterionPython,
    TestingCriterionScoreModel,
]
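

# Illustrative sketch: assembling the example payloads above into an eval via
# the SDK. Guarded so importing this module never makes a network call; assumes
# an `OPENAI_API_KEY` in the environment.
if __name__ == "__main__":
    from openai import OpenAI

    client = OpenAI()
    evaluation = client.evals.create(
        name="qa-correctness",
        data_source_config=_example_custom_config,
        testing_criteria=[
            _example_label_model,
            _example_text_similarity,
            _example_python_grader,
        ],
    )
    print(evaluation.id)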