eval_create_response.py
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias

from pydantic import Field as FieldInfo

from .._utils import PropertyInfo
from .._models import BaseModel
from .shared.metadata import Metadata
from .graders.python_grader import PythonGrader
from .graders.label_model_grader import LabelModelGrader
from .graders.score_model_grader import ScoreModelGrader
from .graders.string_check_grader import StringCheckGrader
from .eval_custom_data_source_config import EvalCustomDataSourceConfig
from .graders.text_similarity_grader import TextSimilarityGrader
from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig

__all__ = [
    "EvalCreateResponse",
    "DataSourceConfig",
    "DataSourceConfigLogs",
    "TestingCriterion",
    "TestingCriterionEvalGraderTextSimilarity",
    "TestingCriterionEvalGraderPython",
    "TestingCriterionEvalGraderScoreModel",
]


class DataSourceConfigLogs(BaseModel):
    """A LogsDataSourceConfig which specifies the metadata property of your logs query.

    This is usually metadata like `usecase=chatbot` or `prompt-version=v2`, etc.
    The schema returned by this data source config is used to define what
    variables are available in your evals. `item` and `sample` are both defined
    when using this data source config.
    """

    schema_: Dict[str, object] = FieldInfo(alias="schema")
    """The json schema for the run data source items.

    Learn how to build JSON schemas [here](https://json-schema.org/).
    """

    type: Literal["logs"]
    """The type of data source. Always `logs`."""

    metadata: Optional[Metadata] = None
    """Set of 16 key-value pairs that can be attached to an object.

    This can be useful for storing additional information about the object in a
    structured format, and querying for objects via API or the dashboard.

    Keys are strings with a maximum length of 64 characters. Values are strings
    with a maximum length of 512 characters.
    """


DataSourceConfig: TypeAlias = Annotated[
    Union[EvalCustomDataSourceConfig, DataSourceConfigLogs, EvalStoredCompletionsDataSourceConfig],
    PropertyInfo(discriminator="type"),
]
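

# Hypothetical sketch of what the discriminator above does: the SDK reads the
# `type` field of an incoming payload and materializes the matching variant.
# Only the "logs" literal is defined in this file; "custom" and
# "stored_completions" are assumptions based on the variant class names.
def _resolve_data_source_config(payload: Dict[str, object]) -> DataSourceConfig:
    variants = {
        "custom": EvalCustomDataSourceConfig,
        "logs": DataSourceConfigLogs,
        "stored_completions": EvalStoredCompletionsDataSourceConfig,
    }
    return variants[str(payload["type"])].model_validate(payload)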


class TestingCriterionEvalGraderTextSimilarity(TextSimilarityGrader):
    """A TextSimilarityGrader object which grades text based on similarity metrics."""

    # Keep pytest from collecting this class, whose name matches its `Test*` pattern.
    __test__ = False

    pass_threshold: float
    """The threshold for the score."""


class TestingCriterionEvalGraderPython(PythonGrader):
    """A PythonGrader object that runs a python script on the input."""

    __test__ = False

    pass_threshold: Optional[float] = None
    """The threshold for the score."""


class TestingCriterionEvalGraderScoreModel(ScoreModelGrader):
    """A ScoreModelGrader object that uses a model to assign a score to the input."""

    __test__ = False

    pass_threshold: Optional[float] = None
    """The threshold for the score."""


TestingCriterion: TypeAlias = Union[
    LabelModelGrader,
    StringCheckGrader,
    TestingCriterionEvalGraderTextSimilarity,
    TestingCriterionEvalGraderPython,
    TestingCriterionEvalGraderScoreModel,
]
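

# Illustrative sketch: one criterion built from a union variant. The
# StringCheckGrader field names ("input", "operation", "reference") and the
# `{{item...}}`/`{{sample...}}` templating are assumptions drawn from the
# evals API docs, not from this file.
def _example_criterion() -> TestingCriterion:
    return StringCheckGrader.model_validate(
        {
            "type": "string_check",
            "name": "exact match",
            "input": "{{sample.output_text}}",
            "operation": "eq",
            "reference": "{{item.expected}}",
        }
    )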


class EvalCreateResponse(BaseModel):
    """An Eval object with a data source config and testing criteria.

    An Eval represents a task to be done for your LLM integration. Like:

    - Improve the quality of my chatbot
    - See how well my chatbot handles customer support
    - Check if o4-mini is better at my usecase than gpt-4o
    """

    id: str
    """Unique identifier for the evaluation."""

    created_at: int
    """The Unix timestamp (in seconds) for when the eval was created."""

    data_source_config: DataSourceConfig
    """Configuration of data sources used in runs of the evaluation."""

    metadata: Optional[Metadata] = None
    """Set of 16 key-value pairs that can be attached to an object.

    This can be useful for storing additional information about the object in a
    structured format, and querying for objects via API or the dashboard.

    Keys are strings with a maximum length of 64 characters. Values are strings
    with a maximum length of 512 characters.
    """

    name: str
    """The name of the evaluation."""

    object: Literal["eval"]
    """The object type."""

    testing_criteria: List[TestingCriterion]
    """A list of testing criteria."""