-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Expand file tree
/
Copy pathexp_scaletest_llmmock.go
More file actions
121 lines (106 loc) · 3.84 KB
/
exp_scaletest_llmmock.go
File metadata and controls
121 lines (106 loc) · 3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
//go:build !slim
package cli
import (
"fmt"
"os/signal"
"time"
"golang.org/x/xerrors"
"cdr.dev/slog/v3"
"cdr.dev/slog/v3/sloggers/sloghuman"
"github.com/coder/coder/v2/scaletest/llmmock"
"github.com/coder/serpent"
)
func (*RootCmd) scaletestLLMMock() *serpent.Command {
var (
address string
artificialLatency time.Duration
responsePayloadSize int64
pprofEnable bool
pprofAddress string
traceEnable bool
)
cmd := &serpent.Command{
Use: "llm-mock",
Short: "Start a mock LLM API server for testing",
Long: `Start a mock LLM API server that simulates OpenAI and Anthropic APIs`,
Handler: func(inv *serpent.Invocation) error {
ctx, stop := signal.NotifyContext(inv.Context(), StopSignals...)
defer stop()
logger := slog.Make(sloghuman.Sink(inv.Stderr)).Leveled(slog.LevelInfo)
if pprofEnable {
closePprof := ServeHandler(ctx, logger, nil, pprofAddress, "pprof")
defer closePprof()
logger.Info(ctx, "pprof server started", slog.F("address", pprofAddress))
}
config := llmmock.Config{
Address: address,
Logger: logger,
ArtificialLatency: artificialLatency,
ResponsePayloadSize: int(responsePayloadSize),
PprofEnable: pprofEnable,
PprofAddress: pprofAddress,
TraceEnable: traceEnable,
}
srv := new(llmmock.Server)
if err := srv.Start(ctx, config); err != nil {
return xerrors.Errorf("start mock LLM server: %w", err)
}
defer func() {
if err := srv.Stop(); err != nil {
logger.Error(ctx, "failed to stop mock LLM server", slog.Error(err))
}
}()
_, _ = fmt.Fprintf(inv.Stdout, "Mock LLM API server started on %s\n", srv.APIAddress())
_, _ = fmt.Fprintf(inv.Stdout, " OpenAI endpoint: %s/v1/chat/completions\n", srv.APIAddress())
_, _ = fmt.Fprintf(inv.Stdout, " OpenAI responses endpoint: %s/v1/responses\n", srv.APIAddress())
_, _ = fmt.Fprintf(inv.Stdout, " Anthropic endpoint: %s/v1/messages\n", srv.APIAddress())
<-ctx.Done()
return nil
},
}
cmd.Options = []serpent.Option{
{
Flag: "address",
Env: "CODER_SCALETEST_LLM_MOCK_ADDRESS",
Default: "localhost",
Description: "Address to bind the mock LLM API server. Can include a port (e.g., 'localhost:8080' or ':8080'). Uses a random port if no port is specified.",
Value: serpent.StringOf(&address),
},
{
Flag: "artificial-latency",
Env: "CODER_SCALETEST_LLM_MOCK_ARTIFICIAL_LATENCY",
Default: "0s",
Description: "Artificial latency to add to each response (e.g., 100ms, 1s). Simulates slow upstream processing.",
Value: serpent.DurationOf(&artificialLatency),
},
{
Flag: "response-payload-size",
Env: "CODER_SCALETEST_LLM_MOCK_RESPONSE_PAYLOAD_SIZE",
Default: "0",
Description: "Size in bytes of the response payload. If 0, uses default context-aware responses.",
Value: serpent.Int64Of(&responsePayloadSize),
},
{
Flag: "pprof-enable",
Env: "CODER_SCALETEST_LLM_MOCK_PPROF_ENABLE",
Default: "false",
Description: "Serve pprof metrics on the address defined by pprof-address.",
Value: serpent.BoolOf(&pprofEnable),
},
{
Flag: "pprof-address",
Env: "CODER_SCALETEST_LLM_MOCK_PPROF_ADDRESS",
Default: "127.0.0.1:6060",
Description: "The bind address to serve pprof.",
Value: serpent.StringOf(&pprofAddress),
},
{
Flag: "trace-enable",
Env: "CODER_SCALETEST_LLM_MOCK_TRACE_ENABLE",
Default: "false",
Description: "Whether application tracing data is collected. It exports to a backend configured by environment variables. See: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md.",
Value: serpent.BoolOf(&traceEnable),
},
}
return cmd
}