agentdesktop

package
v2.33.3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 13, 2026 License: AGPL-3.0 Imports: 28 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrDesktopClosed = xerrors.New("desktop closed")

ErrDesktopClosed is returned when an operation is attempted on a closed desktop session.

View Source
var ErrRecordingCorrupted = xerrors.New("recording corrupted: process was force-killed")

ErrRecordingCorrupted is returned by StopRecording when the recording process was force-killed and the artifact is likely incomplete or corrupt.

View Source
var ErrUnknownRecording = xerrors.New("unknown recording ID")

ErrUnknownRecording is returned by StopRecording when the recording ID is not recognized.

Functions

This section is empty.

Types

type API

type API struct {
	// contains filtered or unexported fields
}

API exposes the desktop streaming HTTP routes for the agent.

func NewAPI

func NewAPI(logger slog.Logger, desktop Desktop, clock quartz.Clock) *API

NewAPI creates a new desktop streaming API.

func (*API) Close

func (a *API) Close() error

Close shuts down the desktop session if one is running.

func (*API) Routes

func (a *API) Routes() http.Handler

Routes returns the chi router for mounting at /api/v0/desktop.

type Desktop

type Desktop interface {
	// Start launches the desktop session. It is idempotent — calling
	// Start on an already-running session returns the existing
	// config. The returned DisplayConfig describes the running
	// session.
	Start(ctx context.Context) (DisplayConfig, error)

	// VNCConn dials the desktop's VNC server and returns a raw
	// net.Conn carrying RFB binary frames. Each call returns a new
	// connection; multiple clients can connect simultaneously.
	// Start must be called before VNCConn.
	VNCConn(ctx context.Context) (net.Conn, error)

	// Screenshot captures the current framebuffer as a PNG and
	// returns it base64-encoded in ScreenshotResult.Data.
	// TargetWidth/TargetHeight in opts are the desired output
	// dimensions (the implementation rescales); pass 0 to use
	// native resolution.
	Screenshot(ctx context.Context, opts ScreenshotOptions) (ScreenshotResult, error)

	// Move moves the mouse cursor to absolute coordinates.
	Move(ctx context.Context, x, y int) error
	// Click performs a mouse button click at the given coordinates.
	Click(ctx context.Context, x, y int, button MouseButton) error
	// DoubleClick performs a double-click at the given coordinates.
	DoubleClick(ctx context.Context, x, y int, button MouseButton) error
	// ButtonDown presses and holds a mouse button.
	ButtonDown(ctx context.Context, button MouseButton) error
	// ButtonUp releases a mouse button.
	ButtonUp(ctx context.Context, button MouseButton) error
	// Scroll scrolls by (dx, dy) clicks at the given coordinates.
	Scroll(ctx context.Context, x, y, dx, dy int) error
	// Drag moves from (startX,startY) to (endX,endY) while holding
	// the left mouse button.
	Drag(ctx context.Context, startX, startY, endX, endY int) error

	// KeyPress sends a key-down then key-up for a key combo string
	// (e.g. "Return", "ctrl+c").
	KeyPress(ctx context.Context, keys string) error
	// KeyDown presses and holds a key.
	KeyDown(ctx context.Context, key string) error
	// KeyUp releases a key.
	KeyUp(ctx context.Context, key string) error
	// Type types a string of text character-by-character.
	Type(ctx context.Context, text string) error

	// CursorPosition returns the current cursor coordinates.
	CursorPosition(ctx context.Context) (x, y int, err error)

	// RecordActivity marks the desktop as having received user
	// interaction, resetting the idle-recording timer.
	RecordActivity()

	// StartRecording begins recording the desktop to an MP4 file
	// using the caller-provided recording ID. Safe to call
	// repeatedly — active recordings continue unchanged, stopped
	// recordings are discarded and restarted. Concurrent recordings
	// are supported.
	StartRecording(ctx context.Context, recordingID string) error

	// StopRecording finalizes the recording identified by the given
	// ID. Idempotent — safe to call on an already-stopped recording.
	// Returns a RecordingArtifact that the caller can stream; the
	// caller must close the artifact's Reader and, when it is
	// non-nil, its ThumbnailReader when done. Returns
	// ErrUnknownRecording if the recording ID is not recognized, and
	// ErrRecordingCorrupted if the recording process was force-killed
	// (the artifact is then likely incomplete or corrupt).
	StopRecording(ctx context.Context, recordingID string) (*RecordingArtifact, error)

	// Close shuts down the desktop session and cleans up resources.
	Close() error
}

Desktop abstracts a virtual desktop session running inside a workspace.

func NewPortableDesktop

func NewPortableDesktop(
	logger slog.Logger,
	execer agentexec.Execer,
	scriptBinDir string,
	clk quartz.Clock,
) Desktop

NewPortableDesktop creates a Desktop backed by the portabledesktop CLI binary, using execer to spawn child processes. scriptBinDir is the coder script bin directory checked for the binary. If clk is nil, a real clock is used.

type DesktopAction

// DesktopAction is the request body for the desktop action endpoint.
//
// Action is the only field that is always encoded; every other field is
// a pointer tagged omitempty, so a nil value is left out of the JSON.
// Coordinate and StartCoordinate each hold two ints, presumably an
// (x, y) pair — TODO confirm the ordering against the action handler.
// The set of accepted Action strings, and the units of Duration and
// ScrollAmount, are not visible in this declaration.
type DesktopAction struct {
	Action          string  `json:"action"`
	Coordinate      *[2]int `json:"coordinate,omitempty"`
	StartCoordinate *[2]int `json:"start_coordinate,omitempty"`
	Text            *string `json:"text,omitempty"`
	Duration        *int    `json:"duration,omitempty"`
	ScrollAmount    *int    `json:"scroll_amount,omitempty"`
	ScrollDirection *string `json:"scroll_direction,omitempty"`
	// ScaledWidth and ScaledHeight describe the declared model-facing desktop
	// geometry. When provided, input coordinates are mapped from declared space
	// to native desktop pixels before dispatching.
	ScaledWidth  *int `json:"scaled_width,omitempty"`
	ScaledHeight *int `json:"scaled_height,omitempty"`
}

DesktopAction is the request body for the desktop action endpoint.

type DesktopActionResponse

// DesktopActionResponse is the response from the desktop action endpoint.
//
// Every field is tagged omitempty, so empty strings and zero dimensions
// are left out of the encoded JSON. ScreenshotData is presumably the
// base64-encoded PNG produced by Desktop.Screenshot, with
// ScreenshotWidth and ScreenshotHeight giving its dimensions — TODO
// confirm against the action handler.
type DesktopActionResponse struct {
	Output           string `json:"output,omitempty"`
	ScreenshotData   string `json:"screenshot_data,omitempty"`
	ScreenshotWidth  int    `json:"screenshot_width,omitempty"`
	ScreenshotHeight int    `json:"screenshot_height,omitempty"`
}

DesktopActionResponse is the response from the desktop action endpoint.

type DisplayConfig

type DisplayConfig struct {
	Width   int // native width in pixels
	Height  int // native height in pixels
	VNCPort int // local TCP port for the VNC server
	Display int // X11 display number (e.g. 1 for :1), -1 if N/A
}

DisplayConfig describes a running desktop session.

type MouseButton

type MouseButton string

MouseButton identifies a mouse button.

const (
	MouseButtonLeft   MouseButton = "left"
	MouseButtonRight  MouseButton = "right"
	MouseButtonMiddle MouseButton = "middle"
)

type RecordingArtifact

type RecordingArtifact struct {
	// Reader is the MP4 content. Callers must close it when done.
	Reader io.ReadCloser
	// Size is the byte length of the MP4 content.
	Size int64
	// ThumbnailReader is the JPEG thumbnail. May be nil if no
	// thumbnail was produced. Callers must close it when done.
	ThumbnailReader io.ReadCloser
	// ThumbnailSize is the byte length of the thumbnail.
	ThumbnailSize int64
}

RecordingArtifact is a finalized recording returned by StopRecording. The caller streams the artifact and must close Reader and, when it is non-nil, ThumbnailReader when done. The artifact remains valid even if the same recording ID is restarted or the desktop is closed while the caller is reading.

type ScreenshotOptions

type ScreenshotOptions struct {
	TargetWidth  int // 0 = native
	TargetHeight int // 0 = native
}

ScreenshotOptions configures a screenshot capture.

type ScreenshotResult

type ScreenshotResult struct {
	Data string // base64-encoded PNG
}

ScreenshotResult is a captured screenshot.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL