Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 12 additions & 25 deletions apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { db } from '@sim/db'
import { workflow, workspaceFiles } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq, isNull } from 'drizzle-orm'
import { findMothershipUploadRowByChatAndName } from '@/lib/copilot/orchestrator/tool-executor/upload-file-reader'
import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/orchestrator/types'
import { getServePathPrefix } from '@/lib/uploads'
import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
Expand All @@ -12,22 +13,6 @@ import { extractWorkflowMetadata } from '@/app/api/v1/admin/types'

const logger = createLogger('MaterializeFile')

/**
 * Look up the single live (non-deleted) mothership upload row for this chat
 * whose `originalName` exactly equals `fileName`; returns null when no row matches.
 */
async function findUploadRecord(fileName: string, chatId: string) {
  const filter = and(
    eq(workspaceFiles.originalName, fileName),
    eq(workspaceFiles.chatId, chatId),
    eq(workspaceFiles.context, 'mothership'),
    isNull(workspaceFiles.deletedAt)
  )
  const [match] = await db.select().from(workspaceFiles).where(filter).limit(1)
  return match ?? null
}

function toFileRecord(row: typeof workspaceFiles.$inferSelect) {
const pathPrefix = getServePathPrefix()
return {
Expand All @@ -41,21 +26,23 @@ function toFileRecord(row: typeof workspaceFiles.$inferSelect) {
uploadedBy: row.userId,
deletedAt: row.deletedAt,
uploadedAt: row.uploadedAt,
storageContext: 'mothership' as const,
}
}

async function executeSave(fileName: string, chatId: string): Promise<ToolCallResult> {
const row = await findMothershipUploadRowByChatAndName(chatId, fileName)
if (!row) {
return {
success: false,
error: `Upload not found: "${fileName}". Use glob("uploads/*") to list available uploads.`,
}
}

const [updated] = await db
.update(workspaceFiles)
.set({ context: 'workspace', chatId: null })
.where(
and(
eq(workspaceFiles.originalName, fileName),
eq(workspaceFiles.chatId, chatId),
eq(workspaceFiles.context, 'mothership'),
isNull(workspaceFiles.deletedAt)
)
)
.where(and(eq(workspaceFiles.id, row.id), isNull(workspaceFiles.deletedAt)))
.returning({ id: workspaceFiles.id, originalName: workspaceFiles.originalName })

if (!updated) {
Expand Down Expand Up @@ -84,7 +71,7 @@ async function executeImport(
workspaceId: string,
userId: string
): Promise<ToolCallResult> {
const row = await findUploadRecord(fileName, chatId)
const row = await findMothershipUploadRowByChatAndName(chatId, fileName)
if (!row) {
return {
success: false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { workspaceFiles } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq, isNull } from 'drizzle-orm'
import { type FileReadResult, readFileRecord } from '@/lib/copilot/vfs/file-reader'
import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
import { getServePathPrefix } from '@/lib/uploads'
import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace/workspace-file-manager'

Expand All @@ -21,9 +22,50 @@ function toWorkspaceFileRecord(row: typeof workspaceFiles.$inferSelect): Workspa
uploadedBy: row.userId,
deletedAt: row.deletedAt,
uploadedAt: row.uploadedAt,
storageContext: 'mothership',
}
}

/**
 * Resolve a mothership upload row for `chatId` by display name.
 *
 * Tries an exact `originalName` equality match first (single-row query). Only
 * when that misses does it scan every live upload in the chat and compare
 * `normalizeVfsSegment` keys, so visually equivalent names still resolve
 * (e.g. macOS U+202F narrow no-break space vs ASCII space in screenshot names).
 */
export async function findMothershipUploadRowByChatAndName(
  chatId: string,
  fileName: string
): Promise<typeof workspaceFiles.$inferSelect | null> {
  // Conditions shared by both queries: this chat's live mothership uploads.
  const chatScope = [
    eq(workspaceFiles.chatId, chatId),
    eq(workspaceFiles.context, 'mothership'),
    isNull(workspaceFiles.deletedAt),
  ]

  const [exact] = await db
    .select()
    .from(workspaceFiles)
    .where(and(...chatScope, eq(workspaceFiles.originalName, fileName)))
    .limit(1)
  if (exact) {
    return exact
  }

  // Fallback: fetch all candidates and match on the normalized segment key.
  const candidates = await db
    .select()
    .from(workspaceFiles)
    .where(and(...chatScope))

  const wanted = normalizeVfsSegment(fileName)
  for (const candidate of candidates) {
    if (normalizeVfsSegment(candidate.originalName) === wanted) {
      return candidate
    }
  }
  return null
}

/**
* List all chat-scoped uploads for a given chat.
*/
Expand Down Expand Up @@ -51,30 +93,18 @@ export async function listChatUploads(chatId: string): Promise<WorkspaceFileReco
}

/**
* Read a specific uploaded file by name within a chat session.
* Read a specific uploaded file by display name within a chat session.
* Resolves names with `normalizeVfsSegment` so macOS screenshot spacing (e.g. U+202F)
* matches when the model passes a visually equivalent path.
*/
export async function readChatUpload(
filename: string,
chatId: string
): Promise<FileReadResult | null> {
try {
const rows = await db
.select()
.from(workspaceFiles)
.where(
and(
eq(workspaceFiles.chatId, chatId),
eq(workspaceFiles.context, 'mothership'),
eq(workspaceFiles.originalName, filename),
isNull(workspaceFiles.deletedAt)
)
)
.limit(1)

if (rows.length === 0) return null

const record = toWorkspaceFileRecord(rows[0])
return readFileRecord(record)
const row = await findMothershipUploadRowByChatAndName(chatId, filename)
if (!row) return null
return readFileRecord(toWorkspaceFileRecord(row))
} catch (err) {
logger.warn('Failed to read chat upload', {
filename,
Expand Down
6 changes: 3 additions & 3 deletions apps/sim/lib/copilot/vfs/file-reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import { isImageFileType } from '@/lib/uploads/utils/file-utils'

const logger = createLogger('FileReader')

const MAX_TEXT_READ_BYTES = 512 * 1024 // 512 KB
const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024 // 5 MB
const MAX_TEXT_READ_BYTES = 5 * 1024 * 1024 // 5 MB
const MAX_IMAGE_READ_BYTES = 20 * 1024 * 1024 // 20 MB

const TEXT_TYPES = new Set([
'text/plain',
Expand Down Expand Up @@ -53,7 +53,7 @@ export async function readFileRecord(record: WorkspaceFileRecord): Promise<FileR
if (isImageFileType(record.type)) {
if (record.size > MAX_IMAGE_READ_BYTES) {
return {
content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB)]`,
content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 20MB)]`,
totalLines: 1,
}
}
Expand Down
14 changes: 14 additions & 0 deletions apps/sim/lib/copilot/vfs/normalize-segment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Normalize a string for use as a single VFS path segment (workflow name, file name, etc.).
 * Applies NFC normalization, collapses runs of Unicode whitespace (including U+202F as in
 * macOS screenshot names, and tabs/newlines) to a single ASCII space, strips remaining
 * ASCII control characters, maps `/` to `-`, and trims edge spaces.
 *
 * Order matters here:
 * - Whitespace is collapsed BEFORE control characters are stripped, so tab/newline
 *   (which are both `\s` and control chars) become a space instead of vanishing
 *   ("a\tb" -> "a b", not "ab").
 * - `trim()` runs LAST so that a control character adjacent to an edge space cannot
 *   leave an untrimmed leading/trailing space behind.
 */
export function normalizeVfsSegment(name: string): string {
  return name
    .normalize('NFC')
    .replace(/\s+/g, ' ')
    .replace(/[\x00-\x1f\x7f]/g, '')
    .replace(/\//g, '-')
    .trim()
}
120 changes: 120 additions & 0 deletions apps/sim/lib/copilot/vfs/operations.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/**
* @vitest-environment node
*/
import { describe, expect, it } from 'vitest'
import { glob, grep } from '@/lib/copilot/vfs/operations'

/** Build an in-memory VFS map (path -> content) from [path, content] pairs. */
function vfsFromEntries(entries: [string, string][]): Map<string, string> {
  const vfs = new Map<string, string>()
  for (const [path, content] of entries) {
    vfs.set(path, content)
  }
  return vfs
}

// Tests for the Map-backed VFS glob. The implementation under test lives in
// '@/lib/copilot/vfs/operations' and is not visible here; these tests pin its
// observable contract: segment-wise '*', recursive '**', synthesized directory
// prefixes, and literal (nobrace) '{...}' handling.
describe('glob', () => {
  it('matches one path segment for single star (files listing pattern)', () => {
    const files = vfsFromEntries([
      ['files/a/meta.json', '{}'],
      ['files/a/b/meta.json', '{}'],
      ['uploads/x.png', ''],
    ])
    const hits = glob(files, 'files/*/meta.json')
    expect(hits).toContain('files/a/meta.json')
    // '*' must not cross a '/' — the deeper meta.json is excluded.
    expect(hits).not.toContain('files/a/b/meta.json')
  })

  it('matches nested paths with double star', () => {
    const files = vfsFromEntries([
      ['workflows/W/state.json', ''],
      ['workflows/W/sub/state.json', ''],
    ])
    const hits = glob(files, 'workflows/**/state.json')
    expect(hits.sort()).toEqual(['workflows/W/state.json', 'workflows/W/sub/state.json'].sort())
  })

  it('includes virtual directory prefixes when pattern matches descendants', () => {
    const files = vfsFromEntries([['files/a/meta.json', '{}']])
    const hits = glob(files, 'files/**')
    // The VFS map only stores files; 'files' and 'files/a' are directory
    // prefixes the glob is expected to synthesize in its results.
    expect(hits).toContain('files')
    expect(hits).toContain('files/a')
    expect(hits).toContain('files/a/meta.json')
  })

  it('treats braces literally when nobrace is set (matches old builder)', () => {
    const files = vfsFromEntries([
      ['weird{brace}/x', ''],
      ['weirdA/x', ''],
    ])
    // With brace expansion disabled, '{brace}' is a literal path segment,
    // not an alternation that could match 'weirdA'.
    const hits = glob(files, 'weird{brace}/*')
    expect(hits).toContain('weird{brace}/x')
    expect(hits).not.toContain('weirdA/x')
  })
})

// Tests for the Map-backed VFS grep. Pins the three output modes ('content',
// 'count', 'files_with_matches'), CRLF handling, directory-prefix vs glob
// scoping, invalid-regex behavior, and the ignoreCase option.
describe('grep', () => {
  it('returns content matches per line in default mode', () => {
    const files = vfsFromEntries([['a.txt', 'hello\nworld\nhello']])
    const matches = grep(files, 'hello', undefined, { outputMode: 'content' })
    // One entry per matching line; line numbers are 1-based.
    expect(matches).toHaveLength(2)
    expect(matches[0]).toMatchObject({ path: 'a.txt', line: 1, content: 'hello' })
    expect(matches[1]).toMatchObject({ path: 'a.txt', line: 3, content: 'hello' })
  })

  it('strips CR before end-of-line matching on CRLF content', () => {
    const files = vfsFromEntries([['x.txt', 'foo\r\n']])
    // 'foo$' must match even though the raw line ends in '\r'.
    const matches = grep(files, 'foo$', undefined, { outputMode: 'content' })
    expect(matches).toHaveLength(1)
    expect(matches[0]?.content).toBe('foo')
  })

  it('counts matching lines', () => {
    const files = vfsFromEntries([['a.txt', 'a\nb\na']])
    const counts = grep(files, 'a', undefined, { outputMode: 'count' })
    expect(counts).toEqual([{ path: 'a.txt', count: 2 }])
  })

  it('files_with_matches scans whole file (can match across newlines with dot-all style pattern)', () => {
    const files = vfsFromEntries([['a.txt', 'foo\nbar']])
    // files_with_matches runs the pattern over the whole file content, so a
    // [\s\S] pattern can span the newline...
    const multiline = grep(files, 'foo[\\s\\S]*bar', undefined, {
      outputMode: 'files_with_matches',
    })
    expect(multiline).toContain('a.txt')

    // ...while content mode matches line by line, so the same pattern misses.
    const lineOnly = grep(files, 'foo[\\s\\S]*bar', undefined, { outputMode: 'content' })
    expect(lineOnly).toHaveLength(0)
  })

  it('scopes to directory prefix without matching unrelated prefixes', () => {
    const files = vfsFromEntries([
      ['workflows/a/x', 'needle'],
      ['workflowsManual/x', 'needle'],
    ])
    // Scope 'workflows' must mean the directory 'workflows/', not the raw
    // string prefix — 'workflowsManual' stays out.
    const hits = grep(files, 'needle', 'workflows', { outputMode: 'files_with_matches' })
    expect(hits).toContain('workflows/a/x')
    expect(hits).not.toContain('workflowsManual/x')
  })

  it('treats scope with literal brackets as directory prefix, not a glob character class', () => {
    const files = vfsFromEntries([['weird[bracket]/x.txt', 'needle']])
    const hits = grep(files, 'needle', 'weird[bracket]', { outputMode: 'files_with_matches' })
    expect(hits).toContain('weird[bracket]/x.txt')
  })

  it('scopes with glob pattern when path contains metacharacters', () => {
    const files = vfsFromEntries([
      ['workflows/A/state.json', '{"x":1}'],
      ['workflows/B/sub/state.json', '{"x":1}'],
      ['workflows/C/other.json', '{"x":1}'],
    ])
    // A scope containing '*' is interpreted as a glob pattern; single star
    // does not descend into 'workflows/B/sub'.
    const hits = grep(files, '1', 'workflows/*/state.json', { outputMode: 'files_with_matches' })
    expect(hits).toEqual(['workflows/A/state.json'])
  })

  it('returns empty array for invalid regex pattern', () => {
    const files = vfsFromEntries([['a.txt', 'x']])
    // Invalid patterns fail soft (empty result) rather than throwing.
    expect(grep(files, '(unclosed', undefined, { outputMode: 'content' })).toEqual([])
  })

  it('respects ignoreCase', () => {
    const files = vfsFromEntries([['a.txt', 'Hello']])
    const hits = grep(files, 'hello', undefined, { outputMode: 'content', ignoreCase: true })
    expect(hits).toHaveLength(1)
  })
})
Loading
Loading