Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
improvement(vfs): update custom glob impl to use micromatch, fix vfs …
…filename regex
  • Loading branch information
icecrasher321 committed Mar 19, 2026
commit 5abe61ed6065823945733a2fb129ee97fde54bdf
24 changes: 13 additions & 11 deletions apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { workflow, workspaceFiles } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq, isNull } from 'drizzle-orm'
import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/orchestrator/types'
import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
import { getServePathPrefix } from '@/lib/uploads'
import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import { parseWorkflowJson } from '@/lib/workflows/operations/import-export'
Expand All @@ -18,14 +19,13 @@ async function findUploadRecord(fileName: string, chatId: string) {
.from(workspaceFiles)
.where(
and(
eq(workspaceFiles.originalName, fileName),
eq(workspaceFiles.chatId, chatId),
eq(workspaceFiles.context, 'mothership'),
isNull(workspaceFiles.deletedAt)
)
)
.limit(1)
return rows[0] ?? null
const segmentKey = normalizeVfsSegment(fileName)
return rows.find((r) => normalizeVfsSegment(r.originalName) === segmentKey) ?? null
}

function toFileRecord(row: typeof workspaceFiles.$inferSelect) {
Expand All @@ -41,21 +41,23 @@ function toFileRecord(row: typeof workspaceFiles.$inferSelect) {
uploadedBy: row.userId,
deletedAt: row.deletedAt,
uploadedAt: row.uploadedAt,
storageContext: 'mothership' as const,
}
}

async function executeSave(fileName: string, chatId: string): Promise<ToolCallResult> {
const row = await findUploadRecord(fileName, chatId)
if (!row) {
return {
success: false,
error: `Upload not found: "${fileName}". Use glob("uploads/*") to list available uploads.`,
}
}

const [updated] = await db
.update(workspaceFiles)
.set({ context: 'workspace', chatId: null })
.where(
and(
eq(workspaceFiles.originalName, fileName),
eq(workspaceFiles.chatId, chatId),
eq(workspaceFiles.context, 'mothership'),
isNull(workspaceFiles.deletedAt)
)
)
.where(and(eq(workspaceFiles.id, row.id), isNull(workspaceFiles.deletedAt)))
.returning({ id: workspaceFiles.id, originalName: workspaceFiles.originalName })

if (!updated) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { workspaceFiles } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq, isNull } from 'drizzle-orm'
import { type FileReadResult, readFileRecord } from '@/lib/copilot/vfs/file-reader'
import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
import { getServePathPrefix } from '@/lib/uploads'
import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace/workspace-file-manager'

Expand All @@ -21,6 +22,7 @@ function toWorkspaceFileRecord(row: typeof workspaceFiles.$inferSelect): Workspa
uploadedBy: row.userId,
deletedAt: row.deletedAt,
uploadedAt: row.uploadedAt,
storageContext: 'mothership',
}
}

Expand Down Expand Up @@ -51,29 +53,19 @@ export async function listChatUploads(chatId: string): Promise<WorkspaceFileReco
}

/**
* Read a specific uploaded file by display name within a chat session.
* Resolves names with `normalizeVfsSegment` so macOS screenshot spacing (e.g. U+202F)
* matches when the model passes a visually equivalent path.
*/
export async function readChatUpload(
filename: string,
chatId: string
): Promise<FileReadResult | null> {
try {
const rows = await db
.select()
.from(workspaceFiles)
.where(
and(
eq(workspaceFiles.chatId, chatId),
eq(workspaceFiles.context, 'mothership'),
eq(workspaceFiles.originalName, filename),
isNull(workspaceFiles.deletedAt)
)
)
.limit(1)

if (rows.length === 0) return null

const record = toWorkspaceFileRecord(rows[0])
const uploads = await listChatUploads(chatId)
const segmentKey = normalizeVfsSegment(filename)
const record = uploads.find((u) => normalizeVfsSegment(u.name) === segmentKey)
if (!record) return null
Comment thread
icecrasher321 marked this conversation as resolved.
Outdated
return readFileRecord(record)
} catch (err) {
logger.warn('Failed to read chat upload', {
Expand Down
14 changes: 14 additions & 0 deletions apps/sim/lib/copilot/vfs/normalize-segment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Normalize a string for use as a single VFS path segment (workflow name, file name, etc.).
 *
 * Steps, in order:
 * 1. NFC-normalize.
 * 2. Strip ASCII control characters (U+0000-U+001F and U+007F). Tabs and newlines fall in
 *    this range, so they are removed rather than turned into spaces.
 * 3. Map `/` to `-` so the result can never introduce a path separator.
 * 4. Collapse each run of Unicode whitespace (including U+202F as in macOS screenshot
 *    names) to a single ASCII space.
 * 5. Trim leading and trailing whitespace.
 *
 * Trimming happens last so that whitespace sitting next to a stripped control character
 * at either edge is removed as well. This keeps the function idempotent:
 * `normalizeVfsSegment(normalizeVfsSegment(x)) === normalizeVfsSegment(x)`.
 *
 * @param name - Raw display name.
 * @returns The normalized segment; may be empty if `name` held only whitespace/control chars.
 */
export function normalizeVfsSegment(name: string): string {
  return name
    .normalize('NFC')
    .replace(/[\x00-\x1f\x7f]/g, '')
    .replace(/\//g, '-')
    .replace(/\s+/g, ' ')
    .trim()
}
106 changes: 52 additions & 54 deletions apps/sim/lib/copilot/vfs/operations.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import micromatch from 'micromatch'

export interface GrepMatch {
path: string
line: number
Expand Down Expand Up @@ -30,8 +32,46 @@ export interface DirEntry {
}

/**
* Micromatch options tuned to match the prior in-house glob: `bash: false` so a single `*`
* never crosses path slashes (required for `files` + star + `meta.json` style paths). `nobrace`
* and `noext` disable brace and extglob expansion like the old builder. Uses `micromatch` for
* well-tested `**` and edge cases instead of a custom `RegExp`.
*
* Shared by every micromatch call in this module so glob matching and grep path scoping
* interpret patterns the same way.
*/
const VFS_GLOB_OPTIONS: micromatch.Options = {
bash: false,
// NOTE(review): presumably keeps wildcards from matching dot-prefixed segments — confirm against micromatch docs.
dot: false,
// NOTE(review): presumably treats only `/` as the separator (VFS keys are POSIX-style) — confirm.
windows: false,
nobrace: true,
noext: true,
}
Comment thread
icecrasher321 marked this conversation as resolved.

/**
* Returns true when `filePath` is `scope` or a descendant path (`scope/...`), matching how
* `grep -r pattern dir` limits to a directory. If `scope` looks like a glob, filters with
* micromatch `isMatch` and {@link VFS_GLOB_OPTIONS}.
*/
/**
 * Breaks VFS text into individual lines for line-oriented grep.
 *
 * The content is split on `\n`; when a resulting line ends with a carriage return, that one
 * trailing `\r` is dropped so CRLF payloads still match patterns anchored at line end (`$`).
 *
 * @param content - Full text of a VFS file.
 * @returns One entry per line, without the line terminator.
 */
function splitLinesForGrep(content: string): string[] {
  const lines: string[] = []
  for (const rawLine of content.split('\n')) {
    lines.push(rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine)
  }
  return lines
}

/**
 * Decides whether `filePath` falls inside the grep `scope`.
 *
 * - If `scope` contains glob syntax (`*`, `?`, `[`, `{`, or an extglob opener such as `!(`),
 *   the path is tested with `micromatch.isMatch` using `VFS_GLOB_OPTIONS`.
 * - Otherwise `scope` is treated as a file or directory: the path matches when it equals
 *   the scope or lives underneath it (`scope/...`). Trailing slashes on the scope are
 *   ignored, so `"files/"` and `"files"` select the same subtree.
 *
 * @param filePath - VFS file key being considered.
 * @param scope - Caller-supplied path filter (literal path or glob).
 * @returns `true` when the file should be searched.
 */
function pathWithinGrepScope(filePath: string, scope: string): boolean {
  const looksLikeGlob =
    /[*?[{]/.test(scope) || scope.includes('!(') || scope.includes('@(') || scope.includes('+(')
  if (looksLikeGlob) {
    return micromatch.isMatch(filePath, scope, VFS_GLOB_OPTIONS)
  }
  // Without this, a scope written as "dir/" would be compared against "dir//" and match nothing.
  const dir = scope.replace(/\/+$/, '')
  return filePath === dir || filePath.startsWith(`${dir}/`)
}
Comment thread
cursor[bot] marked this conversation as resolved.
Comment thread
icecrasher321 marked this conversation as resolved.

/**
* Regex search over VFS file contents using ECMAScript `RegExp` syntax.
* `content` and `count` are line-oriented (split on newline, CR stripped per line).
* `files_with_matches` tests the entire file string once, so multiline patterns can match there
* but not in line modes.
*/
export function grep(
files: Map<string, string>,
Expand All @@ -56,7 +96,7 @@ export function grep(
if (outputMode === 'files_with_matches') {
const matchingFiles: string[] = []
for (const [filePath, content] of files) {
if (path && !filePath.startsWith(path)) continue
if (path && !pathWithinGrepScope(filePath, path)) continue
regex.lastIndex = 0
if (regex.test(content)) {
matchingFiles.push(filePath)
Expand All @@ -69,8 +109,8 @@ export function grep(
if (outputMode === 'count') {
const counts: GrepCountEntry[] = []
for (const [filePath, content] of files) {
if (path && !filePath.startsWith(path)) continue
const lines = content.split('\n')
if (path && !pathWithinGrepScope(filePath, path)) continue
const lines = splitLinesForGrep(content)
let count = 0
for (const line of lines) {
regex.lastIndex = 0
Expand All @@ -87,9 +127,9 @@ export function grep(
// Default: 'content' mode
const matches: GrepMatch[] = []
for (const [filePath, content] of files) {
if (path && !filePath.startsWith(path)) continue
if (path && !pathWithinGrepScope(filePath, path)) continue

const lines = content.split('\n')
const lines = splitLinesForGrep(content)
for (let i = 0; i < lines.length; i++) {
regex.lastIndex = 0
if (regex.test(lines[i])) {
Expand Down Expand Up @@ -119,53 +159,13 @@ export function grep(
}

/**
 * Compiles a glob pattern into an anchored RegExp.
 *
 * Translation rules:
 * - `**` followed by `/` becomes an optional group of one or more leading path segments.
 * - `**` anywhere else matches any characters, slashes included.
 * - `*` matches any run of characters within a single segment (no `/`).
 * - `?` matches exactly one non-slash character.
 * - Regex metacharacters are escaped; everything else is copied literally.
 *
 * @param pattern - Glob pattern to translate.
 * @returns A RegExp that must match the whole path.
 */
function globToRegExp(pattern: string): RegExp {
  const regexSpecial = /[.+^${}()|[\]\\]/
  const parts: string[] = ['^']
  let pos = 0
  while (pos < pattern.length) {
    const current = pattern[pos]
    if (current === '*' && pattern[pos + 1] === '*') {
      // Double star: consume the following slash too when present.
      const followedBySlash = pattern[pos + 2] === '/'
      parts.push(followedBySlash ? '(?:.+/)?' : '.*')
      pos += followedBySlash ? 3 : 2
      continue
    }
    pos += 1
    if (current === '*') {
      parts.push('[^/]*')
    } else if (current === '?') {
      parts.push('[^/]')
    } else if (regexSpecial.test(current)) {
      parts.push('\\' + current)
    } else {
      parts.push(current)
    }
  }
  parts.push('$')
  return new RegExp(parts.join(''))
}

/**
* Glob pattern matching against VFS file paths and virtual directories using `micromatch`
* with {@link VFS_GLOB_OPTIONS} (path-aware `*` and `?`, `**`, no brace or extglob expansion).
* Returns matching file keys and virtual directory prefixes, just like a real filesystem.
*/
export function glob(files: Map<string, string>, pattern: string): string[] {
const regex = globToRegExp(pattern)
const result = new Set<string>()

// Collect all virtual directory paths from file paths
const directories = new Set<string>()
for (const filePath of files.keys()) {
const parts = filePath.split('/')
Expand All @@ -174,16 +174,14 @@ export function glob(files: Map<string, string>, pattern: string): string[] {
}
}

// Match file paths
for (const filePath of files.keys()) {
if (regex.test(filePath)) {
if (micromatch.isMatch(filePath, pattern, VFS_GLOB_OPTIONS)) {
result.add(filePath)
}
}

// Match virtual directory paths
for (const dir of directories) {
if (regex.test(dir)) {
if (micromatch.isMatch(dir, pattern, VFS_GLOB_OPTIONS)) {
result.add(dir)
}
}
Expand Down
11 changes: 3 additions & 8 deletions apps/sim/lib/copilot/vfs/workspace-vfs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { createLogger } from '@sim/logger'
import { and, desc, eq, isNull, ne } from 'drizzle-orm'
import { listApiKeys } from '@/lib/api-key/service'
import { type FileReadResult, readFileRecord } from '@/lib/copilot/vfs/file-reader'
import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
import type { DirEntry, GrepMatch, GrepOptions, ReadResult } from '@/lib/copilot/vfs/operations'
import * as ops from '@/lib/copilot/vfs/operations'
import type { DeploymentData } from '@/lib/copilot/vfs/serializers'
Expand Down Expand Up @@ -1177,14 +1178,8 @@ export type { FileReadResult } from '@/lib/copilot/vfs/file-reader'

/**
 * Sanitize a name for use as a VFS path segment.
 * Delegates to {@link normalizeVfsSegment} so workspace file paths match DB lookups.
 *
 * @param name - Raw workflow, file, or other display name.
 * @returns The name normalized by `normalizeVfsSegment`.
 */
export function sanitizeName(name: string): string {
  return normalizeVfsSegment(name)
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
decrementStorageUsage,
incrementStorageUsage,
} from '@/lib/billing/storage'
import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
import {
downloadFile,
hasCloudStorage,
Expand Down Expand Up @@ -44,6 +45,8 @@ export interface WorkspaceFileRecord {
uploadedBy: string
deletedAt?: Date | null
uploadedAt: Date
/** Pass-through to `downloadFile` when not default `workspace` (e.g. chat mothership uploads). */
storageContext?: 'workspace' | 'mothership'
}

/**
Expand Down Expand Up @@ -363,12 +366,9 @@ export function findWorkspaceFileRecord(
}

const normalizedReference = normalizeWorkspaceFileReference(fileReference)
const segmentKey = normalizeVfsSegment(normalizedReference)
return (
files.find(
(file) =>
file.name === normalizedReference ||
file.name.normalize('NFC') === normalizedReference.normalize('NFC')
) ?? null
files.find((file) => normalizeVfsSegment(file.name) === segmentKey) ?? null
)
}

Expand Down Expand Up @@ -445,7 +445,7 @@ export async function downloadWorkspaceFile(fileRecord: WorkspaceFileRecord): Pr
try {
const buffer = await downloadFile({
key: fileRecord.key,
context: 'workspace',
context: fileRecord.storageContext ?? 'workspace',
})
logger.info(
`Successfully downloaded workspace file: ${fileRecord.name} (${buffer.length} bytes)`
Expand Down
4 changes: 3 additions & 1 deletion apps/sim/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
"lucide-react": "^0.479.0",
"mammoth": "^1.9.0",
"marked": "17.0.4",
"micromatch": "4.0.8",
"mongodb": "6.19.0",
"mysql2": "3.14.3",
"nanoid": "^3.3.7",
Expand Down Expand Up @@ -179,13 +180,15 @@
"devDependencies": {
"@sim/testing": "workspace:*",
"@sim/tsconfig": "workspace:*",
"@tailwindcss/typography": "0.5.19",
"@testing-library/jest-dom": "^6.6.3",
"@trigger.dev/build": "4.1.2",
"@types/fluent-ffmpeg": "2.1.28",
"@types/html-to-text": "9.0.4",
"@types/js-yaml": "4.0.9",
"@types/jsdom": "21.1.7",
"@types/lodash": "^4.17.16",
"@types/micromatch": "4.0.10",
"@types/node": "24.2.1",
"@types/nodemailer": "7.0.4",
"@types/papaparse": "5.3.16",
Expand All @@ -195,7 +198,6 @@
"@types/ssh2": "^1.15.5",
"@vitejs/plugin-react": "^4.3.4",
"@vitest/coverage-v8": "^3.0.8",
"@tailwindcss/typography": "0.5.19",
"autoprefixer": "10.4.21",
"concurrently": "^9.1.0",
"critters": "0.0.25",
Expand Down
Loading
Loading