improvement(vfs): update custom glob impl to use micromatch, fix vfs filename regex
simstudioai · icecrasher321 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
commit 5abe61ed6065823945733a2fb129ee97fde54bdf
diff --git a/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts b/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts
@@ -3,6 +3,7 @@ import { workflow, workspaceFiles } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import { and, eq, isNull } from 'drizzle-orm'
 import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/orchestrator/types'
+import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
 import { getServePathPrefix } from '@/lib/uploads'
 import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
 import { parseWorkflowJson } from '@/lib/workflows/operations/import-export'
@@ -18,14 +19,13 @@ async function findUploadRecord(fileName: string, chatId: string) {
     .from(workspaceFiles)
     .where(
       and(
-        eq(workspaceFiles.originalName, fileName),
         eq(workspaceFiles.chatId, chatId),
         eq(workspaceFiles.context, 'mothership'),
         isNull(workspaceFiles.deletedAt)
       )
     )
-    .limit(1)
-  return rows[0] ?? null
+  const segmentKey = normalizeVfsSegment(fileName)
+  return rows.find((r) => normalizeVfsSegment(r.originalName) === segmentKey) ?? null
 }
 
 function toFileRecord(row: typeof workspaceFiles.$inferSelect) {
@@ -41,21 +41,23 @@ function toFileRecord(row: typeof workspaceFiles.$inferSelect) {
     uploadedBy: row.userId,
     deletedAt: row.deletedAt,
     uploadedAt: row.uploadedAt,
+    storageContext: 'mothership' as const,
   }
 }
 
 async function executeSave(fileName: string, chatId: string): Promise<ToolCallResult> {
+  const row = await findUploadRecord(fileName, chatId)
+  if (!row) {
+    return {
+      success: false,
+      error: `Upload not found: "${fileName}". Use glob("uploads/*") to list available uploads.`,
+    }
+  }
+
   const [updated] = await db
     .update(workspaceFiles)
     .set({ context: 'workspace', chatId: null })
-    .where(
-      and(
-        eq(workspaceFiles.originalName, fileName),
-        eq(workspaceFiles.chatId, chatId),
-        eq(workspaceFiles.context, 'mothership'),
-        isNull(workspaceFiles.deletedAt)
-      )
-    )
+    .where(and(eq(workspaceFiles.id, row.id), isNull(workspaceFiles.deletedAt)))
     .returning({ id: workspaceFiles.id, originalName: workspaceFiles.originalName })
 
   if (!updated) {

diff --git a/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts b/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts
@@ -3,6 +3,7 @@ import { workspaceFiles } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import { and, eq, isNull } from 'drizzle-orm'
 import { type FileReadResult, readFileRecord } from '@/lib/copilot/vfs/file-reader'
+import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
 import { getServePathPrefix } from '@/lib/uploads'
 import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
 
@@ -21,6 +22,7 @@ function toWorkspaceFileRecord(row: typeof workspaceFiles.$inferSelect): Workspa
     uploadedBy: row.userId,
     deletedAt: row.deletedAt,
     uploadedAt: row.uploadedAt,
+    storageContext: 'mothership',
   }
 }
 
@@ -51,29 +53,19 @@ export async function listChatUploads(chatId: string): Promise<WorkspaceFileReco
 }
 
 /**
- * Read a specific uploaded file by name within a chat session.
+ * Read a specific uploaded file by display name within a chat session.
+ * Resolves names with `normalizeVfsSegment` so macOS screenshot spacing (e.g. U+202F)
+ * matches when the model passes a visually equivalent path.
  */
 export async function readChatUpload(
   filename: string,
   chatId: string
 ): Promise<FileReadResult | null> {
   try {
-    const rows = await db
-      .select()
-      .from(workspaceFiles)
-      .where(
-        and(
-          eq(workspaceFiles.chatId, chatId),
-          eq(workspaceFiles.context, 'mothership'),
-          eq(workspaceFiles.originalName, filename),
-          isNull(workspaceFiles.deletedAt)
-        )
-      )
-      .limit(1)
-
-    if (rows.length === 0) return null
-
-    const record = toWorkspaceFileRecord(rows[0])
+    const uploads = await listChatUploads(chatId)
+    const segmentKey = normalizeVfsSegment(filename)
+    const record = uploads.find((u) => normalizeVfsSegment(u.name) === segmentKey)
+    if (!record) return null
     return readFileRecord(record)
   } catch (err) {
     logger.warn('Failed to read chat upload', {

diff --git a/apps/sim/lib/copilot/vfs/normalize-segment.ts b/apps/sim/lib/copilot/vfs/normalize-segment.ts
@@ -0,0 +1,14 @@
+/**
+ * Normalize a string for use as a single VFS path segment (workflow name, file name, etc.).
+ * Applies NFC normalization, strips ASCII control characters, maps `/` to `-`, collapses
+ * Unicode whitespace (including U+202F as in macOS screenshot names) to a single ASCII
+ * space, then trims, so whitespace exposed by a stripped control character is removed too.
+ */
+export function normalizeVfsSegment(name: string): string {
+  return name
+    .normalize('NFC')
+    .replace(/[\x00-\x1f\x7f]/g, '')
+    .replace(/\//g, '-')
+    .replace(/\s+/g, ' ')
+    .trim()
+}
diff --git a/apps/sim/lib/copilot/vfs/operations.ts b/apps/sim/lib/copilot/vfs/operations.ts
@@ -1,3 +1,5 @@
+import micromatch from 'micromatch'
+
 export interface GrepMatch {
   path: string
   line: number
@@ -30,8 +32,46 @@ export interface DirEntry {
 }
 
 /**
- * Regex search over VFS file contents.
- * Supports multiple output modes: content (default), files_with_matches, count.
+ * Micromatch options tuned to match the prior in-house glob: `bash: false` so a single `*`
+ * never crosses path slashes (required for `files` + star + `meta.json` style paths). `nobrace`
+ * and `noext` disable brace and extglob expansion like the old builder. Uses `micromatch` for
+ * well-tested `**` and edge cases instead of a custom `RegExp`.
+ */
+const VFS_GLOB_OPTIONS: micromatch.Options = {
+  bash: false, // a lone `*` stays inside one path segment instead of acting like `**`
+  dot: false, // NOTE(review): per picomatch docs, wildcards then skip dot-prefixed segments
+  windows: false, // NOTE(review): presumably keeps `\` from acting as a separator; confirm
+  nobrace: true, // `{a,b}` is not expanded as a brace set
+  noext: true, // extglob forms are not expanded
+}
+
+/**
+ * Splits VFS text into lines for line-oriented grep. Strips a trailing CR so Windows-style
+ * CRLF payloads still match patterns anchored at line end (`$`).
+ */
+function splitLinesForGrep(content: string): string[] {
+  return content.split('\n').map((line) => line.replace(/\r$/, ''))
+}
+
+/**
+ * Returns true when `filePath` is `scope` or a descendant path (`scope/...`), matching how
+ * `grep -r pattern dir` limits to a directory; trailing slashes on `scope` are ignored. If
+ * `scope` looks like a glob, filters with micromatch `isMatch` and {@link VFS_GLOB_OPTIONS}.
+ */
+function pathWithinGrepScope(filePath: string, scope: string): boolean {
+  const looksLikeGlob = /[*?[{]|[!@+]\(/.test(scope)
+  if (looksLikeGlob) {
+    return micromatch.isMatch(filePath, scope, VFS_GLOB_OPTIONS)
+  }
+  const dir = scope.replace(/\/+$/, '') // `workflows/` must scope exactly like `workflows`
+  return filePath === dir || filePath.startsWith(`${dir}/`)
+}
+
+/**
+ * Regex search over VFS file contents using ECMAScript `RegExp` syntax.
+ * `content` and `count` are line-oriented (split on newline, CR stripped per line).
+ * `files_with_matches` tests the entire file string once, so multiline patterns can match there
+ * but not in line modes.
  */
 export function grep(
   files: Map<string, string>,
@@ -56,7 +96,7 @@ export function grep(
   if (outputMode === 'files_with_matches') {
     const matchingFiles: string[] = []
     for (const [filePath, content] of files) {
-      if (path && !filePath.startsWith(path)) continue
+      if (path && !pathWithinGrepScope(filePath, path)) continue
       regex.lastIndex = 0
       if (regex.test(content)) {
         matchingFiles.push(filePath)
@@ -69,8 +109,8 @@ export function grep(
   if (outputMode === 'count') {
     const counts: GrepCountEntry[] = []
     for (const [filePath, content] of files) {
-      if (path && !filePath.startsWith(path)) continue
-      const lines = content.split('\n')
+      if (path && !pathWithinGrepScope(filePath, path)) continue
+      const lines = splitLinesForGrep(content)
       let count = 0
       for (const line of lines) {
         regex.lastIndex = 0
@@ -87,9 +127,9 @@ export function grep(
   // Default: 'content' mode
   const matches: GrepMatch[] = []
   for (const [filePath, content] of files) {
-    if (path && !filePath.startsWith(path)) continue
+    if (path && !pathWithinGrepScope(filePath, path)) continue
 
-    const lines = content.split('\n')
+    const lines = splitLinesForGrep(content)
     for (let i = 0; i < lines.length; i++) {
       regex.lastIndex = 0
       if (regex.test(lines[i])) {
@@ -119,53 +159,13 @@ export function grep(
 }
 
 /**
- * Convert a glob pattern to a RegExp.
- * Supports *, **, and ? wildcards.
- */
-function globToRegExp(pattern: string): RegExp {
-  let regexStr = '^'
-  let i = 0
-  while (i < pattern.length) {
-    const ch = pattern[i]
-    if (ch === '*') {
-      if (pattern[i + 1] === '*') {
-        // ** matches any number of path segments
-        if (pattern[i + 2] === '/') {
-          regexStr += '(?:.+/)?'
-          i += 3
-        } else {
-          regexStr += '.*'
-          i += 2
-        }
-      } else {
-        // * matches anything except /
-        regexStr += '[^/]*'
-        i++
-      }
-    } else if (ch === '?') {
-      regexStr += '[^/]'
-      i++
-    } else if (/[.+^${}()|[\]\\]/.test(ch)) {
-      regexStr += `\\${ch}`
-      i++
-    } else {
-      regexStr += ch
-      i++
-    }
-  }
-  regexStr += '$'
-  return new RegExp(regexStr)
-}
-
-/**
- * Glob pattern matching against VFS file paths and virtual directories.
- * Returns matching paths (both files and directory prefixes), just like a real filesystem.
+ * Glob pattern matching against VFS file paths and virtual directories using `micromatch`
+ * with {@link VFS_GLOB_OPTIONS} (path-aware `*` and `?`, `**`, no brace or extglob expansion).
+ * Returns matching file keys and virtual directory prefixes.
  */
 export function glob(files: Map<string, string>, pattern: string): string[] {
-  const regex = globToRegExp(pattern)
   const result = new Set<string>()
 
-  // Collect all virtual directory paths from file paths
   const directories = new Set<string>()
   for (const filePath of files.keys()) {
     const parts = filePath.split('/')
@@ -174,16 +174,14 @@ export function glob(files: Map<string, string>, pattern: string): string[] {
     }
   }
 
-  // Match file paths
   for (const filePath of files.keys()) {
-    if (regex.test(filePath)) {
+    if (micromatch.isMatch(filePath, pattern, VFS_GLOB_OPTIONS)) {
       result.add(filePath)
     }
   }
 
-  // Match virtual directory paths
   for (const dir of directories) {
-    if (regex.test(dir)) {
+    if (micromatch.isMatch(dir, pattern, VFS_GLOB_OPTIONS)) {
       result.add(dir)
     }
   }

diff --git a/apps/sim/lib/copilot/vfs/workspace-vfs.ts b/apps/sim/lib/copilot/vfs/workspace-vfs.ts
@@ -18,6 +18,7 @@ import { createLogger } from '@sim/logger'
 import { and, desc, eq, isNull, ne } from 'drizzle-orm'
 import { listApiKeys } from '@/lib/api-key/service'
 import { type FileReadResult, readFileRecord } from '@/lib/copilot/vfs/file-reader'
+import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
 import type { DirEntry, GrepMatch, GrepOptions, ReadResult } from '@/lib/copilot/vfs/operations'
 import * as ops from '@/lib/copilot/vfs/operations'
 import type { DeploymentData } from '@/lib/copilot/vfs/serializers'
@@ -1177,14 +1178,8 @@ export type { FileReadResult } from '@/lib/copilot/vfs/file-reader'
 
 /**
  * Sanitize a name for use as a VFS path segment.
- * Normalizes Unicode to NFC, collapses whitespace, strips control
- * characters, and replaces forward slashes (path separators).
+ * Delegates to {@link normalizeVfsSegment} so VFS paths and name lookups normalize identically.
  */
 export function sanitizeName(name: string): string {
-  return name
-    .normalize('NFC')
-    .trim()
-    .replace(/[\x00-\x1f\x7f]/g, '')
-    .replace(/\//g, '-')
-    .replace(/\s+/g, ' ')
+  return normalizeVfsSegment(name)
 }
diff --git a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts
@@ -12,6 +12,7 @@ import {
   decrementStorageUsage,
   incrementStorageUsage,
 } from '@/lib/billing/storage'
+import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment'
 import {
   downloadFile,
   hasCloudStorage,
@@ -44,6 +45,8 @@ export interface WorkspaceFileRecord {
   uploadedBy: string
   deletedAt?: Date | null
   uploadedAt: Date
+  /** Pass-through to `downloadFile` when not default `workspace` (e.g. chat mothership uploads). */
+  storageContext?: 'workspace' | 'mothership'
 }
 
 /**
@@ -363,12 +366,9 @@ export function findWorkspaceFileRecord(
   }
 
   const normalizedReference = normalizeWorkspaceFileReference(fileReference)
+  const segmentKey = normalizeVfsSegment(normalizedReference)
   return (
-    files.find(
-      (file) =>
-        file.name === normalizedReference ||
-        file.name.normalize('NFC') === normalizedReference.normalize('NFC')
-    ) ?? null
+    files.find((file) => normalizeVfsSegment(file.name) === segmentKey) ?? null
   )
 }
 
@@ -445,7 +445,7 @@ export async function downloadWorkspaceFile(fileRecord: WorkspaceFileRecord): Pr
   try {
     const buffer = await downloadFile({
       key: fileRecord.key,
-      context: 'workspace',
+      context: fileRecord.storageContext ?? 'workspace',
     })
     logger.info(
       `Successfully downloaded workspace file: ${fileRecord.name} (${buffer.length} bytes)`

diff --git a/apps/sim/package.json b/apps/sim/package.json
@@ -126,6 +126,7 @@
     "lucide-react": "^0.479.0",
     "mammoth": "^1.9.0",
     "marked": "17.0.4",
+    "micromatch": "4.0.8",
     "mongodb": "6.19.0",
     "mysql2": "3.14.3",
     "nanoid": "^3.3.7",
@@ -179,13 +180,15 @@
   "devDependencies": {
     "@sim/testing": "workspace:*",
     "@sim/tsconfig": "workspace:*",
+    "@tailwindcss/typography": "0.5.19",
     "@testing-library/jest-dom": "^6.6.3",
     "@trigger.dev/build": "4.1.2",
     "@types/fluent-ffmpeg": "2.1.28",
     "@types/html-to-text": "9.0.4",
     "@types/js-yaml": "4.0.9",
     "@types/jsdom": "21.1.7",
     "@types/lodash": "^4.17.16",
+    "@types/micromatch": "4.0.10",
     "@types/node": "24.2.1",
     "@types/nodemailer": "7.0.4",
     "@types/papaparse": "5.3.16",
@@ -195,7 +198,6 @@
     "@types/ssh2": "^1.15.5",
     "@vitejs/plugin-react": "^4.3.4",
     "@vitest/coverage-v8": "^3.0.8",
-    "@tailwindcss/typography": "0.5.19",
     "autoprefixer": "10.4.21",
     "concurrently": "^9.1.0",
     "critters": "0.0.25",