From 5abe61ed6065823945733a2fb129ee97fde54bdf Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Thu, 19 Mar 2026 12:48:23 -0700 Subject: [PATCH 1/6] improvement(vfs): update custom glob impl to use micromatch, fix vfs filename regex --- .../tool-executor/materialize-file.ts | 24 ++-- .../tool-executor/upload-file-reader.ts | 26 ++--- apps/sim/lib/copilot/vfs/normalize-segment.ts | 14 +++ apps/sim/lib/copilot/vfs/operations.ts | 106 +++++++++--------- apps/sim/lib/copilot/vfs/workspace-vfs.ts | 11 +- .../workspace/workspace-file-manager.ts | 12 +- apps/sim/package.json | 4 +- bun.lock | 6 + 8 files changed, 106 insertions(+), 97 deletions(-) create mode 100644 apps/sim/lib/copilot/vfs/normalize-segment.ts diff --git a/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts b/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts index 9d40fb974bf..f7200b920c6 100644 --- a/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts +++ b/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts @@ -3,6 +3,7 @@ import { workflow, workspaceFiles } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { and, eq, isNull } from 'drizzle-orm' import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/orchestrator/types' +import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' import { getServePathPrefix } from '@/lib/uploads' import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { parseWorkflowJson } from '@/lib/workflows/operations/import-export' @@ -18,14 +19,13 @@ async function findUploadRecord(fileName: string, chatId: string) { .from(workspaceFiles) .where( and( - eq(workspaceFiles.originalName, fileName), eq(workspaceFiles.chatId, chatId), eq(workspaceFiles.context, 'mothership'), isNull(workspaceFiles.deletedAt) ) ) - .limit(1) - return rows[0] ?? null + const segmentKey = normalizeVfsSegment(fileName) + return rows.find((r) => normalizeVfsSegment(r.originalName) === segmentKey) ?? null } function toFileRecord(row: typeof workspaceFiles.$inferSelect) { @@ -41,21 +41,23 @@ function toFileRecord(row: typeof workspaceFiles.$inferSelect) { uploadedBy: row.userId, deletedAt: row.deletedAt, uploadedAt: row.uploadedAt, + storageContext: 'mothership' as const, } } async function executeSave(fileName: string, chatId: string): Promise { + const row = await findUploadRecord(fileName, chatId) + if (!row) { + return { + success: false, + error: `Upload not found: "${fileName}". Use glob("uploads/*") to list available uploads.`, + } + } + const [updated] = await db .update(workspaceFiles) .set({ context: 'workspace', chatId: null }) - .where( - and( - eq(workspaceFiles.originalName, fileName), - eq(workspaceFiles.chatId, chatId), - eq(workspaceFiles.context, 'mothership'), - isNull(workspaceFiles.deletedAt) - ) - ) + .where(and(eq(workspaceFiles.id, row.id), isNull(workspaceFiles.deletedAt))) .returning({ id: workspaceFiles.id, originalName: workspaceFiles.originalName }) if (!updated) { diff --git a/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts b/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts index c519f1a4c23..200ba23a128 100644 --- a/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts +++ b/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts @@ -3,6 +3,7 @@ import { workspaceFiles } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { and, eq, isNull } from 'drizzle-orm' import { type FileReadResult, readFileRecord } from '@/lib/copilot/vfs/file-reader' +import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' import { getServePathPrefix } from '@/lib/uploads' import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace/workspace-file-manager' @@ -21,6 +22,7 @@ function toWorkspaceFileRecord(row: typeof workspaceFiles.$inferSelect): Workspa uploadedBy: row.userId, deletedAt: row.deletedAt, uploadedAt: row.uploadedAt, + storageContext: 'mothership', } } @@ -51,29 +53,19 @@ export async function listChatUploads(chatId: string): Promise { try { - const rows = await db - .select() - .from(workspaceFiles) - .where( - and( - eq(workspaceFiles.chatId, chatId), - eq(workspaceFiles.context, 'mothership'), - eq(workspaceFiles.originalName, filename), - isNull(workspaceFiles.deletedAt) - ) - ) - .limit(1) - - if (rows.length === 0) return null - - const record = toWorkspaceFileRecord(rows[0]) + const uploads = await listChatUploads(chatId) + const segmentKey = normalizeVfsSegment(filename) + const record = uploads.find((u) => normalizeVfsSegment(u.name) === segmentKey) + if (!record) return null return readFileRecord(record) } catch (err) { logger.warn('Failed to read chat upload', { diff --git a/apps/sim/lib/copilot/vfs/normalize-segment.ts b/apps/sim/lib/copilot/vfs/normalize-segment.ts new file mode 100644 index 00000000000..9f857014f5f --- /dev/null +++ b/apps/sim/lib/copilot/vfs/normalize-segment.ts @@ -0,0 +1,14 @@ +/** + * Normalize a string for use as a single VFS path segment (workflow name, file name, etc.). + * Applies NFC normalization, trims, strips ASCII control characters, maps `/` to `-`, and + * collapses Unicode whitespace (including U+202F as in macOS screenshot names) to a single + * ASCII space. + */ +export function normalizeVfsSegment(name: string): string { + return name + .normalize('NFC') + .trim() + .replace(/[\x00-\x1f\x7f]/g, '') + .replace(/\//g, '-') + .replace(/\s+/g, ' ') +} diff --git a/apps/sim/lib/copilot/vfs/operations.ts b/apps/sim/lib/copilot/vfs/operations.ts index 194334c20eb..aa35d6a2d4e 100644 --- a/apps/sim/lib/copilot/vfs/operations.ts +++ b/apps/sim/lib/copilot/vfs/operations.ts @@ -1,3 +1,5 @@ +import micromatch from 'micromatch' + export interface GrepMatch { path: string line: number @@ -30,8 +32,46 @@ export interface DirEntry { } /** - * Regex search over VFS file contents. - * Supports multiple output modes: content (default), files_with_matches, count. + * Micromatch options tuned to match the prior in-house glob: `bash: false` so a single `*` + * never crosses path slashes (required for `files` + star + `meta.json` style paths). `nobrace` + * and `noext` disable brace and extglob expansion like the old builder. Uses `micromatch` for + * well-tested `**` and edge cases instead of a custom `RegExp`. + */ +const VFS_GLOB_OPTIONS: micromatch.Options = { + bash: false, + dot: false, + windows: false, + nobrace: true, + noext: true, +} + +/** + * Returns true when `filePath` is `scope` or a descendant path (`scope/...`), matching how + * `grep -r pattern dir` limits to a directory. If `scope` looks like a glob, filters with + * micromatch `isMatch` and {@link VFS_GLOB_OPTIONS}. + */ +/** + * Splits VFS text into lines for line-oriented grep. Strips a trailing CR so Windows-style + * CRLF payloads still match patterns anchored at line end (`$`). + */ +function splitLinesForGrep(content: string): string[] { + return content.split('\n').map((line) => line.replace(/\r$/, '')) +} + +function pathWithinGrepScope(filePath: string, scope: string): boolean { + const looksLikeGlob = + /[*?[{]/.test(scope) || scope.includes('!(') || scope.includes('@(') || scope.includes('+(') + if (looksLikeGlob) { + return micromatch.isMatch(filePath, scope, VFS_GLOB_OPTIONS) + } + return filePath === scope || filePath.startsWith(scope + '/') +} + +/** + * Regex search over VFS file contents using ECMAScript `RegExp` syntax. + * `content` and `count` are line-oriented (split on newline, CR stripped per line). + * `files_with_matches` tests the entire file string once, so multiline patterns can match there + * but not in line modes. */ export function grep( files: Map, @@ -56,7 +96,7 @@ export function grep( if (outputMode === 'files_with_matches') { const matchingFiles: string[] = [] for (const [filePath, content] of files) { - if (path && !filePath.startsWith(path)) continue + if (path && !pathWithinGrepScope(filePath, path)) continue regex.lastIndex = 0 if (regex.test(content)) { matchingFiles.push(filePath) @@ -69,8 +109,8 @@ export function grep( if (outputMode === 'count') { const counts: GrepCountEntry[] = [] for (const [filePath, content] of files) { - if (path && !filePath.startsWith(path)) continue - const lines = content.split('\n') + if (path && !pathWithinGrepScope(filePath, path)) continue + const lines = splitLinesForGrep(content) let count = 0 for (const line of lines) { regex.lastIndex = 0 @@ -87,9 +127,9 @@ export function grep( // Default: 'content' mode const matches: GrepMatch[] = [] for (const [filePath, content] of files) { - if (path && !filePath.startsWith(path)) continue + if (path && !pathWithinGrepScope(filePath, path)) continue - const lines = content.split('\n') + const lines = splitLinesForGrep(content) for (let i = 0; i < lines.length; i++) { regex.lastIndex = 0 if (regex.test(lines[i])) { @@ -119,53 +159,13 @@ export function grep( } /** - * Convert a glob pattern to a RegExp. - * Supports *, **, and ? wildcards. - */ -function globToRegExp(pattern: string): RegExp { - let regexStr = '^' - let i = 0 - while (i < pattern.length) { - const ch = pattern[i] - if (ch === '*') { - if (pattern[i + 1] === '*') { - // ** matches any number of path segments - if (pattern[i + 2] === '/') { - regexStr += '(?:.+/)?' - i += 3 - } else { - regexStr += '.*' - i += 2 - } - } else { - // * matches anything except / - regexStr += '[^/]*' - i++ - } - } else if (ch === '?') { - regexStr += '[^/]' - i++ - } else if (/[.+^${}()|[\]\\]/.test(ch)) { - regexStr += `\\${ch}` - i++ - } else { - regexStr += ch - i++ - } - } - regexStr += '$' - return new RegExp(regexStr) -} - -/** - * Glob pattern matching against VFS file paths and virtual directories. - * Returns matching paths (both files and directory prefixes), just like a real filesystem. + * Glob pattern matching against VFS file paths and virtual directories using `micromatch` + * with {@link VFS_GLOB_OPTIONS} (path-aware `*` and `?`, `**`, no brace or extglob expansion). + * Returns matching file keys and virtual directory prefixes. */ export function glob(files: Map, pattern: string): string[] { - const regex = globToRegExp(pattern) const result = new Set() - // Collect all virtual directory paths from file paths const directories = new Set() for (const filePath of files.keys()) { const parts = filePath.split('/') @@ -174,16 +174,14 @@ export function glob(files: Map, pattern: string): string[] { } } - // Match file paths for (const filePath of files.keys()) { - if (regex.test(filePath)) { + if (micromatch.isMatch(filePath, pattern, VFS_GLOB_OPTIONS)) { result.add(filePath) } } - // Match virtual directory paths for (const dir of directories) { - if (regex.test(dir)) { + if (micromatch.isMatch(dir, pattern, VFS_GLOB_OPTIONS)) { result.add(dir) } } diff --git a/apps/sim/lib/copilot/vfs/workspace-vfs.ts b/apps/sim/lib/copilot/vfs/workspace-vfs.ts index 122cff4a5bc..872e6120e8a 100644 --- a/apps/sim/lib/copilot/vfs/workspace-vfs.ts +++ b/apps/sim/lib/copilot/vfs/workspace-vfs.ts @@ -18,6 +18,7 @@ import { createLogger } from '@sim/logger' import { and, desc, eq, isNull, ne } from 'drizzle-orm' import { listApiKeys } from '@/lib/api-key/service' import { type FileReadResult, readFileRecord } from '@/lib/copilot/vfs/file-reader' +import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' import type { DirEntry, GrepMatch, GrepOptions, ReadResult } from '@/lib/copilot/vfs/operations' import * as ops from '@/lib/copilot/vfs/operations' import type { DeploymentData } from '@/lib/copilot/vfs/serializers' @@ -1177,14 +1178,8 @@ export type { FileReadResult } from '@/lib/copilot/vfs/file-reader' /** * Sanitize a name for use as a VFS path segment. - * Normalizes Unicode to NFC, collapses whitespace, strips control - * characters, and replaces forward slashes (path separators). + * Delegates to {@link normalizeVfsSegment} so workspace file paths match DB lookups. */ export function sanitizeName(name: string): string { - return name - .normalize('NFC') - .trim() - .replace(/[\x00-\x1f\x7f]/g, '') - .replace(/\//g, '-') - .replace(/\s+/g, ' ') + return normalizeVfsSegment(name) } diff --git a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts index 25d7639ab89..ba90521e9a8 100644 --- a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts @@ -12,6 +12,7 @@ import { decrementStorageUsage, incrementStorageUsage, } from '@/lib/billing/storage' +import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' import { downloadFile, hasCloudStorage, @@ -44,6 +45,8 @@ export interface WorkspaceFileRecord { uploadedBy: string deletedAt?: Date | null uploadedAt: Date + /** Pass-through to `downloadFile` when not default `workspace` (e.g. chat mothership uploads). */ + storageContext?: 'workspace' | 'mothership' } /** @@ -363,12 +366,9 @@ export function findWorkspaceFileRecord( } const normalizedReference = normalizeWorkspaceFileReference(fileReference) + const segmentKey = normalizeVfsSegment(normalizedReference) return ( - files.find( - (file) => - file.name === normalizedReference || - file.name.normalize('NFC') === normalizedReference.normalize('NFC') - ) ?? null + files.find((file) => normalizeVfsSegment(file.name) === segmentKey) ?? null ) } @@ -445,7 +445,7 @@ export async function downloadWorkspaceFile(fileRecord: WorkspaceFileRecord): Pr try { const buffer = await downloadFile({ key: fileRecord.key, - context: 'workspace', + context: fileRecord.storageContext ?? 'workspace', }) logger.info( `Successfully downloaded workspace file: ${fileRecord.name} (${buffer.length} bytes)` diff --git a/apps/sim/package.json b/apps/sim/package.json index 6148884846a..719b625eb06 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -126,6 +126,7 @@ "lucide-react": "^0.479.0", "mammoth": "^1.9.0", "marked": "17.0.4", + "micromatch": "4.0.8", "mongodb": "6.19.0", "mysql2": "3.14.3", "nanoid": "^3.3.7", @@ -179,6 +180,7 @@ "devDependencies": { "@sim/testing": "workspace:*", "@sim/tsconfig": "workspace:*", + "@tailwindcss/typography": "0.5.19", "@testing-library/jest-dom": "^6.6.3", "@trigger.dev/build": "4.1.2", "@types/fluent-ffmpeg": "2.1.28", @@ -186,6 +188,7 @@ "@types/js-yaml": "4.0.9", "@types/jsdom": "21.1.7", "@types/lodash": "^4.17.16", + "@types/micromatch": "4.0.10", "@types/node": "24.2.1", "@types/nodemailer": "7.0.4", "@types/papaparse": "5.3.16", @@ -195,7 +198,6 @@ "@types/ssh2": "^1.15.5", "@vitejs/plugin-react": "^4.3.4", "@vitest/coverage-v8": "^3.0.8", - "@tailwindcss/typography": "0.5.19", "autoprefixer": "10.4.21", "concurrently": "^9.1.0", "critters": "0.0.25", diff --git a/bun.lock b/bun.lock index 9e3efc4d447..71e25a6b6f0 100644 --- a/bun.lock +++ b/bun.lock @@ -151,6 +151,7 @@ "lucide-react": "^0.479.0", "mammoth": "^1.9.0", "marked": "17.0.4", + "micromatch": "4.0.8", "mongodb": "6.19.0", "mysql2": "3.14.3", "nanoid": "^3.3.7", @@ -212,6 +213,7 @@ "@types/js-yaml": "4.0.9", "@types/jsdom": "21.1.7", "@types/lodash": "^4.17.16", + "@types/micromatch": "4.0.10", "@types/node": "24.2.1", "@types/nodemailer": "7.0.4", "@types/papaparse": "5.3.16", @@ -1464,6 +1466,8 @@ "@types/babel__traverse": ["@types/babel__traverse@7.28.0", "", { "dependencies": { "@babel/types": "^7.28.2" } }, "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q=="], + "@types/braces": ["@types/braces@3.0.5", "", {}, "sha512-SQFof9H+LXeWNz8wDe7oN5zu7ket0qwMu5vZubW4GCJ8Kkeh6nBWUz87+KTz/G3Kqsrp0j/W253XJb3KMEeg3w=="], + "@types/chai": ["@types/chai@5.2.3", "", { "dependencies": { "@types/deep-eql": "*", "assertion-error": "^2.0.1" } }, "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA=="], "@types/cookie": ["@types/cookie@0.4.1", "", {}, "sha512-XW/Aa8APYr6jSVVA1y/DEIZX0/GMKLEVekNG727R8cs56ahETkRAy/3DR7+fJyh7oUgGwNQaRfXCun0+KbWY7Q=="], @@ -1562,6 +1566,8 @@ "@types/mdx": ["@types/mdx@2.0.13", "", {}, "sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw=="], + "@types/micromatch": ["@types/micromatch@4.0.10", "", { "dependencies": { "@types/braces": "*" } }, "sha512-5jOhFDElqr4DKTrTEbnW8DZ4Hz5LRUEmyrGpCMrD/NphYv3nUnaF08xmSLx1rGGnyEs/kFnhiw6dCgcDqMr5PQ=="], + "@types/ms": ["@types/ms@2.1.0", "", {}, "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA=="], "@types/node": ["@types/node@22.19.10", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-tF5VOugLS/EuDlTBijk0MqABfP8UxgYazTLo3uIn3b4yJgg26QRbVYJYsDtHrjdDUIRfP70+VfhTTc+CE1yskw=="], From ad8fb838b9389ef142cf6c43809412aaacdc1a5a Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Thu, 19 Mar 2026 12:51:43 -0700 Subject: [PATCH 2/6] add tests --- apps/sim/lib/copilot/vfs/operations.test.ts | 114 ++++++++++++++++++++ apps/sim/lib/copilot/vfs/operations.ts | 9 +- 2 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 apps/sim/lib/copilot/vfs/operations.test.ts diff --git a/apps/sim/lib/copilot/vfs/operations.test.ts b/apps/sim/lib/copilot/vfs/operations.test.ts new file mode 100644 index 00000000000..34c1eadb0f1 --- /dev/null +++ b/apps/sim/lib/copilot/vfs/operations.test.ts @@ -0,0 +1,114 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { glob, grep } from '@/lib/copilot/vfs/operations' + +function vfsFromEntries(entries: [string, string][]): Map { + return new Map(entries) +} + +describe('glob', () => { + it('matches one path segment for single star (files listing pattern)', () => { + const files = vfsFromEntries([ + ['files/a/meta.json', '{}'], + ['files/a/b/meta.json', '{}'], + ['uploads/x.png', ''], + ]) + const hits = glob(files, 'files/*/meta.json') + expect(hits).toContain('files/a/meta.json') + expect(hits).not.toContain('files/a/b/meta.json') + }) + + it('matches nested paths with double star', () => { + const files = vfsFromEntries([ + ['workflows/W/state.json', ''], + ['workflows/W/sub/state.json', ''], + ]) + const hits = glob(files, 'workflows/**/state.json') + expect(hits.sort()).toEqual(['workflows/W/state.json', 'workflows/W/sub/state.json'].sort()) + }) + + it('includes virtual directory prefixes when pattern matches descendants', () => { + const files = vfsFromEntries([['files/a/meta.json', '{}']]) + const hits = glob(files, 'files/**') + expect(hits).toContain('files') + expect(hits).toContain('files/a') + expect(hits).toContain('files/a/meta.json') + }) + + it('treats braces literally when nobrace is set (matches old builder)', () => { + const files = vfsFromEntries([ + ['weird{brace}/x', ''], + ['weirdA/x', ''], + ]) + const hits = glob(files, 'weird{brace}/*') + expect(hits).toContain('weird{brace}/x') + expect(hits).not.toContain('weirdA/x') + }) +}) + +describe('grep', () => { + it('returns content matches per line in default mode', () => { + const files = vfsFromEntries([['a.txt', 'hello\nworld\nhello']]) + const matches = grep(files, 'hello', undefined, { outputMode: 'content' }) + expect(matches).toHaveLength(2) + expect(matches[0]).toMatchObject({ path: 'a.txt', line: 1, content: 'hello' }) + expect(matches[1]).toMatchObject({ path: 'a.txt', line: 3, content: 'hello' }) + }) + + it('strips CR before end-of-line matching on CRLF content', () => { + const files = vfsFromEntries([['x.txt', 'foo\r\n']]) + const matches = grep(files, 'foo$', undefined, { outputMode: 'content' }) + expect(matches).toHaveLength(1) + expect(matches[0]?.content).toBe('foo') + }) + + it('counts matching lines', () => { + const files = vfsFromEntries([['a.txt', 'a\nb\na']]) + const counts = grep(files, 'a', undefined, { outputMode: 'count' }) + expect(counts).toEqual([{ path: 'a.txt', count: 2 }]) + }) + + it('files_with_matches scans whole file (can match across newlines with dot-all style pattern)', () => { + const files = vfsFromEntries([['a.txt', 'foo\nbar']]) + const multiline = grep(files, 'foo[\\s\\S]*bar', undefined, { + outputMode: 'files_with_matches', + }) + expect(multiline).toContain('a.txt') + + const lineOnly = grep(files, 'foo[\\s\\S]*bar', undefined, { outputMode: 'content' }) + expect(lineOnly).toHaveLength(0) + }) + + it('scopes to directory prefix without matching unrelated prefixes', () => { + const files = vfsFromEntries([ + ['workflows/a/x', 'needle'], + ['workflowsManual/x', 'needle'], + ]) + const hits = grep(files, 'needle', 'workflows', { outputMode: 'files_with_matches' }) + expect(hits).toContain('workflows/a/x') + expect(hits).not.toContain('workflowsManual/x') + }) + + it('scopes with glob pattern when path contains metacharacters', () => { + const files = vfsFromEntries([ + ['workflows/A/state.json', '{"x":1}'], + ['workflows/B/sub/state.json', '{"x":1}'], + ['workflows/C/other.json', '{"x":1}'], + ]) + const hits = grep(files, '1', 'workflows/*/state.json', { outputMode: 'files_with_matches' }) + expect(hits).toEqual(['workflows/A/state.json']) + }) + + it('returns empty array for invalid regex pattern', () => { + const files = vfsFromEntries([['a.txt', 'x']]) + expect(grep(files, '(unclosed', undefined, { outputMode: 'content' })).toEqual([]) + }) + + it('respects ignoreCase', () => { + const files = vfsFromEntries([['a.txt', 'Hello']]) + const hits = grep(files, 'hello', undefined, { outputMode: 'content', ignoreCase: true }) + expect(hits).toHaveLength(1) + }) +}) diff --git a/apps/sim/lib/copilot/vfs/operations.ts b/apps/sim/lib/copilot/vfs/operations.ts index aa35d6a2d4e..548de4a91ad 100644 --- a/apps/sim/lib/copilot/vfs/operations.ts +++ b/apps/sim/lib/copilot/vfs/operations.ts @@ -45,11 +45,6 @@ const VFS_GLOB_OPTIONS: micromatch.Options = { noext: true, } -/** - * Returns true when `filePath` is `scope` or a descendant path (`scope/...`), matching how - * `grep -r pattern dir` limits to a directory. If `scope` looks like a glob, filters with - * micromatch `isMatch` and {@link VFS_GLOB_OPTIONS}. - */ /** * Splits VFS text into lines for line-oriented grep. Strips a trailing CR so Windows-style * CRLF payloads still match patterns anchored at line end (`$`). @@ -58,6 +53,10 @@ function splitLinesForGrep(content: string): string[] { return content.split('\n').map((line) => line.replace(/\r$/, '')) } +/** + * Returns true when `filePath` is `scope` or a descendant path (`scope/...`). If `scope` looks + * like a glob, filters with micromatch `isMatch` and `VFS_GLOB_OPTIONS`. + */ function pathWithinGrepScope(filePath: string, scope: string): boolean { const looksLikeGlob = /[*?[{]/.test(scope) || scope.includes('!(') || scope.includes('@(') || scope.includes('+(') From de73cc53c448031612cd0bb0061601a98093a089 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Thu, 19 Mar 2026 12:59:54 -0700 Subject: [PATCH 3/6] file caps --- apps/sim/lib/copilot/vfs/file-reader.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/sim/lib/copilot/vfs/file-reader.ts b/apps/sim/lib/copilot/vfs/file-reader.ts index d0dfca9355a..c0a901fa47c 100644 --- a/apps/sim/lib/copilot/vfs/file-reader.ts +++ b/apps/sim/lib/copilot/vfs/file-reader.ts @@ -5,8 +5,8 @@ import { isImageFileType } from '@/lib/uploads/utils/file-utils' const logger = createLogger('FileReader') -const MAX_TEXT_READ_BYTES = 512 * 1024 // 512 KB -const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024 // 5 MB +const MAX_TEXT_READ_BYTES = 5 * 1024 * 1024 // 5 MB +const MAX_IMAGE_READ_BYTES = 20 * 1024 * 1024 // 20 MB const TEXT_TYPES = new Set([ 'text/plain', @@ -53,7 +53,7 @@ export async function readFileRecord(record: WorkspaceFileRecord): Promise MAX_IMAGE_READ_BYTES) { return { - content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB)]`, + content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 20MB)]`, totalLines: 1, } } From ffe56fc894a775adb27b5856edf11e890918cd2b Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Thu, 19 Mar 2026 13:03:01 -0700 Subject: [PATCH 4/6] address comments --- .../tool-executor/materialize-file.ts | 21 ++------ .../tool-executor/upload-file-reader.ts | 48 +++++++++++++++++-- apps/sim/lib/copilot/vfs/operations.test.ts | 6 +++ apps/sim/lib/copilot/vfs/operations.ts | 11 +++-- 4 files changed, 58 insertions(+), 28 deletions(-) diff --git a/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts b/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts index f7200b920c6..5428e4dff8a 100644 --- a/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts +++ b/apps/sim/lib/copilot/orchestrator/tool-executor/materialize-file.ts @@ -2,8 +2,8 @@ import { db } from '@sim/db' import { workflow, workspaceFiles } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { and, eq, isNull } from 'drizzle-orm' +import { findMothershipUploadRowByChatAndName } from '@/lib/copilot/orchestrator/tool-executor/upload-file-reader' import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/orchestrator/types' -import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' import { getServePathPrefix } from '@/lib/uploads' import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { parseWorkflowJson } from '@/lib/workflows/operations/import-export' @@ -13,21 +13,6 @@ import { extractWorkflowMetadata } from '@/app/api/v1/admin/types' const logger = createLogger('MaterializeFile') -async function findUploadRecord(fileName: string, chatId: string) { - const rows = await db - .select() - .from(workspaceFiles) - .where( - and( - eq(workspaceFiles.chatId, chatId), - eq(workspaceFiles.context, 'mothership'), - isNull(workspaceFiles.deletedAt) - ) - ) - const segmentKey = normalizeVfsSegment(fileName) - return rows.find((r) => normalizeVfsSegment(r.originalName) === segmentKey) ?? null -} - function toFileRecord(row: typeof workspaceFiles.$inferSelect) { const pathPrefix = getServePathPrefix() return { @@ -46,7 +31,7 @@ function toFileRecord(row: typeof workspaceFiles.$inferSelect) { } async function executeSave(fileName: string, chatId: string): Promise { - const row = await findUploadRecord(fileName, chatId) + const row = await findMothershipUploadRowByChatAndName(chatId, fileName) if (!row) { return { success: false, @@ -86,7 +71,7 @@ async function executeImport( workspaceId: string, userId: string ): Promise { - const row = await findUploadRecord(fileName, chatId) + const row = await findMothershipUploadRowByChatAndName(chatId, fileName) if (!row) { return { success: false, diff --git a/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts b/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts index 200ba23a128..cf40f13584d 100644 --- a/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts +++ b/apps/sim/lib/copilot/orchestrator/tool-executor/upload-file-reader.ts @@ -26,6 +26,46 @@ function toWorkspaceFileRecord(row: typeof workspaceFiles.$inferSelect): Workspa } } +/** + * Resolve a mothership upload row by `originalName`, preferring an exact DB match (limit 1) and + * only scanning all chat uploads when that misses (e.g. macOS U+202F vs ASCII space in the name). + */ +export async function findMothershipUploadRowByChatAndName( + chatId: string, + fileName: string +): Promise { + const exactRows = await db + .select() + .from(workspaceFiles) + .where( + and( + eq(workspaceFiles.chatId, chatId), + eq(workspaceFiles.context, 'mothership'), + eq(workspaceFiles.originalName, fileName), + isNull(workspaceFiles.deletedAt) + ) + ) + .limit(1) + + if (exactRows[0]) { + return exactRows[0] + } + + const allRows = await db + .select() + .from(workspaceFiles) + .where( + and( + eq(workspaceFiles.chatId, chatId), + eq(workspaceFiles.context, 'mothership'), + isNull(workspaceFiles.deletedAt) + ) + ) + + const segmentKey = normalizeVfsSegment(fileName) + return allRows.find((r) => normalizeVfsSegment(r.originalName) === segmentKey) ?? null +} + /** * List all chat-scoped uploads for a given chat. */ @@ -62,11 +102,9 @@ export async function readChatUpload( chatId: string ): Promise { try { - const uploads = await listChatUploads(chatId) - const segmentKey = normalizeVfsSegment(filename) - const record = uploads.find((u) => normalizeVfsSegment(u.name) === segmentKey) - if (!record) return null - return readFileRecord(record) + const row = await findMothershipUploadRowByChatAndName(chatId, filename) + if (!row) return null + return readFileRecord(toWorkspaceFileRecord(row)) } catch (err) { logger.warn('Failed to read chat upload', { filename, diff --git a/apps/sim/lib/copilot/vfs/operations.test.ts b/apps/sim/lib/copilot/vfs/operations.test.ts index 34c1eadb0f1..4c00264ee3c 100644 --- a/apps/sim/lib/copilot/vfs/operations.test.ts +++ b/apps/sim/lib/copilot/vfs/operations.test.ts @@ -91,6 +91,12 @@ describe('grep', () => { expect(hits).not.toContain('workflowsManual/x') }) + it('treats scope with literal brackets as directory prefix, not a glob character class', () => { + const files = vfsFromEntries([['weird[bracket]/x.txt', 'needle']]) + const hits = grep(files, 'needle', 'weird[bracket]', { outputMode: 'files_with_matches' }) + expect(hits).toContain('weird[bracket]/x.txt') + }) + it('scopes with glob pattern when path contains metacharacters', () => { const files = vfsFromEntries([ ['workflows/A/state.json', '{"x":1}'], diff --git a/apps/sim/lib/copilot/vfs/operations.ts b/apps/sim/lib/copilot/vfs/operations.ts index 548de4a91ad..e488264de0d 100644 --- a/apps/sim/lib/copilot/vfs/operations.ts +++ b/apps/sim/lib/copilot/vfs/operations.ts @@ -54,13 +54,14 @@ function splitLinesForGrep(content: string): string[] { } /** - * Returns true when `filePath` is `scope` or a descendant path (`scope/...`). If `scope` looks - * like a glob, filters with micromatch `isMatch` and `VFS_GLOB_OPTIONS`. + * Returns true when `filePath` is `scope` or a descendant path (`scope/...`). If `scope` contains + * `*` or `?`, filters with micromatch `isMatch` and {@link VFS_GLOB_OPTIONS}. Other characters + * (including `[`, `{`, spaces) use directory-prefix logic so literal VFS path segments are not + * parsed as glob syntax. */ function pathWithinGrepScope(filePath: string, scope: string): boolean { - const looksLikeGlob = - /[*?[{]/.test(scope) || scope.includes('!(') || scope.includes('@(') || scope.includes('+(') - if (looksLikeGlob) { + const scopeUsesStarOrQuestionGlob = /[*?]/.test(scope) + if (scopeUsesStarOrQuestionGlob) { return micromatch.isMatch(filePath, scope, VFS_GLOB_OPTIONS) } return filePath === scope || filePath.startsWith(scope + '/') From f2546783250a88a081d419e98570ac7b3c68b37d Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Thu, 19 Mar 2026 13:35:40 -0700 Subject: [PATCH 5/6] fix open resource --- .../resource-content/resource-content.tsx | 2 +- .../orchestrator/tool-executor/index.ts | 36 ++++++++++++++++--- apps/sim/lib/copilot/vfs/operations.test.ts | 10 ++++++ apps/sim/lib/copilot/vfs/operations.ts | 9 +++-- .../workspace/workspace-file-manager.ts | 26 +++----------- .../workspace/workspace-file-reference.ts | 24 +++++++++++++ 6 files changed, 78 insertions(+), 29 deletions(-) create mode 100644 apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/components/resource-content/resource-content.tsx b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/components/resource-content/resource-content.tsx index 461f02f7dd3..f441c97ed4a 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/components/resource-content/resource-content.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/components/mothership-view/components/resource-content/resource-content.tsx @@ -260,7 +260,7 @@ function EmbeddedFileActions({ workspaceId, fileId }: EmbeddedFileActionsProps) }, [file]) const handleOpenInFiles = useCallback(() => { - router.push(`/workspace/${workspaceId}/files?fileId=${fileId}`) + router.push(`/workspace/${workspaceId}/files?fileId=${encodeURIComponent(fileId)}`) }, [router, workspaceId, fileId]) return ( diff --git a/apps/sim/lib/copilot/orchestrator/tool-executor/index.ts b/apps/sim/lib/copilot/orchestrator/tool-executor/index.ts index 3617a0321e2..910b3401790 100644 --- a/apps/sim/lib/copilot/orchestrator/tool-executor/index.ts +++ b/apps/sim/lib/copilot/orchestrator/tool-executor/index.ts @@ -16,6 +16,7 @@ import { validateMcpDomain } from '@/lib/mcp/domain-check' import { mcpService } from '@/lib/mcp/service' import { generateMcpServerId } from '@/lib/mcp/utils' import { getAllOAuthServices } from '@/lib/oauth/utils' +import { getWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { deleteCustomTool, getCustomToolById, @@ -24,7 +25,7 @@ import { } from '@/lib/workflows/custom-tools/operations' import { deleteSkill, listSkills, upsertSkills } from '@/lib/workflows/skills/operations' import { getWorkflowById } from '@/lib/workflows/utils' -import { isMcpTool } from '@/executor/constants' +import { isMcpTool, isUuid } from '@/executor/constants' import { executeTool } from '@/tools' import { getTool, resolveToolId } from '@/tools/utils' import { @@ -1029,7 +1030,7 @@ const SIM_WORKFLOW_TOOL_HANDLERS: Record< list: (p, c) => executeVfsList(p, c), // Resource visibility - open_resource: async (p: OpenResourceParams) => { + open_resource: async (p: OpenResourceParams, c: ExecutionContext) => { const validated = validateOpenResourceParams(p) if (!validated.success) { return { success: false, error: validated.error } @@ -1037,7 +1038,34 @@ const SIM_WORKFLOW_TOOL_HANDLERS: Record< const params = validated.params const resourceType = params.type - const resourceId = params.id + let resourceId = params.id + let title: string = resourceType + + if (resourceType === 'file') { + if (!c.workspaceId) { + return { + success: false, + error: + 'Opening a workspace file requires workspace context. Pass the file UUID from files//meta.json.', + } + } + if (!isUuid(params.id)) { + return { + success: false, + error: + 'open_resource for files requires the canonical UUID from files//meta.json (the "id" field). Do not pass VFS paths, display names, or file_ strings.', + } + } + const record = await getWorkspaceFile(c.workspaceId, params.id) + if (!record) { + return { + success: false, + error: `No workspace file with id "${params.id}". Confirm the UUID from meta.json.`, + } + } + resourceId = record.id + title = record.name + } return { success: true, @@ -1046,7 +1074,7 @@ const SIM_WORKFLOW_TOOL_HANDLERS: Record< { type: resourceType as 'workflow' | 'table' | 'knowledgebase' | 'file', id: resourceId, - title: resourceType, + title, }, ], } diff --git a/apps/sim/lib/copilot/vfs/operations.test.ts b/apps/sim/lib/copilot/vfs/operations.test.ts index 4c00264ee3c..add84c29010 100644 --- a/apps/sim/lib/copilot/vfs/operations.test.ts +++ b/apps/sim/lib/copilot/vfs/operations.test.ts @@ -81,6 +81,16 @@ describe('grep', () => { expect(lineOnly).toHaveLength(0) }) + it('treats trailing slash on directory scope like grep (files/ matches files/foo)', () => { + const files = vfsFromEntries([ + ['files/TEST BOY.md/meta.json', '"name": "TEST BOY.md"'], + ['workflows/x', 'TEST BOY'], + ]) + const hits = grep(files, 'TEST BOY', 'files/', { outputMode: 'files_with_matches' }) + expect(hits).toContain('files/TEST BOY.md/meta.json') + expect(hits).not.toContain('workflows/x') + }) + it('scopes to directory prefix without matching unrelated prefixes', () => { const files = vfsFromEntries([ ['workflows/a/x', 'needle'], diff --git a/apps/sim/lib/copilot/vfs/operations.ts b/apps/sim/lib/copilot/vfs/operations.ts index e488264de0d..f1704ab81a9 100644 --- a/apps/sim/lib/copilot/vfs/operations.ts +++ b/apps/sim/lib/copilot/vfs/operations.ts @@ -57,14 +57,19 @@ function splitLinesForGrep(content: string): string[] { * Returns true when `filePath` is `scope` or a descendant path (`scope/...`). If `scope` contains * `*` or `?`, filters with micromatch `isMatch` and {@link VFS_GLOB_OPTIONS}. Other characters * (including `[`, `{`, spaces) use directory-prefix logic so literal VFS path segments are not - * parsed as glob syntax. + * parsed as glob syntax. Trailing slashes are stripped so `files/` and `files` both scope under + * `files/...`. */ function pathWithinGrepScope(filePath: string, scope: string): boolean { const scopeUsesStarOrQuestionGlob = /[*?]/.test(scope) if (scopeUsesStarOrQuestionGlob) { return micromatch.isMatch(filePath, scope, VFS_GLOB_OPTIONS) } - return filePath === scope || filePath.startsWith(scope + '/') + const base = scope.replace(/\/+$/, '') + if (base === '') { + return true + } + return filePath === base || filePath.startsWith(`${base}/`) } /** diff --git a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts index ba90521e9a8..5ab874ead48 100644 --- a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts @@ -13,6 +13,9 @@ import { incrementStorageUsage, } from '@/lib/billing/storage' import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' +import { normalizeWorkspaceFileReference } from '@/lib/uploads/contexts/workspace/workspace-file-reference' + +export { normalizeWorkspaceFileReference } import { downloadFile, hasCloudStorage, @@ -331,30 +334,9 @@ export async function listWorkspaceFiles( } } -/** - * Normalize a workspace file reference to its display name. - * Supports raw names and VFS-style paths like `files/name`, `files/name/content`, - * and `files/name/meta.json`. - */ -export function normalizeWorkspaceFileReference(fileReference: string): string { - const trimmed = fileReference.trim().replace(/^\/+/, '') - - if (trimmed.startsWith('files/')) { - const withoutPrefix = trimmed.slice('files/'.length) - if (withoutPrefix.endsWith('/meta.json')) { - return withoutPrefix.slice(0, -'/meta.json'.length) - } - if (withoutPrefix.endsWith('/content')) { - return withoutPrefix.slice(0, -'/content'.length) - } - return withoutPrefix - } - - return trimmed -} - /** * Find a workspace file record in an existing list from either its id or a VFS/name reference. + * For copilot `open_resource` and the resource panel, use {@link getWorkspaceFile} with a UUID only. */ export function findWorkspaceFileRecord( files: WorkspaceFileRecord[], diff --git a/apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts b/apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts new file mode 100644 index 00000000000..e37895c5f70 --- /dev/null +++ b/apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts @@ -0,0 +1,24 @@ +/** + * Normalize a workspace file reference to its display name. + * Supports raw names and VFS-style paths like `files/name`, `files/name/content`, + * and `files/name/meta.json`. + * + * Used by storage resolution (`findWorkspaceFileRecord`), not by `open_resource`, which + * requires the canonical database UUID only. + */ +export function normalizeWorkspaceFileReference(fileReference: string): string { + const trimmed = fileReference.trim().replace(/^\/+/, '') + + if (trimmed.startsWith('files/')) { + const withoutPrefix = trimmed.slice('files/'.length) + if (withoutPrefix.endsWith('/meta.json')) { + return withoutPrefix.slice(0, -'/meta.json'.length) + } + if (withoutPrefix.endsWith('/content')) { + return withoutPrefix.slice(0, -'/content'.length) + } + return withoutPrefix + } + + return trimmed +} From 0fb04fbbb011bbc2e6d0b3533d30d8f8dc66a14c Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Thu, 19 Mar 2026 13:37:57 -0700 Subject: [PATCH 6/6] consolidate files --- .../workspace/workspace-file-manager.ts | 28 +++++++++++++++++-- .../workspace/workspace-file-reference.ts | 24 ---------------- 2 files changed, 25 insertions(+), 27 deletions(-) delete mode 100644 apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts diff --git a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts index 5ab874ead48..540b6e9b434 100644 --- a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts @@ -13,9 +13,6 @@ import { incrementStorageUsage, } from '@/lib/billing/storage' import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' -import { normalizeWorkspaceFileReference } from '@/lib/uploads/contexts/workspace/workspace-file-reference' - -export { normalizeWorkspaceFileReference } import { downloadFile, hasCloudStorage, @@ -334,6 +331,31 @@ export async function listWorkspaceFiles( } } +/** + * Normalize a workspace file reference to its display name. + * Supports raw names and VFS-style paths like `files/name`, `files/name/content`, + * and `files/name/meta.json`. + * + * Used by storage resolution (`findWorkspaceFileRecord`), not by `open_resource`, which + * requires the canonical database UUID only. + */ +export function normalizeWorkspaceFileReference(fileReference: string): string { + const trimmed = fileReference.trim().replace(/^\/+/, '') + + if (trimmed.startsWith('files/')) { + const withoutPrefix = trimmed.slice('files/'.length) + if (withoutPrefix.endsWith('/meta.json')) { + return withoutPrefix.slice(0, -'/meta.json'.length) + } + if (withoutPrefix.endsWith('/content')) { + return withoutPrefix.slice(0, -'/content'.length) + } + return withoutPrefix + } + + return trimmed +} + /** * Find a workspace file record in an existing list from either its id or a VFS/name reference. * For copilot `open_resource` and the resource panel, use {@link getWorkspaceFile} with a UUID only. diff --git a/apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts b/apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts deleted file mode 100644 index e37895c5f70..00000000000 --- a/apps/sim/lib/uploads/contexts/workspace/workspace-file-reference.ts +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Normalize a workspace file reference to its display name. - * Supports raw names and VFS-style paths like `files/name`, `files/name/content`, - * and `files/name/meta.json`. - * - * Used by storage resolution (`findWorkspaceFileRecord`), not by `open_resource`, which - * requires the canonical database UUID only. - */ -export function normalizeWorkspaceFileReference(fileReference: string): string { - const trimmed = fileReference.trim().replace(/^\/+/, '') - - if (trimmed.startsWith('files/')) { - const withoutPrefix = trimmed.slice('files/'.length) - if (withoutPrefix.endsWith('/meta.json')) { - return withoutPrefix.slice(0, -'/meta.json'.length) - } - if (withoutPrefix.endsWith('/content')) { - return withoutPrefix.slice(0, -'/content'.length) - } - return withoutPrefix - } - - return trimmed -}