From dcdda607e5b9a645c7fdeb092eb91e04e39bb356 Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Fri, 12 Dec 2025 16:02:11 -0800 Subject: [PATCH 001/885] partial Revert "fix: use bunx --bun for drizzle-kit to ensure .env.local loading" This partially reverts commit 9365583c7e7374c75c6540e389d98c5f005a31b3. --- packages/internal/knowledge.md | 24 ------------------------ packages/internal/package.json | 6 +++--- 2 files changed, 3 insertions(+), 27 deletions(-) delete mode 100644 packages/internal/knowledge.md diff --git a/packages/internal/knowledge.md b/packages/internal/knowledge.md deleted file mode 100644 index b51080049f..0000000000 --- a/packages/internal/knowledge.md +++ /dev/null @@ -1,24 +0,0 @@ -# @codebuff/internal - -This package contains internal utilities, database schema, and shared server-side code. - -## Database Commands - -### Why `bunx --bun` for Drizzle Commands - -All drizzle-kit scripts in `package.json` use `bunx --bun` instead of calling `drizzle-kit` directly: - -```json -"db:generate": "bunx --bun drizzle-kit generate --config=./src/db/drizzle.config.ts", -"db:migrate": "bunx --bun drizzle-kit push --config=./src/db/drizzle.config.ts", -"db:studio": "bunx --bun drizzle-kit studio --config=./src/db/drizzle.config.ts" -``` - -**Why this is necessary:** - -1. `drizzle-kit` runs via Node.js (not Bun) by default - it has `#!/usr/bin/env node` in its shebang -2. Node.js does NOT auto-load `.env.local` files like Bun does -3. Without `--bun`, drizzle-kit won't have access to environment variables like `DATABASE_URL` -4. The `--bun` flag forces the command to run via Bun's runtime, which properly loads `.env.local` - -**If you add new drizzle-kit scripts**, always use `bunx --bun drizzle-kit ...` to ensure environment variables are available. diff --git a/packages/internal/package.json b/packages/internal/package.json index 3ac48c004c..c7dffa79ea 100644 --- a/packages/internal/package.json +++ b/packages/internal/package.json @@ -38,10 +38,10 @@ "scripts": { "typecheck": "tsc --noEmit -p .", "test": "bun test", - "db:generate": "bunx --bun drizzle-kit generate --config=./src/db/drizzle.config.ts", - "db:migrate": "bunx --bun drizzle-kit push --config=./src/db/drizzle.config.ts", + "db:generate": "drizzle-kit generate --config=./src/db/drizzle.config.ts", + "db:migrate": "drizzle-kit push --config=./src/db/drizzle.config.ts", "db:start": "docker compose -f ./src/db/docker-compose.yml up --wait && bun run db:generate && (timeout 1 || sleep 1) && bun run db:migrate", - "db:studio": "bunx --bun drizzle-kit studio --config=./src/db/drizzle.config.ts" + "db:studio": "drizzle-kit studio --config=./src/db/drizzle.config.ts" }, "sideEffects": false, "engines": { From 6643e80abb011ed30d11e793f1a22d65d0f25e4d Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Fri, 12 Dec 2025 16:07:38 -0800 Subject: [PATCH 002/885] fix(cli): use DI pattern in fetchUsageData test instead of modifying process.env --- cli/src/hooks/__tests__/use-usage-query.test.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cli/src/hooks/__tests__/use-usage-query.test.ts b/cli/src/hooks/__tests__/use-usage-query.test.ts index 58273e0b3a..83d191709e 100644 --- a/cli/src/hooks/__tests__/use-usage-query.test.ts +++ b/cli/src/hooks/__tests__/use-usage-query.test.ts @@ -11,6 +11,8 @@ import { } from 'bun:test' import React from 'react' +import type { ClientEnv } from '@codebuff/common/types/contracts/env' + import { useChatStore } from '../../state/chat-store' import * as authModule from '../../utils/auth' import { @@ -65,11 +67,12 @@ describe('fetchUsageData', () => { }) test('should throw error when app URL is not set', async () => { - delete process.env.NEXT_PUBLIC_CODEBUFF_APP_URL - - await expect(fetchUsageData({ authToken: 'test-token' })).rejects.toThrow( - 'NEXT_PUBLIC_CODEBUFF_APP_URL is not set', - ) + await expect( + fetchUsageData({ + authToken: 'test-token', + clientEnv: { NEXT_PUBLIC_CODEBUFF_APP_URL: undefined } as ClientEnv, + }), + ).rejects.toThrow('NEXT_PUBLIC_CODEBUFF_APP_URL is not set') }) }) From bb2ca2dd0840ee021f9de1b4852c40ef29142061 Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Fri, 12 Dec 2025 16:25:51 -0800 Subject: [PATCH 003/885] fix(env): use --env-file flags to pass env vars to script subprocesses Bun's automatic .env loading doesn't pass env vars to subprocesses like drizzle-kit. This fixes the issue by explicitly passing --env-file flags. Also updates init-worktree to: - Explicitly create new branches from HEAD to get latest tooling - Merge base branch into existing branches to get latest tooling --- .bin/bun | 43 ++++++++++++++++++++++++++++++---------- scripts/init-worktree.ts | 31 ++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/.bin/bun b/.bin/bun index 83536b7c72..253ecd840e 100755 --- a/.bin/bun +++ b/.bin/bun @@ -1,17 +1,20 @@ #!/usr/bin/env bash # Bun wrapper that syncs secrets from Infisical to .env.local -# Bun natively loads .env files in this order (highest precedence last): -# 1. .env -# 2. .env.development (or .env.production/.env.test based on NODE_ENV) -# 3. .env.local -# 4. .env.development.local - Worktree-specific overrides (ports, etc.) # -# .env.development.local has highest precedence, so worktree port overrides -# correctly override values from Infisical-synced .env.local. +# Why this wrapper exists: +# Bun's automatic .env loading doesn't pass env vars to script subprocesses +# (e.g., drizzle-kit, tsx, etc.). This wrapper explicitly passes --env-file +# flags to ensure env vars are available everywhere. # -# This wrapper ensures .env.local is up-to-date from Infisical before running bun. -# If Infisical is not set up, it falls back to using existing .env.local if present. +# Env file precedence (later files override earlier ones): +# 1. .env.local - Main secrets from Infisical +# 2. .env.development.local - Worktree-specific overrides (ports, etc.) +# +# This wrapper: +# 1. Syncs .env.local from Infisical (with caching) +# 2. Creates symlinks in subdirectories for tools that need local .env files +# 3. Passes --env-file flags to bun so subprocesses inherit env vars # Common bun installation paths to check BUN_PATHS=( @@ -202,11 +205,29 @@ check_env_setup() { echo "" >&2 } +# Build env file arguments for bun +# Bun's automatic .env loading doesn't pass vars to script subprocesses (like drizzle-kit), +# so we explicitly pass --env-file flags to ensure env vars are available everywhere. +build_env_file_args() { + ENV_FILE_ARGS="" + + # Add .env.local if it exists + if [ -f "$ENV_LOCAL_FILE" ]; then + ENV_FILE_ARGS="--env-file=$ENV_LOCAL_FILE" + fi + + # Add .env.development.local if it exists (higher precedence for worktree overrides) + if [ -f "$ENV_DEVELOPMENT_LOCAL_FILE" ]; then + ENV_FILE_ARGS="$ENV_FILE_ARGS --env-file=$ENV_DEVELOPMENT_LOCAL_FILE" + fi +} + run_bun() { create_env_symlinks check_env_setup - # Bun natively loads .env files and gives them precedence over inherited shell vars - exec "$REAL_BUN" "$@" + build_env_file_args + # Use --env-file to ensure env vars are passed to script subprocesses + exec "$REAL_BUN" $ENV_FILE_ARGS "$@" } # Main logic diff --git a/scripts/init-worktree.ts b/scripts/init-worktree.ts index b923ff4af4..6a7f6ca24e 100644 --- a/scripts/init-worktree.ts +++ b/scripts/init-worktree.ts @@ -206,6 +206,15 @@ async function checkGitBranchExists(branchName: string): Promise { } } +async function getCurrentBranch(): Promise { + const proc = Bun.spawn(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], { + stdout: 'pipe', + stderr: 'pipe', + }) + const output = await new Response(proc.stdout).text() + return output.trim() || 'main' +} + function createEnvDevelopmentLocalFile( worktreePath: string, args: WorktreeArgs, @@ -387,14 +396,33 @@ async function main(): Promise { console.log(`Location: ${worktreePath}`) // Create the git worktree (with or without creating new branch) + // Explicitly use HEAD to ensure worktree has latest tooling (.bin/bun, etc.) + const baseBranch = await getCurrentBranch() const worktreeAddArgs = ['worktree', 'add', worktreePath] if (branchExists) { + // Branch exists - check it out worktreeAddArgs.push(args.name) } else { - worktreeAddArgs.push('-b', args.name) + // New branch - explicitly create from HEAD to get latest tooling + worktreeAddArgs.push('-b', args.name, 'HEAD') } await runCommand('git', worktreeAddArgs) + // If branch already existed, merge in the base branch to get latest tooling + if (branchExists) { + console.log(`Merging ${baseBranch} into ${args.name} to get latest tooling...`) + const mergeResult = await runCommand( + 'git', + ['merge', baseBranch, '--no-edit', '-m', `Merge ${baseBranch} to get latest tooling`], + worktreePath, + ) + if (mergeResult.exitCode !== 0) { + console.warn( + `Warning: Merge had conflicts. Please resolve them manually in the worktree.`, + ) + } + } + console.log('Setting up worktree environment...') console.log(`Backend port: ${args.backendPort}`) console.log(`Web port: ${args.webPort}`) @@ -416,6 +444,7 @@ async function main(): Promise { console.log(`✅ Worktree '${args.name}' created and set up successfully!`) console.log(`📁 Location: ${worktreePath}`) + console.log(`🌿 Based on: ${baseBranch} (HEAD)`) console.log(`🚀 You can now cd into the worktree and start working:`) console.log(` cd ${worktreePath}`) console.log(``) From 0bc0ed89efe956403c64242076e4bdb7ec350e56 Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Fri, 12 Dec 2025 16:55:46 -0800 Subject: [PATCH 004/885] Fix direnv allow prompt in init-worktree script. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve handling of direnv .envrc file authorization to ensure proper setup during worktree initialization. 🤖 Generated with Codebuff Co-Authored-By: Codebuff --- scripts/init-worktree.ts | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/scripts/init-worktree.ts b/scripts/init-worktree.ts index 6a7f6ca24e..6e42d7f4eb 100644 --- a/scripts/init-worktree.ts +++ b/scripts/init-worktree.ts @@ -343,11 +343,32 @@ async function runDirenvAllow(worktreePath: string): Promise { const envrcPath = join(worktreePath, '.envrc') if (existsSync(envrcPath)) { console.log('Running direnv allow...') - try { - await runCommand('direnv', ['allow'], worktreePath) - } catch (error) { - console.warn('Failed to run direnv allow:', error) - } + return new Promise((resolve) => { + // Use bash -c with explicit cd to ensure direnv sees the correct directory context + // Just using cwd option doesn't work reliably with direnv + const proc = spawn('bash', ['-c', `cd '${worktreePath}' && direnv allow`], { + stdio: 'inherit', + shell: false, + }) + + proc.on('close', (code) => { + if (code === 0) { + console.log('direnv allow completed successfully') + } else { + console.warn(`direnv allow exited with code ${code}`) + } + resolve() + }) + + proc.on('error', (error) => { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') { + console.warn('bash not found, skipping direnv allow') + } else { + console.warn('Failed to run direnv allow:', error.message) + } + resolve() + }) + }) } else { console.log('No .envrc found, skipping direnv allow') } From a8cd5f2bdc5ac0ae851cecf9c495580c017f37fb Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Fri, 12 Dec 2025 16:58:09 -0800 Subject: [PATCH 005/885] fix: typecheck --- cli/src/hooks/__tests__/use-usage-query.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/hooks/__tests__/use-usage-query.test.ts b/cli/src/hooks/__tests__/use-usage-query.test.ts index 83d191709e..cb91228ea1 100644 --- a/cli/src/hooks/__tests__/use-usage-query.test.ts +++ b/cli/src/hooks/__tests__/use-usage-query.test.ts @@ -70,7 +70,7 @@ describe('fetchUsageData', () => { await expect( fetchUsageData({ authToken: 'test-token', - clientEnv: { NEXT_PUBLIC_CODEBUFF_APP_URL: undefined } as ClientEnv, + clientEnv: { NEXT_PUBLIC_CODEBUFF_APP_URL: undefined } as unknown as ClientEnv, }), ).rejects.toThrow('NEXT_PUBLIC_CODEBUFF_APP_URL is not set') }) From dd37a85665793c024bd00f28f720eedbc747d27a Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Fri, 12 Dec 2025 19:50:21 -0800 Subject: [PATCH 006/885] feat(ask-user): design tweaks --- .../__tests__/multiple-choice-form.test.ts | 429 ++++++++++++++++ .../components/accordion-question.tsx | 141 ++---- .../ask-user/components/question-option.tsx | 48 +- cli/src/components/ask-user/constants.ts | 18 + cli/src/components/ask-user/index.tsx | 464 ++++++++++-------- 5 files changed, 774 insertions(+), 326 deletions(-) create mode 100644 cli/src/components/ask-user/__tests__/multiple-choice-form.test.ts diff --git a/cli/src/components/ask-user/__tests__/multiple-choice-form.test.ts b/cli/src/components/ask-user/__tests__/multiple-choice-form.test.ts new file mode 100644 index 0000000000..20f28b6098 --- /dev/null +++ b/cli/src/components/ask-user/__tests__/multiple-choice-form.test.ts @@ -0,0 +1,429 @@ +/** + * Integration tests for MultipleChoiceForm component logic + * + * NOTE: Due to React 19 + Bun + OpenTUI compatibility issues, we test the component's + * core logic in isolation rather than rendering the full component. + * See cli/knowledge.md for details on the testing constraints. + */ + +import { describe, it, expect } from 'bun:test' + +import { getOptionLabel, OTHER_OPTION_INDEX } from '../constants' + +import type { AccordionAnswer } from '../components/accordion-question' +import type { AskUserOption } from '../constants' + +/** Question type - mirrors AskUserQuestion from chat-store */ +interface TestQuestion { + question: string + options: AskUserOption[] + multiSelect?: boolean +} + +/** + * Format answer for submission (mirrors component logic) + */ +function formatAnswer( + question: TestQuestion, + answer: AccordionAnswer | undefined, +): { question: string; answer: string } { + if (!answer) { + return { question: question.question, answer: 'Skipped' } + } + + const selectedOptions = question.multiSelect + ? Array.from(answer.selectedIndices ?? []) + .map((idx) => getOptionLabel(question.options[idx])) + .filter(Boolean) + : answer.selectedIndex !== undefined + ? [getOptionLabel(question.options[answer.selectedIndex])] + : [] + + const customText = + answer.isOther && (answer.otherText?.trim().length ?? 0) > 0 + ? (answer.otherText ?? '').trim() + : '' + + const parts = customText ? [...selectedOptions, customText] : selectedOptions + if (parts.length === 0) { + return { question: question.question, answer: 'Skipped' } + } + + return { + question: question.question, + answer: question.multiSelect ? parts.join(', ') : parts[0], + } +} + +describe('getOptionLabel', () => { + it('extracts label from string option', () => { + expect(getOptionLabel('Option A')).toBe('Option A') + }) + + it('extracts label from object option', () => { + expect(getOptionLabel({ label: 'Option B', description: 'Description' })).toBe('Option B') + }) + + it('extracts label from object option without description', () => { + expect(getOptionLabel({ label: 'Option C' })).toBe('Option C') + }) + + it('returns empty string for null-ish label', () => { + expect(getOptionLabel({ label: '' })).toBe('') + }) +}) + +describe('formatAnswer', () => { + const singleSelectQuestion: TestQuestion = { + question: 'What is your favorite color?', + options: ['Red', 'Blue', 'Green'], + multiSelect: false, + } + + const multiSelectQuestion: TestQuestion = { + question: 'Select your hobbies', + options: ['Reading', 'Gaming', 'Sports'], + multiSelect: true, + } + + const objectOptionsQuestion: TestQuestion = { + question: 'Choose a plan', + options: [ + { label: 'Basic', description: '$10/month' }, + { label: 'Pro', description: '$20/month' }, + ], + multiSelect: false, + } + + describe('single-select questions', () => { + it('returns Skipped when no answer provided', () => { + const result = formatAnswer(singleSelectQuestion, undefined) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Skipped', + }) + }) + + it('returns Skipped when answer is empty object', () => { + const result = formatAnswer(singleSelectQuestion, {}) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Skipped', + }) + }) + + it('returns selected option label', () => { + const answer: AccordionAnswer = { selectedIndex: 1 } + const result = formatAnswer(singleSelectQuestion, answer) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Blue', + }) + }) + + it('returns first option when index is 0', () => { + const answer: AccordionAnswer = { selectedIndex: 0 } + const result = formatAnswer(singleSelectQuestion, answer) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Red', + }) + }) + + it('returns custom text when isOther is true', () => { + const answer: AccordionAnswer = { + isOther: true, + otherText: 'Purple', + } + const result = formatAnswer(singleSelectQuestion, answer) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Purple', + }) + }) + + it('trims whitespace from custom text', () => { + const answer: AccordionAnswer = { + isOther: true, + otherText: ' Purple ', + } + const result = formatAnswer(singleSelectQuestion, answer) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Purple', + }) + }) + + it('returns Skipped when isOther is true but text is empty', () => { + const answer: AccordionAnswer = { + isOther: true, + otherText: '', + } + const result = formatAnswer(singleSelectQuestion, answer) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Skipped', + }) + }) + + it('returns Skipped when isOther is true but text is only whitespace', () => { + const answer: AccordionAnswer = { + isOther: true, + otherText: ' ', + } + const result = formatAnswer(singleSelectQuestion, answer) + expect(result).toEqual({ + question: 'What is your favorite color?', + answer: 'Skipped', + }) + }) + }) + + describe('multi-select questions', () => { + it('returns Skipped when no selections', () => { + const answer: AccordionAnswer = { selectedIndices: new Set() } + const result = formatAnswer(multiSelectQuestion, answer) + expect(result).toEqual({ + question: 'Select your hobbies', + answer: 'Skipped', + }) + }) + + it('returns single selection', () => { + const answer: AccordionAnswer = { selectedIndices: new Set([0]) } + const result = formatAnswer(multiSelectQuestion, answer) + expect(result).toEqual({ + question: 'Select your hobbies', + answer: 'Reading', + }) + }) + + it('returns multiple selections joined by comma', () => { + const answer: AccordionAnswer = { selectedIndices: new Set([0, 2]) } + const result = formatAnswer(multiSelectQuestion, answer) + expect(result).toEqual({ + question: 'Select your hobbies', + answer: 'Reading, Sports', + }) + }) + + it('returns all selections when all selected', () => { + const answer: AccordionAnswer = { selectedIndices: new Set([0, 1, 2]) } + const result = formatAnswer(multiSelectQuestion, answer) + expect(result).toEqual({ + question: 'Select your hobbies', + answer: 'Reading, Gaming, Sports', + }) + }) + + it('includes custom text with selections', () => { + const answer: AccordionAnswer = { + selectedIndices: new Set([0]), + isOther: true, + otherText: 'Cooking', + } + const result = formatAnswer(multiSelectQuestion, answer) + expect(result).toEqual({ + question: 'Select your hobbies', + answer: 'Reading, Cooking', + }) + }) + + it('returns only custom text when no other selections', () => { + const answer: AccordionAnswer = { + selectedIndices: new Set(), + isOther: true, + otherText: 'Cooking', + } + const result = formatAnswer(multiSelectQuestion, answer) + expect(result).toEqual({ + question: 'Select your hobbies', + answer: 'Cooking', + }) + }) + }) + + describe('object options', () => { + it('extracts label from object options', () => { + const answer: AccordionAnswer = { selectedIndex: 0 } + const result = formatAnswer(objectOptionsQuestion, answer) + expect(result).toEqual({ + question: 'Choose a plan', + answer: 'Basic', + }) + }) + + it('works with second object option', () => { + const answer: AccordionAnswer = { selectedIndex: 1 } + const result = formatAnswer(objectOptionsQuestion, answer) + expect(result).toEqual({ + question: 'Choose a plan', + answer: 'Pro', + }) + }) + }) +}) + +describe('OTHER_OPTION_INDEX constant', () => { + it('is -1 for identifying custom/other option', () => { + expect(OTHER_OPTION_INDEX).toBe(-1) + }) + + it('is distinct from valid option indices', () => { + expect(OTHER_OPTION_INDEX).toBeLessThan(0) + }) +}) + +describe('answer state management patterns', () => { + describe('single-select behavior', () => { + it('selecting an option clears isOther flag', () => { + const previousAnswer: AccordionAnswer = { + isOther: true, + otherText: 'Custom text', + } + + const optionIndex: number = 1 + const isOtherOption = optionIndex === OTHER_OPTION_INDEX + + const newAnswer: AccordionAnswer = isOtherOption + ? { + selectedIndex: undefined, + selectedIndices: undefined, + isOther: true, + otherText: previousAnswer.otherText || '', + } + : { + selectedIndex: optionIndex, + selectedIndices: undefined, + isOther: false, + } + + expect(newAnswer.selectedIndex).toBe(1) + expect(newAnswer.isOther).toBe(false) + }) + + it('selecting OTHER clears selectedIndex and enables isOther', () => { + const previousAnswer: AccordionAnswer = { + selectedIndex: 1, + } + + const optionIndex = OTHER_OPTION_INDEX + const isOtherOption = optionIndex === OTHER_OPTION_INDEX + + const newAnswer: AccordionAnswer = isOtherOption + ? { + selectedIndex: undefined, + selectedIndices: undefined, + isOther: true, + otherText: previousAnswer.otherText || '', + } + : { + selectedIndex: optionIndex, + selectedIndices: undefined, + isOther: false, + } + + expect(newAnswer.selectedIndex).toBeUndefined() + expect(newAnswer.isOther).toBe(true) + }) + }) + + describe('multi-select behavior', () => { + it('toggling adds option to selectedIndices', () => { + const currentAnswer: AccordionAnswer = { + selectedIndices: new Set([0]), + } + + const optionIndex = 2 + const newIndices = new Set(currentAnswer.selectedIndices) + if (newIndices.has(optionIndex)) { + newIndices.delete(optionIndex) + } else { + newIndices.add(optionIndex) + } + + expect(newIndices.has(0)).toBe(true) + expect(newIndices.has(2)).toBe(true) + expect(newIndices.size).toBe(2) + }) + + it('toggling removes option if already selected', () => { + const currentAnswer: AccordionAnswer = { + selectedIndices: new Set([0, 1, 2]), + } + + const optionIndex = 1 + const newIndices = new Set(currentAnswer.selectedIndices) + if (newIndices.has(optionIndex)) { + newIndices.delete(optionIndex) + } else { + newIndices.add(optionIndex) + } + + expect(newIndices.has(0)).toBe(true) + expect(newIndices.has(1)).toBe(false) + expect(newIndices.has(2)).toBe(true) + expect(newIndices.size).toBe(2) + }) + + it('toggling OTHER toggles isOther flag', () => { + const currentAnswer: AccordionAnswer = { + selectedIndices: new Set([0]), + isOther: false, + } + + const optionIndex = OTHER_OPTION_INDEX + const toggledOtherOn = optionIndex === OTHER_OPTION_INDEX && !currentAnswer.isOther + + expect(toggledOtherOn).toBe(true) + }) + }) +}) + +describe('navigation edge cases', () => { + it('clamps question index to valid range', () => { + const questionsLength = 3 + const focusedQuestionIndex = 5 + + const currentQuestionIndex = Math.min( + Math.max(focusedQuestionIndex, 0), + questionsLength - 1, + ) + + expect(currentQuestionIndex).toBe(2) + }) + + it('clamps negative question index to 0', () => { + const questionsLength = 3 + const focusedQuestionIndex = -1 + + const currentQuestionIndex = Math.min( + Math.max(focusedQuestionIndex, 0), + questionsLength - 1, + ) + + expect(currentQuestionIndex).toBe(0) + }) + + it('clamps option index to valid range', () => { + const optionCount = 4 + const focusedOptionIndex = 10 + + const lastOptionIndex = Math.max(optionCount - 1, 0) + const currentOptionIndex = Math.min( + Math.max(focusedOptionIndex, 0), + lastOptionIndex, + ) + + expect(currentOptionIndex).toBe(3) + }) + + it('handles empty questions array', () => { + const questionsLength = 0 + const expandedIndex = questionsLength > 0 ? 0 : null + const focusedOptionIndex = questionsLength > 0 ? 0 : null + + expect(expandedIndex).toBeNull() + expect(focusedOptionIndex).toBeNull() + }) +}) diff --git a/cli/src/components/ask-user/components/accordion-question.tsx b/cli/src/components/ask-user/components/accordion-question.tsx index 468b7e5498..6172f47cb9 100644 --- a/cli/src/components/ask-user/components/accordion-question.tsx +++ b/cli/src/components/ask-user/components/accordion-question.tsx @@ -9,20 +9,10 @@ import { QuestionOption } from './question-option' import { useTheme } from '../../../hooks/use-theme' import { Button } from '../../button' import { MultilineInput } from '../../multiline-input' -import { SYMBOLS } from '../constants' - -import type { KeyEvent } from '@opentui/core' +import { getOptionLabel, OTHER_OPTION_INDEX, SYMBOLS } from '../constants' import type { AskUserQuestion } from '../../../state/chat-store' -/** Option type - can be string or object with label/description */ -type AskUserOption = string | { label: string; description?: string } - -/** Helper to extract label from an option (handles both string and object formats) */ -const getOptionLabel = (option: AskUserOption): string => { - return typeof option === 'string' ? option : option?.label ?? '' -} - /** Answer state for a single question */ export interface AccordionAnswer { selectedIndex?: number @@ -37,14 +27,12 @@ export interface AccordionQuestionProps { totalQuestions: number answer: AccordionAnswer | undefined isExpanded: boolean - isQuestionFocused: boolean isTypingOther: boolean onToggleExpand: () => void onSelectOption: (optionIndex: number) => void onToggleOption: (optionIndex: number) => void onSetOtherText: (text: string, cursorPosition: number) => void onOtherSubmit: () => void - onOtherCancel: () => void otherCursorPosition: number focusedOptionIndex: number | null onFocusOption: (index: number | null) => void @@ -56,14 +44,12 @@ export const AccordionQuestion: React.FC = ({ totalQuestions, answer, isExpanded, - isQuestionFocused, isTypingOther, onToggleExpand, onSelectOption, onToggleOption, onSetOtherText, onOtherSubmit, - onOtherCancel, otherCursorPosition, focusedOptionIndex, onFocusOption, @@ -71,20 +57,16 @@ export const AccordionQuestion: React.FC = ({ const theme = useTheme() const isMultiSelect = question.multiSelect const showQuestionNumber = totalQuestions > 1 + const questionNumber = questionIndex + 1 + const questionPrefix = showQuestionNumber ? `${questionNumber}. ` : '' + const optionIndent = 2 + questionPrefix.length // Check if question has a valid answer - const isAnswered = (() => { - if (!answer) return false - if (answer.isOther && answer.otherText?.trim()) return true - if ( - isMultiSelect && - answer.selectedIndices && - answer.selectedIndices.size > 0 - ) - return true - if (answer.selectedIndex !== undefined) return true - return false - })() + const isAnswered = + !!answer && + ((answer.isOther && !!answer.otherText?.trim()) || + (isMultiSelect && (answer.selectedIndices?.size ?? 0) > 0) || + answer.selectedIndex !== undefined) // Get display text for the current answer const getAnswerDisplay = (): string => { @@ -119,26 +101,23 @@ export const AccordionQuestion: React.FC = ({ } } - // Question number (1-indexed) - only shown when multiple questions - const questionNumber = questionIndex + 1 const isCustomSelected = answer?.isOther ?? false + const isCustomFocused = focusedOptionIndex === question.options.length || isTypingOther + const selectedFg = theme.name === 'dark' ? '#ffffff' : '#000000' const customSymbol = isMultiSelect - ? isCustomSelected - ? SYMBOLS.CHECKBOX_CHECKED - : SYMBOLS.CHECKBOX_UNCHECKED - : isCustomSelected - ? SYMBOLS.SELECTED - : SYMBOLS.UNSELECTED + ? isCustomSelected ? SYMBOLS.CHECKBOX_CHECKED : SYMBOLS.CHECKBOX_UNCHECKED + : isCustomSelected ? SYMBOLS.SELECTED : SYMBOLS.UNSELECTED + const customFg = isCustomFocused ? '#000000' : isCustomSelected ? selectedFg : theme.muted + const customAttributes = isCustomFocused || isCustomSelected ? TextAttributes.BOLD : undefined return ( - + {/* Question header - always visible */} {/* Text input area when typing Custom */} {isTypingOther && ( - + = ({ onSetOtherText(newText, otherCursorPosition + text.length) } }} - onKeyIntercept={(key: KeyEvent) => { - // Handle Escape/Ctrl+C: first clears text, second deselects option - if (key.name === 'escape' || (key.ctrl && key.name === 'c')) { - if ('preventDefault' in key && typeof key.preventDefault === 'function') { - key.preventDefault() - } - const currentText = answer?.otherText || '' - if (currentText.length > 0) { - // First escape: just clear the text - onSetOtherText('', 0) - } else { - // Second escape (text already empty): deselect the option - onOtherCancel() - } - return true - } - return false - }} focused={true} maxHeight={3} minHeight={1} diff --git a/cli/src/components/ask-user/components/question-option.tsx b/cli/src/components/ask-user/components/question-option.tsx index 79ed8775b0..920ed4a7bc 100644 --- a/cli/src/components/ask-user/components/question-option.tsx +++ b/cli/src/components/ask-user/components/question-option.tsx @@ -11,7 +11,7 @@ import { SYMBOLS } from '../constants' export interface QuestionOptionProps { option: string | { label: string; description?: string } - optionIndex: number + indent: number isSelected: boolean isFocused: boolean isMultiSelect?: boolean @@ -22,6 +22,7 @@ export interface QuestionOptionProps { export const QuestionOption: React.FC = memo( ({ option, + indent, isSelected, isFocused, isMultiSelect = false, @@ -34,14 +35,12 @@ export const QuestionOption: React.FC = memo( const label = typeof option === 'string' ? option : option.label const description = typeof option === 'object' ? option.description : undefined - // Determine symbol based on selection type + const selectedFg = theme.name === 'dark' ? '#ffffff' : '#000000' const symbol = isMultiSelect - ? isSelected - ? SYMBOLS.CHECKBOX_CHECKED - : SYMBOLS.CHECKBOX_UNCHECKED - : isSelected - ? SYMBOLS.SELECTED - : SYMBOLS.UNSELECTED + ? isSelected ? SYMBOLS.CHECKBOX_CHECKED : SYMBOLS.CHECKBOX_UNCHECKED + : isSelected ? SYMBOLS.SELECTED : SYMBOLS.UNSELECTED + const fg = isFocused ? '#000000' : isSelected ? selectedFg : theme.muted + const attributes = isFocused || isSelected ? TextAttributes.BOLD : undefined return ( @@ -478,63 +502,113 @@ export const MultipleChoiceForm: React.FC = ({ totalQuestions={questions.length} answer={answers.get(index)} isExpanded={expandedIndex === index} - isQuestionFocused={focusedQuestionIndex === index && !submitFocused} isTypingOther={isTypingOther && expandedIndex === index} onToggleExpand={() => { - setExpandedIndex(expandedIndex === index ? null : index) + const nextExpandedIndex = expandedIndex === index ? null : index + setExpandedIndex(nextExpandedIndex) setFocusedQuestionIndex(index) setSubmitFocused(false) setIsTypingOther(false) + setFocusedOptionIndex(nextExpandedIndex === null ? null : 0) }} onSelectOption={(optionIndex) => - handleSelectOption(index, optionIndex) + handleSelectOption(index, optionIndex, 'mouse') } onToggleOption={(optionIndex) => handleToggleOption(index, optionIndex) } onSetOtherText={(text, cursorPos) => handleSetOtherText(index, text, cursorPos)} onOtherSubmit={() => handleOtherSubmit(index)} - onOtherCancel={() => handleOtherCancel(index)} otherCursorPosition={otherCursorPositions.get(index) ?? 0} focusedOptionIndex={ - expandedIndex === index ? focusedOptionIndex : null + expandedIndex === index && !submitFocused && showFocusHighlight + ? focusedOptionIndex + : null } - onFocusOption={setFocusedOptionIndex} + onFocusOption={(optionIndex) => { + if (!terminalFocused || isTypingOther) return + if (suppressNextHoverFocusRef.current) { + suppressNextHoverFocusRef.current = false + return + } + setShowFocusHighlight(true) + setSubmitFocused(false) + setFocusedQuestionIndex(index) + if (expandedIndex !== index) { + setExpandedIndex(index) + } + setFocusedOptionIndex(optionIndex) + }} /> ))} - {/* Submit button */} - - + + Submit + + + + + + {KEYBOARD_HINTS.map((hint, idx) => ( + + {hint} + + ))} + ) From bc26f4ec71b449cc18ad02fffd53e2015409a53c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 13 Dec 2025 20:55:29 -0800 Subject: [PATCH 007/885] Remove indent level which should be 0 (subagents are already in boxes --- cli/src/components/blocks/thinking-block.tsx | 7 ++--- cli/src/components/blocks/tool-branch.tsx | 5 +--- cli/src/components/message-block.tsx | 27 +++----------------- 3 files changed, 6 insertions(+), 33 deletions(-) diff --git a/cli/src/components/blocks/thinking-block.tsx b/cli/src/components/blocks/thinking-block.tsx index 0203427e56..80497ee909 100644 --- a/cli/src/components/blocks/thinking-block.tsx +++ b/cli/src/components/blocks/thinking-block.tsx @@ -8,7 +8,6 @@ interface ThinkingBlockProps { blocks: Extract[] keyPrefix: string startIndex: number - indentLevel: number onToggleCollapsed: (id: string) => void availableWidth: number } @@ -18,7 +17,6 @@ export const ThinkingBlock = memo( blocks, keyPrefix, startIndex, - indentLevel, onToggleCollapsed, availableWidth, }: ThinkingBlockProps) => { @@ -30,8 +28,7 @@ export const ThinkingBlock = memo( const firstBlock = blocks[0] const isCollapsed = firstBlock?.isCollapsed ?? true - const marginLeft = Math.max(0, indentLevel * 2) - const availWidth = Math.max(10, availableWidth - marginLeft - 10) + const availWidth = Math.max(10, availableWidth - 10) const handleToggle = useCallback(() => { onToggleCollapsed(thinkingId) @@ -42,7 +39,7 @@ export const ThinkingBlock = memo( } return ( - + - indentLevel: number keyPrefix: string availableWidth: number streamingAgents: Set @@ -22,7 +21,6 @@ interface ToolBranchProps { export const ToolBranch = memo( ({ toolBlock, - indentLevel, keyPrefix, availableWidth, streamingAgents, @@ -105,9 +103,8 @@ export const ToolBranch = memo( getToolFinishedPreview(commandPreview, lastLine) : '' - const indentationOffset = indentLevel * 2 const agentMarkdownOptions = { - codeBlockWidth: Math.max(10, availableWidth - 12 - indentationOffset), + codeBlockWidth: Math.max(10, availableWidth - 12), palette: { ...markdownPalette, codeTextFg: theme.foreground, diff --git a/cli/src/components/message-block.tsx b/cli/src/components/message-block.tsx index 6c86439659..ce135a01f1 100644 --- a/cli/src/components/message-block.tsx +++ b/cli/src/components/message-block.tsx @@ -1,11 +1,5 @@ - import { TextAttributes } from '@opentui/core' -import React, { - memo, - useCallback, - useState, - type ReactNode, -} from 'react' +import React, { memo, useCallback, useState, type ReactNode } from 'react' import { AgentBranchItem } from './agent-branch-item' import { Button } from './button' @@ -15,7 +9,7 @@ import { MessageFooter } from './message-footer' import { ValidationErrorPopover } from './validation-error-popover' import { useTheme } from '../hooks/use-theme' import { useWhyDidYouUpdateById } from '../hooks/use-why-did-you-update' -import { isTextBlock, isToolBlock , isImageBlock } from '../types/chat' +import { isTextBlock, isToolBlock, isImageBlock } from '../types/chat' import { shouldRenderAsSimpleText } from '../utils/constants' import { isImplementorAgent, @@ -106,9 +100,6 @@ const MessageAttachments = ({ ) } - - - export const MessageBlock: React.FC = ({ messageId, blocks, @@ -379,7 +370,6 @@ const isRenderableTimelineBlock = ( interface AgentBodyProps { agentBlock: Extract - indentLevel: number keyPrefix: string parentIsStreaming: boolean availableWidth: number @@ -394,7 +384,6 @@ interface AgentBodyProps { const AgentBody = memo( ({ agentBlock, - indentLevel, keyPrefix, parentIsStreaming, availableWidth, @@ -443,7 +432,6 @@ const AgentBody = memo( blocks={reasoningBlocks} keyPrefix={keyPrefix} startIndex={start} - indentLevel={indentLevel} onToggleCollapsed={onToggleCollapsed} availableWidth={availableWidth} />, @@ -461,7 +449,7 @@ const AgentBody = memo( ? trimTrailingNewlines(textBlock.content) : textBlock.content.trim() const renderKey = `${keyPrefix}-text-${nestedIdx}` - const markdownOptionsForLevel = getAgentMarkdownOptions(indentLevel) + const markdownOptionsForLevel = getAgentMarkdownOptions(0) const marginTop = textBlock.marginTop ?? 0 const marginBottom = textBlock.marginBottom ?? 0 const explicitColor = textBlock.color @@ -472,7 +460,6 @@ const AgentBody = memo( style={{ wrapMode: 'word', fg: nestedTextColor, - marginLeft: Math.max(0, indentLevel * 2), marginTop, marginBottom, }} @@ -527,7 +514,6 @@ const AgentBody = memo( - indentLevel: number keyPrefix: string availableWidth: number markdownPalette: MarkdownPalette @@ -611,7 +595,6 @@ interface AgentBranchWrapperProps { const AgentBranchWrapper = memo( ({ agentBlock, - indentLevel, keyPrefix, availableWidth, markdownPalette, @@ -816,7 +799,6 @@ const AgentBranchWrapper = memo( > , @@ -1138,7 +1118,6 @@ const BlocksRenderer = memo( Date: Sat, 13 Dec 2025 21:02:02 -0800 Subject: [PATCH 008/885] ask user: Remove keyboard hints for open/close and esc/ctrl-c --- cli/src/components/ask-user/constants.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cli/src/components/ask-user/constants.ts b/cli/src/components/ask-user/constants.ts index e17331b4ca..4765df056a 100644 --- a/cli/src/components/ask-user/constants.ts +++ b/cli/src/components/ask-user/constants.ts @@ -33,8 +33,6 @@ export const getOptionLabel = (option: AskUserOption): string => { export const OTHER_OPTION_INDEX: number = -1 export const KEYBOARD_HINTS = [ - '←→ open/close •', '↑↓ navigate •', - 'Enter select •', - 'Esc/^C skip', + 'Enter select', ] as const From ecead952c2d9ae19fe5d4bab7acbfb39a3c1c4bc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 14 Dec 2025 05:15:43 +0000 Subject: [PATCH 009/885] Bump version to 1.0.550 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 5623f8b6d5..d1f9bf34b1 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.549", + "version": "1.0.550", "description": "AI coding agent", "license": "MIT", "bin": { From 896b72fdc20738224020b88ae55c3ab2ad266553 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 13 Dec 2025 21:50:42 -0800 Subject: [PATCH 010/885] Block guy who keeps disputing charges --- .../completions/__tests__/completions.test.ts | 38 +++++++++++++++++++ web/src/app/api/v1/chat/completions/_post.ts | 16 ++++++++ 2 files changed, 54 insertions(+) diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 61e8ffeeaa..6c23803a7e 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -28,6 +28,9 @@ describe('/api/v1/chat/completions POST endpoint', () => { 'test-api-key-no-credits': { id: 'user-no-credits', }, + 'test-api-key-blocked': { + id: '5e5aa538-92c8-4051-b0ec-5f75dbd69767', + }, } const mockGetUserInfoFromApiKey: GetUserInfoFromApiKeyFn = async ({ @@ -348,6 +351,41 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) }) + describe('Blocked users', () => { + it('returns 503 with cryptic error for blocked user IDs', async () => { + const req = new NextRequest( + 'http://localhost:3000/api/v1/chat/completions', + { + method: 'POST', + headers: { Authorization: 'Bearer test-api-key-blocked' }, + body: JSON.stringify({ + stream: true, + codebuff_metadata: { run_id: 'run-123' }, + }), + }, + ) + + const response = await postChatCompletions({ + req, + getUserInfoFromApiKey: mockGetUserInfoFromApiKey, + logger: mockLogger, + trackEvent: mockTrackEvent, + getUserUsageData: mockGetUserUsageData, + getAgentRunFromId: mockGetAgentRunFromId, + fetch: mockFetch, + insertMessageBigquery: mockInsertMessageBigquery, + loggerWithContext: mockLoggerWithContext, + }) + + expect(response.status).toBe(503) + const body = await response.json() + expect(body).toEqual({ + error: 'upstream_timeout', + message: 'Overloaded. Request could not be processed', + }) + }) + }) + describe('Credit validation', () => { it('returns 402 when user has insufficient credits', async () => { const req = new NextRequest( diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index d8e8840a00..c6e8bc6715 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -16,6 +16,14 @@ import { } from '@/llm-api/openrouter' import { extractApiKeyFromHeader } from '@/util/auth' +/** + * User IDs that are blocked from using the chat completions API. + * Returns a cryptic error to avoid revealing the block. + */ +const BLOCKED_USER_IDS: string[] = [ + '5e5aa538-92c8-4051-b0ec-5f75dbd69767', +] + import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' import type { GetUserUsageDataFn } from '@codebuff/common/types/contracts/billing' @@ -149,6 +157,14 @@ export async function postChatCompletions(params: { const userId = userInfo.id + // Check if user is blocked. Return fake overloaded error to avoid revealing the block. + if (BLOCKED_USER_IDS.includes(userId)) { + return NextResponse.json( + { error: 'upstream_timeout', message: 'Overloaded. Request could not be processed' }, + { status: 503 }, + ) + } + // Track API request trackEvent({ event: AnalyticsEvent.CHAT_COMPLETIONS_REQUEST, From f4da47aa1cdd429e37294a9bcf0eee41ff5a1c12 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 13 Dec 2025 22:16:27 -0800 Subject: [PATCH 011/885] Mention ask user tool in system prompt --- .agents/base2/base2.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts index 5dd3f3e5d9..410c8064b0 100644 --- a/.agents/base2/base2.ts +++ b/.agents/base2/base2.ts @@ -85,7 +85,7 @@ export function createBase2( - **Validate assumptions:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing. - **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions. - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it. -- **Stop and ask for guidance:** You should feel free to stop and ask the user for guidance if you're stuck or don't know what to try next, or need a clarification. +- **Ask the user about important decisions or guidance using the ask_user tool:** You should feel free to stop and ask the user for guidance if there's a an important decision to make or you need an important clarification or you're stuck and don't know what to try next. Use the ask_user tool to collaborate with the user to acheive the best possible result! Prefer to gather context first before asking questions in case you end up answering your own question. - **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, git commit, running any scripts -- especially ones that could alter production environments (!), installing packages globally, etc). Don't run any of these effectful commands unless the user explicitly asks you to. - **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it. From 9b5da4e04be526d006fa33bccef73acb5a2c623d Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 13 Dec 2025 22:20:10 -0800 Subject: [PATCH 012/885] Fix tools reordering in ui --- .../__tests__/message-block-helpers.test.ts | 52 ++++++++++- cli/src/utils/message-block-helpers.ts | 86 ++++++++++++++----- 2 files changed, 116 insertions(+), 22 deletions(-) diff --git a/cli/src/utils/__tests__/message-block-helpers.test.ts b/cli/src/utils/__tests__/message-block-helpers.test.ts index 80858bdd32..262bd77355 100644 --- a/cli/src/utils/__tests__/message-block-helpers.test.ts +++ b/cli/src/utils/__tests__/message-block-helpers.test.ts @@ -634,7 +634,7 @@ describe('moveSpawnAgentBlock', () => { expect(parent.blocks[0].agentId).toBe('real') }) - test('appends when parent missing', () => { + test('updates in place when parent missing to preserve order', () => { const blocks: ContentBlock[] = [ { type: 'agent', @@ -649,7 +649,55 @@ describe('moveSpawnAgentBlock', () => { { type: 'text', content: 'other' }, ] const result = moveSpawnAgentBlock(blocks, 'temp', 'real', 'missing') - expect(result[result.length - 1]).toMatchObject({ type: 'agent' }) + // Block should stay in its original position (index 0), not move to end + expect(result[0]).toMatchObject({ type: 'agent', agentId: 'real' }) + expect(result[1]).toMatchObject({ type: 'text', content: 'other' }) + }) + + test('preserves block order when multiple agents resolve out of order', () => { + // Simulate spawn_agents creating 3 placeholder blocks in order + const blocks: ContentBlock[] = [ + { + type: 'agent', + agentId: 'toolcall-0', + agentName: 'Agent A', + agentType: 'file-picker', + content: '', + status: 'running', + blocks: [], + initialPrompt: '', + }, + { + type: 'agent', + agentId: 'toolcall-1', + agentName: 'Agent B', + agentType: 'code-searcher', + content: '', + status: 'running', + blocks: [], + initialPrompt: '', + }, + { + type: 'agent', + agentId: 'toolcall-2', + agentName: 'Agent C', + agentType: 'commander', + content: '', + status: 'running', + blocks: [], + initialPrompt: '', + }, + ] + + // Agents resolve in different order: C first, then A, then B + let result = moveSpawnAgentBlock(blocks, 'toolcall-2', 'real-c') + result = moveSpawnAgentBlock(result, 'toolcall-0', 'real-a') + result = moveSpawnAgentBlock(result, 'toolcall-1', 'real-b') + + // Order should be preserved: A, B, C + expect(result[0]).toMatchObject({ agentId: 'real-a' }) + expect(result[1]).toMatchObject({ agentId: 'real-b' }) + expect(result[2]).toMatchObject({ agentId: 'real-c' }) }) }) diff --git a/cli/src/utils/message-block-helpers.ts b/cli/src/utils/message-block-helpers.ts index bd152de6ba..9edd0a6931 100644 --- a/cli/src/utils/message-block-helpers.ts +++ b/cli/src/utils/message-block-helpers.ts @@ -316,6 +316,48 @@ export const nestBlockUnderParent = ( return { blocks: updatedBlocks, parentFound } } +/** + * Checks if a block with the given targetId exists anywhere in the children of the blocks. + */ +const findBlockInChildren = ( + blocks: ContentBlock[], + targetId: string, +): boolean => { + for (const block of blocks) { + if (block.type === 'agent' && block.agentId === targetId) { + return true + } + if (block.type === 'agent' && block.blocks) { + if (findBlockInChildren(block.blocks, targetId)) { + return true + } + } + } + return false +} + +/** + * Checks if a block with the given agentId is already nested under the specified parent. + */ +const checkBlockIsUnderParent = ( + blocks: ContentBlock[], + targetAgentId: string, + parentAgentId: string, +): boolean => { + for (const block of blocks) { + if (block.type === 'agent' && block.agentId === parentAgentId) { + // Found the parent, check if target is anywhere in its children + return findBlockInChildren(block.blocks || [], targetAgentId) + } else if (block.type === 'agent' && block.blocks) { + // Recurse into other agent blocks to find the parent + if (checkBlockIsUnderParent(block.blocks, targetAgentId, parentAgentId)) { + return true + } + } + } + return false +} + /** * Extracts a block with given agentId from nested blocks structure. * Returns the remaining blocks and the extracted block (if found). @@ -356,8 +398,6 @@ export const moveSpawnAgentBlock = ( params?: Record, prompt?: string, ): ContentBlock[] => { - const { remainingBlocks, extractedBlock } = extractBlockById(blocks, tempId) - const updateAgentBlock = (block: ContentBlock): ContentBlock => { if (block.type !== 'agent') { return block @@ -378,28 +418,34 @@ export const moveSpawnAgentBlock = ( return updatedBlock } - if (!extractedBlock) { - return updateBlocksRecursively(blocks, tempId, updateAgentBlock) - } - - if (extractedBlock.type !== 'agent') { - return remainingBlocks - } - - const blockToMove = updateAgentBlock(extractedBlock) - + // If there's a parentId, we need to move the block under the parent. + // First check if the block is already under the correct parent. if (parentId) { - const { blocks: nestedBlocks, parentFound } = nestBlockUnderParent( - remainingBlocks, - parentId, - blockToMove, - ) - if (parentFound) { - return nestedBlocks + const isAlreadyUnderParent = checkBlockIsUnderParent(blocks, tempId, parentId) + if (isAlreadyUnderParent) { + // Block is already under the correct parent, just update it in place + return updateBlocksRecursively(blocks, tempId, updateAgentBlock) + } + + // Block needs to be moved under the parent - extract and nest + const { remainingBlocks, extractedBlock } = extractBlockById(blocks, tempId) + if (extractedBlock && extractedBlock.type === 'agent') { + const blockToMove = updateAgentBlock(extractedBlock) + const { blocks: nestedBlocks, parentFound } = nestBlockUnderParent( + remainingBlocks, + parentId, + blockToMove, + ) + if (parentFound) { + return nestedBlocks + } + // Parent not found, update in place instead of appending to end + return updateBlocksRecursively(blocks, tempId, updateAgentBlock) } } - return [...remainingBlocks, blockToMove] + // No parentId or block not found - just update in place to preserve order + return updateBlocksRecursively(blocks, tempId, updateAgentBlock) } /** From 06ecdd5f1df886a025282360da7705013f43e352 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 13 Dec 2025 23:18:51 -0800 Subject: [PATCH 013/885] Restore clicking a suggestion to actually send it (but preserve your input text!) --- cli/src/chat.tsx | 51 +++++++++---------- .../components/tools/suggest-followups.tsx | 4 +- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index afbe900e21..8aaf0d0f62 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -115,7 +115,8 @@ export const Chat = ({ // Message pagination - show last N messages with "Load previous" button const MESSAGE_BATCH_SIZE = 15 - const [visibleMessageCount, setVisibleMessageCount] = useState(MESSAGE_BATCH_SIZE) + const [visibleMessageCount, setVisibleMessageCount] = + useState(MESSAGE_BATCH_SIZE) const queryClient = useQueryClient() const [, startUiTransition] = useTransition() @@ -638,17 +639,16 @@ export const Chat = ({ ensureQueueActiveBeforeSubmit() const preserveInput = options?.preserveInputValue === true - const previousInputValue = - preserveInput - ? (() => { - const { - inputValue: text, - cursorPosition, - lastEditDueToNav, - } = useChatStore.getState() - return { text, cursorPosition, lastEditDueToNav } - })() - : null + const previousInputValue = preserveInput + ? (() => { + const { + inputValue: text, + cursorPosition, + lastEditDueToNav, + } = useChatStore.getState() + return { text, cursorPosition, lastEditDueToNav } + })() + : null const preservedPendingImages = preserveInput && useChatStore.getState().pendingImages.length > 0 ? [...useChatStore.getState().pendingImages] @@ -717,11 +717,9 @@ export const Chat = ({ // Mark this followup as clicked (persisted per toolCallId) useChatStore.getState().markFollowupClicked(toolCallId, index) - // Fill the input with the followup prompt so the user can modify it before sending - setInputValue({ - text: prompt, - cursorPosition: prompt.length, - lastEditDueToNav: false, + // Send the followup prompt directly, preserving the user's current input + void onSubmitPrompt(prompt, agentMode, { + preserveInputValue: true, }) } @@ -732,7 +730,7 @@ export const Chat = ({ handleFollowupClick, ) } - }, [setInputValue]) + }, [onSubmitPrompt, agentMode]) // handleSlashItemClick is defined later after feedback/publish stores are available @@ -839,7 +837,12 @@ export const Chat = ({ } } }, - [saveCurrentInput, openFeedbackForMessage, openPublishMode, preSelectAgents], + [ + saveCurrentInput, + openFeedbackForMessage, + openPublishMode, + preSelectAgents, + ], ) // Click handler for slash menu items - executes command immediately @@ -943,12 +946,7 @@ export const Chat = ({ const handleSubmit = useCallback(async () => { const result = await onSubmitPrompt(inputValue, agentMode) handleCommandResult(result) - }, [ - onSubmitPrompt, - inputValue, - agentMode, - handleCommandResult, - ]) + }, [onSubmitPrompt, inputValue, agentMode, handleCommandResult]) const totalMentionMatches = agentMatches.length + fileMatches.length const historyNavUpEnabled = @@ -1256,7 +1254,8 @@ export const Chat = ({ return topLevelMessages.slice(-visibleMessageCount) }, [topLevelMessages, visibleMessageCount]) - const hiddenMessageCount = topLevelMessages.length - visibleTopLevelMessages.length + const hiddenMessageCount = + topLevelMessages.length - visibleTopLevelMessages.length const handleLoadPreviousMessages = useCallback(() => { setVisibleMessageCount((prev) => prev + MESSAGE_BATCH_SIZE) diff --git a/cli/src/components/tools/suggest-followups.tsx b/cli/src/components/tools/suggest-followups.tsx index db1f494a2d..3da44cbf86 100644 --- a/cli/src/components/tools/suggest-followups.tsx +++ b/cli/src/components/tools/suggest-followups.tsx @@ -50,8 +50,8 @@ const FollowupLine = ({ const handleMouseOut = useCallback(() => onHover(null), [onHover]) // Compute effective hover state declaratively - // Show hover effects if actually hovered AND not disabled (clicked items can still be hovered) - const showHoverState = isHovered && !disabled + // Show hover effects if actually hovered AND not disabled AND not already clicked + const showHoverState = isHovered && !disabled && !isClicked const hasLabel = Boolean(followup.label) const displayText = hasLabel ? followup.label : followup.prompt From ace7f65ac30953532b189e156eb2afeca1b4eaba Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 14 Dec 2025 22:17:43 -0800 Subject: [PATCH 014/885] Block another user that disputed a charge --- web/src/app/api/v1/chat/completions/_post.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/web/src/app/api/v1/chat/completions/_post.ts b/web/src/app/api/v1/chat/completions/_post.ts index c6e8bc6715..75f856a03b 100644 --- a/web/src/app/api/v1/chat/completions/_post.ts +++ b/web/src/app/api/v1/chat/completions/_post.ts @@ -22,6 +22,7 @@ import { extractApiKeyFromHeader } from '@/util/auth' */ const BLOCKED_USER_IDS: string[] = [ '5e5aa538-92c8-4051-b0ec-5f75dbd69767', + '5972546e-648d-4da6-991f-17c42b037329', ] import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' @@ -160,7 +161,10 @@ export async function postChatCompletions(params: { // Check if user is blocked. Return fake overloaded error to avoid revealing the block. if (BLOCKED_USER_IDS.includes(userId)) { return NextResponse.json( - { error: 'upstream_timeout', message: 'Overloaded. Request could not be processed' }, + { + error: 'upstream_timeout', + message: 'Overloaded. Request could not be processed', + }, { status: 503 }, ) } From b217090ceb64b47c3cc8846f7d259f03d0b429a3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sun, 14 Dec 2025 22:44:22 -0800 Subject: [PATCH 015/885] Gpt 5.2 + propose_str_replace, propose_write_file (#393) --- .agents/base2/base2.ts | 14 +- .agents/context-pruner.ts | 6 + .../editor/best-of-n/best-of-n-selector2.ts | 144 ++++ .../editor/best-of-n/editor-implementor.ts | 2 +- .../best-of-n/editor-implementor2-gpt-5.ts | 7 + .../editor/best-of-n/editor-implementor2.ts | 156 ++++ .../editor/best-of-n/editor-multi-prompt.ts | 28 +- .../editor/best-of-n/editor-multi-prompt2.ts | 251 ++++++ .agents/editor/editor.ts | 4 +- .agents/reviewer/code-reviewer-gpt-5.ts | 2 +- .agents/reviewer/code-reviewer.ts | 2 +- .agents/types/tools.ts | 33 + cli/src/utils/create-run-config.ts | 17 +- cli/src/utils/implementor-helpers.ts | 21 +- common/src/constants/agents.ts | 2 +- common/src/tools/constants.ts | 2 + common/src/tools/list.ts | 4 + .../tools/params/tool/propose-str-replace.ts | 86 +++ .../tools/params/tool/propose-write-file.ts | 71 ++ package.json | 2 +- .../src/__tests__/loop-agent-steps.test.ts | 1 - .../src/__tests__/main-prompt.test.ts | 1 - .../src/__tests__/malformed-tool-call.test.ts | 331 -------- .../prompt-caching-subagents.test.ts | 1 - .../src/__tests__/propose-tools.test.ts | 716 ++++++++++++++++++ .../spawn-agents-message-history.test.ts | 31 +- .../spawn-agents-permissions.test.ts | 22 +- packages/agent-runtime/src/run-agent-step.ts | 1 + .../src/run-programmatic-step.ts | 3 + .../agent-runtime/src/tools/handlers/list.ts | 4 + .../handlers/tool/propose-str-replace.ts | 108 +++ .../tools/handlers/tool/propose-write-file.ts | 87 +++ .../handlers/tool/proposed-content-store.ts | 64 ++ .../tools/handlers/tool/spawn-agent-inline.ts | 17 +- .../tools/handlers/tool/spawn-agent-utils.ts | 97 ++- .../src/tools/handlers/tool/spawn-agents.ts | 11 +- .../src/tools/handlers/tool/write-file.ts | 2 +- .../agent-runtime/src/tools/stream-parser.ts | 7 - .../agent-runtime/src/tools/tool-executor.ts | 110 +-- sdk/src/run.ts | 1 - 40 files changed, 2005 insertions(+), 464 deletions(-) create mode 100644 .agents/editor/best-of-n/best-of-n-selector2.ts create mode 100644 .agents/editor/best-of-n/editor-implementor2-gpt-5.ts create mode 100644 .agents/editor/best-of-n/editor-implementor2.ts create mode 100644 .agents/editor/best-of-n/editor-multi-prompt2.ts create mode 100644 common/src/tools/params/tool/propose-str-replace.ts create mode 100644 common/src/tools/params/tool/propose-write-file.ts delete mode 100644 packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts create mode 100644 packages/agent-runtime/src/__tests__/propose-tools.test.ts create mode 100644 packages/agent-runtime/src/tools/handlers/tool/propose-str-replace.ts create mode 100644 packages/agent-runtime/src/tools/handlers/tool/propose-write-file.ts create mode 100644 packages/agent-runtime/src/tools/handlers/tool/proposed-content-store.ts diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts index 410c8064b0..0f6582f90c 100644 --- a/.agents/base2/base2.ts +++ b/.agents/base2/base2.ts @@ -54,6 +54,8 @@ export function createBase2( !isFast && 'suggest_followups', 'str_replace', 'write_file', + 'propose_str_replace', + 'propose_write_file', 'ask_user', 'set_output', ), @@ -68,7 +70,7 @@ export function createBase2( isDefault && 'thinker', isLite && 'editor-gpt-5', isDefault && 'editor', - isMax && 'editor-multi-prompt', + isMax && 'editor-multi-prompt2', isMax && 'thinker-best-of-n-opus', !isLite && 'code-reviewer', 'context-pruner', @@ -127,7 +129,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u (isDefault || isMax) && `- Spawn the ${isDefault ? 'thinker' : 'thinker-best-of-n-opus'} after gathering context to solve complex problems or when the user asks you to think about a problem.`, isMax && - `- Spawn the editor-multi-prompt agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`, + `- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement the changes after you have gathered all the context you need. You must spawn this agent for non-trivial changes, since it writes much better code than you would with the str_replace or write_file tools. Don't spawn the editor in parallel with context-gathering agents.`, '- Spawn commanders sequentially if the second command depends on the the first.', !isFast && !isLite && @@ -181,7 +183,7 @@ ${ ? '[ You implement the changes using the str_replace or write_file tools ]' : isLite ? '[ You implement the changes using the editor-gpt-5 agent ]' - : '[ You implement the changes using the editor-multi-prompt agent ]' + : '[ You implement the changes using the editor-multi-prompt2 agent ]' } ${ @@ -291,6 +293,8 @@ ${buildArray( EXPLORE_PROMPT, isMax && `- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`, + isMax && + 'If needed, use the ask_user tool to ask the user for clarification on their request or alternate implementation strategies. It is good to get context on the codebase before asking questions so you can ask informed questions.', (isDefault || isMax) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, isDefault && @@ -300,7 +304,7 @@ ${buildArray( isDefault && '- IMPORTANT: You must spawn the editor agent to implement the changes after you have gathered all the context you need. This agent will do the best job of implementing the changes so you must spawn it for all non-trivial changes. Do not pass any prompt or params to the editor agent when spawning it. It will make its own best choices of what to do.', isMax && - `- IMPORTANT: You must spawn the editor-multi-prompt agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`, + `- IMPORTANT: You must spawn the editor-multi-prompt2 agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`, isFast && '- Implement the changes using the str_replace or write_file tools. Implement all the changes in one go.', isFast && @@ -334,7 +338,7 @@ function buildImplementationStepPrompt({ isMax && `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`, isMax && - `You must spawn the 'editor-multi-prompt' agent to implement code changes, since it will generate the best code changes.`, + `You must spawn the 'editor-multi-prompt2' agent to implement code changes, since it will generate the best code changes.`, (isDefault || isMax) && 'Spawn code-reviewer to review the changes after you have implemented the changes and in parallel with typechecking or testing.', `After completing the user request, summarize your changes in a sentence${isFast ? '' : ' or a few short bullet points'}.${isSonnet ? " Don't create any summary markdown files or example documentation files, unless asked by the user." : ''} Don't repeat yourself, especially if you have already concluded and summarized the changes in a previous step -- just end your turn.`, diff --git a/.agents/context-pruner.ts b/.agents/context-pruner.ts index 3628a1006d..e7d29a7bbe 100644 --- a/.agents/context-pruner.ts +++ b/.agents/context-pruner.ts @@ -172,6 +172,12 @@ const definition: AgentDefinition = { if (lastInstructionsPromptIndex !== -1) { currentMessages.splice(lastInstructionsPromptIndex, 1) } + const lastSubagentSpawnIndex = currentMessages.findLastIndex((message) => + message.tags?.includes('SUBAGENT_SPAWN'), + ) + if (lastSubagentSpawnIndex !== -1) { + currentMessages.splice(lastSubagentSpawnIndex, 1) + } // Initial check - if already under limit, return const initialTokens = countMessagesTokens(currentMessages) diff --git a/.agents/editor/best-of-n/best-of-n-selector2.ts b/.agents/editor/best-of-n/best-of-n-selector2.ts new file mode 100644 index 0000000000..2b7c52ead1 --- /dev/null +++ b/.agents/editor/best-of-n/best-of-n-selector2.ts @@ -0,0 +1,144 @@ +import { + PLACEHOLDER, + type SecretAgentDefinition, +} from '../../types/secret-agent-definition' +import { publisher } from '../../constants' + +export const createBestOfNSelector2 = (options: { + model: 'sonnet' | 'opus' | 'gpt-5' +}): Omit => { + const { model } = options + const isSonnet = model === 'sonnet' + const isOpus = model === 'opus' + const isGpt5 = model === 'gpt-5' + return { + publisher, + model: isSonnet + ? 'anthropic/claude-sonnet-4.5' + : isOpus + ? 'anthropic/claude-opus-4.5' + : 'openai/gpt-5.2', + ...(isGpt5 && { + reasoningOptions: { + effort: 'high', + }, + }), + displayName: isGpt5 + ? 'Best-of-N GPT-5 Diff Selector' + : isOpus + ? 'Best-of-N Opus Diff Selector' + : 'Best-of-N Sonnet Diff Selector', + spawnerPrompt: + 'Analyzes multiple implementation proposals (as unified diffs) and selects the best one', + + includeMessageHistory: true, + inheritParentSystemPrompt: true, + + toolNames: ['set_output'], + spawnableAgents: [], + + inputSchema: { + params: { + type: 'object', + properties: { + implementations: { + type: 'array', + items: { + type: 'object', + properties: { + id: { type: 'string' }, + strategy: { type: 'string' }, + content: { type: 'string', description: 'Unified diff of the proposed changes' }, + }, + required: ['id', 'content'], + }, + }, + }, + required: ['implementations'], + }, + }, + outputMode: 'structured_output', + outputSchema: { + type: 'object', + properties: { + implementationId: { + type: 'string', + description: 'The id of the chosen implementation', + }, + reason: { + type: 'string', + description: + 'An extremely short (1 sentence) description of why this implementation was chosen', + }, + suggestedImprovements: { + type: 'string', + description: + 'A summary of suggested improvements from non-chosen implementations that could enhance the selected implementation. You can also include any new ideas you have to improve upon the selected implementation. Leave empty if no valuable improvements were found.', + }, + }, + required: ['implementationId', 'reason', 'suggestedImprovements'], + }, + + instructionsPrompt: `As part of the best-of-n workflow of agents, you are the implementation selector agent. + +## Task Instructions + +You have been provided with multiple implementation proposals via params. Each implementation shows a UNIFIED DIFF of the proposed changes. + +The implementations are available in the params.implementations array, where each has: +- id: A unique identifier for the implementation (A, B, C, etc.) +- strategy: The strategy/approach used for this implementation +- content: The unified diff showing what would change + +Your task is to: +1. Analyze each implementation's diff carefully, compare them against the original user requirements +2. Select the best implementation +3. Identify the best ideas/techniques from the NON-CHOSEN implementations that could improve the selected implementation + +Evaluate each based on (in order of importance): +- Correctness and completeness in fulfilling the user's request +- Simplicity and maintainability +- Code quality and adherence to project conventions +- Proper reuse of existing code (helper functions, libraries, etc.) +- Minimal changes to existing code (fewer files changed, fewer lines changed) +- Clarity and readability + +## Analyzing Non-Chosen Implementations + +After selecting the best implementation, look at each non-chosen implementation and identify any valuable aspects that could enhance the selected implementation. These might include: +- More elegant code patterns or abstractions +- Simplified logic or reuse of existing code +- Additional edge case handling +- Better naming or organization +- Useful comments or documentation +- Additional features that align with the user's request + +Only include improvements that are genuinely valuable and compatible with the selected implementation. If a non-chosen implementation has no useful improvements to offer, don't include it. + +## User Request + +For context, here is the original user request again: + +${PLACEHOLDER.USER_INPUT_PROMPT} + + +Try to select an implementation that fulfills all the requirements in the user's request. + +## Response Format + +${ + isSonnet || isOpus + ? `Use tags to write out your thoughts about the implementations as needed to pick the best implementation. IMPORTANT: You should think really really hard to make sure you pick the absolute best implementation! Also analyze the non-chosen implementations for any valuable techniques or approaches that could improve the selected one. + +Then, do not write any other explanations AT ALL. You should directly output a single tool call to set_output with the selected implementationId, short reason, and suggestedImprovements array.` + : `Output a single tool call to set_output with the selected implementationId, reason, and suggestedImprovements. Do not write anything else.` +}`, + } +} + +const definition: SecretAgentDefinition = { + ...createBestOfNSelector2({ model: 'opus' }), + id: 'best-of-n-selector2', +} + +export default definition diff --git a/.agents/editor/best-of-n/editor-implementor.ts b/.agents/editor/best-of-n/editor-implementor.ts index f2f225bbdc..522110a6a3 100644 --- a/.agents/editor/best-of-n/editor-implementor.ts +++ b/.agents/editor/best-of-n/editor-implementor.ts @@ -19,7 +19,7 @@ export const createBestOfNImplementor = (options: { ? 'anthropic/claude-opus-4.5' : isGemini ? 'google/gemini-3-pro-preview' - : 'openai/gpt-5.1', + : 'openai/gpt-5.2', displayName: 'Implementation Generator', spawnerPrompt: 'Generates a complete implementation plan with all code changes', diff --git a/.agents/editor/best-of-n/editor-implementor2-gpt-5.ts b/.agents/editor/best-of-n/editor-implementor2-gpt-5.ts new file mode 100644 index 0000000000..460057bf97 --- /dev/null +++ b/.agents/editor/best-of-n/editor-implementor2-gpt-5.ts @@ -0,0 +1,7 @@ +import { createBestOfNImplementor2 } from './editor-implementor2' + +const definition = { + ...createBestOfNImplementor2({ model: 'gpt-5' }), + id: 'editor-implementor2-gpt-5', +} +export default definition diff --git a/.agents/editor/best-of-n/editor-implementor2.ts b/.agents/editor/best-of-n/editor-implementor2.ts new file mode 100644 index 0000000000..8456ed2e13 --- /dev/null +++ b/.agents/editor/best-of-n/editor-implementor2.ts @@ -0,0 +1,156 @@ +import { publisher } from '../../constants' + +import type { SecretAgentDefinition } from '../../types/secret-agent-definition' + +export const createBestOfNImplementor2 = (options: { + model: 'gpt-5' | 'opus' | 'sonnet' +}): Omit => { + const { model } = options + const isGpt5 = model === 'gpt-5' + const isOpus = model === 'opus' + return { + publisher, + model: isGpt5 + ? 'openai/gpt-5.2' + : isOpus + ? 'anthropic/claude-opus-4.5' + : 'anthropic/claude-sonnet-4.5', + displayName: isGpt5 + ? 'GPT-5 Implementation Generator v2' + : isOpus + ? 'Opus Implementation Generator v2' + : 'Sonnet Implementation Generator v2', + spawnerPrompt: + 'Generates a complete implementation using propose_* tools that draft changes without applying them', + + includeMessageHistory: true, + inheritParentSystemPrompt: true, + + toolNames: ['propose_write_file', 'propose_str_replace'], + spawnableAgents: [], + + inputSchema: {}, + outputMode: 'structured_output', + + instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request. + +Your task is to write out ALL the code changes needed to complete the user's request. + +IMPORTANT: Use propose_str_replace and propose_write_file tools to make your edits. These tools draft changes without actually applying them - they will be reviewed first. + +You can make multiple tool calls across multiple steps to complete the implementation. + +After your edit tool calls, you can optionally mention any follow-up steps to take, like deleting a file, or a specific way to validate the changes. + +Your implementation should: +- Be complete and comprehensive +- Include all necessary changes to fulfill the user's request +- Follow the project's conventions and patterns +- Be as simple and maintainable as possible +- Reuse existing code wherever possible +- Be well-structured and organized + +More style notes: +- Extra try/catch blocks clutter the code -- use them sparingly. +- Optional arguments are code smell and worse than required arguments. +- New components often should be added to a new file, not added to an existing file. + +Write out your complete implementation now.`, + + handleSteps: function* ({ agentState: initialAgentState }) { + const initialMessageHistoryLength = + initialAgentState.messageHistory.length + + // Helper to check if a message is empty (no tool calls and empty/no text) + const isEmptyAssistantMessage = (message: any): boolean => { + if (message.role !== 'assistant' || !Array.isArray(message.content)) { + return false + } + const hasToolCalls = message.content.some( + (part: any) => part.type === 'tool-call', + ) + if (hasToolCalls) { + return false + } + // Check if all text parts are empty or there are no text parts + const textParts = message.content.filter( + (part: any) => part.type === 'text', + ) + if (textParts.length === 0) { + return true + } + return textParts.every((part: any) => !part.text || !part.text.trim()) + } + + const { agentState } = yield 'STEP_ALL' + + let postMessages = agentState.messageHistory.slice( + initialMessageHistoryLength, + ) + + // Retry if no messages or if the only message is empty (no tool calls and empty text) + if (postMessages.length === 0) { + const { agentState: postMessagesAgentState } = yield 'STEP_ALL' + postMessages = postMessagesAgentState.messageHistory.slice( + initialMessageHistoryLength, + ) + } else if ( + postMessages.length === 1 && + isEmptyAssistantMessage(postMessages[0]) + ) { + const { agentState: postMessagesAgentState } = yield 'STEP_ALL' + postMessages = postMessagesAgentState.messageHistory.slice( + initialMessageHistoryLength, + ) + } + + // Extract tool calls from assistant messages + // Handle both 'input' and 'args' property names for compatibility + const toolCalls: { toolName: string; input: any }[] = [] + for (const message of postMessages) { + if (message.role !== 'assistant' || !Array.isArray(message.content)) + continue + for (const part of message.content) { + if (part.type === 'tool-call') { + toolCalls.push({ + toolName: part.toolName, + input: part.input ?? (part as any).args ?? {}, + }) + } + } + } + + // Extract tool results (unified diffs) from tool messages + const toolResults: any[] = [] + for (const message of postMessages) { + if (message.role !== 'tool' || !Array.isArray(message.content)) continue + for (const part of message.content) { + if (part.type === 'json' && part.value) { + toolResults.push(part.value) + } + } + } + + // Concatenate all unified diffs for the selector to review + const unifiedDiffs = toolResults + .filter((result: any) => result.unifiedDiff) + .map((result: any) => `--- ${result.file} ---\n${result.unifiedDiff}`) + .join('\n\n') + + yield { + toolName: 'set_output', + input: { + toolCalls, + toolResults, + unifiedDiffs, + }, + includeToolCall: false, + } + }, + } +} +const definition = { + ...createBestOfNImplementor2({ model: 'opus' }), + id: 'editor-implementor2', +} +export default definition diff --git a/.agents/editor/best-of-n/editor-multi-prompt.ts b/.agents/editor/best-of-n/editor-multi-prompt.ts index e76a3abea3..873d751e3a 100644 --- a/.agents/editor/best-of-n/editor-multi-prompt.ts +++ b/.agents/editor/best-of-n/editor-multi-prompt.ts @@ -29,7 +29,11 @@ export function createMultiPromptEditor(): Omit { 'set_messages', 'set_output', ], - spawnableAgents: ['best-of-n-selector-opus', 'editor-implementor-opus'], + spawnableAgents: [ + 'best-of-n-selector-opus', + 'editor-implementor-opus', + 'editor-implementor-gpt-5', + ], inputSchema: { params: { @@ -92,10 +96,16 @@ function* handleStepsMultiPrompt({ } satisfies ToolCall<'set_messages'> // Spawn one opus implementor per prompt - const implementorAgents = prompts.map((prompt) => ({ - agent_type: 'editor-implementor-opus', - prompt: `Strategy: ${prompt}`, - })) + const implementorAgents: { agent_type: string; prompt?: string }[] = + prompts.map((prompt) => ({ + agent_type: 'editor-implementor-opus', + prompt: `Strategy: ${prompt}`, + })) + + // Always spawn an additional gpt-5 implementor with no prompt + implementorAgents.push({ + agent_type: 'editor-implementor-gpt-5', + }) // Spawn all implementor agents const { toolResult: implementorResults } = yield { @@ -111,16 +121,12 @@ function* handleStepsMultiPrompt({ implementorResults, ) as any[] - logger.info( - { implementorResults, spawnedImplementations, prompts }, - 'spawnedImplementations', - ) - // Extract all the implementations from the results const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + const strategies = [...prompts, prompts[0]] const implementations = spawnedImplementations.map((result, index) => ({ id: letters[index], - strategy: prompts[index], + strategy: strategies[index], content: 'errorMessage' in result ? `Error: ${result.errorMessage}` diff --git a/.agents/editor/best-of-n/editor-multi-prompt2.ts b/.agents/editor/best-of-n/editor-multi-prompt2.ts new file mode 100644 index 0000000000..1921c3b760 --- /dev/null +++ b/.agents/editor/best-of-n/editor-multi-prompt2.ts @@ -0,0 +1,251 @@ +import { publisher } from '../../constants' + +import type { AgentStepContext, ToolCall } from '../../types/agent-definition' +import type { SecretAgentDefinition } from '../../types/secret-agent-definition' + +/** + * Creates a multi-prompt editor agent that spawns one implementor per prompt. + * Each prompt specifies a slightly different implementation strategy/approach. + */ +export function createMultiPromptEditor(): Omit { + return { + publisher, + model: 'anthropic/claude-opus-4.5', + displayName: 'Multi-Prompt Editor', + spawnerPrompt: + 'Edits code by spawning multiple implementor agents with different strategy prompts, selects the best implementation, and applies the changes. It also returns further suggested improvements which you should take seriously and act on. Pass as input an array of short prompts specifying different implementation approaches or strategies. Make sure to read any files intended to be edited before spawning this agent.', + + includeMessageHistory: true, + inheritParentSystemPrompt: true, + + toolNames: [ + 'spawn_agents', + 'str_replace', + 'write_file', + 'set_messages', + 'set_output', + ], + spawnableAgents: [ + 'best-of-n-selector2', + 'editor-implementor2', + 'editor-implementor2-gpt-5', + ], + + inputSchema: { + params: { + type: 'object', + properties: { + prompts: { + type: 'array', + items: { type: 'string' }, + description: + 'Array of short prompts, each specifying a slightly different implementation strategy or approach. Example: ["use a cache for the data", "don\t cache anything", "make the minimal possible changes", "modularize your solution by creating new files"]', + }, + }, + required: ['prompts'], + }, + }, + outputMode: 'structured_output', + + handleSteps: handleStepsMultiPrompt, + } +} + +function* handleStepsMultiPrompt({ + agentState, + params, +}: AgentStepContext): ReturnType< + NonNullable +> { + const prompts = (params?.prompts as string[] | undefined) ?? [] + + if (prompts.length === 0) { + yield { + toolName: 'set_output', + input: { + error: 'No prompts provided. Please pass an array of strategy prompts.', + }, + } satisfies ToolCall<'set_output'> + return + } + + // Only keep messages up to just before the last user role message (skips input prompt, instructions prompt). + const { messageHistory: initialMessageHistory } = agentState + let userMessageIndex = initialMessageHistory.length + + while (userMessageIndex > 0) { + const message = initialMessageHistory[userMessageIndex - 1] + if (message.role === 'user') { + userMessageIndex-- + } else { + break + } + } + const updatedMessageHistory = initialMessageHistory.slice(0, userMessageIndex) + yield { + toolName: 'set_messages', + input: { + messages: updatedMessageHistory, + }, + includeToolCall: false, + } satisfies ToolCall<'set_messages'> + + // Spawn one implementor2 per prompt (uses propose_* tools) + const implementorAgents: { agent_type: string; prompt?: string }[] = + prompts.map((prompt) => ({ + agent_type: 'editor-implementor2', + prompt: `Strategy: ${prompt}`, + })) + + // Always spawn an additional gpt-5 implementor first with no prompt + implementorAgents.unshift({ + agent_type: 'editor-implementor2-gpt-5', + }) + + // Spawn all implementor agents + const { toolResult: implementorResults } = yield { + toolName: 'spawn_agents', + input: { + agents: implementorAgents, + }, + includeToolCall: false, + } + + // Extract spawn results - each is structured output with { toolCalls, toolResults, unifiedDiffs } + const spawnedImplementations = extractSpawnResults<{ + toolCalls: { toolName: string; input: any }[] + toolResults: any[] + unifiedDiffs: string + }>(implementorResults) + + // Build implementations for selector using the unified diffs + const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + const strategies = [...prompts, 'default'] + const implementations = spawnedImplementations.map((result, index) => { + if (!result || 'errorMessage' in result) { + return { + id: letters[index], + strategy: strategies[index] ?? 'unknown', + content: `Error: ${result?.errorMessage ?? 'Unknown error'}`, + toolCalls: [], + } + } + + return { + id: letters[index], + strategy: strategies[index] ?? 'unknown', + content: result.unifiedDiffs ?? 'No changes proposed', + toolCalls: result.toolCalls ?? [], + } + }) + + // Spawn selector with implementations (showing unified diffs for review) + const { toolResult: selectorResult } = yield { + toolName: 'spawn_agents', + input: { + agents: [ + { + agent_type: 'best-of-n-selector2', + params: { + implementations: implementations.map((impl) => ({ + id: impl.id, + strategy: impl.strategy, + content: impl.content, + })), + }, + }, + ], + }, + includeToolCall: false, + } satisfies ToolCall<'spawn_agents'> + + const selectorOutput = extractSpawnResults<{ + implementationId: string + reason: string + suggestedImprovements: string + }>(selectorResult)[0] + + if (!selectorOutput || !selectorOutput.implementationId) { + yield { + toolName: 'set_output', + input: { error: 'Selector failed to return an implementation' }, + } satisfies ToolCall<'set_output'> + return + } + + const { implementationId } = selectorOutput + const chosenImplementation = implementations.find( + (implementation) => implementation.id === implementationId, + ) + + if (!chosenImplementation) { + yield { + toolName: 'set_output', + input: { + error: `Failed to find chosen implementation: ${implementationId}`, + }, + } satisfies ToolCall<'set_output'> + return + } + + // Apply the chosen implementation's tool calls as real edits + const appliedToolResults: any[] = [] + for (const toolCall of chosenImplementation.toolCalls) { + // Convert propose_* tool calls to real edit tool calls + const realToolName = + toolCall.toolName === 'propose_str_replace' + ? 'str_replace' + : toolCall.toolName === 'propose_write_file' + ? 'write_file' + : toolCall.toolName + + if (realToolName === 'str_replace' || realToolName === 'write_file') { + const { toolResult } = yield { + toolName: realToolName, + input: toolCall.input, + includeToolCall: true, + } satisfies ToolCall<'str_replace'> | ToolCall<'write_file'> + + appliedToolResults.push(toolResult) + } + } + + // Extract suggested improvements from selector output + const { suggestedImprovements } = selectorOutput + + // Set output with the applied results and suggested improvements + yield { + toolName: 'set_output', + input: { + chosenStrategy: chosenImplementation.strategy, + toolResults: appliedToolResults, + suggestedImprovements, + }, + includeToolCall: false, + } satisfies ToolCall<'set_output'> + + /** + * Extracts the array of subagent results from spawn_agents tool output. + */ + function extractSpawnResults(results: any[] | undefined): T[] { + if (!results || results.length === 0) return [] + + const jsonResult = results.find((r) => r.type === 'json') + if (!jsonResult?.value) return [] + + const spawnedResults = Array.isArray(jsonResult.value) + ? jsonResult.value + : [jsonResult.value] + + return spawnedResults + .map((result: any) => result?.value) + .map((result: any) => ('value' in result ? result.value : result)) + .filter(Boolean) + } +} + +const definition = { + ...createMultiPromptEditor(), + id: 'editor-multi-prompt2', +} +export default definition diff --git a/.agents/editor/editor.ts b/.agents/editor/editor.ts index 99d03721f8..a0197828a3 100644 --- a/.agents/editor/editor.ts +++ b/.agents/editor/editor.ts @@ -9,7 +9,7 @@ export const createCodeEditor = (options: { publisher, model: options.model === 'gpt-5' - ? 'openai/gpt-5.1' + ? 'openai/gpt-5.2' : 'anthropic/claude-opus-4.5', displayName: 'Code Editor', spawnerPrompt: @@ -20,7 +20,7 @@ export const createCodeEditor = (options: { includeMessageHistory: true, inheritParentSystemPrompt: true, - instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request. + instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request. Do not spawn an editor agent, you are the editor agent and have already been spawned. Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response. diff --git a/.agents/reviewer/code-reviewer-gpt-5.ts b/.agents/reviewer/code-reviewer-gpt-5.ts index 5d045d4b74..dcd97403da 100644 --- a/.agents/reviewer/code-reviewer-gpt-5.ts +++ b/.agents/reviewer/code-reviewer-gpt-5.ts @@ -4,7 +4,7 @@ import type { SecretAgentDefinition } from '../types/secret-agent-definition' const definition: SecretAgentDefinition = { ...codeReviewer, id: 'code-reviewer-gpt-5', - model: 'openai/gpt-5.1', + model: 'openai/gpt-5.2', } export default definition diff --git a/.agents/reviewer/code-reviewer.ts b/.agents/reviewer/code-reviewer.ts index 3115d51d6d..5cbb7bc6b6 100644 --- a/.agents/reviewer/code-reviewer.ts +++ b/.agents/reviewer/code-reviewer.ts @@ -25,7 +25,7 @@ export const createReviewer = ( inheritParentSystemPrompt: true, includeMessageHistory: true, - instructionsPrompt: `You are a subagent that reviews code changes. Do not use any tools. For reference, here is the original user request: + instructionsPrompt: `You are a subagent that reviews code changes and gives helpful critical feedback. Do not use any tools. For reference, here is the original user request: ${PLACEHOLDER.USER_INPUT_PROMPT} diff --git a/.agents/types/tools.ts b/.agents/types/tools.ts index 4d47cc8c4c..2c14b6e383 100644 --- a/.agents/types/tools.ts +++ b/.agents/types/tools.ts @@ -10,6 +10,8 @@ export type ToolName = | 'glob' | 'list_directory' | 'lookup_agent_info' + | 'propose_str_replace' + | 'propose_write_file' | 'read_docs' | 'read_files' | 'read_subtree' @@ -38,6 +40,8 @@ export interface ToolParamsMap { glob: GlobParams list_directory: ListDirectoryParams lookup_agent_info: LookupAgentInfoParams + propose_str_replace: ProposeStrReplaceParams + propose_write_file: ProposeWriteFileParams read_docs: ReadDocsParams read_files: ReadFilesParams read_subtree: ReadSubtreeParams @@ -149,6 +153,35 @@ export interface LookupAgentInfoParams { agentId: string } +/** + * Propose string replacements in a file without actually applying them. + */ +export interface ProposeStrReplaceParams { + /** The path to the file to edit. */ + path: string + /** Array of replacements to make. */ + replacements: { + /** The string to replace. This must be an *exact match* of the string you want to replace, including whitespace and punctuation. */ + old: string + /** The string to replace the corresponding old string with. Can be empty to delete. */ + new: string + /** Whether to allow multiple replacements of old string. */ + allowMultiple?: boolean + }[] +} + +/** + * Propose creating or editing a file without actually applying the changes. + */ +export interface ProposeWriteFileParams { + /** Path to the file relative to the **project root** */ + path: string + /** What the change is intended to do in only one sentence. */ + instructions: string + /** Edit snippet to apply to the file. */ + content: string +} + /** * Fetch up-to-date documentation for libraries and frameworks using Context7 API. */ diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts index 6b62b4272e..e43100f6d9 100644 --- a/cli/src/utils/create-run-config.ts +++ b/cli/src/utils/create-run-config.ts @@ -4,7 +4,10 @@ import { RETRY_BACKOFF_MAX_DELAY_MS, } from '@codebuff/sdk' -import { createEventHandler, createStreamChunkHandler } from './sdk-event-handlers' +import { + createEventHandler, + createStreamChunkHandler, +} from './sdk-event-handlers' import type { EventHandlerState } from './sdk-event-handlers' import type { AgentDefinition, MessageContent, RunState } from '@codebuff/sdk' @@ -68,15 +71,15 @@ export const createRunConfig = (params: CreateRunConfigParams) => { setIsRetrying(true) setStreamStatus('waiting') }, - onRetryExhausted: async ({ totalAttempts, errorCode }: RetryExhaustedArgs) => { - logger.warn( - { totalAttempts, errorCode }, - 'SDK exhausted all retries', - ) + onRetryExhausted: async ({ + totalAttempts, + errorCode, + }: RetryExhaustedArgs) => { + logger.warn({ totalAttempts, errorCode }, 'SDK exhausted all retries') }, }, agentDefinitions, - maxAgentSteps: 40, + maxAgentSteps: 100, handleStreamChunk: createStreamChunkHandler(eventHandlerState), handleEvent: createEventHandler(eventHandlerState), } diff --git a/cli/src/utils/implementor-helpers.ts b/cli/src/utils/implementor-helpers.ts index a0d91791d4..cd801a1139 100644 --- a/cli/src/utils/implementor-helpers.ts +++ b/cli/src/utils/implementor-helpers.ts @@ -5,6 +5,10 @@ export const IMPLEMENTOR_AGENT_IDS = [ 'editor-implementor-opus', 'editor-implementor-gemini', 'editor-implementor-gpt-5', + 'editor-implementor2', + 'editor-implementor2-opus', + 'editor-implementor2-gpt-5', + 'editor-implementor2-sonnet', ] as const /** @@ -24,12 +28,23 @@ export const getImplementorDisplayName = ( index?: number, ): string => { let baseName = 'Implementor' - if (agentType.includes('editor-implementor-opus')) { + // Check most specific patterns first (editor-implementor2-* with model suffix) + if (agentType.includes('editor-implementor2-gpt-5')) { + baseName = 'GPT-5.2' + } else if (agentType.includes('editor-implementor2-opus')) { baseName = 'Opus' - } else if (agentType.includes('editor-implementor-gemini')) { - baseName = 'Gemini' + } else if (agentType.includes('editor-implementor2-sonnet')) { + baseName = 'Sonnet' + } else if (agentType.includes('editor-implementor2')) { + // Generic editor-implementor2 defaults to Opus + baseName = 'Opus' + // Then check editor-implementor-* patterns (less specific) } else if (agentType.includes('editor-implementor-gpt-5')) { baseName = 'GPT-5' + } else if (agentType.includes('editor-implementor-opus')) { + baseName = 'Opus' + } else if (agentType.includes('editor-implementor-gemini')) { + baseName = 'Gemini' } else if (agentType.includes('editor-implementor')) { baseName = 'Sonnet' } diff --git a/common/src/constants/agents.ts b/common/src/constants/agents.ts index 36aafd3248..01b92e37d4 100644 --- a/common/src/constants/agents.ts +++ b/common/src/constants/agents.ts @@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce( {} as Record, ) -export const MAX_AGENT_STEPS_DEFAULT = 25 +export const MAX_AGENT_STEPS_DEFAULT = 100 diff --git a/common/src/tools/constants.ts b/common/src/tools/constants.ts index f03af6e043..bcf3138c06 100644 --- a/common/src/tools/constants.ts +++ b/common/src/tools/constants.ts @@ -31,6 +31,8 @@ export const toolNames = [ 'glob', 'list_directory', 'lookup_agent_info', + 'propose_str_replace', + 'propose_write_file', 'read_docs', 'read_files', 'read_subtree', diff --git a/common/src/tools/list.ts b/common/src/tools/list.ts index 1d1cadaaa9..bc2157b1c5 100644 --- a/common/src/tools/list.ts +++ b/common/src/tools/list.ts @@ -12,6 +12,8 @@ import { findFilesParams } from './params/tool/find-files' import { globParams } from './params/tool/glob' import { listDirectoryParams } from './params/tool/list-directory' import { lookupAgentInfoParams } from './params/tool/lookup-agent-info' +import { proposeStrReplaceParams } from './params/tool/propose-str-replace' +import { proposeWriteFileParams } from './params/tool/propose-write-file' import { readDocsParams } from './params/tool/read-docs' import { readFilesParams } from './params/tool/read-files' import { readSubtreeParams } from './params/tool/read-subtree' @@ -46,6 +48,8 @@ export const toolParams = { glob: globParams, list_directory: listDirectoryParams, lookup_agent_info: lookupAgentInfoParams, + propose_str_replace: proposeStrReplaceParams, + propose_write_file: proposeWriteFileParams, read_docs: readDocsParams, read_files: readFilesParams, read_subtree: readSubtreeParams, diff --git a/common/src/tools/params/tool/propose-str-replace.ts b/common/src/tools/params/tool/propose-str-replace.ts new file mode 100644 index 0000000000..15915e7c34 --- /dev/null +++ b/common/src/tools/params/tool/propose-str-replace.ts @@ -0,0 +1,86 @@ +import z from 'zod/v4' + +import { $getNativeToolCallExampleString, jsonToolResultSchema } from '../utils' + +import type { $ToolParams } from '../../constants' + +export const proposeUpdateFileResultSchema = z.union([ + z.object({ + file: z.string(), + message: z.string(), + unifiedDiff: z.string(), + }), + z.object({ + file: z.string(), + errorMessage: z.string(), + }), +]) + +const toolName = 'propose_str_replace' +const endsAgentStep = false +const inputSchema = z + .object({ + path: z + .string() + .min(1, 'Path cannot be empty') + .describe(`The path to the file to edit.`), + replacements: z + .array( + z + .object({ + old: z + .string() + .min(1, 'Old cannot be empty') + .describe( + `The string to replace. This must be an *exact match* of the string you want to replace, including whitespace and punctuation.`, + ), + new: z + .string() + .describe( + `The string to replace the corresponding old string with. Can be empty to delete.`, + ), + allowMultiple: z + .boolean() + .optional() + .default(false) + .describe( + 'Whether to allow multiple replacements of old string.', + ), + }) + .describe('Pair of old and new strings.'), + ) + .min(1, 'Replacements cannot be empty') + .describe('Array of replacements to make.'), + }) + .describe(`Propose string replacements in a file without actually applying them.`) +const description = ` +Propose edits to a file without actually applying them. Use this tool when you want to draft changes that will be reviewed before being applied. + +This tool works identically to str_replace but the changes are not written to disk. Instead, it returns the unified diff of what would change. Multiple propose calls on the same file will stack correctly. + +Example: +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + path: 'path/to/file', + replacements: [ + { old: 'This is the old string', new: 'This is the new string' }, + { + old: '\nfoo:', + new: '\nbar:', + allowMultiple: true, + }, + ], + }, + endsAgentStep, +})} + `.trim() + +export const proposeStrReplaceParams = { + toolName, + endsAgentStep, + description, + inputSchema, + outputSchema: jsonToolResultSchema(proposeUpdateFileResultSchema), +} satisfies $ToolParams diff --git a/common/src/tools/params/tool/propose-write-file.ts b/common/src/tools/params/tool/propose-write-file.ts new file mode 100644 index 0000000000..ab30ec0565 --- /dev/null +++ b/common/src/tools/params/tool/propose-write-file.ts @@ -0,0 +1,71 @@ +import z from 'zod/v4' + +import { proposeUpdateFileResultSchema } from './propose-str-replace' +import { $getNativeToolCallExampleString, jsonToolResultSchema } from '../utils' + +import type { $ToolParams } from '../../constants' + +const toolName = 'propose_write_file' +const endsAgentStep = false +const inputSchema = z + .object({ + path: z + .string() + .min(1, 'Path cannot be empty') + .describe(`Path to the file relative to the **project root**`), + instructions: z + .string() + .describe('What the change is intended to do in only one sentence.'), + content: z.string().describe(`Complete file content to write to the file.`), + }) + .describe( + `Propose creating or editing a file without actually applying the changes.`, + ) +const description = ` +Propose creating or editing a file without actually applying the changes. + +This tool works identically to write_file but the changes are not written to disk. Instead, it returns the unified diff of what would change. Each call overwrites the previous call. + +Format the \`content\` parameter with the entire content of the file. + +This tool is to be used in subagents. + +Example - Simple file creation: +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + path: 'new-file.ts', + instructions: 'Prints Hello, world', + content: 'console.log("Hello, world!");', + }, + endsAgentStep, +})} + +Example - Overwriting a file: +${$getNativeToolCallExampleString({ + toolName, + inputSchema, + input: { + path: 'foo.ts', + instructions: 'Update foo function', + content: `function foo() { + doSomethingNew(); +} + +function bar() { + doSomethingOld(); +} +`, + }, + endsAgentStep, +})} +`.trim() + +export const proposeWriteFileParams = { + toolName, + endsAgentStep, + description, + inputSchema, + outputSchema: jsonToolResultSchema(proposeUpdateFileResultSchema), +} satisfies $ToolParams diff --git a/package.json b/package.json index bac28addb7..d06efd5819 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "cli" ], "scripts": { - "dev": "bun scripts/dev.ts", + "dev": "bun start-cli", "up": "bun scripts/start-services.ts", "start-cli": "bun --cwd cli dev", "start-db": "bun --cwd packages/internal db:start", diff --git a/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts b/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts index 8c2fba9406..2015f8f063 100644 --- a/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts +++ b/packages/agent-runtime/src/__tests__/loop-agent-steps.test.ts @@ -141,7 +141,6 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () => ancestorRunIds: [], onResponseChunk: () => {}, signal: new AbortController().signal, - tools: {}, } }) diff --git a/packages/agent-runtime/src/__tests__/main-prompt.test.ts b/packages/agent-runtime/src/__tests__/main-prompt.test.ts index d60f0a63d7..ab87fcbe13 100644 --- a/packages/agent-runtime/src/__tests__/main-prompt.test.ts +++ b/packages/agent-runtime/src/__tests__/main-prompt.test.ts @@ -98,7 +98,6 @@ describe('mainPrompt', () => { onResponseChunk: () => {}, localAgentTemplates: mockLocalAgentTemplates, signal: new AbortController().signal, - tools: {}, } // Mock analytics and tracing diff --git a/packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts b/packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts deleted file mode 100644 index cf681129bf..0000000000 --- a/packages/agent-runtime/src/__tests__/malformed-tool-call.test.ts +++ /dev/null @@ -1,331 +0,0 @@ -import * as bigquery from '@codebuff/bigquery' -import * as analytics from '@codebuff/common/analytics' -import { TEST_USER_ID } from '@codebuff/common/old-constants' -import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime' -import { getInitialSessionState } from '@codebuff/common/types/session-state' -import * as stringUtils from '@codebuff/common/util/string' -import { - afterEach, - beforeEach, - describe, - expect, - mock, - spyOn, - test, -} from 'bun:test' - -import { createToolCallChunk, mockFileContext } from './test-utils' -import { processStream } from '../tools/stream-parser' - -import type { StreamChunk } from '@codebuff/common/types/contracts/llm' - -import type { AgentTemplate } from '../templates/types' -import type { - AgentRuntimeDeps, - AgentRuntimeScopedDeps, -} from '@codebuff/common/types/contracts/agent-runtime' -import type { ParamsOf } from '@codebuff/common/types/function-params' -import type { - Message, - ToolMessage, -} from '@codebuff/common/types/messages/codebuff-message' - -let agentRuntimeImpl: AgentRuntimeDeps = { ...TEST_AGENT_RUNTIME_IMPL } - -describe('malformed tool call error handling', () => { - let testAgent: AgentTemplate - let agentRuntimeImpl: AgentRuntimeDeps & AgentRuntimeScopedDeps - let defaultParams: ParamsOf - - beforeEach(() => { - agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL } - - testAgent = { - id: 'test-agent', - displayName: 'Test Agent', - spawnerPrompt: 'Testing malformed tool calls', - model: 'claude-3-5-sonnet-20241022', - inputSchema: {}, - outputMode: 'all_messages' as const, - includeMessageHistory: true, - inheritParentSystemPrompt: false, - mcpServers: {}, - toolNames: ['read_files', 'end_turn'], - spawnableAgents: [], - systemPrompt: 'Test system prompt', - instructionsPrompt: 'Test instructions prompt', - stepPrompt: 'Test agent step prompt', - } - - const sessionState = getInitialSessionState(mockFileContext) - const agentState = sessionState.mainAgentState - - defaultParams = { - ...agentRuntimeImpl, - stream: createMockStream([]), - runId: 'test-run-id', - ancestorRunIds: [], - agentStepId: 'test-step', - clientSessionId: 'test-session', - fingerprintId: 'test-fingerprint', - userInputId: 'test-input', - userId: TEST_USER_ID, - repoId: 'test-repo', - repoUrl: undefined, - agentTemplate: testAgent, - agentState, - localAgentTemplates: { 'test-agent': testAgent }, - fileContext: mockFileContext, - messages: [], - system: 'Test system prompt', - agentContext: {}, - onResponseChunk: mock(() => {}), - onCostCalculated: mock(async () => {}), - fullResponse: '', - prompt: '', - signal: new AbortController().signal, - tools: {}, - } - - // Mock analytics and tracing - spyOn(analytics, 'initAnalytics').mockImplementation(() => {}) - analytics.initAnalytics(TEST_AGENT_RUNTIME_IMPL) - spyOn(analytics, 'trackEvent').mockImplementation(() => {}) - spyOn(bigquery, 'insertTrace').mockImplementation(() => - Promise.resolve(true), - ) - - // Mock websocket actions - agentRuntimeImpl.requestFiles = async () => ({}) - agentRuntimeImpl.requestOptionalFile = async () => null - agentRuntimeImpl.requestToolCall = async () => ({ - output: [ - { - type: 'json', - value: 'Tool call success', - }, - ], - }) - - // Mock LLM APIs - agentRuntimeImpl.promptAiSdk = async function () { - return 'Test response' - } - - // Mock generateCompactId for consistent test results - spyOn(stringUtils, 'generateCompactId').mockReturnValue('test-tool-call-id') - }) - - afterEach(() => { - mock.restore() - agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL } - }) - - function createMockStream(chunks: StreamChunk[]) { - async function* generator() { - for (const chunk of chunks) { - yield chunk - } - return 'mock-message-id' - } - return generator() - } - - function textChunk(text: string): StreamChunk { - return { type: 'text' as const, text } - } - - test('should add tool result errors to message history after stream completes', async () => { - // With native tools, malformed tool calls are handled at the API level. - // This test now verifies that an unknown tool is properly handled. - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_xyz', { paths: ['test.ts'] }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - // Should have tool result errors in the final message history - const toolMessages: ToolMessage[] = - defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) - - expect(toolMessages.length).toBeGreaterThan(0) - - // Find the error tool result for the unknown tool - const errorToolResult = toolMessages.find( - (m) => - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(errorToolResult).toBeDefined() - expect( - (errorToolResult?.content?.[0] as any)?.value?.errorMessage, - ).toContain('not found') - }) - - test('should handle multiple unknown tool calls', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_1', { param: 'value1' }), - textChunk('Some text between calls'), - createToolCallChunk('unknown_tool_2', { param: 'value2' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - // Should have multiple error tool results - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - const errorMessages = toolMessages.filter( - (m) => - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(errorMessages.length).toBe(2) - }) - - test('should preserve original toolResults array alongside message history', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_xyz', { param: 'value' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - const result = await processStream({ - ...defaultParams, - stream, - }) - - // Should have error in both toolResults and message history - expect(result.toolResults.length).toBeGreaterThan(0) - - const errorToolResult = result.toolResults.find( - (tr) => - tr.content?.[0]?.type === 'json' && - (tr.content[0] as any)?.value?.errorMessage, - ) - - expect(errorToolResult).toBeDefined() - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - expect(toolMessages.length).toBeGreaterThan(0) - }) - - test('should handle unknown tool names and add error to message history', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool', { param: 'value' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - const errorMessage = toolMessages.find( - (m) => - m.toolName === 'unknown_tool' && - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(errorMessage).toBeDefined() - expect((errorMessage?.content?.[0] as any)?.value?.errorMessage).toContain( - 'Tool unknown_tool not found', - ) - }) - - test('should not affect valid tool calls in message history', async () => { - const chunks: StreamChunk[] = [ - // Valid tool call - createToolCallChunk('read_files', { paths: ['test.ts'] }), - // Unknown tool call - createToolCallChunk('unknown_tool_xyz', { param: 'value' }), - createToolCallChunk('end_turn', {}), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - requestFiles: async ({ filePaths }) => { - return Object.fromEntries( - filePaths.map((path) => [path, `${path} content`]), - ) - }, - stream, - }) - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - // Should have both valid and error tool results - const validResults = toolMessages.filter( - (m) => - m.toolName === 'read_files' && - !(m.content?.[0] as any)?.value?.errorMessage, - ) - - const errorResults = toolMessages.filter( - (m) => - m.content?.[0]?.type === 'json' && - (m.content[0] as any)?.value?.errorMessage, - ) - - expect(validResults.length).toBeGreaterThan(0) - expect(errorResults.length).toBeGreaterThan(0) - }) - - test('should handle stream with only unknown tool calls', async () => { - const chunks: StreamChunk[] = [ - createToolCallChunk('unknown_tool_1', { param: 'value1' }), - createToolCallChunk('unknown_tool_2', { param: 'value2' }), - ] - - const stream = createMockStream(chunks) - - await processStream({ - ...defaultParams, - stream, - }) - - const toolMessages = defaultParams.agentState.messageHistory.filter( - (m: Message) => m.role === 'tool', - ) as ToolMessage[] - - expect(toolMessages.length).toBe(2) - toolMessages.forEach((msg) => { - expect(msg.content?.[0]?.type).toBe('json') - expect((msg.content?.[0] as any)?.value?.errorMessage).toContain( - 'not found', - ) - }) - }) -}) diff --git a/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts b/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts index 90cf1e53b7..48e10960f2 100644 --- a/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts +++ b/packages/agent-runtime/src/__tests__/prompt-caching-subagents.test.ts @@ -136,7 +136,6 @@ describe('Prompt Caching for Subagents with inheritParentSystemPrompt', () => { ancestorRunIds: [], onResponseChunk: () => {}, signal: new AbortController().signal, - tools: {}, } }) diff --git a/packages/agent-runtime/src/__tests__/propose-tools.test.ts b/packages/agent-runtime/src/__tests__/propose-tools.test.ts new file mode 100644 index 0000000000..d404b3acbd --- /dev/null +++ b/packages/agent-runtime/src/__tests__/propose-tools.test.ts @@ -0,0 +1,716 @@ +import { TEST_USER_ID } from '@codebuff/common/old-constants' +import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime' +import { getInitialSessionState } from '@codebuff/common/types/session-state' +import { + assistantMessage, + userMessage, +} from '@codebuff/common/util/messages' +import { + afterEach, + beforeEach, + describe, + expect, + it, + mock, + spyOn, +} from 'bun:test' + +import { + clearAgentGeneratorCache, + runProgrammaticStep, +} from '../run-programmatic-step' +import { clearAllProposedContent } from '../tools/handlers/tool/proposed-content-store' +import { mockFileContext } from './test-utils' +import * as toolExecutor from '../tools/tool-executor' + +import type { AgentTemplate, StepGenerator } from '../templates/types' +import type { executeToolCall } from '../tools/tool-executor' +import type { + AgentRuntimeDeps, + AgentRuntimeScopedDeps, +} from '@codebuff/common/types/contracts/agent-runtime' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ParamsOf } from '@codebuff/common/types/function-params' +import type { ToolMessage } from '@codebuff/common/types/messages/codebuff-message' +import type { AgentState } from '@codebuff/common/types/session-state' + +const logger: Logger = { + debug: () => {}, + error: () => {}, + info: () => {}, + warn: () => {}, +} + +/** + * Tests for propose_str_replace and propose_write_file tools. + * These tools allow agents to propose file edits without applying them, + * returning unified diffs instead. This is useful for best-of-n editor patterns + * where multiple implementations are generated and one is selected. + */ +describe('propose_str_replace and propose_write_file tools', () => { + let mockTemplate: AgentTemplate + let mockAgentState: AgentState + let mockParams: ParamsOf + let executeToolCallSpy: ReturnType> + let agentRuntimeImpl: AgentRuntimeDeps & AgentRuntimeScopedDeps + + // Mock file system - maps file paths to their contents + const mockFiles: Record = {} + + beforeEach(() => { + // Reset mock file system + mockFiles['src/utils.ts'] = `export function add(a: number, b: number): number { + return a + b; +} + +export function subtract(a: number, b: number): number { + return a - b; +} +` + mockFiles['src/index.ts'] = `import { add } from './utils'; +console.log(add(1, 2)); +` + + agentRuntimeImpl = { + ...TEST_AGENT_RUNTIME_IMPL, + addAgentStep: async () => 'test-agent-step-id', + sendAction: () => {}, + } + + // Mock executeToolCall to handle propose_* tools + executeToolCallSpy = spyOn( + toolExecutor, + 'executeToolCall', + ).mockImplementation(async (options: ParamsOf) => { + const { toolName, input, toolResults, agentState } = options + + if (toolName === 'propose_str_replace') { + const { path, replacements } = input as { + path: string + replacements: Array<{ old: string; new: string; allowMultiple: boolean }> + } + + // Get current content (from proposed state or mock files) + let content = mockFiles[path] ?? null + + if (content === null) { + const errorResult: ToolMessage = { + role: 'tool', + toolName: 'propose_str_replace', + toolCallId: `${toolName}-call-id`, + content: [{ type: 'json', value: { file: path, errorMessage: `File not found: ${path}` } }], + } + toolResults.push(errorResult) + agentState.messageHistory.push(errorResult) + return + } + + // Apply replacements + const errors: string[] = [] + for (const replacement of replacements) { + if (!content.includes(replacement.old)) { + errors.push(`String not found: "${replacement.old.slice(0, 50)}..."`) + continue + } + if (replacement.allowMultiple) { + content = content.replaceAll(replacement.old, replacement.new) + } else { + content = content.replace(replacement.old, replacement.new) + } + } + + if (errors.length > 0) { + const errorResult: ToolMessage = { + role: 'tool', + toolName: 'propose_str_replace', + toolCallId: `${toolName}-call-id`, + content: [{ type: 'json', value: { file: path, errorMessage: errors.join('; ') } }], + } + toolResults.push(errorResult) + agentState.messageHistory.push(errorResult) + return + } + + // Generate unified diff + const originalContent = mockFiles[path]! + const diff = generateSimpleDiff(path, originalContent, content) + + // Store proposed content for future calls + mockFiles[path] = content + + const successResult: ToolMessage = { + role: 'tool', + toolName: 'propose_str_replace', + toolCallId: `${toolName}-call-id`, + content: [{ + type: 'json', + value: { + file: path, + message: 'Proposed string replacements', + unifiedDiff: diff, + }, + }], + } + toolResults.push(successResult) + agentState.messageHistory.push(successResult) + } else if (toolName === 'propose_write_file') { + const { path, content: newContent } = input as { + path: string + instructions: string + content: string + } + + const originalContent = mockFiles[path] ?? '' + const isNewFile = !(path in mockFiles) + + // Generate unified diff + const diff = generateSimpleDiff(path, originalContent, newContent) + + // Store proposed content + mockFiles[path] = newContent + + const successResult: ToolMessage = { + role: 'tool', + toolName: 'propose_write_file', + toolCallId: `${toolName}-call-id`, + content: [{ + type: 'json', + value: { + file: path, + message: isNewFile ? `Proposed new file ${path}` : `Proposed changes to ${path}`, + unifiedDiff: diff, + }, + }], + } + toolResults.push(successResult) + agentState.messageHistory.push(successResult) + } else if (toolName === 'set_output') { + agentState.output = input + const result: ToolMessage = { + role: 'tool', + toolName: 'set_output', + toolCallId: `${toolName}-call-id`, + content: [{ type: 'json', value: 'Output set successfully' }], + } + toolResults.push(result) + agentState.messageHistory.push(result) + } else if (toolName === 'end_turn') { + // No-op for end_turn + } + }) + + // Mock crypto.randomUUID + spyOn(crypto, 'randomUUID').mockImplementation( + () => 'mock-uuid-0000-0000-0000-000000000000' as `${string}-${string}-${string}-${string}-${string}`, + ) + + // Create mock template for implementor agent + mockTemplate = { + id: 'test-implementor', + displayName: 'Test Implementor', + spawnerPrompt: 'Testing propose tools', + model: 'claude-3-5-sonnet-20241022', + inputSchema: {}, + outputMode: 'structured_output', + includeMessageHistory: true, + inheritParentSystemPrompt: false, + mcpServers: {}, + toolNames: ['propose_str_replace', 'propose_write_file', 'set_output', 'end_turn'], + spawnableAgents: [], + systemPrompt: 'You are a code implementor that proposes changes.', + instructionsPrompt: 'Implement the requested changes using propose_str_replace or propose_write_file.', + stepPrompt: '', + handleSteps: undefined, + } as AgentTemplate + + // Create mock agent state + const sessionState = getInitialSessionState(mockFileContext) + mockAgentState = { + ...sessionState.mainAgentState, + agentId: 'test-implementor-id', + runId: 'test-run-id' as `${string}-${string}-${string}-${string}-${string}`, + messageHistory: [ + userMessage('Add a multiply function to src/utils.ts'), + assistantMessage('I will implement the changes.'), + ], + output: undefined, + directCreditsUsed: 0, + childRunIds: [], + } + + // Create mock params + mockParams = { + ...agentRuntimeImpl, + runId: 'test-run-id', + ancestorRunIds: [], + repoId: undefined, + repoUrl: undefined, + agentState: mockAgentState, + template: mockTemplate, + prompt: 'Add a multiply function to src/utils.ts', + toolCallParams: {}, + userId: TEST_USER_ID, + userInputId: 'test-user-input', + clientSessionId: 'test-session', + fingerprintId: 'test-fingerprint', + onResponseChunk: () => {}, + onCostCalculated: async () => {}, + fileContext: mockFileContext, + localAgentTemplates: {}, + system: 'Test system prompt', + stepsComplete: false, + stepNumber: 1, + tools: {}, + logger, + signal: new AbortController().signal, + } + }) + + afterEach(() => { + mock.restore() + clearAgentGeneratorCache({ logger }) + clearAllProposedContent() + }) + + describe('propose_str_replace', () => { + it('should propose string replacement and return unified diff', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'export function subtract(a: number, b: number): number {\n return a - b;\n}', + new: `export function subtract(a: number, b: number): number { + return a - b; +} + +export function multiply(a: number, b: number): number { + return a * b; +}`, + allowMultiple: false, + }], + }, + } + toolResultsCapture.push(step.toolResult) + + const firstResult = step.toolResult?.[0] + const unifiedDiff = firstResult?.type === 'json' ? (firstResult.value as { unifiedDiff?: string })?.unifiedDiff : undefined + yield { + toolName: 'set_output', + input: { + toolCalls: [{ toolName: 'propose_str_replace', input: step }], + toolResults: step.toolResult, + unifiedDiffs: unifiedDiff ?? '', + }, + } + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + const result = await runProgrammaticStep(mockParams) + + expect(result.endTurn).toBe(true) + expect(executeToolCallSpy).toHaveBeenCalledWith( + expect.objectContaining({ + toolName: 'propose_str_replace', + }), + ) + + // Verify tool result contains unified diff + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + expect(toolResult).toBeDefined() + expect(toolResult[0].type).toBe('json') + const jsonResult = toolResult[0] as { type: 'json'; value: { file: string; unifiedDiff: string } } + expect(jsonResult.value.file).toBe('src/utils.ts') + expect(jsonResult.value.unifiedDiff).toContain('+export function multiply') + expect(jsonResult.value.unifiedDiff).toContain('return a * b') + }) + + it('should return error when string not found', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'nonexistent string that does not exist in the file', + new: 'replacement', + allowMultiple: false, + }], + }, + } + toolResultsCapture.push(step.toolResult) + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + const jsonResult = toolResult[0] as { type: 'json'; value: { errorMessage: string } } + expect(jsonResult.value.errorMessage).toContain('String not found') + }) + + it('should stack multiple replacements on the same file', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + // First replacement + const step1 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a + b;', + new: 'return a + b; // addition', + allowMultiple: false, + }], + }, + } + toolResultsCapture.push({ step: 1, result: step1.toolResult }) + + // Second replacement should work on the already-modified content + const step2 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a - b;', + new: 'return a - b; // subtraction', + allowMultiple: false, + }], + }, + } + toolResultsCapture.push({ step: 2, result: step2.toolResult }) + + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(2) + + // Both replacements should succeed + const result0 = toolResultsCapture[0].result[0] as { type: 'json'; value: { unifiedDiff: string } } + const result1 = toolResultsCapture[1].result[0] as { type: 'json'; value: { unifiedDiff: string } } + expect(result0.value.unifiedDiff).toContain('// addition') + expect(result1.value.unifiedDiff).toContain('// subtraction') + + // Final file should have both changes + expect(mockFiles['src/utils.ts']).toContain('// addition') + expect(mockFiles['src/utils.ts']).toContain('// subtraction') + }) + }) + + describe('propose_write_file', () => { + it('should propose new file creation and return unified diff', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_write_file', + input: { + path: 'src/multiply.ts', + instructions: 'Create multiply function', + content: `export function multiply(a: number, b: number): number { + return a * b; +} +`, + }, + } + toolResultsCapture.push(step.toolResult) + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + const jsonResult = toolResult[0] as { type: 'json'; value: { file: string; message: string; unifiedDiff: string } } + expect(jsonResult.value.file).toBe('src/multiply.ts') + expect(jsonResult.value.message).toContain('new file') + expect(jsonResult.value.unifiedDiff).toContain('+export function multiply') + }) + + it('should propose file edit and return unified diff', async () => { + const toolResultsCapture: any[] = [] + + const mockGenerator = (function* () { + const step = yield { + toolName: 'propose_write_file', + input: { + path: 'src/utils.ts', + instructions: 'Add multiply function', + content: `export function add(a: number, b: number): number { + return a + b; +} + +export function subtract(a: number, b: number): number { + return a - b; +} + +export function multiply(a: number, b: number): number { + return a * b; +} +`, + }, + } + toolResultsCapture.push(step.toolResult) + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + await runProgrammaticStep(mockParams) + + expect(toolResultsCapture).toHaveLength(1) + const toolResult = toolResultsCapture[0] + const jsonResult = toolResult[0] as { type: 'json'; value: { file: string; message: string; unifiedDiff: string } } + expect(jsonResult.value.file).toBe('src/utils.ts') + expect(jsonResult.value.message).toContain('changes') + expect(jsonResult.value.unifiedDiff).toContain('+export function multiply') + }) + }) + + describe('implementor agent workflow', () => { + it('should receive tool results from previous tool calls across multiple steps', async () => { + /** + * This test verifies that when an agent makes multiple tool calls, + * each subsequent yield receives the tool result from the previous call. + * This is critical for the implementor2 pattern where the agent needs to + * see the unified diff results to know what changes were proposed. + */ + const receivedToolResults: any[] = [] + + const mockGenerator = (function* () { + // First tool call - propose_str_replace + const step1 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a + b;', + new: 'return a + b; // first change', + allowMultiple: false, + }], + }, + } + const step1First = step1.toolResult?.[0] + const step1HasDiff = step1First?.type === 'json' && !!(step1First.value as { unifiedDiff?: string })?.unifiedDiff + receivedToolResults.push({ + step: 1, + toolResult: step1.toolResult, + hasUnifiedDiff: step1HasDiff, + }) + + // Second tool call - another propose_str_replace + const step2 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'return a - b;', + new: 'return a - b; // second change', + allowMultiple: false, + }], + }, + } + const step2First = step2.toolResult?.[0] + const step2HasDiff = step2First?.type === 'json' && !!(step2First.value as { unifiedDiff?: string })?.unifiedDiff + receivedToolResults.push({ + step: 2, + toolResult: step2.toolResult, + hasUnifiedDiff: step2HasDiff, + }) + + // Third tool call - propose_write_file + const step3 = yield { + toolName: 'propose_write_file', + input: { + path: 'src/new-file.ts', + instructions: 'Create new file', + content: 'export const newFile = true;', + }, + } + const step3First = step3.toolResult?.[0] + const step3HasDiff = step3First?.type === 'json' && !!(step3First.value as { unifiedDiff?: string })?.unifiedDiff + receivedToolResults.push({ + step: 3, + toolResult: step3.toolResult, + hasUnifiedDiff: step3HasDiff, + }) + + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + const result = await runProgrammaticStep(mockParams) + + expect(result.endTurn).toBe(true) + + // Verify we received tool results for all 3 steps + expect(receivedToolResults).toHaveLength(3) + + // Step 1: Should have received tool result with unified diff + expect(receivedToolResults[0].step).toBe(1) + expect(receivedToolResults[0].toolResult).toBeDefined() + expect(receivedToolResults[0].hasUnifiedDiff).toBe(true) + const step1Result = receivedToolResults[0].toolResult[0] as { type: 'json'; value: { file: string; unifiedDiff: string } } + expect(step1Result.value.file).toBe('src/utils.ts') + expect(step1Result.value.unifiedDiff).toContain('first change') + + // Step 2: Should have received tool result with unified diff + expect(receivedToolResults[1].step).toBe(2) + expect(receivedToolResults[1].toolResult).toBeDefined() + expect(receivedToolResults[1].hasUnifiedDiff).toBe(true) + const step2Result = receivedToolResults[1].toolResult[0] as { type: 'json'; value: { file: string; unifiedDiff: string } } + expect(step2Result.value.file).toBe('src/utils.ts') + expect(step2Result.value.unifiedDiff).toContain('second change') + + // Step 3: Should have received tool result with unified diff for new file + expect(receivedToolResults[2].step).toBe(3) + expect(receivedToolResults[2].toolResult).toBeDefined() + expect(receivedToolResults[2].hasUnifiedDiff).toBe(true) + const step3Result = receivedToolResults[2].toolResult[0] as { type: 'json'; value: { file: string; message: string } } + expect(step3Result.value.file).toBe('src/new-file.ts') + expect(step3Result.value.message).toContain('new file') + }) + + it('should collect tool calls and results for output', async () => { + /** + * This test simulates the editor-implementor2 workflow: + * 1. Agent makes propose_* tool calls + * 2. Tool results (with unified diffs) are captured + * 3. Agent extracts tool calls and diffs for set_output + */ + // Capture tool results as they come in + const capturedToolResults: any[] = [] + const capturedToolCalls: { toolName: string; input: any }[] = [] + + const mockGenerator = (function* () { + // Make a propose_str_replace call + const step1 = yield { + toolName: 'propose_str_replace', + input: { + path: 'src/utils.ts', + replacements: [{ + old: 'export function subtract(a: number, b: number): number {\n return a - b;\n}', + new: `export function subtract(a: number, b: number): number { + return a - b; +} + +export function multiply(a: number, b: number): number { + return a * b; +}`, + allowMultiple: false, + }], + }, + } + + // Capture the tool call and result + capturedToolCalls.push({ + toolName: 'propose_str_replace', + input: step1, + }) + const step1First = step1.toolResult?.[0] + if (step1First?.type === 'json' && step1First.value) { + capturedToolResults.push(step1First.value) + } + + // Generate unified diffs string from captured results + const unifiedDiffs = capturedToolResults + .filter((result: any) => result.unifiedDiff) + .map((result: any) => `--- ${result.file} ---\n${result.unifiedDiff}`) + .join('\n\n') + + yield { + toolName: 'set_output', + input: { + toolCalls: capturedToolCalls, + toolResults: capturedToolResults, + unifiedDiffs, + }, + } + yield { toolName: 'end_turn', input: {} } + })() as StepGenerator + + mockTemplate.handleSteps = () => mockGenerator + + const result = await runProgrammaticStep(mockParams) + + expect(result.endTurn).toBe(true) + expect(result.agentState.output).toBeDefined() + + const output = result.agentState.output as { + toolCalls: any[] + toolResults: any[] + unifiedDiffs: string + } + + // Verify tool calls were captured + expect(output.toolCalls).toHaveLength(1) + expect(output.toolCalls[0].toolName).toBe('propose_str_replace') + + // Verify tool results were captured + expect(output.toolResults).toHaveLength(1) + expect(output.toolResults[0].file).toBe('src/utils.ts') + expect(output.toolResults[0].unifiedDiff).toContain('+export function multiply') + + // Verify unified diffs string was generated + expect(output.unifiedDiffs).toContain('--- src/utils.ts ---') + expect(output.unifiedDiffs).toContain('+export function multiply') + }) + }) +}) + +/** + * Simple diff generator for testing purposes. + * In production, the actual handlers use the 'diff' library. + */ +function generateSimpleDiff(path: string, oldContent: string, newContent: string): string { + const oldLines = oldContent.split('\n') + const newLines = newContent.split('\n') + + const diffLines: string[] = [] + const maxLen = Math.max(oldLines.length, newLines.length) + + let inChange = false + let changeStart = 0 + + for (let i = 0; i < maxLen; i++) { + const oldLine = oldLines[i] + const newLine = newLines[i] + + if (oldLine !== newLine) { + if (!inChange) { + inChange = true + changeStart = i + diffLines.push(`@@ -${i + 1},${oldLines.length - i} +${i + 1},${newLines.length - i} @@`) + } + if (oldLine !== undefined) { + diffLines.push(`-${oldLine}`) + } + if (newLine !== undefined) { + diffLines.push(`+${newLine}`) + } + } else if (inChange && oldLine === newLine) { + diffLines.push(` ${oldLine}`) + } + } + + return diffLines.join('\n') +} diff --git a/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts b/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts index cfb92f3808..41c98ea92d 100644 --- a/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts +++ b/packages/agent-runtime/src/__tests__/spawn-agents-message-history.test.ts @@ -143,7 +143,8 @@ describe('Spawn Agents Message History', () => { // Verify that the subagent's message history contains the filtered messages // expireMessages filters based on timeToLive property, not role // Since the system message doesn't have timeToLive, it will be included - expect(capturedSubAgentState.messageHistory).toHaveLength(4) // System + user + assistant messages + // System + user + assistant messages + spawn message + expect(capturedSubAgentState.messageHistory).toHaveLength(5) // Verify system message is included (because it has no timeToLive property) const systemMessages = capturedSubAgentState.messageHistory.filter( @@ -173,6 +174,14 @@ describe('Spawn Agents Message History', () => { (msg: any) => msg.content[0]?.text === 'How are you?', ), ).toBeTruthy() + + // Verify the subagent spawn message is included with proper structure + const spawnMessage = capturedSubAgentState.messageHistory.find( + (msg: any) => msg.tags?.includes('SUBAGENT_SPAWN'), + ) + expect(spawnMessage).toBeTruthy() + expect(spawnMessage.role).toBe('user') + expect(spawnMessage.content[0]?.text).toContain('Subagent child-agent has been spawned') }) it('should not include conversation history when includeMessageHistory is false', async () => { @@ -215,8 +224,15 @@ describe('Spawn Agents Message History', () => { toolCall, }) - // Verify that the subagent's message history is empty when there are no messages to pass - expect(capturedSubAgentState.messageHistory).toHaveLength(0) + // Verify that the subagent's message history contains only the spawn message + // when includeMessageHistory is true (even with empty parent history) + expect(capturedSubAgentState.messageHistory).toHaveLength(1) + + // Verify the spawn message structure + const spawnMessage = capturedSubAgentState.messageHistory[0] + expect(spawnMessage.role).toBe('user') + expect(spawnMessage.tags).toContain('SUBAGENT_SPAWN') + expect(spawnMessage.content[0]?.text).toContain('Subagent child-agent has been spawned') }) it('should handle message history with only system messages', async () => { @@ -240,10 +256,17 @@ describe('Spawn Agents Message History', () => { // Verify that system messages without timeToLive are included // expireMessages only filters messages with timeToLive='userPrompt' - expect(capturedSubAgentState.messageHistory).toHaveLength(2) + // Plus 1 for the subagent spawn message + expect(capturedSubAgentState.messageHistory).toHaveLength(3) const systemMessages = capturedSubAgentState.messageHistory.filter( (msg: any) => msg.role === 'system', ) expect(systemMessages).toHaveLength(2) + + // Verify spawn message is present + const spawnMessage = capturedSubAgentState.messageHistory.find( + (msg: any) => msg.tags?.includes('SUBAGENT_SPAWN'), + ) + expect(spawnMessage).toBeTruthy() }) }) diff --git a/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts b/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts index ef3ed0e7b9..3fe3107a81 100644 --- a/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts +++ b/packages/agent-runtime/src/__tests__/spawn-agents-permissions.test.ts @@ -29,6 +29,10 @@ describe('Spawn Agents Permissions', () => { typeof handleSpawnAgents, 'agentState' | 'agentTemplate' | 'localAgentTemplates' | 'toolCall' > + let handleSpawnAgentInlineBaseParams: ParamsExcluding< + typeof handleSpawnAgentInline, + 'agentState' | 'agentTemplate' | 'localAgentTemplates' | 'toolCall' + > const createMockAgent = ( id: string, @@ -67,12 +71,16 @@ describe('Spawn Agents Permissions', () => { sendSubagentChunk: mockSendSubagentChunk, signal: new AbortController().signal, system: 'Test system prompt', - tools: {}, userId: TEST_USER_ID, userInputId: 'test-input', writeToClient: () => {}, } + handleSpawnAgentInlineBaseParams = { + ...handleSpawnAgentsBaseParams, + tools: {}, + } + // Mock sendSubagentChunk mockSendSubagentChunk = mock(() => {}) @@ -426,7 +434,7 @@ describe('Spawn Agents Permissions', () => { // Should not throw await handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { thinker: childAgent }, @@ -443,7 +451,7 @@ describe('Spawn Agents Permissions', () => { const toolCall = createInlineSpawnToolCall('reviewer') // Try to spawn reviewer const result = handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { reviewer: childAgent }, @@ -462,7 +470,7 @@ describe('Spawn Agents Permissions', () => { const toolCall = createInlineSpawnToolCall('nonexistent') const result = handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: {}, // Empty - agent not found @@ -481,7 +489,7 @@ describe('Spawn Agents Permissions', () => { // Should not throw await handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { 'codebuff/thinker@1.0.0': childAgent }, @@ -499,7 +507,7 @@ describe('Spawn Agents Permissions', () => { // Should not throw await handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { @@ -519,7 +527,7 @@ describe('Spawn Agents Permissions', () => { const toolCall = createInlineSpawnToolCall('codebuff/thinker@2.0.0') const result = handleSpawnAgentInline({ - ...handleSpawnAgentsBaseParams, + ...handleSpawnAgentInlineBaseParams, agentState: sessionState.mainAgentState, agentTemplate: parentAgent, localAgentTemplates: { 'codebuff/thinker@2.0.0': childAgent }, diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts index 99a6fae0a7..323d932411 100644 --- a/packages/agent-runtime/src/run-agent-step.ts +++ b/packages/agent-runtime/src/run-agent-step.ts @@ -506,6 +506,7 @@ export async function loopAgentSteps( | 'system' | 'template' | 'toolCallParams' + | 'tools' > & ParamsExcluding & ParamsExcluding< diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts index 1aee165829..dc2c2c771c 100644 --- a/packages/agent-runtime/src/run-programmatic-step.ts +++ b/packages/agent-runtime/src/run-programmatic-step.ts @@ -2,6 +2,7 @@ import { getErrorObject } from '@codebuff/common/util/error' import { assistantMessage } from '@codebuff/common/util/messages' import { cloneDeep } from 'lodash' +import { clearProposedContentForRun } from './tools/handlers/tool/proposed-content-store' import { executeToolCall } from './tools/tool-executor' import { parseTextWithToolCalls } from './util/parse-tool-calls-from-text' @@ -38,6 +39,7 @@ export const runIdToStepAll: Set = new Set() // Function to clear the generator cache for testing purposes export function clearAgentGeneratorCache(params: { logger: Logger }) { for (const key in runIdToGenerator) { + clearProposedContentForRun(key) delete runIdToGenerator[key] } runIdToStepAll.clear() @@ -377,6 +379,7 @@ export async function runProgrammaticStep( if (endTurn) { delete runIdToGenerator[agentState.runId] runIdToStepAll.delete(agentState.runId) + clearProposedContentForRun(agentState.runId) } } } diff --git a/packages/agent-runtime/src/tools/handlers/list.ts b/packages/agent-runtime/src/tools/handlers/list.ts index 4c5fb752c9..d75eb829a9 100644 --- a/packages/agent-runtime/src/tools/handlers/list.ts +++ b/packages/agent-runtime/src/tools/handlers/list.ts @@ -9,6 +9,8 @@ import { handleFindFiles } from './tool/find-files' import { handleGlob } from './tool/glob' import { handleListDirectory } from './tool/list-directory' import { handleLookupAgentInfo } from './tool/lookup-agent-info' +import { handleProposeStrReplace } from './tool/propose-str-replace' +import { handleProposeWriteFile } from './tool/propose-write-file' import { handleReadDocs } from './tool/read-docs' import { handleReadFiles } from './tool/read-files' import { handleReadSubtree } from './tool/read-subtree' @@ -51,6 +53,8 @@ export const codebuffToolHandlers = { glob: handleGlob, list_directory: handleListDirectory, lookup_agent_info: handleLookupAgentInfo, + propose_str_replace: handleProposeStrReplace, + propose_write_file: handleProposeWriteFile, read_docs: handleReadDocs, read_files: handleReadFiles, read_subtree: handleReadSubtree, diff --git a/packages/agent-runtime/src/tools/handlers/tool/propose-str-replace.ts b/packages/agent-runtime/src/tools/handlers/tool/propose-str-replace.ts new file mode 100644 index 0000000000..6c1bd2248f --- /dev/null +++ b/packages/agent-runtime/src/tools/handlers/tool/propose-str-replace.ts @@ -0,0 +1,108 @@ +import { processStrReplace } from '../../../process-str-replace' +import { + getProposedContent, + setProposedContent, +} from './proposed-content-store' + +import type { CodebuffToolHandlerFunction } from '../handler-function-type' +import type { + CodebuffToolCall, + CodebuffToolOutput, +} from '@codebuff/common/tools/list' +import type { RequestOptionalFileFn } from '@codebuff/common/types/contracts/client' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ParamsExcluding } from '@codebuff/common/types/function-params' +import type { AgentState } from '@codebuff/common/types/session-state' + +export const handleProposeStrReplace = (async ( + params: { + previousToolCallFinished: Promise + toolCall: CodebuffToolCall<'propose_str_replace'> + + logger: Logger + agentState: AgentState + runId: string + + requestOptionalFile: RequestOptionalFileFn + } & ParamsExcluding, +): Promise<{ output: CodebuffToolOutput<'propose_str_replace'> }> => { + const { + previousToolCallFinished, + toolCall, + + logger, + runId, + + requestOptionalFile, + } = params + const { path, replacements } = toolCall.input + + // Get content from proposed state first (by runId), then fall back to disk + const getProposedOrDiskContent = async (): Promise => { + const proposedContent = getProposedContent(runId, path) + if (proposedContent !== undefined) { + return proposedContent + } + return requestOptionalFile({ ...params, filePath: path }) + } + + const latestContentPromise = getProposedOrDiskContent() + + const strReplaceResultPromise = processStrReplace({ + path, + replacements, + initialContentPromise: latestContentPromise, + logger, + }).catch((error: any) => { + logger.error(error, 'Error processing propose_str_replace') + return { + tool: 'str_replace' as const, + path, + error: 'Unknown error: Failed to process the propose_str_replace.', + } + }) + + // Store the proposed content for future propose calls on the same file (by runId) + setProposedContent( + runId, + path, + strReplaceResultPromise.then((result) => + 'content' in result ? result.content : null, + ), + ) + + await previousToolCallFinished + + const strReplaceResult = await strReplaceResultPromise + + if ('error' in strReplaceResult) { + return { + output: [ + { + type: 'json', + value: { + file: path, + errorMessage: strReplaceResult.error, + }, + }, + ], + } + } + + const message = strReplaceResult.messages.length > 0 + ? strReplaceResult.messages.join('\n\n') + : 'Proposed string replacement' + + return { + output: [ + { + type: 'json', + value: { + file: path, + message, + unifiedDiff: strReplaceResult.patch, + }, + }, + ], + } +}) satisfies CodebuffToolHandlerFunction<'propose_str_replace'> diff --git a/packages/agent-runtime/src/tools/handlers/tool/propose-write-file.ts b/packages/agent-runtime/src/tools/handlers/tool/propose-write-file.ts new file mode 100644 index 0000000000..b20d19ee10 --- /dev/null +++ b/packages/agent-runtime/src/tools/handlers/tool/propose-write-file.ts @@ -0,0 +1,87 @@ +import { createPatch } from 'diff' + +import { + getProposedContent, + setProposedContent, +} from './proposed-content-store' + +import type { CodebuffToolHandlerFunction } from '../handler-function-type' +import type { + CodebuffToolCall, + CodebuffToolOutput, +} from '@codebuff/common/tools/list' +import type { RequestOptionalFileFn } from '@codebuff/common/types/contracts/client' +import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ParamsExcluding } from '@codebuff/common/types/function-params' + +/** + * Proposes writing a file without actually applying the changes. + * Simply overwrites the file exactly with the given content (creating if it doesn't exist). + * Returns a unified diff of the changes for review. + */ +export const handleProposeWriteFile = (async ( + params: { + previousToolCallFinished: Promise + toolCall: CodebuffToolCall<'propose_write_file'> + + logger: Logger + runId: string + + requestOptionalFile: RequestOptionalFileFn + } & ParamsExcluding, +): Promise<{ output: CodebuffToolOutput<'propose_write_file'> }> => { + const { + previousToolCallFinished, + toolCall, + logger, + runId, + requestOptionalFile, + } = params + const { path, content } = toolCall.input + + // Get content from proposed state first (by runId), then fall back to disk + const getProposedOrDiskContent = async (): Promise => { + const proposedContent = getProposedContent(runId, path) + if (proposedContent !== undefined) { + return proposedContent + } + return requestOptionalFile({ ...params, filePath: path }) + } + + const initialContent = await getProposedOrDiskContent() + + // Normalize content (remove leading newline if present) + const newContent = content.startsWith('\n') ? content.slice(1) : content + + // Store the proposed content for future propose calls on the same file (by runId) + setProposedContent(runId, path, Promise.resolve(newContent)) + + await previousToolCallFinished + + // Generate unified diff + const oldContent = initialContent ?? '' + let patch = createPatch(path, oldContent, newContent) + + // Strip the header lines, keep only from @@ onwards + const lines = patch.split('\n') + const hunkStartIndex = lines.findIndex((line) => line.startsWith('@@')) + if (hunkStartIndex !== -1) { + patch = lines.slice(hunkStartIndex).join('\n') + } + + const isNewFile = initialContent === null + const message = isNewFile ? `Proposed new file ${path}` : `Proposed changes to ${path}` + + return { + output: [ + { + type: 'json', + value: { + file: path, + message, + unifiedDiff: patch, + }, + }, + ], + } +}) as CodebuffToolHandlerFunction<'propose_write_file'> diff --git a/packages/agent-runtime/src/tools/handlers/tool/proposed-content-store.ts b/packages/agent-runtime/src/tools/handlers/tool/proposed-content-store.ts new file mode 100644 index 0000000000..77310c83ee --- /dev/null +++ b/packages/agent-runtime/src/tools/handlers/tool/proposed-content-store.ts @@ -0,0 +1,64 @@ +/** + * Store for proposed file content by runId. + * This allows propose_str_replace and propose_write_file tools to + * track proposed changes within an agent run, isolated by runId. + */ + +/** Map of runId -> path -> Promise */ +const proposedContentByRunId = new Map< + string, + Record> +>() + +/** + * Get the proposed content map for a specific runId. + * Creates an empty record if none exists. + */ +export function getProposedContentForRun( + runId: string, +): Record> { + let contentByPath = proposedContentByRunId.get(runId) + if (!contentByPath) { + contentByPath = {} + proposedContentByRunId.set(runId, contentByPath) + } + return contentByPath +} + +/** + * Get proposed content for a specific file in a run. + */ +export function getProposedContent( + runId: string, + path: string, +): Promise | undefined { + const contentByPath = proposedContentByRunId.get(runId) + return contentByPath?.[path] +} + +/** + * Set proposed content for a specific file in a run. + */ +export function setProposedContent( + runId: string, + path: string, + content: Promise, +): void { + const contentByPath = getProposedContentForRun(runId) + contentByPath[path] = content +} + +/** + * Clear all proposed content for a specific runId. + * Should be called when an agent run completes. + */ +export function clearProposedContentForRun(runId: string): void { + proposedContentByRunId.delete(runId) +} + +/** + * Clear all proposed content (for testing purposes). + */ +export function clearAllProposedContent(): void { + proposedContentByRunId.clear() +} diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts index 5c7070b9b7..7c4f9ce4c7 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-inline.ts @@ -4,6 +4,7 @@ import { logAgentSpawn, executeSubagent, createAgentState, + extractSubagentContextParams, } from './spawn-agent-utils' import type { CodebuffToolHandlerFunction } from '../handler-function-type' @@ -75,9 +76,13 @@ export const handleSpawnAgentInline = (async ( await previousToolCallFinished const { agentTemplate, agentType } = await validateAndGetAgentTemplate({ - ...params, agentTypeStr, parentAgentTemplate, + localAgentTemplates: params.localAgentTemplates, + logger, + fetchAgentFromDatabase: params.fetchAgentFromDatabase, + databaseAgentCache: params.databaseAgentCache, + apiKey: params.apiKey, }) validateAgentInput(agentTemplate, agentType, prompt, spawnParams) @@ -105,7 +110,6 @@ export const handleSpawnAgentInline = (async ( } logAgentSpawn({ - ...params, agentTemplate: inlineTemplate, agentType, agentId: childAgentState.agentId, @@ -113,10 +117,17 @@ export const handleSpawnAgentInline = (async ( prompt, spawnParams, inline: true, + logger, }) + // Extract common context params to avoid bugs from spreading all params + const contextParams = extractSubagentContextParams(params) + const result = await executeSubagent({ - ...params, + ...contextParams, + + // Spawn-specific params + ancestorRunIds: parentAgentState.ancestorRunIds, userInputId: `${userInputId}-inline-${agentType}${childAgentState.agentId}`, prompt: prompt || '', spawnParams, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts index 7934195212..7f4a43110b 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts @@ -4,9 +4,13 @@ import { generateCompactId } from '@codebuff/common/util/string' import { loopAgentSteps } from '../../../run-agent-step' import { getAgentTemplate } from '../../../templates/agent-registry' -import { filterUnfinishedToolCalls } from '../../../util/messages' +import { filterUnfinishedToolCalls, withSystemTags } from '../../../util/messages' import type { AgentTemplate } from '@codebuff/common/types/agent-template' +import type { + AgentRuntimeDeps, + AgentRuntimeScopedDeps, +} from '@codebuff/common/types/contracts/agent-runtime' import type { Logger } from '@codebuff/common/types/contracts/logger' import type { ParamsExcluding, @@ -19,6 +23,79 @@ import type { AgentTemplateType, Subgoal, } from '@codebuff/common/types/session-state' +import type { ProjectFileContext } from '@codebuff/common/util/file' +import { Message } from '@codebuff/common/types/messages/codebuff-message' + +/** + * Common context params needed for spawning subagents. + * These are the params that don't change between different spawn calls + * and are passed through from the parent agent runtime. + */ +export type SubagentContextParams = AgentRuntimeDeps & + AgentRuntimeScopedDeps & { + clientSessionId: string + fileContext: ProjectFileContext + localAgentTemplates: Record + repoId: string | undefined + repoUrl: string | undefined + signal: AbortSignal + userId: string | undefined + } + +/** + * Extracts the common context params needed for spawning subagents. + * This avoids bugs from spreading all params with `...params` which can + * accidentally pass through params that should be overridden. + */ +export function extractSubagentContextParams( + params: SubagentContextParams, +): SubagentContextParams { + return { + // AgentRuntimeDeps - Environment + clientEnv: params.clientEnv, + ciEnv: params.ciEnv, + // AgentRuntimeDeps - Database + getUserInfoFromApiKey: params.getUserInfoFromApiKey, + fetchAgentFromDatabase: params.fetchAgentFromDatabase, + startAgentRun: params.startAgentRun, + finishAgentRun: params.finishAgentRun, + addAgentStep: params.addAgentStep, + // AgentRuntimeDeps - Billing + consumeCreditsWithFallback: params.consumeCreditsWithFallback, + // AgentRuntimeDeps - LLM + promptAiSdkStream: params.promptAiSdkStream, + promptAiSdk: params.promptAiSdk, + promptAiSdkStructured: params.promptAiSdkStructured, + // AgentRuntimeDeps - Mutable State + databaseAgentCache: params.databaseAgentCache, + liveUserInputRecord: params.liveUserInputRecord, + sessionConnections: params.sessionConnections, + // AgentRuntimeDeps - Analytics + trackEvent: params.trackEvent, + // AgentRuntimeDeps - Other + logger: params.logger, + fetch: params.fetch, + + // AgentRuntimeScopedDeps - Client (WebSocket) + handleStepsLogChunk: params.handleStepsLogChunk, + requestToolCall: params.requestToolCall, + requestMcpToolData: params.requestMcpToolData, + requestFiles: params.requestFiles, + requestOptionalFile: params.requestOptionalFile, + sendAction: params.sendAction, + sendSubagentChunk: params.sendSubagentChunk, + apiKey: params.apiKey, + + // Core context params + clientSessionId: params.clientSessionId, + fileContext: params.fileContext, + localAgentTemplates: params.localAgentTemplates, + repoId: params.repoId, + repoUrl: params.repoUrl, + signal: params.signal, + userId: params.userId, + } +} /** * Checks if a parent agent is allowed to spawn a child agent @@ -166,9 +243,21 @@ export function createAgentState( // When including message history, filter out any tool calls that don't have // corresponding tool responses. This prevents the spawned agent from seeing // unfinished tool calls which throw errors in the Anthropic API. - const messageHistory = agentTemplate.includeMessageHistory - ? filterUnfinishedToolCalls(parentAgentState.messageHistory) - : [] + let messageHistory: Message[] = [] + + if (agentTemplate.includeMessageHistory) { + messageHistory = filterUnfinishedToolCalls(parentAgentState.messageHistory) + messageHistory.push({ + role: 'user', + content: [ + { + type: 'text', + text: withSystemTags(`Subagent ${agentType} has been spawned.`), + }, + ], + tags: ['SUBAGENT_SPAWN'], + }) + } return { agentId, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts index 596a3c0649..c80483a07d 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agents.ts @@ -6,6 +6,7 @@ import { createAgentState, logAgentSpawn, executeSubagent, + extractSubagentContextParams, } from './spawn-agent-utils' import type { CodebuffToolHandlerFunction } from '../handler-function-type' @@ -111,8 +112,14 @@ export const handleSpawnAgents = (async ( logger, }) + // Extract common context params to avoid bugs from spreading all params + const contextParams = extractSubagentContextParams(params) + const result = await executeSubagent({ - ...params, + ...contextParams, + + // Spawn-specific params + ancestorRunIds: parentAgentState.ancestorRunIds, userInputId: `${userInputId}-${agentType}${subAgentState.agentId}`, prompt: prompt || '', spawnParams, @@ -121,6 +128,8 @@ export const handleSpawnAgents = (async ( agentState: subAgentState, fingerprintId, isOnlyChild: agents.length === 1, + excludeToolFromMessageHistory: false, + fromHandleSteps: false, parentSystemPrompt, parentTools: agentTemplate.inheritParentSystemPrompt ? parentTools diff --git a/packages/agent-runtime/src/tools/handlers/tool/write-file.ts b/packages/agent-runtime/src/tools/handlers/tool/write-file.ts index f80382270f..dfd6247a81 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/write-file.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/write-file.ts @@ -51,7 +51,7 @@ export function getFileProcessingValues( } for (const [key, value] of Object.entries(state)) { const typedKey = key as keyof typeof fileProcessingValues - if (fileProcessingValues[typedKey] !== undefined) { + if (typedKey in fileProcessingValues) { fileProcessingValues[typedKey] = value as any } } diff --git a/packages/agent-runtime/src/tools/stream-parser.ts b/packages/agent-runtime/src/tools/stream-parser.ts index 4c0f03f97b..00cb52de9b 100644 --- a/packages/agent-runtime/src/tools/stream-parser.ts +++ b/packages/agent-runtime/src/tools/stream-parser.ts @@ -30,13 +30,6 @@ import type { import type { PrintModeEvent } from '@codebuff/common/types/print-mode' import type { Subgoal } from '@codebuff/common/types/session-state' import type { ProjectFileContext } from '@codebuff/common/util/file' -import type { ToolCallPart } from 'ai' - -export type ToolCallError = { - toolName?: string - args: Record - error: string -} & Omit export async function processStream( params: { diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 30106cb996..05757d2c1b 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -54,51 +54,21 @@ export function parseRawToolCall(params: { toolCallId: string input: Record } - autoInsertEndStepParam?: boolean }): CodebuffToolCall | ToolCallError { - const { rawToolCall, autoInsertEndStepParam = false } = params + const { rawToolCall } = params const toolName = rawToolCall.toolName - if (!(toolName in toolParams)) { - return { - toolName, - toolCallId: rawToolCall.toolCallId, - input: rawToolCall.input, - error: `Tool ${toolName} not found`, - } - } - const validName = toolName as T - - // const processedParameters: Record = {} - // for (const [param, val] of Object.entries(rawToolCall.input ?? {})) { - // processedParameters[param] = val - // } - - // Add the required codebuff_end_step parameter with the correct value for this tool if requested - // if (autoInsertEndStepParam) { - // processedParameters[endsAgentStepParam] = - // toolParams[validName].endsAgentStep - // } - - // const paramsSchema = toolParams[validName].endsAgentStep - // ? ( - // toolParams[validName].inputSchema satisfies z.ZodObject as z.ZodObject - // ).extend({ - // [endsAgentStepParam]: z.literal(toolParams[validName].endsAgentStep), - // }) - // : toolParams[validName].inputSchema - const processedParameters = rawToolCall.input - const paramsSchema = toolParams[validName].inputSchema + const paramsSchema = toolParams[toolName].inputSchema const result = paramsSchema.safeParse(processedParameters) if (!result.success) { return { - toolName: validName, + toolName, toolCallId: rawToolCall.toolCallId, input: rawToolCall.input, - error: `Invalid parameters for ${validName}: ${JSON.stringify( + error: `Invalid parameters for ${toolName}: ${JSON.stringify( result.error.issues, null, 2, @@ -111,7 +81,7 @@ export function parseRawToolCall(params: { } return { - toolName: validName, + toolName, input: result.data, toolCallId: rawToolCall.toolCallId, } as CodebuffToolCall @@ -163,7 +133,6 @@ export function executeToolCall( const { toolName, input, - autoInsertEndStepParam = false, excludeToolFromMessageHistory = false, fromHandleSteps = false, @@ -188,8 +157,24 @@ export function executeToolCall( toolCallId, input, }, - autoInsertEndStepParam, }) + + // Filter out restricted tools - emit error instead of tool call/result + // This prevents the CLI from showing tool calls that the agent doesn't have permission to use + if ( + toolCall.toolName && + !agentTemplate.toolNames.includes(toolCall.toolName) && + !fromHandleSteps + ) { + // Emit an error event instead of tool call/result pair + // The stream parser will convert this to a user message for proper API compliance + onResponseChunk({ + type: 'error', + message: `Tool \`${toolName}\` is not currently available. Make sure to only use tools provided at the start of the conversation AND that you most recently have permission to use.`, + }) + return previousToolCallFinished + } + if ('error' in toolCall) { const toolResult: ToolMessage = { role: 'tool', @@ -208,21 +193,6 @@ export function executeToolCall( return previousToolCallFinished } - // Filter out restricted tools - emit error instead of tool call/result - // This prevents the CLI from showing tool calls that the agent doesn't have permission to use - if ( - !agentTemplate.toolNames.includes(toolCall.toolName) && - !fromHandleSteps - ) { - // Emit an error event instead of tool call/result pair - // The stream parser will convert this to a user message for proper API compliance - onResponseChunk({ - type: 'error', - message: `Tool \`${toolName}\` is not currently available. Make sure to only use tools listed in the system instructions.`, - }) - return previousToolCallFinished - } - // Only emit tool_call event after permission check passes onResponseChunk({ type: 'tool_call', @@ -399,27 +369,11 @@ export async function executeCustomToolCall( }, autoInsertEndStepParam, }) - if ('error' in toolCall) { - const toolResult: ToolMessage = { - role: 'tool', - toolName, - toolCallId: toolCall.toolCallId, - content: jsonToolResult({ - errorMessage: toolCall.error, - }), - } - toolResults.push(cloneDeep(toolResult)) - toolResultsToAddAfterStream.push(cloneDeep(toolResult)) - logger.debug( - { toolCall, error: toolCall.error }, - `${toolName} error: ${toolCall.error}`, - ) - return previousToolCallFinished - } // Filter out restricted tools - emit error instead of tool call/result // This prevents the CLI from showing tool calls that the agent doesn't have permission to use if ( + toolCall.toolName && !(agentTemplate.toolNames as string[]).includes(toolCall.toolName) && !fromHandleSteps && !( @@ -436,6 +390,24 @@ export async function executeCustomToolCall( return previousToolCallFinished } + if ('error' in toolCall) { + const toolResult: ToolMessage = { + role: 'tool', + toolName, + toolCallId: toolCall.toolCallId, + content: jsonToolResult({ + errorMessage: toolCall.error, + }), + } + toolResults.push(cloneDeep(toolResult)) + toolResultsToAddAfterStream.push(cloneDeep(toolResult)) + logger.debug( + { toolCall, error: toolCall.error }, + `${toolName} error: ${toolCall.error}`, + ) + return previousToolCallFinished + } + // Only emit tool_call event after permission check passes onResponseChunk({ type: 'tool_call', diff --git a/sdk/src/run.ts b/sdk/src/run.ts index 0d3d3039a2..7e0b166cfe 100644 --- a/sdk/src/run.ts +++ b/sdk/src/run.ts @@ -833,7 +833,6 @@ export async function runOnce({ clientSessionId: promptId, userId, signal: signal ?? new AbortController().signal, - tools: {}, }).catch((error) => { // Let retryable errors and PaymentRequiredError propagate so the retry wrapper can handle them const isRetryable = isRetryableError(error) From e33c1224444c4f205719bf23c1752a929e4cc022 Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Sun, 14 Dec 2025 22:35:23 -0800 Subject: [PATCH 016/885] fix(cli): revalidate auth when token changes - Key auth validation query by sha256(apiKey) to avoid stale cache and avoid putting the raw token in the cache key - Resolve CODEBUFF_API_KEY via CiEnv and make auth token resolution injectable --- .../hooks/__tests__/use-auth-query.test.ts | 22 ++++++++++ cli/src/hooks/use-auth-query.ts | 13 ++++-- cli/src/hooks/use-auth-state.ts | 40 +++++++++---------- cli/src/index.tsx | 13 +++--- cli/src/utils/auth.ts | 10 +++-- 5 files changed, 63 insertions(+), 35 deletions(-) create mode 100644 cli/src/hooks/__tests__/use-auth-query.test.ts diff --git a/cli/src/hooks/__tests__/use-auth-query.test.ts b/cli/src/hooks/__tests__/use-auth-query.test.ts new file mode 100644 index 0000000000..fd50c0a22b --- /dev/null +++ b/cli/src/hooks/__tests__/use-auth-query.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, test } from 'bun:test' + +import { authQueryKeys } from '../use-auth-query' + +describe('authQueryKeys.validation', () => { + test('changes when api key changes', () => { + const firstKey = authQueryKeys.validation('token-1') + const secondKey = authQueryKeys.validation('token-2') + + expect(firstKey).not.toEqual(secondKey) + }) + + test('does not include the raw api key', () => { + const token = 'secret-token-123' + const key = authQueryKeys.validation(token) + const [, , apiKeyHash] = key + + expect(key).not.toContain(token) + expect(apiKeyHash).toMatch(/^[0-9a-f]{64}$/) + }) +}) + diff --git a/cli/src/hooks/use-auth-query.ts b/cli/src/hooks/use-auth-query.ts index 47ce531fdf..2063344629 100644 --- a/cli/src/hooks/use-auth-query.ts +++ b/cli/src/hooks/use-auth-query.ts @@ -1,4 +1,6 @@ -import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants' +import { createHash } from 'crypto' + +import { getCiEnv } from '@codebuff/common/env-ci' import { AuthenticationError, ErrorCodes, @@ -22,12 +24,16 @@ import { logger as defaultLogger, loggerContext } from '../utils/logger' import type { GetUserInfoFromApiKeyFn } from '@codebuff/common/types/contracts/database' import type { Logger } from '@codebuff/common/types/contracts/logger' +const getApiKeyHash = (apiKey: string): string => { + return createHash('sha256').update(apiKey).digest('hex') +} + // Query keys for type-safe cache management export const authQueryKeys = { all: ['auth'] as const, user: () => [...authQueryKeys.all, 'user'] as const, validation: (apiKey: string) => - [...authQueryKeys.all, 'validation', apiKey] as const, + [...authQueryKeys.all, 'validation', getApiKeyHash(apiKey)] as const, } interface ValidateAuthParams { @@ -122,8 +128,7 @@ export function useAuthQuery(deps: UseAuthQueryDeps = {}) { } = deps const userCredentials = getUserCredentials() - const apiKey = - userCredentials?.authToken || process.env[API_KEY_ENV_VAR] || '' + const apiKey = userCredentials?.authToken || getCiEnv().CODEBUFF_API_KEY || '' return useQuery({ queryKey: authQueryKeys.validation(apiKey), diff --git a/cli/src/hooks/use-auth-state.ts b/cli/src/hooks/use-auth-state.ts index d356c5aff9..ee6c41329e 100644 --- a/cli/src/hooks/use-auth-state.ts +++ b/cli/src/hooks/use-auth-state.ts @@ -10,6 +10,17 @@ import { loggerContext } from '../utils/logger' import type { MultilineInputHandle } from '../components/multiline-input' import type { User } from '../utils/auth' +const setAuthLoggerContext = (params: { userId: string; email: string }) => { + loggerContext.userId = params.userId + loggerContext.userEmail = params.email + identifyUser(params.userId, { email: params.email }) +} + +const clearAuthLoggerContext = () => { + delete loggerContext.userId + delete loggerContext.userEmail +} + interface UseAuthStateOptions { requireAuth: boolean | null inputRef: React.MutableRefObject @@ -27,8 +38,7 @@ export const useAuthState = ({ const logoutMutation = useLogoutMutation() const { resetLoginState } = useLoginStore() - const initialAuthState = - requireAuth === false ? true : requireAuth === true ? false : null + const initialAuthState = requireAuth === null ? null : !requireAuth const [isAuthenticated, setIsAuthenticated] = useState( initialAuthState, ) @@ -55,23 +65,15 @@ export const useAuthState = ({ authToken: userCredentials?.authToken || '', } setUser(userData) - - // Set logger context for analytics - loggerContext.userId = authQuery.data.id - loggerContext.userEmail = authQuery.data.email - - // Identify user with PostHog - identifyUser(authQuery.data.id, { - email: authQuery.data.email, + setAuthLoggerContext({ + userId: authQuery.data.id, + email: authQuery.data.email || '', }) } } else if (authQuery.isError) { setIsAuthenticated(false) setUser(null) - - // Clear logger context on auth error - delete loggerContext.userId - delete loggerContext.userEmail + clearAuthLoggerContext() } }, [authQuery.isSuccess, authQuery.isError, authQuery.data, user]) @@ -85,14 +87,10 @@ export const useAuthState = ({ setInputFocused(true) setUser(loggedInUser) setIsAuthenticated(true) - - // Set logger context for analytics + if (loggedInUser.id && loggedInUser.email) { - loggerContext.userId = loggedInUser.id - loggerContext.userEmail = loggedInUser.email - - // Identify user with PostHog - identifyUser(loggedInUser.id, { + setAuthLoggerContext({ + userId: loggedInUser.id, email: loggedInUser.email, }) } diff --git a/cli/src/index.tsx b/cli/src/index.tsx index cc42b562f0..e6bdc3a2b8 100644 --- a/cli/src/index.tsx +++ b/cli/src/index.tsx @@ -3,7 +3,6 @@ import { promises as fs } from 'fs' import { createRequire } from 'module' -import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants' import { getProjectFileTree } from '@codebuff/common/project-file-tree' import { createCliRenderer } from '@opentui/core' import { createRoot } from '@opentui/react' @@ -21,7 +20,8 @@ import { handlePublish } from './commands/publish' import { initializeApp } from './init/init-app' import { getProjectRoot } from './project-files' import { initAnalytics } from './utils/analytics' -import { getUserCredentials } from './utils/auth' +import { getAuthTokenDetails } from './utils/auth' +import { getCliEnv } from './utils/env' import { initializeAgentRegistry } from './utils/local-agent-registry' import { clearLogFile, logger } from './utils/logger' import { detectTerminalTheme } from './utils/terminal-color-detection' @@ -33,8 +33,9 @@ import type { AgentMode } from './utils/constants' const require = createRequire(import.meta.url) function loadPackageVersion(): string { - if (process.env.CODEBUFF_CLI_VERSION) { - return process.env.CODEBUFF_CLI_VERSION + const env = getCliEnv() + if (env.CODEBUFF_CLI_VERSION) { + return env.CODEBUFF_CLI_VERSION } try { @@ -215,9 +216,7 @@ async function main(): Promise { const [fileTree, setFileTree] = React.useState([]) React.useEffect(() => { - const userCredentials = getUserCredentials() - const apiKey = - userCredentials?.authToken || process.env[API_KEY_ENV_VAR] || '' + const apiKey = getAuthTokenDetails().token ?? '' if (!apiKey) { setRequireAuth(true) diff --git a/cli/src/utils/auth.ts b/cli/src/utils/auth.ts index 2261a52ee1..04c98e73b3 100644 --- a/cli/src/utils/auth.ts +++ b/cli/src/utils/auth.ts @@ -2,10 +2,12 @@ import fs from 'fs' import os from 'os' import path from 'path' +import { getCiEnv } from '@codebuff/common/env-ci' import { env } from '@codebuff/common/env' -import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants' import { z } from 'zod' +import type { CiEnv } from '@codebuff/common/types/contracts/env' + import { getApiClient, setApiClientAuthToken } from './codebuff-api' import { logger } from './logger' @@ -107,13 +109,15 @@ export interface AuthTokenDetails { /** * Resolve the auth token and track where it came from. */ -export const getAuthTokenDetails = (): AuthTokenDetails => { +export const getAuthTokenDetails = ( + ciEnv: CiEnv = getCiEnv(), +): AuthTokenDetails => { const userCredentials = getUserCredentials() if (userCredentials?.authToken) { return { token: userCredentials.authToken, source: 'credentials' } } - const envToken = process.env[API_KEY_ENV_VAR] + const envToken = ciEnv.CODEBUFF_API_KEY if (envToken) { return { token: envToken, source: 'environment' } } From edd498a01b3bb05dea7cc36367facf8e6c567f10 Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Sun, 14 Dec 2025 22:35:29 -0800 Subject: [PATCH 017/885] refactor(web): extract v1 route helpers - Add shared helpers for JSON parsing, auth, and credit charging - Refactor docs-search and web-search routes to inject typed server env deps --- .../src/llm-api/codebuff-web-api.ts | 190 ++++++++--------- .../agent-runtime/src/llm-api/linkup-api.ts | 34 ++- web/src/app/api/v1/_helpers.ts | 201 ++++++++++++++++++ web/src/app/api/v1/docs-search/_post.ts | 138 ++++-------- web/src/app/api/v1/web-search/_post.ts | 140 ++++-------- 5 files changed, 383 insertions(+), 320 deletions(-) create mode 100644 web/src/app/api/v1/_helpers.ts diff --git a/packages/agent-runtime/src/llm-api/codebuff-web-api.ts b/packages/agent-runtime/src/llm-api/codebuff-web-api.ts index 697a8ab740..34bb75bfbe 100644 --- a/packages/agent-runtime/src/llm-api/codebuff-web-api.ts +++ b/packages/agent-runtime/src/llm-api/codebuff-web-api.ts @@ -9,17 +9,39 @@ interface CodebuffWebApiEnv { ciEnv: CiEnv } -export async function callWebSearchAPI(params: { - query: string - depth?: 'standard' | 'deep' - repoUrl?: string | null +const tryParseJson = (text: string): unknown => { + try { + return JSON.parse(text) + } catch { + return null + } +} + +const getStringField = (value: unknown, key: string): string | undefined => { + if (!value || typeof value !== 'object') return undefined + const record = value as Record + const field = record[key] + return typeof field === 'string' ? field : undefined +} + +const getNumberField = (value: unknown, key: string): number | undefined => { + if (!value || typeof value !== 'object') return undefined + const record = value as Record + const field = record[key] + return typeof field === 'number' ? field : undefined +} + +const callCodebuffV1 = async (params: { + endpoint: '/api/v1/web-search' | '/api/v1/docs-search' + payload: unknown fetch: typeof globalThis.fetch logger: Logger env: CodebuffWebApiEnv baseUrl?: string apiKey?: string -}): Promise<{ result?: string; error?: string; creditsUsed?: number }> { - const { query, depth = 'standard', repoUrl, fetch, logger, env } = params + requestName: 'web-search' | 'docs-search' +}): Promise<{ json?: unknown; error?: string; creditsUsed?: number }> => { + const { endpoint, payload, fetch, logger, env, requestName } = params const baseUrl = params.baseUrl ?? env.clientEnv.NEXT_PUBLIC_CODEBUFF_APP_URL const apiKey = params.apiKey ?? env.ciEnv.CODEBUFF_API_KEY @@ -27,8 +49,7 @@ export async function callWebSearchAPI(params: { return { error: 'Missing Codebuff base URL or API key' } } - const url = `${baseUrl}/api/v1/web-search` - const payload = { query, depth, ...(repoUrl ? { repoUrl } : {}) } + const url = `${baseUrl}${endpoint}` try { const res = await withTimeout( @@ -45,18 +66,14 @@ export async function callWebSearchAPI(params: { ) const text = await res.text() - const tryJson = () => { - try { - return JSON.parse(text) - } catch { - return null - } - } + const json = tryParseJson(text) if (!res.ok) { - const maybe = tryJson() const err = - (maybe && (maybe.error || maybe.message)) || text || 'Request failed' + getStringField(json, 'error') ?? + getStringField(json, 'message') ?? + text ?? + 'Request failed' logger.warn( { url, @@ -64,21 +81,12 @@ export async function callWebSearchAPI(params: { statusText: res.statusText, body: text?.slice(0, 500), }, - 'Web API web-search request failed', + `Web API ${requestName} request failed`, ) - return { error: typeof err === 'string' ? err : 'Unknown error' } + return { error: err } } - const data = tryJson() - if (data && typeof data.result === 'string') { - return { - result: data.result, - creditsUsed: - typeof data.creditsUsed === 'number' ? data.creditsUsed : undefined, - } - } - if (data && typeof data.error === 'string') return { error: data.error } - return { error: 'Invalid response format' } + return { json, creditsUsed: getNumberField(json, 'creditsUsed') } } catch (error) { logger.error( { @@ -87,12 +95,46 @@ export async function callWebSearchAPI(params: { ? { name: error.name, message: error.message, stack: error.stack } : error, }, - 'Web API web-search network error', + `Web API ${requestName} network error`, ) return { error: error instanceof Error ? error.message : 'Network error' } } } +export async function callWebSearchAPI(params: { + query: string + depth?: 'standard' | 'deep' + repoUrl?: string | null + fetch: typeof globalThis.fetch + logger: Logger + env: CodebuffWebApiEnv + baseUrl?: string + apiKey?: string +}): Promise<{ result?: string; error?: string; creditsUsed?: number }> { + const { query, depth = 'standard', repoUrl, fetch, logger, env } = params + const payload = { query, depth, ...(repoUrl ? { repoUrl } : {}) } + + const res = await callCodebuffV1({ + endpoint: '/api/v1/web-search', + payload, + fetch, + logger, + env, + baseUrl: params.baseUrl, + apiKey: params.apiKey, + requestName: 'web-search', + }) + if (res.error) return { error: res.error } + + const result = getStringField(res.json, 'result') + if (result) { + return { result, creditsUsed: res.creditsUsed } + } + + const error = getStringField(res.json, 'error') + return { error: error ?? 'Invalid response format' } +} + export async function callDocsSearchAPI(params: { libraryTitle: string topic?: string @@ -105,78 +147,28 @@ export async function callDocsSearchAPI(params: { apiKey?: string }): Promise<{ documentation?: string; error?: string; creditsUsed?: number }> { const { libraryTitle, topic, maxTokens, repoUrl, fetch, logger, env } = params - const baseUrl = params.baseUrl ?? env.clientEnv.NEXT_PUBLIC_CODEBUFF_APP_URL - const apiKey = params.apiKey ?? env.ciEnv.CODEBUFF_API_KEY - - if (!baseUrl || !apiKey) { - return { error: 'Missing Codebuff base URL or API key' } - } - - const url = `${baseUrl}/api/v1/docs-search` - const payload: Record = { libraryTitle } + const payload: Record = { libraryTitle } if (topic) payload.topic = topic if (typeof maxTokens === 'number') payload.maxTokens = maxTokens if (repoUrl) payload.repoUrl = repoUrl - try { - const res = await withTimeout( - fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${apiKey}`, - 'x-codebuff-api-key': apiKey, - }, - body: JSON.stringify(payload), - }), - FETCH_TIMEOUT_MS, - ) - - const text = await res.text() - const tryJson = () => { - try { - return JSON.parse(text) as any - } catch { - return null - } - } - - if (!res.ok) { - const maybe = tryJson() - const err = - (maybe && (maybe.error || maybe.message)) || text || 'Request failed' - logger.warn( - { - url, - status: res.status, - statusText: res.statusText, - body: text?.slice(0, 500), - }, - 'Web API docs-search request failed', - ) - return { error: typeof err === 'string' ? err : 'Unknown error' } - } - - const data = tryJson() - if (data && typeof data.documentation === 'string') { - return { - documentation: data.documentation, - creditsUsed: - typeof data.creditsUsed === 'number' ? data.creditsUsed : undefined, - } - } - if (data && typeof data.error === 'string') return { error: data.error } - return { error: 'Invalid response format' } - } catch (error) { - logger.error( - { - error: - error instanceof Error - ? { name: error.name, message: error.message, stack: error.stack } - : error, - }, - 'Web API docs-search network error', - ) - return { error: error instanceof Error ? error.message : 'Network error' } + const res = await callCodebuffV1({ + endpoint: '/api/v1/docs-search', + payload, + fetch, + logger, + env, + baseUrl: params.baseUrl, + apiKey: params.apiKey, + requestName: 'docs-search', + }) + if (res.error) return { error: res.error } + + const documentation = getStringField(res.json, 'documentation') + if (documentation) { + return { documentation, creditsUsed: res.creditsUsed } } + + const error = getStringField(res.json, 'error') + return { error: error ?? 'Invalid response format' } } diff --git a/packages/agent-runtime/src/llm-api/linkup-api.ts b/packages/agent-runtime/src/llm-api/linkup-api.ts index 41c5d6b956..accf964d41 100644 --- a/packages/agent-runtime/src/llm-api/linkup-api.ts +++ b/packages/agent-runtime/src/llm-api/linkup-api.ts @@ -2,7 +2,7 @@ import { withTimeout } from '@codebuff/common/util/promise' import type { Logger } from '@codebuff/common/types/contracts/logger' -interface LinkupEnv { +export interface LinkupEnv { LINKUP_API_KEY: string } @@ -20,12 +20,14 @@ export interface LinkupSearchResponse { sources: LinkupSearchResult[] } -/** - * Searches the web using Linkup API - * @param query The search query - * @param options Search options including depth and max results - * @returns Array containing a single result with the sourced answer or null if the request fails - */ +const headersToRecord = (headers: Headers): Record => { + const record: Record = {} + headers.forEach((value, key) => { + record[key] = value + }) + return record +} + export async function searchWeb(options: { query: string depth?: 'standard' | 'deep' @@ -57,7 +59,7 @@ export async function searchWeb(options: { try { const fetchStartTime = Date.now() const response = await withTimeout( - fetch(`${LINKUP_API_BASE_URL}/search`, { + fetch(requestUrl, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -92,15 +94,7 @@ export async function searchWeb(options: { responseBody: responseBody.substring(0, 500), // Truncate long responses fetchDuration, totalDuration: Date.now() - apiStartTime, - headers: response.headers - ? (() => { - const headerObj: Record = {} - response.headers.forEach((value, key) => { - headerObj[key] = value - }) - return headerObj - })() - : 'No headers', + headers: headersToRecord(response.headers), }, `Request failed with ${response.status}: ${response.statusText}`, ) @@ -108,10 +102,11 @@ export async function searchWeb(options: { } let data: LinkupSearchResponse + let parseDuration = 0 try { const parseStartTime = Date.now() data = (await response.json()) as LinkupSearchResponse - const parseDuration = Date.now() - parseStartTime + parseDuration = Date.now() - parseStartTime } catch (jsonError) { logger.error( { @@ -124,6 +119,7 @@ export async function searchWeb(options: { } : jsonError, fetchDuration, + parseDuration, totalDuration: Date.now() - apiStartTime, status: response.status, statusText: response.statusText, @@ -142,6 +138,7 @@ export async function searchWeb(options: { answerLength: data?.answer?.length || 0, sourcesCount: data?.sources?.length || 0, fetchDuration, + parseDuration, totalDuration: Date.now() - apiStartTime, }, 'Invalid response format - missing or invalid answer field', @@ -156,6 +153,7 @@ export async function searchWeb(options: { answerLength: data.answer.length, sourcesCount: data.sources?.length || 0, fetchDuration, + parseDuration, totalDuration, success: true, }, diff --git a/web/src/app/api/v1/_helpers.ts b/web/src/app/api/v1/_helpers.ts new file mode 100644 index 0000000000..ac705ac46d --- /dev/null +++ b/web/src/app/api/v1/_helpers.ts @@ -0,0 +1,201 @@ +import { NextResponse } from 'next/server' +import type { ZodType } from 'zod' + +import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events' +import { extractApiKeyFromHeader } from '@/util/auth' + +import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' +import type { + ConsumeCreditsWithFallbackFn, + GetUserUsageDataFn, +} from '@codebuff/common/types/contracts/billing' +import type { GetUserInfoFromApiKeyFn } from '@codebuff/common/types/contracts/database' +import type { Logger, LoggerWithContextFn } from '@codebuff/common/types/contracts/logger' +import type { NextRequest } from 'next/server' + +export type HandlerResult = + | { ok: true; data: T } + | { ok: false; response: NextResponse } + +export const parseJsonBody = async (params: { + req: NextRequest + schema: ZodType + logger: Logger + trackEvent: TrackEventFn + validationErrorEvent: AnalyticsEvent +}): Promise> => { + const { req, schema, logger, trackEvent, validationErrorEvent } = params + + let json: unknown + try { + json = await req.json() + } catch { + trackEvent({ + event: validationErrorEvent, + userId: 'unknown', + properties: { error: 'Invalid JSON' }, + logger, + }) + return { + ok: false, + response: NextResponse.json( + { error: 'Invalid JSON in request body' }, + { status: 400 }, + ), + } + } + + const parsed = schema.safeParse(json) + if (!parsed.success) { + trackEvent({ + event: validationErrorEvent, + userId: 'unknown', + properties: { issues: parsed.error.format() }, + logger, + }) + return { + ok: false, + response: NextResponse.json( + { error: 'Invalid request body', details: parsed.error.format() }, + { status: 400 }, + ), + } + } + + return { ok: true, data: parsed.data } +} + +export const requireUserFromApiKey = async (params: { + req: NextRequest + getUserInfoFromApiKey: GetUserInfoFromApiKeyFn + logger: Logger + loggerWithContext: LoggerWithContextFn + trackEvent: TrackEventFn + authErrorEvent: AnalyticsEvent +}): Promise< + HandlerResult<{ userId: string; userInfo: any; logger: Logger }> +> => { + const { + req, + getUserInfoFromApiKey, + logger: baseLogger, + loggerWithContext, + trackEvent, + authErrorEvent, + } = params + + const apiKey = extractApiKeyFromHeader(req) + if (!apiKey) { + trackEvent({ + event: authErrorEvent, + userId: 'unknown', + properties: { reason: 'Missing API key' }, + logger: baseLogger, + }) + return { + ok: false, + response: NextResponse.json({ message: 'Unauthorized' }, { status: 401 }), + } + } + + const userInfo = await getUserInfoFromApiKey({ + apiKey, + fields: ['id', 'email', 'discord_id'], + logger: baseLogger, + }) + if (!userInfo) { + trackEvent({ + event: authErrorEvent, + userId: 'unknown', + properties: { reason: 'Invalid API key' }, + logger: baseLogger, + }) + return { + ok: false, + response: NextResponse.json( + { message: 'Invalid Codebuff API key' }, + { status: 401 }, + ), + } + } + + const logger = loggerWithContext({ userInfo }) + return { ok: true, data: { userId: userInfo.id, userInfo, logger } } +} + +export const checkCreditsAndCharge = async (params: { + userId: string + creditsToCharge: number + repoUrl?: string + context: string + operationName?: string + logger: Logger + trackEvent: TrackEventFn + insufficientCreditsEvent: AnalyticsEvent + getUserUsageData: GetUserUsageDataFn + consumeCreditsWithFallback: ConsumeCreditsWithFallbackFn +}): Promise> => { + const { + userId, + creditsToCharge, + repoUrl, + context, + operationName, + logger, + trackEvent, + insufficientCreditsEvent, + getUserUsageData, + consumeCreditsWithFallback, + } = params + + const { + balance: { totalRemaining }, + nextQuotaReset, + } = await getUserUsageData({ userId, logger }) + + if (totalRemaining <= 0 || totalRemaining < creditsToCharge) { + trackEvent({ + event: insufficientCreditsEvent, + userId, + properties: { totalRemaining, required: creditsToCharge, nextQuotaReset }, + logger, + }) + return { + ok: false, + response: NextResponse.json( + { + message: 'Insufficient credits', + totalRemaining, + required: creditsToCharge, + nextQuotaReset, + }, + { status: 402 }, + ), + } + } + + const chargeResult = await consumeCreditsWithFallback({ + userId, + creditsToCharge, + repoUrl, + context, + logger, + }) + + if (!chargeResult.success) { + const name = operationName ?? context + logger.error( + { userId, creditsToCharge, error: chargeResult.error }, + `Failed to charge credits for ${name}`, + ) + return { + ok: false, + response: NextResponse.json( + { error: 'Failed to charge credits' }, + { status: 500 }, + ), + } + } + + return { ok: true, data: { creditsUsed: creditsToCharge } } +} diff --git a/web/src/app/api/v1/docs-search/_post.ts b/web/src/app/api/v1/docs-search/_post.ts index cdf4703945..fa0b413c3d 100644 --- a/web/src/app/api/v1/docs-search/_post.ts +++ b/web/src/app/api/v1/docs-search/_post.ts @@ -3,6 +3,11 @@ import { PROFIT_MARGIN } from '@codebuff/common/old-constants' import { NextResponse } from 'next/server' import { z } from 'zod' +import { + checkCreditsAndCharge, + parseJsonBody, + requireUserFromApiKey, +} from '../_helpers' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { GetUserUsageDataFn, @@ -16,7 +21,6 @@ import type { import type { NextRequest } from 'next/server' import { fetchContext7LibraryDocumentation } from '@codebuff/agent-runtime/llm-api/context7-api' -import { extractApiKeyFromHeader } from '@/util/auth' const bodySchema = z.object({ libraryTitle: z.string().min(1, 'libraryTitle is required'), @@ -44,72 +48,30 @@ export async function postDocsSearch(params: { consumeCreditsWithFallback, fetch, } = params - let { logger } = params + const baseLogger = params.logger - // Parse JSON body - let json: unknown - try { - json = await req.json() - } catch (e) { - trackEvent({ - event: AnalyticsEvent.DOCS_SEARCH_VALIDATION_ERROR, - userId: 'unknown', - properties: { error: 'Invalid JSON' }, - logger, - }) - return NextResponse.json( - { error: 'Invalid JSON in request body' }, - { status: 400 }, - ) - } - - // Validate body - const parsed = bodySchema.safeParse(json) - if (!parsed.success) { - trackEvent({ - event: AnalyticsEvent.DOCS_SEARCH_VALIDATION_ERROR, - userId: 'unknown', - properties: { issues: parsed.error.format() }, - logger, - }) - return NextResponse.json( - { error: 'Invalid request body', details: parsed.error.format() }, - { status: 400 }, - ) - } - const { libraryTitle, topic, maxTokens, repoUrl } = parsed.data + const parsedBody = await parseJsonBody({ + req, + schema: bodySchema, + logger: baseLogger, + trackEvent, + validationErrorEvent: AnalyticsEvent.DOCS_SEARCH_VALIDATION_ERROR, + }) + if (!parsedBody.ok) return parsedBody.response - // Auth - const apiKey = extractApiKeyFromHeader(req) - if (!apiKey) { - trackEvent({ - event: AnalyticsEvent.DOCS_SEARCH_AUTH_ERROR, - userId: 'unknown', - properties: { reason: 'Missing API key' }, - logger, - }) - return NextResponse.json({ message: 'Unauthorized' }, { status: 401 }) - } + const { libraryTitle, topic, maxTokens, repoUrl } = parsedBody.data - const userInfo = await getUserInfoFromApiKey({ - apiKey, - fields: ['id', 'email', 'discord_id'], - logger, + const authed = await requireUserFromApiKey({ + req, + getUserInfoFromApiKey, + logger: baseLogger, + loggerWithContext, + trackEvent, + authErrorEvent: AnalyticsEvent.DOCS_SEARCH_AUTH_ERROR, }) - if (!userInfo) { - trackEvent({ - event: AnalyticsEvent.DOCS_SEARCH_AUTH_ERROR, - userId: 'unknown', - properties: { reason: 'Invalid API key' }, - logger, - }) - return NextResponse.json( - { message: 'Invalid Codebuff API key' }, - { status: 401 }, - ) - } - logger = loggerWithContext({ userInfo }) - const userId = userInfo.id + if (!authed.ok) return authed.response + + const { userId, logger } = authed.data // Track request trackEvent({ @@ -123,47 +85,19 @@ export async function postDocsSearch(params: { const baseCost = 1 const creditsToCharge = Math.round(baseCost * (1 + PROFIT_MARGIN)) - // Check credits - const { - balance: { totalRemaining }, - nextQuotaReset, - } = await getUserUsageData({ userId, logger }) - if (totalRemaining <= 0 || totalRemaining < creditsToCharge) { - trackEvent({ - event: AnalyticsEvent.DOCS_SEARCH_INSUFFICIENT_CREDITS, - userId, - properties: { totalRemaining, required: creditsToCharge, nextQuotaReset }, - logger, - }) - return NextResponse.json( - { - message: 'Insufficient credits', - totalRemaining, - required: creditsToCharge, - nextQuotaReset, - }, - { status: 402 }, - ) - } - - // Charge upfront with delegation fallback - const chargeResult = await consumeCreditsWithFallback({ + const credits = await checkCreditsAndCharge({ userId, creditsToCharge, repoUrl, context: 'documentation lookup', + operationName: 'docs search', logger, + trackEvent, + insufficientCreditsEvent: AnalyticsEvent.DOCS_SEARCH_INSUFFICIENT_CREDITS, + getUserUsageData, + consumeCreditsWithFallback, }) - if (!chargeResult.success) { - logger.error( - { userId, creditsToCharge, error: chargeResult.error }, - 'Failed to charge credits for docs search', - ) - return NextResponse.json( - { error: 'Failed to charge credits' }, - { status: 500 }, - ) - } + if (!credits.ok) return credits.response // Perform docs fetch try { @@ -182,15 +116,19 @@ export async function postDocsSearch(params: { properties: { reason: 'No documentation' }, logger, }) + const topicSuffix = topic ? ` with topic "${topic}"` : '' return NextResponse.json( { - error: `No documentation found for "${libraryTitle}"${topic ? ` with topic "${topic}"` : ''}`, + error: `No documentation found for "${libraryTitle}"${topicSuffix}`, }, { status: 200 }, ) } - return NextResponse.json({ documentation, creditsUsed: creditsToCharge }) + return NextResponse.json({ + documentation, + creditsUsed: credits.data.creditsUsed, + }) } catch (error) { logger.error( { diff --git a/web/src/app/api/v1/web-search/_post.ts b/web/src/app/api/v1/web-search/_post.ts index d851f5b9ec..9b7552183b 100644 --- a/web/src/app/api/v1/web-search/_post.ts +++ b/web/src/app/api/v1/web-search/_post.ts @@ -3,6 +3,11 @@ import { PROFIT_MARGIN } from '@codebuff/common/old-constants' import { NextResponse } from 'next/server' import { z } from 'zod' +import { + checkCreditsAndCharge, + parseJsonBody, + requireUserFromApiKey, +} from '../_helpers' import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' import type { GetUserUsageDataFn, @@ -16,11 +21,8 @@ import type { import type { NextRequest } from 'next/server' import { searchWeb } from '@codebuff/agent-runtime/llm-api/linkup-api' -import { extractApiKeyFromHeader } from '@/util/auth' -interface WebSearchEnvDeps { - LINKUP_API_KEY: string -} +import type { LinkupEnv } from '@codebuff/agent-runtime/llm-api/linkup-api' const bodySchema = z.object({ query: z.string().min(1, 'query is required'), @@ -37,7 +39,7 @@ export async function postWebSearch(params: { getUserUsageData: GetUserUsageDataFn consumeCreditsWithFallback: ConsumeCreditsWithFallbackFn fetch: typeof globalThis.fetch - serverEnv: WebSearchEnvDeps + serverEnv: LinkupEnv }) { const { req, @@ -49,72 +51,30 @@ export async function postWebSearch(params: { fetch, serverEnv, } = params - let { logger } = params - - // Parse JSON body - let json: unknown - try { - json = await req.json() - } catch (e) { - trackEvent({ - event: AnalyticsEvent.WEB_SEARCH_VALIDATION_ERROR, - userId: 'unknown', - properties: { error: 'Invalid JSON' }, - logger, - }) - return NextResponse.json( - { error: 'Invalid JSON in request body' }, - { status: 400 }, - ) - } + const baseLogger = params.logger - // Validate body - const parsed = bodySchema.safeParse(json) - if (!parsed.success) { - trackEvent({ - event: AnalyticsEvent.WEB_SEARCH_VALIDATION_ERROR, - userId: 'unknown', - properties: { issues: parsed.error.format() }, - logger, - }) - return NextResponse.json( - { error: 'Invalid request body', details: parsed.error.format() }, - { status: 400 }, - ) - } - const { query, depth, repoUrl } = parsed.data + const parsedBody = await parseJsonBody({ + req, + schema: bodySchema, + logger: baseLogger, + trackEvent, + validationErrorEvent: AnalyticsEvent.WEB_SEARCH_VALIDATION_ERROR, + }) + if (!parsedBody.ok) return parsedBody.response - // Auth - const apiKey = extractApiKeyFromHeader(req) - if (!apiKey) { - trackEvent({ - event: AnalyticsEvent.WEB_SEARCH_AUTH_ERROR, - userId: 'unknown', - properties: { reason: 'Missing API key' }, - logger, - }) - return NextResponse.json({ message: 'Unauthorized' }, { status: 401 }) - } + const { query, depth, repoUrl } = parsedBody.data - const userInfo = await getUserInfoFromApiKey({ - apiKey, - fields: ['id', 'email', 'discord_id'], - logger, + const authed = await requireUserFromApiKey({ + req, + getUserInfoFromApiKey, + logger: baseLogger, + loggerWithContext, + trackEvent, + authErrorEvent: AnalyticsEvent.WEB_SEARCH_AUTH_ERROR, }) - if (!userInfo) { - trackEvent({ - event: AnalyticsEvent.WEB_SEARCH_AUTH_ERROR, - userId: 'unknown', - properties: { reason: 'Invalid API key' }, - logger, - }) - return NextResponse.json( - { message: 'Invalid Codebuff API key' }, - { status: 401 }, - ) - } - logger = loggerWithContext({ userInfo }) - const userId = userInfo.id + if (!authed.ok) return authed.response + + const { userId, logger } = authed.data // Track request trackEvent({ @@ -124,50 +84,21 @@ export async function postWebSearch(params: { logger, }) - // Check credits (pre-check) - const { - balance: { totalRemaining }, - nextQuotaReset, - } = await getUserUsageData({ userId, logger }) const baseCost = depth === 'deep' ? 5 : 1 const creditsToCharge = Math.round(baseCost * (1 + PROFIT_MARGIN)) - if (totalRemaining <= 0 || totalRemaining < creditsToCharge) { - trackEvent({ - event: AnalyticsEvent.WEB_SEARCH_INSUFFICIENT_CREDITS, - userId, - properties: { totalRemaining, required: creditsToCharge, nextQuotaReset }, - logger, - }) - return NextResponse.json( - { - message: 'Insufficient credits', - totalRemaining, - required: creditsToCharge, - nextQuotaReset, - }, - { status: 402 }, - ) - } - - // Charge credits upfront with delegation fallback - const chargeResult = await consumeCreditsWithFallback({ + const credits = await checkCreditsAndCharge({ userId, creditsToCharge, repoUrl, context: 'web search', logger, + trackEvent, + insufficientCreditsEvent: AnalyticsEvent.WEB_SEARCH_INSUFFICIENT_CREDITS, + getUserUsageData, + consumeCreditsWithFallback, }) - if (!chargeResult.success) { - logger.error( - { userId, creditsToCharge, error: chargeResult.error }, - 'Failed to charge credits for web search', - ) - return NextResponse.json( - { error: 'Failed to charge credits' }, - { status: 500 }, - ) - } + if (!credits.ok) return credits.response // Perform search try { @@ -186,7 +117,10 @@ export async function postWebSearch(params: { ) } - return NextResponse.json({ result, creditsUsed: creditsToCharge }) + return NextResponse.json({ + result, + creditsUsed: credits.data.creditsUsed, + }) } catch (error) { logger.error( { From 2a263bd20c63bdc8a1d7496d4d874b75c52407b1 Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Sun, 14 Dec 2025 22:35:36 -0800 Subject: [PATCH 018/885] refactor(analytics): inject client env config Avoid importing client env at module import time; callers inject the needed NEXT_PUBLIC_* config. --- common/src/analytics.ts | 73 +++++++++++++++++++++++++------------- web/src/lib/server-init.ts | 2 ++ 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/common/src/analytics.ts b/common/src/analytics.ts index 7a647cd9b5..7954764a02 100644 --- a/common/src/analytics.ts +++ b/common/src/analytics.ts @@ -1,41 +1,66 @@ import { PostHog } from 'posthog-node' -import type { ClientEnv } from '@codebuff/common/types/contracts/env' import type { AnalyticsEvent } from './constants/analytics-events' import type { Logger } from '@codebuff/common/types/contracts/logger' +import type { ClientEnv } from './env-schema' let client: PostHog | undefined -// Lazy load env to avoid validation at import time in test environments -let _cachedEnv: ClientEnv | undefined -let _cachedIsProd: boolean | undefined +type EnvName = 'dev' | 'test' | 'prod' -const getEnv = (): ClientEnv => { - if (_cachedEnv === undefined) { - _cachedEnv = require('@codebuff/common/env').env as ClientEnv - } - return _cachedEnv +type AnalyticsConfig = { + envName: EnvName + posthogApiKey: string + posthogHostUrl: string } -const getIsProd = (): boolean => { - if (_cachedIsProd === undefined) { - _cachedIsProd = require('@codebuff/common/env').IS_PROD as boolean - } - return _cachedIsProd +let analyticsConfig: AnalyticsConfig | null = null + +export const configureAnalytics = (config: AnalyticsConfig | null) => { + analyticsConfig = config + client = undefined +} + +const getConfigFromClientEnv = ( + clientEnv: Pick< + ClientEnv, + | 'NEXT_PUBLIC_CB_ENVIRONMENT' + | 'NEXT_PUBLIC_POSTHOG_API_KEY' + | 'NEXT_PUBLIC_POSTHOG_HOST_URL' + >, +): AnalyticsConfig | null => { + const envName = clientEnv.NEXT_PUBLIC_CB_ENVIRONMENT + const posthogApiKey = clientEnv.NEXT_PUBLIC_POSTHOG_API_KEY + const posthogHostUrl = clientEnv.NEXT_PUBLIC_POSTHOG_HOST_URL + + if (!envName) return null + if (!posthogApiKey || !posthogHostUrl) return null + + return { envName, posthogApiKey, posthogHostUrl } } -export function initAnalytics({ logger }: { logger: Logger }) { - const env = getEnv() - if (!env.NEXT_PUBLIC_POSTHOG_API_KEY || !env.NEXT_PUBLIC_POSTHOG_HOST_URL) { - logger.warn( - 'Analytics environment variables not set - analytics will be disabled', - ) +export function initAnalytics({ + logger, + clientEnv, +}: { + logger: Logger + clientEnv?: Parameters[0] +}) { + if (clientEnv) { + configureAnalytics(getConfigFromClientEnv(clientEnv)) + } + + if (analyticsConfig?.envName !== 'prod') { + return + } + + if (!analyticsConfig) { return } try { - client = new PostHog(env.NEXT_PUBLIC_POSTHOG_API_KEY, { - host: env.NEXT_PUBLIC_POSTHOG_HOST_URL, + client = new PostHog(analyticsConfig.posthogApiKey, { + host: analyticsConfig.posthogHostUrl, flushAt: 1, flushInterval: 0, }) @@ -50,7 +75,7 @@ export async function flushAnalytics() { } try { await client.flush() - } catch (error) {} + } catch {} } export function trackEvent({ @@ -64,7 +89,7 @@ export function trackEvent({ properties?: Record logger: Logger }) { - if (!getIsProd()) { + if (analyticsConfig?.envName !== 'prod') { // Note (James): This log was too noisy. Reenable it as you need to test something. // logger.info({ payload: { event, properties } }, event) return diff --git a/web/src/lib/server-init.ts b/web/src/lib/server-init.ts index 535309dda2..7a77ffd47e 100644 --- a/web/src/lib/server-init.ts +++ b/web/src/lib/server-init.ts @@ -1,4 +1,5 @@ import { initAnalytics } from '@codebuff/common/analytics' +import { env } from '@codebuff/common/env' // Errors if this file is included in client bundles import 'server-only' @@ -14,6 +15,7 @@ export function initializeServer({ logger }: { logger: Logger }) { try { initAnalytics({ logger, + clientEnv: env, }) // Initialize other services as needed initialized = true From 7fc1684228d05eca3fd8605e8f7b28d62edd11f1 Mon Sep 17 00:00:00 2001 From: brandonkachen Date: Sun, 14 Dec 2025 22:35:49 -0800 Subject: [PATCH 019/885] feat(env): enforce package boundaries + test defaults Typecheck-time enforcement (scripts/check-env-architecture.ts): - Ban common/src/** from importing @codebuff/internal/* (layering/secrets) - Ban 'use client' modules from importing @codebuff/internal/env - Enforce CLI/SDK process.env access through designated env helpers Runtime guards (@codebuff/internal/env conditional exports): - browser/react-server conditions route to 'server-only' stubs - Provides defense-in-depth for Next.js bundle boundaries Other changes: - Route CLI/SDK process.env access through env helpers - Preload safe NEXT_PUBLIC_* defaults in test setup - Clarify test-only agent runtime fixtures --- bunfig.toml | 1 + cli/src/commands/router.ts | 6 +- cli/src/hooks/use-theme.tsx | 4 +- cli/src/hooks/use-why-did-you-update.ts | 2 +- cli/src/types/env.ts | 3 + cli/src/utils/chat-scroll-accel.ts | 18 +- cli/src/utils/codebuff-client.ts | 4 +- cli/src/utils/env.ts | 22 +- common/src/__tests__/agent-validation.test.ts | 33 - common/src/env.ts | 48 +- common/src/project-file-tree.ts | 7 +- common/src/testing/fixtures/agent-runtime.ts | 127 ++++ common/src/testing/impl/agent-runtime.ts | 122 +-- common/src/util/partial-json-delta.ts | 10 +- knowledge.md | 696 ++---------------- package.json | 2 +- packages/internal/package.json | 9 + packages/internal/src/env.browser.ts | 3 + packages/internal/src/env.react-server.ts | 3 + scripts/check-env-architecture.ts | 600 +++++++++++++++ sdk/bunfig.toml | 9 + sdk/src/__tests__/run.integration.test.ts | 4 +- sdk/src/agents/load-agents.ts | 4 +- sdk/src/client.ts | 7 +- sdk/src/env.ts | 14 + sdk/src/impl/llm.ts | 6 +- sdk/src/native/ripgrep.ts | 13 +- sdk/src/tools/run-terminal-command.ts | 7 +- sdk/test/setup-env.ts | 27 + web/jest.config.cjs | 1 + 30 files changed, 987 insertions(+), 825 deletions(-) create mode 100644 common/src/testing/fixtures/agent-runtime.ts create mode 100644 packages/internal/src/env.browser.ts create mode 100644 packages/internal/src/env.react-server.ts create mode 100644 scripts/check-env-architecture.ts create mode 100644 sdk/bunfig.toml create mode 100644 sdk/test/setup-env.ts diff --git a/bunfig.toml b/bunfig.toml index f1aaabf9ef..3f1eda94db 100644 --- a/bunfig.toml +++ b/bunfig.toml @@ -7,3 +7,4 @@ linkWorkspacePackages = true [test] # Exclude test repositories from test execution to prevent timeouts exclude = ["evals/test-repos/**"] +preload = ["./sdk/test/setup-env.ts"] diff --git a/cli/src/commands/router.ts b/cli/src/commands/router.ts index 53c18f758e..083e13c7e1 100644 --- a/cli/src/commands/router.ts +++ b/cli/src/commands/router.ts @@ -1,5 +1,6 @@ import { runTerminalCommand } from '@codebuff/sdk' + import { findCommand, type RouterParams, @@ -25,10 +26,9 @@ import { createRunTerminalToolResult, } from '../utils/bash-messages' import { showClipboardMessage } from '../utils/clipboard' +import { getSystemProcessEnv } from '../utils/env' import { getSystemMessage, getUserMessage } from '../utils/message-history' - - /** * Run a bash command with automatic ghost/direct mode selection. * Uses ghost mode when streaming or chain in progress, otherwise adds directly to chat history. @@ -74,7 +74,7 @@ export function runBashCommand(command: string) { process_type: 'SYNC', cwd: commandCwd, timeout_seconds: -1, - env: process.env, + env: getSystemProcessEnv(), }) .then(([{ value }]) => { const stdout = 'stdout' in value ? value.stdout || '' : '' diff --git a/cli/src/hooks/use-theme.tsx b/cli/src/hooks/use-theme.tsx index af336e97d1..010f29b6d1 100644 --- a/cli/src/hooks/use-theme.tsx +++ b/cli/src/hooks/use-theme.tsx @@ -7,6 +7,7 @@ import { create } from 'zustand' import { themeConfig, buildTheme } from '../utils/theme-config' +import { getCliEnv } from '../utils/env' import { chatThemes, cloneChatTheme, @@ -58,7 +59,8 @@ const THEME_PRIORITY: ThemeDetector[] = [ ] export const detectSystemTheme = (): ThemeName => { - const envPreference = process.env.OPEN_TUI_THEME ?? process.env.OPENTUI_THEME + const env = getCliEnv() + const envPreference = env.OPEN_TUI_THEME ?? env.OPENTUI_THEME const normalizedEnv = envPreference?.toLowerCase() if (normalizedEnv === 'dark' || normalizedEnv === 'light') { diff --git a/cli/src/hooks/use-why-did-you-update.ts b/cli/src/hooks/use-why-did-you-update.ts index 0f77817616..3d1b0a3c2b 100644 --- a/cli/src/hooks/use-why-did-you-update.ts +++ b/cli/src/hooks/use-why-did-you-update.ts @@ -24,7 +24,7 @@ import { logger } from '../utils/logger' * function MyComponent(props: MyProps) { * useWhyDidYouUpdate('MyComponent', props, { * logLevel: 'debug', - * enabled: process.env.NODE_ENV === 'development' + * enabled: getCliEnv().NODE_ENV === 'development' * }) * return
...
* } diff --git a/cli/src/types/env.ts b/cli/src/types/env.ts index 4b91f8b288..5acb914a06 100644 --- a/cli/src/types/env.ts +++ b/cli/src/types/env.ts @@ -20,6 +20,8 @@ export type CliEnv = BaseEnv & { KITTY_WINDOW_ID?: string SIXEL_SUPPORT?: string ZED_NODE_ENV?: string + ZED_TERM?: string + ZED_SHELL?: string // VS Code family detection VSCODE_THEME_KIND?: string @@ -54,6 +56,7 @@ export type CliEnv = BaseEnv & { CODEBUFF_CLI_VERSION?: string CODEBUFF_CLI_TARGET?: string CODEBUFF_RG_PATH?: string + CODEBUFF_SCROLL_MULTIPLIER?: string } /** diff --git a/cli/src/utils/chat-scroll-accel.ts b/cli/src/utils/chat-scroll-accel.ts index 688c26b65a..2d1ff38689 100644 --- a/cli/src/utils/chat-scroll-accel.ts +++ b/cli/src/utils/chat-scroll-accel.ts @@ -1,9 +1,9 @@ import { Queue } from './arrays' import { clamp } from './math' +import { getCliEnv } from './env' import type { ScrollAcceleration } from '@opentui/core' - -const SCROLL_MULTIPLIER = 'CODEBUFF_SCROLL_MULTIPLIER' +import type { CliEnv } from '../types/env' const ENVIRONMENT_TYPE_VARS = [ 'TERM_PROGRAM', @@ -30,18 +30,20 @@ type ScrollEnvironment = { multiplier: number } -const resolveScrollEnvironment = (): ScrollEnvironment => { - let multiplier = parseFloat(process.env[SCROLL_MULTIPLIER] ?? '') +const resolveScrollEnvironment = ( + env: CliEnv = getCliEnv(), +): ScrollEnvironment => { + let multiplier = parseFloat(env.CODEBUFF_SCROLL_MULTIPLIER ?? '') if (Number.isNaN(multiplier)) { multiplier = 1 } for (const hintVar of ENVIRONMENT_TYPE_VARS) { - const value = process.env[hintVar] - for (const env of ENVIRONMENTS) { - if (value?.includes(env)) { - return { type: env, multiplier } + const value = env[hintVar] + for (const environment of ENVIRONMENTS) { + if (value?.includes(environment)) { + return { type: environment, multiplier } } } } diff --git a/cli/src/utils/codebuff-client.ts b/cli/src/utils/codebuff-client.ts index d4947180ed..c2a091c57f 100644 --- a/cli/src/utils/codebuff-client.ts +++ b/cli/src/utils/codebuff-client.ts @@ -1,9 +1,9 @@ -import { getCliEnv } from './env' import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants' import { AskUserBridge } from '@codebuff/common/utils/ask-user-bridge' import { CodebuffClient } from '@codebuff/sdk' import { getAuthTokenDetails } from './auth' +import { getCliEnv, getSystemProcessEnv } from './env' import { loadAgentDefinitions } from './local-agent-registry' import { logger } from './logger' import { getRgPath } from '../native/ripgrep' @@ -64,7 +64,7 @@ export async function getCodebuffClient(): Promise { try { const rgPath = await getRgPath() // Note: We still set process.env here because SDK reads from it - process.env.CODEBUFF_RG_PATH = rgPath + getSystemProcessEnv().CODEBUFF_RG_PATH = rgPath } catch (error) { logger.error(error, 'Failed to set up ripgrep binary for SDK') } diff --git a/cli/src/utils/env.ts b/cli/src/utils/env.ts index ad7593c746..ee62e85b58 100644 --- a/cli/src/utils/env.ts +++ b/cli/src/utils/env.ts @@ -5,10 +5,7 @@ * process env with CLI-specific vars for terminal/IDE detection. */ -import { - getBaseEnv, - createTestBaseEnv, -} from '@codebuff/common/env-process' +import { getBaseEnv, createTestBaseEnv } from '@codebuff/common/env-process' import type { CliEnv } from '../types/env' @@ -23,6 +20,8 @@ export const getCliEnv = (): CliEnv => ({ KITTY_WINDOW_ID: process.env.KITTY_WINDOW_ID, SIXEL_SUPPORT: process.env.SIXEL_SUPPORT, ZED_NODE_ENV: process.env.ZED_NODE_ENV, + ZED_TERM: process.env.ZED_TERM, + ZED_SHELL: process.env.ZED_SHELL, // VS Code family detection VSCODE_THEME_KIND: process.env.VSCODE_THEME_KIND, @@ -57,21 +56,29 @@ export const getCliEnv = (): CliEnv => ({ CODEBUFF_CLI_VERSION: process.env.CODEBUFF_CLI_VERSION, CODEBUFF_CLI_TARGET: process.env.CODEBUFF_CLI_TARGET, CODEBUFF_RG_PATH: process.env.CODEBUFF_RG_PATH, + CODEBUFF_SCROLL_MULTIPLIER: process.env.CODEBUFF_SCROLL_MULTIPLIER, }) +/** + * Get the raw system process.env object. + * Use this when you need to pass the full environment to subprocesses + * or when you need to set environment variables at runtime. + */ +export const getSystemProcessEnv = (): NodeJS.ProcessEnv => process.env + /** * Create a test CliEnv with optional overrides. * Composes from createTestBaseEnv() for DRY. */ -export const createTestCliEnv = ( - overrides: Partial = {}, -): CliEnv => ({ +export const createTestCliEnv = (overrides: Partial = {}): CliEnv => ({ ...createTestBaseEnv(), // CLI-specific defaults KITTY_WINDOW_ID: undefined, SIXEL_SUPPORT: undefined, ZED_NODE_ENV: undefined, + ZED_TERM: undefined, + ZED_SHELL: undefined, VSCODE_THEME_KIND: undefined, VSCODE_COLOR_THEME_KIND: undefined, VSCODE_GIT_IPC_HANDLE: undefined, @@ -94,5 +101,6 @@ export const createTestCliEnv = ( CODEBUFF_CLI_VERSION: undefined, CODEBUFF_CLI_TARGET: undefined, CODEBUFF_RG_PATH: undefined, + CODEBUFF_SCROLL_MULTIPLIER: undefined, ...overrides, }) diff --git a/common/src/__tests__/agent-validation.test.ts b/common/src/__tests__/agent-validation.test.ts index 34309e31be..99c794de67 100644 --- a/common/src/__tests__/agent-validation.test.ts +++ b/common/src/__tests__/agent-validation.test.ts @@ -74,39 +74,6 @@ describe('Agent Validation', () => { expect(result.templates.brainstormer.id).toBe('brainstormer') }) - test.skip('should validate spawnable agents', async () => { - const fileContext: ProjectFileContext = { - ...mockFileContext, - agentTemplates: { - 'invalid.ts': { - id: 'invalid_agent', - version: '1.0.0', - displayName: 'Invalid', - spawnerPrompt: 'Invalid agent', - model: 'anthropic/claude-4-sonnet-20250522', - systemPrompt: 'Test', - instructionsPrompt: 'Test', - stepPrompt: 'Test', - spawnableAgents: ['nonexistent_agent'], - outputMode: 'last_message', - includeMessageHistory: true, - inheritParentSystemPrompt: false, - toolNames: ['end_turn'], - }, - }, - } - - const result = validateAgents({ - agentTemplates: fileContext.agentTemplates || {}, - logger, - }) - - expect(result.validationErrors).toHaveLength(1) - expect(result.validationErrors[0].message).toContain( - 'Invalid spawnable agents: nonexistent_agent', - ) - }) - it('should merge static and dynamic templates', async () => { const fileContext: ProjectFileContext = { ...mockFileContext, diff --git a/common/src/env.ts b/common/src/env.ts index c5cfac6a4c..0175fb3241 100644 --- a/common/src/env.ts +++ b/common/src/env.ts @@ -1,11 +1,49 @@ -import { clientEnvSchema, clientProcessEnv } from './env-schema' +import { + clientEnvSchema, + clientProcessEnv, + type ClientInput, +} from './env-schema' -// Only log environment in non-production -if (process.env.NEXT_PUBLIC_CB_ENVIRONMENT !== 'prod') { - console.log('Using environment:', process.env.NEXT_PUBLIC_CB_ENVIRONMENT) +const isTestRuntime = + process.env.NODE_ENV === 'test' || process.env.BUN_ENV === 'test' + +const TEST_ENV_DEFAULTS: ClientInput = { + NEXT_PUBLIC_CB_ENVIRONMENT: 'test', + NEXT_PUBLIC_CODEBUFF_APP_URL: 'http://localhost:3000', + NEXT_PUBLIC_SUPPORT_EMAIL: 'support@codebuff.com', + NEXT_PUBLIC_POSTHOG_API_KEY: 'test-posthog-key', + NEXT_PUBLIC_POSTHOG_HOST_URL: 'https://us.i.posthog.com', + NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY: 'pk_test_placeholder', + NEXT_PUBLIC_STRIPE_CUSTOMER_PORTAL: + 'https://billing.stripe.com/p/login/test_placeholder', + NEXT_PUBLIC_GOOGLE_SITE_VERIFICATION_ID: 'test-verification', + NEXT_PUBLIC_WEB_PORT: '3000', +} + +const envInput = isTestRuntime + ? { ...TEST_ENV_DEFAULTS, ...clientProcessEnv } + : clientProcessEnv + +const parsedEnv = clientEnvSchema.safeParse(envInput) +if (!parsedEnv.success) { + throw parsedEnv.error } -export const env = clientEnvSchema.parse(clientProcessEnv) +export const env = parsedEnv.data + +// Populate process.env with defaults during tests so direct access works +if (isTestRuntime) { + for (const [key, value] of Object.entries(TEST_ENV_DEFAULTS)) { + if (!process.env[key] && typeof value === 'string') { + process.env[key] = value + } + } +} + +// Only log environment in non-production +if (env.NEXT_PUBLIC_CB_ENVIRONMENT !== 'prod') { + console.log('Using environment:', env.NEXT_PUBLIC_CB_ENVIRONMENT) +} // Derived environment constants for convenience export const IS_DEV = env.NEXT_PUBLIC_CB_ENVIRONMENT === 'dev' diff --git a/common/src/project-file-tree.ts b/common/src/project-file-tree.ts index cad6a11eac..cfaaaf374b 100644 --- a/common/src/project-file-tree.ts +++ b/common/src/project-file-tree.ts @@ -169,7 +169,12 @@ export async function parseGitignore(params: { for (const ignoreFilePath of ignoreFiles) { if (!(await fileExists({ filePath: ignoreFilePath, fs }))) continue - const ignoreContent = await fs.readFile(ignoreFilePath, 'utf8') + let ignoreContent: string + try { + ignoreContent = await fs.readFile(ignoreFilePath, 'utf8') + } catch { + continue + } const lines = ignoreContent.split('\n') for (let line of lines) { line = line.trim() diff --git a/common/src/testing/fixtures/agent-runtime.ts b/common/src/testing/fixtures/agent-runtime.ts new file mode 100644 index 0000000000..197a175d42 --- /dev/null +++ b/common/src/testing/fixtures/agent-runtime.ts @@ -0,0 +1,127 @@ +/** + * Test-only AgentRuntime dependency fixture. + * + * This file intentionally hardcodes dummy values (e.g. API keys) for tests. + * Do not import from production code. + */ + +import type { AgentTemplate } from '../../types/agent-template' +import type { + AgentRuntimeDeps, + AgentRuntimeScopedDeps, +} from '../../types/contracts/agent-runtime' +import type { ClientEnv, CiEnv } from '../../types/contracts/env' +import type { Logger } from '../../types/contracts/logger' + +export const testLogger: Logger = { + debug: () => {}, + error: () => {}, + info: () => {}, + warn: () => {}, +} + +export const testFetch = async () => { + throw new Error('fetch not implemented in test runtime') +} +testFetch.preconnect = async () => { + throw new Error('fetch.preconnect not implemented in test runtime') +} + +export const testClientEnv: ClientEnv = { + NEXT_PUBLIC_CB_ENVIRONMENT: 'test', + NEXT_PUBLIC_CODEBUFF_APP_URL: 'https://test.codebuff.com', + NEXT_PUBLIC_SUPPORT_EMAIL: 'support@codebuff.test', + NEXT_PUBLIC_POSTHOG_API_KEY: 'test-posthog-key', + NEXT_PUBLIC_POSTHOG_HOST_URL: 'https://test.posthog.com', + NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY: 'pk_test_123', + NEXT_PUBLIC_STRIPE_CUSTOMER_PORTAL: 'https://test.stripe.com/portal', + NEXT_PUBLIC_GOOGLE_SITE_VERIFICATION_ID: undefined, + NEXT_PUBLIC_WEB_PORT: 3000, +} + +export const testCiEnv: CiEnv = { + CI: undefined, + GITHUB_ACTIONS: undefined, + RENDER: undefined, + IS_PULL_REQUEST: undefined, + CODEBUFF_GITHUB_TOKEN: undefined, + CODEBUFF_API_KEY: 'test-api-key', +} + +export const TEST_AGENT_RUNTIME_IMPL = Object.freeze< + AgentRuntimeDeps & AgentRuntimeScopedDeps +>({ + // Environment + clientEnv: testClientEnv, + ciEnv: testCiEnv, + + // Database + getUserInfoFromApiKey: async () => ({ + id: 'test-user-id', + email: 'test-email', + discord_id: 'test-discord-id', + referral_code: 'ref-test-code', + }), + fetchAgentFromDatabase: async () => null, + startAgentRun: async () => 'test-agent-run-id', + finishAgentRun: async () => {}, + addAgentStep: async () => 'test-agent-step-id', + + // Billing + consumeCreditsWithFallback: async () => { + throw new Error( + 'consumeCreditsWithFallback not implemented in test runtime', + ) + }, + + // LLM + promptAiSdkStream: async function* () { + throw new Error('promptAiSdkStream not implemented in test runtime') + }, + promptAiSdk: async function () { + throw new Error('promptAiSdk not implemented in test runtime') + }, + promptAiSdkStructured: async function () { + throw new Error('promptAiSdkStructured not implemented in test runtime') + }, + + // Mutable State + databaseAgentCache: new Map(), + liveUserInputRecord: {}, + sessionConnections: {}, + + // Analytics + trackEvent: () => {}, + + // Other + logger: testLogger, + fetch: testFetch, + + // Scoped deps + + // Database + handleStepsLogChunk: () => { + throw new Error('handleStepsLogChunk not implemented in test runtime') + }, + requestToolCall: () => { + throw new Error('requestToolCall not implemented in test runtime') + }, + requestMcpToolData: () => { + throw new Error('requestMcpToolData not implemented in test runtime') + }, + requestFiles: () => { + throw new Error('requestFiles not implemented in test runtime') + }, + requestOptionalFile: () => { + throw new Error('requestOptionalFile not implemented in test runtime') + }, + sendSubagentChunk: () => { + throw new Error('sendSubagentChunk not implemented in test runtime') + }, + sendAction: () => { + throw new Error('sendAction not implemented in test runtime') + }, + + apiKey: 'test-api-key', +}) + diff --git a/common/src/testing/impl/agent-runtime.ts b/common/src/testing/impl/agent-runtime.ts index e28e1a034d..8f8179fcbc 100644 --- a/common/src/testing/impl/agent-runtime.ts +++ b/common/src/testing/impl/agent-runtime.ts @@ -1,119 +1,5 @@ -import type { AgentTemplate } from '../../types/agent-template' -import type { - AgentRuntimeDeps, - AgentRuntimeScopedDeps, -} from '../../types/contracts/agent-runtime' -import type { ClientEnv, CiEnv } from '../../types/contracts/env' -import type { Logger } from '../../types/contracts/logger' +/** + * @deprecated Use `@codebuff/common/testing/fixtures/agent-runtime` instead. + */ -export const testLogger: Logger = { - debug: () => {}, - error: () => {}, - info: () => {}, - warn: () => {}, -} - -export const testFetch = async () => { - throw new Error('fetch not implemented in test runtime') -} -testFetch.preconnect = async () => { - throw new Error('fetch.preconnect not implemented in test runtime') -} - -export const testClientEnv: ClientEnv = { - NEXT_PUBLIC_CB_ENVIRONMENT: 'test', - NEXT_PUBLIC_CODEBUFF_APP_URL: 'https://test.codebuff.com', - NEXT_PUBLIC_SUPPORT_EMAIL: 'support@codebuff.test', - NEXT_PUBLIC_POSTHOG_API_KEY: 'test-posthog-key', - NEXT_PUBLIC_POSTHOG_HOST_URL: 'https://test.posthog.com', - NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY: 'pk_test_123', - NEXT_PUBLIC_STRIPE_CUSTOMER_PORTAL: 'https://test.stripe.com/portal', - NEXT_PUBLIC_GOOGLE_SITE_VERIFICATION_ID: undefined, - NEXT_PUBLIC_WEB_PORT: 3000, -} - -export const testCiEnv: CiEnv = { - CI: undefined, - GITHUB_ACTIONS: undefined, - RENDER: undefined, - IS_PULL_REQUEST: undefined, - CODEBUFF_GITHUB_TOKEN: undefined, - CODEBUFF_API_KEY: 'test-api-key', -} - -export const TEST_AGENT_RUNTIME_IMPL = Object.freeze< - AgentRuntimeDeps & AgentRuntimeScopedDeps ->({ - // Environment - clientEnv: testClientEnv, - ciEnv: testCiEnv, - - // Database - getUserInfoFromApiKey: async () => ({ - id: 'test-user-id', - email: 'test-email', - discord_id: 'test-discord-id', - referral_code: 'ref-test-code', - }), - fetchAgentFromDatabase: async () => null, - startAgentRun: async () => 'test-agent-run-id', - finishAgentRun: async () => {}, - addAgentStep: async () => 'test-agent-step-id', - - // Billing - consumeCreditsWithFallback: async () => { - throw new Error( - 'consumeCreditsWithFallback not implemented in test runtime', - ) - }, - - // LLM - promptAiSdkStream: async function* () { - throw new Error('promptAiSdkStream not implemented in test runtime') - }, - promptAiSdk: async function () { - throw new Error('promptAiSdk not implemented in test runtime') - }, - promptAiSdkStructured: async function () { - throw new Error('promptAiSdkStructured not implemented in test runtime') - }, - - // Mutable State - databaseAgentCache: new Map(), - liveUserInputRecord: {}, - sessionConnections: {}, - - // Analytics - trackEvent: () => {}, - - // Other - logger: testLogger, - fetch: testFetch, - - // Scoped deps - - // Database - handleStepsLogChunk: () => { - throw new Error('handleStepsLogChunk not implemented in test runtime') - }, - requestToolCall: () => { - throw new Error('requestToolCall not implemented in test runtime') - }, - requestMcpToolData: () => { - throw new Error('requestMcpToolData not implemented in test runtime') - }, - requestFiles: () => { - throw new Error('requestFiles not implemented in test runtime') - }, - requestOptionalFile: () => { - throw new Error('requestOptionalFile not implemented in test runtime') - }, - sendSubagentChunk: () => { - throw new Error('sendSubagentChunk not implemented in test runtime') - }, - sendAction: () => { - throw new Error('sendAction not implemented in test runtime') - }, - - apiKey: 'test-api-key', -}) +export * from '../fixtures/agent-runtime' diff --git a/common/src/util/partial-json-delta.ts b/common/src/util/partial-json-delta.ts index 28888fe0f0..b7a774cae2 100644 --- a/common/src/util/partial-json-delta.ts +++ b/common/src/util/partial-json-delta.ts @@ -5,17 +5,17 @@ export function parsePartialJsonObjectSingle(content: string): { } { try { return { lastParamComplete: true, params: JSON.parse(content) } - } catch (error) {} + } catch {} if (!content.match(/\d$/)) { try { return { lastParamComplete: true, params: JSON.parse(content + '}') } - } catch (error) {} + } catch {} } try { return { lastParamComplete: false, params: JSON.parse(content + '"}') } - } catch (error) {} + } catch {} if (content.endsWith('\\')) { try { @@ -23,7 +23,7 @@ export function parsePartialJsonObjectSingle(content: string): { lastParamComplete: false, params: JSON.parse(content.slice(0, -1) + '"}'), } - } catch (error) {} + } catch {} } let lastIndex = content.lastIndexOf(',') @@ -33,7 +33,7 @@ export function parsePartialJsonObjectSingle(content: string): { lastParamComplete: true, params: JSON.parse(content.slice(0, lastIndex) + '}'), } - } catch (error) {} + } catch {} lastIndex = content.lastIndexOf(',', lastIndex - 1) } diff --git a/knowledge.md b/knowledge.md index e911a15c06..6200abf3ee 100644 --- a/knowledge.md +++ b/knowledge.md @@ -1,683 +1,129 @@ # Codebuff -Codebuff is a tool for editing codebases via natural language instruction to Buffy, an expert AI programming assistant. +Codebuff is a tool for editing codebases via natural-language instructions to Buffy (an expert AI programming assistant). -## Project Goals +## Goals -1. **Developer Productivity**: Reduce time and effort for common programming tasks -2. **Learning and Adaptation**: Develop a system that learns from user interactions -3. **Focus on power users**: Make expert software engineers move even faster +- Make expert engineers faster (power-user focus). +- Reduce time/effort for common programming tasks. +- Improve via iteration/feedback (learn/adapt from usage). ## Key Technologies -- **TypeScript**: Primary programming language -- **Bun**: Package manager and runtime -- **LLMs**: Multiple providers (Anthropic, OpenAI, Gemini, etc.) for various coding tasks +- TypeScript monorepo (Bun workspaces) +- Bun runtime + package manager +- Next.js (web app + API routes) +- Multiple LLM providers (Anthropic/OpenAI/Gemini/etc.) -## Main Components +## Repo Map -1. **LLM Integration**: Processes natural language instructions and generates code changes -2. **File Management**: Reads, parses, and modifies project files -3. **Action Handling**: Processes various client and server actions -4. **Knowledge Management**: Handles creation, updating, and organization of knowledge files -5. **Terminal Command Execution**: Allows running shell commands in user's terminal +- `cli/`: TUI client (OpenTUI + React) and local UX +- `sdk/`: JS/TS SDK used by the CLI and external users +- `web/`: Next.js app + API routes (the “web API”) +- `packages/agent-runtime/`: agent runtime + tool handling (server-side) +- `common/`: shared types, tools, schemas, utilities +- `.agents/`: local agent templates (prompt + programmatic agents) -## API Flow +## Request Flow -1. The SDK/CLI sends user input and file context to the Codebuff web API. -2. The agent runtime processes the request and streams response chunks back through the SDK callbacks. -3. Tools run locally via the SDK's helpers (file edits, terminal commands, search) to satisfy model tool calls. +1. CLI/SDK sends user input + context to the Codebuff web API. +2. Agent runtime streams events/chunks back through SDK callbacks. +3. Tools execute locally (file edits, terminal commands, search) to satisfy tool calls. -## Tool Handling System +## Development -- Tools are defined in `common/src/tools` and executed via the SDK tool helpers and agent runtime -- Available tools: read_files, write_file, str_replace, run_terminal_command, code_search, browser_logs, spawn_agents, web_search, read_docs, run_file_change_hooks, and others -- Tool calls request additional information or perform actions based on the current project state - -## Agent System - -- **LLM-based Agents**: Traditional agents defined in `.agents/` subdirectories using prompts and LLM models -- **Programmatic Agents**: Custom agents using JavaScript/TypeScript generator functions in `.agents/` -- **Dynamic Agent Templates**: User-defined agents in TypeScript files with `handleSteps` generator functions -- Agent templates define available tools, spawnable sub-agents, and execution behavior -- Programmatic agents allow complex orchestration logic, conditional flows, and iterative refinement -- Generator functions execute in secure QuickJS sandbox for safety -- Both types integrate seamlessly through the same tool execution system - -### Shell Shims (Direct Commands) - -Codebuff supports shell shims for direct command invocation without the `codebuff` prefix. - -- **Cross-platform**: Works on Windows (CMD/PowerShell), macOS, and Linux (bash/zsh/fish) -- **Store integration**: Uses fully qualified agent IDs from the agent store -- **Easy management**: Install, update, list, and uninstall shims via CLI commands### Quick Start (Recommended) +Start everything: ```bash -# One-step setup: install and add to PATH automatically -codebuff shims install codebuff/base-lite@1.0.0 - -# Use immediately in current session (follow the printed instruction) -eval "$(codebuff shims env)" - -# Now use direct commands! -base-lite "fix this bug" # Works right away! +bun dev ``` -## Development Workflow - -### Starting the Development Environment +Or run services + CLI separately: ```bash -# Full development environment (services + CLI) -bun dev # Starts db, studio, sdk, web, then CLI - # Ctrl+C stops everything - -# Or run services and CLI separately -bun up # Start services in background, exits when ready -bun start-cli # Start CLI in foreground -bun down # Stop background services -bun ps # Check if services are running +bun up +bun start-cli +bun ps +bun down ``` -**Services started:** -- `db` - PostgreSQL database (via Docker) -- `studio` - Drizzle Studio for database inspection -- `sdk` - SDK build (one-time) -- `web` - Next.js web server +Worktrees (run multiple stacks on different ports): create `.env.development.local`: -**Logs:** All service logs are written to `debug/console/`: -- `db.log`, `studio.log`, `sdk.log`, `web.log` - -**Worktree Support:** Each worktree can run on different ports. Create `.env.development.local` with: -``` +```bash PORT=3001 NEXT_PUBLIC_WEB_PORT=3001 NEXT_PUBLIC_CODEBUFF_APP_URL=http://localhost:3001 ``` -The `bun down` command is worktree-safe - it only kills services on the port configured for that worktree. - -## Package Management - -- Use Bun for all package management operations -- Run commands with `bun` instead of `npm` (e.g., `bun install` not `npm install`) -- Use `bun run` for script execution +Logs: `debug/console/` (`db.log`, `studio.log`, `sdk.log`, `web.log`). -## Git Workflow Best Practices +Package management: -### Never Force Push to Main +- Use `bun install`, `bun run ...` (avoid `npm`). -**Never use `git push --force` or `git push --force-with-lease` on the main branch unless the user explicitly and clearly asks for it.** This can overwrite other developers' work and cause CI/deployment issues. +## Agents And Tools -- A simple "push" request is NOT permission to force push - only a regular push should be attempted -- If a push is rejected due to diverged history, **stop and ask the user** what they want to do -- Do NOT automatically escalate to force push when a regular push fails -- Only force push if the user explicitly says something like "force push to main" or "yes, force push" -- If you need to amend a commit that's already on main, create a new commit instead -- Force pushing is only acceptable on feature branches where you're the only contributor -- If a push is rejected, use `git pull --rebase` to integrate remote changes first +Agents: -### Preserving Uncommitted Changes - -**NEVER use `git checkout HEAD --` or `git restore` on files to exclude them from a commit.** This destructively discards uncommitted work. - -When the user says "don't commit file X" or "exclude file X from the commit": -- ✅ Only `git add` the specific files they DO want committed -- ✅ Leave other files in their current state (staged or unstaged) -- ❌ NEVER run `git checkout HEAD -- ` or `git restore ` - this permanently deletes uncommitted changes - -Correct approach for amending a commit with specific files: -```bash -# Only add the files to include -git add path/to/file-to-include.ts -git commit --amend --no-edit -``` +- Prompt/programmatic agents live in `.agents/` (programmatic agents use `handleSteps` generators). +- Generator functions execute in a sandbox; agent templates define tool access and subagents. -### Interactive Git Commands - -**Always use tmux when running interactive git commands** (e.g., `git rebase --continue`, `git add -p`, `git commit --amend`). - -- Codebuff agents cannot interact with prompts that require user input -- Interactive git commands will hang if run directly through the commander agent -- Use tmux to provide an interactive session where the user can handle git prompts manually -- For automated operations, prefer non-interactive git commands when possible (e.g., `git rebase --continue` after resolving conflicts programmatically) - -**Common Interactive Git Commands (require tmux):** - -- `git rebase --continue` - Continue rebase after resolving conflicts -- `git rebase --skip` - Skip current commit during rebase -- `git rebase --abort` - Abort rebase operation -- `git rebase -i` / `git rebase --interactive` - Interactive rebase with editor -- `git add -p` / `git add --patch` - Interactively stage hunks -- `git add -i` / `git add --interactive` - Interactive staging -- `git commit --amend` - Amend last commit (opens editor) -- `git commit -v` / `git commit --verbose` - Commit with diff in editor -- `git merge --continue` - Continue merge after resolving conflicts -- `git merge --abort` - Abort merge operation -- `git cherry-pick --continue` - Continue cherry-pick after conflicts -- `git cherry-pick --abort` - Abort cherry-pick operation -- `git stash save -p` - Interactively stash changes -- `git checkout -p` - Interactively discard changes -- `git reset -p` - Interactively unstage changes -- `git clean -i` - Interactively clean untracked files -- Any git command that opens an editor (commit messages, rebase todo list, etc.) - -**Example:** +Shell shims (direct commands without `codebuff` prefix): ```bash -# ❌ Bad: Will hang waiting for input -git rebase --continue - -# ✅ Good: Run in tmux for manual interaction -tmux new-session -d -s git-rebase -tmux send-keys -t git-rebase 'git rebase --continue' C-m -tmux attach -t git-rebase -``` - -## TypeScript Build State Management - -### Cleaning Build State - -- Use `bun run clean-ts` to remove all TypeScript build artifacts (.tsbuildinfo files and .next cache) -- This resolves infinite loop issues in the typechecker caused by corrupted or stale build cache - -### Common Issues - -- Typechecker infinite loops are often caused by stale .tsbuildinfo files or circular project references -- Always clean build state when encountering persistent type errors or infinite loops -- The monorepo structure with project references can sometimes create dependency cycles - -## Error Handling Philosophy - -**Prefer `ErrorOr` return types over throwing errors.** - -- Return type `ErrorOr` for operations that fail -- Return `success(value)` or `failure(error)` from `common/src/util/error.ts` - - e.g. `return failure(new Error('File not found'))` -- Allows callers to handle errors explicitly without try-catch -- Makes error cases visible in function signatures - -## Error Handling and Debugging - -- Error messages are logged to console and debug log files - -## Security Considerations - -- Project uses environment variables for sensitive information (API keys) -- User input is validated and sanitized before processing -- File operations are restricted to project directory - -## API Endpoint Architecture - -### Dependency Injection Pattern - -All API endpoints in `web/src/app/api/v1/` follow a consistent dependency injection pattern for improved testability and maintainability. - -**Structure:** - -1. **Implementation file** (`web/src/api/v1/.ts`) - Contains business logic with injected dependencies -2. **Route handler** (`web/src/app/api/v1//route.ts`) - Minimal wrapper that injects dependencies -3. **Contract types** (`common/src/types/contracts/.ts`) - Type definitions for injected functions -4. **Unit tests** (`web/src/api/v1/__tests__/.test.ts`) - Comprehensive tests with mocked dependencies - -**Example:** - -```typescript -// Implementation file - Contains business logic -export async function myEndpoint(params: { - req: NextRequest - getDependency: GetDependencyFn - logger: Logger - anotherDep: AnotherDepFn -}) { - // Business logic here -} - -// Route handler - Minimal wrapper -export async function GET(req: NextRequest) { - return myEndpointGet({ req, getDependency, logger, anotherDep }) -} - -// Contract type (in common/src/types/contracts/) -export type GetDependencyFn = (params: SomeParams) => Promise -``` - -**Benefits:** - -- Easy to mock dependencies in unit tests -- Type-safe function contracts shared across the codebase -- Clear separation between routing and business logic -- Consistent pattern across all endpoints - -**Contract Types Location:** -All contract types live in `common/src/types/contracts/`. - -**Contract Type Pattern:** -For generic function types, use separate Input/Output types: - -```typescript -// Define input type -export type MyFunctionInput = { - param1: string - param2: T -} - -// Define output type -export type MyFunctionOutput = Promise> - -// Define function type using Input/Output -export type MyFunctionFn = ( - params: MyFunctionInput, -) => MyFunctionOutput -``` - -## Testing Guidelines - -### Dependency Injection (Primary Approach) - -**Prefer dependency injection over mocking.** Design functions to accept dependencies as parameters with contract types defined in `common/src/types/contracts/`. - -```typescript -// ✅ Good: Dependency injection with contract types -import type { TrackEventFn } from '@codebuff/common/types/contracts/analytics' -import type { Logger } from '@codebuff/common/types/contracts/logger' - -export async function myFunction(params: { - trackEvent: TrackEventFn - logger: Logger - getData: GetDataFn -}) { - const { trackEvent, logger, getData } = params - // Use injected dependencies -} - -// Test with simple mock implementations -const mockTrackEvent: TrackEventFn = mock(() => {}) -const mockLogger: Logger = { - error: mock(() => {}), - // ... other methods -} -``` - -**Benefits:** - -- No need for `spyOn()` or `mock.module()` -- Clear, type-safe dependencies -- Easy to test with simple mock objects -- Better code architecture and maintainability - -### When to Use spyOn (Secondary Approach) - -Use `spyOn()` only when dependency injection is impractical: - -- Mocking global functions (Date.now, setTimeout) -- Testing legacy code without DI -- Overriding internal module behavior temporarily - -```typescript -// Use spyOn for globals -spyOn(Date, 'now').mockImplementation(() => 1234567890) -``` - -### Avoid mock.module() - -**Never use `mock.module()` for functions.** It pollutes global state and carries over between test files. - -Only use for overriding module constants when absolutely necessary: - -- Use wrapper functions in `@codebuff/common/testing/mock-modules.ts` - - Use `await mockModule(...)` as a drop-in replacement for `mock.module` - - Call `clearMockedModules()` in `afterAll` (or `afterEach`) - -### Test Setup Patterns - -Extract duplicative mock state to `beforeEach`: - -```typescript -describe('My Tests', () => { - let mockLogger: Logger - let mockTrackEvent: TrackEventFn - - beforeEach(() => { - mockLogger = { - error: mock(() => {}), - warn: mock(() => {}), - info: mock(() => {}), - debug: mock(() => {}), - } - mockTrackEvent = mock(() => {}) - }) - - afterEach(() => { - mock.restore() - }) - - test('works with injected dependencies', async () => { - await myFunction({ logger: mockLogger, trackEvent: mockTrackEvent }) - expect(mockTrackEvent).toHaveBeenCalled() - }) -}) +codebuff shims install codebuff/base-lite@1.0.0 +eval "$(codebuff shims env)" +base-lite "fix this bug" ``` -## Constants and Configuration +Tools: -Important constants are centralized in `common/src/constants.ts`: +- Tool definitions live in `common/src/tools` and are executed via the SDK helpers + agent-runtime. -- `CREDITS_REFERRAL_BONUS`: Credits awarded for successful referral -- Credit limits for different user types - -## Referral System +## Git Safety Rules -**IMPORTANT**: Referral codes must be applied through the CLI, not through the web interface. +- Never force-push `main` unless explicitly requested. +- To exclude files from a commit: stage only what you want (`git add `). Never use `git restore`/`git checkout HEAD -- ` to “uncommit” changes. +- Run interactive git commands in tmux (anything that opens an editor or prompts). -- Web onboarding flow shows instructions for entering codes in CLI -- Users must type their referral code in the Codebuff terminal after login -- Auto-redemption during web login was removed to prevent abuse -- The `redeemReferralCode` function in `web/src/app/api/referrals/helpers.ts` processes the actual credit granting +## Error Handling -### OAuth Referral Code Preservation +Prefer `ErrorOr` return values (`success(...)`/`failure(...)` in `common/src/util/error.ts`) over throwing. -**Problem**: NextAuth doesn't preserve referral codes through OAuth flow because: +## Testing -- NextAuth generates its own state parameter for CSRF/PKCE protection -- Custom state parameters are ignored/overwritten -- OAuth callback URLs don't always survive the round trip +- Prefer dependency injection over module mocking; define contracts in `common/src/types/contracts/`. +- Use `spyOn()` only for globals / legacy seams. +- Avoid `mock.module()` for functions; use `@codebuff/common/testing/mock-modules.ts` helpers for constants only. -**Solution**: Multi-layer approach implemented in SignInButton and ReferralRedirect components: - -1. **Primary**: Use absolute callback URLs with referral codes for better NextAuth preservation -2. **Fallback**: Store referral codes in localStorage before OAuth starts -3. **Recovery**: ReferralRedirect component on home page catches missed referrals and redirects to onboard page +CLI hook testing note: React 19 + Bun + RTL `renderHook()` is unreliable; prefer integration tests via components for hook behavior. ## Environment Variables -This project uses standard `.env.*` files for environment configuration. Bun natively loads these files automatically. - -### Environment Dependency Injection Architecture - -The codebase uses a structured dependency injection pattern for environment variables, making code more testable and removing direct `process.env.*` calls. - -#### Architecture Overview - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ BASE TYPES (common) │ -├─────────────────────────────────────────────────────────────────┤ -│ BaseEnv - OS/runtime vars (SHELL, HOME, TERM, etc.) │ -│ BaseCiEnv - CI vars (CI, GITHUB_ACTIONS, etc.) │ -│ ClientEnv - Public vars (NEXT_PUBLIC_*) from env-schema │ -└─────────────────────────────────────────────────────────────────┘ - │ - extends │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ PACKAGE-SPECIFIC ENVS │ -├─────────────────────────────────────────────────────────────────┤ -│ CLI: CliEnv = BaseEnv & { CLI-specific } │ -│ SDK: SdkEnv = BaseEnv & { SDK-specific } │ -│ Server: ServerEnv = ClientEnv & { server secrets } │ -│ Evals: EvalsEnv = BaseCiEnv & { eval-specific } │ -└─────────────────────────────────────────────────────────────────┘ -``` - -#### Key Files - -| Package | Type Definition | Helper Functions | Tests | -|---------|-----------------|------------------|-------| -| common | `common/src/types/contracts/env.ts` | `common/src/env-process.ts` (getBaseEnv), `common/src/env-ci.ts` | `common/src/__tests__/env-*.test.ts` | -| CLI | `cli/src/types/env.ts` (CliEnv) | `cli/src/utils/env.ts` (getCliEnv) | `cli/src/__tests__/utils/env.test.ts` | -| SDK | `sdk/src/types/env.ts` (SdkEnv) | `sdk/src/env.ts` (getSdkEnv) | `sdk/src/__tests__/env.test.ts` | -| Server | `packages/internal/src/types/contracts/env.ts` | `packages/internal/src/env.ts` | - | -| Evals | `evals/types/env.ts` (`EvalsCiEnv`) | - | - | - -#### Usage Pattern - -**In CLI code:** -```typescript -import type { CliEnv } from '../types/env' -import { getCliEnv } from './env' - -// Function accepts injectable env with default -export function detectShell(env: CliEnv = getCliEnv()): string { - return env.SHELL || env.COMSPEC || 'unknown' -} -``` - -**In SDK code:** -```typescript -import type { SdkEnv } from './types/env' -import { getSdkEnv } from './env' - -export function getRipgrepPath(env: SdkEnv = getSdkEnv()): string { - return env.CODEBUFF_RG_PATH || '/usr/bin/rg' -} -``` - -**In tests:** -```typescript -import { createTestCliEnv } from '../../utils/env' - -test('detects VS Code terminal', () => { - const env = createTestCliEnv({ - VSCODE_PID: '1234', - TERM_PROGRAM: 'vscode', - }) - expect(isVSCodeTerminal(env)).toBe(true) -}) -``` - -#### ESLint Enforcement - -Lint rules in `eslint.config.js` enforce that packages use their own env types: - -- **CLI**: Must use `CliEnv` and `getCliEnv()`, not `ProcessEnv` from common -- **SDK**: Must use `SdkEnv` and `getSdkEnv()`, not `ProcessEnv` from common - -```javascript -// eslint.config.js - Enforces package-specific env types -{ - files: ['cli/src/**/*.{ts,tsx}'], - rules: { - 'no-restricted-imports': ['error', { - paths: [{ - name: '@codebuff/common/env-process', - importNames: ['getProcessEnv', 'processEnv'], - message: 'CLI should use getCliEnv() from "../utils/env"', - }], - }], - }, -} -``` - -#### Base vs Extended Types - -| Type | Location | Purpose | -|------|----------|--------| -| `BaseEnv` | common | OS-level vars (SHELL, HOME, TERM, NODE_ENV, PATH) | -| `BaseCiEnv` | common | CI platform vars (CI, GITHUB_ACTIONS, RENDER) | -| `ClientEnv` | common | Public NEXT_PUBLIC_* vars (validated via Zod) | -| `ProcessEnv` | common | Full process env (BaseEnv + all extensions) | -| `CiEnv` | common | Extended CI env (BaseCiEnv + CODEBUFF_API_KEY, etc.) | -| `CliEnv` | CLI | BaseEnv + terminal/IDE detection vars | -| `SdkEnv` | SDK | BaseEnv + binary path/build flag vars | -| `ServerEnv` | internal | ClientEnv + server secrets (API keys, DB URLs) | -| `EvalsCiEnv` | evals | BaseCiEnv + eval-specific vars (EVAL_RESULTS_EMAIL) | - -#### Benefits - -1. **Testability**: Functions accept env as a parameter, easy to mock in tests -2. **Type Safety**: Each package has its own typed env with only relevant vars -3. **No Global State**: No direct `process.env.*` calls that pollute tests -4. **Lint Enforcement**: ESLint prevents accidental use of wrong env types -5. **Snapshot Isolation**: `get*ProcessEnv()` returns a snapshot that doesn't change - -### File Hierarchy - -Bun loads `.env` files in this order (highest precedence last): - -1. `.env.local` - Main secrets file synced from Infisical (gitignored) -2. `.env.development.local` - Worktree-specific overrides like ports (gitignored, highest precedence) - -**Note**: Bun also supports `.env` and `.env.development`, but this project only uses `.env.local` and `.env.development.local`. - -### Infisical Integration - -Infisical is used as a background sync mechanism to populate `.env.local`: +Quick rules: -- The `.bin/bun` wrapper syncs secrets from Infisical to `.env.local` -- Caching: 15-minute TTL (configurable via `INFISICAL_CACHE_TTL`) -- Cache invalidates when `.infisical.json` is modified or TTL expires -- If Infisical is not set up, existing `.env.local` is used directly +- Public client env: `NEXT_PUBLIC_*` only, validated in `common/src/env-schema.ts` (used via `@codebuff/common/env`). +- Server secrets: validated in `packages/internal/src/env-schema.ts` (used via `@codebuff/internal/env`). +- Runtime/OS env: pass typed snapshots instead of reading `process.env` throughout the codebase. -**Setup Options**: -1. **With Infisical**: Run `infisical login`, secrets auto-sync to `.env.local` -2. **Without Infisical**: Copy `.env.example` to `.env.local` and fill in values +Env DI helpers: -### Worktree Support +- Base contracts: `common/src/types/contracts/env.ts` (`BaseEnv`, `BaseCiEnv`, `ClientEnv`, `CiEnv`) +- Helpers: `common/src/env-process.ts`, `common/src/env-ci.ts` +- CLI: `cli/src/utils/env.ts` (`getCliEnv`, `createTestCliEnv`) +- SDK: `sdk/src/env.ts` (`getSdkEnv`, `createTestSdkEnv`) -For git worktrees running on different ports: +Bun loads (highest precedence last): -- Create `.env.development.local` in the worktree with port overrides -- Bun loads `.env.development.local` with highest precedence, so it overrides ports from `.env.local` -- The `scripts/init-worktree.ts` script creates this file automatically +- `.env.local` (Infisical-synced secrets, gitignored) +- `.env.development.local` (worktree overrides like ports, gitignored) -#### Worktree App URL Configuration +Releases: release scripts read `CODEBUFF_GITHUB_TOKEN`. -The `init-worktree.ts` script sets `NEXT_PUBLIC_CODEBUFF_APP_URL=http://localhost:${webPort}` in the root `.env.development.local`. This means: - -- **CLI**: Will hit the local web server instead of production -- **SDK**: Will also use the local web server for API calls -- **SDK E2E Tests**: Require the local web server to be running - -**Running SDK E2E tests in a worktree:** - -```bash -# First, start the web server in one terminal -bun run --cwd web dev - -# Then run SDK E2E tests in another terminal -bun test sdk/e2e -``` - -If you need to run SDK tests against production instead, override the environment variable: - -```bash -NEXT_PUBLIC_CODEBUFF_APP_URL=https://codebuff.com bun test sdk/e2e -``` +## Database Migrations -### Bun Wrapper Script (`.bin/bun`) +Edit schema using Drizzle’s TS DSL (don’t hand-write migration SQL), then run the internal DB scripts to generate/apply migrations. -The wrapper's role is simple: ensure `.env.local` is synced from Infisical before running bun. - -- Checks `NEXT_PUBLIC_INFISICAL_UP` to prevent nested syncs -- Uses `INFISICAL_DISABLE_UPDATE_CHECK=true` for faster startup -- 10-second timeout on Infisical commands to prevent hangs -- Falls back gracefully if Infisical is not available - -### Release Process - -The release mechanism uses the `CODEBUFF_GITHUB_TOKEN` environment variable directly. - -Environment variables are defined and validated in `packages/internal/src/env.ts`. This module provides type-safe `env` objects for use throughout the monorepo. - -## Python Package - -A Python package skeleton exists in python-app. Currently a placeholder that suggests installing the npm version. - -## Project Templates - -Codebuff provides starter templates for initializing new projects: - -```bash -codebuff --create