diff --git a/services/cloud-agent-next/.dev.vars.example b/services/cloud-agent-next/.dev.vars.example index 71c76544be..325741ae5e 100644 --- a/services/cloud-agent-next/.dev.vars.example +++ b/services/cloud-agent-next/.dev.vars.example @@ -71,6 +71,10 @@ R2_ATTACHMENTS_BUCKET="" R2_ATTACHMENTS_READONLY_ACCESS_KEY_ID="" R2_ATTACHMENTS_READONLY_SECRET_ACCESS_KEY="" +# Optional: production R2 credentials for Cloudflare Sandbox directory backups +R2_ACCESS_KEY_ID="" +R2_SECRET_ACCESS_KEY="" + # Session ingest URL used by the wrapper to post session data. # Sandbox containers reach the host via `host.docker.internal`. # @url cloudflare-session-ingest diff --git a/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.test.ts b/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.test.ts index b456484588..377c8e9b75 100644 --- a/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.test.ts +++ b/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.test.ts @@ -1,22 +1,55 @@ -import { describe, expect, it, vi } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const { mockLogWorkspaceBackupDisabled, mockLogWorkspaceBackupLifecycle } = vi.hoisted(() => ({ + mockLogWorkspaceBackupDisabled: vi.fn(), + mockLogWorkspaceBackupLifecycle: vi.fn(), +})); + +vi.mock('../../workspace-backup-observability.js', () => ({ + logWorkspaceBackupDisabled: mockLogWorkspaceBackupDisabled, + logWorkspaceBackupLifecycle: mockLogWorkspaceBackupLifecycle, +})); + import type { Env, SandboxInstance } from '../../types.js'; import type { SessionMetadata } from '../../persistence/session-metadata.js'; -import { WrapperClient } from '../../kilo/wrapper-client.js'; +import { WrapperClient, WrapperError } from '../../kilo/wrapper-client.js'; import { WRAPPER_VERSION } from '../../shared/wrapper-version.js'; import type { EnsureWrapperRequest } from '../protocol.js'; 
-import { CloudflareAgentSandbox } from './cloudflare-agent-sandbox.js'; +import { CloudflareAgentSandbox, deriveSetupEnvironment } from './cloudflare-agent-sandbox.js'; +import { buildWorkspaceBackupCandidate } from '../../workspace-backup-cache.js'; import { SandboxCapacityInspectionError, WorkspaceCapacityAdmissionRejectedError, + type WorkspaceFilesystemPreparationError, } from '../../workspace-errors.js'; vi.mock('@cloudflare/sandbox', () => ({ getSandbox: vi.fn() })); -function metadata(options?: { devcontainer?: boolean }): SessionMetadata { +afterEach(() => { + vi.clearAllMocks(); + vi.restoreAllMocks(); +}); + +function metadata(options?: { devcontainer?: boolean; withProfile?: boolean }): SessionMetadata { return { metadataSchemaVersion: 2, identity: { sessionId: 'agent_cloudflare', userId: 'user_cloudflare', orgId: 'org_cloudflare' }, auth: {}, + ...(options?.withProfile + ? { + profile: { + envVars: { CACHE_VARIANT: 'profile-env' }, + encryptedSecrets: { + API_TOKEN: { + encryptedData: 'encrypted-token', + encryptedDEK: 'encrypted-dek', + algorithm: 'rsa-aes-256-gcm', + version: 1, + }, + }, + }, + } + : {}), workspace: { sandboxId: options?.devcontainer ? 'dind-abcdef' : 'ses-abcdef', }, @@ -37,9 +70,13 @@ function metadata(options?: { devcontainer?: boolean }): SessionMetadata { function ensureRequest(options?: { devcontainer?: boolean; leased?: boolean; + cacheEligible?: boolean; }): EnsureWrapperRequest { const sandboxId = options?.devcontainer ? 'dind-abcdef' : 'ses-abcdef'; - const sessionMetadata = metadata(options); + const sessionMetadata = metadata({ + devcontainer: options?.devcontainer, + withProfile: options?.cacheEligible, + }); return { plan: { scope: { sessionId: 'agent_cloudflare', userId: 'user_cloudflare', orgId: 'org_cloudflare' }, @@ -71,10 +108,132 @@ function ensureRequest(options?: { kiloSessionId: 'kilo_cloudflare', }, context: { workspacePath: '/workspace/cloudflare' }, + ...(options?.cacheEligible + ? 
{ + readyRequest: { + agentSessionId: 'agent_cloudflare', + userId: 'user_cloudflare', + orgId: 'org_cloudflare', + sandboxId, + kiloSessionId: 'kilo_cloudflare', + workspace: { + workspacePath: '/workspace/cloudflare', + sessionHome: '/home/agent_cloudflare', + branchName: 'session/agent_cloudflare', + }, + repo: { kind: 'github', repo: 'acme/repo' }, + materialized: { + env: { + CACHE_VARIANT: 'resolved-profile-env', + API_TOKEN: 'resolved-secret', + MATERIALIZED_TOKEN: 'runtime-token', + }, + setupCommands: ['pnpm install', 'node ./scripts/custom-setup.mjs --arbitrary'], + }, + session: { + ingestUrl: 'https://worker.example/ingest', + workerAuthToken: 'worker-token', + wrapperRunId: 'wr_cloudflare', + wrapperGeneration: 1, + wrapperConnectionId: 'conn_cloudflare', + }, + }, + } + : {}), }, }; } +/* Unit tests for deriveSetupEnvironment: plain variables are expected to come from the materialized environment, while secrets are expected to appear only as a serialized form of their encrypted envelope. */ describe('deriveSetupEnvironment', () => { + /* The profile declares CACHE_VARIANT; the expected value is the materialized one (resolved-profile-value), not the profile one. MATERIALIZED_TOKEN is not declared by the profile and must be absent from the result. */ it('uses materialized plain values and persisted encrypted secret identities only', () => { + expect( + deriveSetupEnvironment( + { + envVars: { CACHE_VARIANT: 'profile-value' }, + encryptedSecrets: { + API_TOKEN: { + encryptedData: 'encrypted-token', + encryptedDEK: 'encrypted-dek', + algorithm: 'rsa-aes-256-gcm', + version: 1, + }, + }, + }, + { + CACHE_VARIANT: 'resolved-profile-value', + API_TOKEN: 'resolved-secret', + MATERIALIZED_TOKEN: 'unrelated-token', + } + ) + ).toEqual({ + variables: { CACHE_VARIANT: 'resolved-profile-value' }, + secretIdentities: { + API_TOKEN: + '{"algorithm":"rsa-aes-256-gcm","version":1,"encryptedData":"encrypted-token","encryptedDEK":"encrypted-dek"}', + }, + }); + }); + + /* API_TOKEN is declared both as a plain envVar and as an encrypted secret: the expected result has no plain variable for it, and the resolved plaintext must not appear in the result or in the backup candidate object key. */ it('uses encrypted secret identity when a key is also declared as a plain variable', async () => { + const plaintextSecret = 'resolved-plaintext-secret'; + const setupEnvironment = deriveSetupEnvironment( + { + envVars: { API_TOKEN: 'profile-plaintext' }, + encryptedSecrets: { + API_TOKEN: { + encryptedData: 'encrypted-token', + encryptedDEK: 'encrypted-dek', + algorithm: 'rsa-aes-256-gcm', + version: 1, + }, + }, + }, + { 
API_TOKEN: plaintextSecret } + ); + + expect(setupEnvironment).toEqual({ + variables: {}, + secretIdentities: { + API_TOKEN: + '{"algorithm":"rsa-aes-256-gcm","version":1,"encryptedData":"encrypted-token","encryptedDEK":"encrypted-dek"}', + }, + }); + expect(JSON.stringify(setupEnvironment)).not.toContain(plaintextSecret); + + if (!setupEnvironment) throw new Error('expected setup environment'); + const candidate = await buildWorkspaceBackupCandidate({ + fresh: true, + devcontainer: false, + setupCommands: ['npm ci'], + setupEnvironment, + userId: 'user_cloudflare', + repository: { type: 'github', repo: 'kilocode/example' }, + }); + + expect(candidate?.objectKey).toMatch(/^workspace-backups\/v1\/[a-f0-9]{64}\.json$/); + expect(candidate?.objectKey).not.toContain(plaintextSecret); + }); + + /* CACHE_VARIANT is declared in envVars but is missing from the materialized environment, so the derivation is expected to return null rather than a partial result. */ it('returns null when a declared plain runtime value is missing', () => { + expect( + deriveSetupEnvironment( + { + envVars: { CACHE_VARIANT: 'profile-value' }, + encryptedSecrets: { + API_TOKEN: { + encryptedData: 'encrypted-token', + encryptedDEK: 'encrypted-dek', + algorithm: 'rsa-aes-256-gcm', + version: 1, + }, + }, + }, + { API_TOKEN: 'resolved-secret' } + ) + ).toBeNull(); + }); +}); + describe('CloudflareAgentSandbox', () => { it('starts an ordinary bootstrap wrapper through the adapter', async () => { const bootstrapSession = {}; @@ -105,6 +264,714 @@ describe('CloudflareAgentSandbox', () => { ensureBootstrapWrapper.mockRestore(); }); + it('reports malformed worker URLs before degrading to a cold bootstrap', async () => { + const request = ensureRequest({ cacheEligible: true }); + const bucket = { get: vi.fn(), put: vi.fn() }; + const ensureSessionReady = vi.fn().mockResolvedValue({ kiloSessionId: 'kilo_cloudflare' }); + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { ensureSessionReady } as unknown as WrapperClient, + }); + const createBackup = vi.fn(); + const sandbox = new CloudflareAgentSandbox( + { WORKER_URL: 'not a URL', 
BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { + resolveSandbox: () => + ({ + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ + exitCode: 0, + stdout: '3145728000 10485760000\n', + stderr: '', + }), + createSession: vi.fn().mockResolvedValue({}), + createBackup, + }) as unknown as SandboxInstance, + } + ); + + await expect(sandbox.ensureWrapper(request)).resolves.toMatchObject({ + status: 'session-ready', + }); + expect(mockLogWorkspaceBackupDisabled).toHaveBeenCalledOnce(); + expect(mockLogWorkspaceBackupDisabled).toHaveBeenCalledWith('invalid_worker_url'); + expect(bucket.get).not.toHaveBeenCalled(); + expect(createBackup).not.toHaveBeenCalled(); + expect(bucket.put).not.toHaveBeenCalled(); + }); + + it('bypasses cache lookup and publication when a declared plain runtime value is missing', async () => { + const request = ensureRequest({ cacheEligible: true }); + const onProgress = vi.fn(); + request.onProgress = onProgress; + if (!request.prepared.readyRequest) throw new Error('expected ready request'); + delete request.prepared.readyRequest.materialized.env.CACHE_VARIANT; + const bucket = { get: vi.fn(), put: vi.fn() }; + const ensureSessionReady = vi.fn().mockResolvedValue({ kiloSessionId: 'kilo_cloudflare' }); + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { ensureSessionReady } as unknown as WrapperClient, + }); + const createBackup = vi.fn(); + const sandbox = new CloudflareAgentSandbox( + { WORKER_URL: 'http://localhost:8787', BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { + resolveSandbox: () => + ({ + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ + exitCode: 0, + stdout: '3145728000 10485760000\n', + stderr: '', + }), + createSession: vi.fn().mockResolvedValue({}), + createBackup, + }) as unknown as SandboxInstance, + } + ); + + await 
expect(sandbox.ensureWrapper(request)).resolves.toMatchObject({ + status: 'session-ready', + }); + expect(ensureSessionReady).toHaveBeenCalledWith(request.prepared.readyRequest); + expect(bucket.get).not.toHaveBeenCalled(); + expect(createBackup).not.toHaveBeenCalled(); + expect(bucket.put).not.toHaveBeenCalled(); + expect(onProgress).not.toHaveBeenCalledWith( + 'workspace_restore', + 'Restoring prepared workspace...' + ); + expect(onProgress).not.toHaveBeenCalledWith('workspace_backup', 'Saving prepared workspace...'); + expect(mockLogWorkspaceBackupLifecycle).not.toHaveBeenCalled(); + }); + + it('restores an organization candidate published for a different user', async () => { + const sourceCommit = 'a'.repeat(40); + const request = ensureRequest({ cacheEligible: true }); + const onProgress = vi.fn((step: string) => { + if (step === 'workspace_restore') throw new Error('progress listener unavailable'); + }); + request.onProgress = onProgress; + if (request.prepared.readyRequest) { + request.prepared.readyRequest.workspace.upstreamBranch = 'feature/branch-independent'; + } + const candidate = await buildWorkspaceBackupCandidate({ + fresh: true, + devcontainer: false, + setupCommands: ['pnpm install', 'node ./scripts/custom-setup.mjs --arbitrary'], + setupEnvironment: { + variables: { CACHE_VARIANT: 'resolved-profile-env' }, + secretIdentities: { + API_TOKEN: + '{"algorithm":"rsa-aes-256-gcm","version":1,"encryptedData":"encrypted-token","encryptedDEK":"encrypted-dek"}', + }, + }, + userId: 'different_org_member', + orgId: 'org_cloudflare', + repository: { type: 'github', repo: 'acme/repo' }, + }); + if (!candidate) throw new Error('expected eligible candidate'); + const bucket = { + get: vi.fn().mockResolvedValue({ + json: vi.fn().mockResolvedValue({ + schema: 'workspace-backup-v1', + digest: candidate.digest, + owner: { type: 'organization', organizationId: 'org_cloudflare' }, + sourceCommit, + createdAt: Date.now() - 1_000, + expiresAt: Date.now() + 10_000, + 
backup: { id: 'backup-1', dir: '/workspace/source' }, + }), + }), + put: vi.fn(), + }; + const activeOrigin = 'https://token@github.com/acme/repo.git'; + const bootstrapSession = { + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 0, stdout: `${sourceCommit}\n` }) + .mockResolvedValueOnce({ exitCode: 0, stdout: `${activeOrigin}\n` }) + .mockResolvedValue({ exitCode: 0, stdout: '' }), + }; + const ensureSessionReady = vi.fn().mockResolvedValue({ + kiloSessionId: 'kilo_cloudflare', + workspaceReady: { branchName: 'restored-branch' }, + }); + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { ensureSessionReady } as unknown as WrapperClient, + }); + const exec = vi.fn(async (command: string) => { + if (command.endsWith('&& echo exists')) { + return { exitCode: 1, stdout: '', stderr: '' }; + } + return { exitCode: 0, stdout: `${sourceCommit}\n`, stderr: '' }; + }); + const sandboxApi = { + exec, + createSession: vi.fn().mockResolvedValue(bootstrapSession), + restoreBackup: vi.fn().mockResolvedValue(undefined), + createBackup: vi.fn().mockResolvedValue({ + id: 'backup-republished-hit', + dir: '/workspace/cloudflare', + }), + }; + const sandbox = new CloudflareAgentSandbox( + { + WORKER_URL: 'http://localhost:8787', + R2_BUCKET: bucket, + BACKUP_BUCKET: bucket, + } as unknown as Env, + metadata(), + { resolveSandbox: () => sandboxApi as unknown as SandboxInstance } + ); + + await expect(sandbox.ensureWrapper(request)).resolves.toMatchObject({ + status: 'session-ready', + ready: { branchName: 'restored-branch' }, + }); + expect(sandboxApi.restoreBackup).toHaveBeenCalledWith({ + id: 'backup-1', + dir: '/workspace/cloudflare', + }); + expect(ensureSessionReady).toHaveBeenCalledWith( + expect.objectContaining({ + workspace: expect.objectContaining({ + upstreamBranch: 'feature/branch-independent', + restoredFromBackup: true, + }), + materialized: { + env: { + CACHE_VARIANT: 'resolved-profile-env', + API_TOKEN: 'resolved-secret', + 
MATERIALIZED_TOKEN: 'runtime-token', + }, + setupCommands: ['pnpm install', 'node ./scripts/custom-setup.mjs --arbitrary'], + }, + }) + ); + expect(ensureSessionReady.mock.calls[0]?.[0]).not.toHaveProperty('setupCache'); + const rmInvocation = exec.mock.invocationCallOrder.find( + (_, index) => exec.mock.calls[index]?.[0] === "rm -rf -- '/workspace/cloudflare'" + ); + const mkdirInvocation = exec.mock.invocationCallOrder.find( + (_, index) => exec.mock.calls[index]?.[0] === "mkdir -p -- '/workspace'" + ); + const restoreInvocation = sandboxApi.restoreBackup.mock.invocationCallOrder[0]; + expect(rmInvocation).toBeDefined(); + expect(mkdirInvocation).toBeDefined(); + expect(restoreInvocation).toBeDefined(); + expect(rmInvocation).toBeLessThan(mkdirInvocation ?? 0); + expect(mkdirInvocation).toBeLessThan(restoreInvocation ?? 0); + expect(exec).toHaveBeenCalledWith( + expect.stringContaining(`rev-parse --verify HEAD)" = '${sourceCommit}'`) + ); + expect(sandboxApi.createBackup).not.toHaveBeenCalled(); + expect(bucket.put).not.toHaveBeenCalled(); + expect(onProgress).toHaveBeenCalledWith('workspace_restore', 'Restoring prepared workspace...'); + expect(onProgress).not.toHaveBeenCalledWith('workspace_backup', 'Saving prepared workspace...'); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(1, { + operation: 'restore', + outcome: 'started', + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(2, { + operation: 'restore', + outcome: 'completed', + durationMs: expect.any(Number), + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenCalledTimes(2); + vi.restoreAllMocks(); + }); + + it('rejects restore when the workspace cannot be removed first', async () => { + const sourceCommit = 'b'.repeat(40); + const request = ensureRequest({ cacheEligible: true }); + const candidate = await buildWorkspaceBackupCandidate({ + fresh: true, + devcontainer: false, + setupCommands: ['pnpm install', 'node ./scripts/custom-setup.mjs --arbitrary'], + 
setupEnvironment: { + variables: { CACHE_VARIANT: 'resolved-profile-env' }, + secretIdentities: { + API_TOKEN: + '{"algorithm":"rsa-aes-256-gcm","version":1,"encryptedData":"encrypted-token","encryptedDEK":"encrypted-dek"}', + }, + }, + userId: 'user_cloudflare', + orgId: 'org_cloudflare', + repository: { type: 'github', repo: 'acme/repo' }, + }); + if (!candidate) throw new Error('expected eligible candidate'); + const bucket = { + get: vi.fn().mockResolvedValue({ + json: vi.fn().mockResolvedValue({ + schema: 'workspace-backup-v1', + digest: candidate.digest, + owner: { type: 'organization', organizationId: 'org_cloudflare' }, + sourceCommit, + createdAt: Date.now() - 1_000, + expiresAt: Date.now() + 10_000, + backup: { id: 'backup-1', dir: '/workspace/source' }, + }), + }), + put: vi.fn(), + }; + const createSession = vi.fn(); + const restoreBackup = vi.fn(); + const sandbox = new CloudflareAgentSandbox( + { WORKER_URL: 'http://localhost:8787', BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { + resolveSandbox: () => + ({ + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValue({ exitCode: 1, stdout: '', stderr: 'rm failed' }), + createSession, + restoreBackup, + }) as unknown as SandboxInstance, + } + ); + + await expect(sandbox.ensureWrapper(request)).rejects.toMatchObject({ + name: 'WorkspaceFilesystemPreparationError', + target: 'workspace_directory', + } satisfies Partial); + expect(restoreBackup).not.toHaveBeenCalled(); + expect(createSession).not.toHaveBeenCalled(); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(1, { + operation: 'restore', + outcome: 'started', + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(2, { + operation: 'restore', + outcome: 'failed', + durationMs: expect.any(Number), + failureCategory: 'workspace_cleanup_failed', + }); + }); + + it('rejects restore when the workspace parent cannot be created', async () => { + const sourceCommit = 
'b'.repeat(40); + const request = ensureRequest({ cacheEligible: true }); + const candidate = await buildWorkspaceBackupCandidate({ + fresh: true, + devcontainer: false, + setupCommands: ['pnpm install', 'node ./scripts/custom-setup.mjs --arbitrary'], + setupEnvironment: { + variables: { CACHE_VARIANT: 'resolved-profile-env' }, + secretIdentities: { + API_TOKEN: + '{"algorithm":"rsa-aes-256-gcm","version":1,"encryptedData":"encrypted-token","encryptedDEK":"encrypted-dek"}', + }, + }, + userId: 'user_cloudflare', + orgId: 'org_cloudflare', + repository: { type: 'github', repo: 'acme/repo' }, + }); + if (!candidate) throw new Error('expected eligible candidate'); + const bucket = { + get: vi.fn().mockResolvedValue({ + json: vi.fn().mockResolvedValue({ + schema: 'workspace-backup-v1', + digest: candidate.digest, + owner: { type: 'organization', organizationId: 'org_cloudflare' }, + sourceCommit, + createdAt: Date.now() - 1_000, + expiresAt: Date.now() + 10_000, + backup: { id: 'backup-1', dir: '/workspace/source' }, + }), + }), + put: vi.fn(), + }; + const createSession = vi.fn(); + const restoreBackup = vi.fn(); + const exec = vi.fn(async (command: string) => { + if (command.startsWith('test -d')) return { exitCode: 1, stdout: '', stderr: '' }; + if (command === "mkdir -p -- '/workspace'") { + return { exitCode: 1, stdout: '', stderr: 'mkdir failed' }; + } + return { exitCode: 0, stdout: '', stderr: '' }; + }); + const sandbox = new CloudflareAgentSandbox( + { WORKER_URL: 'http://localhost:8787', BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { + resolveSandbox: () => + ({ exec, createSession, restoreBackup }) as unknown as SandboxInstance, + } + ); + + await expect(sandbox.ensureWrapper(request)).rejects.toMatchObject({ + name: 'WorkspaceFilesystemPreparationError', + target: 'workspace_directory', + message: 'Failed to create workspace parent directory: mkdir failed', + } satisfies Partial); + expect(exec).toHaveBeenCalledWith("rm -rf -- 
'/workspace/cloudflare'"); + expect(exec).toHaveBeenCalledWith("mkdir -p -- '/workspace'"); + expect(restoreBackup).not.toHaveBeenCalled(); + expect(createSession).not.toHaveBeenCalled(); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(1, { + operation: 'restore', + outcome: 'started', + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(2, { + operation: 'restore', + outcome: 'failed', + durationMs: expect.any(Number), + failureCategory: 'workspace_parent_prepare_failed', + }); + }); + + it('falls cold after restore rejection, succeeds setup, and publishes exactly once', async () => { + const sourceCommit = 'd'.repeat(40); + const request = ensureRequest({ cacheEligible: true }); + const candidate = await buildWorkspaceBackupCandidate({ + fresh: true, + devcontainer: false, + setupCommands: ['pnpm install', 'node ./scripts/custom-setup.mjs --arbitrary'], + setupEnvironment: { + variables: { CACHE_VARIANT: 'resolved-profile-env' }, + secretIdentities: { + API_TOKEN: + '{"algorithm":"rsa-aes-256-gcm","version":1,"encryptedData":"encrypted-token","encryptedDEK":"encrypted-dek"}', + }, + }, + userId: 'user_cloudflare', + orgId: 'org_cloudflare', + repository: { type: 'github', repo: 'acme/repo' }, + }); + if (!candidate) throw new Error('expected eligible candidate'); + const bucket = { + get: vi.fn().mockResolvedValue({ + json: vi.fn().mockResolvedValue({ + schema: 'workspace-backup-v1', + digest: candidate.digest, + owner: { type: 'organization', organizationId: 'org_cloudflare' }, + sourceCommit, + createdAt: Date.now() - 1_000, + expiresAt: Date.now() + 10_000, + backup: { id: 'backup-1', dir: '/workspace/source' }, + }), + }), + put: vi.fn(), + }; + const ensureSessionReady = vi.fn().mockResolvedValue({ kiloSessionId: 'kilo_cloudflare' }); + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { ensureSessionReady } as unknown as WrapperClient, + }); + const activeOrigin = 
'https://token@github.com/acme/repo.git'; + const bootstrapSession = { + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 0, stdout: `${sourceCommit}\n` }) + .mockResolvedValueOnce({ exitCode: 0, stdout: `${activeOrigin}\n` }) + .mockResolvedValue({ exitCode: 0, stdout: '' }), + }; + const exec = vi + .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '3145728000 10485760000\n', stderr: '' }); + const sandboxApi = { + exec, + restoreBackup: vi.fn().mockRejectedValue(new Error('backup unavailable')), + createSession: vi.fn().mockResolvedValue(bootstrapSession), + createBackup: vi.fn().mockResolvedValue({ + id: 'backup-republished', + dir: '/workspace/cloudflare', + }), + }; + const sandbox = new CloudflareAgentSandbox( + { + WORKER_URL: 'http://localhost:8787', + BACKUP_BUCKET: bucket, + } as unknown as Env, + metadata(), + { resolveSandbox: () => sandboxApi as unknown as SandboxInstance } + ); + + await expect(sandbox.ensureWrapper(request)).resolves.toMatchObject({ + status: 'session-ready', + }); + expect(ensureSessionReady).toHaveBeenCalledOnce(); + expect(ensureSessionReady).toHaveBeenCalledWith(request.prepared.readyRequest); + expect(exec.mock.calls.filter(([command]) => command.includes('rm -rf'))).toHaveLength(2); + expect(sandboxApi.createBackup).toHaveBeenCalledWith({ + dir: '/workspace/cloudflare', + ttl: 86_400, + localBucket: true, + }); + expect(bucket.put).toHaveBeenCalledOnce(); + expect(mockLogWorkspaceBackupLifecycle.mock.calls).toEqual([ + [{ operation: 'restore', outcome: 'started' }], + [ + { + operation: 'restore', + outcome: 'failed', + durationMs: expect.any(Number), + failureCategory: 'backup_restore_failed', + }, + ], + [{ operation: 'create', outcome: 'started' }], + [ + 
{ + operation: 'create', + outcome: 'completed', + durationMs: expect.any(Number), + }, + ], + ]); + vi.restoreAllMocks(); + }); + + it('does not retry a restored workspace when setup fails', async () => { + const sourceCommit = 'c'.repeat(40); + const request = ensureRequest({ cacheEligible: true }); + const candidate = await buildWorkspaceBackupCandidate({ + fresh: true, + devcontainer: false, + setupCommands: ['pnpm install', 'node ./scripts/custom-setup.mjs --arbitrary'], + setupEnvironment: { + variables: { CACHE_VARIANT: 'resolved-profile-env' }, + secretIdentities: { + API_TOKEN: + '{"algorithm":"rsa-aes-256-gcm","version":1,"encryptedData":"encrypted-token","encryptedDEK":"encrypted-dek"}', + }, + }, + userId: 'user_cloudflare', + orgId: 'org_cloudflare', + repository: { type: 'github', repo: 'acme/repo' }, + }); + if (!candidate) throw new Error('expected eligible candidate'); + const bucket = { + get: vi.fn().mockResolvedValue({ + json: vi.fn().mockResolvedValue({ + schema: 'workspace-backup-v1', + digest: candidate.digest, + owner: { type: 'organization', organizationId: 'org_cloudflare' }, + sourceCommit, + createdAt: Date.now() - 1_000, + expiresAt: Date.now() + 10_000, + backup: { id: 'backup-1', dir: '/workspace/source' }, + }), + }), + put: vi.fn(), + }; + const setupError = new WrapperError('restored setup failed', 'WORKSPACE_SETUP_FAILED', 503); + const ensureSessionReady = vi.fn().mockRejectedValue(setupError); + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { ensureSessionReady } as unknown as WrapperClient, + }); + const exec = vi + .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: `${sourceCommit}\n`, stderr: '' }) + .mockResolvedValue({ exitCode: 1, stdout: '', stderr: 'rm failed' }); + const sandbox = new 
CloudflareAgentSandbox( + { WORKER_URL: 'http://localhost:8787', BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { + resolveSandbox: () => + ({ + exec, + restoreBackup: vi.fn(), + createSession: vi.fn().mockResolvedValue({ exec: vi.fn() }), + }) as unknown as SandboxInstance, + } + ); + + await expect(sandbox.ensureWrapper(request)).rejects.toBe(setupError); + expect(ensureSessionReady).toHaveBeenCalledOnce(); + expect(exec.mock.calls.filter(([command]) => command.includes('rm -rf'))).toHaveLength(1); + vi.restoreAllMocks(); + }); + + it('keeps index publication failure nonfatal after restoring the authenticated origin', async () => { + const request = ensureRequest({ cacheEligible: true }); + const bucket = { + get: vi.fn().mockResolvedValue(null), + put: vi.fn().mockRejectedValue(new Error('index unavailable')), + }; + const activeOrigin = 'https://token@github.com/acme/repo.git'; + const bootstrapSession = { + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 0, stdout: `${'e'.repeat(40)}\n` }) + .mockResolvedValueOnce({ exitCode: 0, stdout: `${activeOrigin}\n` }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '' }), + }; + const ensureSessionReady = vi.fn().mockResolvedValue({ kiloSessionId: 'kilo_cloudflare' }); + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { ensureSessionReady } as unknown as WrapperClient, + }); + const sandboxApi = { + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '3145728000 10485760000\n', stderr: '' }), + createSession: vi.fn().mockResolvedValue(bootstrapSession), + createBackup: vi.fn().mockResolvedValue({ id: 'backup-new', dir: '/workspace/cloudflare' }), + }; + const sandbox = new CloudflareAgentSandbox( + { WORKER_URL: 'http://localhost:8787', BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { resolveSandbox: () => sandboxApi as 
unknown as SandboxInstance } + ); + + await expect(sandbox.ensureWrapper(request)).resolves.toMatchObject({ + status: 'session-ready', + }); + expect(ensureSessionReady).toHaveBeenCalledWith(request.prepared.readyRequest); + expect(bootstrapSession.exec).toHaveBeenNthCalledWith( + 3, + expect.stringContaining("remote set-url origin 'https://github.com/acme/repo.git'") + ); + expect(bootstrapSession.exec).toHaveBeenNthCalledWith( + 4, + expect.stringContaining(`remote set-url origin '${activeOrigin}'`) + ); + expect(sandboxApi.createBackup).toHaveBeenCalledWith({ + dir: '/workspace/cloudflare', + ttl: 86_400, + localBucket: true, + }); + expect(bucket.put).toHaveBeenCalledOnce(); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(1, { + operation: 'create', + outcome: 'started', + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(2, { + operation: 'create', + outcome: 'failed', + durationMs: expect.any(Number), + failureCategory: 'index_write_failed', + }); + vi.restoreAllMocks(); + }); + + it('fails recoverably after two authenticated-origin restoration failures without writing an index', async () => { + const request = ensureRequest({ cacheEligible: true }); + const bucket = { get: vi.fn().mockResolvedValue(null), put: vi.fn() }; + const bootstrapSession = { + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 0, stdout: `${'f'.repeat(40)}\n` }) + .mockResolvedValueOnce({ exitCode: 0, stdout: 'https://token@github.com/acme/repo.git\n' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '' }) + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: 'restore failed' }) + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: 'restore failed again' }), + }; + const ensureSessionReady = vi.fn().mockResolvedValue({ kiloSessionId: 'kilo_cloudflare' }); + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { ensureSessionReady } as unknown as WrapperClient, + }); + const sandboxApi = { + exec: vi 
+ .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '3145728000 10485760000\n', stderr: '' }), + createSession: vi.fn().mockResolvedValue(bootstrapSession), + createBackup: vi.fn().mockResolvedValue({ id: 'backup-new', dir: '/workspace/cloudflare' }), + }; + const sandbox = new CloudflareAgentSandbox( + { WORKER_URL: 'http://localhost:8787', BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { resolveSandbox: () => sandboxApi as unknown as SandboxInstance } + ); + + const preparation = sandbox.ensureWrapper(request); + await expect(preparation).rejects.toMatchObject({ + name: 'WorkspaceFilesystemPreparationError', + target: 'workspace_directory', + message: 'Failed to restore workspace repository authentication', + cause: expect.objectContaining({ + message: 'Authenticated workspace origin restoration failed after two attempts', + }), + } satisfies Partial); + await expect(preparation).rejects.not.toThrow('token'); + expect(ensureSessionReady).toHaveBeenCalledOnce(); + expect(bootstrapSession.exec).toHaveBeenCalledTimes(5); + expect(bucket.put).not.toHaveBeenCalled(); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(1, { + operation: 'create', + outcome: 'started', + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(2, { + operation: 'create', + outcome: 'failed', + durationMs: expect.any(Number), + failureCategory: 'authenticated_origin_restore_failed', + }); + vi.restoreAllMocks(); + }); + + it('keeps createBackup failure nonfatal and does not write an index', async () => { + const request = ensureRequest({ cacheEligible: true }); + const onProgress = vi.fn((step: string) => { + if (step === 'workspace_backup') throw new Error('progress listener unavailable'); + }); + request.onProgress = onProgress; + const bucket = { get: vi.fn().mockResolvedValue(null), put: vi.fn() }; + const bootstrapSession = { + exec: vi + .fn() + .mockResolvedValueOnce({ 
exitCode: 0, stdout: `${'f'.repeat(40)}\n` }) + .mockResolvedValueOnce({ exitCode: 0, stdout: 'https://token@github.com/acme/repo.git\n' }) + .mockResolvedValue({ exitCode: 0, stdout: '' }), + }; + vi.spyOn(WrapperClient, 'ensureBootstrapWrapper').mockResolvedValueOnce({ + client: { + ensureSessionReady: vi.fn().mockResolvedValue({ kiloSessionId: 'kilo_cloudflare' }), + } as unknown as WrapperClient, + }); + const sandboxApi = { + exec: vi + .fn() + .mockResolvedValueOnce({ exitCode: 1, stdout: '', stderr: '' }) + .mockResolvedValueOnce({ exitCode: 0, stdout: '3145728000 10485760000\n', stderr: '' }), + createSession: vi.fn().mockResolvedValue(bootstrapSession), + createBackup: vi.fn().mockRejectedValue(new Error('backup unavailable')), + }; + const sandbox = new CloudflareAgentSandbox( + { WORKER_URL: 'http://localhost:8787', BACKUP_BUCKET: bucket } as unknown as Env, + metadata(), + { resolveSandbox: () => sandboxApi as unknown as SandboxInstance } + ); + + await expect(sandbox.ensureWrapper(request)).resolves.toMatchObject({ + status: 'session-ready', + }); + expect(bucket.put).not.toHaveBeenCalled(); + expect(bootstrapSession.exec).toHaveBeenCalledTimes(4); + expect(onProgress).toHaveBeenCalledWith('workspace_backup', 'Saving prepared workspace...'); + expect(onProgress).not.toHaveBeenCalledWith( + 'workspace_restore', + 'Restoring prepared workspace...' 
+ ); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(1, { + operation: 'create', + outcome: 'started', + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenNthCalledWith(2, { + operation: 'create', + outcome: 'failed', + durationMs: expect.any(Number), + failureCategory: 'backup_create_failed', + }); + expect(mockLogWorkspaceBackupLifecycle).toHaveBeenCalledTimes(2); + vi.restoreAllMocks(); + }); + it('types ENOSPC during the cold bootstrap probe as sandbox unusable', async () => { const createSession = vi.fn(); const sandbox = new CloudflareAgentSandbox({} as Env, metadata(), { diff --git a/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts b/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts index 80ade47f2b..c1e5939fcf 100644 --- a/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts +++ b/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts @@ -16,10 +16,11 @@ import type { import type { SessionMetadata } from '../../persistence/session-metadata.js'; import type { SandboxDeleteReason, WrapperStopReason } from '../protocol.js'; import { getSandbox } from '@cloudflare/sandbox'; +import { posix } from 'node:path'; import { SANDBOX_SLEEP_AFTER_SECONDS } from '../../core/lease.js'; import { generateSandboxId, getSandboxNamespace } from '../../sandbox-id.js'; import { SessionService } from '../../session-service.js'; -import { WrapperClient, WrapperContainerClient } from '../../kilo/wrapper-client.js'; +import { WrapperClient, WrapperContainerClient, WrapperError } from '../../kilo/wrapper-client.js'; import { discoverSessionWrappers, findWrapperForSession, @@ -40,14 +41,78 @@ import { SANDBOX_WORKSPACE_PROBE_TIMEOUT_MESSAGE } from '../../sandbox-recovery. 
import { withTimeout } from '@kilocode/worker-utils'; import { WRAPPER_VERSION } from '../../shared/wrapper-version.js'; import { ExecutionError } from '../../execution/errors.js'; +import { readProfileBundle, type SessionProfileBundle } from '../../session-profile.js'; +import { + logWorkspaceBackupDisabled, + logWorkspaceBackupLifecycle, + type WorkspaceBackupFailureCategory, +} from '../../workspace-backup-observability.js'; +import { + buildWorkspaceBackupCandidate, + createWorkspaceBackupRecord, + loadWorkspaceBackupRecord, + storeWorkspaceBackupRecord, + WORKSPACE_BACKUP_TTL_MS, + type WorkspaceBackupCandidate, +} from '../../workspace-backup-cache.js'; import { isSandboxFilesystemUnusableError, SandboxCapacityInspectionError, + WorkspaceFilesystemPreparationError, } from '../../workspace-errors.js'; const PREPARE_WORKSPACE_TIMEOUT_MS = 10 * 60 * 1000; const DEFAULT_STOP_OBSERVATION_DELAYS_MS = [100, 500, 1_000]; +function shellQuote(value: string): string { + return `'${value.replaceAll("'", `'"'"'`)}'`; +} + +function elapsedMs(startedAt: number): number { + return Math.max(0, Date.now() - startedAt); +} + +function reportWorkspaceBackupProgress( + onProgress: EnsureWrapperRequest['onProgress'], + step: 'workspace_restore' | 'workspace_backup', + message: string +): void { + try { + onProgress?.(step, message); + } catch { + return; + } +} + +export function deriveSetupEnvironment( + profile: Pick, + materializedEnvironment: Record +): { + variables: Record; + secretIdentities: Record; +} | null { + const encryptedSecrets = profile.encryptedSecrets ?? {}; + const variables: Record = {}; + for (const key of Object.keys(profile.envVars ?? 
{})) { + if (Object.hasOwn(encryptedSecrets, key)) continue; + if (!Object.hasOwn(materializedEnvironment, key)) return null; + const value = materializedEnvironment[key]; + if (value === undefined) return null; + variables[key] = value; + } + + const secretIdentities: Record = {}; + for (const [key, envelope] of Object.entries(encryptedSecrets)) { + secretIdentities[key] = JSON.stringify({ + algorithm: envelope.algorithm, + version: envelope.version, + encryptedData: envelope.encryptedData, + encryptedDEK: envelope.encryptedDEK, + }); + } + return { variables, secretIdentities }; +} + function withWorkspacePreparationTimeout(operation: Promise, step: string): Promise { return withTimeout( operation, @@ -174,6 +239,251 @@ export class CloudflareAgentSandbox implements AgentSandbox { return this.usesDevcontainerRuntime() ? sessionId : `${sessionId}-bootstrap`; } + private backupMode(): { bucket: R2Bucket; localBucket: boolean } | null { + const bucket = this.env.BACKUP_BUCKET; + if (!bucket) return null; + try { + const hostname = new URL(this.env.WORKER_URL ?? 
'').hostname; + if ( + hostname === 'localhost' || + hostname === '127.0.0.1' || + hostname === 'host.docker.internal' + ) { + return { bucket, localBucket: true }; + } + } catch { + logWorkspaceBackupDisabled('invalid_worker_url'); + return null; + } + if ( + !this.env.BACKUP_BUCKET_NAME || + !this.env.CLOUDFLARE_R2_ACCOUNT_ID || + !this.env.R2_ACCESS_KEY_ID || + !this.env.R2_SECRET_ACCESS_KEY + ) { + return null; + } + return { bucket, localBucket: false }; + } + + private async buildBackupCandidate(request: EnsureWrapperRequest) { + const readyRequest = request.prepared.readyRequest; + if (!readyRequest) return null; + const profile = readProfileBundle(request.plan.workspace.metadata); + const setupEnvironment = deriveSetupEnvironment(profile, readyRequest.materialized.env); + if (setupEnvironment === null) return null; + const repo = readyRequest.repo; + return buildWorkspaceBackupCandidate({ + fresh: request.plan.workspace.metadata.lifecycle.preparedAt === undefined, + devcontainer: readyRequest.devcontainer?.requested === true, + setupCommands: readyRequest.materialized.setupCommands, + setupEnvironment, + userId: request.plan.scope.userId, + orgId: request.plan.scope.orgId, + repository: + repo?.kind === 'github' + ? { type: 'github', repo: repo.repo } + : repo + ? { type: repo.platform === 'gitlab' ? 
'gitlab' : 'git', url: repo.url } + : undefined, + shallow: repo?.shallow, + }); + } + + private async cleanWorkspaceTarget( + sandbox: SandboxInstance, + workspacePath: string + ): Promise { + const removal = await sandbox.exec(`rm -rf -- ${shellQuote(workspacePath)}`); + if (removal.exitCode !== 0) { + throw new WorkspaceFilesystemPreparationError( + 'workspace_directory', + `Failed to remove workspace directory: ${removal.stderr || `exit code ${removal.exitCode}`}`, + removal + ); + } + } + + private async prepareWorkspaceRestoreParent( + sandbox: SandboxInstance, + workspacePath: string + ): Promise { + const parentPath = posix.dirname(workspacePath); + const creation = await sandbox.exec(`mkdir -p -- ${shellQuote(parentPath)}`); + if (creation.exitCode !== 0) { + throw new WorkspaceFilesystemPreparationError( + 'workspace_directory', + `Failed to create workspace parent directory: ${creation.stderr || `exit code ${creation.exitCode}`}`, + creation + ); + } + } + + private async restoreWorkspaceBackup( + sandbox: SandboxInstance, + workspacePath: string, + candidate: WorkspaceBackupCandidate, + bucket: R2Bucket, + onProgress?: EnsureWrapperRequest['onProgress'] + ): Promise { + const record = await loadWorkspaceBackupRecord(bucket, candidate); + if (!record) return undefined; + reportWorkspaceBackupProgress( + onProgress, + 'workspace_restore', + 'Restoring prepared workspace...' 
+ ); + const startedAt = Date.now(); + logWorkspaceBackupLifecycle({ operation: 'restore', outcome: 'started' }); + let failureCategory: WorkspaceBackupFailureCategory = 'workspace_cleanup_failed'; + try { + await this.cleanWorkspaceTarget(sandbox, workspacePath); + failureCategory = 'workspace_parent_prepare_failed'; + await this.prepareWorkspaceRestoreParent(sandbox, workspacePath); + failureCategory = 'backup_restore_failed'; + await sandbox.restoreBackup({ ...record.backup, dir: workspacePath }); + failureCategory = 'backup_validation_failed'; + const validation = await sandbox.exec( + `test -d ${shellQuote(`${workspacePath}/.git`)} && test "$(git -C ${shellQuote(workspacePath)} rev-parse --verify HEAD)" = ${shellQuote(record.sourceCommit)} && test "$(git -C ${shellQuote(workspacePath)} remote get-url origin)" = ${shellQuote(candidate.canonicalRepository)}` + ); + if (validation.exitCode !== 0) throw new Error('restored workspace validation failed'); + logWorkspaceBackupLifecycle({ + operation: 'restore', + outcome: 'completed', + durationMs: elapsedMs(startedAt), + }); + return record.sourceCommit; + } catch (error) { + if (error instanceof WorkspaceFilesystemPreparationError) { + logWorkspaceBackupLifecycle({ + operation: 'restore', + outcome: 'failed', + durationMs: elapsedMs(startedAt), + failureCategory, + }); + throw error; + } + try { + await this.cleanWorkspaceTarget(sandbox, workspacePath); + } catch (cleanupError) { + logWorkspaceBackupLifecycle({ + operation: 'restore', + outcome: 'failed', + durationMs: elapsedMs(startedAt), + failureCategory: 'fallback_cleanup_failed', + }); + throw cleanupError; + } + logWorkspaceBackupLifecycle({ + operation: 'restore', + outcome: 'failed', + durationMs: elapsedMs(startedAt), + failureCategory, + }); + return undefined; + } + } + + private async publishWorkspaceBackup(options: { + sandbox: SandboxInstance; + bootstrapSession: Awaited>; + workspacePath: string; + candidate: WorkspaceBackupCandidate; + bucket: 
R2Bucket; + localBucket: boolean; + onProgress?: EnsureWrapperRequest['onProgress']; + }): Promise { + const { sandbox, bootstrapSession, workspacePath, candidate, bucket, localBucket, onProgress } = + options; + reportWorkspaceBackupProgress(onProgress, 'workspace_backup', 'Saving prepared workspace...'); + const startedAt = Date.now(); + logWorkspaceBackupLifecycle({ operation: 'create', outcome: 'started' }); + let failureCategory: WorkspaceBackupFailureCategory = 'source_commit_read_failed'; + try { + const head = await bootstrapSession.exec( + `git -C ${shellQuote(workspacePath)} rev-parse --verify HEAD` + ); + const sourceCommit = head.stdout.trim(); + if (head.exitCode !== 0 || !/^[a-f0-9]{40,64}$/i.test(sourceCommit)) { + throw new Error('Cannot publish workspace backup without a readable HEAD'); + } + failureCategory = 'active_origin_read_failed'; + const origin = await bootstrapSession.exec( + `git -C ${shellQuote(workspacePath)} remote get-url origin` + ); + if (origin.exitCode !== 0 || !origin.stdout.trim()) { + throw new Error('Cannot capture active workspace origin'); + } + const activeOrigin = origin.stdout.trim(); + let originChanged = false; + let backup: Awaited> | undefined; + let publicationFailure: + | { error: unknown; category: WorkspaceBackupFailureCategory } + | undefined; + let originRestored = true; + try { + failureCategory = 'canonical_origin_set_failed'; + const setCanonical = await bootstrapSession.exec( + `git -C ${shellQuote(workspacePath)} remote set-url origin ${shellQuote(candidate.canonicalRepository)}` + ); + if (setCanonical.exitCode !== 0) + throw new Error('Failed to set canonical workspace origin'); + originChanged = true; + failureCategory = 'backup_create_failed'; + backup = await sandbox.createBackup({ + dir: workspacePath, + ttl: WORKSPACE_BACKUP_TTL_MS / 1000, + ...(localBucket ? 
{ localBucket: true } : {}), + }); + } catch (error) { + publicationFailure = { error, category: failureCategory }; + } finally { + if (originChanged) { + failureCategory = 'authenticated_origin_restore_failed'; + const restoreCommand = `git -C ${shellQuote(workspacePath)} remote set-url origin ${shellQuote(activeOrigin)}`; + originRestored = false; + for (let attempt = 1; attempt <= 2; attempt += 1) { + const restoreOrigin = await bootstrapSession.exec(restoreCommand); + if (restoreOrigin.exitCode === 0) { + originRestored = true; + break; + } + } + } + } + if (!originRestored) { + throw new WorkspaceFilesystemPreparationError( + 'workspace_directory', + 'Failed to restore workspace repository authentication', + new Error('Authenticated workspace origin restoration failed after two attempts') + ); + } + if (publicationFailure) { + failureCategory = publicationFailure.category; + throw publicationFailure.error; + } + failureCategory = 'backup_create_failed'; + if (!backup) throw new Error('Workspace backup creation returned no handle'); + failureCategory = 'backup_record_create_failed'; + const record = createWorkspaceBackupRecord(candidate, backup, sourceCommit); + failureCategory = 'index_write_failed'; + await storeWorkspaceBackupRecord(bucket, candidate, record); + logWorkspaceBackupLifecycle({ + operation: 'create', + outcome: 'completed', + durationMs: elapsedMs(startedAt), + }); + } catch (error) { + logWorkspaceBackupLifecycle({ + operation: 'create', + outcome: 'failed', + durationMs: elapsedMs(startedAt), + failureCategory, + }); + throw error; + } + } + async ensureWrapper(request: EnsureWrapperRequest) { const { plan, prepared } = request; const { sessionId, userId, orgId } = plan.scope; @@ -228,8 +538,23 @@ export class CloudflareAgentSandbox implements AgentSandbox { }; } - const workspaceWarm = await this.workspaceHasGit(sandbox, prepared.context.workspacePath); - if (!workspaceWarm) { + const workspacePath = prepared.context.workspacePath; + const 
workspaceWarm = await this.workspaceHasGit(sandbox, workspacePath); + const backupMode = !workspaceWarm ? this.backupMode() : null; + const backupCandidate = backupMode ? await this.buildBackupCandidate(request) : null; + const restoredSourceCommit = + backupCandidate && backupMode + ? await this.restoreWorkspaceBackup( + sandbox, + workspacePath, + backupCandidate, + backupMode.bucket, + request.onProgress + ) + : undefined; + const workspaceRestored = restoredSourceCommit !== undefined; + let shouldPublishBackup = backupCandidate !== null && !workspaceRestored; + if (!workspaceWarm && !workspaceRestored) { request.onProgress?.('disk_check', 'Checking disk space...'); await checkDiskAndCleanBeforeSetup(sandbox, orgId, userId, sessionId, { inspectContainers: sandboxId.startsWith('dind-'), @@ -246,7 +571,68 @@ export class CloudflareAgentSandbox implements AgentSandbox { userId, ...(request.leasedInstance ? { leasedInstance: request.leasedInstance } : {}), }); - return { status: 'wrapper-running' as const, client: wrapper.client }; + if (!prepared.readyRequest) { + return { status: 'wrapper-running' as const, client: wrapper.client }; + } + + const readyRequest = workspaceRestored + ? 
{ + ...prepared.readyRequest, + workspace: { + ...prepared.readyRequest.workspace, + restoredFromBackup: true, + }, + } + : prepared.readyRequest; + let readyResult: Awaited>; + try { + readyResult = await withWorkspacePreparationTimeout( + wrapper.client.ensureSessionReady(readyRequest), + 'wrapper readiness' + ); + } catch (error) { + if ( + !workspaceRestored || + !(error instanceof WrapperError) || + error.code !== 'WORKSPACE_RECONCILIATION_FAILED' + ) { + throw error; + } + + await this.cleanWorkspaceTarget(sandbox, workspacePath); + request.onProgress?.('disk_check', 'Checking disk space...'); + await checkDiskAndCleanBeforeSetup(sandbox, orgId, userId, sessionId, { + inspectContainers: sandboxId.startsWith('dind-'), + }); + shouldPublishBackup = true; + readyResult = await withWorkspacePreparationTimeout( + wrapper.client.ensureSessionReady(prepared.readyRequest), + 'wrapper readiness after restored workspace fallback' + ); + } + if (shouldPublishBackup && backupCandidate && backupMode) { + try { + await this.publishWorkspaceBackup({ + sandbox, + bootstrapSession, + workspacePath, + candidate: backupCandidate, + bucket: backupMode.bucket, + localBucket: backupMode.localBucket, + onProgress: request.onProgress, + }); + } catch (error) { + if (error instanceof WorkspaceFilesystemPreparationError) throw error; + } + } + return { + status: 'session-ready' as const, + client: wrapper.client, + ready: readyResult.workspaceReady + ? 
{ ...prepared.ready, ...readyResult.workspaceReady } + : prepared.ready, + kiloSessionId: readyResult.kiloSessionId, + }; } async discoverSessionWrappers(): Promise { diff --git a/services/cloud-agent-next/src/agent-sandbox/protocol.ts b/services/cloud-agent-next/src/agent-sandbox/protocol.ts index e8f874c221..3e14412968 100644 --- a/services/cloud-agent-next/src/agent-sandbox/protocol.ts +++ b/services/cloud-agent-next/src/agent-sandbox/protocol.ts @@ -1,5 +1,6 @@ import type { WrapperClient } from '../kilo/wrapper-client.js'; import type { TerminalWrapperClient } from '../terminal/access.js'; +import type { WrapperSessionReadyRequest } from '../shared/wrapper-bootstrap.js'; import type { FencedLegacyExecutionRequest, FencedWrapperDispatchRequest, @@ -64,6 +65,7 @@ export type EnsureWrapperRequest = { prepared: { ready: WorkspaceReady; context: { workspacePath: string }; + readyRequest?: WrapperSessionReadyRequest; }; onProgress?: (step: string, message: string) => void; }; diff --git a/services/cloud-agent-next/src/kilo/wrapper-client.test.ts b/services/cloud-agent-next/src/kilo/wrapper-client.test.ts index d809dd46a9..5ea9caf66a 100644 --- a/services/cloud-agent-next/src/kilo/wrapper-client.test.ts +++ b/services/cloud-agent-next/src/kilo/wrapper-client.test.ts @@ -2132,6 +2132,21 @@ describe('WrapperClient', () => { expect(error.name).toBe('WrapperError'); }); + it('maps workspace reconciliation failures to service unavailable', async () => { + const session = createMockSession( + createErrorResponse( + 'WORKSPACE_RECONCILIATION_FAILED', + 'Restored workspace could not be reconciled' + ) + ); + const client = new WrapperClient({ session, port: defaultPort }); + + await expect(client.ensureSessionReady({} as never)).rejects.toMatchObject({ + code: 'WORKSPACE_RECONCILIATION_FAILED', + statusCode: 503, + }); + }); + it('WrapperNotReadyError has correct properties', () => { const error = new WrapperNotReadyError('Not ready'); diff --git 
a/services/cloud-agent-next/src/kilo/wrapper-client.ts b/services/cloud-agent-next/src/kilo/wrapper-client.ts index 2b9208528b..8afdcab554 100644 --- a/services/cloud-agent-next/src/kilo/wrapper-client.ts +++ b/services/cloud-agent-next/src/kilo/wrapper-client.ts @@ -205,6 +205,7 @@ const ERROR_STATUS_CODES: Record = { JOB_CONFLICT: 409, NOT_FOUND: 404, INVALID_REQUEST: 400, + WORKSPACE_RECONCILIATION_FAILED: 503, WORKSPACE_SETUP_FAILED: 503, KILO_SERVER_FAILED: 503, SEND_ERROR: 500, diff --git a/services/cloud-agent-next/src/router.test.ts b/services/cloud-agent-next/src/router.test.ts index 8f5b6cb04f..02c29f100c 100644 --- a/services/cloud-agent-next/src/router.test.ts +++ b/services/cloud-agent-next/src/router.test.ts @@ -198,6 +198,16 @@ describe('router sessionId validation', () => { } }); + it('should reject the system-managed pnpm store variable', () => { + const result = envVarsSchema.safeParse({ + pnpm_config_store_dir: '/custom/pnpm-store', + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues[0]?.message).toContain('pnpm_config_store_dir'); + } + }); + it('should reject multiple reserved variables', () => { const result = envVarsSchema.safeParse({ HOME: '/custom/home', diff --git a/services/cloud-agent-next/src/session-service.test.ts b/services/cloud-agent-next/src/session-service.test.ts index 751e2e9326..cae521e97e 100644 --- a/services/cloud-agent-next/src/session-service.test.ts +++ b/services/cloud-agent-next/src/session-service.test.ts @@ -75,6 +75,7 @@ import { parseSessionMetadata } from './persistence/session-metadata.js'; import type { ExecutionSession, SandboxInstance, SessionId } from './types.js'; import type { FencedWrapperDispatchRequest } from './execution/types.js'; import { buildCloudAgentRules } from './shared/cloud-agent-rules.js'; +import { PNPM_STORE_DIR, PNPM_STORE_ENV_VAR } from './shared/runtime-environment.js'; import { SandboxCapacityInspectionError, 
WorkspaceCapacityAdmissionRejectedError, @@ -85,6 +86,30 @@ type MockExecutionSession = ExecutionSession & { gitCheckout: ReturnType; }; +describe('SessionService.buildRuntimeEnv', () => { + it('forces a stable pnpm store while preserving the session home', () => { + const service = new SessionService(); + const context = service.buildContext({ + sandboxId: 'usr-test', + userId: 'user_test', + sessionId: 'agent_test', + envVars: { + [PNPM_STORE_ENV_VAR]: '/home/old-session/.local/share/pnpm/store/v11', + }, + }); + + const runtimeEnv = service.buildRuntimeEnv({ + context, + env: createEnv(), + originalToken: 'kilo-token', + }); + + expect(runtimeEnv.HOME).toBe('/home/agent_test'); + expect(runtimeEnv.SESSION_HOME).toBe('/home/agent_test'); + expect(runtimeEnv[PNPM_STORE_ENV_VAR]).toBe(PNPM_STORE_DIR); + }); +}); + describe('code-review command guard policy', () => { it('allows required review publication and remote refresh commands while denying repository mutation', () => { const policy = getCommandGuardPolicy('code-review'); diff --git a/services/cloud-agent-next/src/session-service.ts b/services/cloud-agent-next/src/session-service.ts index 837d7547e4..75c9cee29d 100644 --- a/services/cloud-agent-next/src/session-service.ts +++ b/services/cloud-agent-next/src/session-service.ts @@ -67,6 +67,7 @@ import { type WrapperWorkspaceReady, } from './shared/wrapper-bootstrap.js'; import { buildCloudAgentRules } from './shared/cloud-agent-rules.js'; +import { PNPM_STORE_DIR, PNPM_STORE_ENV_VAR } from './shared/runtime-environment.js'; import type { FencedLegacyExecutionRequest, FencedWrapperDispatchRequest, @@ -1032,6 +1033,7 @@ export class SessionService { HOME: sessionHome, SESSION_ID: sessionId, SESSION_HOME: sessionHome, + [PNPM_STORE_ENV_VAR]: PNPM_STORE_DIR, // Inject Kilocode credentials (with override support) KILOCODE_TOKEN: kilocodeToken, KILO_AUTH_CONTENT: JSON.stringify({ kilo: { type: 'api', key: originalToken } }), diff --git 
a/services/cloud-agent-next/src/shared/protocol.ts b/services/cloud-agent-next/src/shared/protocol.ts index 3d417ac574..531bb7dae8 100644 --- a/services/cloud-agent-next/src/shared/protocol.ts +++ b/services/cloud-agent-next/src/shared/protocol.ts @@ -108,6 +108,8 @@ export type PreparingStep = | 'branch' | 'devcontainer_setup' | 'setup_commands' + | 'workspace_restore' + | 'workspace_backup' | 'kilo_server' | 'kilo_session' | 'ready' diff --git a/services/cloud-agent-next/src/shared/runtime-environment.ts b/services/cloud-agent-next/src/shared/runtime-environment.ts new file mode 100644 index 0000000000..d6d587fcc7 --- /dev/null +++ b/services/cloud-agent-next/src/shared/runtime-environment.ts @@ -0,0 +1,2 @@ +export const PNPM_STORE_DIR = '/var/cache/kilo/pnpm-store'; +export const PNPM_STORE_ENV_VAR = 'pnpm_config_store_dir'; diff --git a/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts b/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts index 3894556fc8..5a97d615db 100644 --- a/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts +++ b/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts @@ -31,6 +31,7 @@ export type WrapperBootstrapWorkspace = { upstreamBranch?: string; strictBranch?: boolean; preferSnapshot?: boolean; + restoredFromBackup?: boolean; }; export type WrapperBootstrapRuntimeSkill = { @@ -151,6 +152,7 @@ export type WrapperSessionReadyErrorResponse = { code: | 'INVALID_REQUEST' | 'WRAPPER_FINALIZING' + | 'WORKSPACE_RECONCILIATION_FAILED' | 'WORKSPACE_SETUP_FAILED' | 'KILO_SERVER_FAILED'; message: string; @@ -197,6 +199,12 @@ export function isWrapperSessionReadyRequest(value: unknown): value is WrapperSe if (!hasString(workspace, 'workspacePath')) return false; if (!hasString(workspace, 'sessionHome')) return false; if (!hasString(workspace, 'branchName')) return false; + if ( + workspace.restoredFromBackup !== undefined && + typeof workspace.restoredFromBackup !== 'boolean' + ) { + return false; + } const devcontainer = 
value.devcontainer; if (devcontainer !== undefined) { diff --git a/services/cloud-agent-next/src/shared/wrapper-version.ts b/services/cloud-agent-next/src/shared/wrapper-version.ts index 5c407c0576..533a083467 100644 --- a/services/cloud-agent-next/src/shared/wrapper-version.ts +++ b/services/cloud-agent-next/src/shared/wrapper-version.ts @@ -1 +1 @@ -export const WRAPPER_VERSION = '2.3.0'; +export const WRAPPER_VERSION = '2.4.0'; diff --git a/services/cloud-agent-next/src/types.ts b/services/cloud-agent-next/src/types.ts index ca6125f5f8..ca3e591159 100644 --- a/services/cloud-agent-next/src/types.ts +++ b/services/cloud-agent-next/src/types.ts @@ -8,6 +8,7 @@ import type { SessionIngestBinding } from './session-ingest-binding.js'; import * as z from 'zod'; import { Limits } from './schema.js'; import { SESSION_ID_RE } from './shared/protocol.js'; +import { PNPM_STORE_ENV_VAR } from './shared/runtime-environment.js'; export const sessionIdSchema = z.string().regex(SESSION_ID_RE, 'Invalid session ID format'); @@ -20,7 +21,12 @@ export const gitUrlSchema = z .url() .refine(url => url.startsWith('https://'), 'Only HTTPS URLs are supported'); -export const RESERVED_ENV_VARS = ['HOME', 'SESSION_ID', 'SESSION_HOME'] as const; +export const RESERVED_ENV_VARS = [ + 'HOME', + 'SESSION_ID', + 'SESSION_HOME', + PNPM_STORE_ENV_VAR, +] as const; export const envVarsSchema = z .record( @@ -201,6 +207,8 @@ export type Env = { INTERNAL_API_SECRET_PROD: SecretsStoreSecret; /** R2 bucket for storing session logs */ R2_BUCKET: R2Bucket; + /** R2 bucket used by Cloudflare Sandbox directory backups */ + BACKUP_BUCKET?: R2Bucket; /** Queue for callback messages (optional - supports incremental rollout) */ CALLBACK_QUEUE?: Queue; /** Dedicated best-effort Cloud Agent reporting queue. 
*/ @@ -231,6 +239,8 @@ export type Env = { INTERNAL_API_SECRET?: string; /** Worker base URL for building WebSocket ingest endpoint */ WORKER_URL?: string; + /** Sandbox control transport; local dev uses RPC for streaming backup restores */ + SANDBOX_TRANSPORT?: 'http' | 'websocket' | 'rpc'; /** * RSA private key for decrypting encrypted secrets from agent environment profiles. * Required when using encryptedSecrets feature. PEM format (base64-encoded). @@ -250,6 +260,14 @@ export type Env = { R2_ATTACHMENTS_READONLY_SECRET_ACCESS_KEY?: string; /** R2 bucket name for image attachments */ R2_ATTACHMENTS_BUCKET?: string; + /** R2 bucket name used by Cloudflare Sandbox directory backups */ + BACKUP_BUCKET_NAME?: string; + /** Cloudflare account ID used for R2 backup presigning */ + CLOUDFLARE_R2_ACCOUNT_ID?: string; + /** R2 access key ID used for backup uploads */ + R2_ACCESS_KEY_ID?: string; + /** R2 secret access key used for backup uploads */ + R2_SECRET_ACCESS_KEY?: string; /** * Hyperdrive binding for reading Postgres (agent environment profiles). 
* The `connectionString` is proxied through Hyperdrive so the worker diff --git a/services/cloud-agent-next/src/workspace-backup-cache.test.ts b/services/cloud-agent-next/src/workspace-backup-cache.test.ts new file mode 100644 index 0000000000..03331b9fcc --- /dev/null +++ b/services/cloud-agent-next/src/workspace-backup-cache.test.ts @@ -0,0 +1,278 @@ +import type { DirectoryBackup } from '@cloudflare/sandbox'; +import { describe, expect, it, vi } from 'vitest'; + +import { + buildWorkspaceBackupCandidate, + createWorkspaceBackupRecord, + loadWorkspaceBackupRecord, + storeWorkspaceBackupRecord, + WORKSPACE_BACKUP_TTL_MS, + type WorkspaceBackupCandidateRequest, +} from './workspace-backup-cache.js'; + +const eligibleRequest = { + fresh: true, + devcontainer: false, + setupCommands: [' npm ci ', ' npm run build\n'], + setupEnvironment: { + variables: { NODE_ENV: 'test', FEATURE_FLAG: 'enabled' }, + secretIdentities: { API_TOKEN: '{"version":1,"encryptedData":"ciphertext"}' }, + }, + userId: 'user-1', + orgId: 'org-1', + repository: { type: 'git' as const, url: 'https://token@example.com/acme/repo.git' }, + shallow: true, +}; + +const backup: DirectoryBackup = { + id: 'backup-1', + dir: '/workspace/repo', + localBucket: true, +}; + +function bucketWith(value: unknown) { + return { + get: vi.fn(async () => + value === null + ? 
null + : { + json: async () => value, + } + ), + put: vi.fn(async () => undefined), + }; +} + +describe('workspace backup cache policy', () => { + it.each([[undefined], [[]]])( + 'rejects requests without setup commands: %j', + async setupCommands => { + await expect( + buildWorkspaceBackupCandidate({ ...eligibleRequest, setupCommands }) + ).resolves.toBeNull(); + } + ); + + it.each([[['npm ci']], [['echo first', 'custom-tool --prepare', 'npm test']]])( + 'accepts fresh repository requests with setup commands: %j', + async setupCommands => { + await expect( + buildWorkspaceBackupCandidate({ ...eligibleRequest, setupCommands }) + ).resolves.not.toBeNull(); + } + ); + + it('builds a credential-free opaque v1 organization candidate', async () => { + const first = await buildWorkspaceBackupCandidate(eligibleRequest); + const second = await buildWorkspaceBackupCandidate({ + ...eligibleRequest, + repository: { type: 'git', url: 'https://other-secret@example.com/acme/repo.git' }, + }); + + expect(first).not.toBeNull(); + if (!first) return; + expect(first).toEqual(second); + expect(first.owner).toEqual({ type: 'organization', organizationId: 'org-1' }); + expect(first.canonicalRepository).toBe('https://example.com/acme/repo.git'); + expect(first.digest).toMatch(/^[a-f0-9]{64}$/); + expect(first.objectKey).toBe(`workspace-backups/v1/${first.digest}.json`); + + for (const value of ['user-1', 'org-1', 'repo', 'npm', 'NODE_ENV']) { + expect(first.objectKey).not.toContain(value); + } + }); + + it('shares organization candidates across users', async () => { + const first = await buildWorkspaceBackupCandidate(eligibleRequest); + const second = await buildWorkspaceBackupCandidate({ ...eligibleRequest, userId: 'user-2' }); + + expect(first).toEqual(second); + }); + + it('isolates personal candidates by user', async () => { + const first = await buildWorkspaceBackupCandidate({ ...eligibleRequest, orgId: undefined }); + const second = await buildWorkspaceBackupCandidate({ + 
...eligibleRequest, + orgId: undefined, + userId: 'user-2', + }); + + expect(first?.owner).toEqual({ type: 'user', userId: 'user-1' }); + expect(second?.owner).toEqual({ type: 'user', userId: 'user-2' }); + expect(first?.digest).not.toBe(second?.digest); + }); + + it('invalidates on repository, clone shape, setup commands, and setup environment', async () => { + const base = await buildWorkspaceBackupCandidate(eligibleRequest); + const variants = await Promise.all([ + buildWorkspaceBackupCandidate({ ...eligibleRequest, orgId: 'org-2' }), + buildWorkspaceBackupCandidate({ + ...eligibleRequest, + repository: { type: 'github', repo: 'acme/other' }, + }), + buildWorkspaceBackupCandidate({ ...eligibleRequest, shallow: false }), + buildWorkspaceBackupCandidate({ ...eligibleRequest, setupCommands: ['npm ci'] }), + buildWorkspaceBackupCandidate({ + ...eligibleRequest, + setupEnvironment: { + ...eligibleRequest.setupEnvironment, + variables: { ...eligibleRequest.setupEnvironment.variables, NODE_ENV: 'production' }, + }, + }), + ]); + + expect(new Set([base?.digest, ...variants.map(value => value?.digest)]).size).toBe(6); + }); + + it('canonicalizes setup environment key order', async () => { + const first = await buildWorkspaceBackupCandidate(eligibleRequest); + const reordered = await buildWorkspaceBackupCandidate({ + ...eligibleRequest, + setupEnvironment: { + variables: { FEATURE_FLAG: 'enabled', NODE_ENV: 'test' }, + secretIdentities: eligibleRequest.setupEnvironment.secretIdentities, + }, + }); + + expect(first?.digest).toBe(reordered?.digest); + }); + + it('invalidates on encrypted secret identity without using plaintext', async () => { + const base = await buildWorkspaceBackupCandidate(eligibleRequest); + const changedEnvelope = await buildWorkspaceBackupCandidate({ + ...eligibleRequest, + setupEnvironment: { + ...eligibleRequest.setupEnvironment, + secretIdentities: { + API_TOKEN: '{"version":1,"encryptedData":"different-ciphertext"}', + }, + }, + }); + + 
expect(base?.digest).not.toBe(changedEnvelope?.digest); + }); + + it('preserves exact setup command bytes and order', async () => { + const base = await buildWorkspaceBackupCandidate(eligibleRequest); + const trimmed = await buildWorkspaceBackupCandidate({ + ...eligibleRequest, + setupCommands: ['npm ci', 'npm run build'], + }); + const newlineChanged = await buildWorkspaceBackupCandidate({ + ...eligibleRequest, + setupCommands: [' npm ci ', ' npm run build\r\n'], + }); + const reversed = await buildWorkspaceBackupCandidate({ + ...eligibleRequest, + setupCommands: [...eligibleRequest.setupCommands].reverse(), + }); + + expect(base?.digest).not.toBe(trimmed?.digest); + expect(base?.digest).not.toBe(newlineChanged?.digest); + expect(base?.digest).not.toBe(reversed?.digest); + }); + + it.each<[string, Partial]>([ + ['resume', { fresh: false }], + ['devcontainer', { devcontainer: true }], + ['empty user', { userId: '' }], + ['empty organization', { orgId: '' }], + ['invalid repository', { repository: { type: 'git', url: 'not-a-url' } }], + ['invalid GitHub repository', { repository: { type: 'github', repo: 'invalid' } }], + ])('rejects %s requests', async (_label, override) => { + await expect( + buildWorkspaceBackupCandidate({ ...eligibleRequest, ...override }) + ).resolves.toBeNull(); + }); + + it('creates a 24-hour v1 record retaining owner and localBucket and stores it as JSON', async () => { + const candidate = await buildWorkspaceBackupCandidate(eligibleRequest); + expect(candidate).not.toBeNull(); + if (!candidate) return; + + const now = Date.parse('2026-06-10T12:00:00.000Z'); + const record = createWorkspaceBackupRecord(candidate, backup, 'a'.repeat(40), now); + const bucket = bucketWith(null); + + expect(record.schema).toBe('workspace-backup-v1'); + expect(record.owner).toEqual(candidate.owner); + expect(record.createdAt).toBe(now); + expect(record.expiresAt).toBe(now + WORKSPACE_BACKUP_TTL_MS); + expect(record.backup.localBucket).toBe(true); + + await 
storeWorkspaceBackupRecord(bucket as unknown as R2Bucket, candidate, record); + expect(bucket.put).toHaveBeenCalledWith(candidate.objectKey, JSON.stringify(record), { + httpMetadata: { contentType: 'application/json' }, + }); + }); + + it('loads only valid, matching, unexpired v1 records with the exact owner', async () => { + const candidate = await buildWorkspaceBackupCandidate(eligibleRequest); + expect(candidate).not.toBeNull(); + if (!candidate) return; + + const now = Date.parse('2026-06-10T12:00:00.000Z'); + const record = createWorkspaceBackupRecord(candidate, backup, 'b'.repeat(40), now); + + await expect( + loadWorkspaceBackupRecord(bucketWith(record) as unknown as R2Bucket, candidate, now + 1) + ).resolves.toEqual(record); + + const invalidRecords = [ + { ...record, schema: 'workspace-backup-v2' }, + { ...record, expiresAt: now }, + { ...record, digest: '0'.repeat(64) }, + { ...record, owner: { type: 'organization', organizationId: 'other-org' } }, + { ...record, owner: { type: 'user', userId: 'org-1' } }, + { invalid: true }, + ]; + for (const invalidRecord of invalidRecords) { + await expect( + loadWorkspaceBackupRecord(bucketWith(invalidRecord) as unknown as R2Bucket, candidate, now) + ).resolves.toBeNull(); + } + }); + + it('rejects storing a record whose owner does not exactly match the candidate', async () => { + const candidate = await buildWorkspaceBackupCandidate(eligibleRequest); + if (!candidate) throw new Error('expected eligible candidate'); + const record = createWorkspaceBackupRecord(candidate, backup, 'c'.repeat(40), 1); + const bucket = bucketWith(null); + + await expect( + storeWorkspaceBackupRecord(bucket as unknown as R2Bucket, candidate, { + ...record, + owner: { type: 'user', userId: 'org-1' }, + }) + ).rejects.toThrow('Workspace backup record does not match its cache candidate'); + expect(bucket.put).not.toHaveBeenCalled(); + }); + + it('treats R2 lookup failures as cold misses', async () => { + const candidate = await 
buildWorkspaceBackupCandidate(eligibleRequest); + if (!candidate) throw new Error('expected eligible candidate'); + const bucket = { get: vi.fn().mockRejectedValue(new Error('R2 unavailable')) }; + + await expect( + loadWorkspaceBackupRecord(bucket as unknown as R2Bucket, candidate) + ).resolves.toBeNull(); + }); + + it.each([ + ['future creation', { createdAt: 101, expiresAt: 102 }], + ['nonpositive lifetime', { createdAt: 100, expiresAt: 100 }], + ['oversized lifetime', { createdAt: 1, expiresAt: 1 + WORKSPACE_BACKUP_TTL_MS + 1 }], + ])('rejects records with %s', async (_label, timestamps) => { + const candidate = await buildWorkspaceBackupCandidate(eligibleRequest); + if (!candidate) throw new Error('expected eligible candidate'); + const record = createWorkspaceBackupRecord(candidate, backup, 'd'.repeat(40), 1); + + await expect( + loadWorkspaceBackupRecord( + bucketWith({ ...record, ...timestamps }) as unknown as R2Bucket, + candidate, + 100 + ) + ).resolves.toBeNull(); + }); +}); diff --git a/services/cloud-agent-next/src/workspace-backup-cache.ts b/services/cloud-agent-next/src/workspace-backup-cache.ts new file mode 100644 index 0000000000..73e287eb1a --- /dev/null +++ b/services/cloud-agent-next/src/workspace-backup-cache.ts @@ -0,0 +1,264 @@ +import type { DirectoryBackup } from '@cloudflare/sandbox'; +import * as z from 'zod'; + +import { WRAPPER_VERSION } from './shared/wrapper-version.js'; + +const CACHE_SCHEMA = 'workspace-backup-v1'; +const CACHE_OBJECT_PREFIX = 'workspace-backups/v1'; + +export const WORKSPACE_BACKUP_TTL_MS = 24 * 60 * 60 * 1000; + +const digestSchema = z.string().regex(/^[a-f0-9]{64}$/); +const directoryBackupSchema = z + .object({ + id: z.string().min(1), + dir: z.string().min(1), + localBucket: z.boolean().optional(), + }) + .strip(); + +const workspaceBackupOwnerSchema = z.discriminatedUnion('type', [ + z.object({ type: z.literal('organization'), organizationId: z.string().min(1) }).strict(), + z.object({ type: 
z.literal('user'), userId: z.string().min(1) }).strict(), +]); + +const workspaceBackupRecordSchema = z + .object({ + schema: z.literal(CACHE_SCHEMA), + digest: digestSchema, + owner: workspaceBackupOwnerSchema, + sourceCommit: z.string().regex(/^[a-f0-9]{40,64}$/i), + createdAt: z.number().int().nonnegative(), + expiresAt: z.number().int().positive(), + backup: directoryBackupSchema, + }) + .strip(); + +export type WorkspaceBackupRepository = + | { type: 'github'; repo: string } + | { type: 'git' | 'gitlab'; url: string }; + +export type WorkspaceBackupOwner = + | { type: 'organization'; organizationId: string } + | { type: 'user'; userId: string }; + +export type WorkspaceBackupCandidateRequest = { + fresh: boolean; + devcontainer: boolean; + setupCommands?: string[]; + setupEnvironment: { + variables: Record; + secretIdentities: Record; + }; + userId: string; + orgId?: string; + repository?: WorkspaceBackupRepository; + shallow?: boolean; +}; + +export type WorkspaceBackupCandidate = { + digest: string; + objectKey: string; + owner: WorkspaceBackupOwner; + canonicalRepository: string; +}; + +export type WorkspaceBackupRecord = z.infer; + +type WorkspaceBackupKey = { + schema: typeof CACHE_SCHEMA; + wrapperVersion: string; + owner: WorkspaceBackupOwner; + repository: string; + shallow: boolean; + setupCommands: string[]; + setupEnvironment: { + variables: Record; + secretIdentities: Record; + }; +}; + +function canonicalizeRepository(repository: WorkspaceBackupRepository | undefined): string | null { + if (!repository) return null; + + if (repository.type === 'github') { + if (!/^[a-zA-Z0-9_.-]+\/[a-zA-Z0-9_.-]+$/.test(repository.repo)) return null; + return `https://github.com/${repository.repo}.git`; + } + + try { + const url = new URL(repository.url); + if (url.protocol !== 'https:' || !url.hostname || url.hash || url.search) return null; + + url.username = ''; + url.password = ''; + url.hostname = url.hostname.toLowerCase(); + url.pathname = 
url.pathname.replace(/\/+$/, ''); + if (url.pathname === '') return null; + + return url.toString(); + } catch { + return null; + } +} + +function canonicalJson(value: unknown): string | null { + const ancestors = new WeakSet(); + + function serialize(current: unknown): string | null { + if (current === null) return 'null'; + if (typeof current === 'string' || typeof current === 'boolean') { + return JSON.stringify(current); + } + if (typeof current === 'number') { + return Number.isFinite(current) ? JSON.stringify(current) : null; + } + if (typeof current !== 'object') return null; + if (ancestors.has(current)) return null; + + ancestors.add(current); + try { + if (Array.isArray(current)) { + const values: string[] = []; + for (let index = 0; index < current.length; index += 1) { + if (!Object.hasOwn(current, index)) return null; + const serialized = serialize(current[index]); + if (serialized === null) return null; + values.push(serialized); + } + return `[${values.join(',')}]`; + } + + if (Object.prototype.toString.call(current) !== '[object Object]') return null; + + const entries: string[] = []; + const sortedEntries = Object.entries(current).sort(([left], [right]) => + left < right ? -1 : left > right ? 
1 : 0 + ); + for (const [key, nestedValue] of sortedEntries) { + if (nestedValue === undefined) continue; + const serialized = serialize(nestedValue); + if (serialized === null) return null; + entries.push(`${JSON.stringify(key)}:${serialized}`); + } + return `{${entries.join(',')}}`; + } finally { + ancestors.delete(current); + } + } + + try { + return serialize(value); + } catch { + return null; + } +} + +async function sha256(value: string): Promise { + const bytes = new TextEncoder().encode(value); + const hash = await crypto.subtle.digest('SHA-256', bytes); + return Array.from(new Uint8Array(hash), byte => byte.toString(16).padStart(2, '0')).join(''); +} + +function ownersEqual(left: WorkspaceBackupOwner, right: WorkspaceBackupOwner): boolean { + if (left.type !== right.type) return false; + if (left.type === 'organization' && right.type === 'organization') { + return left.organizationId === right.organizationId; + } + if (left.type === 'user' && right.type === 'user') return left.userId === right.userId; + return false; +} + +export async function buildWorkspaceBackupCandidate( + request: WorkspaceBackupCandidateRequest +): Promise { + if ( + !request.fresh || + request.devcontainer || + request.userId.length === 0 || + request.orgId === '' || + !request.setupCommands?.length + ) + return null; + + const canonicalRepository = canonicalizeRepository(request.repository); + if (!canonicalRepository) return null; + + const owner: WorkspaceBackupOwner = request.orgId + ? { type: 'organization', organizationId: request.orgId } + : { type: 'user', userId: request.userId }; + const key = { + schema: CACHE_SCHEMA, + wrapperVersion: WRAPPER_VERSION, + owner, + repository: canonicalRepository, + shallow: request.shallow ?? false, + setupCommands: request.setupCommands ?? 
[], + setupEnvironment: request.setupEnvironment, + } satisfies WorkspaceBackupKey; + const canonicalKey = canonicalJson(key); + if (canonicalKey === null) return null; + + const digest = await sha256(canonicalKey); + return { + digest, + objectKey: `${CACHE_OBJECT_PREFIX}/${digest}.json`, + owner, + canonicalRepository, + }; +} + +export async function loadWorkspaceBackupRecord( + bucket: R2Bucket, + candidate: WorkspaceBackupCandidate, + now = Date.now() +): Promise { + try { + const object = await bucket.get(candidate.objectKey); + if (!object) return null; + + const parsed = workspaceBackupRecordSchema.safeParse(await object.json()); + if (!parsed.success) return null; + if (parsed.data.digest !== candidate.digest) return null; + if (!ownersEqual(parsed.data.owner, candidate.owner)) return null; + if (parsed.data.createdAt > now) return null; + if (parsed.data.expiresAt <= parsed.data.createdAt) return null; + if (parsed.data.expiresAt - parsed.data.createdAt > WORKSPACE_BACKUP_TTL_MS) return null; + if (parsed.data.expiresAt <= now) return null; + return parsed.data; + } catch { + return null; + } +} + +export function createWorkspaceBackupRecord( + candidate: WorkspaceBackupCandidate, + backup: DirectoryBackup, + sourceCommit: string, + now = Date.now() +): WorkspaceBackupRecord { + return workspaceBackupRecordSchema.parse({ + schema: CACHE_SCHEMA, + digest: candidate.digest, + owner: candidate.owner, + sourceCommit, + createdAt: now, + expiresAt: now + WORKSPACE_BACKUP_TTL_MS, + backup, + }); +} + +export async function storeWorkspaceBackupRecord( + bucket: R2Bucket, + candidate: WorkspaceBackupCandidate, + record: WorkspaceBackupRecord +): Promise { + const validated = workspaceBackupRecordSchema.parse(record); + if (validated.digest !== candidate.digest || !ownersEqual(validated.owner, candidate.owner)) { + throw new Error('Workspace backup record does not match its cache candidate'); + } + + await bucket.put(candidate.objectKey, JSON.stringify(validated), 
{ + httpMetadata: { contentType: 'application/json' }, + }); +} diff --git a/services/cloud-agent-next/src/workspace-backup-observability.test.ts b/services/cloud-agent-next/src/workspace-backup-observability.test.ts new file mode 100644 index 0000000000..44cf1d0601 --- /dev/null +++ b/services/cloud-agent-next/src/workspace-backup-observability.test.ts @@ -0,0 +1,101 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockInfo, mockWarn, mockWithFields, mockWithTags } = vi.hoisted(() => { + const info = vi.fn(); + const warn = vi.fn(); + const withFields = vi.fn(() => ({ info, warn })); + const withTags = vi.fn(() => ({ withFields })); + return { + mockInfo: info, + mockWarn: warn, + mockWithFields: withFields, + mockWithTags: withTags, + }; +}); + +vi.mock('./logger.js', () => ({ + logger: { + withTags: mockWithTags, + }, +})); + +import { + logWorkspaceBackupDisabled, + logWorkspaceBackupLifecycle, +} from './workspace-backup-observability.js'; + +describe('workspace backup observability', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('emits a bounded warning when backup configuration is disabled', () => { + logWorkspaceBackupDisabled('invalid_worker_url'); + + expect(mockWithTags).toHaveBeenCalledWith({ + logTag: 'workspace_backup.configuration.disabled', + }); + expect(mockWithFields).toHaveBeenCalledWith({ reason: 'invalid_worker_url' }); + expect(mockWarn).toHaveBeenCalledWith('workspace_backup.configuration.disabled'); + expect(mockInfo).not.toHaveBeenCalled(); + }); + + it('emits a searchable structured start event', () => { + logWorkspaceBackupLifecycle({ operation: 'restore', outcome: 'started' }); + + expect(mockWithTags).toHaveBeenCalledWith({ + logTag: 'workspace_backup.restore.started', + }); + expect(mockWithFields).toHaveBeenCalledWith({ + operation: 'restore', + outcome: 'started', + }); + expect(mockInfo).toHaveBeenCalledWith('workspace_backup.restore.started'); + 
expect(mockWarn).not.toHaveBeenCalled(); + }); + + it('emits duration on a completed event', () => { + logWorkspaceBackupLifecycle({ operation: 'create', outcome: 'completed', durationMs: 42 }); + + expect(mockWithTags).toHaveBeenCalledWith({ + logTag: 'workspace_backup.create.completed', + }); + expect(mockWithFields).toHaveBeenCalledWith({ + operation: 'create', + outcome: 'completed', + durationMs: 42, + }); + expect(mockInfo).toHaveBeenCalledWith('workspace_backup.create.completed'); + }); + + it('emits only a bounded category and duration on failure', () => { + logWorkspaceBackupLifecycle({ + operation: 'restore', + outcome: 'failed', + durationMs: 17, + failureCategory: 'backup_validation_failed', + }); + + expect(mockWithTags).toHaveBeenCalledWith({ + logTag: 'workspace_backup.restore.failed', + }); + expect(mockWithFields).toHaveBeenCalledWith({ + operation: 'restore', + outcome: 'failed', + durationMs: 17, + failureCategory: 'backup_validation_failed', + }); + expect(mockWarn).toHaveBeenCalledWith('workspace_backup.restore.failed'); + expect(mockInfo).not.toHaveBeenCalled(); + }); + + it('does not let logging failures change backup behavior', () => { + mockWithTags.mockImplementationOnce(() => { + throw new Error('logger unavailable'); + }); + + expect(() => + logWorkspaceBackupLifecycle({ operation: 'create', outcome: 'started' }) + ).not.toThrow(); + }); +}); diff --git a/services/cloud-agent-next/src/workspace-backup-observability.ts b/services/cloud-agent-next/src/workspace-backup-observability.ts new file mode 100644 index 0000000000..72a084aa2f --- /dev/null +++ b/services/cloud-agent-next/src/workspace-backup-observability.ts @@ -0,0 +1,58 @@ +import { logger } from './logger.js'; + +export type WorkspaceBackupOperation = 'restore' | 'create'; +export type WorkspaceBackupDisabledReason = 'invalid_worker_url'; + +export type WorkspaceBackupFailureCategory = + | 'workspace_cleanup_failed' + | 'workspace_parent_prepare_failed' + | 
'backup_restore_failed' + | 'backup_validation_failed' + | 'fallback_cleanup_failed' + | 'source_commit_read_failed' + | 'active_origin_read_failed' + | 'canonical_origin_set_failed' + | 'backup_create_failed' + | 'authenticated_origin_restore_failed' + | 'backup_record_create_failed' + | 'index_write_failed'; + +export type WorkspaceBackupLifecycleEvent = + | { + operation: WorkspaceBackupOperation; + outcome: 'started'; + } + | { + operation: WorkspaceBackupOperation; + outcome: 'completed'; + durationMs: number; + } + | { + operation: WorkspaceBackupOperation; + outcome: 'failed'; + durationMs: number; + failureCategory: WorkspaceBackupFailureCategory; + }; + +export function logWorkspaceBackupDisabled(reason: WorkspaceBackupDisabledReason): void { + try { + const eventName = 'workspace_backup.configuration.disabled'; + logger.withTags({ logTag: eventName }).withFields({ reason }).warn(eventName); + } catch { + return; + } +} + +export function logWorkspaceBackupLifecycle(event: WorkspaceBackupLifecycleEvent): void { + try { + const eventName = `workspace_backup.${event.operation}.${event.outcome}`; + const eventLogger = logger.withTags({ logTag: eventName }).withFields(event); + if (event.outcome === 'failed') { + eventLogger.warn(eventName); + return; + } + eventLogger.info(eventName); + } catch { + return; + } +} diff --git a/services/cloud-agent-next/worker-configuration.d.ts b/services/cloud-agent-next/worker-configuration.d.ts index 2a5d3d6657..f29078f8a2 100644 --- a/services/cloud-agent-next/worker-configuration.d.ts +++ b/services/cloud-agent-next/worker-configuration.d.ts @@ -1,15 +1,19 @@ /* eslint-disable */ -// Generated by Wrangler by running `wrangler types` (hash: 5ef93277894d40ea0ccd0e8f8441deea) +// Generated by Wrangler by running `wrangler types` (hash: c31bae631e5de94fd462d84b19cca306) // Runtime types generated with workerd@1.20260603.1 2026-06-03 nodejs_compat interface __BaseEnv_Env { R2_BUCKET: R2Bucket; + BACKUP_BUCKET: R2Bucket; 
HYPERDRIVE: Hyperdrive; CALLBACK_QUEUE: Queue; CLOUD_AGENT_REPORT_QUEUE: Queue; INTERNAL_API_SECRET_PROD: SecretsStoreSecret; GITHUB_LITE_APP_SLUG: "" | "kiloconnect-lite"; GITHUB_LITE_APP_BOT_USER_ID: "" | "257753004"; + SANDBOX_TRANSPORT?: "rpc"; R2_ATTACHMENTS_BUCKET: "cloud-agent-attachments-dev" | "cloud-agent-attachments"; + BACKUP_BUCKET_NAME: "kilocode-sessions-dev" | "kilocode-sessions"; + CLOUDFLARE_R2_ACCOUNT_ID: "e115e769bcdd4c3d66af59d3332cb394"; PER_SESSION_SANDBOX_ORG_IDS?: "*"; NEXTAUTH_SECRET: string; KILO_SESSION_INGEST_URL: string; @@ -33,6 +37,8 @@ interface __BaseEnv_Env { GITHUB_LITE_APP_ID: string; GITHUB_LITE_APP_PRIVATE_KEY: string; KILOCODE_SANDBOX_BACKEND_BASE_URL: string; + R2_ACCESS_KEY_ID: string; + R2_SECRET_ACCESS_KEY: string; Sandbox: DurableObjectNamespace; SandboxSmall: DurableObjectNamespace; SandboxDIND: DurableObjectNamespace; @@ -49,13 +55,17 @@ declare namespace Cloudflare { } interface DevEnv { R2_BUCKET: R2Bucket; + BACKUP_BUCKET: R2Bucket; HYPERDRIVE: Hyperdrive; CALLBACK_QUEUE: Queue; CLOUD_AGENT_REPORT_QUEUE: Queue; INTERNAL_API_SECRET_PROD: SecretsStoreSecret; GITHUB_LITE_APP_SLUG: ""; GITHUB_LITE_APP_BOT_USER_ID: ""; + SANDBOX_TRANSPORT: "rpc"; R2_ATTACHMENTS_BUCKET: "cloud-agent-attachments-dev"; + BACKUP_BUCKET_NAME: "kilocode-sessions-dev"; + CLOUDFLARE_R2_ACCOUNT_ID: "e115e769bcdd4c3d66af59d3332cb394"; PER_SESSION_SANDBOX_ORG_IDS: "*"; NEXTAUTH_SECRET: string; KILO_SESSION_INGEST_URL: string; @@ -79,6 +89,8 @@ declare namespace Cloudflare { GITHUB_LITE_APP_ID: string; GITHUB_LITE_APP_PRIVATE_KEY: string; KILOCODE_SANDBOX_BACKEND_BASE_URL: string; + R2_ACCESS_KEY_ID: string; + R2_SECRET_ACCESS_KEY: string; Sandbox: DurableObjectNamespace; SandboxSmall: DurableObjectNamespace; SandboxDIND: DurableObjectNamespace; @@ -95,7 +107,7 @@ type StringifyValues> = { [Binding in keyof EnvType]: EnvType[Binding] extends string ? 
EnvType[Binding] : string; }; declare namespace NodeJS { - interface ProcessEnv extends StringifyValues> {} + interface ProcessEnv extends StringifyValues> {} } declare module "*.sql" { const value: string; diff --git a/services/cloud-agent-next/wrangler.jsonc b/services/cloud-agent-next/wrangler.jsonc index 967f7aaa4b..ada95c2c59 100644 --- a/services/cloud-agent-next/wrangler.jsonc +++ b/services/cloud-agent-next/wrangler.jsonc @@ -52,6 +52,8 @@ "CLI_TIMEOUT_SECONDS": "900", "REAPER_INTERVAL_MS": "300000", "R2_ATTACHMENTS_BUCKET": "cloud-agent-attachments", + "BACKUP_BUCKET_NAME": "kilocode-sessions", + "CLOUDFLARE_R2_ACCOUNT_ID": "e115e769bcdd4c3d66af59d3332cb394", "WS_ALLOWED_ORIGINS": "https://app.kilo.ai,https://api.kilo.ai", "KILO_SESSION_INGEST_URL": "https://ingest.kilosessions.ai", }, @@ -139,6 +141,10 @@ "binding": "R2_BUCKET", "bucket_name": "kilocode-sessions", }, + { + "binding": "BACKUP_BUCKET", + "bucket_name": "kilocode-sessions", + }, ], "containers": [ { @@ -284,9 +290,12 @@ "GITHUB_LITE_APP_SLUG": "", "GITHUB_LITE_APP_BOT_USER_ID": "", "WORKER_URL": "http://localhost:8794", + "SANDBOX_TRANSPORT": "rpc", "CLI_TIMEOUT_SECONDS": "900", "REAPER_INTERVAL_MS": "300000", "R2_ATTACHMENTS_BUCKET": "cloud-agent-attachments-dev", + "BACKUP_BUCKET_NAME": "kilocode-sessions-dev", + "CLOUDFLARE_R2_ACCOUNT_ID": "e115e769bcdd4c3d66af59d3332cb394", "WS_ALLOWED_ORIGINS": "http://localhost:3000,http://host.docker.internal:3000", "KILO_SESSION_INGEST_URL": "http://localhost:8800", "PER_SESSION_SANDBOX_ORG_IDS": "*", @@ -320,6 +329,10 @@ "binding": "R2_BUCKET", "bucket_name": "kilocode-sessions-dev", }, + { + "binding": "BACKUP_BUCKET", + "bucket_name": "kilocode-sessions-dev", + }, ], "containers": [ { diff --git a/services/cloud-agent-next/wrapper/src/main.ts b/services/cloud-agent-next/wrapper/src/main.ts index 1040da4545..0a2100f87c 100644 --- a/services/cloud-agent-next/wrapper/src/main.ts +++ b/services/cloud-agent-next/wrapper/src/main.ts @@ -30,6 +30,7 @@ 
import type { import { materializePromptAttachments, prepareWrapperBootstrapWorkspace, + workspaceBootstrapErrorCode, } from './session-bootstrap.js'; // --------------------------------------------------------------------------- @@ -594,7 +595,9 @@ async function main() { return { status: 'error', error: { - code: message.includes('Kilo server') ? 'KILO_SERVER_FAILED' : 'WORKSPACE_SETUP_FAILED', + code: message.includes('Kilo server') + ? 'KILO_SERVER_FAILED' + : workspaceBootstrapErrorCode(error), message, retryable: true, }, diff --git a/services/cloud-agent-next/wrapper/src/server.test.ts b/services/cloud-agent-next/wrapper/src/server.test.ts index 62a824c2b8..d328658ab5 100644 --- a/services/cloud-agent-next/wrapper/src/server.test.ts +++ b/services/cloud-agent-next/wrapper/src/server.test.ts @@ -9,6 +9,7 @@ import { type WrapperServer, } from './server'; import type { WrapperKiloClient, WrapperPty, WrapperPtySize } from './kilo-api'; +import { PNPM_STORE_DIR, PNPM_STORE_ENV_VAR } from '../../src/shared/runtime-environment.js'; type PtyCall = { cwd: string; @@ -120,7 +121,7 @@ describe('wrapper health', () => { }); describe('wrapper PTY routes', () => { - it('creates a workspace PTY and applies the requested size', async () => { + it('creates a workspace PTY with the stable pnpm store and applies the requested size', async () => { const { fetchHandler, ptyCalls, resizeCalls } = createTestFetch(); const response = await fetchHandler( @@ -147,6 +148,7 @@ describe('wrapper PTY routes', () => { env: { PROMPT_COMMAND: "PS1='\\n\\W\\n\\$ '", PS1: '\\n\\W\\n\\$ ', + [PNPM_STORE_ENV_VAR]: PNPM_STORE_DIR, }, }, ]); diff --git a/services/cloud-agent-next/wrapper/src/server.ts b/services/cloud-agent-next/wrapper/src/server.ts index 4b83f1d81b..1a08ade06e 100644 --- a/services/cloud-agent-next/wrapper/src/server.ts +++ b/services/cloud-agent-next/wrapper/src/server.ts @@ -28,6 +28,7 @@ import { type WrapperSessionReadyResponse, } from 
'../../src/shared/wrapper-bootstrap.js'; import { createProxyRequest } from '../../src/shared/http-proxy.js'; +import { PNPM_STORE_DIR, PNPM_STORE_ENV_VAR } from '../../src/shared/runtime-environment.js'; import type { SessionBoundFeedPolicy } from './global-feed-manager.js'; // --------------------------------------------------------------------------- @@ -144,6 +145,7 @@ const WORKSPACE_TERMINAL_ENV = { // Shell startup files may replace inherited PS1, so reapply it before each prompt. PROMPT_COMMAND: "PS1='\\n\\W\\n\\$ '", PS1: '\\n\\W\\n\\$ ', + [PNPM_STORE_ENV_VAR]: PNPM_STORE_DIR, } satisfies Record; function jsonResponse(data: unknown, status = 200): Response { diff --git a/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts b/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts index 4095bd39b3..5022055b12 100644 --- a/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts +++ b/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts @@ -6,6 +6,8 @@ import path from 'node:path'; import { materializePromptAttachments, prepareWrapperBootstrapWorkspace, + RestoredWorkspaceReconciliationError, + workspaceBootstrapErrorCode, type WrapperBootstrapDeps, } from './session-bootstrap'; import type { @@ -13,6 +15,7 @@ import type { WrapperSessionReadyRequest, } from '../../src/shared/wrapper-bootstrap'; import { buildCloudAgentRules } from '../../src/shared/cloud-agent-rules.js'; +import { PNPM_STORE_DIR, PNPM_STORE_ENV_VAR } from '../../src/shared/runtime-environment.js'; function makeRequest(tmpDir: string, overrides: Partial = {}) { const request: WrapperSessionReadyRequest = { @@ -38,6 +41,7 @@ function makeRequest(tmpDir: string, overrides: Partial { HOME: process.env.HOME, KILOCODE_TOKEN: process.env.KILOCODE_TOKEN, GH_TOKEN: process.env.GH_TOKEN, + [PNPM_STORE_ENV_VAR]: process.env[PNPM_STORE_ENV_VAR], }; }); @@ -399,6 +404,134 @@ describe('prepareWrapperBootstrapWorkspace', () => { ]); }); + it('reconciles a same-commit 
restored workspace before running every setup command', async () => { + const request = makeRequest(tmpDir); + request.workspace.branchName = 'session/new'; + request.workspace.upstreamBranch = 'feature/source'; + request.workspace.restoredFromBackup = true; + request.materialized.setupCommands = ['prepare one', 'prepare two']; + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { recursive: true }); + const events: string[] = []; + + await prepareWrapperBootstrapWorkspace(request, undefined, { + git: async args => { + events.push(`git:${args.join(' ')}`); + return { stdout: '', stderr: '', exitCode: 0 }; + }, + runProcess: async (command, args) => { + events.push(`process:${command} ${args.join(' ')}`); + expect(process.env.HOME).toBe(request.workspace.sessionHome); + expect(process.env.KILOCODE_TOKEN).toBe('kilo-token'); + expect(process.env[PNPM_STORE_ENV_VAR]).toBe(PNPM_STORE_DIR); + expect( + fs.existsSync(path.join(request.workspace.sessionHome, '.local/share/kilo/auth.json')) + ).toBe(true); + expect( + fs.existsSync(path.join(request.workspace.sessionHome, '.kilocode/rules/cloud-agent.md')) + ).toBe(true); + expect( + fs.existsSync( + path.join(request.workspace.sessionHome, '.kilocode/skills/test-skill/SKILL.md') + ) + ).toBe(true); + return { stdout: '', stderr: '', exitCode: 0 }; + }, + restoreSession: async () => ({ + ok: true, + downloaded: false, + imported: true, + diffs: { applied: 0, skipped: 0, total: 0 }, + }), + }); + + expect(events).toContain( + 'git:remote set-url origin https://x-access-token:gh-token@github.com/acme/repo.git' + ); + const fetchIndex = events.indexOf('git:fetch origin feature/source'); + const checkoutIndex = events.indexOf('git:checkout -B session/new FETCH_HEAD'); + const firstSetupIndex = events.indexOf('process:sh -lc prepare one'); + expect(fetchIndex).toBeGreaterThan(-1); + expect(checkoutIndex).toBeGreaterThan(fetchIndex); + expect(firstSetupIndex).toBeGreaterThan(checkoutIndex); + 
expect(events.filter(event => event.startsWith('process:'))).toEqual([ + 'process:sh -lc prepare one', + 'process:sh -lc prepare two', + ]); + }); + + it('keeps restored workspace setup failures as ordinary setup failures', async () => { + const request = makeRequest(tmpDir); + request.workspace.restoredFromBackup = true; + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { recursive: true }); + + let setupError: unknown; + try { + await prepareWrapperBootstrapWorkspace(request, undefined, { + git: async args => { + if (args.join(' ') === 'ls-remote --symref origin HEAD') { + return { stdout: 'ref: refs/heads/main\tHEAD\n', stderr: '', exitCode: 0 }; + } + return { stdout: '', stderr: '', exitCode: 0 }; + }, + runProcess: async () => ({ stdout: '', stderr: 'install failed', exitCode: 17 }), + restoreSession: async () => ({ + ok: true, + downloaded: false, + imported: true, + diffs: { applied: 0, skipped: 0, total: 0 }, + }), + }); + } catch (error) { + setupError = error; + } + + expect(setupError).toBeInstanceOf(Error); + expect(setupError).not.toBeInstanceOf(RestoredWorkspaceReconciliationError); + expect(workspaceBootstrapErrorCode(setupError)).toBe('WORKSPACE_SETUP_FAILED'); + expect((setupError as Error).message).toContain( + 'Setup command failed: pnpm install (exit code 17)' + ); + }); + + it('classifies restored workspace reconciliation failures before setup', async () => { + const request = makeRequest(tmpDir); + request.workspace.restoredFromBackup = true; + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { recursive: true }); + let setupRan = false; + + let reconciliationError: unknown; + try { + await prepareWrapperBootstrapWorkspace(request, undefined, { + git: async args => { + if (args.join(' ') === 'ls-remote --symref origin HEAD') { + return { stdout: 'ref: refs/heads/main\tHEAD\n', stderr: '', exitCode: 0 }; + } + if (args.join(' ') === 'fetch origin main') { + return { stdout: '', stderr: 'remote 
unavailable', exitCode: 1 }; + } + return { stdout: '', stderr: '', exitCode: 0 }; + }, + runProcess: async () => { + setupRan = true; + return { stdout: '', stderr: '', exitCode: 0 }; + }, + }); + } catch (error) { + reconciliationError = error; + } + + expect(reconciliationError).toBeInstanceOf(RestoredWorkspaceReconciliationError); + expect(workspaceBootstrapErrorCode(reconciliationError)).toBe( + 'WORKSPACE_RECONCILIATION_FAILED' + ); + expect((reconciliationError as Error).message).toBe( + 'Failed to fetch authoritative remote state' + ); + expect(setupRan).toBe(false); + expect(fs.existsSync(request.workspace.workspacePath)).toBe(false); + expect(fs.existsSync(request.workspace.sessionHome)).toBe(false); + }); + it('appends downloaded attachments to existing prompt parts', async () => { const prompt: WrapperPromptRequest = { message: { diff --git a/services/cloud-agent-next/wrapper/src/session-bootstrap.ts b/services/cloud-agent-next/wrapper/src/session-bootstrap.ts index fa97539a5c..67580f6f91 100644 --- a/services/cloud-agent-next/wrapper/src/session-bootstrap.ts +++ b/services/cloud-agent-next/wrapper/src/session-bootstrap.ts @@ -46,6 +46,21 @@ export type WrapperBootstrapDeps = { restoreSession?: typeof restoreSession; }; +export class RestoredWorkspaceReconciliationError extends Error { + constructor(message: string, options?: ErrorOptions) { + super(message, options); + this.name = 'RestoredWorkspaceReconciliationError'; + } +} + +export function workspaceBootstrapErrorCode( + error: unknown +): 'WORKSPACE_RECONCILIATION_FAILED' | 'WORKSPACE_SETUP_FAILED' { + return error instanceof RestoredWorkspaceReconciliationError + ? 
'WORKSPACE_RECONCILIATION_FAILED' + : 'WORKSPACE_SETUP_FAILED'; +} + function sanitizeGitOutput(output: string): string { return output.replace(/(oauth2|x-access-token|x-token-auth):([^@]+)@/gi, '$1:***@'); } @@ -220,17 +235,43 @@ async function prepareBranch( } } +function repositoryUrls(request: WrapperSessionReadyRequest): { + canonical: string; + authenticated: string; +} | null { + const repo = request.repo; + if (!repo) return null; + const canonical = repo.kind === 'github' ? `https://github.com/${repo.repo}.git` : repo.url; + const platform = repo.kind === 'git' ? repo.platform : 'github'; + return { + canonical, + authenticated: authenticatedUrl(canonical, repo.token, platform), + }; +} + +async function setOriginUrl( + request: WrapperSessionReadyRequest, + runGit: GitRunner, + url: string +): Promise { + const result = await runGit(['remote', 'set-url', 'origin', url], { + cwd: request.workspace.workspacePath, + timeoutMs: GIT_COMMAND_TIMEOUT_MS, + }); + if (result.exitCode !== 0) { + throw new Error('Failed to update git remote URL'); + } +} + async function refreshGitRemoteToken( request: WrapperSessionReadyRequest, runGit: GitRunner ): Promise { const repo = request.repo; - if (!repo?.refreshRemote || !repo.token) return; + const urls = repositoryUrls(request); + if (!repo?.refreshRemote || !repo.token || !urls) return; - const gitUrl = repo.kind === 'github' ? `https://github.com/${repo.repo}.git` : repo.url; - const platform = repo.kind === 'git' ? 
repo.platform : 'github'; - const nextUrl = authenticatedUrl(gitUrl, repo.token, platform); - const result = await runGit(['remote', 'set-url', 'origin', nextUrl], { + const result = await runGit(['remote', 'set-url', 'origin', urls.authenticated], { cwd: request.workspace.workspacePath, timeoutMs: GIT_COMMAND_TIMEOUT_MS, }); @@ -367,6 +408,50 @@ async function restoreOrBootstrapKiloSession( await bootstrapEmptyKiloSession(request, restore); } +async function reconcileRestoredWorkspace( + request: WrapperSessionReadyRequest, + runGit: GitRunner +): Promise { + const { workspacePath, branchName, upstreamBranch, strictBranch } = request.workspace; + if (strictBranch && isSyntheticReviewRef(branchName)) { + await fetchSyntheticReviewRef(runGit, workspacePath, branchName); + return; + } + + let sourceBranch: string; + if (upstreamBranch) { + sourceBranch = upstreamBranch; + } else if (strictBranch) { + sourceBranch = branchName; + } else { + const defaultBranchResult = await runGit(['ls-remote', '--symref', 'origin', 'HEAD'], { + cwd: workspacePath, + timeoutMs: GIT_COMMAND_TIMEOUT_MS, + }); + const defaultBranchMatch = defaultBranchResult.stdout.match(/^ref: refs\/heads\/(.+)\s+HEAD$/m); + if (defaultBranchResult.exitCode !== 0 || !defaultBranchMatch?.[1]) { + throw new Error('Failed to resolve authoritative remote default branch'); + } + sourceBranch = defaultBranchMatch[1]; + } + + const fetchResult = await runGit(['fetch', 'origin', sourceBranch], { + cwd: workspacePath, + timeoutMs: GIT_COMMAND_TIMEOUT_MS, + }); + if (fetchResult.exitCode !== 0) { + throw new Error('Failed to fetch authoritative remote state'); + } + + const checkoutResult = await runGit(['checkout', '-B', branchName, 'FETCH_HEAD'], { + cwd: workspacePath, + timeoutMs: GIT_COMMAND_TIMEOUT_MS, + }); + if (checkoutResult.exitCode !== 0) { + throw new Error(`Failed to create session branch ${branchName} from origin/${sourceBranch}`); + } +} + async function runSetupCommands( request: 
WrapperSessionReadyRequest, run: ProcessRunner, @@ -456,7 +541,9 @@ export async function prepareWrapperBootstrapWorkspace( Object.assign(process.env, request.materialized.env); const workspaceWasWarm = await exists(path.join(request.workspace.workspacePath, '.git')); - const workspaceNeedsBootstrap = !workspaceWasWarm || !request.workspace.preferSnapshot; + const restoredFromBackup = request.workspace.restoredFromBackup === true; + const workspaceNeedsBootstrap = + restoredFromBackup || !workspaceWasWarm || !request.workspace.preferSnapshot; logToFile( `bootstrap workspace plan kiloSessionId=${request.kiloSessionId} preferSnapshot=${request.workspace.preferSnapshot} workspaceWasWarm=${workspaceWasWarm} workspaceNeedsBootstrap=${workspaceNeedsBootstrap} workspacePath=${request.workspace.workspacePath} sessionHome=${request.workspace.sessionHome} home=${process.env.HOME ?? '(unset)'} homeMatchesSessionHome=${process.env.HOME === request.workspace.sessionHome} repoKind=${request.repo?.kind ?? '(none)'} setupCommandCount=${request.materialized.setupCommands?.length ?? 0} runtimeSkillCount=${request.materialized.runtimeSkills?.length ?? 0}` ); @@ -488,7 +575,18 @@ export async function prepareWrapperBootstrapWorkspace( logToFile( `bootstrap branch preparation starting kiloSessionId=${request.kiloSessionId} branchName=${request.workspace.branchName} strictBranch=${request.workspace.strictBranch ?? false}` ); - await prepareBranch(request, runGit); + if (restoredFromBackup) { + try { + const urls = repositoryUrls(request); + if (urls) await setOriginUrl(request, runGit, urls.authenticated); + await reconcileRestoredWorkspace(request, runGit); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + throw new RestoredWorkspaceReconciliationError(message, { cause: error }); + } + } else { + await prepareBranch(request, runGit); + } logToFile( `bootstrap branch preparation ready kiloSessionId=${request.kiloSessionId} branchName=${request.workspace.branchName}` );