diff --git a/apps/desktop/src/main/services/chat/agentChatService.test.ts b/apps/desktop/src/main/services/chat/agentChatService.test.ts index a94e9f064..92f591df3 100644 --- a/apps/desktop/src/main/services/chat/agentChatService.test.ts +++ b/apps/desktop/src/main/services/chat/agentChatService.test.ts @@ -11390,6 +11390,64 @@ describe("createAgentChatService", () => { )).toEqual(["event-2", "event-3", "event-4"]); }); + it("byte-caps a snapshot whose merged events exceed the response budget", async () => { + // Regression: individual chat events can carry multi-MB tool outputs. + // Event-count caps alone let a snapshot serialize past the desktop RPC + // client's 16 MiB per-message limit, which used to fail every in-flight + // call on the shared runtime socket. + const { service } = createService(); + const session = await service.createSession({ + laneId: "lane-1", + provider: "codex", + model: "gpt-5.4", + }); + + const envelopes: AgentChatEventEnvelope[] = Array.from({ length: 4 }, (_, index) => ({ + sessionId: session.id, + timestamp: `2026-04-23T10:0${index}:00.000Z`, + event: { type: "text", text: `event-${index}-${"x".repeat(3_000_000)}` }, + sequence: index + 1, + })); + const transcriptFile = path.join(tmpRoot, "transcripts", `${session.id}.chat.jsonl`); + fs.writeFileSync(transcriptFile, "ignored\n", "utf8"); + vi.mocked(parseAgentChatTranscript).mockReturnValue(envelopes); + + const history = service.getChatEventHistory(session.id); + + // 4 × ~3 MB events exceed the 8 MB response budget: only the newest + // events that fit are returned, and the trim is reported as window + // truncation so clients know to page for the rest. + expect(history.events.length).toBeLessThan(envelopes.length); + expect(history.events.length).toBeGreaterThan(0); + expect(history.windowTruncated).toBe(true); + expect(history.truncated).toBe(true); + expect(JSON.stringify(history.events).length).toBeLessThanOrEqual(8_000_000); + const lastEvent = history.events.at(-1)?.event; + expect(lastEvent?.type === "text" ? lastEvent.text.startsWith("event-3-") : false).toBe(true); + }); + + it("always returns at least the newest event even when it alone exceeds the byte budget", async () => { + const { service } = createService(); + const session = await service.createSession({ + laneId: "lane-1", + provider: "codex", + model: "gpt-5.4", + }); + + const giant: AgentChatEventEnvelope = { + sessionId: session.id, + timestamp: "2026-04-23T10:00:00.000Z", + event: { type: "text", text: "giant-".concat("y".repeat(9_000_000)) }, + sequence: 1, + }; + const transcriptFile = path.join(tmpRoot, "transcripts", `${session.id}.chat.jsonl`); + fs.writeFileSync(transcriptFile, "ignored\n", "utf8"); + vi.mocked(parseAgentChatTranscript).mockReturnValue([giant]); + + const history = service.getChatEventHistory(session.id); + expect(history.events).toHaveLength(1); + }); + it("marks window truncation when the service response cap removes events", async () => { const { service } = createService(); const session = await service.createSession({ diff --git a/apps/desktop/src/main/services/chat/agentChatService.ts b/apps/desktop/src/main/services/chat/agentChatService.ts index b784aeac9..fa3b33339 100644 --- a/apps/desktop/src/main/services/chat/agentChatService.ts +++ b/apps/desktop/src/main/services/chat/agentChatService.ts @@ -5297,7 +5297,70 @@ export function createAgentChatService(args: { const CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION = 20_000; const CHAT_EVENT_HISTORY_TRANSCRIPT_MAX_BYTES = 2_000_000; const CHAT_EVENT_HISTORY_TRANSCRIPT_CACHE_MAX_SESSIONS = 32; + // Byte budgets alongside the event-count caps above. Individual events are + // unbounded (multi-MB tool outputs exist in real transcripts), so count caps + // alone cannot keep a history snapshot under the desktop RPC client's + // 16 MiB per-message limit; one over-limit response used to fail every + // in-flight call on the shared runtime socket. The ring budget is the + // working bound (ring 4 MB + 2 MB transcript tail ≈ 6 MB merged); the + // response budget is a backstop that should not trigger in practice. + const CHAT_EVENT_HISTORY_BUFFER_MAX_CHARS = 4_000_000; + const CHAT_EVENT_HISTORY_RESPONSE_MAX_CHARS = 8_000_000; const eventHistoryBySession = new Map(); + + const safeJsonChars = (value: unknown): number => { + try { + return JSON.stringify(value).length; + } catch { + return 2_048; + } + }; + + // Keep the newest items whose cumulative serialized size fits the budget + // (always at least the newest one, even when it alone exceeds it). + const keepNewestWithinCharBudget = ( + items: T[], + maxChars: number, + sizeOf: (item: T) => number, + ): T[] => { + let total = 0; + let start = items.length; + while (start > 0) { + const next = total + sizeOf(items[start - 1]!); + if (start < items.length && next > maxChars) break; + total = next; + start -= 1; + } + return start > 0 ? items.slice(start) : items; + }; + + // Envelopes are immutable once recorded; cache their serialized size so + // byte-budget trims do not re-stringify multi-MB events on every snapshot. + const envelopeSizeCache = new WeakMap(); + + const estimateEnvelopeChars = (envelope: AgentChatEventEnvelope): number => { + let size = envelopeSizeCache.get(envelope); + if (size == null) { + size = safeJsonChars(envelope); + envelopeSizeCache.set(envelope, size); + } + return size; + }; + + const trimEnvelopesToByteBudget = ( + envelopes: AgentChatEventEnvelope[], + maxChars: number, + ): AgentChatEventEnvelope[] => keepNewestWithinCharBudget(envelopes, maxChars, estimateEnvelopeChars); + + // The single policy for what bounds the in-memory event ring: an event-count + // cap, then a byte budget. Applied wherever the ring is (re)written. + const boundRingEnvelopes = (envelopes: AgentChatEventEnvelope[]): AgentChatEventEnvelope[] => + trimEnvelopesToByteBudget( + envelopes.length > CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION + ? envelopes.slice(-CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION) + : envelopes, + CHAT_EVENT_HISTORY_BUFFER_MAX_CHARS, + ); const transcriptHistoryCacheBySession = new Map { const current = eventHistoryBySession.get(envelope.sessionId) ?? []; current.push(envelope); - if (current.length > CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION) { - current.splice(0, current.length - CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION); - } - eventHistoryBySession.set(envelope.sessionId, current); + eventHistoryBySession.set(envelope.sessionId, boundRingEnvelopes(current)); }; const rememberTranscriptHistoryCache = ( @@ -6790,18 +6850,26 @@ export function createAgentChatService(args: { if (merged.length > CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION) { merged = merged.slice(-CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION); } - eventHistoryBySession.set(trimmedId, merged.slice(-CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION)); + eventHistoryBySession.set(trimmedId, boundRingEnvelopes(merged.slice())); const parentVisibleMerged = merged.filter((entry) => !isCodexSubagentTranscriptEnvelope(entry)); const parentVisibleLength = parentVisibleMerged.length; const transcriptTruncated = transcriptHistory.truncated; + const countWindowed = parentVisibleLength > maxEvents + ? parentVisibleMerged.slice(-maxEvents) + : parentVisibleMerged; + // Backstop byte budget so the serialized snapshot always fits one RPC + // message. The ring and transcript-tail budgets keep snapshots well under + // it, so it only trims when a single envelope dwarfs both (>~6 MB); such + // trimmed events sit AFTER tailStartOffset and are not reachable through + // getChatEventHistoryPage (which pages strictly older) — an accepted + // seam, the alternative being a response the client must discard. + const windowed = trimEnvelopesToByteBudget(countWindowed, CHAT_EVENT_HISTORY_RESPONSE_MAX_CHARS); const windowTruncated = mergedLengthBeforeResponseCap > CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION - || parentVisibleLength > maxEvents; + || parentVisibleLength > maxEvents + || windowed.length < countWindowed.length; const truncated = transcriptTruncated || windowTruncated; - const windowed = parentVisibleLength > maxEvents - ? parentVisibleMerged.slice(-maxEvents) - : parentVisibleMerged; return { sessionId: trimmedId, events: windowed, @@ -25369,6 +25437,20 @@ export function createAgentChatService(args: { return options.limit !== undefined ? sliced.slice(0, options.limit) : sliced; } + // Subagent transcripts merge unbounded sources (full persisted transcript, + // live `thread/turns/list?itemsView=full` pulls) and individual messages can + // carry multi-MB tool outputs. Keep the newest messages that fit one RPC + // response (always at least one) so a single fetch can never exceed the + // desktop client's per-message limit. Note for `offset`/`limit` callers: the + // bound keeps the newest suffix of the requested window, so a response may + // start later than `offset` and a short page does not imply end-of-data. + const SUBAGENT_TRANSCRIPT_RESPONSE_MAX_CHARS = 4_000_000; + + const boundSubagentTranscriptResponse = ( + messages: AgentChatSubagentTranscriptMessage[], + ): AgentChatSubagentTranscriptMessage[] => + keepNewestWithinCharBudget(messages, SUBAGENT_TRANSCRIPT_RESPONSE_MAX_CHARS, safeJsonChars); + function mergeSubagentTranscriptMessages( left: AgentChatSubagentTranscriptMessage[], right: AgentChatSubagentTranscriptMessage[], @@ -25452,8 +25534,11 @@ export function createAgentChatService(args: { * - **Cursor**: SDK `task` events tag every lifecycle envelope with the * subagent's `agentId`; we filter the parent stream by that value. * - **Everything else (droid, lmstudio, …)**: `null`. + * + * Exposed via getSubagentTranscript, which byte-bounds whatever this + * returns — new runtime branches here need no size handling of their own. */ - const getSubagentTranscript = async ({ + const collectSubagentTranscript = async ({ sessionId, agentId, laneId, @@ -25638,6 +25723,16 @@ export function createAgentChatService(args: { }); }; + // The byte bound is a response-boundary invariant: enforce it once here so + // every runtime branch in collectSubagentTranscript stays bounded by + // construction rather than by remembering to wrap each return. + const getSubagentTranscript = async ( + args: AgentChatSubagentTranscriptArgs, + ): Promise => { + const messages = await collectSubagentTranscript(args); + return messages ? boundSubagentTranscriptResponse(messages) : messages; + }; + const normalizeClaudeContextUsage = ( usage: SDKControlGetContextUsageResponse, ): AgentChatContextUsage => { diff --git a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts index 007ff3577..55422dcb2 100644 --- a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts +++ b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts @@ -166,6 +166,76 @@ describe("RuntimeRpcClient", () => { expect(transport.writes).toEqual([]); }); + it("rejects only the request answered by an oversized response and keeps the connection alive", async () => { + const transport = new MockTransport(); + const client = new RuntimeRpcClient(transport); + + const oversized = client.call("chat.getChatEventHistory", {}); + const unrelated = client.call("projects.list", {}); + const oversizedId = requestId(transport.writes[0]!); + const unrelatedId = requestId(transport.writes[1]!); + + // Stream a single >16 MiB response line in chunks, as a socket would. + const head = `{"jsonrpc":"2.0","id":${oversizedId},"result":{"events":["`; + transport.emitData(head); + const filler = "x".repeat(1024 * 1024); + for (let i = 0; i < 17; i++) transport.emitData(filler); + transport.emitData(`"]}}\n`); + + await expect(oversized).rejects.toThrow( + /response for method chat\.getChatEventHistory exceeded 16 MiB .* and was discarded/, + ); + expect(client.isClosed()).toBe(false); + + // The unrelated in-flight call and brand-new calls still complete. + transport.emitData({ jsonrpc: "2.0", id: unrelatedId, result: ["project"] }); + await expect(unrelated).resolves.toEqual(["project"]); + const after = client.call("projects.list", {}); + transport.emitData({ jsonrpc: "2.0", id: requestId(transport.writes[2]!), result: ["still-alive"] }); + await expect(after).resolves.toEqual(["still-alive"]); + }); + + it("parses lines that arrive in the same chunk after an oversized line ends", async () => { + const transport = new MockTransport(); + const client = new RuntimeRpcClient(transport); + + const oversized = client.call("chat.getChatEventHistory", {}); + const follower = client.call("projects.list", {}); + const oversizedId = requestId(transport.writes[0]!); + const followerId = requestId(transport.writes[1]!); + + transport.emitData(`{"jsonrpc":"2.0","id":${oversizedId},"result":"`); + transport.emitData("y".repeat(17 * 1024 * 1024)); + // The oversized line terminator and the follower response share a chunk. + transport.emitData(`"}\n{"jsonrpc":"2.0","id":${followerId},"result":"ok"}\n`); + + await expect(oversized).rejects.toThrow(/exceeded 16 MiB/); + await expect(follower).resolves.toBe("ok"); + expect(client.isClosed()).toBe(false); + }); + + it("discards an oversized notification without rejecting pending calls", async () => { + const transport = new MockTransport(); + const client = new RuntimeRpcClient(transport); + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + try { + const pending = client.call("projects.list", {}); + const pendingId = requestId(transport.writes[0]!); + + // A notification whose params embed an "id" field matching the pending + // call must not be mistaken for that call's response. + transport.emitData(`{"jsonrpc":"2.0","method":"runtime/event","params":{"id":${pendingId},"blob":"`); + transport.emitData("z".repeat(17 * 1024 * 1024)); + transport.emitData(`"}}\n`); + + expect(client.isClosed()).toBe(false); + transport.emitData({ jsonrpc: "2.0", id: pendingId, result: ["project"] }); + await expect(pending).resolves.toEqual(["project"]); + } finally { + warn.mockRestore(); + } + }); + it("dispatches JSON-RPC notifications without resolving pending calls", async () => { const transport = new MockTransport(); const client = new RuntimeRpcClient(transport); diff --git a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts index 56ef432b0..3de7cddf9 100644 --- a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts +++ b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts @@ -13,6 +13,10 @@ type PendingRequest = { }; const MAX_RPC_BUFFER_CHARS = 16 * 1024 * 1024; +// Head of an oversized line retained to identify which request it answers. +// The JSON-RPC envelope id appears within the first few dozen chars of a +// response; 4 KiB is generous while keeping discarded-line memory bounded. +const OVERSIZED_LINE_PREFIX_CHARS = 4096; const MAX_RPC_TIMEOUT_MS = 2_147_483_647; type RuntimeRpcCallOptions = { @@ -35,9 +39,15 @@ function normalizeRuntimeRpcTimeoutMs(value: number): number { return Math.ceil(timeoutMs); } +type OversizedLineState = { + prefix: string; + discardedChars: number; +}; + export class RuntimeRpcClient { private nextId = 1; private buffer = ""; + private oversizedLine: OversizedLineState | null = null; private readonly pending = new Map(); private readonly notificationHandlers = new Map void>>(); private readonly disconnectCallbacks = new Set<(error: Error) => void>(); @@ -144,11 +154,21 @@ export class RuntimeRpcClient { private onData(chunk: string): void { if (this.closedError) return; - this.buffer += chunk; - if (this.buffer.length > MAX_RPC_BUFFER_CHARS) { - this.failConnection(new Error("Remote ADE service response buffer exceeded 16 MiB.")); - return; + if (this.oversizedLine) { + // An oversized line is being discarded as it streams. Drop chunks until + // its terminating newline, then resume normal parsing on the remainder. + const newline = chunk.indexOf("\n"); + if (newline < 0) { + this.oversizedLine.discardedChars += chunk.length; + return; + } + const oversized = this.oversizedLine; + this.oversizedLine = null; + oversized.discardedChars += newline; + this.rejectOversizedLine(oversized); + chunk = chunk.slice(newline + 1); } + this.buffer += chunk; while (true) { const newline = this.buffer.indexOf("\n"); if (newline < 0) break; @@ -156,7 +176,57 @@ export class RuntimeRpcClient { this.buffer = this.buffer.slice(newline + 1); if (!line) continue; this.handleLine(line); + if (this.closedError) return; } + // The leftover is one partial line. If it has outgrown the buffer cap, + // reject only the request it answers and discard the rest of the line as + // it streams — a single oversized response must not take down the shared + // connection (every project pane multiplexes over it). Note the cap + // bounds PARTIAL-LINE ACCUMULATION, not message size: a complete line + // delivered within one chunk is parsed regardless of length. Real + // transports chunk at ~64 KB, so any >16 MiB line lands here in practice. + if (this.buffer.length > MAX_RPC_BUFFER_CHARS) { + this.oversizedLine = { + prefix: this.buffer.slice(0, OVERSIZED_LINE_PREFIX_CHARS), + discardedChars: this.buffer.length, + }; + this.buffer = ""; + } + } + + /** + * Fail the single request answered by a discarded oversized line. The + * envelope id is recovered from the retained line head; a `"method"` key + * appearing before `"id"` marks the line as a notification (its params may + * embed unrelated `"id"` fields), in which case nothing is rejected. + * + * Heuristic assumption: the daemon serializes response envelopes with the + * top-level `id` before `result` (jsonrpc.ts writeMessage key order), so + * the first `"id"` in a response line is the envelope id, never one nested + * inside the result. If a misfire ever happens anyway, the failure mode is + * benign: no pending entry matches, the line is warn-logged and dropped, + * and the real caller times out instead of receiving a wrong rejection. + */ + private rejectOversizedLine(oversized: OversizedLineState): void { + const idMatch = /"id"\s*:\s*(\d+)/.exec(oversized.prefix); + const methodMatch = /"method"\s*:/.exec(oversized.prefix); + const responseId = idMatch && (!methodMatch || idMatch.index < methodMatch.index) + ? Number(idMatch[1]) + : null; + const approxMiB = (oversized.discardedChars / (1024 * 1024)).toFixed(1); + const pending = responseId != null ? this.pending.get(responseId) : undefined; + if (!pending || responseId == null) { + console.warn("Remote ADE service sent an oversized message; discarded", { + approxMiB, + hasResponseId: responseId != null, + }); + return; + } + this.pending.delete(responseId); + clearTimeout(pending.timer); + pending.reject(new Error( + `Remote ADE service response for method ${pending.method} exceeded 16 MiB (~${approxMiB} MiB) and was discarded.`, + )); } private handleLine(line: string): void { diff --git a/apps/ios/ADE/Services/SyncService.swift b/apps/ios/ADE/Services/SyncService.swift index a7e90e383..0136abb84 100644 --- a/apps/ios/ADE/Services/SyncService.swift +++ b/apps/ios/ADE/Services/SyncService.swift @@ -4798,45 +4798,45 @@ final class SyncService: ObservableObject { return response.entries } - /// One page of a paginated chat transcript walk. `nextCursor` is an opaque - /// host token identifying the position just before the oldest entry - /// returned; pass it back via `fetchChatTranscriptPage(cursor:)` to load - /// the previous (older) page. `nil` means the start of the transcript was - /// reached. + /// One page of a paginated chat transcript walk. `nextCursor` is the + /// host-side index of the oldest entry returned (the transcript is + /// append-only, so indices are stable); pass it back via + /// `fetchChatTranscriptPage(cursor:)` to load the previous (older) page. + /// `nil` means the start of the transcript was reached. struct AgentChatTranscriptPage: Equatable { var sessionId: String var entries: [AgentChatTranscriptEntry] var truncated: Bool var totalEntries: Int - var nextCursor: String? + var nextCursor: Int? } /// Fetch a transcript page. Without `cursor` this returns the newest /// entries (same data as `fetchChatTranscriptResponse`) plus a cursor for /// walking backwards; with `cursor` it returns the page strictly BEFORE - /// that point. Older hosts that predate pagination simply omit + /// that index. Older hosts that predate pagination simply omit /// `nextCursor`, which surfaces here as `nil` (no more pages). func fetchChatTranscriptPage( sessionId: String, - cursor: String? = nil, + cursor: Int? = nil, limit: Int = 200, maxChars: Int = 600_000 ) async throws -> AgentChatTranscriptPage { var args: [String: Any] = ["sessionId": sessionId, "limit": limit, "maxChars": maxChars] - if let cursor, !cursor.isEmpty { - args["cursor"] = cursor + if let cursor, cursor > 0 { + args["cursor"] = String(cursor) } let response = try await sendCommand(action: "chat.getTranscript", args: args) if let payload = response as? [String: Any], payload["queued"] as? Bool == true { throw QueuedRemoteCommandError(action: "chat.getTranscript") } let transcript = try decode(response, as: AgentChatTranscriptResponse.self) - var nextCursor: String? + var nextCursor: Int? if let dict = response as? [String: Any], let rawCursor = dict["nextCursor"] { - if let text = rawCursor as? String, !text.isEmpty { - nextCursor = text + if let text = rawCursor as? String { + nextCursor = Int(text) } else if let number = rawCursor as? NSNumber, !(rawCursor is Bool) { - nextCursor = number.stringValue + nextCursor = number.intValue } } return AgentChatTranscriptPage( diff --git a/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift b/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift index 24c5702ee..bdf6c23a0 100644 --- a/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift +++ b/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift @@ -70,6 +70,14 @@ extension WorkChatSessionView { visibleTimelineCount += workTimelinePageSize refreshTimelinePresentation() } + // Once the locally-buffered timeline is nearly exhausted, pull the next + // older transcript page from the host so scroll-back continues through + // the full history instead of stopping at the initial tail fetch. + if hasOlderTranscriptHistory, + hiddenTimelineCount <= workTimelinePageSize * 2, + let onLoadOlderTranscript { + Task { await onLoadOlderTranscript() } + } } @MainActor diff --git a/apps/ios/ADE/Views/Work/WorkChatSessionView.swift b/apps/ios/ADE/Views/Work/WorkChatSessionView.swift index dadd2ec03..1e0984c79 100644 --- a/apps/ios/ADE/Views/Work/WorkChatSessionView.swift +++ b/apps/ios/ADE/Views/Work/WorkChatSessionView.swift @@ -25,6 +25,7 @@ struct WorkChatSessionView: View { @State var actionInFlight = false @State var isNearBottom = true @State var unreadBelowCount = 0 + @State var lastTimelineTailId: String? @State var artifactDrawerPresented = false @State var timelineSnapshot = WorkChatTimelineSnapshot.empty @State var timelinePresentation = WorkTimelinePresentation.empty @@ -57,6 +58,10 @@ struct WorkChatSessionView: View { let onSelectEffort: @MainActor (String) async -> Void var lanes: [LaneSummary] = [] + // Host-side scroll-back: true while older transcript pages remain on the + // host beyond what the phone has fetched; the callback pulls the next page. + var hasOlderTranscriptHistory: Bool = false + var onLoadOlderTranscript: (@MainActor () async -> Void)? = nil @State var steerEditDrafts: [String: String] = [:] @State var modelPickerPresented = false @@ -257,14 +262,15 @@ struct WorkChatSessionView: View { message: isLive ? "Send a message to start streaming the transcript." : "Reconnect to load the latest chat history from the machine." ) } else { - if hiddenTimelineCount > 0 { + if hiddenTimelineCount > 0 || hasOlderTranscriptHistory { + let nextPageCount = min(hiddenTimelineCount, workTimelinePageSize) + let loadEarlierTitle = nextPageCount > 0 + ? "Load \(nextPageCount) earlier message\(nextPageCount == 1 ? "" : "s")" + : "Load earlier messages" Button { loadEarlierTimelineEntries() } label: { - Label( - "Load \(min(hiddenTimelineCount, workTimelinePageSize)) earlier message\(min(hiddenTimelineCount, workTimelinePageSize) == 1 ? "" : "s")", - systemImage: "chevron.up.circle" - ) + Label(loadEarlierTitle, systemImage: "chevron.up.circle") .font(.footnote.weight(.semibold)) .foregroundStyle(ADEColor.accent) .frame(maxWidth: .infinity) @@ -454,8 +460,14 @@ struct WorkChatSessionView: View { } } .onChange(of: timeline.count) { oldCount, newCount in + let previousTailId = lastTimelineTailId + lastTimelineTailId = timeline.last?.id let delta = newCount - oldCount guard delta > 0 else { return } + // Older-page prepends grow the timeline above the viewport — the + // newest entry stays put. Don't autoscroll to the bottom or flag + // the prepended entries as "new messages below". + if let previousTailId, previousTailId == timeline.last?.id { return } if isNearBottom { scrollToLatest(proxy, animated: false) } else { diff --git a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift index 9136d1345..cbaab5736 100644 --- a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift +++ b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift @@ -123,6 +123,13 @@ struct WorkSessionDestinationView: View { @State var chatSummary: AgentChatSessionSummary? @State var transcript: [WorkChatEnvelope] = [] @State var fallbackEntries: [AgentChatTranscriptEntry] = [] + // Canonical transcript entries keyed by their host-side index. Tail + // refreshes overwrite the newest indices while "load earlier" pages fill + // older ones, so a poll can never clobber scroll-back history. The cursor + // is the oldest fetched index (0 = transcript head reached). + @State var transcriptEntriesByIndex: [Int: AgentChatTranscriptEntry] = [:] + @State var olderTranscriptCursor: Int? + @State var olderTranscriptLoading = false @State var artifacts: [ComputerUseArtifactSummary] = [] @State var localEchoMessages: [WorkLocalEchoMessage] = [] @State var optimisticPendingSteers: [WorkPendingSteerModel] = [] @@ -351,7 +358,9 @@ struct WorkSessionDestinationView: View { onSelectModel: selectModel, onSelectRuntimeMode: selectRuntimeMode, onSelectEffort: selectReasoningEffort, - lanes: lanes + lanes: lanes, + hasOlderTranscriptHistory: hasOlderTranscriptHistory, + onLoadOlderTranscript: loadOlderTranscriptEntries ) } else { TerminalSessionScreen(session: session) @@ -454,10 +463,11 @@ struct WorkSessionDestinationView: View { || (liveTranscript.isEmpty && transcript.isEmpty) || (!liveTranscript.isEmpty && status != "active") let fallbackMaxChars = status == "active" ? 32_000 : 120_000 - if shouldFetchFallback, let response = try? await syncService.fetchChatTranscriptResponse(sessionId: sessionId, maxChars: fallbackMaxChars) { - fetchedFallbackEntries = response.entries + if shouldFetchFallback, let page = try? await syncService.fetchChatTranscriptPage(sessionId: sessionId, maxChars: fallbackMaxChars) { + recordTranscriptPage(page, before: nil) + fetchedFallbackEntries = combinedTranscriptEntries() fetchedFallbackEntriesAvailable = true - fallbackTranscript = makeWorkChatTranscript(from: response.entries, sessionId: sessionId) + fallbackTranscript = makeWorkChatTranscript(from: fetchedFallbackEntries, sessionId: sessionId) } // Chat-only fallback: parses chat envelopes out of the raw terminal buffer. @@ -510,6 +520,74 @@ struct WorkSessionDestinationView: View { } } + /// Fold one host transcript page into the index-keyed store. `cursor` is + /// the `before` index the page was requested with (nil for a tail fetch). + /// Host indices are stable because the transcript is append-only. + @MainActor + func recordTranscriptPage(_ page: SyncService.AgentChatTranscriptPage, before cursor: Int?) { + let end = min(cursor ?? page.totalEntries, page.totalEntries) + let start = max(0, end - page.entries.count) + let pageCursor = page.nextCursor ?? 0 + if cursor == nil { + // Tail refresh. If the new window starts past everything stored (a + // burst of entries landed between polls), stitching would render a + // transcript with a silent hole — reset to the fresh tail instead and + // re-anchor scroll-back below it. + let nextContiguousIndex = transcriptEntriesByIndex.keys.max().map { $0 + 1 } ?? 0 + if start > nextContiguousIndex, !page.entries.isEmpty { + transcriptEntriesByIndex = [:] + olderTranscriptCursor = pageCursor + } else if olderTranscriptCursor == nil { + // First fetch establishes the scroll-back anchor; later contiguous + // polls must not move it forward past pages the user already loaded. + olderTranscriptCursor = pageCursor + } + } else { + olderTranscriptCursor = min(olderTranscriptCursor ?? pageCursor, pageCursor) + } + for (offset, entry) in page.entries.enumerated() { + transcriptEntriesByIndex[start + offset] = entry + } + } + + @MainActor + func combinedTranscriptEntries() -> [AgentChatTranscriptEntry] { + transcriptEntriesByIndex.keys.sorted().compactMap { transcriptEntriesByIndex[$0] } + } + + var hasOlderTranscriptHistory: Bool { + (olderTranscriptCursor ?? 0) > 0 + } + + /// Fetch the next strictly-older transcript page from the host and prepend + /// it to the fallback entries that feed the chat timeline. + @MainActor + func loadOlderTranscriptEntries() async { + guard !olderTranscriptLoading, let cursor = olderTranscriptCursor, cursor > 0 else { return } + olderTranscriptLoading = true + defer { olderTranscriptLoading = false } + guard let page = try? await syncService.fetchChatTranscriptPage( + sessionId: sessionId, + cursor: cursor + ) else { return } + recordTranscriptPage(page, before: cursor) + let combined = combinedTranscriptEntries() + if !combined.isEmpty, combined != fallbackEntries { + fallbackEntries = combined + } + // fallbackEntries only feed the timeline while `transcript` is empty + // (buildWorkTimeline), so splice the older entries into the rendered + // transcript right away — otherwise the fetched page stays invisible + // until the next loadTranscript poll. preferredWorkTranscript backfills + // by role+turnId+text identity, so entries already rendered from live + // events are not duplicated. + let olderTranscript = makeWorkChatTranscript(from: combined, sessionId: sessionId) + let merged = preferredWorkTranscript(current: [], fallback: olderTranscript, eventTranscript: transcript) + if !merged.isEmpty, merged != transcript { + transcript = merged + } + } + @MainActor func refreshChatStateAfterAction(forceRemote: Bool = true) async { let preferLightweight = syncService.prefersReducedSyncLoad