From 659af0d95180458b8fe0ce6988b7ebc5f2eecb45 Mon Sep 17 00:00:00 2001
From: Arul Sharma <31745423+arul28@users.noreply.github.com>
Date: Thu, 11 Jun 2026 05:39:15 -0400
Subject: [PATCH 1/2] Chat history can no longer take down the ADE brain
 socket; full scroll-back on every surface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A single >16 MiB chat-history response used to kill the desktop's shared
runtime connection (RuntimeRpcClient.failConnection), rejecting every
in-flight call app-wide and looping as the chat re-requested the same
oversized snapshot — the "Remote ADE service response buffer exceeded
16 MiB" incident.

Desktop RPC client: an oversized line is now discarded as it streams;
only the pending request it answers is rejected (envelope id recovered
from the retained line head), notifications are warn-and-dropped, and
the connection plus all other in-flight calls survive. Memory during
discard is bounded to a 4 KiB prefix.

Brain (agentChatService): chat event history is byte-bounded end to
end — the in-memory ring keeps the newest 4 MB (count cap unchanged),
the snapshot response has an 8 MB backstop that reports
windowTruncated, and subagent transcripts are bounded once at the
getSubagentTranscript boundary (4 MB, newest-first) so every runtime
branch stays bounded by construction. One generic
keepNewestWithinCharBudget helper backs all three.

iOS: wires the previously-unused transcript paging end to end. The tail
fetch now records the host index cursor, "Load earlier messages"
fetches strictly-older pages once the local buffer runs out, an
index-keyed store lets tail polls and older pages merge without
clobbering, a gap guard resets cleanly when a burst of entries lands
between polls, and prepended pages no longer trigger autoscroll or the
"new messages below" pill. Desktop (AgentChatPane infinite scroll) and
the ade code TUI already paged via getChatEventHistoryPage; with the
bounded snapshots all three surfaces stream fast and can still walk the
full transcript.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .../services/chat/agentChatService.test.ts    |  58 +++++++++
 .../main/services/chat/agentChatService.ts    | 115 ++++++++++++++++--
 .../remoteRuntime/runtimeRpcClient.test.ts    |  70 +++++++++++
 .../remoteRuntime/runtimeRpcClient.ts         |  71 ++++++++++-
 apps/ios/ADE/Services/SyncService.swift       |  28 ++---
 .../Work/WorkChatSessionView+Actions.swift    |   8 ++
 .../ADE/Views/Work/WorkChatSessionView.swift  |  22 +++-
 .../Work/WorkSessionDestinationView.swift     |  75 +++++++++++-
 8 files changed, 410 insertions(+), 37 deletions(-)

diff --git a/apps/desktop/src/main/services/chat/agentChatService.test.ts b/apps/desktop/src/main/services/chat/agentChatService.test.ts
index a94e9f064..92f591df3 100644
--- a/apps/desktop/src/main/services/chat/agentChatService.test.ts
+++ b/apps/desktop/src/main/services/chat/agentChatService.test.ts
@@ -11390,6 +11390,64 @@ describe("createAgentChatService", () => {
       )).toEqual(["event-2", "event-3", "event-4"]);
     });
 
+    it("byte-caps a snapshot whose merged events exceed the response budget", async () => {
+      // Regression: individual chat events can carry multi-MB tool outputs.
+      // Event-count caps alone let a snapshot serialize past the desktop RPC
+      // client's 16 MiB per-message limit, which used to fail every in-flight
+      // call on the shared runtime socket.
+      const { service } = createService();
+      const session = await service.createSession({
+        laneId: "lane-1",
+        provider: "codex",
+        model: "gpt-5.4",
+      });
+
+      const envelopes: AgentChatEventEnvelope[] = Array.from({ length: 4 }, (_, index) => ({
+        sessionId: session.id,
+        timestamp: `2026-04-23T10:0${index}:00.000Z`,
+        event: { type: "text", text: `event-${index}-${"x".repeat(3_000_000)}` },
+        sequence: index + 1,
+      }));
+      const transcriptFile = path.join(tmpRoot, "transcripts", `${session.id}.chat.jsonl`);
+      fs.writeFileSync(transcriptFile, "ignored\n", "utf8");
+      vi.mocked(parseAgentChatTranscript).mockReturnValue(envelopes);
+
+      const history = service.getChatEventHistory(session.id);
+
+      // 4 × ~3 MB events exceed the 8 MB response budget: only the newest
+      // events that fit are returned, and the trim is reported as window
+      // truncation so clients know to page for the rest.
+      expect(history.events.length).toBeLessThan(envelopes.length);
+      expect(history.events.length).toBeGreaterThan(0);
+      expect(history.windowTruncated).toBe(true);
+      expect(history.truncated).toBe(true);
+      expect(JSON.stringify(history.events).length).toBeLessThanOrEqual(8_000_000);
+      const lastEvent = history.events.at(-1)?.event;
+      expect(lastEvent?.type === "text" ? lastEvent.text.startsWith("event-3-") : false).toBe(true);
+    });
+
+    it("always returns at least the newest event even when it alone exceeds the byte budget", async () => {
+      const { service } = createService();
+      const session = await service.createSession({
+        laneId: "lane-1",
+        provider: "codex",
+        model: "gpt-5.4",
+      });
+
+      const giant: AgentChatEventEnvelope = {
+        sessionId: session.id,
+        timestamp: "2026-04-23T10:00:00.000Z",
+        event: { type: "text", text: "giant-".concat("y".repeat(9_000_000)) },
+        sequence: 1,
+      };
+      const transcriptFile = path.join(tmpRoot, "transcripts", `${session.id}.chat.jsonl`);
+      fs.writeFileSync(transcriptFile, "ignored\n", "utf8");
+      vi.mocked(parseAgentChatTranscript).mockReturnValue([giant]);
+
+      const history = service.getChatEventHistory(session.id);
+      expect(history.events).toHaveLength(1);
+    });
+
     it("marks window truncation when the service response cap removes events", async () => {
       const { service } = createService();
       const session = await service.createSession({
diff --git a/apps/desktop/src/main/services/chat/agentChatService.ts b/apps/desktop/src/main/services/chat/agentChatService.ts
index b784aeac9..fa3b33339 100644
--- a/apps/desktop/src/main/services/chat/agentChatService.ts
+++ b/apps/desktop/src/main/services/chat/agentChatService.ts
@@ -5297,7 +5297,70 @@ export function createAgentChatService(args: {
   const CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION = 20_000;
   const CHAT_EVENT_HISTORY_TRANSCRIPT_MAX_BYTES = 2_000_000;
   const CHAT_EVENT_HISTORY_TRANSCRIPT_CACHE_MAX_SESSIONS = 32;
+  // Byte budgets alongside the event-count caps above. Individual events are
+  // unbounded (multi-MB tool outputs exist in real transcripts), so count caps
+  // alone cannot keep a history snapshot under the desktop RPC client's
+  // 16 MiB per-message limit; one over-limit response used to fail every
+  // in-flight call on the shared runtime socket. The ring budget is the
+  // working bound (ring 4 MB + 2 MB transcript tail ≈ 6 MB merged); the
+  // response budget is a backstop that should not trigger in practice.
+  const CHAT_EVENT_HISTORY_BUFFER_MAX_CHARS = 4_000_000;
+  const CHAT_EVENT_HISTORY_RESPONSE_MAX_CHARS = 8_000_000;
   const eventHistoryBySession = new Map<string, AgentChatEventEnvelope[]>();
+
+  const safeJsonChars = (value: unknown): number => {
+    try {
+      return JSON.stringify(value).length;
+    } catch {
+      return 2_048;
+    }
+  };
+
+  // Keep the newest items whose cumulative serialized size fits the budget
+  // (always at least the newest one, even when it alone exceeds it).
+  const keepNewestWithinCharBudget = <T>(
+    items: T[],
+    maxChars: number,
+    sizeOf: (item: T) => number,
+  ): T[] => {
+    let total = 0;
+    let start = items.length;
+    while (start > 0) {
+      const next = total + sizeOf(items[start - 1]!);
+      if (start < items.length && next > maxChars) break;
+      total = next;
+      start -= 1;
+    }
+    return start > 0 ? items.slice(start) : items;
+  };
+
+  // Envelopes are immutable once recorded; cache their serialized size so
+  // byte-budget trims do not re-stringify multi-MB events on every snapshot.
+  const envelopeSizeCache = new WeakMap<AgentChatEventEnvelope, number>();
+
+  const estimateEnvelopeChars = (envelope: AgentChatEventEnvelope): number => {
+    let size = envelopeSizeCache.get(envelope);
+    if (size == null) {
+      size = safeJsonChars(envelope);
+      envelopeSizeCache.set(envelope, size);
+    }
+    return size;
+  };
+
+  const trimEnvelopesToByteBudget = (
+    envelopes: AgentChatEventEnvelope[],
+    maxChars: number,
+  ): AgentChatEventEnvelope[] => keepNewestWithinCharBudget(envelopes, maxChars, estimateEnvelopeChars);
+
+  // The single policy for what bounds the in-memory event ring: an event-count
+  // cap, then a byte budget. Applied wherever the ring is (re)written.
+  const boundRingEnvelopes = (envelopes: AgentChatEventEnvelope[]): AgentChatEventEnvelope[] =>
+    trimEnvelopesToByteBudget(
+      envelopes.length > CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION
+        ? envelopes.slice(-CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION)
+        : envelopes,
+      CHAT_EVENT_HISTORY_BUFFER_MAX_CHARS,
+    );
   const transcriptHistoryCacheBySession = new Map<string, {
     transcriptPath: string;
     size: number;
@@ -5311,10 +5374,7 @@ export function createAgentChatService(args: {
   const recordChatEventInHistory = (envelope: AgentChatEventEnvelope): void => {
     const current = eventHistoryBySession.get(envelope.sessionId) ?? [];
     current.push(envelope);
-    if (current.length > CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION) {
-      current.splice(0, current.length - CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION);
-    }
-    eventHistoryBySession.set(envelope.sessionId, current);
+    eventHistoryBySession.set(envelope.sessionId, boundRingEnvelopes(current));
   };
 
   const rememberTranscriptHistoryCache = (
@@ -6790,18 +6850,26 @@ export function createAgentChatService(args: {
     if (merged.length > CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION) {
       merged = merged.slice(-CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION);
     }
-    eventHistoryBySession.set(trimmedId, merged.slice(-CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION));
+    eventHistoryBySession.set(trimmedId, boundRingEnvelopes(merged.slice()));
 
     const parentVisibleMerged = merged.filter((entry) => !isCodexSubagentTranscriptEnvelope(entry));
     const parentVisibleLength = parentVisibleMerged.length;
     const transcriptTruncated = transcriptHistory.truncated;
+    const countWindowed = parentVisibleLength > maxEvents
+      ? parentVisibleMerged.slice(-maxEvents)
+      : parentVisibleMerged;
+    // Backstop byte budget so the serialized snapshot always fits one RPC
+    // message. The ring and transcript-tail budgets keep snapshots well under
+    // it, so it only trims when a single envelope dwarfs both (>~6 MB); such
+    // trimmed events sit AFTER tailStartOffset and are not reachable through
+    // getChatEventHistoryPage (which pages strictly older) — an accepted
+    // seam, the alternative being a response the client must discard.
+    const windowed = trimEnvelopesToByteBudget(countWindowed, CHAT_EVENT_HISTORY_RESPONSE_MAX_CHARS);
     const windowTruncated =
       mergedLengthBeforeResponseCap > CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION
-      || parentVisibleLength > maxEvents;
+      || parentVisibleLength > maxEvents
+      || windowed.length < countWindowed.length;
     const truncated = transcriptTruncated || windowTruncated;
-    const windowed = parentVisibleLength > maxEvents
-      ? parentVisibleMerged.slice(-maxEvents)
-      : parentVisibleMerged;
     return {
       sessionId: trimmedId,
       events: windowed,
@@ -25369,6 +25437,20 @@ export function createAgentChatService(args: {
     return options.limit !== undefined ? sliced.slice(0, options.limit) : sliced;
   }
 
+  // Subagent transcripts merge unbounded sources (full persisted transcript,
+  // live `thread/turns/list?itemsView=full` pulls) and individual messages can
+  // carry multi-MB tool outputs. Keep the newest messages that fit one RPC
+  // response (always at least one) so a single fetch can never exceed the
+  // desktop client's per-message limit. Note for `offset`/`limit` callers: the
+  // bound keeps the newest suffix of the requested window, so a response may
+  // start later than `offset` and a short page does not imply end-of-data.
+  const SUBAGENT_TRANSCRIPT_RESPONSE_MAX_CHARS = 4_000_000;
+
+  const boundSubagentTranscriptResponse = (
+    messages: AgentChatSubagentTranscriptMessage[],
+  ): AgentChatSubagentTranscriptMessage[] =>
+    keepNewestWithinCharBudget(messages, SUBAGENT_TRANSCRIPT_RESPONSE_MAX_CHARS, safeJsonChars);
+
   function mergeSubagentTranscriptMessages(
     left: AgentChatSubagentTranscriptMessage[],
     right: AgentChatSubagentTranscriptMessage[],
@@ -25452,8 +25534,11 @@ export function createAgentChatService(args: {
    * - **Cursor**: SDK `task` events tag every lifecycle envelope with the
    *   subagent's `agentId`; we filter the parent stream by that value.
    * - **Everything else (droid, lmstudio, …)**: `null`.
+   *
+   * Exposed via getSubagentTranscript, which byte-bounds whatever this
+   * returns — new runtime branches here need no size handling of their own.
    */
-  const getSubagentTranscript = async ({
+  const collectSubagentTranscript = async ({
     sessionId,
     agentId,
     laneId,
@@ -25638,6 +25723,16 @@ export function createAgentChatService(args: {
     });
   };
 
+  // The byte bound is a response-boundary invariant: enforce it once here so
+  // every runtime branch in collectSubagentTranscript stays bounded by
+  // construction rather than by remembering to wrap each return.
+  const getSubagentTranscript = async (
+    args: AgentChatSubagentTranscriptArgs,
+  ): Promise<AgentChatSubagentTranscriptMessage[] | null> => {
+    const messages = await collectSubagentTranscript(args);
+    return messages ? boundSubagentTranscriptResponse(messages) : messages;
+  };
+
   const normalizeClaudeContextUsage = (
     usage: SDKControlGetContextUsageResponse,
   ): AgentChatContextUsage => {
diff --git a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts
index 007ff3577..55422dcb2 100644
--- a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts
+++ b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts
@@ -166,6 +166,76 @@ describe("RuntimeRpcClient", () => {
     expect(transport.writes).toEqual([]);
   });
 
+  it("rejects only the request answered by an oversized response and keeps the connection alive", async () => {
+    const transport = new MockTransport();
+    const client = new RuntimeRpcClient(transport);
+
+    const oversized = client.call("chat.getChatEventHistory", {});
+    const unrelated = client.call("projects.list", {});
+    const oversizedId = requestId(transport.writes[0]!);
+    const unrelatedId = requestId(transport.writes[1]!);
+
+    // Stream a single >16 MiB response line in chunks, as a socket would.
+    const head = `{"jsonrpc":"2.0","id":${oversizedId},"result":{"events":["`;
+    transport.emitData(head);
+    const filler = "x".repeat(1024 * 1024);
+    for (let i = 0; i < 17; i++) transport.emitData(filler);
+    transport.emitData(`"]}}\n`);
+
+    await expect(oversized).rejects.toThrow(
+      /response for method chat\.getChatEventHistory exceeded 16 MiB .* and was discarded/,
+    );
+    expect(client.isClosed()).toBe(false);
+
+    // The unrelated in-flight call and brand-new calls still complete.
+    transport.emitData({ jsonrpc: "2.0", id: unrelatedId, result: ["project"] });
+    await expect(unrelated).resolves.toEqual(["project"]);
+    const after = client.call("projects.list", {});
+    transport.emitData({ jsonrpc: "2.0", id: requestId(transport.writes[2]!), result: ["still-alive"] });
+    await expect(after).resolves.toEqual(["still-alive"]);
+  });
+
+  it("parses lines that arrive in the same chunk after an oversized line ends", async () => {
+    const transport = new MockTransport();
+    const client = new RuntimeRpcClient(transport);
+
+    const oversized = client.call("chat.getChatEventHistory", {});
+    const follower = client.call("projects.list", {});
+    const oversizedId = requestId(transport.writes[0]!);
+    const followerId = requestId(transport.writes[1]!);
+
+    transport.emitData(`{"jsonrpc":"2.0","id":${oversizedId},"result":"`);
+    transport.emitData("y".repeat(17 * 1024 * 1024));
+    // The oversized line terminator and the follower response share a chunk.
+    transport.emitData(`"}\n{"jsonrpc":"2.0","id":${followerId},"result":"ok"}\n`);
+
+    await expect(oversized).rejects.toThrow(/exceeded 16 MiB/);
+    await expect(follower).resolves.toBe("ok");
+    expect(client.isClosed()).toBe(false);
+  });
+
+  it("discards an oversized notification without rejecting pending calls", async () => {
+    const transport = new MockTransport();
+    const client = new RuntimeRpcClient(transport);
+    const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+    try {
+      const pending = client.call("projects.list", {});
+      const pendingId = requestId(transport.writes[0]!);
+
+      // A notification whose params embed an "id" field matching the pending
+      // call must not be mistaken for that call's response.
+      transport.emitData(`{"jsonrpc":"2.0","method":"runtime/event","params":{"id":${pendingId},"blob":"`);
+      transport.emitData("z".repeat(17 * 1024 * 1024));
+      transport.emitData(`"}}\n`);
+
+      expect(client.isClosed()).toBe(false);
+      transport.emitData({ jsonrpc: "2.0", id: pendingId, result: ["project"] });
+      await expect(pending).resolves.toEqual(["project"]);
+    } finally {
+      warn.mockRestore();
+    }
+  });
+
   it("dispatches JSON-RPC notifications without resolving pending calls", async () => {
     const transport = new MockTransport();
     const client = new RuntimeRpcClient(transport);
diff --git a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
index 56ef432b0..723aac0d8 100644
--- a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
+++ b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
@@ -13,6 +13,10 @@ type PendingRequest = {
 };
 
 const MAX_RPC_BUFFER_CHARS = 16 * 1024 * 1024;
+// Head of an oversized line retained to identify which request it answers.
+// The JSON-RPC envelope id appears within the first few dozen chars of a
+// response; 4 KiB is generous while keeping discarded-line memory bounded.
+const OVERSIZED_LINE_PREFIX_CHARS = 4096;
 const MAX_RPC_TIMEOUT_MS = 2_147_483_647;
 
 type RuntimeRpcCallOptions = {
@@ -35,9 +39,15 @@ function normalizeRuntimeRpcTimeoutMs(value: number): number {
   return Math.ceil(timeoutMs);
 }
 
+type OversizedLineState = {
+  prefix: string;
+  discardedChars: number;
+};
+
 export class RuntimeRpcClient {
   private nextId = 1;
   private buffer = "";
+  private oversizedLine: OversizedLineState | null = null;
   private readonly pending = new Map<number, PendingRequest>();
   private readonly notificationHandlers = new Map<string, Set<(params: unknown) => void>>();
   private readonly disconnectCallbacks = new Set<(error: Error) => void>();
@@ -144,11 +154,21 @@ export class RuntimeRpcClient {
 
   private onData(chunk: string): void {
     if (this.closedError) return;
-    this.buffer += chunk;
-    if (this.buffer.length > MAX_RPC_BUFFER_CHARS) {
-      this.failConnection(new Error("Remote ADE service response buffer exceeded 16 MiB."));
-      return;
+    if (this.oversizedLine) {
+      // An oversized line is being discarded as it streams. Drop chunks until
+      // its terminating newline, then resume normal parsing on the remainder.
+      const newline = chunk.indexOf("\n");
+      if (newline < 0) {
+        this.oversizedLine.discardedChars += chunk.length;
+        return;
+      }
+      const oversized = this.oversizedLine;
+      this.oversizedLine = null;
+      oversized.discardedChars += newline;
+      this.rejectOversizedLine(oversized);
+      chunk = chunk.slice(newline + 1);
     }
+    this.buffer += chunk;
     while (true) {
       const newline = this.buffer.indexOf("\n");
       if (newline < 0) break;
@@ -156,7 +176,50 @@ export class RuntimeRpcClient {
       this.buffer = this.buffer.slice(newline + 1);
       if (!line) continue;
       this.handleLine(line);
+      if (this.closedError) return;
     }
+    // The leftover is one partial line. If it has outgrown the buffer cap,
+    // reject only the request it answers and discard the rest of the line as
+    // it streams — a single oversized response must not take down the shared
+    // connection (every project pane multiplexes over it). Note the cap
+    // bounds PARTIAL-LINE ACCUMULATION, not message size: a complete line
+    // delivered within one chunk is parsed regardless of length. Real
+    // transports chunk at ~64 KB, so any >16 MiB line lands here in practice.
+    if (this.buffer.length > MAX_RPC_BUFFER_CHARS) {
+      this.oversizedLine = {
+        prefix: this.buffer.slice(0, OVERSIZED_LINE_PREFIX_CHARS),
+        discardedChars: this.buffer.length,
+      };
+      this.buffer = "";
+    }
+  }
+
+  /**
+   * Fail the single request answered by a discarded oversized line. The
+   * envelope id is recovered from the retained line head; a `"method"` key
+   * appearing before `"id"` marks the line as a notification (its params may
+   * embed unrelated `"id"` fields), in which case nothing is rejected.
+   */
+  private rejectOversizedLine(oversized: OversizedLineState): void {
+    const idMatch = /"id"\s*:\s*(\d+)/.exec(oversized.prefix);
+    const methodMatch = /"method"\s*:/.exec(oversized.prefix);
+    const responseId = idMatch && (!methodMatch || idMatch.index < methodMatch.index)
+      ? Number(idMatch[1])
+      : null;
+    const approxMiB = (oversized.discardedChars / (1024 * 1024)).toFixed(1);
+    const pending = responseId != null ? this.pending.get(responseId) : undefined;
+    if (!pending || responseId == null) {
+      console.warn("Remote ADE service sent an oversized message; discarded", {
+        approxMiB,
+        hasResponseId: responseId != null,
+      });
+      return;
+    }
+    this.pending.delete(responseId);
+    clearTimeout(pending.timer);
+    pending.reject(new Error(
+      `Remote ADE service response for method ${pending.method} exceeded 16 MiB (~${approxMiB} MiB) and was discarded.`,
+    ));
   }
 
   private handleLine(line: string): void {
diff --git a/apps/ios/ADE/Services/SyncService.swift b/apps/ios/ADE/Services/SyncService.swift
index a7e90e383..0136abb84 100644
--- a/apps/ios/ADE/Services/SyncService.swift
+++ b/apps/ios/ADE/Services/SyncService.swift
@@ -4798,45 +4798,45 @@ final class SyncService: ObservableObject {
     return response.entries
   }
 
-  /// One page of a paginated chat transcript walk. `nextCursor` is an opaque
-  /// host token identifying the position just before the oldest entry
-  /// returned; pass it back via `fetchChatTranscriptPage(cursor:)` to load
-  /// the previous (older) page. `nil` means the start of the transcript was
-  /// reached.
+  /// One page of a paginated chat transcript walk. `nextCursor` is the
+  /// host-side index of the oldest entry returned (the transcript is
+  /// append-only, so indices are stable); pass it back via
+  /// `fetchChatTranscriptPage(cursor:)` to load the previous (older) page.
+  /// `nil` means the start of the transcript was reached.
   struct AgentChatTranscriptPage: Equatable {
     var sessionId: String
     var entries: [AgentChatTranscriptEntry]
     var truncated: Bool
     var totalEntries: Int
-    var nextCursor: String?
+    var nextCursor: Int?
   }
 
   /// Fetch a transcript page. Without `cursor` this returns the newest
   /// entries (same data as `fetchChatTranscriptResponse`) plus a cursor for
   /// walking backwards; with `cursor` it returns the page strictly BEFORE
-  /// that point. Older hosts that predate pagination simply omit
+  /// that index. Older hosts that predate pagination simply omit
   /// `nextCursor`, which surfaces here as `nil` (no more pages).
   func fetchChatTranscriptPage(
     sessionId: String,
-    cursor: String? = nil,
+    cursor: Int? = nil,
     limit: Int = 200,
     maxChars: Int = 600_000
   ) async throws -> AgentChatTranscriptPage {
     var args: [String: Any] = ["sessionId": sessionId, "limit": limit, "maxChars": maxChars]
-    if let cursor, !cursor.isEmpty {
-      args["cursor"] = cursor
+    if let cursor, cursor > 0 {
+      args["cursor"] = String(cursor)
     }
     let response = try await sendCommand(action: "chat.getTranscript", args: args)
     if let payload = response as? [String: Any], payload["queued"] as? Bool == true {
       throw QueuedRemoteCommandError(action: "chat.getTranscript")
     }
     let transcript = try decode(response, as: AgentChatTranscriptResponse.self)
-    var nextCursor: String?
+    var nextCursor: Int?
     if let dict = response as? [String: Any], let rawCursor = dict["nextCursor"] {
-      if let text = rawCursor as? String, !text.isEmpty {
-        nextCursor = text
+      if let text = rawCursor as? String {
+        nextCursor = Int(text)
       } else if let number = rawCursor as? NSNumber, !(rawCursor is Bool) {
-        nextCursor = number.stringValue
+        nextCursor = number.intValue
       }
     }
     return AgentChatTranscriptPage(
diff --git a/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift b/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift
index 24c5702ee..bdf6c23a0 100644
--- a/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift
+++ b/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift
@@ -70,6 +70,14 @@ extension WorkChatSessionView {
       visibleTimelineCount += workTimelinePageSize
       refreshTimelinePresentation()
     }
+    // Once the locally-buffered timeline is nearly exhausted, pull the next
+    // older transcript page from the host so scroll-back continues through
+    // the full history instead of stopping at the initial tail fetch.
+    if hasOlderTranscriptHistory,
+       hiddenTimelineCount <= workTimelinePageSize * 2,
+       let onLoadOlderTranscript {
+      Task { await onLoadOlderTranscript() }
+    }
   }
 
   @MainActor
diff --git a/apps/ios/ADE/Views/Work/WorkChatSessionView.swift b/apps/ios/ADE/Views/Work/WorkChatSessionView.swift
index dadd2ec03..1e0984c79 100644
--- a/apps/ios/ADE/Views/Work/WorkChatSessionView.swift
+++ b/apps/ios/ADE/Views/Work/WorkChatSessionView.swift
@@ -25,6 +25,7 @@ struct WorkChatSessionView: View {
   @State var actionInFlight = false
   @State var isNearBottom = true
   @State var unreadBelowCount = 0
+  @State var lastTimelineTailId: String?
   @State var artifactDrawerPresented = false
   @State var timelineSnapshot = WorkChatTimelineSnapshot.empty
   @State var timelinePresentation = WorkTimelinePresentation.empty
@@ -57,6 +58,10 @@ struct WorkChatSessionView: View {
   let onSelectEffort: @MainActor (String) async -> Void
 
   var lanes: [LaneSummary] = []
+  // Host-side scroll-back: true while older transcript pages remain on the
+  // host beyond what the phone has fetched; the callback pulls the next page.
+  var hasOlderTranscriptHistory: Bool = false
+  var onLoadOlderTranscript: (@MainActor () async -> Void)? = nil
 
   @State var steerEditDrafts: [String: String] = [:]
   @State var modelPickerPresented = false
@@ -257,14 +262,15 @@ struct WorkChatSessionView: View {
         message: isLive ? "Send a message to start streaming the transcript." : "Reconnect to load the latest chat history from the machine."
       )
     } else {
-      if hiddenTimelineCount > 0 {
+      if hiddenTimelineCount > 0 || hasOlderTranscriptHistory {
+        let nextPageCount = min(hiddenTimelineCount, workTimelinePageSize)
+        let loadEarlierTitle = nextPageCount > 0
+          ? "Load \(nextPageCount) earlier message\(nextPageCount == 1 ? "" : "s")"
+          : "Load earlier messages"
         Button {
           loadEarlierTimelineEntries()
         } label: {
-          Label(
-            "Load \(min(hiddenTimelineCount, workTimelinePageSize)) earlier message\(min(hiddenTimelineCount, workTimelinePageSize) == 1 ? "" : "s")",
-            systemImage: "chevron.up.circle"
-          )
+          Label(loadEarlierTitle, systemImage: "chevron.up.circle")
           .font(.footnote.weight(.semibold))
           .foregroundStyle(ADEColor.accent)
           .frame(maxWidth: .infinity)
@@ -454,8 +460,14 @@ struct WorkChatSessionView: View {
         }
       }
       .onChange(of: timeline.count) { oldCount, newCount in
+        let previousTailId = lastTimelineTailId
+        lastTimelineTailId = timeline.last?.id
         let delta = newCount - oldCount
         guard delta > 0 else { return }
+        // Older-page prepends grow the timeline above the viewport — the
+        // newest entry stays put. Don't autoscroll to the bottom or flag
+        // the prepended entries as "new messages below".
+        if let previousTailId, previousTailId == timeline.last?.id { return }
         if isNearBottom {
           scrollToLatest(proxy, animated: false)
         } else {
diff --git a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
index 9136d1345..c1ca54338 100644
--- a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
+++ b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
@@ -123,6 +123,13 @@ struct WorkSessionDestinationView: View {
   @State var chatSummary: AgentChatSessionSummary?
   @State var transcript: [WorkChatEnvelope] = []
   @State var fallbackEntries: [AgentChatTranscriptEntry] = []
+  // Canonical transcript entries keyed by their host-side index. Tail
+  // refreshes overwrite the newest indices while "load earlier" pages fill
+  // older ones, so a poll can never clobber scroll-back history. The cursor
+  // is the oldest fetched index (0 = transcript head reached).
+  @State var transcriptEntriesByIndex: [Int: AgentChatTranscriptEntry] = [:]
+  @State var olderTranscriptCursor: Int?
+  @State var olderTranscriptLoading = false
   @State var artifacts: [ComputerUseArtifactSummary] = []
   @State var localEchoMessages: [WorkLocalEchoMessage] = []
   @State var optimisticPendingSteers: [WorkPendingSteerModel] = []
@@ -351,7 +358,9 @@ struct WorkSessionDestinationView: View {
           onSelectModel: selectModel,
           onSelectRuntimeMode: selectRuntimeMode,
           onSelectEffort: selectReasoningEffort,
-          lanes: lanes
+          lanes: lanes,
+          hasOlderTranscriptHistory: hasOlderTranscriptHistory,
+          onLoadOlderTranscript: loadOlderTranscriptEntries
         )
       } else {
         TerminalSessionScreen(session: session)
@@ -454,10 +463,11 @@ struct WorkSessionDestinationView: View {
       || (liveTranscript.isEmpty && transcript.isEmpty)
       || (!liveTranscript.isEmpty && status != "active")
     let fallbackMaxChars = status == "active" ? 32_000 : 120_000
-    if shouldFetchFallback, let response = try? await syncService.fetchChatTranscriptResponse(sessionId: sessionId, maxChars: fallbackMaxChars) {
-      fetchedFallbackEntries = response.entries
+    if shouldFetchFallback, let page = try? await syncService.fetchChatTranscriptPage(sessionId: sessionId, maxChars: fallbackMaxChars) {
+      recordTranscriptPage(page, before: nil)
+      fetchedFallbackEntries = combinedTranscriptEntries()
       fetchedFallbackEntriesAvailable = true
-      fallbackTranscript = makeWorkChatTranscript(from: response.entries, sessionId: sessionId)
+      fallbackTranscript = makeWorkChatTranscript(from: fetchedFallbackEntries, sessionId: sessionId)
     }
 
     // Chat-only fallback: parses chat envelopes out of the raw terminal buffer.
@@ -510,6 +520,63 @@ struct WorkSessionDestinationView: View {
     }
   }
 
+  /// Fold one host transcript page into the index-keyed store. `cursor` is
+  /// the `before` index the page was requested with (nil for a tail fetch).
+  /// Host indices are stable because the transcript is append-only.
+  @MainActor
+  func recordTranscriptPage(_ page: SyncService.AgentChatTranscriptPage, before cursor: Int?) {
+    let end = min(cursor ?? page.totalEntries, page.totalEntries)
+    let start = max(0, end - page.entries.count)
+    let pageCursor = page.nextCursor ?? 0
+    if cursor == nil {
+      // Tail refresh. If the new window starts past everything stored (a
+      // burst of entries landed between polls), stitching would render a
+      // transcript with a silent hole — reset to the fresh tail instead and
+      // re-anchor scroll-back below it.
+      let nextContiguousIndex = transcriptEntriesByIndex.keys.max().map { $0 + 1 } ?? 0
+      if start > nextContiguousIndex, !page.entries.isEmpty {
+        transcriptEntriesByIndex = [:]
+        olderTranscriptCursor = pageCursor
+      } else if olderTranscriptCursor == nil {
+        // First fetch establishes the scroll-back anchor; later contiguous
+        // polls must not move it forward past pages the user already loaded.
+        olderTranscriptCursor = pageCursor
+      }
+    } else {
+      olderTranscriptCursor = min(olderTranscriptCursor ?? pageCursor, pageCursor)
+    }
+    for (offset, entry) in page.entries.enumerated() {
+      transcriptEntriesByIndex[start + offset] = entry
+    }
+  }
+
+  @MainActor
+  func combinedTranscriptEntries() -> [AgentChatTranscriptEntry] {
+    transcriptEntriesByIndex.keys.sorted().compactMap { transcriptEntriesByIndex[$0] }
+  }
+
+  var hasOlderTranscriptHistory: Bool {
+    (olderTranscriptCursor ?? 0) > 0
+  }
+
+  /// Fetch the next strictly-older transcript page from the host and prepend
+  /// it to the fallback entries that feed the chat timeline.
+  @MainActor
+  func loadOlderTranscriptEntries() async {
+    guard !olderTranscriptLoading, let cursor = olderTranscriptCursor, cursor > 0 else { return }
+    olderTranscriptLoading = true
+    defer { olderTranscriptLoading = false }
+    guard let page = try? await syncService.fetchChatTranscriptPage(
+      sessionId: sessionId,
+      cursor: cursor
+    ) else { return }
+    recordTranscriptPage(page, before: cursor)
+    let combined = combinedTranscriptEntries()
+    if !combined.isEmpty, combined != fallbackEntries {
+      fallbackEntries = combined
+    }
+  }
+
   @MainActor
   func refreshChatStateAfterAction(forceRemote: Bool = true) async {
     let preferLightweight = syncService.prefersReducedSyncLoad

From 344f338b18b516b0f89f54e39022563ca9b82a00 Mon Sep 17 00:00:00 2001
From: Arul Sharma <31745423+arul28@users.noreply.github.com>
Date: Thu, 11 Jun 2026 12:20:54 -0400
Subject: [PATCH 2/2] =?UTF-8?q?ship:=20iteration=201=20=E2=80=94=20address?=
 =?UTF-8?q?=20Greptile=20review=20(P1=20render=20older=20pages=20immediate?=
 =?UTF-8?q?ly,=20P2=20document=20id=20heuristic)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P1: loadOlderTranscriptEntries now splices the fetched page into the
rendered transcript via preferredWorkTranscript instead of parking it in
fallbackEntries (which buildWorkTimeline only reads while transcript is
empty) — "Load earlier messages" shows the page immediately instead of
after the next poll.

P2: document the response-id extraction assumption (daemon serializes
the envelope id before result) and its benign failure mode.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .../main/services/remoteRuntime/runtimeRpcClient.ts   |  7 +++++++
 .../ADE/Views/Work/WorkSessionDestinationView.swift   | 11 +++++++++++
 2 files changed, 18 insertions(+)

diff --git a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
index 723aac0d8..3de7cddf9 100644
--- a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
+++ b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
@@ -199,6 +199,13 @@ export class RuntimeRpcClient {
    * envelope id is recovered from the retained line head; a `"method"` key
    * appearing before `"id"` marks the line as a notification (its params may
    * embed unrelated `"id"` fields), in which case nothing is rejected.
+   *
+   * Heuristic assumption: the daemon serializes response envelopes with the
+   * top-level `id` before `result` (jsonrpc.ts writeMessage key order), so
+   * the first `"id"` in a response line is the envelope id, never one nested
+   * inside the result. If a misfire ever happens anyway, the failure mode is
+   * benign: no pending entry matches, the line is warn-logged and dropped,
+   * and the real caller times out instead of receiving a wrong rejection.
    */
   private rejectOversizedLine(oversized: OversizedLineState): void {
     const idMatch = /"id"\s*:\s*(\d+)/.exec(oversized.prefix);
diff --git a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
index c1ca54338..cbaab5736 100644
--- a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
+++ b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
@@ -575,6 +575,17 @@ struct WorkSessionDestinationView: View {
     if !combined.isEmpty, combined != fallbackEntries {
       fallbackEntries = combined
     }
+    // fallbackEntries only feed the timeline while `transcript` is empty
+    // (buildWorkTimeline), so splice the older entries into the rendered
+    // transcript right away — otherwise the fetched page stays invisible
+    // until the next loadTranscript poll. preferredWorkTranscript backfills
+    // by role+turnId+text identity, so entries already rendered from live
+    // events are not duplicated.
+    let olderTranscript = makeWorkChatTranscript(from: combined, sessionId: sessionId)
+    let merged = preferredWorkTranscript(current: [], fallback: olderTranscript, eventTranscript: transcript)
+    if !merged.isEmpty, merged != transcript {
+      transcript = merged
+    }
   }
 
   @MainActor