diff --git a/apps/desktop/src/main/services/chat/agentChatService.test.ts b/apps/desktop/src/main/services/chat/agentChatService.test.ts
index a94e9f064..92f591df3 100644
--- a/apps/desktop/src/main/services/chat/agentChatService.test.ts
+++ b/apps/desktop/src/main/services/chat/agentChatService.test.ts
@@ -11390,6 +11390,64 @@ describe("createAgentChatService", () => {
       )).toEqual(["event-2", "event-3", "event-4"]);
     });
 
+    it("byte-caps a snapshot whose merged events exceed the response budget", async () => {
+      // Regression: individual chat events can carry multi-MB tool outputs.
+      // Event-count caps alone let a snapshot serialize past the desktop RPC
+      // client's 16 MiB per-message limit, which used to fail every in-flight
+      // call on the shared runtime socket.
+      const { service } = createService();
+      const session = await service.createSession({
+        laneId: "lane-1",
+        provider: "codex",
+        model: "gpt-5.4",
+      });
+
+      const envelopes: AgentChatEventEnvelope[] = Array.from({ length: 4 }, (_, index) => ({
+        sessionId: session.id,
+        timestamp: `2026-04-23T10:0${index}:00.000Z`,
+        event: { type: "text", text: `event-${index}-${"x".repeat(3_000_000)}` },
+        sequence: index + 1,
+      }));
+      const transcriptFile = path.join(tmpRoot, "transcripts", `${session.id}.chat.jsonl`);
+      fs.writeFileSync(transcriptFile, "ignored\n", "utf8");
+      vi.mocked(parseAgentChatTranscript).mockReturnValue(envelopes);
+
+      const history = service.getChatEventHistory(session.id);
+
+      // 4 × ~3 MB events exceed the 8 MB response budget: only the newest
+      // events that fit are returned, and the trim is reported as window
+      // truncation so clients know to page for the rest.
+      expect(history.events.length).toBeLessThan(envelopes.length);
+      expect(history.events.length).toBeGreaterThan(0);
+      expect(history.windowTruncated).toBe(true);
+      expect(history.truncated).toBe(true);
+      expect(JSON.stringify(history.events).length).toBeLessThanOrEqual(8_000_000);
+      const lastEvent = history.events.at(-1)?.event;
+      expect(lastEvent?.type === "text" ? lastEvent.text.startsWith("event-3-") : false).toBe(true);
+    });
+
+    it("always returns at least the newest event even when it alone exceeds the byte budget", async () => {
+      const { service } = createService();
+      const session = await service.createSession({
+        laneId: "lane-1",
+        provider: "codex",
+        model: "gpt-5.4",
+      });
+
+      const giant: AgentChatEventEnvelope = {
+        sessionId: session.id,
+        timestamp: "2026-04-23T10:00:00.000Z",
+        event: { type: "text", text: "giant-".concat("y".repeat(9_000_000)) },
+        sequence: 1,
+      };
+      const transcriptFile = path.join(tmpRoot, "transcripts", `${session.id}.chat.jsonl`);
+      fs.writeFileSync(transcriptFile, "ignored\n", "utf8");
+      vi.mocked(parseAgentChatTranscript).mockReturnValue([giant]);
+
+      const history = service.getChatEventHistory(session.id);
+      expect(history.events).toHaveLength(1);
+    });
+
     it("marks window truncation when the service response cap removes events", async () => {
       const { service } = createService();
       const session = await service.createSession({
diff --git a/apps/desktop/src/main/services/chat/agentChatService.ts b/apps/desktop/src/main/services/chat/agentChatService.ts
index b784aeac9..fa3b33339 100644
--- a/apps/desktop/src/main/services/chat/agentChatService.ts
+++ b/apps/desktop/src/main/services/chat/agentChatService.ts
@@ -5297,7 +5297,70 @@ export function createAgentChatService(args: {
   const CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION = 20_000;
   const CHAT_EVENT_HISTORY_TRANSCRIPT_MAX_BYTES = 2_000_000;
   const CHAT_EVENT_HISTORY_TRANSCRIPT_CACHE_MAX_SESSIONS = 32;
+  // Byte budgets alongside the event-count caps above. Individual events are
+  // unbounded (multi-MB tool outputs exist in real transcripts), so count caps
+  // alone cannot keep a history snapshot under the desktop RPC client's
+  // 16 MiB per-message limit; one over-limit response used to fail every
+  // in-flight call on the shared runtime socket. The ring budget is the
+  // working bound (ring 4 MB + 2 MB transcript tail ≈ 6 MB merged); the
+  // response budget is a backstop that should not trigger in practice.
+  const CHAT_EVENT_HISTORY_BUFFER_MAX_CHARS = 4_000_000;
+  const CHAT_EVENT_HISTORY_RESPONSE_MAX_CHARS = 8_000_000;
   const eventHistoryBySession = new Map<string, AgentChatEventEnvelope[]>();
+
+  const safeJsonChars = (value: unknown): number => {
+    try {
+      return JSON.stringify(value).length;
+    } catch {
+      return 2_048;
+    }
+  };
+
+  // Keep the newest items whose cumulative serialized size fits the budget
+  // (always at least the newest one, even when it alone exceeds it).
+  const keepNewestWithinCharBudget = <T>(
+    items: T[],
+    maxChars: number,
+    sizeOf: (item: T) => number,
+  ): T[] => {
+    let total = 0;
+    let start = items.length;
+    while (start > 0) {
+      const next = total + sizeOf(items[start - 1]!);
+      if (start < items.length && next > maxChars) break;
+      total = next;
+      start -= 1;
+    }
+    return start > 0 ? items.slice(start) : items;
+  };
+
+  // Envelopes are immutable once recorded; cache their serialized size so
+  // byte-budget trims do not re-stringify multi-MB events on every snapshot.
+  const envelopeSizeCache = new WeakMap<AgentChatEventEnvelope, number>();
+
+  const estimateEnvelopeChars = (envelope: AgentChatEventEnvelope): number => {
+    let size = envelopeSizeCache.get(envelope);
+    if (size == null) {
+      size = safeJsonChars(envelope);
+      envelopeSizeCache.set(envelope, size);
+    }
+    return size;
+  };
+
+  const trimEnvelopesToByteBudget = (
+    envelopes: AgentChatEventEnvelope[],
+    maxChars: number,
+  ): AgentChatEventEnvelope[] => keepNewestWithinCharBudget(envelopes, maxChars, estimateEnvelopeChars);
+
+  // The single policy for what bounds the in-memory event ring: an event-count
+  // cap, then a byte budget. Applied wherever the ring is (re)written.
+  const boundRingEnvelopes = (envelopes: AgentChatEventEnvelope[]): AgentChatEventEnvelope[] =>
+    trimEnvelopesToByteBudget(
+      envelopes.length > CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION
+        ? envelopes.slice(-CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION)
+        : envelopes,
+      CHAT_EVENT_HISTORY_BUFFER_MAX_CHARS,
+    );
   const transcriptHistoryCacheBySession = new Map<string, {
     transcriptPath: string;
     size: number;
@@ -5311,10 +5374,7 @@ export function createAgentChatService(args: {
   const recordChatEventInHistory = (envelope: AgentChatEventEnvelope): void => {
     const current = eventHistoryBySession.get(envelope.sessionId) ?? [];
     current.push(envelope);
-    if (current.length > CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION) {
-      current.splice(0, current.length - CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION);
-    }
-    eventHistoryBySession.set(envelope.sessionId, current);
+    eventHistoryBySession.set(envelope.sessionId, boundRingEnvelopes(current));
   };
 
   const rememberTranscriptHistoryCache = (
@@ -6790,18 +6850,26 @@ export function createAgentChatService(args: {
     if (merged.length > CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION) {
       merged = merged.slice(-CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION);
     }
-    eventHistoryBySession.set(trimmedId, merged.slice(-CHAT_EVENT_HISTORY_BUFFER_MAX_PER_SESSION));
+    eventHistoryBySession.set(trimmedId, boundRingEnvelopes(merged.slice()));
 
     const parentVisibleMerged = merged.filter((entry) => !isCodexSubagentTranscriptEnvelope(entry));
     const parentVisibleLength = parentVisibleMerged.length;
     const transcriptTruncated = transcriptHistory.truncated;
+    const countWindowed = parentVisibleLength > maxEvents
+      ? parentVisibleMerged.slice(-maxEvents)
+      : parentVisibleMerged;
+    // Backstop byte budget so the serialized snapshot always fits one RPC
+    // message. The ring and transcript-tail budgets keep snapshots well under
+    // it, so it only trims when a single envelope dwarfs both (>~6 MB); such
+    // trimmed events sit AFTER tailStartOffset and are not reachable through
+    // getChatEventHistoryPage (which pages strictly older) — an accepted
+    // seam, the alternative being a response the client must discard.
+    const windowed = trimEnvelopesToByteBudget(countWindowed, CHAT_EVENT_HISTORY_RESPONSE_MAX_CHARS);
     const windowTruncated =
       mergedLengthBeforeResponseCap > CHAT_EVENT_HISTORY_RESPONSE_MAX_PER_SESSION
-      || parentVisibleLength > maxEvents;
+      || parentVisibleLength > maxEvents
+      || windowed.length < countWindowed.length;
     const truncated = transcriptTruncated || windowTruncated;
-    const windowed = parentVisibleLength > maxEvents
-      ? parentVisibleMerged.slice(-maxEvents)
-      : parentVisibleMerged;
     return {
       sessionId: trimmedId,
       events: windowed,
@@ -25369,6 +25437,20 @@ export function createAgentChatService(args: {
     return options.limit !== undefined ? sliced.slice(0, options.limit) : sliced;
   }
 
+  // Subagent transcripts merge unbounded sources (full persisted transcript,
+  // live `thread/turns/list?itemsView=full` pulls) and individual messages can
+  // carry multi-MB tool outputs. Keep the newest messages that fit one RPC
+  // response (always at least one) so a single fetch can never exceed the
+  // desktop client's per-message limit. Note for `offset`/`limit` callers: the
+  // bound keeps the newest suffix of the requested window, so a response may
+  // start later than `offset` and a short page does not imply end-of-data.
+  const SUBAGENT_TRANSCRIPT_RESPONSE_MAX_CHARS = 4_000_000;
+
+  const boundSubagentTranscriptResponse = (
+    messages: AgentChatSubagentTranscriptMessage[],
+  ): AgentChatSubagentTranscriptMessage[] =>
+    keepNewestWithinCharBudget(messages, SUBAGENT_TRANSCRIPT_RESPONSE_MAX_CHARS, safeJsonChars);
+
   function mergeSubagentTranscriptMessages(
     left: AgentChatSubagentTranscriptMessage[],
     right: AgentChatSubagentTranscriptMessage[],
@@ -25452,8 +25534,11 @@ export function createAgentChatService(args: {
    * - **Cursor**: SDK `task` events tag every lifecycle envelope with the
    *   subagent's `agentId`; we filter the parent stream by that value.
    * - **Everything else (droid, lmstudio, …)**: `null`.
+   *
+   * Exposed via getSubagentTranscript, which byte-bounds whatever this
+   * returns — new runtime branches here need no size handling of their own.
    */
-  const getSubagentTranscript = async ({
+  const collectSubagentTranscript = async ({
     sessionId,
     agentId,
     laneId,
@@ -25638,6 +25723,16 @@ export function createAgentChatService(args: {
     });
   };
 
+  // The byte bound is a response-boundary invariant: enforce it once here so
+  // every runtime branch in collectSubagentTranscript stays bounded by
+  // construction rather than by remembering to wrap each return.
+  const getSubagentTranscript = async (
+    args: AgentChatSubagentTranscriptArgs,
+  ): Promise<AgentChatSubagentTranscriptMessage[] | null> => {
+    const messages = await collectSubagentTranscript(args);
+    return messages ? boundSubagentTranscriptResponse(messages) : messages;
+  };
+
   const normalizeClaudeContextUsage = (
     usage: SDKControlGetContextUsageResponse,
   ): AgentChatContextUsage => {
diff --git a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts
index 007ff3577..55422dcb2 100644
--- a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts
+++ b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.test.ts
@@ -166,6 +166,76 @@ describe("RuntimeRpcClient", () => {
     expect(transport.writes).toEqual([]);
   });
 
+  it("rejects only the request answered by an oversized response and keeps the connection alive", async () => {
+    const transport = new MockTransport();
+    const client = new RuntimeRpcClient(transport);
+
+    const oversized = client.call("chat.getChatEventHistory", {});
+    const unrelated = client.call("projects.list", {});
+    const oversizedId = requestId(transport.writes[0]!);
+    const unrelatedId = requestId(transport.writes[1]!);
+
+    // Stream a single >16 MiB response line in chunks, as a socket would.
+    const head = `{"jsonrpc":"2.0","id":${oversizedId},"result":{"events":["`;
+    transport.emitData(head);
+    const filler = "x".repeat(1024 * 1024);
+    for (let i = 0; i < 17; i++) transport.emitData(filler);
+    transport.emitData(`"]}}\n`);
+
+    await expect(oversized).rejects.toThrow(
+      /response for method chat\.getChatEventHistory exceeded 16 MiB .* and was discarded/,
+    );
+    expect(client.isClosed()).toBe(false);
+
+    // The unrelated in-flight call and brand-new calls still complete.
+    transport.emitData({ jsonrpc: "2.0", id: unrelatedId, result: ["project"] });
+    await expect(unrelated).resolves.toEqual(["project"]);
+    const after = client.call("projects.list", {});
+    transport.emitData({ jsonrpc: "2.0", id: requestId(transport.writes[2]!), result: ["still-alive"] });
+    await expect(after).resolves.toEqual(["still-alive"]);
+  });
+
+  it("parses lines that arrive in the same chunk after an oversized line ends", async () => {
+    const transport = new MockTransport();
+    const client = new RuntimeRpcClient(transport);
+
+    const oversized = client.call("chat.getChatEventHistory", {});
+    const follower = client.call("projects.list", {});
+    const oversizedId = requestId(transport.writes[0]!);
+    const followerId = requestId(transport.writes[1]!);
+
+    transport.emitData(`{"jsonrpc":"2.0","id":${oversizedId},"result":"`);
+    transport.emitData("y".repeat(17 * 1024 * 1024));
+    // The oversized line terminator and the follower response share a chunk.
+    transport.emitData(`"}\n{"jsonrpc":"2.0","id":${followerId},"result":"ok"}\n`);
+
+    await expect(oversized).rejects.toThrow(/exceeded 16 MiB/);
+    await expect(follower).resolves.toBe("ok");
+    expect(client.isClosed()).toBe(false);
+  });
+
+  it("discards an oversized notification without rejecting pending calls", async () => {
+    const transport = new MockTransport();
+    const client = new RuntimeRpcClient(transport);
+    const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+    try {
+      const pending = client.call("projects.list", {});
+      const pendingId = requestId(transport.writes[0]!);
+
+      // A notification whose params embed an "id" field matching the pending
+      // call must not be mistaken for that call's response.
+      transport.emitData(`{"jsonrpc":"2.0","method":"runtime/event","params":{"id":${pendingId},"blob":"`);
+      transport.emitData("z".repeat(17 * 1024 * 1024));
+      transport.emitData(`"}}\n`);
+
+      expect(client.isClosed()).toBe(false);
+      transport.emitData({ jsonrpc: "2.0", id: pendingId, result: ["project"] });
+      await expect(pending).resolves.toEqual(["project"]);
+    } finally {
+      warn.mockRestore();
+    }
+  });
+
   it("dispatches JSON-RPC notifications without resolving pending calls", async () => {
     const transport = new MockTransport();
     const client = new RuntimeRpcClient(transport);
diff --git a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
index 56ef432b0..3de7cddf9 100644
--- a/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
+++ b/apps/desktop/src/main/services/remoteRuntime/runtimeRpcClient.ts
@@ -13,6 +13,10 @@ type PendingRequest = {
 };
 
 const MAX_RPC_BUFFER_CHARS = 16 * 1024 * 1024;
+// Head of an oversized line retained to identify which request it answers.
+// The JSON-RPC envelope id appears within the first few dozen chars of a
+// response; 4 KiB is generous while keeping discarded-line memory bounded.
+const OVERSIZED_LINE_PREFIX_CHARS = 4096;
 const MAX_RPC_TIMEOUT_MS = 2_147_483_647;
 
 type RuntimeRpcCallOptions = {
@@ -35,9 +39,15 @@ function normalizeRuntimeRpcTimeoutMs(value: number): number {
   return Math.ceil(timeoutMs);
 }
 
+type OversizedLineState = {
+  prefix: string;
+  discardedChars: number;
+};
+
 export class RuntimeRpcClient {
   private nextId = 1;
   private buffer = "";
+  private oversizedLine: OversizedLineState | null = null;
   private readonly pending = new Map<number, PendingRequest>();
   private readonly notificationHandlers = new Map<string, Set<(params: unknown) => void>>();
   private readonly disconnectCallbacks = new Set<(error: Error) => void>();
@@ -144,11 +154,21 @@ export class RuntimeRpcClient {
 
   private onData(chunk: string): void {
     if (this.closedError) return;
-    this.buffer += chunk;
-    if (this.buffer.length > MAX_RPC_BUFFER_CHARS) {
-      this.failConnection(new Error("Remote ADE service response buffer exceeded 16 MiB."));
-      return;
+    if (this.oversizedLine) {
+      // An oversized line is being discarded as it streams. Drop chunks until
+      // its terminating newline, then resume normal parsing on the remainder.
+      const newline = chunk.indexOf("\n");
+      if (newline < 0) {
+        this.oversizedLine.discardedChars += chunk.length;
+        return;
+      }
+      const oversized = this.oversizedLine;
+      this.oversizedLine = null;
+      oversized.discardedChars += newline;
+      this.rejectOversizedLine(oversized);
+      chunk = chunk.slice(newline + 1);
     }
+    this.buffer += chunk;
     while (true) {
       const newline = this.buffer.indexOf("\n");
       if (newline < 0) break;
@@ -156,7 +176,57 @@ export class RuntimeRpcClient {
       this.buffer = this.buffer.slice(newline + 1);
       if (!line) continue;
       this.handleLine(line);
+      if (this.closedError) return;
     }
+    // The leftover is one partial line. If it has outgrown the buffer cap,
+    // reject only the request it answers and discard the rest of the line as
+    // it streams — a single oversized response must not take down the shared
+    // connection (every project pane multiplexes over it). Note the cap
+    // bounds PARTIAL-LINE ACCUMULATION, not message size: a complete line
+    // delivered within one chunk is parsed regardless of length. Real
+    // transports chunk at ~64 KB, so any >16 MiB line lands here in practice.
+    if (this.buffer.length > MAX_RPC_BUFFER_CHARS) {
+      this.oversizedLine = {
+        prefix: this.buffer.slice(0, OVERSIZED_LINE_PREFIX_CHARS),
+        discardedChars: this.buffer.length,
+      };
+      this.buffer = "";
+    }
+  }
+
+  /**
+   * Fail the single request answered by a discarded oversized line. The
+   * envelope id is recovered from the retained line head; a `"method"` key
+   * appearing before `"id"` marks the line as a notification (its params may
+   * embed unrelated `"id"` fields), in which case nothing is rejected.
+   *
+   * Heuristic assumption: the daemon serializes response envelopes with the
+   * top-level `id` before `result` (jsonrpc.ts writeMessage key order), so
+   * the first `"id"` in a response line is the envelope id, never one nested
+   * inside the result. If a misfire ever happens anyway, the failure mode is
+   * benign: no pending entry matches, the line is warn-logged and dropped,
+   * and the real caller times out instead of receiving a wrong rejection.
+   */
+  private rejectOversizedLine(oversized: OversizedLineState): void {
+    const idMatch = /"id"\s*:\s*(\d+)/.exec(oversized.prefix);
+    const methodMatch = /"method"\s*:/.exec(oversized.prefix);
+    const responseId = idMatch && (!methodMatch || idMatch.index < methodMatch.index)
+      ? Number(idMatch[1])
+      : null;
+    const approxMiB = (oversized.discardedChars / (1024 * 1024)).toFixed(1);
+    const pending = responseId != null ? this.pending.get(responseId) : undefined;
+    if (!pending || responseId == null) {
+      console.warn("Remote ADE service sent an oversized message; discarded", {
+        approxMiB,
+        hasResponseId: responseId != null,
+      });
+      return;
+    }
+    this.pending.delete(responseId);
+    clearTimeout(pending.timer);
+    pending.reject(new Error(
+      `Remote ADE service response for method ${pending.method} exceeded 16 MiB (~${approxMiB} MiB) and was discarded.`,
+    ));
   }
 
   private handleLine(line: string): void {
diff --git a/apps/ios/ADE/Services/SyncService.swift b/apps/ios/ADE/Services/SyncService.swift
index a7e90e383..0136abb84 100644
--- a/apps/ios/ADE/Services/SyncService.swift
+++ b/apps/ios/ADE/Services/SyncService.swift
@@ -4798,45 +4798,45 @@ final class SyncService: ObservableObject {
     return response.entries
   }
 
-  /// One page of a paginated chat transcript walk. `nextCursor` is an opaque
-  /// host token identifying the position just before the oldest entry
-  /// returned; pass it back via `fetchChatTranscriptPage(cursor:)` to load
-  /// the previous (older) page. `nil` means the start of the transcript was
-  /// reached.
+  /// One page of a paginated chat transcript walk. `nextCursor` is the
+  /// host-side index of the oldest entry returned (the transcript is
+  /// append-only, so indices are stable); pass it back via
+  /// `fetchChatTranscriptPage(cursor:)` to load the previous (older) page.
+  /// `nil` means the start of the transcript was reached.
   struct AgentChatTranscriptPage: Equatable {
     var sessionId: String
     var entries: [AgentChatTranscriptEntry]
     var truncated: Bool
     var totalEntries: Int
-    var nextCursor: String?
+    var nextCursor: Int?
   }
 
   /// Fetch a transcript page. Without `cursor` this returns the newest
   /// entries (same data as `fetchChatTranscriptResponse`) plus a cursor for
   /// walking backwards; with `cursor` it returns the page strictly BEFORE
-  /// that point. Older hosts that predate pagination simply omit
+  /// that index. Older hosts that predate pagination simply omit
   /// `nextCursor`, which surfaces here as `nil` (no more pages).
   func fetchChatTranscriptPage(
     sessionId: String,
-    cursor: String? = nil,
+    cursor: Int? = nil,
     limit: Int = 200,
     maxChars: Int = 600_000
   ) async throws -> AgentChatTranscriptPage {
     var args: [String: Any] = ["sessionId": sessionId, "limit": limit, "maxChars": maxChars]
-    if let cursor, !cursor.isEmpty {
-      args["cursor"] = cursor
+    if let cursor, cursor > 0 {
+      args["cursor"] = String(cursor)
     }
     let response = try await sendCommand(action: "chat.getTranscript", args: args)
     if let payload = response as? [String: Any], payload["queued"] as? Bool == true {
       throw QueuedRemoteCommandError(action: "chat.getTranscript")
     }
     let transcript = try decode(response, as: AgentChatTranscriptResponse.self)
-    var nextCursor: String?
+    var nextCursor: Int?
     if let dict = response as? [String: Any], let rawCursor = dict["nextCursor"] {
-      if let text = rawCursor as? String, !text.isEmpty {
-        nextCursor = text
+      if let text = rawCursor as? String {
+        nextCursor = Int(text)
       } else if let number = rawCursor as? NSNumber, !(rawCursor is Bool) {
-        nextCursor = number.stringValue
+        nextCursor = number.intValue
       }
     }
     return AgentChatTranscriptPage(
diff --git a/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift b/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift
index 24c5702ee..bdf6c23a0 100644
--- a/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift
+++ b/apps/ios/ADE/Views/Work/WorkChatSessionView+Actions.swift
@@ -70,6 +70,14 @@ extension WorkChatSessionView {
       visibleTimelineCount += workTimelinePageSize
       refreshTimelinePresentation()
     }
+    // Once the locally-buffered timeline is nearly exhausted, pull the next
+    // older transcript page from the host so scroll-back continues through
+    // the full history instead of stopping at the initial tail fetch.
+    if hasOlderTranscriptHistory,
+       hiddenTimelineCount <= workTimelinePageSize * 2,
+       let onLoadOlderTranscript {
+      Task { await onLoadOlderTranscript() }
+    }
   }
 
   @MainActor
diff --git a/apps/ios/ADE/Views/Work/WorkChatSessionView.swift b/apps/ios/ADE/Views/Work/WorkChatSessionView.swift
index dadd2ec03..1e0984c79 100644
--- a/apps/ios/ADE/Views/Work/WorkChatSessionView.swift
+++ b/apps/ios/ADE/Views/Work/WorkChatSessionView.swift
@@ -25,6 +25,7 @@ struct WorkChatSessionView: View {
   @State var actionInFlight = false
   @State var isNearBottom = true
   @State var unreadBelowCount = 0
+  @State var lastTimelineTailId: String?
   @State var artifactDrawerPresented = false
   @State var timelineSnapshot = WorkChatTimelineSnapshot.empty
   @State var timelinePresentation = WorkTimelinePresentation.empty
@@ -57,6 +58,10 @@ struct WorkChatSessionView: View {
   let onSelectEffort: @MainActor (String) async -> Void
 
   var lanes: [LaneSummary] = []
+  // Host-side scroll-back: true while older transcript pages remain on the
+  // host beyond what the phone has fetched; the callback pulls the next page.
+  var hasOlderTranscriptHistory: Bool = false
+  var onLoadOlderTranscript: (@MainActor () async -> Void)? = nil
 
   @State var steerEditDrafts: [String: String] = [:]
   @State var modelPickerPresented = false
@@ -257,14 +262,15 @@ struct WorkChatSessionView: View {
         message: isLive ? "Send a message to start streaming the transcript." : "Reconnect to load the latest chat history from the machine."
       )
     } else {
-      if hiddenTimelineCount > 0 {
+      if hiddenTimelineCount > 0 || hasOlderTranscriptHistory {
+        let nextPageCount = min(hiddenTimelineCount, workTimelinePageSize)
+        let loadEarlierTitle = nextPageCount > 0
+          ? "Load \(nextPageCount) earlier message\(nextPageCount == 1 ? "" : "s")"
+          : "Load earlier messages"
         Button {
           loadEarlierTimelineEntries()
         } label: {
-          Label(
-            "Load \(min(hiddenTimelineCount, workTimelinePageSize)) earlier message\(min(hiddenTimelineCount, workTimelinePageSize) == 1 ? "" : "s")",
-            systemImage: "chevron.up.circle"
-          )
+          Label(loadEarlierTitle, systemImage: "chevron.up.circle")
           .font(.footnote.weight(.semibold))
           .foregroundStyle(ADEColor.accent)
           .frame(maxWidth: .infinity)
@@ -454,8 +460,14 @@ struct WorkChatSessionView: View {
         }
       }
       .onChange(of: timeline.count) { oldCount, newCount in
+        let previousTailId = lastTimelineTailId
+        lastTimelineTailId = timeline.last?.id
         let delta = newCount - oldCount
         guard delta > 0 else { return }
+        // Older-page prepends grow the timeline above the viewport — the
+        // newest entry stays put. Don't autoscroll to the bottom or flag
+        // the prepended entries as "new messages below".
+        if let previousTailId, previousTailId == timeline.last?.id { return }
         if isNearBottom {
           scrollToLatest(proxy, animated: false)
         } else {
diff --git a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
index 9136d1345..cbaab5736 100644
--- a/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
+++ b/apps/ios/ADE/Views/Work/WorkSessionDestinationView.swift
@@ -123,6 +123,13 @@ struct WorkSessionDestinationView: View {
   @State var chatSummary: AgentChatSessionSummary?
   @State var transcript: [WorkChatEnvelope] = []
   @State var fallbackEntries: [AgentChatTranscriptEntry] = []
+  // Canonical transcript entries keyed by their host-side index. Tail
+  // refreshes overwrite the newest indices while "load earlier" pages fill
+  // older ones, so a poll can never clobber scroll-back history. The cursor
+  // is the oldest fetched index (0 = transcript head reached).
+  @State var transcriptEntriesByIndex: [Int: AgentChatTranscriptEntry] = [:]
+  @State var olderTranscriptCursor: Int?
+  @State var olderTranscriptLoading = false
   @State var artifacts: [ComputerUseArtifactSummary] = []
   @State var localEchoMessages: [WorkLocalEchoMessage] = []
   @State var optimisticPendingSteers: [WorkPendingSteerModel] = []
@@ -351,7 +358,9 @@ struct WorkSessionDestinationView: View {
           onSelectModel: selectModel,
           onSelectRuntimeMode: selectRuntimeMode,
           onSelectEffort: selectReasoningEffort,
-          lanes: lanes
+          lanes: lanes,
+          hasOlderTranscriptHistory: hasOlderTranscriptHistory,
+          onLoadOlderTranscript: loadOlderTranscriptEntries
         )
       } else {
         TerminalSessionScreen(session: session)
@@ -454,10 +463,11 @@ struct WorkSessionDestinationView: View {
       || (liveTranscript.isEmpty && transcript.isEmpty)
       || (!liveTranscript.isEmpty && status != "active")
     let fallbackMaxChars = status == "active" ? 32_000 : 120_000
-    if shouldFetchFallback, let response = try? await syncService.fetchChatTranscriptResponse(sessionId: sessionId, maxChars: fallbackMaxChars) {
-      fetchedFallbackEntries = response.entries
+    if shouldFetchFallback, let page = try? await syncService.fetchChatTranscriptPage(sessionId: sessionId, maxChars: fallbackMaxChars) {
+      recordTranscriptPage(page, before: nil)
+      fetchedFallbackEntries = combinedTranscriptEntries()
       fetchedFallbackEntriesAvailable = true
-      fallbackTranscript = makeWorkChatTranscript(from: response.entries, sessionId: sessionId)
+      fallbackTranscript = makeWorkChatTranscript(from: fetchedFallbackEntries, sessionId: sessionId)
     }
 
     // Chat-only fallback: parses chat envelopes out of the raw terminal buffer.
@@ -510,6 +520,74 @@ struct WorkSessionDestinationView: View {
     }
   }
 
+  /// Fold one host transcript page into the index-keyed store. `cursor` is
+  /// the `before` index the page was requested with (nil for a tail fetch).
+  /// Host indices are stable because the transcript is append-only.
+  @MainActor
+  func recordTranscriptPage(_ page: SyncService.AgentChatTranscriptPage, before cursor: Int?) {
+    let end = min(cursor ?? page.totalEntries, page.totalEntries)
+    let start = max(0, end - page.entries.count)
+    let pageCursor = page.nextCursor ?? 0
+    if cursor == nil {
+      // Tail refresh. If the new window starts past everything stored (a
+      // burst of entries landed between polls), stitching would render a
+      // transcript with a silent hole — reset to the fresh tail instead and
+      // re-anchor scroll-back below it.
+      let nextContiguousIndex = transcriptEntriesByIndex.keys.max().map { $0 + 1 } ?? 0
+      if start > nextContiguousIndex, !page.entries.isEmpty {
+        transcriptEntriesByIndex = [:]
+        olderTranscriptCursor = pageCursor
+      } else if olderTranscriptCursor == nil {
+        // First fetch establishes the scroll-back anchor; later contiguous
+        // polls must not move it forward past pages the user already loaded.
+        olderTranscriptCursor = pageCursor
+      }
+    } else {
+      olderTranscriptCursor = min(olderTranscriptCursor ?? pageCursor, pageCursor)
+    }
+    for (offset, entry) in page.entries.enumerated() {
+      transcriptEntriesByIndex[start + offset] = entry
+    }
+  }
+
+  @MainActor
+  func combinedTranscriptEntries() -> [AgentChatTranscriptEntry] {
+    transcriptEntriesByIndex.keys.sorted().compactMap { transcriptEntriesByIndex[$0] }
+  }
+
+  var hasOlderTranscriptHistory: Bool {
+    (olderTranscriptCursor ?? 0) > 0
+  }
+
+  /// Fetch the next strictly-older transcript page from the host and prepend
+  /// it to the fallback entries that feed the chat timeline.
+  @MainActor
+  func loadOlderTranscriptEntries() async {
+    guard !olderTranscriptLoading, let cursor = olderTranscriptCursor, cursor > 0 else { return }
+    olderTranscriptLoading = true
+    defer { olderTranscriptLoading = false }
+    guard let page = try? await syncService.fetchChatTranscriptPage(
+      sessionId: sessionId,
+      cursor: cursor
+    ) else { return }
+    recordTranscriptPage(page, before: cursor)
+    let combined = combinedTranscriptEntries()
+    if !combined.isEmpty, combined != fallbackEntries {
+      fallbackEntries = combined
+    }
+    // fallbackEntries only feed the timeline while `transcript` is empty
+    // (buildWorkTimeline), so splice the older entries into the rendered
+    // transcript right away — otherwise the fetched page stays invisible
+    // until the next loadTranscript poll. preferredWorkTranscript backfills
+    // by role+turnId+text identity, so entries already rendered from live
+    // events are not duplicated.
+    let olderTranscript = makeWorkChatTranscript(from: combined, sessionId: sessionId)
+    let merged = preferredWorkTranscript(current: [], fallback: olderTranscript, eventTranscript: transcript)
+    if !merged.isEmpty, merged != transcript {
+      transcript = merged
+    }
+  }
+
   @MainActor
   func refreshChatStateAfterAction(forceRemote: Bool = true) async {
     let preferLightweight = syncService.prefersReducedSyncLoad