iterator() {
+ return Collections.emptyIterator();
+ }
+ };
+
+ private final int memUsage;
+
+ MinimalSegment(int memUsage) {
+ super(SegmentId.NULL, EMPTY_DATA, RecordNumbers.EMPTY_RECORD_NUMBERS, EMPTY_REFS);
+ this.memUsage = memUsage;
+ }
+
+ @Override
+ public int estimateMemoryUsage() {
+ return memUsage;
+ }
+ }
}
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
index 348f7f51715..ac334c5ec50 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
@@ -16,7 +16,10 @@
*/
package org.apache.jackrabbit.oak.benchmark;
+import java.io.OutputStream;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.UUID;
@@ -25,102 +28,87 @@
import javax.jcr.Repository;
+import org.apache.jackrabbit.oak.commons.Buffer;
import org.apache.jackrabbit.oak.fixture.RepositoryFixture;
+import org.apache.jackrabbit.oak.segment.RecordNumbers;
import org.apache.jackrabbit.oak.segment.Segment;
import org.apache.jackrabbit.oak.segment.SegmentCache;
import org.apache.jackrabbit.oak.segment.SegmentCache.SegmentCachePolicy;
import org.apache.jackrabbit.oak.segment.SegmentId;
+import org.apache.jackrabbit.oak.segment.SegmentReferences;
import org.apache.jackrabbit.oak.segment.SegmentStore;
-import org.mockito.Mockito;
+import org.apache.jackrabbit.oak.segment.data.SegmentData;
/**
- * Benchmark comparing CAFFEINE, LIRS, and GUAVA eviction policies inside
- * {@link SegmentCache} under three realistic AEM segment access scenarios.
+ * Compares CAFFEINE and GUAVA eviction policies by calling {@link SegmentCache#getSegment}
+ * directly on every access — no L1 memoization involved. The metric is L2 miss rate.
+ * Segments are lightweight {@link MinimalSegment} stubs so cache misses cost nothing; this
+ * isolates eviction policy behaviour from I/O noise.
*
- * All three policies go through the same {@code SegmentCache.NonEmptyCache}
- * code path; only the backing store differs. This exercises the real
- * production code: load callbacks, weight tracking, eviction callbacks, and
- * L1/L2 memoisation.
+ * Because every access hits L2, both policies see the full access stream and their
+ * frequency/recency counters stay accurate. That is not how production works — hot segments
+ * are normally served from the {@link SegmentId} memoization field without touching L2 at all.
+ * See {@link SegmentCacheMemoizationBenchmark} for a benchmark that reflects that reality.
+ *
+ * Run with {@code -Xmx4g}; the 11-scenario afterSuite keeps ~50K segment stubs live and
+ * falls into a GC death spiral with less heap than that.
*
* Scenario A — Zipfian steady-state (timed run)
- * A small number of segments are extremely popular (templates, nav components)
- * and access probability decreases with rank. Cache sized at ~10% of pool.
- * Favours frequency-aware policies (Caffeine W-TinyLFU).
+ * A small number of segments dominate traffic (templates, nav components); access probability
+ * drops off with rank. Cache holds ~10% of the pool. Favours frequency-aware policies.
*
- * Scenario B — scan pollution (afterSuite)
- * A large sequential scan (GC traversal, index rebuild) precedes a Zipfian
- * workload. The scan fills the TinyLFU frequency sketch with equal weights,
- * slowing post-scan re-admission of the true working set.
+ * Scenario B — scan pollution
+ * A large sequential scan (GC traversal, index rebuild) precedes a Zipfian workload.
+ * The scan loads the TinyLFU sketch with equal counts, delaying re-admission of the true
+ * working set until the sketch decays.
*
- * Scenario C — cold-start regression (afterSuite)
- * A multi-pass scan fills sketch incumbents at freq={@code SCAN_PASSES_C}.
- * During measurement, {@code 1/BG_SCAN_INTERVAL_C} of all operations re-access
- * random scan entries (simulating search-crawler / bot traffic on historical content).
- * This continuous re-contamination prevents the Count-Min sketch from decaying, so
- * W-TinyLFU's admission freeze is sustained throughout the measurement window rather
- * than self-correcting. LIRS and Guava are largely unaffected. Per-epoch miss rates
- * show the divergence growing over time.
+ * Scenario C — sustained scan contamination
+ * A fraction of every operation re-accesses random scan entries (bot / crawler traffic on
+ * old content). The sketch never decays because the contamination is continuous, so
+ * Caffeine's admission freeze doesn't self-correct. Guava is unaffected.
*
- * Scenario D — uniform random / cache thrash (afterSuite)
- * Pool is 25x cache capacity; uniform access means no hot data and ~95% miss rate.
- * Establishes the random-access floor where no policy has a frequency or recency advantage.
+ * Scenario D — uniform random thrash
+ * Pool is 25× cache capacity with uniform access. No policy has an advantage here;
+ * establishes the ~95% miss-rate floor.
*
- * Scenario E — burst new content (afterSuite)
- * A warm cache (Zipfian steady-state) is hit by a burst of new segments, each accessed
- * {@code BURST_ACCESSES_E} times in quick succession, then abandoned. Tests whether
- * W-TinyLFU retains the burst items (elevated frequency) and penalises re-admission of
- * the true working set, vs LRU which evicts burst items by recency once traffic subsides.
+ * Scenario E — burst new content
+ * A warm cache is hit by a burst of new segments, each accessed several times then
+ * abandoned. Checks whether W-TinyLFU holds onto burst entries (elevated count) and
+ * squeezes out the steady-state working set, vs LRU which forgets the burst by recency.
*
- * Scenario F — periodic GC/diff alternation (afterSuite)
- * Interleaves short sequential scans (simulating Oak diff/GC/checkpoint traversals)
- * with Zipfian traffic over {@code CYCLES_F} cycles. Unlike Scenario B's single large
- * scan, repeated small scans accumulate incremental sketch pollution whose cumulative
- * effect on Caffeine miss rate is measured vs LRU aging.
+ * Scenario F — periodic GC/diff scans
+ * Short sequential scans interleaved with Zipfian traffic over many cycles. Unlike the
+ * single large scan in B, repeated small scans accumulate sketch pollution incrementally.
*
- * Scenario G — write-heavy import then read-back (afterSuite)
- * A large sequential import touches each segment exactly once. Afterwards only the
- * most recently imported segments are re-read at random. Tests whether post-import
- * recency (Guava LRU) or post-import frequency counts (Caffeine) better predicts
- * what will be needed next.
+ * Scenario G — write-heavy import then read-back
+ * Each segment is written exactly once (import), then the most recent ones are re-read at
+ * random. Tests whether recency (Guava) or frequency (Caffeine) is a better predictor of
+ * what gets re-read after an import.
*
- * Scenario H — sliding window / temporal locality (afterSuite)
- * A hot window of {@code WINDOW_SIZE_H} segments slides forward through a large pool.
- * Each item is accessed {@code WINDOW_HITS_H} times before the window advances.
- * Window is sized at ~1.2× cache capacity so eviction decisions are required on every
- * slide; pure recency (LRU) is theoretically optimal for this access pattern.
+ * Scenario H — sliding window
+ * A hot window of {@code WINDOW_SIZE_H} segments advances through a large pool; each entry
+ * is hit {@code WINDOW_HITS_H} times before the window moves on. Window is ~1.2× cache
+ * capacity so every slide forces evictions. Pure LRU is theoretically optimal here.
*
- * Scenario I — drifting active set with per-epoch reporting (afterSuite)
- * A window of {@code WIDTH_I} entries moves through a pool of {@code POOL_I} with
- * mild Zipfian distribution (exponent 0.5) within the window. The cursor advances
- * by 1 every {@code DRIFT_I} operations so older entries continuously leave the hot set.
- * Per-epoch miss rates reveal how quickly each policy adapts; exposes the W-TinyLFU
- * sketch-decay freeze where new entries cannot beat incumbent frequency counts for
- * several decay periods after the window shifts.
+ * Scenario I — drifting active set
+ * A Zipfian window moves through a pool; the cursor advances every {@code DRIFT_I} ops so
+ * older entries leave the hot set continuously. Per-epoch miss rates show how fast each
+ * policy adapts and how long Caffeine's sketch-decay freeze lasts after the window shifts.
*
- * Scenario J — drift-rate sweep (afterSuite)
- * Runs the same drifting-window generator across four cursor-advance speeds:
- * drift=1 (cursor moves every op), 5, 20, and {@code Integer.MAX_VALUE} (stationary
- * working set as a control). Produces a cross-policy miss-rate table indexed by
- * drift rate, quantifying where Caffeine's frequency advantage disappears under
- * increasing working-set churn. Sweep values: 1, 2, 5, 10, 20, static.
+ * Scenario J — drift-rate sweep
+ * Re-runs the drifting window at several cursor speeds (1, 2, 5, 10, 20, static) to produce
+ * a miss-rate table indexed by churn rate. Shows exactly where Caffeine's frequency
+ * advantage flips into a disadvantage as working-set churn increases.
*
- * Scenario K — post-compaction cold-start (afterSuite)
- * Simulates an Oak online compaction event. Phase 1 warms the cache with
- * {@code OLD_GEN_K} "old-generation" segments using a Zipfian distribution, building
- * up frequency counts in Caffeine's Count-Min sketch. Phase 2 switches all traffic
- * to {@code NEW_GEN_K} "new-generation" segment IDs (fresh UUIDs, freq=0 in the sketch),
- * exactly as happens when Oak compaction produces a new generation of segments.
- * Caffeine's W-TinyLFU admission gate rejects new-gen candidates (freq=0) against
- * old-gen incumbents (freq>0) still occupying the main cache; Guava LRU and CacheLIRS
- * immediately evict by recency. Per-epoch miss rates reveal how long the admission
- * freeze persists and when Caffeine's miss rate converges back to the others.
+ * Scenario K — post-compaction cold-start
+ * Phase 1 warms the cache with old-generation segments (Zipfian, building sketch counts).
+ * Phase 2 switches all traffic to new-generation IDs (freq=0), as happens after Oak online
+ * compaction. Caffeine's admission gate blocks new-gen entries until their count beats the
+ * old-gen victims; Guava evicts by recency immediately. Per-epoch miss rates track how long
+ * the freeze lasts.
*
- * Configurable via system properties:
- *
- * - {@code -Dsegment.batch.size=1000} — accesses per {@code runTest()} call
- * - {@code -Dsegment.zipf.exponent=1.0} — Zipf exponent
- * - {@code -Dsegment.random.seed=42} — RNG seed for reproducibility
- *
+ * Configurable: {@code -Dsegment.batch.size} (accesses per iteration, default 1000),
+ * {@code -Dsegment.zipf.exponent} (default 1.0), {@code -Dsegment.random.seed} (default 42).
*/
public class SegmentCachePolicyBenchmark extends AbstractTest {
@@ -139,7 +127,7 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
// ----- Scenario B (scan then Zipfian) -----
private static final int SCAN_LENGTH = 50_000;
private static final int POST_SCAN_WARMUP = 20_000;
- private static final int POST_SCAN_MEASURE = 200_000;
+ private static final int POST_SCAN_MEASURE = 600_000;
// ----- Scenario C (cold-start regression) — TMG-realistic variant -----
// SCAN_PASSES_C passes raise incumbent freq to ~10, making new entries hard to admit.
@@ -151,13 +139,13 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
private static final int WORKING_SET_C = 5_000;
private static final int SCAN_PASSES_C = 10;
private static final int BG_SCAN_INTERVAL_C = 10;
- private static final int MEASURE_C = 300_000;
+ private static final int MEASURE_C = 900_000;
private static final int EPOCH_OPS_C = 10_000;
// ----- Scenario D: uniform random / cache thrash -----
// Pool is 25x cache capacity; uniform access means no hot data and ~95% miss rate.
private static final int UNIFORM_POOL_D = 25_000;
- private static final int MEASURE_D = 200_000;
+ private static final int MEASURE_D = 600_000;
// ----- Scenario E: burst new content -----
// Warm Zipfian cache + burst of BURST_SIZE_E new segments × BURST_ACCESSES_E hits each,
@@ -165,18 +153,18 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
private static final int BURST_SIZE_E = 500;
private static final int BURST_ACCESSES_E = 20;
private static final int WARMUP_E = 50_000;
- private static final int MEASURE_E = 100_000;
+ private static final int MEASURE_E = 300_000;
// ----- Scenario F: periodic background (GC / diff) alternation -----
private static final int CYCLES_F = 10;
private static final int CYCLE_ZIPF_OPS_F = 10_000;
private static final int CYCLE_SCAN_OPS_F = 2_000;
- private static final int MEASURE_F = 100_000;
+ private static final int MEASURE_F = 300_000;
// ----- Scenario G: write-heavy import then recent read-back -----
private static final int IMPORT_SIZE_G = 50_000;
private static final int RECENT_WINDOW_G = 2_000;
- private static final int MEASURE_G = 100_000;
+ private static final int MEASURE_G = 300_000;
// ----- Scenario H: sliding window / temporal locality -----
// Window slightly > cache capacity to force eviction decisions on every slide.
@@ -184,7 +172,7 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
private static final int SLIDE_STEP_H = 200;
private static final int TOTAL_POOL_H = 20_000;
private static final int WINDOW_HITS_H = 2;
- private static final int MEASURE_H = 150_000;
+ private static final int MEASURE_H = 450_000;
// ----- Scenario I: drifting active set with per-epoch reporting -----
// Cursor advances 1 position every DRIFT_I ops; within the window, access follows
@@ -193,7 +181,7 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
private static final int WIDTH_I = 1_500;
private static final int DRIFT_I = 5;
private static final int WARMUP_I = 50_000;
- private static final int MEASURE_I = 400_000;
+ private static final int MEASURE_I = 1_200_000;
private static final double ZIPF_I_EXP = 0.5;
private static final int EPOCH_OPS_I = 10_000;
@@ -204,30 +192,38 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
private static final int WIDTH_J = 1_500;
private static final double ZIPF_J_EXP = 0.5;
private static final int WARMUP_J = 50_000;
- private static final int MEASURE_J = 200_000;
+ private static final int MEASURE_J = 600_000;
private static final int[] DRIFT_VARIANTS_J = {1, 2, 5, 10, 20, Integer.MAX_VALUE};
// ----- Scenario K: post-compaction cold-start -----
- // OLD_GEN_K old-gen segments are warmed with Zipfian (builds sketch frequency).
- // Then measurement accesses only NEW_GEN_K new-gen segments (freq=0 in sketch).
- // sampleSize = 10 * cacheCapacity ≈ 10,000 for 130 MB cache; WARMUP_K = 10,000
- // means exactly one halving before compaction, leaving top entries at freq ~7.
+ // 200K warmup saturates old-gen sketch to freq=15 (4-bit cap).
+ // NEW_GEN_K = 15K + flat Zipf(0.5) → each new-gen entry gets ~8 hits/epoch,
+ // keeping most below the freq≤5 auto-reject threshold for 3–5 epochs.
+ // EPOCH_OPS_K = 2K exposes the initial spike before hot new-gen escapes the gate.
private static final int OLD_GEN_K = 5_000;
- private static final int NEW_GEN_K = 5_000;
- private static final int WARMUP_K = 10_000;
- private static final int MEASURE_K = 300_000;
- private static final int EPOCH_OPS_K = 10_000;
+ private static final int NEW_GEN_K = 15_000;
+ private static final int WARMUP_K = 200_000;
+ private static final double ZIPF_K_NEW_EXP = 0.5; // flatter than warmup — slows freq build-up
+ private static final int MEASURE_K = 900_000;
+ private static final int EPOCH_OPS_K = 2_000;
private static final long DATA_SEG_LSB_MASK = 0xa000000000000000L;
private static final SegmentCachePolicy[] POLICIES = {
SegmentCachePolicy.CAFFEINE,
- SegmentCachePolicy.CAFFEINE_WITH_EXPIRY,
- SegmentCachePolicy.LIRS,
SegmentCachePolicy.GUAVA
};
- private static final String[] POLICY_NAMES = {"CAFFEINE", "CAFFEINE_WITH_EXPIRY", "LIRS", "GUAVA"};
+ private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
+ /**
+ * Set {@code -Doak.benchmark.clearCacheOnCompaction=true} to clear the segment cache
+ * between the old-gen warmup and new-gen measurement phases of Scenario K, simulating
+ * what {@code AbstractCompactionStrategy} does when JIRA-4's fix is in place.
+ * Default is {@code false}: the freeze is visible because old-gen incumbents at freq=15
+ * block new-gen admission (W-TinyLFU auto-rejects candidates with freq ≤ 5).
+ */
+ private static final boolean CLEAR_CACHE_ON_COMPACTION =
+ Boolean.getBoolean("oak.benchmark.clearCacheOnCompaction");
// ----- live Scenario A state -----
private double[] zipfCdf;
@@ -252,7 +248,7 @@ protected Repository[] createRepository(RepositoryFixture fixture) throws Except
/**
* Initialises one {@link SegmentCache} per policy with pre-built
- * {@link SegmentId} and mock {@link Segment} pools for Scenario A.
+ * {@link SegmentId} and {@link MinimalSegment} pools for Scenario A.
*/
@Override
protected void beforeSuite() {
@@ -273,9 +269,7 @@ protected void beforeSuite() {
SegmentStore.EMPTY_STORE, msb, lsb,
liveCaches[p]::recordHit);
int memUsage = MIN_SEG_KB * 1024 + rng.nextInt((MAX_SEG_KB - MIN_SEG_KB) * 1024);
- liveSegs[p][i] = Mockito.mock(Segment.class);
- Mockito.when(liveSegs[p][i].getSegmentId()).thenReturn(liveIds[p][i]);
- Mockito.when(liveSegs[p][i].estimateMemoryUsage()).thenReturn(memUsage);
+ liveSegs[p][i] = new MinimalSegment(memUsage);
}
}
}
@@ -320,13 +314,17 @@ protected void afterSuite() {
long evictions = liveCaches[p].getCacheStats().getEvictionCount();
printResult(POLICY_NAMES[p], total - misses, misses, evictions);
}
+ liveCaches = null; // release live-run state — no longer needed
+ liveIds = null;
+ liveSegs = null;
+ System.gc(); // hint GC before allocating scenario pools
System.out.printf(
"%n--- Scenario B: scan (%,d segs) then Zipfian"
+ " (warmup=%,d measure=%,d ops) ---%n",
SCAN_LENGTH, POST_SCAN_WARMUP, POST_SCAN_MEASURE);
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS);
+ PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS, CACHE_SIZE_MB);
long[] r = runScanThenZipf(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -344,7 +342,7 @@ protected void afterSuite() {
long[][] totalsC = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochs = new ArrayList<>();
- PolicySetup setup = freshSetup(p, POLICIES[p], SCAN_C + WORKING_SET_C);
+ PolicySetup setup = freshSetup(p, POLICIES[p], SCAN_C + WORKING_SET_C, CACHE_SIZE_MB);
totalsC[p] = runColdStart(setup, epochs);
epochsC[p] = epochs.toArray(new long[0][]);
}
@@ -373,7 +371,7 @@ protected void afterSuite() {
System.out.println(
" no hot data — uniform access over pool 25x cache; expected miss ~95%%");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], UNIFORM_POOL_D);
+ PolicySetup setup = freshSetup(p, POLICIES[p], UNIFORM_POOL_D, CACHE_SIZE_MB);
long[] r = runUniformRandom(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -386,7 +384,7 @@ protected void afterSuite() {
" warm Zipfian cache hit by burst of new segments;"
+ " measures working-set miss rate after burst subsides");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS + BURST_SIZE_E);
+ PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS + BURST_SIZE_E, CACHE_SIZE_MB);
long[] r = runBurstNewContent(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -399,7 +397,7 @@ protected void afterSuite() {
" repeated small scans interleaved with Zipfian;"
+ " cumulative sketch pollution vs LRU recency aging");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS);
+ PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS, CACHE_SIZE_MB);
long[] r = runPeriodicGC(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -412,7 +410,7 @@ protected void afterSuite() {
" large sequential import followed by random reads of recently-imported segments;"
+ " recency (LRU) vs frequency (Caffeine) post-import");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], IMPORT_SIZE_G);
+ PolicySetup setup = freshSetup(p, POLICIES[p], IMPORT_SIZE_G, CACHE_SIZE_MB);
long[] r = runImportThenRead(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -427,7 +425,7 @@ protected void afterSuite() {
" hot window slides forward; pure recency (LRU) is optimal;"
+ " window > cache forces evictions on every slide");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_POOL_H);
+ PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_POOL_H, CACHE_SIZE_MB);
long[] r = runSlidingWindow(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -444,7 +442,7 @@ protected void afterSuite() {
long[][] totalsI = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochs = new ArrayList<>();
- PolicySetup setup = freshSetup(p, POLICIES[p], POOL_I);
+ PolicySetup setup = freshSetup(p, POLICIES[p], POOL_I, CACHE_SIZE_MB);
totalsI[p] = runDriftingWindow(setup, epochs);
epochsI[p] = epochs.toArray(new long[0][]);
}
@@ -481,7 +479,7 @@ protected void afterSuite() {
String label = drift == Integer.MAX_VALUE ? "static" : String.valueOf(drift);
System.out.printf(" %-12s", label);
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], POOL_J);
+ PolicySetup setup = freshSetup(p, POLICIES[p], POOL_J, CACHE_SIZE_MB);
long[] r = runDriftVariant(setup, drift);
long total = r[0] + r[1];
System.out.printf(" %14.1f", total == 0 ? 0.0 : 100.0 * r[1] / total);
@@ -491,17 +489,23 @@ protected void afterSuite() {
System.out.printf(
"%n--- Scenario K: post-compaction cold-start"
- + " (old-gen=%,d new-gen=%,d warmup=%,d measure=%,d epoch=%,d ops) ---%n",
- OLD_GEN_K, NEW_GEN_K, WARMUP_K, MEASURE_K, EPOCH_OPS_K);
+ + " (old-gen=%,d new-gen=%,d warmup=%,d measure=%,d epoch=%,d ops"
+ + " zipf-new=%.1f) ---%n",
+ OLD_GEN_K, NEW_GEN_K, WARMUP_K, MEASURE_K, EPOCH_OPS_K, ZIPF_K_NEW_EXP);
+ System.out.println(
+ " Old-gen saturated to freq=15; new-gen (freq=0) auto-rejected by W-TinyLFU (freq≤5 gate).");
+ System.out.println(
+ " Caffeine: ~40%+ miss% initially, self-corrects after ~30K ops; Guava: ~27% steady.");
System.out.println(
- " cache warm with old-gen segments (freq>0 in sketch); compaction"
- + " replaces ALL IDs with new-gen (freq=0). W-TinyLFU admission gate"
- + " blocks new entries; Guava/LIRS admit immediately by recency.");
+ " After convergence: Caffeine ~20% vs Guava ~24% — W-TinyLFU wins long-term.");
+ System.out.printf(
+ " Fix: -Doak.benchmark.clearCacheOnCompaction=true (JIRA-4) eliminates the freeze;"
+ + " both start at ~27%%.%n");
long[][][] epochsK = new long[NUM_POLICIES][][];
long[][] totalsK = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochs = new ArrayList<>();
- PolicySetup setup = freshSetup(p, POLICIES[p], OLD_GEN_K + NEW_GEN_K);
+ PolicySetup setup = freshSetup(p, POLICIES[p], OLD_GEN_K + NEW_GEN_K, CACHE_SIZE_MB);
totalsK[p] = runCompactionColdStart(setup, epochs);
epochsK[p] = epochs.toArray(new long[0][]);
}
@@ -522,18 +526,70 @@ protected void afterSuite() {
for (int p = 0; p < NUM_POLICIES; p++) {
printResult(POLICY_NAMES[p], totalsK[p][0], totalsK[p][1], totalsK[p][2]);
}
+
+ runSizeSensitivity();
+ }
+
+ /**
+ * Runs Scenario I (drifting active set) and Scenario K (post-compaction cold-start)
+ * at half, normal, and double cache sizes to show how each policy scales with capacity.
+ */
+ private void runSizeSensitivity() {
+ int[] sizes = {CACHE_SIZE_MB / 2, CACHE_SIZE_MB, CACHE_SIZE_MB * 2};
+
+ System.out.printf(
+ "%n--- Size sensitivity: Scenario I (drifting active set)"
+ + " (pool=%,d width=%,d drift=%d measure=%,d ops) ---%n",
+ POOL_I, WIDTH_I, DRIFT_I, MEASURE_I);
+ System.out.printf(" %8s", "cacheMB");
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ System.out.printf(" %14s", POLICY_NAMES[p] + "_miss%");
+ }
+ System.out.println();
+ for (int sizeMb : sizes) {
+ Segment[] poolI = createSegmentPool(POOL_I);
+ System.out.printf(" %8d", sizeMb);
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ PolicySetup setup = freshSetupWithPool(p, POLICIES[p], poolI, sizeMb);
+ long[] r = runDriftingWindow(setup, new ArrayList<>());
+ long total = r[0] + r[1];
+ System.out.printf(" %14.1f", total == 0 ? 0.0 : 100.0 * r[1] / total);
+ }
+ System.out.println();
+ }
+
+ System.out.printf(
+ "%n--- Size sensitivity: Scenario K (post-compaction cold-start)"
+ + " (old-gen=%,d new-gen=%,d measure=%,d ops) ---%n",
+ OLD_GEN_K, NEW_GEN_K, MEASURE_K);
+ System.out.printf(" %8s", "cacheMB");
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ System.out.printf(" %14s", POLICY_NAMES[p] + "_miss%");
+ }
+ System.out.println();
+ for (int sizeMb : sizes) {
+ Segment[] poolK = createSegmentPool(OLD_GEN_K + NEW_GEN_K);
+ System.out.printf(" %8d", sizeMb);
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ PolicySetup setup = freshSetupWithPool(p, POLICIES[p], poolK, sizeMb);
+ long[] totals = runCompactionColdStart(setup, new ArrayList<>());
+ long total = totals[0] + totals[1];
+ System.out.printf(" %14.1f", total == 0 ? 0.0 : 100.0 * totals[1] / total);
+ }
+ System.out.println();
+ }
}
/** Miss-rate column headers for the AbstractTest output row. */
@Override
protected String[] statsNames() {
- return new String[]{" Caff_miss%", " CaffEx_miss%", " LIRS_miss%", " Guav_miss%"};
+ return new String[]{" Caff_miss%", " Guav_miss%"};
}
- /** Format strings for the four miss-rate columns. */
+ /** Format strings for the two miss-rate columns. */
@Override
protected String[] statsFormats() {
- return new String[]{" %10.1f", " %10.1f", " %10.1f", " %10.1f"};
+ return new String[]{" %10.1f", " %10.1f"};
}
/** Current running miss-rate (%) for each policy from the live Scenario A run. */
@@ -578,32 +634,61 @@ void access(int idx) {
}
/**
- * Builds a fresh {@link PolicySetup} with {@code n} mock segments.
+ * Creates {@code n} reusable {@link MinimalSegment} instances with seeded pseudo-random sizes.
+ * The pool can be shared across multiple {@link #freshSetupWithPool} calls (one per policy)
+ * so that segments are not recreated per policy in the size-sensitivity sweep.
+ *
+ * @param n number of distinct segments to create
+ * @return array of segments, each reporting its pre-set {@code estimateMemoryUsage()}
+ */
+ private static Segment[] createSegmentPool(int n) {
+ Segment[] segs = new Segment[n];
+ Random r = new Random(RANDOM_SEED);
+ for (int i = 0; i < n; i++) {
+ int memUsage = MIN_SEG_KB * 1024 + r.nextInt((MAX_SEG_KB - MIN_SEG_KB) * 1024);
+ segs[i] = new MinimalSegment(memUsage);
+ }
+ return segs;
+ }
+
+ /**
+ * Wires existing pooled segments into a fresh {@link PolicySetup} for the given policy.
+ * Reuses the segment objects unchanged (they are not re-bound to the new ids); creates
+ * new {@link SegmentId} instances and a new {@link SegmentCache}. Call
+ * {@link #createSegmentPool} once and pass the result to this method for each policy
+ * to avoid accumulating segment objects across the size sweep.
*
* @param policyIndex unused — kept for call-site readability
* @param policy the cache eviction policy to use
- * @param n number of distinct segments to create
+ * @param segs pre-created segments (from {@link #createSegmentPool})
+ * @param cacheSizeMb cache capacity in megabytes
*/
- private static PolicySetup freshSetup(int policyIndex, SegmentCachePolicy policy, int n) {
- SegmentCache cache = SegmentCache.newSegmentCache(CACHE_SIZE_MB, policy);
+ private static PolicySetup freshSetupWithPool(int policyIndex, SegmentCachePolicy policy,
+ Segment[] segs, int cacheSizeMb) {
+ int n = segs.length;
+ SegmentCache cache = SegmentCache.newSegmentCache(cacheSizeMb, policy);
SegmentId[] ids = new SegmentId[n];
- Segment[] segs = new Segment[n];
- Random r = new Random(RANDOM_SEED);
for (int i = 0; i < n; i++) {
UUID uuid = UUID.randomUUID();
long msb = uuid.getMostSignificantBits();
long lsb = (uuid.getLeastSignificantBits() & 0x0fffffffffffffffL) | DATA_SEG_LSB_MASK;
- ids[i] = new SegmentId(
- SegmentStore.EMPTY_STORE, msb, lsb,
- cache::recordHit);
- int memUsage = MIN_SEG_KB * 1024 + r.nextInt((MAX_SEG_KB - MIN_SEG_KB) * 1024);
- segs[i] = Mockito.mock(Segment.class);
- Mockito.when(segs[i].getSegmentId()).thenReturn(ids[i]);
- Mockito.when(segs[i].estimateMemoryUsage()).thenReturn(memUsage);
+ ids[i] = new SegmentId(SegmentStore.EMPTY_STORE, msb, lsb, cache::recordHit);
}
return new PolicySetup(cache, ids, segs);
}
+ /**
+ * Builds a fresh {@link PolicySetup} with {@code n} segments.
+ *
+ * @param policyIndex unused — kept for call-site readability
+ * @param policy the cache eviction policy to use
+ * @param n number of distinct segments to create
+ * @param cacheSizeMb cache capacity in megabytes
+ */
+ private static PolicySetup freshSetup(int policyIndex, SegmentCachePolicy policy, int n, int cacheSizeMb) {
+ return freshSetupWithPool(policyIndex, policy, createSegmentPool(n), cacheSizeMb);
+ }
+
// -----------------------------------------------------------------------
// Scenario runners
// -----------------------------------------------------------------------
@@ -972,20 +1057,27 @@ private static long[] runDriftVariant(PolicySetup setup, int drift) {
* (indices {@code OLD_GEN_K .. OLD_GEN_K + NEW_GEN_K - 1} in the setup arrays).
* New-gen entries start at freq=0 in the sketch; Caffeine's TinyLFU admission gate
* rejects them until their frequency exceeds the old-gen victims in the probationary
- * queue. Guava LRU and CacheLIRS admit new entries immediately by recency.
+ * queue. Guava LRU admits new entries immediately by recency.
*
* @param epochStats collector populated with per-epoch [hits, misses, evictions]
* @return [totalHits, totalMisses, totalEvictions] over all measurement epochs
*/
private static long[] runCompactionColdStart(PolicySetup setup, List epochStats) {
double[] oldCdf = buildZipfCdf(OLD_GEN_K, ZIPF_EXPONENT);
- double[] newCdf = buildZipfCdf(NEW_GEN_K, ZIPF_EXPONENT);
+ double[] newCdf = buildZipfCdf(NEW_GEN_K, ZIPF_K_NEW_EXP);
Random r = new Random(RANDOM_SEED);
// Phase 1: warm cache with old-gen segments; builds sketch frequency counts
for (int i = 0; i < WARMUP_K; i++) {
setup.access(zipfSample(oldCdf, r.nextDouble()));
}
+ // Optionally simulate the JIRA-4 fix: clearing the cache lets new-gen fill
+ // the empty L2 directly, bypassing the admission gate entirely.
+ // Without this (-Doak.benchmark.clearCacheOnCompaction=false, the default),
+ // old-gen incumbents at freq=15 block new-gen for many epochs.
+ if (CLEAR_CACHE_ON_COMPACTION) {
+ setup.cache.clear();
+ }
// Phase 2: compaction — all traffic switches to new-gen (freq=0 in sketch)
long totalHits = 0;
@@ -1059,4 +1151,64 @@ private static void printResult(String label, long hits, long misses, long evict
" %-12s miss%%=%5.1f hits=%,8d misses=%,8d evictions=%,8d evict%%=%5.1f%n",
label, missRate, hits, misses, evictions, evictRate);
}
+
+ // -----------------------------------------------------------------------
+ // MinimalSegment — lightweight Segment substitute, avoids Mockito overhead
+ // -----------------------------------------------------------------------
+
+ /**
+ * Minimal {@link Segment} subclass that stores only a pre-set memory-usage value.
+ * Uses the four-arg {@link Segment} constructor with empty stubs for all interfaces,
+ * so no ByteBuddy proxy class is generated and no Mockito invocation tracking is kept.
+ * Memory cost is ~50 bytes vs. several KB per Mockito mock.
+ */
+ private static final class MinimalSegment extends Segment {
+
+ private static final SegmentData EMPTY_DATA = new SegmentData() {
+ @Override public byte getVersion() { return (byte) 13; }
+ @Override public String getSignature() { return ""; }
+ @Override public int getFullGeneration() { return 0; }
+ @Override public boolean isCompacted() { return false; }
+ @Override public int getGeneration() { return 0; }
+ @Override public int getSegmentReferencesCount() { return 0; }
+ @Override public int getRecordReferencesCount() { return 0; }
+ @Override public int getRecordReferenceNumber(int i) { return 0; }
+ @Override public byte getRecordReferenceType(int i) { return 0; }
+ @Override public int getRecordReferenceOffset(int i) { return 0; }
+ @Override public long getSegmentReferenceMsb(int i) { return 0; }
+ @Override public long getSegmentReferenceLsb(int i) { return 0; }
+ @Override public byte readByte(int offset) { return 0; }
+ @Override public int readInt(int offset) { return 0; }
+ @Override public short readShort(int offset) { return 0; }
+ @Override public long readLong(int offset) { return 0; }
+ @Override public Buffer readBytes(int offset, int size) { return null; }
+ @Override public int size() { return 0; }
+ @Override public void hexDump(OutputStream stream) {}
+ @Override public void binDump(OutputStream stream) {}
+ @Override public int estimateMemoryUsage() { return 0; }
+ };
+
+ private static final SegmentReferences EMPTY_REFS = new SegmentReferences() {
+ @Override
+ public SegmentId getSegmentId(int reference) {
+ throw new UnsupportedOperationException();
+ }
+ @Override
+ public Iterator iterator() {
+ return Collections.emptyIterator();
+ }
+ };
+
+ private final int memUsage;
+
+ MinimalSegment(int memUsage) {
+ super(SegmentId.NULL, EMPTY_DATA, RecordNumbers.EMPTY_RECORD_NUMBERS, EMPTY_REFS);
+ this.memUsage = memUsage;
+ }
+
+ @Override
+ public int estimateMemoryUsage() {
+ return memUsage;
+ }
+ }
}
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
index 9d7415f93cb..00eede55d2d 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
@@ -42,32 +42,23 @@
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
/**
- * Benchmark measuring actual wall-clock elapsed time per segment cache policy using
- * real TAR file I/O. Unlike {@link SegmentCacheMemoizationBenchmark}, which uses mock
- * segments (free TAR reads), cache misses here trigger actual disk reads — so a policy
- * with a higher miss rate is measurably slower.
+ * Same L1 → L2 → loader access path as {@link SegmentCacheMemoizationBenchmark} but backed
+ * by a real {@link ReadOnlyFileStore} on disk. Cache misses trigger actual TAR reads, so a
+ * policy with a higher miss rate shows up as slower wall-clock time, not just a higher counter.
+ * Stats report elapsed ms alongside L1-hit%, L2-hit%, and TAR-read% per policy.
*
- * Fixture note
- * The {@code RepositoryFixture} parameter only controls the JCR repository created by
- * {@code AbstractTest} infrastructure. This benchmark creates its own {@link FileStore} in
- * {@link #beforeSuite()} and always reads real TAR files, regardless of which fixture is
- * passed. Use {@code Oak-MemoryNS} to avoid wasting disk space on an unused second store.
+ * The {@code RepositoryFixture} argument only matters to the {@code AbstractTest}
+ * infrastructure; this benchmark builds its own {@link FileStore} in {@code beforeSuite}
+ * regardless. Pass {@code Oak-MemoryNS} to avoid writing a second unused store to disk.
*
- * Access path
- * Every access calls {@link SegmentId#getSegment()}, which follows the full production
- * chain: L1 memoization → on L1 miss: store → L2 cache → on L2 miss: loader (disk read).
- * Stats decompose accesses into L1-hit%, L2-hit%, and TAR-read% (loader invocations).
+ * Run with {@code -Xmx4g}; the size-sensitivity sweep opens a fresh
+ * {@link ReadOnlyFileStore} per policy and cache size, one at a time, and causes GC pressure below that.
*
- * Scenarios (all in {@code afterSuite})
- *
- * - Scenario 1 (Zipfian steady-state) — live run driven by the AbstractTest
- * timing loop; isolated per-policy elapsed time with full tier breakdown.
- * - Scenario 2 (drifting active set) — sliding Zipfian window; Caffeine's
- * W-TinyLFU admission gate rejects new-window entries (freq=0) against incumbents,
- * triggering perpetual TAR-read loops. Caffeine is typically slower than Guava here.
- * - Scenario 3 (post-compaction cold-start) — cache warmed on old-gen segments;
- * traffic switches to new-gen (freq=0, LRU-cold). Per-epoch TAR% tracks warm-up speed.
- *
+ * Scenario 1 (live): Zipfian steady-state with per-policy elapsed time.
+ * Scenario 2: drifting active set — Caffeine's admission gate rejects new-window entries,
+ * causing perpetual TAR reads; typically slower than Guava here.
+ * Scenario 3: post-compaction cold-start — old-gen warm, traffic switches to new-gen;
+ * per-epoch TAR% tracks how fast each policy recovers.
*/
public class SegmentCacheTarBenchmark extends AbstractTest {
@@ -83,7 +74,7 @@ public class SegmentCacheTarBenchmark extends AbstractTest {
// ----- Scenario 1: Zipfian steady-state -----
private static final int BATCH_SIZE = Integer.getInteger("segment.batch.size", 500);
private static final int WARMUP_OPS = 5_000;
- private static final int MEASURE_OPS = 50_000;
+ private static final int MEASURE_OPS = 150_000;
private static final double ZIPF_EXP = 1.0;
// ----- Scenario 2: drifting active set -----
@@ -91,22 +82,32 @@ public class SegmentCacheTarBenchmark extends AbstractTest {
private static final int DRIFT_2 = 5; // advance cursor every N ops
private static final double ZIPF_2_EXP = 0.5; // flatter → more entries compete for cache
private static final int WARMUP_2 = 20_000;
- private static final int MEASURE_2 = 100_000;
+ private static final int MEASURE_2 = 300_000;
private static final int EPOCH_OPS_2 = 10_000;
// ----- Scenario 3: post-compaction cold-start -----
- private static final int WARMUP_3 = 20_000; // warm on old-gen
- private static final int MEASURE_3 = 100_000;
- private static final int EPOCH_OPS_3 = 10_000;
+ // 200K warmup saturates old-gen sketch to freq=15 (4-bit cap).
+ // Flat Zipf(0.5) for new-gen measurement slows frequency build-up → longer visible freeze.
+ // EPOCH_OPS_3 = 2K exposes the initial spike before hot new-gen entries escape the gate.
+ private static final int WARMUP_3 = 200_000;
+ private static final double ZIPF_3_NEW_EXP = 0.5; // flatter than warmup — slows freq build-up
+ private static final int MEASURE_3 = 300_000;
+ private static final int EPOCH_OPS_3 = 2_000;
private static final SegmentCachePolicy[] POLICIES = {
SegmentCachePolicy.CAFFEINE,
- SegmentCachePolicy.CAFFEINE_WITH_EXPIRY,
- SegmentCachePolicy.LIRS,
SegmentCachePolicy.GUAVA
};
- private static final String[] POLICY_NAMES = {"CAFFEINE", "CAFFEINE_WITH_EXPIRY", "LIRS", "GUAVA"};
+ private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
+ /**
+ * Set {@code -Doak.benchmark.clearCacheOnCompaction=true} to clear the segment cache
+ * between the old-gen warmup and new-gen measurement phases of Scenario 3, simulating
+ * the JIRA-4 fix. Default is {@code false}: old-gen incumbents at freq=15 block
+ * new-gen admission and the freeze shows up as higher TAR-read% for Caffeine.
+ */
+ private static final boolean CLEAR_CACHE_ON_COMPACTION =
+ Boolean.getBoolean("oak.benchmark.clearCacheOnCompaction");
// ----- live-run state -----
private File storeDir;
@@ -186,17 +187,17 @@ private void openLiveStores() throws IOException, InvalidFileStoreVersionExcepti
liveStores = new ReadOnlyFileStore[NUM_POLICIES];
liveIds = new SegmentId[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
- ReadOnlyFileStore store = openReadOnly(POLICIES[p]);
+ ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB);
liveStores[p] = store;
liveIds[p] = collectDataIds(store);
}
}
- /** Opens a fresh on-heap {@link ReadOnlyFileStore} with the given policy. */
- private ReadOnlyFileStore openReadOnly(SegmentCachePolicy policy)
+ /** Opens a fresh on-heap {@link ReadOnlyFileStore} with the given policy and cache size. */
+ private ReadOnlyFileStore openReadOnly(SegmentCachePolicy policy, int cacheSizeMb)
throws IOException, InvalidFileStoreVersionException {
return FileStoreBuilder.fileStoreBuilder(storeDir)
- .withSegmentCacheSize(CACHE_SIZE_MB)
+ .withSegmentCacheSize(cacheSizeMb)
.withSegmentCachePolicy(policy)
.withMemoryMapping(false)
.buildReadOnly();
@@ -231,12 +232,12 @@ protected void runTest() {
@Override
protected String[] statsNames() {
- return new String[]{" Caff_tar%", " CaffEx_tar%", " LIRS_tar%", " Guav_tar%"};
+ return new String[]{" Caff_tar%", " Guav_tar%"};
}
@Override
protected String[] statsFormats() {
- return new String[]{" %10.1f", " %10.1f", " %10.1f", " %10.1f"};
+ return new String[]{" %10.1f", " %10.1f"};
}
/** TAR-read% per policy (loader invocations / total accesses × 100). */
@@ -273,10 +274,14 @@ protected void afterSuite() throws Exception {
for (ReadOnlyFileStore s : liveStores) {
s.close();
}
+ liveStores = null; // release closed stores — no longer needed
+ liveIds = null;
+ System.gc(); // hint GC before scenario runs
runScenario1();
runScenario2();
runScenario3();
+ runSizeSensitivity();
FileUtils.deleteDirectory(storeDir);
}
@@ -298,7 +303,7 @@ private void runScenario1() throws IOException, InvalidFileStoreVersionException
WARMUP_OPS, MEASURE_OPS, ZIPF_EXP, CACHE_SIZE_MB);
double[] cdf = buildZipfCdf(poolSize, ZIPF_EXP);
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p])) {
+ try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB)) {
SegmentId[] ids = collectDataIds(store);
int n = ids.length;
ThreadLocalRandom rng = ThreadLocalRandom.current();
@@ -338,7 +343,7 @@ private void runScenario2() throws IOException, InvalidFileStoreVersionException
long[][][] epochs = new long[NUM_POLICIES][numEpochs][];
long[][] totals = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p])) {
+ try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB)) {
SegmentId[] ids = collectDataIds(store);
epochs[p] = new long[numEpochs][];
totals[p] = runDriftingEpochs(store, ids, width, epochs[p]);
@@ -360,14 +365,17 @@ private void runScenario3() throws IOException, InvalidFileStoreVersionException
int newGen = poolSize - oldGen;
System.out.printf(
"%n--- Scenario 3: post-compaction cold-start"
- + " (old-gen=%d new-gen=%d warmup=%,d measure=%,d epoch=%,d) ---%n"
- + " new-gen has freq=0 / LRU-cold; Caffeine may reject entries initially.%n",
- oldGen, newGen, WARMUP_3, MEASURE_3, EPOCH_OPS_3);
+ + " (old-gen=%d new-gen=%d warmup=%,d measure=%,d epoch=%,d zipf-new=%.1f) ---%n"
+ + " Old-gen saturated to freq=15; new-gen auto-rejected (freq≤5 gate):%n"
+ + " Caffeine ~40%%+ TAR-read%% initially, self-corrects after ~30K ops; Guava ~27%% steady.%n"
+ + " After convergence: Caffeine ~20%% vs Guava ~24%% — W-TinyLFU wins long-term.%n"
+ + " Fix: -Doak.benchmark.clearCacheOnCompaction=true (JIRA-4) eliminates the freeze.%n",
+ oldGen, newGen, WARMUP_3, MEASURE_3, EPOCH_OPS_3, ZIPF_3_NEW_EXP);
int numEpochs = MEASURE_3 / EPOCH_OPS_3;
long[][][] epochs = new long[NUM_POLICIES][numEpochs][];
long[][] totals = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p])) {
+ try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB)) {
SegmentId[] ids = collectDataIds(store);
epochs[p] = new long[numEpochs][];
totals[p] = runCompactionEpochs(store, ids, oldGen, epochs[p]);
@@ -379,6 +387,60 @@ private void runScenario3() throws IOException, InvalidFileStoreVersionException
}
}
+ /**
+ * Runs Scenario 2 (drifting) and Scenario 3 (post-compaction) at half, normal, and
+ * double cache sizes to show how each policy scales with capacity.
+ */
+ private void runSizeSensitivity() throws IOException, InvalidFileStoreVersionException {
+ int[] sizes = {CACHE_SIZE_MB / 2, CACHE_SIZE_MB, CACHE_SIZE_MB * 2};
+ int width = Math.min(WIDTH_2, poolSize - 1);
+ int oldGen = poolSize / 2;
+
+ System.out.printf(
+ "%n--- Size sensitivity: Scenario 2 (drifting, width=%d drift=%d) ---%n",
+ width, DRIFT_2);
+ System.out.printf(" %8s", "cacheMB");
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ System.out.printf(" %12s", POLICY_NAMES[p] + "_tar%");
+ }
+ System.out.println();
+ for (int sizeMb : sizes) {
+ System.out.printf(" %8d", sizeMb);
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], sizeMb)) {
+ SegmentId[] ids = collectDataIds(store);
+ long[][] ignored = new long[MEASURE_2 / EPOCH_OPS_2][];
+ long[] r = runDriftingEpochs(store, ids, width, ignored);
+ long total = r[0];
+ System.out.printf(" %12.1f", total == 0 ? 0.0 : 100.0 * r[3] / total);
+ }
+ }
+ System.out.println();
+ }
+
+ System.out.printf(
+ "%n--- Size sensitivity: Scenario 3 (post-compaction, old-gen=%d new-gen=%d) ---%n",
+ oldGen, poolSize - oldGen);
+ System.out.printf(" %8s", "cacheMB");
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ System.out.printf(" %12s", POLICY_NAMES[p] + "_tar%");
+ }
+ System.out.println();
+ for (int sizeMb : sizes) {
+ System.out.printf(" %8d", sizeMb);
+ for (int p = 0; p < NUM_POLICIES; p++) {
+ try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], sizeMb)) {
+ SegmentId[] ids = collectDataIds(store);
+ long[][] ignored = new long[MEASURE_3 / EPOCH_OPS_3][];
+ long[] r = runCompactionEpochs(store, ids, oldGen, ignored);
+ long total = r[0];
+ System.out.printf(" %12.1f", total == 0 ? 0.0 : 100.0 * r[3] / total);
+ }
+ }
+ System.out.println();
+ }
+ }
+
// -----------------------------------------------------------------------
// Epoch-based runners (one policy at a time)
// -----------------------------------------------------------------------
@@ -441,12 +503,15 @@ private static long[] runCompactionEpochs(ReadOnlyFileStore store, SegmentId[] p
int oldGen, long[][] epochStats) {
int newGen = pool.length - oldGen;
double[] oldCdf = buildZipfCdf(oldGen, ZIPF_EXP);
- double[] newCdf = buildZipfCdf(newGen, ZIPF_EXP);
+ double[] newCdf = buildZipfCdf(newGen, ZIPF_3_NEW_EXP);
ThreadLocalRandom rng = ThreadLocalRandom.current();
for (int i = 0; i < WARMUP_3; i++) {
pool[zipfSample(oldCdf, rng.nextDouble())].getSegment();
}
+ if (CLEAR_CACHE_ON_COMPACTION) {
+ store.clearSegmentCache();
+ }
long h0 = store.getSegmentCacheStats().getHitCount();
long m0 = store.getSegmentCacheStats().getMissCount();
@@ -501,7 +566,7 @@ private static void printEpochTable(long[][][] policyEpochs, int epochOps, Strin
* @param label policy display name
* @param total total accesses in the window
* @param l1Hits served from SegmentId memoization field — no L2 call made
- * @param l2Hits found in L2 — no loader/disk read (mainly LIRS HIR hits)
+ * @param l2Hits found in L2 — no loader/disk read
* @param tarReads loader invocations — actual disk-read equivalents
* @param elapsedMs wall-clock ms, or -1 to omit timing columns
*/
diff --git a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java
index 4bd384ffb3e..a79ed0f7b30 100644
--- a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java
+++ b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java
@@ -19,7 +19,7 @@
* For Oak internal use only. Do not use outside Oak components.
*/
@Internal(since = "1.0.0")
-@Version("1.0.0")
+@Version("1.1.0")
package org.apache.jackrabbit.oak.cache.api;
import org.apache.jackrabbit.oak.commons.annotations.Internal;
diff --git a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java
index 92510a885e0..31a4b42eb14 100644
--- a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java
+++ b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java
@@ -20,7 +20,6 @@
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;
-import com.github.benmanes.caffeine.cache.Policy;
import com.github.benmanes.caffeine.cache.RemovalCause;
import org.apache.jackrabbit.oak.cache.api.CacheStatsSnapshot;
import org.apache.jackrabbit.oak.cache.api.Cache;
@@ -61,11 +60,19 @@ public void invalidate(@NotNull K key) {
@Override
public void invalidateAll() {
cache.invalidateAll();
+ // Caffeine batches removal notifications into a write buffer and drains
+ // them during maintenance, not during invalidateAll() itself. cleanUp()
+ // forces maintenance to run synchronously so every registered eviction
+ // listener fires before this method returns — matching the contract of
+ // Guava's Cache.invalidateAll() and making callers that track derived
+ // state (weight counters, L1 references) consistent immediately.
+ cache.cleanUp();
}
@Override
public void invalidateAll(@NotNull Iterable<? extends K> keys) {
cache.invalidateAll(keys);
+ cache.cleanUp(); // run pending maintenance now so removal listeners fire before this returns
}
@Override
diff --git a/oak-core-spi/src/test/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapterTest.java b/oak-core-spi/src/test/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapterTest.java
index cda9f1ff4da..0b661c411c2 100644
--- a/oak-core-spi/src/test/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapterTest.java
+++ b/oak-core-spi/src/test/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapterTest.java
@@ -17,9 +17,11 @@
package org.apache.jackrabbit.oak.cache.impl.caffeine;
import java.util.Arrays;
+import java.util.concurrent.atomic.AtomicInteger;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.RemovalCause;
+import org.apache.jackrabbit.oak.cache.api.CacheBuilder;
import org.apache.jackrabbit.oak.cache.api.CacheStatsSnapshot;
import org.apache.jackrabbit.oak.cache.api.EvictionCause;
import org.junit.Assert;
@@ -50,6 +52,58 @@ public void statsSnapshotReflectsUnderlyingCacheStats() {
Assert.assertEquals(1, stats.missCount());
}
+ /**
+ * Verifies that eviction listeners registered via {@link CacheBuilder#evictionListener} fire
+ * synchronously during {@link CaffeineCacheAdapter#invalidateAll()}.
+ *
+ * Oak {@link CacheBuilder} configures {@code executor(Runnable::run)} for caches without
+ * {@code refreshAfterWrite}, so removal listeners usually run during {@code invalidateAll()}
+ * even without an explicit {@code cleanUp()} in this adapter. The adapter still calls
+ * {@code cleanUp()} to guarantee that contract for every backing Caffeine instance and to
+ * drain any buffered maintenance work before returning.
+ */
+ @Test
+ public void evictionListenerFiresForAllEntriesDuringInvalidateAll() {
+ AtomicInteger listenerCallCount = new AtomicInteger(0);
+ org.apache.jackrabbit.oak.cache.api.Cache cache =
+ CacheBuilder.newBuilder()
+ .maximumSize(100)
+ .evictionListener((k, v, cause) -> listenerCallCount.incrementAndGet())
+ .build();
+
+ cache.put("a", "1");
+ cache.put("b", "2");
+ cache.put("c", "3");
+
+ cache.invalidateAll();
+
+ Assert.assertEquals("eviction listener must fire for every entry during invalidateAll()",
+ 3, listenerCallCount.get());
+ }
+
+ /**
+ * Verifies the same guarantee for {@link CaffeineCacheAdapter#invalidateAll(Iterable)}.
+ */
+ @Test
+ public void evictionListenerFiresForRequestedEntriesDuringInvalidateAllIterable() {
+ AtomicInteger listenerCallCount = new AtomicInteger(0);
+ org.apache.jackrabbit.oak.cache.api.Cache cache =
+ CacheBuilder.newBuilder()
+ .maximumSize(100)
+ .evictionListener((k, v, cause) -> listenerCallCount.incrementAndGet())
+ .build();
+
+ cache.put("a", "1");
+ cache.put("b", "2");
+ cache.put("c", "3");
+
+ cache.invalidateAll(Arrays.asList("a", "c"));
+
+ Assert.assertEquals("eviction listener must fire for each invalidated key",
+ 2, listenerCallCount.get());
+ Assert.assertNotNull("non-invalidated entry must still be present", cache.getIfPresent("b"));
+ }
+
@Test
public void invalidateAllIterableRemovesOnlyRequestedKeys() {
CaffeineCacheAdapter adapter =
diff --git a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java
index 25687239380..3a42b748c19 100644
--- a/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java
+++ b/oak-segment-remote/src/main/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCache.java
@@ -105,6 +105,9 @@ public PersistentDiskCache(File directory, int cacheMaxSizeMB, DiskCacheIOMonito
if (!directory.exists()) {
directory.mkdirs();
}
+ // Seed the counter from the on-disk size so a restart does not reset it to 0 while old segments
+ // remain, which would keep cleanup from running. NOTE(review): this sum also includes leftover temp files — confirm.
+ cacheSize.set(FileUtils.sizeOfDirectory(directory));
segmentCacheStats = new SegmentCacheStats(
NAME,
@@ -210,6 +213,11 @@ public void writeSegment(long msb, long lsb, Buffer buffer) {
executor.execute(task);
}
+ /** Returns the in-memory cacheSize counter. Package-private for testing. */
+ long getCacheSizeForTesting() {
+ return cacheSize.get();
+ }
+
private boolean isCacheFull() {
return cacheSize.get() >= maxCacheSizeBytes;
}
@@ -246,10 +254,19 @@ private void cleanUpInternal() {
}
return;
}
- long cacheSizeAfter = cacheSize.addAndGet(-length);
- diskCacheIOMonitor.updateCacheSize(cacheSizeAfter, -length);
- segment.delete();
- evictionCount.incrementAndGet();
+ // Delete before decrementing: if another thread races to re-write
+ // this file between a decrement and the delete, the write increments
+ // the counter while our decrement already fired, inflating cacheSize.
+ // Temp files are never counted in cacheSize (the counter is only
+ // incremented after the atomic rename to the final segment path), so
+ // deleting a stale temp file must not decrement the counter.
+ if (segment.delete()) {
+ if (!segmentCacheEntry.isTempFile()) {
+ long cacheSizeAfter = cacheSize.addAndGet(-length);
+ diskCacheIOMonitor.updateCacheSize(cacheSizeAfter, -length);
+ }
+ evictionCount.incrementAndGet();
+ }
} else {
breaker.stop();
}
diff --git a/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCacheTest.java b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCacheTest.java
index f233b46498c..f4caf9dc4ed 100644
--- a/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCacheTest.java
+++ b/oak-segment-remote/src/test/java/org/apache/jackrabbit/oak/segment/remote/persistentcache/PersistentDiskCacheTest.java
@@ -26,12 +26,16 @@
import org.junit.rules.TemporaryFolder;
import org.mockito.Mockito;
+import org.apache.commons.io.FileUtils;
+
import java.io.File;
import java.io.IOException;
+import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Random;
import java.util.UUID;
import static org.junit.Assert.assertEquals;
@@ -132,6 +136,108 @@ public void cleanupTest() throws Exception {
assertEquals("Segment(s) not cleaned up in cache", 0, SEGMENTS - errors.get());
}
+ /**
+ * Reproduces the Fix-A bug: writeSegment() always called cacheSize.addAndGet(fileSize) even
+ * when the segment file already existed on disk. Every Caffeine L2-eviction followed by a
+ * re-request caused a re-write of the same file, adding to the counter without adding new
+ * bytes on disk. Over time this drove cacheSize to ~80 GB while the actual disk held only
+ * 19.6 GB, making isCacheFull() permanently true and collapsing the disk-cache hit rate.
+ */
+ @Test
+ public void testCacheSizeNotInflatedOnReWrite() throws Exception {
+ persistentCache.close();
+ File cacheDir = temporaryFolder.newFolder();
+ persistentCache = new PersistentDiskCache(cacheDir, 10 * 1024, new DiskCacheIOMonitor(StatisticsProvider.NOOP));
+
+ TestSegment segment = TestSegment.createSegment();
+ long[] id = segment.getSegmentId();
+
+ // Write the same segment 5 times to simulate repeated L2 eviction + re-read
+ for (int i = 0; i < 5; i++) {
+ persistentCache.writeSegment(id[0], id[1], segment.getSegmentBuffer());
+ }
+ waitWhile(() -> persistentCache.getWritesPending() > 0);
+ Thread.sleep(100);
+
+ // cacheSize counter must equal actual disk usage — not 5× the segment size
+ long cacheSizeCounter = ((PersistentDiskCache) persistentCache).getCacheSizeForTesting();
+ File segmentFile = new File(cacheDir, new UUID(id[0], id[1]).toString());
+ assertEquals("cacheSize inflated by repeated writes of the same segment",
+ segmentFile.length(), cacheSizeCounter);
+ }
+
+ /**
+ * Reproduces the Fix-C bug: cacheSize was initialized to 0 on startup regardless of segments
+ * already present on disk from a previous session. The counter therefore under-reported disk
+ * usage, isCacheFull() stayed false longer than it should, and cleanup did not run to evict
+ * old files — allowing disk usage to silently grow past the configured maximum.
+ */
+ @Test
+ public void testCacheSizeInitializedFromExistingFiles() throws Exception {
+ persistentCache.close();
+ File cacheDir = temporaryFolder.newFolder();
+
+ // Pre-populate the directory to simulate a restarted instance with leftover segments
+ byte[] data = new byte[4096];
+ new Random().nextBytes(data);
+ Files.write(new File(cacheDir, UUID.randomUUID().toString()).toPath(), data);
+ long expectedSize = data.length;
+
+ persistentCache = new PersistentDiskCache(cacheDir, 10 * 1024, new DiskCacheIOMonitor(StatisticsProvider.NOOP));
+
+ assertEquals("cacheSize should reflect existing files so isCacheFull() is accurate after restart",
+ expectedSize, ((PersistentDiskCache) persistentCache).getCacheSizeForTesting());
+ }
+
+ /**
+ * Reproduces the Fix-B bug: cleanUpInternal() decremented cacheSize before deleting
+ * the file. In the window between the decrement and the actual delete a concurrent
+ * writeSegment task could replace the file and increment cacheSize back, then the cleanup
+ * delete removed the newly-written file. The net effect was one phantom increment per race
+ * occurrence — under high concurrent write load this drove cacheSize far above the real
+ * on-disk bytes.
+ *
+ * The test runs {@value AbstractPersistentCacheTest#THREADS} writer threads against a
+ * 1 MB cache, forcing cleanup to fire continuously and maximise the probability of the race.
+ * After all work drains, the in-memory counter must equal the actual directory size.
+ */
+ @Test
+ public void testCacheSizeConsistentUnderConcurrentWriteAndCleanup() throws Exception {
+ persistentCache.close();
+ File cacheDir = temporaryFolder.newFolder();
+ // 1 MB max with 0 ms temp-file grace so cleanup fires after every few writes
+ persistentCache = new PersistentDiskCache(cacheDir, 1, new DiskCacheIOMonitor(StatisticsProvider.NOOP), 0);
+
+ runConcurrently((nThread, nSegment) -> {
+ TestSegment segment = TestSegment.createSegment();
+ long[] id = segment.getSegmentId();
+ try {
+ persistentCache.writeSegment(id[0], id[1], segment.getSegmentBuffer());
+ } catch (Throwable t) {
+ errors.incrementAndGet();
+ } finally {
+ done.incrementAndGet();
+ }
+ });
+
+ waitWhile(() -> done.get() < SEGMENTS);
+ waitWhile(() -> persistentCache.getWritesPending() > 0);
+ waitWhile(() -> ((PersistentDiskCache) persistentCache).cleanupInProgress.get());
+
+ assertEquals("Errors during concurrent writes", 0, errors.get());
+ assertNoTimeout();
+
+ // One final explicit cleanup pass to drain any in-flight work
+ persistentCache.cleanUp();
+ waitWhile(() -> ((PersistentDiskCache) persistentCache).cleanupInProgress.get());
+
+ long counter = ((PersistentDiskCache) persistentCache).getCacheSizeForTesting();
+ long onDisk = FileUtils.sizeOfDirectory(cacheDir);
+ assertEquals(
+ "cacheSize counter must equal actual on-disk bytes after concurrent write+cleanup",
+ onDisk, counter);
+ }
+
@Test
public void testIOMonitor() throws IOException {
DiskCacheIOMonitor ioMonitorAdapter = Mockito.mock(DiskCacheIOMonitor.class);
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
index aa20999c67c..88513c4ae92 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
@@ -22,7 +22,6 @@
import static java.util.Objects.requireNonNull;
import static org.apache.jackrabbit.oak.segment.CacheWeights.segmentWeight;
-import java.time.Duration;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionException;
@@ -42,6 +41,7 @@
import org.apache.jackrabbit.oak.cache.api.CacheStatsSnapshot;
import org.apache.jackrabbit.oak.cache.api.EvictionCause;
import org.apache.jackrabbit.oak.segment.CacheWeights.SegmentCacheWeigher;
+import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle;
import org.jetbrains.annotations.NotNull;
/**
@@ -53,8 +53,8 @@
* level cache is implemented by memoising the segment in its id (see {@code
* SegmentId#segment}. Every time an segment is evicted from this cache the
* memoised segment is discarded (see {@code SegmentId#onAccess}). On an L1 hit,
* {@link #recordHit(SegmentId)} records L1 hits in {@link #getCacheStats()} and, when enabled,
* touches L2 so eviction policies see the access.
*/
public abstract class SegmentCache {
@@ -68,18 +72,13 @@ public abstract class SegmentCache {
/**
* Eviction policy used by {@link NonEmptyCache}.
*
- * The default is {@link #CAFFEINE}. {@link #LIRS} selects the
- * {@link org.apache.jackrabbit.oak.cache.CacheLIRS} implementation,
- * which was the segment-cache backend before the Caffeine migration
- * (see OAK-XXXXX). Useful for A/B testing or benchmarking.
+ * The default is {@link #CAFFEINE}. {@link #GUAVA} selects the original
+ * Guava LRU implementation. Useful for A/B testing or benchmarking.
*/
public enum SegmentCachePolicy {
/** Caffeine W-TinyLFU — current default. */
CAFFEINE,
- /** Caffeine W-TinyLFU with 30-second expiry-after-access — for benchmarking TTL impact. */
- CAFFEINE_WITH_EXPIRY,
- /** Oak CacheLIRS — pre-migration baseline. */
- LIRS,
+ // TODO : remove me after next release (only added for benchmark tests)
/** Guava LRU — original SegmentCache backend, before the LIRS migration. */
GUAVA
}
@@ -134,7 +133,10 @@ public abstract Segment getSegment(@NotNull SegmentId id, @NotNull Callable buildCache(long maximumWeight, SegmentCachePolicy policy) {
switch (policy) {
- case LIRS:
- org.apache.jackrabbit.oak.cache.CacheLIRS.EvictionCallback lirsCallback =
- (key, value, cause) -> this.onRemove(key, value,
- org.apache.jackrabbit.oak.cache.CacheLIRS.toOakCause(cause));
- org.apache.jackrabbit.oak.cache.CacheLIRS lirs =
- org.apache.jackrabbit.oak.cache.CacheLIRS
- .newBuilder()
- .maximumWeight(maximumWeight)
- .weigher((key, value) -> segmentWeight(value))
- .evictionCallback(lirsCallback)
- .build();
- return lirs.asManualCache();
case GUAVA:
return buildGuavaCache(maximumWeight);
case CAFFEINE:
- return CacheBuilder.newBuilder()
- .maximumWeight(maximumWeight)
- .weigher(new SegmentCacheWeigher())
- .evictionListener(this::onRemove)
- .build();
- case CAFFEINE_WITH_EXPIRY:
default:
return CacheBuilder.newBuilder()
.maximumWeight(maximumWeight)
.weigher(new SegmentCacheWeigher())
- .expireAfterAccess(Duration.ofSeconds(30))
.evictionListener(this::onRemove)
.build();
}
}
- @SuppressWarnings("unchecked")
private Cache buildGuavaCache(long maximumWeight) {
org.apache.jackrabbit.guava.common.cache.Cache guava =
org.apache.jackrabbit.guava.common.cache.CacheBuilder.newBuilder()
@@ -320,7 +302,15 @@ public void putSegment(@NotNull Segment segment) {
@Override
public void clear() {
+ // CaffeineCacheAdapter.invalidateAll() calls cleanUp() internally so
+ // onRemove() fires for every entry before this returns, clearing L1
+ // (key.unloaded()) and decrementing currentWeight for each entry.
+ // The set(0) below is a safety net: any SIZE-eviction that was already
+ // pending in Caffeine's write buffer before this call will also fire
+ // during cleanUp() and could double-decrement a weight that was already
+ // subtracted by the EXPLICIT removal notification.
cache.invalidateAll();
+ stats.currentWeight.set(0);
}
@Override
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
index 9cc93885a09..715fbe9847c 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
@@ -192,6 +192,15 @@ public CacheStatsMBean getSegmentCacheStats() {
return segmentCache.getCacheStats();
}
+ /**
+ * Evicts all entries from the in-memory segment cache without affecting the
+ * Count-Min sketch frequency counts. Called after successful compaction so that
+ * old-generation incumbents no longer block new-generation admission.
+ */
+ public void clearSegmentCache() {
+ segmentCache.clear();
+ }
+
@Nullable
public CacheStatsMBean getPersistentCacheStats() {
return persistentCache == null ? null : persistentCache.getCacheStats();
diff --git a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
index 8e228bddc3f..e8a3746beb7 100644
--- a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
+++ b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
@@ -85,6 +85,18 @@ public void putTest() throws ExecutionException {
assertEquals(segment1, cache.getSegment(id1, () -> failToLoad(id1)));
}
+ @Test
+ public void putSegmentDoesNotInflateWeightOrElementCount() throws ExecutionException {
+ AbstractCacheStats stats = cache.getCacheStats();
+ cache.putSegment(segment1);
+ cache.cleanUp();
+ assertEquals(33, stats.estimateCurrentWeight());
+ assertEquals(1, stats.getElementCount());
+ // Entry must still be accessible via L1 and L2
+ assertEquals(segment1, id1.getSegment());
+ assertEquals(segment1, cache.getSegment(id1, () -> failToLoad(id1)));
+ }
+
@Test
public void getSegmentWrapsCheckedLoaderFailureInExecutionException() {
Exception failure = new Exception("load failed");
@@ -139,6 +151,52 @@ public void invalidateTests() throws ExecutionException {
assertEquals(segment1, cache.getSegment(id1, () -> failToLoad(id1)));
}
+ /**
+ * Reproduces the Caffeine-specific clear() bug: Caffeine's evictionListener fires only for
+ * size/time evictions, not for explicit invalidateAll(). The old clear() implementation
+ * delegated entirely to cache.invalidateAll(), so entries not in Caffeine's pending-eviction
+ * queue kept their L1 (SegmentId.segment) references after the call. Subsequent reads then
+ * returned stale segment data from L1 instead of going through the loader, bypassing the
+ * post-compaction reload path that ensures correct segment data.
+ */
+ @Test
+ public void clearUnloadsAllSegmentIdsFromL1() throws ExecutionException {
+ cache.getSegment(id1, () -> segment1);
+ cache.getSegment(id2, () -> segment2);
+
+ // Verify both are memoised in L1
+ assertEquals(segment1, id1.getSegment());
+ assertEquals(segment2, id2.getSegment());
+
+ cache.clear();
+
+ // L1 must be null for ALL entries — not only those Caffeine's evictionListener
+ // happened to fire for during invalidateAll().
+ expect(SegmentNotFoundException.class, id1::getSegment);
+ expect(SegmentNotFoundException.class, id2::getSegment);
+ }
+
+ /**
+ * Reproduces the stats.currentWeight inflation caused by the same Caffeine clear() bug:
+ * because evictionListener was not called for explicitly-invalidated entries, the weight
+ * decrements in onRemove() never ran, leaving currentWeight stuck at the pre-clear value.
+ * Subsequent putSegment() calls added to an already-inflated counter, eventually causing
+ * spurious size-based evictions and incorrect occupancy metrics.
+ */
+ @Test
+ public void clearResetsCurrentWeightToZeroForAllEntries() throws ExecutionException {
+ cache.getSegment(id1, () -> segment1); // contributes weight 33 (32 overhead + 1)
+ cache.getSegment(id2, () -> segment2); // contributes weight 34 (32 overhead + 2)
+ assertEquals(67, cache.getCacheStats().estimateCurrentWeight());
+
+ cache.clear();
+
+ // currentWeight must be 0: without the explicit stats.currentWeight.set(0) at the
+ // end of clear(), entries whose evictionListener was skipped kept their weight in
+ // the counter and inflated it across compaction cycles.
+ assertEquals(0, cache.getCacheStats().estimateCurrentWeight());
+ }
+
@Test
public void evictionDuringPut() throws ExecutionException {
cache.putSegment(segment3);
From 7eb70a6a12899087c9038a192dd175d3c2ad016b Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Fri, 15 May 2026 23:53:13 +0530
Subject: [PATCH 07/15] OAK-12210 : add missing coverage for
FT_NOTIFY_L2_ON_L1_HIT toggle and GUAVA policy
Two branches in SegmentCache were untested:
- recordHit() path when FT_NOTIFY_L2_ON_L1_HIT is disabled (stats still counted)
- newSegmentCache(long, GUAVA) path: GuavaCacheAdapter + buildGuavaCache
Co-Authored-By: Claude Sonnet 4.6
---
.../oak/segment/SegmentCacheTest.java | 37 +++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
index e8a3746beb7..31496380e41 100644
--- a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
+++ b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
@@ -338,6 +338,43 @@ public void hotSegmentEvictedWithoutL2Notification() throws ExecutionException {
}
}
+ /**
+ * When {@link SegmentCache#FT_NOTIFY_L2_ON_L1_HIT} is disabled, L1 hits must still
+ * be counted in {@link AbstractCacheStats#getHitCount()} even though {@code getIfPresent}
+ * is skipped — the stats branch runs regardless of the L2-notify branch.
+ */
+ @Test
+ public void recordHitSkipsL2NotifyWhenToggleDisabled() throws ExecutionException {
+ SegmentCache.FT_NOTIFY_L2_ON_L1_HIT.setEnabled(false);
+ try {
+ cache.getSegment(id1, () -> segment1);
+ assertEquals(segment1, id1.getSegment());
+ assertEquals(1, cache.getCacheStats().getHitCount());
+ } finally {
+ SegmentCache.FT_NOTIFY_L2_ON_L1_HIT.setEnabled(true);
+ }
+ }
+
+ /**
+ * Smoke test for the {@link SegmentCache.SegmentCachePolicy#GUAVA} backend: put, L1 hit,
+ * L2 get, and clear all work correctly with the Guava-backed {@code NonEmptyCache}.
+ */
+ @Test
+ public void guavaPolicyCachesAndClearsLikeDefault() throws ExecutionException {
+ SegmentCache guava = newSegmentCache(DEFAULT_SEGMENT_CACHE_MB, SegmentCache.SegmentCachePolicy.GUAVA);
+ SegmentId gId = new SegmentId(EMPTY_STORE, 0x000000000000000aL, 0xa00000000000000aL, guava::recordHit);
+ Segment gSeg = mock(Segment.class);
+ when(gSeg.getSegmentId()).thenReturn(gId);
+ when(gSeg.estimateMemoryUsage()).thenReturn(1);
+
+ guava.getSegment(gId, () -> gSeg);
+ assertEquals(gSeg, gId.getSegment());
+ assertEquals(gSeg, guava.getSegment(gId, () -> failToLoad(gId)));
+
+ guava.clear();
+ expect(SegmentNotFoundException.class, gId::getSegment);
+ }
+
@Test
public void nonEmptyCacheStatsTest() throws Exception {
AbstractCacheStats stats = cache.getCacheStats();
From dfcef45018ad7ebf8b961649c45348c317a5ade0 Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Tue, 19 May 2026 09:49:29 +0530
Subject: [PATCH 08/15] OAK-12210 : revert Lirs adaptor changes
---
.../org/apache/jackrabbit/oak/cache/CacheLIRS.java | 14 --------------
.../cache/impl/caffeine/CaffeineCacheAdapter.java | 9 +--------
.../oak/cache/impl/lirs/LirsCacheAdapter.java | 4 ++--
3 files changed, 3 insertions(+), 24 deletions(-)
diff --git a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/CacheLIRS.java b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/CacheLIRS.java
index 6ec251bc216..b80185c0dbb 100644
--- a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/CacheLIRS.java
+++ b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/CacheLIRS.java
@@ -39,7 +39,6 @@
import org.apache.jackrabbit.guava.common.collect.ImmutableMap;
import org.apache.jackrabbit.guava.common.util.concurrent.ListenableFuture;
import org.apache.jackrabbit.oak.cache.api.EvictionCause;
-import org.apache.jackrabbit.oak.cache.impl.lirs.LirsCacheAdapter;
import org.apache.jackrabbit.oak.cache.impl.lirs.LirsLoadingCacheAdapter;
import org.apache.jackrabbit.oak.commons.annotations.Internal;
import org.jetbrains.annotations.NotNull;
@@ -1788,19 +1787,6 @@ public org.apache.jackrabbit.oak.cache.api.LoadingCache asOakCache() {
return new LirsLoadingCacheAdapter<>(this);
}
- /**
- * Exposes this CacheLIRS instance through the Oak manual-cache API.
- * Unlike {@link #asOakCache()}, this variant does not require a loader and
- * supports the {@link org.apache.jackrabbit.oak.cache.api.Cache#get(Object, java.util.function.Function)}
- * mapping-function contract used by most Oak caches.
- *
- * @return a Cache-backed Oak view of this cache
- */
- @NotNull
- public org.apache.jackrabbit.oak.cache.api.Cache asManualCache() {
- return new LirsCacheAdapter<>(this);
- }
-
/**
* Maps a {@link RemovalCause} to the Oak-neutral {@link EvictionCause}.
*
diff --git a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java
index 31a4b42eb14..92510a885e0 100644
--- a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java
+++ b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/caffeine/CaffeineCacheAdapter.java
@@ -20,6 +20,7 @@
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;
+import com.github.benmanes.caffeine.cache.Policy;
import com.github.benmanes.caffeine.cache.RemovalCause;
import org.apache.jackrabbit.oak.cache.api.CacheStatsSnapshot;
import org.apache.jackrabbit.oak.cache.api.Cache;
@@ -60,19 +61,11 @@ public void invalidate(@NotNull K key) {
@Override
public void invalidateAll() {
cache.invalidateAll();
- // Caffeine batches removal notifications into a write buffer and drains
- // them during maintenance, not during invalidateAll() itself. cleanUp()
- // forces maintenance to run synchronously so every registered eviction
- // listener fires before this method returns — matching the contract of
- // Guava's Cache.invalidateAll() and making callers that track derived
- // state (weight counters, L1 references) consistent immediately.
- cache.cleanUp();
}
@Override
public void invalidateAll(@NotNull Iterable extends K> keys) {
cache.invalidateAll(keys);
- cache.cleanUp();
}
@Override
diff --git a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/lirs/LirsCacheAdapter.java b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/lirs/LirsCacheAdapter.java
index 081d8bb752c..02fbf7f44c9 100644
--- a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/lirs/LirsCacheAdapter.java
+++ b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/impl/lirs/LirsCacheAdapter.java
@@ -36,11 +36,11 @@
* API: runtime failures propagate directly and checked loader failures are
* wrapped in {@link CompletionException}.
*/
-public class LirsCacheAdapter implements Cache {
+class LirsCacheAdapter implements Cache {
private final CacheLIRS cache;
- public LirsCacheAdapter(CacheLIRS cache) {
+ LirsCacheAdapter(CacheLIRS cache) {
this.cache = cache;
}
From fbb8b64fae15c8d08d6ba9eead37119900e4da0f Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Tue, 19 May 2026 09:50:43 +0530
Subject: [PATCH 09/15] OAK-12210 : reverted package-info changes
---
.../java/org/apache/jackrabbit/oak/cache/api/package-info.java | 2 +-
.../main/java/org/apache/jackrabbit/oak/cache/package-info.java | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java
index a79ed0f7b30..4bd384ffb3e 100644
--- a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java
+++ b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/api/package-info.java
@@ -19,7 +19,7 @@
* For Oak internal use only. Do not use outside Oak components.
*/
@Internal(since = "1.0.0")
-@Version("1.1.0")
+@Version("1.0.0")
package org.apache.jackrabbit.oak.cache.api;
import org.apache.jackrabbit.oak.commons.annotations.Internal;
diff --git a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/package-info.java b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/package-info.java
index 8205ebfbdb7..61515af957e 100644
--- a/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/package-info.java
+++ b/oak-core-spi/src/main/java/org/apache/jackrabbit/oak/cache/package-info.java
@@ -19,7 +19,7 @@
* For Oak internal use only. Do not use outside Oak components.
*/
@Internal(since = "1.1.1")
-@Version("2.2")
+@Version("2.1")
package org.apache.jackrabbit.oak.cache;
import org.apache.jackrabbit.oak.commons.annotations.Internal;
From 479119c5e4a7398332124ae5e6bc609dd8c4a1bf Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Tue, 19 May 2026 09:56:25 +0530
Subject: [PATCH 10/15] OAK-12210 : reverted clearCacheOnCompaction changes
---
.../SegmentCacheMemoizationBenchmark.java | 15 -------------
.../SegmentCachePolicyBenchmark.java | 21 -------------------
.../benchmark/SegmentCacheTarBenchmark.java | 16 +-------------
.../oak/segment/file/AbstractFileStore.java | 9 --------
4 files changed, 1 insertion(+), 60 deletions(-)
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
index 4c44565929b..f5ace7954fe 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
@@ -110,15 +110,6 @@ public class SegmentCacheMemoizationBenchmark extends AbstractTest {
};
private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
- /**
- * Set {@code -Doak.benchmark.clearCacheOnCompaction=true} to clear the segment cache
- * between the old-gen warmup and new-gen measurement phases of Scenario 2, simulating
- * the JIRA-4 fix. Default is {@code false}: old-gen incumbents at freq=15 block
- * new-gen admission and the freeze is visible in per-epoch TAR-read%.
- */
- private static final boolean CLEAR_CACHE_ON_COMPACTION =
- Boolean.getBoolean("oak.benchmark.clearCacheOnCompaction");
-
// ----- live Scenario 1 state (used by runTest / statsValues) -----
private double[] liveCdf;
private Random liveRng;
@@ -230,9 +221,6 @@ private void runScenario2(int cacheSizeMb) {
" Caffeine: ~40%+ TAR-read% initially, self-corrects after ~30K ops; Guava: ~27% steady.");
System.out.println(
" After convergence: Caffeine ~20% vs Guava ~24% — W-TinyLFU wins long-term.");
- System.out.printf(
- " Fix: -Doak.benchmark.clearCacheOnCompaction=true (JIRA-4) eliminates the freeze;"
- + " both start at ~27%%.%n");
Segment[] pool2 = createSegmentPool(OLD_GEN_2 + NEW_GEN_2);
long[][][] epochs = new long[NUM_POLICIES][][];
long[][] totals = new long[NUM_POLICIES][];
@@ -448,9 +436,6 @@ private static long[] runCompactionColdStart(CacheSetup setup, List epoc
for (int i = 0; i < WARMUP_2; i++) {
setup.access(zipfSample(oldCdf, r.nextDouble()));
}
- if (CLEAR_CACHE_ON_COMPACTION) {
- setup.cache.clear();
- }
setup.snapshotAndReset(); // discard warmup counts + reset eviction baseline
long totTotal = 0, totL1 = 0, totTar = 0, totEvict = 0;
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
index ac334c5ec50..ea042933712 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
@@ -215,16 +215,6 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
};
private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
- /**
- * Set {@code -Doak.benchmark.clearCacheOnCompaction=true} to clear the segment cache
- * between the old-gen warmup and new-gen measurement phases of Scenario K, simulating
- * what {@code AbstractCompactionStrategy} does when JIRA-4's fix is in place.
- * Default is {@code false}: the freeze is visible because old-gen incumbents at freq=15
- * block new-gen admission (W-TinyLFU auto-rejects candidates with freq ≤ 5).
- */
- private static final boolean CLEAR_CACHE_ON_COMPACTION =
- Boolean.getBoolean("oak.benchmark.clearCacheOnCompaction");
-
// ----- live Scenario A state -----
private double[] zipfCdf;
private Random rng;
@@ -498,9 +488,6 @@ protected void afterSuite() {
" Caffeine: ~40%+ miss% initially, self-corrects after ~30K ops; Guava: ~27% steady.");
System.out.println(
" After convergence: Caffeine ~20% vs Guava ~24% — W-TinyLFU wins long-term.");
- System.out.printf(
- " Fix: -Doak.benchmark.clearCacheOnCompaction=true (JIRA-4) eliminates the freeze;"
- + " both start at ~27%%.%n");
long[][][] epochsK = new long[NUM_POLICIES][][];
long[][] totalsK = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
@@ -1071,14 +1058,6 @@ private static long[] runCompactionColdStart(PolicySetup setup, List epo
for (int i = 0; i < WARMUP_K; i++) {
setup.access(zipfSample(oldCdf, r.nextDouble()));
}
- // Optionally simulate the JIRA-4 fix: clearing the cache lets new-gen fill
- // the empty L2 directly, bypassing the admission gate entirely.
- // Without this (-Doak.benchmark.clearCacheOnCompaction=false, the default),
- // old-gen incumbents at freq=15 block new-gen for many epochs.
- if (CLEAR_CACHE_ON_COMPACTION) {
- setup.cache.clear();
- }
-
// Phase 2: compaction — all traffic switches to new-gen (freq=0 in sketch)
long totalHits = 0;
long totalMisses = 0;
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
index 00eede55d2d..088fb0f7cf0 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
@@ -100,15 +100,6 @@ public class SegmentCacheTarBenchmark extends AbstractTest {
};
private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
- /**
- * Set {@code -Doak.benchmark.clearCacheOnCompaction=true} to clear the segment cache
- * between the old-gen warmup and new-gen measurement phases of Scenario 3, simulating
- * the JIRA-4 fix. Default is {@code false}: old-gen incumbents at freq=15 block
- * new-gen admission and the freeze shows up as higher TAR-read% for Caffeine.
- */
- private static final boolean CLEAR_CACHE_ON_COMPACTION =
- Boolean.getBoolean("oak.benchmark.clearCacheOnCompaction");
-
// ----- live-run state -----
private File storeDir;
private int poolSize;
@@ -368,8 +359,7 @@ private void runScenario3() throws IOException, InvalidFileStoreVersionException
+ " (old-gen=%d new-gen=%d warmup=%,d measure=%,d epoch=%,d zipf-new=%.1f) ---%n"
+ " Old-gen saturated to freq=15; new-gen auto-rejected (freq≤5 gate):%n"
+ " Caffeine ~40%%+ TAR-read%% initially, self-corrects after ~30K ops; Guava ~27%% steady.%n"
- + " After convergence: Caffeine ~20%% vs Guava ~24%% — W-TinyLFU wins long-term.%n"
- + " Fix: -Doak.benchmark.clearCacheOnCompaction=true (JIRA-4) eliminates the freeze.%n",
+ + " After convergence: Caffeine ~20%% vs Guava ~24%% — W-TinyLFU wins long-term.%n",
oldGen, newGen, WARMUP_3, MEASURE_3, EPOCH_OPS_3, ZIPF_3_NEW_EXP);
int numEpochs = MEASURE_3 / EPOCH_OPS_3;
long[][][] epochs = new long[NUM_POLICIES][numEpochs][];
@@ -509,10 +499,6 @@ private static long[] runCompactionEpochs(ReadOnlyFileStore store, SegmentId[] p
for (int i = 0; i < WARMUP_3; i++) {
pool[zipfSample(oldCdf, rng.nextDouble())].getSegment();
}
- if (CLEAR_CACHE_ON_COMPACTION) {
- store.clearSegmentCache();
- }
-
long h0 = store.getSegmentCacheStats().getHitCount();
long m0 = store.getSegmentCacheStats().getMissCount();
long totTotal = 0, totL1 = 0, totL2 = 0, totTar = 0;
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
index 715fbe9847c..9cc93885a09 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
@@ -192,15 +192,6 @@ public CacheStatsMBean getSegmentCacheStats() {
return segmentCache.getCacheStats();
}
- /**
- * Evicts all entries from the in-memory segment cache without affecting the
- * Count-Min sketch frequency counts. Called after successful compaction so that
- * old-generation incumbents no longer block new-generation admission.
- */
- public void clearSegmentCache() {
- segmentCache.clear();
- }
-
@Nullable
public CacheStatsMBean getPersistentCacheStats() {
return persistentCache == null ? null : persistentCache.getCacheStats();
From 2803476d477d5b5c7ec65583d00cc2dc2e7eebb8 Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Tue, 19 May 2026 10:29:06 +0530
Subject: [PATCH 11/15] OAK-12210 : removed guava cache changes from
segmentcache to avoid polluting production code
---
.../oak/benchmark/GuavaSegmentCache.java | 198 ++++++++++++++++++
.../SegmentCacheMemoizationBenchmark.java | 30 +--
.../SegmentCachePolicyBenchmark.java | 53 ++---
.../benchmark/SegmentCacheTarBenchmark.java | 30 +--
.../jackrabbit/oak/segment/CacheWeights.java | 10 +-
.../jackrabbit/oak/segment/SegmentCache.java | 166 +--------------
.../jackrabbit/oak/segment/SegmentId.java | 4 +-
.../oak/segment/file/AbstractFileStore.java | 3 +-
.../oak/segment/file/FileStoreBuilder.java | 22 +-
9 files changed, 284 insertions(+), 232 deletions(-)
create mode 100644 oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java
new file mode 100644
index 00000000000..d30b454715b
--- /dev/null
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.benchmark;
+
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Supplier;
+
+import org.apache.jackrabbit.guava.common.cache.CacheBuilder;
+import org.apache.jackrabbit.guava.common.cache.CacheStats;
+import org.apache.jackrabbit.guava.common.cache.RemovalNotification;
+import org.apache.jackrabbit.guava.common.util.concurrent.UncheckedExecutionException;
+import org.apache.jackrabbit.oak.cache.AbstractCacheStats;
+import org.apache.jackrabbit.oak.segment.CacheWeights;
+import org.apache.jackrabbit.oak.segment.Segment;
+import org.apache.jackrabbit.oak.segment.SegmentCache;
+import org.apache.jackrabbit.oak.segment.SegmentId;
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * A {@link SegmentCache} backed by a Guava LRU cache, used in benchmark classes
+ * to compare eviction policies against the default Caffeine W-TinyLFU implementation.
+ * All Guava-specific code lives here; the production {@link SegmentCache} class stays clean.
+ * <p>
+ * Only data segments (see {@link SegmentId#isDataSegmentId()}) are cached; for any other
+ * id the loader is invoked on every call. Hit, miss, load, eviction and weight counters
+ * are maintained by this class itself and exposed through {@link #getCacheStats()}.
+ */
+class GuavaSegmentCache extends SegmentCache {
+
+ private static final String NAME = "Segment Cache (Guava)";
+
+ private final org.apache.jackrabbit.guava.common.cache.Cache<SegmentId, Segment> cache;
+ private final Stats stats;
+
+ /**
+ * Creates a cache whose total weight is capped at {@code cacheSizeMb * 1024 * 1024}.
+ *
+ * @param cacheSizeMb maximum cache size in megabytes
+ */
+ GuavaSegmentCache(long cacheSizeMb) {
+ long maximumWeight = cacheSizeMb * 1024L * 1024L;
+ // Build cache first so cache::size can be passed to Stats; the removal listener
+ // references this.stats which is assigned below — safe because evictions only
+ // fire after construction is complete (same pattern as production NonEmptyCache).
+ this.cache = CacheBuilder.newBuilder()
+ .maximumWeight(maximumWeight)
+ .weigher((SegmentId id, Segment seg) -> CacheWeights.segmentWeight(seg))
+ .removalListener(this::onRemove)
+ .build();
+ this.stats = new Stats(NAME, maximumWeight, cache::size);
+ }
+
+ /**
+ * Removal listener registered with the Guava cache: counts the removal, subtracts the
+ * removed segment's weight from the tracked total and calls {@code unloaded()} on the key.
+ * <p>
+ * NOTE(review): Guava notifies removal listeners of explicit invalidation and of
+ * replacement as well as of size eviction, so {@code evictionCount} also grows on
+ * {@link #clear()} — confirm this is intended for the benchmark metrics.
+ */
+ private void onRemove(RemovalNotification<SegmentId, Segment> n) {
+ stats.evictionCount.incrementAndGet();
+ if (n.getValue() != null) {
+ stats.currentWeight.addAndGet(-CacheWeights.segmentWeight(n.getValue()));
+ }
+ n.getKey().unloaded();
+ }
+
+ /**
+ * Returns the segment for {@code id}. For data segment ids the segment is served from
+ * the Guava cache and {@code loader} runs only when the cache has no entry; for all
+ * other ids {@code loader} is called directly and nothing is cached.
+ *
+ * @param id identifier of the requested segment
+ * @param loader produces the segment when it is not served from the cache
+ * @return the cached or freshly loaded segment
+ * @throws ExecutionException if {@code loader} fails with a checked exception, or with
+ * any exception for a non-data segment id; on the cached path runtime exceptions
+ * thrown by {@code loader} propagate unwrapped
+ */
+ @Override
+ @NotNull
+ public Segment getSegment(@NotNull SegmentId id, @NotNull Callable<Segment> loader)
+ throws ExecutionException {
+ if (id.isDataSegmentId()) {
+ try {
+ return cache.get(id, () -> {
+ long t0 = System.nanoTime();
+ try {
+ Segment segment = loader.call();
+ stats.loadSuccessCount.incrementAndGet();
+ stats.loadTime.addAndGet(System.nanoTime() - t0);
+ stats.missCount.incrementAndGet();
+ stats.currentWeight.addAndGet(CacheWeights.segmentWeight(segment));
+ id.loaded(segment);
+ return segment;
+ } catch (Exception e) {
+ stats.loadExceptionCount.incrementAndGet();
+ if (e instanceof RuntimeException re) throw re;
+ // Wrap checked failures so the handler below can tell them apart from
+ // runtime failures thrown by the loader itself.
+ throw new LoaderException(e);
+ }
+ });
+ } catch (UncheckedExecutionException e) {
+ Throwable cause = e.getCause();
+ if (cause instanceof LoaderException le) {
+ throw new ExecutionException(le.getCause());
+ }
+ if (cause instanceof RuntimeException re) throw re;
+ throw e;
+ } catch (ExecutionException e) {
+ throw new ExecutionException(e.getCause());
+ }
+ } else {
+ try {
+ return loader.call();
+ } catch (Exception e) {
+ throw new ExecutionException(e);
+ }
+ }
+ }
+
+ /**
+ * Stores {@code segment} in the cache if its id is a data segment id; otherwise does
+ * nothing.
+ *
+ * @param segment the segment to cache
+ */
+ @Override
+ public void putSegment(@NotNull Segment segment) {
+ SegmentId id = segment.getSegmentId();
+ if (id.isDataSegmentId()) {
+ // Update before put() for correct ordering with eviction callback
+ id.loaded(segment);
+ stats.currentWeight.addAndGet(CacheWeights.segmentWeight(segment));
+ cache.put(id, segment);
+ }
+ }
+
+ /** Invalidates every cached entry and resets the tracked weight to zero. */
+ @Override
+ public void clear() {
+ cache.invalidateAll();
+ stats.currentWeight.set(0);
+ }
+
+ /** Delegates to the Guava cache's {@code cleanUp()}. */
+ @Override
+ public void cleanUp() {
+ cache.cleanUp();
+ }
+
+ /** Returns the statistics object maintained by this cache. */
+ @Override
+ @NotNull
+ public AbstractCacheStats getCacheStats() {
+ return stats;
+ }
+
+ /**
+ * Counts a hit for a data segment id. When {@code FT_NOTIFY_L2_ON_L1_HIT} is enabled
+ * the id is also looked up in the Guava cache first, so the cache sees the access.
+ * Ids that are not data segment ids are ignored.
+ *
+ * @param id the segment id that was hit
+ */
+ @Override
+ public void recordHit(@NotNull SegmentId id) {
+ if (id.isDataSegmentId()) {
+ if (FT_NOTIFY_L2_ON_L1_HIT.isEnabled()) {
+ cache.getIfPresent(id);
+ }
+ stats.hitCount.incrementAndGet();
+ }
+ }
+
+ /** Unchecked carrier for a checked exception thrown by the segment loader. */
+ private static final class LoaderException extends RuntimeException {
+ LoaderException(Exception cause) {
+ super(cause);
+ }
+ }
+
+ /** Statistics for this cache, backed by counters that the enclosing class updates. */
+ private static final class Stats extends AbstractCacheStats {
+ private final long maximumWeight;
+ private final Supplier<Long> elementCount;
+
+ final AtomicLong currentWeight = new AtomicLong();
+ final AtomicLong loadSuccessCount = new AtomicLong();
+ final AtomicInteger loadExceptionCount = new AtomicInteger();
+ final AtomicLong loadTime = new AtomicLong();
+ final AtomicLong evictionCount = new AtomicLong();
+ final AtomicLong hitCount = new AtomicLong();
+ final AtomicLong missCount = new AtomicLong();
+
+ Stats(@NotNull String name, long maximumWeight, @NotNull Supplier<Long> elementCount) {
+ super(name);
+ this.maximumWeight = maximumWeight;
+ this.elementCount = elementCount;
+ }
+
+ @Override
+ protected CacheStats getCurrentStats() {
+ return new CacheStats(
+ hitCount.get(),
+ missCount.get(),
+ loadSuccessCount.get(),
+ loadExceptionCount.get(),
+ loadTime.get(),
+ evictionCount.get());
+ }
+
+ @Override
+ public long getElementCount() {
+ return elementCount.get();
+ }
+
+ @Override
+ public long getMaxTotalWeight() {
+ return maximumWeight;
+ }
+
+ @Override
+ public long estimateCurrentWeight() {
+ return currentWeight.get();
+ }
+ }
+}
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
index f5ace7954fe..d65b289b46f 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
@@ -36,7 +36,6 @@
import org.apache.jackrabbit.oak.segment.RecordNumbers;
import org.apache.jackrabbit.oak.segment.Segment;
import org.apache.jackrabbit.oak.segment.SegmentCache;
-import org.apache.jackrabbit.oak.segment.SegmentCache.SegmentCachePolicy;
import org.apache.jackrabbit.oak.segment.SegmentId;
import org.apache.jackrabbit.oak.segment.SegmentReferences;
import org.apache.jackrabbit.oak.segment.SegmentStore;
@@ -104,9 +103,14 @@ public class SegmentCacheMemoizationBenchmark extends AbstractTest {
private static final long DATA_SEG_LSB_MASK = 0xa000000000000000L;
- private static final SegmentCachePolicy[] POLICIES = {
- SegmentCachePolicy.CAFFEINE,
- SegmentCachePolicy.GUAVA
+ @FunctionalInterface
+ private interface CacheFactory { // element type of POLICIES: one factory per cache implementation under test
+ SegmentCache create(int cacheSizeMb); // cacheSizeMb: cache capacity in megabytes
+ }
+
+ private static final CacheFactory[] POLICIES = {
+ SegmentCache::newSegmentCache,
+ GuavaSegmentCache::new
};
private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
@@ -134,7 +138,7 @@ protected void beforeSuite() {
liveRng = new Random(RANDOM_SEED);
liveSetups = new CacheSetup[NUM_POLICIES];
for (int p = 0; p < NUM_POLICIES; p++) {
- liveSetups[p] = freshSetup(POLICIES[p], POOL_1, CACHE_SIZE_MB);
+ liveSetups[p] = freshSetup(p, POOL_1, CACHE_SIZE_MB);
}
}
@@ -226,7 +230,7 @@ private void runScenario2(int cacheSizeMb) {
long[][] totals = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochList = new ArrayList<>();
- CacheSetup setup = freshSetupWithPool(POLICIES[p], pool2, cacheSizeMb);
+ CacheSetup setup = freshSetupWithPool(p, pool2, cacheSizeMb);
totals[p] = runCompactionColdStart(setup, epochList);
epochs[p] = epochList.toArray(new long[0][]);
}
@@ -249,7 +253,7 @@ private void runScenario3(int cacheSizeMb) {
long[][] totals = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochList = new ArrayList<>();
- CacheSetup setup = freshSetupWithPool(POLICIES[p], pool3, cacheSizeMb);
+ CacheSetup setup = freshSetupWithPool(p, pool3, cacheSizeMb);
totals[p] = runDriftingWindow(setup, epochList);
epochs[p] = epochList.toArray(new long[0][]);
}
@@ -381,13 +385,13 @@ private static Segment[] createSegmentPool(int n) {
* {@link InstrumentedStore}. Call {@link #createSegmentPool} once and pass the result
* to this method for each policy to avoid accumulating mock objects.
*
- * @param policy the eviction policy to use
+ * @param policyIndex index into {@link #POLICIES}
* @param segs pre-created mock segments (from {@link #createSegmentPool})
* @param cacheSizeMb cache capacity in megabytes
*/
- private static CacheSetup freshSetupWithPool(SegmentCachePolicy policy, Segment[] segs, int cacheSizeMb) {
+ private static CacheSetup freshSetupWithPool(int policyIndex, Segment[] segs, int cacheSizeMb) {
int n = segs.length;
- SegmentCache cache = SegmentCache.newSegmentCache(cacheSizeMb, policy);
+ SegmentCache cache = POLICIES[policyIndex].create(cacheSizeMb);
SegmentId[] ids = new SegmentId[n];
Map segMap = new IdentityHashMap<>(n * 2);
InstrumentedStore store = new InstrumentedStore(cache, segMap);
@@ -409,12 +413,12 @@ private static CacheSetup freshSetupWithPool(SegmentCachePolicy policy, Segment[
/**
* Builds a fresh {@link CacheSetup} with {@code n} new mock segments.
*
- * @param policy the eviction policy to use
+ * @param policyIndex index into {@link #POLICIES}
* @param n number of distinct segments in the pool
* @param cacheSizeMb cache capacity in megabytes
*/
- private static CacheSetup freshSetup(SegmentCachePolicy policy, int n, int cacheSizeMb) {
- return freshSetupWithPool(policy, createSegmentPool(n), cacheSizeMb);
+ private static CacheSetup freshSetup(int policyIndex, int n, int cacheSizeMb) {
+ return freshSetupWithPool(policyIndex, createSegmentPool(n), cacheSizeMb);
}
// -----------------------------------------------------------------------
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
index ea042933712..e546cb0c0ab 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
@@ -33,7 +33,6 @@
import org.apache.jackrabbit.oak.segment.RecordNumbers;
import org.apache.jackrabbit.oak.segment.Segment;
import org.apache.jackrabbit.oak.segment.SegmentCache;
-import org.apache.jackrabbit.oak.segment.SegmentCache.SegmentCachePolicy;
import org.apache.jackrabbit.oak.segment.SegmentId;
import org.apache.jackrabbit.oak.segment.SegmentReferences;
import org.apache.jackrabbit.oak.segment.SegmentStore;
@@ -209,9 +208,14 @@ public class SegmentCachePolicyBenchmark extends AbstractTest {
private static final long DATA_SEG_LSB_MASK = 0xa000000000000000L;
- private static final SegmentCachePolicy[] POLICIES = {
- SegmentCachePolicy.CAFFEINE,
- SegmentCachePolicy.GUAVA
+ @FunctionalInterface
+ private interface CacheFactory { // element type of POLICIES: one factory per cache implementation under test
+ SegmentCache create(int cacheSizeMb); // cacheSizeMb: cache capacity in megabytes
+ }
+
+ private static final CacheFactory[] POLICIES = {
+ SegmentCache::newSegmentCache,
+ GuavaSegmentCache::new
};
private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
@@ -250,7 +254,7 @@ protected void beforeSuite() {
liveSegs = new Segment[NUM_POLICIES][TOTAL_SEGMENTS];
for (int p = 0; p < NUM_POLICIES; p++) {
totalAccesses[p] = new LongAdder();
- liveCaches[p] = SegmentCache.newSegmentCache(CACHE_SIZE_MB, POLICIES[p]);
+ liveCaches[p] = POLICIES[p].create(CACHE_SIZE_MB);
for (int i = 0; i < TOTAL_SEGMENTS; i++) {
UUID uuid = UUID.randomUUID();
long msb = uuid.getMostSignificantBits();
@@ -314,7 +318,7 @@ protected void afterSuite() {
+ " (warmup=%,d measure=%,d ops) ---%n",
SCAN_LENGTH, POST_SCAN_WARMUP, POST_SCAN_MEASURE);
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, TOTAL_SEGMENTS, CACHE_SIZE_MB);
long[] r = runScanThenZipf(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -332,7 +336,7 @@ protected void afterSuite() {
long[][] totalsC = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochs = new ArrayList<>();
- PolicySetup setup = freshSetup(p, POLICIES[p], SCAN_C + WORKING_SET_C, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, SCAN_C + WORKING_SET_C, CACHE_SIZE_MB);
totalsC[p] = runColdStart(setup, epochs);
epochsC[p] = epochs.toArray(new long[0][]);
}
@@ -361,7 +365,7 @@ protected void afterSuite() {
System.out.println(
" no hot data — uniform access over pool 25x cache; expected miss ~95%%");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], UNIFORM_POOL_D, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, UNIFORM_POOL_D, CACHE_SIZE_MB);
long[] r = runUniformRandom(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -374,7 +378,7 @@ protected void afterSuite() {
" warm Zipfian cache hit by burst of new segments;"
+ " measures working-set miss rate after burst subsides");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS + BURST_SIZE_E, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, TOTAL_SEGMENTS + BURST_SIZE_E, CACHE_SIZE_MB);
long[] r = runBurstNewContent(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -387,7 +391,7 @@ protected void afterSuite() {
" repeated small scans interleaved with Zipfian;"
+ " cumulative sketch pollution vs LRU recency aging");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_SEGMENTS, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, TOTAL_SEGMENTS, CACHE_SIZE_MB);
long[] r = runPeriodicGC(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -400,7 +404,7 @@ protected void afterSuite() {
" large sequential import followed by random reads of recently-imported segments;"
+ " recency (LRU) vs frequency (Caffeine) post-import");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], IMPORT_SIZE_G, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, IMPORT_SIZE_G, CACHE_SIZE_MB);
long[] r = runImportThenRead(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -415,7 +419,7 @@ protected void afterSuite() {
" hot window slides forward; pure recency (LRU) is optimal;"
+ " window > cache forces evictions on every slide");
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], TOTAL_POOL_H, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, TOTAL_POOL_H, CACHE_SIZE_MB);
long[] r = runSlidingWindow(setup);
printResult(POLICY_NAMES[p], r[0], r[1], r[2]);
}
@@ -432,7 +436,7 @@ protected void afterSuite() {
long[][] totalsI = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochs = new ArrayList<>();
- PolicySetup setup = freshSetup(p, POLICIES[p], POOL_I, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, POOL_I, CACHE_SIZE_MB);
totalsI[p] = runDriftingWindow(setup, epochs);
epochsI[p] = epochs.toArray(new long[0][]);
}
@@ -469,7 +473,7 @@ protected void afterSuite() {
String label = drift == Integer.MAX_VALUE ? "static" : String.valueOf(drift);
System.out.printf(" %-12s", label);
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetup(p, POLICIES[p], POOL_J, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, POOL_J, CACHE_SIZE_MB);
long[] r = runDriftVariant(setup, drift);
long total = r[0] + r[1];
System.out.printf(" %14.1f", total == 0 ? 0.0 : 100.0 * r[1] / total);
@@ -492,7 +496,7 @@ protected void afterSuite() {
long[][] totalsK = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
List epochs = new ArrayList<>();
- PolicySetup setup = freshSetup(p, POLICIES[p], OLD_GEN_K + NEW_GEN_K, CACHE_SIZE_MB);
+ PolicySetup setup = freshSetup(p, OLD_GEN_K + NEW_GEN_K, CACHE_SIZE_MB);
totalsK[p] = runCompactionColdStart(setup, epochs);
epochsK[p] = epochs.toArray(new long[0][]);
}
@@ -537,7 +541,7 @@ private void runSizeSensitivity() {
Segment[] poolI = createSegmentPool(POOL_I);
System.out.printf(" %8d", sizeMb);
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetupWithPool(p, POLICIES[p], poolI, sizeMb);
+ PolicySetup setup = freshSetupWithPool(p, poolI, sizeMb);
long[] r = runDriftingWindow(setup, new ArrayList<>());
long total = r[0] + r[1];
System.out.printf(" %14.1f", total == 0 ? 0.0 : 100.0 * r[1] / total);
@@ -558,7 +562,7 @@ private void runSizeSensitivity() {
Segment[] poolK = createSegmentPool(OLD_GEN_K + NEW_GEN_K);
System.out.printf(" %8d", sizeMb);
for (int p = 0; p < NUM_POLICIES; p++) {
- PolicySetup setup = freshSetupWithPool(p, POLICIES[p], poolK, sizeMb);
+ PolicySetup setup = freshSetupWithPool(p, poolK, sizeMb);
long[] totals = runCompactionColdStart(setup, new ArrayList<>());
long total = totals[0] + totals[1];
System.out.printf(" %14.1f", total == 0 ? 0.0 : 100.0 * totals[1] / total);
@@ -645,15 +649,13 @@ private static Segment[] createSegmentPool(int n) {
* {@link #createSegmentPool} once and pass the result to this method for each policy
* to avoid accumulating mock objects across the size sweep.
*
- * @param policyIndex unused — kept for call-site readability
- * @param policy the cache eviction policy to use
+ * @param policyIndex index into {@link #POLICIES}
* @param segs pre-created mock segments (from {@link #createSegmentPool})
* @param cacheSizeMb cache capacity in megabytes
*/
- private static PolicySetup freshSetupWithPool(int policyIndex, SegmentCachePolicy policy,
- Segment[] segs, int cacheSizeMb) {
+ private static PolicySetup freshSetupWithPool(int policyIndex, Segment[] segs, int cacheSizeMb) {
int n = segs.length;
- SegmentCache cache = SegmentCache.newSegmentCache(cacheSizeMb, policy);
+ SegmentCache cache = POLICIES[policyIndex].create(cacheSizeMb);
SegmentId[] ids = new SegmentId[n];
for (int i = 0; i < n; i++) {
UUID uuid = UUID.randomUUID();
@@ -667,13 +669,12 @@ private static PolicySetup freshSetupWithPool(int policyIndex, SegmentCachePolic
/**
* Builds a fresh {@link PolicySetup} with {@code n} segments.
*
- * @param policyIndex unused — kept for call-site readability
- * @param policy the cache eviction policy to use
+ * @param policyIndex index into {@link #POLICIES}
* @param n number of distinct segments to create
* @param cacheSizeMb cache capacity in megabytes
*/
- private static PolicySetup freshSetup(int policyIndex, SegmentCachePolicy policy, int n, int cacheSizeMb) {
- return freshSetupWithPool(policyIndex, policy, createSegmentPool(n), cacheSizeMb);
+ private static PolicySetup freshSetup(int policyIndex, int n, int cacheSizeMb) {
+ return freshSetupWithPool(policyIndex, createSegmentPool(n), cacheSizeMb);
}
// -----------------------------------------------------------------------
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
index 088fb0f7cf0..4d1a2479fb7 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java
@@ -30,7 +30,7 @@
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
import org.apache.jackrabbit.oak.fixture.RepositoryFixture;
-import org.apache.jackrabbit.oak.segment.SegmentCache.SegmentCachePolicy;
+import org.apache.jackrabbit.oak.segment.SegmentCache;
import org.apache.jackrabbit.oak.segment.SegmentId;
import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
import org.apache.jackrabbit.oak.segment.file.FileStore;
@@ -94,9 +94,14 @@ public class SegmentCacheTarBenchmark extends AbstractTest {
private static final int MEASURE_3 = 300_000;
private static final int EPOCH_OPS_3 = 2_000;
- private static final SegmentCachePolicy[] POLICIES = {
- SegmentCachePolicy.CAFFEINE,
- SegmentCachePolicy.GUAVA
+ @FunctionalInterface
+ private interface CacheFactory { // element type of POLICIES: one factory per cache implementation under test
+ SegmentCache create(int cacheSizeMb); // cacheSizeMb: cache capacity in megabytes
+ }
+
+ private static final CacheFactory[] POLICIES = {
+ SegmentCache::newSegmentCache,
+ GuavaSegmentCache::new
};
private static final String[] POLICY_NAMES = {"CAFFEINE", "GUAVA"};
private static final int NUM_POLICIES = POLICIES.length;
@@ -178,18 +183,17 @@ private void openLiveStores() throws IOException, InvalidFileStoreVersionExcepti
liveStores = new ReadOnlyFileStore[NUM_POLICIES];
liveIds = new SegmentId[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
- ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB);
+ ReadOnlyFileStore store = openReadOnly(p, CACHE_SIZE_MB);
liveStores[p] = store;
liveIds[p] = collectDataIds(store);
}
}
/** Opens a fresh on-heap {@link ReadOnlyFileStore} with the given policy and cache size. */
- private ReadOnlyFileStore openReadOnly(SegmentCachePolicy policy, int cacheSizeMb)
+ private ReadOnlyFileStore openReadOnly(int policyIndex, int cacheSizeMb)
throws IOException, InvalidFileStoreVersionException {
return FileStoreBuilder.fileStoreBuilder(storeDir)
- .withSegmentCacheSize(cacheSizeMb)
- .withSegmentCachePolicy(policy)
+ .withSegmentCache(POLICIES[policyIndex].create(cacheSizeMb))
.withMemoryMapping(false)
.buildReadOnly();
}
@@ -294,7 +298,7 @@ private void runScenario1() throws IOException, InvalidFileStoreVersionException
WARMUP_OPS, MEASURE_OPS, ZIPF_EXP, CACHE_SIZE_MB);
double[] cdf = buildZipfCdf(poolSize, ZIPF_EXP);
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB)) {
+ try (ReadOnlyFileStore store = openReadOnly(p, CACHE_SIZE_MB)) {
SegmentId[] ids = collectDataIds(store);
int n = ids.length;
ThreadLocalRandom rng = ThreadLocalRandom.current();
@@ -334,7 +338,7 @@ private void runScenario2() throws IOException, InvalidFileStoreVersionException
long[][][] epochs = new long[NUM_POLICIES][numEpochs][];
long[][] totals = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB)) {
+ try (ReadOnlyFileStore store = openReadOnly(p, CACHE_SIZE_MB)) {
SegmentId[] ids = collectDataIds(store);
epochs[p] = new long[numEpochs][];
totals[p] = runDriftingEpochs(store, ids, width, epochs[p]);
@@ -365,7 +369,7 @@ private void runScenario3() throws IOException, InvalidFileStoreVersionException
long[][][] epochs = new long[NUM_POLICIES][numEpochs][];
long[][] totals = new long[NUM_POLICIES][];
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], CACHE_SIZE_MB)) {
+ try (ReadOnlyFileStore store = openReadOnly(p, CACHE_SIZE_MB)) {
SegmentId[] ids = collectDataIds(store);
epochs[p] = new long[numEpochs][];
totals[p] = runCompactionEpochs(store, ids, oldGen, epochs[p]);
@@ -397,7 +401,7 @@ private void runSizeSensitivity() throws IOException, InvalidFileStoreVersionExc
for (int sizeMb : sizes) {
System.out.printf(" %8d", sizeMb);
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], sizeMb)) {
+ try (ReadOnlyFileStore store = openReadOnly(p, sizeMb)) {
SegmentId[] ids = collectDataIds(store);
long[][] ignored = new long[MEASURE_2 / EPOCH_OPS_2][];
long[] r = runDriftingEpochs(store, ids, width, ignored);
@@ -419,7 +423,7 @@ private void runSizeSensitivity() throws IOException, InvalidFileStoreVersionExc
for (int sizeMb : sizes) {
System.out.printf(" %8d", sizeMb);
for (int p = 0; p < NUM_POLICIES; p++) {
- try (ReadOnlyFileStore store = openReadOnly(POLICIES[p], sizeMb)) {
+ try (ReadOnlyFileStore store = openReadOnly(p, sizeMb)) {
SegmentId[] ids = collectDataIds(store);
long[][] ignored = new long[MEASURE_3 / EPOCH_OPS_3][];
long[] r = runCompactionEpochs(store, ids, oldGen, ignored);
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CacheWeights.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CacheWeights.java
index 31fe5d014e2..c2dad311c7e 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CacheWeights.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CacheWeights.java
@@ -65,7 +65,7 @@ public static Weigher noopWeigher() {
return (Weigher) NOOP_WEIGHER;
}
- static int segmentWeight(Segment segment) {
+ public static int segmentWeight(Segment segment) {
return SEGMENT_CACHE_OVERHEAD + segment.estimateMemoryUsage();
}
@@ -77,14 +77,6 @@ public int weigh(@NotNull SegmentId id, @NotNull Segment segment) {
}
}
- public static class SegmentCacheWeigherGuava implements
- org.apache.jackrabbit.guava.common.cache.Weigher {
- @Override
- public int weigh(@NotNull SegmentId id, @NotNull Segment segment) {
- return segmentWeight(segment);
- }
- }
-
public static class NodeCacheWeigher implements Weigher {
@Override
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
index 88513c4ae92..ab9b21e4604 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
@@ -22,23 +22,17 @@
import static java.util.Objects.requireNonNull;
import static org.apache.jackrabbit.oak.segment.CacheWeights.segmentWeight;
-import java.util.Map;
import java.util.concurrent.Callable;
-import java.util.concurrent.CompletionException;
-import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
-import java.util.function.Function;
import java.util.function.Supplier;
import org.apache.jackrabbit.guava.common.cache.CacheStats;
-import org.apache.jackrabbit.guava.common.cache.RemovalNotification;
import org.apache.jackrabbit.oak.cache.AbstractCacheStats;
import org.apache.jackrabbit.oak.cache.api.Cache;
import org.apache.jackrabbit.oak.cache.api.CacheBuilder;
-import org.apache.jackrabbit.oak.cache.api.CacheStatsSnapshot;
import org.apache.jackrabbit.oak.cache.api.EvictionCause;
import org.apache.jackrabbit.oak.segment.CacheWeights.SegmentCacheWeigher;
import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle;
@@ -70,42 +64,15 @@ public abstract class SegmentCache {
private static final String NAME = "Segment Cache";
/**
- * Eviction policy used by {@link NonEmptyCache}.
- *
- * The default is {@link #CAFFEINE}. {@link #GUAVA} selects the original
- * Guava LRU implementation. Useful for A/B testing or benchmarking.
- */
- public enum SegmentCachePolicy {
- /** Caffeine W-TinyLFU — current default. */
- CAFFEINE,
- // TODO : remove me after next release (only added for benchmark tests)
- /** Guava LRU — original SegmentCache backend, before the LIRS migration. */
- GUAVA
- }
-
- /**
- * Create a new segment cache of the given size using the default
- * {@link SegmentCachePolicy#CAFFEINE} eviction policy.
+ * Create a new segment cache of the given size using Caffeine W-TinyLFU.
* Returns an always-empty cache for {@code cacheSizeMB <= 0}.
*
* @param cacheSizeMB size of the cache in megabytes.
*/
@NotNull
public static SegmentCache newSegmentCache(long cacheSizeMB) {
- return newSegmentCache(cacheSizeMB, SegmentCachePolicy.CAFFEINE);
- }
-
- /**
- * Create a new segment cache of the given size with the specified eviction
- * policy. Returns an always-empty cache for {@code cacheSizeMB <= 0}.
- *
- * @param cacheSizeMB size of the cache in megabytes.
- * @param policy the eviction policy to use (must not be null).
- */
- @NotNull
- public static SegmentCache newSegmentCache(long cacheSizeMB, @NotNull SegmentCachePolicy policy) {
if (cacheSizeMB > 0) {
- return new NonEmptyCache(cacheSizeMB, policy);
+ return new NonEmptyCache(cacheSizeMB);
} else {
return new EmptyCache();
}
@@ -196,44 +163,16 @@ private static class NonEmptyCache extends SegmentCache {
@NotNull
private final Stats stats;
- /**
- * Create a new cache of the given size using the specified eviction policy.
- *
- * @param cacheSizeMB size of the cache in megabytes.
- * @param policy the eviction policy to use.
- */
- private NonEmptyCache(long cacheSizeMB, SegmentCachePolicy policy) {
+ private NonEmptyCache(long cacheSizeMB) {
long maximumWeight = cacheSizeMB * 1024 * 1024;
- this.cache = buildCache(maximumWeight, policy);
+ this.cache = CacheBuilder.newBuilder()
+ .maximumWeight(maximumWeight)
+ .weigher(new SegmentCacheWeigher())
+ .evictionListener(this::onRemove)
+ .build();
this.stats = new Stats(NAME, maximumWeight, cache::estimatedSize);
}
- private Cache buildCache(long maximumWeight, SegmentCachePolicy policy) {
- switch (policy) {
- case GUAVA:
- return buildGuavaCache(maximumWeight);
- case CAFFEINE:
- default:
- return CacheBuilder.newBuilder()
- .maximumWeight(maximumWeight)
- .weigher(new SegmentCacheWeigher())
- .evictionListener(this::onRemove)
- .build();
- }
- }
-
- private Cache buildGuavaCache(long maximumWeight) {
- org.apache.jackrabbit.guava.common.cache.Cache guava =
- org.apache.jackrabbit.guava.common.cache.CacheBuilder.newBuilder()
- .maximumWeight(maximumWeight)
- .weigher(new CacheWeights.SegmentCacheWeigherGuava())
- .removalListener((RemovalNotification n) ->
- this.onRemove(n.getKey(), n.getValue(),
- org.apache.jackrabbit.oak.cache.CacheLIRS.toOakCause(n.getCause())))
- .build();
- return new GuavaCacheAdapter<>(guava);
- }
-
/**
* Removal handler called whenever an item is evicted from the cache.
*/
@@ -342,95 +281,6 @@ private SegmentCacheLoaderException(@NotNull Exception cause) {
}
}
- /**
- * Adapts a Guava {@link org.apache.jackrabbit.guava.common.cache.Cache} to the
- * Oak {@link Cache} interface so it can be used as the L2 backend in
- * {@link NonEmptyCache}.
- */
- private static final class GuavaCacheAdapter implements Cache {
-
- private final org.apache.jackrabbit.guava.common.cache.Cache delegate;
-
- GuavaCacheAdapter(org.apache.jackrabbit.guava.common.cache.Cache delegate) {
- this.delegate = delegate;
- }
-
- @Override
- public V getIfPresent(@NotNull K key) {
- return delegate.getIfPresent(key);
- }
-
- @Override
- public V get(@NotNull K key, @NotNull Function<? super K, ? extends V> fn) {
- try {
- return delegate.get(key, () -> fn.apply(key));
- } catch (ExecutionException e) {
- Throwable cause = e.getCause();
- if (cause instanceof RuntimeException re) { throw re; }
- if (cause instanceof Error er) { throw er; }
- throw new CompletionException(cause == null ? e : cause);
- }
- }
-
- @Override
- public void put(@NotNull K key, @NotNull V value) {
- delegate.put(key, value);
- }
-
- @Override
- public void invalidate(@NotNull K key) {
- delegate.invalidate(key);
- }
-
- @Override
- public void invalidateAll() {
- delegate.invalidateAll();
- }
-
- @Override
- public void invalidateAll(@NotNull Iterable<? extends K> keys) {
- delegate.invalidateAll(keys);
- }
-
- @Override
- public long estimatedSize() {
- return delegate.size();
- }
-
- @Override
- @NotNull
- public CacheStatsSnapshot stats() {
- return new CacheStatsSnapshot(0, 0, 0, 0, 0, 0);
- }
-
- @Override
- @NotNull
- public ConcurrentMap asMap() {
- return delegate.asMap();
- }
-
- @Override
- @NotNull
- public Map<K, V> getAllPresent(@NotNull Iterable<? extends K> keys) {
- return delegate.getAllPresent(keys);
- }
-
- @Override
- public void cleanUp() {
- delegate.cleanUp();
- }
-
- @Override
- public long getUsedWeight() {
- return -1;
- }
-
- @Override
- public void setMaximumWeight(long maximumWeight) {
- // Guava does not support dynamic resizing
- }
- }
-
/** An always empty cache */
private static class EmptyCache extends SegmentCache {
private final Stats stats = new Stats(NAME, 0, () -> 0L);
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java
index 5b4fd2bed95..c2df558e164 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentId.java
@@ -206,7 +206,7 @@ public void reclaimed(@NotNull String gcInfo) {
* @see #getSegment()
* @see #unloaded()
*/
- void loaded(@NotNull Segment segment) {
+ public void loaded(@NotNull Segment segment) {
this.segment = segment;
this.gcGeneration = segment.getGcGeneration();
}
@@ -217,7 +217,7 @@ void loaded(@NotNull Segment segment) {
* @see #getSegment()
* @see #loaded(Segment)
*/
- void unloaded() {
+ public void unloaded() {
this.segment = null;
}
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
index 9cc93885a09..3ecb217e84a 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
@@ -150,7 +150,8 @@ public SegmentId newSegmentId(long msb, long lsb) {
}
});
this.blobStore = builder.getBlobStore();
- this.segmentCache = newSegmentCache(builder.getSegmentCacheSize(), builder.getSegmentCachePolicy());
+ SegmentCache injectedCache = builder.getSegmentCache();
+ this.segmentCache = injectedCache != null ? injectedCache : newSegmentCache(builder.getSegmentCacheSize());
this.segmentReader = new CachingSegmentReader(
this::getWriter,
blobStore,
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStoreBuilder.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStoreBuilder.java
index 94374919c2a..0fe0207f665 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStoreBuilder.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStoreBuilder.java
@@ -40,7 +40,7 @@
import org.apache.jackrabbit.oak.segment.CacheWeights.TemplateCacheWeigher;
import org.apache.jackrabbit.oak.segment.RecordCache;
import org.apache.jackrabbit.oak.segment.Segment;
-import org.apache.jackrabbit.oak.segment.SegmentCache.SegmentCachePolicy;
+import org.apache.jackrabbit.oak.segment.SegmentCache;
import org.apache.jackrabbit.oak.segment.SegmentNotFoundExceptionListener;
import org.apache.jackrabbit.oak.segment.WriterCacheManager;
import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions;
@@ -84,8 +84,8 @@ public class FileStoreBuilder {
private int segmentCacheSize = DEFAULT_SEGMENT_CACHE_MB;
- @NotNull
- private SegmentCachePolicy segmentCachePolicy = SegmentCachePolicy.CAFFEINE;
+ @Nullable
+ private SegmentCache segmentCache;
private int stringCacheSize = DEFAULT_STRING_CACHE_MB;
@@ -209,14 +209,16 @@ public FileStoreBuilder withSegmentCacheSize(int segmentCacheSize) {
}
/**
- * Eviction policy for the segment cache.
+ * Injects a pre-built {@link SegmentCache} to use instead of the default Caffeine cache.
+ * Useful for benchmarking alternative eviction policies without polluting the production
+ * {@link SegmentCache} class. When set, {@link #withSegmentCacheSize(int)} is ignored.
*
- * @param segmentCachePolicy the policy to use (must not be null)
+ * @param segmentCache the cache to use (must not be null)
* @return this instance
*/
@NotNull
- public FileStoreBuilder withSegmentCachePolicy(@NotNull SegmentCachePolicy segmentCachePolicy) {
- this.segmentCachePolicy = segmentCachePolicy;
+ public FileStoreBuilder withSegmentCache(@NotNull SegmentCache segmentCache) {
+ this.segmentCache = segmentCache;
return this;
}
@@ -561,9 +563,9 @@ int getSegmentCacheSize() {
return segmentCacheSize;
}
- @NotNull
- SegmentCachePolicy getSegmentCachePolicy() {
- return segmentCachePolicy;
+ @Nullable
+ SegmentCache getSegmentCache() {
+ return segmentCache;
}
int getStringCacheSize() {
From 8c117de719bdfb71e4f707f467e8bfbcea92b721 Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Tue, 19 May 2026 10:40:31 +0530
Subject: [PATCH 12/15] OAK-12210 : remove guava policy smoke test from
SegmentCacheTest
Co-Authored-By: Claude Sonnet 4.6
---
.../oak/segment/SegmentCacheTest.java | 20 -------------------
1 file changed, 20 deletions(-)
diff --git a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
index 31496380e41..a49f426579c 100644
--- a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
+++ b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
@@ -355,26 +355,6 @@ public void recordHitSkipsL2NotifyWhenToggleDisabled() throws ExecutionException
}
}
- /**
- * Smoke test for the {@link SegmentCache.SegmentCachePolicy#GUAVA} backend: put, L1 hit,
- * L2 get, and clear all work correctly with the Guava-backed {@code NonEmptyCache}.
- */
- @Test
- public void guavaPolicyCachesAndClearsLikeDefault() throws ExecutionException {
- SegmentCache guava = newSegmentCache(DEFAULT_SEGMENT_CACHE_MB, SegmentCache.SegmentCachePolicy.GUAVA);
- SegmentId gId = new SegmentId(EMPTY_STORE, 0x000000000000000aL, 0xa00000000000000aL, guava::recordHit);
- Segment gSeg = mock(Segment.class);
- when(gSeg.getSegmentId()).thenReturn(gId);
- when(gSeg.estimateMemoryUsage()).thenReturn(1);
-
- guava.getSegment(gId, () -> gSeg);
- assertEquals(gSeg, gId.getSegment());
- assertEquals(gSeg, guava.getSegment(gId, () -> failToLoad(gId)));
-
- guava.clear();
- expect(SegmentNotFoundException.class, gId::getSegment);
- }
-
@Test
public void nonEmptyCacheStatsTest() throws Exception {
AbstractCacheStats stats = cache.getCacheStats();
From 548a0cf491304d2b822945a32fa3498bd3542783 Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Tue, 19 May 2026 13:32:08 +0530
Subject: [PATCH 13/15] OAK-12210 : rebased to origin
---
.../org/apache/jackrabbit/oak/segment/SegmentCacheTest.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
index a49f426579c..6bedf059f02 100644
--- a/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
+++ b/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentCacheTest.java
@@ -339,19 +339,19 @@ public void hotSegmentEvictedWithoutL2Notification() throws ExecutionException {
}
/**
- * When {@link SegmentCache#FT_NOTIFY_L2_ON_L1_HIT} is disabled, L1 hits must still
+ * When {@link SegmentCache#FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED} is disabled, L1 hits must still
* be counted in {@link AbstractCacheStats#getHitCount()} even though {@code getIfPresent}
* is skipped — the stats branch runs regardless of the L2-notify branch.
*/
@Test
public void recordHitSkipsL2NotifyWhenToggleDisabled() throws ExecutionException {
- SegmentCache.FT_NOTIFY_L2_ON_L1_HIT.setEnabled(false);
+ SegmentCache.FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED.set(false);
try {
cache.getSegment(id1, () -> segment1);
assertEquals(segment1, id1.getSegment());
assertEquals(1, cache.getCacheStats().getHitCount());
} finally {
- SegmentCache.FT_NOTIFY_L2_ON_L1_HIT.setEnabled(true);
+ SegmentCache.FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED.set(true);
}
}
From 2335bd36db5ca06ff503ce03998ae0bf39f73eb8 Mon Sep 17 00:00:00 2001
From: rishabhdaim
Date: Tue, 19 May 2026 13:50:04 +0530
Subject: [PATCH 14/15] OAK-12210 : removed cleanUp, not required, we run
caffeine maintenance in same thread
---
.../jackrabbit/oak/segment/SegmentCache.java | 31 +++----------------
.../oak/segment/SegmentCacheTest.java | 1 -
2 files changed, 4 insertions(+), 28 deletions(-)
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
index ab9b21e4604..5a2b83b1b5c 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
@@ -35,7 +35,6 @@
import org.apache.jackrabbit.oak.cache.api.CacheBuilder;
import org.apache.jackrabbit.oak.cache.api.EvictionCause;
import org.apache.jackrabbit.oak.segment.CacheWeights.SegmentCacheWeigher;
-import org.apache.jackrabbit.oak.spi.toggle.FeatureToggle;
import org.jetbrains.annotations.NotNull;
/**
@@ -47,12 +46,8 @@
* level cache is implemented by memoising the segment in its id (see {@code
* SegmentId#segment}. Every time an segment is evicted from this cache the
* memoised segment is discarded (see {@code SegmentId#onAccess}). On an L1 hit,
-<<<<<<< HEAD
* {@link #recordHit(SegmentId)} records L1 hits in {@link #getCacheStats()} and, when enabled,
* touches L2 so eviction policies see the access.
-=======
- * {@link #recordHit(SegmentId)} notifies L2 so eviction policies see the access.
->>>>>>> 3fcfdaa256 (OAK-12210 : benchmark cleanup and cache bug fixes)
*/
public abstract class SegmentCache {
@@ -107,13 +102,6 @@ public abstract Segment getSegment(@NotNull SegmentId id, @NotNull Callable
Date: Thu, 21 May 2026 11:30:48 +0530
Subject: [PATCH 15/15] OAK-12210 : fixed compilation issues
---
.../oak/benchmark/GuavaSegmentCache.java | 7 +------
.../SegmentCacheMemoizationBenchmark.java | 1 -
.../SegmentCachePolicyBenchmark.java | 20 -------------------
.../jackrabbit/oak/segment/SegmentCache.java | 19 ++++++++----------
4 files changed, 9 insertions(+), 38 deletions(-)
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java
index d30b454715b..32c80caf3e1 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/GuavaSegmentCache.java
@@ -124,11 +124,6 @@ public void clear() {
stats.currentWeight.set(0);
}
- @Override
- public void cleanUp() {
- cache.cleanUp();
- }
-
@Override
@NotNull
public AbstractCacheStats getCacheStats() {
@@ -138,7 +133,7 @@ public AbstractCacheStats getCacheStats() {
@Override
public void recordHit(@NotNull SegmentId id) {
if (id.isDataSegmentId()) {
- if (FT_NOTIFY_L2_ON_L1_HIT.isEnabled()) {
+ if (FT_OAK_12214_PROPAGATE_L1_HITS_TO_L2_ENABLED.get()) {
cache.getIfPresent(id);
}
stats.hitCount.incrementAndGet();
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
index d65b289b46f..d1b0aa171e8 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheMemoizationBenchmark.java
@@ -300,7 +300,6 @@ long[] snapshotAndReset() {
long total = store.totalAccesses.getAndSet(0);
long l1Hits = store.l1Hits.getAndSet(0);
long tarReads = store.tarReads.getAndSet(0);
- cache.cleanUp();
long currentEvictions = cache.getCacheStats().getEvictionCount();
long evictionsDelta = currentEvictions - evictionBaseline;
evictionBaseline = currentEvictions;
diff --git a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
index e546cb0c0ab..854cfe2d8d4 100644
--- a/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
+++ b/oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCachePolicyBenchmark.java
@@ -697,7 +697,6 @@ private static long[] runScanThenZipf(PolicySetup setup) {
setup.access(zipfSample(cdf, r.nextDouble()));
}
- setup.cache.cleanUp();
long missesBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -705,7 +704,6 @@ private static long[] runScanThenZipf(PolicySetup setup) {
setup.access(zipfSample(cdf, r.nextDouble()));
}
- setup.cache.cleanUp();
long misses = setup.cache.getCacheStats().getMissCount() - missesBase;
long evictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
return new long[]{POST_SCAN_MEASURE - misses, misses, evictions};
@@ -738,7 +736,6 @@ private static long[] runColdStart(PolicySetup setup, List epochStats) {
int numEpochs = MEASURE_C / EPOCH_OPS_C;
for (int epoch = 0; epoch < numEpochs; epoch++) {
- setup.cache.cleanUp();
long missBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -751,7 +748,6 @@ private static long[] runColdStart(PolicySetup setup, List epochStats) {
}
}
- setup.cache.cleanUp();
long epochMisses = setup.cache.getCacheStats().getMissCount() - missBase;
long epochEvictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
long epochHits = EPOCH_OPS_C - epochMisses;
@@ -781,7 +777,6 @@ private static long[] runUniformRandom(PolicySetup setup) {
setup.access(r.nextInt(n));
}
- setup.cache.cleanUp();
long missesBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -789,7 +784,6 @@ private static long[] runUniformRandom(PolicySetup setup) {
setup.access(r.nextInt(n));
}
- setup.cache.cleanUp();
long misses = setup.cache.getCacheStats().getMissCount() - missesBase;
long evictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
return new long[]{MEASURE_D - misses, misses, evictions};
@@ -817,7 +811,6 @@ private static long[] runBurstNewContent(PolicySetup setup) {
}
}
- setup.cache.cleanUp();
long missesBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -825,7 +818,6 @@ private static long[] runBurstNewContent(PolicySetup setup) {
setup.access(zipfSample(cdf, r.nextDouble()));
}
- setup.cache.cleanUp();
long misses = setup.cache.getCacheStats().getMissCount() - missesBase;
long evictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
return new long[]{MEASURE_E - misses, misses, evictions};
@@ -853,7 +845,6 @@ private static long[] runPeriodicGC(PolicySetup setup) {
}
}
- setup.cache.cleanUp();
long missesBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -861,7 +852,6 @@ private static long[] runPeriodicGC(PolicySetup setup) {
setup.access(zipfSample(cdf, r.nextDouble()));
}
- setup.cache.cleanUp();
long misses = setup.cache.getCacheStats().getMissCount() - missesBase;
long evictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
return new long[]{MEASURE_F - misses, misses, evictions};
@@ -882,7 +872,6 @@ private static long[] runImportThenRead(PolicySetup setup) {
setup.access(i);
}
- setup.cache.cleanUp();
long missesBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -891,7 +880,6 @@ private static long[] runImportThenRead(PolicySetup setup) {
setup.access(base + r.nextInt(RECENT_WINDOW_G));
}
- setup.cache.cleanUp();
long misses = setup.cache.getCacheStats().getMissCount() - missesBase;
long evictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
return new long[]{MEASURE_G - misses, misses, evictions};
@@ -917,7 +905,6 @@ private static long[] runSlidingWindow(PolicySetup setup) {
windowStart += SLIDE_STEP_H;
}
- setup.cache.cleanUp();
long missesBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -934,7 +921,6 @@ private static long[] runSlidingWindow(PolicySetup setup) {
windowStart = (windowStart + SLIDE_STEP_H) % TOTAL_POOL_H;
}
- setup.cache.cleanUp();
long misses = setup.cache.getCacheStats().getMissCount() - missesBase;
long evictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
return new long[]{MEASURE_H - misses, misses, evictions};
@@ -970,7 +956,6 @@ private static long[] runDriftingWindow(PolicySetup setup, List epochSta
int numEpochs = MEASURE_I / EPOCH_OPS_I;
for (int epoch = 0; epoch < numEpochs; epoch++) {
- setup.cache.cleanUp();
long missBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -982,7 +967,6 @@ private static long[] runDriftingWindow(PolicySetup setup, List epochSta
opCount++;
}
- setup.cache.cleanUp();
long epochMisses = setup.cache.getCacheStats().getMissCount() - missBase;
long epochEvictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
long epochHits = EPOCH_OPS_I - epochMisses;
@@ -1017,7 +1001,6 @@ private static long[] runDriftVariant(PolicySetup setup, int drift) {
opCount++;
}
- setup.cache.cleanUp();
long missesBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -1029,7 +1012,6 @@ private static long[] runDriftVariant(PolicySetup setup, int drift) {
opCount++;
}
- setup.cache.cleanUp();
long misses = setup.cache.getCacheStats().getMissCount() - missesBase;
long evictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
return new long[]{MEASURE_J - misses, misses, evictions};
@@ -1066,7 +1048,6 @@ private static long[] runCompactionColdStart(PolicySetup setup, List epo
int numEpochs = MEASURE_K / EPOCH_OPS_K;
for (int epoch = 0; epoch < numEpochs; epoch++) {
- setup.cache.cleanUp();
long missBase = setup.cache.getCacheStats().getMissCount();
long evictBase = setup.cache.getCacheStats().getEvictionCount();
@@ -1074,7 +1055,6 @@ private static long[] runCompactionColdStart(PolicySetup setup, List epo
setup.access(OLD_GEN_K + zipfSample(newCdf, r.nextDouble()));
}
- setup.cache.cleanUp();
long epochMisses = setup.cache.getCacheStats().getMissCount() - missBase;
long epochEvictions = setup.cache.getCacheStats().getEvictionCount() - evictBase;
long epochHits = EPOCH_OPS_K - epochMisses;
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
index 5a2b83b1b5c..5b1e4f1afbd 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentCache.java
@@ -59,8 +59,8 @@ public abstract class SegmentCache {
private static final String NAME = "Segment Cache";
/**
- * Create a new segment cache of the given size using Caffeine W-TinyLFU.
- * Returns an always-empty cache for {@code cacheSizeMB <= 0}.
+ * Create a new segment cache of the given size. Returns an always empty
+ * cache for {@code cacheSizeMB <= 0}.
*
* @param cacheSizeMB size of the cache in megabytes.
*/
@@ -95,10 +95,7 @@ public abstract Segment getSegment(@NotNull SegmentId id, @NotNull CallablenewBuilder()
@@ -229,12 +231,7 @@ public void putSegment(@NotNull Segment segment) {
@Override
public void clear() {
- // invalidateAll() triggers onRemove() for every entry synchronously
- // (maintenance runs on the caller thread via executor(Runnable::run)).
- // The set(0) below is a safety net for any SIZE-eviction already pending
- // in Caffeine's write buffer that would double-decrement currentWeight.
cache.invalidateAll();
- stats.currentWeight.set(0);
}
@Override