From 5fe56b8824d87e7e8c2426f2ebe5544aa679844f Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Tue, 5 May 2026 12:56:46 +0200 Subject: [PATCH 1/7] [tests] Add debug logging to LayerTest to diagnose random GC crash (#25171) Add Console.WriteLine logging to every test method in LayerTest, with entry/exit markers and thread IDs. Adds detailed logging around all GC.Collect() calls and background thread operations to help pinpoint which test is running when the crash in xamarin_coreclr_reference_tracking_is_referenced_callback occurs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../monotouch-test/CoreAnimation/LayerTest.cs | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/monotouch-test/CoreAnimation/LayerTest.cs b/tests/monotouch-test/CoreAnimation/LayerTest.cs index ed7f174462d7..e253a3f25927 100644 --- a/tests/monotouch-test/CoreAnimation/LayerTest.cs +++ b/tests/monotouch-test/CoreAnimation/LayerTest.cs @@ -9,6 +9,7 @@ using System.Diagnostics; using System.Linq; +using System.Runtime.CompilerServices; using System.Threading; using CoreGraphics; using CoreAnimation; @@ -19,9 +20,15 @@ namespace MonoTouchFixtures.CoreAnimation { [Preserve (AllMembers = true)] public class LayerTest { + static void Log (string message, [CallerMemberName] string member = "") + { + Console.WriteLine ($"[LayerTest.{member}] [Thread {Thread.CurrentThread.ManagedThreadId}] {message}"); + } + [Test] public void Mask () { + Log ("start"); using (CALayer layer = new CALayer ()) { Assert.Null (layer.Mask, "Mask/default"); layer.Mask = new CALayer (); @@ -29,11 +36,13 @@ public void Mask () layer.Mask = null; Assert.Null (layer.Mask, "Mask/nullable"); } + Log ("done"); } [Test] public void CAActionTest () { + Log ("start"); // bug 2441 CAActionTestClass obj = new CAActionTestClass (); Assert.IsNull (obj.ActionForKey ("animation"), "a"); @@ -53,6 +62,7 @@ public void CAActionTest () Assert.That (obj.ActionForKey ("basicAnimation") == dict [basicAnimationKey], "f"); Assert.IsNull (CAActionTestClass.DefaultActionForKey ("animation"), "g"); Assert.IsNull (CALayer.DefaultActionForKey ("animation"), "h"); + Log ("done"); } class CAActionTestClass : CALayer { @@ -62,39 +72,47 @@ class CAActionTestClass : CALayer { [Test] public void ConvertPoint () { + Log ("start"); using (CALayer layer = new CALayer ()) { Assert.True (layer.ConvertPointFromLayer (CGPoint.Empty, null).IsEmpty, "From/Empty/null"); Assert.True (layer.ConvertPointToLayer (CGPoint.Empty, null).IsEmpty, "To/Empty/null"); } + Log ("done"); } [Test] public void ConvertRect () { + Log ("start"); using (CALayer layer = new CALayer ()) { Assert.True (layer.ConvertRectFromLayer (CGRect.Empty, null).IsEmpty, "From/Empty/null"); Assert.True (layer.ConvertRectToLayer (CGRect.Empty, null).IsEmpty, "To/Empty/null"); } + Log ("done"); } [Test] public void ConvertTime () { + Log ("start"); using (CALayer layer = new CALayer ()) { Assert.That (layer.ConvertTimeFromLayer (0.0d, null), Is.EqualTo (0.0d), "From/0.0d/null"); Assert.That (layer.ConvertTimeToLayer (0.0d, null), Is.EqualTo (0.0d), "To/0.0d/null"); } + Log ("done"); } [Test] public void AddAnimation () { + Log ("start"); using (var layer = new CALayer ()) { var animation = new CABasicAnimation (); Assert.IsNull (layer.AnimationForKey ("key"), "#key A"); layer.AddAnimation (animation, "key"); Assert.IsNotNull (layer.AnimationForKey ("key"), "#key B"); } + Log ("done"); } @@ -103,6 +121,7 @@ public void AddAnimation () [Test] public void TestBug26532 () { + Log ("start"); TextLayersDisposed = 0; Generation++; @@ -110,6 +129,7 @@ public void TestBug26532 () Exception ex = null; var thread = new Thread (() => { try { + Log ("background thread started"); var frame = new CGRect (0, 0, 200, 200); using (var layer = new CALayer ()) { for (int i = 0; i < layerCount; i++) { @@ -119,31 +139,43 @@ public void TestBug26532 () layer.AddSublayer (textLayer); } + Log ("calling GC.Collect on background thread"); GC.Collect (); + Log ("GC.Collect on background thread completed"); foreach (var slayer in layer.Sublayers.OfType ()) { Assert.AreEqual ("42", slayer.Secret); } + Log ("removing sublayers"); foreach (var slayer in layer.Sublayers.OfType ()) slayer.RemoveFromSuperLayer (); + Log ("sublayers removed"); } + Log ("background thread done"); } catch (Exception e) { + Log ($"background thread exception: {e}"); ex = e; } }); thread.Start (); thread.Join (); + Log ("background thread joined, starting GC loop"); var watch = new Stopwatch (); watch.Start (); + int gcCount = 0; while (watch.ElapsedMilliseconds < 2000 && TextLayersDisposed < layerCount / 2) { + gcCount++; + Log ($"GC.Collect iteration {gcCount}, TextLayersDisposed={TextLayersDisposed}"); GC.Collect (); NSRunLoop.Main.RunUntil (NSDate.Now.AddSeconds (0.05)); } + Log ($"GC loop done after {gcCount} iterations, TextLayersDisposed={TextLayersDisposed}"); Assert.IsNull (ex, "Exceptions"); Assert.That (TextLayersDisposed, Is.AtLeast (layerCount / 2), "disposed text layers"); + Log ("done"); } public class TextCALayer : CALayer { @@ -172,22 +204,30 @@ class LayerDelegate : CALayerDelegate { } [Test] public void TestCALayerDelegateDispose () { + Log ("start"); var del = new LayerDelegate (); var t = new Thread (() => { + Log ("background thread: creating Layer, setting delegate, disposing"); var l = new Layer (); l.Delegate = del; l.Dispose (); + Log ("background thread: done"); }) { IsBackground = true, }; t.Start (); t.Join (); + Log ("background thread joined, calling GC.Collect #1"); GC.Collect (); + Log ("GC.Collect #1 done, running runloop"); NSRunLoop.Main.RunUntil (NSDate.Now.AddSeconds (0.1)); + Log ("runloop done, calling GC.Collect #2"); GC.Collect (); + Log ("GC.Collect #2 done, disposing delegate"); del.Dispose (); + Log ("done"); } } } From 68906c27f436a4f5921d9f833ba326feda73e419 Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Tue, 5 May 2026 15:21:25 +0200 Subject: [PATCH 2/7] Retrigger CI (run 2) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9832dd23e805..6ee9d03482fd 100644 --- a/README.md +++ b/README.md @@ -68,3 +68,4 @@ For legacy Xamarin.iOS and Xamarin.Mac downloads (discontinued), see [Downloads] Copyright (c) .NET Foundation Contributors. All rights reserved. Licensed under the [MIT](https://github.com/dotnet/macios/blob/main/LICENSE) License. + From 38efdd7d83f299a4c531c258c205e2d4f36a8bc9 Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Wed, 6 May 2026 08:20:37 +0200 Subject: [PATCH 3/7] Retrigger CI (run 3) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6ee9d03482fd..1bc7bbf99080 100644 --- a/README.md +++ b/README.md @@ -69,3 +69,4 @@ For legacy Xamarin.iOS and Xamarin.Mac downloads (discontinued), see [Downloads] Copyright (c) .NET Foundation Contributors. All rights reserved. Licensed under the [MIT](https://github.com/dotnet/macios/blob/main/LICENSE) License. + From c53e4591deafde3d456f4135e350084dceb8bdcf Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Wed, 6 May 2026 09:20:05 +0200 Subject: [PATCH 4/7] [runtime] Fix race condition in xamarin_release_managed_ref for user types (#25171) The framework_peer_waypoint was only called for non-user types, but the same race condition exists for user types: the GC's toggle-ref callback can read the native handle and call [handle retainCount] while another thread is in the process of calling objc_release on the same handle. Move the waypoint outside the if/else block so it applies to both user types and non-user types, ensuring that objc_release never races with the GC's reference tracking callbacks. The race manifests as a SIGSEGV in objc_msgSend (called from xamarin_gc_toggleref_callback) when GC.Collect() is called on one thread while NSObject_Disposer.Drain is releasing native objects on the main thread. This is particularly likely on ARM64 where memory ordering is weak and the HasManagedRef flag write may not be visible to the GC thread before the release completes. Fixes https://github.com/dotnet/macios/issues/25171 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- runtime/runtime.m | 138 +++++++++++++++++++++++----------------------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/runtime/runtime.m b/runtime/runtime.m index 63d5d0fe7199..2d480eaba405 100644 --- a/runtime/runtime.m +++ b/runtime/runtime.m @@ -1752,77 +1752,77 @@ -(struct NSObjectData*) xamarinGetNSObjectData; if (user_type) { /* clear MANAGED_REF_BIT */ set_gchandle_flags_safe (self, (enum XamarinGCHandleFlags) (get_flags_safe (self) & ~XamarinGCHandleFlags_HasManagedRef)); - } else { - // - // This waypoint (lock+unlock) is needed so that we can safely call retainCount in the - // toggleref callback. - // - // The race is between the following actions (given a managed object Z): - // - // a1) Thread A nulls out the handle for Z - // a2) Thread A calls release on Z's original handle - // b1) Thread B fetches the handle for Z - // b2) Thread B calls retainCount on Z's handle - // - // Possible execution orders: - // - // 1) a1-*: all such orders are safe, because b1 will read NULL and - // b2 won't do anything - // 2) b1-b2-a1-a2: retainCount before release is safe. - // 3) b1-a1-b2-a2: retainCount before release is safe. - // 4) b1-a1-a2-b2: unsafe; this tries to call retainCount after - // release. - // - // Order 4 would look like this: - // - // * Thread B runs a GC, and starts calling toggleref callbacks. - // * Thread B fetches the handle (H) for object Z in a toggleref - // callback. - // * Thread A calls xamarin_release_managed_ref for object Z, and - // calls release on H, deallocating it. - // * Thread B tries to call retainCount on H, which is now a - // deallocated object. - // - // Solution: lock/unlock the framework peer lock here. This looks - // weird (since nothing happens inside the lock), but it works: - // - // * Thread B runs a GC, locks the framework peer lock, and starts - // calling toggleref callbacks. - // * Thread B fetches the handle (H) for object Z in a toggleref - // callback. - // * Thread A calls xamarin_release_managed_ref for object Z (and - // also nulls out the handle for Z) - // * Thread A tries to lock the framework peer lock, and blocks - // (before calling release on H) - // * Thread B successfully calls retainCount on H - // * Thread B finishes processing all toggleref callbacks, completes - // the GC, and unlocks the framework peer lock. - // * Thread A wakes up, and calls release on H. - // - // An alternative phrasing would be to say that the lock prevents both - // a1 and a2 from happening between b1 and b2 from above, thus making - // order 4 impossible. - // - // Q) Why not just unlock after calling release, to avoid the strange- - // looking empty lock? - // A) Because calling release on an object might end up calling - // managed code (the native object can override dealloc and do all - // sorts of strange things, any of which may end up invoking - // managed code), and we can deadlock: - // 1) Thread T calls release on a native object. - // 2) Thread T executes managed code, which blocks on something - // that's supposed to happen on another thread U. - // 3) Thread U causes a garbage collection. - // 4) Thread U tries to lock the framework peer lock before running - // the GC, and deadlocks because thread T already has the - // framework peer lock. - // - // This is https://github.com/dotnet/macios/issues/3943 - // - // See also comment in xamarin_marshal_return_value_impl - xamarin_framework_peer_waypoint_safe (); } + // + // This waypoint (lock+unlock) is needed so that we can safely call retainCount in the + // toggleref callback. + // + // The race is between the following actions (given a managed object Z): + // + // a1) Thread A nulls out the handle for Z + // a2) Thread A calls release on Z's original handle + // b1) Thread B fetches the handle for Z + // b2) Thread B calls retainCount on Z's handle + // + // Possible execution orders: + // + // 1) a1-*: all such orders are safe, because b1 will read NULL and + // b2 won't do anything + // 2) b1-b2-a1-a2: retainCount before release is safe. + // 3) b1-a1-b2-a2: retainCount before release is safe. + // 4) b1-a1-a2-b2: unsafe; this tries to call retainCount after + // release. + // + // Order 4 would look like this: + // + // * Thread B runs a GC, and starts calling toggleref callbacks. + // * Thread B fetches the handle (H) for object Z in a toggleref + // callback. + // * Thread A calls xamarin_release_managed_ref for object Z, and + // calls release on H, deallocating it. + // * Thread B tries to call retainCount on H, which is now a + // deallocated object. + // + // Solution: lock/unlock the framework peer lock here. This looks + // weird (since nothing happens inside the lock), but it works: + // + // * Thread B runs a GC, locks the framework peer lock, and starts + // calling toggleref callbacks. + // * Thread B fetches the handle (H) for object Z in a toggleref + // callback. + // * Thread A calls xamarin_release_managed_ref for object Z (and + // also nulls out the handle for Z) + // * Thread A tries to lock the framework peer lock, and blocks + // (before calling release on H) + // * Thread B successfully calls retainCount on H + // * Thread B finishes processing all toggleref callbacks, completes + // the GC, and unlocks the framework peer lock. + // * Thread A wakes up, and calls release on H. + // + // An alternative phrasing would be to say that the lock prevents both + // a1 and a2 from happening between b1 and b2 from above, thus making + // order 4 impossible. + // + // Q) Why not just unlock after calling release, to avoid the strange- + // looking empty lock? + // A) Because calling release on an object might end up calling + // managed code (the native object can override dealloc and do all + // sorts of strange things, any of which may end up invoking + // managed code), and we can deadlock: + // 1) Thread T calls release on a native object. + // 2) Thread T executes managed code, which blocks on something + // that's supposed to happen on another thread U. + // 3) Thread U causes a garbage collection. + // 4) Thread U tries to lock the framework peer lock before running + // the GC, and deadlocks because thread T already has the + // framework peer lock. + // + // This is https://github.com/dotnet/macios/issues/3943 + // + // See also comment in xamarin_marshal_return_value_impl + xamarin_framework_peer_waypoint_safe (); + xamarin_handle_to_be_released = self; objc_release (self); From f94a3bb0d3fe5591faeb94a17cc93ec789d55bea Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Wed, 6 May 2026 13:56:24 +0200 Subject: [PATCH 5/7] Retrigger CI (run 5) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1bc7bbf99080..51108b62d66e 100644 --- a/README.md +++ b/README.md @@ -70,3 +70,4 @@ Copyright (c) .NET Foundation Contributors. All rights reserved. Licensed under the [MIT](https://github.com/dotnet/macios/blob/main/LICENSE) License. + From 2615017d672058379adadc357b7fc7124612bf38 Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Thu, 7 May 2026 08:27:06 +0200 Subject: [PATCH 6/7] Retrigger CI (run 6) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 51108b62d66e..f1bcc32ddbe1 100644 --- a/README.md +++ b/README.md @@ -71,3 +71,4 @@ Licensed under the [MIT](https://github.com/dotnet/macios/blob/main/LICENSE) Lic + From 1c255e4cb6f46948456f6cd2646d6e36615ab8de Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Fri, 8 May 2026 13:21:41 +0200 Subject: [PATCH 7/7] Retrigger CI (run 7) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f1bcc32ddbe1..b5367ee826c0 100644 --- a/README.md +++ b/README.md @@ -72,3 +72,4 @@ Licensed under the [MIT](https://github.com/dotnet/macios/blob/main/LICENSE) Lic +