From 390a2bce9c84ac097f95c82eadb7731ca03eabd0 Mon Sep 17 00:00:00 2001 From: kmontemayor Date: Wed, 11 Mar 2026 16:31:52 +0000 Subject: [PATCH 1/4] Bump wait time to reduce log spam --- gigl/distributed/utils/networking.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gigl/distributed/utils/networking.py b/gigl/distributed/utils/networking.py index 67b53d445..9a8178374 100644 --- a/gigl/distributed/utils/networking.py +++ b/gigl/distributed/utils/networking.py @@ -232,7 +232,7 @@ def write_readiness_signal(readiness_uri: Uri) -> None: def wait_for_readiness_signal( readiness_uri: Uri, timeout: float = 3600.0, - poll_interval: float = 10.0, + poll_interval: float = 60.0, log_every_n_attempts: int = 10, ) -> None: """Poll for a readiness sentinel file before initiating RPC connections. @@ -243,7 +243,7 @@ def wait_for_readiness_signal( readiness_uri: The URI to poll for the sentinel file. Supports both GcsUri (production) and LocalUri (testing). timeout: Maximum time in seconds to wait for the signal. Defaults to 3600. - poll_interval: Time in seconds between poll attempts. Defaults to 10. + poll_interval: Time in seconds between poll attempts. Defaults to 60. Raises: TimeoutError: If the readiness signal is not found within the timeout. 
From 7f5fc9cb51fd1161d49e9982310a7ab163585e1f Mon Sep 17 00:00:00 2001 From: kmontemayor Date: Wed, 11 Mar 2026 16:34:22 +0000 Subject: [PATCH 2/4] update --- gigl/distributed/utils/networking.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gigl/distributed/utils/networking.py b/gigl/distributed/utils/networking.py index 9a8178374..46d01f5a0 100644 --- a/gigl/distributed/utils/networking.py +++ b/gigl/distributed/utils/networking.py @@ -232,8 +232,8 @@ def write_readiness_signal(readiness_uri: Uri) -> None: def wait_for_readiness_signal( readiness_uri: Uri, timeout: float = 3600.0, - poll_interval: float = 60.0, - log_every_n_attempts: int = 10, + poll_interval: float = 10.0, + log_every_n_attempts: int = 60, ) -> None: """Poll for a readiness sentinel file before initiating RPC connections. @@ -243,7 +243,8 @@ def wait_for_readiness_signal( readiness_uri: The URI to poll for the sentinel file. Supports both GcsUri (production) and LocalUri (testing). timeout: Maximum time in seconds to wait for the signal. Defaults to 3600. - poll_interval: Time in seconds between poll attempts. Defaults to 60. + poll_interval: Time in seconds between poll attempts. Defaults to 10. + log_every_n_attempts: Number of attempts between log messages. Defaults to 60. Raises: TimeoutError: If the readiness signal is not found within the timeout. 
From 11f8f7dc994f6bd320fd56a116b3a9d311b34148 Mon Sep 17 00:00:00 2001 From: kmontemayor Date: Wed, 11 Mar 2026 20:57:47 +0000 Subject: [PATCH 3/4] update --- gigl/distributed/utils/networking.py | 14 +++++++------- tests/unit/distributed/utils/networking_test.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gigl/distributed/utils/networking.py b/gigl/distributed/utils/networking.py index 46d01f5a0..75c458ea8 100644 --- a/gigl/distributed/utils/networking.py +++ b/gigl/distributed/utils/networking.py @@ -232,8 +232,8 @@ def write_readiness_signal(readiness_uri: Uri) -> None: def wait_for_readiness_signal( readiness_uri: Uri, timeout: float = 3600.0, - poll_interval: float = 10.0, - log_every_n_attempts: int = 60, + poll_interval_s: float = 10.0, + log_every_n_attempts: int = 30, ) -> None: """Poll for a readiness sentinel file before initiating RPC connections. @@ -243,14 +243,14 @@ def wait_for_readiness_signal( readiness_uri: The URI to poll for the sentinel file. Supports both GcsUri (production) and LocalUri (testing). timeout: Maximum time in seconds to wait for the signal. Defaults to 3600. - poll_interval: Time in seconds between poll attempts. Defaults to 10. - log_every_n_attempts: Number of attempts between log messages. Defaults to 60. + poll_interval_s: Time in seconds between poll attempts. Defaults to 10. + log_every_n_attempts: Number of attempts between log messages. Defaults to 30. Raises: TimeoutError: If the readiness signal is not found within the timeout. """ logger.info( - f"Waiting for readiness signal at {readiness_uri} (timeout={timeout}s, poll_interval={poll_interval}s)" + f"Waiting for readiness signal at {readiness_uri} (timeout={timeout}s, poll_interval={poll_interval_s}s)" ) file_loader = FileLoader() start_time = time.monotonic() @@ -266,10 +266,10 @@ def wait_for_readiness_signal( ) if attempt % log_every_n_attempts == 0: logger.info( - f"Readiness signal not yet available at {readiness_uri}. 
Elapsed: {elapsed:.0f}s. Retrying in {poll_interval}s..." + f"Readiness signal not yet available at {readiness_uri}. Elapsed: {elapsed:.0f}s. Retrying in {poll_interval_s}s... Expect the next log message in {log_every_n_attempts * poll_interval_s}s" ) attempt += 1 - time.sleep(poll_interval) + time.sleep(poll_interval_s) def get_graph_store_info() -> GraphStoreInfo: diff --git a/tests/unit/distributed/utils/networking_test.py b/tests/unit/distributed/utils/networking_test.py index 9a811cbb4..cb92aa8e3 100644 --- a/tests/unit/distributed/utils/networking_test.py +++ b/tests/unit/distributed/utils/networking_test.py @@ -550,7 +550,7 @@ def test_wait_for_readiness_signal(self) -> None: self.addCleanup(temp_dir.cleanup) readiness_uri = LocalUri(temp_dir.name) / "readiness.txt" with self.assertRaises(TimeoutError): - wait_for_readiness_signal(readiness_uri, timeout=0.1, poll_interval=0.01) + wait_for_readiness_signal(readiness_uri, timeout=0.1, poll_interval_s=0.01) write_readiness_signal(readiness_uri) From d596855df7f9bc62ecb882238fe920332fa5be33 Mon Sep 17 00:00:00 2001 From: kmontemayor Date: Wed, 11 Mar 2026 20:59:14 +0000 Subject: [PATCH 4/4] update --- gigl/distributed/utils/networking.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gigl/distributed/utils/networking.py b/gigl/distributed/utils/networking.py index 75c458ea8..9e116cd61 100644 --- a/gigl/distributed/utils/networking.py +++ b/gigl/distributed/utils/networking.py @@ -245,6 +245,8 @@ def wait_for_readiness_signal( timeout: Maximum time in seconds to wait for the signal. Defaults to 3600. poll_interval_s: Time in seconds between poll attempts. Defaults to 10. log_every_n_attempts: Number of attempts between log messages. Defaults to 30. + e.g. with poll_interval_s set to 10, and log_every_n_attempts set to 30, we will log every 300 seconds (5 minutes). + Raises: TimeoutError: If the readiness signal is not found within the timeout.