Snapchat · kmontemayor2-sc · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
@@ -211,6 +211,8 @@ def _setup_dataloaders(
         channel_size=sampling_worker_shared_channel_size,
         process_start_gap_seconds=process_start_gap_seconds,
         shuffle=shuffle,
+        # Labels as an AnchorLabels edge-list; see the AnchorLabels class docstring.
+        use_list_output=True,
     )
 
     print(f"---Rank {rank} finished setting up main loader for split={split}")
@@ -299,16 +301,15 @@ def _compute_loss(
     ).to(device)
     random_negative_batch_size = random_negative_data[labeled_node_type].batch_size
 
-    positive_idx: torch.Tensor = torch.cat(list(main_data.y_positive.values())).to(
-        device
-    )
-    repeated_query_node_idx = query_node_idx.repeat_interleave(
-        torch.tensor([len(v) for v in main_data.y_positive.values()]).to(device)
-    )
+    # main_data.y_positive is an AnchorLabels edge-list; read the co-indexed [E]
+    # label_index and query_node_idx[anchor_index] directly. See
+    # homogeneous_training._compute_loss for why this equals the historical dict read.
+    positive_idx: torch.Tensor = main_data.y_positive.label_index.to(device)  # [E]
+    repeated_query_node_idx = query_node_idx[
+        main_data.y_positive.anchor_index.to(device)  # [E]
+    ]
     if hasattr(main_data, "y_negative"):
-        hard_negative_idx: torch.Tensor = torch.cat(
-            list(main_data.y_negative.values())
-        ).to(device)
+        hard_negative_idx: torch.Tensor = main_data.y_negative.label_index.to(device)
     else:
         hard_negative_idx = torch.empty(0, dtype=torch.long).to(device)
 

@@ -241,6 +241,8 @@ def _setup_dataloaders(
         channel_size=sampling_worker_shared_channel_size,
         process_start_gap_seconds=process_start_gap_seconds,
         shuffle=shuffle,
+        # Labels as an AnchorLabels edge-list; see the AnchorLabels class docstring.
+        use_list_output=True,
     )
 
     logger.info(f"---Rank {rank} finished setting up main loader for split={split}")
@@ -305,16 +307,15 @@ def _compute_loss(
     query_node_idx: torch.Tensor = torch.arange(main_data.batch_size).to(device)
     random_negative_batch_size = random_negative_data.batch_size
 
-    positive_idx: torch.Tensor = torch.cat(list(main_data.y_positive.values())).to(
-        device
-    )
-    repeated_query_node_idx = query_node_idx.repeat_interleave(
-        torch.tensor([len(v) for v in main_data.y_positive.values()]).to(device)
-    )
+    # main_data.y_positive is an AnchorLabels edge-list; read the co-indexed [E]
+    # label_index and query_node_idx[anchor_index] directly. See
+    # homogeneous_training._compute_loss for why this equals the historical dict read.
+    positive_idx: torch.Tensor = main_data.y_positive.label_index.to(device)  # [E]
+    repeated_query_node_idx = query_node_idx[
+        main_data.y_positive.anchor_index.to(device)  # [E]
+    ]
     if hasattr(main_data, "y_negative"):
-        hard_negative_idx: torch.Tensor = torch.cat(
-            list(main_data.y_negative.values())
-        ).to(device)
+        hard_negative_idx: torch.Tensor = main_data.y_negative.label_index.to(device)
     else:
         hard_negative_idx = torch.empty(0, dtype=torch.long).to(device)
 

@@ -144,6 +144,8 @@ def _setup_dataloaders(
         # This is done so that each process on the current machine which initializes a `main_loader` doesn't compete for memory, causing potential OOM
         process_start_gap_seconds=process_start_gap_seconds,
         shuffle=shuffle,
+        # Labels as an AnchorLabels edge-list; see the AnchorLabels class docstring.
+        use_list_output=True,
     )
 
     logger.info(f"---Rank {rank} finished setting up main loader")
@@ -223,18 +225,15 @@ def _compute_loss(
     ).to(device)
     random_negative_batch_size = random_negative_data[labeled_node_type].batch_size
 
-    # main_data.y_positive is a dict[query_node_local_index: int, labeled_node_local_indices: torch.Tensor], even in the heterogeneous setting.
-    positive_idx: torch.Tensor = torch.cat(list(main_data.y_positive.values())).to(
-        device
-    )
-    # We also extract a repeated query node index tensor which upsamples each query node based on the number of positives it has
-    repeated_query_node_idx = query_node_idx.repeat_interleave(
-        torch.tensor([len(v) for v in main_data.y_positive.values()]).to(device)
-    )
+    # main_data.y_positive is an AnchorLabels edge-list; read the co-indexed [E]
+    # label_index and query_node_idx[anchor_index] directly. See
+    # homogeneous_training._compute_loss for why this equals the historical dict read.
+    positive_idx: torch.Tensor = main_data.y_positive.label_index.to(device)  # [E]
+    repeated_query_node_idx = query_node_idx[
+        main_data.y_positive.anchor_index.to(device)  # [E]
+    ]
     if hasattr(main_data, "y_negative"):
-        hard_negative_idx: torch.Tensor = torch.cat(
-            list(main_data.y_negative.values())
-        ).to(device)
+        hard_negative_idx: torch.Tensor = main_data.y_negative.label_index.to(device)
     else:
         hard_negative_idx = torch.empty(0, dtype=torch.long).to(device)
 

@@ -134,6 +134,8 @@ def _setup_dataloaders(
         # This is done so that each process on the current machine which initializes a `main_loader` doesn't compete for memory, causing potential OOM
         process_start_gap_seconds=process_start_gap_seconds,
         shuffle=shuffle,
+        # Labels as an AnchorLabels edge-list; see the AnchorLabels class docstring.
+        use_list_output=True,
     )
 
     logger.info(f"---Rank {rank} finished setting up main loader")
@@ -190,18 +192,21 @@ def _compute_loss(
     query_node_idx: torch.Tensor = torch.arange(main_data.batch_size).to(device)
     random_negative_batch_size = random_negative_data.batch_size
 
-    # main_data.y_positive is a dict[query_node_local_index: int, labeled_node_local_indices: torch.Tensor]
-    positive_idx: torch.Tensor = torch.cat(list(main_data.y_positive.values())).to(
-        device
-    )
-    # We also extract a repeated query node index tensor which upsamples each query node based on the number of positives it has
-    repeated_query_node_idx = query_node_idx.repeat_interleave(
-        torch.tensor([len(v) for v in main_data.y_positive.values()]).to(device)
-    )
+    # main_data.y_positive is an AnchorLabels edge-list (use_list_output=True), two
+    # co-indexed [E] tensors: label_index holds the local label node per
+    # (anchor, label) pair, anchor_index the matching local anchor row. Pairs are
+    # grouped by anchor, so reading label_index/anchor_index directly yields the same
+    # (query, label) pairs as the historical dict read (torch.cat(list(values())) +
+    # repeat_interleave over per-anchor lengths). The within-anchor label order may
+    # differ, but the contrastive loss is order-invariant, so the result is
+    # equivalent. (This is the canonical explanation; the other link-prediction
+    # examples point here rather than restating it.)
+    positive_idx: torch.Tensor = main_data.y_positive.label_index.to(device)  # [E]
+    repeated_query_node_idx = query_node_idx[
+        main_data.y_positive.anchor_index.to(device)  # [E]
+    ]
     if hasattr(main_data, "y_negative"):
-        hard_negative_idx: torch.Tensor = torch.cat(
-            list(main_data.y_negative.values())
-        ).to(device)
+        hard_negative_idx: torch.Tensor = main_data.y_negative.label_index.to(device)
     else:
         hard_negative_idx = torch.empty(0, dtype=torch.long).to(device)
 

@@ -3,6 +3,7 @@
 """
 
 __all__ = [
+    "AnchorLabels",
     "DistABLPLoader",
     "DistNeighborLoader",
     "DistDataset",
@@ -17,7 +18,7 @@
     build_dataset,
     build_dataset_from_task_config_uri,
 )
-from gigl.distributed.dist_ablp_neighborloader import DistABLPLoader
+from gigl.distributed.dist_ablp_neighborloader import AnchorLabels, DistABLPLoader
 from gigl.distributed.dist_context import DistributedContext
 from gigl.distributed.dist_dataset import DistDataset
 from gigl.distributed.dist_partitioner import DistPartitioner