From 6929cf863ca28815faadd3dd781dc6b3f3ca56cc Mon Sep 17 00:00:00 2001
From: ppraneth
Date: Tue, 6 Jan 2026 10:05:29 +0530
Subject: [PATCH 1/3] opti

---
 miles/utils/data.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/miles/utils/data.py b/miles/utils/data.py
index 6e64ef678..3ad070974 100644
--- a/miles/utils/data.py
+++ b/miles/utils/data.py
@@ -1,3 +1,4 @@
+import heapq
 import itertools
 import json
 import logging
@@ -256,17 +257,21 @@ def __len__(self):
 
 
 def get_minimum_num_micro_batch_size(total_lengths, max_tokens_per_gpu):
-    # use first fit to get the number of micro batches
-    batches = []
+    # Use a Max-Heap to track remaining capacity in each bin.
+    # Python's heapq is a min-heap, so we store negative remaining capacity.
+    remaining_capacities = []
     for length in total_lengths:
-        for i in range(len(batches)):
-            if batches[i] + length <= max_tokens_per_gpu:
-                batches[i] += length
-                break
+        if remaining_capacities and (-remaining_capacities[0] >= length):
+            # Take the bin with the most remaining space.
+            most_space = -heapq.heappop(remaining_capacities)
+            new_space = most_space - length
+            heapq.heappush(remaining_capacities, -new_space)
         else:
-            batches.append(length)
+            # Create a new bin.
+            new_space = max_tokens_per_gpu - length
+            heapq.heappush(remaining_capacities, -new_space)
 
-    return len(batches)
+    return len(remaining_capacities)
 
 
 def process_rollout_data(args, rollout_data_ref, dp_rank, dp_size):


From be2d4653d42e2549d787d8564e49d68d0892708a Mon Sep 17 00:00:00 2001
From: ppraneth
Date: Wed, 7 Jan 2026 07:50:10 +0530
Subject: [PATCH 2/3] opti-2

---
 miles/utils/data.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/miles/utils/data.py b/miles/utils/data.py
index 3ad070974..c741b0620 100644
--- a/miles/utils/data.py
+++ b/miles/utils/data.py
@@ -1,4 +1,4 @@
-import heapq
+import bisect
 import itertools
 import json
 import logging
@@ -257,21 +257,31 @@ def __len__(self):
 
 
 def get_minimum_num_micro_batch_size(total_lengths, max_tokens_per_gpu):
-    # Use a Max-Heap to track remaining capacity in each bin.
-    # Python's heapq is a min-heap, so we store negative remaining capacity.
-    remaining_capacities = []
-    for length in total_lengths:
-        if remaining_capacities and (-remaining_capacities[0] >= length):
-            # Take the bin with the most remaining space.
-            most_space = -heapq.heappop(remaining_capacities)
-            new_space = most_space - length
-            heapq.heappush(remaining_capacities, -new_space)
+
+    # Sort lengths in descending order (The "Decreasing" part of BFD).
+    sorted_lengths = sorted(total_lengths, reverse=True)
+
+    # Maintain a sorted list of current bin totals (filled capacities).
+    # This allows us to use binary search (bisect) to find the 'Best-Fit' in O(log B).
+    bin_totals = []
+
+    for length in sorted_lengths:
+        # The 'Best-Fit' bin is the one with the smallest remaining space that still fits.
+        # Mathematically, we want the bin with the largest filled capacity <= (limit - length).
+        threshold = max_tokens_per_gpu - length
+
+        # Binary search for the best bin candidate.
+        idx = bisect.bisect_right(bin_totals, threshold)
+
+        if idx > 0:
+            # Pop and re-insert to maintain the sorted order of bin_totals.
+            current_fill = bin_totals.pop(idx - 1)
+            bisect.insort(bin_totals, current_fill + length)
         else:
-            # Create a new bin.
-            new_space = max_tokens_per_gpu - length
-            heapq.heappush(remaining_capacities, -new_space)
+            # No existing bin fits the current sequence; create a new bin.
+            bisect.insort(bin_totals, length)
 
-    return len(remaining_capacities)
+    return len(bin_totals)
 
 
 def process_rollout_data(args, rollout_data_ref, dp_rank, dp_size):


From 51e39d29084912426ef31109fbb1644a03df2ccb Mon Sep 17 00:00:00 2001
From: ppraneth
Date: Wed, 7 Jan 2026 07:50:37 +0530
Subject: [PATCH 3/3] opti-2

---
 miles/utils/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/miles/utils/data.py b/miles/utils/data.py
index c741b0620..f9f5235c3 100644
--- a/miles/utils/data.py
+++ b/miles/utils/data.py
@@ -258,7 +258,7 @@ def __len__(self):
 
 def get_minimum_num_micro_batch_size(total_lengths, max_tokens_per_gpu):
 
-    # Sort lengths in descending order (The "Decreasing" part of BFD).
+    # Sort lengths in descending order.
     sorted_lengths = sorted(total_lengths, reverse=True)
 
     # Maintain a sorted list of current bin totals (filled capacities).
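
For reference, a condensed, self-contained sketch of the best-fit-decreasing helper as it stands after PATCH 3/3, exercised on hypothetical sequence lengths (the sample values below are illustrative and not part of the patches). For this particular input, the original first-fit loop opens 4 micro batches, while the best-fit-decreasing packing needs only 3:

import bisect

def get_minimum_num_micro_batch_size(total_lengths, max_tokens_per_gpu):
    # Best-fit decreasing: sort sequences longest-first, then place each one
    # into the fullest bin that can still absorb it.
    sorted_lengths = sorted(total_lengths, reverse=True)
    bin_totals = []  # filled capacities, kept in ascending order
    for length in sorted_lengths:
        # Rightmost bin whose filled capacity is <= (budget - length).
        idx = bisect.bisect_right(bin_totals, max_tokens_per_gpu - length)
        if idx > 0:
            current_fill = bin_totals.pop(idx - 1)
            bisect.insort(bin_totals, current_fill + length)
        else:
            bisect.insort(bin_totals, length)
    return len(bin_totals)

# Hypothetical per-sequence token counts and per-GPU token budget.
lengths = [900, 300, 800, 200, 700, 100]
print(get_minimum_num_micro_batch_size(lengths, max_tokens_per_gpu=1000))  # prints 3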