Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import io.agentscope.core.model.ChatUsage;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

/**
* An aggregator for streaming {@link ChatResponse}.
Expand All @@ -41,9 +40,9 @@ final class StreamChatResponseAggregator {
private final ThinkingAccumulator thinkingAcc = new ThinkingAccumulator();
private final ToolCallsAccumulator toolCallsAcc = new ToolCallsAccumulator();

// Usage
private final AtomicInteger inputTokens = new AtomicInteger(0);
private final AtomicInteger outputTokens = new AtomicInteger(0);
// Usage: take the max value from all chunks, since providers report cumulative totals
private int inputTokens;
private int outputTokens;
private double time;

private String finishReason;
Expand Down Expand Up @@ -73,8 +72,8 @@ public void append(ChatResponse chunk) {

ChatUsage usage = chunk.getUsage();
if (usage != null) {
inputTokens.addAndGet(usage.getInputTokens());
outputTokens.addAndGet(usage.getOutputTokens());
inputTokens = Math.max(inputTokens, usage.getInputTokens());
outputTokens = Math.max(outputTokens, usage.getOutputTokens());
time = usage.getTime();
}
Comment on lines 73 to 78
Copy link

Copilot AI Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

inputTokens/outputTokens were switched to take the max across chunks (cumulative semantics), but time is still overwritten with the latest chunk’s value. If providers emit cumulative elapsed time (similar to cumulative token totals), this can regress to a smaller value when later chunks omit/reset time. Consider aggregating time consistently (e.g., take max across chunks, or only update when the new value is greater/non-zero).

Copilot uses AI. Check for mistakes.

Expand All @@ -95,8 +94,8 @@ public ChatResponse getResponse() {
.content(contentBlocks)
.usage(
ChatUsage.builder()
.inputTokens(inputTokens.get())
.outputTokens(outputTokens.get())
.inputTokens(inputTokens)
.outputTokens(outputTokens)
.time(time)
.build())
.finishReason(finishReason)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright 2024-2026 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.agentscope.core.tracing.telemetry;

import static org.junit.jupiter.api.Assertions.assertEquals;

import io.agentscope.core.message.TextBlock;
import io.agentscope.core.model.ChatResponse;
import io.agentscope.core.model.ChatUsage;
import java.util.List;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;

/**
 * Tests how {@link StreamChatResponseAggregator} folds the {@link ChatUsage} carried by streamed
 * {@link ChatResponse} chunks into the usage of the aggregated response.
 *
 * <p>Two reporting styles are exercised: every chunk carrying a running total, and only the
 * final chunk carrying usage.
 */
@DisplayName("StreamChatResponseAggregator Tests")
class StreamChatResponseAggregatorTest {

    /**
     * Appends five chunks whose output token count grows from 20 to 100 while the input token
     * count stays at 100, then checks the aggregated usage.
     *
     * <p>Summing the chunks would yield 500 input and 300 output tokens, so the expected values
     * of 100 and 100 distinguish "take the max" from "add everything up".
     */
    @Test
    @DisplayName("Cumulative usage should take max, not sum")
    void testCumulativeUsageTakesMax() {
        final int chunkCount = 5;
        final int inputTokens = 100;
        final int outputTokensPerChunk = 20;
        final double timePerChunk = 0.5;

        StreamChatResponseAggregator agg = StreamChatResponseAggregator.create();

        for (int i = 1; i <= chunkCount; i++) {
            agg.append(
                    ChatResponse.builder()
                            .id("test-id")
                            .content(List.of(TextBlock.builder().text("chunk" + i).build()))
                            .usage(
                                    ChatUsage.builder()
                                            .inputTokens(inputTokens)
                                            .outputTokens(i * outputTokensPerChunk)
                                            .time(i * timePerChunk)
                                            .build())
                            // Only the final chunk reports a finish reason.
                            .finishReason(i == chunkCount ? "stop" : null)
                            .build());
        }

        ChatResponse response = agg.getResponse();
        assertEquals("test-id", response.getId());
        assertEquals(inputTokens, response.getUsage().getInputTokens());
        assertEquals(chunkCount * outputTokensPerChunk, response.getUsage().getOutputTokens());
        // The last chunk carries the largest time (2.5), so this holds whether the aggregator
        // keeps the most recent value or the maximum across chunks.
        assertEquals(chunkCount * timePerChunk, response.getUsage().getTime(), 1e-9);
        assertEquals("stop", response.getFinishReason());
    }

    /**
     * Appends three content-only chunks without usage, followed by a final chunk that carries
     * usage and the finish reason but no content, then checks that the aggregated usage equals
     * the usage of that final chunk.
     */
    @Test
    @DisplayName("Only last chunk carries usage (OpenAI style)")
    void testOnlyLastChunkHasUsage() {
        StreamChatResponseAggregator agg = StreamChatResponseAggregator.create();

        // Content chunks: no usage attached.
        for (int i = 0; i < 3; i++) {
            agg.append(
                    ChatResponse.builder()
                            .id("openai-id")
                            .content(List.of(TextBlock.builder().text("part" + i).build()))
                            .build());
        }

        // Terminal chunk: usage and finish reason only.
        agg.append(
                ChatResponse.builder()
                        .id("openai-id")
                        .usage(ChatUsage.builder().inputTokens(200).outputTokens(150).build())
                        .finishReason("stop")
                        .build());

        ChatResponse response = agg.getResponse();
        assertEquals(200, response.getUsage().getInputTokens());
        assertEquals(150, response.getUsage().getOutputTokens());
    }
}
Loading