Skip to content

Commit 30ea8ed

Browse files
feat(sync): llama.cpp to b8589
thinking_forced_open (bool) replaced with generation_prompt (string) upstream in llama.cpp#20424 — carries actual prefill text instead of a narrow flag. Updated headers, stubs, and tests.
1 parent 179d4c7 commit 30ea8ed

8 files changed

Lines changed: 21 additions & 22 deletions

File tree

.llama-cpp-version

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# llama.cpp version for integration tests
2-
# Last updated: 2026-03-16
2+
# Last updated: 2026-04-01
33
# Format: git commit hash or tag
4-
b8369
4+
b8589

include/lloyal/branch.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,8 @@ constexpr uint32_t INDEX_MASK = 0xFFFF; ///< Mask for slot index field
107107
* init_tenancy(), and when the last active branch is released. retainOnly()
108108
* resets it to the surviving branch's position.
109109
*
110-
* Conservative: overcounts if individual branches are pruned mid-run
111-
* (prune does NOT decrement), which is safe — it triggers soft limits
112-
* sooner rather than later.
110+
* Decremented on release: each pruned branch subtracts its unique cells
111+
* (position - fork_head). Pressure recovers as branches are freed.
113112
*/
114113
struct KvPressure {
115114
uint32_t n_ctx; ///< Total KV capacity

include/lloyal/chat_in.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ struct FormatResult {
8989
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; ///< Detected chat format
9090
std::string grammar; ///< GBNF grammar for constrained sampling
9191
bool grammar_lazy = false; ///< Whether grammar should use lazy compilation
92-
bool thinking_forced_open = false; ///< Whether thinking tag is forced open
92+
std::string generation_prompt; ///< Generation prompt prefill (e.g. "<think>")
9393
std::vector<common_grammar_trigger> grammar_triggers; ///< Triggers for lazy grammar activation
9494
std::vector<std::string> preserved_tokens; ///< Tokens to preserve during grammar constraining
9595
std::string parser; ///< PEG parser definition (for PEG formats)
@@ -206,7 +206,7 @@ inline FormatResult format(const llama_model *model, const FormatInputs& inputs)
206206
result.format = params.format;
207207
result.grammar = params.grammar;
208208
result.grammar_lazy = params.grammar_lazy;
209-
result.thinking_forced_open = params.thinking_forced_open;
209+
result.generation_prompt = params.generation_prompt;
210210
result.grammar_triggers = params.grammar_triggers;
211211
result.preserved_tokens = params.preserved_tokens;
212212
result.parser = params.parser;

include/lloyal/chat_out.hpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ struct ToolCall {
6060
*
6161
* @code{.cpp}
6262
* auto parsed = chat_out::parse(raw_output, fmt.format, fmt.reasoning_format,
63-
* false, fmt.thinking_forced_open, fmt.parser);
63+
* false, fmt.generation_prompt, fmt.parser);
6464
*
6565
* // Build assistant message with separate fields
6666
* json assistant_msg = {{"role", "assistant"}, {"content", parsed.content}};
@@ -91,7 +91,7 @@ struct ParseResult {
9191
* @param format The chat format (from chat_in::FormatResult.format)
9292
* @param reasoning_format How to handle reasoning/thinking blocks
9393
* @param is_partial True if output is incomplete (streaming)
94-
* @param thinking_forced_open Whether thinking tag was forced open
94+
* @param generation_prompt Generation prompt prefill text (e.g. "<think>")
9595
* @param parser_data Serialized PEG parser (from chat_in::FormatResult.parser).
9696
* Required for PEG format models; ignored for others.
9797
*
@@ -109,7 +109,7 @@ struct ParseResult {
109109
* auto fmt = chat_in::format(model, inputs);
110110
* // ... generate tokens ...
111111
* auto parsed = chat_out::parse(output_text, fmt.format, fmt.reasoning_format,
112-
* false, fmt.thinking_forced_open, fmt.parser);
112+
* false, fmt.generation_prompt, fmt.parser);
113113
* if (!parsed.tool_calls.empty()) {
114114
* // Handle tool calls
115115
* }
@@ -129,7 +129,7 @@ struct ParseResult {
129129
*
130130
* // Parse: separates reasoning from content
131131
* auto parsed = chat_out::parse(raw_output, fmt.format,
132-
* fmt.reasoning_format, false, fmt.thinking_forced_open, fmt.parser);
132+
* fmt.reasoning_format, false, fmt.generation_prompt, fmt.parser);
133133
*
134134
* // Store with separate fields for correct re-formatting on cold restart
135135
* json msg = {{"role", "assistant"}, {"content", parsed.content}};
@@ -144,7 +144,7 @@ inline ParseResult parse(
144144
common_chat_format format,
145145
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
146146
bool is_partial = false,
147-
bool thinking_forced_open = false,
147+
const std::string& generation_prompt = "",
148148
const std::string& parser_data = ""
149149
) {
150150
ParseResult result;
@@ -154,7 +154,7 @@ inline ParseResult parse(
154154
common_chat_parser_params syntax;
155155
syntax.format = format;
156156
syntax.reasoning_format = reasoning_format;
157-
syntax.thinking_forced_open = thinking_forced_open;
157+
syntax.generation_prompt = generation_prompt;
158158

159159
// Load serialized PEG parser if provided (required for PEG format models)
160160
if (!parser_data.empty()) {
@@ -222,7 +222,7 @@ inline ParseResult parse(
222222

223223
// Delegate to explicit-format overload
224224
return parse(output, params.format, COMMON_REASONING_FORMAT_NONE, is_partial,
225-
params.thinking_forced_open);
225+
params.generation_prompt);
226226

227227
} catch (const std::exception& e) {
228228
LLOYAL_LOG_DEBUG("[chat_out::parse] Auto-detect failed: %s", e.what());

tests/chat_in_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,5 +368,5 @@ TEST_CASE("ChatIn: format returns default format when no tools") {
368368
CHECK(result.preserved_tokens.empty());
369369
CHECK(result.parser.empty());
370370
CHECK(!result.grammar_lazy);
371-
CHECK(!result.thinking_forced_open);
371+
CHECK(result.generation_prompt.empty());
372372
}

tests/chat_out_test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,13 @@ TEST_CASE("ChatOut: parse empty string") {
8181
CHECK(result.tool_calls.empty());
8282
}
8383

84-
TEST_CASE("ChatOut: parse with thinking_forced_open") {
84+
TEST_CASE("ChatOut: parse with generation_prompt") {
8585
auto result = lloyal::chat_out::parse(
8686
"still thinking",
8787
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
8888
COMMON_REASONING_FORMAT_DEEPSEEK,
89-
false, // is_partial
90-
true // thinking_forced_open
89+
false, // is_partial
90+
"<think>" // generation_prompt
9191
);
9292

9393
// Stub passthrough — just verify no crash

tests/integration/chat_out_integration_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ TEST_CASE("ChatOut Integration: parse with format from chat_in roundtrip") {
9393
fmt.format,
9494
fmt.reasoning_format,
9595
false,
96-
fmt.thinking_forced_open,
96+
fmt.generation_prompt,
9797
fmt.parser
9898
);
9999

tests/stubs/chat.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ struct common_chat_params {
111111
std::string prompt;
112112
std::string grammar;
113113
bool grammar_lazy = false;
114-
bool thinking_forced_open = false;
114+
std::string generation_prompt;
115115
std::vector<common_grammar_trigger> grammar_triggers;
116116
std::vector<std::string> preserved_tokens;
117117
std::vector<std::string> additional_stops;
@@ -123,14 +123,14 @@ struct common_chat_parser_params {
123123
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
124124
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
125125
bool reasoning_in_content = false;
126-
bool thinking_forced_open = false;
126+
std::string generation_prompt;
127127
bool parse_tool_calls = true;
128128
common_peg_arena parser = {};
129129

130130
common_chat_parser_params() = default;
131131
common_chat_parser_params(const common_chat_params& chat_params) {
132132
format = chat_params.format;
133-
thinking_forced_open = chat_params.thinking_forced_open;
133+
generation_prompt = chat_params.generation_prompt;
134134
}
135135
};
136136

0 commit comments

Comments (0)