22 * liblloyal-node TypeScript Definitions
33 *
44 * N-API bindings for liblloyal - Node.js native addon for llama.cpp inference
5+ *
6+ * @categoryDescription Core
7+ * Entry points, context lifecycle, and the main inference interface.
8+ *
9+ * @categoryDescription Sampling
10+ * Sampler chain configuration — temperature, penalties, nucleus sampling, and advanced filters.
11+ *
12+ * @categoryDescription Chat
13+ * Chat template formatting, output parsing, tool calls, and reasoning extraction.
14+ *
15+ * @categoryDescription Branching
16+ * Parallel and tree-structured generation with batched GPU dispatch.
517 */
618
719/**
1426 *
1527 * If the requested variant is unavailable (package not installed or
1628 * runtime libraries missing), loading automatically falls back to CPU.
29+ *
30+ * @category Core
1731 */
1832export type GpuVariant = 'default' | 'cuda' | 'vulkan' ;
1933
@@ -22,6 +36,8 @@ export type GpuVariant = 'default' | 'cuda' | 'vulkan';
2236 *
2337 * Controls which native binary variant is loaded when creating a context.
2438 * Use this for explicit GPU variant selection with automatic fallback.
39+ *
40+ * @category Core
2541 */
2642export interface LoadOptions {
2743 /**
@@ -48,6 +64,8 @@ export interface LoadOptions {
4864
4965/**
5066 * Pooling type for embedding extraction
67+ *
68+ * @category Core
5169 */
5270export enum PoolingType {
5371 /** No pooling - raw per-token embeddings */
@@ -76,6 +94,8 @@ export enum PoolingType {
7694 * - **nSeqMax**: Set ≥ your max concurrent branch count + 1 (root sequence).
7795 * Each sequence shares the same KV cache memory pool — cost is metadata only
7896 * under unified KV, not a per-sequence memory multiplier.
97+ *
98+ * @category Core
7999 */
80100export interface ContextOptions {
81101 /** Path to .gguf model file */
@@ -137,6 +157,8 @@ export interface ContextOptions {
137157 *
138158 * Only commonly-used values are listed. The full set matches llama.cpp's
139159 * `common_chat_format` enum (30+ formats).
160+ *
161+ * @category Chat
140162 */
141163export enum ChatFormat {
142164 /** Plain content, no special formatting */
@@ -152,6 +174,8 @@ export enum ChatFormat {
152174 *
153175 * @see {@link FormatChatOptions.reasoningFormat} for input-side usage
154176 * @see {@link ParseChatOutputOptions.reasoningFormat} for output-side usage
177+ *
178+ * @category Chat
155179 */
156180export enum ReasoningFormat {
157181 /** No reasoning extraction (default) */
@@ -171,6 +195,8 @@ export enum ReasoningFormat {
171195 *
172196 * @see {@link GrammarTrigger}
173197 * @see {@link FormattedChatResult.grammarTriggers}
198+ *
199+ * @category Chat
174200 */
175201export enum GrammarTriggerType {
176202 /** Trigger on a specific token ID */
@@ -197,6 +223,8 @@ export enum GrammarTriggerType {
197223 * reasoningFormat: 'auto',
198224 * });
199225 * ```
226+ *
227+ * @category Chat
200228 */
201229export interface FormatChatOptions {
202230 /** Custom Jinja2 template override (bypasses model's built-in template) */
@@ -264,6 +292,8 @@ export interface FormatChatOptions {
264292 * Defines conditions for lazy grammar activation. When `grammarLazy` is true
265293 * in {@link FormattedChatResult}, generation runs unconstrained until one of
266294 * these triggers fires, at which point the grammar is activated.
295+ *
296+ * @category Chat
267297 */
268298export interface GrammarTrigger {
269299 /** Trigger type */
@@ -294,6 +324,8 @@ export interface GrammarTrigger {
294324 * ```
295325 *
296326 * @see {@link SessionContext.parseChatOutput}
327+ *
328+ * @category Chat
297329 */
298330export interface FormattedChatResult {
299331 /** Formatted prompt string ready for tokenization */
@@ -335,6 +367,8 @@ export interface FormattedChatResult {
335367 * fields from {@link FormattedChatResult}.
336368 *
337369 * @see {@link FormattedChatResult}
370+ *
371+ * @category Chat
338372 */
339373export interface ParseChatOutputOptions {
340374 /**
@@ -367,6 +401,8 @@ export interface ParseChatOutputOptions {
367401 * await executeTool(tc.name, args);
368402 * }
369403 * ```
404+ *
405+ * @category Chat
370406 */
371407export interface ParsedToolCall {
372408 /** Tool/function name */
@@ -392,6 +428,8 @@ export interface ParsedToolCall {
392428 * console.log(result.content);
393429 * }
394430 * ```
431+ *
432+ * @category Chat
395433 */
396434export interface ParseChatOutputResult {
397435 /** Main response text */
@@ -410,6 +448,8 @@ export interface ParseChatOutputResult {
410448
411449/**
412450 * Penalty parameters for repetition control
451+ *
452+ * @category Sampling
413453 */
414454export interface PenaltyParams {
415455 /** Repetition penalty (1.0 = disabled, >1.0 = penalize repeats) */
@@ -433,6 +473,8 @@ export interface PenaltyParams {
433473 * where temperature alone produces inconsistent quality.
434474 *
435475 * Use Mirostat v2 (mode: 2) for most cases - it's more stable than v1.
476+ *
477+ * @category Sampling
436478 */
437479export interface MirostatParams {
438480 /** Mirostat mode (0 = disabled, 1 = v1, 2 = v2). Recommended: 2 */
@@ -451,6 +493,8 @@ export interface MirostatParams {
451493 * Penalizes repetition of token sequences, more sophisticated than
452494 * simple repetition penalty. Useful for reducing loops and redundancy
453495 * in generated text.
496+ *
497+ * @category Sampling
454498 */
455499export interface DryParams {
456500 /** Penalty strength (0.0 = disabled, higher = stronger penalty) */
@@ -471,6 +515,8 @@ export interface DryParams {
471515 *
472516 * Excludes very high probability tokens to increase output diversity.
473517 * Useful when model is overly confident and produces repetitive text.
518+ *
519+ * @category Sampling
474520 */
475521export interface XtcParams {
476522 /** Probability of applying XTC (0.0 = disabled, 1.0 = always). Typical: 0.1 */
@@ -482,6 +528,8 @@ export interface XtcParams {
482528
483529/**
484530 * Advanced sampling parameters
531+ *
532+ * @category Sampling
485533 */
486534export interface AdvancedSamplingParams {
487535 /** Locally typical sampling (1.0 = disabled) */
@@ -518,6 +566,8 @@ export interface AdvancedSamplingParams {
518566 * - Balanced: `{ temperature: 0.7 }`
519567 * - Creative: `{ temperature: 1.0 }`
520568 * - Deterministic greedy: `{ temperature: 0, topK: 0, topP: 1.0, minP: 0 }`
569+ *
570+ * @category Sampling
521571 */
522572export interface SamplingParams {
523573 // ===== COMMON CONTROLS =====
@@ -582,6 +632,8 @@ export interface SamplingParams {
582632 *
583633 * Use {@link createContext} to initialize, and `dispose()` when done to free
584634 * GPU/CPU memory.
635+ *
636+ * @category Core
585637 */
586638export interface SessionContext {
587639 // ===== THE GENERATION LOOP =====
@@ -1883,6 +1935,8 @@ export interface SessionContext {
18831935 * { gpuVariant: 'cuda' }
18841936 * );
18851937 * ```
1938+ *
1939+ * @category Core
18861940 */
18871941export function createContext (
18881942 options : ContextOptions ,
@@ -1932,6 +1986,8 @@ export function createContext(
19321986 * // Create context from loaded binary
19331987 * const ctx = await binary.createContext({ modelPath: './model.gguf' });
19341988 * ```
1989+ *
1990+ * @category Core
19351991 */
19361992export function loadBinary ( variant ?: GpuVariant ) : {
19371993 createContext ( options : ContextOptions ) : Promise < SessionContext > ;
@@ -1993,6 +2049,8 @@ export function loadBinary(variant?: GpuVariant): {
19932049 * return logits[0];
19942050 * });
19952051 * ```
2052+ *
2053+ * @category Core
19962054 */
19972055export function withLogits < T > (
19982056 ctx : SessionContext ,
@@ -2001,6 +2059,8 @@ export function withLogits<T>(
20012059
20022060/**
20032061 * Result from Branch.produce()
2062+ *
2063+ * @category Branching
20042064 */
20052065export interface Produced {
20062066 /** Sampled token ID */
@@ -2051,6 +2111,8 @@ export interface Produced {
20512111 * const best = candidates.reduce((a, b) => a.perplexity < b.perplexity ? a : b);
20522112 * for (const c of candidates) { if (c !== best) c.prune(); }
20532113 * ```
2114+ *
2115+ * @category Branching
20542116 */
20552117export class Branch {
20562118 /**
@@ -2342,6 +2404,8 @@ export class Branch {
23422404 * ]);
23432405 * // Bin-packed into ceil(1012 / nBatch) GPU dispatches
23442406 * ```
2407+ *
2408+ * @category Branching
23452409 */
23462410export class BranchStore {
23472411 constructor ( ctx : SessionContext ) ;
0 commit comments