Skip to content

Commit e618f21

Browse files
feat(docs): re-org
1 parent 400c2c0 commit e618f21

2 files changed

Lines changed: 68 additions & 9 deletions

File tree

lib/index.d.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22
* liblloyal-node TypeScript Definitions
33
*
44
* N-API bindings for liblloyal - Node.js native addon for llama.cpp inference
5+
*
6+
* @categoryDescription Core
7+
* Entry points, context lifecycle, and the main inference interface.
8+
*
9+
* @categoryDescription Sampling
10+
* Sampler chain configuration — temperature, penalties, nucleus sampling, and advanced filters.
11+
*
12+
* @categoryDescription Chat
13+
* Chat template formatting, output parsing, tool calls, and reasoning extraction.
14+
*
15+
* @categoryDescription Branching
16+
* Parallel and tree-structured generation with batched GPU dispatch.
517
*/
618

719
/**
@@ -14,6 +26,8 @@
1426
*
1527
* If the requested variant is unavailable (package not installed or
1628
* runtime libraries missing), loading automatically falls back to CPU.
29+
*
30+
* @category Core
1731
*/
1832
export type GpuVariant = 'default' | 'cuda' | 'vulkan';
1933

@@ -22,6 +36,8 @@ export type GpuVariant = 'default' | 'cuda' | 'vulkan';
2236
*
2337
* Controls which native binary variant is loaded when creating a context.
2438
* Use this for explicit GPU variant selection with automatic fallback.
39+
*
40+
* @category Core
2541
*/
2642
export interface LoadOptions {
2743
/**
@@ -48,6 +64,8 @@ export interface LoadOptions {
4864

4965
/**
5066
* Pooling type for embedding extraction
67+
*
68+
* @category Core
5169
*/
5270
export enum PoolingType {
5371
/** No pooling - raw per-token embeddings */
@@ -76,6 +94,8 @@ export enum PoolingType {
7694
* - **nSeqMax**: Set ≥ your max concurrent branch count + 1 (root sequence).
7795
* Each sequence shares the same KV cache memory pool — cost is metadata only
7896
* under unified KV, not a per-sequence memory multiplier.
97+
*
98+
* @category Core
7999
*/
80100
export interface ContextOptions {
81101
/** Path to .gguf model file */
@@ -137,6 +157,8 @@ export interface ContextOptions {
137157
*
138158
* Only commonly-used values are listed. The full set matches llama.cpp's
139159
* `common_chat_format` enum (30+ formats).
160+
*
161+
* @category Chat
140162
*/
141163
export enum ChatFormat {
142164
/** Plain content, no special formatting */
@@ -152,6 +174,8 @@ export enum ChatFormat {
152174
*
153175
* @see {@link FormatChatOptions.reasoningFormat} for input-side usage
154176
* @see {@link ParseChatOutputOptions.reasoningFormat} for output-side usage
177+
*
178+
* @category Chat
155179
*/
156180
export enum ReasoningFormat {
157181
/** No reasoning extraction (default) */
@@ -171,6 +195,8 @@ export enum ReasoningFormat {
171195
*
172196
* @see {@link GrammarTrigger}
173197
* @see {@link FormattedChatResult.grammarTriggers}
198+
*
199+
* @category Chat
174200
*/
175201
export enum GrammarTriggerType {
176202
/** Trigger on a specific token ID */
@@ -197,6 +223,8 @@ export enum GrammarTriggerType {
197223
* reasoningFormat: 'auto',
198224
* });
199225
* ```
226+
*
227+
* @category Chat
200228
*/
201229
export interface FormatChatOptions {
202230
/** Custom Jinja2 template override (bypasses model's built-in template) */
@@ -264,6 +292,8 @@ export interface FormatChatOptions {
264292
* Defines conditions for lazy grammar activation. When `grammarLazy` is true
265293
* in {@link FormattedChatResult}, generation runs unconstrained until one of
266294
* these triggers fires, at which point the grammar is activated.
295+
*
296+
* @category Chat
267297
*/
268298
export interface GrammarTrigger {
269299
/** Trigger type */
@@ -294,6 +324,8 @@ export interface GrammarTrigger {
294324
* ```
295325
*
296326
* @see {@link SessionContext.parseChatOutput}
327+
*
328+
* @category Chat
297329
*/
298330
export interface FormattedChatResult {
299331
/** Formatted prompt string ready for tokenization */
@@ -335,6 +367,8 @@ export interface FormattedChatResult {
335367
* fields from {@link FormattedChatResult}.
336368
*
337369
* @see {@link FormattedChatResult}
370+
*
371+
* @category Chat
338372
*/
339373
export interface ParseChatOutputOptions {
340374
/**
@@ -367,6 +401,8 @@ export interface ParseChatOutputOptions {
367401
* await executeTool(tc.name, args);
368402
* }
369403
* ```
404+
*
405+
* @category Chat
370406
*/
371407
export interface ParsedToolCall {
372408
/** Tool/function name */
@@ -392,6 +428,8 @@ export interface ParsedToolCall {
392428
* console.log(result.content);
393429
* }
394430
* ```
431+
*
432+
* @category Chat
395433
*/
396434
export interface ParseChatOutputResult {
397435
/** Main response text */
@@ -410,6 +448,8 @@ export interface ParseChatOutputResult {
410448

411449
/**
412450
* Penalty parameters for repetition control
451+
*
452+
* @category Sampling
413453
*/
414454
export interface PenaltyParams {
415455
/** Repetition penalty (1.0 = disabled, >1.0 = penalize repeats) */
@@ -433,6 +473,8 @@ export interface PenaltyParams {
433473
* where temperature alone produces inconsistent quality.
434474
*
435475
* Use Mirostat v2 (mode: 2) for most cases - it's more stable than v1.
476+
*
477+
* @category Sampling
436478
*/
437479
export interface MirostatParams {
438480
/** Mirostat mode (0 = disabled, 1 = v1, 2 = v2). Recommended: 2 */
@@ -451,6 +493,8 @@ export interface MirostatParams {
451493
* Penalizes repetition of token sequences, more sophisticated than
452494
* simple repetition penalty. Useful for reducing loops and redundancy
453495
* in generated text.
496+
*
497+
* @category Sampling
454498
*/
455499
export interface DryParams {
456500
/** Penalty strength (0.0 = disabled, higher = stronger penalty) */
@@ -471,6 +515,8 @@ export interface DryParams {
471515
*
472516
* Excludes very high probability tokens to increase output diversity.
473517
* Useful when model is overly confident and produces repetitive text.
518+
*
519+
* @category Sampling
474520
*/
475521
export interface XtcParams {
476522
/** Probability of applying XTC (0.0 = disabled, 1.0 = always). Typical: 0.1 */
@@ -482,6 +528,8 @@ export interface XtcParams {
482528

483529
/**
484530
* Advanced sampling parameters
531+
*
532+
* @category Sampling
485533
*/
486534
export interface AdvancedSamplingParams {
487535
/** Locally typical sampling (1.0 = disabled) */
@@ -518,6 +566,8 @@ export interface AdvancedSamplingParams {
518566
* - Balanced: `{ temperature: 0.7 }`
519567
* - Creative: `{ temperature: 1.0 }`
520568
* - Deterministic greedy: `{ temperature: 0, topK: 0, topP: 1.0, minP: 0 }`
569+
*
570+
* @category Sampling
521571
*/
522572
export interface SamplingParams {
523573
// ===== COMMON CONTROLS =====
@@ -582,6 +632,8 @@ export interface SamplingParams {
582632
*
583633
* Use {@link createContext} to initialize, and `dispose()` when done to free
584634
* GPU/CPU memory.
635+
*
636+
* @category Core
585637
*/
586638
export interface SessionContext {
587639
// ===== THE GENERATION LOOP =====
@@ -1883,6 +1935,8 @@ export interface SessionContext {
18831935
* { gpuVariant: 'cuda' }
18841936
* );
18851937
* ```
1938+
*
1939+
* @category Core
18861940
*/
18871941
export function createContext(
18881942
options: ContextOptions,
@@ -1932,6 +1986,8 @@ export function createContext(
19321986
* // Create context from loaded binary
19331987
* const ctx = await binary.createContext({ modelPath: './model.gguf' });
19341988
* ```
1989+
*
1990+
* @category Core
19351991
*/
19361992
export function loadBinary(variant?: GpuVariant): {
19371993
createContext(options: ContextOptions): Promise<SessionContext>;
@@ -1993,6 +2049,8 @@ export function loadBinary(variant?: GpuVariant): {
19932049
* return logits[0];
19942050
* });
19952051
* ```
2052+
*
2053+
* @category Core
19962054
*/
19972055
export function withLogits<T>(
19982056
ctx: SessionContext,
@@ -2001,6 +2059,8 @@ export function withLogits<T>(
20012059

20022060
/**
20032061
* Result from Branch.produce()
2062+
*
2063+
* @category Branching
20042064
*/
20052065
export interface Produced {
20062066
/** Sampled token ID */
@@ -2051,6 +2111,8 @@ export interface Produced {
20512111
* const best = candidates.reduce((a, b) => a.perplexity < b.perplexity ? a : b);
20522112
* for (const c of candidates) { if (c !== best) c.prune(); }
20532113
* ```
2114+
*
2115+
* @category Branching
20542116
*/
20552117
export class Branch {
20562118
/**
@@ -2342,6 +2404,8 @@ export class Branch {
23422404
* ]);
23432405
* // Bin-packed into ceil(1012 / nBatch) GPU dispatches
23442406
* ```
2407+
*
2408+
* @category Branching
23452409
*/
23462410
export class BranchStore {
23472411
constructor(ctx: SessionContext);

typedoc.json

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,18 @@
1717
],
1818
"navigation": {
1919
"includeCategories": true,
20-
"includeGroups": true
20+
"includeGroups": false
2121
},
22-
"categorizeByGroup": true,
22+
"categorizeByGroup": false,
2323
"defaultCategory": "Core",
2424
"categoryOrder": [
2525
"Core",
26-
"Configuration",
2726
"Sampling",
27+
"Chat",
28+
"Branching",
2829
"*"
2930
],
3031
"sort": ["kind", "instance-first", "required-first", "alphabetical"],
31-
"kindSortOrder": [
32-
"Function",
33-
"Interface",
34-
"Enum",
35-
"TypeAlias"
36-
],
3732
"excludePrivate": true,
3833
"excludeProtected": false,
3934
"excludeExternals": false,

0 commit comments

Comments (0)