llamastack · stainless-app · Feb 5, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.5.0-alpha.2"
+  ".": "0.5.0-alpha.3"
 }
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 108
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-958e990011d6b4c27513743a151ec4c80c3103650a80027380d15f1d6b108e32.yml
-openapi_spec_hash: 5b49d825dbc2a26726ca752914a65114
-config_hash: 19b84a0a93d566334ae134dafc71991f
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-621e8b8ef37d5ebf024fe3bf6a59486a90debf01acca2c9bb4e9032e2dff92d3.yml
+openapi_spec_hash: 51f623cd3ea4addf8f939dd4ef8962c8
+config_hash: 6aa61d4143c3e3df785972c0287d1370
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,34 @@
 # Changelog
 
+## 0.5.0-alpha.3 (2026-02-19)
+
+Full Changelog: [v0.5.0-alpha.2...v0.5.0-alpha.3](https://github.com/llamastack/llama-stack-client-typescript/compare/v0.5.0-alpha.2...v0.5.0-alpha.3)
+
+### ⚠ BREAKING CHANGES
+
+* improve consistency of post-training API endpoints
+
+### Features
+
+* Add prompt_cache_key parameter support ([e6399ef](https://github.com/llamastack/llama-stack-client-typescript/commit/e6399ef9e8b34b6cf3238c4c8f8b7d0134a8788e))
+* add support for /responses background parameter ([c7e9581](https://github.com/llamastack/llama-stack-client-typescript/commit/c7e9581d7df28f5c97dd9ecc903c942f33972649))
+* Add truncation parameter support ([3119d6b](https://github.com/llamastack/llama-stack-client-typescript/commit/3119d6bb8a1d38d6fa1958daeed0fb5dbb12b1e1))
+* improve consistency of post-training API endpoints ([93376fc](https://github.com/llamastack/llama-stack-client-typescript/commit/93376fcbe864d4e8becd957301e6df15b3d803f8))
+* **vector_io:** Implement Contextual Retrieval for improved RAG search quality ([abf9c27](https://github.com/llamastack/llama-stack-client-typescript/commit/abf9c27dcee824cd87f5b1889c456d99a7a94fb9))
+
+
+### Bug Fixes
+
+* align chat completion usage schema with OpenAI spec ([6fa6eb8](https://github.com/llamastack/llama-stack-client-typescript/commit/6fa6eb87d96902586d4feb82747a5cdf99622045))
+* **inference:** use flat response message model for chat/completions ([d7033cd](https://github.com/llamastack/llama-stack-client-typescript/commit/d7033cd2af32a823514135971b3775037992393c))
+* **vector_io:** align Protocol signatures with request models ([11a1a20](https://github.com/llamastack/llama-stack-client-typescript/commit/11a1a20e82bae62f8210cb26091d3df8bf648f15))
+
+
+### Chores
+
+* **api:** minor updates ([1286b39](https://github.com/llamastack/llama-stack-client-typescript/commit/1286b396a69ae48b41ad125405f005264339116d))
+* **internal:** avoid type checking errors with ts-reset ([2cffe20](https://github.com/llamastack/llama-stack-client-typescript/commit/2cffe2040a77c3310db5d1803fc3800b4dd72f28))
+
 ## 0.5.0-alpha.2 (2026-02-05)
 
 Full Changelog: [v0.4.0-alpha.7...v0.5.0-alpha.2](https://github.com/llamastack/llama-stack-client-typescript/compare/v0.4.0-alpha.7...v0.5.0-alpha.2)

diff --git a/api.md b/api.md
@@ -407,9 +407,9 @@ Types:
 Methods:
 
 - <code title="get /v1alpha/post-training/jobs">client.alpha.postTraining.job.<a href="./src/resources/alpha/post-training/job.ts">list</a>() -> JobListResponse</code>
-- <code title="get /v1alpha/post-training/job/artifacts">client.alpha.postTraining.job.<a href="./src/resources/alpha/post-training/job.ts">artifacts</a>() -> JobArtifactsResponse</code>
-- <code title="post /v1alpha/post-training/job/cancel">client.alpha.postTraining.job.<a href="./src/resources/alpha/post-training/job.ts">cancel</a>() -> void</code>
-- <code title="get /v1alpha/post-training/job/status">client.alpha.postTraining.job.<a href="./src/resources/alpha/post-training/job.ts">status</a>() -> JobStatusResponse</code>
+- <code title="get /v1alpha/post-training/jobs/{job_uuid}/artifacts">client.alpha.postTraining.job.<a href="./src/resources/alpha/post-training/job.ts">artifacts</a>(jobUuid) -> JobArtifactsResponse</code>
+- <code title="post /v1alpha/post-training/jobs/{job_uuid}/cancel">client.alpha.postTraining.job.<a href="./src/resources/alpha/post-training/job.ts">cancel</a>(jobUuid) -> void</code>
+- <code title="get /v1alpha/post-training/jobs/{job_uuid}/status">client.alpha.postTraining.job.<a href="./src/resources/alpha/post-training/job.ts">status</a>(jobUuid) -> JobStatusResponse</code>
 
 ## Benchmarks
 

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "llama-stack-client",
-  "version": "0.5.0-alpha.2",
+  "version": "0.5.0-alpha.3",
   "description": "The official TypeScript library for the Llama Stack Client API",
   "author": "Llama Stack Client <llamastack@meta.com>",
   "types": "dist/index.d.ts",

diff --git a/src/resources/alpha/post-training/job.ts b/src/resources/alpha/post-training/job.ts
@@ -23,15 +23,15 @@ export class Job extends APIResource {
   /**
    * Get the artifacts of a training job.
    */
-  artifacts(options?: Core.RequestOptions): Core.APIPromise<JobArtifactsResponse> {
-    return this._client.get('/v1alpha/post-training/job/artifacts', options);
+  artifacts(jobUuid: string, options?: Core.RequestOptions): Core.APIPromise<JobArtifactsResponse> {
+    return this._client.get(`/v1alpha/post-training/jobs/${jobUuid}/artifacts`, options);
   }
 
   /**
    * Cancel a training job.
    */
-  cancel(options?: Core.RequestOptions): Core.APIPromise<void> {
-    return this._client.post('/v1alpha/post-training/job/cancel', {
+  cancel(jobUuid: string, options?: Core.RequestOptions): Core.APIPromise<void> {
+    return this._client.post(`/v1alpha/post-training/jobs/${jobUuid}/cancel`, {
       ...options,
       headers: { Accept: '*/*', ...options?.headers },
     });
@@ -40,8 +40,8 @@ export class Job extends APIResource {
   /**
    * Get the status of a training job.
    */
-  status(options?: Core.RequestOptions): Core.APIPromise<JobStatusResponse> {
-    return this._client.get('/v1alpha/post-training/job/status', options);
+  status(jobUuid: string, options?: Core.RequestOptions): Core.APIPromise<JobStatusResponse> {
+    return this._client.get(`/v1alpha/post-training/jobs/${jobUuid}/status`, options);
   }
 }
 

diff --git a/src/resources/chat/chat.ts b/src/resources/chat/chat.ts
@@ -53,7 +53,12 @@ export interface ChatCompletionChunk {
   object?: 'chat.completion.chunk';
 
   /**
-   * Usage information for OpenAI chat completion.
+   * The service tier that was used for this response.
+   */
+  service_tier?: string | null;
+
+  /**
+   * Token usage information (typically included in the final chunk with stream_options).
    */
   usage?: ChatCompletionChunk.Usage | null;
 }
@@ -278,54 +283,54 @@ export namespace ChatCompletionChunk {
   }
 
   /**
-   * Usage information for OpenAI chat completion.
+   * Token usage information (typically included in the final chunk with stream_options).
    */
   export interface Usage {
     /**
      * Number of tokens in the completion.
      */
-    completion_tokens: number;
+    completion_tokens?: number;
 
     /**
-     * Number of tokens in the prompt.
+     * Detailed breakdown of output token usage.
      */
-    prompt_tokens: number;
+    completion_tokens_details?: Usage.CompletionTokensDetails;
 
     /**
-     * Total tokens used (prompt + completion).
+     * Number of tokens in the prompt.
      */
-    total_tokens: number;
+    prompt_tokens?: number;
 
     /**
-     * Token details for output tokens in OpenAI chat completion usage.
+     * Detailed breakdown of input token usage.
      */
-    completion_tokens_details?: Usage.CompletionTokensDetails | null;
+    prompt_tokens_details?: Usage.PromptTokensDetails;
 
     /**
-     * Token details for prompt tokens in OpenAI chat completion usage.
+     * Total tokens used (prompt + completion).
      */
-    prompt_tokens_details?: Usage.PromptTokensDetails | null;
+    total_tokens?: number;
   }
 
   export namespace Usage {
     /**
-     * Token details for output tokens in OpenAI chat completion usage.
+     * Detailed breakdown of output token usage.
      */
     export interface CompletionTokensDetails {
       /**
        * Number of tokens used for reasoning (o1/o3 models).
        */
-      reasoning_tokens?: number | null;
+      reasoning_tokens?: number;
     }
 
     /**
-     * Token details for prompt tokens in OpenAI chat completion usage.
+     * Detailed breakdown of input token usage.
      */
     export interface PromptTokensDetails {
       /**
        * Number of tokens retrieved from cache.
        */
-      cached_tokens?: number | null;
+      cached_tokens?: number;
     }
   }
 }