Skip to content

Commit c11c927

Browse files
fix: stream serialization of large NMRium state objects (#101)
* fix: stream serialization of large NMRium state objects
  refactor: include the meta and info objects when serializing as dataSource
* refactor: stream nmr-cli output as JSON response in parse-spectra endpoint
1 parent 5d89678 commit c11c927

File tree

5 files changed

+148
-72
lines changed

5 files changed

+148
-72
lines changed

app/routers/spectra.py

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ class UrlParseRequest(BaseModel):
4848
False,
4949
description="Enable ranges and zones automatic detection",
5050
)
51+
raw_data: bool = Field(
52+
False, description="Include raw data in the output (default: data source)")
5153

5254
model_config = {
5355
"json_schema_extra": {
@@ -92,8 +94,9 @@ def run_command(
9294
capture_snapshot: bool = False,
9395
auto_processing: bool = False,
9496
auto_detection: bool = False,
95-
) -> dict:
96-
"""Execute nmr-cli command in Docker container"""
97+
raw_data: bool = False,
98+
) -> StreamingResponse:
99+
"""Execute nmr-cli parse-spectra command in Docker container."""
97100

98101
cmd = ["nmr-cli", "parse-spectra"]
99102

@@ -108,41 +111,33 @@ def run_command(
108111
cmd.append("-p")
109112
if auto_detection:
110113
cmd.append("-d")
114+
if raw_data:
115+
cmd.append("-r")
111116

112117
try:
113118
result = subprocess.run(
114119
["docker", "exec", NMR_CLI_CONTAINER] + cmd,
115120
capture_output=True,
116-
text=False,
117-
timeout=120
121+
timeout=120,
118122
)
119123
except subprocess.TimeoutExpired:
120124
raise HTTPException(
121-
status_code=408,
122-
detail="Processing timeout exceeded"
123-
)
125+
status_code=408, detail="Processing timeout exceeded")
124126
except FileNotFoundError:
125127
raise HTTPException(
126-
status_code=500,
127-
detail="Docker not found or nmr-converter container not running."
128-
)
128+
status_code=500, detail="Docker not found or nmr-converter container not running.")
129129

130130
if result.returncode != 0:
131-
error_msg = result.stderr.decode(
132-
"utf-8") if result.stderr else "Unknown error"
133131
raise HTTPException(
134132
status_code=422,
135-
detail=f"NMR CLI error: {error_msg}"
133+
detail=f"NMR CLI error: {result.stderr.decode('utf-8') or 'Unknown error'}",
136134
)
137135

138-
# Parse output
139-
try:
140-
return json.loads(result.stdout.decode("utf-8"))
141-
except json.JSONDecodeError as e:
142-
raise HTTPException(
143-
status_code=500,
144-
detail=f"Invalid JSON from NMR CLI: {e}"
145-
)
136+
return StreamingResponse(
137+
io.BytesIO(result.stdout),
138+
media_type="application/json",
139+
headers={"Content-Disposition": "attachment; filename=parse-output.json"},
140+
)
146141

147142

148143
def run_publication_string_command(publication_string: str) -> dict:
@@ -229,16 +224,20 @@ def remove_file_from_container(container_path: str) -> None:
229224
class PeakItem(BaseModel):
230225
"""A single NMR peak."""
231226
x: float = Field(..., description="Chemical shift in ppm")
232-
y: Optional[float] = Field(1.0, description="Peak intensity (default: 1.0)")
233-
width: Optional[float] = Field(1.0, description="Peak width in Hz (default: 1.0)")
227+
y: Optional[float] = Field(
228+
1.0, description="Peak intensity (default: 1.0)")
229+
width: Optional[float] = Field(
230+
1.0, description="Peak width in Hz (default: 1.0)")
234231

235232

236233
class PeaksToNMRiumOptions(BaseModel):
237234
"""Options for peaks-to-NMRium conversion."""
238-
nucleus: Optional[str] = Field("1H", description="Nucleus type (e.g. '1H', '13C')")
235+
nucleus: Optional[str] = Field(
236+
"1H", description="Nucleus type (e.g. '1H', '13C')")
239237
solvent: Optional[str] = Field("", description="NMR solvent")
240238
frequency: Optional[float] = Field(400, description="NMR frequency in MHz")
241-
nbPoints: Optional[int] = Field(131072, description="Number of points for spectrum generation", alias="nb_points")
239+
nbPoints: Optional[int] = Field(
240+
131072, description="Number of points for spectrum generation", alias="nb_points")
242241

243242
model_config = {"populate_by_name": True}
244243

@@ -276,7 +275,8 @@ class PeaksToNMRiumRequest(BaseModel):
276275
def run_peaks_to_nmrium_command(payload: dict) -> str:
277276
"""Execute nmr-cli peaks-to-nmrium command in Docker container via stdin."""
278277

279-
cmd = ["docker", "exec", "-i", NMR_CLI_CONTAINER, "nmr-cli", "peaks-to-nmrium"]
278+
cmd = ["docker", "exec", "-i", NMR_CLI_CONTAINER,
279+
"nmr-cli", "peaks-to-nmrium"]
280280
stdin_data = json.dumps(payload)
281281

282282
try:
@@ -298,7 +298,8 @@ def run_peaks_to_nmrium_command(payload: dict) -> str:
298298
)
299299

300300
if result.returncode != 0:
301-
error_msg = result.stderr.decode("utf-8") if result.stderr else "Unknown error"
301+
error_msg = result.stderr.decode(
302+
"utf-8") if result.stderr else "Unknown error"
302303
raise HTTPException(
303304
status_code=422,
304305
detail=f"NMR CLI error: {error_msg}",
@@ -344,7 +345,8 @@ def run_peaks_to_nmrium_command(payload: dict) -> str:
344345
},
345346
)
346347
async def parse_spectra_from_file(
347-
file: UploadFile = File(..., description="NMR spectra file to parse (JCAMP-DX, Bruker zip, etc.)"),
348+
file: UploadFile = File(
349+
..., description="NMR spectra file to parse (JCAMP-DX, Bruker zip, etc.)"),
348350
capture_snapshot: bool = Form(
349351
False,
350352
description="Generate an image snapshot of the spectra",
@@ -357,6 +359,8 @@ async def parse_spectra_from_file(
357359
False,
358360
description="Enable ranges and zones automatic detection",
359361
),
362+
raw_data: bool = Form(
363+
False, description="Include raw data in the output (default: data source references)")
360364
):
361365
"""
362366
## Parse spectra from an uploaded file
@@ -369,7 +373,7 @@ async def parse_spectra_from_file(
369373
| `capture_snapshot` | Capture an image snapshot of the spectra |
370374
| `auto_processing` | Automatically process FID → FT spectra |
371375
| `auto_detection` | Automatically detect ranges and zones |
372-
376+
| `raw_data` | Include raw data in the output (default: data source) |
373377
### Returns
374378
Parsed spectra data in NMRium-compatible JSON format.
375379
"""
@@ -398,6 +402,7 @@ async def parse_spectra_from_file(
398402
capture_snapshot=capture_snapshot,
399403
auto_processing=auto_processing,
400404
auto_detection=auto_detection,
405+
raw_data=raw_data,
401406
)
402407

403408
except HTTPException:
@@ -445,6 +450,7 @@ async def parse_spectra_from_url(request: UrlParseRequest):
445450
| `capture_snapshot` | Capture an image snapshot of the spectra |
446451
| `auto_processing` | Automatically process FID → FT spectra |
447452
| `auto_detection` | Automatically detect ranges and zones |
453+
| `raw_data` | Include raw data in the output (default: data source) |
448454
449455
### Returns
450456
Parsed spectra data in NMRium-compatible JSON format.
@@ -455,6 +461,7 @@ async def parse_spectra_from_url(request: UrlParseRequest):
455461
capture_snapshot=request.capture_snapshot,
456462
auto_processing=request.auto_processing,
457463
auto_detection=request.auto_detection,
464+
raw_data=request.raw_data,
458465
)
459466

460467
except HTTPException:

app/scripts/nmr-cli/package-lock.json

Lines changed: 11 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

app/scripts/nmr-cli/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"@zakodium/nmrium-core-plugins": "^0.6.25",
2121
"axios": "^1.13.2",
2222
"file-collection": "^6.5.0",
23+
"json-stream-stringify": "^3.1.6",
2324
"lodash.merge": "^4.6.2",
2425
"mf-parser": "^3.6.0",
2526
"ml-spectra-processing": "^14.19.0",

app/scripts/nmr-cli/src/index.ts

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
#!/usr/bin/env node
22
import yargs, { type Argv, type CommandModule, type Options } from 'yargs'
3-
import { loadSpectrumFromURL, loadSpectrumFromFilePath } from './parse/prase-spectra'
3+
import { parseSpectra } from './parse/prase-spectra'
44
import { generateSpectrumFromPublicationString } from './publication-string'
55
import { generateNMRiumFromPeaks } from './peaks-to-nmrium'
66
import type { PeaksToNMRiumInput } from './peaks-to-nmrium'
77
import { hideBin } from 'yargs/helpers'
88
import { parsePredictionCommand } from './prediction'
99
import { readFileSync } from 'fs'
10+
import { IncludeData } from '@zakodium/nmrium-core'
1011

1112
const usageMessage = `
1213
Usage: nmr-cli <command> [options]
@@ -23,7 +24,9 @@ Options for 'parse-spectra' command:
2324
-s, --capture-snapshot Capture snapshot
2425
-p, --auto-processing Automatic processing of spectrum (FID → FT spectra).
2526
-d, --auto-detection Enable ranges and zones automatic detection.
26-
27+
-o, --output Output file path (optional)
28+
-r, --raw-data Include raw data in the output instead of data source
29+
2730
Arguments for 'parse-publication-string' command:
2831
publicationString Publication string
2932
@@ -119,6 +122,17 @@ export interface FileOptionsArgs {
119122
* Perform automatic ranges and zones detection.
120123
*/
121124
d?: boolean;
125+
/**
126+
* -o, --output
127+
* Output file path
128+
*/
129+
o?: string;
130+
/**
131+
* -r, --raw-data
132+
* Include raw data in the output, defaults to dataSource
133+
*/
134+
r?: boolean;
135+
122136
}
123137

124138
// Define options for parsing a spectra file
@@ -150,6 +164,17 @@ const fileOptions: { [key in keyof FileOptionsArgs]: Options } = {
150164
describe: 'Ranges and zones auto detection',
151165
type: 'boolean',
152166
},
167+
o: {
168+
alias: 'output',
169+
type: 'string',
170+
description: 'Output file path',
171+
},
172+
r: {
173+
alias: 'raw-data',
174+
type: 'boolean',
175+
default: false,
176+
description: 'Include raw data in the output (default: dataSource)',
177+
},
153178
} as const
154179

155180
const parseFileCommand: CommandModule<{}, FileOptionsArgs> = {
@@ -161,22 +186,7 @@ const parseFileCommand: CommandModule<{}, FileOptionsArgs> = {
161186
.conflicts('u', 'dir') as Argv<FileOptionsArgs>
162187
},
163188
handler: argv => {
164-
165-
const { u, dir } = argv;
166-
// Handle parsing the spectra file logic based on argv options
167-
if (u) {
168-
loadSpectrumFromURL({ u, ...argv }).then(result => {
169-
console.log(JSON.stringify(result))
170-
})
171-
}
172-
173-
174-
if (dir) {
175-
loadSpectrumFromFilePath({ dir, ...argv }).then(result => {
176-
console.log(JSON.stringify(result))
177-
})
178-
}
179-
189+
parseSpectra(argv)
180190
},
181191
}
182192

0 commit comments

Comments
 (0)