Skip to content

Commit 5991017

Browse files
refactor: improve paragraph chunking logic in content chunker
1 parent 91ae5bb commit 5991017

1 file changed

Lines changed: 18 additions & 17 deletions

File tree

build/utils/markdown-chunker.ts

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -350,23 +350,24 @@ export function chunkContent(
350 350
currentLines = 0;
351 351
}
352 352
}
353-
} else if (block.type === 'paragraph' && block.estimatedLines > linesPerPage) {
354-
if (currentChunk.trim()) {
355-
chunks.push(currentChunk.trim());
356-
currentChunk = '';
357-
currentLines = 0;
358-
}
359-
360-
const sentences = block.content.match(/[^.!?]+[.!?]+/g) || [block.content];
361-
for (const sentence of sentences) {
362-
const sentenceLines = Math.ceil(sentence.length / charsPerLine);
363-
if (currentLines + sentenceLines > linesPerPage && currentChunk.trim()) {
364-
chunks.push(currentChunk.trim());
365-
currentChunk = sentence;
366-
currentLines = sentenceLines;
367-
} else {
368-
currentChunk += sentence;
369-
currentLines += sentenceLines;
353+
} else if (block.type === 'paragraph') {
354+
const remainingSpace = linesPerPage - currentLines;
355+
356+
if (remainingSpace >= block.estimatedLines) {
357+
currentChunk += (currentChunk ? '\n\n' : '') + block.content;
358+
currentLines += block.estimatedLines;
359+
} else {
360+
const sentences = block.content.match(/[^.!?]+[.!?]+/g) || [block.content];
361+
for (const sentence of sentences) {
362+
const sentenceLines = Math.ceil(sentence.length / charsPerLine);
363+
if (currentLines + sentenceLines > linesPerPage && currentChunk.trim()) {
364+
chunks.push(currentChunk.trim());
365+
currentChunk = sentence.trim();
366+
currentLines = sentenceLines;
367+
} else {
368+
currentChunk += (currentChunk && !currentChunk.endsWith('\n\n') ? '' : '') + sentence;
369+
currentLines += sentenceLines;
370+
}
370 371
}
371 372
}
372 373
} else {

0 commit comments

Comments
 (0)