// src/cli/helpers/markdownSplit.ts
// Markdown-aware content splitting for aggressive static promotion.
// Ported from Gemini CLI: packages/cli/src/ui/utils/markdownUtilities.ts

/**
 * Reports whether a character index falls inside a fenced code block.
 *
 * Every triple-backtick occurrence that begins before `indexToTest` is
 * visited, and those that qualify as fence markers toggle the "inside" state;
 * an odd number of markers means a block has been opened but not closed at
 * that index. A triple backtick only qualifies when it sits at the very start
 * of the content or immediately after a newline, so backticks embedded in the
 * middle of a line (for example inside a string literal in a snippet) are
 * ignored.
 *
 * @param content - Markdown text to inspect.
 * @param indexToTest - Character offset to classify. A fence that starts
 *   exactly at this offset is not counted.
 * @returns `true` when an odd number of fence markers start before
 *   `indexToTest`, otherwise `false`.
 */
function isIndexInsideCodeBlock(content: string, indexToTest: number): boolean {
  const FENCE = "```";
  let insideBlock = false;
  let cursor = content.indexOf(FENCE);
  while (cursor !== -1 && cursor < indexToTest) {
    // Only a marker at the start of a line is a real markdown fence.
    const atLineStart = cursor === 0 || content[cursor - 1] === "\n";
    if (atLineStart) {
      insideBlock = !insideBlock;
    }
    // Resume after the three backticks just examined, fence or not.
    cursor = content.indexOf(FENCE, cursor + FENCE.length);
  }
  return insideBlock;
}

/**
 * Locates the next fence marker (a triple backtick at the start of a line) at
 * or after a given position.
 *
 * Triple backticks that appear mid-line are skipped; the search resumes three
 * characters past each skipped occurrence.
 *
 * @param content - Markdown text to search.
 * @param startPos - Offset at which the search begins.
 * @returns The offset of the first qualifying fence marker, or `-1` when there
 *   is none.
 */
function findNextLineFence(content: string, startPos: number): number {
  const FENCE = "```";
  for (
    let candidate = content.indexOf(FENCE, startPos);
    candidate !== -1;
    candidate = content.indexOf(FENCE, candidate + FENCE.length)
  ) {
    // A fence must open the content or directly follow a newline.
    if (candidate === 0 || content[candidate - 1] === "\n") {
      return candidate;
    }
  }
  return -1;
}

/**
 * Finds where the fenced code block enclosing a given index begins.
 *
 * Line-start fence markers that begin before `index` are visited in order;
 * each one either opens a block or closes the one currently open. Whatever
 * block is still open once the markers before `index` are exhausted is the
 * enclosing block.
 *
 * @param content - Markdown text to inspect.
 * @param index - Character offset to locate. A fence that starts exactly at
 *   this offset is not considered.
 * @returns The offset of the opening fence of the enclosing block, or `-1`
 *   when `index` is not inside a code block.
 */
function findEnclosingCodeBlockStart(content: string, index: number): number {
  // Offset of the currently unclosed opening fence, or -1 when outside a block.
  let openFenceStart = -1;
  let searchFrom = 0;
  while (searchFrom < index) {
    const fence = findNextLineFence(content, searchFrom);
    if (fence === -1 || fence >= index) {
      break;
    }
    // Each fence before `index` flips between "opened here" and "closed".
    openFenceStart = openFenceStart === -1 ? fence : -1;
    searchFrom = fence + 3;
  }
  return openFenceStart;
}

// Minimum content length before we consider splitting.
// This prevents creating many tiny chunks, which causes spacing issues.
// Higher value = fewer splits = cleaner output, but more content re-rendering.
const MIN_SPLIT_LENGTH = 1500;

/**
 * Finds the last safe split point in `content`: a paragraph boundary that is
 * not inside a fenced code block.
 *
 * Used for aggressive static promotion during streaming - completed paragraphs
 * can be committed to Ink's <Static> component to reduce flicker.
 *
 * Resolution order:
 * 1. Content shorter than `MIN_SPLIT_LENGTH` is never split.
 * 2. If the end of the content is inside a code block, the split point is the
 *    start of that block.
 * 3. Otherwise, the split point is just after the last blank-line separator
 *    (two consecutive newlines) that is not inside a code block.
 *
 * @param content - The text accumulated so far.
 * @returns The offset at which to split, or `content.length` when no safe
 *   split point exists (meaning: do not split).
 */
export function findLastSafeSplitPoint(content: string): number {
  // Too short to be worth splitting - avoids excessive chunking.
  if (content.length < MIN_SPLIT_LENGTH) {
    return content.length;
  }

  // An unterminated code block at the end must stay whole: split before it.
  const openBlockStart = findEnclosingCodeBlockStart(content, content.length);
  if (openBlockStart !== -1) {
    return openBlockStart;
  }

  // Walk blank-line separators from the end towards the start.
  let breakIndex = content.lastIndexOf("\n\n");
  while (breakIndex !== -1) {
    const splitPoint = breakIndex + 2; // Split AFTER the \n\n
    if (!isIndexInsideCodeBlock(content, splitPoint)) {
      return splitPoint;
    }
    // lastIndexOf clamps a negative position to 0, so stop explicitly once the
    // separator at offset 0 has been rejected instead of finding it again.
    if (breakIndex === 0) {
      break;
    }
    breakIndex = content.lastIndexOf("\n\n", breakIndex - 1);
  }

  // No safe split point found - don't split.
  return content.length;
}