diff --git a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts index e8c3aa1ae8aa6..3c14bcf23b264 100644 --- a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts +++ b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts @@ -126,6 +126,30 @@ export function useProcessingState(): UseProcessingStateReturn { ); } + // Show input parsing progress when preparing + if ( + stateToUse.status === 'preparing' && + stateToUse.inputTokensProcessed !== undefined && + stateToUse.inputTokensTotal !== undefined && + stateToUse.inputTokensTotal > 0 + ) { + const inputPercent = Math.round( + (stateToUse.inputTokensProcessed / stateToUse.inputTokensTotal) * 100 + ); + details.push( + `Input: ${stateToUse.inputTokensProcessed}/${stateToUse.inputTokensTotal} (${inputPercent}%)` + ); + + // Show parsing tokens per second if available + if ( + currentConfig.showTokensPerSecond && + stateToUse.parsingTokensPerSecond !== undefined && + stateToUse.parsingTokensPerSecond > 0 + ) { + details.push(`${stateToUse.parsingTokensPerSecond.toFixed(1)} t/s`); + } + } + if (stateToUse.outputTokensUsed > 0) { // Handle infinite max_tokens (-1) case if (stateToUse.outputTokensMax <= 0) { @@ -144,7 +168,8 @@ export function useProcessingState(): UseProcessingStateReturn { if ( currentConfig.showTokensPerSecond && stateToUse.tokensPerSecond && - stateToUse.tokensPerSecond > 0 + stateToUse.tokensPerSecond > 0 && + stateToUse.status !== 'preparing' // Don't show generation t/s when parsing ) { details.push(`${stateToUse.tokensPerSecond.toFixed(1)} tokens/sec`); } diff --git a/tools/server/webui/src/lib/services/slots.ts b/tools/server/webui/src/lib/services/slots.ts index e99297d6a0506..27d31e0980cea 100644 --- a/tools/server/webui/src/lib/services/slots.ts +++ b/tools/server/webui/src/lib/services/slots.ts @@ -252,6 +252,11 @@ export class SlotsService { ? Math.round((promptProgress.processed / promptProgress.total) * 100) : undefined; + // Calculate parsing tokens per second from prompt_progress + const parsingTokensPerSecond = promptProgress && promptProgress.time_ms > 0 + ? (promptProgress.processed / promptProgress.time_ms) * 1000 + : undefined; + return { status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle', tokensDecoded: predictedTokens, @@ -267,7 +272,10 @@ export class SlotsService { speculative: false, progressPercent, promptTokens, - cacheTokens + cacheTokens, + parsingTokensPerSecond, + inputTokensProcessed: promptProgress?.processed, + inputTokensTotal: promptProgress?.total }; } diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index 6ebc43db0e3ef..2458e456bce66 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -294,4 +294,7 @@ export interface ApiProcessingState { progressPercent?: number; promptTokens?: number; cacheTokens?: number; + parsingTokensPerSecond?: number; + inputTokensProcessed?: number; // Number of input tokens processed during parsing + inputTokensTotal?: number; // Total number of input tokens to process }