From d3753e25fc8070161cf6e3bbeef1f781cfe5d178 Mon Sep 17 00:00:00 2001 From: Michael Dailey Date: Tue, 9 Jun 2026 12:42:47 -0500 Subject: [PATCH] PDX-514: fix(mcp): base estimatedTokens on content text only RCA: estimateTokens serialized the whole ToolResult, counting the payload twice (escaped JSON in content[0].text plus the structuredContent object) and inflating the per-call estimate roughly 2x versus what the model actually reads. Fix: add estimateResultTokens summing content[].text and use it in attachMeta and the depth-guard accumulator; update docs/mcp.md formula and note plus tokenMeta tests; bump version to 1.6.2 in package.json and server.json. --- docs/mcp.md | 16 +++++----- package.json | 2 +- server.json | 4 +-- src/mcp/utils/tokenMeta.ts | 17 +++++++++-- test/unit/mcp/tokenMeta.test.ts | 54 +++++++++++++++++++++++++++++---- 5 files changed, 74 insertions(+), 19 deletions(-) diff --git a/docs/mcp.md b/docs/mcp.md index 03fbf98f..bf7b87a8 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -2745,11 +2745,11 @@ On `TOOL_BUDGET_EXCEEDED` errors the meta also includes the session cumulative t } ``` -| Field | Description | -| ----------------------------- | -------------------------------------------------------------------------------------------- | -| `tool` | Name of the tool that produced this response | -| `detailLevel` | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`) | -| `estimatedTokens` | `ceil(len(JSON.stringify(response)) / 4)` — a rough character-to-token estimate | -| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors | - -> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. LLM clients read `content[0].text`; including observability data there would waste tokens on every response. +| Field | Description | +| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | +| `tool` | Name of the tool that produced this response | +| `detailLevel` | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`) | +| `estimatedTokens` | `ceil(len(content[].text) / 4)` — rough char-to-token estimate of the text an LLM client reads; `structuredContent` is **not** counted | +| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors | + +> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. LLM clients read `content[0].text`; including observability data there would waste tokens on every response. For the same reason, `estimatedTokens` is measured over `content[].text` only — the payload is duplicated in both `content[0].text` (as escaped JSON) and `structuredContent` (as an object), so estimating over the whole result would double-count it (~2x). The figure therefore reflects what the model actually consumes, not the duplicated `structuredContent`. diff --git a/package.json b/package.json index 9b976c6c..cd131641 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@provartesting/provardx-cli", "description": "A plugin for the Salesforce CLI to orchestrate testing activities and report quality metrics to Provar Quality Hub", - "version": "1.6.1", + "version": "1.6.2", "mcpName": "io.github.ProvarTesting/provar", "license": "BSD-3-Clause", "plugins": [ diff --git a/server.json b/server.json index 6e47889d..9b57a70d 100644 --- a/server.json +++ b/server.json @@ -14,12 +14,12 @@ "url": "https://github.com/ProvarTesting/provardx-cli", "source": "github" }, - "version": "1.6.1", + "version": "1.6.2", "packages": [ { "registryType": "npm", "identifier": "@provartesting/provardx-cli", - "version": "1.6.1", + "version": "1.6.2", "transport": { "type": "stdio" }, diff --git a/src/mcp/utils/tokenMeta.ts b/src/mcp/utils/tokenMeta.ts index 23ab6f48..d5224b62 100644 --- a/src/mcp/utils/tokenMeta.ts +++ b/src/mcp/utils/tokenMeta.ts @@ -90,7 +90,7 @@ export function wrapWithDepthGuard( const result = await handler(args, extra); if (process.env['PROVAR_MCP_EMIT_TOKEN_META'] === 'true') { - entry.totalEstimatedTokens += estimateTokens(result); + entry.totalEstimatedTokens += estimateResultTokens(result); } const detailLevel = typeof args['detail'] === 'string' ? args['detail'] : 'standard'; @@ -106,6 +106,19 @@ export function estimateTokens(payload: unknown): number { return Math.ceil(JSON.stringify(payload).length / 4); } +/** + * Estimate the tokens an LLM client actually consumes from a tool result. + * Clients read `content[].text` only; `structuredContent` carries the same payload + * for programmatic consumers and is not tokenized into the model context. Estimating + * over the whole ToolResult double-counts the payload — it lives in both + * `content[0].text` (as escaped JSON) and `structuredContent` (as an object) — inflating + * the figure ~2x. Summing `content[].text` is the faithful per-call context-cost signal. + */ +export function estimateResultTokens(result: ToolResult): number { + const chars = result.content.reduce((sum, item) => sum + item.text.length, 0); + return Math.ceil(chars / 4); +} + /** * Appends a `_meta` key to `structuredContent` when PROVAR_MCP_EMIT_TOKEN_META=true. * The `content[0].text` string is intentionally left unchanged — LLMs read that @@ -125,7 +138,7 @@ export function attachMeta( const meta: Record = { tool: toolName, detailLevel, - estimatedTokens: estimateTokens(response), + estimatedTokens: estimateResultTokens(response), }; if (sessionTotalTokens !== undefined) { diff --git a/test/unit/mcp/tokenMeta.test.ts b/test/unit/mcp/tokenMeta.test.ts index b94bf8e7..64dad107 100644 --- a/test/unit/mcp/tokenMeta.test.ts +++ b/test/unit/mcp/tokenMeta.test.ts @@ -11,6 +11,7 @@ import { wrapWithDepthGuard, attachMeta, estimateTokens, + estimateResultTokens, type ToolResult, type AnyToolCallback, } from '../../../src/mcp/utils/tokenMeta.js'; @@ -210,16 +211,27 @@ describe('attachMeta', () => { }); }); - it('estimated_tokens is within ±50% of actual JSON length / 4', () => { + it('estimatedTokens reflects content[].text length, not the whole result', () => { withMeta(true, () => { const result = attachMeta(okResponse, 'my_tool', 'standard'); const meta = (result.structuredContent as Record)['_meta'] as Record; const estimate = meta['estimatedTokens'] as number; - const actual = Math.ceil(JSON.stringify(okResponse).length / 4); - assert.ok( - estimate >= actual * 0.5 && estimate <= actual * 1.5, - `estimate ${estimate} should be within ±50% of ${actual}` - ); + const actual = Math.ceil(okResponse.content.map((c) => c.text).join('').length / 4); + assert.strictEqual(estimate, actual); + }); + }); + + it('does not count structuredContent — tiny content + huge structuredContent stays tiny', () => { + withMeta(true, () => { + const fat: ToolResult = { + content: [{ type: 'text', text: 'ok' }], + structuredContent: { blob: 'x'.repeat(100_000) }, + }; + const result = attachMeta(fat, 'my_tool', 'standard'); + const meta = (result.structuredContent as Record)['_meta'] as Record; + const estimate = meta['estimatedTokens'] as number; + // content text is "ok" (2 chars) → ceil(2/4) = 1, regardless of the 100k-char structuredContent. + assert.strictEqual(estimate, 1); }); }); }); @@ -241,6 +253,36 @@ describe('estimateTokens', () => { }); }); +// --------------------------------------------------------------------------- +// estimateResultTokens +// --------------------------------------------------------------------------- + +describe('estimateResultTokens', () => { + it('sums content[].text lengths and ignores structuredContent', () => { + const result: ToolResult = { + content: [ + { type: 'text', text: 'aaaa' }, + { type: 'text', text: 'bbbb' }, + ], + structuredContent: { ignored: 'z'.repeat(10_000) }, + }; + // 8 chars of content text → ceil(8/4) = 2. + assert.strictEqual(estimateResultTokens(result), 2); + }); + + it('returns 0 for empty content', () => { + const result: ToolResult = { content: [], structuredContent: { a: 1 } }; + assert.strictEqual(estimateResultTokens(result), 0); + }); + + it('does not double-count the payload the way estimateTokens(result) would', () => { + // The payload lives in both content[0].text (escaped JSON) and structuredContent, + // so estimateTokens over the whole result counts it twice; estimateResultTokens + // counts the content text only. + assert.ok(estimateResultTokens(okResponse) < estimateTokens(okResponse)); + }); +}); + // --------------------------------------------------------------------------- // Integration: wrapWithDepthGuard + attachMeta // ---------------------------------------------------------------------------