From d3753e25fc8070161cf6e3bbeef1f781cfe5d178 Mon Sep 17 00:00:00 2001
From: Michael Dailey <michael.dailey@provartesting.com>
Date: Tue, 9 Jun 2026 12:42:47 -0500
Subject: [PATCH] PDX-514: fix(mcp): base estimatedTokens on content text only

RCA: estimateTokens serialized the whole ToolResult, counting the payload twice (escaped JSON in content[0].text plus the structuredContent object) and inflating the per-call estimate roughly 2x versus what the model actually reads.
Fix: add estimateResultTokens summing content[].text and use it in attachMeta and the depth-guard accumulator; update docs/mcp.md formula and note plus tokenMeta tests; bump version to 1.6.2 in package.json and server.json.
---
 docs/mcp.md                     | 16 +++++-----
 package.json                    |  2 +-
 server.json                     |  4 +--
 src/mcp/utils/tokenMeta.ts      | 17 +++++++++--
 test/unit/mcp/tokenMeta.test.ts | 54 +++++++++++++++++++++++++++++----
 5 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/docs/mcp.md b/docs/mcp.md
index 03fbf98f..bf7b87a8 100644
--- a/docs/mcp.md
+++ b/docs/mcp.md
@@ -2745,11 +2745,11 @@ On `TOOL_BUDGET_EXCEEDED` errors the meta also includes the session cumulative t
 }
 ```
 
-| Field                         | Description                                                                                  |
-| ----------------------------- | -------------------------------------------------------------------------------------------- |
-| `tool`                        | Name of the tool that produced this response                                                 |
-| `detailLevel`                 | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`) |
-| `estimatedTokens`             | `ceil(len(JSON.stringify(response)) / 4)` — a rough character-to-token estimate              |
-| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors                  |
-
-> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. LLM clients read `content[0].text`; including observability data there would waste tokens on every response.
+| Field                         | Description                                                                                                                            |
+| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
+| `tool`                        | Name of the tool that produced this response                                                                                           |
+| `detailLevel`                 | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`)                                           |
+| `estimatedTokens`             | `ceil(len(content[].text) / 4)` — rough char-to-token estimate of the text an LLM client reads; `structuredContent` is **not** counted |
+| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors                                                            |
+
+> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. LLM clients read `content[0].text`; including observability data there would waste tokens on every response. For the same reason, `estimatedTokens` is measured over `content[].text` only — the payload is duplicated in both `content[0].text` (as escaped JSON) and `structuredContent` (as an object), so estimating over the whole result would double-count it (~2x). The figure therefore reflects what the model actually consumes, not the duplicated `structuredContent`.
diff --git a/package.json b/package.json
index 9b976c6c..cd131641 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@provartesting/provardx-cli",
   "description": "A plugin for the Salesforce CLI to orchestrate testing activities and report quality metrics to Provar Quality Hub",
-  "version": "1.6.1",
+  "version": "1.6.2",
   "mcpName": "io.github.ProvarTesting/provar",
   "license": "BSD-3-Clause",
   "plugins": [
diff --git a/server.json b/server.json
index 6e47889d..9b57a70d 100644
--- a/server.json
+++ b/server.json
@@ -14,12 +14,12 @@
     "url": "https://github.com/ProvarTesting/provardx-cli",
     "source": "github"
   },
-  "version": "1.6.1",
+  "version": "1.6.2",
   "packages": [
     {
       "registryType": "npm",
       "identifier": "@provartesting/provardx-cli",
-      "version": "1.6.1",
+      "version": "1.6.2",
       "transport": {
         "type": "stdio"
       },
diff --git a/src/mcp/utils/tokenMeta.ts b/src/mcp/utils/tokenMeta.ts
index 23ab6f48..d5224b62 100644
--- a/src/mcp/utils/tokenMeta.ts
+++ b/src/mcp/utils/tokenMeta.ts
@@ -90,7 +90,7 @@ export function wrapWithDepthGuard(
     const result = await handler(args, extra);
 
     if (process.env['PROVAR_MCP_EMIT_TOKEN_META'] === 'true') {
-      entry.totalEstimatedTokens += estimateTokens(result);
+      entry.totalEstimatedTokens += estimateResultTokens(result);
     }
 
     const detailLevel = typeof args['detail'] === 'string' ? args['detail'] : 'standard';
@@ -106,6 +106,19 @@ export function estimateTokens(payload: unknown): number {
   return Math.ceil(JSON.stringify(payload).length / 4);
 }
 
+/**
+ * Estimate the tokens an LLM client actually consumes from a tool result.
+ * Clients read `content[].text` only; `structuredContent` carries the same payload
+ * for programmatic consumers and is not tokenized into the model context. Estimating
+ * over the whole ToolResult double-counts the payload — it lives in both
+ * `content[0].text` (as escaped JSON) and `structuredContent` (as an object) — inflating
+ * the figure ~2x. Summing `content[].text` is the faithful per-call context-cost signal.
+ */
+export function estimateResultTokens(result: ToolResult): number {
+  const chars = result.content.reduce((sum, item) => sum + item.text.length, 0);
+  return Math.ceil(chars / 4);
+}
+
 /**
  * Appends a `_meta` key to `structuredContent` when PROVAR_MCP_EMIT_TOKEN_META=true.
  * The `content[0].text` string is intentionally left unchanged — LLMs read that
@@ -125,7 +138,7 @@ export function attachMeta(
   const meta: Record<string, unknown> = {
     tool: toolName,
     detailLevel,
-    estimatedTokens: estimateTokens(response),
+    estimatedTokens: estimateResultTokens(response),
   };
 
   if (sessionTotalTokens !== undefined) {
diff --git a/test/unit/mcp/tokenMeta.test.ts b/test/unit/mcp/tokenMeta.test.ts
index b94bf8e7..64dad107 100644
--- a/test/unit/mcp/tokenMeta.test.ts
+++ b/test/unit/mcp/tokenMeta.test.ts
@@ -11,6 +11,7 @@ import {
   wrapWithDepthGuard,
   attachMeta,
   estimateTokens,
+  estimateResultTokens,
   type ToolResult,
   type AnyToolCallback,
 } from '../../../src/mcp/utils/tokenMeta.js';
@@ -210,16 +211,27 @@ describe('attachMeta', () => {
     });
   });
 
-  it('estimated_tokens is within ±50% of actual JSON length / 4', () => {
+  it('estimatedTokens reflects content[].text length, not the whole result', () => {
     withMeta(true, () => {
       const result = attachMeta(okResponse, 'my_tool', 'standard');
       const meta = (result.structuredContent as Record<string, unknown>)['_meta'] as Record<string, unknown>;
       const estimate = meta['estimatedTokens'] as number;
-      const actual = Math.ceil(JSON.stringify(okResponse).length / 4);
-      assert.ok(
-        estimate >= actual * 0.5 && estimate <= actual * 1.5,
-        `estimate ${estimate} should be within ±50% of ${actual}`
-      );
+      const actual = Math.ceil(okResponse.content.map((c) => c.text).join('').length / 4);
+      assert.strictEqual(estimate, actual);
+    });
+  });
+
+  it('does not count structuredContent — tiny content + huge structuredContent stays tiny', () => {
+    withMeta(true, () => {
+      const fat: ToolResult = {
+        content: [{ type: 'text', text: 'ok' }],
+        structuredContent: { blob: 'x'.repeat(100_000) },
+      };
+      const result = attachMeta(fat, 'my_tool', 'standard');
+      const meta = (result.structuredContent as Record<string, unknown>)['_meta'] as Record<string, unknown>;
+      const estimate = meta['estimatedTokens'] as number;
+      // content text is "ok" (2 chars) → ceil(2/4) = 1, regardless of the 100k-char structuredContent.
+      assert.strictEqual(estimate, 1);
     });
   });
 });
@@ -241,6 +253,36 @@ describe('estimateTokens', () => {
   });
 });
 
+// ---------------------------------------------------------------------------
+// estimateResultTokens
+// ---------------------------------------------------------------------------
+
+describe('estimateResultTokens', () => {
+  it('sums content[].text lengths and ignores structuredContent', () => {
+    const result: ToolResult = {
+      content: [
+        { type: 'text', text: 'aaaa' },
+        { type: 'text', text: 'bbbb' },
+      ],
+      structuredContent: { ignored: 'z'.repeat(10_000) },
+    };
+    // 8 chars of content text → ceil(8/4) = 2.
+    assert.strictEqual(estimateResultTokens(result), 2);
+  });
+
+  it('returns 0 for empty content', () => {
+    const result: ToolResult = { content: [], structuredContent: { a: 1 } };
+    assert.strictEqual(estimateResultTokens(result), 0);
+  });
+
+  it('does not double-count the payload the way estimateTokens(result) would', () => {
+    // The payload lives in both content[0].text (escaped JSON) and structuredContent,
+    // so estimateTokens over the whole result counts it twice; estimateResultTokens
+    // counts the content text only.
+    assert.ok(estimateResultTokens(okResponse) < estimateTokens(okResponse));
+  });
+});
+
 // ---------------------------------------------------------------------------
 // Integration: wrapWithDepthGuard + attachMeta
 // ---------------------------------------------------------------------------