Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions docs/mcp.md
Original file line number Diff line number Diff line change
Expand Up @@ -2745,11 +2745,11 @@ On `TOOL_BUDGET_EXCEEDED` errors the meta also includes the session cumulative t
}
```

| Field | Description |
| ----------------------------- | -------------------------------------------------------------------------------------------- |
| `tool` | Name of the tool that produced this response |
| `detailLevel` | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`) |
| `estimatedTokens` | `ceil(len(JSON.stringify(response)) / 4)` — a rough character-to-token estimate |
| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors |

> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. LLM clients read `content[0].text`; including observability data there would waste tokens on every response.
| Field | Description |
| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
| `tool` | Name of the tool that produced this response |
| `detailLevel` | Value of the `detail` argument passed by the caller (`"summary"`, `"standard"`, or `"full"`) |
| `estimatedTokens` | `ceil(len(content[].text) / 4)` — rough char-to-token estimate of the text an LLM client reads; `structuredContent` is **not** counted |
| `sessionTotalEstimatedTokens` | Cumulative estimate for the session; only present on budget-exceeded errors |

> **Implementation note:** `_meta` is intentionally placed only in `structuredContent`, never in `content[0].text`. LLM clients read `content[0].text`; including observability data there would waste tokens on every response. For the same reason, `estimatedTokens` is measured over `content[].text` only — the payload is duplicated in both `content[0].text` (as escaped JSON) and `structuredContent` (as an object), so estimating over the whole result would double-count it (~2x). The figure therefore reflects what the model actually consumes, not the duplicated `structuredContent`.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@provartesting/provardx-cli",
"description": "A plugin for the Salesforce CLI to orchestrate testing activities and report quality metrics to Provar Quality Hub",
"version": "1.6.1",
"version": "1.6.2",
"mcpName": "io.github.ProvarTesting/provar",
"license": "BSD-3-Clause",
"plugins": [
Expand Down
4 changes: 2 additions & 2 deletions server.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
"url": "https://github.com/ProvarTesting/provardx-cli",
"source": "github"
},
"version": "1.6.1",
"version": "1.6.2",
"packages": [
{
"registryType": "npm",
"identifier": "@provartesting/provardx-cli",
"version": "1.6.1",
"version": "1.6.2",
"transport": {
"type": "stdio"
},
Expand Down
17 changes: 15 additions & 2 deletions src/mcp/utils/tokenMeta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ export function wrapWithDepthGuard(
const result = await handler(args, extra);

if (process.env['PROVAR_MCP_EMIT_TOKEN_META'] === 'true') {
entry.totalEstimatedTokens += estimateTokens(result);
entry.totalEstimatedTokens += estimateResultTokens(result);
}

const detailLevel = typeof args['detail'] === 'string' ? args['detail'] : 'standard';
Expand All @@ -106,6 +106,19 @@ export function estimateTokens(payload: unknown): number {
return Math.ceil(JSON.stringify(payload).length / 4);
}

/**
* Estimate the tokens an LLM client actually consumes from a tool result.
* Clients read `content[].text` only; `structuredContent` carries the same payload
* for programmatic consumers and is not tokenized into the model context. Estimating
* over the whole ToolResult double-counts the payload — it lives in both
* `content[0].text` (as escaped JSON) and `structuredContent` (as an object) — inflating
* the figure ~2x. Summing `content[].text` is the faithful per-call context-cost signal.
*/
export function estimateResultTokens(result: ToolResult): number {
const chars = result.content.reduce((sum, item) => sum + item.text.length, 0);
return Math.ceil(chars / 4);
}

/**
* Appends a `_meta` key to `structuredContent` when PROVAR_MCP_EMIT_TOKEN_META=true.
* The `content[0].text` string is intentionally left unchanged — LLMs read that
Expand All @@ -125,7 +138,7 @@ export function attachMeta(
const meta: Record<string, unknown> = {
tool: toolName,
detailLevel,
estimatedTokens: estimateTokens(response),
estimatedTokens: estimateResultTokens(response),
};

if (sessionTotalTokens !== undefined) {
Expand Down
54 changes: 48 additions & 6 deletions test/unit/mcp/tokenMeta.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
wrapWithDepthGuard,
attachMeta,
estimateTokens,
estimateResultTokens,
type ToolResult,
type AnyToolCallback,
} from '../../../src/mcp/utils/tokenMeta.js';
Expand Down Expand Up @@ -210,16 +211,27 @@ describe('attachMeta', () => {
});
});

it('estimated_tokens is within ±50% of actual JSON length / 4', () => {
it('estimatedTokens reflects content[].text length, not the whole result', () => {
withMeta(true, () => {
const result = attachMeta(okResponse, 'my_tool', 'standard');
const meta = (result.structuredContent as Record<string, unknown>)['_meta'] as Record<string, unknown>;
const estimate = meta['estimatedTokens'] as number;
const actual = Math.ceil(JSON.stringify(okResponse).length / 4);
assert.ok(
estimate >= actual * 0.5 && estimate <= actual * 1.5,
`estimate ${estimate} should be within ±50% of ${actual}`
);
const actual = Math.ceil(okResponse.content.map((c) => c.text).join('').length / 4);
assert.strictEqual(estimate, actual);
});
});

it('does not count structuredContent — tiny content + huge structuredContent stays tiny', () => {
withMeta(true, () => {
const fat: ToolResult = {
content: [{ type: 'text', text: 'ok' }],
structuredContent: { blob: 'x'.repeat(100_000) },
};
const result = attachMeta(fat, 'my_tool', 'standard');
const meta = (result.structuredContent as Record<string, unknown>)['_meta'] as Record<string, unknown>;
const estimate = meta['estimatedTokens'] as number;
// content text is "ok" (2 chars) → ceil(2/4) = 1, regardless of the 100k-char structuredContent.
assert.strictEqual(estimate, 1);
});
});
});
Expand All @@ -241,6 +253,36 @@ describe('estimateTokens', () => {
});
});

// ---------------------------------------------------------------------------
// estimateResultTokens
// ---------------------------------------------------------------------------

describe('estimateResultTokens', () => {
it('sums content[].text lengths and ignores structuredContent', () => {
const result: ToolResult = {
content: [
{ type: 'text', text: 'aaaa' },
{ type: 'text', text: 'bbbb' },
],
structuredContent: { ignored: 'z'.repeat(10_000) },
};
// 8 chars of content text → ceil(8/4) = 2.
assert.strictEqual(estimateResultTokens(result), 2);
});

it('returns 0 for empty content', () => {
const result: ToolResult = { content: [], structuredContent: { a: 1 } };
assert.strictEqual(estimateResultTokens(result), 0);
});

it('does not double-count the payload the way estimateTokens(result) would', () => {
// The payload lives in both content[0].text (escaped JSON) and structuredContent,
// so estimateTokens over the whole result counts it twice; estimateResultTokens
// counts the content text only.
assert.ok(estimateResultTokens(okResponse) < estimateTokens(okResponse));
});
});

// ---------------------------------------------------------------------------
// Integration: wrapWithDepthGuard + attachMeta
// ---------------------------------------------------------------------------
Expand Down
Loading