diff --git a/.changeset/run-estimate-flag.md b/.changeset/run-estimate-flag.md new file mode 100644 index 000000000..ffc406479 --- /dev/null +++ b/.changeset/run-estimate-flag.md @@ -0,0 +1,6 @@ +--- +"lingo.dev": minor +"@lingo.dev/_sdk": minor +--- + +Add `lingo.dev run --estimate`: print the approximate cost of pending translations and exit without translating. The CLI computes the same change delta as a regular run, sends per-locale character counts to the new `/process/estimate` endpoint, and prints a per-locale cost breakdown. The SDK gains a matching `estimate()` method. diff --git a/packages/cli/src/cli/cmd/run/_types.ts b/packages/cli/src/cli/cmd/run/_types.ts index eea7eb68e..9608241f6 100644 --- a/packages/cli/src/cli/cmd/run/_types.ts +++ b/packages/cli/src/cli/cmd/run/_types.ts @@ -52,5 +52,6 @@ export const flagsSchema = z.object({ debounce: z.number().positive().prefault(5000), // 5 seconds default sound: z.boolean().optional(), pseudo: z.boolean().optional(), + estimate: z.boolean().prefault(false), }); export type CmdRunFlags = z.infer; diff --git a/packages/cli/src/cli/cmd/run/_utils.ts b/packages/cli/src/cli/cmd/run/_utils.ts index 8e91eb9c7..4d3e77f63 100644 --- a/packages/cli/src/cli/cmd/run/_utils.ts +++ b/packages/cli/src/cli/cmd/run/_utils.ts @@ -1,5 +1,61 @@ -import { CmdRunContext } from "./_types"; +import _ from "lodash"; +import { minimatch } from "minimatch"; + +import { CmdRunContext, CmdRunTask } from "./_types"; import { UserIdentity } from "../../utils/observability"; +import { safeDecode } from "../../utils/key-matching"; +import createBucketLoader from "../../loaders"; +import { Delta } from "../../utils/delta"; + +export function createLoaderForTask(assignedTask: CmdRunTask) { + const bucketLoader = createBucketLoader( + assignedTask.bucketType, + assignedTask.bucketPathPattern, + { + defaultLocale: assignedTask.sourceLocale, + injectLocale: assignedTask.injectLocale, + formatter: assignedTask.formatter, + keyColumn: assignedTask.keyColumn, + }, + assignedTask.lockedKeys, + assignedTask.lockedPatterns, + assignedTask.ignoredKeys, + assignedTask.preservedKeys, + assignedTask.localizableKeys, + ); + bucketLoader.setDefaultLocale(assignedTask.sourceLocale); + + return bucketLoader; +} + +/** + * The subset of source entries that actually needs translation for a task: + * delta-changed keys (or everything with --force), narrowed by --key filters. + * Shared by execute (what gets sent to the localizer) and estimate (what + * gets counted) so the two can never disagree on scope. + */ +export function computeProcessableData( + sourceData: Record, + delta: Delta, + force: boolean | undefined, + onlyKeys: string[], +): Record { + return _.chain(sourceData) + .entries() + .filter( + ([key]) => + delta.added.includes(key) || delta.updated.includes(key) || !!force, + ) + .filter( + ([key]) => + !onlyKeys.length || + onlyKeys.some((pattern) => + minimatch(safeDecode(key), safeDecode(pattern)), + ), + ) + .fromPairs() + .value(); +} /** * Determines the user's identity for tracking purposes. diff --git a/packages/cli/src/cli/cmd/run/estimate.spec.ts b/packages/cli/src/cli/cmd/run/estimate.spec.ts new file mode 100644 index 000000000..5a61b18c2 --- /dev/null +++ b/packages/cli/src/cli/cmd/run/estimate.spec.ts @@ -0,0 +1,61 @@ +import { describe, it, expect } from "vitest"; +import { countTranslatableChars } from "./estimate"; +import { computeProcessableData } from "./_utils"; +import { Delta } from "../../utils/delta"; + +const delta = (added: string[] = [], updated: string[] = []): Delta => ({ + added, + removed: [], + updated, + renamed: [], + hasChanges: !!added.length || !!updated.length, +}); + +describe("countTranslatableChars", () => { + it("sums the lengths of string leaf values only", () => { + expect( + countTranslatableChars({ + greeting: "Hello", // 5 + farewell: "Bye", // 3 + count: 42, + flag: true, + }), + ).toBe(8); + }); + + it("returns 0 for empty data", () => { + expect(countTranslatableChars({})).toBe(0); + }); +}); + +describe("computeProcessableData", () => { + const sourceData = { + "a.title": "Title", + "a.body": "Body", + "b.title": "Other", + }; + + it("keeps only delta-changed keys", () => { + const result = computeProcessableData( + sourceData, + delta(["a.title"], ["b.title"]), + false, + [], + ); + expect(Object.keys(result)).toEqual(["a.title", "b.title"]); + }); + + it("keeps everything with force", () => { + const result = computeProcessableData(sourceData, delta(), true, []); + expect(Object.keys(result)).toEqual(Object.keys(sourceData)); + }); + + it("narrows by key patterns", () => { + const result = computeProcessableData(sourceData, delta(), true, ["a.*"]); + expect(Object.keys(result)).toEqual(["a.title", "a.body"]); + }); + + it("returns empty when nothing changed", () => { + expect(computeProcessableData(sourceData, delta(), false, [])).toEqual({}); + }); +}); diff --git a/packages/cli/src/cli/cmd/run/estimate.ts b/packages/cli/src/cli/cmd/run/estimate.ts new file mode 100644 index 000000000..a610f35ef --- /dev/null +++ b/packages/cli/src/cli/cmd/run/estimate.ts @@ -0,0 +1,125 @@ +import chalk from "chalk"; +import { Listr } from "listr2"; + +import { colors } from "../../constants"; +import { CmdRunContext } from "./_types"; +import { commonTaskRendererOptions } from "./_const"; +import { createDeltaProcessor } from "../../utils/delta"; +import { computeProcessableData, createLoaderForTask } from "./_utils"; + +/** + * Translatable characters of a task's processable data: the sum of leaf + * string-value lengths — keys, markup and serialization syntax excluded. + * Matches how the server-side estimate counts characters. + */ +export function countTranslatableChars( + processableData: Record, +): number { + return Object.values(processableData).reduce( + (sum, value) => (typeof value === "string" ? sum + value.length : sum), + 0, + ); +} + +const formatUsd = (value: number) => + value < 0.01 && value > 0 ? "<$0.01" : `$${value.toFixed(2)}`; + +/** + * `run --estimate`: compute the same per-task translation delta as execute, + * but instead of translating, send per-locale character counts to + * `/process/estimate` and print the approximate cost. Nothing is translated, + * written, or billed; lockfile and target files stay untouched. + */ +export default async function estimate( + input: CmdRunContext, +): Promise { + console.log(chalk.hex(colors.orange)("[Estimate]")); + + if (!input.localizer?.estimate) { + throw new Error( + `Cost estimate is not available for the "${input.localizer?.id}" provider. ` + + `Estimates use Lingo.dev server-side pricing — remove --estimate or switch to the Lingo.dev provider.`, + ); + } + + const charsByLocale = new Map(); + + return new Listr( + [ + { + title: "Computing translation delta", + task: async (ctx, task) => { + if (!ctx.tasks.length) { + task.title = "Nothing to estimate — everything is up to date."; + return; + } + + for (const runTask of ctx.tasks) { + const bucketLoader = createLoaderForTask(runTask); + const deltaProcessor = createDeltaProcessor( + runTask.bucketPathPattern, + ); + const checksums = await deltaProcessor.loadChecksums(); + const sourceData = await bucketLoader.pull(runTask.sourceLocale); + const targetData = await bucketLoader.pull(runTask.targetLocale); + const delta = await deltaProcessor.calculateDelta({ + sourceData, + targetData, + checksums, + }); + const processableData = computeProcessableData( + sourceData, + delta, + ctx.flags.force, + runTask.onlyKeys, + ); + + const chars = countTranslatableChars(processableData); + charsByLocale.set( + runTask.targetLocale, + (charsByLocale.get(runTask.targetLocale) ?? 0) + chars, + ); + } + + task.title = `Delta computed for ${chalk.hex(colors.yellow)( + ctx.tasks.length.toString(), + )} task(s)`; + }, + }, + { + title: "Fetching cost estimate", + rendererOptions: { persistentOutput: true }, + task: async (ctx, task) => { + const items = [...charsByLocale.entries()].map( + ([targetLocale, sourceChars]) => ({ targetLocale, sourceChars }), + ); + + if (!items.length || items.every((item) => !item.sourceChars)) { + task.title = "Estimated cost: $0.00 — nothing needs translation."; + return; + } + + const result = await ctx.localizer!.estimate!(items); + + const lines = result.byLocale.map( + (row) => + ` ${chalk.hex(colors.yellow)(row.targetLocale)}: ~${formatUsd( + row.estimatedCostUsd, + )} ${chalk.dim( + `(${row.sourceChars.toLocaleString("en-US")} chars, ~${row.estimatedOutputTokens.toLocaleString("en-US")} tokens)`, + )}`, + ); + + task.title = `Estimated cost: ~${chalk.hex(colors.green)( + formatUsd(result.totals.estimatedTotalCostUsd), + )} ${chalk.dim("(estimate, not a quote — nothing was translated)")}`; + task.output = lines.join("\n"); + }, + }, + ], + { + exitOnError: true, + rendererOptions: commonTaskRendererOptions, + }, + ).run(input); +} diff --git a/packages/cli/src/cli/cmd/run/execute.ts b/packages/cli/src/cli/cmd/run/execute.ts index 746f9d145..ff0650e3e 100644 --- a/packages/cli/src/cli/cmd/run/execute.ts +++ b/packages/cli/src/cli/cmd/run/execute.ts @@ -2,14 +2,12 @@ import chalk from "chalk"; import { Listr, ListrTask } from "listr2"; import pLimit, { LimitFunction } from "p-limit"; import _ from "lodash"; -import { minimatch } from "minimatch"; -import { safeDecode } from "../../utils/key-matching"; import { colors } from "../../constants"; import { CmdRunContext, CmdRunTask, CmdRunTaskResult } from "./_types"; import { commonTaskRendererOptions } from "./_const"; -import createBucketLoader from "../../loaders"; import { createDeltaProcessor, Delta } from "../../utils/delta"; +import { computeProcessableData, createLoaderForTask } from "./_utils"; const WARN_CONCURRENCY_COUNT = 30; @@ -148,27 +146,6 @@ function createExecutionProgressMessage(ctx: CmdRunContext) { }, Failed ${chalk.red(failedTasksCount)}, Skipped ${chalk.dim(skippedTasksCount)}`; } -function createLoaderForTask(assignedTask: CmdRunTask) { - const bucketLoader = createBucketLoader( - assignedTask.bucketType, - assignedTask.bucketPathPattern, - { - defaultLocale: assignedTask.sourceLocale, - injectLocale: assignedTask.injectLocale, - formatter: assignedTask.formatter, - keyColumn: assignedTask.keyColumn, - }, - assignedTask.lockedKeys, - assignedTask.lockedPatterns, - assignedTask.ignoredKeys, - assignedTask.preservedKeys, - assignedTask.localizableKeys, - ); - bucketLoader.setDefaultLocale(assignedTask.sourceLocale); - - return bucketLoader; -} - function createWorkerTask(args: { ctx: CmdRunContext; assignedTasks: CmdRunTask[]; @@ -217,23 +194,12 @@ function createWorkerTask(args: { checksums: initialChecksums, }); - const processableData = _.chain(sourceData) - .entries() - .filter( - ([key, value]) => - delta.added.includes(key) || - delta.updated.includes(key) || - !!args.ctx.flags.force, - ) - .filter( - ([key]) => - !assignedTask.onlyKeys.length || - assignedTask.onlyKeys?.some((pattern) => - minimatch(safeDecode(key), safeDecode(pattern)), - ), - ) - .fromPairs() - .value(); + const processableData = computeProcessableData( + sourceData, + delta, + args.ctx.flags.force, + assignedTask.onlyKeys, + ); if (!Object.keys(processableData).length) { await fileIoLimiter(async () => { diff --git a/packages/cli/src/cli/cmd/run/index.ts b/packages/cli/src/cli/cmd/run/index.ts index 7b28ecd9f..fc51f4df1 100644 --- a/packages/cli/src/cli/cmd/run/index.ts +++ b/packages/cli/src/cli/cmd/run/index.ts @@ -6,6 +6,7 @@ import os from "os"; import setup from "./setup"; import plan from "./plan"; import execute from "./execute"; +import estimate from "./estimate"; import watch from "./watch"; import { CmdRunContext, flagsSchema } from "./_types"; import frozen from "./frozen"; @@ -123,6 +124,10 @@ export default new Command() "--pseudo", "Enable pseudo-localization mode: automatically pseudo-translates all extracted strings with accented characters and visual markers without calling any external API. Useful for testing UI internationalization readiness", ) + .option( + "--estimate", + "Print the estimated cost of pending translations and exit without translating. Computes the same change delta as a regular run and prices it via the Lingo.dev API; values are estimates, not quotes", + ) .action(async (args) => { let userIdentity: UserIdentity = null; try { @@ -134,6 +139,12 @@ export default new Command() localizer: null, }; + if (ctx.flags.estimate && (ctx.flags.watch || ctx.flags.frozen)) { + throw new Error( + "--estimate cannot be combined with --watch or --frozen. Run it on its own to preview the cost of the next run.", + ); + } + await pauseIfDebug(ctx.flags.debug); await renderClear(); await renderSpacer(); @@ -155,6 +166,12 @@ export default new Command() await plan(ctx); await renderSpacer(); + if (ctx.flags.estimate) { + await estimate(ctx); + await renderSpacer(); + return; + } + await frozen(ctx); await renderSpacer(); diff --git a/packages/cli/src/cli/localizer/_types.ts b/packages/cli/src/cli/localizer/_types.ts index da613fba6..8a98e9fd5 100644 --- a/packages/cli/src/cli/localizer/_types.ts +++ b/packages/cli/src/cli/localizer/_types.ts @@ -1,4 +1,5 @@ import { I18nConfig } from "@lingo.dev/_spec"; +import type { CostEstimate } from "@lingo.dev/_sdk"; export type LocalizerData = { sourceLocale: string; @@ -29,4 +30,11 @@ export interface ILocalizer { input: LocalizerData, onProgress?: LocalizerProgressFn, ) => Promise; + /** + * Estimate the cost of pending translations without translating anything. + * Only providers with server-side pricing implement this (Lingo.dev). + */ + estimate?: ( + items: { targetLocale: string; sourceChars: number }[], + ) => Promise; } diff --git a/packages/cli/src/cli/localizer/lingodotdev.ts b/packages/cli/src/cli/localizer/lingodotdev.ts index dc22a6d3c..dfb7ead73 100644 --- a/packages/cli/src/cli/localizer/lingodotdev.ts +++ b/packages/cli/src/cli/localizer/lingodotdev.ts @@ -81,5 +81,6 @@ export default function createLingoDotDevLocalizer( return processedData; }, + estimate: async (items) => engine.estimate(items), }; } diff --git a/packages/sdk/src/index.spec.ts b/packages/sdk/src/index.spec.ts index 7642eddbd..5074b687f 100644 --- a/packages/sdk/src/index.spec.ts +++ b/packages/sdk/src/index.spec.ts @@ -689,4 +689,102 @@ describe("LingoDotDevEngine", () => { expect(body.targetLocale).toBe("pt-PT"); }); }); + + describe("estimate", () => { + let mockFetch: ReturnType; + + beforeEach(() => { + mockFetch = vi.fn(); + global.fetch = mockFetch as any; + }); + + it("should POST per-locale char counts to /process/estimate and return the estimate", async () => { + const serverEstimate = { + approximate: true, + totals: { + sourceChars: 599, + estimatedOutputTokens: 150, + estimatedLlmCostUsd: 0.0006, + estimatedLocalizationCostUsd: 0.0003, + estimatedTotalCostUsd: 0.0009, + }, + byLocale: [ + { + targetLocale: "de", + sourceChars: 400, + estimatedOutputTokens: 100, + estimatedCostUsd: 0.0006, + }, + { + targetLocale: "fr", + sourceChars: 199, + estimatedOutputTokens: 50, + estimatedCostUsd: 0.0003, + }, + ], + }; + mockFetch.mockResolvedValue({ + ok: true, + json: async () => serverEstimate, + }); + + const engine = new LingoDotDevEngine({ apiKey: "test-key" }); + const result = await engine.estimate([ + { targetLocale: "de", sourceChars: 400 }, + { targetLocale: "fr", sourceChars: 199 }, + ]); + + const [url, options] = mockFetch.mock.calls[0]; + expect(url).toMatch(/\/process\/estimate$/); + expect(options.method).toBe("POST"); + expect(options.headers["X-API-Key"]).toBe("test-key"); + expect(JSON.parse(options.body)).toEqual({ + items: [ + { targetLocale: "de", sourceChars: 400 }, + { targetLocale: "fr", sourceChars: 199 }, + ], + }); + expect(result).toEqual(serverEstimate); + }); + + it("should normalize locale codes before sending", async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ approximate: true, totals: {}, byLocale: [] }), + }); + + const engine = new LingoDotDevEngine({ apiKey: "test-key" }); + await engine.estimate([{ targetLocale: "pt_BR", sourceChars: 10 }]); + + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.items[0].targetLocale).toBe("pt-BR"); + }); + + it("should reject an empty items array client-side", async () => { + const engine = new LingoDotDevEngine({ apiKey: "test-key" }); + await expect(engine.estimate([])).rejects.toThrow(); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + it("should reject negative char counts client-side", async () => { + const engine = new LingoDotDevEngine({ apiKey: "test-key" }); + await expect( + engine.estimate([{ targetLocale: "de", sourceChars: -1 }]), + ).rejects.toThrow(); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + it("should surface server validation errors", async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 400, + text: async () => JSON.stringify({ message: "bad payload" }), + }); + + const engine = new LingoDotDevEngine({ apiKey: "test-key" }); + await expect( + engine.estimate([{ targetLocale: "de", sourceChars: 5 }]), + ).rejects.toThrow("Invalid request: bad payload"); + }); + }); }); diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index e2936879f..c2413c5ef 100644 --- a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -41,6 +41,35 @@ const localizationParamsSchema = Z.object({ triggerType: Z.enum(["cli", "ci"]).optional(), }); +const estimateItemsSchema = Z.array( + Z.object({ + targetLocale: normalizedLocaleCodeSchema, + sourceChars: Z.number().int().nonnegative(), + }), +).min(1); + +/** + * Approximate localization cost returned by `/process/estimate`. + * `approximate` is always true — the estimate is a chars→tokens heuristic, + * not a quote. Actual cost may differ. + */ +export type CostEstimate = { + approximate: boolean; + totals: { + sourceChars: number; + estimatedOutputTokens: number; + estimatedLlmCostUsd: number; + estimatedLocalizationCostUsd: number; + estimatedTotalCostUsd: number; + }; + byLocale: { + targetLocale: string; + sourceChars: number; + estimatedOutputTokens: number; + estimatedCostUsd: number; + }[]; +}; + /** * LingoDotDevEngine class for interacting with the LingoDotDev API * A powerful localization engine that supports various content types including @@ -872,6 +901,34 @@ export class LingoDotDevEngine { } } + /** + * Estimate the cost of localizing content BEFORE submitting it. + * Pure computation server-side — nothing is translated, stored, or billed. + * @param items - Per-target-locale character counts of translatable source + * text (sum of source string lengths, excluding keys and markup). Duplicate + * locales are summed by the server. + * @param signal - Optional AbortSignal to cancel the operation + * @returns Promise resolving to an approximate cost with per-locale breakdown + */ + async estimate( + items: { targetLocale: string; sourceChars: number }[], + signal?: AbortSignal, + ): Promise { + const parsedItems = estimateItemsSchema.parse(items); + const url = `${this.config.apiUrl}/process/estimate`; + + const res = await fetch(url, { + method: "POST", + headers: this.headers, + body: JSON.stringify({ items: parsedItems }), + signal, + }); + + await LingoDotDevEngine.throwOnHttpError(res, "Error estimating cost"); + + return res.json(); + } + async whoami( signal?: AbortSignal, ): Promise<{ email: string; id: string } | null> {