diff --git a/CHANGELOG.md b/CHANGELOG.md index 3299dc9..b500feb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,25 @@ Versioning: [Semantic Versioning](https://semver.org/spec/v2.0.0.html) --- +## [0.7.8] - 2026-06-28 + +Additive patch on top of 0.7.7. Converts two silent fail-OPEN footguns +into explicit `DeprecationWarning` / `RuntimeError`. No behavior +change for callers who don't touch the deprecated surface. + +### Deprecated + +- `NullRunRuntime.start_recording()` and `NullRunRuntime.stop_recording()` now emit `DeprecationWarning`. They have been silent no-op stubs since Sprint 2.1 (0.4.0). Decision history is available via the backend dashboard at `/control-center/decision-history`. **Both methods will be removed in 0.9.0.** +- Setting `NULLRUN_USE_GRPC` to any non-empty value (e.g. `NULLRUN_USE_GRPC=1`, but also `=0` or `=false`) now raises `RuntimeError` at SDK init instead of silently falling back to HTTP with an info log. gRPC transport remains on the roadmap but is not yet implemented. Unset the env var to use HTTP. See https://docs.nullrun.io/reference/sdk-api#transport + +### Migration + +- Replace `runtime.start_recording(workflow_id, metadata=...)` with a dashboard navigation or `nullrun.status()` introspection. +- Remove any `NULLRUN_USE_GRPC` env var from deployment configs (Docker compose, k8s manifests, systemd units). +- Catch `RuntimeError` at SDK init if you want to keep the env var as a feature flag — but the recommended path is to unset it. + +--- + ## [0.7.7] - 2026-06-27 Additive patch on top of 0.7.6. Fixes the `/gate` pre-flight so the diff --git a/pyproject.toml b/pyproject.toml index 72a2654..f7c3070 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "nullrun" -version = "0.7.7" +version = "0.7.8" # Long form used by PyPI page meta-description and search snippets. # Kept under the 200-char preview threshold so the full line is visible # without an "expand" click.
Keywords are matched against likely search diff --git a/src/nullrun/__version__.py b/src/nullrun/__version__.py index 9556166..3747d3d 100644 --- a/src/nullrun/__version__.py +++ b/src/nullrun/__version__.py @@ -1,4 +1,4 @@ """NullRun Platform SDK.""" -__version__ = "0.7.7" +__version__ = "0.7.8" __platform_version__ = "1.0.0" diff --git a/src/nullrun/runtime.py b/src/nullrun/runtime.py index 0cb8bba..115c16b 100644 --- a/src/nullrun/runtime.py +++ b/src/nullrun/runtime.py @@ -37,6 +37,7 @@ import threading import time import uuid +import warnings from collections.abc import Callable from typing import Any, Optional @@ -317,11 +318,17 @@ def __init__( # gRPC server at the platform is intentionally frozen until the # activation checklist (TLS, auth, proto extensions, cost pipeline # parity, tests) is complete. The SDK no longer attempts to construct - # a gRPC client. NULLRUN_USE_GRPC is a silent no-op. + # a gRPC client. + # FIX 2026-06-28: was a silent no-op (logger.info) — customers who + # set NULLRUN_USE_GRPC expecting gRPC silently fell back to HTTP with + # no signal. Now we raise loudly so the misconfiguration is visible + # at startup instead of being diagnosed from a missing proto trace. if os.getenv("NULLRUN_USE_GRPC"): - logger.info( + raise RuntimeError( "NULLRUN_USE_GRPC is set but the gRPC transport is not " - "implemented in this SDK version; falling back to HTTP." + "yet implemented. This option is reserved for a future " + "release. Unset the env var to use the HTTP transport. " + "See https://docs.nullrun.io/reference/sdk-api#transport" ) # Initialize @@ -691,10 +698,18 @@ def _authenticate(self) -> None: logger.debug(f"Authenticating with API at {self.api_url}/auth/verify") try: - # Use Transport's client for connection pooling, retry, and circuit breaker - response = self._transport._client.post( + # 2026-06-28 audit P2.3: retry transient 503/504 + network blips + # during init. 
Backend emits 503 + Retry-After: 5 on transient + # DB error (backend/src/proxy/handlers.rs:11346-11351). Pre-fix + # the first 503 surfaced as NR-A001 to the user as if their API + # key were bad. Up to three attempts with exponential backoff + # between them (0.5s → 1s), honor Retry-After when present. Auth-key failures (401) + # are NOT retried — a key that is wrong on attempt 1 is still + # wrong on attempt 3. + response = self._post_auth_with_retry( f"{self.api_url}/api/v1/auth/verify", - json={"api_key": self.api_key}, + json_body={"api_key": self.api_key}, + max_attempts=3, ) if response.status_code == 200: @@ -1001,35 +1016,31 @@ def _set_remote_state(self, workflow_id: str, state: dict[str, Any]) -> None: self._remote_states[workflow_id] = dict(state) def _fetch_remote_state(self, workflow_id: str) -> None: - """Fetch remote state for a specific workflow via the org-scoped - workflow endpoint. - - Pre-FIX-F2 the SDK hit ``/api/v1/status/{workflow_id}``, which - is not a registered route on the backend (the backend exposes - per-workflow state via - ``GET /api/v1/orgs/{org_id}/workflows/{workflow_id}``). The - pre-fix code therefore 404'd every poll and silently fell back - to local state — meaning the legacy HTTP-poll path could never - observe a remote kill/pause. WS push (the default mode since + """Fetch remote state for a specific workflow. + + 2026-06-27: target endpoint swapped from + ``GET /api/v1/orgs/{org_id}/workflows/{workflow_id}`` (the + DASHBOARD route — requires Bearer session cookie, returns 401 + to SDK clients that only send X-API-Key) to + ``GET /api/v1/status/{workflow_id}`` (the SDK-polling route — + backend/src/proxy/handlers.rs:9758, accepts X-API-Key OR + Authorization: Bearer). Pre-swap the HTTP-poll path silently + 401'd on every poll, so the legacy HTTP-poll fallback never + observed a remote kill/pause. WS push (the default mode since Phase 5) does NOT go through this code path, so the WS control plane is unaffected.
- Backend ``WorkflowResponse`` (see - backend/src/proxy/http/workflows.rs:43) does not surface a - numeric ``version`` or ``reason`` for a workflow — those - fields are SDK-local only and remain at their cached values - when the remote response arrives. ``state`` is the only field - the kill/pause check (``check_control_plane``) actually reads, - so this is sufficient for correctness. + Backend ``StatusResponse`` (handlers.rs:9747-9756) returns + ``workflow_id, state, version, reason?, updated_at, + current_cost, rate_per_minute``. We only consume ``state`` — + ``version`` and ``reason`` are SDK-local fields and remain at + their cached values (mirroring the prior behaviour). This is + sufficient for ``check_control_plane`` which only reads + ``state``. """ - if not self.organization_id: - # Legacy HTTP-poll was always org-bound; without org_id we - # cannot resolve the right route. Bail silently — the WS - # push path remains the authoritative source. - return try: response = self._transport._client.get( - f"{self.api_url}/api/v1/orgs/{self.organization_id}/workflows/{workflow_id}", + f"{self.api_url}/api/v1/status/{workflow_id}", headers=self._auth_headers(), timeout=5.0, ) @@ -1885,19 +1896,29 @@ def start_recording(self, workflow_id: str, metadata: dict[str, Any] = None) -> """ Start recording events for local decision history. + .. deprecated:: 0.7.8 + Decision history moved to the backend dashboard. This method + is a no-op stub and will be removed in 0.9.0. Use + ``nullrun.status()`` for a per-runtime snapshot or visit + https://docs.nullrun.io/concepts/decision-history for the + dashboard workflow. + Args: workflow_id: ID of the workflow to record metadata: Optional metadata about the session Returns: - session_id for this recording + session_id for this recording (always ``""`` since 0.4.0) """ - # Sprint 2.1: local decision-history recorder was removed.
- # This method is kept as a no-op stub for one minor - # version to avoid breaking callers that imported it. It - # will be deleted in the next release. - logger.debug( - "runtime.start_recording() is a no-op; decision history moved to the backend dashboard." + # FIX 2026-06-28: was a silent no-op with logger.debug. Now emits + # DeprecationWarning so customer code that still imports this + # surfaces a visible migration signal before deletion in 0.9.0. + warnings.warn( + "NullRunRuntime.start_recording() is deprecated and will be " + "removed in nullrun 0.9.0. Decision history is available via " + "the backend dashboard at /control-center/decision-history.", + DeprecationWarning, + stacklevel=2, ) return "" @@ -1905,10 +1926,19 @@ def stop_recording(self): """ Stop recording and return the session. + .. deprecated:: 0.7.8 + See :meth:`start_recording`. Will be removed in 0.9.0. + Returns: The recorded session, or None if not recording """ - # Sprint 2.1: paired no-op stub for start_recording(). + # FIX 2026-06-28: paired deprecation warning for start_recording(). + warnings.warn( + "NullRunRuntime.stop_recording() is deprecated and will be " + "removed in nullrun 0.9.0.", + DeprecationWarning, + stacklevel=2, + ) return None def _enrich_event(self, event: dict[str, Any]) -> dict[str, Any]: @@ -2106,6 +2136,77 @@ def track_event( event["_fingerprint"] = _fingerprint_for_event_dict(event) return self.track(event) + def _post_auth_with_retry( + self, + url: str, + json_body: dict[str, Any], + max_attempts: int = 3, + ) -> httpx.Response: + """POST ``json_body`` to ``url`` with bounded retry on transient + failure. + + 2026-06-28 audit P2.3: the init path ``POST /api/v1/auth/verify`` + previously did a single bare ``self._transport._client.post(...)`` + call.
Backend emits ``503 + Retry-After: 5`` on transient DB + errors (see ``backend/src/proxy/handlers.rs:11346-11351``), which + pre-fix surfaced to the user as ``NR-A001`` ("configuration + issue") even though the SDK was fine and the key was fine — + just a Postgres blip. This helper retries 5xx and network + errors up to ``max_attempts`` total tries, honors a numeric + ``Retry-After`` (delta-seconds, used as-is with no upper bound; an HTTP-date falls back to exponential backoff), and propagates + ``httpx.RequestError`` unchanged on the LAST attempt so the + caller's existing ``except`` arm can turn it into ``NR-B001``. + + Auth failures (401/403/422) are NOT retried — an API key that is + wrong on attempt 1 is still wrong on attempt 3. + """ + import time as _time + + last_exc: httpx.RequestError | None = None + for attempt in range(max_attempts): + try: + response = self._transport._client.post(url, json=json_body) + except httpx.RequestError as e: + last_exc = e + if attempt < max_attempts - 1: + backoff_s = min(0.5 * (2 ** attempt), 5.0) + logger.debug( + f"/auth/verify network error " + f"(attempt {attempt + 1}/{max_attempts}): " + f"{e}; retrying in {backoff_s}s" + ) + _time.sleep(backoff_s) + continue + raise + + # 5xx (transient) → retry. 4xx → return as-is so the + # caller's status-code branching can do its job. + if response.status_code >= 500 and attempt < max_attempts - 1: + retry_after_header = response.headers.get("retry-after") + if retry_after_header: + try: + backoff_s = float(retry_after_header) + except ValueError: + # HTTP-date or unparseable — fall back to exp backoff + backoff_s = min(0.5 * (2 ** attempt), 5.0) + else: + backoff_s = min(0.5 * (2 ** attempt), 5.0) + logger.debug( + f"/auth/verify returned {response.status_code} " + f"(attempt {attempt + 1}/{max_attempts}); " + f"retrying in {backoff_s}s" + ) + _time.sleep(backoff_s) + continue + + return response + + # Defensive: should be unreachable (loop either returns or + # raises).
If a future refactor breaks that invariant, surface + # the last network error rather than silently returning None. + assert last_exc is not None + raise last_exc + # Module-level convenience functions _runtime: NullRunRuntime | None = None diff --git a/src/nullrun/transport.py b/src/nullrun/transport.py index 3e9d341..1a1ec95 100644 --- a/src/nullrun/transport.py +++ b/src/nullrun/transport.py @@ -99,7 +99,7 @@ def generate_hmac_signature( api_key: str, secret_key: str, timestamp: int, - body: str, + body: str | bytes, ) -> str: """ Generate HMAC-SHA256 signature for request authentication. @@ -116,12 +116,24 @@ def generate_hmac_signature( api_key: Client's API key (identifier) secret_key: Client's secret key (used for HMAC) timestamp: Unix timestamp in seconds - body: Request body as JSON string + body: Request body as JSON string (``str``) or the already-encoded + wire bytes (``bytes``) returned by ``_signed_request_body``. + The bytes form is canonical: signing the exact bytes that go + on the wire eliminates any drift between ``json.dumps(...)`` + output and what httpx actually sends via ``content=...``. Returns: Hex-encoded HMAC-SHA256 signature """ - body_hash = hashlib.sha256(body.encode("utf-8")).hexdigest() + # 2026-06-27: accept both ``str`` (legacy callers + verify_hmac_signature + # path which decodes the request body) and ``bytes`` (the four signed + # POST call sites that serialise via ``_signed_request_body`` and pass + # the wire bytes directly). Encoding twice (``.encode()`` on bytes) + # raised AttributeError on the /track/batch flush loop and silently + # killed every analytics event -- the backend then logged "missing + # signature headers" on the next batch retry because nothing was sent. 
+ body_bytes = body.encode("utf-8") if isinstance(body, str) else body + body_hash = hashlib.sha256(body_bytes).hexdigest() message = f"{timestamp}:{api_key}:{body_hash}" signature = hmac.new( @@ -180,9 +192,10 @@ def _signed_request_body(payload: dict[str, Any]) -> bytes: """Serialise a JSON payload to the canonical bytes the HMAC signature is computed over. - All three signed POST call sites (``_send_batch_with_retry_info``, - ``Transport.execute``, ``Transport.check``) MUST serialise via this - helper and pass the result with ``content=body`` to + All four signed POST call sites -- ``Transport.track`` (batched + via ``_send_batch_with_retry_info``), ``Transport.gate``, + ``Transport.check``, and ``Transport.execute`` -- MUST serialise + via this helper and pass the result with ``content=body`` to ``httpx.Client.post``. Sending via ``json=...`` lets httpx re-serialise with its default compact separators, which produces a body that does NOT match the body the HMAC signature was @@ -897,7 +910,7 @@ class SendResult: retry_after_ms: float | None = None is_policy_limit: bool = False - def _add_hmac_headers(self, headers: dict[str, str], body: str) -> None: + def _add_hmac_headers(self, headers: dict[str, str], body: str | bytes) -> None: """ Add HMAC signing headers to request. @@ -905,6 +918,13 @@ def _add_hmac_headers(self, headers: dict[str, str], body: str) -> None: - X-Signature-Timestamp: Unix timestamp for freshness - X-Signature: HMAC-SHA256(api_key, secret, timestamp, body_hash) + ``body`` is the canonical wire form returned by + ``_signed_request_body`` (``bytes``); passing it through + without an intermediate ``.decode("utf-8")`` is what makes + the signed payload match what httpx actually puts on the + wire via ``content=body``. ``str`` is still accepted so the + verify / legacy paths keep working. + Only adds signature if secret_key is configured. 
""" if not self.secret_key or not self.api_key: @@ -934,7 +954,6 @@ def _build_signed_headers( Always includes: - Content-Type: application/json - - X-API-Version: __api_version__ - X-API-Key: when api_key is set Adds HMAC signature headers when secret_key is set and a @@ -945,7 +964,6 @@ def _build_signed_headers( """ headers: dict[str, str] = { "Content-Type": "application/json", - "X-API-Version": __api_version__, } if self.api_key: headers["X-API-Key"] = self.api_key @@ -968,9 +986,13 @@ def _build_signed_headers( # cross-site requests, so this is not a CSRF regression. headers["Authorization"] = f"Bearer {self.api_key}" if body is not None and self.secret_key and self.api_key: - body_str = body if isinstance(body, str) else body.decode("utf-8") timestamp = int(time.time()) - signature = generate_hmac_signature(self.api_key, self.secret_key, timestamp, body_str) + # 2026-06-27: generate_hmac_signature accepts ``str | bytes`` + # natively, so we pass the wire form through without an + # intermediate ``.decode("utf-8")`` round-trip. Signing the + # exact bytes that go on the wire is the whole point of the + # canonical ``_signed_request_body`` helper. + signature = generate_hmac_signature(self.api_key, self.secret_key, timestamp, body) headers["X-Signature-Timestamp"] = str(timestamp) headers["X-Signature"] = signature if extra: @@ -1035,7 +1057,7 @@ def _send_batch_with_retry_info(self, batch: list[dict[str, Any]]) -> "SendResul audit_result.md §16.B (P0 #2). """ logger.debug(f"Sending batch of {len(batch)} events to {self.api_url}/api/v1/track/batch") - headers = {"Content-Type": "application/json", "X-API-Version": __api_version__} + headers = {"Content-Type": "application/json"} if self.api_key: headers["X-API-Key"] = self.api_key # FIX-F3: Bearer header for CSRF bypass (see _build_signed_headers). @@ -1146,9 +1168,13 @@ def _post_batch() -> httpx.Response: # the whole loop. 
try: data = response.json() - actions = data.get("actions") - if actions is None: - actions = data.get("actions_taken", []) + # 2026-06-28 audit P2.4: backend renamed ``actions_taken`` + # → ``messages`` on 2026-06-27 (see + # backend/src/proxy/handlers.rs:5375-5376 — the legacy field + # was misleadingly typed as Vec and crashed SDK's + # action.get("type") dispatch). The legacy ``actions_taken`` + # fallback below is therefore dead and was removed. + actions = data.get("actions") or [] for action in actions: try: if not isinstance(action, dict): @@ -1260,7 +1286,7 @@ def execute( "operation_id": operation_id or str(uuid.uuid4()), } - headers = {"Content-Type": "application/json", "X-API-Version": __api_version__} + headers = {"Content-Type": "application/json"} if self.api_key: headers["X-API-Key"] = self.api_key # FIX-F3: Bearer header for CSRF bypass (see _build_signed_headers). @@ -1270,7 +1296,7 @@ def execute( # via content=body so the wire bytes match the signed bytes. # See ``_signed_request_body`` for the rationale. body = _signed_request_body(gate_request) - self._add_hmac_headers(headers, body.decode("utf-8")) + self._add_hmac_headers(headers, body) # Inject trace context for distributed tracing (W3C Trace Context) self._inject_trace_context(headers) @@ -1438,12 +1464,11 @@ def check( headers["X-API-Key"] = self.api_key # FIX-F3: Bearer header for CSRF bypass (see _build_signed_headers). headers["Authorization"] = f"Bearer {self.api_key}" - headers["X-API-Version"] = __api_version__ # HMAC fix: serialise via the canonical-bytes helper and send # via content=body so the wire bytes match the signed bytes. 
body = _signed_request_body(gate_request) - self._add_hmac_headers(headers, body.decode("utf-8")) + self._add_hmac_headers(headers, body) # Inject trace context for distributed tracing (W3C Trace Context) self._inject_trace_context(headers) @@ -1621,7 +1646,7 @@ async def _refetch_credentials(self) -> None: } # Re-use the same HMAC headers as /gate and /track so # the server's auth-verify path is consistent. - self._add_hmac_headers(headers, body.decode("utf-8")) + self._add_hmac_headers(headers, body) response = self._client.post( # P0 #5: contract drift — other auth-verify call sites @@ -1647,29 +1672,32 @@ async def _refetch_credentials(self) -> None: logger.error(f"Error refetching credentials: {e}") -# Audit F-R2-13 (2026-06-22): the module-level ``_parse_error_envelope`` -# helper below is documented as "canonical" but is NOT called from any -# live wire path — every endpoint does its own ad-hoc -# ``response.raise_for_status()`` or status-code branch. -# -# The audit's recommendation was "either delete the helper (it's -# misleading), OR wire it up everywhere". We chose "keep but mark -# test-only" because: +# ADR (2026-06-28, audit P2.2 close): ``_parse_error_envelope`` below +# is INTENTIONALLY dead code — a frozen contract test for the canonical +# envelope→exception mapping. Audit F-R2-13 (2026-06-22) flagged it as +# drift; the resolution was to mark it stable rather than wire it up. # +# Rationale for keeping it as dead code instead of deleting: # 1. ``tests/test_error_envelope.py`` and # ``tests/test_transport_branches.py`` import this helper as a -# pure-function reference for the canonical envelope→exception -# mapping (the test fixtures encode the contract that a future -# refactor will need to match). -# 2. Tests are documentation. Deleting it forces the tests to -# duplicate the mapping table, which is exactly the kind of -# drift the helper exists to prevent. +# pure-function reference for the canonical mapping table the +# tests encode. 
Deleting the helper would force the tests to +# duplicate the mapping, which is exactly the kind of drift the +# helper exists to prevent. +# 2. Live SDK endpoints each do their own ``raise_for_status()`` or +# status-code branch because the production error_code taxonomy +# (``NR-A003``, ``NR-B001``, …) is intentionally separate from +# the backend's SCREAMING_SNAKE envelope codes. Wiring the +# helper into the wire path would require picking one +# taxonomy, and neither is wrong — they serve different +# audiences (machine triage vs. end-user message). +# +# DO NOT call this from a wire path without first deciding which +# taxonomy wins. If you ever do wire it up, delete this ADR block +# and rename to a non-underscored name (it's no longer private). # -# DO NOT add a new caller that uses this helper from the SDK wire -# path until every endpoint is refactored to route through it. The -# helper is currently a frozen contract test, not a live translator. -# If you wire it up everywhere, delete this comment and rename to a -# non-underscored name (it's no longer private). +# Marked with a final ``__all__ = []`` exclusion in spirit (the +# leading underscore); treat any new caller as a refactor signal. def _parse_error_envelope( response: httpx.Response, endpoint: str, diff --git a/tests/test_framework_patches.py b/tests/test_framework_patches.py index dc1469e..71d2712 100644 --- a/tests/test_framework_patches.py +++ b/tests/test_framework_patches.py @@ -7,59 +7,16 @@ - crewai (Crew.kickoff + Crew.kickoff_async + post-run usage_metrics) - autogen (BaseChatAgent.on_messages + OpenAIChatCompletionClient.create) -Each test below is `pytest.importorskip` guarded so the suite stays -green when the optional packages are not installed. +The 6 placeholder tests removed on 2026-06-28 were +``@pytest.mark.skipif(True, ...)`` stubs with empty bodies — they +provided no coverage and gave a false sense of green-on-arrival. 
+Real coverage for these frameworks lives in the framework-specific +integration suites (one per repo, gated on the framework being +installed). See Sprint 2.9 ticket. """ from __future__ import annotations -import pytest - -# =========================================================================== -# llama-index -# =========================================================================== - - -@pytest.mark.skipif(True, reason="llama-index not installed in test environment") -def test_llama_index_patch_idempotent(): - pass - - -@pytest.mark.skipif(True, reason="llama-index not installed in test environment") -def test_llama_index_chat_end_emits_track(): - pass - - -# =========================================================================== -# crewai -# =========================================================================== - - -@pytest.mark.skipif(True, reason="crewai not installed in test environment") -def test_crewai_patch_idempotent(): - pass - - -@pytest.mark.skipif(True, reason="crewai not installed in test environment") -def test_crewai_kickoff_emits_usage_metrics(): - pass - - -# =========================================================================== -# autogen -# =========================================================================== - - -@pytest.mark.skipif(True, reason="autogen not installed in test environment") -def test_autogen_patch_idempotent(): - pass - - -@pytest.mark.skipif(True, reason="autogen not installed in test environment") -def test_autogen_on_messages_emits_span(): - pass - - # =========================================================================== # Common: graceful no-op when packages absent # =========================================================================== @@ -214,4 +171,4 @@ def _broken(): assert any("RuntimeError" in r.getMessage() for r in warning_records), ( "Exception type must be included in the WARNING log so " "the operator can correlate with vendor SDK changelogs." 
- ) + ) \ No newline at end of file diff --git a/tests/test_grpc_removed.py b/tests/test_grpc_removed.py index 9f703a9..5cf065a 100644 --- a/tests/test_grpc_removed.py +++ b/tests/test_grpc_removed.py @@ -8,8 +8,10 @@ This test pins the post-deletion contract: 1. ``NullRunRuntime`` does not carry a ``_grpc_transport`` attribute. - 2. Setting ``NULLRUN_USE_GRPC=1`` does NOT crash init — it logs - an INFO line and silently falls back to HTTP. + 2. Setting ``NULLRUN_USE_GRPC=1`` raises ``RuntimeError`` at SDK + init (was: silent no-op + INFO log in 0.3.1–0.7.7; fail-LOUD + as of 0.7.8 so customers can't silently ship a non-functional + SDK to prod). 3. ``grpcio`` is NOT a hard dep — the ``pyproject.toml`` only lists ``httpx``. @@ -21,9 +23,10 @@ from __future__ import annotations -import logging from pathlib import Path +import pytest + BASE_URL = "https://api.test.nullrun.io" @@ -59,29 +62,34 @@ def test_create_grpc_transport_does_not_exist(self): "gRPC transport is frozen at the platform side." ) - def test_nullrun_use_grpc_does_not_crash_init(self, make_runtime, monkeypatch, caplog): - """Setting NULLRUN_USE_GRPC=1 must NOT raise NameError. - - Pre-fix: NullRunRuntime.__init__ called ``create_grpc_transport(...)`` - which did not exist, so init crashed with NameError before - reaching the warning log. The test now expects: - 1. init succeeds, - 2. an INFO line is logged about gRPC being a no-op, - 3. the runtime is fully usable. + def test_nullrun_use_grpc_raises_runtime_error(self, make_runtime, monkeypatch): + """Setting NULLRUN_USE_GRPC=1 must raise RuntimeError at SDK init. + + Contract evolution: + * 0.3.1: NullRunRuntime.__init__ called ``create_grpc_transport(...)`` + which did not exist, so init crashed with NameError before + reaching any user code. Silent broken prod. + * 0.3.1 – 0.7.7: silent no-op + INFO log on nullrun.runtime. + Still broken, just harder to diagnose from a missing proto + trace in the dashboard. 
+ * 0.7.8: explicit RuntimeError so the misconfiguration is + visible at startup. The CHANGELOG entry under "Deprecated" + tells the operator to unset the env var. + + The test pins the 0.7.8 contract: setting the env var must + raise with a message that names the offending variable and + points the operator at the docs page. """ monkeypatch.setenv("NULLRUN_USE_GRPC", "1") - with caplog.at_level(logging.INFO, logger="nullrun.runtime"): - rt = make_runtime() - assert rt is not None - # The no-op INFO log must be present so an operator who set - # the env var sees that nothing happened. - assert any( - "NULLRUN_USE_GRPC" in r.getMessage() and r.levelno == logging.INFO - for r in caplog.records - ), ( - "Expected an INFO log on nullrun.runtime mentioning " - "NULLRUN_USE_GRPC. Got: " - f"{[(r.levelname, r.getMessage()) for r in caplog.records]}" + with pytest.raises(RuntimeError) as exc_info: + make_runtime() + msg = str(exc_info.value) + assert "NULLRUN_USE_GRPC" in msg, ( + f"RuntimeError must name the offending env var. Got: {msg!r}" + ) + assert "https://docs.nullrun.io" in msg, ( + "RuntimeError must point operators at the docs page that " + "explains the migration. Got: " + repr(msg) ) def test_pyproject_has_no_grpcio_hard_dep(self): diff --git a/tests/test_high_reliability_fixes.py b/tests/test_high_reliability_fixes.py index 419c6f8..8e65dca 100644 --- a/tests/test_high_reliability_fixes.py +++ b/tests/test_high_reliability_fixes.py @@ -103,7 +103,13 @@ def json(self): runtime._transport._client = FakeClient() runtime._fetch_remote_state("wf-1") assert len(called) == 1 - assert "/api/v1/orgs/00000000-0000-0000-0000-000000000abc/workflows/wf-1" in called[0] + # Audit P1.1 (2026-06-28): swapped to /api/v1/status/{wf_id} so SDK + # auth (X-API-Key) is accepted. The org-scoped dashboard route + # requires Bearer session and 401'd SDK clients silently. 
+ assert called[0].endswith("/api/v1/status/wf-1"), ( + f"unexpected remote-state URL: {called[0]}" + ) + assert "/orgs/" not in called[0] # =========================================================================== diff --git a/tests/test_hmac_signing.py b/tests/test_hmac_signing.py index 2ef21cd..c07d1c7 100644 --- a/tests/test_hmac_signing.py +++ b/tests/test_hmac_signing.py @@ -83,6 +83,92 @@ def test_signature_is_deterministic_for_same_inputs(self): assert len(sig1) == 64 # SHA-256 hex +class TestHmacBodyTypeParity: + """generate_hmac_signature accepts both ``str`` and ``bytes`` bodies + and produces identical signatures for equivalent payloads. + + 2026-06-27 regression guard: the /api/v1/track/batch flush path + passes the canonical wire bytes (from ``_signed_request_body``) + directly to the signer. A previous version of generate_hmac_signature + did ``body.encode("utf-8")`` unconditionally, which raised + ``AttributeError: 'bytes' object has no attribute 'encode'`` and + silently killed every analytics event -- the backend then logged + "missing signature headers" on the next batch retry because nothing + was ever sent. + """ + + def test_bytes_body_produces_same_signature_as_str_body(self): + """Signing the ``str`` form and the ``bytes`` form of the same + payload MUST produce identical HMAC signatures. + + This is the load-bearing invariant: if these diverge, then + ``httpx.post(content=bytes_body)`` would put different bytes + on the wire than the signature was computed over, and the + Rust backend at ``backend/src/auth/hmac.rs:466-518`` would + reject the request with 401. 
+ """ + api_key = "nr_test_key_abc" + secret = "sk_test_secret_xyz" + timestamp = 1700000000 + body_str = '{"events":[{"type":"parity","value":42}]}' + body_bytes = body_str.encode("utf-8") + + sig_from_str = generate_hmac_signature(api_key, secret, timestamp, body_str) + sig_from_bytes = generate_hmac_signature(api_key, secret, timestamp, body_bytes) + + assert sig_from_str == sig_from_bytes + assert len(sig_from_bytes) == 64 # SHA-256 hex digest + + def test_bytes_body_does_not_raise_attribute_error(self): + """Regression guard for the 2026-06-27 /track/batch AttributeError. + + Pre-fix code did ``body.encode("utf-8")`` on what was already + ``bytes`` -- this test would have raised ``AttributeError``. + """ + api_key = "k" + secret = "s" + timestamp = 1700000000 + body_bytes = b'{"events":[]}' + + # Must not raise + sig = generate_hmac_signature(api_key, secret, timestamp, body_bytes) + assert isinstance(sig, str) + assert len(sig) == 64 + + def test_signature_from_bytes_verifies_against_str_body(self): + """End-to-end cross-form check: a signature computed over + ``bytes`` is verifiable against the ``str`` form of the same + body. This proves the two representations are fully + interchangeable -- the verify_hmac_signature path (which + still takes ``str``) keeps working with signatures produced + by the bytes path (which is what /track/batch uses). 
+ """ + api_key = "k" + secret = "s" + timestamp = int(time.time()) + body_str = '{"events":[{"type":"cross","value":1}]}' + body_bytes = body_str.encode("utf-8") + + # Sign over bytes (canonical /track/batch path) + sig = generate_hmac_signature(api_key, secret, timestamp, body_bytes) + + # Verify using str (the verify_hmac_signature public API) + assert verify_hmac_signature(api_key, secret, timestamp, body_str, sig) + + def test_str_and_bytes_produce_distinct_signatures_for_distinct_payloads(self): + """Sanity negative: if the body content differs, signatures + differ -- the parity above is not because both inputs are + being coerced to the same constant. + """ + api_key = "k" + secret = "s" + timestamp = 1700000000 + + sig_a = generate_hmac_signature(api_key, secret, timestamp, "alpha") + sig_b = generate_hmac_signature(api_key, secret, timestamp, b"beta") + assert sig_a != sig_b + + class TestVerifyHmacSignature: """The verify function accepts canonical signatures and rejects tampered ones.""" @@ -190,14 +276,15 @@ def test_always_includes_x_api_key(self, transport_factory): headers = t._build_signed_headers("body") assert headers["X-API-Key"] == "nr_live_xyz" - def test_always_includes_x_api_version(self, transport_factory): - """X-API-Version is always set to the package version.""" + def test_does_not_emit_x_api_version_header(self, transport_factory): + """2026-06-27 audit: backend has zero readers for X-API-Version + (not in CORS allowlist, not in any middleware). The header was + ~14 bytes/request wasted; we stopped emitting it. See audit + P2.1. 
+ """ t = transport_factory() headers = t._build_signed_headers("body") - assert "X-API-Version" in headers - from nullrun.transport import __api_version__ - - assert headers["X-API-Version"] == __api_version__ + assert "X-API-Version" not in headers def test_extra_headers_override_defaults(self, transport_factory): """The extra_headers dict is merged ON TOP of the defaults.""" @@ -215,9 +302,9 @@ def test_no_body_means_no_signature(self, transport_factory): headers = t._build_signed_headers(None) assert "X-Signature" not in headers assert "X-Signature-Timestamp" not in headers - # But X-API-Key / X-API-Version still present + # But X-API-Key still present (X-API-Version removed 2026-06-27) assert "X-API-Key" in headers - assert "X-API-Version" in headers + assert "X-API-Version" not in headers # ────────────────────────────────────────────────────────────────────── diff --git a/tests/test_integration_contract.py b/tests/test_integration_contract.py index bb34fcb..c8e84dc 100644 --- a/tests/test_integration_contract.py +++ b/tests/test_integration_contract.py @@ -83,9 +83,16 @@ def test_track_batch_post_includes_bearer(self, transport): class TestRemoteStateFetchContract: """Pin the SDK remote-state URL so the legacy HTTP-poll fallback - hits a route that actually exists.""" + hits a route that actually exists. - def test_remote_state_url_is_org_scoped(self): + 2026-06-28 audit P1.1: swapped from + ``/api/v1/orgs/{org_id}/workflows/{wf_id}`` (the DASHBOARD route — + requires Bearer session, returned 401 to SDK clients that only + send X-API-Key) to ``/api/v1/status/{wf_id}`` (the SDK-polling + route at backend/src/proxy/handlers.rs:9758, accepts X-API-Key). 
+ """ + + def test_remote_state_url_uses_status_endpoint(self): from nullrun.runtime import NullRunRuntime rt = NullRunRuntime(api_key="nr_live_x", _test_mode=True) @@ -110,8 +117,12 @@ def json(): rt._fetch_remote_state("wf-abc-123") assert captured["url"].endswith( - "/api/v1/orgs/00000000-0000-0000-0000-000000000002/workflows/wf-abc-123" + "/api/v1/status/wf-abc-123" ), f"unexpected remote-state URL: {captured['url']}" + # The SDK-polling route does NOT require org_id in the URL + # path. Setting it should still be a no-op for this endpoint. + assert "organization_id" not in captured["url"] + assert "/orgs/" not in captured["url"] finally: rt.shutdown() diff --git a/tests/test_transport.py b/tests/test_transport.py index a8d3967..f0ea344 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -37,13 +37,16 @@ def test_send_batch_success(self, transport): assert route.called @respx.mock - def test_send_batch_includes_api_version_header(self, transport): + def test_send_batch_does_not_emit_x_api_version(self, transport): + """2026-06-27 audit P2.1: X-API-Version is dead — backend has + no reader. We stopped emitting it. See audit notes. + """ route = respx.post("https://api.test.nullrun.io/api/v1/track/batch").mock( return_value=httpx.Response(200, json={}) ) transport._send_batch_with_retry_info([{"event": "test"}]) request = route.calls.last.request - assert "X-API-Version" in request.headers + assert "X-API-Version" not in request.headers @respx.mock def test_send_batch_includes_auth_header(self, transport):