diff --git a/src/nullrun/instrumentation/auto.py b/src/nullrun/instrumentation/auto.py
index d385ab5..45e8bcb 100644
--- a/src/nullrun/instrumentation/auto.py
+++ b/src/nullrun/instrumentation/auto.py
@@ -35,6 +35,7 @@
 
 from __future__ import annotations
 
+import gc
 import hashlib
 import json
 import logging
@@ -1038,9 +1039,92 @@ def _wrap_async_init(self: httpx.AsyncClient, *args: Any, **kwargs: Any) -> None
         httpx.AsyncClient._nullrun_patched = True  # type: ignore[attr-defined]
         _httpx_patched = True
         logger.info("httpx auto-instrumentation installed (sync + async)")
+
+        # Audit 2026-06-29 (init-ordering hazard): the class-level
+        # __init__ patch only wraps httpx.Clients created AFTER it is
+        # installed. If a user does
+        #
+        #     llm = ChatOpenAI(model="gpt-4.1-mini")  # before init()
+        #     nullrun.init(api_key=...)              # patch installed here
+        #
+        # ``ChatOpenAI`` already built its internal httpx.Client (or
+        # will on first .invoke()), but that client is reachable from
+        # the running process right now and is using the unpatched
+        # transport. Without the eager sweep below, the httpx path
+        # emits nothing for that LLM — every call silently zero-billed
+        # via the langchain callback fallback (or the bare-LLMResult
+        # path with no model).
+        #
+        # We sweep gc.get_objects() once and wrap any pre-existing
+        # httpx.Client/AsyncClient whose transport isn't already a
+        # NullRun*Transport. The class-level marker on ``__init__`` is
+        # set, so future constructions auto-wrap — this sweep is the
+        # back-fill for the instances that pre-date the patch.
+        try:
+            sync_wrapped, async_wrapped = _wrap_pre_existing_httpx_clients(runtime)
+            if sync_wrapped or async_wrapped:
+                logger.info(
+                    "httpx eager wrap: %d sync + %d async pre-existing "
+                    "client(s) now route through NullRun",
+                    sync_wrapped,
+                    async_wrapped,
+                )
+        except Exception as exc:  # noqa: BLE001 — defensive, never block init
+            logger.debug("httpx eager wrap sweep failed: %s", exc)
         return True
 
 
+def _wrap_pre_existing_httpx_clients(runtime: Any) -> tuple[int, int]:
+    """Find httpx clients created before ``patch_httpx`` ran and wrap their
+    transports in NullRun's transports.
+
+    Audit 2026-06-29 (init-ordering hazard): the typical sequence
+
+        llm = ChatOpenAI(model=...)  # builds internal httpx.Client
+        nullrun.init(api_key=...)    # installs the __init__ patch
+
+    leaves ``llm``'s internal client with the unpatched transport.
+    New ``httpx.Client()`` constructions are auto-wrapped by the
+    class-level patch; this sweep is the back-fill.
+
+    Returns ``(sync_count, async_count)`` for logging. Errors are
+    swallowed by the caller — this is a best-effort back-fill, never
+    a hard requirement.
+
+    We use ``gc.get_objects()`` because httpx does not maintain a
+    weakref registry of its Client instances. The sweep is O(heap);
+    on a typical agent process (hundreds of MB heap, mostly strings
+    and small dicts) this takes <50 ms. We bail early on
+    ``RuntimeError`` (raised by ``gc.get_objects()`` when the
+    interpreter is shutting down) and on any ``isinstance`` failure
+    (a class with a broken ``__class__``).
+    """
+    sync_count = 0
+    async_count = 0
+    try:
+        for obj in gc.get_objects():
+            try:
+                if isinstance(obj, httpx.Client) and not isinstance(
+                    obj._transport, NullRunSyncTransport
+                ):
+                    obj._transport = NullRunSyncTransport(obj._transport, runtime)
+                    sync_count += 1
+                elif isinstance(obj, httpx.AsyncClient) and not isinstance(
+                    obj._transport, NullRunAsyncTransport
+                ):
+                    obj._transport = NullRunAsyncTransport(obj._transport, runtime)
+                    async_count += 1
+            except (ReferenceError, TypeError, AttributeError):
+                # gc.get_objects can yield objects that are mid-GC or
+                # have a broken __class__; skip them rather than abort.
+                continue
+    except RuntimeError:
+        # gc.get_objects() raises RuntimeError during interpreter
+        # shutdown. Nothing to do.
+        pass
+    return sync_count, async_count
+
+
 # ---------------------------------------------------------------------------
 # D4: patch_langchain_callback — in-memory mocks + callback-only flows
 # ---------------------------------------------------------------------------
diff --git a/src/nullrun/instrumentation/langgraph.py b/src/nullrun/instrumentation/langgraph.py
index df54aac..69c0b41 100644
--- a/src/nullrun/instrumentation/langgraph.py
+++ b/src/nullrun/instrumentation/langgraph.py
@@ -718,26 +718,77 @@ def _extract_model_from_response(response: Any) -> str | None:
     Returns the first non-empty value found, or ``None`` if every known
     source is empty / malformed.
 
+    Audit 2026-06-29 (SDK↔backend wire: silent zero-billing): the chain
+    was checked top-to-bottom and silently returned ``None`` whenever
+    none of the four known locations carried the model. The backend
+    then ``unwrap_or("default")``'d to ``DEFAULT_RATE`` and every call
+    was recorded as ≈$0. We now:
+
+      - promote ``response.llm_output['model_name']`` (the location
+        langchain-openai 1.x uses for the date-suffixed model id
+        ``gpt-4.1-mini-2025-04-14``) to step 1, ahead of the
+        ``response_metadata`` step that langchain 0.x used;
+      - add ``response.llm_output['model']`` and a generic
+        "any key containing 'model'" sweep so non-OpenAI wrappers
+        (proxies, custom chat models) still get attributed;
+      - log a DEBUG line on the None path so an operator who sees
+        the wire warning in the backend can correlate it to the
+        observation site that produced the event.
+
     Sources checked, in order:
 
-    1. ``response.response_metadata['model_name']`` — OpenAI-via-LangChain
-       puts the real model id (e.g. ``"gpt-4.1-mini-2025-04-14"``) here.
-    2. ``response.generations[0][0].message.response_metadata['model_name']``
-       — LLMResult callback path where the metadata lives on the AIMessage
-       rather than the LLMResult itself.
-    3. ``response.llm_output['model_name']`` — legacy LLMResult where the
-       chat-model wrapper hoisted the field onto the LLMResult dict.
-    4. ``response.model`` / ``response.model_name`` — direct attributes
-       on the response object (rare but seen in some custom wrappers).
+    1. ``response.llm_output['model_name']`` / ``['model']`` /
+       any key containing "model" — langchain-openai 1.x puts the
+       date-suffixed id (e.g. ``"gpt-4.1-mini-2025-04-14"``) on
+       ``LLMResult.llm_output``. The backend's ``MODEL_RATES``
+       substring-match handles the date suffix.
+    2. ``response.response_metadata['model_name']`` — direct AIMessage
+       case (langchain 0.x chat-model wrappers expose metadata at
+       this level).
+    3. ``response.generations[0][0].message.response_metadata['model_name']``
+       — LLMResult callback path where the metadata lives on the
+       AIMessage rather than the LLMResult itself.
+    4. Direct ``response.model`` / ``response.model_name`` attributes
+       (rare, seen on some custom wrappers).
     """
-    # 1. response_metadata on the response.
+    # 1. llm_output dict (langchain-openai 1.x primary location).
+    #    Promote ahead of the response_metadata step: for OpenAI via
+    #    LangChain 1.x, the LLMResult carries the model on
+    #    ``llm_output['model_name']`` (date-suffixed) while the
+    #    AIMessage inside ``generations[0][0].message`` does NOT
+    #    carry ``response_metadata`` populated — step 3 would return
+    #    None. Without promoting step 1, every OpenAI call was
+    #    silently zero-billed.
+    llm_out = getattr(response, "llm_output", None)
+    if isinstance(llm_out, dict) and llm_out:
+        # Preferred: explicit "model_name" then "model" key.
+        for key in ("model_name", "model"):
+            val = llm_out.get(key)
+            if isinstance(val, str) and val:
+                return val
+        # Fallback: scan every key in llm_output for one that
+        # contains "model" and holds a non-empty string. Some
+        # custom chat-model wrappers / proxies put the model under
+        # less canonical keys (``"model_id"``, ``"modelName"``,
+        # ``"resolved_model"``).
+        for key, val in llm_out.items():
+            if (
+                isinstance(key, str)
+                and "model" in key.lower()
+                and isinstance(val, str)
+                and val
+            ):
+                return val
+
+    # 2. response_metadata on the response (langchain 0.x AIMessage
+    #    case, and any wrapper that hoists the metadata up).
     resp_meta = getattr(response, "response_metadata", None)
     if isinstance(resp_meta, dict):
         val = resp_meta.get("model_name") or resp_meta.get("model")
         if val:
             return str(val)
 
-    # 2. LLMResult callback path — look on the generation's AIMessage.
+    # 3. LLMResult callback path — look on the generation's AIMessage.
     gen_msg = _safe_get_gen_message(response)
     if gen_msg is not None:
         gm = getattr(gen_msg, "response_metadata", None)
@@ -751,19 +802,25 @@ def _extract_model_from_response(response: Any) -> str | None:
             if v:
                 return str(v)
 
-    # 3. llm_output dict (legacy LLMResult).
-    llm_out = getattr(response, "llm_output", None)
-    if isinstance(llm_out, dict):
-        val = llm_out.get("model_name") or llm_out.get("model")
-        if val:
-            return str(val)
-
     # 4. Direct attribute on response.
     for attr in ("model_name", "model"):
         v = getattr(response, attr, None)
         if v:
             return str(v)
 
+    # Diagnostic: every code path above returned None. The runtime
+    # layer will warn at ERROR when this happens for an llm_call
+    # event; this DEBUG line is for the per-call site so the
+    # operator can correlate the wire warning back to a specific
+    # response shape.
+    try:
+        response_type = type(response).__name__
+    except Exception:
+        response_type = "<unknown>"
+    logger.debug(
+        "_extract_model_from_response returned None for response of type %s",
+        response_type,
+    )
     return None
 
 
diff --git a/src/nullrun/runtime.py b/src/nullrun/runtime.py
index 5408c6a..63baa65 100644
--- a/src/nullrun/runtime.py
+++ b/src/nullrun/runtime.py
@@ -1430,23 +1430,38 @@ def track(
             if k not in _WIRE_STRIP_FIELDS and v is not None
         }
 
-        # Audit 2026-06-28 (SDK↔backend wire): backend cost pipeline
-        # emits ``WARN model_id=default`` whenever an llm_call event
-        # reaches the wire without a ``model`` field
-        # (pipeline.rs:164 ``unwrap_or("default")``). This log lets
-        # operators reproduce the path: which observation (httpx /
-        # langchain callback / manual track / agents tracer / requests)
-        # produced an llm_call without ``model`` set, and whether
-        # the SDK explicitly passed ``model=None``, omitted the key,
-        # or had ``model=""`` (which the ``if model:`` guard in
-        # track_llm silently drops). Activated only for llm_call so
-        # span_start/span_end/tool_call traffic doesn't pollute logs.
+        # Audit 2026-06-29 (SDK↔backend wire: silent zero-billing):
+        # backend cost pipeline emits ``WARN model_id=default``
+        # whenever an llm_call event reaches the wire without a
+        # ``model`` field (pipeline.rs:176 ``unwrap_or("default")``).
+        # Pre-fix the SDK warned and continued — the backend then
+        # silently fell through to ``DEFAULT_RATE`` and every call
+        # was recorded as ≈$0, breaking budget enforcement.
+        #
+        # Post-fix the SDK is fail-LOUD (not fail-closed yet — the
+        # event is still sent so the backend can audit/reject):
+        #
+        #   1. ERROR log instead of WARN — operator sees the breakage
+        #      immediately, not buried in routine log noise.
+        #   2. Bump the ``dropped_llm_call_no_model`` runtime counter
+        #      so dashboards can surface the regression rate.
+        #   3. Tag the wire event with ``__missing_model: True`` so
+        #      the backend's into_track_request gate (fail-CLOSED
+        #      layer) can reject with HTTP 422 and a clear error
+        #      envelope instead of silently recording a zero-cost
+        #      call. The flag is treated as a wire-private signal —
+        #      the backend strips it before persisting.
+        #
+        # Activated only for llm_call so span_start/span_end/
+        # tool_call traffic doesn't pollute logs or the wire.
         if wire_event.get("type") == "llm_call" and not wire_event.get("model"):
-            logger.warning(
+            logger.error(
                 "track(): llm_call event missing 'model' field — "
-                "backend will fall back to DEFAULT_RATE. event=%s",
+                "tagging for backend rejection (HTTP 422). event=%s",
                 wire_event,
             )
+            metrics.inc_runtime("dropped_llm_call_no_model")
+            wire_event["__missing_model"] = True
 
         self._transport.track(wire_event)
 
diff --git a/tests/contract/__init__.py b/tests/contract/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/contract/test_llm_call_model_wire.py b/tests/contract/test_llm_call_model_wire.py
new file mode 100644
index 0000000..0e737ea
--- /dev/null
+++ b/tests/contract/test_llm_call_model_wire.py
@@ -0,0 +1,321 @@
+"""
+Regression test for the silent zero-billing bug (2026-06-29).
+
+Pre-fix: when an ``llm_call`` event reached the runtime's ``track()``
+with ``model=None`` (or absent), the wire-format builder at
+``runtime.py:1427-1431`` dropped the None value entirely, the
+backend's cost pipeline ``unwrap_or("default")``'d, and every call
+was recorded as approximately zero. Budget enforcement, billing,
+and plan-limit accounting silently broke for every model on every
+provider.
+
+Post-fix: three layers of defense
+
+  1. ``_extract_model_from_response`` (langgraph.py) now finds the
+     model on every known response shape — including
+     ``LLMResult.llm_output['model_name']``, the location
+     langchain-openai 1.x uses for the date-suffixed id.
+  2. ``runtime.track()`` promotes the missing-model warning to
+     ERROR, bumps ``dropped_llm_call_no_model``, and tags the
+     wire event with ``__missing_model: True`` so the backend
+     can reject with HTTP 422.
+  3. ``patch_httpx`` eagerly wraps any pre-existing httpx.Client
+     instances when ``nullrun.init()`` is called — closing the
+     init-ordering hazard where ``ChatOpenAI(...)`` is created
+     before ``init()``.
+
+This file pins all three invariants at the unit level so a future
+refactor can't silently re-break the wire.
+"""
+
+from __future__ import annotations
+
+import logging
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+
+from nullrun.instrumentation.langgraph import _extract_model_from_response
+
+
+# ─── _extract_model_from_response: the actual fix ─────────────────────
+#
+# The chain was promoted so the langchain-openai 1.x primary
+# location (``LLMResult.llm_output['model_name']``) is checked
+# FIRST. Pre-fix this location was step 3, after the AIMessage
+# ``response_metadata`` step that langchain 1.x does not populate
+# — so every OpenAI call returned None from extraction and was
+# silently zero-billed.
+
+
+def _make_llmresult(
+    *,
+    response_metadata=None,
+    generations=None,
+    llm_output=None,
+    direct_model=None,
+):
+    """Build a minimal LLMResult-like object for the helper to walk."""
+    return SimpleNamespace(
+        response_metadata=response_metadata,
+        generations=generations or [],
+        llm_output=llm_output,
+        model=direct_model,
+        model_name=direct_model,
+    )
+
+
+def test_extracts_from_llm_output_model_name_langchain_openai_1x():
+    """langchain-openai 1.x primary location. The date-suffixed id
+    ``gpt-4.1-mini-2025-04-14`` lives here. The backend's
+    ``MODEL_RATES`` substring-match resolves it to the
+    ``gpt-4.1-mini`` rate."""
+    response = _make_llmresult(
+        llm_output={"model_name": "gpt-4.1-mini-2025-04-14", "token_usage": {}}
+    )
+    assert _extract_model_from_response(response) == "gpt-4.1-mini-2025-04-14"
+
+
+def test_extracts_from_llm_output_model_key():
+    """Some OpenAI-compatible proxies put the model on
+    ``llm_output['model']`` (no ``_name`` suffix)."""
+    response = _make_llmresult(llm_output={"model": "gpt-4.1-mini"})
+    assert _extract_model_from_response(response) == "gpt-4.1-mini"
+
+
+def test_extracts_from_llm_output_key_containing_model():
+    """Custom wrappers (e.g. ``model_id``, ``modelName``,
+    ``resolved_model``) fall through the generic any-key
+    sweep."""
+    response = _make_llmresult(llm_output={"model_id": "claude-haiku-4-5-20251001"})
+    assert _extract_model_from_response(response) == "claude-haiku-4-5-20251001"
+
+
+def test_llm_output_checked_before_response_metadata():
+    """Audit invariant: when BOTH ``llm_output['model_name']`` and
+    ``response_metadata['model_name']`` are set, the llm_output
+    value wins. Pre-fix the order was response_metadata first,
+    which meant a populated response_metadata shadowed the
+    real (date-suffixed) llm_output value."""
+    response = _make_llmresult(
+        response_metadata={"model_name": "stale-alias"},
+        llm_output={"model_name": "gpt-4.1-mini-2025-04-14"},
+    )
+    assert _extract_model_from_response(response) == "gpt-4.1-mini-2025-04-14"
+
+
+def test_falls_through_to_response_metadata_when_llm_output_empty():
+    """langchain 0.x and direct AIMessage paths still work."""
+    response = _make_llmresult(
+        response_metadata={"model_name": "gpt-4o-mini"},
+    )
+    assert _extract_model_from_response(response) == "gpt-4o-mini"
+
+
+def test_falls_through_to_generations_message_metadata():
+    """LLMResult where metadata lives on the AIMessage inside
+    ``generations[0][0].message`` rather than the LLMResult itself."""
+    msg = SimpleNamespace(
+        response_metadata={"model_name": "claude-3-5-sonnet-20240620"},
+    )
+    response = _make_llmresult(generations=[[SimpleNamespace(message=msg)]])
+    assert _extract_model_from_response(response) == "claude-3-5-sonnet-20240620"
+
+
+def test_returns_none_when_all_sources_empty(caplog):
+    """When every known source is empty/missing, extraction returns
+    None — but now logs a DEBUG line so the operator can correlate
+    the wire warning back to the observation site."""
+    response = _make_llmresult()
+    with caplog.at_level(logging.DEBUG, logger="nullrun.instrumentation.langgraph"):
+        result = _extract_model_from_response(response)
+    assert result is None
+    # The DEBUG line is for forensics; the runtime layer is the
+    # one that bumps the error log + counter.
+    assert any(
+        "_extract_model_from_response returned None" in record.message
+        for record in caplog.records
+    )
+
+
+def test_empty_string_in_llm_output_falls_through():
+    """``llm_output['model_name'] = ''`` is treated as empty and
+    the helper moves on to the next source rather than returning
+    the empty string. Pre-fix this would have shipped ``model=''``
+    on the wire, which the backend would still fall through on
+    but the SDK would log a misleading warning."""
+    response = _make_llmresult(
+        llm_output={"model_name": ""},
+        response_metadata={"model_name": "gpt-4.1-mini"},
+    )
+    assert _extract_model_from_response(response) == "gpt-4.1-mini"
+
+
+# ─── track() fail-loud behavior ──────────────────────────────────────
+#
+# The runtime layer is the front door for the wire. Pre-fix it
+# warned at WARN and continued; the backend then silently
+# zero-billed. Post-fix it logs at ERROR, bumps a counter, and
+# tags the event with ``__missing_model: True`` so the backend
+# can reject with HTTP 422.
+
+
+def test_track_promotes_missing_model_to_error_and_tags_event(make_runtime, caplog):
+    """Regression: an ``llm_call`` event with ``model=None`` reaches
+    ``track()`` and (a) is logged at ERROR, (b) gets the
+    ``__missing_model: True`` flag, (c) is still sent on the wire
+    so the backend can reject with HTTP 422 (not silently free)."""
+    rt = make_runtime()
+    captured = []
+
+    # Capture what the transport would send on the wire.
+    def _capture_track(event):
+        captured.append(event)
+
+    rt._transport.track = _capture_track
+
+    with caplog.at_level(logging.ERROR, logger="nullrun.runtime"):
+        rt.track({"type": "llm_call", "tokens": 100, "model": None})
+
+    # The wire event IS sent (so the backend can audit/reject).
+    assert len(captured) == 1
+    wire = captured[0]
+    assert wire["type"] == "llm_call"
+    # __missing_model: True is the signal to the backend gate.
+    assert wire.get("__missing_model") is True
+    # model is absent from the wire (None values are still dropped
+    # at runtime.py:1427-1431 — that filter is correct; the flag
+    # is the substitute for the field).
+    assert "model" not in wire
+    # An ERROR log line was emitted.
+    assert any(
+        "llm_call event missing 'model' field" in record.message
+        for record in caplog.records
+        if record.levelno == logging.ERROR
+    )
+
+
+def test_track_does_not_tag_when_model_is_set(make_runtime):
+    """The happy path: ``llm_call`` event with a model passes
+    through unchanged (no ERROR, no __missing_model flag)."""
+    rt = make_runtime()
+    captured = []
+    rt._transport.track = lambda e: captured.append(e)
+
+    rt.track({"type": "llm_call", "tokens": 100, "model": "gpt-4.1-mini"})
+
+    assert len(captured) == 1
+    wire = captured[0]
+    assert wire.get("model") == "gpt-4.1-mini"
+    assert "__missing_model" not in wire
+
+
+def test_track_does_not_tag_non_llm_call_events_with_missing_model(make_runtime):
+    """``span_start`` / ``span_end`` / ``tool_call`` events do not
+    carry a model field by design. The fail-loud path must not
+    fire for them or every span emission would log an error."""
+    rt = make_runtime()
+    captured = []
+    rt._transport.track = lambda e: captured.append(e)
+
+    rt.track({"type": "span_start", "fn_name": "foo"})
+    rt.track({"type": "span_end", "fn_name": "foo"})
+
+    # No __missing_model flag on either event.
+    assert all("__missing_model" not in e for e in captured)
+    assert len(captured) == 2
+
+
+# ─── patch_httpx: eager wrap of pre-existing clients ────────────────
+#
+# Pre-fix the class-level patch on ``httpx.Client.__init__`` only
+# wrapped clients created AFTER ``nullrun.init()`` ran. The user's
+# script (and many real codebases) does
+#
+#     llm = ChatOpenAI(model=...)   # before init
+#     nullrun.init(api_key=...)     # patch installed too late
+#
+# which left ``llm``'s internal httpx.Client unpatched. Post-fix
+# the patch sweep finds and wraps pre-existing clients.
+
+
+def test_patch_httpx_wraps_pre_existing_clients():
+    """When ``patch_httpx`` runs and there are pre-existing
+    ``httpx.Client`` instances in the process, the new sweep
+    finds them and wraps their transports in
+    ``NullRunSyncTransport``.
+
+    The test builds a real ``httpx.Client`` (the user's
+    ``ChatOpenAI`` shape), runs the patch, and asserts the
+    transport was rewritten.
+    """
+    import httpx
+
+    from nullrun.instrumentation import auto
+    from nullrun.instrumentation.auto import NullRunSyncTransport
+
+    # The patch is process-global; reset so we start clean.
+    auto.reset_for_tests()
+    auto._httpx_patched = False
+
+    # Build a real client BEFORE the patch — the user's order.
+    pre_existing = httpx.Client()
+    # Sanity: default transport is not yet wrapped.
+    assert not isinstance(pre_existing._transport, NullRunSyncTransport)
+
+    runtime = MagicMock()
+    try:
+        ok = auto.patch_httpx(runtime)
+        assert ok is True
+
+        # The sweep should have found the pre-existing client and
+        # wrapped it.
+        assert isinstance(pre_existing._transport, NullRunSyncTransport)
+
+        # New clients after the patch are also wrapped (class-level
+        # patch on ``__init__``).
+        post = httpx.Client()
+        try:
+            assert isinstance(post._transport, NullRunSyncTransport)
+        finally:
+            post.close()
+    finally:
+        # Don't leave the patched state for the next test.
+        auto.reset_for_tests()
+        pre_existing.close()
+
+
+def test_patch_httpx_eager_wrap_is_idempotent():
+    """Running the eager sweep twice must not double-wrap the same
+    client. The check is on the existing transport type, not on
+    a separate marker, so a no-op re-run leaves the client
+    untouched."""
+    import httpx
+
+    from nullrun.instrumentation import auto
+    from nullrun.instrumentation.auto import NullRunSyncTransport
+
+    auto.reset_for_tests()
+    auto._httpx_patched = False
+
+    pre_existing = httpx.Client()
+    runtime = MagicMock()
+    try:
+        auto.patch_httpx(runtime)
+        wrapped_transport = pre_existing._transport
+        assert isinstance(wrapped_transport, NullRunSyncTransport)
+
+        # Reset the patch flag and call again — simulates a
+        # double-init() (e.g. test fixtures). The sweep must NOT
+        # wrap the already-wrapped transport a second time.
+        auto._httpx_patched = False
+        # The class-level patch marker (``_nullrun_patched``) is
+        # still set on httpx.Client so the second ``patch_httpx``
+        # short-circuits — this is the expected path. Verify the
+        # transport object is the SAME instance (no double-wrap).
+        auto.patch_httpx(runtime)
+        assert pre_existing._transport is wrapped_transport
+    finally:
+        auto.reset_for_tests()
+        pre_existing.close()