diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f48f3b..eeb387b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to scolta-python are documented here. ## [Unreleased] +### Fixed +- **Auto-provisioned Amazee credentials stored without resolved model names no longer leave AI permanently broken (`src/scolta/ai/amazee/auto_provisioner.py`).** Provisioning persists credentials and resolves model names as two non-atomic steps (`AmazeeTrialProvisioner.provision()` stores the token+url, then calls `/model/info`). When the model-info call fails, `get_available_models()` swallows the error and returns `[]`, so the `on_models_resolved` gate never fires and no model name is persisted — but `ConfigStorage.load()` requires only token+url, so it reports the half-provisioned credentials as valid. `ensure_ai_available()` then short-circuited on stored credentials on every later request and never re-resolved, so the caller fell back to the dated config default (`claude-sonnet-4-5-20250929`), which the Amazee LiteLLM gateway rejects with HTTP 400 "Invalid model name" — failing AI silently with no self-recovery (outside `KeyExpiryRecovery`'s auth-only remit). `ensure_ai_available()` now accepts an optional `has_resolved_models` predicate: when stored credentials exist but the caller reports models are still unresolved, model resolution is re-attempted against the **already-stored key** (never a fresh trial, which would waste a server-limited allocation) and `on_models_resolved` fires with the result, so the incomplete-provision state self-heals on the next lazy-init pass. Without the predicate the historical no-op is unchanged. A regression test drives the full provision → store → failed-resolution → re-resolve sequence. (The dated-default fallback itself lives in the consuming adapter/demo client construction, which adopts the predicate when it re-vendors.) 
+ ### Added - **CI now builds and validates the PyPI artifacts (`dist` job in `ci.yml`).** Publishing is manual and nothing in CI built the sdist/wheel, so diff --git a/src/scolta/ai/amazee/auto_provisioner.py b/src/scolta/ai/amazee/auto_provisioner.py index a1eddb1..d57810b 100644 --- a/src/scolta/ai/amazee/auto_provisioner.py +++ b/src/scolta/ai/amazee/auto_provisioner.py @@ -19,10 +19,11 @@ def ensure_ai_available( has_explicit_api_key: bool = False, on_models_resolved: Callable[[str, str], None] | None = None, client: AmazeeClient | None = None, + has_resolved_models: Callable[[], bool] | None = None, ) -> bool: """Provision a free trial unless AI is already configured. Idempotent; no-op when an explicit key exists or credentials are already stored. - Returns True only on a successful first provisioning. + Returns True only when a fresh trial was provisioned. The stored-credentials no-op deliberately does NOT validate that the stored key still works — trial keys are revoked server-side when the @@ -30,10 +31,38 @@ def ensure_ai_available( cheap install-hook/lazy-init guard cannot know. Call-time auth failures are the reliable signal: :class:`KeyExpiryRecovery` detects them and recovers through :meth:`reprovision`, which bypasses this no-op. + + Stored credentials are treated as a *complete* provision only once their + model names are resolved. A provision whose ``/model/info`` call failed + stores the token+url with no models, leaving the caller to fall back to + the dated config default — which the Amazee gateway rejects with HTTP + 400, breaking AI permanently because this guard kept no-opping on the + half-provisioned credentials. When the caller can confirm models are + still unresolved (via ``has_resolved_models``), model resolution is + re-attempted against the ALREADY-STORED key — never a fresh trial, which + would waste a server-side-limited allocation — so the incomplete-provision + state self-heals. 
Without that callback the historical no-op stands: the + caller cannot tell us, and we must not re-resolve blindly every request. """ if has_explicit_api_key: return False - if storage.load() is not None: + + credentials = storage.load() + if credentials is not None: + # Already provisioned. Self-heal only an incomplete provision — one + # whose model resolution failed, leaving credentials with no models + # — and only when the caller can confirm that state. Re-resolve + # against the stored key (not a new trial) and persist the result. + if has_resolved_models is None or has_resolved_models(): + return False + + models = AmazeeModelResolver(client or AmazeeClient()).resolve( + credentials["litellm_api_url"], credentials["litellm_token"] + ) + if on_models_resolved is not None and ( + models["ai_model"] is not None or models["ai_expansion_model"] is not None + ): + on_models_resolved(models["ai_model"] or "", models["ai_expansion_model"] or "") return False amazee_client = client or AmazeeClient() diff --git a/tests/ai/amazee/test_amazee.py b/tests/ai/amazee/test_amazee.py index c55cb06..d56a77a 100644 --- a/tests/ai/amazee/test_amazee.py +++ b/tests/ai/amazee/test_amazee.py @@ -385,6 +385,105 @@ def test_reprovision_returns_false_on_api_error(): assert storage.load() is None +# -- auto provisioner: self-heal of an incomplete provision ------------------- +# A provision whose /model/info call failed stores token+url with no resolved +# models. ensure_ai_available() used to no-op on those forever, so the caller +# fell back to the dated config default the Amazee gateway rejects with HTTP 400 +# and AI broke permanently. Re-resolving against the STORED key (never a fresh +# trial) heals it. + + +def test_auto_provisioner_self_heals_half_provisioned_state(): + # Exercise the real bug sequence end to end: a provision whose /model/info + # returns no models, then a later pass that re-resolves once models are + # reachable. 
+ state = {"model_info_empty": True, "trial_calls": 0} + + def handler(request): + if request.url.path == "/auth/generate-trial-access": + state["trial_calls"] += 1 + return httpx.Response( + 200, + json={"litellm_token": "tok", "litellm_api_url": "https://llm.x", "region": "us"}, + ) + if request.url.path == "/model/info": + data = ( + [] + if state["model_info_empty"] + else [{"model_name": "claude-sonnet-4-6"}, {"model_name": "claude-haiku-4-5"}] + ) + return httpx.Response(200, json={"data": data}) + return httpx.Response(404) + + client = AmazeeClient(http_client=httpx.Client(transport=httpx.MockTransport(handler))) + storage = MemoryStorage() + resolved = [] + + # Pass 1: trial provisioning succeeds; /model/info returns no models. + provisioned = AutoProvisioner.ensure_ai_available( + storage, + on_models_resolved=lambda m, e: resolved.append((m, e)), + client=client, + has_resolved_models=lambda: False, + ) + assert provisioned is True # a fresh trial WAS provisioned + assert storage.load()["litellm_token"] == "tok" + assert resolved == [] # but models stayed unresolved — the gap + + # Pass 2: credentials present, models still unresolved → self-heal by + # re-resolving against the stored key. No second trial is provisioned. + state["model_info_empty"] = False + healed = AutoProvisioner.ensure_ai_available( + storage, + on_models_resolved=lambda m, e: resolved.append((m, e)), + client=client, + has_resolved_models=lambda: False, + ) + assert healed is False # a model-only heal, not a new provision + assert resolved == [("claude-sonnet-4-6", "claude-haiku-4-5")] + assert state["trial_calls"] == 1 # never burned a second trial + # The resolved model is a real undated alias, never the dated default the + # gateway rejects. 
+ assert resolved[0][0] != "claude-sonnet-4-5-20250929" + + +def test_auto_provisioner_does_not_reresolve_when_models_resolved(): + # Fully provisioned: the predicate reports models present, so /model/info is + # never queried (re-resolving every request is wasteful). + def handler(request): + raise AssertionError(f"no HTTP call expected, got {request.url.path}") + + client = AmazeeClient(http_client=httpx.Client(transport=httpx.MockTransport(handler))) + storage = MemoryStorage() + storage.store("tok", "https://llm.x", "us") + called = [] + result = AutoProvisioner.ensure_ai_available( + storage, + on_models_resolved=lambda m, e: called.append((m, e)), + client=client, + has_resolved_models=lambda: True, + ) + assert result is False + assert called == [] + + +def test_auto_provisioner_stored_creds_without_predicate_stay_noop(): + # Back-compat: a caller that does not pass has_resolved_models keeps the + # historical "stored credentials are complete" no-op (no HTTP call). + def handler(request): + raise AssertionError(f"no HTTP call expected, got {request.url.path}") + + client = AmazeeClient(http_client=httpx.Client(transport=httpx.MockTransport(handler))) + storage = MemoryStorage() + storage.store("tok", "https://llm.x", "us") + called = [] + result = AutoProvisioner.ensure_ai_available( + storage, on_models_resolved=lambda m, e: called.append((m, e)), client=client + ) + assert result is False + assert called == [] + + # -- budget decorator ---------------------------------------------------------