Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ All notable changes to scolta-python are documented here.

## [Unreleased]

### Fixed
- **Auto-provisioned Amazee credentials stored without resolved model names no longer leave AI permanently broken (`src/scolta/ai/amazee/auto_provisioner.py`).** Provisioning persists credentials and resolves model names as two non-atomic steps (`AmazeeTrialProvisioner.provision()` stores the token+url, then calls `/model/info`). When the model-info call fails, `get_available_models()` swallows the error and returns `[]`, so the `on_models_resolved` gate never fires and no model name is persisted — but `ConfigStorage.load()` requires only token+url, so it reports the half-provisioned credentials as valid. `ensure_ai_available()` then short-circuited on stored credentials on every later request and never re-resolved, so the caller fell back to the dated config default (`claude-sonnet-4-5-20250929`) which the Amazee LiteLLM gateway rejects with HTTP 400 "Invalid model name" — failing AI silently with no self-recovery (outside `KeyExpiryRecovery`'s auth-only remit). `ensure_ai_available()` now accepts an optional `has_resolved_models` predicate: when stored credentials exist but the caller reports models are still unresolved, model resolution is re-attempted against the **already-stored key** (never a fresh trial, which would waste a server-limited allocation) and `on_models_resolved` fires with the result, so the incomplete-provision state self-heals on the next lazy-init pass. Without the predicate the historical no-op is unchanged. A regression test drives the full provision → failed-resolution → store → re-resolve sequence. (The dated-default fallback itself lives in the consuming adapter/demo client construction, which adopts the predicate when it re-vendors.)

### Added
- **CI now builds and validates the PyPI artifacts (`dist` job in
`ci.yml`).** Publishing is manual and nothing in CI built the sdist/wheel, so
Expand Down
33 changes: 31 additions & 2 deletions src/scolta/ai/amazee/auto_provisioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,50 @@ def ensure_ai_available(
has_explicit_api_key: bool = False,
on_models_resolved: Callable[[str, str], None] | None = None,
client: AmazeeClient | None = None,
has_resolved_models: Callable[[], bool] | None = None,
) -> bool:
"""Provision a free trial unless AI is already configured. Idempotent;
no-op when an explicit key exists or credentials are already stored.
Returns True only on a successful first provisioning.
Returns True only when a fresh trial was provisioned.

The stored-credentials no-op deliberately does NOT validate that the
stored key still works — trial keys are revoked server-side when the
trial ends, and that expiry is not announced at provisioning time, so a
cheap install-hook/lazy-init guard cannot know. Call-time auth failures
are the reliable signal: :class:`KeyExpiryRecovery` detects them and
recovers through :meth:`reprovision`, which bypasses this no-op.

Stored credentials are treated as a *complete* provision only once their
model names are resolved. A provision whose ``/model/info`` call failed
stores the token+url with no models, leaving the caller to fall back to
the dated config default — which the Amazee gateway rejects with HTTP
400, breaking AI permanently because this guard kept no-opping on the
half-provisioned credentials. When the caller can confirm models are
still unresolved (via ``has_resolved_models``), model resolution is
re-attempted against the ALREADY-STORED key — never a fresh trial, which
would waste a server-side-limited allocation — so the incomplete-provision
state self-heals. Without that callback the historical no-op stands: the
caller cannot tell us, and we must not re-resolve blindly every request.
"""
if has_explicit_api_key:
return False
if storage.load() is not None:

credentials = storage.load()
if credentials is not None:
# Already provisioned. Self-heal only an incomplete provision — one
# whose model resolution failed, leaving credentials with no models
# — and only when the caller can confirm that state. Re-resolve
# against the stored key (not a new trial) and persist the result.
if has_resolved_models is None or has_resolved_models():
return False

models = AmazeeModelResolver(client or AmazeeClient()).resolve(
credentials["litellm_api_url"], credentials["litellm_token"]
)
if on_models_resolved is not None and (
models["ai_model"] is not None or models["ai_expansion_model"] is not None
):
on_models_resolved(models["ai_model"] or "", models["ai_expansion_model"] or "")
return False

amazee_client = client or AmazeeClient()
Expand Down
99 changes: 99 additions & 0 deletions tests/ai/amazee/test_amazee.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,105 @@ def test_reprovision_returns_false_on_api_error():
assert storage.load() is None


# -- auto provisioner: self-heal of an incomplete provision -------------------
# A provision whose /model/info call failed stores token+url with no resolved
# models. ensure_ai_available() used to no-op on those forever, so the caller
# fell back to the dated config default the Amazee gateway rejects with HTTP 400
# and AI broke permanently. Re-resolving against the STORED key (never a fresh
# trial) heals it.


def test_auto_provisioner_self_heals_half_provisioned_state():
    """Replay provision -> empty /model/info -> later re-resolution end to end.

    Pass 1 provisions a trial while /model/info lists no models, so nothing is
    reported through ``on_models_resolved``. Pass 2 runs with the credentials
    already stored and the models now listed; it must report the resolved
    names without requesting a second trial.
    """
    state = {"model_info_empty": True, "trial_calls": 0}
    listed_models = [{"model_name": "claude-sonnet-4-6"}, {"model_name": "claude-haiku-4-5"}]

    def handler(request):
        path = request.url.path
        if path == "/auth/generate-trial-access":
            # Count trial requests so the test can prove only one was made.
            state["trial_calls"] += 1
            trial = {"litellm_token": "tok", "litellm_api_url": "https://llm.x", "region": "us"}
            return httpx.Response(200, json=trial)
        if path == "/model/info":
            data = [] if state["model_info_empty"] else listed_models
            return httpx.Response(200, json={"data": data})
        return httpx.Response(404)

    transport = httpx.MockTransport(handler)
    client = AmazeeClient(http_client=httpx.Client(transport=transport))
    storage = MemoryStorage()
    resolved = []

    def record(model, expansion_model):
        resolved.append((model, expansion_model))

    def run_guard():
        # Both passes make the same call: the caller reports models unresolved.
        return AutoProvisioner.ensure_ai_available(
            storage,
            on_models_resolved=record,
            client=client,
            has_resolved_models=lambda: False,
        )

    # Pass 1: trial provisioning succeeds; /model/info returns no models.
    provisioned = run_guard()
    assert provisioned is True  # a fresh trial WAS provisioned
    assert storage.load()["litellm_token"] == "tok"
    assert resolved == []  # but models stayed unresolved — the gap

    # Pass 2: credentials present, models still unresolved → self-heal by
    # re-resolving against the stored key. No second trial is provisioned.
    state["model_info_empty"] = False
    healed = run_guard()
    assert healed is False  # a model-only heal, not a new provision
    assert resolved == [("claude-sonnet-4-6", "claude-haiku-4-5")]
    assert state["trial_calls"] == 1  # never burned a second trial
    # The resolved model is a real undated alias, never the dated default the
    # gateway rejects.
    assert resolved[0][0] != "claude-sonnet-4-5-20250929"


def test_auto_provisioner_does_not_reresolve_when_models_resolved():
    """Stored credentials plus a True predicate must not trigger any HTTP call.

    The mock transport fails the test on any request, so passing proves that
    /model/info is not queried when the caller reports models as resolved.
    """

    def handler(request):
        raise AssertionError(f"no HTTP call expected, got {request.url.path}")

    transport = httpx.MockTransport(handler)
    client = AmazeeClient(http_client=httpx.Client(transport=transport))

    storage = MemoryStorage()
    storage.store("tok", "https://llm.x", "us")

    called = []

    def record(model, expansion_model):
        called.append((model, expansion_model))

    result = AutoProvisioner.ensure_ai_available(
        storage,
        on_models_resolved=record,
        client=client,
        has_resolved_models=lambda: True,
    )

    assert result is False
    assert called == []


def test_auto_provisioner_stored_creds_without_predicate_stay_noop():
    """Without ``has_resolved_models``, stored credentials remain a pure no-op.

    Back-compat check: the mock transport fails the test on any request, and
    the models callback must never be invoked.
    """

    def handler(request):
        raise AssertionError(f"no HTTP call expected, got {request.url.path}")

    transport = httpx.MockTransport(handler)
    client = AmazeeClient(http_client=httpx.Client(transport=transport))

    storage = MemoryStorage()
    storage.store("tok", "https://llm.x", "us")

    called = []

    def record(model, expansion_model):
        called.append((model, expansion_model))

    result = AutoProvisioner.ensure_ai_available(
        storage,
        on_models_resolved=record,
        client=client,
    )

    assert result is False
    assert called == []


# -- budget decorator ---------------------------------------------------------


Expand Down
Loading