diff --git a/sdk/documentintelligence/azure-ai-documentintelligence/azure/ai/documentintelligence/_model_base.py b/sdk/documentintelligence/azure-ai-documentintelligence/azure/ai/documentintelligence/_model_base.py
index 7f73b97b23e..d999a521c9e 100644
--- a/sdk/documentintelligence/azure-ai-documentintelligence/azure/ai/documentintelligence/_model_base.py
+++ b/sdk/documentintelligence/azure-ai-documentintelligence/azure/ai/documentintelligence/_model_base.py
@@ -13,6 +13,7 @@ import decimal
import functools
import sys
import logging
+import threading
import base64
import re
import typing
@@ -495,6 +496,10 @@ class Model(_MyMutableMapping):
# label whether current class's _attr_to_rest_field has been calculated
# could not see _attr_to_rest_field directly because subclass inherits it from parent class
_calculated: typing.Set[str] = set()
+ # serializes first-time calculation of _attr_to_rest_field: assigning it into a class
+ # __dict__ that a concurrent __new__ is iterating over raises "dictionary changed size
+ # during iteration", which deserialization fallbacks then swallow into corrupted results
+ _calculated_lock = threading.Lock()
def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None:
class_name = self.__class__.__name__
@@ -576,26 +581,31 @@ class Model(_MyMutableMapping):
def __new__(cls, *args: typing.Any, **kwargs: typing.Any) -> Self:
if f"{cls.__module__}.{cls.__qualname__}" not in cls._calculated:
- # we know the last nine classes in mro are going to be 'Model', '_MyMutableMapping', 'MutableMapping',
- # 'Mapping', 'Collection', 'Sized', 'Iterable', 'Container' and 'object'
- mros = cls.__mro__[:-9][::-1] # ignore parents, and reverse the mro order
- attr_to_rest_field: typing.Dict[str, _RestField] = { # map attribute name to rest_field property
- k: v for mro_class in mros for k, v in mro_class.__dict__.items() if k[0] != "_" and hasattr(v, "_type")
- }
- annotations = {
- k: v
- for mro_class in mros
- if hasattr(mro_class, "__annotations__")
- for k, v in mro_class.__annotations__.items()
- }
- for attr, rf in attr_to_rest_field.items():
- rf._module = cls.__module__
- if not rf._type:
- rf._type = rf._get_deserialize_callable_from_annotation(annotations.get(attr, None))
- if not rf._rest_name_input:
- rf._rest_name_input = attr
- cls._attr_to_rest_field: typing.Dict[str, _RestField] = dict(attr_to_rest_field.items())
- cls._calculated.add(f"{cls.__module__}.{cls.__qualname__}")
+ with cls._calculated_lock:
+ if f"{cls.__module__}.{cls.__qualname__}" not in cls._calculated:
+ # we know the last nine classes in mro are going to be 'Model', '_MyMutableMapping',
+ # 'MutableMapping', 'Mapping', 'Collection', 'Sized', 'Iterable', 'Container' and 'object'
+ mros = cls.__mro__[:-9][::-1] # ignore parents, and reverse the mro order
+ attr_to_rest_field: typing.Dict[str, _RestField] = { # map attribute name to rest_field property
+ k: v
+ for mro_class in mros
+ for k, v in mro_class.__dict__.items()
+ if k[0] != "_" and hasattr(v, "_type")
+ }
+ annotations = {
+ k: v
+ for mro_class in mros
+ if hasattr(mro_class, "__annotations__")
+ for k, v in mro_class.__annotations__.items()
+ }
+ for attr, rf in attr_to_rest_field.items():
+ rf._module = cls.__module__
+ if not rf._type:
+ rf._type = rf._get_deserialize_callable_from_annotation(annotations.get(attr, None))
+ if not rf._rest_name_input:
+ rf._rest_name_input = attr
+ cls._attr_to_rest_field: typing.Dict[str, _RestField] = dict(attr_to_rest_field.items())
+ cls._calculated.add(f"{cls.__module__}.{cls.__qualname__}")
return super().__new__(cls) # pylint: disable=no-value-for-parameter
diff --git a/sdk/vision/azure-ai-vision-imageanalysis/azure/ai/vision/imageanalysis/_model_base.py b/sdk/vision/azure-ai-vision-imageanalysis/azure/ai/vision/imageanalysis/_model_base.py
index 43fd8c7e9b1..1842ec95ea1 100644
--- a/sdk/vision/azure-ai-vision-imageanalysis/azure/ai/vision/imageanalysis/_model_base.py
+++ b/sdk/vision/azure-ai-vision-imageanalysis/azure/ai/vision/imageanalysis/_model_base.py
@@ -11,6 +11,7 @@ import calendar
import decimal
import functools
import sys
+import threading
import logging
import base64
import re
@@ -476,6 +477,13 @@ def _create_value(rf: typing.Optional["_RestField"], value: typing.Any) -> typin
class Model(_MyMutableMapping):
_is_model = True
+ # label whether current class's _attr_to_rest_field has been calculated
+ # could not see _attr_to_rest_field directly because subclass inherits it from parent class
+ _calculated: typing.Set[str] = set()
+ # serializes first-time calculation of _attr_to_rest_field: assigning it into a class
+ # __dict__ that a concurrent __new__ is iterating over raises "dictionary changed size
+ # during iteration", which deserialization fallbacks then swallow into corrupted results
+ _calculated_lock = threading.Lock()
def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None:
class_name = self.__class__.__name__
@@ -508,24 +516,31 @@ class Model(_MyMutableMapping):
return Model(self.__dict__)
def __new__(cls, *args: typing.Any, **kwargs: typing.Any) -> Self: # pylint: disable=unused-argument
- # we know the last three classes in mro are going to be 'Model', 'dict', and 'object'
- mros = cls.__mro__[:-3][::-1] # ignore model, dict, and object parents, and reverse the mro order
- attr_to_rest_field: typing.Dict[str, _RestField] = { # map attribute name to rest_field property
- k: v for mro_class in mros for k, v in mro_class.__dict__.items() if k[0] != "_" and hasattr(v, "_type")
- }
- annotations = {
- k: v
- for mro_class in mros
- if hasattr(mro_class, "__annotations__") # pylint: disable=no-member
- for k, v in mro_class.__annotations__.items() # pylint: disable=no-member
- }
- for attr, rf in attr_to_rest_field.items():
- rf._module = cls.__module__
- if not rf._type:
- rf._type = rf._get_deserialize_callable_from_annotation(annotations.get(attr, None))
- if not rf._rest_name_input:
- rf._rest_name_input = attr
- cls._attr_to_rest_field: typing.Dict[str, _RestField] = dict(attr_to_rest_field.items())
+ if f"{cls.__module__}.{cls.__qualname__}" not in cls._calculated:
+ with cls._calculated_lock:
+ if f"{cls.__module__}.{cls.__qualname__}" not in cls._calculated:
+ # we know the last three classes in mro are going to be 'Model', 'dict', and 'object'
+ mros = cls.__mro__[:-3][::-1] # ignore model, dict, and object parents, and reverse the mro order
+ attr_to_rest_field: typing.Dict[str, _RestField] = { # map attribute name to rest_field property
+ k: v
+ for mro_class in mros
+ for k, v in mro_class.__dict__.items()
+ if k[0] != "_" and hasattr(v, "_type")
+ }
+ annotations = {
+ k: v
+ for mro_class in mros
+ if hasattr(mro_class, "__annotations__") # pylint: disable=no-member
+ for k, v in mro_class.__annotations__.items() # pylint: disable=no-member
+ }
+ for attr, rf in attr_to_rest_field.items():
+ rf._module = cls.__module__
+ if not rf._type:
+ rf._type = rf._get_deserialize_callable_from_annotation(annotations.get(attr, None))
+ if not rf._rest_name_input:
+ rf._rest_name_input = attr
+ cls._attr_to_rest_field: typing.Dict[str, _RestField] = dict(attr_to_rest_field.items())
+ cls._calculated.add(f"{cls.__module__}.{cls.__qualname__}")
return super().__new__(cls) # pylint: disable=no-value-for-parameter
Packages: azure-ai-documentintelligence 1.0.2, azure-ai-vision-imageanalysis 1.0.0
Python version: 3.14
Disclaimer
Assisted by Claude (emphasis on assisted)
The race condition was observed "in the wild"; the minimal example and the diagnosis were produced with the assistance of AI under close human guidance, and the resulting issue text has (hopefully) been cleaned of most of the AI clutter.
I am always careful when it comes to concurrency issues and the (possible) necessity of adding locks, but the attached diff does exactly this and fixes the described issue (which does not necessarily mean that it is the right place to apply a fix, but at least it pinpoints the problematic code part).
Describe the bug
A race condition in the generated `_model_base.py` causes deserialization of HTTP responses to silently return raw JSON dicts (or partially deserialized models whose nested fields are raw dicts) instead of model objects when the first model objects of a process are constructed concurrently, e.g. when an application issues its first SDK calls from a thread pool.
Affected (at least):
- `azure-ai-vision-imageanalysis` 1.0.0 — `analyze()` can return a plain `dict` instead of `ImageAnalysisResult`, or an `ImageAnalysisResult` whose nested fields (`blocks`, `lines`, `words`, …) are raw dicts.
- `azure-ai-documentintelligence` 1.0.2 — same for `AnalyzeResult`.
- `_model_base.py` (the file is present in most of the modules; is it auto-generated? If so, this information should be added to its header), including the current `main` branch of this repo.
Likely root cause:
`Model.__new__` initializes the per-class metadata `_attr_to_rest_field` lazily and without any locking. It iterates the class `__dict__`s via `cls.__mro__` and then assigns the new `_attr_to_rest_field` attribute into one of those same `__dict__`s (`_model_base.py` lines 510–530 in `azure-ai-vision-imageanalysis` 1.0.0).
When two threads construct the first instances of related model classes concurrently, one thread's iteration races with the other thread's assignment and raises `RuntimeError: dictionary changed size during iteration` (shared mutable `_RestField` state, e.g. `rf._type`, is also written without synchronization).
That `RuntimeError` never surfaces, because `_deserialize_default` swallows any exception and returns the raw input object instead (lines 744–754).
So instead of an error, the application receives corrupted results, which makes this very hard to diagnose in production: it only happens on the first few responses of a freshly started process, only under concurrency, and the symptom (an `AttributeError` like `'dict' object has no attribute 'words'`, possibly much later in unrelated application code) gives no hint of the cause.
To Reproduce
Steps to reproduce the behavior:
1. `pip install azure-ai-vision-imageanalysis==1.0.0 azure-ai-documentintelligence==1.0.2`
2. Take the attached `azure_model_base_race_repro.py` (no Azure credentials/endpoint needed — it calls `_model_base._deserialize(Model, response_json)` directly, which is exactly what the generated operations code does with every HTTP response body).
3. Run `python azure_model_base_race_repro.py ia` (Image Analysis) or `python azure_model_base_race_repro.py di` (Document Intelligence).
Each trial wipes the `azure` modules from `sys.modules` so the lazy class state is uninitialized again, exactly like the first SDK response in a freshly started process, then deserializes a valid response payload from 32 threads concurrently.
Observed output (Python 3.14.0, Linux x86_64; reproduces in 19–20 of 20 trials):
azure_model_base_race_repro.py
Expected behavior
`_deserialize(Model, response_json)` always returns a fully deserialized model object, regardless of how many threads are deserializing concurrently — i.e. the lazy initialization in `Model.__new__` is thread-safe (e.g. double-checked locking around the `_attr_to_rest_field` computation). Independently of the race itself, a failure inside deserialization should arguably not be silently swallowed by the bare `except Exception` in `_deserialize_default`, since returning the raw wire dict turns an internal error into silent data corruption.
Screenshots
N/A (full console output included above).
Additional context
- Versions: `azure-ai-vision-imageanalysis` 1.0.0, `azure-ai-documentintelligence` 1.0.2, `azure-core` 1.41.0.
- The repro sets `sys.setswitchinterval(1e-6)` to widen the race window, but the race also triggers with the default switch interval — the tight setting just makes the repro deterministic.
- The unguarded `__new__` is still present on current `main`, e.g. `sdk/vision/azure-ai-vision-imageanalysis/.../_model_base.py#L510`, so newly generated/released SDKs are affected as well. Since `_model_base.py` is emitted by typespec-python, the proper fix is presumably in the emitter template, with regeneration of affected packages.
- Suggested fix: guard `Model.__new__` with a class-level lock and an "already calculated" check (double-checked locking), so the `__dict__` iteration can never race with the `_attr_to_rest_field` assignment. I'm happy to submit a PR.
- Since `_model_base.py` appears to be autogenerated, I am not adding a PR. But locally, this `git diff` fixes the issue; please note that this fix is generated by Claude (basically adding a `lock` at the critical places; seems reasonable to me, but I am always careful when it comes to concurrency):
git diff