diff --git a/src/datajoint/condition.py b/src/datajoint/condition.py
index 55f095246..f5789348a 100644
--- a/src/datajoint/condition.py
+++ b/src/datajoint/condition.py
@@ -268,6 +268,20 @@ def assert_join_compatibility(
             lineage2 = expr2.heading[name].lineage
             # Semantic match requires both lineages to be non-None and equal
             if lineage1 is None or lineage2 is None or lineage1 != lineage2:
+                if lineage1 is None or lineage2 is None:
+                    # Missing lineage usually means stale ~lineage rows that survived
+                    # an upgrade or a partial declare. Decoration in 2.3+ refreshes
+                    # lineage automatically, so this typically indicates a schema
+                    # that has not been re-decorated since the upgrade.
+                    raise DataJointError(
+                        f"Cannot join on attribute `{name}`: lineage missing on "
+                        f"one side ({lineage1} vs {lineage2}). This usually "
+                        f"indicates a stale `~lineage` entry from an older "
+                        f"DataJoint version or an incomplete declare. Run "
+                        f"`schema.rebuild_lineage()` to recompute lineage from "
+                        f"current FK definitions. If the lineages are genuinely "
+                        f"different, use `.proj()` to rename one of the attributes."
+                    )
                 raise DataJointError(
                     f"Cannot join on attribute `{name}`: "
                     f"different lineages ({lineage1} vs {lineage2}). "
diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py
index ff1b0e234..fa934d569 100644
--- a/src/datajoint/schemas.py
+++ b/src/datajoint/schemas.py
@@ -303,6 +303,26 @@ def _decorate_table(self, table_class: type, context: dict[str, Any], assert_dec
         if not is_declared and not assert_declared and create_tables:
             instance.declare(context)
             self.connection.dependencies.clear()
+        elif is_declared and create_tables:
+            # Table already exists — declare() didn't run, so _populate_lineage
+            # didn't either. Scan the already-loaded heading for the symptom
+            # of stale/missing lineage rows (#1454): any PK attribute with
+            # lineage=None indicates the ~lineage table is missing rows for
+            # this table. Only then trigger a refresh — no extra DB queries
+            # on healthy schemas, automatic repair when the bug is present.
+            #
+            # Note: stale-but-non-None rows (DJ version skew that wrote a
+            # different string format) are not auto-detected here; users hit
+            # the tailored "rebuild_lineage" error message on first join.
+            try:
+                pk_lineages = [instance.heading[attr].lineage for attr in instance.primary_key]
+            except Exception:  # best-effort probe: an unreadable heading must not break decoration
+                pk_lineages = []
+            if any(lineage is None for lineage in pk_lineages):
+                # NOTE(review): the heading inspected above was loaded before the
+                # refresh, so it may keep reporting lineage=None in memory until it
+                # is reloaded — confirm whether a heading reload is needed here.
+                instance._refresh_lineage(context)
         is_declared = is_declared or instance.is_declared
 
         # add table definition to the doc string
diff --git a/src/datajoint/table.py b/src/datajoint/table.py
index 7f8cbaf70..ea82cefec 100644
--- a/src/datajoint/table.py
+++ b/src/datajoint/table.py
@@ -262,6 +262,44 @@ def _populate_lineage(self, primary_key, fk_attribute_map):
         if entries:
             insert_lineages(self.connection, self.database, entries)
 
+    def _refresh_lineage(self, context=None):
+        """
+        Re-derive ``~lineage`` rows from the current definition and overwrite them.
+
+        Called by ``@schema`` decoration for a table that is already declared
+        when a primary-key attribute in its loaded heading has ``lineage=None``,
+        so that rows missing after an upgrade or a partial declare are restored.
+        The actual deletion + re-insertion happens in ``_populate_lineage``;
+        this method just parses the definition to obtain ``primary_key`` and
+        ``fk_attribute_map`` without executing any DDL. ``context`` is passed
+        through to ``declare`` unchanged.
+
+        Errors during refresh (e.g. missing write permission on ``~lineage``) are
+        logged and swallowed; a stale row is preferable to a failed import.
+        """
+        try:
+            (
+                _,
+                _,
+                primary_key,
+                fk_attribute_map,
+                _,
+                _,
+            ) = declare(
+                self.full_table_name,
+                self.definition,
+                context,
+                self.connection.adapter,
+                config=self.connection._config,
+            )
+            self._populate_lineage(primary_key, fk_attribute_map)
+        except Exception as exc:  # noqa: BLE001 — defensive; see docstring
+            logger.warning(
+                f"Could not refresh lineage for {self.full_table_name}: {exc}. "
+                "If you encounter `different lineages` errors, run "
+                "`schema.rebuild_lineage()` to rebuild from current FK definitions."
+            )
+
     def alter(self, prompt=True, context=None):
         """
         Alter the table definition from self.definition
diff --git a/tests/integration/test_semantic_matching.py b/tests/integration/test_semantic_matching.py
index d8dff27fa..3d468a2b7 100644
--- a/tests/integration/test_semantic_matching.py
+++ b/tests/integration/test_semantic_matching.py
@@ -340,3 +340,166 @@ def test_rebuild_lineage_populates_table(self, schema_semantic):
         # Check that lineages were populated for Student table
         lineages = get_table_lineages(schema_semantic.connection, schema_semantic.database, "student")
         assert "student_id" in lineages
+
+
+class TestLineageRefreshOnDecoration:
+    """Tests for #1454: @schema decoration auto-heals missing ~lineage entries.
+
+    Contract: when an already-declared table's heading reports any PK attribute
+    with lineage=None, decoration triggers a refresh. The check is in-memory
+    against the heading's already-loaded lineage values — no extra DB queries
+    on healthy schemas. Stale-but-non-None entries (e.g. DJ version skew) are
+    NOT auto-healed and require manual rebuild_lineage().
+    """
+
+    def test_redecorate_restores_missing_lineage(self, schema_semantic):
+        """
+        Delete a table's ~lineage rows entirely, then re-decorate — rows are
+        recreated. Primary auto-heal path: PK lineage=None triggers refresh.
+        """
+        from datajoint.lineage import get_lineage, delete_table_lineages
+        from datajoint.heading import Heading
+
+        delete_table_lineages(schema_semantic.connection, schema_semantic.database, "trial")
+        # Force heading reload so the deleted state is reflected in memory
+        old_heading = Trial._heading
+        Trial._heading = Heading(table_info=old_heading.table_info)
+        try:
+            assert get_lineage(schema_semantic.connection, schema_semantic.database, "trial", "session_id") is None
+
+            schema_semantic(Trial)
+
+            refreshed = get_lineage(schema_semantic.connection, schema_semantic.database, "trial", "session_id")
+            assert refreshed is not None and "session" in refreshed.lower()
+        finally:
+            # Restore lineage so subsequent tests see clean state even on failure
+            schema_semantic.rebuild_lineage()
+            Trial._heading = Heading(table_info=old_heading.table_info)
+
+    def test_redecorate_heals_partial_lineage(self, schema_semantic):
+        """
+        Mixed state: one row stale (non-None bogus), another missing. The in-memory
+        check fires on the missing row and the refresh fixes both.
+        """
+        from datajoint.lineage import get_lineage, delete_table_lineages, insert_lineages
+        from datajoint.heading import Heading
+
+        correct_student = get_lineage(schema_semantic.connection, schema_semantic.database, "enrollment", "student_id")
+        assert correct_student is not None
+
+        # Wipe both rows, then re-insert ONLY student_id with a stale value.
+        # course_id is now missing → triggers auto-heal of all enrollment rows.
+        delete_table_lineages(schema_semantic.connection, schema_semantic.database, "enrollment")
+        insert_lineages(
+            schema_semantic.connection,
+            schema_semantic.database,
+            [("enrollment", "student_id", "stale_schema.stale_table.stale_attr")],
+        )
+        old_heading = Enrollment._heading
+        Enrollment._heading = Heading(table_info=old_heading.table_info)
+
+        try:
+            schema_semantic(Enrollment)
+
+            student_lineage = get_lineage(
+                schema_semantic.connection, schema_semantic.database, "enrollment", "student_id"
+            )
+            assert student_lineage == correct_student
+            course_lineage = get_lineage(schema_semantic.connection, schema_semantic.database, "enrollment", "course_id")
+            assert course_lineage is not None and "course" in course_lineage.lower()
+        finally:
+            # Restore lineage so a failed heal cannot leak the bogus row into later tests
+            schema_semantic.rebuild_lineage()
+            Enrollment._heading = Heading(table_info=old_heading.table_info)
+
+    def test_redecorate_skips_when_lineage_healthy(self, schema_semantic):
+        """
+        Healthy schema: re-decoration must issue no DELETE/INSERT against ~lineage.
+        Verifies the zero-cost path — the in-memory check skips the refresh.
+        """
+        from datajoint.lineage import get_table_lineages
+
+        # Pre-condition: healthy lineage state
+        assert get_table_lineages(schema_semantic.connection, schema_semantic.database, "trial")
+
+        # Intercept any ~lineage write
+        connection = schema_semantic.connection
+        original_query = connection.query
+        write_calls = []
+
+        def counting_query(sql, *args, **kwargs):
+            if "lineage" in sql.lower() and any(tok in sql.lower() for tok in ("delete", "insert")):
+                write_calls.append(sql)
+            return original_query(sql, *args, **kwargs)
+
+        connection.query = counting_query
+        try:
+            schema_semantic(Trial)
+        finally:
+            connection.query = original_query
+
+        assert not write_calls, (
+            f"Healthy schema decoration must not write to ~lineage; " f"observed {len(write_calls)} write(s): {write_calls}"
+        )
+
+    def test_stale_non_none_lineage_not_auto_refreshed(self, schema_semantic):
+        """
+        Stale-but-non-None lineage values are NOT auto-healed. Users with this
+        case must call dj.migrate.rebuild_lineage(schema) or schema.rebuild_lineage().
+        Documents the limitation explicitly.
+        """
+        from datajoint.lineage import (
+            get_lineage,
+            delete_table_lineages,
+            insert_lineages,
+            get_table_lineages,
+        )
+        from datajoint.heading import Heading
+
+        # Replace ALL trial rows with non-None stale values — no None state.
+        original = get_table_lineages(schema_semantic.connection, schema_semantic.database, "trial")
+        delete_table_lineages(schema_semantic.connection, schema_semantic.database, "trial")
+        stale_entries = [("trial", attr, f"stale_schema.stale.{attr}") for attr in original]
+        insert_lineages(schema_semantic.connection, schema_semantic.database, stale_entries)
+        old_heading = Trial._heading
+        Trial._heading = Heading(table_info=old_heading.table_info)
+
+        try:
+            schema_semantic(Trial)
+            still_stale = get_lineage(schema_semantic.connection, schema_semantic.database, "trial", "session_id")
+            assert still_stale == "stale_schema.stale.session_id", (
+                f"Expected stale value to persist (no auto-heal for non-None stale); " f"got {still_stale!r}"
+            )
+
+            # Manual rebuild fixes it
+            schema_semantic.rebuild_lineage()
+            fixed = get_lineage(schema_semantic.connection, schema_semantic.database, "trial", "session_id")
+            assert fixed is not None and fixed != "stale_schema.stale.session_id"
+        finally:
+            schema_semantic.rebuild_lineage()
+            Trial._heading = Heading(table_info=old_heading.table_info)
+
+    def test_missing_lineage_error_points_to_rebuild(self, schema_semantic):
+        """
+        When a join fails because one side has None lineage, the error must
+        point the user at `schema.rebuild_lineage()`.
+        """
+        from datajoint.lineage import delete_table_lineages
+        from datajoint.heading import Heading
+
+        # Wipe enrollment.student_id lineage by deleting the row, then force the
+        # class-level heading to reload from DB so it reflects the missing row.
+        delete_table_lineages(schema_semantic.connection, schema_semantic.database, "enrollment")
+        old_heading = Enrollment._heading
+        Enrollment._heading = Heading(table_info=old_heading.table_info)
+        try:
+            assert Enrollment().heading["student_id"].lineage is None
+
+            with pytest.raises(DataJointError) as exc_info:
+                Student() * Enrollment()
+            assert "rebuild_lineage" in str(exc_info.value), f"Error must mention rebuild_lineage(); got: {exc_info.value}"
+            assert "stale" in str(exc_info.value).lower() or "missing" in str(exc_info.value).lower()
+        finally:
+            # Restore lineage so subsequent tests see clean state
+            schema_semantic.rebuild_lineage()
+            Enrollment._heading = Heading(table_info=old_heading.table_info)