Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion specifyweb/backend/workbench/upload/column_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from specifyweb.specify.utils.uiformatters import ScopedFormatter

# Allowed values for the `matchBehavior` field of the column-option tuples below.
MatchBehavior = Literal["ignoreWhenBlank", "ignoreAlways", "ignoreNever"]
# Allowed values for the `disambiguationBehavior` field of the column-option tuples below.
# NOTE(review): "pickFirst" presumably resolves an ambiguous match automatically,
# while "ask" leaves it for the user - confirm against the upload/matching code.
DisambiguationBehavior = Literal["ask", "pickFirst"]

# A single row in the workbench. Maps column names to values in the row
Row = dict[str, str]
Expand All @@ -14,9 +15,10 @@ class ColumnOptions(NamedTuple):
matchBehavior: MatchBehavior
nullAllowed: bool
default: str | None
disambiguationBehavior: DisambiguationBehavior

def to_json(self) -> dict | str:
if self.matchBehavior == "ignoreNever" and self.nullAllowed and self.default is None:
if self.matchBehavior == "ignoreNever" and self.nullAllowed and self.default is None and self.disambiguationBehavior == "ask":
return self.column

return dict(self._asdict())
Expand All @@ -26,6 +28,7 @@ class ExtendedColumnOptions(NamedTuple):
matchBehavior: MatchBehavior
nullAllowed: bool
default: str | None
disambiguationBehavior: DisambiguationBehavior
uiformatter: ScopedFormatter | None
schemaitem: Any
picklist: Any
Expand Down
29 changes: 16 additions & 13 deletions specifyweb/backend/workbench/upload/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from specifyweb.specify.datamodel import datamodel
from specifyweb.backend.workbench.upload.predicates import filter_match_key
from .column_options import ExtendedColumnOptions
from .column_options import DisambiguationBehavior, ExtendedColumnOptions
Comment thread
alesan99 marked this conversation as resolved.
Dismissed
from specifyweb.backend.workbench.upload.parse import parse_field, is_latlong, ParseSucess, ParseFailure

# Row type used in this module's signatures (e.g. the `row` parameter of parse_many):
# a mapping from str keys to str values.
Row = dict[str, str]
Expand Down Expand Up @@ -38,17 +38,20 @@ class ParseResult(NamedTuple):
add_to_picklist: PicklistAddition | None
column: str
missing_required: str | None
disambiguation_behavior: DisambiguationBehavior

@classmethod
def from_parse_success(
    cls,
    ps: ParseSucess,
    filter_on: Filter,
    add_to_picklist: PicklistAddition | None,
    column: str,
    missing_required: str | None,
    disambiguation_behavior: DisambiguationBehavior,
):
    """Build a result from a successful field parse.

    Args:
        ps: The successful parse; its ``payload`` becomes the ``upload`` field.
        filter_on: Stored as the ``filter_on`` field.
        add_to_picklist: Stored as the ``add_to_picklist`` field, or ``None``.
        column: Stored as the ``column`` field.
        missing_required: Stored as the ``missing_required`` field, or ``None``.
        disambiguation_behavior: Stored as the ``disambiguation_behavior`` field.

    Returns:
        A new instance of ``cls`` with the fields set as described above.
    """
    return cls(
        filter_on=filter_on,
        upload=ps.payload,
        add_to_picklist=add_to_picklist,
        column=column,
        missing_required=missing_required,
        disambiguation_behavior=disambiguation_behavior,
    )

def match_key(self) -> str:
    """Return the key that ``filter_match_key`` computes for this result's ``filter_on``."""
    criteria = self.filter_on
    return filter_match_key(criteria)


def filter_and_upload(f: Filter, column: str, disambiguation_behavior: DisambiguationBehavior = 'ask') -> ParseResult:
    """Build a ParseResult that passes the same mapping ``f`` in both of its first two fields.

    Args:
        f: Mapping passed as both the first and second positional field.
        column: Passed as the result's column.
        disambiguation_behavior: Passed as the last field; defaults to
            ``'ask'`` so callers that only supply ``f`` and ``column``
            keep working.

    Returns:
        A ParseResult whose third and fifth positional fields are ``None``.
    """
    return ParseResult(f, f, None, column, None, disambiguation_behavior)


def parse_many(tablename: str, mapping: dict[str, ExtendedColumnOptions], row: Row) -> tuple[list[ParseResult], list[WorkBenchParseFailure]]:
Expand Down Expand Up @@ -76,7 +79,7 @@ def parse_value(tablename: str, fieldname: str, value_in: str, colopts: Extended
None
)
result = ParseResult({fieldname: None}, {fieldname: None},
None, colopts.column, missing_required)
None, colopts.column, missing_required, colopts.disambiguationBehavior)
else:
result = _parse(tablename, fieldname,
colopts, colopts.default)
Expand Down Expand Up @@ -105,7 +108,7 @@ def _parse(tablename: str, fieldname: str, colopts: ExtendedColumnOptions, value
field = table.get_field_strict(fieldname)

if colopts.picklist:
result = parse_with_picklist(colopts.picklist, fieldname, value, colopts.column)
result = parse_with_picklist(colopts.picklist, fieldname, value, colopts.column, colopts.disambiguationBehavior,)
if result is not None:
if isinstance(result, ParseResult) and hasattr(field, 'length') and field.length is not None and len(result.upload[fieldname]) > field.length:
return WorkBenchParseFailure(
Expand All @@ -123,19 +126,19 @@ def _parse(tablename: str, fieldname: str, colopts: ExtendedColumnOptions, value
if is_latlong(table, field) and isinstance(parsed, ParseSucess):
coord_text_field = field.name.replace('itude', '') + 'text' if field.name else ''
filter_on = {coord_text_field: parsed.payload[coord_text_field]}
return ParseResult.from_parse_success(parsed, filter_on, None, colopts.column, None)
return ParseResult.from_parse_success(parsed, filter_on, None, colopts.column, None, colopts.disambiguationBehavior)

if isinstance(parsed, ParseFailure):
return WorkBenchParseFailure.from_parse_failure(parsed, colopts.column)
else:
return ParseResult.from_parse_success(parsed, parsed.payload, None, colopts.column, None)
return ParseResult.from_parse_success(parsed, parsed.payload, None, colopts.column, None, colopts.disambiguationBehavior)


def parse_with_picklist(picklist, fieldname: str, value: str, column: str) -> ParseResult | WorkBenchParseFailure | None:
def parse_with_picklist(picklist, fieldname: str, value: str, column: str, disambiguation_behavior: DisambiguationBehavior) -> ParseResult | WorkBenchParseFailure | None:
if picklist.type == 0: # items from picklistitems table
try:
item = picklist.picklistitems.get(title=value)
return filter_and_upload({fieldname: item.value}, column)
return filter_and_upload({fieldname: item.value}, column, disambiguation_behavior)
except ObjectDoesNotExist:
if picklist.readonly:
return WorkBenchParseFailure(
Expand All @@ -144,11 +147,11 @@ def parse_with_picklist(picklist, fieldname: str, value: str, column: str) -> Pa
column
)
else:
return filter_and_upload({fieldname: value}, column)._replace(
return filter_and_upload({fieldname: value}, column, disambiguation_behavior)._replace(
add_to_picklist=PicklistAddition(
picklist=picklist, column=column, value=value)
)
return filter_and_upload({fieldname: value})
return filter_and_upload({fieldname: value}, column, disambiguation_behavior)
Comment thread
alesan99 marked this conversation as resolved.
Dismissed

elif picklist.type == 1: # items from rows in some table
# we ignore this type of picklist because it is primarily used to choose many-to-one's on forms
Expand Down
1 change: 1 addition & 0 deletions specifyweb/backend/workbench/upload/scoping.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def extend_columnoptions(
matchBehavior=colopts.matchBehavior,
nullAllowed=colopts.nullAllowed,
default=colopts.default,
disambiguationBehavior=colopts.disambiguationBehavior,
schemaitem=schemaitem,
# Formatters are "scoped" here, that is, all they need is a value coming directly from the row.
uiformatter=(None if scoped_formatter is None else CustomRepr(scoped_formatter, friendly_repr)),
Expand Down
93 changes: 77 additions & 16 deletions specifyweb/backend/workbench/upload/tests/testparsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def test_tree_cols_with_ignoreWhenBlank(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -466,7 +466,7 @@ def test_higher_tree_cols_with_ignoreWhenBlank(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None)),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None, disambiguationBehavior="ask")),
Subspecies=dict(name=parse_column_options('Subspecies')),
)
)
Expand All @@ -488,7 +488,7 @@ def test_tree_cols_with_ignoreNever(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=True, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=True, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -508,7 +508,7 @@ def test_tree_cols_with_required(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=False, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreNever", nullAllowed=False, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -530,7 +530,7 @@ def test_tree_cols_with_ignoreAlways(self) -> None:
ranks=dict(
Genus=dict(name=parse_column_options('Genus')),
Species=dict(name=parse_column_options('Species'),
author=ColumnOptions(column='Species Author', matchBehavior="ignoreAlways", nullAllowed=True, default=None))
author=ColumnOptions(column='Species Author', matchBehavior="ignoreAlways", nullAllowed=True, default=None, disambiguationBehavior="ask"))
)
)
data = [
Expand All @@ -551,7 +551,7 @@ def test_wbcols_with_ignoreWhenBlank(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -577,7 +577,7 @@ def test_wbcols_with_ignoreWhenBlank_and_default(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=True, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -609,7 +609,7 @@ def test_wbcols_with_ignoreNever(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -634,7 +634,7 @@ def test_wbcols_with_ignoreAlways(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=True, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=True, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -662,7 +662,7 @@ def test_wbcols_with_default(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -690,7 +690,7 @@ def test_wbcols_with_default_matching(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=True, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -721,7 +721,7 @@ def test_wbcols_with_default_and_null_disallowed(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default="John"),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default="John", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -750,7 +750,7 @@ def test_wbcols_with_default_blank(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default=""),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default="", disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -780,7 +780,7 @@ def test_wbcols_with_null_disallowed(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreNever", nullAllowed=False, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -805,7 +805,7 @@ def test_wbcols_with_null_disallowed_and_ignoreWhenBlank(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=False, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreWhenBlank", nullAllowed=False, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand Down Expand Up @@ -834,7 +834,7 @@ def test_wbcols_with_null_disallowed_and_ignoreAlways(self) -> None:
name='Agent',
wbcols={
'lastname': parse_column_options('lastname'),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=False, default=None),
'firstname': ColumnOptions(column='firstname', matchBehavior="ignoreAlways", nullAllowed=False, default=None, disambiguationBehavior="ask"),
},
overrideScope=None,
static={},
Expand All @@ -857,3 +857,64 @@ def test_wbcols_with_null_disallowed_and_ignoreAlways(self) -> None:
self.assertIsInstance(results[2].record_result, Uploaded)
self.assertIsInstance(results[3].record_result, Matched)
self.assertIsInstance(results[4].record_result, Matched)

class DisambiguationBehaviorTests(UploadTestsBase):
    """Upload tests for the per-column ``disambiguationBehavior`` option."""

    def test_pickFirst_disambiguation_behavior(self) -> None:
        """A cataloger that matches two agents is reported as Matched under 'pickFirst'."""
        # Step 1: upload two agents sharing first and last name, differing
        # only in email, so a later match on name alone hits both of them.
        seed_plan = UploadTable(
            name='Agent',
            wbcols={
                'firstname': parse_column_options('firstname'),
                'lastname': parse_column_options('lastname'),
                'email': parse_column_options('email'),
            },
            overrideScope=None,
            static={},
            toOne={},
            toMany={}
        )
        seed_rows = [
            {'lastname': 'Doe', 'firstname': 'John', 'email': '0'},
            {'lastname': 'Doe', 'firstname': 'John', 'email': '1'},
        ]

        seed_results = do_upload(self.collection, seed_rows, seed_plan, self.agent.id)
        for seed_result in seed_results:
            validate([seed_result.to_json()], upload_results_schema, cls=Draft7Validator)

        self.assertIsInstance(seed_results[0].record_result, Uploaded)
        self.assertIsInstance(seed_results[1].record_result, Uploaded)

        # Step 2: upload collection objects whose cataloger is matched on
        # name only, with the firstname column set to 'pickFirst'.
        cataloger_plan = UploadTable(
            name='Agent',
            wbcols={
                'lastname': parse_column_options('lastname'),
                'firstname': ColumnOptions(
                    column='firstname',
                    matchBehavior="ignoreNever",
                    nullAllowed=True,
                    default=None,
                    disambiguationBehavior="pickFirst",
                ),
            },
            overrideScope=None,
            static={},
            toOne={},
            toMany={},
        )
        co_plan = UploadTable(
            name='Collectionobject',
            wbcols={
                'catalognumber': parse_column_options('Cat #'),
            },
            overrideScope=None,
            static={},
            toOne={'cataloger': cataloger_plan},
            toMany={}
        )
        co_rows = [
            {'Cat #': '124', 'lastname': 'Doe', 'firstname': 'John'},
            {'Cat #': '125', 'lastname': 'Doe', 'firstname': 'Jane'}
        ]
        co_results = do_upload(self.collection, co_rows, co_plan, self.agent.id)
        for co_result in co_results:
            validate([co_result.to_json()], upload_results_schema, cls=Draft7Validator)

        # Row 1 ("John Doe") matches both seeded agents; row 2 ("Jane Doe")
        # matches neither and is expected to be uploaded as a new agent.
        ambiguous_cataloger = co_results[0].toOne['cataloger']
        new_cataloger = co_results[1].toOne['cataloger']
        self.assertIsInstance(ambiguous_cataloger.record_result, Matched, "Record was not disambiguated automatically despite having disambiguationBehavior='pickFirst'.")
        self.assertIsInstance(new_cataloger.record_result, Uploaded)
6 changes: 4 additions & 2 deletions specifyweb/backend/workbench/upload/upload_attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,17 @@ def add_attachments_to_plan(
column=f"_ATTACHMENT_ORDINAL_{index}",
matchBehavior="ignoreNever",
nullAllowed=True,
default="0"
default="0",
disambiguationBehavior="ask"
)
attackment_columns = {}
for field in attachment_fields_to_copy:
attackment_columns[field] = ColumnOptions(
column=f"_ATTACHMENT_{field.upper()}_{index}",
matchBehavior="ignoreNever",
nullAllowed=True,
default=attachment_field_default(field)
default=attachment_field_default(field),
disambiguationBehavior="ask"
)
attachment_uploadable = UploadTable(
name="Attachment",
Expand Down
Loading
Loading