diff --git a/datapackage.json b/datapackage.json
index 26f32c4..5a2a4d9 100644
--- a/datapackage.json
+++ b/datapackage.json
@@ -17464,10 +17464,10 @@
       }
     },
     {
-      "name": "foer_besoegsdag_2",
-      "path": "resources/foer_besoegsdag_2/data.parquet",
-      "title": "foer_besoegsdag_2",
-      "description": "foer_besoegsdag_2",
+      "name": "foer_besoegsdag",
+      "path": "resources/foer_besoegsdag/data.parquet",
+      "title": "Before visit day workflow",
+      "description": "Workflow items completed before study visit days.",
       "schema": {
         "fields": [
           {
@@ -17499,36 +17499,29 @@
             ]
           },
           {
-            "name": "book_bloodsample_wfv2",
-            "title": "book_bloodsample_wfv2",
-            "type": "string",
-            "description": "Blood sample booking prior to visit. Workflow. Visit 2.",
+            "name": "visit_id",
+            "title": "Visit ID",
+            "type": "integer",
+            "description": "The study visit ID that the before-visit-day workflow item was recorded for.",
             "constraints": {
               "required": true,
               "enum": [
-                "Ja",
-                "Nej"
+                2,
+                3,
+                4
               ]
             },
             "categories": [
-              "Ja",
-              "Nej"
+              2,
+              3,
+              4
             ]
           },
           {
-            "name": "cgm_reader_wfv2",
-            "title": "cgm_reader_wfv2",
-            "type": "string",
-            "description": "Reader to be prepared for the visit. The same reader used to initialise the sensor at Visit 1. Wfv2. Derived using the formula: [besg_1__screening_arm_1][cgmreader_v1]",
-            "constraints": {
-              "required": true
-            }
-          },
-          {
-            "name": "cgm_reader_ready_wfv2",
-            "title": "cgm_reader_ready_wfv2",
+            "name": "book_bloodsample",
+            "title": "book_bloodsample",
             "type": "string",
-            "description": "CGM reader ready. Workflow. Visit 2.",
+            "description": "Blood sample booking prior to visit. Workflow.",
             "constraints": {
               "required": true,
               "enum": [
@@ -17540,113 +17533,23 @@
               "Ja",
               "Nej"
             ]
-          }
-        ],
-        "primaryKey": [
-          "event"
-        ]
-      }
-    },
-    {
-      "name": "foer_besoegsdag_3",
-      "path": "resources/foer_besoegsdag_3/data.parquet",
-      "title": "foer_besoegsdag_3",
-      "description": "foer_besoegsdag_3",
-      "schema": {
-        "fields": [
-          {
-            "name": "event",
-            "title": "The unique name of the event",
-            "type": "string",
-            "description": "The unique name identifying the event when the form was filled in.",
-            "constraints": {
-              "required": true
-            }
-          },
-          {
-            "name": "center",
-            "title": "Research center",
-            "type": "string",
-            "description": "The research center where the data item was recorded.",
-            "constraints": {
-              "required": true,
-              "enum": [
-                "Copenhagen",
-                "Aarhus",
-                "Odense"
-              ]
-            },
-            "categories": [
-              "Copenhagen",
-              "Aarhus",
-              "Odense"
-            ]
           },
           {
-            "name": "book_bloodsample_wfv3",
-            "title": "book_bloodsample_wfv3",
-            "type": "string",
-            "description": "Blood sample booking prior to visit. Workflow. Visit 3.",
-            "constraints": {
-              "required": true,
-              "enum": [
-                "Ja",
-                "Nej"
-              ]
-            },
-            "categories": [
-              "Ja",
-              "Nej"
-            ]
-          }
-        ],
-        "primaryKey": [
-          "event"
-        ]
-      }
-    },
-    {
-      "name": "foer_besoegsdag_4",
-      "path": "resources/foer_besoegsdag_4/data.parquet",
-      "title": "foer_besoegsdag_4",
-      "description": "foer_besoegsdag_4",
-      "schema": {
-        "fields": [
-          {
-            "name": "event",
-            "title": "The unique name of the event",
+            "name": "cgm_reader",
+            "title": "cgm_reader",
             "type": "string",
-            "description": "The unique name identifying the event when the form was filled in.",
+            "description": "Reader to be prepared for the visit. The same reader used to initialise the sensor at Visit 1. Derived using the formula: [besg_1__screening_arm_1][cgmreader_v1]",
             "constraints": {
-              "required": true
+              "required": false
             }
           },
           {
-            "name": "center",
-            "title": "Research center",
-            "type": "string",
-            "description": "The research center where the data item was recorded.",
-            "constraints": {
-              "required": true,
-              "enum": [
-                "Copenhagen",
-                "Aarhus",
-                "Odense"
-              ]
-            },
-            "categories": [
-              "Copenhagen",
-              "Aarhus",
-              "Odense"
-            ]
-          },
-          {
-            "name": "book_bloodsample_wfv4",
-            "title": "book_bloodsample_wfv4",
+            "name": "cgm_reader_ready",
+            "title": "cgm_reader_ready",
             "type": "string",
-            "description": "Indicates if bloodsamples have been ordered for the participant. Workflow. Visit 4.",
+            "description": "CGM reader ready. Workflow.",
             "constraints": {
-              "required": true,
+              "required": false,
               "enum": [
                 "Ja",
                 "Nej"
@@ -17658,12 +17561,12 @@
             ]
           },
           {
-            "name": "medicin_print_wfv4",
-            "title": "medicin_print_wfv4",
+            "name": "medicin_print",
+            "title": "medicin_print",
             "type": "string",
-            "description": "Whether the participant's medication list has been printed in preparation for the visit. Workflow. Visit 4.",
+            "description": "Whether the participant's medication list has been printed in preparation for the visit. Workflow.",
             "constraints": {
-              "required": true,
+              "required": false,
               "enum": [
                 "Ja",
                 "Nej"
@@ -17676,7 +17579,8 @@
           }
         ],
         "primaryKey": [
-          "event"
+          "event",
+          "visit_id"
         ]
       }
     },
@@ -23835,17 +23739,17 @@
             "name": "study_week",
             "title": "Study week",
             "type": "integer",
-            "description": "The study week when the SEFNC measurement was recorded. Must bein the range 0-52, where 0 is the baseline week and 52 represents52 weeks after the baseline week.",
+            "description": "The study week when the SEFNC measurement was recorded. Must be in the range 0-52, where 0 is the baseline week and 52 represents 52 weeks after the baseline week.",
             "constraints": {
               "required": true,
               "enum": [
-                1,
+                0,
                 12,
                 52
               ]
             },
             "categories": [
-              1,
+              0,
               12,
               52
             ]
diff --git a/scripts/redcap_dict_to_properties.py b/scripts/redcap_dict_to_properties.py
index 82ed6a7..cbb2edd 100644
--- a/scripts/redcap_dict_to_properties.py
+++ b/scripts/redcap_dict_to_properties.py
@@ -13,9 +13,10 @@
 VAS_TIMEPOINTS = [-10, 30, 60, 90, 120, 180, 240]
 VAS_TIME_FORM_PATTERN = re.compile(r"^vas_(minus10|(30|60|90|120|180|240)_?min)$")
 VAS_TIME_FIELD_PATTERN = re.compile(r"(_fasted)?_(minus10|30|60|90|120|180|240)min$")
-SEFNC_WEEKS = [1, 12, 52]
+
+SEFNC_WEEKS = [0, 12, 52]
 SEFNC_FORM_WEEKS = {
-    "sefnc_baseline_v4": 1,
+    "sefnc_baseline_v4": 0,
     "sefnc_week12_v6": 12,
     "selfefficacy_for_nutrition_change_sefnc_week_52": 52,
 }
@@ -27,6 +28,14 @@
 }
 SEFNC_WEEK_FIELD_PATTERN = re.compile(r"_v(6|10)$")
 
+FOER_BESOEGSDAG_VISITS = [2, 3, 4]
+FOER_BESOEGSDAG_FORM_VISITS = {
+    "foer_besoegsdag_2": 2,
+    "foer_besoegsdag_3": 3,
+    "foer_besoegsdag_4": 4,
+}
+FOER_BESOEGSDAG_VISIT_FIELD_PATTERN = re.compile(r"_wfv(2|3|4)$")
+
 
 def _map(x: Iterable[In], fn: Callable[[In], Out]) -> list[Out]:
     return list(map(fn, x))
@@ -53,6 +62,7 @@ def dictionary_to_properties(
     """Converts REDCap data dictionary to Data Package resources."""
     redcap_fields = _join_sefnc_week_resources(redcap_fields)
     redcap_fields = _join_vas_time_resources(redcap_fields)
+    redcap_fields = _join_foer_besoegsdag_visit_resources(redcap_fields)
     sorted_by_form = sorted(redcap_fields, key=lambda field: field["form_name"])
     grouped_by_form = groupby(sorted_by_form, key=lambda field: field["form_name"])
     return _map(
@@ -211,6 +221,121 @@ def _remove_sefnc_week_from_annotation(annotation: str) -> str:
     ).strip()
 
 
+def _join_foer_besoegsdag_visit_resources(
+    redcap_fields: list[dict[str, str]],
+) -> list[dict[str, str]]:
+    """Combines before-visit-day workflow forms into one resource schema."""
+    common_fields = _get_common_foer_besoegsdag_field_names(redcap_fields)
+    return _deduplicate_foer_besoegsdag_fields(
+        _map(
+            redcap_fields,
+            lambda field: _normalise_foer_besoegsdag_visit_resource_field(
+                field, common_fields
+            ),
+        )
+    )
+
+
+def _get_common_foer_besoegsdag_field_names(
+    redcap_fields: list[dict[str, str]],
+) -> set[str]:
+    """Returns normalised names of fields present in every before-visit-day form."""
+    visit_fields = _map(
+        _filter(redcap_fields, _is_foer_besoegsdag_visit_resource_field),
+        lambda field: (
+            _normalise_foer_besoegsdag_field_name(field["field_name"]),
+            FOER_BESOEGSDAG_FORM_VISITS[field["form_name"]],
+        ),
+    )
+    field_visits = reduce(_add_foer_besoegsdag_field_visit, visit_fields, {})
+
+    return set(
+        _map(
+            _filter(
+                field_visits.items(),
+                lambda item: item[1] == set(FOER_BESOEGSDAG_VISITS),
+            ),
+            itemgetter(0),
+        )
+    )
+
+
+def _add_foer_besoegsdag_field_visit(
+    field_visits: dict[str, set[int]], visit_field: tuple[str, int]
+) -> dict[str, set[int]]:
+    """Records that a field occurs at a visit, without mutating the input mapping."""
+    field_name, visit = visit_field
+    return {
+        **field_visits,
+        field_name: field_visits.get(field_name, set()).union({visit}),
+    }
+
+
+def _normalise_foer_besoegsdag_visit_resource_field(
+    field: dict[str, str], common_fields: set[str]
+) -> dict[str, str]:
+    """Maps a visit-specific field onto the combined before-visit-day form."""
+    if not _is_foer_besoegsdag_visit_resource_field(field):
+        return field
+
+    field_name = _normalise_foer_besoegsdag_field_name(field["field_name"])
+    return {
+        **field,
+        "field_name": field_name,
+        "form_name": "foer_besoegsdag",
+        "required_field": field["required_field"]
+        if field_name in common_fields
+        else "",
+        "field_annotation": _remove_foer_besoegsdag_visit_from_annotation(
+            field["field_annotation"]
+        ),
+    }
+
+
+def _normalise_foer_besoegsdag_field_name(field_name: str) -> str:
+    """Strips the trailing visit suffix (e.g. "_wfv2") from a field name."""
+    return FOER_BESOEGSDAG_VISIT_FIELD_PATTERN.sub("", field_name)
+
+
+def _is_foer_besoegsdag_visit_resource_field(field: dict[str, str]) -> bool:
+    """Checks whether a field belongs to a visit-specific before-visit-day form."""
+    return field["form_name"] in FOER_BESOEGSDAG_FORM_VISITS
+
+
+def _deduplicate_foer_besoegsdag_fields(
+    fields: list[dict[str, str]],
+) -> list[dict[str, str]]:
+    """Keeps the first occurrence of each combined before-visit-day field."""
+    deduplicated_fields, _ = reduce(
+        _append_if_new_foer_besoegsdag_field,
+        fields,
+        ([], set()),
+    )
+    return deduplicated_fields
+
+
+def _append_if_new_foer_besoegsdag_field(
+    result: tuple[list[dict[str, str]], set[str]], field: dict[str, str]
+) -> tuple[list[dict[str, str]], set[str]]:
+    """Appends a field unless the combined form already contains its name."""
+    fields, seen_foer_besoegsdag_fields = result
+    field_name = field["field_name"]
+
+    if field["form_name"] != "foer_besoegsdag":
+        return fields + [field], seen_foer_besoegsdag_fields
+
+    if field_name in seen_foer_besoegsdag_fields:
+        return result
+
+    return (fields + [field], seen_foer_besoegsdag_fields.union({field_name}))
+
+
+def _remove_foer_besoegsdag_visit_from_annotation(annotation: str) -> str:
+    """Removes a trailing "Visit N." or "WfvN." marker from an annotation."""
+    annotation = re.sub(r"\s+Visit\s+[234]\.$", "", annotation, flags=re.IGNORECASE)
+    return re.sub(r"\s+Wfv[234]\.$", "", annotation, flags=re.IGNORECASE).strip()
+
+
 def _form_to_resource(
     form_name: str, fields: list[dict[str, str]]
 ) -> sp.ResourceProperties:
@@ -275,8 +400,8 @@ def _form_to_resource(
             title="Study week",
             type="integer",
             description=(
-                "The study week when the SEFNC measurement was recorded. Must be"
-                "in the range 0-52, where 0 is the baseline week and 52 represents"
+                "The study week when the SEFNC measurement was recorded. Must be "
+                "in the range 0-52, where 0 is the baseline week and 52 represents "
                 "52 weeks after the baseline week."
             ),
             categories=SEFNC_WEEKS,
@@ -288,6 +413,24 @@ def _form_to_resource(
         default_fields.append(visit_field)
         default_fields.append(week_field)
         primary_key.append("visit_id")
+
+    if form_name == "foer_besoegsdag":
+        visit_field = sp.FieldProperties(
+            name="visit_id",
+            title="Visit ID",
+            type="integer",
+            description=(
+                "The study visit ID that the before-visit-day workflow item was "
+                "recorded for."
+            ),
+            categories=FOER_BESOEGSDAG_VISITS,
+            constraints=sp.ConstraintsProperties(
+                required=True,
+                enum=FOER_BESOEGSDAG_VISITS,
+            ),
+        )
+        default_fields.append(visit_field)
+        primary_key.append("visit_id")
 
     # Discard fields displayed for information only
     form_redcap_fields = _filter(
@@ -338,6 +481,9 @@ def _get_resource_title(form_name: str) -> str:
     if form_name == "sefnc":
         return "Self-efficacy for nutrition change"
 
+    if form_name == "foer_besoegsdag":
+        return "Before visit day workflow"
+
     return form_name
 
 
@@ -354,6 +500,9 @@ def _get_resource_description(form_name: str) -> str:
             "self-reported by participants during the study across the weeks."
         )
 
+    if form_name == "foer_besoegsdag":
+        return "Workflow items completed before study visit days."
+
     return form_name
 
 