From dce45048a85372394acdff930b4359e76b9e9109 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:27:21 +0200 Subject: [PATCH 1/9] feat: add ds.standardiseDf --- DESCRIPTION | 4 +- R/ds.standardiseDf.R | 603 +++++++++++++++++ tests/testthat/_snaps/smk-standardiseDf.md | 86 +++ tests/testthat/helpers.R | 218 +++++++ tests/testthat/test-smk-standardiseDf.R | 712 +++++++++++++++++++++ 5 files changed, 1622 insertions(+), 1 deletion(-) create mode 100644 R/ds.standardiseDf.R create mode 100644 tests/testthat/_snaps/smk-standardiseDf.md create mode 100644 tests/testthat/helpers.R create mode 100644 tests/testthat/test-smk-standardiseDf.R diff --git a/DESCRIPTION b/DESCRIPTION index e5e72e97..6876d043 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,7 +69,9 @@ Imports: gridExtra, data.table, methods, - dplyr + dplyr, + assertthat, + cli Suggests: lme4, httr, diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R new file mode 100644 index 00000000..daaadbd6 --- /dev/null +++ b/R/ds.standardiseDf.R @@ -0,0 +1,603 @@ +#' Fill DataFrame with Missing Columns and Adjust Classes +#' +#' This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary. +#' It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts. +#' +#' @param df.name Name of the input DataFrame to fill. +#' @param newobj Name of the new DataFrame object created after filling. +#' @param fix_class Character, determines behaviour if class of variables is not the same in all +#' studies. Option "ask" (default) provides the user with a prompt asking if they want to set the +#' class across all studies, option "no" will throw an error if class conflicts are present. +#' @param fix_levels Character, determines behaviour if levels of factor variables is not the same +#' in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set +#' the levels of factor variables to be the same across all studies, whilst option "no" will throw +#' an error if factor variables do not have the same class. +#' @param datasources Data sources from which to aggregate data. Default is `NULL`. +#' @importFrom assertthat assert_that +#' @importFrom DSI datashield.aggregate datashield.assign +#' @return The filled DataFrame with added columns and adjusted classes or factor levels. +#' @export +ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", fix_levels = "ask", + datasources = NULL) { + fill_warnings <- list() + + .check_arguments(df.name, newobj, fix_class, fix_levels) + + if(is.null(datasources)){ + datasources <- datashield.connections_find() + } + + col_names <- datashield.aggregate(datasources, call("colnamesDS", df.name)) + .stop_if_cols_identical(col_names) + + var_classes <- .get_var_classes(df.name, datasources) + class_conflicts <- .identify_class_conflicts(var_classes) + + datashield.assign(datasources, newobj, as.symbol(df.name)) + + if (length(class_conflicts) > 0 & fix_class == "no") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Variables do not have the same class in all studies and `fix_class` is 'no'") + } else if (length(class_conflicts) > 0 & fix_class == "ask") { + class_decisions <- prompt_user_class_decision_all_vars( + names(class_conflicts), + var_classes$server, + dplyr::select(var_classes, all_of(names(class_conflicts))), + newobj, + datasources + ) + + withCallingHandlers({ + .fix_classes(newobj, names(class_conflicts), class_decisions, newobj, datasources) + }, warning = function(w) { + fill_warnings <<- c(fill_warnings, conditionMessage(w)) # Append warning to the list + invokeRestart("muffleWarning") # Suppress immediate display of the warning + }) + } + + unique_cols <- .get_unique_cols(col_names) + .add_missing_cols_to_df(newobj, unique_cols, newobj, datasources) + new_names <- datashield.aggregate(datasources, call("colnamesDS", newobj)) + added_cols <- .get_added_cols(col_names, new_names) + + new_classes <- .get_var_classes(newobj, datasources) + factor_vars <- .identify_factor_vars(new_classes) + factor_levels <- .get_factor_levels(newobj, factor_vars, datasources) + level_conflicts <- .identify_level_conflicts(factor_levels) + + if (length(level_conflicts) > 0 & fix_levels == "no") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Factor variables do not have the same levels in all studies and `fix_levels` is 'no'") + } else if (length(level_conflicts) > 0 & fix_levels == "ask") { + levels_decision <- ask_question_wait_response_levels(level_conflicts, newobj, datasources) + } + + if (levels_decision == "1") { + unique_levels <- .get_unique_levels(factor_levels, level_conflicts) + .set_factor_levels(newobj, unique_levels, datasources) + } + + .print_out_messages(added_cols, class_decisions, names(class_conflicts), unique_levels, + level_conflicts, levels_decision, newobj) + + .handle_warnings(fill_warnings) + .print_class_warning(class_conflicts, fix_class, class_decisions) +} + +#' Check Function Arguments for Validity +#' +#' This function validates the arguments provided to ensure they meet specified conditions. +#' It checks that the `fix_class` and `fix_levels` arguments are set to accepted values +#' and that `df.name` and `newobj` are character strings. +#' +#' @param df.name A character string representing the name of the data frame. +#' @param newobj A character string representing the name of the new object to be created. +#' @param fix_class A character string indicating the method for handling class issues. +#' Must be either `"ask"` or `"no"`. +#' @param fix_levels A character string indicating the method for handling level issues. +#' Must be either `"ask"` or `"no"`. +#' @return NULL. This function is used for validation and does not return a value. +#' @importFrom assertthat assert_that +#' @noRd +.check_arguments <- function(df.name, newobj, fix_class, fix_levels) { + assert_that(fix_class %in% c("ask", "no")) + assert_that(fix_levels %in% c("ask", "no")) + assert_that(is.character(df.name)) + assert_that(is.character(newobj)) +} + +#' Stop If Columns Are Identical +#' +#' Checks if the columns in the data frames are identical and throws an error if they are. +#' +#' @param col_names A list of column names from different data sources. +#' @return None. Throws an error if columns are identical. +#' @importFrom cli cli_abort +#' @noRd +.stop_if_cols_identical <- function(col_names) { + are_identical <- all(sapply(col_names, identical, col_names[[1]])) + if (are_identical) { + cli_abort("Columns are identical in all data frames: nothing to fill") + } +} + +#' Get Variable Classes from DataFrame +#' +#' Retrieves the class of each variable in the specified DataFrame from different data sources. +#' +#' @param df.name Name of the input DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return A DataFrame containing the variable classes from each data source. +#' @import dplyr +#' @noRd +.get_var_classes <- function(df.name, datasources) { + cally <- call("getClassAllColsDS", df.name) + classes <- datashield.aggregate(datasources, cally) %>% + bind_rows(.id = "server") + return(classes) +} + +#' Identify Class Conflicts +#' +#' Identifies conflicts in variable classes across different data sources. +#' +#' @param classes A DataFrame containing variable classes across data sources. +#' @return A list of variables that have class conflicts. +#' @import dplyr +#' @importFrom purrr map +#' @noRd +.identify_class_conflicts <- function(classes) { + server <- NULL + different_class <- classes |> + dplyr::select(-server) |> + map(~ unique(na.omit(.))) + + out <- different_class[which(different_class %>% map(length) > 1)] + return(out) +} + +#' Prompt User for Class Decision for All Variables +#' +#' Prompts the user to resolve class conflicts for all variables. +#' +#' @param vars A vector of variable names with class conflicts. +#' @param all_servers The names of all servers. +#' @param all_classes The classes of the variables across servers. +#' @return A vector of decisions for each variable's class. +#' @noRd +prompt_user_class_decision_all_vars <- function(vars, all_servers, all_classes, newobj, datasources) { + decisions <- c() + for (i in 1:length(vars)) { + decisions[i] <- prompt_user_class_decision(vars[i], all_servers, all_classes[[i]], newobj, datasources) + } + return(decisions) +} + +#' Prompt User for Class Decision for a Single Variable +#' +#' Prompts the user to resolve a class conflict for a single variable. +#' +#' @param var The variable name with a class conflict. +#' @param all_servers The names of all servers. +#' @param all_classes The classes of the variable across servers. +#' @importFrom cli cli_alert_warning cli_alert_danger +#' @return A decision for the variable's class. +#' @noRd +prompt_user_class_decision <- function(var, servers, classes, newobj, datasources) { + cli_alert_warning("`ds.dataFrameFill` requires that all columns have the same class.") + cli_alert_danger("Column {.strong {var}} has following classes:") + print_all_classes(servers, classes) + cli_text("") + return(ask_question_wait_response_class(var, newobj, datasources)) +} + +#' Print All Server-Class Pairs +#' +#' This function prints out a list of server names along with their corresponding +#' class types. It formats the output with a bullet-point list using the `cli` package. +#' +#' @param all_servers A character vector containing the names of servers. +#' @param all_classes A character vector containing the class types corresponding +#' to each server. +#' @return This function does not return a value. It prints the server-class pairs +#' to the console as a bulleted list. +#' @importFrom cli cli_ul cli_li cli_end +#' @noRd +print_all_classes <- function(all_servers, all_classes) { + combined <- paste(all_servers, all_classes, sep = ": ") + cli_ul() + for (i in 1:length(combined)) { + cli_li("{combined[i]}") + } + cli_end() +} + +#' Ask Question and Wait for Class Response +#' +#' Prompts the user with a question and waits for a response related to class decisions. +#' +#' @param question The question to ask the user. +#' @return The user's decision. +#' @importFrom cli cli_text cli_alert_warning cli_abort +#' @noRd +ask_question_wait_response_class <- function(var, newobj, datasources) { + readline <- NULL + ask_question_class(var) + answer <- readline() + if (answer == "6") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Aborted `ds.dataFrameFill`", .call = NULL) + } else if (!answer %in% as.character(1:5)) { + cli_text("") + cli_alert_warning("Invalid input. Please try again.") + cli_text("") + ask_question_wait_response_class(var, newobj, datasources) + } else { + return(answer) + } +} + +#' Prompt User for Class Conversion Options +#' +#' This function prompts the user with options to convert a variable to a specific class (e.g., factor, integer, numeric, character, or logical). +#' The function provides a list of class conversion options for the specified variable and includes an option to cancel the operation. +#' +#' @param var The name of the variable for which the user is prompted to select a class conversion option. +#' +#' @importFrom cli cli_alert_info cli_ol +#' @return None. This function is used for prompting the user and does not return a value. +#' @examples +#' ask_question("variable_name") +#' @noRd +ask_question_class <- function(var) { + cli_alert_info("Would you like to:") + class_options <- c("a factor", "an integer", "numeric", "a character", "a logical vector") + class_message <- paste0("Convert `{var}` to ", class_options, " in all studies") + cli_ol( + c(class_message, "Cancel `ds.dataFrameFill` operation") + ) +} + +#' Fix Variable Classes +#' +#' Applies the user's class decisions to fix the classes of variables across different data sources. +#' +#' @param df.name The name of the DataFrame. +#' @param different_classes A list of variables with class conflicts. +#' @param class_decisions The decisions made by the user. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with consistent variable classes. +#' @noRd +.fix_classes <- function(df.name, different_classes, class_decisions, newobj, datasources) { + cally <- call("fixClassDS", df.name, different_classes, class_decisions) + datashield.assign(datasources, newobj, cally) +} + +#' Get Unique Columns from Data Sources +#' +#' Retrieves all unique columns from the data sources. +#' +#' @param col_names A list of column names. +#' @return A vector of unique column names. +#' @noRd +.get_unique_cols <- function(col_names) { + return( + unique( + unlist(col_names) + ) + ) +} + +#' Add Missing Columns to DataFrame +#' +#' Adds any missing columns to the DataFrame to ensure all columns are present across data sources. +#' +#' @param df.name The name of the DataFrame. +#' @param unique_cols A vector of unique column names. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with added columns. +#' @noRd +.add_missing_cols_to_df <- function(df.name, cols_to_add_if_missing, newobj, datasources) { + cally <- call("fixColsDS", df.name, cols_to_add_if_missing) + datashield.assign(datasources, newobj, cally) +} + +#' Get Added Columns +#' +#' Compares the old and new column names and identifies newly added columns. +#' +#' @param old_names A list of old column names. +#' @param new_names A list of new column names. +#' @importFrom purrr pmap +#' @return A list of added column names. +#' @noRd +.get_added_cols <- function(old_names, new_names) { + list(old_names, new_names) %>% + pmap(function(.x, .y) { + .y[!.y %in% .x] + }) +} + +#' Identify Factor Variables +#' +#' Identifies which variables are factors in the DataFrame. +#' +#' @param var_classes A DataFrame containing variable classes. +#' @return A vector of factor variables. +#' @noRd +.identify_factor_vars <- function(var_classes) { + return( + var_classes %>% + dplyr::filter(row_number() == 1) %>% + dplyr::select(where(~ . == "factor")) + ) +} + +#' Get Factor Levels from Data Sources +#' +#' Retrieves the levels of factor variables from different data sources. +#' +#' @param factor_vars A vector of factor variables. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return A list of factor levels. +#' @noRd +.get_factor_levels <- function(df, factor_vars, datasources) { + factor_vars <- paste(names(factor_vars), collapse = ",") + cally <- call("getAllLevelsDS", df, factor_vars) + return(datashield.aggregate(datasources, cally)) +} + +#' Identify Factor Level Conflicts +#' +#' Identifies conflicts in factor levels across different data sources. +#' +#' @param factor_levels A list of factor levels. +#' @return A list of variables with level conflicts. +#' @importFrom purrr map_lgl pmap_lgl +#' @noRd +.identify_level_conflicts <- function(factor_levels) { + levels <- factor_levels %>% + pmap_lgl(function(...) { + args <- list(...) + !all(map_lgl(args[-1], ~ identical(.x, args[[1]]))) + }) + + return(names(levels[levels == TRUE])) +} + +#' Ask Question and Wait for Response on Factor Levels +#' +#' Prompts the user with options for resolving factor level conflicts and waits for a response. +#' +#' @param level_conflicts A list of variables with factor level conflicts. +#' @return The user's decision. +#' @noRd +ask_question_wait_response_levels <- function(level_conflicts, newobj, datasources) { + .make_levels_message(level_conflicts) + answer <- readline() + if (answer == "3") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Aborted `ds.dataFrameFill`", .call = NULL) + } else if (!answer %in% as.character(1:2)) { + cli_alert_warning("Invalid input. Please try again.") + cli_alert_info("") + .make_levels_message(level_conflicts) + return(ask_question_wait_response_levels(level_conflicts, newobj, datasources)) + } else { + return(answer) + } +} + +#' Make Factor Level Conflict Message +#' +#' Creates a message to alert the user about factor level conflicts and prompt for action. +#' +#' @param level_conflicts A list of variables with factor level conflicts. +#' @importFrom cli cli_alert_warning cli_alert_info cli_ol +#' @return None. Prints the message to the console. +#' @noRd +.make_levels_message <- function(level_conflicts) { + cli_alert_warning("Warning: factor variables {level_conflicts} do not have the same levels in all studies") + cli_alert_info("Would you like to:") + cli_ol(c("Create the missing levels where they are not present", "Do nothing", "Cancel `ds.dataFrameFill` operation")) +} + +#' Get Unique Factor Levels +#' +#' Retrieves the unique factor levels for variables with conflicts. +#' +#' @param factor_levels A list of factor levels. +#' @param level_conflicts A list of variables with level conflicts. +#' @importFrom purrr pmap +#' @return A list of unique factor levels. +#' @noRd +.get_unique_levels <- function(factor_levels, level_conflicts) { + unique_levels <- factor_levels %>% + map(~ .[level_conflicts]) %>% + pmap(function(...) { + as.character(c(...)) + }) %>% + map(~ unique(.)) + return(unique_levels) +} + +#' Set Factor Levels in DataFrame +#' +#' Applies the unique factor levels to the DataFrame. +#' +#' @param newobj The name of the new DataFrame. +#' @param unique_levels A list of unique factor levels. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with the new factor levels. +#' @noRd +.set_factor_levels <- function(df, unique_levels, datasources) { + cally <- call("fixLevelsDS", df, names(unique_levels), unique_levels) + datashield.assign(datasources, df, cally) +} + +#' Print Out Summary Messages +#' +#' Prints summary messages regarding the filled DataFrame, including added columns, class decisions, and factor level adjustments. +#' +#' @param added_cols A list of added columns. +#' @param class_decisions A vector of class decisions. +#' @param different_classes A list of variables with class conflicts. +#' @param unique_levels A list of unique factor levels. +#' @param level_conflicts A list of variables with level conflicts. +#' @param levels_decision The decision made regarding factor levels. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_text +#' @return None. Prints messages to the console. +#' @noRd +.print_out_messages <- function(added_cols, class_decisions, different_classes, unique_levels, + level_conflicts, levels_decision, newobj) { + .print_var_recode_message(added_cols, newobj) + + if (length(different_classes) > 0) { + .print_class_recode_message(class_decisions, different_classes, newobj) + cli_text("") + } + + if (length(level_conflicts) > 0 & levels_decision == "1") { + .print_levels_recode_message(unique_levels, newobj) + } +} + +#' Print Variable Recode Message +#' +#' Prints a message summarizing the columns that were added to the DataFrame. +#' +#' @param added_cols A list of added columns. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_text +#' @return None. Prints the message to the console. +#' @noRd +.print_var_recode_message <- function(added_cols, newobj) { + cli_alert_success("The following variables have been added to {newobj}:") + added_cols_neat <- added_cols %>% map(~ ifelse(length(.) == 0, "", .)) + var_message <- paste0(names(added_cols), " --> ", added_cols_neat) + for (i in 1:length(var_message)) { + cli_alert_info("{var_message[[i]]}") + } + cli_text("") +} + +#' Print Class Recode Message +#' +#' Prints a message summarizing the class decisions that were made for variables with conflicts. +#' +#' @param class_decisions A vector of class decisions. +#' @param different_classes A list of variables with class conflicts. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_alert_info cli_alert_success +#' @return None. Prints the message to the console. +#' @noRd +.print_class_recode_message <- function(class_decisions, different_classes, newobj) { + choice_neat <- .change_choice_to_string(class_decisions) + class_message <- paste0(different_classes, " --> ", choice_neat) + cli_alert_success("The following classes have been set for all datasources in {newobj}: ") + for (i in 1:length(class_message)) { + cli_alert_info("{class_message[[i]]}") + } +} + +#' Convert Class Decision Code to String +#' +#' This function converts a numeric class decision input (represented as a string) +#' into the corresponding class type string (e.g., "factor", "integer", "numeric", etc.). +#' @param class_decision A string representing the class decision. It should be +#' one of the following values: "1", "2", "3", "4", or "5". +#' @return A string representing the class type corresponding to the input: +#' "factor", "integer", "numeric", "character", or "logical". +#' @noRd +.change_choice_to_string <- function(class_decision) { + case_when( + class_decision == "1" ~ "factor", + class_decision == "2" ~ "integer", + class_decision == "3" ~ "numeric", + class_decision == "4" ~ "character", + class_decision == "5" ~ "logical" + ) +} + +#' Print Factor Levels Recode Message +#' +#' Prints a message summarizing the factor level decisions that were made for variables with conflicts. +#' +#' @param unique_levels A list of unique factor levels. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_alert_success cli_alert_info +#' @return None. Prints the message to the console. +#' @noRd +.print_levels_recode_message <- function(unique_levels, newobj) { + levels_message <- .make_levels_recode_message(unique_levels) + cli_alert_success("The following levels have been set for all datasources in {newobj}: ") + for (i in 1:length(levels_message)) { + cli_alert_info("{levels_message[[i]]}") + } +} + +#' Make Levels Recode Message +#' +#' Creates a message to alert the user about factor level recoding. +#' +#' @param unique_levels A list of unique factor levels. +#' @return A formatted string summarizing the level recoding. +#' @importFrom purrr pmap +#' @noRd +.make_levels_recode_message <- function(unique_levels) { + return( + list(names(unique_levels), unique_levels) %>% + pmap(function(.x, .y) { + paste0(.x, " --> ", paste0(.y, collapse = ", ")) + }) + ) +} + +#' Handle Warnings for Class Conversion Issues +#' +#' This function iterates through a list of warnings generated during class conversion and +#' triggers a danger alert if any warnings indicate that the conversion has resulted in `NA` values. +#' +#' @param fill_warnings A list or vector of warning messages generated during class conversion. +#' If any warnings indicate that `NA` values were introduced, a danger alert will be displayed. +#' @return NULL. This function is used for its side effects of printing alerts. +#' @importFrom cli cli_alert_danger +#' @noRd +.handle_warnings <- function(fill_warnings) { + if (length(fill_warnings) > 0) { + for (i in seq_along(fill_warnings)) { + if (grepl("NAs introduced by coercion", fill_warnings[[i]])) { + cli_alert_danger("Class conversion resulted in the creation of NA values.") + } else { + cli_alert_danger(fill_warnings[[i]]) + } + } + } +} + +#' Print Warning for Class Conflicts in Data Conversion +#' +#' This function displays a warning when there are class conflicts in a dataset that may have resulted +#' from incompatible class changes during data conversion. It alerts users to verify column classes, +#' as incompatible changes could corrupt the data. +#' +#' @param class_conflicts A list or vector of conflicting classes identified during conversion. +#' @param fix_class A string indicating the user's choice for fixing class conflicts. Typically, +#' this is "ask" if the user is prompted to confirm class changes. +#' @param class_decisions A vector of decisions made for class conversions. When any value is not +#' "6", it indicates unresolved class conflicts. +#' @return NULL. This function is used for its side effects of printing alerts. +#' @importFrom cli cli_alert_warning +#' @noRd +.print_class_warning <- function(class_conflicts, fix_class, class_decisions) { + if(length(class_conflicts) > 0 & fix_class == "ask" & all(!class_decisions == "6")) { + cli_alert_warning("Please check all columns that have changed class. Not all class changes + are compatible with all data types, so this could have corrupted the data.") + } +} + +readline <- NULL diff --git a/tests/testthat/_snaps/smk-standardiseDf.md b/tests/testthat/_snaps/smk-standardiseDf.md new file mode 100644 index 00000000..4fa52eca --- /dev/null +++ b/tests/testthat/_snaps/smk-standardiseDf.md @@ -0,0 +1,86 @@ +# ask_question displays the correct prompt + + Code + ask_question_class("my_var") + Message + i Would you like to: + 1. Convert `my_var` to a factor in all studies + 2. Convert `my_var` to an integer in all studies + 3. Convert `my_var` to numeric in all studies + 4. Convert `my_var` to a character in all studies + 5. Convert `my_var` to a logical vector in all studies + 6. Cancel `ds.dataFrameFill` operation + +# print_all_classes prints the correct message + + Code + print_all_classes(c("server_1", "server_2", "server_3"), c("numeric", "factor", + "integer")) + Message + * server_1: numeric + * server_2: factor + * server_3: integer + +# .make_levels_message makes correct message + + Code + .make_levels_message(level_conflicts) + Message + ! Warning: factor variables fac_col2, fac_col3, fac_col6, and fac_col9 do not have the same levels in all studies + i Would you like to: + 1. Create the missing levels where they are not present + 2. Do nothing + 3. Cancel `ds.dataFrameFill` operation + +# .print_var_recode_message prints the correct message + + Code + .print_var_recode_message(added_cols, "test_df") + Message + v The following variables have been added to test_df: + i sim1 --> col11 + i sim2 --> col11 + i sim3 --> col12 + + +# .print_class_recode_message prints the correct message + + Code + .print_class_recode_message(class_decisions, different_classes, "test_df") + Message + v The following classes have been set for all datasources in test_df: + i fac_col4 --> factor + i fac_col5 --> logical + +# .print_levels_recode_message prints the correct message + + Code + .print_levels_recode_message(unique_levs, "test_df") + Message + v The following levels have been set for all datasources in test_df: + i fac_col2 --> Blue, Green, Red + i fac_col3 --> No, Yes + i fac_col6 --> Bird, Cat, Dog + i fac_col9 --> False, True + +# .print_out_messages prints the correct messages + + Code + .print_out_messages(added_cols, class_decisions, different_classes, unique_levs, + level_conflicts, "1", "test_df") + Message + v The following variables have been added to test_df: + i sim1 --> col11 + i sim2 --> col11 + i sim3 --> col12 + + v The following classes have been set for all datasources in test_df: + i fac_col4 --> factor + i fac_col5 --> logical + + v The following levels have been set for all datasources in test_df: + i fac_col2 --> Blue, Green, Red + i fac_col3 --> No, Yes + i fac_col6 --> Bird, Cat, Dog + i fac_col9 --> False, True + diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R new file mode 100644 index 00000000..919df923 --- /dev/null +++ b/tests/testthat/helpers.R @@ -0,0 +1,218 @@ +#' Create a DSLite login object that can be used for testing +#' +#' @param assign_method A string specifying the name of the custom assign method to be added +#' to the DSLite server. If `NULL`, no additional assign method is added. Default is `NULL`. +#' @param aggregate_method A string specifying the name of the custom aggregate method to be +#' added to the DSLite server. If `NULL`, no additional aggregate method is added. Default is `NULL`. +#' @param tables A named list of tables to be made available on the DSLite server. Default is `NULL`. +#' +#' @return A DataSHIELD login object containing the necessary connection information for the DSLite server. +#' +#' @examples +#' \dontrun{ +#' # Prepare a DSLite server with default methods and custom assign/aggregate methods +#' login_data <- .prepare_dslite( +#' assign_method = "customAssign", +#' aggregate_method = "customAggregate", +#' tables = list(mtcars = mtcars, mtcars_group = mtcars_group) +#' ) +#' +#' @importFrom DSLite newDSLiteServer +#' @importFrom DSI newDSLoginBuilder +#' @export +.prepare_dslite <- function(assign_method = NULL, aggregate_method = NULL, tables = NULL) { + + options(datashield.env = environment()) + dslite.server <- DSLite::newDSLiteServer(tables = tables) + dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse"))) + dslite.server$aggregateMethod("exists", "base::exists") + dslite.server$aggregateMethod("classDS", "dsBase::classDS") + dslite.server$aggregateMethod("lsDS", "dsBase::lsDS") + dslite.server$aggregateMethod("dsListDisclosureSettings", "dsTidyverse::dsListDisclosureSettings") + + if (!is.null(assign_method)) { + dslite.server$assignMethod(assign_method, paste0("dsTidyverse::", assign_method)) + } + + if (!is.null(aggregate_method)) { + dslite.server$aggregateMethod(assign_method, paste0("dsTidyverse::", assign_method)) + } + + builder <- DSI::newDSLoginBuilder() + builder$append(server = "server_1", url = "dslite.server", driver = "DSLiteDriver") + builder$append(server = "server_2", url = "dslite.server", driver = "DSLiteDriver") + builder$append(server = "server_3", url = "dslite.server", driver = "DSLiteDriver") + login_data <- builder$build() + return(login_data) +} + +#' Create a mixed dataframe with factor and other types of columns +#' +#' This function generates a dataframe with a specified number of rows, +#' factor columns, and other columns (integer, numeric, and string). +#' +#' @param n_rows Number of rows in the dataframe. Default is 10,000. +#' @param n_factor_cols Number of factor columns in the dataframe. Default is 15. +#' @param n_other_cols Number of other columns (integer, numeric, and string) in the dataframe. Default is 15. +#' +#' @return A dataframe with the specified number of rows and columns, containing mixed data types. +#' @importFrom dplyr bind_cols +#' @importFrom purrr map_dfc +#' @examples +#' df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5) +create_mixed_dataframe <- function(n_rows = 10000, n_factor_cols = 15, n_other_cols = 15) { + + # Function to create a factor column with defined levels + create_factor_column <- function(levels, n = n_rows) { + set.seed(123) # Set seed before sample for reproducibility + factor(sample(levels, n, replace = TRUE)) + } + + # Define factor levels for different columns + factor_levels <- list( + c("Low", "Medium", "High"), + c("Red", "Green", "Blue"), + c("Yes", "No"), + c("A", "B", "C"), + c("One", "Two", "Three"), + c("Cat", "Dog", "Bird"), + c("Small", "Medium", "Large"), + c("Alpha", "Beta", "Gamma"), + c("True", "False"), + c("Left", "Right"), + c("North", "South", "East", "West"), + c("Day", "Night"), + c("Up", "Down"), + c("Male", "Female"), + c("Summer", "Winter", "Spring", "Fall") + ) + + # Create factor columns + factor_columns <- map_dfc(factor_levels[1:n_factor_cols], create_factor_column) + colnames(factor_columns) <- paste0("fac_col", 1:n_factor_cols) + + # Function to create other types of columns + create_other_column <- function(type, n = n_rows) { + set.seed(123) # Set seed before sample for reproducibility + switch(type, + "int" = sample(1:100, n, replace = TRUE), # Integer column + "num" = runif(n, 0, 100), # Numeric column + "str" = sample(letters, n, replace = TRUE), # Character column + "log" = sample(c(TRUE, FALSE), n, replace = TRUE) # Logical column + ) + } + + # Ensure that each data type is included + column_types <- c( + "int", "num", "str", "log", "int", + "num", "str", "log", "int", "num", + "str", "int", "num", "log", "str" + ) + + # Create other columns with specified types + other_columns <- map_dfc(column_types[1:n_other_cols], create_other_column) + colnames(other_columns) <- paste0("col", (n_factor_cols + 1):(n_factor_cols + n_other_cols)) + + # Combine factor and other columns into a single dataframe + df <- bind_cols(factor_columns, other_columns) + + return(df) +} + + +#' Modify factor levels for partial overlap +#' +#' This function takes two sets of factor levels, computes the common and unique levels, +#' and returns a new set of levels with partial overlap. +#' +#' @param levels1 First set of factor levels. +#' @param levels2 Second set of factor levels. +#' +#' @return A character vector of new factor levels with partial overlap. +#' @examples +#' new_levels <- partial_overlap_levels(c("A", "B", "C"), c("B", "C", "D")) +partial_overlap_levels <- function(levels1, levels2) { + common <- intersect(levels1, levels2) + unique1 <- setdiff(levels1, common) + unique2 <- setdiff(levels2, common) + + # Set seed before each sample call + set.seed(123) + sampled_unique1 <- sample(unique1, length(unique1) * 0.5) + + set.seed(123) + sampled_unique2 <- sample(unique2, length(unique2) * 0.5) + + new_levels <- c(common, sampled_unique1, sampled_unique2) + return(new_levels) +} + + +#' Create additional dataframes with specific conditions +#' +#' This function generates additional dataframes based on an input dataframe, modifying column classes and levels, +#' and adding new columns with unique names. Different seeds are used for each iteration of the loop, +#' ensuring reproducibility of the generated dataframes. +#' +#' @param base_df The base dataframe used to create the additional dataframes. +#' @param n_rows Number of rows in the additional dataframes. Default is 10,000. +#' @param df_names Names of the additional dataframes to be created. Default is c("df1", "df2", "df3"). +#' +#' @return A list of dataframes with the specified modifications. +#' @importFrom dplyr bind_cols +#' @examples +#' base_df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5) +#' additional_dfs <- create_additional_dataframes(base_df, n_rows = 1000, df_names = c("df1", "df2")) +create_additional_dataframes <- function(base_df, n_rows = 10000, df_names = c("df1", "df2", "df3")) { + + # Define a fixed sequence of seeds, one for each dataframe to be created + seeds <- c(123, 456, 789, 101112) + + df_list <- list() + + for (i in seq_along(df_names)) { + # Set the seed for this iteration based on the pre-defined seeds + set.seed(seeds[i]) + + # Proceed with the dataframe generation process + overlap_cols <- sample(colnames(base_df), size = round(0.8 * ncol(base_df))) + df <- base_df + cols_to_modify_class <- sample(overlap_cols, size = round(0.2 * length(overlap_cols))) + + # Modify columns to have different data types + for (col in cols_to_modify_class) { + current_class <- class(df[[col]]) + new_class <- switch(current_class, + "factor" = as.character(df[[col]]), + "character" = as.factor(df[[col]]), + "numeric" = as.integer(df[[col]]), + "integer" = as.numeric(df[[col]]), + df[[col]]) + df[[col]] <- new_class + } + + # Modify factor levels for partial overlap + factor_cols <- colnames(base_df)[sapply(base_df, is.factor)] + overlap_factor_cols <- intersect(overlap_cols, factor_cols) + cols_to_modify_levels <- sample(overlap_factor_cols, size = round(0.5 * length(overlap_factor_cols))) + + for (col in cols_to_modify_levels) { + original_levels <- levels(base_df[[col]]) + new_levels <- partial_overlap_levels(original_levels, original_levels) + df[[col]] <- factor(df[[col]], levels = new_levels) + } + + # Create new random columns for each dataframe (these will vary by seed) + set.seed(seeds[i]) # Set the seed again for generating new columns + n_new_cols <- round(0.2 * ncol(base_df)) + new_col_names <- paste0(df_names[i], "_new_col_", 1:n_new_cols) + new_cols <- data.frame(matrix(runif(n_rows * n_new_cols), ncol = n_new_cols)) + colnames(new_cols) <- new_col_names + + # Bind new columns to the dataframe + df <- bind_cols(df, new_cols) + df_list[[df_names[i]]] <- df + } + + return(df_list) +} diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R new file mode 100644 index 00000000..d557c325 --- /dev/null +++ b/tests/testthat/test-smk-standardiseDf.R @@ -0,0 +1,712 @@ +# +# Set up +# +context("ds.standardiseDf::smk::setup") +options(datashield.errors.print = TRUE) + +connect.studies.dataset.stand( + c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9", + "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20") + ) + +test_that("setup", { + ds_expect_variables(c("D")) +}) + +# +# Tests +# + +#################################################################################################### +# Code that will be used in multiple tests +#################################################################################################### +var_class <- .get_var_classes("D", datasources = ds.test_env$connections) + +class_conflicts <- .identify_class_conflicts(var_class) + +different_classes <- c("fac_col4", "fac_col5") + +class_decisions <- c("1", "5") + +.fix_classes( + df.name = "D", + different_classes = different_classes, + class_decisions = class_decisions, + newobj = "new_classes", + datasources = ds.test_env$connections) + +cols_to_set <- c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col9", "col12", + "col15", "col18", "fac_col7", "fac_col10", "col13", "col16", "col19", "col11", "col14", "col17", + "col20") + +.add_missing_cols_to_df( + df.name = "D", + cols_to_add_if_missing = cols_to_set, + newobj = "with_new_cols", + datasources = ds.test_env$connections) + +old_cols <- ds.colnames("D") + +new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col7", "fac_col9") + +new_cols_servers <- list( + server_1 = new_cols, + server_2 = new_cols, + server_3 = new_cols +) + +added_cols <- .get_added_cols(old_cols, new_cols_servers) + +var_class_fact <- .get_var_classes("with_new_cols", datasources = ds.test_env$connections) + +fac_vars <- .identify_factor_vars(var_class_fact) + +fac_levels <- .get_factor_levels("with_new_cols", fac_vars, ds.test_env$connections) + +level_conflicts <- .identify_level_conflicts(fac_levels) + +unique_levs <- .get_unique_levels(fac_levels, level_conflicts) + +#################################################################################################### +# Tests +#################################################################################################### +test_that(".stop_if_cols_identical throws error if columns are identical", { + + identical_cols <- list( + c("col1", "col2", "col3"), + c("col1", "col2", "col3"), + c("col1", "col2", "col3") + ) + + expect_error( + .stop_if_cols_identical(identical_cols), + "Columns are identical in all data frames: nothing to fill" + ) + +}) + +test_that(".stop_if_cols_identical doesn't throw error if data frames have different columns", { + + different_cols <- list( + c("col1", "col2", "col3"), + c("col1", "col2", "col4"), + c("col1", "col5", "col3") + ) + + expect_silent( + .stop_if_cols_identical(different_cols) + ) + +}) + +test_that(".get_var_classes returns correct output", { + + expected <- tibble( + server = c("sim1", "sim2", "sim3"), + fac_col1 = c("factor", "factor", "factor"), + fac_col2 = c("factor", "factor", "factor"), + fac_col3 = c("factor", "factor", "factor"), + fac_col4 = c("numeric", "character", "factor"), + fac_col5 = c("logical", "integer", "factor"), + fac_col6 = c("factor", NA, NA), + fac_col9 = c("factor", NA, NA), + col12 = c("numeric", NA, NA), + col15 = c("integer", NA, NA), + col18 = c("logical", NA, NA), + fac_col7 = c(NA, "factor", NA), + fac_col10 = c(NA, "factor", NA), + col13 = c(NA, "character", NA), + col16 = c(NA, "numeric", NA), + col19 = c(NA, "integer", NA), + col11 = c(NA, NA, "integer"), + col14 = c(NA, NA, "logical"), + col17 = c(NA, NA, "character"), + col20 = c(NA, NA, "numeric") + ) + + expect_equal(var_class, expected) + +}) + +test_that(".identify_class_conflicts returns correct output", { + expected <- list( + fac_col4 = c("numeric", "character", "factor"), + fac_col5 = c("logical", "integer", "factor") + ) + + expect_equal(class_conflicts, expected) + +}) + +test_that("ask_question displays the correct prompt", { + expect_snapshot(ask_question_class("my_var")) +}) + +test_that("ask_question_wait_response_class continues with valid response", { + expect_equal( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "1" + ), "1" + ) +}) + +test_that("ask_question_wait_response_class throws error if option 6 selected", { + expect_error( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "6") + ) +}) + +test_that("print_all_classes prints the correct message", { + expect_snapshot( + print_all_classes( + c("server_1", "server_2", "server_3"), + c("numeric", "factor", "integer") + ) + ) +}) + +test_that("prompt_user_class_decision function properly", { + expect_message( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("sim2", "sim2", "sim3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ) + ) + + expect_equal( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("sim2", "sim2", "sim3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ), + "test_col" + ) +}) + +test_that("prompt_user_class_decision_all_vars returns correct value", { + expect_equal( + with_mocked_bindings( + prompt_user_class_decision_all_vars( + vars = c("test_var_1", "test_var_2"), + all_servers = c("sim2", "sim2", "sim3"), + all_classes = tibble( + test_var_1 = c("numeric", "character", "factor"), + test_var_2 = c("logical", "integer", "factor") + ), + "test_df", + conns), + prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1" + ), + c("1", "1") + ) +}) + +test_that(".fix_classes sets the correct classes in serverside data frame", { + + expect_equal( + unname(unlist(ds.class("D$fac_col4"))), + c("numeric", "character", "factor") + ) + + expect_equal( + unname(unlist(ds.class("D$fac_col5"))), + c("logical", "integer", "factor") + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col4"))), + rep("factor", 3) + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col5"))), + rep("logical", 3) + ) + +}) + +test_that(".get_unique_cols extracts unique names from a list", { + expect_equal( + .get_unique_cols( + list( + server_1 = c("col_1", "col_2", "col_3"), + server_1 = c("col_1", "col_2", "col_4"), + server_1 = c("col_2", "col_3", "col_3", "col_5") + ) + ), + c("col_1", "col_2", "col_3", "col_4", "col_5") + ) +}) + +test_that(".add_missing_cols_to_df correctly creates missing columns", { + + new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col7", "fac_col9") + + observed <- ds.colnames("with_new_cols") + + expected <- list( + sim1 = new_cols, + sim2 = new_cols, + sim3 = new_cols + ) + + expect_equal(observed, expected) +}) + +test_that(".get_added_cols correctly identifies newly added columns", { + + expect_equal( + added_cols, + list( + sim1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"), + sim2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"), + sim3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9") + ) + ) +}) + +test_that(".identify_factor_vars correctly identifies factor variables", { + + + + var_class_fact <- var_class |> dplyr::select(server: col18) + expect_equal( + names(fac_vars), + c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) +}) + +test_that(".get_factor_levels correctly identifies factor levels", { + expected <- list( + sim1 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue", "Green"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ), + sim2 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Green", "Red"), + fac_col3 = c("No"), + fac_col6 = NULL, + fac_col9 = NULL + ), + sim3 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue"), + fac_col3 = c("Yes"), + fac_col6 = NULL, + fac_col9 = NULL + ) + ) + + expect_equal(fac_levels, expected) +}) + +test_that(".identify_level_conflicts correctly factor columns with different levels", { + expect_equal( + .identify_level_conflicts(fac_levels), + c("fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) + +}) + +test_that("ask_question_wait_response_levels continues with valid response", { + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + +}) + +test_that("ask_question_wait_response_levels aborts with response of 3", { + expect_error( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "3") + ) +}) + +test_that(".make_levels_message makes correct message", { + expect_snapshot(.make_levels_message(level_conflicts)) +}) + +test_that(".get_unique_levels extracts all possible levels", { + + expected <- list( + fac_col2 = c("Blue", "Green", "Red"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ) + + expect_equal(unique_levs, expected) + +}) + +test_that(".set_factor_levels sets levels correctly", { + .set_factor_levels("with_new_cols", unique_levs, ds.test_env$connections) + + expect_equal( + ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]), + list( + sim1 = c("Blue", "Green", "Red"), + sim2 = c("Blue", "Green", "Red"), + sim3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]), + list( + sim1 = c("No", "Yes"), + sim2 = c("No", "Yes"), + sim3 = c("No", "Yes") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]), + list( + sim1 = c("Bird", "Cat", "Dog"), + sim2 = c("Bird", "Cat", "Dog"), + sim3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]), + list( + sim1 = c("False", "True"), + sim2 = c("False", "True"), + sim3 = c("False", "True") + ) + ) + +}) + +test_that(".print_var_recode_message prints the correct message", { + expect_snapshot(.print_var_recode_message(added_cols, "test_df")) +}) + +test_that(".print_class_recode_message prints the correct message", { + expect_snapshot( + .print_class_recode_message(class_decisions, different_classes, "test_df") + ) +}) + +test_that(".print_levels_recode_message prints the correct message", { + expect_snapshot( + .print_levels_recode_message(unique_levs, "test_df") + ) +}) + +test_that(".make_levels_recode_message prints the correct message", { + expect_equal( + .make_levels_recode_message(unique_levs), + list( + "fac_col2 --> Blue, Green, Red", + "fac_col3 --> No, Yes", + "fac_col6 --> Bird, Cat, Dog", + "fac_col9 --> False, True" + ) + ) +}) + +test_that(".print_out_messages prints the correct messages", { + expect_snapshot( + .print_out_messages( + added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" + ) + ) +}) + +test_that(".change_choice_to_string converts numeric class codes to strings correctly", { + expect_equal(.change_choice_to_string("1"), "factor") + expect_equal(.change_choice_to_string("2"), "integer") + expect_equal(.change_choice_to_string("3"), "numeric") + expect_equal(.change_choice_to_string("4"), "character") + expect_equal(.change_choice_to_string("5"), "logical") +}) + +test_that("ds.standardiseDf doesn't run if dataframes are identical", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_error( + ds.standardiseDf( + df = "test_fill", + newobj = "shouldn't_exist"), + "Columns are identical" + ) + }) + +test_that("ds.standardiseDf works when called directly and class conversion is factor", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "factor" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "integer" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "integer" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "numeric" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "numeric" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is character", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "character" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "character" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "logical" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "logical" + ) +}) + +test_that("ds.standardiseDf changes levels if this option is selected", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" + ) + + levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels) + levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels) + levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels) + levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels) + levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels) + levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels) + + expect_equal( + levels_2, + list( + sim1 = c("Blue", "Green", "Red"), + sim2 = c("Blue", "Green", "Red"), + sim3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + levels_3, + list( + sim1 = c("No", "Yes"), + sim2 = c("No", "Yes"), + sim3 = c("No", "Yes") + ) + ) + + expect_equal( + levels_4, + list( + sim1 = c("1", "2", "3", "A", "B", "C"), + sim2 = c("1", "2", "3", "A", "B", "C"), + sim3 = c("1", "2", "3", "A", "B", "C") + ) + ) + + expect_equal( + levels_5, + list( + sim1 = c("1", "2", "3", "One", "Three", "Two"), + sim2 = c("1", "2", "3", "One", "Three", "Two"), + sim3 = c("1", "2", "3", "One", "Three", "Two") + ) + ) + + expect_equal( + levels_6, + list( + sim1 = c("Bird", "Cat", "Dog"), + sim2 = c("Bird", "Cat", "Dog"), + sim3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + levels_9, + list( + sim1 = c("False", "True"), + sim2 = c("False", "True"), + sim3 = c("False", "True") + ) + ) + +}) + +test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", { + expect_error( + ds.standardiseDf( + df = "D", + newobj = "shouldnt_exist", + fix_class = "no" + ), + "Variables do not have the same class in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) + +test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", { + expect_error( + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "shouldnt_exist", + fix_levels = "no" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1") + ), + "Factor variables do not have the same levels in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) + +test_that("ds.standardiseDf doesn't run if a factor variable has too many levels", { + connect.studies.dataset.stand_disclosure( + c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9", + "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20") + ) + + expect_error( + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + ) + +}) + +disconnect.studies.dataset.stand() + +context("ds.standardiseDf::smk::done") From 6290957c6e9c3ff12939a605858b62eeeb294c53 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:37:22 +0200 Subject: [PATCH 2/9] feat: add test data and connection functions for standardise --- .../init_studies_datasets.R | 78 ++++++++++++++ .../testthat/data_files/STANDARDISE/std_1.csv | 101 ++++++++++++++++++ .../testthat/data_files/STANDARDISE/std_1.rda | Bin 0 -> 1474 bytes .../data_files/STANDARDISE/std_1_d.csv | 101 ++++++++++++++++++ .../data_files/STANDARDISE/std_1_d.rda | Bin 0 -> 2228 bytes .../testthat/data_files/STANDARDISE/std_2.csv | 101 ++++++++++++++++++ .../testthat/data_files/STANDARDISE/std_2.rda | Bin 0 -> 1706 bytes .../data_files/STANDARDISE/std_2_d.csv | 101 ++++++++++++++++++ .../data_files/STANDARDISE/std_2_d.rda | Bin 0 -> 2461 bytes .../testthat/data_files/STANDARDISE/std_3.csv | 101 ++++++++++++++++++ .../testthat/data_files/STANDARDISE/std_3.rda | Bin 0 -> 1597 bytes .../data_files/STANDARDISE/std_3_d.csv | 101 ++++++++++++++++++ .../data_files/STANDARDISE/std_3_d.rda | Bin 0 -> 2343 bytes ...lgenis_armadillo-upload_testing_datasets.R | 8 ++ 14 files changed, 692 insertions(+) create mode 100644 tests/testthat/data_files/STANDARDISE/std_1.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_1.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_1_d.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_1_d.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_2.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_2.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_2_d.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_2_d.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_3.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_3.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_3_d.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_3_d.rda diff --git a/tests/testthat/connection_to_datasets/init_studies_datasets.R b/tests/testthat/connection_to_datasets/init_studies_datasets.R index 0639aac6..2bc95718 100644 --- a/tests/testthat/connection_to_datasets/init_studies_datasets.R +++ b/tests/testthat/connection_to_datasets/init_studies_datasets.R @@ -222,6 +222,63 @@ init.studies.dataset.gamlss <- function(variables) } +init.studies.dataset.stand <- function(variables) +{ + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_1", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_1", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } +} + +init.studies.dataset.stand_disclosure <- function(variables) +{ + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1_d", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_2_d", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_3_d", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1_d", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2_d", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3_d", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } +} + + connect.studies.dataset.cnsim <- function(variables) { log.out.data.server() @@ -278,6 +335,22 @@ connect.studies.dataset.gamlss <- function(variables) log.in.data.server() } +connect.studies.dataset.stand <- function(variables) +{ + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.stand(variables) + log.in.data.server() +} + +connect.studies.dataset.stand_disclosure <- function(variables) +{ + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.stand_disclosure(variables) + log.in.data.server() +} + disconnect.studies.dataset.cnsim <- function() { log.out.data.server() @@ -312,3 +385,8 @@ disconnect.studies.dataset.gamlss <- function() { log.out.data.server() } + +disconnect.studies.dataset.stand <- function() +{ + log.out.data.server() +} diff --git a/tests/testthat/data_files/STANDARDISE/std_1.csv b/tests/testthat/data_files/STANDARDISE/std_1.csv new file mode 100644 index 00000000..32890959 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_1.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18 +High,Blue,Yes,3,NA,Bird,True,28.757752012461424,31,TRUE +High,Blue,Yes,3,NA,Bird,True,78.83051354438066,79,TRUE +High,Blue,Yes,3,NA,Bird,True,40.89769218116999,51,TRUE +Medium,Green,No,2,NA,Dog,False,88.301740400493145,14,FALSE +High,Blue,Yes,3,NA,Bird,True,94.04672842938453,67,TRUE +Medium,Green,No,2,NA,Dog,False,4.555649938993156,42,FALSE +Medium,Green,No,2,NA,Dog,False,52.810548804700375,50,FALSE +Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE +High,Blue,Yes,3,NA,Bird,True,55.14350144658238,14,TRUE +Low,NA,Yes,1,NA,Cat,True,45.661473530344665,25,TRUE +Medium,Green,No,2,NA,Dog,False,95.68333453498781,90,FALSE +Medium,Green,No,2,NA,Dog,False,45.33341561909765,91,FALSE +Low,NA,No,1,NA,Cat,False,67.75706354528666,69,FALSE +Medium,Green,Yes,2,NA,Dog,True,57.26334019564092,91,TRUE +High,Blue,No,3,NA,Bird,False,10.292468266561627,57,FALSE +Low,NA,Yes,1,NA,Cat,True,89.98249704018235,92,TRUE +High,Blue,No,3,NA,Bird,False,24.60877343546599,9,FALSE +High,Blue,Yes,3,NA,Bird,True,4.205953353084624,93,TRUE +Low,NA,Yes,1,NA,Cat,True,32.79207192827016,99,TRUE +Low,NA,Yes,1,NA,Cat,True,95.45036491472274,72,TRUE +Low,NA,Yes,1,NA,Cat,True,88.95393160637468,26,TRUE +Low,NA,No,1,NA,Cat,False,69.28034061565995,7,FALSE +High,Blue,Yes,3,NA,Bird,True,64.05068137682974,42,TRUE +Medium,Green,Yes,2,NA,Dog,True,99.42697766236961,9,TRUE +High,Blue,Yes,3,NA,Bird,True,65.57057991158217,83,TRUE +Medium,Green,Yes,2,NA,Dog,True,70.85304681677371,36,TRUE +Low,NA,No,1,NA,Cat,False,54.40660247113556,78,FALSE +Medium,Green,No,2,NA,Dog,False,59.41420204471797,81,FALSE +High,Blue,Yes,3,NA,Bird,True,28.91597372945398,43,TRUE +Medium,Green,No,2,NA,Dog,False,14.711364731192589,76,FALSE +Low,NA,Yes,1,NA,Cat,True,96.30242325365543,15,TRUE +High,Blue,No,3,NA,Bird,False,90.22990451194346,32,FALSE +High,Blue,Yes,3,NA,Bird,True,69.07052784226835,7,TRUE +Low,NA,No,1,NA,Cat,False,79.54674176871777,9,FALSE +High,Blue,No,3,NA,Bird,False,2.461368450894952,41,FALSE +Medium,Green,Yes,2,NA,Dog,True,47.77959710918367,74,TRUE +Low,NA,Yes,1,NA,Cat,True,75.84595375228673,23,TRUE +High,Blue,Yes,3,NA,Bird,True,21.640793583355844,27,TRUE +Low,NA,Yes,1,NA,Cat,True,31.818100763484836,60,TRUE +Low,NA,No,1,NA,Cat,False,23.16257853526622,53,FALSE +Medium,Green,Yes,2,NA,Dog,True,14.280002238228917,7,TRUE +High,Blue,No,3,NA,Bird,False,41.45463358145207,53,FALSE +High,Blue,No,3,NA,Bird,False,41.372432629577816,27,FALSE +Low,NA,Yes,1,NA,Cat,True,36.884545092470944,96,TRUE +High,Blue,Yes,3,NA,Bird,True,15.244474774226546,38,TRUE +Low,NA,Yes,1,NA,Cat,True,13.880606344901025,89,TRUE +High,Blue,Yes,3,NA,Bird,True,23.303409945219755,34,TRUE +Medium,Green,No,2,NA,Dog,False,46.59624502528459,93,FALSE +Low,NA,Yes,1,NA,Cat,True,26.597264036536217,69,TRUE +Medium,Green,Yes,2,NA,Dog,True,85.78277153428644,72,TRUE +Low,NA,No,1,NA,Cat,False,4.583116667345166,76,FALSE +Low,NA,Yes,1,NA,Cat,True,44.220007420517504,63,TRUE +High,Blue,Yes,3,NA,Bird,True,79.89248456433415,13,TRUE +Low,NA,Yes,1,NA,Cat,True,12.189925997518003,82,TRUE +Medium,Green,Yes,2,NA,Dog,True,56.094798375852406,97,TRUE +Low,NA,No,1,NA,Cat,False,20.65313896164298,91,FALSE +Low,NA,No,1,NA,Cat,False,12.753165024332702,25,FALSE +High,Blue,Yes,3,NA,Bird,True,75.33078643027693,38,TRUE +Low,NA,No,1,NA,Cat,False,89.50453591533005,21,FALSE +Medium,Green,Yes,2,NA,Dog,True,37.44627758860588,79,TRUE +Low,NA,Yes,1,NA,Cat,True,66.51151946280152,41,TRUE +High,Blue,No,3,NA,Bird,False,9.484066092409194,47,FALSE +Low,NA,No,1,NA,Cat,False,38.39696377981454,90,FALSE +High,Blue,Yes,3,NA,Bird,True,27.43836445733905,60,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.46400388795882,95,TRUE +High,Blue,No,3,NA,Bird,False,44.851634139195085,16,FALSE +Medium,Green,Yes,2,NA,Dog,True,81.00643530488014,94,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.23895095195621,6,TRUE +High,Blue,Yes,3,NA,Bird,True,79.43423211108893,72,TRUE +Medium,Green,Yes,2,NA,Dog,True,43.983168760314584,86,TRUE +Medium,Green,No,2,NA,Dog,False,75.44751586392522,86,FALSE +High,Blue,Yes,3,NA,Bird,True,62.922113155946136,39,TRUE +High,Blue,Yes,3,NA,Bird,True,71.01824013516307,31,TRUE +Low,NA,Yes,1,NA,Cat,True,0.062477332539856434,81,TRUE +Medium,Green,Yes,2,NA,Dog,True,47.53165740985423,50,TRUE +Medium,Green,No,2,NA,Dog,False,22.011888516135514,34,FALSE +Low,NA,No,1,NA,Cat,False,37.98165377229452,4,FALSE +Medium,Green,Yes,2,NA,Dog,True,61.277100327424705,13,TRUE +Low,NA,No,1,NA,Cat,False,35.179790924303234,69,FALSE +Low,NA,No,1,NA,Cat,False,11.113542434759438,25,FALSE +Medium,Green,No,2,NA,Dog,False,24.361947271972895,52,FALSE +High,Blue,No,3,NA,Bird,False,66.80555874481797,22,FALSE +High,Blue,Yes,3,NA,Bird,True,41.764677967876196,89,TRUE +Low,NA,No,1,NA,Cat,False,78.81958340294659,32,FALSE +Medium,Green,No,2,NA,Dog,False,10.286464425735176,25,FALSE +Low,NA,No,1,NA,Cat,False,43.489274149760604,87,FALSE +Medium,Green,Yes,2,NA,Dog,True,98.49569799844176,35,TRUE +Low,NA,Yes,1,NA,Cat,True,89.30511143989861,40,TRUE +High,Blue,No,3,NA,Bird,False,88.64690607879311,30,FALSE +High,Blue,Yes,3,NA,Bird,True,17.505265027284622,12,TRUE +Medium,Green,No,2,NA,Dog,False,13.069569156505167,31,FALSE +High,Blue,No,3,NA,Bird,False,65.31019250396639,30,FALSE +Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE +Medium,Green,No,2,NA,Dog,False,65.67581279668957,99,FALSE +Medium,Green,No,2,NA,Dog,False,32.03732424881309,14,FALSE +High,Blue,Yes,3,NA,Bird,True,18.769111926667392,93,TRUE +Medium,Green,Yes,2,NA,Dog,True,78.22943013161421,96,TRUE +Low,NA,No,1,NA,Cat,False,9.359498671256006,71,FALSE +High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE +High,Blue,Yes,3,NA,Bird,True,51.15054599009454,23,TRUE diff --git a/tests/testthat/data_files/STANDARDISE/std_1.rda b/tests/testthat/data_files/STANDARDISE/std_1.rda new file mode 100644 index 0000000000000000000000000000000000000000..df5a8854fcbee0ae5fbf212f51ae0b47675348b7 GIT binary patch literal 1474 zcmV;z1wHy7iwFP!000001MOIQP*hbIKipm3U`$LYNs22Y0zvUuK!I3b0YhHP0s?`! zunSTPYYRw?ql3m&HVzr|Aasx>CoMFc3Pz0~a0tq=DIG0Jr=o{C_EM)wE0Ml$_guOh z?%f6c$v@ngkKg&e^PTT`&b^15t1gUiD|91-$cS9d5t$%&oOcb!B|MFJx+voVC)Fx9J9uOnPcs8%)Bib zYeSebJC@2{Dn3?+tt}_wOQ1KUXHG`LMuV{qMqbtyTRXX`x@tWg%Q8%jymF>NU)@l{ zm~gBx>gsq4lXfe074;@FjBZKn1hxwnzKK|V7G5ysaDz@uSEMXb2VG1iwbs@HVSoej zG7sCUaZC=8!J^g9%uzn=$*|pE`lI!QF zXD<%PKHHPevj4NLpB+2)_N`Q0Gu;=&a_=GfCNlL6b%*|6x^ov5jqx_Qi%!0tCU2if z)n+~af=bra^Fy_%>dsZfhEFkjxBFGZF;!=*$i->ssUqmjkXy9y-xpRLb=LNzWXxT zIe&Vj6!qdgqXPgnnyWrTXixTV>3x8O=d>M*kk>cscHw+)yXGLq6Y0LAA272=-uen) z+~v)QzJMXd=!qs==Usf!3+=Y_4nBtK)K5Pfh53I!ehTxF9Ui-?0!;hilP`7v=Kkrw zPzk8q>+Vqkm~hJE3IDXKq60fWj0Ak(g_3%-r#==|djauK_qPRrNl(NK!;4H6x;gkH z+6_Mb&DkTkF7rSt@@W~G!4rTh+b^sh0L(Wp_;m#9obS8c59{X}7|CH=e=TZ!32?bC z{%`C@hKHgU@mS~78m{mUy9PX;J|MCyl@^CeG9b&S`=CfwF)!)Fwu77my#-MZG|vbfEb_3xOb7qNMXmDq9Bx zr;!u^DX8BR>KAP$(7bYibTm#My5=65C;EAi=7T=U=+k5^K1ryp0!jh2J{npN^eLx4 zQmAd%4dh-LlOL6>0a^;Q90*P`@t}5atfIA=N8`tw(rDc=cQ-QqIzj|U1iYAxMKL?9 zOlmFbODY~)erD=Z%-&pcAU0&SaUXVLU5$bNuH{CBF7O&Et1ubE7(SfgBN#rC z;gt*@#qcX8KdynVa3P}eY2 c=+I5CtJj5An!y&azH@K=3;o3R#|st!04bEq-v9sr literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.csv b/tests/testthat/data_files/STANDARDISE/std_1_d.csv new file mode 100644 index 00000000..9fe7462f --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_1_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18 +High,Blue,Yes,3,NA,Bird,True,28.7577520124614,31,TRUE +High,Blue,Yes,3,NA,Bird,True,78.8305135443807,79,TRUE +High,Blue,Yes,3,NA,Bird,True,40.89769218117,51,TRUE +Medium,Green,No,2,NA,Dog,False,88.3017404004931,14,FALSE +High,Blue,Yes,3,NA,Bird,True,94.0467284293845,67,TRUE +Medium,Green,No,2,NA,Dog,False,4.55564993899316,42,FALSE +Medium,Green,No,2,NA,Dog,False,52.8105488047004,50,FALSE +Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE +High,Blue,Yes,3,NA,Bird,True,55.1435014465824,14,TRUE +Low,NA,Yes,1,NA,Cat,True,45.6614735303447,25,TRUE +Medium,Green,No,2,NA,Dog,False,95.6833345349878,90,FALSE +Medium,Green,No,2,NA,Dog,False,45.3334156190977,91,FALSE +Low,NA,No,1,NA,Cat,False,67.7570635452867,69,FALSE +Medium,Green,Yes,2,NA,Dog,True,57.2633401956409,91,TRUE +High,Blue,No,3,NA,Bird,False,10.2924682665616,57,FALSE +Low,NA,Yes,1,NA,Cat,True,89.9824970401824,92,TRUE +High,Blue,No,3,NA,Bird,False,24.608773435466,9,FALSE +High,Blue,Yes,3,NA,Bird,True,4.20595335308462,93,TRUE +Low,NA,Yes,1,NA,Cat,True,32.7920719282702,99,TRUE +Low,NA,Yes,1,NA,Cat,True,95.4503649147227,72,TRUE +Low,NA,Yes,1,NA,Cat,True,88.9539316063747,26,TRUE +Low,NA,No,1,NA,Cat,False,69.28034061566,7,FALSE +High,Blue,Yes,3,NA,Bird,True,64.0506813768297,42,TRUE +Medium,Green,Yes,2,NA,Dog,True,99.4269776623696,9,TRUE +High,Blue,Yes,3,NA,Bird,True,65.5705799115822,83,TRUE +Medium,Green,Yes,2,NA,Dog,True,70.8530468167737,36,TRUE +Low,NA,No,1,NA,Cat,False,54.4066024711356,78,FALSE +Medium,Green,No,2,NA,Dog,False,59.414202044718,81,FALSE +High,Blue,Yes,3,NA,Bird,True,28.915973729454,43,TRUE +Medium,Green,No,2,NA,Dog,False,14.7113647311926,76,FALSE +Low,NA,Yes,1,NA,Cat,True,96.3024232536554,15,TRUE +High,Blue,No,3,NA,Bird,False,90.2299045119435,32,FALSE +High,Blue,Yes,3,NA,Bird,True,69.0705278422683,7,TRUE +Low,NA,No,1,NA,Cat,False,79.5467417687178,9,FALSE +High,Blue,No,3,NA,Bird,False,2.46136845089495,41,FALSE +Medium,Green,Yes,2,NA,Dog,True,47.7795971091837,74,TRUE +Low,NA,Yes,1,NA,Cat,True,75.8459537522867,23,TRUE +High,Blue,Yes,3,NA,Bird,True,21.6407935833558,27,TRUE +Low,NA,Yes,1,NA,Cat,True,31.8181007634848,60,TRUE +Low,NA,No,1,NA,Cat,False,23.1625785352662,53,FALSE +Medium,Green,Yes,2,NA,Dog,True,14.2800022382289,7,TRUE +High,Blue,No,3,NA,Bird,False,41.4546335814521,53,FALSE +High,Blue,No,3,NA,Bird,False,41.3724326295778,27,FALSE +Low,NA,Yes,1,NA,Cat,True,36.8845450924709,96,TRUE +High,Blue,Yes,3,NA,Bird,True,15.2444747742265,38,TRUE +Low,NA,Yes,1,NA,Cat,True,13.880606344901,89,TRUE +High,Blue,Yes,3,NA,Bird,True,23.3034099452198,34,TRUE +Medium,Green,No,2,NA,Dog,False,46.5962450252846,93,FALSE +Low,NA,Yes,1,NA,Cat,True,26.5972640365362,69,TRUE +Medium,Green,Yes,2,NA,Dog,True,85.7827715342864,72,TRUE +Low,NA,No,1,NA,Cat,False,4.58311666734517,76,FALSE +Low,NA,Yes,1,NA,Cat,True,44.2200074205175,63,TRUE +High,Blue,Yes,3,NA,Bird,True,79.8924845643342,13,TRUE +Low,NA,Yes,1,NA,Cat,True,12.189925997518,82,TRUE +Medium,Green,Yes,2,NA,Dog,True,56.0947983758524,97,TRUE +Low,NA,No,1,NA,Cat,False,20.653138961643,91,FALSE +Low,NA,No,1,NA,Cat,False,12.7531650243327,25,FALSE +High,Blue,Yes,3,NA,Bird,True,75.3307864302769,38,TRUE +Low,NA,No,1,NA,Cat,False,89.50453591533,21,FALSE +Medium,Green,Yes,2,NA,Dog,True,37.4462775886059,79,TRUE +Low,NA,Yes,1,NA,Cat,True,66.5115194628015,41,TRUE +High,Blue,No,3,NA,Bird,False,9.48406609240919,47,FALSE +Low,NA,No,1,NA,Cat,False,38.3969637798145,90,FALSE +High,Blue,Yes,3,NA,Bird,True,27.438364457339,60,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.4640038879588,95,TRUE +High,Blue,No,3,NA,Bird,False,44.8516341391951,16,FALSE +Medium,Green,Yes,2,NA,Dog,True,81.0064353048801,94,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.2389509519562,6,TRUE +High,Blue,Yes,3,NA,Bird,True,79.4342321110889,72,TRUE +Medium,Green,Yes,2,NA,Dog,True,43.9831687603146,86,TRUE +Medium,Green,No,2,NA,Dog,False,75.4475158639252,86,FALSE +High,Blue,Yes,3,NA,Bird,True,62.9221131559461,39,TRUE +High,Blue,Yes,3,NA,Bird,True,71.0182401351631,31,TRUE +Low,NA,Yes,1,NA,Cat,True,0.0624773325398564,81,TRUE +Medium,Green,Yes,2,NA,Dog,True,47.5316574098542,50,TRUE +Medium,Green,No,2,NA,Dog,False,22.0118885161355,34,FALSE +Low,NA,No,1,NA,Cat,False,37.9816537722945,4,FALSE +Medium,Green,Yes,2,NA,Dog,True,61.2771003274247,13,TRUE +Low,NA,No,1,NA,Cat,False,35.1797909243032,69,FALSE +Low,NA,No,1,NA,Cat,False,11.1135424347594,25,FALSE +Medium,Green,No,2,NA,Dog,False,24.3619472719729,52,FALSE +High,Blue,No,3,NA,Bird,False,66.805558744818,22,FALSE +High,Blue,Yes,3,NA,Bird,True,41.7646779678762,89,TRUE +Low,NA,No,1,NA,Cat,False,78.8195834029466,32,FALSE +Medium,Green,No,2,NA,Dog,False,10.2864644257352,25,FALSE +Low,NA,No,1,NA,Cat,False,43.4892741497606,87,FALSE +Medium,Green,Yes,2,NA,Dog,True,98.4956979984418,35,TRUE +Low,NA,Yes,1,NA,Cat,True,89.3051114398986,40,TRUE +High,Blue,No,3,NA,Bird,False,88.6469060787931,30,FALSE +High,Blue,Yes,3,NA,Bird,True,17.5052650272846,12,TRUE +Medium,Green,No,2,NA,Dog,False,13.0695691565052,31,FALSE +High,Blue,No,3,NA,Bird,False,65.3101925039664,30,FALSE +Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE +Medium,Green,No,2,NA,Dog,False,65.6758127966896,99,FALSE +Medium,Green,No,2,NA,Dog,False,32.0373242488131,14,FALSE +High,Blue,Yes,3,NA,Bird,True,18.7691119266674,93,TRUE +Medium,Green,Yes,2,NA,Dog,True,78.2294301316142,96,TRUE +Low,NA,No,1,NA,Cat,False,9.35949867125601,71,FALSE +High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE +High,Blue,Yes,3,NA,Bird,True,51.1505459900945,23,TRUE diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.rda b/tests/testthat/data_files/STANDARDISE/std_1_d.rda new file mode 100644 index 0000000000000000000000000000000000000000..4387f72b8b0e93973132f43c9ddd0f762f606166 GIT binary patch literal 2228 zcmV;l2ut@LiwFP!000001MQk=a~;JMhDTSDZ5+pPAjE+z5JEx{!qEHTEOv|+LReyA zh@DMbTNg)FLQY9CRKD>;@*7k61t?GJ_RO?Kcdle%@E5!4t-13~&pD^h)@M|H|H>_W z`qt^HsuruIrBSuG%+G51gPYf03snVCRSVTp^%U2W+dFHka&?WR&v45-3;*R>E;;kr z*7#qm>zg~1dy}o+v!y%Z?MddoxyMhhUrf0JvYr`lZQcLu!UZnlf3JQ#zPoe3FLHTr z$$-psUCUg9#p1eH+>eU;QIB6JewXOW8_l?1=*cPhQSY6F-m|{kQSbRYt}mn9bN~0y z{)g=Auj%`CG(%UhZeL2B*qD4e+1Tb+Kl;z_i~jb`*81kX)lVl~5-jib;(jj2{oJ0; zoO5rk-}`6&z4$h%lGVO5SzCYb$@GPN1})#+7;kT9DXo=l@-=xi`n+oXrPojFb~`w^ z`xV%CYImMVP3erX%Ei9w8D+LKWahl`>8I}AeRjpDa7)QAG`~yq<(2Oh_wBoVzK}EO zubZ)cRO&s9>&vM2@45eb=Kkl{H|kyY`L>i*&G5xB({0qG+vSZ1lP1%zZ%rnf)e%$d zk!1asH{&bK^nPusO=}htWAp-f{OZ%O^v!XRVu|>r18B z=-vBGTfIHmt{z90AJc^XL3n=OLAZ8jWBw_4Hj7EuwM(a_`#5l|2JZXU{QTnoWf#(By4_kl0p1z&|d56mH-4DKh%yoy^F&O8=_=k-DL;LR#Ifv!# zkX=X3i)vac>Fa0h^J@Qy@&u0;U*lN&^eVr%&BedtqpnC*8PX=8UT^ny~Hy=|=H@6;4s>dLFk5v8`~zGAH}Y03 zcpE$mUI6F83it*1Id~a-tC~*P-vtVM4}6$h0c!9*_y)KDz75WRx4_@PPrw`CBzOgU zAAA5_2Y&)L!5_hE;1A%p;P=`627Cwn6#NGKxSCEd&w(r8Rq!rIGtMu;UGP_M3p|~} zcnQ1&E`t|A`g`s>DKbCmQ^tF(mDMp=XY!MCsy0q)7mO7lnrIu| z)JoWBwYFLW<5b66%j+@f0HiDUprv%qd2Oup4IgW5ksF+s$~qxC-UK6@6HyrvWo&pU zY89Mu#whExRVB|-7nF0>b$X?&rPS6aqm4&X=2xVuB^D|hqqj1&%T?_yW?R7vTD4|M zjfbLjQCgn0YFFSx5KcI4Ozfln7Vwk-~MX6q*W)wHG|ANfJpD3Az<;X$w@Aq5Op? zoNpD$15Z&CL(wLcHT7OMBC@HSv~I8@1IaBUA+|wv>s^gl0;mbQEqGrmf?@=|A^+P- zvGhBME0w?pT|08JCUMARS`m5Mn(tH{mBJEf>1nR@vNN@?!jaydP8hrOcweGZSwU`f z-5uU~VSS{RV7@ANhv8C@CXQy(@a;6ppHfYDP6efE>=f^8$IU8qOzh$Q9(MA2u@qMk<`bV^a{o7(N%OL>SR45iA=5B z7d)AVfAK;Lo%f@|N|nOJ(PX=0C5GCh0f1Tw#&kIziLQe7oRgZC^a%bMyuP_J|F6OQ zFTXU)zL#gbG`pr5G|jYWetk7_X^u?ucA6Ql=4+b0)7;w3+G(atvviudPi7hES(;tb zY@FuL8(C(WqtopCVqTlpr5QTS!fCEfb90)rFB*=6$6( zdz|fjF2D0^-e=PDdfrdcv6`Ko=KT{{)`uCdWdGdA{zy8PvW}}+b~Ah5&ihGR`c_FZ zc=~Qh-xO!Eob>xr_SM;Jf7;Xa?Dz9k@z=*{|CeM{T&v=K7T3SdlGEpg-tFrevS*&2 zqw1U`@3?!Y=5X`&Fh~FX{}!-0{$$epw}8``JVm9h-o3ved-tk$uY32VcW-<5u6N&@ z1ItO5DsOCg)Bo=}Pthpq#_sl8e|LBHKLefT^xAl5Tz|a9E7deLzxY2L7VDxOBme-K C!h~@E literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_2.csv b/tests/testthat/data_files/STANDARDISE/std_2.csv new file mode 100644 index 00000000..9c20866c --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_2.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19 +High,NA,NA,C,2,Large,Left,o,28.757752012461424,31 +High,NA,NA,C,2,Large,Left,s,78.83051354438066,79 +High,NA,NA,C,2,Large,Left,n,40.89769218116999,51 +Medium,Green,No,B,3,Medium,Right,c,88.301740400493145,14 +High,NA,NA,C,2,Large,Left,j,94.04672842938453,67 +Medium,Green,No,B,3,Medium,Right,r,4.555649938993156,42 +Medium,Green,No,B,3,Medium,Right,v,52.810548804700375,50 +Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43 +High,NA,NA,C,2,Large,Left,e,55.14350144658238,14 +Low,Red,NA,A,1,Small,Left,t,45.661473530344665,25 +Medium,Green,No,B,3,Medium,Right,n,95.68333453498781,90 +Medium,Green,No,B,3,Medium,Right,v,45.33341561909765,91 +Low,Red,No,A,1,Small,Right,y,67.75706354528666,69 +Medium,Green,NA,B,3,Medium,Left,z,57.26334019564092,91 +High,NA,No,C,2,Large,Right,e,10.292468266561627,57 +Low,Red,NA,A,1,Small,Left,s,89.98249704018235,92 +High,NA,No,C,2,Large,Right,y,24.60877343546599,9 +High,NA,NA,C,2,Large,Left,y,4.205953353084624,93 +Low,Red,NA,A,1,Small,Left,i,32.79207192827016,99 +Low,Red,NA,A,1,Small,Left,c,95.45036491472274,72 +Low,Red,NA,A,1,Small,Left,h,88.95393160637468,26 +Low,Red,No,A,1,Small,Right,z,69.28034061565995,7 +High,NA,NA,C,2,Large,Left,g,64.05068137682974,42 +Medium,Green,NA,B,3,Medium,Left,j,99.42697766236961,9 +High,NA,NA,C,2,Large,Left,i,65.57057991158217,83 +Medium,Green,NA,B,3,Medium,Left,s,70.85304681677371,36 +Low,Red,No,A,1,Small,Right,d,54.40660247113556,78 +Medium,Green,No,B,3,Medium,Right,n,59.41420204471797,81 +High,NA,NA,C,2,Large,Left,q,28.91597372945398,43 +Medium,Green,No,B,3,Medium,Right,k,14.711364731192589,76 +Low,Red,NA,A,1,Small,Left,g,96.30242325365543,15 +High,NA,No,C,2,Large,Right,u,90.22990451194346,32 +High,NA,NA,C,2,Large,Left,l,69.07052784226835,7 +Low,Red,No,A,1,Small,Right,o,79.54674176871777,9 +High,NA,No,C,2,Large,Right,j,2.461368450894952,41 +Medium,Green,NA,B,3,Medium,Left,m,47.77959710918367,74 +Low,Red,NA,A,1,Small,Left,g,75.84595375228673,23 +High,NA,NA,C,2,Large,Left,i,21.640793583355844,27 +Low,Red,NA,A,1,Small,Left,i,31.818100763484836,60 +Low,Red,No,A,1,Small,Right,j,23.16257853526622,53 +Medium,Green,NA,B,3,Medium,Left,w,14.280002238228917,7 +High,NA,No,C,2,Large,Right,u,41.45463358145207,53 +High,NA,No,C,2,Large,Right,g,41.372432629577816,27 +Low,Red,NA,A,1,Small,Left,u,36.884545092470944,96 +High,NA,NA,C,2,Large,Left,f,15.244474774226546,38 +Low,Red,NA,A,1,Small,Left,y,13.880606344901025,89 +High,NA,NA,C,2,Large,Left,b,23.303409945219755,34 +Medium,Green,No,B,3,Medium,Right,e,46.59624502528459,93 +Low,Red,NA,A,1,Small,Left,h,26.597264036536217,69 +Medium,Green,NA,B,3,Medium,Left,l,85.78277153428644,72 +Low,Red,No,A,1,Small,Right,m,4.583116667345166,76 +Low,Red,NA,A,1,Small,Left,r,44.220007420517504,63 +High,NA,NA,C,2,Large,Left,a,79.89248456433415,13 +Low,Red,NA,A,1,Small,Left,y,12.189925997518003,82 +Medium,Green,NA,B,3,Medium,Left,y,56.094798375852406,97 +Low,Red,No,A,1,Small,Right,f,20.65313896164298,91 +Low,Red,No,A,1,Small,Right,u,12.753165024332702,25 +High,NA,NA,C,2,Large,Left,o,75.33078643027693,38 +Low,Red,No,A,1,Small,Right,i,89.50453591533005,21 +Medium,Green,NA,B,3,Medium,Left,o,37.44627758860588,79 +Low,Red,NA,A,1,Small,Left,z,66.51151946280152,41 +High,NA,No,C,2,Large,Right,p,9.484066092409194,47 +Low,Red,No,A,1,Small,Right,t,38.39696377981454,90 +High,NA,NA,C,2,Large,Left,f,27.43836445733905,60 +Medium,Green,NA,B,3,Medium,Left,k,81.46400388795882,95 +High,NA,No,C,2,Large,Right,h,44.851634139195085,16 +Medium,Green,NA,B,3,Medium,Left,v,81.00643530488014,94 +Medium,Green,NA,B,3,Medium,Left,v,81.23895095195621,6 +High,NA,NA,C,2,Large,Left,g,79.43423211108893,72 +Medium,Green,NA,B,3,Medium,Left,p,43.983168760314584,86 +Medium,Green,No,B,3,Medium,Right,q,75.44751586392522,86 +High,NA,NA,C,2,Large,Left,v,62.922113155946136,39 +High,NA,NA,C,2,Large,Left,r,71.01824013516307,31 +Low,Red,NA,A,1,Small,Left,q,0.062477332539856434,81 +Medium,Green,NA,B,3,Medium,Left,b,47.53165740985423,50 +Medium,Green,No,B,3,Medium,Right,d,22.011888516135514,34 +Low,Red,No,A,1,Small,Right,m,37.98165377229452,4 +Medium,Green,NA,B,3,Medium,Left,e,61.277100327424705,13 +Low,Red,No,A,1,Small,Right,v,35.179790924303234,69 +Low,Red,No,A,1,Small,Right,s,11.113542434759438,25 +Medium,Green,No,B,3,Medium,Right,y,24.361947271972895,52 +High,NA,No,C,2,Large,Right,t,66.80555874481797,22 +High,NA,NA,C,2,Large,Left,v,41.764677967876196,89 +Low,Red,No,A,1,Small,Right,y,78.81958340294659,32 +Medium,Green,No,B,3,Medium,Right,n,10.286464425735176,25 +Low,Red,No,A,1,Small,Right,y,43.489274149760604,87 +Medium,Green,NA,B,3,Medium,Left,w,98.49569799844176,35 +Low,Red,NA,A,1,Small,Left,c,89.30511143989861,40 +High,NA,No,C,2,Large,Right,h,88.64690607879311,30 +High,NA,NA,C,2,Large,Left,p,17.505265027284622,12 +Medium,Green,No,B,3,Medium,Right,l,13.069569156505167,31 +High,NA,No,C,2,Large,Right,y,65.31019250396639,30 +Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64 +Medium,Green,No,B,3,Medium,Right,c,65.67581279668957,99 +Medium,Green,No,B,3,Medium,Right,n,32.03732424881309,14 +High,NA,NA,C,2,Large,Left,g,18.769111926667392,93 +Medium,Green,NA,B,3,Medium,Left,c,78.22943013161421,96 +Low,Red,No,A,1,Small,Right,w,9.359498671256006,71 +High,NA,NA,C,2,Large,Left,v,46.677904156968,67 +High,NA,NA,C,2,Large,Left,z,51.15054599009454,23 diff --git a/tests/testthat/data_files/STANDARDISE/std_2.rda b/tests/testthat/data_files/STANDARDISE/std_2.rda new file mode 100644 index 0000000000000000000000000000000000000000..cbe3d2791b8807083caa46ecb97bb452bf1a303f GIT binary patch literal 1706 zcmV;b237eViwFP!000001Ep9CY?MV1-uv%bY-zEPC{z&2L4dY`^hL{~axHyeIVjhI zQm|02_h<{(9$b%BAP~St#Sld51AzcWA}An4EdeA*34#)gLO`n!6(2zG2{D37sWaT} z@88}1FGp!I{bpxpznPuc%S~I7SC$rC7EK5-5|c?G#t2YJ#I&ge!?Fkg10e=tB2j=Q zm!~2X{Ca{82aK_Rt~>kxi&29{&05i{70zzZ=vv|V6v^7a@o4P|cgDc&@w^JRAIipa zXz$GHdiU4MkN4qgt4LyL{5th4((7(;d+R}q3${4e$q6-8)l@fHeT~d{lG|0Yu$H5U zaC#l}j6vn~IGh2WAGF)-B`k+uOwf}d6S@|<#_XE2onC^3PvO_vv%FoyWsvxF^4FmD ztgemc&~OPlcUFj}sa$k+^(RXk@?+Yhh;V% zWjwr1?<^lnFP~6x!i_28>*^hdq$YaYGC%n}I(3+JPMPcrr>A#zTy)1UUSG-8w}^`B zlcVn=Zi%PFG;+dF!pS*AbkQe78sVH0GZce{U=AmiXbHh{kIvyF7UGeFv(>ft)3rK> zLm1A-Aw;U-JuO1mET#SC)6N0f*+DjI2;U@}@O-kFN;d28Gn#BB;{9dwRw$cnzT#&k zJ}!TMz5I9|zP1KQERCOZ#^r0x&yq5J0~d>*jrHbB{;aIldUGQ0uQxa9+4Op(_ts?h z*UL}#9%Xa6PAxip@3E;i!s_r>xw%=EZrN)cUT^r5PM95+*?5%k@HV}(p+3#w?B7jH z8DCfLAghTUcc>So(WSGj$?En5Sc^;GZ<@f}4?aGo)H8(*#|9@;<}$_4lm@2EV+wnC zhJe7MIxk|%V&PQ1;G-%vLO#_3PnD2rjlk>@5?dhLLgiV=6t7SyHAJm&szy*$=SG29 zjO7vTUMUo+T1ZSyL+y@3znC5&s87gQwW#HtFEm%>nI|OHAShJ=&jP{6FKnrIcL_Ij z3&+Lx6A*mVtvbP?c1L_;YR+DL9;aZb61EzJd=|s69{5?bjBULYHG&X}y=91b0$`s1 za`Wear9F%H-2|Mzvi-Gezz4E!S<$bk>PQ@7Y4MWsqkt3F&CaO=EI7L}X)yZ#{K1jq zxPE(g`EP(z{-{5=1JL?<%Ya>ggI+%DS&QeFT^y4Kn6c`kXwS9q^EMK(N_d!YXoSry_1CE)WwH@=AI_uqRtaCxFQuZgVEj7Nah{Z)Wf5-UePcB>nnBU&EXe!3J zuq__*DQ@WY=_#yVYf>BZb=;ozZq)?ubu4zq2EdWV?g?0*-1axm?ggCu##51zfH|)k zo!FPbqs?QkVE@Z^G-KbVytk$R-$UNT?`~k7?C%F+uj6}Jx3jS~#y`AuRSfoT%YmkN zyr=z{>t@76&y`ldloc~#u)j$IH^2BC^4Y&mc?;tWQ;r|P^*l?H4d0jPMD#Mi)MGz3 z{ep4Uyz?){Gw+YfKrAf%Xzf_QCz|tnSqc8Iv7|ppF-RKy?BE{{Ng$~p$39qxZoW6(somJsTvFa+OjU zLhECr^}v`W8Y7=_!)qW9(VP;fZ92$6kf9*(X(ln02XzarRS%j!-l>q*y%+zV(mF@2 zoBgLWni}te(WAoY^QLg?RBoNdt<$-62Di?n>nO%Qf@Oh=H0@%fb}@=eI?C^B9L^HQ zN-4|XS4T(3CT^BuV|HYq(p%wT-+x9}Q8n7-2snm&{J=$mgVyc;04=|izFrsr0J@22 A82|tP literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.csv b/tests/testthat/data_files/STANDARDISE/std_2_d.csv new file mode 100644 index 00000000..0a5a46eb --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_2_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19 +High,NA,NA,C,2,Large,Left,o,28.7577520124614,31 +High,NA,NA,C,2,Large,Left,s,78.8305135443807,79 +High,NA,NA,C,2,Large,Left,n,40.89769218117,51 +Medium,Green,No,B,3,Medium,Right,c,88.3017404004931,14 +High,NA,NA,C,2,Large,Left,j,94.0467284293845,67 +Medium,Green,No,B,3,Medium,Right,r,4.55564993899316,42 +Medium,Green,No,B,3,Medium,Right,v,52.8105488047004,50 +Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43 +High,NA,NA,C,2,Large,Left,e,55.1435014465824,14 +Low,Red,NA,A,1,Small,Left,t,45.6614735303447,25 +Medium,Green,No,B,3,Medium,Right,n,95.6833345349878,90 +Medium,Green,No,B,3,Medium,Right,v,45.3334156190977,91 +Low,Red,No,A,1,Small,Right,y,67.7570635452867,69 +Medium,Green,NA,B,3,Medium,Left,z,57.2633401956409,91 +High,NA,No,C,2,Large,Right,e,10.2924682665616,57 +Low,Red,NA,A,1,Small,Left,s,89.9824970401824,92 +High,NA,No,C,2,Large,Right,y,24.608773435466,9 +High,NA,NA,C,2,Large,Left,y,4.20595335308462,93 +Low,Red,NA,A,1,Small,Left,i,32.7920719282702,99 +Low,Red,NA,A,1,Small,Left,c,95.4503649147227,72 +Low,Red,NA,A,1,Small,Left,h,88.9539316063747,26 +Low,Red,No,A,1,Small,Right,z,69.28034061566,7 +High,NA,NA,C,2,Large,Left,g,64.0506813768297,42 +Medium,Green,NA,B,3,Medium,Left,j,99.4269776623696,9 +High,NA,NA,C,2,Large,Left,i,65.5705799115822,83 +Medium,Green,NA,B,3,Medium,Left,s,70.8530468167737,36 +Low,Red,No,A,1,Small,Right,d,54.4066024711356,78 +Medium,Green,No,B,3,Medium,Right,n,59.414202044718,81 +High,NA,NA,C,2,Large,Left,q,28.915973729454,43 +Medium,Green,No,B,3,Medium,Right,k,14.7113647311926,76 +Low,Red,NA,A,1,Small,Left,g,96.3024232536554,15 +High,NA,No,C,2,Large,Right,u,90.2299045119435,32 +High,NA,NA,C,2,Large,Left,l,69.0705278422683,7 +Low,Red,No,A,1,Small,Right,o,79.5467417687178,9 +High,NA,No,C,2,Large,Right,j,2.46136845089495,41 +Medium,Green,NA,B,3,Medium,Left,m,47.7795971091837,74 +Low,Red,NA,A,1,Small,Left,g,75.8459537522867,23 +High,NA,NA,C,2,Large,Left,i,21.6407935833558,27 +Low,Red,NA,A,1,Small,Left,i,31.8181007634848,60 +Low,Red,No,A,1,Small,Right,j,23.1625785352662,53 +Medium,Green,NA,B,3,Medium,Left,w,14.2800022382289,7 +High,NA,No,C,2,Large,Right,u,41.4546335814521,53 +High,NA,No,C,2,Large,Right,g,41.3724326295778,27 +Low,Red,NA,A,1,Small,Left,u,36.8845450924709,96 +High,NA,NA,C,2,Large,Left,f,15.2444747742265,38 +Low,Red,NA,A,1,Small,Left,y,13.880606344901,89 +High,NA,NA,C,2,Large,Left,b,23.3034099452198,34 +Medium,Green,No,B,3,Medium,Right,e,46.5962450252846,93 +Low,Red,NA,A,1,Small,Left,h,26.5972640365362,69 +Medium,Green,NA,B,3,Medium,Left,l,85.7827715342864,72 +Low,Red,No,A,1,Small,Right,m,4.58311666734517,76 +Low,Red,NA,A,1,Small,Left,r,44.2200074205175,63 +High,NA,NA,C,2,Large,Left,a,79.8924845643342,13 +Low,Red,NA,A,1,Small,Left,y,12.189925997518,82 +Medium,Green,NA,B,3,Medium,Left,y,56.0947983758524,97 +Low,Red,No,A,1,Small,Right,f,20.653138961643,91 +Low,Red,No,A,1,Small,Right,u,12.7531650243327,25 +High,NA,NA,C,2,Large,Left,o,75.3307864302769,38 +Low,Red,No,A,1,Small,Right,i,89.50453591533,21 +Medium,Green,NA,B,3,Medium,Left,o,37.4462775886059,79 +Low,Red,NA,A,1,Small,Left,z,66.5115194628015,41 +High,NA,No,C,2,Large,Right,p,9.48406609240919,47 +Low,Red,No,A,1,Small,Right,t,38.3969637798145,90 +High,NA,NA,C,2,Large,Left,f,27.438364457339,60 +Medium,Green,NA,B,3,Medium,Left,k,81.4640038879588,95 +High,NA,No,C,2,Large,Right,h,44.8516341391951,16 +Medium,Green,NA,B,3,Medium,Left,v,81.0064353048801,94 +Medium,Green,NA,B,3,Medium,Left,v,81.2389509519562,6 +High,NA,NA,C,2,Large,Left,g,79.4342321110889,72 +Medium,Green,NA,B,3,Medium,Left,p,43.9831687603146,86 +Medium,Green,No,B,3,Medium,Right,q,75.4475158639252,86 +High,NA,NA,C,2,Large,Left,v,62.9221131559461,39 +High,NA,NA,C,2,Large,Left,r,71.0182401351631,31 +Low,Red,NA,A,1,Small,Left,q,0.0624773325398564,81 +Medium,Green,NA,B,3,Medium,Left,b,47.5316574098542,50 +Medium,Green,No,B,3,Medium,Right,d,22.0118885161355,34 +Low,Red,No,A,1,Small,Right,m,37.9816537722945,4 +Medium,Green,NA,B,3,Medium,Left,e,61.2771003274247,13 +Low,Red,No,A,1,Small,Right,v,35.1797909243032,69 +Low,Red,No,A,1,Small,Right,s,11.1135424347594,25 +Medium,Green,No,B,3,Medium,Right,y,24.3619472719729,52 +High,NA,No,C,2,Large,Right,t,66.805558744818,22 +High,NA,NA,C,2,Large,Left,v,41.7646779678762,89 +Low,Red,No,A,1,Small,Right,y,78.8195834029466,32 +Medium,Green,No,B,3,Medium,Right,n,10.2864644257352,25 +Low,Red,No,A,1,Small,Right,y,43.4892741497606,87 +Medium,Green,NA,B,3,Medium,Left,w,98.4956979984418,35 +Low,Red,NA,A,1,Small,Left,c,89.3051114398986,40 +High,NA,No,C,2,Large,Right,h,88.6469060787931,30 +High,NA,NA,C,2,Large,Left,p,17.5052650272846,12 +Medium,Green,No,B,3,Medium,Right,l,13.0695691565052,31 +High,NA,No,C,2,Large,Right,y,65.3101925039664,30 +Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64 +Medium,Green,No,B,3,Medium,Right,c,65.6758127966896,99 +Medium,Green,No,B,3,Medium,Right,n,32.0373242488131,14 +High,NA,NA,C,2,Large,Left,g,18.7691119266674,93 +Medium,Green,NA,B,3,Medium,Left,c,78.2294301316142,96 +Low,Red,No,A,1,Small,Right,w,9.35949867125601,71 +High,NA,NA,C,2,Large,Left,v,46.677904156968,67 +High,NA,NA,C,2,Large,Left,z,51.1505459900945,23 diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.rda b/tests/testthat/data_files/STANDARDISE/std_2_d.rda new file mode 100644 index 0000000000000000000000000000000000000000..cad2198a28fd31f5f70c1df6d297389e536bef15 GIT binary patch literal 2461 zcmV;O31apiiwFP!000001I1eDb{xeO?ioqm;#dw2aUdIkBqSjewJ**BCKy9vAT~=( zHZhh)vVttZNLUAcm|KT^^a`A97`ebc; z_h7s=o)qtv?~V4yf%ffV`trR;f_Jr4`|AY11s`3*as2L+#;J}k}*igzX7Vex(*t>ls0T(0}<@6)f`Q~Gw; zAuHQAlTv4P#t+9k`&gCHe|jeR`v;Tl-L18U<2DIaj*7Tn4spMHEHmWX8{1p=!+Pl! zQYDCeYrL`j*@NkU8HHBXcSigBflJadC11>|;eVR=#~(j))LL-%=xfmL+|fLhV(AQn z$fZ(rN0}}6sX0?V-pmO9KK@38=PK1BJv%#7J{|mq<#T?vq-VSa9s6eXXTSN`URuc` z<5jJTvz5G_eKWEOIf++k^Yr;d={AhneRDD%@5X$)Gu~(x%&@N{=})c2 zr=<0rEoJrTGf&QFR)ld=Q+~rJ#*Ap^YbDM7b%B1sxFpVGj*G)`s&P}imT83DYd+vYdx&b&Ku0 z36^KOV%T@1EB6nZIv(?AeX;M$;V%38^y|v=GUxj8d~nL!+sducWNTd9*}|)LAB=W( zns0Il`O39=>~+vZD}THAl(fFH^LpnZ?;k~G7r&F*(B+dp-t+1#Tc7Gew|r~7c@R_V z&i2;*gXVGED^H=uiH7mLxY&=2c&Qz%$Hl+nViFe* zHBxj&{m$I1gzi;3CrWbfCslA0pN}iW9#)I3iqEIjo=Np8*}hR(Iun@*W^dQJHRi2zXEH%9RLRS8{hy2i~tJU0DcR2 zX8#19Zl+VgPl1ns4}qJ&%YpxO;0?e6zXN{UOnbpS;9tP&!0l$*GZb(OcoO&ta1~eu zehvHz_!;mW;1ckCfCE1OJ^=hbz7XIJ@GamP@Lk{{@F(COz|R4H8956)5Bv~#A9x9P z7kCf&3-BWFXW$ROo6WS3N#J|HFM!_z&o#&Yj?+8^yav1gybYWKegmune+TXYPc+kh z_zLhW@G9^Oa0Pe^I0FbkXKnm))Aq|JD5085V}#&JIIFY_Z!U8pH51xOMXi%gMY<%M zDyM`{f?BD0OIHfVof8&dI-^^`nAX||shEj$M}z`js}18yQ`XX@l~il$xTKD`NN1FA ztEHBbDDa0**IMaTE@Ol-p(K|=8b}KCRmKT}hFm#k6tihNCq_YSMR7pzq$U&a zkW<<*g?CBpD(KcyO|_8Hwc0bd$5Ki;7nXDDGP;2}S{nGm@m5PkxRlZ$1d?-|yvz*o z(!4r`!_A3qiGeQ1JmAq}6{kc)efSe!SOKHp7Z}Ci9GBD)iZJ7h^O*^6wEHzu0eUF^!q9d86CQ4gxmArETbEg4|r>1 zO;)!81?RYtOhO3N8Qr09HTF`$+6|P@ga8$_3C|gm z(KTXfd>=s#wRE2&=t;pDIP6-!Zt7Q~025N172N4ssc-2zi^-*PJ-cp$SvyX*zjXOF|XZNN&XmssX9Y-j{a$|kBv$0QI0v&AO9gXNFp$3VM>_Bg42z?tv!_d9pbIN?3wgqi` zs!@Zlgh1QWE~CS_3WjO{Pw!Md$9EFBV=YR+CjM3tV|`jFFt=?oSTr8WP(e5&UF%QK(J>spa_AVdbU4j_C_2!AX_t zkoxc@dIiVq(N?rW)RFZVNtkctCZi+s;9q#b+1C3`Lrd<%g-?@eKP%1>=?4JRij}g> zafj$~kPW_*0xjuLF16bxb}l- z9H#xy=!a!LRG$qz{5wCK`r+3Pn70E@KPda*_?fWR@AE^lA9nrV><42%aI5fM1=>f= zbcpqX`m;gqjUd`ut??kVM7P-}W3G9sRc_ bhc9f54n|~if+JHOG++K7j6ZI{q$U6WOC;Vg literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_3.csv b/tests/testthat/data_files/STANDARDISE/std_3.csv new file mode 100644 index 00000000..583c9a22 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_3.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20 +High,Blue,Yes,C,Three,31,TRUE,o,28.757752012461424 +High,Blue,Yes,C,Three,79,TRUE,s,78.83051354438066 +High,Blue,Yes,C,Three,51,TRUE,n,40.89769218116999 +Medium,NA,NA,B,Two,14,FALSE,c,88.301740400493145 +High,Blue,Yes,C,Three,67,TRUE,j,94.04672842938453 +Medium,NA,NA,B,Two,42,FALSE,r,4.555649938993156 +Medium,NA,NA,B,Two,50,FALSE,v,52.810548804700375 +Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046 +High,Blue,Yes,C,Three,14,TRUE,e,55.14350144658238 +Low,NA,Yes,A,One,25,TRUE,t,45.661473530344665 +Medium,NA,NA,B,Two,90,FALSE,n,95.68333453498781 +Medium,NA,NA,B,Two,91,FALSE,v,45.33341561909765 +Low,NA,NA,A,One,69,FALSE,y,67.75706354528666 +Medium,NA,Yes,B,Two,91,TRUE,z,57.26334019564092 +High,Blue,NA,C,Three,57,FALSE,e,10.292468266561627 +Low,NA,Yes,A,One,92,TRUE,s,89.98249704018235 +High,Blue,NA,C,Three,9,FALSE,y,24.60877343546599 +High,Blue,Yes,C,Three,93,TRUE,y,4.205953353084624 +Low,NA,Yes,A,One,99,TRUE,i,32.79207192827016 +Low,NA,Yes,A,One,72,TRUE,c,95.45036491472274 +Low,NA,Yes,A,One,26,TRUE,h,88.95393160637468 +Low,NA,NA,A,One,7,FALSE,z,69.28034061565995 +High,Blue,Yes,C,Three,42,TRUE,g,64.05068137682974 +Medium,NA,Yes,B,Two,9,TRUE,j,99.42697766236961 +High,Blue,Yes,C,Three,83,TRUE,i,65.57057991158217 +Medium,NA,Yes,B,Two,36,TRUE,s,70.85304681677371 +Low,NA,NA,A,One,78,FALSE,d,54.40660247113556 +Medium,NA,NA,B,Two,81,FALSE,n,59.41420204471797 +High,Blue,Yes,C,Three,43,TRUE,q,28.91597372945398 +Medium,NA,NA,B,Two,76,FALSE,k,14.711364731192589 +Low,NA,Yes,A,One,15,TRUE,g,96.30242325365543 +High,Blue,NA,C,Three,32,FALSE,u,90.22990451194346 +High,Blue,Yes,C,Three,7,TRUE,l,69.07052784226835 +Low,NA,NA,A,One,9,FALSE,o,79.54674176871777 +High,Blue,NA,C,Three,41,FALSE,j,2.461368450894952 +Medium,NA,Yes,B,Two,74,TRUE,m,47.77959710918367 +Low,NA,Yes,A,One,23,TRUE,g,75.84595375228673 +High,Blue,Yes,C,Three,27,TRUE,i,21.640793583355844 +Low,NA,Yes,A,One,60,TRUE,i,31.818100763484836 +Low,NA,NA,A,One,53,FALSE,j,23.16257853526622 +Medium,NA,Yes,B,Two,7,TRUE,w,14.280002238228917 +High,Blue,NA,C,Three,53,FALSE,u,41.45463358145207 +High,Blue,NA,C,Three,27,FALSE,g,41.372432629577816 +Low,NA,Yes,A,One,96,TRUE,u,36.884545092470944 +High,Blue,Yes,C,Three,38,TRUE,f,15.244474774226546 +Low,NA,Yes,A,One,89,TRUE,y,13.880606344901025 +High,Blue,Yes,C,Three,34,TRUE,b,23.303409945219755 +Medium,NA,NA,B,Two,93,FALSE,e,46.59624502528459 +Low,NA,Yes,A,One,69,TRUE,h,26.597264036536217 +Medium,NA,Yes,B,Two,72,TRUE,l,85.78277153428644 +Low,NA,NA,A,One,76,FALSE,m,4.583116667345166 +Low,NA,Yes,A,One,63,TRUE,r,44.220007420517504 +High,Blue,Yes,C,Three,13,TRUE,a,79.89248456433415 +Low,NA,Yes,A,One,82,TRUE,y,12.189925997518003 +Medium,NA,Yes,B,Two,97,TRUE,y,56.094798375852406 +Low,NA,NA,A,One,91,FALSE,f,20.65313896164298 +Low,NA,NA,A,One,25,FALSE,u,12.753165024332702 +High,Blue,Yes,C,Three,38,TRUE,o,75.33078643027693 +Low,NA,NA,A,One,21,FALSE,i,89.50453591533005 +Medium,NA,Yes,B,Two,79,TRUE,o,37.44627758860588 +Low,NA,Yes,A,One,41,TRUE,z,66.51151946280152 +High,Blue,NA,C,Three,47,FALSE,p,9.484066092409194 +Low,NA,NA,A,One,90,FALSE,t,38.39696377981454 +High,Blue,Yes,C,Three,60,TRUE,f,27.43836445733905 +Medium,NA,Yes,B,Two,95,TRUE,k,81.46400388795882 +High,Blue,NA,C,Three,16,FALSE,h,44.851634139195085 +Medium,NA,Yes,B,Two,94,TRUE,v,81.00643530488014 +Medium,NA,Yes,B,Two,6,TRUE,v,81.23895095195621 +High,Blue,Yes,C,Three,72,TRUE,g,79.43423211108893 +Medium,NA,Yes,B,Two,86,TRUE,p,43.983168760314584 +Medium,NA,NA,B,Two,86,FALSE,q,75.44751586392522 +High,Blue,Yes,C,Three,39,TRUE,v,62.922113155946136 +High,Blue,Yes,C,Three,31,TRUE,r,71.01824013516307 +Low,NA,Yes,A,One,81,TRUE,q,0.062477332539856434 +Medium,NA,Yes,B,Two,50,TRUE,b,47.53165740985423 +Medium,NA,NA,B,Two,34,FALSE,d,22.011888516135514 +Low,NA,NA,A,One,4,FALSE,m,37.98165377229452 +Medium,NA,Yes,B,Two,13,TRUE,e,61.277100327424705 +Low,NA,NA,A,One,69,FALSE,v,35.179790924303234 +Low,NA,NA,A,One,25,FALSE,s,11.113542434759438 +Medium,NA,NA,B,Two,52,FALSE,y,24.361947271972895 +High,Blue,NA,C,Three,22,FALSE,t,66.80555874481797 +High,Blue,Yes,C,Three,89,TRUE,v,41.764677967876196 +Low,NA,NA,A,One,32,FALSE,y,78.81958340294659 +Medium,NA,NA,B,Two,25,FALSE,n,10.286464425735176 +Low,NA,NA,A,One,87,FALSE,y,43.489274149760604 +Medium,NA,Yes,B,Two,35,TRUE,w,98.49569799844176 +Low,NA,Yes,A,One,40,TRUE,c,89.30511143989861 +High,Blue,NA,C,Three,30,FALSE,h,88.64690607879311 +High,Blue,Yes,C,Three,12,TRUE,p,17.505265027284622 +Medium,NA,NA,B,Two,31,FALSE,l,13.069569156505167 +High,Blue,NA,C,Three,30,FALSE,y,65.31019250396639 +Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399 +Medium,NA,NA,B,Two,99,FALSE,c,65.67581279668957 +Medium,NA,NA,B,Two,14,FALSE,n,32.03732424881309 +High,Blue,Yes,C,Three,93,TRUE,g,18.769111926667392 +Medium,NA,Yes,B,Two,96,TRUE,c,78.22943013161421 +Low,NA,NA,A,One,71,FALSE,w,9.359498671256006 +High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968 +High,Blue,Yes,C,Three,23,TRUE,z,51.15054599009454 diff --git a/tests/testthat/data_files/STANDARDISE/std_3.rda b/tests/testthat/data_files/STANDARDISE/std_3.rda new file mode 100644 index 0000000000000000000000000000000000000000..8036267480506dd3af4291f4e142386a083abad1 GIT binary patch literal 1597 zcmV-D2EzFtiwFP!000001D#k4P*hbIKHR%XAQ+QVN}A%zpg@|yBBYWR70wNfQ z%RT^ISX~x2b#&C2%Elp+9;A-iFL< zf^~yH23-yZcJ$2u2U*9mPM37L#Eb{&WIb4Wl0Xk)?C8fOW={|^$J&*c`9Nc=4Smk6 zY?QxIeC!&wwvxb?ZryD?>+g0pI^FdkMfA3~x0BOqs;a3h=Xw0<;u%gyO+zhXqMzMu zt5+>(c3n2R&*KH@mgpp`Bt3sUtb*0qas6JSc_Vv9enorOb^jMvps_%R+10*OMP!E^Guq2%X54KfKYjHnvUj&R=)sN!$|10S zVOQl+XZ;;DRewRV7ZQ6xvlpP~g4178qKi_m8(m&T7pCYTU>^_BgTju99uvtxHp<^9 zKGCBfdh85paa&W>YdR=h6*5#|>K!$iZKyuzzBELiydJDQ(F<*KaM%Wk3e4ISz4At5 zqC>|vgY_#qXrlPoH31qbs)ex0a(VIvJkF%pKUZM(1xn1?#8cnF-;;H8BRB@r!aF!ihV2imdG&>Ed;N}XY zxv1rK)o_Bg*w0z!eYv>YE4e~dbAf4LXx*_H7t_T#^>8t3BU*VEan052EaU=fkfa$w3yw-cI@0p6*uMQ;#o?1?@os@x#MQY zc(_mt+ee$>)>bmTms&mpE*_M>_XhBs)m^Wq0pFK;GY{A0RvnH&F3w+8b_96(`g!S< zz}aWo;$m?9PwyW-hWcB(%6(atg-~~~v)~k_6(VqNRIlPWa$+^M}!%=rM**4<*-d5`_ zjK?xy?IGY9?WXlF15duPJbf7OSa(YIO58VS?xhH{+q}190q)Cu>X`|c|94%dFfVz} zg{um1&i5aGz81LP&*-7>aw^H&286*&txP%P2~P!1iQW7Gyjse`&zJeoUgm*>x{JuW z+rEKUPsv!EdJJA6W!&=klV~^o$k%80;l3F=vr*5=fbvwpURQ^sH);YUYD)|F-rN*-rIY0Nt zZ|MKriH2psSzVn=iqOybZIKvHeq-<_C$WAVah=fDDZ9IZs|n_HG`w^J@C1246xJuB z>y0yefM>q`WPkWFP|{zK?bw%?iRMX{vHxW|TCnf4-fhmt^N@Mr+v`}T!uNdP*YLco zZ}Sg9|A)4&3B&$v+20h2c@{o>&5RuDy4(Sru&Oi+`x`fE^9#?Sy~3{&-b8<6rDF$C zU#K)$@qC$%hpqrlJo-b^&*-Q5?SIgpd0#{_a!&Dw>)?w+mAn^-Jf})V_ v?(R*@_pJH}+}~H}u5hS#stg(xLmf7sZJf&swn*>C?yY|Tz&gqhuoM6QKWHU0 literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.csv b/tests/testthat/data_files/STANDARDISE/std_3_d.csv new file mode 100644 index 00000000..5dfce08f --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_3_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20 +High,Blue,Yes,C,Three,31,TRUE,o,28.7577520124614 +High,Blue,Yes,C,Three,79,TRUE,s,78.8305135443807 +High,Blue,Yes,C,Three,51,TRUE,n,40.89769218117 +Medium,NA,NA,B,Two,14,FALSE,c,88.3017404004931 +High,Blue,Yes,C,Three,67,TRUE,j,94.0467284293845 +Medium,NA,NA,B,Two,42,FALSE,r,4.55564993899316 +Medium,NA,NA,B,Two,50,FALSE,v,52.8105488047004 +Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046 +High,Blue,Yes,C,Three,14,TRUE,e,55.1435014465824 +Low,NA,Yes,A,One,25,TRUE,t,45.6614735303447 +Medium,NA,NA,B,Two,90,FALSE,n,95.6833345349878 +Medium,NA,NA,B,Two,91,FALSE,v,45.3334156190977 +Low,NA,NA,A,One,69,FALSE,y,67.7570635452867 +Medium,NA,Yes,B,Two,91,TRUE,z,57.2633401956409 +High,Blue,NA,C,Three,57,FALSE,e,10.2924682665616 +Low,NA,Yes,A,One,92,TRUE,s,89.9824970401824 +High,Blue,NA,C,Three,9,FALSE,y,24.608773435466 +High,Blue,Yes,C,Three,93,TRUE,y,4.20595335308462 +Low,NA,Yes,A,One,99,TRUE,i,32.7920719282702 +Low,NA,Yes,A,One,72,TRUE,c,95.4503649147227 +Low,NA,Yes,A,One,26,TRUE,h,88.9539316063747 +Low,NA,NA,A,One,7,FALSE,z,69.28034061566 +High,Blue,Yes,C,Three,42,TRUE,g,64.0506813768297 +Medium,NA,Yes,B,Two,9,TRUE,j,99.4269776623696 +High,Blue,Yes,C,Three,83,TRUE,i,65.5705799115822 +Medium,NA,Yes,B,Two,36,TRUE,s,70.8530468167737 +Low,NA,NA,A,One,78,FALSE,d,54.4066024711356 +Medium,NA,NA,B,Two,81,FALSE,n,59.414202044718 +High,Blue,Yes,C,Three,43,TRUE,q,28.915973729454 +Medium,NA,NA,B,Two,76,FALSE,k,14.7113647311926 +Low,NA,Yes,A,One,15,TRUE,g,96.3024232536554 +High,Blue,NA,C,Three,32,FALSE,u,90.2299045119435 +High,Blue,Yes,C,Three,7,TRUE,l,69.0705278422683 +Low,NA,NA,A,One,9,FALSE,o,79.5467417687178 +High,Blue,NA,C,Three,41,FALSE,j,2.46136845089495 +Medium,NA,Yes,B,Two,74,TRUE,m,47.7795971091837 +Low,NA,Yes,A,One,23,TRUE,g,75.8459537522867 +High,Blue,Yes,C,Three,27,TRUE,i,21.6407935833558 +Low,NA,Yes,A,One,60,TRUE,i,31.8181007634848 +Low,NA,NA,A,One,53,FALSE,j,23.1625785352662 +Medium,NA,Yes,B,Two,7,TRUE,w,14.2800022382289 +High,Blue,NA,C,Three,53,FALSE,u,41.4546335814521 +High,Blue,NA,C,Three,27,FALSE,g,41.3724326295778 +Low,NA,Yes,A,One,96,TRUE,u,36.8845450924709 +High,Blue,Yes,C,Three,38,TRUE,f,15.2444747742265 +Low,NA,Yes,A,One,89,TRUE,y,13.880606344901 +High,Blue,Yes,C,Three,34,TRUE,b,23.3034099452198 +Medium,NA,NA,B,Two,93,FALSE,e,46.5962450252846 +Low,NA,Yes,A,One,69,TRUE,h,26.5972640365362 +Medium,NA,Yes,B,Two,72,TRUE,l,85.7827715342864 +Low,NA,NA,A,One,76,FALSE,m,4.58311666734517 +Low,NA,Yes,A,One,63,TRUE,r,44.2200074205175 +High,Blue,Yes,C,Three,13,TRUE,a,79.8924845643342 +Low,NA,Yes,A,One,82,TRUE,y,12.189925997518 +Medium,NA,Yes,B,Two,97,TRUE,y,56.0947983758524 +Low,NA,NA,A,One,91,FALSE,f,20.653138961643 +Low,NA,NA,A,One,25,FALSE,u,12.7531650243327 +High,Blue,Yes,C,Three,38,TRUE,o,75.3307864302769 +Low,NA,NA,A,One,21,FALSE,i,89.50453591533 +Medium,NA,Yes,B,Two,79,TRUE,o,37.4462775886059 +Low,NA,Yes,A,One,41,TRUE,z,66.5115194628015 +High,Blue,NA,C,Three,47,FALSE,p,9.48406609240919 +Low,NA,NA,A,One,90,FALSE,t,38.3969637798145 +High,Blue,Yes,C,Three,60,TRUE,f,27.438364457339 +Medium,NA,Yes,B,Two,95,TRUE,k,81.4640038879588 +High,Blue,NA,C,Three,16,FALSE,h,44.8516341391951 +Medium,NA,Yes,B,Two,94,TRUE,v,81.0064353048801 +Medium,NA,Yes,B,Two,6,TRUE,v,81.2389509519562 +High,Blue,Yes,C,Three,72,TRUE,g,79.4342321110889 +Medium,NA,Yes,B,Two,86,TRUE,p,43.9831687603146 +Medium,NA,NA,B,Two,86,FALSE,q,75.4475158639252 +High,Blue,Yes,C,Three,39,TRUE,v,62.9221131559461 +High,Blue,Yes,C,Three,31,TRUE,r,71.0182401351631 +Low,NA,Yes,A,One,81,TRUE,q,0.0624773325398564 +Medium,NA,Yes,B,Two,50,TRUE,b,47.5316574098542 +Medium,NA,NA,B,Two,34,FALSE,d,22.0118885161355 +Low,NA,NA,A,One,4,FALSE,m,37.9816537722945 +Medium,NA,Yes,B,Two,13,TRUE,e,61.2771003274247 +Low,NA,NA,A,One,69,FALSE,v,35.1797909243032 +Low,NA,NA,A,One,25,FALSE,s,11.1135424347594 +Medium,NA,NA,B,Two,52,FALSE,y,24.3619472719729 +High,Blue,NA,C,Three,22,FALSE,t,66.805558744818 +High,Blue,Yes,C,Three,89,TRUE,v,41.7646779678762 +Low,NA,NA,A,One,32,FALSE,y,78.8195834029466 +Medium,NA,NA,B,Two,25,FALSE,n,10.2864644257352 +Low,NA,NA,A,One,87,FALSE,y,43.4892741497606 +Medium,NA,Yes,B,Two,35,TRUE,w,98.4956979984418 +Low,NA,Yes,A,One,40,TRUE,c,89.3051114398986 +High,Blue,NA,C,Three,30,FALSE,h,88.6469060787931 +High,Blue,Yes,C,Three,12,TRUE,p,17.5052650272846 +Medium,NA,NA,B,Two,31,FALSE,l,13.0695691565052 +High,Blue,NA,C,Three,30,FALSE,y,65.3101925039664 +Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399 +Medium,NA,NA,B,Two,99,FALSE,c,65.6758127966896 +Medium,NA,NA,B,Two,14,FALSE,n,32.0373242488131 +High,Blue,Yes,C,Three,93,TRUE,g,18.7691119266674 +Medium,NA,Yes,B,Two,96,TRUE,c,78.2294301316142 +Low,NA,NA,A,One,71,FALSE,w,9.35949867125601 +High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968 +High,Blue,Yes,C,Three,23,TRUE,z,51.1505459900945 diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.rda b/tests/testthat/data_files/STANDARDISE/std_3_d.rda new file mode 100644 index 0000000000000000000000000000000000000000..4165f09836d3a23ed62cd6ad8e3179416d1a16d5 GIT binary patch literal 2343 zcmV+?3E1`@iwFP!000001I1bCb{t0)F3m{sLa;;(goG_DAz?{T`(lWGckuw-lRUw{3jz!(6=R@f@LfakgS;r6E9+{V$%*aBMl51!?bCy!>6k2ZJr52riR zS@mx9=43ui*cVUm=J{EJ^>|&L%w`8iSFYeO`DF9fWb5#tX1R89WPm3 z{ixWFDtx6_msIl`E!eMAd`f&&eX~-%tND(q_sg)FM`?4t92`^JgPlyh(F?%Yo6@ta7MB=)W8 z_U_#~%?GCxTHD&2%;$+q)-oqwn^&WMvGyN*{={)-!MWo{px^o9Wh%9$GfE=Iwde(9 zwmPKdVEH7IPo6E+`x-C59eO{+Z%FQPkeqWLwZ$1yjp?oglQ)5Hl*0P55StjSKG8gf?UmHrQv1K+am;XRnHU3~a zXOAPDKab!?n$6`!rXJko@NV}j6<%Ki%3)p)`FcMz5jISRZu*Z@0 z4@IH+XUx;5e#YR3@$%m=b;5re^bSaaE`g{Fe*{vXA1CY?(4Rqn0bNVa2>L5K`Hjy0 z2HFC>0eTj6F7cpue*k?S^fu_-#PdziI_SA{md^bU^jk6q^mWkBK{ma&3Ht}=2cQo? z-%WC_CAq}&yr^z0Yv6-CxcXD*%^~XdnB>%jd{6R8TlZGWK}{sj6S=ta=CKu;zQd65Z2EQb`_m~l7g_)B3aogtV!sHhpNsg^ z*wDANtRa1t$vLacMf~p92DSuSX2UZ1pQq-x+qrhoF6QlGzg=v#i+{F@S-ZH`E;nL^uF5d zE%M=`9_PNSTRq!v_8fJ)r&v}aM`tqZ%v(L72R)rLlHL1JFSwb{hdssa^%grlJ|Fka z%zCe~?b|&|r@hbn`E%GilV9!kkQ|+Uj%A(qhWTvuke%MEqnIY}dRyn>rE9=L}tCp_TMY?Tt+L4EihE-*R= zQF3R5j~X%1)@Ey!;zG(0f|Ure&0>}2(!syM!o#i$E^sY0;t`EiGIo3n*G@*`m6y@0 znAI`diGZ{+h=9x5E5|L4K_v}sGy8SrxzPdMF$k^BPE#<}dT*7G2(avo4IJT!UPGd> zIb*ftK?W&=M^J*vN@^IZy;ewt5Tup`?xPaWTm|JtHdezZ_ytBOI45RxwB^!AA1O28 zjqbcdDnJiQFTCi^!zvUA3<$xYW(qt8WSaJn00S}}xQ;G5;CP?$OGfYz5p@OQy81Da z!+xWcQxQ%I1rA*t!c9DsEO3wV0dMDnFX~pK;G*(I8VC`tz#|IR;jGZI%Rqw+SIBVW zt3Wy10;g=!1}>A)c5*G(8YpTLp7XxI9b)RKkD!J+oN@#`37Kz;%wGpi$_wHYU1u%| zh04|=IRmP_z;PN$BGE*G&g?DP0xHX({Dml-ZxxaUIEorE6gjD^;V0b)k)?K$-C&Un zNNyn!VjEO<-f^%+05!sH3*2*spcnz)ApdivSoAvyS1JJ?bnVDVj>JJOqZJ`Vh*dsNw+n5bx&$0-;T?zQW}zO5 zkL*Bimk50qLod<2;Bz6VPP>Bklxo6bN@%oA7YiKDwJ_9actRC8>K3yMEvD4c2Tp~l z!5{6h%PHUi;S10cuM0haYZL>lMH>?xAH-H0r4cVFLN^S6qe4+}h?SRJ=>kV3qIDrd zkSbYjfV~0``2zVV3mju6qVHJBs`5PMByuMNlt9S*gXKO@T3Ij;T`~kT9wAXd_{PM} zpTN;EBBmTV#%UZ*qalw7`z|>HqG(X%@InMLSK#CxiGqm%6Tgi*a4QT(4Z6A3#p#IP zMh|slQCU$?RKOja5S(1O5vdPvqE{%~J-UjHh&r+!R}x9B+!r`95B`N0V(7dd9kf&w zE{rDIjg=UxMPrCCiN|H@?^?Q7L#~c5uX7CG(%=j*pN3Jz(p_?aASU- Ne*r@x227A5001-BpXmSq literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R index 9e129971..545467bc 100644 --- a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R +++ b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R @@ -69,4 +69,12 @@ upload_testing_dataset_table('datashield', 'testing', 'DATASET1', 'TESTING/DATAS upload_testing_dataset_table('datashield', 'testing', 'DATASET2', 'TESTING/DATASET2.rda') upload_testing_dataset_table('datashield', 'testing', 'DATASET3', 'TESTING/DATASET3.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_1', 'STANDARDISE/std_1.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_2', 'STANDARDISE/std_2.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_3', 'STANDARDISE/std_3.rda') + +upload_testing_dataset_table('datashield', 'standardise', 'std_1_d', 'STANDARDISE/std_1_d.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_2_d', 'STANDARDISE/std_2_d.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_3_d', 'STANDARDISE/std_3_d.rda') + print(MolgenisArmadillo::armadillo.list_tables('datashield')) From 256499fca2470661a11e1421c75cf9e4c85b54aa Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:53:12 +0200 Subject: [PATCH 3/9] fix: replace snapshot tests with regex, update NAMESPACE --- NAMESPACE | 19 +++++ man/ds.standardiseDf.Rd | 37 ++++++++++ tests/testthat/_snaps/smk-standardiseDf.md | 86 ---------------------- tests/testthat/test-smk-standardiseDf.R | 26 ++++--- 4 files changed, 71 insertions(+), 97 deletions(-) create mode 100644 man/ds.standardiseDf.Rd delete mode 100644 tests/testthat/_snaps/smk-standardiseDf.md diff --git a/NAMESPACE b/NAMESPACE index 28959252..228e959c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -105,6 +105,7 @@ export(ds.setDefaultOpals) export(ds.setSeed) export(ds.skewness) export(ds.sqrt) +export(ds.standardiseDf) export(ds.subset) export(ds.subsetByClass) export(ds.summary) @@ -121,6 +122,24 @@ export(ds.vectorCalc) export(subsetHelper) import(DSI) import(data.table) +import(dplyr) +importFrom(DSI,datashield.aggregate) +importFrom(DSI,datashield.assign) +importFrom(assertthat,assert_that) +importFrom(cli,cli_abort) +importFrom(cli,cli_alert_danger) +importFrom(cli,cli_alert_info) +importFrom(cli,cli_alert_success) +importFrom(cli,cli_alert_warning) +importFrom(cli,cli_end) +importFrom(cli,cli_li) +importFrom(cli,cli_ol) +importFrom(cli,cli_text) +importFrom(cli,cli_ul) +importFrom(purrr,map) +importFrom(purrr,map_lgl) +importFrom(purrr,pmap) +importFrom(purrr,pmap_lgl) importFrom(stats,as.formula) importFrom(stats,na.omit) importFrom(stats,ts) diff --git a/man/ds.standardiseDf.Rd b/man/ds.standardiseDf.Rd new file mode 100644 index 00000000..4f544f1d --- /dev/null +++ b/man/ds.standardiseDf.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ds.standardiseDf.R +\name{ds.standardiseDf} +\alias{ds.standardiseDf} +\title{Fill DataFrame with Missing Columns and Adjust Classes} +\usage{ +ds.standardiseDf( + df.name = NULL, + newobj = NULL, + fix_class = "ask", + fix_levels = "ask", + datasources = NULL +) +} +\arguments{ +\item{df.name}{Name of the input DataFrame to fill.} + +\item{newobj}{Name of the new DataFrame object created after filling.} + +\item{fix_class}{Character, determines behaviour if class of variables is not the same in all +studies. Option "ask" (default) provides the user with a prompt asking if they want to set the +class across all studies, option "no" will throw an error if class conflicts are present.} + +\item{fix_levels}{Character, determines behaviour if levels of factor variables is not the same +in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set +the levels of factor variables to be the same across all studies, whilst option "no" will throw +an error if factor variables do not have the same class.} + +\item{datasources}{Data sources from which to aggregate data. Default is `NULL`.} +} +\value{ +The filled DataFrame with added columns and adjusted classes or factor levels. +} +\description{ +This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary. +It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts. +} diff --git a/tests/testthat/_snaps/smk-standardiseDf.md b/tests/testthat/_snaps/smk-standardiseDf.md deleted file mode 100644 index 4fa52eca..00000000 --- a/tests/testthat/_snaps/smk-standardiseDf.md +++ /dev/null @@ -1,86 +0,0 @@ -# ask_question displays the correct prompt - - Code - ask_question_class("my_var") - Message - i Would you like to: - 1. Convert `my_var` to a factor in all studies - 2. Convert `my_var` to an integer in all studies - 3. Convert `my_var` to numeric in all studies - 4. Convert `my_var` to a character in all studies - 5. Convert `my_var` to a logical vector in all studies - 6. Cancel `ds.dataFrameFill` operation - -# print_all_classes prints the correct message - - Code - print_all_classes(c("server_1", "server_2", "server_3"), c("numeric", "factor", - "integer")) - Message - * server_1: numeric - * server_2: factor - * server_3: integer - -# .make_levels_message makes correct message - - Code - .make_levels_message(level_conflicts) - Message - ! Warning: factor variables fac_col2, fac_col3, fac_col6, and fac_col9 do not have the same levels in all studies - i Would you like to: - 1. Create the missing levels where they are not present - 2. Do nothing - 3. Cancel `ds.dataFrameFill` operation - -# .print_var_recode_message prints the correct message - - Code - .print_var_recode_message(added_cols, "test_df") - Message - v The following variables have been added to test_df: - i sim1 --> col11 - i sim2 --> col11 - i sim3 --> col12 - - -# .print_class_recode_message prints the correct message - - Code - .print_class_recode_message(class_decisions, different_classes, "test_df") - Message - v The following classes have been set for all datasources in test_df: - i fac_col4 --> factor - i fac_col5 --> logical - -# .print_levels_recode_message prints the correct message - - Code - .print_levels_recode_message(unique_levs, "test_df") - Message - v The following levels have been set for all datasources in test_df: - i fac_col2 --> Blue, Green, Red - i fac_col3 --> No, Yes - i fac_col6 --> Bird, Cat, Dog - i fac_col9 --> False, True - -# .print_out_messages prints the correct messages - - Code - .print_out_messages(added_cols, class_decisions, different_classes, unique_levs, - level_conflicts, "1", "test_df") - Message - v The following variables have been added to test_df: - i sim1 --> col11 - i sim2 --> col11 - i sim3 --> col12 - - v The following classes have been set for all datasources in test_df: - i fac_col4 --> factor - i fac_col5 --> logical - - v The following levels have been set for all datasources in test_df: - i fac_col2 --> Blue, Green, Red - i fac_col3 --> No, Yes - i fac_col6 --> Bird, Cat, Dog - i fac_col9 --> False, True - diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R index d557c325..d8e1ab77 100644 --- a/tests/testthat/test-smk-standardiseDf.R +++ b/tests/testthat/test-smk-standardiseDf.R @@ -144,7 +144,7 @@ test_that(".identify_class_conflicts returns correct output", { }) test_that("ask_question displays the correct prompt", { - expect_snapshot(ask_question_class("my_var")) + expect_message(ask_question_class("my_var"), "Convert.*my_var.*factor") }) test_that("ask_question_wait_response_class continues with valid response", { @@ -167,11 +167,12 @@ test_that("ask_question_wait_response_class throws error if option 6 selected", }) test_that("print_all_classes prints the correct message", { - expect_snapshot( + expect_message( print_all_classes( c("server_1", "server_2", "server_3"), c("numeric", "factor", "integer") - ) + ), + "server_1.*numeric" ) }) @@ -359,7 +360,7 @@ test_that("ask_question_wait_response_levels aborts with response of 3", { }) test_that(".make_levels_message makes correct message", { - expect_snapshot(.make_levels_message(level_conflicts)) + expect_message(.make_levels_message(level_conflicts), "factor variables.*fac_col2.*do not have the same levels") }) test_that(".get_unique_levels extracts all possible levels", { @@ -417,18 +418,20 @@ test_that(".set_factor_levels sets levels correctly", { }) test_that(".print_var_recode_message prints the correct message", { - expect_snapshot(.print_var_recode_message(added_cols, "test_df")) + expect_message(.print_var_recode_message(added_cols, "test_df"), "variables have been added to test_df") }) test_that(".print_class_recode_message prints the correct message", { - expect_snapshot( - .print_class_recode_message(class_decisions, different_classes, "test_df") + expect_message( + .print_class_recode_message(class_decisions, different_classes, "test_df"), + "classes have been set.*test_df" ) }) test_that(".print_levels_recode_message prints the correct message", { - expect_snapshot( - .print_levels_recode_message(unique_levs, "test_df") + expect_message( + .print_levels_recode_message(unique_levs, "test_df"), + "levels have been set.*test_df" ) }) @@ -445,10 +448,11 @@ test_that(".make_levels_recode_message prints the correct message", { }) test_that(".print_out_messages prints the correct messages", { - expect_snapshot( + expect_message( .print_out_messages( added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" - ) + ), + "variables have been added to test_df" ) }) From ed24ee43b15e88e4b87bdfbfc06018d034521dda Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:56:37 +0200 Subject: [PATCH 4/9] test: add perf test for ds.standardiseDf --- tests/testthat/test-perf-ds.standardiseDf.R | 58 +++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/testthat/test-perf-ds.standardiseDf.R diff --git a/tests/testthat/test-perf-ds.standardiseDf.R b/tests/testthat/test-perf-ds.standardiseDf.R new file mode 100644 index 00000000..ba514dbc --- /dev/null +++ b/tests/testthat/test-perf-ds.standardiseDf.R @@ -0,0 +1,58 @@ +# +# Set up +# + +# context("ds.standardiseDf::perf::setup") +connect.studies.dataset.stand( + c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9", + "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20") + ) + +# +# Tests +# + +# context("ds.standardiseDf::perf::0") +test_that("performance", { + .durationSec <- 60 # seconds + .count <- 0 + .start.time <- Sys.time() + .current.time <- .start.time + + while (difftime(.current.time, .start.time, units = "secs")[[1]] < .durationSec) { + with_mocked_bindings( + ds.standardiseDf(df.name = "D", newobj = "std.newobj"), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" + ) + + .count <- .count + 1 + .current.time <- Sys.time() + } + + .current.rate <- .count / (difftime(.current.time, .start.time, units = "secs")[[1]]) + .reference.rate <- perf.reference.rate("ds.standardiseDf::perf::0") + if (any(length(.reference.rate) == 0) || any(is.null(.reference.rate))) { + print(paste("ds.standardiseDf::perf::0 ", .current.rate, 0.5, 2.0)) + perf.reference.save("ds.standardiseDf::perf::0", .current.rate, 0.5, 2.0) + } else { + print(paste("ds.standardiseDf::perf::0 ", format(.current.rate, digits = 8), ", ", format(100.0 * .current.rate / .reference.rate, digits = 4), "%", sep = '')) + } + + .reference.rate <- perf.reference.rate("ds.standardiseDf::perf::0") + .reference.tolerance.lower <- perf.reference.tolerance.lower("ds.standardiseDf::perf::0") + .reference.tolerance.upper <- perf.reference.tolerance.upper("ds.standardiseDf::perf::0") + + expect_gt(.current.rate, .reference.rate * .reference.tolerance.lower, label = "Observed rate", expected.label = "lower threshold on rate") + expect_lt(.current.rate, .reference.rate * .reference.tolerance.upper, label = "Observed rate", expected.label = "upper threshold on rate") +}) + +# +# Done +# + +# context("ds.standardiseDf::perf::shutdown") +disconnect.studies.dataset.stand() +# context("ds.standardiseDf::perf::done") From eb57c2f796d5cdf37d3e0df26afd02dae7d97319 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Thu, 14 May 2026 10:49:31 +0200 Subject: [PATCH 5/9] trigger CI From 6740d5274ec692f1c33533f73e002317c5f4e18a Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 23 Jun 2026 12:28:02 +0200 Subject: [PATCH 6/9] added myself as author --- R/ds.standardiseDf.R | 1 + man/ds.standardiseDf.Rd | 3 +++ 2 files changed, 4 insertions(+) diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R index daaadbd6..d0fcdf3e 100644 --- a/R/ds.standardiseDf.R +++ b/R/ds.standardiseDf.R @@ -16,6 +16,7 @@ #' @importFrom assertthat assert_that #' @importFrom DSI datashield.aggregate datashield.assign #' @return The filled DataFrame with added columns and adjusted classes or factor levels. +#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands #' @export ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", fix_levels = "ask", datasources = NULL) { diff --git a/man/ds.standardiseDf.Rd b/man/ds.standardiseDf.Rd index 4f544f1d..49d45a44 100644 --- a/man/ds.standardiseDf.Rd +++ b/man/ds.standardiseDf.Rd @@ -35,3 +35,6 @@ The filled DataFrame with added columns and adjusted classes or factor levels. This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary. It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts. } +\author{ +Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands +} From aef399b48be5451d29bbe599290a2ce45107363d Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 23 Jun 2026 15:50:12 +0200 Subject: [PATCH 7/9] fixed import issues --- DESCRIPTION | 3 ++- NAMESPACE | 9 +++++++-- R/computeWeightedMeans.R | 2 +- R/ds.bp_standards.R | 1 - R/ds.standardiseDf.R | 3 +-- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0b271d26..d67b19c4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,6 +70,7 @@ Imports: data.table, methods, dplyr, + purrr, assertthat, cli Suggests: @@ -85,4 +86,4 @@ Suggests: DSLite Encoding: UTF-8 Language: en-GB -Config/roxygen2/version: 8.0.0 +Config/roxygen2/version: 8.0.0.9000 diff --git a/NAMESPACE b/NAMESPACE index 228e959c..d0baa073 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -121,8 +121,6 @@ export(ds.var) export(ds.vectorCalc) export(subsetHelper) import(DSI) -import(data.table) -import(dplyr) importFrom(DSI,datashield.aggregate) importFrom(DSI,datashield.assign) importFrom(assertthat,assert_that) @@ -136,6 +134,13 @@ importFrom(cli,cli_li) importFrom(cli,cli_ol) importFrom(cli,cli_text) importFrom(cli,cli_ul) +importFrom(data.table,.SD) +importFrom(dplyr,"%>%") +importFrom(dplyr,all_of) +importFrom(dplyr,bind_rows) +importFrom(dplyr,case_when) +importFrom(dplyr,row_number) +importFrom(dplyr,where) importFrom(purrr,map) importFrom(purrr,map_lgl) importFrom(purrr,pmap) diff --git a/R/computeWeightedMeans.R b/R/computeWeightedMeans.R index 1284ffc0..1e74d634 100644 --- a/R/computeWeightedMeans.R +++ b/R/computeWeightedMeans.R @@ -11,7 +11,7 @@ #' @param by character vector of the columns to group by #' @return Returns a data table object with computed weighted means. #' -#' @import data.table +#' @importFrom data.table .SD #' @importFrom stats as.formula na.omit ts weighted.mean #' @keywords internal computeWeightedMeans <- function(data_table, variables, weight, by) { diff --git a/R/ds.bp_standards.R b/R/ds.bp_standards.R index ae76c54c..61fef6af 100644 --- a/R/ds.bp_standards.R +++ b/R/ds.bp_standards.R @@ -26,7 +26,6 @@ #' blood pressure in children and adolescents: #' https://www.nhlbi.nih.gov/sites/default/files/media/docs/hbp_ped.pdf #' @author Demetris Avraam for DataSHIELD Development Team -#' @import data.table #' @export ds.bp_standards <- function(sex=NULL, age=NULL, height=NULL, bp=NULL, systolic=TRUE, newobj=NULL, datasources=NULL){ diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R index d0fcdf3e..fdc4d198 100644 --- a/R/ds.standardiseDf.R +++ b/R/ds.standardiseDf.R @@ -129,7 +129,7 @@ ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", f #' @param df.name Name of the input DataFrame. #' @param datasources Data sources from which to aggregate data. #' @return A DataFrame containing the variable classes from each data source. -#' @import dplyr +#' @importFrom dplyr %>% bind_rows row_number where all_of case_when #' @noRd .get_var_classes <- function(df.name, datasources) { cally <- call("getClassAllColsDS", df.name) @@ -144,7 +144,6 @@ ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", f #' #' @param classes A DataFrame containing variable classes across data sources. #' @return A list of variables that have class conflicts. -#' @import dplyr #' @importFrom purrr map #' @noRd .identify_class_conflicts <- function(classes) { From ca12139c9f26be19701f3135819c10c40514425d Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 23 Jun 2026 17:46:56 +0200 Subject: [PATCH 8/9] revert: set roxygen to 8.0 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d67b19c4..417c2840 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,4 +86,4 @@ Suggests: DSLite Encoding: UTF-8 Language: en-GB -Config/roxygen2/version: 8.0.0.9000 +Config/roxygen2/version: 8.0.0 From 9e266045cb9acbf819f04c9e03af6a5eb3cdd896 Mon Sep 17 00:00:00 2001 From: Stuart Wheater Date: Wed, 24 Jun 2026 10:41:53 +0100 Subject: [PATCH 9/9] Increased ping timeout plus additional per entry --- tests/testthat/connection_to_datasets/login_details.R | 4 ++-- .../perf_files/armadillo_hp-laptop-quay_perf-profile.csv | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/connection_to_datasets/login_details.R b/tests/testthat/connection_to_datasets/login_details.R index 2ce4ca2d..7e410470 100644 --- a/tests/testthat/connection_to_datasets/login_details.R +++ b/tests/testthat/connection_to_datasets/login_details.R @@ -23,7 +23,7 @@ if (! is.null(getOption("default_driver"))) { if ((ds.test_env$driver == "DSLiteDriver") || (ds.test_env$driver == "OpalDriver")) { ds.test_env$ping_address <- paste("https://", ds.test_env$server_ip_address, ":8443", sep="") - ds.test_env$ping_config <- config(timeout=5, ssl_verifyhost=0, ssl_verifypeer=0) + ds.test_env$ping_config <- config(timeout=60, ssl_verifyhost=0, ssl_verifypeer=0) ds.test_env$ip_address_1 <- paste("https://", ds.test_env$server_ip_address, ":8443", sep="") ds.test_env$ip_address_2 <- paste("https://", ds.test_env$server_ip_address, ":8443", sep="") @@ -44,7 +44,7 @@ if ((ds.test_env$driver == "DSLiteDriver") || (ds.test_env$driver == "OpalDriver ds.test_env$secure_login_details <- TRUE } else if (ds.test_env$driver == "ArmadilloDriver") { ds.test_env$ping_address <- paste("http://", ds.test_env$server_ip_address, ":8080", sep="") - ds.test_env$ping_config <- config(timeout=5) + ds.test_env$ping_config <- config(timeout=60) ds.test_env$ip_address_1 <- paste("http://", ds.test_env$server_ip_address, ":8080", sep="") ds.test_env$ip_address_2 <- paste("http://", ds.test_env$server_ip_address, ":8080", sep="") diff --git a/tests/testthat/perf_files/armadillo_hp-laptop-quay_perf-profile.csv b/tests/testthat/perf_files/armadillo_hp-laptop-quay_perf-profile.csv index ebfd605c..191cc308 100644 --- a/tests/testthat/perf_files/armadillo_hp-laptop-quay_perf-profile.csv +++ b/tests/testthat/perf_files/armadillo_hp-laptop-quay_perf-profile.csv @@ -11,4 +11,5 @@ "ds.length::perf::combine:0","7.957","0.5","2" "ds.mean::perf::combine:0","9.049","0.5","2" "ds.mean::perf::split:0","9.429","0.5","2" +"ds.standardiseDf:::perf::0","9.50","0.5","2" "void::perf::void::0","21400.0","0.5","2"