Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
3e28629
refactor: moved checks to serverside
timcadman Apr 13, 2026
a6e7957
refactor: add class consistency checks, remove ValidityMessage
timcadman Apr 13, 2026
0bb5699
test: update kurtosis and skewness expectations for new return structure
timcadman Apr 13, 2026
00f1002
chore: update dsBase tar and refactor guide
timcadman Apr 13, 2026
a041d44
refactor: add classConsistencyCheck parameter, remove ValidityMessage
timcadman Apr 13, 2026
85007d4
test: update mean and var expectations for new return structure
timcadman Apr 13, 2026
9f27cbb
chore: update dsBase tar
timcadman Apr 13, 2026
4d4ea1a
test: add performance tests for batch 3 functions
timcadman Apr 13, 2026
2e6019b
docs: updated authorship
timcadman Apr 13, 2026
ae07a65
docs: regenerate man pages
timcadman Apr 13, 2026
13cb8a4
docs: update TODOs for batch 1 outstanding issues
timcadman Apr 13, 2026
c24575f
docs: update refactor guide with classConsistencyCheck and ValidityMe…
timcadman Apr 13, 2026
15287d1
test: update batch-3 expectations and refactor guide
timcadman Apr 14, 2026
30976f1
chore: regenerate ds.mean fixtures and update dsBase tar
timcadman Apr 14, 2026
f0cae98
fix: unwrap new list returns in ds.summary
timcadman Apr 14, 2026
67992ca
docs: note checkClass follow-up refactor
timcadman Apr 14, 2026
141ba18
test: add performance test for ds.summary
timcadman Apr 16, 2026
3efab38
fix: remove third-party copyright from new test
timcadman Apr 16, 2026
59e1eff
docs: sync REFACTOR_GUIDE.md from batch-9
timcadman Apr 16, 2026
504d003
docs: updated authorship
timcadman Apr 16, 2026
dac736c
docs: sync REFACTOR_GUIDE.md from batch-10
timcadman Apr 16, 2026
c92d01a
fix: align perf test datasets with smoke tests
timcadman Apr 19, 2026
af8960d
docs: sync REFACTOR_GUIDE.md
timcadman Apr 19, 2026
aa4d7cc
Merge v7.0-dev into refactor/perf-batch-3
timcadman Jun 12, 2026
4823ad8
test: fixed performance tests
timcadman Jun 12, 2026
03e5fb9
updated call construction
timcadman Jun 15, 2026
64b56aa
chore: remove rogue document
timcadman Jun 16, 2026
1a8c237
document arguments, set consistency checks to true
timcadman Jun 16, 2026
3ca7267
ci: added latest dsBase version for armadillo
timcadman Jun 16, 2026
d104e68
docs: added templates with different defaults
timcadman Jun 22, 2026
899a364
use consistent pattern for consistency checks - set as default only i…
timcadman Jun 22, 2026
c5aaa03
docs: removed stale validity check refernces
timcadman Jun 22, 2026
4a1e2ba
chore: remove changed whitespace
timcadman Jun 22, 2026
32660da
test: added more detailed expectations to class checks, moved to -smk
timcadman Jun 22, 2026
0542d39
revert: removed Arjuna copywright from new perf tests
timcadman Jun 22, 2026
5aa8d15
removed extra whitespace
timcadman Jun 22, 2026
96e84b7
refactor: generalised class check to handle classes returned with dif…
timcadman Jun 22, 2026
647707a
also check class of outcome variable
timcadman Jun 22, 2026
08aa214
added missing docs for consistency check
timcadman Jun 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 10 additions & 39 deletions R/ds.cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@
#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
#' @template classConsistencyCheckTrue
#' @return \code{ds.cor} returns a list containing the number of missing values in each variable,
#' the number of missing variables casewise, the correlation matrix,
#' the number of used complete cases. The function applies two disclosure controls. The first disclosure
#' control checks that the number of variables is not bigger than a percentage of the individual-level records (the allowed
#' percentage is pre-specified by the 'nfilter.glm'). The second disclosure control checks that none of them is dichotomous
#' with a level having fewer counts than the pre-specified 'nfilter.tab' threshold.
#' @author DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @examples
#' \dontrun{
#'
Expand Down Expand Up @@ -79,56 +81,25 @@
#' }
#' @export
#'
ds.cor <- function(x=NULL, y=NULL, type="split", datasources=NULL){
ds.cor <- function(x=NULL, y=NULL, type="split", datasources=NULL, classConsistencyCheck=TRUE){

# look for DS connections
if(is.null(datasources)){
datasources <- datashield.connections_find()
}

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("x=NULL. Please provide the name of a matrix or dataframe or the names of two numeric vectors!", call.=FALSE)
}else{
isDefined(datasources, x)
}

# check the type of the input objects
typ <- checkClass(datasources, x)

if(('numeric' %in% typ) | ('integer' %in% typ) | ('factor' %in% typ)){
if(is.null(y)){
stop("If x is a numeric vector, y must be a numeric vector!", call.=FALSE)
}else{
isDefined(datasources, y)
typ2 <- checkClass(datasources, y)
}
}

if(('matrix' %in% typ) | ('data.frame' %in% typ) & !(is.null(y))){
y <- NULL
warning("x is a matrix or a dataframe; y will be ignored and a correlation matrix computed for x!")
}

# name of the studies to be used in the output
stdnames <- names(datasources)

# call the server side function
if(('matrix' %in% typ) | ('data.frame' %in% typ)){
calltext <- call("corDS", x, NULL)
}else{
if(!(is.null(y))){
calltext <- call("corDS", x, y)
}else{
calltext <- call("corDS", x, NULL)
}
}
calltext <- call("corDS", x, y)
output <- DSI::datashield.aggregate(datasources, calltext)


if(classConsistencyCheck){
.checkClassConsistency(output)
}

if (type=="split"){
covariance <- list()
sqrt.diag <- list()
Expand Down
29 changes: 11 additions & 18 deletions R/ds.corTest.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
#' @param datasources a list of \code{\link[DSI]{DSConnection-class}}
#' objects obtained after login. If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
#' @template classConsistencyCheckFalse
#' @return \code{ds.corTest} returns to the client-side the results of the correlation test.
#' @author DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @export
#' @examples
#' \dontrun{
Expand Down Expand Up @@ -63,17 +65,9 @@
#'
#' }
#'
ds.corTest <- function(x=NULL, y=NULL, method="pearson", exact=NULL, conf.level=0.95, type='split', datasources=NULL){
ds.corTest <- function(x=NULL, y=NULL, method="pearson", exact=NULL, conf.level=0.95, type='split', datasources=NULL, classConsistencyCheck=FALSE){

# look for DS connections
if(is.null(datasources)){
datasources <- datashield.connections_find()
}

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("x=NULL. Please provide the names of the 1st numeric vector!", call.=FALSE)
Expand All @@ -85,19 +79,18 @@ ds.corTest <- function(x=NULL, y=NULL, method="pearson", exact=NULL, conf.level=
if(!(method %in% c("pearson", "kendall", "spearman"))){
stop('Function argument "method" has to be either "pearson", "kendall" or "spearman"', call.=FALSE)
}

# check if the input objects are defined in all the studies
isDefined(datasources, x)
isDefined(datasources, y)

# call the internal function that checks the input objects are of the same class in all studies.
typ <- checkClass(datasources, x)
typ <- checkClass(datasources, y)

# call the server side function
cally <- call("corTestDS", x, y, method, exact, conf.level)
out <- DSI::datashield.aggregate(datasources, cally)

if(classConsistencyCheck){
.checkClassConsistency(out)
}

# strip class field from results before returning
out <- lapply(out, function(r) { r$class <- NULL; r })

if(type=="split"){
return(out)
}else{
Expand Down
49 changes: 10 additions & 39 deletions R/ds.cov.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#' \code{'pairwise.complete'}. Default \code{'pairwise.complete'}. For more information see details.
#' @param type a character string that represents the type of analysis to carry out.
#' This must be set to \code{'split'} or \code{'combine'}. Default \code{'split'}. For more information see details.
#' @template classConsistencyCheckTrue
#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
Expand All @@ -47,6 +48,7 @@
#' the disclosure controls then all the output values are replaced with NAs. If all the variables are valid and pass
#' the controls, then the output matrices are returned and also an error message is returned but it is replaced by NA.
#' @author DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @examples
#' \dontrun{
#'
Expand Down Expand Up @@ -96,56 +98,25 @@
#' }
#' @export
#'
ds.cov <- function(x=NULL, y=NULL, naAction='pairwise.complete', type="split", datasources=NULL){
ds.cov <- function(x=NULL, y=NULL, naAction='pairwise.complete', type="split", datasources=NULL, classConsistencyCheck=TRUE){

# look for DS connections
if(is.null(datasources)){
datasources <- datashield.connections_find()
}

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("x=NULL. Please provide the name of a matrix or dataframe or the names of two numeric vectors!", call.=FALSE)
}else{
isDefined(datasources, x)
}

# check the type of the input objects
typ <- checkClass(datasources, x)

if(('numeric' %in% typ) | ('integer' %in% typ) | ('factor' %in% typ)){
if(is.null(y)){
stop("If x is a numeric vector, y must be a numeric vector!", call.=FALSE)
}else{
isDefined(datasources, y)
typ2 <- checkClass(datasources, y)
}
}

if(('matrix' %in% typ) | ('data.frame' %in% typ) & !(is.null(y))){
y <- NULL
warning("x is a matrix or a dataframe; y will be ignored and a covariance matrix computed for x!")
}

# name of the studies to be used in the output
stdnames <- names(datasources)

# call the server side function
if(('matrix' %in% typ) | ('data.frame' %in% typ)){
calltext <- call("covDS", x, NULL, naAction)
}else{
if(!(is.null(y))){
calltext <- call("covDS", x, y, naAction)
}else{
calltext <- call("covDS", x, NULL, naAction)
}
}
calltext <- call("covDS", x, y, naAction)
output <- DSI::datashield.aggregate(datasources, calltext)


if(classConsistencyCheck){
.checkClassConsistency(output)
}

if (type=="split"){
covariance <- list()
results <- list()
Expand Down
4 changes: 2 additions & 2 deletions R/ds.dim.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#' the dimension is returned separately for each study.
#' If \code{type} is set to \code{'both'} or \code{'b'}, both sets of outputs are produced.
#' Default \code{'both'}.
#' @template classConsistencyCheck
#' @template classConsistencyCheckTrue
#' @param datasources a list of \code{\link[DSI]{DSConnection-class}}
#' objects obtained after login. If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
Expand Down Expand Up @@ -78,7 +78,7 @@
#'
#' }
#'
ds.dim <- function(x=NULL, type='both', classConsistencyCheck=TRUE, datasources=NULL) {
ds.dim <- function(x=NULL, type='both', datasources=NULL, classConsistencyCheck=TRUE) {

datasources <- .set_datasources(datasources)

Expand Down
4 changes: 2 additions & 2 deletions R/ds.isNA.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#'
#' Server function called: \code{isNaDS}
#' @param x a character string specifying the name of the vector to check.
#' @template classConsistencyCheck
#' @template classConsistencyCheckTrue
#' @param datasources a list of \code{\link[DSI]{DSConnection-class}}
#' objects obtained after login. If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
Expand Down Expand Up @@ -57,7 +57,7 @@
#'
#' }
#'
ds.isNA <- function(x=NULL, classConsistencyCheck=TRUE, datasources=NULL){
ds.isNA <- function(x=NULL, datasources=NULL, classConsistencyCheck=TRUE){

datasources <- .set_datasources(datasources)

Expand Down
50 changes: 17 additions & 33 deletions R/ds.kurtosis.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,19 @@
#' if \code{type} is set to 'split', 'splits' or 's', the kurtosis is returned separately for each study.
#' if \code{type} is set to 'both' or 'b', both sets of outputs are produced.
#' The default value is set to 'both'.
#' @template classConsistencyCheckFalse
#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login.
#' If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
#' @return a matrix showing the kurtosis of the input numeric variable, the number of valid observations and
#' the validity message.
#' @return a matrix showing the kurtosis of the input numeric variable and
#' the number of valid observations.
#' @author Demetris Avraam, for DataSHIELD Development Team
#' @author Tim Cadman, Genomics Coordination Centre, UMCG, Netherlands
#' @export
#'
ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL){

# if no opal login details are provided look for 'opal' objects in the environment
if(is.null(datasources)){
datasources <- datashield.connections_find()
}
ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL, classConsistencyCheck=FALSE){

# ensure datasources is a list of DSConnection-class
if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){
stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
}
datasources <- .set_datasources(datasources)

if(is.null(x)){
stop("Please provide the name of the input vector!", call.=FALSE)
Expand All @@ -53,26 +47,17 @@ ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL){
stop('Function argument "type" has to be either "both", "combine" or "split"', call.=FALSE)
}

# check if the input object is defined in all the studies
isDefined(datasources, x)

# call the internal function that checks the input object is of the same class in all studies.
typ <- checkClass(datasources, x)

# the input object must be a numeric or an integer vector
if(typ != 'integer' & typ != 'numeric'){
message(paste0(x, " is of type ", typ, "!"))
stop("The input object must be an integer or numeric vector.", call.=FALSE)
}

if (type=='split' | type=='both'){
calltext.split <- call("kurtosisDS1", x, method)
output.split <- DSI::datashield.aggregate(datasources, calltext.split)
mat.split <- matrix(as.numeric(matrix(unlist(output.split), nrow=length(datasources), byrow=TRUE)[,1:2]),nrow=length(datasources))
validity <- matrix(unlist(output.split), nrow=length(datasources), byrow=TRUE)[,3]
mat.split <- data.frame(cbind(mat.split, validity))
if(classConsistencyCheck){
.checkClassConsistency(output.split)
}
mat.split <- data.frame(
Kurtosis = sapply(output.split, function(r) r$Kurtosis),
Nvalid = sapply(output.split, function(r) r$Nvalid)
)
rownames(mat.split) <- names(output.split)
colnames(mat.split) <- c('Kurtosis', 'Nvalid', 'ValidityMessage')
}

if (type=='combine' | type=='both'){
Expand All @@ -84,6 +69,9 @@ ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL){
}else{
calltext.combined <- call("kurtosisDS2", x, global.mean)
output.combined <- DSI::datashield.aggregate(datasources, calltext.combined)
if(classConsistencyCheck){
.checkClassConsistency(output.combined)
}

Global.sum.quartics <- 0
Global.sum.squares <- 0
Expand All @@ -98,19 +86,15 @@ ds.kurtosis <- function(x=NULL, method=1, type='both', datasources=NULL){

if(method==1){
Global.kurtosis <- g2.global
combinedMessage <- "VALID ANALYSIS"
}
if(method==2){
Global.kurtosis <- ((Global.Nvalid + 1) * g2.global + 6) * (Global.Nvalid - 1)/((Global.Nvalid - 2) * (Global.Nvalid - 3))
combinedMessage <- "VALID ANALYSIS"
}
if(method==3){
Global.kurtosis <- (g2.global + 3) * (1 - 1/Global.Nvalid)^2 - 3
combinedMessage <- "VALID ANALYSIS"
}
mat.combined <- data.frame(cbind(Global.kurtosis, Global.Nvalid, combinedMessage))
mat.combined <- data.frame(Kurtosis = Global.kurtosis, Nvalid = Global.Nvalid)
rownames(mat.combined) <- 'studiesCombined'
colnames(mat.combined) <- c('Kurtosis', 'Nvalid', 'ValidityMessage')

}
}
Expand Down
4 changes: 2 additions & 2 deletions R/ds.length.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' if \code{type} is set to \code{'both'} or \code{'b'},
#' both sets of outputs are produced.
#' Default \code{'both'}.
#' @template classConsistencyCheck
#' @template classConsistencyCheckTrue
#' @param datasources a list of \code{\link[DSI]{DSConnection-class}}
#' objects obtained after login. If the \code{datasources} argument is not specified
#' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.
Expand Down Expand Up @@ -73,7 +73,7 @@
#' datashield.logout(connections)
#' }
#'
ds.length <- function(x=NULL, type='both', classConsistencyCheck=TRUE, datasources=NULL){
ds.length <- function(x=NULL, type='both', datasources=NULL, classConsistencyCheck=TRUE){

datasources <- .set_datasources(datasources)

Expand Down
Loading