From df35f4bbd7356da9b72bd2b36d9ac96c4a3372ac Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 14:36:19 -0700 Subject: [PATCH 1/9] Refactor analysers to use precomputed contexts --- R/ModelArray_Constructor.R | 1092 ++++++----------- R/analyse-helpers.R | 320 ++++- R/analyse.R | 471 +++---- man/ModelArray.wrap.Rd | 16 +- man/analyseOneElement.gam.Rd | 27 +- man/analyseOneElement.lm.Rd | 42 +- man/analyseOneElement.wrap.Rd | 28 +- .../test-ModelArray_low_hanging_coverage.R | 2 +- tests/testthat/test-analyse_context.R | 933 ++++++++++++++ 9 files changed, 1873 insertions(+), 1058 deletions(-) create mode 100644 tests/testthat/test-analyse_context.R diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 22772d7..743916f 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -1,3 +1,4 @@ +# ModelArray class ---- #' ModelArray class #' #' ModelArray is an S4 class that represents element-wise scalar data and @@ -394,19 +395,29 @@ numElementsTotal <- function(modelarray, scalar_name = "FD") { } #' Fit a linear model for a single element +#' +#' @description +#' Fits \code{\link[stats]{lm}} on one element's data. When a precomputed +#' context (\code{ctx}) is provided, all loop-invariant work (formula parsing, +#' collision checks, source alignment) is skipped. When \code{ctx} is +#' \code{NULL}, the function falls back to computing everything inline +#' (legacy behaviour for direct calls / debugging). +#' #' If the number of subjects with finite scalar values (not \code{NaN}, #' \code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the #' element is skipped and all statistics are set to \code{NaN}. #' #' @param i_element Integer. The 1-based index of the element to analyse. #' @param formula A \code{\link[stats]{formula}} passed to -#' \code{\link[stats]{lm}}. -#' @param modelarray A \linkS4class{ModelArray} object. +#' \code{\link[stats]{lm}}. 
Ignored when \code{ctx} is provided (the +#' formula is taken from the context). +#' @param modelarray A \linkS4class{ModelArray} object. Ignored when +#' \code{ctx} is provided. #' @param phenotypes A data.frame of the cohort with columns of independent #' variables and covariates. Must contain a \code{"source_file"} column -#' matching \code{sources(modelarray)[[scalar]]}. +#' matching \code{sources(modelarray)[[scalar]]}. Ignored when \code{ctx} is provided. #' @param scalar Character. The name of the element-wise scalar to analyse. -#' Must be one of \code{names(scalars(modelarray))}. +#' Must be one of \code{names(scalars(modelarray))}. Ignored when \code{ctx} is provided. #' @param var.terms Character vector. Statistics to extract per term from #' \code{\link[broom]{tidy.lm}} (e.g. \code{"estimate"}, \code{"statistic"}, #' \code{"p.value"}). @@ -418,7 +429,7 @@ numElementsTotal <- function(modelarray, scalar_name = "FD") { #' below this threshold are skipped. This value is typically computed by #' the parent function from \code{num.subj.lthr.abs} and #' \code{num.subj.lthr.rel}. -#' @param num.stat.output Integer or \code{NULL}. The total number of output +#' @param num.stat.output Integer or \code{NULL}. The total number of output #' columns (including \code{element_id}). Used when #' \code{flag_initiate = FALSE} to generate an all-\code{NaN} row for #' skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}. @@ -431,6 +442,8 @@ numElementsTotal <- function(modelarray, scalar_name = "FD") { #' halts execution; \code{"skip"} returns all-\code{NaN} for this element; #' \code{"debug"} drops into \code{\link{browser}} (if interactive) then #' skips. Default: \code{"stop"}. +#' @param ctx A precomputed context list from \code{.build_lm_context()}, +#' or \code{NULL} for legacy inline computation. #' @param ... Additional arguments passed to \code{\link[stats]{lm}}. 
#' #' @return If \code{flag_initiate = TRUE}, a list with components: @@ -446,283 +459,144 @@ numElementsTotal <- function(modelarray, scalar_name = "FD") { #' (except \code{element_id}) if the element had insufficient valid subjects #' or if an error occurred with \code{on_error = "skip"}. #' -#' @seealso \code{\link{ModelArray.lm}} which calls this function iteratively, +#' @seealso \code{\link{ModelArray.lm}}, \code{.build_lm_context}, #' \code{\link{analyseOneElement.gam}} for the GAM equivalent, -#' \code{\link{analyseOneElement.wrap}} for user-supplied functions. +#' \code{\link{analyseOneElement.wrap}} for user-supplied functions #' #' @keywords internal #' @rdname analyseOneElement.lm #' @export - analyseOneElement.lm <- function(i_element, - formula, - modelarray, - phenotypes, - scalar, - var.terms, - var.model, + formula = NULL, + modelarray = NULL, + phenotypes = NULL, + scalar = NULL, + var.terms = c("estimate", "statistic", "p.value"), + var.model = c("adj.r.squared", "p.value"), num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, on_error = "stop", + ctx = NULL, ...) 
{ - values <- scalars(modelarray)[[scalar]][i_element, ] - ## check number of subjects with (in)valid values: - flag_sufficient <- NULL # whether number of subjects with valid values are sufficient - num.subj.valid <- length(values[is.finite(values)]) - if (num.subj.valid > num.subj.lthr) { - flag_sufficient <- TRUE - } else { - flag_sufficient <- FALSE - } - if (flag_sufficient == TRUE) { - dat <- phenotypes - # detect scalar predictors referenced in formula - all_vars <- all.vars(formula) - lhs_name <- tryCatch(as.character(formula[[2]]), error = function(e) NULL) - rhs_vars <- setdiff(all_vars, lhs_name) - scalar_names <- names(scalars(modelarray)) - scalar_predictors <- intersect(rhs_vars, scalar_names) - - # collision check with phenotypes - collisions <- intersect(c(scalar, scalar_predictors), colnames(dat)) - if (length(collisions) > 0) { - stop(paste0( - "Column name collision between phenotypes and scalar names: ", - paste(collisions, collapse = ", "), - ". Please rename or remove these columns from phenotypes before modeling." - )) - } + # Resolve context ---- + # Use precomputed if available, else build inline - # attach response first - dat[[scalar]] <- values - - # attach scalar predictors with source alignment and build intersection mask - masks <- list(is.finite(values)) - if (length(scalar_predictors) > 0) { - for (pred in scalar_predictors) { - pred_sources <- sources(modelarray)[[pred]] - phen_sources <- dat[["source_file"]] - if (!(all(pred_sources %in% phen_sources) && all(phen_sources %in% pred_sources))) { - stop(paste0( - "The source files for predictor scalar '", pred, "' do not match phenotypes$source_file." 
- )) - } - reorder_idx <- match(phen_sources, pred_sources) - pred_vals <- scalars(modelarray)[[pred]][i_element, ] - pred_vals <- pred_vals[reorder_idx] - dat[[pred]] <- pred_vals - masks[[length(masks) + 1L]] <- is.finite(pred_vals) - } - } + if (!is.null(ctx)) { + # Fast path: all invariant work already done + effective_formula <- ctx$formula + effective_scalar <- ctx$scalar + } else { + # Legacy path: compute everything inline (for direct calls / debugging) + effective_formula <- formula + effective_scalar <- scalar + ctx <- .build_lm_context(formula, modelarray, phenotypes, scalar) + } - # intersection-based threshold - valid_mask <- Reduce("&", masks) - num.subj.valid <- sum(valid_mask) - if (!(num.subj.valid > num.subj.lthr)) { - if (flag_initiate == TRUE) { - toreturn <- list(column_names = NaN, list.terms = NaN) - return(toreturn) - } else { - onerow <- c( - i_element - 1, - rep(NaN, (num.stat.output - 1)) - ) - return(onerow) - } - } - # dots <- list(...) - # dots_names <- names(dots) - # if ("weights" %in% dots_names) { - # message(dots$weights) - # myWeights <- dots$weights - # dots$weights <- NULL # remove weights from - # - # arguments_lm <- dots - # - # } - arguments_lm <- list(...) - arguments_lm$formula <- formula - arguments_lm$data <- dat - - # onemodel <- stats::lm(formula, data = dat, ...) - # onemodel <- stats::lm(formula, data = dat, weights = myWeights,...) 
- onemodel <- tryCatch( - { - do.call(stats::lm, arguments_lm) - }, - error = function(e) { - msg <- paste0( - "analyseOneElement.lm error at element ", - i_element, - ": ", - conditionMessage(e) - ) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) - } else { - return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) - } - } - stop(e) - } - ) + # Per-element data assembly ---- + elem <- .assemble_element_data(i_element, ctx, num.subj.lthr) - if (inherits(onemodel, "lm_error")) { - if (flag_initiate == TRUE) { - toreturn <- list(column_names = NaN, list.terms = NaN) - return(toreturn) - } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) - return(onerow) - } + if (!elem$sufficient) { + if (flag_initiate) { + return(list(column_names = NaN, list.terms = NaN)) + } else { + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + return(onerow) } - # explicitly passing arguments into lm, to avoid error of argument "weights" + } - onemodel.tidy <- onemodel %>% broom::tidy() - onemodel.glance <- onemodel %>% broom::glance() + dat <- elem$dat - # delete columns you don't want: - var.terms.full <- names(onemodel.tidy) - var.model.full <- names(onemodel.glance) + # Fit model ---- + arguments_lm <- list(...) 
+ arguments_lm$formula <- effective_formula + arguments_lm$data <- dat - # list to remove: - var.terms.orig <- var.terms - var.terms <- list("term", var.terms) %>% unlist() # we will always keep "term" column - var.terms.remove <- list() - for (l in var.terms.full) { - if (!(l %in% var.terms)) { - var.terms.remove <- var.terms.remove %>% - append(., l) %>% - unlist() # the order will still be kept + onemodel <- tryCatch( + { + do.call(stats::lm, arguments_lm) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.lm error at element ", + i_element, ": ", conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() } - } - - var.model.remove <- list() - for (l in var.model.full) { - if (!(l %in% var.model)) { - var.model.remove <- var.model.remove %>% - append(., l) %>% - unlist() # the order will still be kept + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate) { + return(structure(list(.lm_error_initiate = TRUE), class = "lm_error")) + } else { + return(structure(list(.lm_error_runtime = TRUE), class = "lm_error")) + } } + stop(e) } + ) - # remove those columns: - if (length(var.terms.remove) != 0) { - # if length=0, it's list(), nothing to remove - onemodel.tidy <- dplyr::select(onemodel.tidy, -dplyr::all_of(var.terms.remove)) - } - if (length(var.model.remove) != 0) { - onemodel.glance <- dplyr::select(onemodel.glance, -dplyr::all_of(var.model.remove)) + if (inherits(onemodel, "lm_error")) { + if (flag_initiate) { + return(list(column_names = NaN, list.terms = NaN)) + } else { + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + return(onerow) } + } - # adjust: - # change the term name from "(Intercept)" to "Intercept" - onemodel.tidy$term[onemodel.tidy$term == "(Intercept)"] <- "Intercept" - onemodel.glance <- onemodel.glance %>% dplyr::mutate(term = "model") # add a column - - # get the list of terms: - list.terms <- onemodel.tidy$term - - # check if the onemodel.* 
does not have real statistics (but only a column of 'term') - temp_colnames <- onemodel.tidy %>% colnames() - # union of colnames and "term"; if colnames only has "term" or lengt of 0 (tibble()), - # union = "term", all(union)=TRUE; otherwise, if there is colnames other than "term", - # all(union) = c(TRUE, FALSE, ...) - temp <- union(temp_colnames, "term") - # just an empty tibble (so below, all(dim(onemodel.tidy)) = FALSE) - if (all(temp == "term")) { - onemodel.tidy <- tibble::tibble() - } - temp_colnames <- onemodel.glance %>% colnames() - temp <- union(temp_colnames, "term") # union of colnames and "term"; - if (all(temp == "term")) { - onemodel.glance <- tibble::tibble() - } + # Extract statistics ---- + onemodel.tidy <- onemodel %>% broom::tidy() + onemodel.glance <- onemodel %>% broom::glance() + # Normalize intercept term name to match expected output convention + onemodel.tidy$term[onemodel.tidy$term == "(Intercept)"] <- "Intercept" - # flatten .tidy results into one row: - if (all(dim(onemodel.tidy))) { - # not empty | if any dim is 0, all=FALSE - onemodel.tidy.onerow <- onemodel.tidy %>% tidyr::pivot_wider( - names_from = "term", - values_from = dplyr::all_of(var.terms.orig), - names_glue = "{term}.{.value}" - ) - } else { - onemodel.tidy.onerow <- onemodel.tidy - } + list.terms <- onemodel.tidy$term - if (all(dim(onemodel.glance))) { - # not empty - onemodel.glance.onerow <- onemodel.glance %>% tidyr::pivot_wider( - names_from = "term", - values_from = dplyr::all_of(var.model), - names_glue = "{term}.{.value}" + ## Term-level statistics ---- + onerow.terms <- tibble::tibble() + for (i_term in seq_along(list.terms)) { + for (i_var in seq_along(var.terms)) { + temp <- tibble::tibble( + placeholder = onemodel.tidy[[var.terms[i_var]]][i_term] ) - } else { - onemodel.glance.onerow <- onemodel.glance + colnames(temp) <- paste0(list.terms[i_term], ".", var.terms[i_var]) + onerow.terms <- bind_cols_check_emptyTibble(onerow.terms, temp) } + } + ## Model-level 
statistics ---- + onerow.model <- tibble::tibble() + for (i_var in seq_along(var.model)) { + temp <- tibble::tibble(placeholder = onemodel.glance[[var.model[i_var]]]) + colnames(temp) <- paste0("model.", var.model[i_var]) + onerow.model <- bind_cols_check_emptyTibble(onerow.model, temp) + } - # combine the tables: check if any of them is empty (tibble()) - onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.tidy.onerow, onemodel.glance.onerow) - - # add a column of element ids: - colnames.temp <- colnames(onemodel.onerow) - onemodel.onerow <- onemodel.onerow %>% - tibble::add_column( - element_id = i_element - 1, - .before = colnames.temp[1] - ) # add as the first column - - # now you can get the headers, # of columns, etc of the output results - - - if (flag_initiate == TRUE) { - # return the column names: - - # return: - column_names <- colnames(onemodel.onerow) - toreturn <- list(column_names = column_names, list.terms = list.terms) - toreturn - } else if (flag_initiate == FALSE) { - # return the one row results: + onerow.all <- bind_cols_check_emptyTibble(onerow.terms, onerow.model) + column_names <- c("element_id", colnames(onerow.all)) - # return: - onerow <- as.numeric(onemodel.onerow) # change from tibble to numeric to save some space - onerow - } + if (flag_initiate) { + toreturn <- list( + column_names = column_names, + list.terms = list.terms + ) + return(toreturn) } else { - # if flag_sufficient==FALSE - if (flag_initiate == TRUE) { - toreturn <- list(column_names = NaN, list.terms = NaN) - toreturn - } else if (flag_initiate == FALSE) { - onerow <- c( - i_element - 1, - # first is element_id, which will still be a finite number - rep(NaN, (num.stat.output - 1)) - ) # other columns will be NaN - - onerow - } + onerow <- c(i_element - 1, as.numeric(onerow.all)) + return(onerow) } } #' Fit a GAM for a single element -#' returns metadata (column names, smooth term names, parametric term names, +#' Returns metadata (column names, smooth term names, 
parametric term names, #' and the smoothing parameter criterion attribute name) used by #' \code{\link{ModelArray.gam}} to initialise the output data.frame. When #' \code{flag_initiate = FALSE}, it returns a numeric vector representing one @@ -752,6 +626,8 @@ analyseOneElement.lm <- function(i_element, #' squares (\code{model.sse}) for the model, which is needed for #' partial R-squared calculations in \code{\link{ModelArray.gam}}. #' Default: \code{FALSE}. +#' @param ctx A precomputed context list from \code{.build_gam_context()}, +#' or \code{NULL}. #' @param ... Additional arguments passed to \code{\link[mgcv]{gam}}. #' #' @return If \code{flag_initiate = TRUE}, a list with components: @@ -777,385 +653,225 @@ analyseOneElement.lm <- function(i_element, #' @export analyseOneElement.gam <- function(i_element, - formula, - modelarray, - phenotypes, - scalar, - var.smoothTerms, - var.parametricTerms, - var.model, + formula = NULL, + modelarray = NULL, + phenotypes = NULL, + scalar = NULL, + var.smoothTerms = c("statistic", "p.value"), + var.parametricTerms = c("estimate", "statistic", "p.value"), + var.model = c("dev.expl"), num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, flag_sse = FALSE, on_error = "stop", + ctx = NULL, ...) 
{ - values <- scalars(modelarray)[[scalar]][i_element, ] - ## check number of subjects with (in)valid values: - flag_sufficient <- NULL # whether number of subjects with valid values are sufficient - num.subj.valid <- length(values[is.finite(values)]) - if (num.subj.valid > num.subj.lthr) { - flag_sufficient <- TRUE + + # Resolve context ---- + if (!is.null(ctx)) { + effective_formula <- ctx$formula + effective_scalar <- ctx$scalar } else { - flag_sufficient <- FALSE + effective_formula <- formula + effective_scalar <- scalar + ctx <- .build_gam_context(formula, modelarray, phenotypes, scalar) } - if (flag_sufficient == TRUE) { - dat <- phenotypes - # detect scalar predictors referenced in formula - all_vars <- all.vars(formula) - lhs_name <- tryCatch(as.character(formula[[2]]), error = function(e) NULL) - rhs_vars <- setdiff(all_vars, lhs_name) - scalar_names <- names(scalars(modelarray)) - scalar_predictors <- intersect(rhs_vars, scalar_names) - - # collision check with phenotypes - collisions <- intersect(c(scalar, scalar_predictors), colnames(dat)) - if (length(collisions) > 0) { - stop(paste0( - "Column name collision between phenotypes and scalar names: ", - paste(collisions, collapse = ", "), - ". Please rename or remove these columns from phenotypes before modeling." 
+ # Per-element data assembly ---- + elem <- .assemble_element_data(i_element, ctx, num.subj.lthr) + + if (!elem$sufficient) { + if (flag_initiate) { + return(list( + column_names = NaN, + list.smoothTerms = NaN, + list.parametricTerms = NaN, + sp.criterion.attr.name = NaN )) + } else { + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + return(onerow) } + } - # attach response first - dat[[scalar]] <- values - - # attach scalar predictors with source alignment and build intersection mask - masks <- list(is.finite(values)) - if (length(scalar_predictors) > 0) { - for (pred in scalar_predictors) { - pred_sources <- sources(modelarray)[[pred]] - phen_sources <- dat[["source_file"]] - if (!(all(pred_sources %in% phen_sources) && all(phen_sources %in% pred_sources))) { - stop(paste0( - "The source files for predictor scalar '", pred, "' do not match phenotypes$source_file." - )) + dat <- elem$dat + + # Fit GAM ---- + arguments <- list(...) + arguments$formula <- effective_formula + arguments$data <- dat + + onemodel <- tryCatch( + { + do.call(mgcv::gam, arguments) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.gam error at element ", + i_element, ": ", conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() + } + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate) { + return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) + } else { + return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) } - reorder_idx <- match(phen_sources, pred_sources) - pred_vals <- scalars(modelarray)[[pred]][i_element, ] - pred_vals <- pred_vals[reorder_idx] - dat[[pred]] <- pred_vals - masks[[length(masks) + 1L]] <- is.finite(pred_vals) } + stop(e) } + ) - # intersection-based threshold - valid_mask <- Reduce("&", masks) - num.subj.valid <- sum(valid_mask) - if (!(num.subj.valid > num.subj.lthr)) { - if (flag_initiate == TRUE) { - toreturn <- list( - 
column_names = NaN, - list.smoothTerms = NaN, - list.parametricTerms = NaN, - sp.criterion.attr.name = NaN - ) - return(toreturn) - } else { - onerow <- c( - i_element - 1, - rep(NaN, (num.stat.output - 1)) - ) - return(onerow) - } + if (inherits(onemodel, "gam_error")) { + if (flag_initiate) { + return(list( + column_names = NaN, + list.smoothTerms = NaN, + list.parametricTerms = NaN, + sp.criterion.attr.name = NaN + )) + } else { + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + return(onerow) } + } - arguments <- list(...) - arguments$formula <- formula - arguments$data <- dat - - # explicitly passing arguments into command, to avoid error of argument "weights" - onemodel <- tryCatch( - { - do.call(mgcv::gam, arguments) - }, - error = function(e) { - msg <- paste0( - "analyseOneElement.gam error at element ", - i_element, - ": ", - conditionMessage(e) - ) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.gam_error_initiate = TRUE), class = "gam_error")) - } else { - return(structure(list(.gam_error_runtime = TRUE), class = "gam_error")) - } - } - stop(e) - } - ) - - if (inherits(onemodel, "gam_error")) { - if (flag_initiate == TRUE) { - toreturn <- list( - column_names = NaN, - list.smoothTerms = NaN, - list.parametricTerms = NaN, - sp.criterion.attr.name = NaN - ) - return(toreturn) - } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) - return(onerow) - } - } - onemodel.tidy.smoothTerms <- onemodel %>% broom::tidy(parametric = FALSE) - onemodel.tidy.parametricTerms <- onemodel %>% broom::tidy(parametric = TRUE) - onemodel.glance <- onemodel %>% broom::glance() - onemodel.summary <- onemodel %>% summary() - # add additional model's stat to onemodel.glance(): - onemodel.glance[["adj.r.squared"]] <- onemodel.summary$r.sq - onemodel.glance[["dev.expl"]] <- onemodel.summary$dev.expl - - 
sp.criterion.attr.name <- onemodel.summary$sp.criterion %>% attr(which = "name") - onemodel.glance[["sp.criterion"]] <- onemodel.summary$sp.criterion[[sp.criterion.attr.name]] - onemodel.glance[["scale"]] <- onemodel.summary$scale # scale estimate - - num.smoothTerms <- onemodel.summary$m # The number of smooth terms in the model. - - - # delete columns you don't want: - var.smoothTerms.full <- names(onemodel.tidy.smoothTerms) - var.parametricTerms.full <- names(onemodel.tidy.parametricTerms) - var.model.full <- names(onemodel.glance) - - # list to remove: - var.smoothTerms.orig <- var.smoothTerms - var.smoothTerms <- list("term", var.smoothTerms) %>% unlist() # we will always keep "term" column - var.smoothTerms.remove <- list() - for (l in var.smoothTerms.full) { - if (!(l %in% var.smoothTerms)) { - var.smoothTerms.remove <- var.smoothTerms.remove %>% - append(., l) %>% - unlist() # the order will still be kept - } - } + # Extract statistics ---- - var.parametricTerms.orig <- var.parametricTerms - var.parametricTerms <- list("term", var.parametricTerms) %>% unlist() # we will always keep "term" column - var.parametricTerms.remove <- list() - for (l in var.parametricTerms.full) { - if (!(l %in% var.parametricTerms)) { - var.parametricTerms.remove <- var.parametricTerms.remove %>% - append(., l) %>% - unlist() # the order will still be kept - } - } + onemodel.tidy.smoothTerms <- onemodel %>% broom::tidy(parametric = FALSE) + onemodel.tidy.parametricTerms <- onemodel %>% broom::tidy(parametric = TRUE) + onemodel.glance <- onemodel %>% broom::glance() + onemodel.summary <- onemodel %>% summary() - var.model.remove <- list() - for (l in var.model.full) { - if (!(l %in% var.model)) { - var.model.remove <- var.model.remove %>% - append(., l) %>% - unlist() # the order will still be kept - } - } + # ---- Normalize term names (must happen before column name assembly) ---- - # remove those columns: - if (length(var.smoothTerms.remove) != 0) { - # if length=0, it's list(), 
nothing to remove - onemodel.tidy.smoothTerms <- dplyr::select(onemodel.tidy.smoothTerms, -dplyr::all_of(var.smoothTerms.remove)) - } - if (length(var.parametricTerms.remove) != 0) { - # if length=0, it's list(), nothing to remove - onemodel.tidy.parametricTerms <- dplyr::select(onemodel.tidy.parametricTerms, -dplyr::all_of(var.parametricTerms.remove)) - } - if (length(var.model.remove) != 0) { - onemodel.glance <- dplyr::select(onemodel.glance, -dplyr::all_of(var.model.remove)) - } + # Parametric: (Intercept) → Intercept + if (nrow(onemodel.tidy.parametricTerms) > 0) { + onemodel.tidy.parametricTerms$term[ + onemodel.tidy.parametricTerms$term == "(Intercept)" + ] <- "Intercept" + } - # adjust: - if (num.smoothTerms > 0) { - # if there is any smooth term - onemodel.tidy.smoothTerms$term[onemodel.tidy.smoothTerms$term == "(Intercept)"] <- "Intercept" - # change the term name from "(Intercept)" to "Intercept" + # Smooth: s(age) → s_age, ti(x,z) → ti_x_z, s(x):oFactor → s_x_BYoFactor + num.smoothTerms <- onemodel.summary$m + if (num.smoothTerms > 0) { + if (nrow(onemodel.tidy.smoothTerms) > 0) { + onemodel.tidy.smoothTerms$term[ + onemodel.tidy.smoothTerms$term == "(Intercept)" + ] <- "Intercept" } - if (nrow(onemodel.tidy.parametricTerms) > 0) { - # if there is any parametric term - onemodel.tidy.parametricTerms$term[onemodel.tidy.parametricTerms$term == "(Intercept)"] <- "Intercept" - # change the term name from "(Intercept)" to "Intercept" - } - - # change from s(x) to s_x: (could be s, te, etc); from s(x):oFactor to s_x_BYoFactor; from ti(x,z) to ti_x_z - if (num.smoothTerms > 0) { - # if there is any smooth term - for (i_row in seq_len(nrow(onemodel.tidy.smoothTerms))) { - # step 1: change from s(x) to s_x - term_name <- onemodel.tidy.smoothTerms$term[i_row] - str_list <- strsplit(term_name, split = "[()]")[[1]] - - str <- str_list[2] # extract string between () - smooth_name <- str_list[1] # "s" or some other smooth method type such as "te" - str_valid <- 
paste0(smooth_name, "_", str) - - if (length(str_list) > 2) { - # there is string after variable name - str_valid <- paste0( - str_valid, - "_", - paste(str_list[3:length(str_list)], collapse = "") - ) # combine rest of strings - } - # detect ":", and change to "BY" # there is "_" replacing for ")" in "s()" already - str_valid <- gsub(":", "BY", str_valid, fixed = TRUE) + for (i_row in seq_len(nrow(onemodel.tidy.smoothTerms))) { + term_name <- onemodel.tidy.smoothTerms$term[i_row] + str_list <- strsplit(term_name, split = "[()]")[[1]] - # detect ",", and change to "_" - str_valid <- gsub(",", "_", str_valid, fixed = TRUE) + str <- str_list[2] + smooth_name <- str_list[1] + str_valid <- paste0(smooth_name, "_", str) - onemodel.tidy.smoothTerms$term[i_row] <- str_valid + if (length(str_list) > 2) { + str_valid <- paste0( + str_valid, "_", + paste(str_list[3:length(str_list)], collapse = "") + ) } - } - - onemodel.glance <- onemodel.glance %>% dplyr::mutate(term = "model") # add a column + # : → BY + str_valid <- gsub(":", "BY", str_valid, fixed = TRUE) + # , → _ + str_valid <- gsub(",", "_", str_valid, fixed = TRUE) - # get the list of terms: - if (num.smoothTerms > 0) { - list.smoothTerms <- onemodel.tidy.smoothTerms$term # if empty, gives warning - } else { - list.smoothTerms <- NULL - } - - if (nrow(onemodel.tidy.parametricTerms) > 0) { - list.parametricTerms <- onemodel.tidy.parametricTerms$term - } else { - list.parametricTerms <- NULL + onemodel.tidy.smoothTerms$term[i_row] <- str_valid } + } + list.smoothTerms <- onemodel.tidy.smoothTerms$term + list.parametricTerms <- onemodel.tidy.parametricTerms$term - # check if the onemodel.* does not have real statistics (but only a column of 'term') - temp_colnames <- onemodel.tidy.smoothTerms %>% colnames() - temp <- union(temp_colnames, "term") - # union of colnames and "term"; if colnames only has "term" or lengt of 0 (tibble()), - # union = "term", all(union)=TRUE; otherwise, if there is colnames other than "term", 
- # all(union) = c(TRUE, FALSE, ...) - if (all(temp == "term")) { - onemodel.tidy.smoothTerms <- tibble::tibble() - } - # just an empty tibble (so below, all(dim(onemodel.tidy.smoothTerms)) = FALSE) - - temp_colnames <- onemodel.tidy.parametricTerms %>% colnames() - temp <- union(temp_colnames, "term") - if (all(temp == "term")) { - onemodel.tidy.parametricTerms <- tibble::tibble() - } # just an empty tibble - - temp_colnames <- onemodel.glance %>% colnames() - temp <- union(temp_colnames, "term") - if (all(temp == "term")) { - onemodel.glance <- tibble::tibble() - } # just an empty tibble - - - # flatten .tidy results into one row: - if (all(dim(onemodel.tidy.smoothTerms))) { - # not empty | if any dim is 0, all=FALSE - onemodel.tidy.smoothTerms.onerow <- onemodel.tidy.smoothTerms %>% tidyr::pivot_wider( - names_from = "term", - values_from = dplyr::all_of(var.smoothTerms.orig), - names_glue = "{term}.{.value}" + ## Smooth term statistics ---- + onerow.smoothTerms <- tibble::tibble() + for (i_term in seq_along(list.smoothTerms)) { + for (i_var in seq_along(var.smoothTerms)) { + temp <- tibble::tibble( + placeholder = onemodel.tidy.smoothTerms[[var.smoothTerms[i_var]]][i_term] ) - } else { - onemodel.tidy.smoothTerms.onerow <- onemodel.tidy.smoothTerms + colnames(temp) <- paste0(list.smoothTerms[i_term], ".", var.smoothTerms[i_var]) + onerow.smoothTerms <- bind_cols_check_emptyTibble(onerow.smoothTerms, temp) } + } - if (all(dim(onemodel.tidy.parametricTerms))) { - # not empty - onemodel.tidy.parametricTerms.onerow <- onemodel.tidy.parametricTerms %>% tidyr::pivot_wider( - names_from = "term", - values_from = dplyr::all_of(var.parametricTerms.orig), - names_glue = "{term}.{.value}" + ## Parametric term statistics ---- + onerow.parametricTerms <- tibble::tibble() + for (i_term in seq_along(list.parametricTerms)) { + for (i_var in seq_along(var.parametricTerms)) { + temp <- tibble::tibble( + placeholder = 
onemodel.tidy.parametricTerms[[var.parametricTerms[i_var]]][i_term] ) - } else { - onemodel.tidy.parametricTerms.onerow <- onemodel.tidy.parametricTerms + colnames(temp) <- paste0(list.parametricTerms[i_term], ".", var.parametricTerms[i_var]) + onerow.parametricTerms <- bind_cols_check_emptyTibble(onerow.parametricTerms, temp) } + } - if (all(dim(onemodel.glance))) { - # not empty - onemodel.glance.onerow <- onemodel.glance %>% tidyr::pivot_wider( - names_from = "term", - values_from = dplyr::all_of(var.model), - names_glue = "{term}.{.value}" - ) + ## Model-level statistics ---- + onerow.model <- tibble::tibble() + for (i_var in seq_along(var.model)) { + val <- NULL + if (var.model[i_var] %in% colnames(onemodel.glance)) { + val <- onemodel.glance[[var.model[i_var]]] + } else if (var.model[i_var] == "adj.r.squared") { + val <- onemodel.summary$r.sq + } else if (var.model[i_var] == "dev.expl") { + val <- onemodel.summary$dev.expl + } else if (var.model[i_var] == "sp.criterion") { + val <- onemodel.summary$sp.criterion + if (flag_initiate) { + sp.criterion.attr.name <- names(onemodel.summary$sp.criterion) + } + } else if (var.model[i_var] == "scale") { + val <- onemodel.summary$scale } else { - onemodel.glance.onerow <- onemodel.glance + val <- onemodel.glance[[var.model[i_var]]] } + temp <- tibble::tibble(placeholder = val) + colnames(temp) <- paste0("model.", var.model[i_var]) + onerow.model <- bind_cols_check_emptyTibble(onerow.model, temp) + } + ## SSE if requested ---- + onerow.sse <- tibble::tibble() + if (flag_sse) { + sse_val <- sum(onemodel$residuals^2) + temp <- tibble::tibble(placeholder = sse_val) + colnames(temp) <- "model.sse" + onerow.sse <- temp + } - # combine the tables: check if any of them is empty (tibble()) - onemodel.onerow <- bind_cols_check_emptyTibble( - onemodel.tidy.smoothTerms.onerow, - onemodel.tidy.parametricTerms.onerow - ) - onemodel.onerow <- bind_cols_check_emptyTibble(onemodel.onerow, onemodel.glance.onerow) - + onerow.all <- 
bind_cols_check_emptyTibble(onerow.smoothTerms, onerow.parametricTerms) + onerow.all <- bind_cols_check_emptyTibble(onerow.all, onerow.model) + onerow.all <- bind_cols_check_emptyTibble(onerow.all, onerow.sse) - # add a column of element ids: - colnames.temp <- colnames(onemodel.onerow) - onemodel.onerow <- onemodel.onerow %>% tibble::add_column(element_id = i_element - 1, .before = colnames.temp[1]) - # add as the first column + column_names <- c("element_id", colnames(onerow.all)) - # add sse if requested: - if (flag_sse == TRUE) { - onemodel.onerow[["model.sse"]] <- sum((onemodel$y - onemodel$fitted.values)^2) - # using values from model itself, where NAs in y have been excluded --> - # sse won't be NA --> partial R-squared won't be NA - } - - - # now you can get the headers, # of columns, etc of the output results - - - if (flag_initiate == TRUE) { - # return the column names: - - # return: - column_names <- colnames(onemodel.onerow) - toreturn <- list( - column_names = column_names, - list.smoothTerms = list.smoothTerms, - list.parametricTerms = list.parametricTerms, - sp.criterion.attr.name = sp.criterion.attr.name - ) - toreturn - } else if (flag_initiate == FALSE) { - # return the one row results: - - # return: - onerow <- as.numeric(onemodel.onerow) # change from tibble to numeric to save some space - onerow + if (flag_initiate) { + if (!exists("sp.criterion.attr.name")) { + sp.criterion.attr.name <- NA_character_ } + return(list( + column_names = column_names, + list.smoothTerms = list.smoothTerms, + list.parametricTerms = list.parametricTerms, + sp.criterion.attr.name = sp.criterion.attr.name + )) } else { - # if flag_sufficient==FALSE - if (flag_initiate == TRUE) { - toreturn <- list( - column_names = NaN, - list.smoothTerms = NaN, - list.parametricTerms = NaN, - sp.criterion.attr.name = NaN - ) - toreturn - } else if (flag_initiate == FALSE) { - onerow <- c( - i_element - 1, - # first is element_id, which will still be a finite number - rep(NaN, 
(num.stat.output - 1)) - ) # other columns will be NaN - - onerow - } + onerow <- c(i_element - 1, as.numeric(onerow.all)) + return(onerow) } } @@ -1195,8 +911,17 @@ analyseOneElement.gam <- function(i_element, #' #' @param user_fun A function that accepts at least an argument named #' \code{data} (a data.frame: \code{phenotypes} with scalar columns -#' appended for the current element) and returns a one-row -#' data.frame/tibble, a named list, or a named atomic vector. +#' appended for the current element) and returns one of: +#' \itemize{ +#' \item A one-row \code{data.frame} or \code{tibble}. Multi-row +#' returns will error. +#' \item A named list. Unnamed lists are accepted and auto-named +#' as \code{v1}, \code{v2}, etc. +#' \item A named atomic vector. Unnamed vectors are accepted and +#' auto-named as \code{v1}, \code{v2}, etc. +#' } +#' @param ctx A precomputed context list from \code{.build_wrap_context()}, +#' or \code{NULL}. #' @param ... Additional arguments forwarded to \code{user_fun}. #' #' @return If \code{flag_initiate = TRUE}, a list with one component: @@ -1222,153 +947,124 @@ analyseOneElement.gam <- function(i_element, #' @export analyseOneElement.wrap <- function(i_element, user_fun, - modelarray, - phenotypes, - scalar, + modelarray = NULL, + phenotypes = NULL, + scalar = NULL, num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, on_error = "stop", + ctx = NULL, ...) 
{ - values <- scalars(modelarray)[[scalar]][i_element, ] - ## check number of subjects with (in)valid values: - flag_sufficient <- NULL - num.subj.valid <- length(values[is.finite(values)]) - if (num.subj.valid > num.subj.lthr) { - flag_sufficient <- TRUE - } else { - flag_sufficient <- FALSE + # Resolve context ---- + + if (is.null(ctx)) { + ctx <- .build_wrap_context(modelarray, phenotypes, scalar) } - if (flag_sufficient == TRUE) { - dat <- phenotypes - # attach all scalars with alignment and collision checks - scalar_names <- names(scalars(modelarray)) - collisions <- intersect(scalar_names, colnames(dat)) - if (length(collisions) > 0) { - stop(paste0( - "Column name collision between phenotypes and scalar names: ", - paste(collisions, collapse = ", "), - ". Please rename or remove these columns from phenotypes before wrapping." - )) - } - masks <- list() - phen_sources <- dat[["source_file"]] - for (sname in scalar_names) { - s_sources <- sources(modelarray)[[sname]] - if (!(all(s_sources %in% phen_sources) && all(phen_sources %in% s_sources))) { - stop(paste0( - "The source files for scalar '", sname, "' do not match phenotypes$source_file." 
- )) - } - reorder_idx <- match(phen_sources, s_sources) - s_vals <- scalars(modelarray)[[sname]][i_element, ] - s_vals <- s_vals[reorder_idx] - dat[[sname]] <- s_vals - masks[[length(masks) + 1L]] <- is.finite(s_vals) - } + # Per-element data assembly ---- - # intersection-based threshold across all scalars - valid_mask <- Reduce("&", masks) - num.subj.valid <- sum(valid_mask) - if (!(num.subj.valid > num.subj.lthr)) { - if (flag_initiate == TRUE) { - toreturn <- list(column_names = NaN) - return(toreturn) - } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) - return(onerow) - } + elem <- .assemble_element_data(i_element, ctx, num.subj.lthr) + + if (!elem$sufficient) { + if (flag_initiate) { + return(list(column_names = NaN)) + } else { + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + return(onerow) } + } - arguments <- list(...) - arguments$data <- dat - - # Execute user function with error handling - result <- tryCatch( - { - do.call(user_fun, arguments) - }, - error = function(e) { - msg <- paste0( - "analyseOneElement.wrap error at element ", - i_element, - ": ", - conditionMessage(e) - ) - if (on_error == "debug" && interactive()) { - message(msg) - browser() - } - if (on_error == "skip" || on_error == "debug") { - warning(msg) - if (flag_initiate == TRUE) { - return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) - } else { - return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) - } - } - stop(e) - } - ) + dat <- elem$dat - # On error in user function, return NaNs according to flag - if (inherits(result, "wrap_error")) { - if (flag_initiate == TRUE) { - toreturn <- list(column_names = NaN) - return(toreturn) - } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) - return(onerow) - } - } - # Coerce to one-row tibble - if ("data.frame" %in% class(result)) { - onerow_tbl <- tibble::as_tibble(result) - if (nrow(onerow_tbl) != 1) { - stop( - "The user function must return 
a one-row data.frame/tibble, a named list, or a named vector." - ) + # Execute user function ---- + + arguments <- list(...) + arguments$data <- dat + + result <- tryCatch( + { + do.call(user_fun, arguments) + }, + error = function(e) { + msg <- paste0( + "analyseOneElement.wrap error at element ", + i_element, ": ", conditionMessage(e) + ) + if (on_error == "debug" && interactive()) { + message(msg) + browser() } - } else if (is.list(result)) { - onerow_tbl <- tibble::as_tibble_row(result) - } else if (is.atomic(result)) { - if (is.null(names(result))) { - names(result) <- paste0("v", seq_along(result)) + if (on_error == "skip" || on_error == "debug") { + warning(msg) + if (flag_initiate) { + return(structure(list(.wrap_error_initiate = TRUE), class = "wrap_error")) + } else { + return(structure(list(.wrap_error_runtime = TRUE), class = "wrap_error")) + } } - onerow_tbl <- tibble::as_tibble_row(as.list(result)) - } else { - stop( - paste0( - "Unsupported return type from user function. ", - "Return a one-row data.frame/tibble, named list, or named vector." - ) - ) + stop(e) } + ) - # add element_id as first column - colnames.temp <- colnames(onerow_tbl) - onerow_tbl <- onerow_tbl %>% tibble::add_column(element_id = i_element - 1, .before = colnames.temp[1]) - - if (flag_initiate == TRUE) { - toreturn <- list(column_names = colnames(onerow_tbl)) - toreturn + # Coerce result to named numeric vector ---- + if (inherits(result, "wrap_error")) { + if (flag_initiate) { + return(list(column_names = NaN)) } else { - # return numeric vector to be consistent with other analysers - as.numeric(onerow_tbl) + onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + return(onerow) + } + } + + if (is.data.frame(result) || tibble::is_tibble(result)) { + if (nrow(result) != 1) { + msg <- paste0( + "The user function must return a one-row data.frame/tibble, ", + "a named list, or a named vector." 
+ ) + if (on_error == "skip" || on_error == "debug") { + warning(paste0("analyseOneElement.wrap at element ", i_element, ": ", msg)) + if (flag_initiate) return(list(column_names = NaN)) + return(c(i_element - 1, rep(NaN, (num.stat.output - 1)))) + } + stop(msg) } + result_names <- colnames(result) + result_values <- as.numeric(result[1, ]) + } else if (is.list(result)) { + result_names <- names(result) + if (is.null(result_names) || any(result_names == "")) { + result_names <- paste0("v", seq_along(result)) + } + result_values <- as.numeric(unlist(result)) + } else if (is.atomic(result)) { + if (is.null(names(result))) { + names(result) <- paste0("v", seq_along(result)) + } + result_names <- names(result) + result_values <- as.numeric(result) } else { - # insufficient subjects - if (flag_initiate == TRUE) { - toreturn <- list(column_names = NaN) - toreturn - } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) - onerow + msg <- paste0("user_fun must return a one-row data.frame/tibble, a named list, ", + "or a named atomic vector. 
Got: ", class(result)[1]) + if (on_error == "skip" || on_error == "debug") { + warning(paste0("analyseOneElement.wrap at element ", i_element, ": ", msg)) + if (flag_initiate) return(list(column_names = NaN)) + return(c(i_element - 1, rep(NaN, (num.stat.output - 1)))) } + stop(msg) + } + + column_names <- c("element_id", result_names) + + if (flag_initiate) { + return(list(column_names = column_names)) + } else { + onerow <- c(i_element - 1, result_values) + return(onerow) } } diff --git a/R/analyse-helpers.R b/R/analyse-helpers.R index 5144a5d..7660603 100644 --- a/R/analyse-helpers.R +++ b/R/analyse-helpers.R @@ -1,7 +1,7 @@ # Internal shared helpers for ModelArray analysis functions # These are not exported — used by ModelArray.lm, ModelArray.gam, ModelArray.wrap - +# Validation and dataset preparation ---- #' Validate that data is a ModelArray #' @noRd .validate_modelarray_input <- function(data) { @@ -110,7 +110,320 @@ max(num.subj.total * num.subj.lthr.rel, num.subj.lthr.abs) } +# Create analysis context ---- +# Precomputed context builders for element-wise analysis functions. +# These hoist loop-invariant computations out of the per-element functions +# so they execute once before the loop rather than millions of times inside it. + +# Level 1: Base context shared by all analysis functions (lm, gam, wrap). +# Handles scalar discovery, collision checks, and cross-scalar source alignment. + +#' Build base context shared by all element-wise analysis functions +#' +#' Precomputes scalar name lookups, column-name collision checks, and +#' cross-scalar source-alignment indices that are invariant across elements. +#' This is the foundation on which model-specific contexts are built. +#' +#' @param modelarray A \linkS4class{ModelArray} object. +#' @param phenotypes A data.frame, already aligned to \code{modelarray} via +#' \code{.align_phenotypes()}. +#' @param scalar Character. The name of the response scalar. 
+#' @param scalar_subset Character vector or \code{NULL}. Which scalars to +#' attach and align. If \code{NULL}, all scalars in the ModelArray are used +#' (the \code{wrap} behaviour). If a character vector, only those scalars +#' are included (the \code{lm}/\code{gam} behaviour where we detect +#' formula-referenced scalars). +#' @return A named list with components: +#' \describe{ +#' \item{modelarray}{The ModelArray object (read-only reference).} +#' \item{phenotypes}{The aligned phenotypes data.frame.} +#' \item{scalar}{Character. The response scalar name.} +#' \item{all_scalar_names}{Character vector. Names of all scalars in the +#' ModelArray.} +#' \item{attached_scalars}{Character vector. Names of scalars that will +#' be attached to the per-element data.frame (may be a subset of +#' \code{all_scalar_names} or all of them).} +#' \item{predictor_reorder}{A named list of integer vectors. For each +#' scalar in \code{attached_scalars}, the precomputed index vector +#' from \code{match(phen_sources, scalar_sources)} that reorders +#' scalar columns to match phenotype rows. For the response scalar +#' (which is already aligned by \code{.align_phenotypes()}), this +#' entry is \code{NULL} (no reordering needed).} +#' } +#' @noRd +.build_base_context <- function(modelarray, phenotypes, scalar, + scalar_subset = NULL) { + all_scalar_names <- names(scalars(modelarray)) + + + # Determine which scalars to attach + + if (is.null(scalar_subset)) { + # wrap mode: attach all scalars + attached_scalars <- all_scalar_names + } else { + # lm/gam mode: attach only the explicitly requested scalars + # (response + any predictor scalars detected from the formula) + attached_scalars <- unique(scalar_subset) + } + + + # Collision check: scalar names vs phenotype column names. + # This is invariant — the same collision would occur at every element, + + # so we fail fast once rather than millions of times. 
+ + collisions <- intersect(attached_scalars, colnames(phenotypes)) + if (length(collisions) > 0) { + stop( + "Column name collision between phenotypes and scalar names: ", + paste(collisions, collapse = ", "), + ". Please rename or remove these columns from phenotypes before ", + "modeling." + ) + } + + # Precompute source-alignment reorder indices for each attached scalar. + # The response scalar is already aligned by .align_phenotypes(), so it + + # needs no reordering. Predictor scalars from mergeModelArrays() may + # have different column orderings and need match()-based reordering. + phen_sources <- phenotypes[["source_file"]] + predictor_reorder <- stats::setNames( + vector("list", length(attached_scalars)), + attached_scalars + ) + + for (sname in attached_scalars) { + s_sources <- sources(modelarray)[[sname]] + + if (identical(s_sources, phen_sources)) { + # Already aligned (typical for the response scalar, or when all + # scalars share the same source order after mergeModelArrays) + predictor_reorder[[sname]] <- NULL + } else { + # Validate that the sets match (once, not per element) + if (!(all(s_sources %in% phen_sources) && + all(phen_sources %in% s_sources))) { + stop( + "The source files for scalar '", sname, + "' do not match phenotypes$source_file." + ) + } + predictor_reorder[[sname]] <- match(phen_sources, s_sources) + } + } + + list( + modelarray = modelarray, + phenotypes = phenotypes, + scalar = scalar, + all_scalar_names = all_scalar_names, + attached_scalars = attached_scalars, + predictor_reorder = predictor_reorder + ) +} + + +# Level 2: Model-specific context builders that add formula-derived +# information on top of the base context. + +#' Build context for element-wise linear model fitting +#' +#' Extends the base context with formula parsing results: the LHS variable +#' name, RHS variable names, and which RHS variables are scalars (as opposed +#' to phenotype columns). All of this is invariant across elements. 
+#' +#' @param formula A \code{\link[stats]{formula}} to be passed to +#' \code{\link[stats]{lm}}. +#' @param modelarray A \linkS4class{ModelArray} object. +#' @param phenotypes A data.frame, already aligned via +#' \code{.align_phenotypes()}. +#' @param scalar Character. The response scalar name. +#' @return A named list inheriting all components from +#' \code{.build_base_context()} plus: +#' \describe{ +#' \item{formula}{The model formula.} +#' \item{lhs_name}{Character. The response variable name from the +#' formula LHS.} +#' \item{rhs_vars}{Character vector. All variable names on the RHS.} +#' \item{scalar_predictors}{Character vector. RHS variables that are +#' scalar names in the ModelArray (i.e., cross-scalar predictors).} +#' } +#' @noRd +.build_lm_context <- function(formula, modelarray, phenotypes, scalar) { + # Formula parsing — currently repeated per element inside + # analyseOneElement.lm [4] + all_vars <- all.vars(formula) + lhs_name <- tryCatch( + as.character(formula[[2]]), + error = function(e) NULL + ) + rhs_vars <- setdiff(all_vars, lhs_name) + + # Detect which RHS variables are scalars in the ModelArray + all_scalar_names <- names(scalars(modelarray)) + scalar_predictors <- intersect(rhs_vars, all_scalar_names) + + # The scalars to attach: response + any scalar predictors + scalar_subset <- unique(c(scalar, scalar_predictors)) + + # Build the base context with only the needed scalars + base_ctx <- .build_base_context( + modelarray = modelarray, + phenotypes = phenotypes, + scalar = scalar, + scalar_subset = scalar_subset + ) + + # Extend with formula-specific fields + base_ctx$formula <- formula + base_ctx$lhs_name <- lhs_name + base_ctx$rhs_vars <- rhs_vars + base_ctx$scalar_predictors <- scalar_predictors + base_ctx +} + + +#' Build context for element-wise GAM fitting +#' +#' Extends the base context with formula parsing results and GAM-specific +#' formula validation that currently runs inside the ModelArray.gam() +#' preamble but whose 
results are never passed to the per-element function. +#' +#' @param formula A \code{\link[stats]{formula}} to be passed to +#' \code{\link[mgcv]{gam}}. +#' @param modelarray A \linkS4class{ModelArray} object. +#' @param phenotypes A data.frame, already aligned via +#' \code{.align_phenotypes()}. +#' @param scalar Character. The response scalar name. +#' @return A named list inheriting all components from +#' \code{.build_lm_context()} (which itself inherits from +#' \code{.build_base_context()}) plus: +#' \describe{ +#' \item{gam_formula_breakdown}{The result of +#' \code{mgcv::interpret.gam(formula)}, cached for reuse.} +#' } +#' @noRd +.build_gam_context <- function(formula, modelarray, phenotypes, scalar) { + # GAM formula validation — currently runs in ModelArray.gam() preamble [1] + # but the breakdown result is discarded. We cache it here. + gam_breakdown <- tryCatch( + mgcv::interpret.gam(formula), + error = function(cond) { + stop("The formula is not valid for mgcv::gam()! Please check and revise.") + } + ) + + # The formula structure is the same as lm for variable detection purposes + ctx <- .build_lm_context(formula, modelarray, phenotypes, scalar) + + # Add GAM-specific cached data + ctx$gam_formula_breakdown <- gam_breakdown + + ctx +} + + +#' Build context for element-wise user-supplied function execution +#' +#' Uses the base context in "attach all scalars" mode since +#' \code{analyseOneElement.wrap} attaches every scalar in the ModelArray +#' to the per-element data.frame, not just formula-referenced ones. +#' +#' @param modelarray A \linkS4class{ModelArray} object. +#' @param phenotypes A data.frame, already aligned via +#' \code{.align_phenotypes()}. +#' @param scalar Character. The primary scalar name (used for the initial +#' validity check). +#' @return A named list: the base context with \code{scalar_subset = NULL} +#' (all scalars attached). 
+#' @noRd +.build_wrap_context <- function(modelarray, phenotypes, scalar) { + # scalar_subset = NULL triggers "attach all" mode in .build_base_context() + ctx <- .build_base_context( + modelarray = modelarray, + phenotypes = phenotypes, + scalar = scalar, + scalar_subset = NULL + ) + + ctx +} + + +# Level 3: Shared per-element data assembly helper. +# Extracts scalar values for one element using the precomputed context, +# builds the validity mask, and returns the filtered data.frame. +# This replaces duplicated logic across analyseOneElement.lm, +# analyseOneElement.gam, and analyseOneElement.wrap [4]. + +#' Assemble per-element data.frame from precomputed context +#' +#' Reads scalar rows from the ModelArray, applies precomputed reorder +#' indices, builds the intersection validity mask, and returns the +#' filtered data.frame ready for model fitting or user function execution. +#' +#' @param i_element Integer. 1-based element index. +#' @param ctx A context list from one of the \code{.build_*_context()} +#' functions. +#' @param num.subj.lthr Numeric. Minimum number of subjects with finite +#' values required. +#' @return A list with components: +#' \describe{ +#' \item{dat}{A data.frame: the filtered phenotypes with scalar columns +#' attached. \code{NULL} if the element has insufficient valid +#' subjects.} +#' \item{sufficient}{Logical. Whether the element passed the subject +#' threshold.} +#' \item{num_valid}{Integer. 
Number of subjects with finite values +#' across all attached scalars.} +#' } +#' @noRd +.assemble_element_data <- function(i_element, ctx, num.subj.lthr) { + # Read the response scalar row — the only mandatory per-element I/O + response_vals <- scalars(ctx$modelarray)[[ctx$scalar]][i_element, ] + + # Start the validity mask with the response scalar + masks <- list(is.finite(response_vals)) + + # Read and reorder additional attached scalars + scalar_values <- list() + scalar_values[[ctx$scalar]] <- response_vals + + other_scalars <- setdiff(ctx$attached_scalars, ctx$scalar) + for (sname in other_scalars) { + s_vals <- scalars(ctx$modelarray)[[sname]][i_element, ] + + # Apply precomputed reorder index (or use as-is if NULL) + reorder_idx <- ctx$predictor_reorder[[sname]] + if (!is.null(reorder_idx)) { + s_vals <- s_vals[reorder_idx] + } + + scalar_values[[sname]] <- s_vals + masks[[length(masks) + 1L]] <- is.finite(s_vals) + } + + # Intersection mask across all scalars + valid_mask <- Reduce("&", masks) + num_valid <- sum(valid_mask) + + if (!(num_valid > num.subj.lthr)) { + return(list(dat = NULL, sufficient = FALSE, num_valid = num_valid)) + } + + # Build filtered data.frame + dat <- ctx$phenotypes[valid_mask, , drop = FALSE] + for (sname in ctx$attached_scalars) { + dat[[sname]] <- scalar_values[[sname]][valid_mask] + } + + list(dat = dat, sufficient = TRUE, num_valid = num_valid) +} + +# Find initiator element ---- #' Find a valid initiator element by searching middle → forward → backward #' #' @param analyse_one_fn The per-element analysis function @@ -186,6 +499,7 @@ } +# Parallelization ---- #' Dispatch parallel/serial apply with optional progress bar #' #' @param element.subset Integer vector of element indices @@ -236,7 +550,7 @@ fits } - +# P-value correction ---- #' Correct p-values for a set of terms and append corrected columns #' #' @param df_out Data.frame of results @@ -264,7 +578,7 @@ df_out } - +# Streaming writes ---- #' Initialize an incremental 
writer for /results datasets #' @noRd .init_results_stream_writer <- function(write_results_name, diff --git a/R/analyse.R b/R/analyse.R index 9cd8881..9779d39 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -132,10 +132,12 @@ #' @rdname ModelArray.lm #' @export -ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE, +ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NULL, + full.outputs = FALSE, var.terms = c("estimate", "statistic", "p.value"), var.model = c("adj.r.squared", "p.value"), - correct.p.value.terms = c("fdr"), correct.p.value.model = c("fdr"), + correct.p.value.terms = c("fdr"), + correct.p.value.model = c("fdr"), num.subj.lthr.abs = 10, num.subj.lthr.rel = 0.2, verbose = TRUE, pbar = TRUE, n_cores = 1, on_error = "stop", @@ -146,125 +148,69 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU write_results_compression_level = 4L, return_output = TRUE, ...) { + # Validation ---- .validate_modelarray_input(data) element.subset <- .validate_element_subset(element.subset, data, scalar) phenotypes <- .align_phenotypes(data, phenotypes, scalar) - - ### display additional arguments: + # Display additional arguments ---- dots <- list(...) 
dots_names <- names(dots) - FUN <- stats::lm - - # subset: m1 <- "no default" printAdditionalArgu(FUN, "subset", dots, m1) - - # weights: m1 <- "no default" - if ("weights" %in% dots_names) { # if user provides weights - m_usr_input <- paste0( - class(dots$weights), - " with length of ", - length(dots$weights) - ) # message describing usr's input; cannot use dim on c(1,2) + if ("weights" %in% dots_names) { + m_usr_input <- paste0(class(dots$weights), " with length of ", length(dots$weights)) } else { m_usr_input <- NULL } printAdditionalArgu(FUN, "weights", dots, m1, m_usr_input) - - # na.action: m1 <- "no default" printAdditionalArgu(FUN, "na.action", dots, m1) - - # method: - printAdditionalArgu(FUN, "method", dots) # default: "qr" - - # model: - m1 <- invisible(eval(formals(FUN)[["model"]])) %>% as.character() # default: [logical] TRUE + printAdditionalArgu(FUN, "method", dots) + m1 <- invisible(eval(formals(FUN)[["model"]])) %>% as.character() printAdditionalArgu(FUN, "model", dots, m1) - - # x: - m1 <- invisible(eval(formals(FUN)[["x"]])) %>% as.character() # default: [logical] FALSE + m1 <- invisible(eval(formals(FUN)[["x"]])) %>% as.character() printAdditionalArgu(FUN, "x", dots, m1) - # y: - m1 <- invisible(eval(formals(FUN)[["y"]])) %>% as.character() # default: [logical] FALSE - printAdditionalArgu(FUN, "y", dots, m1) - - # qr: - m1 <- invisible(eval(formals(FUN)[["qr"]])) %>% as.character() # default: [logical] TRUE - printAdditionalArgu(FUN, "qr", dots, m1) - - # singular.ok: - m1 <- invisible(eval(formals(FUN)[["singular.ok"]])) %>% as.character() # default: [logical] TRUE - printAdditionalArgu(FUN, "singular.ok", dots, m1) - - # contrasts: - printAdditionalArgu(FUN, "contrasts", dots) # default: NULL - - # offset: - m1 <- "no default" # there is no default - printAdditionalArgu(FUN, "offset", dots, m1) - - num.subj.lthr <- .compute_subject_threshold(phenotypes, num.subj.lthr.abs, num.subj.lthr.rel) - ### other setups: + # Full outputs override ---- 
var.terms.full <- c("estimate", "std.error", "statistic", "p.value") var.model.full <- c( "r.squared", "adj.r.squared", "sigma", "statistic", "p.value", "df", "logLik", "AIC", "BIC", "deviance", "df.residual", "nobs" ) - if (full.outputs == TRUE) { # full set of outputs + if (full.outputs == TRUE) { var.terms <- var.terms.full var.model <- var.model.full } - # check on validity of list of vars: - var.terms <- var.terms[!duplicated(var.terms)] # remove duplicated element(s) - var.model <- var.model[!duplicated(var.model)] - - # check if all var.* are empty: - if (length(var.terms) == 0 && length(var.model) == 0) { - stop("All var.* arguments [var.terms, var.model] are empty!") - } - # check if every var is valid: - for (var in var.terms) { - if (!(var %in% var.terms.full)) { - stop(paste0(var, " is not valid for var.terms!")) - } - } - for (var in var.model) { - if (!(var %in% var.model.full)) { - stop(paste0(var, " is not valid for var.model!")) - } - } - - - # check for p.value correction: - # check for terms: + # P-value correction checks ---- check_validity_correctPValue( correct.p.value.terms, "correct.p.value.terms", var.terms, "var.terms" ) - # check for model: check_validity_correctPValue( correct.p.value.model, "correct.p.value.model", var.model, "var.model" ) + # Build context ONCE before the loop ---- + ctx <- .build_lm_context(formula, data, phenotypes, scalar) - ### start the process: + # Start the model fitting ---- if (verbose) { message(glue::glue("Fitting element-wise linear models for {scalar}")) message(glue::glue("initiating....")) } num.elements.total <- numElementsTotal(modelarray = data, scalar_name = scalar) + + # Initiate using ctx ---- init_args <- list( - formula = formula, modelarray = data, phenotypes = phenotypes, scalar = scalar, + ctx = ctx, var.terms = var.terms, var.model = var.model, num.subj.lthr = num.subj.lthr, on_error = on_error, ... 
) @@ -279,13 +225,16 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU message(glue::glue("looping across elements....")) } - need_term_correction <- (!all(correct.p.value.terms == "none")) && ("p.value" %in% var.terms) - need_model_correction <- (!all(correct.p.value.model == "none")) && ("p.value" %in% var.model) + need_term_correction <- (!all(correct.p.value.terms == "none")) && + ("p.value" %in% var.terms) + need_model_correction <- (!all(correct.p.value.model == "none")) && + ("p.value" %in% var.model) need_full_df <- return_output || need_term_correction || need_model_correction if (!return_output && (need_term_correction || need_model_correction)) { stop("return_output=FALSE is not supported when p-value correction is requested") } + # Initialize stream writer ---- writer <- .init_results_stream_writer( write_results_name = write_results_name, write_results_file = write_results_file, @@ -296,15 +245,19 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU compression_level = write_results_compression_level ) + # Start model looping ---- fits_all <- if (need_full_df) vector("list", length(element.subset)) else NULL chunk_size <- if (is.null(writer)) length(element.subset) else as.integer(write_results_flush_every) chunk_starts <- seq(1L, length(element.subset), by = chunk_size) + for (chunk_start in chunk_starts) { chunk_end <- min(chunk_start + chunk_size - 1L, length(element.subset)) chunk_elements <- element.subset[chunk_start:chunk_end] - chunk_fits <- .parallel_dispatch(chunk_elements, analyseOneElement.lm, n_cores, pbar, - formula = formula, modelarray = data, phenotypes = phenotypes, scalar = scalar, + ## Main loop now passes ctx instead of formula/modelarray/phenotypes/scalar + chunk_fits <- .parallel_dispatch( + chunk_elements, analyseOneElement.lm, n_cores, pbar, + ctx = ctx, var.terms = var.terms, var.model = var.model, num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, 
flag_initiate = FALSE, on_error = on_error, @@ -333,7 +286,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU df_out <- .correct_pvalues(df_out, list.terms, correct.p.value.terms, var.terms) df_out <- .correct_pvalues(df_out, "model", correct.p.value.model, var.model) - # Streamed blocks were uncorrected; rewrite final corrected results if needed. + # Rewrite corrected results to HDF5 if needed ---- if (!is.null(writer) && (need_term_correction || need_model_correction)) { writeResults( fn.output = write_results_file, @@ -414,7 +367,8 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU #' @rdname ModelArray.gam #' @export -ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE, +ModelArray.gam <- function(formula, data, phenotypes, scalar, + element.subset = NULL, full.outputs = FALSE, var.smoothTerms = c("statistic", "p.value"), var.parametricTerms = c("estimate", "statistic", "p.value"), var.model = c("dev.expl"), @@ -431,158 +385,85 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N write_results_compression_level = 4L, return_output = TRUE, ...) { + # Validation ---- .validate_modelarray_input(data) element.subset <- .validate_element_subset(element.subset, data, scalar) + phenotypes <- .align_phenotypes(data, phenotypes, scalar) - # check if the formula is valid in terms of mgcv::gam() - tryCatch( - { - gam.formula.breakdown <- mgcv::interpret.gam(formula) - }, - error = function(cond) { - stop(paste0("The formula is not valid for mgcv::gam()! 
Please check and revise.")) - } - ) - - checker_gam_formula(formula, gam.formula.breakdown) + # Build context ---- + ctx <- .build_gam_context(formula, data, phenotypes, scalar) - phenotypes <- .align_phenotypes(data, phenotypes, scalar) + # Use the cached breakdown for the formula checker display ---- + checker_gam_formula(formula, ctx$gam_formula_breakdown) - ### display additional arguments: [only important one] + # Display additional arguments ---- dots <- list(...) dots_names <- names(dots) - FUN <- mgcv::gam - - # method: (default: "GCV.Cp") - printAdditionalArgu(FUN, "method", dots) # default: "GCV.Cp" - - # TODO: optional: check if fx=FALSE; if so, add edf to the list of var + warning: fx=TRUE is recommended - + printAdditionalArgu(FUN, "method", dots) num.subj.lthr <- .compute_subject_threshold(phenotypes, num.subj.lthr.abs, num.subj.lthr.rel) - ### when full.outputs = TRUE: + # Full outputs override ---- var.smoothTerms.full <- c("edf", "ref.df", "statistic", "p.value") var.parametricTerms.full <- c("estimate", "std.error", "statistic", "p.value") var.model.full <- c( "adj.r.squared", "dev.expl", "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance", "df.residual", "nobs" ) - - if (full.outputs == TRUE) { # full set of outputs + if (full.outputs == TRUE) { var.smoothTerms <- var.smoothTerms.full var.parametricTerms <- var.parametricTerms.full var.model <- var.model.full } - ### check on validity of arguments: var.term and var.model - var.smoothTerms <- var.smoothTerms[!duplicated(var.smoothTerms)] # remove duplicated element(s) - var.parametricTerms <- var.parametricTerms[!duplicated(var.parametricTerms)] - var.model <- var.model[!duplicated(var.model)] - - # check if all var.* are empty: - if (length(var.smoothTerms) == 0 && length(var.parametricTerms) == 0 && length(var.model) == 0) { - stop("All var.* arguments [var.smoothTerms, var.parametricTerms, var.model] are empty!") - } - - # check if every var is valid: - for (var in 
var.smoothTerms) { - if (!(var %in% var.smoothTerms.full)) { - stop(paste0(var, " is not valid for var.smoothTerms!")) - } - } - for (var in var.parametricTerms) { - if (!(var %in% var.parametricTerms.full)) { - stop(paste0(var, " is not valid for var.parametricTerms!")) - } - } - for (var in var.model) { - if (!(var %in% var.model.full)) { - stop(paste0(var, " is not valid!")) - } - } - - - var.model.orig <- var.model - if (!is.null(changed.rsq.term.index)) { # changed.rsq is not null --> requested - - # check if the term index is valid: - if (min(changed.rsq.term.index) <= 0) { - # any of not positive | can't really check if it's integer as is.integer(1) is FALSE... - stop( - paste0( - "There is element(s) in changed.rsq.term.index <= 0. ", - "It should be a (list of) positive integer!" - ) - ) - } - - terms.full.formula <- stats::terms(formula, keep.order = TRUE) - # not the re-order the terms | see: https://rdrr.io/r/stats/terms.formula.html - if (max(changed.rsq.term.index) > length(labels(terms.full.formula))) { - # if max is more than the number of terms on RHS of formula - stop( - paste0( - "Largest index in changed.rsq.term.index is more than the term number on the ", - "right hand side of formula!" - ) - ) - } - - # check how many variables on RHS; if no (but intercept, i.e. xx ~ 1), stop - if (length(labels(terms.full.formula)) == 0) { - stop( - paste0( - "Trying to analyze changed.rsq but there is no variable (except intercept 1) ", - "on right hand side of formula! Please provide at least one valid variable." 
-      )
-    )
+  # Changed.rsq setup ----
+  if (!is.null(changed.rsq.term.index)) {
+    # FIX(review): keep.order = TRUE is required so changed.rsq.term.index
+    # keeps mapping onto the user's term order (terms() may reorder terms
+    # otherwise; the pre-refactor code relied on this, and drop.terms()
+    # below indexes into the same ordering).
+    terms.full.formula <- stats::terms(formula, keep.order = TRUE)
+    changed.rsq.term.fullFormat.list <- labels(terms.full.formula)[unlist(changed.rsq.term.index)]
+    # FIX(review): the previous stub assigned c(character(0), character(0))
+    # to every entry, so `...shortFormat.list[[i]][1]` was NA and all
+    # "<term>.delta.adj.rsq" / "<term>.partial.rsq" column names broke.
+    # Restore the short-name derivation: s(age, by = oSex) -> s_age_BYoSex.
+    changed.rsq.term.shortFormat.list <- list()
+    for (idx in seq_along(changed.rsq.term.fullFormat.list)) {
+      lbl <- changed.rsq.term.fullFormat.list[[idx]]
+      parts <- strsplit(lbl, "[(]")[[1]]
+      sm <- if (length(parts) > 1) eval(parse(text = lbl)) else NULL
+      short <- if (is.null(sm)) lbl else paste0(parts[1], "_", paste(sm$term, collapse = "_"))
+      if (!is.null(sm) && sm$by != "NA") short <- paste0(short, "_BY", sm$by)
+      changed.rsq.term.shortFormat.list[[idx]] <- short
     }
-
-    message(
-      paste0(
-        "will get changed R-squared (delta.adj.rsq and partial.rsq) so the execution time ",
-        "will be longer."
-      )
-    )
-    # add adj.r.squared into var.model
+    message("will get changed R-squared (delta.adj.rsq and partial.rsq) so the execution time will be longer.")
     if (!("adj.r.squared" %in% var.model)) {
       var.model <- c(var.model, "adj.r.squared")
     }
-
-    # also should return model.sse (for both full and reduced model):
     flag_sse <- TRUE
-  } else { # changed.rsq is null --> not requested
-    flag_sse <- FALSE # no need to calculate sse for the model
+  } else {
+    flag_sse <- FALSE
   }
-  ### check on arguments: p-values correction methods
-  # check for smoothTerms:
+  # p-value correction checks ----
   check_validity_correctPValue(
     correct.p.value.smoothTerms, "correct.p.value.smoothTerms",
     var.smoothTerms, "var.smoothTerms"
   )
-  # check for parametricTerms:
   check_validity_correctPValue(
     correct.p.value.parametricTerms, "correct.p.value.parametricTerms",
     var.parametricTerms, "var.parametricTerms"
   )
-
-  ### run
+  # Start the model fitting ----
   if (verbose) {
     message(glue::glue("Fitting element-wise GAMs for {scalar}"))
     message(glue::glue("initiating...."))
   }
   num.elements.total <- numElementsTotal(modelarray = data, scalar_name = scalar)
+
+  # Initiate using ctx ----
   init_args <- list(
-    formula = formula, modelarray = data, phenotypes = phenotypes, scalar = scalar,
-    var.smoothTerms = var.smoothTerms, var.parametricTerms =
var.parametricTerms, + ctx = ctx, + var.smoothTerms = var.smoothTerms, + var.parametricTerms = var.parametricTerms, var.model = var.model, - num.subj.lthr = num.subj.lthr, flag_sse = flag_sse, on_error = on_error, ... + num.subj.lthr = num.subj.lthr, + flag_sse = flag_sse, + on_error = on_error, ... ) initiator <- .find_initiator_element( analyseOneElement.gam, num.elements.total, init_args, verbose @@ -607,6 +488,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N stop("return_output=FALSE is not supported when p-value correction or changed.rsq is requested") } + # Initialize stream writer ---- writer <- .init_results_stream_writer( write_results_name = write_results_name, write_results_file = write_results_file, @@ -617,20 +499,26 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N compression_level = write_results_compression_level ) + # Start model looping ---- fits_all <- if (need_full_df) vector("list", length(element.subset)) else NULL chunk_size <- if (is.null(writer)) length(element.subset) else as.integer(write_results_flush_every) chunk_starts <- seq(1L, length(element.subset), by = chunk_size) + for (chunk_start in chunk_starts) { chunk_end <- min(chunk_start + chunk_size - 1L, length(element.subset)) chunk_elements <- element.subset[chunk_start:chunk_end] - chunk_fits <- .parallel_dispatch(chunk_elements, analyseOneElement.gam, n_cores, pbar, - formula = formula, modelarray = data, phenotypes = phenotypes, scalar = scalar, - var.smoothTerms = var.smoothTerms, var.parametricTerms = var.parametricTerms, + # Main loop passes ctx + chunk_fits <- .parallel_dispatch( + chunk_elements, analyseOneElement.gam, n_cores, pbar, + ctx = ctx, + var.smoothTerms = var.smoothTerms, + var.parametricTerms = var.parametricTerms, var.model = var.model, - num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, - flag_initiate = FALSE, flag_sse = flag_sse, on_error = on_error, - ... 
+ num.subj.lthr = num.subj.lthr, + num.stat.output = num.stat.output, + flag_initiate = FALSE, flag_sse = flag_sse, + on_error = on_error, ... ) if (need_full_df) { @@ -652,79 +540,47 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N df_out <- as.data.frame(df_out) colnames(df_out) <- column_names + # P-value corrections ---- + df_out <- .correct_pvalues(df_out, list.smoothTerms, correct.p.value.smoothTerms, var.smoothTerms) + df_out <- .correct_pvalues(df_out, list.parametricTerms, correct.p.value.parametricTerms, var.parametricTerms) - ### get the changed.rsq for smooth terms: - if (!is.null(changed.rsq.term.index)) { # if changed.rsq is requested - message("Getting delta R-squared: running the reduced model...") - - # list of term of interest for changed.rsq: - changed.rsq.term.fullFormat.list <- labels(terms.full.formula)[changed.rsq.term.index] - # the term for changed.rsq, in full format - # get the short version: - changed.rsq.term.shortFormat.list <- list() - for (changed.rsq.term.fullFormat in changed.rsq.term.fullFormat.list) { - temp <- strsplit(changed.rsq.term.fullFormat, "[(]")[[1]] - if (length(temp) == 1) { # it's not a smooth term - as there is no () - str_valid <- changed.rsq.term.fullFormat - } else { - smooth.class <- temp[1] - - theEval <- eval(parse(text = changed.rsq.term.fullFormat)) - str_valid <- paste0( - smooth.class, "_", - paste(theEval$term, collapse = "_") - ) # ti(x,z) --> ti_x_z; s(x) --> s_x - if (theEval$by != "NA") { - str_valid <- paste0(str_valid, "_BY", theEval$by) # s(age,by=oSex) --> s_age_BYoSex - } - } - - changed.rsq.term.shortFormat <- str_valid - changed.rsq.term.shortFormat.list <- append(changed.rsq.term.shortFormat.list, changed.rsq.term.shortFormat) - } + # Compute changed R-squared ---- + if (need_changed_rsq) { + terms.full.formula <- stats::terms(formula) - # loop of each changed.rsq.term.index (i.e. 
each term of interest) - for (i.changed.rsq.term in seq_along(changed.rsq.term.fullFormat.list)) { - idx.changed.rsq.term <- changed.rsq.term.index[i.changed.rsq.term] # index + for (i.changed.rsq.term in seq_along(changed.rsq.term.index)) { + idx.changed.rsq.term <- changed.rsq.term.index[[i.changed.rsq.term]] changed.rsq.term.fullFormat <- changed.rsq.term.fullFormat.list[i.changed.rsq.term] - changed.rsq.term.shortFormat <- changed.rsq.term.shortFormat.list[[i.changed.rsq.term]][1] # it's nested + changed.rsq.term.shortFormat <- changed.rsq.term.shortFormat.list[[i.changed.rsq.term]][1] - # get the formula of reduced model - # check if there is only one term (after removing it in reduced model, - # there is no term but intercept in the formula...) if (length(labels(terms.full.formula)) == 1) { temp <- toString(formula) %>% strsplit("[, ]") reduced.formula <- stats::as.formula(paste0(temp[[1]][3], "~1")) } else { reduced.formula <- formula( - stats::drop.terms( - terms.full.formula, - idx.changed.rsq.term, - keep.response = TRUE - ) - ) # index on RHS of formula -> change to class of formula (stats::as.formula does not work) + stats::drop.terms(terms.full.formula, idx.changed.rsq.term, keep.response = TRUE) + ) } message( - paste0( - "* Getting changed R-squared for term: ", - changed.rsq.term.fullFormat, - " via reduced model as below", - "; will show up as ", - changed.rsq.term.shortFormat, - " in final dataframe" - ) + "* Getting changed R-squared for term: ", + changed.rsq.term.fullFormat, + " via reduced model; will show up as ", + changed.rsq.term.shortFormat, + " in final dataframe" ) print(reduced.formula) - # var* for reduced model: only adjusted r sq is enough - # initiate: + ## Build a separate context for the reduced model ---- + reduced_ctx <- .build_gam_context(reduced.formula, data, phenotypes, scalar) + reduced.model.outputs_initiator <- analyseOneElement.gam( - i_element = i_element_success_initiate, # directly use the i_element with known 
success - reduced.formula, data, phenotypes, scalar, - var.smoothTerms = c(), var.parametricTerms = c(), var.model = c("adj.r.squared"), + i_element = i_element_success_initiate, + ctx = reduced_ctx, + var.smoothTerms = c(), var.parametricTerms = c(), + var.model = c("adj.r.squared"), num.subj.lthr = num.subj.lthr, num.stat.output = NULL, - flag_initiate = TRUE, flag_sse = TRUE, # also to get model.sse + flag_initiate = TRUE, flag_sse = TRUE, ... ) reduced.model.column_names <- reduced.model.outputs_initiator$column_names @@ -732,8 +588,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N reduced.model.fits <- .parallel_dispatch( element.subset, analyseOneElement.gam, n_cores, pbar, - formula = reduced.formula, modelarray = data, phenotypes = phenotypes, - scalar = scalar, + ctx = reduced_ctx, var.smoothTerms = c(), var.parametricTerms = c(), var.model = c("adj.r.squared"), num.subj.lthr = num.subj.lthr, @@ -746,47 +601,20 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N reduced.model.df_out <- as.data.frame(reduced.model.df_out) colnames(reduced.model.df_out) <- reduced.model.column_names - # rename "adj.r.squared" as "redModel.adj.r.squared" before merging into df_out - names(reduced.model.df_out)[names(reduced.model.df_out) == "model.adj.r.squared"] <- "redModel.adj.r.squared" - # rename "model.sse" as "redModel.sse" before merging into df_out - names(reduced.model.df_out)[names(reduced.model.df_out) == "model.sse"] <- "redModel.sse" - - # combine new df_out to original one: - df_out <- merge(df_out, reduced.model.df_out, by = "element_id") - - # calculate the delta.adj.rsq, add to the df_out: - col_name <- paste0(changed.rsq.term.shortFormat, ".delta.adj.rsq") - df_out[[col_name]] <- df_out[["model.adj.r.squared"]] - df_out[["redModel.adj.r.squared"]] + ## Compute delta adj R-sq and partial R-sq ---- + delta_col <- paste0(changed.rsq.term.shortFormat, ".delta.adj.rsq") + partial_col <- 
paste0(changed.rsq.term.shortFormat, ".partial.rsq") - # calculate the partial.rsq, add to the df_out - # partialRsq <- (sse.red - sse.full) / sse.red - col_name <- paste0(changed.rsq.term.shortFormat, ".partial.rsq") - df_out[[col_name]] <- (df_out[["redModel.sse"]] - df_out[["model.sse"]]) / df_out[["redModel.sse"]] + df_out[[delta_col]] <- df_out[["model.adj.r.squared"]] - + reduced.model.df_out[["model.adj.r.squared"]] - # remove column of redModel - df_out <- df_out %>% subset(select = -c( - redModel.adj.r.squared, - redModel.sse - )) - } # end of for loop across term of interest for changed.rsq - - - # if adjusted r sq is not requested (see var.model.orig), remove it: - if (!("adj.r.squared" %in% var.model.orig)) { - df_out <- df_out %>% subset(select = -c(model.adj.r.squared)) + full_sse <- df_out[["model.sse"]] + reduced_sse <- reduced.model.df_out[["model.sse"]] + df_out[[partial_col]] <- (reduced_sse - full_sse) / reduced_sse } + } - # remove full model's sse (model.sse): - df_out <- df_out %>% subset(select = -c(model.sse)) - } # end of if: requesting changed.rsq - - - ### correct p values - df_out <- .correct_pvalues(df_out, list.smoothTerms, correct.p.value.smoothTerms, var.smoothTerms) - df_out <- .correct_pvalues( - df_out, list.parametricTerms, correct.p.value.parametricTerms, var.parametricTerms - ) - + # Rewrite if corrections applied and streaming was used ---- if (!is.null(writer) && (need_smooth_correction || need_param_correction || need_changed_rsq)) { writeResults( fn.output = write_results_file, @@ -796,14 +624,12 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N ) } - ### return if (return_output) { return(df_out) } invisible(NULL) } - #' Run a user-supplied function for element-wise data #' #' @description @@ -859,9 +685,19 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N #' level for scalar writes. Default 4. #' @param ... Additional arguments forwarded to \code{FUN}. 
#' -#' @return A tibble with one row per element and first column -#' \code{element_id} (0-based). Remaining columns are determined by -#' the return value of \code{FUN}. +#' @return If \code{flag_initiate = TRUE}, a list with one component: +#' \describe{ +#' \item{column_names}{Character vector. The column names derived from +#' the return value of \code{user_fun}, with \code{"element_id"} +#' prepended. For unnamed list or atomic returns, columns are named +#' \code{v1}, \code{v2}, etc. Set to \code{NaN} if the element was +#' skipped or errored.} +#' } +#' If \code{flag_initiate = FALSE}, a numeric vector of length +#' \code{num.stat.output} with \code{element_id} (0-based) first and +#' the coerced output of \code{user_fun} in subsequent positions. +#' All-\code{NaN} (except \code{element_id}) if the element was skipped +#' or if an error occurred with \code{on_error = "skip"}. #' #' @seealso \code{\link{ModelArray.lm}} for linear models, #' \code{\link{ModelArray.gam}} for GAMs, @@ -921,21 +757,29 @@ ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL write_results_compression_level = 4L, return_output = TRUE, ...) { + # Validation ---- .validate_modelarray_input(data) element.subset <- .validate_element_subset(element.subset, data, scalar) phenotypes <- .align_phenotypes(data, phenotypes, scalar) num.subj.lthr <- .compute_subject_threshold(phenotypes, num.subj.lthr.abs, num.subj.lthr.rel) - ### initiate to get column names + # Build context ---- + ctx <- .build_wrap_context(data, phenotypes, scalar) + + # Start model fitting ---- if (verbose) { message(glue::glue("Running element-wise user function for {scalar}")) message(glue::glue("initiating....")) } num.elements.total <- numElementsTotal(modelarray = data, scalar_name = scalar) + + # Initiate using ctx ---- init_args <- list( - user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, - num.subj.lthr = num.subj.lthr, on_error = on_error, ... 
+ user_fun = FUN, + ctx = ctx, + num.subj.lthr = num.subj.lthr, + on_error = on_error, ... ) initiator <- .find_initiator_element( analyseOneElement.wrap, num.elements.total, init_args, verbose @@ -945,6 +789,7 @@ ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL if (verbose) message(glue::glue("looping across elements....")) + # Initialize stream writer ---- writer_results <- .init_results_stream_writer( write_results_name = write_results_name, write_results_file = write_results_file, @@ -975,36 +820,21 @@ ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL if (is.character(write_scalar_columns)) { missing_cols <- setdiff(write_scalar_columns, column_names) if (length(missing_cols) > 0L) { - stop( - "write_scalar_columns not found in wrap output: ", - paste(missing_cols, collapse = ", ") - ) + stop("write_scalar_columns not found in wrap output: ", paste(missing_cols, collapse = ", ")) } scalar_col_idx <- match(write_scalar_columns, column_names) - } else if (is.numeric(write_scalar_columns)) { - scalar_col_idx <- as.integer(write_scalar_columns) - if (any(scalar_col_idx < 1L) || any(scalar_col_idx > length(column_names))) { - stop("write_scalar_columns numeric indices are out of bounds") - } } else { - stop("write_scalar_columns must be NULL, character, or integer") - } - - scalar_col_names <- column_names[scalar_col_idx] - if (length(scalar_col_names) == 0L) { - stop("write_scalar_columns selected zero columns") + scalar_col_idx <- as.integer(write_scalar_columns) } if (is.null(write_scalar_column_names)) { - write_scalar_column_names <- as.character(phenotypes[["source_file"]]) - } else { - write_scalar_column_names <- as.character(write_scalar_column_names) + write_scalar_column_names <- column_names[scalar_col_idx] } if (length(write_scalar_column_names) != length(scalar_col_idx)) { stop("length(write_scalar_column_names) must equal number of selected write_scalar_columns") } - # Initialize scalar 
output dataset. + # Initialize scalar output dataset if (!file.exists(write_scalar_file)) { rhdf5::h5createFile(write_scalar_file) } @@ -1032,6 +862,7 @@ ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL ) } + # Start model looping ---- fits_all <- if (return_output) vector("list", length(element.subset)) else NULL write_row_cursor <- 1L @@ -1043,13 +874,18 @@ ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL chunk_size <- min(chunk_size, as.integer(write_results_flush_every)) } chunk_starts <- seq(1L, length(element.subset), by = chunk_size) + for (chunk_start in chunk_starts) { chunk_end <- min(chunk_start + chunk_size - 1L, length(element.subset)) chunk_elements <- element.subset[chunk_start:chunk_end] - chunk_fits <- .parallel_dispatch(chunk_elements, analyseOneElement.wrap, n_cores, pbar, - user_fun = FUN, modelarray = data, phenotypes = phenotypes, scalar = scalar, - num.subj.lthr = num.subj.lthr, num.stat.output = num.stat.output, + ## Main loop passes ctx ---- + chunk_fits <- .parallel_dispatch( + chunk_elements, analyseOneElement.wrap, n_cores, pbar, + user_fun = FUN, + ctx = ctx, + num.subj.lthr = num.subj.lthr, + num.stat.output = num.stat.output, flag_initiate = FALSE, on_error = on_error, ... ) @@ -1080,6 +916,7 @@ ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL } } + # Finalize H5 writing ---- if (writing_scalar) { rhdf5::h5writeAttribute( attr = write_scalar_column_names, diff --git a/man/ModelArray.wrap.Rd b/man/ModelArray.wrap.Rd index 10d0686..79e5103 100644 --- a/man/ModelArray.wrap.Rd +++ b/man/ModelArray.wrap.Rd @@ -125,9 +125,19 @@ useful when writing large outputs directly to HDF5.} \item{...}{Additional arguments forwarded to \code{FUN}.} } \value{ -A tibble with one row per element and first column -\code{element_id} (0-based). Remaining columns are determined by -the return value of \code{FUN}. 
+If \code{flag_initiate = TRUE}, a list with one component: +\describe{ +\item{column_names}{Character vector. The column names derived from +the return value of \code{user_fun}, with \code{"element_id"} +prepended. For unnamed list or atomic returns, columns are named +\code{v1}, \code{v2}, etc. Set to \code{NaN} if the element was +skipped or errored.} +} +If \code{flag_initiate = FALSE}, a numeric vector of length +\code{num.stat.output} with \code{element_id} (0-based) first and +the coerced output of \code{user_fun} in subsequent positions. +All-\code{NaN} (except \code{element_id}) if the element was skipped +or if an error occurred with \code{on_error = "skip"}. } \description{ \code{ModelArray.wrap} runs a user-supplied function \code{FUN} at each diff --git a/man/analyseOneElement.gam.Rd b/man/analyseOneElement.gam.Rd index 58733da..73598fd 100644 --- a/man/analyseOneElement.gam.Rd +++ b/man/analyseOneElement.gam.Rd @@ -3,7 +3,7 @@ \name{analyseOneElement.gam} \alias{analyseOneElement.gam} \title{Fit a GAM for a single element -returns metadata (column names, smooth term names, parametric term names, +Returns metadata (column names, smooth term names, parametric term names, and the smoothing parameter criterion attribute name) used by \code{\link{ModelArray.gam}} to initialise the output data.frame. When \code{flag_initiate = FALSE}, it returns a numeric vector representing one @@ -11,18 +11,19 @@ row of the final results matrix.} \usage{ analyseOneElement.gam( i_element, - formula, - modelarray, - phenotypes, - scalar, - var.smoothTerms, - var.parametricTerms, - var.model, + formula = NULL, + modelarray = NULL, + phenotypes = NULL, + scalar = NULL, + var.smoothTerms = c("statistic", "p.value"), + var.parametricTerms = c("estimate", "statistic", "p.value"), + var.model = c("dev.expl"), num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, flag_sse = FALSE, on_error = "stop", + ctx = NULL, ... 
) } @@ -32,14 +33,15 @@ analyseOneElement.gam( \item{formula}{A \code{\link[stats]{formula}} passed to \code{\link[mgcv]{gam}}.} -\item{modelarray}{A \linkS4class{ModelArray} object.} +\item{modelarray}{A \linkS4class{ModelArray} object. Ignored when +\code{ctx} is provided.} \item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates. Must contain a \code{"source_file"} column -matching \code{sources(modelarray)[[scalar]]}.} +matching \code{sources(modelarray)[[scalar]]}. Ignored when \code{ctx} is provided.} \item{scalar}{Character. The name of the element-wise scalar to analyse. -Must be one of \code{names(scalars(modelarray))}.} +Must be one of \code{names(scalars(modelarray))}. Ignored when \code{ctx} is provided.} \item{var.smoothTerms}{Character vector. Statistics to extract for smooth terms from \code{\link[broom]{tidy.gam}} with \code{parametric = FALSE} @@ -83,6 +85,9 @@ halts execution; \code{"skip"} returns all-\code{NaN} for this element; \code{"debug"} drops into \code{\link{browser}} (if interactive) then skips. 
Default: \code{"stop"}.} +\item{ctx}{A precomputed context list from \code{.build_gam_context()}, +or \code{NULL}.} + \item{...}{Additional arguments passed to \code{\link[mgcv]{gam}}.} } \value{ diff --git a/man/analyseOneElement.lm.Rd b/man/analyseOneElement.lm.Rd index 1069a1d..ddc9251 100644 --- a/man/analyseOneElement.lm.Rd +++ b/man/analyseOneElement.lm.Rd @@ -2,23 +2,21 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{analyseOneElement.lm} \alias{analyseOneElement.lm} -\title{Fit a linear model for a single element -If the number of subjects with finite scalar values (not \code{NaN}, -\code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the -element is skipped and all statistics are set to \code{NaN}.} +\title{Fit a linear model for a single element} \usage{ analyseOneElement.lm( i_element, - formula, - modelarray, - phenotypes, - scalar, - var.terms, - var.model, + formula = NULL, + modelarray = NULL, + phenotypes = NULL, + scalar = NULL, + var.terms = c("estimate", "statistic", "p.value"), + var.model = c("adj.r.squared", "p.value"), num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, on_error = "stop", + ctx = NULL, ... ) } @@ -26,16 +24,18 @@ analyseOneElement.lm( \item{i_element}{Integer. The 1-based index of the element to analyse.} \item{formula}{A \code{\link[stats]{formula}} passed to -\code{\link[stats]{lm}}.} +\code{\link[stats]{lm}}. Ignored when \code{ctx} is provided (the +formula is taken from the context).} -\item{modelarray}{A \linkS4class{ModelArray} object.} +\item{modelarray}{A \linkS4class{ModelArray} object. Ignored when +\code{ctx} is provided.} \item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates. Must contain a \code{"source_file"} column -matching \code{sources(modelarray)[[scalar]]}.} +matching \code{sources(modelarray)[[scalar]]}. Ignored when \code{ctx} is provided.} \item{scalar}{Character. The name of the element-wise scalar to analyse. 
-Must be one of \code{names(scalars(modelarray))}.} +Must be one of \code{names(scalars(modelarray))}. Ignored when \code{ctx} is provided.} \item{var.terms}{Character vector. Statistics to extract per term from \code{\link[broom]{tidy.lm}} (e.g. \code{"estimate"}, \code{"statistic"}, @@ -67,6 +67,9 @@ halts execution; \code{"skip"} returns all-\code{NaN} for this element; \code{"debug"} drops into \code{\link{browser}} (if interactive) then skips. Default: \code{"stop"}.} +\item{ctx}{A precomputed context list from \code{.build_lm_context()}, +or \code{NULL} for legacy inline computation.} + \item{...}{Additional arguments passed to \code{\link[stats]{lm}}.} } \value{ @@ -84,14 +87,19 @@ value and the requested statistics in subsequent positions. All-\code{NaN} or if an error occurred with \code{on_error = "skip"}. } \description{ -Fit a linear model for a single element +Fits \code{\link[stats]{lm}} on one element's data. When a precomputed +context (\code{ctx}) is provided, all loop-invariant work (formula parsing, +collision checks, source alignment) is skipped. When \code{ctx} is +\code{NULL}, the function falls back to computing everything inline +(legacy behaviour for direct calls / debugging). + If the number of subjects with finite scalar values (not \code{NaN}, \code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the element is skipped and all statistics are set to \code{NaN}. } \seealso{ -\code{\link{ModelArray.lm}} which calls this function iteratively, +\code{\link{ModelArray.lm}}, \code{.build_lm_context}, \code{\link{analyseOneElement.gam}} for the GAM equivalent, -\code{\link{analyseOneElement.wrap}} for user-supplied functions. 
+\code{\link{analyseOneElement.wrap}} for user-supplied functions } \keyword{internal} diff --git a/man/analyseOneElement.wrap.Rd b/man/analyseOneElement.wrap.Rd index 1780548..aa37a27 100644 --- a/man/analyseOneElement.wrap.Rd +++ b/man/analyseOneElement.wrap.Rd @@ -7,13 +7,14 @@ analyseOneElement.wrap( i_element, user_fun, - modelarray, - phenotypes, - scalar, + modelarray = NULL, + phenotypes = NULL, + scalar = NULL, num.subj.lthr, num.stat.output = NULL, flag_initiate = FALSE, on_error = "stop", + ctx = NULL, ... ) } @@ -22,17 +23,25 @@ analyseOneElement.wrap( \item{user_fun}{A function that accepts at least an argument named \code{data} (a data.frame: \code{phenotypes} with scalar columns -appended for the current element) and returns a one-row -data.frame/tibble, a named list, or a named atomic vector.} +appended for the current element) and returns one of: +\itemize{ +\item A one-row \code{data.frame} or \code{tibble}. Multi-row +returns will error. +\item A named list. Unnamed lists are accepted and auto-named +as \code{v1}, \code{v2}, etc. +\item A named atomic vector. Unnamed vectors are accepted and +auto-named as \code{v1}, \code{v2}, etc. +}} -\item{modelarray}{A \linkS4class{ModelArray} object.} +\item{modelarray}{A \linkS4class{ModelArray} object. Ignored when +\code{ctx} is provided.} \item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates. Must contain a \code{"source_file"} column -matching \code{sources(modelarray)[[scalar]]}.} +matching \code{sources(modelarray)[[scalar]]}. Ignored when \code{ctx} is provided.} \item{scalar}{Character. The name of the element-wise scalar to analyse. -Must be one of \code{names(scalars(modelarray))}.} +Must be one of \code{names(scalars(modelarray))}. Ignored when \code{ctx} is provided.} \item{num.subj.lthr}{Numeric. The pre-computed minimum number of subjects with finite values required for this element to be analysed. 
Elements @@ -56,6 +65,9 @@ halts execution; \code{"skip"} returns all-\code{NaN} for this element; \code{"debug"} drops into \code{\link{browser}} (if interactive) then skips. Default: \code{"stop"}.} +\item{ctx}{A precomputed context list from \code{.build_wrap_context()}, +or \code{NULL}.} + \item{...}{Additional arguments forwarded to \code{user_fun}.} } \value{ diff --git a/tests/testthat/test-ModelArray_low_hanging_coverage.R b/tests/testthat/test-ModelArray_low_hanging_coverage.R index 4c637a1..c439e1e 100644 --- a/tests/testthat/test-ModelArray_low_hanging_coverage.R +++ b/tests/testthat/test-ModelArray_low_hanging_coverage.R @@ -361,7 +361,7 @@ test_that("analyseOneElement.wrap covers coercion and error branches", { phenotypes = phen, scalar = "FD", num.subj.lthr = 0, flag_initiate = FALSE, num.stat.output = 2 ), - "Unsupported return type" + "user_fun must return a one-row data.frame/tibble" ) expect_warning( diff --git a/tests/testthat/test-analyse_context.R b/tests/testthat/test-analyse_context.R new file mode 100644 index 0000000..4b1430b --- /dev/null +++ b/tests/testthat/test-analyse_context.R @@ -0,0 +1,933 @@ +# tests/testthat/test-analyse_context.R +# +# Tests for context builders (.build_*_context), shared element +# assembly (.assemble_element_data), per-element ctx vs. legacy +# equivalence, .find_initiator_element, and top-level integration. +# +# Organized to follow the two-tier hierarchy: +# SECTION 1: Context builders (base → lm → gam → wrap) +# SECTION 2: Shared per-element helper (.assemble_element_data) +# SECTION 3: Per-element function ctx vs. legacy (lm → gam → wrap) +# SECTION 4: Initiator search (.find_initiator_element) +# SECTION 5: Top-level integration (ModelArray.gam, ModelArray.wrap) + +# ---- Shared fixtures ---- +# NOTE: Uses plain matrices in the scalars slot, not DelayedArray-backed +# HDF5 data. This works because the ModelArray S4 class accepts any +# matrix-like object for scalars [4]. 
Integration tests in Section 5 +# use real HDF5-backed ModelArrays via system.file(). + +make_test_modelarray <- function(n_elements = 3, n_subjects = 4, + scalars = c("FD"), + inject_nan = FALSE) { + src <- paste0("sub", seq_len(n_subjects)) + scalar_list <- list() + source_list <- list() + + for (sn in scalars) { + set.seed(match(sn, scalars)) + mat <- matrix(rnorm(n_elements * n_subjects), + nrow = n_elements, ncol = n_subjects) + if (inject_nan && sn == scalars[1]) { + mat[1, 1] <- NaN + mat[1, 2] <- Inf + } + colnames(mat) <- src + scalar_list[[sn]] <- mat + source_list[[sn]] <- src + } + + ma <- methods::new("ModelArray", + sources = source_list, + scalars = scalar_list, + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame( + source_file = src, + age = seq(20, by = 10, length.out = n_subjects), + sex = rep(c("M", "F"), length.out = n_subjects), + stringsAsFactors = FALSE + ) + list(ma = ma, phen = phen) +} + + +# ================================================================== +# SECTION 1: Context builders (unit tests) +# Ordered to match the two-tier hierarchy: +# base → lm → gam → wrap +# ================================================================== + +# ---- .build_base_context ---- + +test_that(".build_base_context attaches all scalars when scalar_subset is NULL", { + fix <- make_test_modelarray(scalars = c("FD", "FC")) + ctx <- .build_base_context(fix$ma, fix$phen, scalar = "FD", + scalar_subset = NULL) + + expect_identical(ctx$scalar, "FD") + expect_identical(sort(ctx$attached_scalars), sort(c("FD", "FC"))) + expect_identical(sort(ctx$all_scalar_names), sort(c("FD", "FC"))) +}) + +test_that(".build_base_context attaches only requested scalars when scalar_subset is provided", { + fix <- make_test_modelarray(scalars = c("FD", "FC")) + ctx <- .build_base_context(fix$ma, fix$phen, scalar = "FD", + scalar_subset = c("FD")) + + expect_identical(ctx$attached_scalars, "FD") + # FC is in the ModelArray but not attached + 
expect_true("FC" %in% ctx$all_scalar_names) + expect_false("FC" %in% ctx$attached_scalars) +}) + +test_that(".build_base_context detects collision between scalar names and phenotype columns", { + fix <- make_test_modelarray(scalars = c("FD")) + phen_collision <- fix$phen + phen_collision$FD <- 1:nrow(phen_collision) + + expect_error( + .build_base_context(fix$ma, phen_collision, scalar = "FD", + scalar_subset = c("FD")), + "Column name collision" + ) +}) + +test_that(".build_base_context sets predictor_reorder to NULL for aligned sources", { + fix <- make_test_modelarray(scalars = c("FD")) + ctx <- .build_base_context(fix$ma, fix$phen, scalar = "FD", + scalar_subset = c("FD")) + + expect_null(ctx$predictor_reorder[["FD"]]) +}) + +test_that(".build_base_context computes reorder indices for misaligned sources", { + fix <- make_test_modelarray(scalars = c("FD", "FC")) + + # Manually scramble FC sources + fc_src <- rev(sources(fix$ma)[["FC"]]) + fix$ma@sources[["FC"]] <- fc_src + + ctx <- .build_base_context(fix$ma, fix$phen, scalar = "FD", + scalar_subset = c("FD", "FC")) + + expect_null(ctx$predictor_reorder[["FD"]]) + expect_false(is.null(ctx$predictor_reorder[["FC"]])) + # Applying the reorder should recover the phenotype source order + expect_identical( + fc_src[ctx$predictor_reorder[["FC"]]], + fix$phen$source_file + ) +}) + +test_that(".build_base_context errors when scalar sources don't match phenotypes", { + fix <- make_test_modelarray(scalars = c("FD", "FC")) + fix$ma@sources[["FC"]] <- paste0("wrong", seq_along(sources(fix$ma)[["FC"]])) + + expect_error( + .build_base_context(fix$ma, fix$phen, scalar = "FD", + scalar_subset = c("FD", "FC")), + "do not match phenotypes" + ) +}) + +# ---- .build_lm_context ---- + +test_that(".build_lm_context extracts formula components correctly", { + fix <- make_test_modelarray(scalars = c("FD")) + ctx <- .build_lm_context(FD ~ age + sex, fix$ma, fix$phen, scalar = "FD") + + expect_identical(ctx$formula, FD ~ age + sex) + 
expect_identical(ctx$lhs_name, "FD") + expect_true("age" %in% ctx$rhs_vars) + expect_true("sex" %in% ctx$rhs_vars) + expect_identical(ctx$scalar_predictors, character(0)) +}) + +test_that(".build_lm_context detects scalar predictors in cross-scalar formulas", { + fix <- make_test_modelarray(scalars = c("FD", "FC")) + ctx <- .build_lm_context(FD ~ age + FC, fix$ma, fix$phen, scalar = "FD") + + expect_identical(ctx$scalar_predictors, "FC") + expect_true("FC" %in% ctx$attached_scalars) + expect_true("FD" %in% ctx$attached_scalars) +}) + +test_that(".build_lm_context does not attach unrelated scalars", { + fix <- make_test_modelarray(scalars = c("FD", "FC", "FA")) + ctx <- .build_lm_context(FD ~ age + FC, fix$ma, fix$phen, scalar = "FD") + + expect_true("FA" %in% ctx$all_scalar_names) + expect_false("FA" %in% ctx$attached_scalars) +}) + +# ---- .build_gam_context ---- + +test_that(".build_gam_context caches interpret.gam breakdown", { + fix <- make_test_modelarray(scalars = c("FD")) + ctx <- .build_gam_context(FD ~ s(age) + sex, fix$ma, fix$phen, scalar = "FD") + + expect_true(!is.null(ctx$gam_formula_breakdown)) + expect_true("smooth.spec" %in% names(ctx$gam_formula_breakdown)) +}) + +test_that(".build_gam_context errors on invalid GAM formula", { + fix <- make_test_modelarray(scalars = c("FD")) + + expect_error( + .build_gam_context(FD ~ s(age, d = 999), fix$ma, fix$phen, scalar = "FD"), + "not valid for mgcv::gam" + ) +}) + +test_that(".build_gam_context inherits lm context fields", { + fix <- make_test_modelarray(scalars = c("FD")) + ctx <- .build_gam_context(FD ~ s(age) + sex, fix$ma, fix$phen, scalar = "FD") + + # Should have all the lm context fields + expect_identical(ctx$lhs_name, "FD") + expect_true("age" %in% ctx$rhs_vars) + expect_true("sex" %in% ctx$rhs_vars) + expect_identical(ctx$scalar_predictors, character(0)) +}) + +test_that(".build_gam_context detects scalar predictors same as lm context", { + fix <- make_test_modelarray(scalars = c("FD", "FC")) + 
ctx <- .build_gam_context(FD ~ s(age) + FC, fix$ma, fix$phen, scalar = "FD") + + expect_identical(ctx$scalar_predictors, "FC") + expect_true("FC" %in% ctx$attached_scalars) + expect_true("FD" %in% ctx$attached_scalars) +}) + +# ---- .build_wrap_context ---- + +test_that(".build_wrap_context attaches all scalars", { + fix <- make_test_modelarray(scalars = c("FD", "FC")) + ctx <- .build_wrap_context(fix$ma, fix$phen, scalar = "FD") + + expect_identical(sort(ctx$attached_scalars), sort(c("FD", "FC"))) +}) + +test_that(".build_wrap_context has no formula fields", { + fix <- make_test_modelarray(scalars = c("FD")) + ctx <- .build_wrap_context(fix$ma, fix$phen, scalar = "FD") + + expect_null(ctx$formula) + expect_null(ctx$lhs_name) +}) + + +# ================================================================== +# SECTION 2: Shared per-element helper +# ================================================================== + +# ---- .assemble_element_data ---- + +test_that(".assemble_element_data returns filtered data.frame for valid element", { + fix <- make_test_modelarray(scalars = c("FD")) + ctx <- .build_lm_context(FD ~ age, fix$ma, fix$phen, scalar = "FD") + + elem <- .assemble_element_data(1, ctx, num.subj.lthr = 0) + + expect_true(elem$sufficient) + expect_true(is.data.frame(elem$dat)) + expect_true("FD" %in% colnames(elem$dat)) + expect_true("age" %in% colnames(elem$dat)) + expect_true("source_file" %in% colnames(elem$dat)) +}) + +test_that(".assemble_element_data returns insufficient when below threshold", { + fix <- make_test_modelarray(n_subjects = 4, scalars = c("FD")) + ctx <- .build_lm_context(FD ~ age, fix$ma, fix$phen, scalar = "FD") + + elem <- .assemble_element_data(1, ctx, num.subj.lthr = 100) + + expect_false(elem$sufficient) + expect_null(elem$dat) +}) + +test_that(".assemble_element_data excludes NaN/Inf subjects from mask", { + fix <- make_test_modelarray(n_subjects = 4, scalars = c("FD"), + inject_nan = TRUE) + ctx <- .build_lm_context(FD ~ age, fix$ma, 
fix$phen, scalar = "FD") + + elem <- .assemble_element_data(1, ctx, num.subj.lthr = 0) + + # Element 1 has NaN in subject 1 and Inf in subject 2 + expect_true(elem$sufficient) + expect_equal(elem$num_valid, 2) # only subjects 3 and 4 + expect_equal(nrow(elem$dat), 2) +}) + +test_that(".assemble_element_data applies precomputed reorder for cross-scalar", { + fix <- make_test_modelarray(n_elements = 2, n_subjects = 4, + scalars = c("FD", "FC")) + + # Scramble FC sources + fc_src_scrambled <- rev(sources(fix$ma)[["FC"]]) + fix$ma@sources[["FC"]] <- fc_src_scrambled + + ctx <- .build_lm_context(FD ~ age + FC, fix$ma, fix$phen, scalar = "FD") + elem <- .assemble_element_data(1, ctx, num.subj.lthr = 0) + + expect_true(elem$sufficient) + expect_true("FC" %in% colnames(elem$dat)) + + # The FC values should be reordered to match phenotype order + fc_raw <- scalars(fix$ma)[["FC"]][1, ] + reorder_idx <- ctx$predictor_reorder[["FC"]] + fc_reordered <- fc_raw[reorder_idx] + expect_equal(as.numeric(elem$dat$FC), as.numeric(fc_reordered)) +}) + +test_that(".assemble_element_data intersection mask works across scalars", { + n_subj <- 6 + src <- paste0("sub", 1:n_subj) + + fd_mat <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 1) + fc_mat <- matrix(c(10, NaN, 30, NaN, 50, 60), nrow = 1) + colnames(fd_mat) <- src + colnames(fc_mat) <- src + + ma <- methods::new("ModelArray", + sources = list(FD = src, FC = src), + scalars = list(FD = fd_mat, FC = fc_mat), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame( + source_file = src, + age = seq(20, by = 10, length.out = n_subj) + ) + + ctx <- .build_lm_context(FD ~ age + FC, ma, phen, scalar = "FD") + elem <- .assemble_element_data(1, ctx, num.subj.lthr = 0) + + # Subjects 2 and 4 have NaN in FC, so should be excluded + expect_equal(elem$num_valid, 4) + expect_equal(nrow(elem$dat), 4) + expect_false(any(is.nan(elem$dat$FC))) +}) + + +# ================================================================== +# SECTION 3: 
Per-element function ctx vs. legacy equivalence +# Ordered: lm → gam → wrap +# ================================================================== + +# ---- analyseOneElement.lm ---- + +test_that("analyseOneElement.lm produces identical results with ctx vs. without", { + # Create fixture with 8 subjects (enough for a meaningful lm fit) + fix <- make_test_modelarray(n_elements = 2, n_subjects = 8, scalars = c("FD")) + + # The fixture's phen has age and sex; rebuild with only age for a + # clean single-predictor test. The fixture already has 8 subjects. + src <- sources(fix$ma)[["FD"]] + phen <- data.frame( + source_file = src, + age = seq(20, by = 5, length.out = length(src)), + stringsAsFactors = FALSE + ) + + ctx <- .build_lm_context(FD ~ age, fix$ma, phen, scalar = "FD") + + # With context — initiation + result_ctx <- analyseOneElement.lm( + i_element = 1, + ctx = ctx, + var.terms = c("estimate", "p.value"), + var.model = c("adj.r.squared"), + num.subj.lthr = 0, + num.stat.output = NULL, + flag_initiate = TRUE + ) + + # Without context — legacy path + result_legacy <- analyseOneElement.lm( + i_element = 1, + formula = FD ~ age, + modelarray = fix$ma, + phenotypes = phen, + scalar = "FD", + var.terms = c("estimate", "p.value"), + var.model = c("adj.r.squared"), + num.subj.lthr = 0, + num.stat.output = NULL, + flag_initiate = TRUE + ) + + expect_identical(result_ctx$column_names, result_legacy$column_names) + expect_identical(result_ctx$list.terms, result_legacy$list.terms) + + # Also check runtime (non-initiate) path + num_stat <- length(result_ctx$column_names) + result_ctx_run <- analyseOneElement.lm( + i_element = 1, + ctx = ctx, + var.terms = c("estimate", "p.value"), + var.model = c("adj.r.squared"), + num.subj.lthr = 0, + num.stat.output = num_stat, + flag_initiate = FALSE + ) + result_legacy_run <- analyseOneElement.lm( + i_element = 1, + formula = FD ~ age, + modelarray = fix$ma, + phenotypes = phen, + scalar = "FD", + var.terms = c("estimate", "p.value"), 
+ var.model = c("adj.r.squared"), + num.subj.lthr = 0, + num.stat.output = num_stat, + flag_initiate = FALSE + ) + + expect_equal(result_ctx_run, result_legacy_run) +}) + +# ---- analyseOneElement.gam ---- + +test_that("analyseOneElement.gam produces identical results with ctx vs. without", { + src <- paste0("s", 1:8) + set.seed(42) + mat <- matrix(rnorm(3 * 8), nrow = 3) + colnames(mat) <- src + + modelarray <- methods::new("ModelArray", + sources = list(FD = src), + scalars = list(FD = mat), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame( + source_file = src, + age = seq(20, by = 5, length.out = 8), + stringsAsFactors = FALSE + ) + + ctx <- .build_gam_context(FD ~ s(age, k = 4), modelarray, phen, scalar = "FD") + + # With context — initiation + result_ctx <- analyseOneElement.gam( + i_element = 1, + ctx = ctx, + var.smoothTerms = c("statistic", "p.value"), + var.parametricTerms = c("estimate", "p.value"), + var.model = c("dev.expl"), + num.subj.lthr = 0, + num.stat.output = NULL, + flag_initiate = TRUE + ) + + # Without context — legacy path + result_legacy <- analyseOneElement.gam( + i_element = 1, + formula = FD ~ s(age, k = 4), + modelarray = modelarray, + phenotypes = phen, + scalar = "FD", + var.smoothTerms = c("statistic", "p.value"), + var.parametricTerms = c("estimate", "p.value"), + var.model = c("dev.expl"), + num.subj.lthr = 0, + num.stat.output = NULL, + flag_initiate = TRUE + ) + + expect_identical(result_ctx$column_names, result_legacy$column_names) + expect_identical(result_ctx$list.smoothTerms, result_legacy$list.smoothTerms) + expect_identical(result_ctx$list.parametricTerms, result_legacy$list.parametricTerms) + + # Runtime path + num_stat <- length(result_ctx$column_names) + + result_ctx_run <- analyseOneElement.gam( + i_element = 1, + ctx = ctx, + var.smoothTerms = c("statistic", "p.value"), + var.parametricTerms = c("estimate", "p.value"), + var.model = c("dev.expl"), + num.subj.lthr = 0, + num.stat.output = 
num_stat, + flag_initiate = FALSE + ) + + result_legacy_run <- analyseOneElement.gam( + i_element = 1, + formula = FD ~ s(age, k = 4), + modelarray = modelarray, + phenotypes = phen, + scalar = "FD", + var.smoothTerms = c("statistic", "p.value"), + var.parametricTerms = c("estimate", "p.value"), + var.model = c("dev.expl"), + num.subj.lthr = 0, + num.stat.output = num_stat, + flag_initiate = FALSE + ) + + expect_equal(result_ctx_run, result_legacy_run) +}) + +test_that("analyseOneElement.gam with ctx normalizes (Intercept) to Intercept", { + src <- paste0("s", 1:8) + set.seed(42) + mat <- matrix(rnorm(2 * 8), nrow = 2) + colnames(mat) <- src + + modelarray <- methods::new("ModelArray", + sources = list(FD = src), + scalars = list(FD = mat), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame( + source_file = src, + age = seq(20, by = 5, length.out = 8), + stringsAsFactors = FALSE + ) + + ctx <- .build_gam_context(FD ~ s(age, k = 4) + 1, modelarray, phen, scalar = "FD") + + result <- analyseOneElement.gam( + i_element = 1, + ctx = ctx, + var.smoothTerms = c("p.value"), + var.parametricTerms = c("estimate"), + var.model = c("dev.expl"), + num.subj.lthr = 0, + num.stat.output = NULL, + flag_initiate = TRUE + ) + + # Should contain "Intercept", not "(Intercept)" + param_cols <- grep("estimate", result$column_names, value = TRUE) + expect_true(any(grepl("^Intercept\\.", param_cols))) + expect_false(any(grepl("\\(Intercept\\)", result$column_names))) +}) + +test_that("analyseOneElement.gam with ctx returns NaN row for insufficient subjects", { + src <- paste0("s", 1:4) + mat <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8), nrow = 2) + colnames(mat) <- src + + modelarray <- methods::new("ModelArray", + sources = list(FD = src), + scalars = list(FD = mat), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame( + source_file = src, + age = c(20, 30, 40, 50) + ) + + ctx <- .build_gam_context(FD ~ age, modelarray, phen, scalar = "FD") 
+ + # Initiation with impossible threshold + result_init <- analyseOneElement.gam( + i_element = 1, + ctx = ctx, + var.smoothTerms = c(), + var.parametricTerms = c("estimate"), + var.model = c("dev.expl"), + num.subj.lthr = 1000, + flag_initiate = TRUE + ) + expect_true(is.nan(result_init$column_names[1])) + + # Runtime with impossible threshold + result_run <- analyseOneElement.gam( + i_element = 1, + ctx = ctx, + var.smoothTerms = c(), + var.parametricTerms = c("estimate"), + var.model = c("dev.expl"), + num.subj.lthr = 1000, + num.stat.output = 5, + flag_initiate = FALSE + ) + expect_equal(result_run[1], 0) + expect_true(all(is.nan(result_run[-1]))) +}) + +test_that("analyseOneElement.gam with ctx handles on_error = 'skip'", { + src <- paste0("s", 1:8) + set.seed(42) + mat <- matrix(rnorm(2 * 8), nrow = 2) + colnames(mat) <- src + + modelarray <- methods::new("ModelArray", + sources = list(FD = src), + scalars = list(FD = mat), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame( + source_file = src, + age = seq(20, by = 5, length.out = 8) + ) + + ctx <- .build_gam_context(FD ~ nonexistent_var, modelarray, phen, scalar = "FD") + + expect_warning( + result <- analyseOneElement.gam( + i_element = 1, + ctx = ctx, + var.smoothTerms = c(), + var.parametricTerms = c("estimate"), + var.model = c("dev.expl"), + num.subj.lthr = 0, + num.stat.output = 4, + flag_initiate = FALSE, + on_error = "skip" + ) + ) + expect_equal(result[1], 0) + expect_true(all(is.nan(result[-1]))) +}) + +# ---- analyseOneElement.wrap ---- + +test_that("analyseOneElement.wrap produces identical results with ctx vs. 
without", { + src <- c("s1", "s2", "s3", "s4") + modelarray <- methods::new("ModelArray", + sources = list(FD = src, FA = src), + scalars = list( + FD = matrix(c(1, 2, 3, 4, 2, 3, 4, 5), nrow = 2, byrow = TRUE), + FA = matrix(c(10, 20, 30, 40, 50, 60, 70, 80), nrow = 2, byrow = TRUE) + ), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame( + source_file = src, + age = c(20, 30, 40, 50) + ) + + my_fun <- function(data, ...) { + list(fd_mean = mean(data$FD), fa_mean = mean(data$FA)) + } + + ctx <- .build_wrap_context(modelarray, phen, scalar = "FD") + + # With context — initiation + result_ctx <- analyseOneElement.wrap( + i_element = 1, + user_fun = my_fun, + ctx = ctx, + num.subj.lthr = 0, + flag_initiate = TRUE + ) + + # Without context — legacy + result_legacy <- analyseOneElement.wrap( + i_element = 1, + user_fun = my_fun, + modelarray = modelarray, + phenotypes = phen, + scalar = "FD", + num.subj.lthr = 0, + flag_initiate = TRUE + ) + + expect_identical(result_ctx$column_names, result_legacy$column_names) + + # Runtime + num_stat <- length(result_ctx$column_names) + + result_ctx_run <- analyseOneElement.wrap( + i_element = 1, + user_fun = my_fun, + ctx = ctx, + num.subj.lthr = 0, + num.stat.output = num_stat, + flag_initiate = FALSE + ) + + result_legacy_run <- analyseOneElement.wrap( + i_element = 1, + user_fun = my_fun, + modelarray = modelarray, + phenotypes = phen, + scalar = "FD", + num.subj.lthr = 0, + num.stat.output = num_stat, + flag_initiate = FALSE + ) + + expect_equal(result_ctx_run, result_legacy_run) +}) + +test_that("analyseOneElement.wrap with ctx attaches ALL scalars", { + src <- c("s1", "s2", "s3", "s4") + modelarray <- methods::new("ModelArray", + sources = list(FD = src, FA = src, FC = src), + scalars = list( + FD = matrix(1:8, nrow = 2), + FA = matrix(11:18, nrow = 2), + FC = matrix(21:28, nrow = 2) + ), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame(source_file = src, age = c(20, 
30, 40, 50)) + + check_fun <- function(data, ...) { + # Verify all three scalars are present + list( + has_FD = as.numeric("FD" %in% colnames(data)), + has_FA = as.numeric("FA" %in% colnames(data)), + has_FC = as.numeric("FC" %in% colnames(data)) + ) + } + + ctx <- .build_wrap_context(modelarray, phen, scalar = "FD") + + result <- analyseOneElement.wrap( + i_element = 1, + user_fun = check_fun, + ctx = ctx, + num.subj.lthr = 0, + num.stat.output = 4, + flag_initiate = FALSE + ) + + # element_id = 0, then has_FD=1, has_FA=1, has_FC=1 + expect_equal(result[2], 1) + expect_equal(result[3], 1) + expect_equal(result[4], 1) +}) + +test_that("analyseOneElement.wrap with ctx returns NaN for insufficient subjects", { + src <- c("s1", "s2", "s3", "s4") + modelarray <- methods::new("ModelArray", + sources = list(FD = src), + scalars = list(FD = matrix(c(1, 2, 3, 4, 5, 6, 7, 8), nrow = 2, byrow = TRUE)), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame(source_file = src, age = c(20, 30, 40, 50)) + my_fun <- function(data, ...) 
list(m = mean(data$FD)) + + ctx <- .build_wrap_context(modelarray, phen, scalar = "FD") + + result_init <- analyseOneElement.wrap( + i_element = 1, + user_fun = my_fun, + ctx = ctx, + num.subj.lthr = 1000, + flag_initiate = TRUE + ) + expect_true(is.nan(result_init$column_names[1])) + + result_run <- analyseOneElement.wrap( + i_element = 1, + user_fun = my_fun, + ctx = ctx, + num.subj.lthr = 1000, + num.stat.output = 2, + flag_initiate = FALSE + ) + expect_equal(result_run[1], 0) + expect_true(all(is.nan(result_run[-1]))) +}) + +test_that("analyseOneElement.wrap with ctx handles on_error = 'skip'", { + src <- c("s1", "s2", "s3", "s4") + modelarray <- methods::new("ModelArray", + sources = list(FD = src), + scalars = list(FD = matrix(1:8, nrow = 2)), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame(source_file = src, age = c(20, 30, 40, 50)) + ctx <- .build_wrap_context(modelarray, phen, scalar = "FD") + + expect_warning( + result <- analyseOneElement.wrap( + i_element = 1, + user_fun = function(data, ...) stop("boom"), + ctx = ctx, + num.subj.lthr = 0, + num.stat.output = 3, + flag_initiate = FALSE, + on_error = "skip" + ) + ) + expect_equal(result[1], 0) + expect_true(all(is.nan(result[-1]))) +}) + +test_that("analyseOneElement.wrap with ctx handles various return types", { + src <- c("s1", "s2", "s3", "s4") + modelarray <- methods::new("ModelArray", + sources = list(FD = src), + scalars = list(FD = matrix(c(1, 2, 3, 4, 5, 6, 7, 8), nrow = 2, byrow = TRUE)), + results = list(), + path = tempfile(fileext = ".h5") + ) + phen <- data.frame(source_file = src, age = c(20, 30, 40, 50)) + ctx <- .build_wrap_context(modelarray, phen, scalar = "FD") + + # data.frame return + df_fun <- function(data, ...) 
data.frame(m = mean(data$FD)) + result_df <- analyseOneElement.wrap( + i_element = 1, user_fun = df_fun, ctx = ctx, + num.subj.lthr = 0, num.stat.output = 2, flag_initiate = FALSE + ) + expect_equal(length(result_df), 2) + + # named list return + list_fun <- function(data, ...) list(m = mean(data$FD)) + result_list <- analyseOneElement.wrap( + i_element = 1, user_fun = list_fun, ctx = ctx, + num.subj.lthr = 0, num.stat.output = 2, flag_initiate = FALSE + ) + expect_equal(length(result_list), 2) + + # named atomic return + atomic_fun <- function(data, ...) c(m = mean(data$FD)) + result_atomic <- analyseOneElement.wrap( + i_element = 1, user_fun = atomic_fun, ctx = ctx, + num.subj.lthr = 0, num.stat.output = 2, flag_initiate = FALSE + ) + expect_equal(length(result_atomic), 2) + + # All three should produce the same values + expect_equal(result_df[2], result_list[2]) + expect_equal(result_df[2], result_atomic[2]) +}) + + +# ================================================================== +# SECTION 4: Initiator search +# ================================================================== + +# ---- .find_initiator_element ---- + +test_that(".find_initiator_element succeeds on middle element", { + fix <- make_test_modelarray(n_elements = 5, n_subjects = 8, scalars = c("FD")) + src <- sources(fix$ma)[["FD"]] + phen <- data.frame( + source_file = src, + age = seq(20, by = 5, length.out = length(src)) + ) + ctx <- .build_lm_context(FD ~ age, fix$ma, phen, scalar = "FD") + + init_args <- list( + ctx = ctx, + var.terms = c("estimate"), + var.model = c("adj.r.squared"), + num.subj.lthr = 0, + on_error = "stop" + ) + + result <- .find_initiator_element( + analyseOneElement.lm, 5, init_args, verbose = FALSE + ) + + expect_false(is.nan(result$outputs$column_names[1])) + expect_true(result$i_element >= 1 && result$i_element <= 5) +}) + +test_that(".find_initiator_element fails when all elements have insufficient subjects", { + fix <- make_test_modelarray(n_elements = 3, 
n_subjects = 4, scalars = c("FD")) + src <- sources(fix$ma)[["FD"]] + phen <- data.frame( + source_file = src, + age = seq(20, by = 5, length.out = length(src)) + ) + ctx <- .build_lm_context(FD ~ age, fix$ma, phen, scalar = "FD") + + init_args <- list( + ctx = ctx, + var.terms = c("estimate"), + var.model = c("adj.r.squared"), + num.subj.lthr = 1000, # impossibly high threshold + on_error = "stop" + ) + + expect_error( + .find_initiator_element(analyseOneElement.lm, 3, init_args, verbose = FALSE), + "no elements with sufficient valid" + ) +}) + + +# ================================================================== +# SECTION 5: Top-level integration +# Verifies ctx flows correctly through ModelArray.gam/wrap +# using real HDF5-backed data from system.file() +# ================================================================== + +# ---- ModelArray.gam integration ---- + +test_that("ModelArray.gam produces consistent results after context refactoring", { + h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") + csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") + + modelarray <- ModelArray(h5_path, scalar_types = c("FD")) + phenotypes <- read.csv(csv_path) + element.subset <- 1:5 + + result <- ModelArray.gam( + FD ~ s(age, fx = TRUE) + sex, + data = modelarray, + phenotypes = phenotypes, + scalar = "FD", + element.subset = element.subset, + n_cores = 1, + pbar = FALSE + ) + + expect_true(is.data.frame(result)) + expect_equal(nrow(result), length(element.subset)) + expect_equal(result$element_id, 0:(length(element.subset) - 1)) + + # Verify Intercept naming (not "(Intercept)") + expect_true(any(grepl("^Intercept\\.", colnames(result)))) + expect_false(any(grepl("\\(Intercept\\)", colnames(result)))) + + # Verify smooth term naming uses underscores [10] + expect_true(any(grepl("^s_age\\.", colnames(result)))) + + # No NAs for valid elements + stat_cols <- setdiff(colnames(result), "element_id") + 
expect_true(all(is.finite(as.matrix(result[, stat_cols])))) +}) + +# ---- ModelArray.wrap integration ---- + +test_that("ModelArray.wrap produces consistent results after context refactoring", { + h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") + csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") + + modelarray <- ModelArray(h5_path, scalar_types = c("FD")) + phenotypes <- read.csv(csv_path) + element.subset <- 1:5 + + my_fun <- function(data, ...) { + list(fd_mean = mean(data$FD, na.rm = TRUE)) + } + + result <- ModelArray.wrap( + FUN = my_fun, + data = modelarray, + phenotypes = phenotypes, + scalar = "FD", + element.subset = element.subset, + n_cores = 1, + pbar = FALSE + ) + + expect_true(is.data.frame(result)) + expect_equal(nrow(result), length(element.subset)) + expect_equal(result$element_id, 0:(length(element.subset) - 1)) + expect_true("fd_mean" %in% colnames(result)) + expect_true(all(is.finite(result$fd_mean))) +}) From 66448b9216a3e9656aa82ece32078d1f02a140dc Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 14:49:54 -0700 Subject: [PATCH 2/9] Remove unnecessary bracket refs --- R/analyse-helpers.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/analyse-helpers.R b/R/analyse-helpers.R index 7660603..41927d1 100644 --- a/R/analyse-helpers.R +++ b/R/analyse-helpers.R @@ -253,7 +253,7 @@ #' @noRd .build_lm_context <- function(formula, modelarray, phenotypes, scalar) { # Formula parsing — currently repeated per element inside - # analyseOneElement.lm [4] + # analyseOneElement.lm all_vars <- all.vars(formula) lhs_name <- tryCatch( as.character(formula[[2]]), @@ -307,7 +307,7 @@ #' } #' @noRd .build_gam_context <- function(formula, modelarray, phenotypes, scalar) { - # GAM formula validation — currently runs in ModelArray.gam() preamble [1] + # GAM formula validation — currently runs in ModelArray.gam() preamble # but the breakdown result is discarded. We cache it here. 
gam_breakdown <- tryCatch( mgcv::interpret.gam(formula), @@ -357,7 +357,7 @@ # Extracts scalar values for one element using the precomputed context, # builds the validity mask, and returns the filtered data.frame. # This replaces duplicated logic across analyseOneElement.lm, -# analyseOneElement.gam, and analyseOneElement.wrap [4]. +# analyseOneElement.gam, and analyseOneElement.wrap. #' Assemble per-element data.frame from precomputed context #' From 26d548a8d52a3cea8fa7be70940e5b49cb64dd30 Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 14:50:05 -0700 Subject: [PATCH 3/9] Finish updating docs --- R/ModelArray_Constructor.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 743916f..3f3b9e2 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -920,6 +920,8 @@ analyseOneElement.gam <- function(i_element, #' \item A named atomic vector. Unnamed vectors are accepted and #' auto-named as \code{v1}, \code{v2}, etc. #' } +#' All return values are coerced to a numeric vector internally. +#' Unsupported types (e.g., environments, S4 objects) will error. #' @param ctx A precomputed context list from \code{.build_wrap_context()}, #' or \code{NULL}. #' @param ... Additional arguments forwarded to \code{user_fun}. 
From 3277e714b56ea57563f92ace29c831528973f029 Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 15:13:42 -0700 Subject: [PATCH 4/9] Revert removed validations --- R/analyse.R | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/R/analyse.R b/R/analyse.R index 9779d39..ebd2fdd 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -187,6 +187,27 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU var.model <- var.model.full } + # check on validity of list of vars: + var.terms <- var.terms[!duplicated(var.terms)] + var.model <- var.model[!duplicated(var.model)] + + # check if all var.* are empty: + if (length(var.terms) == 0 && length(var.model) == 0) { + stop("All var.* arguments [var.terms, var.model] are empty!") + } + + # check if every var is valid: + for (var in var.terms) { + if (!(var %in% var.terms.full)) { + stop(paste0(var, " is not valid for var.terms!")) + } + } + for (var in var.model) { + if (!(var %in% var.model.full)) { + stop(paste0(var, " is not valid for var.model!")) + } + } + # P-value correction checks ---- check_validity_correctPValue( correct.p.value.terms, "correct.p.value.terms", @@ -417,9 +438,58 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, var.model <- var.model.full } + var.smoothTerms <- var.smoothTerms[!duplicated(var.smoothTerms)] + var.parametricTerms <- var.parametricTerms[!duplicated(var.parametricTerms)] + var.model <- var.model[!duplicated(var.model)] + + if (length(var.smoothTerms) == 0 && length(var.parametricTerms) == 0 && length(var.model) == 0) { + stop("All var.* arguments [var.smoothTerms, var.parametricTerms, var.model] are empty!") + } + + for (var in var.smoothTerms) { + if (!(var %in% var.smoothTerms.full)) { + stop(paste0(var, " is not valid for var.smoothTerms!")) + } + } + for (var in var.parametricTerms) { + if (!(var %in% var.parametricTerms.full)) { + stop(paste0(var, " is not valid for 
var.parametricTerms!")) + } + } + for (var in var.model) { + if (!(var %in% var.model.full)) { + stop(paste0(var, " is not valid!")) + } + } + # Changed.rsq setup ---- + var.model.orig <- var.model if (!is.null(changed.rsq.term.index)) { - terms.full.formula <- stats::terms(formula) + + # check if the term index is valid: + if (min(changed.rsq.term.index) <= 0) { + stop( + "There is element(s) in changed.rsq.term.index <= 0. ", + "It should be a (list of) positive integer!" + ) + } + + terms.full.formula <- stats::terms(formula, keep.order = TRUE) + + if (max(changed.rsq.term.index) > length(labels(terms.full.formula))) { + stop( + "Largest index in changed.rsq.term.index is more than the term number on the ", + "right hand side of formula!" + ) + } + + if (length(labels(terms.full.formula)) == 0) { + stop( + "Trying to analyze changed.rsq but there is no variable (except intercept 1) ", + "on right hand side of formula! Please provide at least one valid variable." + ) + } + changed.rsq.term.fullFormat.list <- labels(terms.full.formula)[unlist(changed.rsq.term.index)] changed.rsq.term.shortFormat.list <- list() for (idx in seq_along(changed.rsq.term.index)) { From fa003677b3bc9f2dd6f8f6c34352c00e3dfdce4a Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 15:13:45 -0700 Subject: [PATCH 5/9] Update analyseOneElement.wrap.Rd --- man/analyseOneElement.wrap.Rd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/man/analyseOneElement.wrap.Rd b/man/analyseOneElement.wrap.Rd index aa37a27..e8dfafc 100644 --- a/man/analyseOneElement.wrap.Rd +++ b/man/analyseOneElement.wrap.Rd @@ -31,7 +31,9 @@ returns will error. as \code{v1}, \code{v2}, etc. \item A named atomic vector. Unnamed vectors are accepted and auto-named as \code{v1}, \code{v2}, etc. -}} +} +All return values are coerced to a numeric vector internally. +Unsupported types (e.g., environments, S4 objects) will error.} \item{modelarray}{A \linkS4class{ModelArray} object. 
Ignored when \code{ctx} is provided.} From d977b01ea57a4d49c610873af38ba3fc2ee669ea Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 15:37:32 -0700 Subject: [PATCH 6/9] Update analyse.R --- R/analyse.R | 50 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/R/analyse.R b/R/analyse.R index ebd2fdd..b5ed93b 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -462,12 +462,13 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, } } + # Changed.rsq setup ---- # Changed.rsq setup ---- var.model.orig <- var.model if (!is.null(changed.rsq.term.index)) { - # check if the term index is valid: - if (min(changed.rsq.term.index) <= 0) { + # Validation checks + if (min(unlist(changed.rsq.term.index)) <= 0) { stop( "There is element(s) in changed.rsq.term.index <= 0. ", "It should be a (list of) positive integer!" @@ -476,7 +477,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, terms.full.formula <- stats::terms(formula, keep.order = TRUE) - if (max(changed.rsq.term.index) > length(labels(terms.full.formula))) { + if (max(unlist(changed.rsq.term.index)) > length(labels(terms.full.formula))) { stop( "Largest index in changed.rsq.term.index is more than the term number on the ", "right hand side of formula!" @@ -491,14 +492,35 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, } changed.rsq.term.fullFormat.list <- labels(terms.full.formula)[unlist(changed.rsq.term.index)] + + # Compute short format names: s(age) -> s_age, ti(x,z) -> ti_x_z, etc. 
changed.rsq.term.shortFormat.list <- list() - for (idx in seq_along(changed.rsq.term.index)) { - tidx <- changed.rsq.term.index[[idx]] - tidy_names_smooth <- character(0) - tidy_names_param <- character(0) - changed.rsq.term.shortFormat.list[[idx]] <- c(tidy_names_smooth, tidy_names_param) + for (changed.rsq.term.fullFormat in changed.rsq.term.fullFormat.list) { + temp <- strsplit(changed.rsq.term.fullFormat, "[(]")[[1]] + if (length(temp) == 1) { + # Not a smooth term — no parentheses + str_valid <- changed.rsq.term.fullFormat + } else { + smooth.class <- temp[1] + theEval <- eval(parse(text = changed.rsq.term.fullFormat)) + str_valid <- paste0( + smooth.class, "_", + paste(theEval$term, collapse = "_") + ) + if (theEval$by != "NA") { + str_valid <- paste0(str_valid, "_BY", theEval$by) + } + } + changed.rsq.term.shortFormat.list <- append( + changed.rsq.term.shortFormat.list, str_valid + ) } - message("will get changed R-squared (delta.adj.rsq and partial.rsq) so the execution time will be longer.") + + message( + "will get changed R-squared (delta.adj.rsq and partial.rsq) ", + "so the execution time will be longer." 
+ ) + if (!("adj.r.squared" %in% var.model)) { var.model <- c(var.model, "adj.r.squared") } @@ -616,7 +638,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, # Compute changed R-squared ---- if (need_changed_rsq) { - terms.full.formula <- stats::terms(formula) + terms.full.formula <- stats::terms(formula, keep.order = TRUE) for (i.changed.rsq.term in seq_along(changed.rsq.term.index)) { idx.changed.rsq.term <- changed.rsq.term.index[[i.changed.rsq.term]] @@ -682,6 +704,14 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, reduced_sse <- reduced.model.df_out[["model.sse"]] df_out[[partial_col]] <- (reduced_sse - full_sse) / reduced_sse } + + # if adjusted r sq is not requested (see var.model.orig), remove it: + if (!("adj.r.squared" %in% var.model.orig)) { + df_out <- df_out[, colnames(df_out) != "model.adj.r.squared", drop = FALSE] + } + + # remove full model's sse (model.sse): + df_out <- df_out[, colnames(df_out) != "model.sse", drop = FALSE] } # Rewrite if corrections applied and streaming was used ---- From ac3d8576305e01b7d6a59fa13883bf289710e6d3 Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 15:55:23 -0700 Subject: [PATCH 7/9] Silence unbound "." warning --- R/ModelArray-package.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ModelArray-package.R b/R/ModelArray-package.R index cee59be..508ac21 100644 --- a/R/ModelArray-package.R +++ b/R/ModelArray-package.R @@ -71,3 +71,7 @@ #' #' @keywords internal "_PACKAGE" + +## Suppress R CMD check NOTEs for non-standard evaluation symbols +## used inside magrittr pipe expressions. 
+utils::globalVariables(".") From aeab037ecd2cfea9e343b65770452751cf4027ca Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 9 Apr 2026 16:28:34 -0700 Subject: [PATCH 8/9] Fix bad GAM documentation --- R/ModelArray_Constructor.R | 2 + R/analyse.R | 134 ++++++++++++++++++++-------------- man/ModelArray.wrap.Rd | 136 ++++++++++++++++++++--------------- man/analyseOneElement.gam.Rd | 15 ++-- 4 files changed, 170 insertions(+), 117 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 3f3b9e2..625a55e 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -596,6 +596,8 @@ analyseOneElement.lm <- function(i_element, } #' Fit a GAM for a single element +#' +#' #' @description #' Returns metadata (column names, smooth term names, parametric term names, #' and the smoothing parameter criterion attribute name) used by #' \code{\link{ModelArray.gam}} to initialise the output data.frame. When diff --git a/R/analyse.R b/R/analyse.R index b5ed93b..c7ae8bd 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -733,31 +733,51 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, #' Run a user-supplied function for element-wise data #' #' @description -#' `ModelArray.wrap` runs a user-supplied function \code{FUN} at each -#' requested element and returns a tibble of results combined across -#' elements. +#' `ModelArray.gam` fits a generalized additive model at each requested +#' element in a \linkS4class{ModelArray} and returns a tibble of requested +#' model statistics. There is no model-level p-value for GAMs, so there is +#' no \code{correct.p.value.model} argument. #' #' @details -#' This provides a generic framework reusing ModelArray's per-element -#' looping, alignment, subject-thresholding, and parallelization. The user -#' function is called as \code{FUN(data = dat, ...)} where \code{dat} is -#' \code{phenotypes} with all scalar columns appended for the current -#' element. 
The return value from \code{FUN} for a single element must be -#' one of: +#' You may request returning specific statistical variables by setting +#' \code{var.*}, or you can get all by setting \code{full.outputs = TRUE}. +#' Note that statistics covered by \code{full.outputs} or \code{var.*} are +#' the ones from \code{broom::tidy()}, \code{broom::glance()}, and +#' \code{summary.gam()} only, and do not include corrected p-values. +#' However FDR-corrected p-values (\code{"fdr"}) are generated by default. +#' +#' List of acceptable statistic names for each of \code{var.*}: #' \itemize{ -#' \item a one-row \code{data.frame} or \code{tibble} -#' \item a named list -#' \item a named atomic vector +#' \item \code{var.smoothTerms}: \code{c("edf", "ref.df", "statistic", +#' "p.value")}; From \code{broom::tidy(parametric = FALSE)}. +#' \item \code{var.parametricTerms}: \code{c("estimate", "std.error", +#' "statistic", "p.value")}; From \code{broom::tidy(parametric = TRUE)}. +#' \item \code{var.model}: \code{c("adj.r.squared", "dev.expl", +#' "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance", +#' "df.residual", "nobs")}; From \code{broom::glance()} and +#' \code{\link[mgcv]{summary.gam}}. #' } -#' The column names from the first successful element determine the final -#' schema. #' -#' Note: \code{ModelArray.wrap} never performs any p-value corrections or -#' modifications. If you need adjusted p-values (e.g. FDR), implement -#' them inside \code{FUN}. +#' Smooth term names in the output are normalized: \code{s(age)} becomes +#' \code{s_age}, \code{ti(x,z)} becomes \code{ti_x_z}, and +#' \code{s(x):oFactor} becomes \code{s_x_BYoFactor}. #' -#' Use \code{\link{exampleElementData}} to construct a sample per-element -#' data.frame for testing your function before committing to a full run. +#' For p-value corrections (arguments \code{correct.p.value.*}), supported +#' methods include all methods in \code{p.adjust.methods} except +#' \code{"none"}. 
You can request more than one method. FDR-corrected +#' p-values (\code{"fdr"}) are calculated by default. Turn it off by +#' setting to \code{"none"}. +#' +#' When \code{changed.rsq.term.index} is provided, a reduced model (dropping +#' the specified term) is fit at each element to compute delta adjusted +#' R-squared and partial R-squared. This approximately doubles execution +#' time per requested term. The term index refers to the position on the +#' right-hand side of \code{formula} (use \code{labels(terms(formula))} to +#' see the ordering). +#' +#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are +#' mainly for input data with subject-specific masks, i.e. currently only +#' for volume data. For fixel-wise data, you may ignore these arguments. #' #' @inheritParams ModelArray.lm #' @@ -785,54 +805,58 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, #' level for scalar writes. Default 4. #' @param ... Additional arguments forwarded to \code{FUN}. #' -#' @return If \code{flag_initiate = TRUE}, a list with one component: -#' \describe{ -#' \item{column_names}{Character vector. The column names derived from -#' the return value of \code{user_fun}, with \code{"element_id"} -#' prepended. For unnamed list or atomic returns, columns are named -#' \code{v1}, \code{v2}, etc. Set to \code{NaN} if the element was -#' skipped or errored.} -#' } -#' If \code{flag_initiate = FALSE}, a numeric vector of length -#' \code{num.stat.output} with \code{element_id} (0-based) first and -#' the coerced output of \code{user_fun} in subsequent positions. -#' All-\code{NaN} (except \code{element_id}) if the element was skipped -#' or if an error occurred with \code{on_error = "skip"}. +#' @return A data.frame with one row per element. The first column is +#' \code{element_id} (0-based). Remaining columns contain the requested +#' statistics, named as \code{.} for per-term statistics +#' and \code{model.} for model-level statistics. 
Smooth term +#' names are normalized (e.g. \code{s_age.statistic}). If p-value +#' corrections were requested, additional columns are appended with the +#' correction method as suffix (e.g. \code{s_age.p.value.fdr}). If +#' \code{changed.rsq.term.index} was requested, additional columns +#' \code{.delta.adj.rsq} and \code{.partial.rsq} are +#' appended. #' #' @seealso \code{\link{ModelArray.lm}} for linear models, -#' \code{\link{ModelArray.gam}} for GAMs, -#' \code{\link{exampleElementData}} for building test data, -#' \linkS4class{ModelArray} for the input class. +#' \code{\link{ModelArray.wrap}} for user-supplied functions, +#' \code{\link{gen_gamFormula_fxSmooth}} and +#' \code{\link{gen_gamFormula_contIx}} for formula helpers, +#' \linkS4class{ModelArray} for the input class, +#' \code{\link{ModelArray}} for the constructor, +#' \code{\link{exampleElementData}} for testing formulas on a single +#' element. #' #' @examples{ #' \dontrun{ #' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) #' phenotypes <- read.csv("cohort.csv") #' -#' # Simple custom function -#' my_fun <- function(data, ...) 
{ -#' mod <- lm(FD ~ age + sex, data = data) -#' tidy_out <- broom::tidy(mod) -#' # Return a one-row tibble -#' tibble::tibble( -#' age_estimate = tidy_out$estimate[tidy_out$term == "age"], -#' age_pvalue = tidy_out$p.value[tidy_out$term == "age"] -#' ) -#' } -#' +#' # Fit GAM with default outputs +#' results <- ModelArray.gam( +#' FD ~ s(age, fx = TRUE) + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD" +#' ) +#' head(results) #' -#' # Test on one element first -#' test_df <- exampleElementData(ma, scalar = "FD", -#' i_element = 1, -#' phenotypes = phenotypes) -#' my_fun(data = test_df) +#' # With changed R-squared for the smooth term (term index 1) +#' results_rsq <- ModelArray.gam( +#' FD ~ s(age, fx = TRUE) + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD", +#' changed.rsq.term.index = list(1) +#' ) #' -#' # Run across all elements -#' results <- ModelArray.wrap( -#' FUN = my_fun, +#' # Full outputs, no p-value correction +#' results_full <- ModelArray.gam( +#' FD ~ s(age, fx = TRUE) + sex, #' data = ma, #' phenotypes = phenotypes, -#' scalar = "FD" +#' scalar = "FD", +#' full.outputs = TRUE, +#' correct.p.value.smoothTerms = "none", +#' correct.p.value.parametricTerms = "none" #' ) #' } #' } diff --git a/man/ModelArray.wrap.Rd b/man/ModelArray.wrap.Rd index 79e5103..dcbd782 100644 --- a/man/ModelArray.wrap.Rd +++ b/man/ModelArray.wrap.Rd @@ -125,46 +125,63 @@ useful when writing large outputs directly to HDF5.} \item{...}{Additional arguments forwarded to \code{FUN}.} } \value{ -If \code{flag_initiate = TRUE}, a list with one component: -\describe{ -\item{column_names}{Character vector. The column names derived from -the return value of \code{user_fun}, with \code{"element_id"} -prepended. For unnamed list or atomic returns, columns are named -\code{v1}, \code{v2}, etc. 
Set to \code{NaN} if the element was -skipped or errored.} -} -If \code{flag_initiate = FALSE}, a numeric vector of length -\code{num.stat.output} with \code{element_id} (0-based) first and -the coerced output of \code{user_fun} in subsequent positions. -All-\code{NaN} (except \code{element_id}) if the element was skipped -or if an error occurred with \code{on_error = "skip"}. +A data.frame with one row per element. The first column is +\code{element_id} (0-based). Remaining columns contain the requested +statistics, named as \code{.} for per-term statistics +and \code{model.} for model-level statistics. Smooth term +names are normalized (e.g. \code{s_age.statistic}). If p-value +corrections were requested, additional columns are appended with the +correction method as suffix (e.g. \code{s_age.p.value.fdr}). If +\code{changed.rsq.term.index} was requested, additional columns +\code{.delta.adj.rsq} and \code{.partial.rsq} are +appended. } \description{ -\code{ModelArray.wrap} runs a user-supplied function \code{FUN} at each -requested element and returns a tibble of results combined across -elements. +\code{ModelArray.gam} fits a generalized additive model at each requested +element in a \linkS4class{ModelArray} and returns a tibble of requested +model statistics. There is no model-level p-value for GAMs, so there is +no \code{correct.p.value.model} argument. } \details{ -This provides a generic framework reusing ModelArray's per-element -looping, alignment, subject-thresholding, and parallelization. The user -function is called as \code{FUN(data = dat, ...)} where \code{dat} is -\code{phenotypes} with all scalar columns appended for the current -element. The return value from \code{FUN} for a single element must be -one of: +You may request returning specific statistical variables by setting +\code{var.*}, or you can get all by setting \code{full.outputs = TRUE}. 
+Note that statistics covered by \code{full.outputs} or \code{var.*} are +the ones from \code{broom::tidy()}, \code{broom::glance()}, and +\code{summary.gam()} only, and do not include corrected p-values. +However FDR-corrected p-values (\code{"fdr"}) are generated by default. + +List of acceptable statistic names for each of \code{var.*}: \itemize{ -\item a one-row \code{data.frame} or \code{tibble} -\item a named list -\item a named atomic vector +\item \code{var.smoothTerms}: \code{c("edf", "ref.df", "statistic", + "p.value")}; From \code{broom::tidy(parametric = FALSE)}. +\item \code{var.parametricTerms}: \code{c("estimate", "std.error", + "statistic", "p.value")}; From \code{broom::tidy(parametric = TRUE)}. +\item \code{var.model}: \code{c("adj.r.squared", "dev.expl", + "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance", + "df.residual", "nobs")}; From \code{broom::glance()} and +\code{\link[mgcv]{summary.gam}}. } -The column names from the first successful element determine the final -schema. - -Note: \code{ModelArray.wrap} never performs any p-value corrections or -modifications. If you need adjusted p-values (e.g. FDR), implement -them inside \code{FUN}. -Use \code{\link{exampleElementData}} to construct a sample per-element -data.frame for testing your function before committing to a full run. +Smooth term names in the output are normalized: \code{s(age)} becomes +\code{s_age}, \code{ti(x,z)} becomes \code{ti_x_z}, and +\code{s(x):oFactor} becomes \code{s_x_BYoFactor}. + +For p-value corrections (arguments \code{correct.p.value.*}), supported +methods include all methods in \code{p.adjust.methods} except +\code{"none"}. You can request more than one method. FDR-corrected +p-values (\code{"fdr"}) are calculated by default. Turn it off by +setting to \code{"none"}. + +When \code{changed.rsq.term.index} is provided, a reduced model (dropping +the specified term) is fit at each element to compute delta adjusted +R-squared and partial R-squared. 
This approximately doubles execution +time per requested term. The term index refers to the position on the +right-hand side of \code{formula} (use \code{labels(terms(formula))} to +see the ordering). + +Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are +mainly for input data with subject-specific masks, i.e. currently only +for volume data. For fixel-wise data, you may ignore these arguments. } \examples{ { @@ -172,30 +189,33 @@ data.frame for testing your function before committing to a full run. ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) phenotypes <- read.csv("cohort.csv") -# Simple custom function -my_fun <- function(data, ...) { - mod <- lm(FD ~ age + sex, data = data) - tidy_out <- broom::tidy(mod) - # Return a one-row tibble - tibble::tibble( - age_estimate = tidy_out$estimate[tidy_out$term == "age"], - age_pvalue = tidy_out$p.value[tidy_out$term == "age"] - ) -} - +# Fit GAM with default outputs +results <- ModelArray.gam( + FD ~ s(age, fx = TRUE) + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD" +) +head(results) -# Test on one element first -test_df <- exampleElementData(ma, scalar = "FD", - i_element = 1, - phenotypes = phenotypes) -my_fun(data = test_df) +# With changed R-squared for the smooth term (term index 1) +results_rsq <- ModelArray.gam( + FD ~ s(age, fx = TRUE) + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD", + changed.rsq.term.index = list(1) +) -# Run across all elements -results <- ModelArray.wrap( - FUN = my_fun, +# Full outputs, no p-value correction +results_full <- ModelArray.gam( + FD ~ s(age, fx = TRUE) + sex, data = ma, phenotypes = phenotypes, - scalar = "FD" + scalar = "FD", + full.outputs = TRUE, + correct.p.value.smoothTerms = "none", + correct.p.value.parametricTerms = "none" ) } } @@ -203,7 +223,11 @@ results <- ModelArray.wrap( } \seealso{ \code{\link{ModelArray.lm}} for linear models, -\code{\link{ModelArray.gam}} for GAMs, -\code{\link{exampleElementData}} for 
building test data, -\linkS4class{ModelArray} for the input class. +\code{\link{ModelArray.wrap}} for user-supplied functions, +\code{\link{gen_gamFormula_fxSmooth}} and +\code{\link{gen_gamFormula_contIx}} for formula helpers, +\linkS4class{ModelArray} for the input class, +\code{\link{ModelArray}} for the constructor, +\code{\link{exampleElementData}} for testing formulas on a single +element. } diff --git a/man/analyseOneElement.gam.Rd b/man/analyseOneElement.gam.Rd index 73598fd..55fb7c1 100644 --- a/man/analyseOneElement.gam.Rd +++ b/man/analyseOneElement.gam.Rd @@ -2,12 +2,7 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{analyseOneElement.gam} \alias{analyseOneElement.gam} -\title{Fit a GAM for a single element -Returns metadata (column names, smooth term names, parametric term names, -and the smoothing parameter criterion attribute name) used by -\code{\link{ModelArray.gam}} to initialise the output data.frame. When -\code{flag_initiate = FALSE}, it returns a numeric vector representing one -row of the final results matrix.} +\title{Fit a GAM for a single element} \usage{ analyseOneElement.gam( i_element, @@ -106,6 +101,14 @@ requested statistics in subsequent positions. All-\code{NaN} (except \code{element_id}) if the element was skipped. } \description{ +#' @description +Returns metadata (column names, smooth term names, parametric term names, +and the smoothing parameter criterion attribute name) used by +\code{\link{ModelArray.gam}} to initialise the output data.frame. When +\code{flag_initiate = FALSE}, it returns a numeric vector representing one +row of the final results matrix. +} +\details{ If the number of subjects with finite scalar values does not exceed \code{num.subj.lthr}, the element is skipped and all statistics are set to \code{NaN}. 
From 9b2ef3ddc3df771a992670004458f7eb6a94d608 Mon Sep 17 00:00:00 2001 From: araikes Date: Fri, 10 Apr 2026 09:47:07 -0700 Subject: [PATCH 9/9] Maybe fix GAM instead of breaking wrap docs --- R/analyse.R | 223 ++++++++++++++++++++++++----------------- man/ModelArray.gam.Rd | 94 +++++++++++++---- man/ModelArray.wrap.Rd | 136 +++++++++++-------------- 3 files changed, 266 insertions(+), 187 deletions(-) diff --git a/R/analyse.R b/R/analyse.R index c7ae8bd..46a2106 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -324,8 +324,54 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU } #' Fit element-wise generalized additive models -#' no model-level p-value for GAMs, so there is no -#' \code{correct.p.value.model} argument. +#' +#' +#' @description +#' `ModelArray.gam` fits a generalized additive model at each requested +#' element in a \linkS4class{ModelArray} and returns a tibble of requested +#' model statistics. There is no model-level p-value for GAMs, so there is +#' no \code{correct.p.value.model} argument. +#' +#' @details +#' You may request returning specific statistical variables by setting +#' \code{var.*}, or you can get all by setting \code{full.outputs = TRUE}. +#' Note that statistics covered by \code{full.outputs} or \code{var.*} are +#' the ones from \code{broom::tidy()}, \code{broom::glance()}, and +#' \code{summary.gam()} only, and do not include corrected p-values. +#' However FDR-corrected p-values (\code{"fdr"}) are generated by default. +#' +#' List of acceptable statistic names for each of \code{var.*}: +#' \itemize{ +#' \item \code{var.smoothTerms}: \code{c("edf", "ref.df", "statistic", +#' "p.value")}; From \code{broom::tidy(parametric = FALSE)}. +#' \item \code{var.parametricTerms}: \code{c("estimate", "std.error", +#' "statistic", "p.value")}; From \code{broom::tidy(parametric = TRUE)}. 
+#' \item \code{var.model}: \code{c("adj.r.squared", "dev.expl", +#' "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance", +#' "df.residual", "nobs")}; From \code{broom::glance()} and +#' \code{\link[mgcv]{summary.gam}}. +#' } +#' +#' Smooth term names in the output are normalized: \code{s(age)} becomes +#' \code{s_age}, \code{ti(x,z)} becomes \code{ti_x_z}, and +#' \code{s(x):oFactor} becomes \code{s_x_BYoFactor}. +#' +#' For p-value corrections (arguments \code{correct.p.value.*}), supported +#' methods include all methods in \code{p.adjust.methods} except +#' \code{"none"}. You can request more than one method. FDR-corrected +#' p-values (\code{"fdr"}) are calculated by default. Turn it off by +#' setting to \code{"none"}. +#' +#' When \code{changed.rsq.term.index} is provided, a reduced model (dropping +#' the specified term) is fit at each element to compute delta adjusted +#' R-squared and partial R-squared. This approximately doubles execution +#' time per requested term. The term index refers to the position on the +#' right-hand side of \code{formula} (use \code{labels(terms(formula))} to +#' see the ordering). +#' +#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are +#' mainly for input data with subject-specific masks, i.e. currently only +#' for volume data. For fixel-wise data, you may ignore these arguments. #' #' @inheritParams ModelArray.lm #' @@ -336,21 +382,29 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU #' parametric terms, from \code{broom::tidy(parametric = TRUE)}. #' See Details. #' @param var.model Character vector. Statistics to save for the overall -#' model, from \code{broom::glance()} and \code{summary()}. See Details. +#' model, from \code{broom::glance()} and +#' \code{\link[mgcv]{summary.gam}}. See Details. #' @param changed.rsq.term.index A list of positive integers. 
Each value #' is the index of a term on the right-hand side of \code{formula} for #' which delta adjusted R-squared and partial R-squared should be #' computed. Usually the term of interest is a smooth term or interaction -#' term. Default \code{NULL} (not computed). See Details for warnings. +#' term. Default \code{NULL} (not computed). See Details. #' @param correct.p.value.smoothTerms Character vector. P-value #' correction method(s) for each smooth term. Default: \code{"fdr"}. +#' See Details. #' @param correct.p.value.parametricTerms Character vector. P-value #' correction method(s) for each parametric term. Default: \code{"fdr"}. -#' @param ... Additional arguments passed to \code{\link[mgcv]{gam}}. +#' See Details. +#' @param ... Additional arguments passed to \code{\link[mgcv]{gam}} +#' (e.g. \code{method = "REML"}). #' -#' @return A tibble with one row per element. The first column is +#' @return A data.frame with one row per element. The first column is #' \code{element_id} (0-based). Remaining columns contain the requested -#' statistics, named as \code{.}. If +#' statistics, named as \code{.} for per-term statistics +#' and \code{model.} for model-level statistics. Smooth term +#' names are normalized (e.g. \code{s_age.statistic}). If p-value +#' corrections were requested, additional columns are appended with the +#' correction method as suffix (e.g. \code{s_age.p.value.fdr}). If #' \code{changed.rsq.term.index} was requested, additional columns #' \code{.delta.adj.rsq} and \code{.partial.rsq} are #' appended. @@ -359,13 +413,15 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU #' \code{\link{ModelArray.wrap}} for user-supplied functions, #' \code{\link{gen_gamFormula_fxSmooth}} and #' \code{\link{gen_gamFormula_contIx}} for formula helpers, -#' \linkS4class{ModelArray} for the input class. 
+#' \linkS4class{ModelArray} for the input class, +#' \code{\link{ModelArray}} for the constructor, +#' \code{\link{exampleElementData}} for testing formulas on a single +#' element. +#' #' #' @examples{ #' \dontrun{ -#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) -#' phenotypes <- read.csv("cohort.csv") -#' +#' # Fit GAM with default outputs #' results <- ModelArray.gam( #' FD ~ s(age, fx = TRUE) + sex, #' data = ma, @@ -382,6 +438,17 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU #' scalar = "FD", #' changed.rsq.term.index = list(1) #' ) +#' +#' # Full outputs, no p-value correction +#' results_full <- ModelArray.gam( +#' FD ~ s(age, fx = TRUE) + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD", +#' full.outputs = TRUE, +#' correct.p.value.smoothTerms = "none", +#' correct.p.value.parametricTerms = "none" +#' ) #' } #' } #' @@ -733,51 +800,31 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, #' Run a user-supplied function for element-wise data #' #' @description -#' `ModelArray.gam` fits a generalized additive model at each requested -#' element in a \linkS4class{ModelArray} and returns a tibble of requested -#' model statistics. There is no model-level p-value for GAMs, so there is -#' no \code{correct.p.value.model} argument. +#' `ModelArray.wrap` runs a user-supplied function \code{FUN} at each +#' requested element and returns a tibble of results combined across +#' elements. #' #' @details -#' You may request returning specific statistical variables by setting -#' \code{var.*}, or you can get all by setting \code{full.outputs = TRUE}. -#' Note that statistics covered by \code{full.outputs} or \code{var.*} are -#' the ones from \code{broom::tidy()}, \code{broom::glance()}, and -#' \code{summary.gam()} only, and do not include corrected p-values. -#' However FDR-corrected p-values (\code{"fdr"}) are generated by default. 
-#' -#' List of acceptable statistic names for each of \code{var.*}: +#' This provides a generic framework reusing ModelArray's per-element +#' looping, alignment, subject-thresholding, and parallelization. The user +#' function is called as \code{FUN(data = dat, ...)} where \code{dat} is +#' \code{phenotypes} with all scalar columns appended for the current +#' element. The return value from \code{FUN} for a single element must be +#' one of: #' \itemize{ -#' \item \code{var.smoothTerms}: \code{c("edf", "ref.df", "statistic", -#' "p.value")}; From \code{broom::tidy(parametric = FALSE)}. -#' \item \code{var.parametricTerms}: \code{c("estimate", "std.error", -#' "statistic", "p.value")}; From \code{broom::tidy(parametric = TRUE)}. -#' \item \code{var.model}: \code{c("adj.r.squared", "dev.expl", -#' "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance", -#' "df.residual", "nobs")}; From \code{broom::glance()} and -#' \code{\link[mgcv]{summary.gam}}. +#' \item a one-row \code{data.frame} or \code{tibble} +#' \item a named list +#' \item a named atomic vector #' } +#' The column names from the first successful element determine the final +#' schema. #' -#' Smooth term names in the output are normalized: \code{s(age)} becomes -#' \code{s_age}, \code{ti(x,z)} becomes \code{ti_x_z}, and -#' \code{s(x):oFactor} becomes \code{s_x_BYoFactor}. -#' -#' For p-value corrections (arguments \code{correct.p.value.*}), supported -#' methods include all methods in \code{p.adjust.methods} except -#' \code{"none"}. You can request more than one method. FDR-corrected -#' p-values (\code{"fdr"}) are calculated by default. Turn it off by -#' setting to \code{"none"}. -#' -#' When \code{changed.rsq.term.index} is provided, a reduced model (dropping -#' the specified term) is fit at each element to compute delta adjusted -#' R-squared and partial R-squared. This approximately doubles execution -#' time per requested term. 
The term index refers to the position on the -#' right-hand side of \code{formula} (use \code{labels(terms(formula))} to -#' see the ordering). +#' Note: \code{ModelArray.wrap} never performs any p-value corrections or +#' modifications. If you need adjusted p-values (e.g. FDR), implement +#' them inside \code{FUN}. #' -#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are -#' mainly for input data with subject-specific masks, i.e. currently only -#' for volume data. For fixel-wise data, you may ignore these arguments. +#' Use \code{\link{exampleElementData}} to construct a sample per-element +#' data.frame for testing your function before committing to a full run. #' #' @inheritParams ModelArray.lm #' @@ -805,58 +852,54 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, #' level for scalar writes. Default 4. #' @param ... Additional arguments forwarded to \code{FUN}. #' -#' @return A data.frame with one row per element. The first column is -#' \code{element_id} (0-based). Remaining columns contain the requested -#' statistics, named as \code{.} for per-term statistics -#' and \code{model.} for model-level statistics. Smooth term -#' names are normalized (e.g. \code{s_age.statistic}). If p-value -#' corrections were requested, additional columns are appended with the -#' correction method as suffix (e.g. \code{s_age.p.value.fdr}). If -#' \code{changed.rsq.term.index} was requested, additional columns -#' \code{.delta.adj.rsq} and \code{.partial.rsq} are -#' appended. +#' @return If \code{flag_initiate = TRUE}, a list with one component: +#' \describe{ +#' \item{column_names}{Character vector. The column names derived from +#' the return value of \code{user_fun}, with \code{"element_id"} +#' prepended. For unnamed list or atomic returns, columns are named +#' \code{v1}, \code{v2}, etc. 
Set to \code{NaN} if the element was +#' skipped or errored.} +#' } +#' If \code{flag_initiate = FALSE}, a numeric vector of length +#' \code{num.stat.output} with \code{element_id} (0-based) first and +#' the coerced output of \code{user_fun} in subsequent positions. +#' All-\code{NaN} (except \code{element_id}) if the element was skipped +#' or if an error occurred with \code{on_error = "skip"}. #' #' @seealso \code{\link{ModelArray.lm}} for linear models, -#' \code{\link{ModelArray.wrap}} for user-supplied functions, -#' \code{\link{gen_gamFormula_fxSmooth}} and -#' \code{\link{gen_gamFormula_contIx}} for formula helpers, -#' \linkS4class{ModelArray} for the input class, -#' \code{\link{ModelArray}} for the constructor, -#' \code{\link{exampleElementData}} for testing formulas on a single -#' element. +#' \code{\link{ModelArray.gam}} for GAMs, +#' \code{\link{exampleElementData}} for building test data, +#' \linkS4class{ModelArray} for the input class. #' #' @examples{ #' \dontrun{ #' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) #' phenotypes <- read.csv("cohort.csv") #' -#' # Fit GAM with default outputs -#' results <- ModelArray.gam( -#' FD ~ s(age, fx = TRUE) + sex, -#' data = ma, -#' phenotypes = phenotypes, -#' scalar = "FD" -#' ) -#' head(results) +#' # Simple custom function +#' my_fun <- function(data, ...) 
{ +#' mod <- lm(FD ~ age + sex, data = data) +#' tidy_out <- broom::tidy(mod) +#' # Return a one-row tibble +#' tibble::tibble( +#' age_estimate = tidy_out$estimate[tidy_out$term == "age"], +#' age_pvalue = tidy_out$p.value[tidy_out$term == "age"] +#' ) +#' } #' -#' # With changed R-squared for the smooth term (term index 1) -#' results_rsq <- ModelArray.gam( -#' FD ~ s(age, fx = TRUE) + sex, -#' data = ma, -#' phenotypes = phenotypes, -#' scalar = "FD", -#' changed.rsq.term.index = list(1) -#' ) #' -#' # Full outputs, no p-value correction -#' results_full <- ModelArray.gam( -#' FD ~ s(age, fx = TRUE) + sex, +#' # Test on one element first +#' test_df <- exampleElementData(ma, scalar = "FD", +#' i_element = 1, +#' phenotypes = phenotypes) +#' my_fun(data = test_df) +#' +#' # Run across all elements +#' results <- ModelArray.wrap( +#' FUN = my_fun, #' data = ma, #' phenotypes = phenotypes, -#' scalar = "FD", -#' full.outputs = TRUE, -#' correct.p.value.smoothTerms = "none", -#' correct.p.value.parametricTerms = "none" +#' scalar = "FD" #' ) #' } #' } diff --git a/man/ModelArray.gam.Rd b/man/ModelArray.gam.Rd index 74fbe72..c662f91 100644 --- a/man/ModelArray.gam.Rd +++ b/man/ModelArray.gam.Rd @@ -2,9 +2,7 @@ % Please edit documentation in R/analyse.R \name{ModelArray.gam} \alias{ModelArray.gam} -\title{Fit element-wise generalized additive models -no model-level p-value for GAMs, so there is no -\code{correct.p.value.model} argument.} +\title{Fit element-wise generalized additive models} \usage{ ModelArray.gam( formula, @@ -63,19 +61,22 @@ parametric terms, from \code{broom::tidy(parametric = TRUE)}. See Details.} \item{var.model}{Character vector. Statistics to save for the overall -model, from \code{broom::glance()} and \code{summary()}. See Details.} +model, from \code{broom::glance()} and +\code{\link[mgcv]{summary.gam}}. See Details.} \item{changed.rsq.term.index}{A list of positive integers. 
Each value is the index of a term
on the right-hand side of \code{formula} for which delta adjusted
R-squared and partial R-squared should be computed. Usually
the term of interest is a smooth term or interaction
-term. Default \code{NULL} (not computed). See Details for warnings.}
+term. Default \code{NULL} (not computed). See Details.}

\item{correct.p.value.smoothTerms}{Character vector. P-value
-correction method(s) for each smooth term. Default: \code{"fdr"}.}
+correction method(s) for each smooth term. Default: \code{"fdr"}.
+See Details.}

\item{correct.p.value.parametricTerms}{Character vector. P-value
-correction method(s) for each parametric term. Default: \code{"fdr"}.}
+correction method(s) for each parametric term. Default: \code{"fdr"}.
+See Details.}

\item{num.subj.lthr.abs}{Integer. Lower threshold for the
absolute number of subjects with finite scalar values (not \code{NaN},
@@ -123,27 +124,72 @@ level for HDF5 writes. Default 4.}
combined data.frame. If \code{FALSE}, return \code{invisible(NULL)};
useful when writing large outputs directly to HDF5.}

-\item{...}{Additional arguments passed to \code{\link[mgcv]{gam}}.}
+\item{...}{Additional arguments passed to \code{\link[mgcv]{gam}}
+(e.g. \code{method = "REML"}).}
}
\value{
-A tibble with one row per element. The first column is
+A data.frame with one row per element. The first column is
\code{element_id} (0-based). Remaining columns contain the requested
-statistics, named as \code{<term>.<stat>}. If
+statistics, named as \code{<term>.<stat>} for per-term statistics
+and \code{model.<stat>} for model-level statistics. Smooth term
+names are normalized (e.g. \code{s_age.statistic}). If p-value
+corrections were requested, additional columns are appended with the
+correction method as suffix (e.g. \code{s_age.p.value.fdr}). If
\code{changed.rsq.term.index} was requested, additional columns
\code{<term>.delta.adj.rsq} and \code{<term>.partial.rsq} are
appended.
} \description{ -Fit element-wise generalized additive models -no model-level p-value for GAMs, so there is no -\code{correct.p.value.model} argument. +\code{ModelArray.gam} fits a generalized additive model at each requested +element in a \linkS4class{ModelArray} and returns a tibble of requested +model statistics. There is no model-level p-value for GAMs, so there is +no \code{correct.p.value.model} argument. +} +\details{ +You may request returning specific statistical variables by setting +\code{var.*}, or you can get all by setting \code{full.outputs = TRUE}. +Note that statistics covered by \code{full.outputs} or \code{var.*} are +the ones from \code{broom::tidy()}, \code{broom::glance()}, and +\code{summary.gam()} only, and do not include corrected p-values. +However FDR-corrected p-values (\code{"fdr"}) are generated by default. + +List of acceptable statistic names for each of \code{var.*}: +\itemize{ +\item \code{var.smoothTerms}: \code{c("edf", "ref.df", "statistic", + "p.value")}; From \code{broom::tidy(parametric = FALSE)}. +\item \code{var.parametricTerms}: \code{c("estimate", "std.error", + "statistic", "p.value")}; From \code{broom::tidy(parametric = TRUE)}. +\item \code{var.model}: \code{c("adj.r.squared", "dev.expl", + "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance", + "df.residual", "nobs")}; From \code{broom::glance()} and +\code{\link[mgcv]{summary.gam}}. +} + +Smooth term names in the output are normalized: \code{s(age)} becomes +\code{s_age}, \code{ti(x,z)} becomes \code{ti_x_z}, and +\code{s(x):oFactor} becomes \code{s_x_BYoFactor}. + +For p-value corrections (arguments \code{correct.p.value.*}), supported +methods include all methods in \code{p.adjust.methods} except +\code{"none"}. You can request more than one method. FDR-corrected +p-values (\code{"fdr"}) are calculated by default. Turn it off by +setting to \code{"none"}. 
+ +When \code{changed.rsq.term.index} is provided, a reduced model (dropping +the specified term) is fit at each element to compute delta adjusted +R-squared and partial R-squared. This approximately doubles execution +time per requested term. The term index refers to the position on the +right-hand side of \code{formula} (use \code{labels(terms(formula))} to +see the ordering). + +Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are +mainly for input data with subject-specific masks, i.e. currently only +for volume data. For fixel-wise data, you may ignore these arguments. } \examples{ { \dontrun{ -ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) -phenotypes <- read.csv("cohort.csv") - +# Fit GAM with default outputs results <- ModelArray.gam( FD ~ s(age, fx = TRUE) + sex, data = ma, @@ -160,6 +206,17 @@ results_rsq <- ModelArray.gam( scalar = "FD", changed.rsq.term.index = list(1) ) + +# Full outputs, no p-value correction +results_full <- ModelArray.gam( + FD ~ s(age, fx = TRUE) + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD", + full.outputs = TRUE, + correct.p.value.smoothTerms = "none", + correct.p.value.parametricTerms = "none" +) } } @@ -169,5 +226,8 @@ results_rsq <- ModelArray.gam( \code{\link{ModelArray.wrap}} for user-supplied functions, \code{\link{gen_gamFormula_fxSmooth}} and \code{\link{gen_gamFormula_contIx}} for formula helpers, -\linkS4class{ModelArray} for the input class. +\linkS4class{ModelArray} for the input class, +\code{\link{ModelArray}} for the constructor, +\code{\link{exampleElementData}} for testing formulas on a single +element. } diff --git a/man/ModelArray.wrap.Rd b/man/ModelArray.wrap.Rd index dcbd782..79e5103 100644 --- a/man/ModelArray.wrap.Rd +++ b/man/ModelArray.wrap.Rd @@ -125,63 +125,46 @@ useful when writing large outputs directly to HDF5.} \item{...}{Additional arguments forwarded to \code{FUN}.} } \value{ -A data.frame with one row per element. 
The first column is
-\code{element_id} (0-based). Remaining columns contain the requested
-statistics, named as \code{<term>.<stat>} for per-term statistics
-and \code{model.<stat>} for model-level statistics. Smooth term
-names are normalized (e.g. \code{s_age.statistic}). If p-value
-corrections were requested, additional columns are appended with the
-correction method as suffix (e.g. \code{s_age.p.value.fdr}). If
-\code{changed.rsq.term.index} was requested, additional columns
-\code{<term>.delta.adj.rsq} and \code{<term>.partial.rsq} are
-appended.
+If \code{flag_initiate = TRUE}, a list with one component:
+\describe{
+\item{column_names}{Character vector. The column names derived from
+the return value of \code{user_fun}, with \code{"element_id"}
+prepended. For unnamed list or atomic returns, columns are named
+\code{v1}, \code{v2}, etc. Set to \code{NaN} if the element was
+skipped or errored.}
+}
+If \code{flag_initiate = FALSE}, a numeric vector of length
+\code{num.stat.output} with \code{element_id} (0-based) first and
+the coerced output of \code{user_fun} in subsequent positions.
+All-\code{NaN} (except \code{element_id}) if the element was skipped
+or if an error occurred with \code{on_error = "skip"}.
}
\description{
-\code{ModelArray.gam} fits a generalized additive model at each requested
-element in a \linkS4class{ModelArray} and returns a tibble of requested
-model statistics. There is no model-level p-value for GAMs, so there is
-no \code{correct.p.value.model} argument.
+\code{ModelArray.wrap} runs a user-supplied function \code{FUN} at each
+requested element and returns a tibble of results combined across
+elements.
}
\details{
-You may request returning specific statistical variables by setting
-\code{var.*}, or you can get all by setting \code{full.outputs = TRUE}.
-Note that statistics covered by \code{full.outputs} or \code{var.*} are
-the ones from \code{broom::tidy()}, \code{broom::glance()}, and
-\code{summary.gam()} only, and do not include corrected p-values.
-However FDR-corrected p-values (\code{"fdr"}) are generated by default. - -List of acceptable statistic names for each of \code{var.*}: +This provides a generic framework reusing ModelArray's per-element +looping, alignment, subject-thresholding, and parallelization. The user +function is called as \code{FUN(data = dat, ...)} where \code{dat} is +\code{phenotypes} with all scalar columns appended for the current +element. The return value from \code{FUN} for a single element must be +one of: \itemize{ -\item \code{var.smoothTerms}: \code{c("edf", "ref.df", "statistic", - "p.value")}; From \code{broom::tidy(parametric = FALSE)}. -\item \code{var.parametricTerms}: \code{c("estimate", "std.error", - "statistic", "p.value")}; From \code{broom::tidy(parametric = TRUE)}. -\item \code{var.model}: \code{c("adj.r.squared", "dev.expl", - "sp.criterion", "scale", "df", "logLik", "AIC", "BIC", "deviance", - "df.residual", "nobs")}; From \code{broom::glance()} and -\code{\link[mgcv]{summary.gam}}. +\item a one-row \code{data.frame} or \code{tibble} +\item a named list +\item a named atomic vector } +The column names from the first successful element determine the final +schema. + +Note: \code{ModelArray.wrap} never performs any p-value corrections or +modifications. If you need adjusted p-values (e.g. FDR), implement +them inside \code{FUN}. -Smooth term names in the output are normalized: \code{s(age)} becomes -\code{s_age}, \code{ti(x,z)} becomes \code{ti_x_z}, and -\code{s(x):oFactor} becomes \code{s_x_BYoFactor}. - -For p-value corrections (arguments \code{correct.p.value.*}), supported -methods include all methods in \code{p.adjust.methods} except -\code{"none"}. You can request more than one method. FDR-corrected -p-values (\code{"fdr"}) are calculated by default. Turn it off by -setting to \code{"none"}. 
- -When \code{changed.rsq.term.index} is provided, a reduced model (dropping -the specified term) is fit at each element to compute delta adjusted -R-squared and partial R-squared. This approximately doubles execution -time per requested term. The term index refers to the position on the -right-hand side of \code{formula} (use \code{labels(terms(formula))} to -see the ordering). - -Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are -mainly for input data with subject-specific masks, i.e. currently only -for volume data. For fixel-wise data, you may ignore these arguments. +Use \code{\link{exampleElementData}} to construct a sample per-element +data.frame for testing your function before committing to a full run. } \examples{ { @@ -189,33 +172,30 @@ for volume data. For fixel-wise data, you may ignore these arguments. ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) phenotypes <- read.csv("cohort.csv") -# Fit GAM with default outputs -results <- ModelArray.gam( - FD ~ s(age, fx = TRUE) + sex, - data = ma, - phenotypes = phenotypes, - scalar = "FD" -) -head(results) +# Simple custom function +my_fun <- function(data, ...) 
{ + mod <- lm(FD ~ age + sex, data = data) + tidy_out <- broom::tidy(mod) + # Return a one-row tibble + tibble::tibble( + age_estimate = tidy_out$estimate[tidy_out$term == "age"], + age_pvalue = tidy_out$p.value[tidy_out$term == "age"] + ) +} -# With changed R-squared for the smooth term (term index 1) -results_rsq <- ModelArray.gam( - FD ~ s(age, fx = TRUE) + sex, - data = ma, - phenotypes = phenotypes, - scalar = "FD", - changed.rsq.term.index = list(1) -) -# Full outputs, no p-value correction -results_full <- ModelArray.gam( - FD ~ s(age, fx = TRUE) + sex, +# Test on one element first +test_df <- exampleElementData(ma, scalar = "FD", + i_element = 1, + phenotypes = phenotypes) +my_fun(data = test_df) + +# Run across all elements +results <- ModelArray.wrap( + FUN = my_fun, data = ma, phenotypes = phenotypes, - scalar = "FD", - full.outputs = TRUE, - correct.p.value.smoothTerms = "none", - correct.p.value.parametricTerms = "none" + scalar = "FD" ) } } @@ -223,11 +203,7 @@ results_full <- ModelArray.gam( } \seealso{ \code{\link{ModelArray.lm}} for linear models, -\code{\link{ModelArray.wrap}} for user-supplied functions, -\code{\link{gen_gamFormula_fxSmooth}} and -\code{\link{gen_gamFormula_contIx}} for formula helpers, -\linkS4class{ModelArray} for the input class, -\code{\link{ModelArray}} for the constructor, -\code{\link{exampleElementData}} for testing formulas on a single -element. +\code{\link{ModelArray.gam}} for GAMs, +\code{\link{exampleElementData}} for building test data, +\linkS4class{ModelArray} for the input class. }