Visual Code(data=example):

Search Text in optgroup.txt

Enter search string:

Visual Code(data=input box):

then to see the Example

Age,Female,Male 0~10,5,6 11~20,15,14 21~30,20,22 31~40,25,24 41~50,25,28 51~60,30,26 61~70,20,18 71~80,15,16 81~90,10,12

for Bootstrap_theme_stability

Copy & Paste the Code in R to Rstudio

# bootstrap_theme_stability <- function(df_raw, id_col = "ID", profile_col = "Profile", # 可有可無；只排除用 nf = 2, B = 200, loading_cut = 0.30, rotate = "varimax", fm = "minres", seed = 123, verbose = TRUE) { if (!requireNamespace("psych", quietly = TRUE)) stop("Need package: psych") set.seed(seed) df0 <- as.data.frame(df_raw, stringsAsFactors = FALSE) # ---- ID ---- if (id_col %in% names(df0)) { id <- as.character(df0[[id_col]]) } else if (!is.null(rownames(df0)) && all(nchar(rownames(df0)) > 0)) { id <- rownames(df0) } else stop("Cannot find ID. Provide id_col or rownames.") # ---- items ---- drop_cols <- intersect(c(id_col, profile_col), names(df0)) item_cols <- setdiff(names(df0), drop_cols) items <- df0[, item_cols, drop = FALSE] items[] <- lapply(items, function(x) suppressWarnings(as.numeric(as.character(x)))) rownames(items) <- id N <- nrow(items) # ---- helpers ---- .safe_sd <- function(x) { s <- sd(x, na.rm = TRUE) if (!is.finite(s) || s == 0) NA_real_ else s } .z_with_train <- function(x, mu, s) { if (!is.finite(mu) || !is.finite(s) || s == 0) return(rep(0, length(x))) (x - mu) / s } .fit_efa_and_mapping <- function(zmat, nf_fixed) { ok_col <- apply(zmat, 2, function(v) { v2 <- v[is.finite(v)] length(v2) >= 3 && sd(v2) > 0 }) z_use <- zmat[, ok_col, drop = FALSE] if (ncol(z_use) < 2 || nrow(z_use) < 4) stop("Too few usable columns/rows for EFA.") nf_eff <- nf_fixed nf_eff <- max(2, nf_eff) nf_eff <- min(nf_eff, ncol(z_use) - 1, nrow(z_use) - 1) nf_eff <- max(2, nf_eff) efa <- psych::fa(z_use, nfactors = nf_eff, rotate = rotate, fm = fm) L <- as.matrix(unclass(efa$loadings)) if (is.null(dim(L))) L <- matrix(L, ncol = 1) assign_factor <- apply(L, 1, function(r) { mx <- max(abs(r)) if (is.finite(mx) && mx > loading_cut) which.max(abs(r)) else NA_integer_ }) if (all(is.na(assign_factor))) { assign_factor <- apply(L, 1, function(r) which.max(abs(r))) } p <- ncol(L) leader <- character(p) for (j in seq_len(p)) leader[j] <- rownames(L)[which.max(abs(L[, j]))] leader <- make.unique(leader) list( L = L, z_cols = colnames(z_use), item_factor = assign_factor, # named by item leader = leader, nf = p ) } .score_all_people <- function(z_all_mat, mapping) { all_ids <- rownames(z_all_mat) theme_term <- rep(NA_character_, length(all_ids)) cluster_theme <- rep(NA_integer_, length(all_ids)) theme_score <- rep(NA_real_, length(all_ids)) use_items <- intersect(names(mapping$item_factor), colnames(z_all_mat)) if (length(use_items) == 0) { return(data.frame(ID=all_ids, theme_term, cluster_theme, theme_score, stringsAsFactors = FALSE)) } for (i in seq_along(all_ids)) { zr <- z_all_mat[i, , drop = TRUE] names(zr) <- colnames(z_all_mat) p <- mapping$nf f_means <- rep(-Inf, p) for (f in seq_len(p)) { vars <- names(mapping$item_factor)[which(mapping$item_factor == f)] vars <- intersect(vars, use_items) if (length(vars) > 0) f_means[f] <- mean(zr[vars], na.rm = TRUE) } f_idx <- which.max(f_means) theme_term[i] <- mapping$leader[f_idx] cluster_theme[i] <- as.integer(f_idx) theme_score[i] <- as.numeric(f_means[f_idx]) } data.frame(ID=all_ids, theme_term=theme_term, cluster_theme=cluster_theme, theme_score=theme_score, stringsAsFactors = FALSE) } # ---- alignment helper (Hungarian; K=2 fallback if clue not installed) ---- .align_one_fold <- function(base_lab, fold_lab) { # base_lab / fold_lab: vectors (same length), may contain NA, are labels (character or integer-as-character) base_lab <- as.character(base_lab) fold_lab <- as.character(fold_lab) ok <- !is.na(base_lab) & !is.na(fold_lab) if (sum(ok) < 3) return(fold_lab) base_levels <- sort(unique(base_lab[ok])) fold_levels <- sort(unique(fold_lab[ok])) K <- max(length(base_levels), length(fold_levels)) if (K < 2) return(fold_lab) # contingency tab <- table(factor(base_lab[ok], levels = base_levels), factor(fold_lab[ok], levels = fold_levels)) M <- matrix(0, nrow = K, ncol = K) M[seq_len(nrow(tab)), seq_len(ncol(tab))] <- tab maxM <- max(M) if (requireNamespace("clue", quietly = TRUE)) { cost <- maxM - M perm <- clue::solve_LSAP(cost) # row i matched to col perm[i] perm <- as.integer(perm) # mapping: fold col j -> base row i where perm[i] == j inv <- rep(NA_integer_, K) inv[perm] <- seq_len(K) mapped <- fold_lab for (j in seq_along(fold_levels)) { row_i <- inv[j] if (!is.na(row_i) && row_i <= length(base_levels)) { mapped[fold_lab == fold_levels[j]] <- base_levels[row_i] } else { mapped[fold_lab == fold_levels[j]] <- base_levels[1] } } return(mapped) } # fallback: only handle K==2 by checking swap if (K == 2 && length(base_levels) == 2 && length(fold_levels) == 2) { mappedA <- fold_lab mappedA[fold_lab == fold_levels[1]] <- base_levels[1] mappedA[fold_lab == fold_levels[2]] <- base_levels[2] mappedB <- fold_lab mappedB[fold_lab == fold_levels[1]] <- base_levels[2] mappedB[fold_lab == fold_levels[2]] <- base_levels[1] accA <- mean(mappedA[ok] == base_lab[ok]) accB <- mean(mappedB[ok] == base_lab[ok]) if (accB > accA) return(mappedB) else return(mappedA) } stop("Need package: clue (or only K=2 fallback). Install.packages('clue')") } # ---- baseline mapping on full sample ---- mu_all <- sapply(items, function(x) mean(x, na.rm = TRUE)) sd_all <- sapply(items, .safe_sd) z_all <- as.data.frame(items) for (nm in names(z_all)) z_all[[nm]] <- .z_with_train(items[[nm]], mu_all[[nm]], sd_all[[nm]]) z_all <- as.matrix(z_all) rownames(z_all) <- id base_map <- .fit_efa_and_mapping(z_all, nf_fixed = nf) base_sc <- .score_all_people(z_all, base_map) theme_base <- setNames(base_sc$theme_term, base_sc$ID) cluster_base <- setNames(as.character(base_sc$cluster_theme), base_sc$ID) # store as character for alignment # ---- store bootstrap theme_term / cluster for each person ---- theme_mat <- matrix(NA_character_, nrow = N, ncol = B, dimnames = list(id, paste0("b", seq_len(B)))) cluster_mat <- matrix(NA_character_, nrow = N, ncol = B, dimnames = list(id, paste0("b", seq_len(B)))) ok_fold <- logical(B) err_fold <- character(B) for (b in seq_len(B)) { idx <- sample.int(N, size = N, replace = TRUE) boot_items <- items[idx, , drop = FALSE] # z using bootstrap-sample mean/sd mu_b <- sapply(boot_items, function(x) mean(x, na.rm = TRUE)) sd_b <- sapply(boot_items, .safe_sd) z_b <- as.data.frame(boot_items) for (nm in names(z_b)) z_b[[nm]] <- .z_with_train(boot_items[[nm]], mu_b[[nm]], sd_b[[nm]]) z_b <- as.matrix(z_b) fit <- tryCatch(.fit_efa_and_mapping(z_b, nf_fixed = nf), error = function(e) e) if (inherits(fit, "error")) { err_fold[b] <- conditionMessage(fit) next } # score ALL original people using THIS fold's mean/sd + mapping z_all_b <- as.data.frame(items) for (nm in names(z_all_b)) z_all_b[[nm]] <- .z_with_train(items[[nm]], mu_b[[nm]], sd_b[[nm]]) z_all_b <- as.matrix(z_all_b) rownames(z_all_b) <- id sc_all <- .score_all_people(z_all_b, fit) theme_mat[sc_all$ID, b] <- sc_all$theme_term cluster_mat[sc_all$ID, b] <- as.character(sc_all$cluster_theme) ok_fold[b] <- TRUE if (verbose && (b %% 25 == 0)) cat("[bootstrap] fold", b, "/", B, "\n") } # ---- RAW stability ---- stable_person <- sapply(id, function(pid) { v <- theme_mat[pid, ok_fold] v <- v[!is.na(v)] if (!length(v)) return(NA_real_) mean(v == theme_base[[pid]]) }) overall_stability <- mean(stable_person, na.rm = TRUE) majority_theme <- sapply(id, function(pid) { v <- theme_mat[pid, ok_fold] v <- v[!is.na(v)] if (!length(v)) return(NA_character_) tab <- sort(table(v), decreasing = TRUE) names(tab)[1] }) majority_prop <- sapply(id, function(pid) { v <- theme_mat[pid, ok_fold] v <- v[!is.na(v)] if (!length(v)) return(NA_real_) tab <- sort(table(v), decreasing = TRUE) as.numeric(tab[1]) / sum(tab) }) person_tbl <- data.frame( ID = id, theme_term_base = unname(theme_base[id]), stability_to_base = as.numeric(stable_person), majority_theme = unname(majority_theme), majority_prop = as.numeric(majority_prop), stringsAsFactors = FALSE ) unstable <- person_tbl[is.finite(person_tbl$stability_to_base) & person_tbl$stability_to_base < 0.90, ] unstable <- unstable[order(unstable$stability_to_base), , drop = FALSE] summary <- list( N = N, nf_fixed = nf, B = B, successful_folds = sum(ok_fold), overall_mean_stability = overall_stability, n_ge_0.90 = sum(person_tbl$stability_to_base >= 0.90, na.rm = TRUE), n_ge_0.95 = sum(person_tbl$stability_to_base >= 0.95, na.rm = TRUE), unstable_persons = unstable ) # ---- ALIGNED stability (fold-by-fold label mapping to baseline) ---- theme_mat_aligned <- theme_mat cluster_mat_aligned <- cluster_mat for (b in which(ok_fold)) { fb_theme <- theme_mat[, b] fb_clu <- cluster_mat[, b] theme_mat_aligned[, b] <- .align_one_fold(theme_base[id], fb_theme) cluster_mat_aligned[, b] <- .align_one_fold(cluster_base[id], fb_clu) } stable_person_aligned <- sapply(id, function(pid) { v <- theme_mat_aligned[pid, ok_fold] v <- v[!is.na(v)] if (!length(v)) return(NA_real_) mean(v == theme_base[[pid]]) }) overall_stability_aligned <- mean(stable_person_aligned, na.rm = TRUE) majority_theme_aligned <- sapply(id, function(pid) { v <- theme_mat_aligned[pid, ok_fold] v <- v[!is.na(v)] if (!length(v)) return(NA_character_) tab <- sort(table(v), decreasing = TRUE) names(tab)[1] }) majority_prop_aligned <- sapply(id, function(pid) { v <- theme_mat_aligned[pid, ok_fold] v <- v[!is.na(v)] if (!length(v)) return(NA_real_) tab <- sort(table(v), decreasing = TRUE) as.numeric(tab[1]) / sum(tab) }) person_aligned <- data.frame( ID = id, theme_term_base = unname(theme_base[id]), stability_aligned_to_base = as.numeric(stable_person_aligned), majority_theme_aligned = unname(majority_theme_aligned), majority_prop_aligned = as.numeric(majority_prop_aligned), stringsAsFactors = FALSE ) unstable_aligned <- person_aligned[ is.finite(person_aligned$stability_aligned_to_base) & person_aligned$stability_aligned_to_base < 0.90, ] unstable_aligned <- unstable_aligned[order(unstable_aligned$stability_aligned_to_base), , drop = FALSE] # aligned cluster-id agreement (optional summary) stable_cluster_aligned <- sapply(id, function(pid) { v <- cluster_mat_aligned[pid, ok_fold] v <- v[!is.na(v)] if (!length(v)) return(NA_real_) mean(v == cluster_base[[pid]]) }) summary_aligned <- list( N = N, nf_fixed = nf, B = B, successful_folds = sum(ok_fold), overall_mean_stability_aligned = overall_stability_aligned, n_ge_0.90_aligned = sum(person_aligned$stability_aligned_to_base >= 0.90, na.rm = TRUE), n_ge_0.95_aligned = sum(person_aligned$stability_aligned_to_base >= 0.95, na.rm = TRUE), unstable_persons_aligned = unstable_aligned, overall_mean_cluster_id_agreement_aligned = mean(stable_cluster_aligned, na.rm = TRUE) ) if (verbose) { cat("\n[bootstrap] successful folds:", summary$successful_folds, "/", B, "\n") cat("[bootstrap RAW] overall mean stability:", sprintf("%.3f", summary$overall_mean_stability), "\n") cat("[bootstrap RAW] persons >=0.90:", summary$n_ge_0.90, "/", N, "\n") cat("\n[bootstrap ALIGNED] overall mean stability:", sprintf("%.3f", summary_aligned$overall_mean_stability_aligned), "\n") cat("[bootstrap ALIGNED] persons >=0.90:", summary_aligned$n_ge_0.90_aligned, "/", N, "\n") cat("[bootstrap ALIGNED] mean cluster-id agreement:", sprintf("%.3f", summary_aligned$overall_mean_cluster_id_agreement_aligned), "\n") } list( summary = summary, person = person_tbl, theme_mat = theme_mat, cluster_mat = cluster_mat, ok_fold = ok_fold, err_fold = err_fold, # new summary_aligned = summary_aligned, person_aligned = person_aligned, theme_mat_aligned = theme_mat_aligned, cluster_mat_aligned = cluster_mat_aligned, # baseline snapshot (有時候寫論文很有用) baseline = list( theme_base = theme_base, cluster_base = cluster_base, base_map = base_map, base_sc = base_sc ) ) } # ------------------------- # 1) Data (TAB separated) <<<< replace this block if you want to read from file # ------------------------- df_raw <- read.table( textConnection(' ID\tAdh_MissDose\tAdh_Timing\tAdh_RefillDelay\tAdh_StopWhenBetter\tAdh_SkipSideEffects\tMood_Anhedonia\tMood_Depressed\tMood_SleepProblem\tMood_Anxiety\tMood_Fatigue\tProfile 1\t4\t4\t3\t4\t3\t3\t3\t3\t3\t3\t1 2\t3\t3\t3\t3\t3\t3\t4\t4\t3\t3\t1 3\t4\t4\t4\t4\t4\t5\t5\t5\t5\t5\t1 4\t4\t5\t4\t4\t4\t4\t4\t4\t4\t4\t1 5\t3\t3\t3\t3\t3\t3\t3\t2\t3\t3\t1 6\t3\t2\t3\t3\t3\t2\t1\t1\t1\t2\t1 7\t3\t3\t4\t3\t3\t2\t2\t2\t2\t2\t1 8\t3\t3\t2\t3\t3\t3\t3\t3\t3\t3\t1 9\t3\t3\t2\t3\t2\t3\t2\t3\t2\t2\t1 10\t4\t3\t4\t4\t4\t4\t4\t3\t4\t4\t1 11\t2\t2\t3\t2\t3\t2\t2\t2\t2\t2\t1 12\t1\t1\t1\t1\t1\t4\t4\t4\t4\t3\t1 13\t2\t2\t3\t2\t2\t2\t2\t2\t2\t2\t0 14\t4\t3\t4\t3\t3\t3\t4\t4\t3\t3\t0 15\t4\t4\t4\t4\t4\t3\t3\t3\t3\t2\t0 16\t3\t3\t2\t3\t3\t2\t2\t2\t2\t2\t0 17\t3\t3\t2\t2\t3\t3\t3\t3\t3\t3\t0 18\t3\t4\t3\t3\t3\t2\t2\t2\t2\t2\t0 19\t1\t1\t1\t2\t1\t3\t3\t3\t3\t3\t0 20\t5\t5\t5\t5\t5\t5\t4\t4\t5\t5\t0 '), header = TRUE, sep = "\t", fill = TRUE, quote = "", check.names = TRUE ) boot <- bootstrap_theme_stability(df_raw, id_col="ID", profile_col="Profile", nf=2, B=200, seed=123) boot$summary boot$summary_aligned boot$person boot$person_aligned note <- " 論文可用的結果句（中英各一段，強但不容易被打槍） English (paper-ready) We evaluated the robustness of the deterministic theme assignment rule (theme_term = argmax of factor-mean Z scores) using resampling-based stability analyses under a fixed two-factor EFA specification. Leave-one-out analysis successfully converged in all 20 folds, yielding high agreement with the baseline solution (theme_term agreement = 0.95; factor/cluster ID agreement = 0.90), with only one participant switching between closely related mood themes. In bootstrap resampling (B = 200), the naive (non-aligned) agreement underestimated stability (mean = 0.489), consistent with factor label permutation and rotational indeterminacy in EFA. After post-hoc label alignment via an assignment algorithm, the mean participant-level stability increased substantially (mean = 0.877; range 0.625–1.000), and 11 of 20 participants showed ≥0.95 agreement with the baseline theme. Residual instability was concentrated in a small subset of borderline profiles, suggesting proximity to the decision boundary rather than global model instability. 中文（論文可用）為檢驗以「各因子 mean-Z 最大者（argmax）」決定個人主題（theme_term）的分類規則之穩健性，本研究在固定二因子 EFA 架構下進行重抽樣穩定度分析。Leave-one-out 於 20/20 折均成功收斂，與全樣本基準解一致性高（theme_term 一致率 = 0.95；cluster ID 一致率 = 0.90），僅見 1 位個案在相近的情緒主題間發生切換。Bootstrap（B=200）下，未進行對齊之直接一致率平均為 0.489，顯示 EFA 在重抽樣時可能出現因子順序互換與旋轉不定性而低估穩定度。經 Hungarian/LSAP 進行標籤對齊後，個人層級穩定度顯著提升（平均 = 0.877；範圍 0.625–1.000），其中 11/20 個案達到 ≥0.95 的高一致性。其餘不穩定主要集中於少數邊界個案，推測與兩因子得分差距較小、接近決策邊界有關，並非整體模型不穩。 "