Visual Code(data=example):

Search Text in optgroup.txt

Enter search string:

Visual Code(data=input box):

then to see the Example

Age,Female,Male 0~10,5,6 11~20,15,14 21~30,20,22 31~40,25,24 41~50,25,28 51~60,30,26 61~70,20,18 71~80,15,16 81~90,10,12

for Loo_cluster_theme_stability

Copy & Paste the Code in R to Rstudio

# loo_cluster_theme_stability <- function(df_raw, id_col = "ID", profile_col = "Profile", # 可有可無；只是排除用 nf = 2, loading_cut = 0.30, rotate = "varimax", fm = "minres", verbose = TRUE) { # ---- deps ---- if (!requireNamespace("psych", quietly = TRUE)) stop("Need package: psych") # ---- extract ID + item columns ---- df0 <- as.data.frame(df_raw, stringsAsFactors = FALSE) if (id_col %in% names(df0)) { id <- as.character(df0[[id_col]]) } else if (!is.null(rownames(df0)) && all(nchar(rownames(df0)) > 0)) { id <- rownames(df0) } else { stop("Cannot find ID. Provide id_col or rownames.") } # item cols = all except ID & profile drop_cols <- intersect(c(id_col, profile_col), names(df0)) item_cols <- setdiff(names(df0), drop_cols) # coerce to numeric items <- df0[, item_cols, drop = FALSE] items[] <- lapply(items, function(x) suppressWarnings(as.numeric(as.character(x)))) rownames(items) <- id # ---- helpers ---- .safe_sd <- function(x) { s <- sd(x, na.rm = TRUE) if (!is.finite(s) || s == 0) NA_real_ else s } .z_with_train <- function(x, mu, s) { # x: vector; mu/s: scalar if (!is.finite(mu) || !is.finite(s) || s == 0) return(rep(0, length(x))) (x - mu) / s } .fit_efa_and_mapping <- function(zmat, nf_fixed) { # drop cols with all NA or 0-variance after z (just in case) ok_col <- apply(zmat, 2, function(v) { v2 <- v[is.finite(v)] length(v2) >= 3 && sd(v2) > 0 }) z_use <- zmat[, ok_col, drop = FALSE] if (ncol(z_use) < 2 || nrow(z_use) < 4) { stop("Too few usable columns/rows for EFA in this fold.") } nf_eff <- nf_fixed nf_eff <- max(2, nf_eff) nf_eff <- min(nf_eff, ncol(z_use) - 1, nrow(z_use) - 1) nf_eff <- max(2, nf_eff) efa <- psych::fa(z_use, nfactors = nf_eff, rotate = rotate, fm = fm) L <- as.matrix(unclass(efa$loadings)) if (is.null(dim(L))) L <- matrix(L, ncol = 1) # item -> factor by max abs loading > cut assign_factor <- apply(L, 1, function(r) { mx <- max(abs(r)) if (is.finite(mx) && mx > loading_cut) which.max(abs(r)) else NA_integer_ }) # if all NA, fallback: assign by which.max(abs) without cut if (all(is.na(assign_factor))) { assign_factor <- apply(L, 1, function(r) which.max(abs(r))) } # factor leader item (top abs loading per factor) p <- ncol(L) leader <- character(p) for (j in seq_len(p)) { leader[j] <- rownames(L)[which.max(abs(L[, j]))] } leader <- make.unique(leader) list( efa = efa, L = L, z_cols = colnames(z_use), item_factor = assign_factor, leader = leader, nf = p ) } .score_one_person <- function(z_row_named, mapping) { # z_row_named: named numeric vector (all item cols), may include cols not used in efa # mapping$item_factor is named by items used in efa use_items <- names(mapping$item_factor) use_items <- intersect(use_items, names(z_row_named)) if (length(use_items) == 0) { return(list(theme_term = NA_character_, cluster_theme = NA_integer_, theme_score = NA_real_)) } # compute factor mean z for each factor using items assigned to it p <- mapping$nf f_means <- rep(NA_real_, p) for (f in seq_len(p)) { vars <- names(mapping$item_factor)[which(mapping$item_factor == f)] vars <- intersect(vars, use_items) if (length(vars) == 0) { f_means[f] <- -Inf } else { f_means[f] <- mean(z_row_named[vars], na.rm = TRUE) } } f_idx <- which.max(f_means) theme_term <- mapping$leader[f_idx] theme_score <- f_means[f_idx] list(theme_term = theme_term, cluster_theme = as.integer(f_idx), theme_score = as.numeric(theme_score)) } # ---- baseline (fit on all) ---- # compute z using all-sample mean/sd mu_all <- sapply(items, function(x) mean(x, na.rm = TRUE)) sd_all <- sapply(items, .safe_sd) z_all <- as.data.frame(items) for (nm in names(z_all)) { z_all[[nm]] <- .z_with_train(items[[nm]], mu_all[[nm]], sd_all[[nm]]) } z_all <- as.matrix(z_all) rownames(z_all) <- id base_map <- .fit_efa_and_mapping(z_all, nf_fixed = nf) base_theme <- data.frame( ID = id, stringsAsFactors = FALSE ) # score each person under baseline mapping (in-sample) base_sc <- lapply(seq_len(nrow(z_all)), function(i) { zr <- z_all[i, , drop = TRUE] names(zr) <- colnames(z_all) .score_one_person(zr, base_map) }) base_theme$theme_term_base <- vapply(base_sc, `[[`, character(1), "theme_term") base_theme$cluster_theme_base <- vapply(base_sc, `[[`, integer(1), "cluster_theme") base_theme$theme_score_base <- vapply(base_sc, `[[`, numeric(1), "theme_score") # ---- LOO (fit on N-1, score the left-out person out-of-sample) ---- loo_theme_term <- rep(NA_character_, length(id)) loo_cluster <- rep(NA_integer_, length(id)) loo_score <- rep(NA_real_, length(id)) loo_nf <- rep(NA_integer_, length(id)) loo_leader <- vector("list", length(id)) loo_ok <- rep(FALSE, length(id)) loo_err <- rep("", length(id)) for (i in seq_along(id)) { train_idx <- setdiff(seq_along(id), i) test_idx <- i train_items <- items[train_idx, , drop = FALSE] test_items <- items[test_idx, , drop = FALSE] mu_tr <- sapply(train_items, function(x) mean(x, na.rm = TRUE)) sd_tr <- sapply(train_items, .safe_sd) # z-train z_tr <- as.data.frame(train_items) for (nm in names(z_tr)) { z_tr[[nm]] <- .z_with_train(train_items[[nm]], mu_tr[[nm]], sd_tr[[nm]]) } z_tr <- as.matrix(z_tr) rownames(z_tr) <- rownames(train_items) # z-test using TRAIN mean/sd z_te <- as.data.frame(test_items) for (nm in names(z_te)) { z_te[[nm]] <- .z_with_train(test_items[[nm]], mu_tr[[nm]], sd_tr[[nm]]) } z_te <- as.numeric(as.matrix(z_te[1, , drop = FALSE])) names(z_te) <- colnames(train_items) # fit + score fit <- tryCatch( .fit_efa_and_mapping(z_tr, nf_fixed = nf), error = function(e) e ) if (inherits(fit, "error")) { loo_err[i] <- conditionMessage(fit) next } sc <- .score_one_person(z_te, fit) loo_theme_term[i] <- sc$theme_term loo_cluster[i] <- sc$cluster_theme loo_score[i] <- sc$theme_score loo_nf[i] <- fit$nf loo_leader[[i]] <- fit$leader loo_ok[i] <- TRUE } out <- base_theme out$theme_term_loo <- loo_theme_term out$cluster_theme_loo <- loo_cluster out$theme_score_loo <- loo_score out$nf_loo <- loo_nf out$loo_ok <- loo_ok out$loo_err <- loo_err # ---- agreement summary ---- ok <- out$loo_ok & !is.na(out$theme_term_loo) & !is.na(out$theme_term_base) agree_term <- mean(out$theme_term_loo[ok] == out$theme_term_base[ok]) agree_cluster <- mean(out$cluster_theme_loo[ok] == out$cluster_theme_base[ok]) flips <- out[ok & (out$theme_term_loo != out$theme_term_base), c("ID","theme_term_base","theme_term_loo","theme_score_base","theme_score_loo")] summary <- list( N = length(id), nf_fixed = nf, LOO_success = sum(out$loo_ok), agreement_theme_term = agree_term, agreement_cluster_id = agree_cluster, flipped_cases = flips ) if (verbose) { cat("\n[LOO] success:", summary$LOO_success, "/", summary$N, "\n") cat("[LOO] agreement(theme_term):", sprintf("%.3f", summary$agreement_theme_term), "\n") cat("[LOO] agreement(cluster_theme id):", sprintf("%.3f", summary$agreement_cluster_id), "\n") if (nrow(flips) > 0) { cat("[LOO] flips found:", nrow(flips), "cases\n") print(flips) } else { cat("[LOO] no flips (theme_term) 🎯\n") } } list(detail = out, summary = summary) } # ------------------------- # 1) Data (TAB separated) <<<< replace this block if you want to read from file # ------------------------- df_raw <- read.table( textConnection(' ID\tAdh_MissDose\tAdh_Timing\tAdh_RefillDelay\tAdh_StopWhenBetter\tAdh_SkipSideEffects\tMood_Anhedonia\tMood_Depressed\tMood_SleepProblem\tMood_Anxiety\tMood_Fatigue\tProfile 1\t4\t4\t3\t4\t3\t3\t3\t3\t3\t3\t1 2\t3\t3\t3\t3\t3\t3\t4\t4\t3\t3\t1 3\t4\t4\t4\t4\t4\t5\t5\t5\t5\t5\t1 4\t4\t5\t4\t4\t4\t4\t4\t4\t4\t4\t1 5\t3\t3\t3\t3\t3\t3\t3\t2\t3\t3\t1 6\t3\t2\t3\t3\t3\t2\t1\t1\t1\t2\t1 7\t3\t3\t4\t3\t3\t2\t2\t2\t2\t2\t1 8\t3\t3\t2\t3\t3\t3\t3\t3\t3\t3\t1 9\t3\t3\t2\t3\t2\t3\t2\t3\t2\t2\t1 10\t4\t3\t4\t4\t4\t4\t4\t3\t4\t4\t1 11\t2\t2\t3\t2\t3\t2\t2\t2\t2\t2\t1 12\t1\t1\t1\t1\t1\t4\t4\t4\t4\t3\t1 13\t2\t2\t3\t2\t2\t2\t2\t2\t2\t2\t0 14\t4\t3\t4\t3\t3\t3\t4\t4\t3\t3\t0 15\t4\t4\t4\t4\t4\t3\t3\t3\t3\t2\t0 16\t3\t3\t2\t3\t3\t2\t2\t2\t2\t2\t0 17\t3\t3\t2\t2\t3\t3\t3\t3\t3\t3\t0 18\t3\t4\t3\t3\t3\t2\t2\t2\t2\t2\t0 19\t1\t1\t1\t2\t1\t3\t3\t3\t3\t3\t0 20\t5\t5\t5\t5\t5\t5\t4\t4\t5\t5\t0 '), header = TRUE, sep = "\t", fill = TRUE, quote = "", check.names = TRUE ) res <- loo_cluster_theme_stability(df_raw, id_col="ID", profile_col="Profile", nf=2) res$summary res$detail note <- " 下面給你兩段「可以直接放進論文」的寫法（英文＋中文），語氣會很強，但不會踩 reviewer 最常抓的點（例如把 LOO stability 說成 external validity）。 English (paper-ready) We evaluated the stability of the deterministic theme assignment algorithm, which assigns each person to a theme by the argmax of factor-level mean z-scores (factor scores computed as the mean standardized response across items loading on each factor), under a leave-one-out (LOO) refitting procedure. Across all LOO folds (N = 20/20 successful refits; fixed nf = 2), the theme label (theme_term) agreed with the full-sample solution for 95% of participants, and the corresponding factor-based cluster index (cluster_theme) agreed for 90%. Only one participant changed the assigned theme (Mood_Anxiety → Mood_Depressed), and the associated theme score remained close in magnitude (0.83 vs 0.87), suggesting that the observed discrepancy likely reflects a near-boundary case rather than systematic instability. These results indicate high internal stability of the argmax(mean-z) mapping with respect to minor perturbations of the sample. （可選一句，若你想更保守：） This analysis assesses internal stability rather than external predictive validity, as no independent ground-truth labels were used. 中文（論文可用）本研究以留一法（leave-one-out, LOO）重估檢驗「以各因子之平均 z 分數（factor mean z-score）取最大者（argmax）」所進行的個人主題（theme）指派之穩定性。該方法先以 EFA 建立題項—因子對應，並以每位受試者在各因子所屬題項之平均標準化分數作為因子分數，再以最大者決定其 theme_term 與對應之 cluster_theme。在固定因子數 nf = 2 的條件下，20 次 LOO 重估皆成功（20/20）；與全樣本結果相比，theme_term 的一致率為 95%，cluster_theme 的一致率為 90%。僅 1 位受試者在 LOO 中出現主題轉換（Mood_Anxiety → Mood_Depressed），且其主題分數差異甚小（0.83 vs 0.87），顯示該差異更可能源於邊界個案而非系統性不穩定。整體而言，argmax(mean-z) 之主題指派在樣本微擾下呈現高度內部穩定性。（可選補一句，避免被 reviewer 質疑）「本檢驗評估的是方法的內部穩定性（stability），並非對外部真實標籤之預測效度（validity）。」 "