Age,Female,Male 0~10,5,6 11~20,15,14 21~30,20,22 31~40,25,24 41~50,25,28 51~60,30,26 61~70,20,18 71~80,15,16 81~90,10,12
Select option:
# # Load necessary libraries library(dplyr) library(tidyr) library(igraph) if (1==3){ # Read the CSV file data <- read.csv("F:/RR/top10keywords.csv") } else { # Read the data with consistent formatting data <- read.table(textConnection(' Year A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 2021 CONVOLUTIONAL NETWORKS CLASSIFICATION 2009 KERNELS 2020 CONVOLUTIONAL NETWORKS 2020 TRAVEL-TIME PREDICTION NEURAL-NETWORK FLOW MODELS VOLUME GAME GO 2021 FRAMEWORK 2018 DRUG-DRUG INTERACTIONS TARGET NETWORK PREDICTION COMBINATIONS MECHANISMS DATABASE PROTEIN DISCOVERY DISEASE TRENDS 2022 TOTAL-ENERGY CALCULATIONS MOLECULAR-DYNAMICS QUANTUM APPROXIMATION FORMATE DFT 2020 DATABASE QSAR HERG 2021 INHIBITORS SMILES '), header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = TRUE) } ncount<-20 # if dataoriginal<-data then enlarge the keyword number ncount<-55520 dataoriginal<-data itemlarger20<-0 #ncount addvalue<-0 CHORD<-TRUE # Check if the first column is numeric if (is.numeric(data[[1]])) { # Extract columns 2 to ncol(data) data <- data[, 2:ncol(data)] # Print the extracted data head(data,3) } else { # Print a message if the first column is not numeric cat("The first column is not numeric.\n") } # Convert the data into a single column of keywords keywords <- data %>% pivot_longer(cols = everything(), values_to = "keyword") %>% filter(!is.na(keyword) & keyword != "") %>% # Remove empty or NA values pull(keyword) # Compute keyword frequency keyword_frequency <- as.data.frame(table(keywords)) # Sort by frequency in descending order keyword_frequency <- keyword_frequency %>% arrange(desc(Freq)) # Print the keyword frequency table keyword_frequency<-head(keyword_frequency,10) #formatted_keywords <- apply(keyword_frequency, 1, function(row) paste(row[1],", ",row[2])) formatted_keywords <- apply(keyword_frequency, 1, function(row) paste(row[1], row[2], sep = "\t")) cat(formatted_keywords, sep = "\n") keyword_frequency <- as.data.frame(table(keywords)) # Sort by frequency in descending order keyword_frequency <- keyword_frequency %>% arrange(desc(Freq)) colnames(keyword_frequency) <- c("name", "value") nodes<-keyword_frequency # Sorting nodes by value (descending) and name (ascending) nodes <- nodes %>% arrange(desc(value), name) nodes <- head(keyword_frequency, ncount) ############################################# url <- "https://www.raschonline.com/raschonline/pairrelation.txt" sourceCode <- readLines(url, warn = FALSE) eval(parse(text = sourceCode)) ############FLCA######## network <- graph_from_data_frame(d = data, vertices = nodes, directed = FALSE) url <- "https://www.raschonline.com/raschonline/FLCA.txt" sourceCode <- readLines(url, warn = FALSE) eval(parse(text = sourceCode)) ########################################## ceb <- FLCA(network) nodes<-NULL nodes <- data.frame( name = ceb$name, value = ceb$value, carac = ceb$carac ) library(tibble) # Ensure tibble is loaded # Create data frame properly ##############aaaaa############################ ########################################## # Convert 'carac' to factor nodes$carac <- as.factor(nodes$carac) # Define color palette specified_colors <- c( "#FF0000", "#0000FF", "#998000", "#008000", "#800080", "#FFC0CB", "#000000", "#ADD8E6", "#FF4500", "#A52A2A", "#8B4513", "#FF8C00", "#32CD32", "#4682B4", "#9400D3", "#FFD700", "#C0C0C0", "#DC143C", "#1E90FF" ) # Expand palette if needed unique_clusters <- unique(nodes$carac) num_clusters <- length(unique_clusters) if (num_clusters > length(specified_colors)) { extra_colors <- grDevices::hcl.colors(num_clusters - length(specified_colors), "Dark 3", rev = TRUE) full_color_set <- c(specified_colors, extra_colors) } else { full_color_set <- specified_colors } # Map color to clusters color_mapping <- setNames(full_color_set[1:num_clusters], levels(nodes$carac)) nodes$color <- color_mapping[as.character(nodes$carac)] nodes <- nodes[order(nodes$carac,-nodes$value, nodes$name), ] ################################### if ( CHORD==TRUE){ library(chorddiag) library(dplyr) library(tidyr) # Create unique names list unique_names <- unique(c(data$Leader, data$follower)) # Initialize an empty matrix adj_matrix <- matrix(0, nrow = length(unique_names), ncol = length(unique_names), dimnames = list(unique_names, unique_names)) # Fill the matrix with WCD values for (i in 1:nrow(data)) { adj_matrix[data$Leader[i], data$follower[i]] <- data$WCD[i] } # Set diagonal values (self-loops) to 1.0 # Ensure `nodes$name` is in the same order as `adj_matrix` row/column names ordered_nodes <- nodes[match(rownames(adj_matrix), nodes$name), ] # Match order # Ensure there are no missing values after reordering ordered_nodes <- ordered_nodes[!is.na(ordered_nodes$name), ] # Create a named vector for diagonal values using the corrected order matched_values <- setNames(ordered_nodes$value, ordered_nodes$name) # Set diagonal values where names match for (name in rownames(adj_matrix)) { if (!is.na(matched_values[name])) { # Ensure no NA values adj_matrix[name, name] <- matched_values[name] } } # Ensure all names in nodes exist in unique_names ordered_names <- nodes$name[nodes$name %in% rownames(adj_matrix)] # Reorder the adjacency matrix based on ordered_names adj_matrix <- adj_matrix[ordered_names, ordered_names] # Print to verify order print(rownames(adj_matrix)) m <- adj_matrix # Check the matrix # Define row and column names using unique node names haircolors <- unique(c(data$Leader, data$follower)) # Assign proper row and column names to the adjacency matrix dimnames(adj_matrix) <- list(have = haircolors, prefer = haircolors) # Convert groupColors to a named list groupColors <- as.list(setNames(nodes$color, nodes$name)) # Ensure colors match reordered names groupColors <- setNames(nodes$color, nodes$name) # Keep only colors for the ordered names groupColors <- groupColors[ordered_names] # Convert to list format groupColors <- as.list(groupColors) m <- adj_matrix dimnames(m) <- list(have = haircolors, prefer = haircolors) # Ensure row and column names follow `ordered_names` dimnames(adj_matrix) <- list(ordered_names, ordered_names) # Debugging Output - Verify Final Order print("Final Ordered Names:") print(ordered_names) groupColors <- as.character(nodes$color) p <- chorddiag(adj_matrix, groupColors = groupColors, groupnamePadding = 20, showGroupnames = TRUE, margin = 150) p } if ( CHORD==TRUE){ library(circlize) library(tidyverse) library(RColorBrewer) edges<-data # === Step 2: 整理欄位名稱與排序節點 === names(nodes)[1:3] <- c("name", "value", "group") # 項目, 數值, 分群 names(edges)[1:3] <- c("from", "to", "weight") # 關聯 # 節點排序:先 group,再 -value,再 name nodes <- nodes %>% arrange(group, desc(value), name) # 節點名稱順序供後續設定 factor levels ordered_names <- nodes$name nodes$name <- factor(nodes$name, levels = ordered_names) # === Step 3: 指定每個 group 的顏色 === group_levels <- sort(unique(nodes$group)) group_colors <- brewer.pal(max(3, length(group_levels)), "Set1") names(group_colors) <- group_levels # 節點對應顏色(依照 group) node_colors <- setNames(group_colors[as.character(nodes$group)], nodes$name) # === Step 4: Chord diagram 的邊資料 === chord_data <- edges %>% filter(from %in% nodes$name & to %in% nodes$name) %>% select(from, to, value = weight) # === Step 5: 繪製 chord diagram === circos.clear() circos.par(gap.degree = 4, track.margin = c(0.01, 0.01)) chordDiagram( chord_data, grid.col = node_colors, order = levels(nodes$name), # 套用排序順序 transparency = 0.3, annotationTrack = "grid", preAllocateTracks = list(track.height = 0.06) ) # === Step 6: 加上外圈標籤 === circos.trackPlotRegion( track.index = 1, panel.fun = function(x, y) { sector.name = get.cell.meta.data("sector.index") xlim = get.cell.meta.data("xlim") circos.text( x = mean(xlim), y = 0.8, labels = sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5), cex = 0.8, font = 2 ) }, bg.border = NA ) } ################################### ################################### data<-data[,1:3] head(data,3) nodes$carac<-nodes$group nodes <- nodes[, c("name", "value","carac")] nodes<-nodes[,1:3] parantheses<-0 url <- "https://www.raschonline.com/raschonline/sankeyplot.txt" sourceCode <- readLines(url, warn = FALSE) eval(parse(text = sourceCode)) nodes<-nodes[,1:3] nodes unique_elements<-nodes$name dataall <- subset(relation_set, Leader %in% unique_elements & follower %in% unique_elements) #write.csv(nodes, "F:/RR/nodesXX.csv", row.names = FALSE) #write.csv(data, "F:/RR/dataXX.csv", row.names = FALSE) #write.csv(dataall, "F:/RR/dataall.csv", row.names = FALSE) #write.csv(keyword_frequency, "F:/RR/nodesXX.csv", row.names = FALSE) ################################################# data<-dataoriginal # Improved function with whitespace trimming and case-insensitive match get_cluster_number <- function(keyword, nodes_map) { if (is.na(keyword) || trimws(keyword) == "") return(NA_character_) keyword_clean <- toupper(trimws(keyword)) if (keyword_clean %in% names(nodes_map)) { return(nodes_map[[keyword_clean]]) } else { return(NA_character_) } } # Ensure consistency in the nodes_map as well nodes$name <- toupper(trimws(nodes$name)) nodes_map <- setNames(nodes$carac, nodes$name) # Apply the mapping cluster_data <- data #for (col in names(data)[-1]) { for (col in names(data)) { cluster_data[[col]] <- sapply(data[[col]], get_cluster_number, nodes_map = nodes_map) } # View the result head(cluster_data,10) # Function to compute the mode (most frequent value) excluding empty strings get_mode <- function(x) { x <- x[x != "" & !is.na(x)] # remove blanks and NAs if (length(x) == 0) return(NA) freq <- table(x) # Check if all frequencies are equal if (length(unique(freq)) == 1) { return(min(as.numeric(names(freq)))) # return the smallest cluster number } else { return(names(freq)[which.max(freq)]) # standard mode } } # Add the first column "MainCluster" based on mode of A1 to A10 cluster_data$MainCluster <- apply(cluster_data, 1, get_mode) #cluster_data$MainCluster <- apply(cluster_data[, -1], 1, get_mode) head(cluster_data,10) # Move "MainCluster" to the first column cluster_data <- cluster_data[, c(ncol(cluster_data), 1:(ncol(cluster_data)-1))] # View result head(cluster_data,10) # Step 1: Ensure nodes$name is cleaned and ready nodes$name <- trimws(toupper(nodes$name)) # already done earlier if you're continuing # Step 2: Make a lookup table for representative keyword by carac # We'll take the first occurrence as representative (or you can choose by max value) rep_keywords <- aggregate(name ~ carac, data = nodes, FUN = function(x) x[1]) # first keyword per cluster rep_keyword_map <- setNames(rep_keywords$name, rep_keywords$carac) # Step 3: Add the corresponding keyword name to the second column cluster_data$ClusterKeyword <- rep_keyword_map[as.character(cluster_data$MainCluster)] head(cluster_data,10) # Step 4: Move it to second column cluster_data <- cluster_data[, c(1, ncol(cluster_data), 2:(ncol(cluster_data)-1))] # View the final result head(cluster_data,10) cluster_databk<-cluster_data cluster_databk<- cluster_databk[!apply(is.na(cluster_databk), 1, all), ] cluster_databk[is.na(cluster_databk)] <- "" ############################################################ cluster_summary <- cluster_data %>% count(MainCluster, sort = TRUE) sum(cluster_summary$n[!is.na(cluster_summary$MainCluster)]) head(cluster_summary,10) ##########################assign cluster to each article cat("End of themes assigned to articles","\t") ########################################### cluster_data<-cluster_data[,1:2] head(cluster_data,5) cat("cluster_data for themes in articles","\t") #write.csv(cluster_data, "F:/RR/cluster_data.csv", row.names = FALSE) # ClusterKeyword is the corresponding theme for each article # Generate frequency table keyword_freq <- sort(table(cluster_data$ClusterKeyword), decreasing = TRUE) top3 <- head(keyword_freq, 3) # extract r1, r2, r3 r1 <- as.numeric(top3[1]) r2 <- as.numeric(top3[2]) r3 <- as.numeric(top3[3]) # Compute intermediate term numerator_ratio <- (r1 * r3) / (r2^2) # Compute DC DC <- round(numerator_ratio / (1 + numerator_ratio),2) # Show the top 10 top_10_keywords <- head(keyword_freq, 10) # Print result print(top_10_keywords) # Generate frequency table keyword_freq <- sort(table(cluster_data$ClusterKeyword), decreasing = TRUE) # Show the top 10 top_10_keywords <- head(keyword_freq, 10) # Print result head(top_10_keywords,5) # Step 1: Create the frequency table # Step 1: Frequency table and conversion keyword_freq <- sort(table(cluster_data$ClusterKeyword), decreasing = TRUE) keyword_df <- as.data.frame(keyword_freq) colnames(keyword_df) <- c("Keyword", "Count") # Step 2: Compute h-index calculate_h_index <- function(counts) { h <- 0 for (i in seq_along(counts)) { if (counts[i] >= i) { h <- i } else { break } } return(h) } h_index <- calculate_h_index(keyword_df$Count) # Step 3: Subset top h-index keywords keyword_df_h <- keyword_df[1:h_index, ] total_count <- sum(keyword_df_h$Count) percentages <- (keyword_df_h$Count / total_count) * 100 # 2. Add percentages on top of bars # Step 4: Draw barplot and store midpoints # Add more space below for x-axis labels par(mar = c(5, 4, 4, 2)) # bottom, left, top, right # Create the barplot and save bar midpoints bp <- barplot( keyword_df_h$Count, xaxt = "n", # no default x-axis col = "skyblue", main = paste("Top", h_index, "Themes Contributing to that like h-index | DC =", round(DC, 2)), ylab = "Count in publications", ylim = c(0, max(keyword_df_h$Count) * 1.2) # leave space for h-index label ) # Add keyword names under each bar (fully visible, vertical) text( x = bp, y = -1, # position text just below axis labels = keyword_df_h$Keyword, srt = 60, # vertical text adj = 1, # right align xpd = TRUE, # allow drawing outside plot cex = 0.8 # text size ) text( x = bp, y = keyword_df_h$Count + (max(keyword_df_h$Count) * 0.05), labels = paste0(round(percentages, 1), "%"), cex = 0.8, col = "black" ) # Add h-index reference line abline(h = h_index, col = "red", lwd = 2, lty = 2) text( x = bp[1], y = h_index + 1, labels = paste("h-index =", h_index), col = "red", pos = 4 ) if (1==3){ library(dplyr) library(tidyr) # Step 0: Remove all columns named like "ClusterKeyword" to avoid duplication during merge nodes <- nodes[, !grepl("^ClusterKeyword", names(nodes))] # Step 1: Clean cluster_data (drop NAs and duplicates) cluster_data <- cluster_data[, 1:2] %>% na.omit() %>% distinct(MainCluster, .keep_all = TRUE) # Step 2: Merge with cluster_data (renamed here as top_clusters) top_clusters <- cluster_data # rename for clarity nodes_clean <- merge( nodes, top_clusters, by.x = "carac", by.y = "MainCluster", all.x = FALSE ) nrow(nodes_clean) keyword_frequency$name <- toupper(keyword_frequency$name) keyword_frequency$Count<-keyword_frequency$value keyword_frequency$value<-NULL nodes_clean <- merge( nodes_clean, keyword_frequency, by.x = "name", by.y = "name", all.x = FALSE ) # Step 3: Keep only rows whose ClusterKeyword appears in keyword_df_h nodes_clean <- nodes_clean[nodes_clean$ClusterKeyword %in% keyword_df_h$Keyword, ] # Step 4: Get top 5 keywords per ClusterKeyword (based on `value`) top_nodes <- nodes_clean %>% arrange(carac, desc(Count)) %>% group_by(carac) %>% slice_head(n = 5) %>% ungroup() # Step 5: Add index (1 to 5) per group for pivoting top_nodes <- top_nodes %>% group_by(ClusterKeyword) %>% mutate(index = row_number()) %>% ungroup() # Step 6: Pivot to wide format top_table <- top_nodes %>% select(ClusterKeyword, index, name, Count) %>% pivot_wider( names_from = index, values_from = c(name, Count), names_glue = "{.value}{index}" ) # Step 7: View or export result head(top_table, 10) cat("frequency observed for keywords","\t") library(dplyr) library(tidyr) library(stringr) # 1. Load your top_terms table top_terms <- top_table colnames(top_terms)[1]<-"topic" top_terms<-top_terms[1:4] top_terms <- top_terms %>% mutate(Top_Terms = paste(name1, name2,name3, sep = "/ ")) # 2. Generate topic labels top_terms_long <- top_terms %>% separate_rows(Top_Terms, sep = ";\\s*") %>% group_by(topic) %>% mutate(term_rank = row_number()) %>% ungroup() topic_labels <- top_terms_long %>% filter(term_rank <= 2) %>% group_by(topic) %>% summarise( topic_label = str_c(str_to_title(Top_Terms), collapse = " and "), .groups = "drop" ) print(topic_labels) # 4. Merge top_terms <- top_terms %>% left_join(topic_labels, by = c("topic" = "topic")) top_terms$Top_Terms<-NULL # 5. View head(top_terms) library(dplyr) library(stringr) # 1. Define stopwords to ignore in labels generic_words <- c("study", "results", "data", "care", "clinical", "patient", "app") top_terms <- top_terms %>% mutate(topic = row_number()) # 2. Reshape wide to long top_terms_long <- top_terms %>% pivot_longer(cols = starts_with("name"), names_to = "rank", values_to = "term") %>% mutate(term = str_to_title(term)) %>% filter(!tolower(term) %in% generic_words) # 3. Pick top 3 non-generic terms per topic top_terms_clean <- top_terms_long %>% group_by(topic) %>% slice_head(n = 3) %>% summarise( topic_label = str_c(term, collapse = " "), .groups = "drop" ) print(top_terms_clean) #I have 11 clusters of keywords. Please help me create human-readable theme labels: #Please provide polished, academic-style topic labels for each cluster. #I have topic modeling clusters with top keywords. Please help interpret them into short, publication-ready theme labels. Here are the clusters #Please give me human-readable topic labels for these keyword clusters #1. Cancer Care and Survivorship Management #2. COVID-19 Pandemic and Respiratory Disease Engagement #3. Medication Adherence and Technological Attitudes #4. Mobile Health (mHealth) Adoption and Feasibility #5. Pediatric Imaging and Pattern Recognition #6. Cloud-Based Health Services and Assisted Records #7. Mortality Measurement and Interactive Diagnostics #8. Healthcare Access Barriers and Provider Communication #9. eHealth Literacy, Skills, and Resource Activities #10. Telehealth Family Care Across Generations #11. Nursing Practice, Planning, and Usability # write.csv(top_table, "F:/RR/top_table.csv", row.names = FALSE) cluster_databk<-head(cluster_databk,10) # write.csv(cluster_databk, "F:/RR/cluster_databk.csv", row.names = FALSE) # bubble timeline plot at https://raschonline.com/raschonline/cbp.asp?cbp=2Bubbletimelineplottheme } ################################## #write.csv(cluster_data, "F:/RR/cluster_data.csv", row.names = FALSE) # Print result print(DC) aa<-as.data.frame(top_10_keywords) aa #https://pmc.ncbi.nlm.nih.gov/articles/PMC12149281/