用R写了个函数实现GDC文件合并,记录一下

image.png
library(dplyr)
library(readr)
#' Merge one expression column across per-sample TSV files
#'
#' Scans every immediate sub-directory of `input_dir` for `*.tsv` files, reads
#' each file from its 7th line onward, keeps the three gene annotation columns
#' plus `column_name`, and full-joins all samples into one wide table with one
#' value column per file (named after the file, extension stripped). The table
#' is written to `<output_dir>/<column_name>_merged.tsv`.
#'
#' @param input_dir Directory whose immediate sub-directories hold the files.
#' @param column_name Column to extract; must be one of the nine column names
#'   assigned to every file.
#' @param output_dir Directory for the merged table; created if missing.
#' @return A data frame with `gene_id`, `gene_name`, `gene_type` and one
#'   column per input file. An error is raised when no `.tsv` file is found.
merge_column <- function(input_dir, column_name, output_dir) {
  key_cols <- c("gene_id", "gene_name", "gene_type")
  file_cols <- c(
    key_cols, "unstranded", "stranded_first", "stranded_second",
    "tpm_unstranded", "fpkm_unstranded", "fpkm_uq_unstranded"
  )

  # Read a single file and return the key columns plus the requested column,
  # the latter renamed to the file name without its extension.
  read_sample <- function(file) {
    # Validate before touching the disk: the column set is fixed, so there is
    # no point parsing the file when the request cannot be satisfied.
    if (!(column_name %in% file_cols)) {
      stop(paste("文件中缺少列:", column_name, ",文件:", file))
    }
    # The first six lines are skipped, so no header is read and the column
    # names are assigned by position below.
    exp_data <- readr::read_tsv(
      file,
      skip = 6, col_names = FALSE, show_col_types = FALSE
    )
    colnames(exp_data) <- file_cols
    exp_data <- as.data.frame(exp_data)

    sample_data <- exp_data[, c(key_cols, column_name), drop = FALSE]
    colnames(sample_data)[4] <- tools::file_path_sans_ext(basename(file))
    sample_data
  }

  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  cat("output_dir:", output_dir, "\n")

  sub_dirs <- list.dirs(input_dir, full.names = TRUE, recursive = FALSE)

  print("merging data...")
  per_dir <- vector("list", length(sub_dirs))
  # seq_along() yields an empty sequence when there are no sub-directories,
  # whereas 1:length() would iterate over 1 and 0.
  for (i in seq_along(sub_dirs)) {
    cat(i, ":", sub_dirs[i], "\n")
    files <- list.files(sub_dirs[i], pattern = "\\.tsv$", full.names = TRUE)
    per_dir[[i]] <- lapply(files, read_sample)
  }
  # Flatten one level: list of per-directory lists -> list of data frames.
  samples <- unlist(per_dir, recursive = FALSE)

  # Fail with an explicit message instead of handing NULL to write_tsv().
  if (length(samples) == 0) {
    stop("未找到任何 .tsv 文件,输入目录: ", input_dir, call. = FALSE)
  }

  # Left-to-right full joins, in the same order the files were discovered.
  merged_data <- Reduce(
    function(acc, sample_data) {
      dplyr::full_join(acc, sample_data, by = key_cols)
    },
    samples
  )

  output_file <- file.path(output_dir, paste0(column_name, "_merged.tsv"))
  readr::write_tsv(merged_data, output_file)
  cat("保存合并表到:", output_file, "\n")

  merged_data
}
# Driver: run merge_column() once per value column, reading from the current
# working directory, and keep each merged table in `res` under the column name.
column_name <- "unstranded"
output_dir <- "/Users/zhengyiyi/Desktop/res"
input_dir <- getwd()

res <- list()
for (column_i in c(
  "unstranded",
  "stranded_first",
  "stranded_second",
  "tpm_unstranded",
  "fpkm_unstranded",
  "fpkm_uq_unstranded"
)) {
  print(column_i)
  res[[column_i]] <- merge_column(
    input_dir = input_dir,
    column_name = column_i,
    output_dir = output_dir
  )
}