# ---- Script setup -----------------------------------------------------------
# NOTE(review): wiping the workspace and calling setwd() are kept because the
# rest of the script uses paths relative to the project root; consider moving
# to here::here()-based paths and dropping both calls.
remove(list = ls())
if (!requireNamespace("here", quietly = TRUE)) install.packages("here")
library(here)
setwd(here::here())

# Root folder for all WGCNA results. The trailing slash is required because
# later code builds per-group folders with paste0(output_path1, group_name).
output_path1 <- "results/WGCNA_result/"
# dir.create() returns a logical status (TRUE only when the folder was newly
# created), NOT a path; `output_path` is kept under its original name for
# backward compatibility. showWarnings = FALSE keeps re-runs quiet when the
# folder already exists.
output_path <- dir.create(output_path1, recursive = TRUE, showWarnings = FALSE)

# Cell-type group to analyse: "CD4", "CD8A" or "CD4CD8A".
group_type <- "CD4"

# List the expression-matrix files found under data/<group_type>/.
df_list <- list.files(paste0("data/", group_type, "/"), full.names = TRUE)
# Fail fast with a clear message instead of an obscure read error later on.
if (length(df_list) == 0) {
  stop("No expression matrix files found in 'data/", group_type, "/'",
       call. = FALSE)
}
# ---- Per-file WGCNA analysis ------------------------------------------------
# Load the analysis packages and set options once, before iterating.
# WGCNA is attached before tidyverse, preserving the original attach order.
library(WGCNA)
library(gplots)
library(tidyverse)
options(stringsAsFactors = FALSE)

# For every expression matrix in `df_list`:
#   1. read the matrix and transpose it to samples x genes,
#   2. derive binary Tumor/Normal traits from the sample names,
#   3. pick a soft-threshold power and build the co-expression modules,
#   4. correlate module eigengenes and genes with the traits,
#   5. write the result tables into results/WGCNA_result/<group_name>/.
# Relies on `df_list`, `group_type` and `output_path1` defined earlier.
# seq_along() (rather than 1:length()) makes an empty `df_list` a no-op.
for (i in seq_along(df_list)) {
  # i <- 1  # uncomment for interactive debugging
  # Group label = <group_type>_<file-name prefix before the first "_">.
  group_name <- paste0(group_type, "_",
                       strsplit(basename(df_list[i]), "_")[[1]][1])
  print(group_name)

  # Per-group output folder. `output_path_each` only holds dir.create()'s
  # logical status and is kept under its original name for compatibility.
  output_path_each1 <- paste0(output_path1, group_name)
  output_path_each <- dir.create(output_path_each1, recursive = TRUE,
                                 showWarnings = FALSE)

  # Read the expression matrix (first column becomes the row names). The
  # variable is no longer called `list`, which shadowed base::list().
  expr_raw <- read.delim(df_list[i], header = TRUE, sep = "\t", row.names = 1)
  # WGCNA expects samples in rows and genes in columns, hence the transpose.
  datExpr <- as.data.frame(t(expr_raw))

  nGenes <- ncol(datExpr)
  nSamples <- nrow(datExpr)

  # ---- Phenotype (trait) table ----
  # Characters 14-15 of each sample name are read as a numeric code; codes
  # below 10 are labelled tumor, all others normal.
  # NOTE(review): presumably the TCGA barcode sample-type field - confirm.
  # The code is converted to integer explicitly: comparing the raw substring
  # with 10 would coerce 10 to "10" and compare strings lexicographically.
  sample_code <- suppressWarnings(
    as.integer(substr(rownames(datExpr), 14, 15))
  )
  if (anyNA(sample_code)) {
    warning("Some sample names in '", df_list[i],
            "' have no numeric code at positions 14-15; ",
            "their traits are set to NA", call. = FALSE)
  }
  tumor <- as.numeric(sample_code < 10)
  normal <- 1 - tumor
  datTraits <- data.frame(Tumor = tumor, Normal = normal,
                          row.names = rownames(datExpr))

  # ---- Soft-threshold selection ----
  powers <- c(1:10, seq(from = 12, to = 20, by = 2))
  # This step can take a long time (around half an hour per the original note).
  sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
  sizeGrWindow(9, 5)
  par(mfrow = c(1, 2))
  cex1 <- 0.9
  # Left panel: scale-free topology fit versus power.
  plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
       xlab = "Soft Threshold (power)",
       ylab = "Scale Free Topology Model Fit,signed R^2", type = "n",
       main = paste("Scale independence"))
  text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
       labels = powers, cex = cex1, col = "red")
  abline(h = 0.85, col = "red") # reference line at 0.85; adjust as needed
  # Right panel: mean connectivity versus power.
  plot(sft$fitIndices[, 1], sft$fitIndices[, 5],
       xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
       main = paste("Mean connectivity"))
  text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = powers, cex = cex1,
       col = "red")

  # Power recommended by pickSoftThreshold().
  power_pick <- sft$powerEstimate
  # When no power is recommended, fall back to a value chosen by sample count.
  # The ranges are contiguous so that exactly 20, 30 or 40 samples no longer
  # leave `power_pick` as NA.
  if (is.na(power_pick)) {
    if (nSamples < 20) {
      power_pick <- 9
    } else if (nSamples < 30) {
      power_pick <- 8
    } else if (nSamples < 40) {
      power_pick <- 7
    } else {
      power_pick <- 6
    }
  }

  # ---- Network construction ----
  net <- blockwiseModules(datExpr, power = power_pick,
                          maxBlockSize = nGenes, TOMType = "unsigned",
                          minModuleSize = 30, reassignThreshold = 0,
                          mergeCutHeight = 0.25,
                          numericLabels = TRUE, pamRespectsDendro = FALSE,
                          saveTOMs = FALSE, verbose = 3)

  # Show the number of modules and the gene count in each one. An explicit
  # print() is needed because values are not auto-printed inside a for loop.
  print(table(net$colors))

  # ---- Module-trait correlation table ----
  moduleLabels <- net$colors
  moduleColors <- labels2colors(net$colors)
  geneTree <- net$dendrograms[[1]]

  # Recompute module eigengenes using the colour labels.
  MEs0 <- moduleEigengenes(datExpr, moduleColors)$eigengenes
  MEs <- orderMEs(MEs0)
  moduleTraitCor <- cor(MEs, datTraits, use = "p")
  moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
  sizeGrWindow(10, 6)
  # Cell labels of the form "<correlation> (<p-value>)".
  textMatrix <- paste0(signif(moduleTraitCor, 2), " (",
                       signif(moduleTraitPvalue, 1), ")")
  dim(textMatrix) <- dim(moduleTraitCor)

  # Matrix needed to draw the correlation heatmap: the correlation columns
  # followed by the label columns, which reuse the same trait names.
  data_heatmap <- data.frame(cbind(moduleTraitCor, textMatrix))
  n_traits <- ncol(moduleTraitCor)
  names(data_heatmap)[n_traits + seq_len(n_traits)] <-
    names(data_heatmap)[seq_len(n_traits)]
  write.table(data_heatmap,
              file = paste0(output_path_each1, "/", group_name, "_heatmap.txt"),
              sep = "\t", quote = FALSE, col.names = NA) # required result

  # ---- Gene significance (GS) and module membership (MM) ----
  tumor <- data.frame(tumor = datTraits$Tumor)
  modNames <- substring(names(MEs), 3)
  geneModuleMembership <- as.data.frame(cor(datExpr, MEs, use = "p"))
  MMPvalue <- as.data.frame(
    corPvalueStudent(as.matrix(geneModuleMembership), nSamples)
  )
  names(geneModuleMembership) <- paste0("MM", modNames)
  names(MMPvalue) <- paste0("p.MM", modNames)
  # Correlation of each gene with the tumor trait.
  geneTraitSignificance <- as.data.frame(cor(datExpr, tumor, use = "p"))
  GSPvalue <- as.data.frame(
    corPvalueStudent(as.matrix(geneTraitSignificance), nSamples)
  )
  names(geneTraitSignificance) <- paste0("GS.", names(tumor))
  names(GSPvalue) <- paste0("p.GS.", names(tumor))

  # Required result: correlation of each module with each trait.
  write.table(moduleTraitCor,
              paste0(output_path_each1, "/", group_name, "_moduleTraitCor.txt"),
              sep = "\t", quote = FALSE, col.names = NA)
  # Module eigengenes. The legacy fixed path is overwritten on every
  # iteration, so a per-group copy is written as well to keep all results.
  write.table(MEs, "results/test_MEs.txt", sep = "\t", quote = FALSE,
              col.names = NA)
  write.table(MEs,
              paste0(output_path_each1, "/", group_name, "_MEs.txt"),
              sep = "\t", quote = FALSE, col.names = NA)

  # Required result: GS and MM values of every gene plus its module colour.
  df_GS_MM <- data.frame(cbind(geneModuleMembership, geneTraitSignificance),
                         module = moduleColors)
  write.table(df_GS_MM,
              paste0(output_path_each1, "/", group_name, "_GS&MM.txt"),
              sep = "\t", quote = FALSE, col.names = NA)
}



