# Differential expression analysis (limma) for one GEO expression matrix.
#
# This section prepares the session: it fixes the working directory, declares
# the dataset identifiers, loads the expression matrix and sets the flags that
# select which analysis mode is run further down.

# NOTE(review): wiping the workspace and calling setwd() are kept because the
# rest of the script relies on relative 'results/...' paths.
remove(list = ls())

# 'H:/project' is a machine-specific location. Only switch to it when it
# actually exists, so the script does not abort on other machines before
# `here` gets a chance to locate the project root from the current directory.
if (dir.exists('H:/project')) setwd('H:/project')
if (!requireNamespace("here", quietly = TRUE)) install.packages("here")
library(here)
setwd(here::here())

# Dataset identifiers; they are used to build the input and output file names.
GEO_id <- 'GSE200606'
platform_id <- 'GPL10558'
tiss_type <- 'PBMC_from_Peripheral_blood'
marker <- 'illumina'

# Not referenced in this part of the script; presumably fold-change and
# p-value cut-offs for downstream filtering -- TODO confirm.
FC <- 2
p <- 0.05
library(limma)

# Tab-separated expression matrix with a header row (read once; the original
# read the same file twice).
expr_file <- file.path(
  'results', tiss_type, marker, 'matrix',
  paste0(GEO_id, '_', platform_id,
         '_expression_matrix_all_group_using_lumi_newpipeline_3_15.txt')
)
expr_mat <- read.table(file = expr_file, sep = '\t', header = TRUE)

# Drop the annotation columns, then every column whose name contains 'NA'.
# drop = FALSE keeps a data frame even if a single column is left.
annotation_cols <- c('gene_symbol', 'ENSEMBL_id')
expr_mat <- expr_mat[, !colnames(expr_mat) %in% annotation_cols, drop = FALSE]
expr_mat <- expr_mat[, !grepl(pattern = 'NA', colnames(expr_mat)), drop = FALSE]

# Quick range check of the remaining values, printed as '<max>_<min>'.
print(paste0(max(expr_mat), '_', min(expr_mat)))

# Analysis mode switches; the first one that is TRUE wins
# (basic, then custom, then special).
do_basic_limma <- TRUE
do_custom_limma <- FALSE
special <- FALSE
# ---- Pairwise limma comparisons ---------------------------------------------
# Exactly one mode runs, chosen by the flags do_basic_limma / do_custom_limma /
# special (checked in that order):
#   * basic   : every non-control group vs 'HIVfree'
#   * custom  : every remaining group vs 'HIV_nonART' (columns containing 'LVL'
#               are removed first)
#   * special : 'HIV_ART_3' vs 'HIVfree' only

# Fit a two-group limma model and write the full topTable to disk.
#
# input_matrix    expression table with one column per sample; the group of a
#                 sample is its column name with the 'GSM<digits>_' prefix
#                 removed.
# input_group     name of the group of interest (numerator of the contrast).
# reference_group name of the baseline group (denominator of the contrast).
# file_tag        text inserted between '_limma_results' and '.txt' in the
#                 output file name.
#
# Reads the globals GEO_id and platform_id to build the output path under
# 'results/'. Returns the topTable data frame invisibly.
run_pairwise_limma <- function(input_matrix, input_group, reference_group,
                               file_tag) {
  sample_group <- gsub('GSM[0-9]*_', '', colnames(input_matrix))
  groups_keep <- c(reference_group, input_group)

  # Fail early with a readable message instead of a makeContrasts() error.
  missing_groups <- setdiff(groups_keep, sample_group)
  if (length(missing_groups) > 0) {
    stop('Group(s) not found among the sample columns: ',
         paste(missing_groups, collapse = ', '), call. = FALSE)
  }

  # Select samples by exact group name. Substring/regex matching on the column
  # names would also pull in groups that merely contain the name (for example
  # 'HIV_ART' matching 'HIV_ART_3') and add a third group to the model.
  keep <- sample_group %in% groups_keep
  input_matrix <- input_matrix[, keep, drop = FALSE]
  sample_group <- factor(sample_group[keep])

  # Cell-means design: one column per group, named after the group.
  design <- model.matrix(~ 0 + sample_group)
  colnames(design) <- levels(sample_group)

  fit <- lmFit(input_matrix, design)
  contrast.matrix <- makeContrasts(
    contrasts = paste0(input_group, '-', reference_group),
    levels = design
  )
  fit2 <- eBayes(contrasts.fit(fit, contrast.matrix))

  results <- topTable(fit2, adjust.method = 'BH', sort.by = 'B', coef = 1,
                      number = Inf)
  # Row names are written out as a regular column because row.names = FALSE
  # below (presumably they hold gene symbols -- verify against the input file).
  results$gene_symbol <- rownames(results)

  output_file <- paste0('results/', GEO_id, '_', platform_id, '_', input_group,
                        '_vs_', reference_group, '_limma_results', file_tag,
                        '.txt')
  write.table(results, file = output_file, sep = '\t', quote = FALSE,
              row.names = FALSE)
  invisible(results)
}

if (isTRUE(do_basic_limma)) {
  # Derive the groups from the column names.
  expr_mat <- expr_mat[, !grepl('NA', colnames(expr_mat)), drop = FALSE]
  sample_type <- gsub('GSM[0-9]*_', '', colnames(expr_mat))

  if (!('HIVfree' %in% sample_type)) {
    stop('No HIVfree sample in this dataset, try another one!')
  }

  group <- setdiff(unique(sample_type), 'HIVfree')
  if (length(group) == 0) {
    warning('Only HIVfree samples found; no comparison was run.',
            call. = FALSE)
  }

  # Original entry point (name kept as-is for compatibility): compare one group
  # against the HIVfree controls.
  sepreate_differential_analysis <- function(input_group, input_matrix) {
    run_pairwise_limma(input_matrix, input_group,
                       reference_group = 'HIVfree',
                       file_tag = '_using_lumi_newpipeline_3_15')
  }
  for (current_group in group) {
    sepreate_differential_analysis(current_group, expr_mat)
  }
} else if (isTRUE(do_custom_limma)) {
  expr_mat <- expr_mat[, !grepl('NA', colnames(expr_mat)), drop = FALSE]
  expr_mat <- expr_mat[, !grepl('LVL', colnames(expr_mat)), drop = FALSE]
  sample_type <- gsub('GSM[0-9]*_', '', colnames(expr_mat))

  if (!('HIV_nonART' %in% sample_type)) {
    # The original message named HIVfree although this mode needs HIV_nonART.
    stop('No HIV_nonART sample in this dataset, try another one!')
  }

  group <- setdiff(unique(sample_type), 'HIV_nonART')
  if (length(group) == 0) {
    warning('Only HIV_nonART samples found; no comparison was run.',
            call. = FALSE)
  }

  # Original entry point (name kept as-is for compatibility): compare one group
  # against the HIV_nonART samples.
  sepreate_differential_analysis <- function(input_group, input_matrix) {
    run_pairwise_limma(input_matrix, input_group,
                       reference_group = 'HIV_nonART',
                       file_tag = '_identical')
  }
  for (current_group in group) {
    sepreate_differential_analysis(current_group, expr_mat)
  }
} else if (isTRUE(special)) {
  # Single hard-coded comparison; the result table stays available as `results`.
  results <- run_pairwise_limma(expr_mat, 'HIV_ART_3',
                                reference_group = 'HIVfree',
                                file_tag = '_24_3_20')
}
