# Start from an empty workspace: drops every object visible to ls() here.
rm(list = ls())

# Resolve the project root with the `here` package (installed on first use)
# instead of a machine-specific path such as the old setwd('H:/project').
if (!requireNamespace("here", quietly = TRUE)) {
  install.packages("here")
}
library(here)

# All relative paths below are interpreted from the project root.
setwd(here::here())
# Labels of the two HIV treatment groups.
group_list <- c('nonART', 'ART')
library(dplyr)

# Tab-separated gene table with a header row. Quoting is disabled so quote
# characters inside fields are read literally. The columns used further down
# are HIV, Cancer, trend, type and Gene.
# stringsAsFactors = FALSE keeps Gene as character on R < 4.0 as well; the
# later ""-padding of gene vectors does not work on factors.
df_all <- read.table(
  'data/KEGG_model_gene_list/KEGG_all_and_all_lasso_include_trend.txt',
  sep = '\t',
  quote = "",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Rows whose type is "L". which() drops NA comparisons; indexing with the raw
# logical vector would instead insert an all-NA row for every missing type.
df_lasso <- df_all[which(df_all$type == 'L'), , drop = FALSE]
# Each summary below has one row per group and a list-column `genes` holding
# that group's Gene values.

# All rows of df_all, grouped by HIV x Cancer x trend.
all_genes <- summarise(
  group_by(df_all, HIV, Cancer, trend),
  genes = list(Gene),
  .groups = "drop"
)

# Only the type == 'L' rows, grouped by HIV x Cancer x trend.
l_genes <- summarise(
  group_by(df_lasso, HIV, Cancer, trend),
  genes = list(Gene),
  .groups = "drop"
)

# All rows of df_all, grouped by HIV x Cancer only (trend ignored).
all_genes_no_trend <- summarise(
  group_by(df_all, HIV, Cancer),
  genes = list(Gene),
  .groups = "drop"
)

# Only the type == 'L' rows, grouped by HIV x Cancer only (trend ignored).
l_genes_no_trend <- summarise(
  group_by(df_lasso, HIV, Cancer),
  genes = list(Gene),
  .groups = "drop"
)

# Tag every summary with its origin ("A" = all rows, "L" = type == 'L' rows)
# and give all four the same character `trend` column so they can be stacked.
all_genes$type <- "A"
all_genes$trend <- as.character(all_genes$trend)

l_genes$type <- "L"
l_genes$trend <- as.character(l_genes$trend)

# The trend-agnostic summaries get the placeholder trend "all".
all_genes_no_trend$type <- "A"
all_genes_no_trend$trend <- "all"

l_genes_no_trend$type <- "L"
l_genes_no_trend$trend <- "all"

# Stack the four summaries into one table.
merged_genes <- rbind(all_genes, l_genes, all_genes_no_trend, l_genes_no_trend)

# Build the column label <HIV>_<Cancer>_<type>_<trend> for every row.
df <- mutate(
  merged_genes,
  new_colname = paste(HIV, Cancer, type, trend, sep = "_")
)

# Collect the gene vectors of `df` into a named list keyed by new_colname.
# Rows that share a label are concatenated in row order.
new_columns <- list()

# seq_len() yields an empty sequence for a zero-row `df`; 1:nrow(df) would
# iterate over c(1, 0) and fail on the first out-of-range access.
for (i in seq_len(nrow(df))) {
  colname <- df$new_colname[i]
  genes <- df$genes[[i]]

  # new_columns[[colname]] is NULL for a label not seen yet, and c(NULL, genes)
  # is just `genes`, so first and repeated labels need no separate branches.
  new_columns[[colname]] <- c(new_columns[[colname]], genes)
}

# Length of the longest gene vector. The leading 0L keeps this well defined
# (0 instead of -Inf plus a warning) when new_columns is empty.
max_length <- max(0L, lengths(new_columns))

# Pad every gene vector with "" up to max_length so they can form the columns
# of one data frame. Extending the length introduces NA, which is then
# replaced by "" (NA values already present are replaced as well).
new_columns <- lapply(new_columns, function(x) {
  length(x) <- max_length
  x[is.na(x)] <- ""
  x
})

# One column per label. stringsAsFactors = FALSE keeps the columns character
# on R < 4.0 too. Column names still pass through the default name checking of
# as.data.frame(), exactly as before.
final_df <- as.data.frame(new_columns, stringsAsFactors = FALSE)

# Keep only the columns whose name contains '_all'. drop = FALSE keeps a data
# frame even when a single column matches; otherwise the result collapses to a
# bare vector and loses its column name and ncol().
final_df <- final_df[, grep('_all', colnames(final_df)), drop = FALSE]

#-----------------------------------
# Batch PPI analysis
library(httr)
library(jsonlite)
library(RCy3)
library(STRINGdb)
library(igraph)

# Directory passed to STRINGdb as input_directory. Defaults to the original
# "E:/STRING"; set the STRING_DB_DIR environment variable to point it
# elsewhere on machines without that drive.
string_input_dir <- Sys.getenv("STRING_DB_DIR", unset = "E:/STRING")

# STRING client: version "12", species 9606, score_threshold 700.
string_db <- STRINGdb$new(version="12", species=9606, score_threshold=700, input_directory=string_input_dir)

# Batch PPI export. For every column of final_df (one gene list per sample
# label): map the symbols to STRING ids, fetch the interactions among them,
# build the network in Cytoscape, attach per-node URLs, and store the result
# as <ppi_results_dir>/<label>/ppi_network.cyjs.
# Requires a running Cytoscape instance reachable by RCy3.
ppi_results_dir <- "./results/dModel_STRING-PPI"
# Scratch location the network is exported to before it is filed per sample.
ppi_export_file <- file.path(ppi_results_dir, "ppi_network.cyjs")

# seq_len() makes the loop a no-op when final_df has no columns.
for (j in seq_len(ncol(final_df))) {
  what_sample <- colnames(final_df)[j]
  print(what_sample)

  # Strip the "" padding (and any NA) and collapse repeated symbols.
  query <- final_df[[j]]
  query <- unique(query[!is.na(query) & query != ""])

  # Nothing to look up for this label.
  if (length(query) == 0) {
    print(paste0('The group ', what_sample, ' has no genes to query.'))
    next
  }

  # Map the gene symbols to STRING ids; the result keeps the symbol column and
  # gains a STRING_id column.
  mapped <- string_db$map(
    my_data_frame = data.frame(protein_symbols = query,
                               stringsAsFactors = FALSE),
    my_data_frame_id_col_names = "protein_symbols"
  )

  # Fetch the interactions and rename the endpoint columns to the
  # source/target names used for the Cytoscape edge table.
  ppi_network <- string_db$get_interactions(mapped$STRING_id)
  colnames(ppi_network)[colnames(ppi_network) == "from"] <- "source"
  colnames(ppi_network)[colnames(ppi_network) == "to"] <- "target"

  # Keep only edges whose two endpoints are both in the input gene list.
  ppi_network_filtered <- ppi_network[
    ppi_network$source %in% mapped$STRING_id &
      ppi_network$target %in% mapped$STRING_id,
    ,
    drop = FALSE
  ]

  # Skip labels that end up without any interaction.
  if (nrow(ppi_network_filtered) == 0) {
    print(paste0('The group ',what_sample,' has no available PPI network.'))
    next
  }

  # Per-sample output directory. recursive = TRUE also creates the results
  # root when it is missing; an already existing directory is not an error.
  output_path <- file.path(ppi_results_dir, what_sample)
  dir.create(output_path, recursive = TRUE, showWarnings = FALSE)
  if (!dir.exists(output_path)) {
    warning("Could not create output directory: ", output_path,
            call. = FALSE)
    next
  }

  # STRING network page URL for every node.
  mapped$STRING_URL <- paste0("https://string-db.org/cgi/network?identifiers=", mapped$protein_symbols, "%0d&species=9606&show_query_node_labels=1")

  # RCSB PDB full-text search URL for every node.
  mapped$PDB_URL <- paste0('https://www.rcsb.org/search?request=%7B%22query%22%3A%7B%22type%22%3A%22group%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22group%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22group%22%2C%22nodes%22%3A%5B%7B%22type%22%3A%22terminal%22%2C%22service%22%3A%22full_text%22%2C%22parameters%22%3A%7B%22value%22%3A%22',mapped$protein_symbols,'%22%7D%7D%5D%2C%22logical_operator%22%3A%22and%22%7D%5D%2C%22logical_operator%22%3A%22and%22%2C%22label%22%3A%22full_text%22%7D%5D%2C%22logical_operator%22%3A%22and%22%7D%2C%22return_type%22%3A%22entry%22%2C%22request_options%22%3A%7B%22paginate%22%3A%7B%22start%22%3A0%2C%22rows%22%3A25%7D%2C%22results_content_type%22%3A%5B%22experimental%22%5D%2C%22sort%22%3A%5B%7B%22sort_by%22%3A%22score%22%2C%22direction%22%3A%22desc%22%7D%5D%2C%22scoring_strategy%22%3A%22combined%22%7D%2C%22request_info%22%3A%7B%22query_id%22%3A%22f6957ec1aea09e02b078f70d0bd7e1db%22%7D%7D')

  # Check the Cytoscape connection, then create the network from the edges.
  cytoscapePing()
  createNetworkFromDataFrames(edges=ppi_network_filtered, title="PPI Network", collection="STRINGdb")

  # Attach symbol and URLs to the nodes, keyed by STRING id.
  node_data <- data.frame(node=mapped$STRING_id,
                          protein_symbol=mapped$protein_symbols,
                          STRING_URL=mapped$STRING_URL,
                          PDB_URL=mapped$PDB_URL,
                          stringsAsFactors = FALSE)
  loadTableData(node_data, data.key.column="node", table="node")

  # Export to the scratch file, then clear Cytoscape for the next sample.
  # NOTE(review): the gc() calls and the final sleep look like pacing for
  # Cytoscape between samples - confirm before removing them.
  gc()
  cytoscapePing()
  exportNetwork(ppi_export_file, type="CYJS")
  deleteAllNetworks()
  gc()

  # File the export under the sample directory. The scratch copy is removed
  # only after a successful copy, so a failed copy does not lose the export.
  if (file.copy(ppi_export_file, output_path, overwrite = TRUE)) {
    file.remove(ppi_export_file)
  } else {
    warning("Could not copy ", ppi_export_file, " to ", output_path,
            call. = FALSE)
  }

  gc()
  Sys.sleep(1)
}
