1. 病情說明:
機制難尋,腸道菌群
這句話足以說明菌群研究的必要性,畢竟很多工作都表明了菌群與疾病、發(fā)育、神經(jīng)等的緊密聯(lián)系。另外,從目前國內(nèi)科研服務相關(guān)的企業(yè)的戰(zhàn)略布局來看,微生物單細胞組和單細胞三代全長是重要的開發(fā)方向,這些企業(yè)的鼻子可是很靈的。如果5年前,沒能抓住單細胞轉(zhuǎn)錄組,今天又沒有抓住單細胞空間轉(zhuǎn)錄組,那兩三年后的單細胞全長轉(zhuǎn)錄組、微生物單細胞組和單細胞表觀組一定不能再錯過了。那我們就先來看看怎么把菌株-通路-代謝物-宿主靶點聯(lián)系起來的關(guān)鍵工具--humann。
HUMAnN采用比對泛基因組的方法鑒定群體的已知物種,并進一步翻譯檢索末分類的序列,最終定量基因家族和通路。 結(jié)果同時獲得功能通路中具體物種組成,建立起了物種與功能的聯(lián)系,可進一步研究功能組成的貢獻者。
通過humann的一通分析,我們可以拿到這些代謝通路的豐度差異結(jié)果噢!

image.png
2. 治療方案
import time
import threading
import argparse
import os
from pickle import FALSE
import threading
import shutil
import datetime
import json
import time
import subprocess
import pandas as pd
method_path = 'plot_scripts'
refer_path = 'DB'
# 1: check function
def humann_check(
data_path: str = None, ## path of kneaddout
input_type: str = None, ## choose {fastq,fastq.gz,fasta,fasta.gz,sam,bam,blastm8,genetable,biom}
search_mode: str = None, ## default [uniref90], uniref50,uniref90
alignment_type : str = None, ## software to use for translated alignment,{usearch,rapsearch,diamond}
pathway_type: str = None, ## [DEFAULT: metacyc] {metacyc,unipathway} the database to use for pathway computations
out_path: str = None, ## outpath of humann_check
):
## check knead file
check_file = f'{data_path}/knead_call_file.txt'
if not os.path.exists(check_file):
raise SystemExit('No knead file! Please run knead module!')
else:
knead_param = pd.read_csv(check_file, header=None)
cleandata_path = knead_param.at[0,0]
sampleinfo = knead_param.at[1,0]
inputtype = f'{input_type}'
search_mode = f'{search_mode}'
alignment_type = f"{alignment_type}"
pathway_type = f'{pathway_type}'
if not os.path.exists(out_path):
os.makedirs(out_path, exist_ok=True)
else:
pass
nucleotide_database = os.path.abspath(refer_path+'/full_chocophlan.v201901_v31')
if search_mode == 'uniref90':
protein_database = os.path.abspath(refer_path+'/uniref90_annotated_v201901b')
elif search_mode == 'uniref50':
protein_database = os.path.abspath(refer_path+'/uniref50_annotated_v201901b')
else:
pass
## output
out_path = os.path.abspath(out_path)
check_file_path = f'{out_path}/humann_check_file.txt'
check_file = pd.DataFrame([
cleandata_path,
sampleinfo,
inputtype,
search_mode,
alignment_type,
pathway_type,
out_path ,
nucleotide_database,
protein_database
])
check_file.to_csv(check_file_path,
header=None, index=None, sep='\t', mode='w')
return 0
def execCmd(cmd):
try:
print("命令{}開始運行{}".format(cmd, datetime.datetime.now()))
time.sleep(5)
os.system(cmd)
print("命令{}結(jié)束運行{}".format(cmd, datetime.datetime.now()))
except:
print("{}\t運行失敗".format(cmd))
def humann_call(
out_path: str = None # outpath of humann
):
## check humann_check file
humann_check_path = f'{out_path}/humann_check_file.txt'
if not os.path.exists(humann_check_path):
raise SystemExit(' No humann_check_file ! Please run humann_check !')
else:
humann_check = pd.read_csv(f'{out_path}/humann_check_file.txt',
header=None,delimiter='\t')
cleandata_path = humann_check.at[0,0]
sampleinfo = humann_check.at[1,0]
inputtype =humann_check.at[2,0]
search_mode= humann_check.at[3,0]
alignment_type = humann_check.at[4,0]
pathway_type = humann_check.at[5,0]
out_path = humann_check.at[6,0]
nucleotide_database = humann_check.at[7,0]
protein_database = humann_check.at[8,0]
sample_info_dic = {}
cmds = []
for line in open(sampleinfo):
if not line.startswith("sample"):
line=line.strip().split()
sample_info_dic[line[0]]=line[1]
cmd = f'''/root/miniconda3/envs/Rdoc/bin/humann -i {cleandata_path}/{line[0]}/{line[0]}.{inputtype} --output {out_path} --nucleotide-database {nucleotide_database} --protein-database {protein_database} \
--input-format {inputtype} --threads 30 --memory-use maximum '''
cmds.append(cmd)
threads = []
for cmd in cmds:
th = threading.Thread(target=execCmd, args=(cmd,))
th.start()
th.join()
time.sleep(2)
threads.append(th)
humann_out_path = os.path.abspath(out_path)
sampleinfo = os.path.abspath(sampleinfo)
call_file_path = f'{out_path}/humann_call_file.txt'
call_file = pd.DataFrame([humann_out_path,sampleinfo])
call_file.to_csv(call_file_path,
header=None, index=None, sep='\t', mode='w')
return 0
humann_call(out_path)
def humann_post(
out_path:str = None
):
## check humann_call file
# humann_call_path = f'{out_path}/humann_call_file.txt'
# if not os.path.exists(humann_call_path):
# raise SystemExit(' No humann_call_file ! Please run humann_call !')
# else:
# pass
merge_path = out_path +'/humann_merge_out'
if not os.path.exists(merge_path):
os.makedirs(merge_path, exist_ok=True)
cmd = f'''/root/miniconda3/envs/Rdoc/bin/humann_join_tables -s --input {out_path} --file_name pathabundance --output {merge_path}/humann_pathabundance.tsv && \
/root/miniconda3/envs/Rdoc/bin/humann_join_tables -s --input {out_path} --file_name pathcoverage --output {merge_path}/humann_pathcoverage.tsv && \
/root/miniconda3/envs/Rdoc/bin/humann_join_tables -s --input {out_path} --file_name genefamilies --output {merge_path}/humann_genefamilies.tsv
'''
execCmd(cmd)
else:
pass
time.sleep(10)
cmd_norm = f'''/root/miniconda3/envs/Rdoc/bin/humann_renorm_table --input {merge_path}/humann_pathabundance.tsv \
--units relab --output {merge_path}/humann_pathabundance_relab.tsv && \
/root/miniconda3/envs/Rdoc/bin/humann_renorm_table --input {merge_path}/humann_genefamilies.tsv \
--units relab --output {merge_path}/humann_genefamilies_relab.tsv
'''
execCmd(cmd_norm)
## split
final_path = out_path +'/humann_final_out'
cmd_split = f'''/root/miniconda3/envs/Rdoc/bin/humann_split_stratified_table --input {merge_path}/humann_pathabundance_relab.tsv --output {final_path} && \
/root/miniconda3/envs/Rdoc/bin/humann_split_stratified_table --input {merge_path}/humann_genefamilies_relab.tsv --output {final_path} && \
/root/miniconda3/envs/Rdoc/bin/humann_split_stratified_table --input {merge_path}/humann_pathcoverage.tsv --output {final_path}
'''
execCmd(cmd_split)
return 0
data_path = 'kneaddout2'
input_type = 'fastq'
search_mode = 'uniref90'
alignment_type = 'diamond'
pathway_type = 'metacyc'
out_path = 'humann_out'
humann_check(data_path,input_type,search_mode,alignment_type,pathway_type,out_path)
humann_call(out_path)
3. 補充說明
下一篇可以畫畫圖了,畢竟我們拿到的這些表格連自己都驚艷不了,又怎么能在組會上搏老板一笑呢。分析環(huán)境和測試數(shù)據(jù)我都整理好啦,一鍵部署到自己的服務器噢,持續(xù)關(guān)注,因為我會持續(xù)更新的。