最近試了很多Smart-seq2上游處理的方法,參照了Jimmy老師的教程,以及阿則的生信小站公眾號(hào)文章。對(duì)代碼進(jìn)行了整理。但是過(guò)程中仍然有一些問(wèn)題尚待解決。
1. 數(shù)據(jù)、軟件準(zhǔn)備
- SRR文件下載到SRRfile文件下
mkdir SRRfile
prefetch --option-file SRR_Acc_List.txt
- hisat2下載
conda install hisat2
- hisat2比對(duì)索引下載
axel https://genome-idx.s3.amazonaws.com/hisat/grch38_tran.tar.gz
# wget https://genome-idx.s3.amazonaws.com/hisat/grch38_tran.tar.gz
- conda安裝其他軟件
conda install fastqc multiqc samtools sambamba featureCounts
2. 數(shù)據(jù)處理
- 解壓縮
gunzip *.gz
- SRA文件轉(zhuǎn)fastq
mkdir fastq
ls SRRfile/SRR* | while read id;do \
(fastq-dump --gzip --split-3 -A `basename $id` -O fastq/$id &);done
- 質(zhì)控
mkdir fastqc
fastqc -o ./fastqc -t 20 ./fastq/*.fastq &&multiqc ./fastqc -o ./fastqc
- 去接頭
# trim_galore
mkdir clean
cat SRR_Acc_List.txt | while read id;do\
trim_galore --quality 20 --phred33 \
--length 20 -j 20 \
-o ../clean \
--paried ./fastq/${id}_1.fastq ./fastq/${id}_2.fastq
done
gunzip ./clean/*.gz
3. 序列比對(duì)
- hisat2
mkdir aligned
cat SRR_Acc_List.txt | while read id;do
hisat2 -p 20 -x ~/ref/genome_tran/genome_tran \
-S ./aligned/${id}.sam \
-1./clean/${id}_1_val_1.fq -2 ./clean/${id}_2_val_2.fq
done
- sam轉(zhuǎn)bam
mkdir bam
cat SRR_Acc_List.txt | while read id;do
samtools sort -n -@ 20 \
-o ./bam/${id}.bam ./aligned/${id}.sam
done
- bam文件排序
cat SRR_Acc_List.txt | while read id;do
sambamba sort -t 20 -o ./bam/${id}.bam ./bam/${id}.bam
done
- 輸出為count
mkdir count
cat SRR_Acc_List.txt | while read id;do
featureCounts -T 20 -p -t exon -g gene_name \
-a ~/ref/gencode.v40.annotation.gtf \
-o count/${id}.all_feature.txt \
bam/${id}.sorted.bam
done
4.批處理文件
根據(jù)以上,寫成批處理文件,批量處理SRR格式文件。
### srr2fastq
mkdir fastq
ls SRRfile/SRR* | while read id;do \
(fastq-dump --gzip --split-3 -A `basename $id` -O fastq/$id &);done
### fastqc
mkdir fastqc
fastqc -o ./fastqc -t 20 ./fastq/*.fastq &&multiqc ./fastqc -o ./fastqc
### trim_galore
mkdir clean
cat SRR_Acc_List.txt | while read id;do\
trim_galore --quality 20 --phred33 \
--length 20 -j 20 \
-o ../clean \
--paried ./fastq/${id}_1.fastq ./fastq/${id}_2.fastq
done
### hisat2
mkdir aligned
cat SRR_Acc_List.txt | while read id;do
hisat2 -p 20 -x ~/ref/genome_tran/genome_tran \
-S ./aligned/${id}.sam \
-1./clean/${id}_1_val_1.fq -2 ./clean/${id}_2_val_2.fq
done
### sam轉(zhuǎn)bam
mkdir bam
cat SRR_Acc_List.txt | while read id;do
samtools sort -n -@ 20 \
-o ./bam/${id}.bam ./aligned/${id}.sam
done
### bam文件排序
cat SRR_Acc_List.txt | while read id;do
sambamba sort -t 20 -o ./bam/${id}.bam ./bam/${id}.bam
done
### featurecounts
mkdir count
cat SRR_Acc_List.txt | while read id;do
featureCounts -T 20 -p -t exon -g gene_name \
-a ~/ref/gencode.v40.annotation.gtf \
-o count/${id}.all_feature.txt \
bam/${id}.sorted.bam
done
###
echo "------- end -------"