#Short read aligners (Bowtie, BWA, Maq, Novoalign, SSAHA2)
#SAMtools
This program, written in C, generates and sorts BAM files and produces pileup/mpileup files from the sorted BAM files
samtools mpileup -f [reference sequence] [BAM file(s)] > data.mpileup
#transforming gzipped VCF to a bgzipped form. tabix is used to index vcf
zcat variants.vcf.gz | bgzip -c > variants.vcf.bgzip.gz
tabix -p vcf variants.vcf.bgzip.gz
samtools mpileup -B -Q 0 -d 999999999 -f ref.fa sorted.bam > sorted.bam.pileup
#bcftools
It is a set of utilities for variant calling and for manipulating VCF and BCF files.
bcftools stats file.vcf > file.stats  # This output file will contain the number of SNPs and indels
zcat in.vcf.gz | vcftools_0.1.9/bin/vcf-annotate --fill-type > out.vcf
grep -oP "TYPE=\w+" out.vcf | sort | uniq -c
or
zcat in.vcf.gz | vcftools_0.1.9/bin/vcf-annotate --fill-type | grep -oP "TYPE=\w+" | sort | uniq -c
#VCF file can be manipulated using grep and the result can be plotted using R
# Remove three fields
bcftools annotate -x ID,INFO/DP,FORMAT/DP file.vcf.gz
# Remove all INFO fields and all FORMAT fields except for GT
bcftools annotate -x INFO,^FORMAT/GT file.vcf
#VarScan
It is a variant calling tool, coded in Java and executed from the terminal
It works on pileup/mpileup files
java -jar VarScan.jar [COMMAND] [OPTIONS]
#COMMAND can be for single sample or multi sample
java -jar VarScan.jar pileup2snp [pileup file]
java -jar VarScan.jar pileup2indel [pileup file]
java -jar VarScan.jar pileup2cns [pileup file]
java -jar VarScan.jar mpileup2snp [mpileup file]
java -jar VarScan.jar mpileup2indel [mpileup file]
java -jar VarScan.jar mpileup2cns [mpileup file]
#Tumor-normal comparison:
java -jar VarScan.jar somatic [normal-tumor mpileup]
java -jar VarScan.jar copynumber [normal-tumor mpileup]
#To save disk space and file I/O, mpileup output can be directed to VarScan with a "pipe" command.
samtools mpileup -f reference.fasta data.bam | java -jar VarScan.v2.2.jar pileup2snp
samtools mpileup -f reference.fasta data1.bam data2.bam | java -jar VarScan.v2.2.jar pileup2snp
#Picard
These command-line tools, written in Java (Java 1.8 must be installed), can manipulate SAM/BAM/VCF files. Picard is supported through the GATK Forums
#Check the Java version (version of the JVM)
java -version
#Test run of Picard tools using full path
java -jar /path/to/picard.jar -h
#Test run of Picard tools using environment variable (here $PICARD)
java -jar $PICARD -h
#General syntax for running any tool in the Picard toolkit
java jvm-args -jar picard.jar PicardToolName OPTION1=value1 OPTION2=value2...
#GATK
It is developed by the Broad Institute (http://www.broadinstitute.org)
The RNA-sequencing workflow using GATK has 3 main steps: pre-processing (FASTQ to BAM files), variant discovery (BAM to VCF files), refinement and evaluation (genotype refinement, functional annotation and callset QC)
The above steps in detail:
Mapping and marking duplicates; split and trim; local realignment around indels (optional); base quality score recalibration (BQSR)
The sequence reads are mapped to the reference using the STAR aligner (2-pass protocol) to produce a SAM/BAM file sorted by coordinate; duplicates are marked; reads with N operators in their CIGAR (Compact Idiosyncratic Gapped Alignment Report) strings (which denote the presence of a splice junction) are adjusted, and mapping qualities are reassigned from 255 to 60
Local realignment is performed around indels
Base quality scores are recalibrated, because the variant calling algorithms rely heavily on the quality scores
#GATK vcf file generation for variant (SNP, indel) finding
java -jar ~/GenomeAnalysisTK-2.0-38/GenomeAnalysisTK.jar -T UnifiedGenotyper --genotype_likelihoods_model BOTH -R ref.fasta -I IN.bam -o OUT.vcf
# For annotations of SNP/indel, use -A VariantType
#Genome Modeling Tools (BreakDancer (SVs), Somatic Sniper (SNVs), CMDS (Copy Number))
#BreakDancer
It is a Perl/C++ package that can detect structural variants (including inversions and inter- and intra-chromosomal translocations)
#SAMtools
This program, written in C, generates and sorts BAM files and produces pileup/mpileup files from the sorted BAM files
samtools mpileup -f [reference sequence] [BAM file(s)] > data.mpileup
#transforming gzipped VCF to a bgzipped form. tabix is used to index vcf
zcat variants.vcf.gz | bgzip -c > variants.vcf.bgzip.gz
tabix -p vcf variants.vcf.bgzip.gz
samtools mpileup -B -Q 0 -d 999999999 -f ref.fa sorted.bam > sorted.bam.pileup
#bcftools
It is a set of utilities for variant calling and for manipulating VCF and BCF files.
bcftools stats file.vcf > file.stats  # This output file will contain the number of SNPs and indels
zcat in.vcf.gz | vcftools_0.1.9/bin/vcf-annotate --fill-type > out.vcf
grep -oP "TYPE=\w+" out.vcf | sort | uniq -c
or
zcat in.vcf.gz | vcftools_0.1.9/bin/vcf-annotate --fill-type | grep -oP "TYPE=\w+" | sort | uniq -c
#VCF file can be manipulated using grep and the result can be plotted using R
# Remove three fields
bcftools annotate -x ID,INFO/DP,FORMAT/DP file.vcf.gz
# Remove all INFO fields and all FORMAT fields except for GT
bcftools annotate -x INFO,^FORMAT/GT file.vcf
#VarScan
It is a variant calling tool, coded in Java and executed from the terminal
It works on pileup/mpileup files
java -jar VarScan.jar [COMMAND] [OPTIONS]
#COMMAND can be for single sample or multi sample
java -jar VarScan.jar pileup2snp [pileup file]
java -jar VarScan.jar pileup2indel [pileup file]
java -jar VarScan.jar pileup2cns [pileup file]
java -jar VarScan.jar mpileup2snp [mpileup file]
java -jar VarScan.jar mpileup2indel [mpileup file]
java -jar VarScan.jar mpileup2cns [mpileup file]
#Tumor-normal comparison:
java -jar VarScan.jar somatic [normal-tumor mpileup]
java -jar VarScan.jar copynumber [normal-tumor mpileup]
#To save disk space and file I/O, mpileup output can be directed to VarScan with a "pipe" command.
samtools mpileup -f reference.fasta data.bam | java -jar VarScan.v2.2.jar pileup2snp
samtools mpileup -f reference.fasta data1.bam data2.bam | java -jar VarScan.v2.2.jar pileup2snp
#Picard
These command-line tools, written in Java (Java 1.8 must be installed), can manipulate SAM/BAM/VCF files. Picard is supported through the GATK Forums
#Check the Java version (version of the JVM)
java -version
#Test run of Picard tools using full path
java -jar /path/to/picard.jar -h
#Test run of Picard tools using environment variable (here $PICARD)
java -jar $PICARD -h
#General syntax for running any tool in the Picard toolkit
java jvm-args -jar picard.jar PicardToolName OPTION1=value1 OPTION2=value2...
#GATK
It is developed by the Broad Institute (http://www.broadinstitute.org)
The RNA-sequencing workflow using GATK has 3 main steps: pre-processing (FASTQ to BAM files), variant discovery (BAM to VCF files), refinement and evaluation (genotype refinement, functional annotation and callset QC)
The above steps in detail:
Mapping and marking duplicates; split and trim; local realignment around indels (optional); base quality score recalibration (BQSR)
The sequence reads are mapped to the reference using the STAR aligner (2-pass protocol) to produce a SAM/BAM file sorted by coordinate; duplicates are marked; reads with N operators in their CIGAR (Compact Idiosyncratic Gapped Alignment Report) strings (which denote the presence of a splice junction) are adjusted, and mapping qualities are reassigned from 255 to 60
Local realignment is performed around indels
Base quality scores are recalibrated, because the variant calling algorithms rely heavily on the quality scores
#GATK vcf file generation for variant (SNP, indel) finding
java -jar ~/GenomeAnalysisTK-2.0-38/GenomeAnalysisTK.jar -T UnifiedGenotyper --genotype_likelihoods_model BOTH -R ref.fasta -I IN.bam -o OUT.vcf
# For annotations of SNP/indel, use -A VariantType
#Genome Modeling Tools (BreakDancer (SVs), Somatic Sniper (SNVs), CMDS (Copy Number))
#BreakDancer
It is a Perl/C++ package that can detect structural variants (including inversions and inter- and intra-chromosomal translocations)
No comments:
Post a Comment