#!/usr/bin/env bash
# IS_blast.sh: report IS1081 copy numbers for every isolate genome.
#
# For each isolate named in input_files/isolate_list, the IS1081 query is
# BLASTed against genome_fasta_files/<isolate>.fasta. After each label line
# ("...hits", "...plus strand", "...minus strand") a `wc` line is printed whose
# first field is the number of matching 'Strand' lines in the BLAST report.
# IS_data_analysis.sh picks the line that follows each label out of data_file,
# so the stdout format is deliberately left exactly as it was.
#
# Execute as: sh IS_blast.sh |& tee data_file

base_dir=/home/pseema/denovo_analysis
query="$base_dir/IS_fasta_seq/IS1081"
blast_dir="$base_dir/result_files/IS_blast_results"
# Scratch report: overwritten for every isolate, only the counts are kept.
blast_out="$blast_dir/result_IS1081.fasta"

# -p: a directory left over from an earlier run is not an error, and no
# "File exists" message ends up in the tee'd data_file.
mkdir -p "$blast_dir"

# -r keeps backslashes literal; the || test also processes a last line that
# has no trailing newline. Blank lines are NOT skipped on purpose: the
# analysis step pastes isolate_list line-by-line next to these records.
while read -r isolate || [ -n "$isolate" ]; do
  echo "Starting the $isolate genome"
  echo "###################################################"
  blastn -query "$query" \
    -subject "$base_dir/genome_fasta_files/$isolate.fasta" > "$blast_out"
  echo "Total number of IS1081 hits"
  grep 'Strand' "$blast_out" | wc
  echo "Copies in plus strand"
  grep 'Strand=Plus/Plus' "$blast_out" | wc
  echo "Copies in minus strand"
  grep 'Strand=Plus/Minus' "$blast_out" | wc
done < "$base_dir/input_files/isolate_list"
----------
#!/usr/bin/env bash
# IS_data_analysis.sh (first half): analyzes the data_file generated by
# running IS_blast.sh.
#
# Produces, under result_files/output_files:
#   hits_file / plus_file / minus_file   - the line following each label
#   hits_column1 / trimmed_plus / trimmed_minus - first field of those lines
#   combined_result, 3_columns           - the count columns pasted together
#   IS_filenames, ID_IS                  - split-file names next to isolate IDs
# and, under result_files/split_files, 18-line chunks of 3_columns.

base_dir=/home/pseema/denovo_analysis
out_dir="$base_dir/result_files/output_files"
split_dir="$base_dir/result_files/split_files"
profile_dir="$base_dir/result_files/IS_profile"
data_file="$out_dir/data_file"

mkdir -p "$out_dir"
cp "$base_dir/data_file" "$data_file"

# extract_counts MARKER RAW_OUT COLUMN_OUT
# Writes to RAW_OUT every line of data_file that directly follows a line
# containing MARKER, then writes the first whitespace-separated field of each
# of those lines to COLUMN_OUT. Warns on stderr when MARKER never occurs
# (the original `grep -q` calls ran the same check but discarded the result).
extract_counts() {
  if ! grep -q "$1" "$data_file"; then
    echo "warning: no '$1' line found in $data_file" >&2
  fi
  marker=$1 perl -ne 'if ($p) { print; $p = 0 } $p++ if /$ENV{marker}/' \
    "$data_file" > "$2"
  awk '{print $1}' "$2" > "$3"
}

extract_counts hits "$out_dir/hits_file" "$out_dir/hits_column1"
extract_counts plus "$out_dir/plus_file" "$out_dir/trimmed_plus"
extract_counts minus "$out_dir/minus_file" "$out_dir/trimmed_minus"

# 3_columns: field 1 = number of hits, field 2 = copies in the plus strand,
# field 3 = copies in the minus strand.
paste "$out_dir/trimmed_plus" "$out_dir/trimmed_minus" > "$out_dir/combined_result"
paste "$out_dir/hits_column1" "$out_dir/combined_result" > "$out_dir/3_columns"

# split_files holds one chunk of 3_columns per isolate: 18 rows, one per IS
# element.
# NOTE(review): this assumes data_file carries 18 records per isolate, but the
# IS_blast.sh in this file only reports IS1081 - confirm against the real run.
# NOTE(review): chunks left over from an earlier, larger run are not removed.
mkdir -p "$split_dir"
# The prefix must be ONE operand; the former "split_files/ isolates" (with a
# space) made split abort with "extra operand".
split -l 18 "$out_dir/3_columns" "$split_dir/isolates"

# List the chunk files through the shell glob instead of parsing `ls`.
for chunk in "$split_dir"/isolates*; do
  [ -e "$chunk" ] || continue # glob matched nothing
  printf '%s\n' "$chunk"
done > "$out_dir/IS_filenames"

# ID_IS: isolate ID in column 1, its chunk file in column 2.
paste "$base_dir/input_files/isolate_list" "$out_dir/IS_filenames" > "$out_dir/ID_IS"

# Directory for the per-IS profiles. It is created where the later `mv`
# commands actually put the files (result_files/IS_profile); the previous
# mkdir created denovo_analysis/IS_profile, which nothing wrote to.
mkdir -p "$profile_dir"
# Build one profile file per IS element.
#
# Row N of every split file (result_files/split_files/isolates*) holds the
# counts for the N-th IS element in the list below. For each element:
#   <IS>_data          "<IS> data: <row N>" for every split file
#   pasted_<IS>_data   ID_IS pasted next to <IS>_data
#   cleaned_<IS>_data  the same with column 2 (the split-file name) blanked;
#                      this final file is moved into result_files/IS_profile.
base_dir=/home/pseema/denovo_analysis
out_dir="$base_dir/result_files/output_files"
split_dir="$base_dir/result_files/split_files"
profile_dir="$base_dir/result_files/IS_profile"

# Without an existing directory every `mv` below would rename its file to
# "IS_profile", each one overwriting the previous.
mkdir -p "$profile_dir" || exit 1

row=0
for is_name in IS1081 IS1096 IS1535 IS1536 IS1537 IS1538 IS1539 IS1547 IS1602 \
  IS6110 IS986 ISMt1 ISMt2 ISMt3 ISMyca1 IS1553 IS1554 IS1557; do
  # The position in the list is the row number inside each split file.
  row=$((row + 1))

  # The glob must be a single word: "split_files /isolates*" (with a space)
  # looped over the directory itself and a literal "/isolates*".
  for chunk in "$split_dir"/isolates*; do
    [ -e "$chunk" ] || continue # glob matched nothing
    # Always emits exactly one line per split file, even when the row is
    # missing, so the paste below stays aligned with ID_IS.
    printf '%s data: %s\n' "$is_name" "$(awk -v row="$row" 'NR == row' "$chunk")"
  done > "$out_dir/${is_name}_data"

  paste "$out_dir/ID_IS" "$out_dir/${is_name}_data" > "$out_dir/pasted_${is_name}_data"
  # Blank column 2, which only holds the split-file name.
  awk '{$2 = ""; print $0}' "$out_dir/pasted_${is_name}_data" \
    > "$out_dir/cleaned_${is_name}_data"
  mv "$out_dir/cleaned_${is_name}_data" "$profile_dir"
done
# The cleaned files are the final IS outputs, one per IS element.
-------------
#!/usr/bin/env bash
# IS_homology.sh: BLAST every IS element against every isolate genome.
#
# For each IS query in IS_fasta_seq, runs blastn against
# genome_fasta_files/<isolate>.fasta for every isolate in
# input_files/isolate_list. The reports of all isolates (stdout and stderr)
# are shown on the terminal and saved together in
# result_files/homology_results/<IS>_blast_result.

base_dir=/home/pseema/denovo_analysis
homology_dir="$base_dir/result_files/homology_results"

mkdir -p "$homology_dir"

for is_name in IS1081 IS1096 IS1535 IS1536 IS1537 IS1538 IS1539 IS1547 IS1602 \
  IS6110 IS986 ISMt1 ISMt2 ISMt3 ISMyca1 IS1553 IS1554 IS1557; do
  # -r keeps backslashes literal; the || test also handles a final line
  # without a trailing newline.
  while read -r isolate || [ -n "$isolate" ]; do
    blastn -query "$base_dir/IS_fasta_seq/$is_name" \
      -subject "$base_dir/genome_fasta_files/$isolate.fasta"
  # "2>&1 |" is the portable spelling of "|&", so the script also works when
  # it is started with `sh` and sh is not bash.
  done < "$base_dir/input_files/isolate_list" 2>&1 \
    | tee "$homology_dir/${is_name}_blast_result"
done
-------------
#!/usr/bin/env bash
# IS_analysis_wrapper.sh: wrapper for IS_blast.sh, IS_data_analysis.sh and the
# follow-up scripts.
# Analyzes the IS distribution among the 51 de novo + 2 reference strains.
# The details of the input files and paths are outlined in README.txt.
#
# The helper scripts are looked up in, and data_file is written to, the
# current directory. IS_data_analysis.sh copies
# /home/pseema/denovo_analysis/data_file, so run this wrapper from
# /home/pseema/denovo_analysis.
#
# The helpers are started with `bash` rather than `sh` because they use bash
# syntax (for example `|&`), and "2>&1 |" replaces `|&` here for the same
# reason.

############## IS_blast.sh ##############
# Runs the homology search of the IS sequences against the 53 genomes.
# Reports copy number and strand-specific distribution of each IS.
# The result is directed into data_file.
echo "IS_blast.sh is running.........."
bash IS_blast.sh 2>&1 | tee data_file

############## IS_data_analysis.sh ##############
# Its input file is data_file. It generates an isolate-specific profile for
# each IS element; these result files are put in a folder IS_profile.
echo "IS_data_analysis.sh is running.........."
bash IS_data_analysis.sh

############## IS_homology.sh ##############
# Generates the homology result of the 18 IS elements with each isolate.
# It is required by the next script to work out the positional preference
# of each IS.
echo "IS_homology.sh is running.........."
bash IS_homology.sh

############## position_manipulations.sh ##############
# Reports the genomic locations; output is saved in genomic_position_file.
echo "position_manipulations.sh is running.........."
bash position_manipulations.sh 2>&1 | tee genomic_position_file

############## position_hotspots.sh ##############
# Reports the genomic hotspots; output is saved in position_hotspot_file.
echo "position_hotspots.sh is running.........."
bash position_hotspots.sh 2>&1 | tee position_hotspot_file
# IS_blast.sh body: report IS1081 copy numbers for every isolate genome.
#
# For each isolate named in input_files/isolate_list, the IS1081 query is
# BLASTed against genome_fasta_files/<isolate>.fasta. After each label line
# a `wc` line is printed whose first field is the number of matching 'Strand'
# lines in the BLAST report. IS_data_analysis.sh picks the line that follows
# each label out of data_file, so the stdout format is left exactly as it was.
#
# Execute as: sh IS_blast.sh |& tee data_file

base_dir=/home/pseema/denovo_analysis
query="$base_dir/IS_fasta_seq/IS1081"
blast_dir="$base_dir/result_files/IS_blast_results"
# Scratch report: overwritten for every isolate, only the counts are kept.
blast_out="$blast_dir/result_IS1081.fasta"

# -p: a directory left over from an earlier run is not an error, and no
# "File exists" message ends up in the tee'd data_file.
mkdir -p "$blast_dir"

# -r keeps backslashes literal; the || test also processes a last line that
# has no trailing newline. Blank lines are NOT skipped on purpose: the
# analysis step pastes isolate_list line-by-line next to these records.
while read -r isolate || [ -n "$isolate" ]; do
  echo "Starting the $isolate genome"
  echo "###################################################"
  blastn -query "$query" \
    -subject "$base_dir/genome_fasta_files/$isolate.fasta" > "$blast_out"
  echo "Total number of IS1081 hits"
  grep 'Strand' "$blast_out" | wc
  echo "Copies in plus strand"
  grep 'Strand=Plus/Plus' "$blast_out" | wc
  echo "Copies in minus strand"
  grep 'Strand=Plus/Minus' "$blast_out" | wc
done < "$base_dir/input_files/isolate_list"
----------
#!/usr/bin/env bash
# IS_data_analysis.sh (first half): analyzes the data_file generated by
# running IS_blast.sh.
#
# Produces, under result_files/output_files:
#   hits_file / plus_file / minus_file   - the line following each label
#   hits_column1 / trimmed_plus / trimmed_minus - first field of those lines
#   combined_result, 3_columns           - the count columns pasted together
#   IS_filenames, ID_IS                  - split-file names next to isolate IDs
# and, under result_files/split_files, 18-line chunks of 3_columns.

base_dir=/home/pseema/denovo_analysis
out_dir="$base_dir/result_files/output_files"
split_dir="$base_dir/result_files/split_files"
profile_dir="$base_dir/result_files/IS_profile"
data_file="$out_dir/data_file"

mkdir -p "$out_dir"
cp "$base_dir/data_file" "$data_file"

# extract_counts MARKER RAW_OUT COLUMN_OUT
# Writes to RAW_OUT every line of data_file that directly follows a line
# containing MARKER, then writes the first whitespace-separated field of each
# of those lines to COLUMN_OUT. Warns on stderr when MARKER never occurs
# (the original `grep -q` calls ran the same check but discarded the result).
extract_counts() {
  if ! grep -q "$1" "$data_file"; then
    echo "warning: no '$1' line found in $data_file" >&2
  fi
  marker=$1 perl -ne 'if ($p) { print; $p = 0 } $p++ if /$ENV{marker}/' \
    "$data_file" > "$2"
  awk '{print $1}' "$2" > "$3"
}

extract_counts hits "$out_dir/hits_file" "$out_dir/hits_column1"
extract_counts plus "$out_dir/plus_file" "$out_dir/trimmed_plus"
extract_counts minus "$out_dir/minus_file" "$out_dir/trimmed_minus"

# 3_columns: field 1 = number of hits, field 2 = copies in the plus strand,
# field 3 = copies in the minus strand.
paste "$out_dir/trimmed_plus" "$out_dir/trimmed_minus" > "$out_dir/combined_result"
paste "$out_dir/hits_column1" "$out_dir/combined_result" > "$out_dir/3_columns"

# split_files holds one chunk of 3_columns per isolate: 18 rows, one per IS
# element.
# NOTE(review): this assumes data_file carries 18 records per isolate, but the
# IS_blast.sh in this file only reports IS1081 - confirm against the real run.
# NOTE(review): chunks left over from an earlier, larger run are not removed.
mkdir -p "$split_dir"
# The prefix must be ONE operand; the former "split_files/ isolates" (with a
# space) made split abort with "extra operand".
split -l 18 "$out_dir/3_columns" "$split_dir/isolates"

# List the chunk files through the shell glob instead of parsing `ls`.
for chunk in "$split_dir"/isolates*; do
  [ -e "$chunk" ] || continue # glob matched nothing
  printf '%s\n' "$chunk"
done > "$out_dir/IS_filenames"

# ID_IS: isolate ID in column 1, its chunk file in column 2.
paste "$base_dir/input_files/isolate_list" "$out_dir/IS_filenames" > "$out_dir/ID_IS"

# Directory for the per-IS profiles. It is created where the later `mv`
# commands actually put the files (result_files/IS_profile); the previous
# mkdir created denovo_analysis/IS_profile, which nothing wrote to.
mkdir -p "$profile_dir"
#Extract first row of each file starting with pattern 'isolates' in the folder split_files and put in a new file. There can be 18 rows, one for each IS element. The files can be generated by chnaging the line
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*;
do
echo -n "IS1081 data: "
awk 'NR == 1' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1081_data;
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1081_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1081_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1081_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1081_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1081_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*;
do
echo -n "IS1096 data: "
awk 'NR == 2' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1096_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1096_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1096_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1096_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1096_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1096_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1535 data: "
awk 'NR == 3' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1535_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1535_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1535_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1535_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1535_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1535_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1536 data: "
awk 'NR == 4' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1536_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1536_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1536_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1536_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1536_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1536_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1537 data: "
awk 'NR == 5' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1537_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1537_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1537_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1537_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1537_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1537_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1538 data: "
awk 'NR == 6' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1538_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1538_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1538_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1538_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1538_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1538_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1539 data: "
awk 'NR == 7' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1539_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1539_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1539_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1539_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1539_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1539_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1547 data: "
awk 'NR == 8' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1547_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1547_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1547_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1547_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1547_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1547_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1602 data: "
awk 'NR == 9' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1602_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1602_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1602_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1602_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1602_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1602_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS6110 data: "
awk 'NR == 10' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS6110_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS6110_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS6110_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS6110_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS6110_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS6110_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS986 data: "
awk 'NR == 11' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS986_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS986_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS986_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS986_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS986_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS986_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "ISMt1 data: "
awk 'NR == 12' $file
done > /home/pseema/denovo_analysis/result_files/output_files/ISMt1_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/ISMt1_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMt1_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMt1_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMt1_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMt1_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "ISMt2 data: "
awk 'NR == 13' $file
done > /home/pseema/denovo_analysis/result_files/output_files/ISMt2_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/ISMt2_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMt2_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMt2_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMt2_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMt2_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "ISMt3 data: "
awk 'NR == 14' $file
done > /home/pseema/denovo_analysis/result_files/output_files/ISMt3_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/ISMt3_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMt3_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMt3_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMt3_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMt3_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "ISMyca1 data: "
awk 'NR == 15' $file
done > /home/pseema/denovo_analysis/result_files/output_files/ISMyca1_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/ISMyca1_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMyca1_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_ISMyca1_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMyca1_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_ISMyca1_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1553 data: "
awk 'NR == 16' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1553_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1553_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1553_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1553_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1553_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1553_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1554 data: "
awk 'NR == 17' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1554_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1554_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1554_data
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1554_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1554_data
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1554_data /home/pseema/denovo_analysis/result_files/IS_profile
for file in /home/pseema/denovo_analysis/result_files/split_files /isolates*
do
echo -n "IS1557 data: "
#This awk command will pull out IS1557 information for all isolates
awk 'NR == 18' $file
done > /home/pseema/denovo_analysis/result_files/output_files/IS1557_data
paste /home/pseema/denovo_analysis/result_files/output_files/ID_IS /home/pseema/denovo_analysis/result_files/output_files/IS1557_data > /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1557_data
#Pull out all columns except the column 2, which does not contain any useful information.
awk '{$2 = ""; print $0}' /home/pseema/denovo_analysis/result_files/output_files/pasted_IS1557_data > /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1557_data
#Move the clean IS information file into IS_profile
mv /home/pseema/denovo_analysis/result_files/output_files/cleaned_IS1557_data /home/pseema/denovo_analysis/result_files/IS_profile
#The cleaned files are final IS outputs for each isolate
-------------
#!/usr/bin/env bash
# Runs blastn for each of the 18 IS query sequences against every isolate
# genome listed in isolate_list and stores the combined output (stdout and
# stderr) for each IS element in homology_results/<IS name>_blast_result.
# NOTE(review): presumably this is the IS_homology.sh called by the wrapper
# script - confirm.
base_dir=/home/pseema/denovo_analysis
homology_dir=$base_dir/result_files/homology_results

# -p keeps a rerun from failing when the directory already exists.
mkdir -p "$homology_dir"

for is_name in IS1081 IS1096 IS1535 IS1536 IS1537 IS1538 IS1539 IS1547 \
  IS1602 IS6110 IS986 ISMt1 ISMt2 ISMt3 ISMyca1 IS1553 IS1554 IS1557; do
  # One blastn run per isolate; the whole loop's output is shown on the
  # terminal and saved by tee. "2>&1 |" is the portable spelling of "|&", so
  # the script also works when it is started with a plain POSIX sh.
  while read -r isolate; do
    blastn -query "$base_dir/IS_fasta_seq/$is_name" \
      -subject "$base_dir/genome_fasta_files/$isolate.fasta"
  done < "$base_dir/input_files/isolate_list" 2>&1 |
    tee "$homology_dir/${is_name}_blast_result"
done
-------------
#!/usr/bin/env bash
# IS_analysis_wrapper.sh: wrapper for the IS analysis scripts.
# Analyzes the IS distribution among the 51 de novo + 2 reference strains.
# The details of the input files and paths are outlined in README.txt.
# The scripts are looked up relative to the current directory, so run this
# wrapper from the directory that contains them.
# The scripts are started with bash rather than sh because they use bash-only
# syntax such as "|&"; "2>&1 |" below is the portable equivalent of "|&".

############## IS_blast.sh ##############
# Conducts homology of the IS sequences with the 53 genomes.
# Results in copy number and strand-specific distribution of each IS.
# The result is directed into data_file.
echo "IS_blast.sh is running.........."
bash IS_blast.sh 2>&1 | tee data_file

############## IS_data_analysis.sh ##############
# Its input file is data_file. It generates an isolate-specific profile for
# each IS element; these result files are put in a folder IS_profile.
echo "IS_data_analysis.sh is running.........."
bash IS_data_analysis.sh

############## IS_homology.sh ##############
# Generates the homology result of the 18 IS elements with each isolate.
# It is required by the next script to work out positional preference of IS.
echo "IS_homology.sh is running.........."
bash IS_homology.sh

############## position_manipulations.sh ##############
# Reports the genomic locations; output is saved in genomic_position_file.
# NOTE(review): the original comments called this script
# position_manipulation.sh (no trailing "s") - confirm the real file name.
echo "position_manipulations.sh is running.........."
bash position_manipulations.sh 2>&1 | tee genomic_position_file

############## position_hotspots.sh ##############
# Reports the genomic hotspots; output is saved in position_hotspot_file.
echo "position_hotspots.sh is running.........."
bash position_hotspots.sh 2>&1 | tee position_hotspot_file
No comments:
Post a Comment