#Code to find hypothetical proteins with domains and putative function in information pathway
#! /usr/bin
#Finds length of each fasta sequence
#cat /home/pseema/denovo_analysis/input_files/domain_hypothetical | awk '$0 ~ ">" {print c; c=0;printf substr($0,2,100) "\t"; } $0 !~ ">" {c+=length($0);} END { print c; }'
# For every isolate in the isolate list: record the FASTA headers of its
# hypothetical proteins, BLAST the domain-containing reference proteins against
# them, keep the 'Identities' lines, and search for the Rv0060 motif.
result_dir=/home/pseema/denovo_analysis/result_files
input_dir=/home/pseema/denovo_analysis/input_files
rv0060_motif=MITYGSGDLLRADTEALVNTVNCVGVMGKGIALQFKRRYPEMFTAYEKACKRGEVTIGKMFVVDTGQLDGPKHIINFPTKKHWRAPSKLAYIDAGLIDLIRVIRELNIASVAVPPLGVGNGGLDWEDVEQRL

# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no path is built from an empty isolate name.
  [ -n "$isolate" ] || continue
  proteins="$result_dir/$isolate.hypothetical"
  echo "*******Starting $isolate*******"

  # Save the FASTA header lines and print how many there are.
  grep "^>" "$proteins" > "$proteins.header_names"
  wc -l "$proteins.header_names"

  # Run protein BLAST to find homology between the motif-containing
  # hypothetical proteins. The stray `-q` flag was removed so the call has the
  # same form as the helix and coiled-coil searches in this file.
  blastp -query "$input_dir/domain_hypothetical" -subject "$proteins" \
    > "$result_dir/$isolate.domain_homology_info"

  # Remove all the empty lines.
  sed '/^$/d' "$result_dir/$isolate.domain_homology_info" \
    > "$result_dir/$isolate.domain_homology_info_nonempty"

  # Print line/word/byte counts of the 'Identities' lines, then save them.
  grep 'Identities' "$result_dir/$isolate.domain_homology_info_nonempty" | wc
  grep 'Identities' "$result_dir/$isolate.domain_homology_info_nonempty" \
    > "$result_dir/$isolate.domain_identities"

  echo "Sorting based on key 1"
  # NOTE(review): every saved line contains 'Identities'; if they all share the
  # same first field, -k1,1 does not rank hits by identity - confirm intent.
  sort -k1,1 "$result_dir/$isolate.domain_identities" | head -100 \
    > "$result_dir/$isolate.domain_top_hits"
  cat "$result_dir/$isolate.domain_top_hits"

  echo "***Motif of Rv0060 ***"
  # motif_finder.pl is looked up relative to the current working directory.
  perl motif_finder.pl "$proteins" "$rv0060_motif" \
    > "$result_dir/$isolate.domain_motif_Rv0060"
  grep "motif found at position" "$result_dir/$isolate.domain_motif_Rv0060"
  grep "motif found at position" "$result_dir/$isolate.domain_motif_Rv0060" \
    > "$result_dir/$isolate.domain_motif_found_Rv0060"
done < "$input_dir/isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
--------------------------------
#! /usr/bin
#Code to analyze data generated by domain.sh
# List the files in the result directory whose names contain 'found', then
# print the line/word/byte counts of that listing.
result_dir=/home/pseema/denovo_analysis/result_files
find "$result_dir" -name '*found*'
find "$result_dir" -name '*found*' | wc
# Delete empty files from the directory. `-type f` limits the deletion to
# regular files, so zero-size entries of other types are left alone.
find "$result_dir" -type f -size 0 -delete
# For each isolate, count the domain motif-hit files found for it and save the
# list of their paths to all_motifs_<isolate>.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the pattern is never built from an empty name.
  [ -n "$isolate" ] || continue
  # Find the files with the pattern '*domain_motif_found_*' in the file name.
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  find "$result_dir" -name "$isolate.*domain_motif_found_*" | wc
  # NOTE(review): the helix and coiled-coil analysis loops in this file write to
  # the same all_motifs_<isolate> path, so the last one run overwrites the rest.
  find "$result_dir" -name "$isolate.*domain_motif_found_*" \
    > "$result_dir/all_motifs_$isolate"
  cat "$result_dir/all_motifs_$isolate"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
# For each isolate, print how many domain motif-hit files exist and then print
# their contents (the motif locations).
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the glob is never built from an empty name.
  [ -n "$isolate" ] || continue
  echo "********Starting $isolate********"
  hit_count=0
  for hit_file in "$result_dir/$isolate".domain_motif_found_*; do
    # An unmatched glob is left as literal text, so count only real paths.
    if [ -e "$hit_file" ]; then
      hit_count=$((hit_count + 1))
    fi
  done
  echo "$hit_count"
  # Call cat only when there is something to print: with no file arguments it
  # would read stdin, which inside this loop is the isolate list, and the
  # remaining isolates would be swallowed.
  if [ "$hit_count" -gt 0 ]; then
    cat "$result_dir/$isolate".domain_motif_found_*
  fi
  # Reported inside the loop, where $isolate still holds the current name.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/IO_isolates
#done < /home/pseema/denovo_analysis/input_files/EAS_isolates
#done < /home/pseema/denovo_analysis/input_files/EAI_isolates
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
-------------------------------------------------------------------------------------------------
#! /usr/bin
#Code to find hypothetical proteins with membrane helices
# Print "<header><TAB><sequence length>" for each record of the membrane
# reference FASTA file. The header is the text after '>' (at most 100
# characters). Nothing is printed before the first record, and the header is
# passed to printf as data so a '%' in it cannot be read as a format directive.
awk '
  /^>/ {
    if (have_record) print seq_len
    have_record = 1
    seq_len = 0
    printf "%s\t", substr($0, 2, 100)
    next
  }
  { seq_len += length($0) }
  END { if (have_record) print seq_len }
' /home/pseema/denovo_analysis/input_files/membrane_hypothetical
# For every isolate in the isolate list: record the FASTA headers of its
# hypothetical proteins, BLAST the membrane-helix reference proteins against
# them, keep the 'Identities' lines, and search for the Rv0210 motif.
result_dir=/home/pseema/denovo_analysis/result_files
input_dir=/home/pseema/denovo_analysis/input_files
rv0210_motif=LTTLLGAGFGLGIALTLSRLVAG

# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no path is built from an empty isolate name.
  [ -n "$isolate" ] || continue
  proteins="$result_dir/$isolate.hypothetical"
  echo "*******Starting $isolate*******"

  # Save the FASTA header lines and print how many there are.
  grep "^>" "$proteins" > "$proteins.header_names"
  wc -l "$proteins.header_names"

  # Run protein BLAST to find homology between the helix motif-containing
  # hypothetical proteins.
  blastp -query "$input_dir/membrane_hypothetical" -subject "$proteins" \
    > "$result_dir/$isolate.helix_homology_info"

  # Remove all the empty lines.
  sed '/^$/d' "$result_dir/$isolate.helix_homology_info" \
    > "$result_dir/$isolate.helix_homology_info_nonempty"

  # Print line/word/byte counts of the 'Identities' lines, then save them.
  grep 'Identities' "$result_dir/$isolate.helix_homology_info_nonempty" | wc
  grep 'Identities' "$result_dir/$isolate.helix_homology_info_nonempty" \
    > "$result_dir/$isolate.helix_identities"

  echo "Sorting based on key 1"
  # NOTE(review): every saved line contains 'Identities'; if they all share the
  # same first field, -k1,1 does not rank hits by identity - confirm intent.
  sort -k1,1 "$result_dir/$isolate.helix_identities" | head -100 \
    > "$result_dir/$isolate.helix_top_hits"
  cat "$result_dir/$isolate.helix_top_hits"

  #######################################################
  #Add 68 hypothetical protein membrane motif. It might not be as conserved as that coiled coil
  echo "***Motif of Rv0210 ()***"
  # The redirection must be part of the perl command. On a line of its own it
  # only creates an empty file while the motif report goes to the terminal.
  # motif_finder.pl is looked up relative to the current working directory.
  perl motif_finder.pl "$proteins" "$rv0210_motif" \
    > "$result_dir/$isolate.helix_motif_Rv0210"
  # NOTE(review): the domain and coiled-coil searches in this file grep for
  # "motif found at position"; confirm motif_finder.pl really prints
  # "motif obtained at position", otherwise these greps never match.
  grep "motif obtained at position" "$result_dir/$isolate.helix_motif_Rv0210"
  grep "motif obtained at position" "$result_dir/$isolate.helix_motif_Rv0210" \
    > "$result_dir/$isolate.helix_motifs_obtained_Rv0210"
done < "$input_dir/isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
------------------------------------
#! /usr/bin
#Code to analyze data generated by helixed.sh
# List the files in the result directory whose names contain 'obtained', then
# print the line/word/byte counts of that listing.
result_dir=/home/pseema/denovo_analysis/result_files
find "$result_dir" -name '*obtained*'
find "$result_dir" -name '*obtained*' | wc
# Delete empty files from the directory. `-type f` limits the deletion to
# regular files, so zero-size entries of other types are left alone.
find "$result_dir" -type f -size 0 -delete
# For each isolate, count the helix motif-hit files found for it and save the
# list of their paths to all_motifs_<isolate>.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the pattern is never built from an empty name.
  [ -n "$isolate" ] || continue
  # Find the files with the pattern '*helix_motifs_obtained_*' in the file name.
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  find "$result_dir" -name "$isolate.*helix_motifs_obtained_*" | wc
  # NOTE(review): the domain and coiled-coil analysis loops in this file write
  # to the same all_motifs_<isolate> path, so the last one run overwrites the rest.
  find "$result_dir" -name "$isolate.*helix_motifs_obtained_*" \
    > "$result_dir/all_motifs_$isolate"
  cat "$result_dir/all_motifs_$isolate"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
# For each isolate, print how many helix motif-hit files exist and then print
# their contents (the motif locations). The glob uses the
# <isolate>.helix_motifs_obtained_<gene> name under which the search step
# stores its hits.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the glob is never built from an empty name.
  [ -n "$isolate" ] || continue
  echo "********Starting $isolate********"
  hit_count=0
  for hit_file in "$result_dir/$isolate".helix_motifs_obtained_*; do
    # An unmatched glob is left as literal text, so count only real paths.
    if [ -e "$hit_file" ]; then
      hit_count=$((hit_count + 1))
    fi
  done
  echo "$hit_count"
  # Call cat only when there is something to print: with no file arguments it
  # would read stdin, which inside this loop is the isolate list, and the
  # remaining isolates would be swallowed.
  if [ "$hit_count" -gt 0 ]; then
    cat "$result_dir/$isolate".helix_motifs_obtained_*
  fi
  # Reported inside the loop, where $isolate still holds the current name.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/IO_isolates
#done < /home/pseema/denovo_analysis/input_files/EAS_isolates
#done < /home/pseema/denovo_analysis/input_files/EAI_isolates
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
--------------------------------
#! /usr/bin
#Code to find hypothetical proteins with coiled coils
# Print "<header><TAB><sequence length>" for each record of the coiled-coil
# reference FASTA file. The header is the text after '>' (at most 100
# characters). Nothing is printed before the first record, and the header is
# passed to printf as data so a '%' in it cannot be read as a format directive.
awk '
  /^>/ {
    if (have_record) print seq_len
    have_record = 1
    seq_len = 0
    printf "%s\t", substr($0, 2, 100)
    next
  }
  { seq_len += length($0) }
  END { if (have_record) print seq_len }
' /home/pseema/denovo_analysis/input_files/coiled_coil_hypothetical
# For every isolate in the isolate list: record the FASTA headers of its
# hypothetical proteins, BLAST the coiled-coil reference proteins against them,
# keep the 'Identities' lines, and search for the Rv0047c motif.
result_dir=/home/pseema/denovo_analysis/result_files
input_dir=/home/pseema/denovo_analysis/input_files
rv0047c_motif=AEARMRILEGRRRQVEERREGLREAVARASSSFDRYTRQLHQL

# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no path is built from an empty isolate name.
  [ -n "$isolate" ] || continue
  proteins="$result_dir/$isolate.hypothetical"
  echo "*******Starting $isolate*******"

  # Save the FASTA header lines and print how many there are.
  grep "^>" "$proteins" > "$proteins.header_names"
  wc -l "$proteins.header_names"

  # Run protein BLAST to find homology between the motif-containing
  # hypothetical proteins.
  blastp -query "$input_dir/coiled_coil_hypothetical" -subject "$proteins" \
    > "$result_dir/$isolate.coil_homology_info"

  # Remove all the empty lines.
  sed '/^$/d' "$result_dir/$isolate.coil_homology_info" \
    > "$result_dir/$isolate.coil_homology_info_nonempty"

  # Print line/word/byte counts of the 'Identities' lines, then save them.
  grep 'Identities' "$result_dir/$isolate.coil_homology_info_nonempty" | wc
  grep 'Identities' "$result_dir/$isolate.coil_homology_info_nonempty" \
    > "$result_dir/$isolate.coil_identities"

  echo "Sorting based on key 1"
  # NOTE(review): every saved line contains 'Identities'; if they all share the
  # same first field, -k1,1 does not rank hits by identity - confirm intent.
  sort -k1,1 "$result_dir/$isolate.coil_identities" | head -100 \
    > "$result_dir/$isolate.coil_top_hits"
  cat "$result_dir/$isolate.coil_top_hits"

  echo "***Motif of Rv0047c (43aa)***"
  # motif_finder.pl is looked up relative to the current working directory.
  perl motif_finder.pl "$proteins" "$rv0047c_motif" \
    > "$result_dir/$isolate.coiled_motif_Rv0047c"
  grep "motif found at position" "$result_dir/$isolate.coiled_motif_Rv0047c"
  # Hits are stored as <isolate>.coiled_motif_found_<gene>: that is the name the
  # '*coiled_motif_found_*' patterns of the analysis step look for, and it
  # follows the domain_motif_found_<gene> convention of the domain search.
  grep "motif found at position" "$result_dir/$isolate.coiled_motif_Rv0047c" \
    > "$result_dir/$isolate.coiled_motif_found_Rv0047c"
  #Do similarly for other motifs
done < "$input_dir/isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
------------------------------------------
#! /usr/bin
#Code to analyze data generated by coiled.sh
# List the files in the result directory whose names contain 'found', then
# print the line/word/byte counts of that listing.
result_dir=/home/pseema/denovo_analysis/result_files
find "$result_dir" -name '*found*'
find "$result_dir" -name '*found*' | wc
# Delete empty files from the directory. `-type f` limits the deletion to
# regular files, so zero-size entries of other types are left alone.
find "$result_dir" -type f -size 0 -delete
# For each isolate, count the coiled-coil motif-hit files found for it and save
# the list of their paths to all_motifs_<isolate>.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the pattern is never built from an empty name.
  [ -n "$isolate" ] || continue
  # Find the files with the pattern '*coiled_motif_found_*' in the file name.
  # Only hit files named <isolate>.<...>coiled_motif_found_<...> are counted, so
  # the search step has to store its hits under such a name.
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  find "$result_dir" -name "$isolate.*coiled_motif_found_*" | wc
  # NOTE(review): the domain and helix analysis loops in this file write to the
  # same all_motifs_<isolate> path, so the last one run overwrites the rest.
  find "$result_dir" -name "$isolate.*coiled_motif_found_*" \
    > "$result_dir/all_motifs_$isolate"
  cat "$result_dir/all_motifs_$isolate"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
# For each isolate, print how many coiled-coil motif-hit files exist and then
# print their contents (the motif locations). Only files named
# <isolate>.coiled_motif_found_<...> are considered.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the glob is never built from an empty name.
  [ -n "$isolate" ] || continue
  echo "********Starting $isolate********"
  hit_count=0
  for hit_file in "$result_dir/$isolate".coiled_motif_found_*; do
    # An unmatched glob is left as literal text, so count only real paths.
    if [ -e "$hit_file" ]; then
      hit_count=$((hit_count + 1))
    fi
  done
  echo "$hit_count"
  # Call cat only when there is something to print: with no file arguments it
  # would read stdin, which inside this loop is the isolate list, and the
  # remaining isolates would be swallowed.
  if [ "$hit_count" -gt 0 ]; then
    cat "$result_dir/$isolate".coiled_motif_found_*
  fi
  # Reported inside the loop, where $isolate still holds the current name.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/IO_isolates
#done < /home/pseema/denovo_analysis/input_files/EAS_isolates
#done < /home/pseema/denovo_analysis/input_files/EAI_isolates
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
#! /usr/bin
#Finds length of each fasta sequence
#cat /home/pseema/denovo_analysis/input_files/domain_hypothetical | awk '$0 ~ ">" {print c; c=0;printf substr($0,2,100) "\t"; } $0 !~ ">" {c+=length($0);} END { print c; }'
# For every isolate in the isolate list: record the FASTA headers of its
# hypothetical proteins, BLAST the domain-containing reference proteins against
# them, keep the 'Identities' lines, and search for the Rv0060 motif.
result_dir=/home/pseema/denovo_analysis/result_files
input_dir=/home/pseema/denovo_analysis/input_files
rv0060_motif=MITYGSGDLLRADTEALVNTVNCVGVMGKGIALQFKRRYPEMFTAYEKACKRGEVTIGKMFVVDTGQLDGPKHIINFPTKKHWRAPSKLAYIDAGLIDLIRVIRELNIASVAVPPLGVGNGGLDWEDVEQRL

# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no path is built from an empty isolate name.
  [ -n "$isolate" ] || continue
  proteins="$result_dir/$isolate.hypothetical"
  echo "*******Starting $isolate*******"

  # Save the FASTA header lines and print how many there are.
  grep "^>" "$proteins" > "$proteins.header_names"
  wc -l "$proteins.header_names"

  # Run protein BLAST to find homology between the motif-containing
  # hypothetical proteins. The stray `-q` flag was removed so the call has the
  # same form as the helix and coiled-coil searches in this file.
  blastp -query "$input_dir/domain_hypothetical" -subject "$proteins" \
    > "$result_dir/$isolate.domain_homology_info"

  # Remove all the empty lines.
  sed '/^$/d' "$result_dir/$isolate.domain_homology_info" \
    > "$result_dir/$isolate.domain_homology_info_nonempty"

  # Print line/word/byte counts of the 'Identities' lines, then save them.
  grep 'Identities' "$result_dir/$isolate.domain_homology_info_nonempty" | wc
  grep 'Identities' "$result_dir/$isolate.domain_homology_info_nonempty" \
    > "$result_dir/$isolate.domain_identities"

  echo "Sorting based on key 1"
  # NOTE(review): every saved line contains 'Identities'; if they all share the
  # same first field, -k1,1 does not rank hits by identity - confirm intent.
  sort -k1,1 "$result_dir/$isolate.domain_identities" | head -100 \
    > "$result_dir/$isolate.domain_top_hits"
  cat "$result_dir/$isolate.domain_top_hits"

  echo "***Motif of Rv0060 ***"
  # motif_finder.pl is looked up relative to the current working directory.
  perl motif_finder.pl "$proteins" "$rv0060_motif" \
    > "$result_dir/$isolate.domain_motif_Rv0060"
  grep "motif found at position" "$result_dir/$isolate.domain_motif_Rv0060"
  grep "motif found at position" "$result_dir/$isolate.domain_motif_Rv0060" \
    > "$result_dir/$isolate.domain_motif_found_Rv0060"
done < "$input_dir/isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
--------------------------------
#! /usr/bin
#Code to analyze data generated by domain.sh
# List the files in the result directory whose names contain 'found', then
# print the line/word/byte counts of that listing.
result_dir=/home/pseema/denovo_analysis/result_files
find "$result_dir" -name '*found*'
find "$result_dir" -name '*found*' | wc
# Delete empty files from the directory. `-type f` limits the deletion to
# regular files, so zero-size entries of other types are left alone.
find "$result_dir" -type f -size 0 -delete
# For each isolate, count the domain motif-hit files found for it and save the
# list of their paths to all_motifs_<isolate>.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the pattern is never built from an empty name.
  [ -n "$isolate" ] || continue
  # Find the files with the pattern '*domain_motif_found_*' in the file name.
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  find "$result_dir" -name "$isolate.*domain_motif_found_*" | wc
  # NOTE(review): the helix and coiled-coil analysis loops in this file write to
  # the same all_motifs_<isolate> path, so the last one run overwrites the rest.
  find "$result_dir" -name "$isolate.*domain_motif_found_*" \
    > "$result_dir/all_motifs_$isolate"
  cat "$result_dir/all_motifs_$isolate"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
# For each isolate, print how many domain motif-hit files exist and then print
# their contents (the motif locations).
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the glob is never built from an empty name.
  [ -n "$isolate" ] || continue
  echo "********Starting $isolate********"
  hit_count=0
  for hit_file in "$result_dir/$isolate".domain_motif_found_*; do
    # An unmatched glob is left as literal text, so count only real paths.
    if [ -e "$hit_file" ]; then
      hit_count=$((hit_count + 1))
    fi
  done
  echo "$hit_count"
  # Call cat only when there is something to print: with no file arguments it
  # would read stdin, which inside this loop is the isolate list, and the
  # remaining isolates would be swallowed.
  if [ "$hit_count" -gt 0 ]; then
    cat "$result_dir/$isolate".domain_motif_found_*
  fi
  # Reported inside the loop, where $isolate still holds the current name.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/IO_isolates
#done < /home/pseema/denovo_analysis/input_files/EAS_isolates
#done < /home/pseema/denovo_analysis/input_files/EAI_isolates
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
-------------------------------------------------------------------------------------------------
#! /usr/bin
#Code to find hypothetical proteins with membrane helices
# Print "<header><TAB><sequence length>" for each record of the membrane
# reference FASTA file. The header is the text after '>' (at most 100
# characters). Nothing is printed before the first record, and the header is
# passed to printf as data so a '%' in it cannot be read as a format directive.
awk '
  /^>/ {
    if (have_record) print seq_len
    have_record = 1
    seq_len = 0
    printf "%s\t", substr($0, 2, 100)
    next
  }
  { seq_len += length($0) }
  END { if (have_record) print seq_len }
' /home/pseema/denovo_analysis/input_files/membrane_hypothetical
# For every isolate in the isolate list: record the FASTA headers of its
# hypothetical proteins, BLAST the membrane-helix reference proteins against
# them, keep the 'Identities' lines, and search for the Rv0210 motif.
result_dir=/home/pseema/denovo_analysis/result_files
input_dir=/home/pseema/denovo_analysis/input_files
rv0210_motif=LTTLLGAGFGLGIALTLSRLVAG

# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no path is built from an empty isolate name.
  [ -n "$isolate" ] || continue
  proteins="$result_dir/$isolate.hypothetical"
  echo "*******Starting $isolate*******"

  # Save the FASTA header lines and print how many there are.
  grep "^>" "$proteins" > "$proteins.header_names"
  wc -l "$proteins.header_names"

  # Run protein BLAST to find homology between the helix motif-containing
  # hypothetical proteins.
  blastp -query "$input_dir/membrane_hypothetical" -subject "$proteins" \
    > "$result_dir/$isolate.helix_homology_info"

  # Remove all the empty lines.
  sed '/^$/d' "$result_dir/$isolate.helix_homology_info" \
    > "$result_dir/$isolate.helix_homology_info_nonempty"

  # Print line/word/byte counts of the 'Identities' lines, then save them.
  grep 'Identities' "$result_dir/$isolate.helix_homology_info_nonempty" | wc
  grep 'Identities' "$result_dir/$isolate.helix_homology_info_nonempty" \
    > "$result_dir/$isolate.helix_identities"

  echo "Sorting based on key 1"
  # NOTE(review): every saved line contains 'Identities'; if they all share the
  # same first field, -k1,1 does not rank hits by identity - confirm intent.
  sort -k1,1 "$result_dir/$isolate.helix_identities" | head -100 \
    > "$result_dir/$isolate.helix_top_hits"
  cat "$result_dir/$isolate.helix_top_hits"

  #######################################################
  #Add 68 hypothetical protein membrane motif. It might not be as conserved as that coiled coil
  echo "***Motif of Rv0210 ()***"
  # The redirection must be part of the perl command. On a line of its own it
  # only creates an empty file while the motif report goes to the terminal.
  # motif_finder.pl is looked up relative to the current working directory.
  perl motif_finder.pl "$proteins" "$rv0210_motif" \
    > "$result_dir/$isolate.helix_motif_Rv0210"
  # NOTE(review): the domain and coiled-coil searches in this file grep for
  # "motif found at position"; confirm motif_finder.pl really prints
  # "motif obtained at position", otherwise these greps never match.
  grep "motif obtained at position" "$result_dir/$isolate.helix_motif_Rv0210"
  grep "motif obtained at position" "$result_dir/$isolate.helix_motif_Rv0210" \
    > "$result_dir/$isolate.helix_motifs_obtained_Rv0210"
done < "$input_dir/isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
------------------------------------
#! /usr/bin
#Code to analyze data generated by helixed.sh
# List the files in the result directory whose names contain 'obtained', then
# print the line/word/byte counts of that listing.
result_dir=/home/pseema/denovo_analysis/result_files
find "$result_dir" -name '*obtained*'
find "$result_dir" -name '*obtained*' | wc
# Delete empty files from the directory. `-type f` limits the deletion to
# regular files, so zero-size entries of other types are left alone.
find "$result_dir" -type f -size 0 -delete
# For each isolate, count the helix motif-hit files found for it and save the
# list of their paths to all_motifs_<isolate>.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the pattern is never built from an empty name.
  [ -n "$isolate" ] || continue
  # Find the files with the pattern '*helix_motifs_obtained_*' in the file name.
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  find "$result_dir" -name "$isolate.*helix_motifs_obtained_*" | wc
  # NOTE(review): the domain and coiled-coil analysis loops in this file write
  # to the same all_motifs_<isolate> path, so the last one run overwrites the rest.
  find "$result_dir" -name "$isolate.*helix_motifs_obtained_*" \
    > "$result_dir/all_motifs_$isolate"
  cat "$result_dir/all_motifs_$isolate"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
# For each isolate, print how many helix motif-hit files exist and then print
# their contents (the motif locations). The glob uses the
# <isolate>.helix_motifs_obtained_<gene> name under which the search step
# stores its hits.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the glob is never built from an empty name.
  [ -n "$isolate" ] || continue
  echo "********Starting $isolate********"
  hit_count=0
  for hit_file in "$result_dir/$isolate".helix_motifs_obtained_*; do
    # An unmatched glob is left as literal text, so count only real paths.
    if [ -e "$hit_file" ]; then
      hit_count=$((hit_count + 1))
    fi
  done
  echo "$hit_count"
  # Call cat only when there is something to print: with no file arguments it
  # would read stdin, which inside this loop is the isolate list, and the
  # remaining isolates would be swallowed.
  if [ "$hit_count" -gt 0 ]; then
    cat "$result_dir/$isolate".helix_motifs_obtained_*
  fi
  # Reported inside the loop, where $isolate still holds the current name.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/IO_isolates
#done < /home/pseema/denovo_analysis/input_files/EAS_isolates
#done < /home/pseema/denovo_analysis/input_files/EAI_isolates
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
--------------------------------
#! /usr/bin
#Code to find hypothetical proteins with coiled coils
# Print "<header><TAB><sequence length>" for each record of the coiled-coil
# reference FASTA file. The header is the text after '>' (at most 100
# characters). Nothing is printed before the first record, and the header is
# passed to printf as data so a '%' in it cannot be read as a format directive.
awk '
  /^>/ {
    if (have_record) print seq_len
    have_record = 1
    seq_len = 0
    printf "%s\t", substr($0, 2, 100)
    next
  }
  { seq_len += length($0) }
  END { if (have_record) print seq_len }
' /home/pseema/denovo_analysis/input_files/coiled_coil_hypothetical
# For every isolate in the isolate list: record the FASTA headers of its
# hypothetical proteins, BLAST the coiled-coil reference proteins against them,
# keep the 'Identities' lines, and search for the Rv0047c motif.
result_dir=/home/pseema/denovo_analysis/result_files
input_dir=/home/pseema/denovo_analysis/input_files
rv0047c_motif=AEARMRILEGRRRQVEERREGLREAVARASSSFDRYTRQLHQL

# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no path is built from an empty isolate name.
  [ -n "$isolate" ] || continue
  proteins="$result_dir/$isolate.hypothetical"
  echo "*******Starting $isolate*******"

  # Save the FASTA header lines and print how many there are.
  grep "^>" "$proteins" > "$proteins.header_names"
  wc -l "$proteins.header_names"

  # Run protein BLAST to find homology between the motif-containing
  # hypothetical proteins.
  blastp -query "$input_dir/coiled_coil_hypothetical" -subject "$proteins" \
    > "$result_dir/$isolate.coil_homology_info"

  # Remove all the empty lines.
  sed '/^$/d' "$result_dir/$isolate.coil_homology_info" \
    > "$result_dir/$isolate.coil_homology_info_nonempty"

  # Print line/word/byte counts of the 'Identities' lines, then save them.
  grep 'Identities' "$result_dir/$isolate.coil_homology_info_nonempty" | wc
  grep 'Identities' "$result_dir/$isolate.coil_homology_info_nonempty" \
    > "$result_dir/$isolate.coil_identities"

  echo "Sorting based on key 1"
  # NOTE(review): every saved line contains 'Identities'; if they all share the
  # same first field, -k1,1 does not rank hits by identity - confirm intent.
  sort -k1,1 "$result_dir/$isolate.coil_identities" | head -100 \
    > "$result_dir/$isolate.coil_top_hits"
  cat "$result_dir/$isolate.coil_top_hits"

  echo "***Motif of Rv0047c (43aa)***"
  # motif_finder.pl is looked up relative to the current working directory.
  perl motif_finder.pl "$proteins" "$rv0047c_motif" \
    > "$result_dir/$isolate.coiled_motif_Rv0047c"
  grep "motif found at position" "$result_dir/$isolate.coiled_motif_Rv0047c"
  # Hits are stored as <isolate>.coiled_motif_found_<gene>: that is the name the
  # '*coiled_motif_found_*' patterns of the analysis step look for, and it
  # follows the domain_motif_found_<gene> convention of the domain search.
  grep "motif found at position" "$result_dir/$isolate.coiled_motif_Rv0047c" \
    > "$result_dir/$isolate.coiled_motif_found_Rv0047c"
  #Do similarly for other motifs
done < "$input_dir/isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
------------------------------------------
#! /usr/bin
#Code to analyze data generated by coiled.sh
# List the files in the result directory whose names contain 'found', then
# print the line/word/byte counts of that listing.
result_dir=/home/pseema/denovo_analysis/result_files
find "$result_dir" -name '*found*'
find "$result_dir" -name '*found*' | wc
# Delete empty files from the directory. `-type f` limits the deletion to
# regular files, so zero-size entries of other types are left alone.
find "$result_dir" -type f -size 0 -delete
# For each isolate, count the coiled-coil motif-hit files found for it and save
# the list of their paths to all_motifs_<isolate>.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
# `read -r` keeps backslashes literal; the `|| [ -n ... ]` clause still handles
# a last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the pattern is never built from an empty name.
  [ -n "$isolate" ] || continue
  # Find the files with the pattern '*coiled_motif_found_*' in the file name.
  # Only hit files named <isolate>.<...>coiled_motif_found_<...> are counted, so
  # the search step has to store its hits under such a name.
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  find "$result_dir" -name "$isolate.*coiled_motif_found_*" | wc
  # NOTE(review): the domain and helix analysis loops in this file write to the
  # same all_motifs_<isolate> path, so the last one run overwrites the rest.
  find "$result_dir" -name "$isolate.*coiled_motif_found_*" \
    > "$result_dir/all_motifs_$isolate"
  cat "$result_dir/all_motifs_$isolate"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
# For each isolate, print how many coiled-coil motif-hit files exist and then
# print their contents (the motif locations). Only files named
# <isolate>.coiled_motif_found_<...> are considered.
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so the glob is never built from an empty name.
  [ -n "$isolate" ] || continue
  echo "********Starting $isolate********"
  hit_count=0
  for hit_file in "$result_dir/$isolate".coiled_motif_found_*; do
    # An unmatched glob is left as literal text, so count only real paths.
    if [ -e "$hit_file" ]; then
      hit_count=$((hit_count + 1))
    fi
  done
  echo "$hit_count"
  # Call cat only when there is something to print: with no file arguments it
  # would read stdin, which inside this loop is the isolate list, and the
  # remaining isolates would be swallowed.
  if [ "$hit_count" -gt 0 ]; then
    cat "$result_dir/$isolate".coiled_motif_found_*
  fi
  # Reported inside the loop, where $isolate still holds the current name.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
#done < /home/pseema/denovo_analysis/input_files/IO_isolates
#done < /home/pseema/denovo_analysis/input_files/EAS_isolates
#done < /home/pseema/denovo_analysis/input_files/EAI_isolates
#done < /home/pseema/denovo_analysis/input_files/EAM_isolates
No comments:
Post a Comment