Thursday, July 20, 2017

MY SCRIPT (4): domain, helix, and coiled-coil motif analysis

#!/usr/bin/env bash
# domain.sh: find hypothetical proteins with domains and a putative function
# in the information pathway.
#
# For each isolate named in the isolate list (one name per line):
#   1. collect the FASTA header lines of <isolate>.hypothetical,
#   2. run blastp with domain_hypothetical as query and the isolate's
#      hypothetical proteins as subject,
#   3. keep the 'Identities' lines of the BLAST report, sort them and store
#      the first 100,
#   4. run motif_finder.pl with the Rv0060 motif and keep the reported hits.
#
# Needs blastp and perl on PATH. motif_finder.pl is looked up relative to the
# current working directory, so start the script from the directory holding it.

input_dir=/home/pseema/denovo_analysis/input_files
result_dir=/home/pseema/denovo_analysis/result_files
domain_query="$input_dir/domain_hypothetical"
# Alternative isolate subset: "$input_dir/EAM_isolates"
isolate_list="$input_dir/isolate_list"
rv0060_motif=MITYGSGDLLRADTEALVNTVNCVGVMGKGIALQFKRRYPEMFTAYEKACKRGEVTIGKMFVVDTGQLDGPKHIINFPTKKHWRAPSKLAYIDAGLIDLIRVIRELNIASVAVPPLGVGNGGLDWEDVEQRL

#Finds length of each fasta sequence (disabled)
#cat /home/pseema/denovo_analysis/input_files/domain_hypothetical |  awk '$0 ~ ">" {print c; c=0;printf substr($0,2,100) "\t"; } $0 !~ ">" {c+=length($0);} END { print c; }'

#While loop to find the domain motifs in all the isolates.
# read -r keeps backslashes literal; the "|| [ -n ... ]" part still processes a
# last line that has no trailing newline.
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no paths such as ".../.hypothetical" are built.
  [ -n "$isolate" ] || continue

  echo "*******Starting $isolate*******"
  proteins="$result_dir/$isolate.hypothetical"
  prefix="$result_dir/$isolate"

  # FASTA headers of the isolate's hypothetical proteins, plus their count.
  grep "^>" "$proteins" > "$proteins.header_names"
  #cat "$proteins.header_names"
  wc -l "$proteins.header_names"

  #Run protein BLAST to find homology between the motif-containing hypothetical proteins
  # The stray "-q" that preceded -query was removed: the helix and coiled-coil
  # scripts call blastp with -query/-subject only.
  blastp -query "$domain_query" -subject "$proteins" > "$prefix.domain_homology_info"

  #Removes all the empty lines
  sed '/^$/d' "$prefix.domain_homology_info" > "$prefix.domain_homology_info_nonempty"

  # Keep the lines containing 'Identities' and print their wc counts.
  grep 'Identities' "$prefix.domain_homology_info_nonempty" > "$prefix.domain_identities"
  wc < "$prefix.domain_identities"

  echo "Sorting based on key 1"
  # NOTE(review): every kept line contains the same leading word, so key 1
  # presumably ties and sort's whole-line fallback decides the order; confirm
  # this is the intended ranking for "top hits".
  sort -k1,1 "$prefix.domain_identities" | head -100 > "$prefix.domain_top_hits"
  cat "$prefix.domain_top_hits"

  echo "***Motif of Rv0060 ***"
  perl motif_finder.pl "$proteins" "$rv0060_motif" > "$prefix.domain_motif_Rv0060"
  # Show the hit lines and store the same lines for the analysis script.
  grep "motif found at position" "$prefix.domain_motif_Rv0060" \
    | tee "$prefix.domain_motif_found_Rv0060"

done < "$isolate_list"
--------------------------------
#!/usr/bin/env bash
#Code to analyze data generated by domain.sh
#
# Lists the result files whose name contains 'found', removes empty result
# files, then writes one index file per isolate (all_motifs_<isolate>) listing
# that isolate's domain_motif_found_ files.
# NOTE(review): the helix and coiled-coil analysis scripts write to the same
# all_motifs_<isolate> path, so each run overwrites the previous listing.

result_dir=/home/pseema/denovo_analysis/result_files
# Alternative isolate subset: /home/pseema/denovo_analysis/input_files/EAM_isolates
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list

#Find files in the directory that have pattern 'found' in their name
find "$result_dir" -name '*found*'
find "$result_dir" -name '*found*' | wc

#Delete empty files from the directory
# -type f restricts the deletion to regular files, as the comment intends.
find "$result_dir" -type f -size 0 -delete

#While loop to analyze domain motifs in all the isolates
while read -r isolate || [ -n "$isolate" ]; do
  # Ignore blank lines in the isolate list.
  [ -n "$isolate" ] || continue

  #Find the files with the pattern '*domain_motif_found_*' in file name
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  motif_index="$result_dir/all_motifs_$isolate"
  # The pattern is quoted so find, not the shell, expands the wildcards.
  find "$result_dir" -name "${isolate}.*domain_motif_found_*" | wc
  find "$result_dir" -name "${isolate}.*domain_motif_found_*" > "$motif_index"
  cat "$motif_index"
done < "$isolate_list"

##While loop to analyze the domain motif locations in all the isolates
# For every isolate: print how many <isolate>.domain_motif_found_* files exist
# and then print their contents.
# Other isolate lists that can be substituted below:
#   /home/pseema/denovo_analysis/input_files/IO_isolates
#   /home/pseema/denovo_analysis/input_files/EAS_isolates
#   /home/pseema/denovo_analysis/input_files/EAI_isolates
#   /home/pseema/denovo_analysis/input_files/EAM_isolates
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list

while read -r isolate || [ -n "$isolate" ]; do
  # Ignore blank lines in the isolate list.
  [ -n "$isolate" ] || continue

  echo "********Starting $isolate********"

  # Collect the matching files in the positional parameters (works in any
  # POSIX shell). An unmatched glob stays literal, so drop it when the first
  # entry does not exist.
  set -- "$result_dir/$isolate".domain_motif_found_*
  [ -e "$1" ] || set --

  # Number of motif files for this isolate.
  echo "$#"

  # Only call cat when there is something to print: cat without operands reads
  # stdin, which inside this loop is the isolate list itself and would swallow
  # the remaining isolates.
  if [ "$#" -gt 0 ]; then
    cat -- "$@"
  fi

  # Reported per isolate; after the loop $isolate is already empty.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
-------------------------------------------------------------------------------------------------
#!/usr/bin/env bash
#Code to find hypothetical proteins with membrane helices
#
# Prints the length of every sequence in membrane_hypothetical, then for each
# isolate in the isolate list (one name per line):
#   1. collects the FASTA header lines of <isolate>.hypothetical,
#   2. runs blastp with membrane_hypothetical as query and the isolate's
#      hypothetical proteins as subject,
#   3. keeps the 'Identities' lines of the report, sorts them and stores the
#      first 100,
#   4. runs motif_finder.pl with the Rv0210 motif and keeps the reported hits.
#
# Needs blastp and perl on PATH. motif_finder.pl is looked up relative to the
# current working directory.

input_dir=/home/pseema/denovo_analysis/input_files
result_dir=/home/pseema/denovo_analysis/result_files
helix_query="$input_dir/membrane_hypothetical"
# Alternative isolate subset: "$input_dir/EAM_isolates"
isolate_list="$input_dir/isolate_list"
rv0210_motif=LTTLLGAGFGLGIALTLSRLVAG

#Finds length of each fasta sequence
# Prints "<header without '>' (max 100 chars)><TAB><sequence length>" per record.
# The header is passed as printf data (not as the format string) so a '%' in a
# header cannot corrupt the output, and no blank line is printed before the
# first record.
awk '
  /^>/ {
    if (seen) print len
    seen = 1
    len = 0
    printf "%s\t", substr($0, 2, 100)
    next
  }
  { len += length($0) }
  END { if (seen) print len }
' "$helix_query"

#While loop to find the helix motifs in all the isolates
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no paths such as ".../.hypothetical" are built.
  [ -n "$isolate" ] || continue

  echo "*******Starting $isolate*******"
  proteins="$result_dir/$isolate.hypothetical"
  prefix="$result_dir/$isolate"

  # FASTA headers of the isolate's hypothetical proteins, plus their count.
  grep "^>" "$proteins" > "$proteins.header_names"
  #cat "$proteins.header_names"
  wc -l "$proteins.header_names"

  #Run protein BLAST to find homology between the helix motif-containing hypothetical proteins
  blastp -query "$helix_query" -subject "$proteins" > "$prefix.helix_homology_info"

  #Removes all the empty lines
  sed '/^$/d' "$prefix.helix_homology_info" > "$prefix.helix_homology_info_nonempty"

  # Keep the lines containing 'Identities' and print their wc counts.
  grep 'Identities' "$prefix.helix_homology_info_nonempty" > "$prefix.helix_identities"
  wc < "$prefix.helix_identities"

  echo "Sorting based on key 1"
  sort -k1,1 "$prefix.helix_identities" | head -100 > "$prefix.helix_top_hits"
  cat "$prefix.helix_top_hits"

  #######################################################
  #Add 68 hypothetical protein membrane motif. It might not be as conserved as that coiled coil
  echo "***Motif of Rv0210 ()***"
  # The redirect must stay on the same command line as perl. On a line of its
  # own it only truncates the result file while perl prints to the terminal.
  perl motif_finder.pl "$proteins" "$rv0210_motif" > "$prefix.helix_motif_Rv0210"

  # NOTE(review): the domain and coiled-coil scripts grep the same
  # motif_finder.pl output for "motif found at position"; confirm which phrase
  # the Perl script really prints, otherwise this filter never matches.
  grep "motif obtained at position" "$prefix.helix_motif_Rv0210" \
    | tee "$prefix.helix_motifs_obtained_Rv0210"

done < "$isolate_list"
------------------------------------
#!/usr/bin/env bash
#Code to analyze data generated by helixed.sh
#
# Lists the result files whose name contains 'obtained', removes empty result
# files, then writes one index file per isolate (all_motifs_<isolate>) listing
# that isolate's helix_motifs_obtained_ files.
# NOTE(review): the domain and coiled-coil analysis scripts write to the same
# all_motifs_<isolate> path, so each run overwrites the previous listing.

result_dir=/home/pseema/denovo_analysis/result_files
# Alternative isolate subset: /home/pseema/denovo_analysis/input_files/EAM_isolates
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list

#Find files in the directory that have pattern 'obtained' in their name
find "$result_dir" -name '*obtained*'
find "$result_dir" -name '*obtained*' | wc

#Delete empty files from the directory
# -type f restricts the deletion to regular files, as the comment intends.
find "$result_dir" -type f -size 0 -delete

#While loop to analyze helix motifs in all the isolates
while read -r isolate || [ -n "$isolate" ]; do
  # Ignore blank lines in the isolate list.
  [ -n "$isolate" ] || continue

  #Find the files with the pattern '*helix_motifs_obtained_*' in file name
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  motif_index="$result_dir/all_motifs_$isolate"
  # The pattern is quoted so find, not the shell, expands the wildcards.
  find "$result_dir" -name "${isolate}.*helix_motifs_obtained_*" | wc
  find "$result_dir" -name "${isolate}.*helix_motifs_obtained_*" > "$motif_index"
  cat "$motif_index"
done < "$isolate_list"

##While loop to analyze the helix motif locations in all the isolates
# For every isolate: print how many <isolate>.helix_motifs_obtained_* files
# exist and then print their contents. The glob uses the "helix_motifs_obtained_"
# spelling, which is the name the helix finder script gives its hit files and
# the name the listing loop of this script searches for.
# Other isolate lists that can be substituted below:
#   /home/pseema/denovo_analysis/input_files/IO_isolates
#   /home/pseema/denovo_analysis/input_files/EAS_isolates
#   /home/pseema/denovo_analysis/input_files/EAI_isolates
#   /home/pseema/denovo_analysis/input_files/EAM_isolates
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list

while read -r isolate || [ -n "$isolate" ]; do
  # Ignore blank lines in the isolate list.
  [ -n "$isolate" ] || continue

  echo "********Starting $isolate********"

  # Collect the matching files in the positional parameters (works in any
  # POSIX shell). An unmatched glob stays literal, so drop it when the first
  # entry does not exist.
  set -- "$result_dir/$isolate".helix_motifs_obtained_*
  [ -e "$1" ] || set --

  # Number of motif files for this isolate.
  echo "$#"

  # Only call cat when there is something to print: cat without operands reads
  # stdin, which inside this loop is the isolate list itself and would swallow
  # the remaining isolates.
  if [ "$#" -gt 0 ]; then
    cat -- "$@"
  fi

  # Reported per isolate; after the loop $isolate is already empty.
  echo "******Isolate $isolate done******"
done < "$isolate_list"
--------------------------------
#!/usr/bin/env bash
#Code to find hypothetical proteins with coiled coils
#
# Prints the length of every sequence in coiled_coil_hypothetical, then for
# each isolate in the isolate list (one name per line):
#   1. collects the FASTA header lines of <isolate>.hypothetical,
#   2. runs blastp with coiled_coil_hypothetical as query and the isolate's
#      hypothetical proteins as subject,
#   3. keeps the 'Identities' lines of the report, sorts them and stores the
#      first 100,
#   4. runs motif_finder.pl with the Rv0047c motif and keeps the reported hits.
#
# Needs blastp and perl on PATH. motif_finder.pl is looked up relative to the
# current working directory.

input_dir=/home/pseema/denovo_analysis/input_files
result_dir=/home/pseema/denovo_analysis/result_files
coil_query="$input_dir/coiled_coil_hypothetical"
# Alternative isolate subset: "$input_dir/EAM_isolates"
isolate_list="$input_dir/isolate_list"
rv0047c_motif=AEARMRILEGRRRQVEERREGLREAVARASSSFDRYTRQLHQL

#Finds length of each fasta sequence
# Prints "<header without '>' (max 100 chars)><TAB><sequence length>" per record.
# The header is passed as printf data (not as the format string) so a '%' in a
# header cannot corrupt the output, and no blank line is printed before the
# first record.
awk '
  /^>/ {
    if (seen) print len
    seen = 1
    len = 0
    printf "%s\t", substr($0, 2, 100)
    next
  }
  { len += length($0) }
  END { if (seen) print len }
' "$coil_query"

#While loop to find the coiled coil motifs in all the isolates
while read -r isolate || [ -n "$isolate" ]; do
  # Skip blank lines so no paths such as ".../.hypothetical" are built.
  [ -n "$isolate" ] || continue

  echo "*******Starting $isolate*******"
  proteins="$result_dir/$isolate.hypothetical"
  prefix="$result_dir/$isolate"

  # FASTA headers of the isolate's hypothetical proteins, plus their count.
  grep "^>" "$proteins" > "$proteins.header_names"
  #cat "$proteins.header_names"
  wc -l "$proteins.header_names"

  #Run protein BLAST to find homology between the motif-containing hypothetical proteins
  blastp -query "$coil_query" -subject "$proteins" > "$prefix.coil_homology_info"

  #Removes all the empty lines
  sed '/^$/d' "$prefix.coil_homology_info" > "$prefix.coil_homology_info_nonempty"

  #grep the lines with pattern 'Identities' and print their wc counts
  grep 'Identities' "$prefix.coil_homology_info_nonempty" > "$prefix.coil_identities"
  wc < "$prefix.coil_identities"

  echo "Sorting based on key 1"
  sort -k1,1 "$prefix.coil_identities" | head -100 > "$prefix.coil_top_hits"
  cat "$prefix.coil_top_hits"

  echo "***Motif of Rv0047c (43aa)***"
  perl motif_finder.pl "$proteins" "$rv0047c_motif" > "$prefix.coiled_motif_Rv0047c"
  # The hit file is named <isolate>.coiled_motif_found_<gene> (it used to end in
  # "_Rv0047c_found") because the coiled-coil analysis script searches for
  # "coiled_motif_found_*"; this also mirrors the domain script's naming.
  grep "motif found at position" "$prefix.coiled_motif_Rv0047c" \
    | tee "$prefix.coiled_motif_found_Rv0047c"

  #Do similarly for other motifs
done < "$isolate_list"
------------------------------------------
#!/usr/bin/env bash
#Code to analyze data generated by coiled.sh
#
# Lists the result files whose name contains 'found', removes empty result
# files, then writes one index file per isolate (all_motifs_<isolate>) listing
# that isolate's coiled_motif_found_ files.
# NOTE(review): the domain and helix analysis scripts write to the same
# all_motifs_<isolate> path, so each run overwrites the previous listing.

result_dir=/home/pseema/denovo_analysis/result_files
# Alternative isolate subset: /home/pseema/denovo_analysis/input_files/EAM_isolates
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list

#Find files in the directory that have pattern 'found' in their name
find "$result_dir" -name '*found*'
find "$result_dir" -name '*found*' | wc

#Delete empty files from the directory
# -type f restricts the deletion to regular files, as the comment intends.
find "$result_dir" -type f -size 0 -delete

#While loop to analyze coiled coil motifs in all the isolates
while read -r isolate || [ -n "$isolate" ]; do
  # Ignore blank lines in the isolate list.
  [ -n "$isolate" ] || continue

  #Find the files with the pattern '*coiled_motif_found_*' in file name
  echo "****Number of matches for the isolate $isolate and the conserved Rv genes containing the motifs****"
  motif_index="$result_dir/all_motifs_$isolate"
  # The pattern is quoted so find, not the shell, expands the wildcards.
  find "$result_dir" -name "${isolate}.*coiled_motif_found_*" | wc
  find "$result_dir" -name "${isolate}.*coiled_motif_found_*" > "$motif_index"
  cat "$motif_index"
done < "$isolate_list"

##While loop to analyze the coiled coil motif locations in all the isolates
# For every isolate: print how many <isolate>.coiled_motif_found_* files exist
# and then print their contents.
# Other isolate lists that can be substituted below:
#   /home/pseema/denovo_analysis/input_files/IO_isolates
#   /home/pseema/denovo_analysis/input_files/EAS_isolates
#   /home/pseema/denovo_analysis/input_files/EAI_isolates
#   /home/pseema/denovo_analysis/input_files/EAM_isolates
result_dir=/home/pseema/denovo_analysis/result_files
isolate_list=/home/pseema/denovo_analysis/input_files/isolate_list

while read -r isolate || [ -n "$isolate" ]; do
  # Ignore blank lines in the isolate list.
  [ -n "$isolate" ] || continue

  echo "********Starting $isolate********"

  # Collect the matching files in the positional parameters (works in any
  # POSIX shell). An unmatched glob stays literal, so drop it when the first
  # entry does not exist.
  set -- "$result_dir/$isolate".coiled_motif_found_*
  [ -e "$1" ] || set --

  # Number of motif files for this isolate.
  echo "$#"

  # Only call cat when there is something to print: cat without operands reads
  # stdin, which inside this loop is the isolate list itself and would swallow
  # the remaining isolates.
  if [ "$#" -gt 0 ]; then
    cat -- "$@"
  fi

  # Reported per isolate; after the loop $isolate is already empty.
  echo "******Isolate $isolate done******"
done < "$isolate_list"

No comments:

Post a Comment

Laboratory tools and reagents (Micro-pipettes)...

Micro-pipettes are essential tools of R&D labs and an integral part of Good Laboratory Practices (GLPs). Micro-pipetting methods include ...