#!/usr/bin/env bash
#EAS_IS6110_genome_position.sh
# The interpreter line has to be the very first line of the file to take
# effect; it names bash because other parts of this file use bash-only syntax.
#
# Step 1: run blastn with the IS6110 sequence as query against the genome of
# every isolate named (one per line) in input_files/EAsia_lineage. Each
# isolate's blastn output (stdout and stderr) is echoed to the terminal and
# saved as homology_results/EAS_IS6110.<isolate>.
denovo_dir=/home/pseema/denovo_analysis
# read -r keeps backslashes literal; every expansion is quoted so an isolate
# name with spaces or glob characters still yields exactly one path.
while read -r isolate; do
  # "2>&1 |" is the portable spelling of bash's "|&".
  blastn -query "$denovo_dir/IS_fasta_seq/IS6110" \
    -subject "$denovo_dir/denovo_genome_fasta/$isolate.fasta" 2>&1 \
    | tee "$denovo_dir/homology_results/EAS_IS6110.$isolate"
done < "$denovo_dir/input_files/EAsia_lineage"
# Step 2: for every isolate listed in input_files/EAsia_lineage, read its saved
# BLAST report (homology_results/EAS_IS6110.<isolate>), extract the subject
# coordinates of the alignment lines that contain the first and last bases of
# IS6110, convert them to kb and count the copies found on each strand.
# Every intermediate file is kept in homology_results with a ".<isolate>"
# suffix; the printed report is also saved to
# ./EAS_IS6110_isolate_genome_positions.
denovo_dir=/home/pseema/denovo_analysis
res_dir=$denovo_dir/homology_results

# bp_to_kb FILE
# Print the base-pair positions in FILE (one per line) as whole kilobases.
# Integer division is used instead of deleting the last three characters,
# because deleting characters leaves positions below 100 unchanged and turns
# positions from 100 to 999 into an empty field. Non-numeric lines (e.g. empty
# ones) are passed through untouched.
bp_to_kb() {
  awk '{ if ($1 ~ /^[0-9]+$/) print int($1 / 1000); else print }' "$1"
}

while read -r isolate; do
  echo "###########ISOLATE $isolate STARTING #########"
  #Start_position_of_EAS_IS6110
  #Find rows with the given sequence by grep
  grep "TGAACCGCCCCGGCATGTCCGGAGACTCCAGTTCTTGGAAAGGATGGGGTCATGTCAGGT" \
    "$res_dir/EAS_IS6110.$isolate" > "$res_dir/EAS_start_position.$isolate"
  #Keep only the subject ("Sbjct") rows
  awk '/Sbjct/' "$res_dir/EAS_start_position.$isolate" \
    > "$res_dir/EAS_only_subj_start.$isolate"
  #Extract field 2 i.e start position
  awk '{print $2}' "$res_dir/EAS_only_subj_start.$isolate" \
    > "$res_dir/EAS_column_2.$isolate"

  #End_position_of_EAS_IS6110
  #Find rows with the given sequence by grep
  grep "AGATCAGAGAGTCTCCGGACTCACCGGGGCGGTTCA" \
    "$res_dir/EAS_IS6110.$isolate" > "$res_dir/EAS_end_position.$isolate"
  #Keep only the subject ("Sbjct") rows
  awk '/Sbjct/' "$res_dir/EAS_end_position.$isolate" \
    > "$res_dir/EAS_only_subj_end.$isolate"
  #Extract field 4 i.e end position
  awk '{print $4}' "$res_dir/EAS_only_subj_end.$isolate" \
    > "$res_dir/EAS_column_4.$isolate"

  #Paste the two fields side by side
  # NOTE(review): paste joins line i of one file with line i of the other, so
  # this assumes both greps matched the same copies in the same order.
  paste "$res_dir/EAS_column_2.$isolate" "$res_dir/EAS_column_4.$isolate" \
    > "$res_dir/EAS_start_end.$isolate"
  #Sort the rows numerically
  sort -n "$res_dir/EAS_start_end.$isolate" \
    > "$res_dir/EAS_sorted_start_end.$isolate"

  #Split the columns again and express each position in kb units
  cut -f 1 "$res_dir/EAS_sorted_start_end.$isolate" \
    > "$res_dir/EAS_field1.$isolate"
  bp_to_kb "$res_dir/EAS_field1.$isolate" \
    > "$res_dir/EAS_field1_kb_unit.$isolate"
  cut -f 2 "$res_dir/EAS_sorted_start_end.$isolate" \
    > "$res_dir/EAS_field2.$isolate"
  bp_to_kb "$res_dir/EAS_field2.$isolate" \
    > "$res_dir/EAS_field2_kb_unit.$isolate"
  paste "$res_dir/EAS_field1_kb_unit.$isolate" \
    "$res_dir/EAS_field2_kb_unit.$isolate" \
    > "$res_dir/EAS_trimmed_field1_field2.$isolate"
  cat "$res_dir/EAS_trimmed_field1_field2.$isolate"

  #Plus strand: start lies before end
  awk '$1 < $2 {print $0}' "$res_dir/EAS_trimmed_field1_field2.$isolate" \
    > "$res_dir/EAS_plus_strand.$isolate"
  #Minus strand: start lies after end
  awk '$1 > $2 {print $0}' "$res_dir/EAS_trimmed_field1_field2.$isolate" \
    > "$res_dir/EAS_minus_strand.$isolate"
  echo "Number of IS6110 copies in plus strand of $isolate"
  wc -l < "$res_dir/EAS_plus_strand.$isolate"
  echo "Number of IS6110 copies in minus strand of $isolate"
  wc -l < "$res_dir/EAS_minus_strand.$isolate"
  echo "###########ISOLATE $isolate DONE #########"
# "2>&1 |" is the portable spelling of bash's "|&", so this parses under sh too.
done < "$denovo_dir/input_files/EAsia_lineage" 2>&1 \
  | tee EAS_IS6110_isolate_genome_positions
#cat /home/pseema/denovo_analysis/homology_results/EAS_plus_strand
#cat /home/pseema/denovo_analysis/homology_results/EAS_minus_strand
#EAS_IS6110_homology.sh
#! /usr/bin/bash
# BLAST the IS6110 sequence against the genome of every isolate listed (one
# per line) in input_files/EAsia_lineage. The combined blastn output (stdout
# and stderr) of all isolates is echoed to the terminal and saved as
# homology_results/EAS_IS6110_blast_result.
denovo_dir=/home/pseema/denovo_analysis
# Create the directory the report is actually written to. -p also makes a
# second run succeed when the directory already exists.
mkdir -p "$denovo_dir/homology_results"
# read -r keeps backslashes literal; quoting keeps each path a single word.
while read -r isolate; do
  blastn -query "$denovo_dir/IS_fasta_seq/IS6110" \
    -subject "$denovo_dir/denovo_genome_fasta/$isolate.fasta"
# "2>&1 |" is the portable spelling of bash's "|&".
done < "$denovo_dir/input_files/EAsia_lineage" 2>&1 \
  | tee "$denovo_dir/homology_results/EAS_IS6110_blast_result"
#EAS_IS6110_position_hotspots.sh
#! /usr/bin/bash
# Summarise the four position lists found in IS_positions
# (EAS_IS6110_{plus,minus}_trimmed_sorted_{start,end}): for each list print the
# number of distinct positions and a frequency table, and store the distinct
# positions in "<list>_unique". The four "_unique" files are then pooled into
# IS_positions/combined_EAS_IS6110 and summarised the same way.
pos_dir=/home/pseema/denovo_analysis/IS_positions
combined=$pos_dir/combined_EAS_IS6110

# report_positions LABEL FILE
# Print the section header for LABEL, the number of distinct lines in FILE and
# a "count value" table ordered by descending count. The distinct lines are
# also written to FILE_unique.
report_positions() {
  echo "#######All $1 positions#######"
  echo "No. of unique positions"
  sort -n "$2" | uniq | wc -l
  sort -n "$2" | uniq > "${2}_unique"
  echo "Positions with highest frequency"
  awk -F '\t' '{print $1}' "$2" | sort | uniq -c | sort -nr
}

echo "*********************EAS_IS6110 starting**********************"
report_positions "plus start" "$pos_dir/EAS_IS6110_plus_trimmed_sorted_start"
report_positions "plus end" "$pos_dir/EAS_IS6110_plus_trimmed_sorted_end"
report_positions "minus start" "$pos_dir/EAS_IS6110_minus_trimmed_sorted_start"
# This section reports the minus-strand end positions, so it is labelled
# "minus end" (the header used to repeat "plus end").
report_positions "minus end" "$pos_dir/EAS_IS6110_minus_trimmed_sorted_end"

# Pool the distinct positions of all four lists into one file.
awk '{print $1}' \
  "$pos_dir/EAS_IS6110_plus_trimmed_sorted_start_unique" \
  "$pos_dir/EAS_IS6110_plus_trimmed_sorted_end_unique" \
  "$pos_dir/EAS_IS6110_minus_trimmed_sorted_start_unique" \
  "$pos_dir/EAS_IS6110_minus_trimmed_sorted_end_unique" \
  > "$combined"
########
echo "combined_EAS_IS6110"
echo "sorted"
sort "$combined"
echo "sorted unique"
sort "$combined" | uniq -c
sort "$combined" | uniq -c | wc -l
echo "sorted unique sorted"
sort "$combined" | uniq -c | sort -nr
echo "********************EAS_IS6110 done***************************"
#EAS_IS6110_position_manipulations.sh
#! /usr/bin/bash
# Build the per-strand position lists (in kb) that the hotspot summary reads.
# Input : homology_results/EAS_IS6110_blast_result (combined BLAST report).
# Output: intermediate and final files in IS_positions, in particular
#         EAS_IS6110_{plus,minus}_trimmed_sorted_{start,end}.
# The counts and lists produced along the way are printed as a report.
denovo_dir=/home/pseema/denovo_analysis
blast_result=$denovo_dir/homology_results/EAS_IS6110_blast_result
pos_dir=$denovo_dir/IS_positions

# bp_to_kb FILE
# Print the base-pair positions in FILE (one per line) as whole kilobases.
# Integer division is used instead of deleting the last three characters,
# because deleting characters leaves positions below 100 unchanged and turns
# positions from 100 to 999 into an empty line. Non-numeric lines are passed
# through untouched.
bp_to_kb() {
  awk '{ if ($1 ~ /^[0-9]+$/) print int($1 / 1000); else print }' "$1"
}

# strand_positions DIR SIDE STRAND
#   DIR    directory holding the position files
#   SIDE   "start" or "end"
#   STRAND "plus" (column 2 > column 1) or "minus" (column 1 > column 2)
# Select the rows of DIR/EAS_IS6110_<SIDE>_column_2_4 that lie on STRAND,
# print how many there are, reduce them to their first column, and write that
# column in kb to DIR/EAS_IS6110_<STRAND>_trimmed_<SIDE> and, numerically
# sorted, to DIR/EAS_IS6110_<STRAND>_trimmed_sorted_<SIDE>.
strand_positions() {
  sp_pairs="$1/EAS_IS6110_${2}_column_2_4"
  sp_hits="${sp_pairs}_${3}"
  sp_trimmed="$1/EAS_IS6110_${3}_trimmed_${2}"
  sp_sorted="$1/EAS_IS6110_${3}_trimmed_sorted_${2}"

  echo "EAS_IS6110 in $3 strand :"
  case $3 in
    plus) awk '$2 > $1 {print $0}' "$sp_pairs" > "$sp_hits" ;;
    minus) awk '$1 > $2 {print $0}' "$sp_pairs" > "$sp_hits" ;;
  esac
  wc -l < "$sp_hits"
  # Keep only the first coordinate; written via a temporary file because the
  # input and the output are the same file.
  awk '{print $1}' "$sp_hits" > "${sp_hits}_mod" \
    && mv "${sp_hits}_mod" "$sp_hits"
  cat "$sp_hits"
  bp_to_kb "$sp_hits" > "$sp_trimmed"
  cat "$sp_trimmed"
  # The sorted file is the one used to look for hotspots of the IS element.
  sort -n "$sp_trimmed" > "$sp_sorted"
}

echo "***********Matches of EAS_IS6110 at the start of alignment**********"
grep "TGAACCGCCCCGGCATGTCCGGAGACTCCAGTTCTTGGAAAGGATGGGGTCATGTCAGGT" \
  "$blast_result" > "$pos_dir/EAS_IS6110_start_position"
wc -l < "$pos_dir/EAS_IS6110_start_position"
#Extract lines with the pattern '/Sbjct/'
awk '/Sbjct/' "$pos_dir/EAS_IS6110_start_position" \
  > "$pos_dir/EAS_IS6110_only_subj_start"
#Extract field 2 of the file
awk '{print $2}' "$pos_dir/EAS_IS6110_only_subj_start" \
  > "$pos_dir/EAS_IS6110_column_2"
echo "Total EAS_IS6110 copies :"
awk '{print $2, $4}' "$pos_dir/EAS_IS6110_only_subj_start" \
  > "$pos_dir/EAS_IS6110_start_column_2_4"
wc -l < "$pos_dir/EAS_IS6110_start_column_2_4"
strand_positions "$pos_dir" start plus
strand_positions "$pos_dir" start minus

echo "***********Matches of EAS_IS6110 at the end of alignment**********"
# NOTE(review): this 20-base pattern is shorter than the 36-base end pattern
# used by the start/end scripts - confirm the difference is intended.
grep "GGACTCACCGGGGCGGTTCA" "$blast_result" \
  > "$pos_dir/EAS_IS6110_end_position"
wc -l < "$pos_dir/EAS_IS6110_end_position"
awk '/Sbjct/' "$pos_dir/EAS_IS6110_end_position" \
  > "$pos_dir/EAS_IS6110_only_subj_end"
#Extract field 4 of the file
awk '{print $4}' "$pos_dir/EAS_IS6110_only_subj_end" \
  > "$pos_dir/EAS_IS6110_column_4"
echo "Total EAS_IS6110 copies :"
awk '{print $2, $4}' "$pos_dir/EAS_IS6110_only_subj_end" \
  > "$pos_dir/EAS_IS6110_end_column_2_4"
wc -l < "$pos_dir/EAS_IS6110_end_column_2_4"
# NOTE(review): the end lists are built from the first coordinate of each
# Sbjct row (field 2), not from field 4 - confirm this is what is wanted.
strand_positions "$pos_dir" end plus
strand_positions "$pos_dir" end minus

# (A "grep -q" for "Identities =" stood here; its result was never used.)
echo "Total number of EAS_IS6110 copies across the isolates"
grep -c "Strand=" "$blast_result"
echo "Copies in plus and minus strand"
grep -c "Strand=Plus/Plus" "$blast_result"
grep -c "Strand=Plus/Minus" "$blast_result"
echo "*********************EAS_IS6110 analysis done*******************"
#EAS_IS6110_start_end_position.sh
#! /usr/bin/sh
# (The interpreter line above had been truncated to "sr/bin/sh", which the
# shell tried to execute as a command.)
#
# BLAST IS6110 against every isolate listed in input_files/EAsia_lineage, save
# the pooled report as homology_results/EAS_IS6110_positions, then extract the
# start and end coordinates of every copy, convert them to kb and count the
# copies on each strand across all isolates. Intermediate files are kept in
# homology_results.
denovo_dir=/home/pseema/denovo_analysis
res_dir=$denovo_dir/homology_results

# bp_to_kb FILE
# Print the base-pair positions in FILE (one per line) as whole kilobases.
# Integer division is used instead of deleting the last three characters,
# because deleting characters leaves positions below 100 unchanged and turns
# positions from 100 to 999 into an empty field. Non-numeric lines are passed
# through untouched.
bp_to_kb() {
  awk '{ if ($1 ~ /^[0-9]+$/) print int($1 / 1000); else print }' "$1"
}

# read -r keeps backslashes literal; quoting keeps each path a single word.
while read -r isolate; do
  echo "$isolate starting......"
  blastn -query "$denovo_dir/IS_fasta_seq/IS6110" \
    -subject "$denovo_dir/denovo_genome_fasta/$isolate.fasta"
# "2>&1 |" is the POSIX spelling of bash's "|&", so this parses under sh.
done < "$denovo_dir/input_files/EAsia_lineage" 2>&1 \
  | tee "$res_dir/EAS_IS6110_positions"

#Start_position_of_EAS_IS6110
#Find rows with the given sequence by grep
grep "TGAACCGCCCCGGCATGTCCGGAGACTCCAGTTCTTGGAAAGGATGGGGTCATGTCAGGT" \
  "$res_dir/EAS_IS6110_positions" > "$res_dir/EAS_start_position"
#Keep only the subject ("Sbjct") rows
awk '/Sbjct/' "$res_dir/EAS_start_position" > "$res_dir/EAS_only_subj_start"
#Extract field 2 i.e start position
awk '{print $2}' "$res_dir/EAS_only_subj_start" > "$res_dir/EAS_column_2"

#End_position_of_EAS_IS6110
#Find rows with the given sequence by grep
grep "AGATCAGAGAGTCTCCGGACTCACCGGGGCGGTTCA" \
  "$res_dir/EAS_IS6110_positions" > "$res_dir/EAS_end_position"
#Keep only the subject ("Sbjct") rows
awk '/Sbjct/' "$res_dir/EAS_end_position" > "$res_dir/EAS_only_subj_end"
#Extract field 4 i.e end position
awk '{print $4}' "$res_dir/EAS_only_subj_end" > "$res_dir/EAS_column_4"

#Paste the two fields side by side
# NOTE(review): paste joins line i of one file with line i of the other, so
# this assumes both greps matched the same copies in the same order.
paste "$res_dir/EAS_column_2" "$res_dir/EAS_column_4" \
  > "$res_dir/EAS_start_end"
#Sort the rows numerically
sort -n "$res_dir/EAS_start_end" > "$res_dir/EAS_sorted_start_end"

#Split the columns again and express each position in kb units
cut -f 1 "$res_dir/EAS_sorted_start_end" > "$res_dir/EAS_field1"
bp_to_kb "$res_dir/EAS_field1" > "$res_dir/EAS_field1_kb_unit"
cut -f 2 "$res_dir/EAS_sorted_start_end" > "$res_dir/EAS_field2"
bp_to_kb "$res_dir/EAS_field2" > "$res_dir/EAS_field2_kb_unit"
paste "$res_dir/EAS_field1_kb_unit" "$res_dir/EAS_field2_kb_unit" \
  > "$res_dir/EAS_trimmed_field1_field2"

echo "Combined copy number of IS6110 in the 19 EAS isolates"
wc -l < "$res_dir/EAS_trimmed_field1_field2"
echo "#####Both start and end positions######"
cat "$res_dir/EAS_trimmed_field1_field2"

#Plus strand: start lies before end
awk '$1 < $2 {print $0}' "$res_dir/EAS_trimmed_field1_field2" \
  > "$res_dir/EAS_plus_strand"
#Minus strand: start lies after end
awk '$1 > $2 {print $0}' "$res_dir/EAS_trimmed_field1_field2" \
  > "$res_dir/EAS_minus_strand"
echo "Number of IS6110 copies in plus strand of EAS isolates"
wc -l < "$res_dir/EAS_plus_strand"
echo "Number of IS6110 copies in minus strand of EAS isolates"
wc -l < "$res_dir/EAS_minus_strand"
#cat /home/pseema/denovo_analysis/homology_results/EAS_plus_strand
#cat /home/pseema/denovo_analysis/homology_results/EAS_minus_strand
#EAS_IS6110_analysis_wrapper.sh
#! /usr/bin/bash
#This (EAS_IS6110_analysis_wrapper.sh) calls other codes.
# Each stage script is looked up in the current directory and run in order;
# where a log name is given, the stage's stdout and stderr are echoed to the
# terminal and saved to that file in the current directory.
# The stages are started with bash rather than sh because they may use the
# bash-only "|&" operator, which a strict POSIX sh rejects as a syntax error.
# "2>&1 |" below is the portable spelling of that same operator.

#This code calls EAS_homology.sh
# NOTE(review): the homology script names itself EAS_IS6110_homology.sh in its
# header; that name is used as a fallback when EAS_homology.sh is absent.
homology_script=EAS_homology.sh
[ -f "$homology_script" ] || homology_script=EAS_IS6110_homology.sh
echo "$homology_script is running.........."
bash "$homology_script" 2>&1 | tee EAS_IS6110_blast_result
#This code calls EAS_IS6110_position_manipulations.sh
echo "EAS_IS6110_position_manipulations.sh is running.........."
bash EAS_IS6110_position_manipulations.sh 2>&1 \
  | tee EAS_IS6110_blast_result_analysis_file
#This code calls EAS_IS6110_position_hotspots.sh
echo "EAS_IS6110_position_hotspots.sh running.........."
bash EAS_IS6110_position_hotspots.sh 2>&1 \
  | tee EAS_IS6110_position_analysis_file
#This code calls EAS_IS6110_start_end_position.sh
echo "EAS_IS6110_start_end_position.sh running.........."
bash EAS_IS6110_start_end_position.sh 2>&1 | tee EAS_IS6110_start_end_file
#This code calls EAS_IS6110_genome_position.sh (position of each copy of IS6110 in each isolate is found)
echo "EAS_IS6110_genome_position.sh running.........."
bash EAS_IS6110_genome_position.sh
#This code calls Beijing_EAS_IS6110_start_end_position.sh
echo "Beijing_EAS_IS6110_start_end_position.sh running.........."
bash Beijing_EAS_IS6110_start_end_position.sh 2>&1 \
  | tee Beijing_EAS_IS6110_start_end_file
#! /usr/bin/sh
# Run blastn with the IS6110 sequence as query against the genome of every
# isolate named (one per line) in input_files/EAsia_lineage. Each isolate's
# blastn output (stdout and stderr) is echoed to the terminal and saved as
# homology_results/EAS_IS6110.<isolate>.
denovo_dir=/home/pseema/denovo_analysis
# read -r keeps backslashes literal; every expansion is quoted so an isolate
# name with spaces or glob characters still yields exactly one path.
while read -r isolate; do
  # "2>&1 |" is the POSIX spelling of bash's "|&", so this parses under sh.
  blastn -query "$denovo_dir/IS_fasta_seq/IS6110" \
    -subject "$denovo_dir/denovo_genome_fasta/$isolate.fasta" 2>&1 \
    | tee "$denovo_dir/homology_results/EAS_IS6110.$isolate"
done < "$denovo_dir/input_files/EAsia_lineage"
# For every isolate listed in input_files/EAsia_lineage, read its saved BLAST
# report (homology_results/EAS_IS6110.<isolate>), extract the subject
# coordinates of the alignment lines that contain the first and last bases of
# IS6110, convert them to kb and count the copies found on each strand.
# Every intermediate file is kept in homology_results with a ".<isolate>"
# suffix; the printed report is also saved to
# ./EAS_IS6110_isolate_genome_positions.
denovo_dir=/home/pseema/denovo_analysis
res_dir=$denovo_dir/homology_results

# bp_to_kb FILE
# Print the base-pair positions in FILE (one per line) as whole kilobases.
# Integer division is used instead of deleting the last three characters,
# because deleting characters leaves positions below 100 unchanged and turns
# positions from 100 to 999 into an empty field. Non-numeric lines (e.g. empty
# ones) are passed through untouched.
bp_to_kb() {
  awk '{ if ($1 ~ /^[0-9]+$/) print int($1 / 1000); else print }' "$1"
}

while read -r isolate; do
  echo "###########ISOLATE $isolate STARTING #########"
  #Start_position_of_EAS_IS6110
  #Find rows with the given sequence by grep
  grep "TGAACCGCCCCGGCATGTCCGGAGACTCCAGTTCTTGGAAAGGATGGGGTCATGTCAGGT" \
    "$res_dir/EAS_IS6110.$isolate" > "$res_dir/EAS_start_position.$isolate"
  #Keep only the subject ("Sbjct") rows
  awk '/Sbjct/' "$res_dir/EAS_start_position.$isolate" \
    > "$res_dir/EAS_only_subj_start.$isolate"
  #Extract field 2 i.e start position
  awk '{print $2}' "$res_dir/EAS_only_subj_start.$isolate" \
    > "$res_dir/EAS_column_2.$isolate"

  #End_position_of_EAS_IS6110
  #Find rows with the given sequence by grep
  grep "AGATCAGAGAGTCTCCGGACTCACCGGGGCGGTTCA" \
    "$res_dir/EAS_IS6110.$isolate" > "$res_dir/EAS_end_position.$isolate"
  #Keep only the subject ("Sbjct") rows
  awk '/Sbjct/' "$res_dir/EAS_end_position.$isolate" \
    > "$res_dir/EAS_only_subj_end.$isolate"
  #Extract field 4 i.e end position
  awk '{print $4}' "$res_dir/EAS_only_subj_end.$isolate" \
    > "$res_dir/EAS_column_4.$isolate"

  #Paste the two fields side by side
  # NOTE(review): paste joins line i of one file with line i of the other, so
  # this assumes both greps matched the same copies in the same order.
  paste "$res_dir/EAS_column_2.$isolate" "$res_dir/EAS_column_4.$isolate" \
    > "$res_dir/EAS_start_end.$isolate"
  #Sort the rows numerically
  sort -n "$res_dir/EAS_start_end.$isolate" \
    > "$res_dir/EAS_sorted_start_end.$isolate"

  #Split the columns again and express each position in kb units
  cut -f 1 "$res_dir/EAS_sorted_start_end.$isolate" \
    > "$res_dir/EAS_field1.$isolate"
  bp_to_kb "$res_dir/EAS_field1.$isolate" \
    > "$res_dir/EAS_field1_kb_unit.$isolate"
  cut -f 2 "$res_dir/EAS_sorted_start_end.$isolate" \
    > "$res_dir/EAS_field2.$isolate"
  bp_to_kb "$res_dir/EAS_field2.$isolate" \
    > "$res_dir/EAS_field2_kb_unit.$isolate"
  paste "$res_dir/EAS_field1_kb_unit.$isolate" \
    "$res_dir/EAS_field2_kb_unit.$isolate" \
    > "$res_dir/EAS_trimmed_field1_field2.$isolate"
  cat "$res_dir/EAS_trimmed_field1_field2.$isolate"

  #Plus strand: start lies before end
  awk '$1 < $2 {print $0}' "$res_dir/EAS_trimmed_field1_field2.$isolate" \
    > "$res_dir/EAS_plus_strand.$isolate"
  #Minus strand: start lies after end
  awk '$1 > $2 {print $0}' "$res_dir/EAS_trimmed_field1_field2.$isolate" \
    > "$res_dir/EAS_minus_strand.$isolate"
  echo "Number of IS6110 copies in plus strand of $isolate"
  wc -l < "$res_dir/EAS_plus_strand.$isolate"
  echo "Number of IS6110 copies in minus strand of $isolate"
  wc -l < "$res_dir/EAS_minus_strand.$isolate"
  echo "###########ISOLATE $isolate DONE #########"
# "2>&1 |" is the POSIX spelling of bash's "|&", so this parses under sh.
done < "$denovo_dir/input_files/EAsia_lineage" 2>&1 \
  | tee EAS_IS6110_isolate_genome_positions
#cat /home/pseema/denovo_analysis/homology_results/EAS_plus_strand
#cat /home/pseema/denovo_analysis/homology_results/EAS_minus_strand
#EAS_IS6110_homology.sh
#! /usr/bin/bash
# BLAST the IS6110 sequence against the genome of every isolate listed (one
# per line) in input_files/EAsia_lineage. The combined blastn output (stdout
# and stderr) of all isolates is echoed to the terminal and saved as
# homology_results/EAS_IS6110_blast_result.
denovo_dir=/home/pseema/denovo_analysis
# Create the directory the report is actually written to. -p also makes a
# second run succeed when the directory already exists.
mkdir -p "$denovo_dir/homology_results"
# read -r keeps backslashes literal; quoting keeps each path a single word.
while read -r isolate; do
  blastn -query "$denovo_dir/IS_fasta_seq/IS6110" \
    -subject "$denovo_dir/denovo_genome_fasta/$isolate.fasta"
# "2>&1 |" is the portable spelling of bash's "|&".
done < "$denovo_dir/input_files/EAsia_lineage" 2>&1 \
  | tee "$denovo_dir/homology_results/EAS_IS6110_blast_result"
#EAS_IS6110_position_hotspots.sh
#! /usr/bin/bash
# Summarise the four position lists found in IS_positions
# (EAS_IS6110_{plus,minus}_trimmed_sorted_{start,end}): for each list print the
# number of distinct positions and a frequency table, and store the distinct
# positions in "<list>_unique". The four "_unique" files are then pooled into
# IS_positions/combined_EAS_IS6110 and summarised the same way.
pos_dir=/home/pseema/denovo_analysis/IS_positions
combined=$pos_dir/combined_EAS_IS6110

# report_positions LABEL FILE
# Print the section header for LABEL, the number of distinct lines in FILE and
# a "count value" table ordered by descending count. The distinct lines are
# also written to FILE_unique.
report_positions() {
  echo "#######All $1 positions#######"
  echo "No. of unique positions"
  sort -n "$2" | uniq | wc -l
  sort -n "$2" | uniq > "${2}_unique"
  echo "Positions with highest frequency"
  awk -F '\t' '{print $1}' "$2" | sort | uniq -c | sort -nr
}

echo "*********************EAS_IS6110 starting**********************"
report_positions "plus start" "$pos_dir/EAS_IS6110_plus_trimmed_sorted_start"
report_positions "plus end" "$pos_dir/EAS_IS6110_plus_trimmed_sorted_end"
report_positions "minus start" "$pos_dir/EAS_IS6110_minus_trimmed_sorted_start"
# This section reports the minus-strand end positions, so it is labelled
# "minus end" (the header used to repeat "plus end").
report_positions "minus end" "$pos_dir/EAS_IS6110_minus_trimmed_sorted_end"

# Pool the distinct positions of all four lists into one file.
awk '{print $1}' \
  "$pos_dir/EAS_IS6110_plus_trimmed_sorted_start_unique" \
  "$pos_dir/EAS_IS6110_plus_trimmed_sorted_end_unique" \
  "$pos_dir/EAS_IS6110_minus_trimmed_sorted_start_unique" \
  "$pos_dir/EAS_IS6110_minus_trimmed_sorted_end_unique" \
  > "$combined"
########
echo "combined_EAS_IS6110"
echo "sorted"
sort "$combined"
echo "sorted unique"
sort "$combined" | uniq -c
sort "$combined" | uniq -c | wc -l
echo "sorted unique sorted"
sort "$combined" | uniq -c | sort -nr
echo "********************EAS_IS6110 done***************************"
#EAS_IS6110_position_manipulations.sh
#!/usr/bin/env bash
#
# Tabulates where the first and last rows of each IS6110 alignment fall in the
# combined EAS BLAST report (homology_results/EAS_IS6110_blast_result).
#
# For each alignment edge ("start" and "end") the script:
#   1. greps the report for the edge motif and keeps the "Sbjct" rows,
#   2. saves Sbjct fields 2 and 4 side by side (EAS_IS6110_<edge>_column_2_4),
#   3. splits the rows by strand (field 4 > field 2 is reported as plus, the
#      reverse as minus) and keeps field 2 of each row,
#   4. converts those positions to whole kilobases and sorts them; the
#      EAS_IS6110_<strand>_trimmed_sorted_<edge> files are the hotspot lists.
# Finally it counts the "Strand=" lines of the report per orientation.
#
# All result files are written to IS_positions/; counts and listings go to
# stdout. The script sticks to POSIX sh syntax so it also runs under `sh`.
#
# Environment:
#   DENOVO_ANALYSIS_DIR  analysis root directory
#                        (default: /home/pseema/denovo_analysis)

# Print each base-pair position in file $1 as whole kilobases (5059 -> 5).
# Integer division is used rather than chopping the last three characters so
# that positions below 1000 become 0 instead of an empty or unchanged value.
eas_is6110_bp_to_kb() {
  awk 'NF { printf "%d\n", $1 / 1000; next } { print }' "$1"
}

# Report one strand of one alignment edge.
#   $1  output directory (IS_positions)
#   $2  edge name: "start" or "end"
#   $3  strand name: "plus" or "minus"
# Runs in a subshell so its working variables do not leak to the caller.
eas_is6110_strand_report() (
  out_dir=$1
  edge=$2
  strand=$3
  pairs="$out_dir/EAS_IS6110_${edge}_column_2_4"
  hits="${pairs}_${strand}"
  trimmed="$out_dir/EAS_IS6110_${strand}_trimmed_${edge}"
  sorted="$out_dir/EAS_IS6110_${strand}_trimmed_sorted_${edge}"

  echo "EAS_IS6110 in $strand strand :"
  # Column 1 of the pairs file is Sbjct field 2, column 2 is Sbjct field 4.
  case $strand in
    plus) awk '$2 > $1 { print $1 }' "$pairs" > "$hits" ;;
    minus) awk '$1 > $2 { print $1 }' "$pairs" > "$hits" ;;
  esac
  wc -l < "$hits"
  cat "$hits"
  eas_is6110_bp_to_kb "$hits" > "$trimmed"
  cat "$trimmed"
  # The sorted kb list shows where the IS element occurs repeatedly.
  sort -n "$trimmed" > "$sorted"
)

# Report one alignment edge for both strands.
#   $1  combined BLAST report to search
#   $2  output directory (IS_positions)
#   $3  edge name: "start" or "end"
#   $4  sequence motif identifying that edge of the alignment
#   $5  Sbjct field number saved on its own as EAS_IS6110_column_<n>
eas_is6110_edge_report() (
  report=$1
  out_dir=$2
  edge=$3
  motif=$4
  column=$5
  matches="$out_dir/EAS_IS6110_${edge}_position"
  subject_rows="$out_dir/EAS_IS6110_only_subj_${edge}"
  pairs="$out_dir/EAS_IS6110_${edge}_column_2_4"

  echo "***********Matches of EAS_IS6110 at the $edge of alignment**********"
  grep -- "$motif" "$report" > "$matches"
  wc -l < "$matches"
  # The motif also matches "Query" rows; only "Sbjct" rows carry positions
  # in the isolate sequence.
  awk '/Sbjct/' "$matches" > "$subject_rows"
  awk -v col="$column" '{ print $col }' "$subject_rows" \
    > "$out_dir/EAS_IS6110_column_$column"
  echo "Total EAS_IS6110 copies :"
  awk '{ print $2, $4 }' "$subject_rows" > "$pairs"
  wc -l < "$pairs"
  eas_is6110_strand_report "$out_dir" "$edge" plus
  eas_is6110_strand_report "$out_dir" "$edge" minus
)

# Entry point: produce the start-edge report, the end-edge report and the
# per-orientation totals for the combined BLAST report.
eas_is6110_position_manipulations() (
  base_dir=${DENOVO_ANALYSIS_DIR:-/home/pseema/denovo_analysis}
  report="$base_dir/homology_results/EAS_IS6110_blast_result"
  out_dir="$base_dir/IS_positions"

  eas_is6110_edge_report "$report" "$out_dir" start \
    "TGAACCGCCCCGGCATGTCCGGAGACTCCAGTTCTTGGAAAGGATGGGGTCATGTCAGGT" 2
  eas_is6110_edge_report "$report" "$out_dir" end \
    "GGACTCACCGGGGCGGTTCA" 4

  echo "Total number of EAS_IS6110 copies across the isolates"
  grep -c "Strand=" "$report"
  echo "Copies in plus and minus strand"
  grep -c "Strand=Plus/Plus" "$report"
  grep -c "Strand=Plus/Minus" "$report"
  echo "*********************EAS_IS6110 analysis done*******************"
)

eas_is6110_position_manipulations "$@"
#EAS_IS6110_start_end_position.sh
#!/usr/bin/env bash
#
# Runs blastn with IS6110 as the query against every isolate listed in
# input_files/EAsia_lineage, stores the combined output in
# homology_results/EAS_IS6110_positions, and then derives a start/end table:
#   - start: field 2 of "Sbjct" rows containing the start motif,
#   - end:   field 4 of "Sbjct" rows containing the end motif.
# The two columns are pasted together row by row, sorted numerically,
# converted to whole kilobases and split into plus (start < end) and minus
# (start > end) strand files. Counts and the kb table are printed to stdout.
#
# NOTE(review): pasting pairs the n-th start row with the n-th end row, so
# this assumes both motifs match the same number of alignments in the same
# order -- confirm against the BLAST output.
#
# Environment:
#   DENOVO_ANALYSIS_DIR  analysis root directory
#                        (default: /home/pseema/denovo_analysis)

# Print each base-pair value in file $1 as whole kilobases (5059 -> 5).
# Integer division is used rather than chopping the last three characters so
# that values below 1000 become 0 instead of an empty or unchanged value;
# blank lines are passed through untouched.
eas_is6110_lines_to_kb() {
  awk 'NF { printf "%d\n", $1 / 1000; next } { print }' "$1"
}

# Entry point. Runs in a subshell so its working variables do not leak.
eas_is6110_start_end_position() (
  base_dir=${DENOVO_ANALYSIS_DIR:-/home/pseema/denovo_analysis}
  results="$base_dir/homology_results"
  blast_log="$results/EAS_IS6110_positions"

  # BLAST every isolate; stdout and stderr are both shown and logged.
  # "2>&1 |" is the portable spelling of "|&".
  while read -r isolate; do
    echo "$isolate starting......"
    blastn -query "$base_dir/IS_fasta_seq/IS6110" \
      -subject "$base_dir/denovo_genome_fasta/$isolate.fasta"
  done < "$base_dir/input_files/EAsia_lineage" 2>&1 | tee "$blast_log"

  # Start positions: Sbjct rows carrying the start motif, field 2.
  grep -- "TGAACCGCCCCGGCATGTCCGGAGACTCCAGTTCTTGGAAAGGATGGGGTCATGTCAGGT" \
    "$blast_log" > "$results/EAS_start_position"
  awk '/Sbjct/' "$results/EAS_start_position" > "$results/EAS_only_subj_start"
  awk '{ print $2 }' "$results/EAS_only_subj_start" > "$results/EAS_column_2"

  # End positions: Sbjct rows carrying the end motif, field 4.
  grep -- "AGATCAGAGAGTCTCCGGACTCACCGGGGCGGTTCA" \
    "$blast_log" > "$results/EAS_end_position"
  awk '/Sbjct/' "$results/EAS_end_position" > "$results/EAS_only_subj_end"
  awk '{ print $4 }' "$results/EAS_only_subj_end" > "$results/EAS_column_4"

  # Start and end side by side (tab separated), sorted by start position.
  paste "$results/EAS_column_2" "$results/EAS_column_4" > "$results/EAS_start_end"
  sort -n "$results/EAS_start_end" > "$results/EAS_sorted_start_end"

  # Convert each column to kb units and join them again.
  cut -f 1 "$results/EAS_sorted_start_end" > "$results/EAS_field1"
  eas_is6110_lines_to_kb "$results/EAS_field1" > "$results/EAS_field1_kb_unit"
  cut -f 2 "$results/EAS_sorted_start_end" > "$results/EAS_field2"
  eas_is6110_lines_to_kb "$results/EAS_field2" > "$results/EAS_field2_kb_unit"
  paste "$results/EAS_field1_kb_unit" "$results/EAS_field2_kb_unit" \
    > "$results/EAS_trimmed_field1_field2"

  echo "Combined copy number of IS6110 in the 19 EAS isolates"
  wc -l < "$results/EAS_trimmed_field1_field2"
  echo "#####Both start and end positions######"
  cat "$results/EAS_trimmed_field1_field2"

  # Plus strand: start below end. Minus strand: start above end.
  awk '$1 < $2 { print $0 }' "$results/EAS_trimmed_field1_field2" \
    > "$results/EAS_plus_strand"
  awk '$1 > $2 { print $0 }' "$results/EAS_trimmed_field1_field2" \
    > "$results/EAS_minus_strand"
  echo "Number of IS6110 copies in plus strand of EAS isolates"
  wc -l < "$results/EAS_plus_strand"
  echo "Number of IS6110 copies in minus strand of EAS isolates"
  wc -l < "$results/EAS_minus_strand"
)

eas_is6110_start_end_position "$@"
#EAS_IS6110_analysis_wrapper.sh
#!/usr/bin/env bash
#
# This (EAS_IS6110_analysis_wrapper.sh) calls the other EAS IS6110 scripts in
# order. Every script is looked up in the current directory, and each log file
# named below is written to the current directory as well.
#
# The scripts are started with bash rather than sh because some of them use
# Bash-only syntax, which a strictly POSIX sh rejects as a syntax error.
#
# NOTE(review): EAS_IS6110_position_manipulations.sh reads
# homology_results/EAS_IS6110_blast_result by absolute path, while the log of
# that name is written here relative to the working directory -- presumably
# the wrapper is meant to be run from homology_results; confirm.

# Run script $1 with bash, showing its stdout and stderr and also saving both
# to log file $2 ("2>&1 |" is the portable spelling of "|&").
eas_is6110_run_logged() {
  bash "$1" 2>&1 | tee "$2"
}

# Entry point: run every analysis step in its fixed order.
eas_is6110_analysis_wrapper() {
  #This code calls EAS_homology.sh
  echo "EAS_homology.sh is running.........."
  eas_is6110_run_logged EAS_homology.sh EAS_IS6110_blast_result

  #This code calls EAS_IS6110_position_manipulations.sh
  echo "EAS_IS6110_position_manipulations.sh is running.........."
  eas_is6110_run_logged EAS_IS6110_position_manipulations.sh \
    EAS_IS6110_blast_result_analysis_file

  #This code calls EAS_IS6110_position_hotspots.sh
  echo "EAS_IS6110_position_hotspots.sh running.........."
  eas_is6110_run_logged EAS_IS6110_position_hotspots.sh \
    EAS_IS6110_position_analysis_file

  #This code calls EAS_IS6110_start_end_position.sh
  echo "EAS_IS6110_start_end_position.sh running.........."
  eas_is6110_run_logged EAS_IS6110_start_end_position.sh \
    EAS_IS6110_start_end_file

  #This code calls EAS_IS6110_genome_position.sh (position of each copy of
  #IS6110 in each isolate is found); its output is not logged to a file.
  echo "EAS_IS6110_genome_position.sh running.........."
  bash EAS_IS6110_genome_position.sh

  #This code calls Beijing_EAS_IS6110_start_end_position.sh
  echo "Beijing_EAS_IS6110_start_end_position.sh running.........."
  eas_is6110_run_logged Beijing_EAS_IS6110_start_end_position.sh \
    Beijing_EAS_IS6110_start_end_file
}

eas_is6110_analysis_wrapper "$@"
# No comments:
# Post a Comment