#!/usr/bin/env bash
# position_hotspots.sh
#
# Reports position hotspots for an IS element. For every strand (plus, minus)
# and alignment side (start, end) it reads
#   <dir>/<element>_<strand>_trimmed_sorted_<side>
# prints the number of distinct positions, stores them in "<that file>_unique"
# and prints each position with its frequency. The four "_unique" files are
# then merged into <dir>/combined_<element>, which is summarised the same way.

#######################################
# Summarise one positions file.
# Arguments: $1 - path of the positions file
# Outputs:   count of distinct positions, then "count position" lines sorted
#            by descending count, on stdout; writes "$1_unique"
#######################################
summarize_positions() {
  local positions=$1
  local unique="${positions}_unique"

  echo "No. of unique positions"
  # tee stores the distinct positions while wc counts them, so the
  # sort/uniq pass runs only once.
  sort -n -- "$positions" | uniq | tee -- "$unique" | wc -l
  echo "Positions with highest frequency"
  awk -F '\t' '{print $1}' "$positions" | sort | uniq -c | sort -nr
}

#######################################
# Print the hotspot report for one IS element.
# Arguments: $1 - element name used in file names and headings (e.g. IS1081)
#            $2 - directory holding the *_trimmed_sorted_* files
# Outputs:   report on stdout; writes the four "_unique" files and
#            "$2/combined_$1"
#######################################
report_hotspots() {
  local element=$1 dir=$2
  local strand side positions
  local combined="${dir}/combined_${element}"
  local -a unique_files=()

  echo "*********************${element} starting**********************"
  for strand in plus minus; do
    for side in start end; do
      # The heading is built from the loop variables so that it always names
      # the file actually being summarised.
      echo "#######All ${strand} ${side} positions#######"
      positions="${dir}/${element}_${strand}_trimmed_sorted_${side}"
      summarize_positions "$positions"
      unique_files+=("${positions}_unique")
    done
  done

  awk '{print $1}' "${unique_files[@]}" > "$combined"
  echo "combined_${element}"
  echo "sorted"
  sort -- "$combined"
  echo "sorted unique"
  sort -- "$combined" | uniq -c
  sort -- "$combined" | uniq -c | wc -l
  echo "sorted unique sorted"
  sort -- "$combined" | uniq -c | sort -nr
  echo "********************${element} done***************************"
}

report_hotspots IS1081 /home/pseema/denovo_analysis/result_files/IS_positions
#!/usr/bin/env bash
# Takes genome BLAST results and derives the positions and orientations of an
# IS element. The motif sequence searched for is different for the start and
# the end of the alignment, and for each IS element.

#######################################
# Extract subject coordinates for one side of the alignment and bin them.
# Arguments: $1 - element name (file-name prefix and heading text)
#            $2 - BLAST result file
#            $3 - output directory
#            $4 - side of the alignment: "start" or "end"
#            $5 - motif to search for in the BLAST result
#            $6 - field number saved to <element>_column_<n>
# Outputs:   counts and position lists on stdout; files in $3
#######################################
extract_side_positions() {
  local element=$1 blast=$2 dir=$3 side=$4 motif=$5 column=$6
  local matches="${dir}/${element}_${side}_position"
  local subject="${dir}/${element}_only_subj_${side}"
  local pairs="${dir}/${element}_${side}_column_2_4"
  local strand picked trimmed

  echo "***********Matches of ${element} at the ${side} of alignment**********"
  grep -- "$motif" "$blast" > "$matches"
  wc -l < "$matches"
  # Keep only the subject lines of the matches.
  awk '/Sbjct/' "$matches" > "$subject"
  awk -v col="$column" '{print $col}' "$subject" \
    > "${dir}/${element}_column_${column}"
  echo "Total ${element} copies :"
  awk '{print $2, $4}' "$subject" > "$pairs"
  wc -l < "$pairs"

  for strand in plus minus; do
    picked="${pairs}_${strand}"
    trimmed="${dir}/${element}_${strand}_trimmed_${side}"
    echo "${element} in ${strand} strand :"
    # plus: second coordinate larger than the first; minus: the reverse.
    # Only the first coordinate is kept.
    if [[ $strand == plus ]]; then
      awk '$2 > $1 {print $1}' "$pairs" > "$picked"
    else
      awk '$1 > $2 {print $1}' "$pairs" > "$picked"
    fi
    wc -l < "$picked"
    cat -- "$picked"
    # Drop the last three characters of each position.
    # NOTE(review): values shorter than three characters pass through
    # unchanged and three-character values become empty lines - confirm
    # that is acceptable for positions below 1000.
    sed -e 's/...$//' "$picked" > "$trimmed"
    cat -- "$trimmed"
    # The sorted file is the input of the hotspot report.
    sort -n -- "$trimmed" > "${dir}/${element}_${strand}_trimmed_sorted_${side}"
  done
}

#######################################
# Locate an IS element in a BLAST result and print strand statistics.
# Arguments: $1 - element name
#            $2 - BLAST result file
#            $3 - output directory (created when missing)
#            $4 - motif marking the start of the alignment
#            $5 - motif marking the end of the alignment
# Returns:   1 when the BLAST result cannot be read, mkdir's status when the
#            output directory cannot be created
#######################################
locate_is_element() {
  local element=$1 blast=$2 dir=$3 start_motif=$4 end_motif=$5

  if [[ ! -r $blast ]]; then
    printf 'cannot read BLAST result: %s\n' "$blast" >&2
    return 1
  fi
  # -p: re-running must not fail because the directory already exists.
  mkdir -p -- "$dir" || return

  extract_side_positions "$element" "$blast" "$dir" start "$start_motif" 2
  extract_side_positions "$element" "$blast" "$dir" end "$end_motif" 4

  echo "Total number of ${element} copies across the isolates"
  grep -c -- "Strand=" "$blast"
  echo "Copies in plus and minus strand"
  grep -c -- "Strand=Plus/Plus" "$blast"
  grep -c -- "Strand=Plus/Minus" "$blast"
  echo "*********************${element} analysis done*******************"
}

locate_is_element IS1081 \
  /home/pseema/denovo_analysis/result_files/homology_results/IS1081_blast_result \
  /home/pseema/denovo_analysis/result_files/IS_positions \
  "AGTTACGTCCAGGGGTGTGGTGTACGGGCAGGTAAGGCCGGTGGGCGTGTCGTAGCCCAG" \
  "CCCGAAGGATCACGCGAGGAACCTTCACTCGTACACCACGTCCCTGGCCTTGGCC"
# position_hotspots.sh
#
# Reports position hotspots for an IS element. For every strand (plus, minus)
# and alignment side (start, end) it reads
#   <dir>/<element>_<strand>_trimmed_sorted_<side>
# prints the number of distinct positions, stores them in "<that file>_unique"
# and prints each position with its frequency. The four "_unique" files are
# then merged into <dir>/combined_<element>, which is summarised the same way.

#######################################
# Summarise one positions file.
# Arguments: $1 - path of the positions file
# Outputs:   count of distinct positions, then "count position" lines sorted
#            by descending count, on stdout; writes "$1_unique"
#######################################
summarize_positions() {
  local positions=$1
  local unique="${positions}_unique"

  echo "No. of unique positions"
  # tee stores the distinct positions while wc counts them, so the
  # sort/uniq pass runs only once.
  sort -n -- "$positions" | uniq | tee -- "$unique" | wc -l
  echo "Positions with highest frequency"
  awk -F '\t' '{print $1}' "$positions" | sort | uniq -c | sort -nr
}

#######################################
# Print the hotspot report for one IS element.
# Arguments: $1 - element name used in file names and headings (e.g. IS1081)
#            $2 - directory holding the *_trimmed_sorted_* files
# Outputs:   report on stdout; writes the four "_unique" files and
#            "$2/combined_$1"
#######################################
report_hotspots() {
  local element=$1 dir=$2
  local strand side positions
  local combined="${dir}/combined_${element}"
  local -a unique_files=()

  echo "*********************${element} starting**********************"
  for strand in plus minus; do
    for side in start end; do
      # The heading is built from the loop variables so that it always names
      # the file actually being summarised.
      echo "#######All ${strand} ${side} positions#######"
      positions="${dir}/${element}_${strand}_trimmed_sorted_${side}"
      summarize_positions "$positions"
      unique_files+=("${positions}_unique")
    done
  done

  awk '{print $1}' "${unique_files[@]}" > "$combined"
  echo "combined_${element}"
  echo "sorted"
  sort -- "$combined"
  echo "sorted unique"
  sort -- "$combined" | uniq -c
  sort -- "$combined" | uniq -c | wc -l
  echo "sorted unique sorted"
  sort -- "$combined" | uniq -c | sort -nr
  echo "********************${element} done***************************"
}

report_hotspots IS1081 /home/pseema/denovo_analysis/result_files/IS_positions
#!/usr/bin/env bash
# Takes genome BLAST results and derives the positions and orientations of an
# IS element. The motif sequence searched for is different for the start and
# the end of the alignment, and for each IS element.

#######################################
# Extract subject coordinates for one side of the alignment and bin them.
# Arguments: $1 - element name (file-name prefix and heading text)
#            $2 - BLAST result file
#            $3 - output directory
#            $4 - side of the alignment: "start" or "end"
#            $5 - motif to search for in the BLAST result
#            $6 - field number saved to <element>_column_<n>
# Outputs:   counts and position lists on stdout; files in $3
#######################################
extract_side_positions() {
  local element=$1 blast=$2 dir=$3 side=$4 motif=$5 column=$6
  local matches="${dir}/${element}_${side}_position"
  local subject="${dir}/${element}_only_subj_${side}"
  local pairs="${dir}/${element}_${side}_column_2_4"
  local strand picked trimmed

  echo "***********Matches of ${element} at the ${side} of alignment**********"
  grep -- "$motif" "$blast" > "$matches"
  wc -l < "$matches"
  # Keep only the subject lines of the matches.
  awk '/Sbjct/' "$matches" > "$subject"
  awk -v col="$column" '{print $col}' "$subject" \
    > "${dir}/${element}_column_${column}"
  echo "Total ${element} copies :"
  awk '{print $2, $4}' "$subject" > "$pairs"
  wc -l < "$pairs"

  for strand in plus minus; do
    picked="${pairs}_${strand}"
    trimmed="${dir}/${element}_${strand}_trimmed_${side}"
    echo "${element} in ${strand} strand :"
    # plus: second coordinate larger than the first; minus: the reverse.
    # Only the first coordinate is kept.
    if [[ $strand == plus ]]; then
      awk '$2 > $1 {print $1}' "$pairs" > "$picked"
    else
      awk '$1 > $2 {print $1}' "$pairs" > "$picked"
    fi
    wc -l < "$picked"
    cat -- "$picked"
    # Drop the last three characters of each position.
    # NOTE(review): values shorter than three characters pass through
    # unchanged and three-character values become empty lines - confirm
    # that is acceptable for positions below 1000.
    sed -e 's/...$//' "$picked" > "$trimmed"
    cat -- "$trimmed"
    # The sorted file is the input of the hotspot report.
    sort -n -- "$trimmed" > "${dir}/${element}_${strand}_trimmed_sorted_${side}"
  done
}

#######################################
# Locate an IS element in a BLAST result and print strand statistics.
# Arguments: $1 - element name
#            $2 - BLAST result file
#            $3 - output directory (created when missing)
#            $4 - motif marking the start of the alignment
#            $5 - motif marking the end of the alignment
# Returns:   1 when the BLAST result cannot be read, mkdir's status when the
#            output directory cannot be created
#######################################
locate_is_element() {
  local element=$1 blast=$2 dir=$3 start_motif=$4 end_motif=$5

  if [[ ! -r $blast ]]; then
    printf 'cannot read BLAST result: %s\n' "$blast" >&2
    return 1
  fi
  # -p: re-running must not fail because the directory already exists.
  mkdir -p -- "$dir" || return

  extract_side_positions "$element" "$blast" "$dir" start "$start_motif" 2
  extract_side_positions "$element" "$blast" "$dir" end "$end_motif" 4

  echo "Total number of ${element} copies across the isolates"
  grep -c -- "Strand=" "$blast"
  echo "Copies in plus and minus strand"
  grep -c -- "Strand=Plus/Plus" "$blast"
  grep -c -- "Strand=Plus/Minus" "$blast"
  echo "*********************${element} analysis done*******************"
}

locate_is_element IS1081 \
  /home/pseema/denovo_analysis/result_files/homology_results/IS1081_blast_result \
  /home/pseema/denovo_analysis/result_files/IS_positions \
  "AGTTACGTCCAGGGGTGTGGTGTACGGGCAGGTAAGGCCGGTGGGCGTGTCGTAGCCCAG" \
  "CCCGAAGGATCACGCGAGGAACCTTCACTCGTACACCACGTCCCTGGCCTTGGCC"
# No comments:
# Post a Comment