Thursday, July 20, 2017

MY SCRIPT (3): position hotspots and position manipulations

#!/usr/bin/env bash
# position_hotspots.sh
#
# Reports IS1081 position hotspots from the four position files
#   IS1081_{plus,minus}_trimmed_sorted_{start,end}
# located in IS_DIR. For every file it prints the number of distinct
# positions and a frequency table, and writes the distinct positions to
# "<file>_unique". The "_unique" files are then concatenated into
# "combined_IS1081", which is summarised as well.
#
# Environment:
#   IS_DIR  directory holding the position files
#           (default: /home/pseema/denovo_analysis/result_files/IS_positions)
#
# `set -e` is deliberately not used: the four per-file reports are
# independent, so one unreadable input is reported and skipped instead of
# aborting the remaining reports.

set -u

IS_DIR="${IS_DIR:-/home/pseema/denovo_analysis/result_files/IS_positions}"

#######################################
# Report on a single position file and save its distinct positions.
# Arguments: $1 - section label, e.g. "plus start"
#            $2 - path of the position file (first tab-separated field of
#                 each line is the position)
# Outputs:   report on stdout; writes "<$2>_unique"; warning on stderr when
#            the input cannot be read
# Returns:   0 on success, 1 when the input file is not readable
#######################################
report_positions() {
  local label=$1
  local infile=$2
  local unique_file="${infile}_unique"

  echo "#######All ${label} positions#######"
  if [[ ! -r "$infile" ]]; then
    printf 'warning: cannot read %s; skipping\n' "$infile" >&2
    return 1
  fi

  echo "No. of unique positions"
  # One pass: tee saves the distinct positions while wc counts them.
  sort -n -- "$infile" | uniq | tee -- "$unique_file" | wc -l

  echo "Positions with highest frequency"
  awk -F '\t' '{print $1}' "$infile" | sort | uniq -c | sort -nr
}

#######################################
# Concatenate the first field of the given files into one file and print
# it sorted, as a per-value count table, the number of distinct values,
# and the count table ordered by descending count.
# Arguments: $1 - path of the combined output file
#            $2... - input files
# Outputs:   report on stdout; writes $1
#######################################
summarize_combined() {
  local combined=$1
  shift

  awk '{print $1}' "$@" > "$combined"

  echo "combined_IS1081"
  echo "sorted"
  sort -- "$combined"
  echo "sorted unique"
  sort -- "$combined" | uniq -c
  sort -- "$combined" | uniq | wc -l
  echo "sorted unique sorted"
  sort -- "$combined" | uniq -c | sort -nr
}

#######################################
# Entry point: run the four per-file reports, then the combined summary.
# Globals:   IS_DIR (read)
# Returns:   0 on success, 1 when IS_DIR is missing or no input was usable
#######################################
hotspots_main() {
  local strand side infile
  local -a unique_files=()

  if [[ ! -d "$IS_DIR" ]]; then
    printf 'error: position directory not found: %s\n' "$IS_DIR" >&2
    return 1
  fi

  echo "*********************IS1081 starting**********************"

  # Order matters for combined_IS1081:
  # plus start, plus end, minus start, minus end.
  for strand in plus minus; do
    for side in start end; do
      infile="${IS_DIR}/IS1081_${strand}_trimmed_sorted_${side}"
      if report_positions "${strand} ${side}" "$infile"; then
        unique_files+=("${infile}_unique")
      fi
    done
  done

  if (( ${#unique_files[@]} == 0 )); then
    printf 'error: no readable position files in %s\n' "$IS_DIR" >&2
    return 1
  fi

  summarize_combined "${IS_DIR}/combined_IS1081" "${unique_files[@]}"

  echo "********************IS1081 done***************************"
}

hotspots_main "$@"

#!/usr/bin/env bash
# Takes the genome BLAST result for IS1081 and derives, for both edges of the
# alignment (start and end) and both strands, the list of binned positions.
#
# For each edge, the lines of the BLAST result containing that edge's marker
# sequence are selected (the marker differs per edge and per IS element),
# the "Sbjct" lines among them are kept, and fields 2 and 4 of those lines
# are compared to decide the strand (field 4 > field 2: plus; field 2 >
# field 4: minus). Field 2 is then stripped of its last three characters
# and sorted numerically.
# NOTE(review): fields 2/4 are presumably the subject start/end coordinates
# of the alignment line -- confirm against the BLAST output format. The
# "end" edge also bins field 2 (not field 4); confirm that is intended.
#
# Files written to IS_DIR (edge = start|end, strand = plus|minus):
#   IS1081_<edge>_position                   lines containing the marker
#   IS1081_only_subj_<edge>                  the Sbjct lines among them
#   IS1081_column_2 / IS1081_column_4        field 2 (start) / field 4 (end)
#   IS1081_<edge>_column_2_4                 fields 2 and 4
#   IS1081_<edge>_column_2_4_<strand>        field 2 of that strand's lines
#   IS1081_<strand>_trimmed_<edge>           same, last three characters removed
#   IS1081_<strand>_trimmed_sorted_<edge>    same, numerically sorted
#
# Environment:
#   IS_DIR        output directory
#                 (default: /home/pseema/denovo_analysis/result_files/IS_positions)
#   BLAST_RESULT  BLAST result to analyse
#                 (default: /home/pseema/denovo_analysis/result_files/homology_results/IS1081_blast_result)

set -u

IS_DIR="${IS_DIR:-/home/pseema/denovo_analysis/result_files/IS_positions}"
BLAST_RESULT="${BLAST_RESULT:-/home/pseema/denovo_analysis/result_files/homology_results/IS1081_blast_result}"

#######################################
# Extract, trim and sort the positions of one strand for one alignment edge.
# Arguments: $1 - edge: "start" or "end"
#            $2 - strand: "plus" or "minus"
#            $3 - file with two columns (fields 2 and 4 of the Sbjct lines)
# Globals:   IS_DIR (read)
# Outputs:   count, raw positions and trimmed positions on stdout; writes
#            the three per-strand files listed in the file header
#######################################
extract_strand_positions() {
  local edge=$1
  local strand=$2
  local coords=$3
  local positions="${coords}_${strand}"
  local trimmed="${IS_DIR}/IS1081_${strand}_trimmed_${edge}"
  local sorted="${IS_DIR}/IS1081_${strand}_trimmed_sorted_${edge}"

  echo "IS1081  in ${strand} strand :"
  # Keep only the first column of the rows belonging to this strand.
  awk -v strand="$strand" '
    (strand == "plus" && $2 > $1) || (strand == "minus" && $1 > $2) { print $1 }
  ' "$coords" > "$positions"
  wc -l < "$positions"
  cat -- "$positions"

  # Dropping the last three characters groups positions by thousands, so
  # nearby insertions fall into the same bin.
  sed -e 's/...$//' -- "$positions" > "$trimmed"
  cat -- "$trimmed"

  # The sorted file is the input of the hotspot report.
  sort -n -- "$trimmed" > "$sorted"
}

#######################################
# Analyse one edge of the alignment for both strands.
# Arguments: $1 - edge: "start" or "end"
#            $2 - marker sequence identifying that edge in the BLAST result
#            $3 - Sbjct field saved to IS1081_column_<$3>
# Globals:   IS_DIR, BLAST_RESULT (read)
# Outputs:   match and copy counts on stdout, followed by the per-strand
#            output of extract_strand_positions
#######################################
analyse_alignment_edge() {
  local edge=$1
  local marker=$2
  local column=$3
  local hits="${IS_DIR}/IS1081_${edge}_position"
  local subj="${IS_DIR}/IS1081_only_subj_${edge}"
  local coords="${IS_DIR}/IS1081_${edge}_column_2_4"

  echo "***********Matches of IS1081  at the ${edge} of alignment**********"
  # The marker is a literal sequence, so match it as a fixed string.
  grep -F -- "$marker" "$BLAST_RESULT" > "$hits"
  wc -l < "$hits"

  # Keep only the lines matching /Sbjct/.
  awk '/Sbjct/' "$hits" > "$subj"
  # Not read again by this script; still written because it was always produced.
  awk -v col="$column" '{print $col}' "$subj" > "${IS_DIR}/IS1081_column_${column}"

  echo "Total IS1081  copies :"
  awk '{print $2, $4}' "$subj" > "$coords"
  wc -l < "$coords"

  extract_strand_positions "$edge" plus "$coords"
  extract_strand_positions "$edge" minus "$coords"
}

#######################################
# Print the number of "Strand=" lines in the BLAST result, in total and
# per orientation (Plus/Plus, then Plus/Minus).
# Globals:   BLAST_RESULT (read)
#######################################
report_strand_totals() {
  echo "Total number of IS1081  copies across the isolates"
  grep -c -- "Strand=" "$BLAST_RESULT"
  echo "Copies in plus and minus strand"
  grep -c -- "Strand=Plus/Plus" "$BLAST_RESULT"
  grep -c -- "Strand=Plus/Minus" "$BLAST_RESULT"
}

#######################################
# Entry point.
# Globals:   IS_DIR, BLAST_RESULT (read)
# Returns:   0 on success, 1 when the BLAST result is unreadable or the
#            output directory cannot be created
#######################################
positions_main() {
  if [[ ! -r "$BLAST_RESULT" ]]; then
    printf 'error: cannot read BLAST result: %s\n' "$BLAST_RESULT" >&2
    return 1
  fi
  # -p: no error when the directory already exists (reruns), creates parents.
  mkdir -p -- "$IS_DIR" || return 1

  analyse_alignment_edge start \
    "AGTTACGTCCAGGGGTGTGGTGTACGGGCAGGTAAGGCCGGTGGGCGTGTCGTAGCCCAG" 2
  analyse_alignment_edge end \
    "CCCGAAGGATCACGCGAGGAACCTTCACTCGTACACCACGTCCCTGGCCTTGGCC" 4

  report_strand_totals

  echo "*********************IS1081  analysis done*******************"
}

positions_main "$@"

No comments:

Post a Comment

Laboratory tools and reagents (Micro-pipettes)...

Micro-pipettes are essential tools of R & D labs and an integral part of Good Laboratory Practices (GLPs). Micro-pipetting methods include ...