Thursday, July 20, 2017

MY SCRIPT (6): Core gene, accessory gene, pan gene analysis ........

#!/bin/sh
# Date 24/March/2016
# Code to analyze data for unique genes.
# Usage (the |& redirection needs bash as the calling shell):
#   sh unique_genes_analysis.sh |& tee all_isolate_gene_analysis

result_dir=/home/pseema/denovo_analysis/result_files
common_pool="$result_dir/all_isolates_common"

# Pool every *.matches_comm_12 file into a single list.
# (To count the input files: find *.matches_comm_12 | wc -l)
# The glob is handed straight to cat: when nothing matches, cat reports the
# missing file instead of being started without operands and waiting on stdin.
cat "$result_dir"/*.matches_comm_12 > "$common_pool"
echo "Common protein pool when the isolates were compared to each other..."
wc -l < "$common_pool"

# uniq collapses only adjacent repeats, so this count is an intermediate figure.
uniq "$common_pool" > "${common_pool}_uniq"
wc -l < "${common_pool}_uniq"

# Keep the first occurrence of every non-blank line; blank lines always pass.
awk '!NF || !seen[$0]++' "${common_pool}_uniq" > "${common_pool}_reduced"
echo "Unique proteins in the common protein pool..."
wc -l < "${common_pool}_reduced"

# Pool every *.matches_comm_23 file and report the line count after each
# de-duplication stage.
# (To count the input files: find *.matches_comm_23 | wc -l)
result_dir=/home/pseema/denovo_analysis/result_files
column1_pool="$result_dir/all_isolates_only_column1"

# The glob goes straight to cat: with no match cat fails on the missing file
# rather than running without operands and blocking on stdin.
cat "$result_dir"/*.matches_comm_23 > "$column1_pool"
wc -l < "$column1_pool"

# uniq removes only ADJACENT duplicate lines; the pooled list is not sorted,
# which is why uniq alone appeared "not to work" and awk finishes the job.
uniq "$column1_pool" > "${column1_pool}_uniq"
wc -l < "${column1_pool}_uniq"

# Keep the first occurrence of every non-blank line; blank lines always pass.
awk '!NF || !seen[$0]++' "${column1_pool}_uniq" > "${column1_pool}_uniq_reduced"
wc -l < "${column1_pool}_uniq_reduced"

# Pool every *.matches_comm_13 file and report the line count after each
# de-duplication stage.
# (To count the input files: find *.matches_comm_13 | wc -l)
result_dir=/home/pseema/denovo_analysis/result_files
column2_pool="$result_dir/all_isolates_only_column2"

# The glob goes straight to cat: with no match cat fails on the missing file
# rather than running without operands and blocking on stdin.
cat "$result_dir"/*.matches_comm_13 > "$column2_pool"
wc -l < "$column2_pool"

# uniq removes only adjacent duplicate lines, so this is an intermediate count.
uniq "$column2_pool" > "${column2_pool}_uniq"
wc -l < "${column2_pool}_uniq"

# Keep the first occurrence of every non-blank line; blank lines always pass.
awk '!NF || !seen[$0]++' "${column2_pool}_uniq" > "${column2_pool}_uniq_reduced"
wc -l < "${column2_pool}_uniq_reduced"

# Append the two reduced "only" lists into one file (one list after the
# other, not side by side).
result_dir=/home/pseema/denovo_analysis/result_files
merged="$result_dir/all_isolates_column1_column2"
deduped="$result_dir/core_genes"

cat "$result_dir/all_isolates_only_column1_uniq_reduced" \
    "$result_dir/all_isolates_only_column2_uniq_reduced" > "$merged"

# Keep the first occurrence of every non-blank line; blank lines always pass.
awk '!NF || !seen[$0]++' "$merged" > "$deduped"

# The de-duplicated list lives in core_genes; no step writes a file called
# all_isolates_column1_column2_uniq_reduced, so count and sort core_genes.
wc -l < "$deduped"

# Sort the unique lines alphabetically (sort -u also drops any repeats left).
sort -u "$deduped" > "${merged}_uniq_reduced_sorted"




# Build merged_columns_isolates: field 4 of consecutive "Proteins unique to"
# records, two records per output line.
# all_isolate_gene_profile is read relative to the current directory.
out_dir=/home/pseema/denovo_analysis/result_files
profile=all_isolate_gene_profile
names="$out_dir/isolate_diff_unique_genes_only_isolate"

# Every line containing the marker text.
awk '/Proteins unique to/ { print }' "$profile" > "$out_dir/pattern_files"

# The single line that follows each marker line.
awk 'grab { print; grab = 0 } /Proteins unique to/ { grab = 1 }' "$profile" \
  > "$out_dir/next_lines"

# Marker line and its follower joined by one space.
paste -d ' ' "$out_dir/pattern_files" "$out_dir/next_lines" \
  > "$out_dir/isolate_diff_unique_genes"

# Keep only the fourth whitespace-separated field of each joined line.
awk '{ print $4 }' "$out_dir/isolate_diff_unique_genes" > "$names"

# Split into odd- and even-numbered lines, then put each odd line next to the
# even line that follows it.
awk 'NR % 2 == 1' "$names" > "${names}_only_odd"
awk 'NR % 2 == 0' "$names" > "${names}_only_even"
paste -d ' ' "${names}_only_odd" "${names}_only_even" \
  > "$out_dir/merged_columns_isolates"

# Pair consecutive lines of next_lines, append the difference of the first
# two fields of each pair, and join the result with merged_columns_isolates.
out_dir=/home/pseema/denovo_analysis/result_files
followers="$out_dir/next_lines"

# Odd-numbered and even-numbered lines go to separate files ...
awk 'NR % 2 == 1' "$followers" > "$out_dir/only_odd"
awk 'NR % 2 == 0' "$followers" > "$out_dir/only_even"

# ... and are then placed side by side, separated by one space.
paste -d ' ' "$out_dir/only_odd" "$out_dir/only_even" > "$out_dir/merged_columns"

# For every non-empty line, append a tab and (field 1 - field 2).
awk 'NF { delta = $1 - $2; print $0 "\t" delta }' "$out_dir/merged_columns" \
  > "$out_dir/diff_columns"

# Final table: the paired field-4 values followed by the paired lines and
# their difference.
paste -d ' ' "$out_dir/merged_columns_isolates" "$out_dir/diff_columns" \
  > "$out_dir/isolate_gene_diff"

# Print the content between two patterns: every line after a
# "Proteins unique to" line up to (not including) the next
# "Unique protein search" line. If that succeeds, also print the
# "Unique protein search for" lines themselves.
# (all_isolate_gene_profile is in the denovo_analysis folder.)
awk '
  /Proteins unique to/    { inside = 1; next }
  /Unique protein search/ { inside = 0 }
  inside
' all_isolate_gene_profile &&
  awk '/Unique protein search for/ { print }' all_isolate_gene_profile

------------------------------------------------
#!/bin/sh
# Prints, keyword group by keyword group, the lines of the
# genes_common_to_all_70_isolates list that mention each term.
# Usage (the |& redirection needs bash as the calling shell):
#   sh core_genes_analysis.sh |& tee core_proteins_in_all_isolates

core_genes=/home/pseema/denovo_analysis/genes_common_to_all_70_isolates

# One search per line below, run in the listed order. Each line is a POSIX
# extended regular expression; "|" separates alternatives, and the leading
# spaces in " chaperone| Chaperone" are part of the pattern.
# Some groups overlap by design of the list: "ESX-1" lines also match "ESX",
# and "toxin" is a substring of "antitoxin", so those lines print twice.
while IFS= read -r keyword_pattern; do
  grep -E -- "$keyword_pattern" "$core_genes"
done <<'EOF'
phage|Phage
ribonuclease|Ribonuclease
antitoxin|Antitoxin
transposase|Transposase
30S ribosomal
50S ribosomal
Universal stress protein
ESX
ESX-1
Multidrug resistance
phospholipase|Phospholipase
chaperonin|Chaperonin| chaperone| Chaperone
Serine/threonine
efflux|Efflux|transporter|Transporter
trehalase|Trehalase|trehalose|Trehalose
toxin|Toxin
permease|Permease
methyltransferase|Methyltransferase
EOF

No comments:

Post a Comment

Laboratory tools and reagents (Micro-pipettes)...

Micro-pipettes are essential tools of R & D labs and an integral part of Good Laboratory Practices (GLPs). Micro-pipetting methods include ...