#!/bin/sh
# Date: 24/March/2016
# Code to analyze data for unique genes.
#
# Usage (run from the directory that contains all_isolate_gene_profile):
#   sh unique_genes_analysis.sh 2>&1 | tee all_isolate_gene_analysis
#
# Reads  : $result_dir/*.matches_comm_12, *.matches_comm_23, *.matches_comm_13
#          and the text report $gene_profile.
# Writes : intermediate and final files inside $result_dir; line counts and
#          the final report excerpt go to stdout.
#
# Both locations can be overridden from the environment; the defaults are the
# paths this script has always used.
result_dir="${DENOVO_RESULT_DIR:-/home/pseema/denovo_analysis/result_files}"
gene_profile="${DENOVO_GENE_PROFILE:-all_isolate_gene_profile}"

# ---------------------------------------------------------------------------
# Pool of *.matches_comm_12 files
# ---------------------------------------------------------------------------
#find *.matches_comm_12 | wc -l
# The shell expands the glob itself; routing it through `find` in backticks
# only re-split the names and, when nothing matched, left `cat` with no
# operands so it silently waited on stdin.
cat "$result_dir"/*.matches_comm_12 > "$result_dir/all_isolates_common"
echo "Common protein pool when the isolates were compared to each other..."
wc -l < "$result_dir/all_isolates_common"
# uniq only collapses ADJACENT duplicate lines and the pool is not sorted, so
# this pass is partial; the awk pass below removes the remaining duplicates
# (keeping first occurrences and every blank line).
uniq "$result_dir/all_isolates_common" > "$result_dir/all_isolates_common_uniq"
wc -l < "$result_dir/all_isolates_common_uniq"
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_common_uniq" \
  > "$result_dir/all_isolates_common_reduced"
echo "Unique proteins in the common protein pool..."
wc -l < "$result_dir/all_isolates_common_reduced"

# ---------------------------------------------------------------------------
# Pool of *.matches_comm_23 files -> "only_column1"
# ---------------------------------------------------------------------------
#find *.matches_comm_23 | wc -l
cat "$result_dir"/*.matches_comm_23 > "$result_dir/all_isolates_only_column1"
wc -l < "$result_dir/all_isolates_only_column1"
# uniq alone is not enough here (unsorted input), hence the awk pass after it.
uniq "$result_dir/all_isolates_only_column1" \
  > "$result_dir/all_isolates_only_column1_uniq"
wc -l < "$result_dir/all_isolates_only_column1_uniq"
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_only_column1_uniq" \
  > "$result_dir/all_isolates_only_column1_uniq_reduced"
wc -l < "$result_dir/all_isolates_only_column1_uniq_reduced"

# ---------------------------------------------------------------------------
# Pool of *.matches_comm_13 files -> "only_column2"
# ---------------------------------------------------------------------------
#find *.matches_comm_13 | wc -l
cat "$result_dir"/*.matches_comm_13 > "$result_dir/all_isolates_only_column2"
wc -l < "$result_dir/all_isolates_only_column2"
uniq "$result_dir/all_isolates_only_column2" \
  > "$result_dir/all_isolates_only_column2_uniq"
wc -l < "$result_dir/all_isolates_only_column2_uniq"
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_only_column2_uniq" \
  > "$result_dir/all_isolates_only_column2_uniq_reduced"
wc -l < "$result_dir/all_isolates_only_column2_uniq_reduced"

# ---------------------------------------------------------------------------
# Combine the two reduced pools
# ---------------------------------------------------------------------------
# Concatenate the two files one after the other.
cat "$result_dir/all_isolates_only_column1_uniq_reduced" \
    "$result_dir/all_isolates_only_column2_uniq_reduced" \
  > "$result_dir/all_isolates_column1_column2"
# Find unique lines.
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_column1_column2" \
  > "$result_dir/core_genes"
# The de-duplicated result lives in core_genes. The count and the sort used
# to read all_isolates_column1_column2_uniq_reduced, a file nothing creates,
# so the count failed and the sorted file was always empty.
wc -l < "$result_dir/core_genes"
# Sort the unique lines alphabetically.
sort -u "$result_dir/core_genes" \
  > "$result_dir/all_isolates_column1_column2_uniq_reduced_sorted"

# ---------------------------------------------------------------------------
# Pull per-isolate numbers out of the gene profile report
# ---------------------------------------------------------------------------
# Lines containing the marker text.
awk '/Proteins unique to/' "$gene_profile" > "$result_dir/pattern_files"
# The single line that immediately follows each marker line.
awk 'f{print;f=0} /Proteins unique to/{f=1}' "$gene_profile" \
  > "$result_dir/next_lines"
# Marker line and its following line, side by side.
paste -d' ' "$result_dir/pattern_files" "$result_dir/next_lines" \
  > "$result_dir/isolate_diff_unique_genes"
# Keep only whitespace-separated field 4
# (presumably the isolate name -- confirm against the report format).
awk '{print $4}' "$result_dir/isolate_diff_unique_genes" \
  > "$result_dir/isolate_diff_unique_genes_only_isolate"

# Pair up consecutive field-4 values: odd-numbered lines next to even ones.
awk 'NR%2==1' "$result_dir/isolate_diff_unique_genes_only_isolate" \
  > "$result_dir/isolate_diff_unique_genes_only_isolate_only_odd"
awk 'NR%2==0' "$result_dir/isolate_diff_unique_genes_only_isolate" \
  > "$result_dir/isolate_diff_unique_genes_only_isolate_only_even"
paste -d' ' \
  "$result_dir/isolate_diff_unique_genes_only_isolate_only_odd" \
  "$result_dir/isolate_diff_unique_genes_only_isolate_only_even" \
  > "$result_dir/merged_columns_isolates"

# Same odd/even pairing for the lines that followed each marker.
awk 'NR%2==1' "$result_dir/next_lines" > "$result_dir/only_odd"
awk 'NR%2==0' "$result_dir/next_lines" > "$result_dir/only_even"
paste -d' ' "$result_dir/only_odd" "$result_dir/only_even" \
  > "$result_dir/merged_columns"
# Append a tab and (field 1 - field 2) to every non-empty line.
awk 'NF > 0 { print $0 "\t" ($1 - $2) }' "$result_dir/merged_columns" \
  > "$result_dir/diff_columns"
# Paired field-4 values next to the paired numbers and their difference.
paste -d' ' "$result_dir/merged_columns_isolates" "$result_dir/diff_columns" \
  > "$result_dir/isolate_gene_diff"

# Print the report lines between each "Proteins unique to" marker and the
# next "Unique protein search" line, then (only if that awk succeeded) the
# "Unique protein search for" lines themselves.
awk '/Proteins unique to/ {flag=1;next} /Unique protein search/{flag=0} flag {print}' "$gene_profile" && awk '/Unique protein search for/' "$gene_profile"
# ------------------------------------------------
#! /usr/bin
#sh core_genes_analysis.sh |& tee core_proteins_in_all_isolates
#
# Prints, one keyword group after another, every line of the gene list that
# matches that group. Groups are searched in the order listed below and a
# line is printed once per group it matches, so overlapping groups (e.g.
# "ESX" / "ESX-1", "toxin" / "antitoxin") repeat lines in the output.
# Patterns are grep basic regular expressions; "\|" is alternation.
genes_file=/home/pseema/denovo_analysis/genes_common_to_all_70_isolates

for pattern in \
  "phage\|Phage" \
  "ribonuclease\|Ribonuclease" \
  "antitoxin\|Antitoxin" \
  "transposase\|Transposase" \
  "30S ribosomal" \
  "50S ribosomal" \
  "Universal stress protein" \
  "ESX" \
  "ESX-1" \
  "Multidrug resistance" \
  "phospholipase\|Phospholipase" \
  "chaperonin\|Chaperonin\| chaperone\| Chaperone" \
  "Serine/threonine" \
  "efflux\|Efflux\|transporter\|Transporter" \
  "trehalase\|Trehalase\|trehalose\|Trehalose" \
  "toxin\|Toxin" \
  "permease\|Permease" \
  "methyltransferase\|Methyltransferase"
do
  grep "$pattern" "$genes_file"
done
# Date: 24/March/2016
# Code to analyze data for unique genes.
#
# Usage (run from the directory that contains all_isolate_gene_profile):
#   sh unique_genes_analysis.sh 2>&1 | tee all_isolate_gene_analysis
#
# Reads  : $result_dir/*.matches_comm_12, *.matches_comm_23, *.matches_comm_13
#          and the text report $gene_profile.
# Writes : intermediate and final files inside $result_dir; line counts and
#          the final report excerpt go to stdout.
#
# Both locations can be overridden from the environment; the defaults are the
# paths this script has always used.
result_dir="${DENOVO_RESULT_DIR:-/home/pseema/denovo_analysis/result_files}"
gene_profile="${DENOVO_GENE_PROFILE:-all_isolate_gene_profile}"

# ---------------------------------------------------------------------------
# Pool of *.matches_comm_12 files
# ---------------------------------------------------------------------------
#find *.matches_comm_12 | wc -l
# The shell expands the glob itself; routing it through `find` in backticks
# only re-split the names and, when nothing matched, left `cat` with no
# operands so it silently waited on stdin.
cat "$result_dir"/*.matches_comm_12 > "$result_dir/all_isolates_common"
echo "Common protein pool when the isolates were compared to each other..."
wc -l < "$result_dir/all_isolates_common"
# uniq only collapses ADJACENT duplicate lines and the pool is not sorted, so
# this pass is partial; the awk pass below removes the remaining duplicates
# (keeping first occurrences and every blank line).
uniq "$result_dir/all_isolates_common" > "$result_dir/all_isolates_common_uniq"
wc -l < "$result_dir/all_isolates_common_uniq"
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_common_uniq" \
  > "$result_dir/all_isolates_common_reduced"
echo "Unique proteins in the common protein pool..."
wc -l < "$result_dir/all_isolates_common_reduced"

# ---------------------------------------------------------------------------
# Pool of *.matches_comm_23 files -> "only_column1"
# ---------------------------------------------------------------------------
#find *.matches_comm_23 | wc -l
cat "$result_dir"/*.matches_comm_23 > "$result_dir/all_isolates_only_column1"
wc -l < "$result_dir/all_isolates_only_column1"
# uniq alone is not enough here (unsorted input), hence the awk pass after it.
uniq "$result_dir/all_isolates_only_column1" \
  > "$result_dir/all_isolates_only_column1_uniq"
wc -l < "$result_dir/all_isolates_only_column1_uniq"
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_only_column1_uniq" \
  > "$result_dir/all_isolates_only_column1_uniq_reduced"
wc -l < "$result_dir/all_isolates_only_column1_uniq_reduced"

# ---------------------------------------------------------------------------
# Pool of *.matches_comm_13 files -> "only_column2"
# ---------------------------------------------------------------------------
#find *.matches_comm_13 | wc -l
cat "$result_dir"/*.matches_comm_13 > "$result_dir/all_isolates_only_column2"
wc -l < "$result_dir/all_isolates_only_column2"
uniq "$result_dir/all_isolates_only_column2" \
  > "$result_dir/all_isolates_only_column2_uniq"
wc -l < "$result_dir/all_isolates_only_column2_uniq"
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_only_column2_uniq" \
  > "$result_dir/all_isolates_only_column2_uniq_reduced"
wc -l < "$result_dir/all_isolates_only_column2_uniq_reduced"

# ---------------------------------------------------------------------------
# Combine the two reduced pools
# ---------------------------------------------------------------------------
# Concatenate the two files one after the other.
cat "$result_dir/all_isolates_only_column1_uniq_reduced" \
    "$result_dir/all_isolates_only_column2_uniq_reduced" \
  > "$result_dir/all_isolates_column1_column2"
# Find unique lines.
awk '!NF || !seen[$0]++' "$result_dir/all_isolates_column1_column2" \
  > "$result_dir/core_genes"
# The de-duplicated result lives in core_genes. The count and the sort used
# to read all_isolates_column1_column2_uniq_reduced, a file nothing creates,
# so the count failed and the sorted file was always empty.
wc -l < "$result_dir/core_genes"
# Sort the unique lines alphabetically.
sort -u "$result_dir/core_genes" \
  > "$result_dir/all_isolates_column1_column2_uniq_reduced_sorted"

# ---------------------------------------------------------------------------
# Pull per-isolate numbers out of the gene profile report
# ---------------------------------------------------------------------------
# Lines containing the marker text.
awk '/Proteins unique to/' "$gene_profile" > "$result_dir/pattern_files"
# The single line that immediately follows each marker line.
awk 'f{print;f=0} /Proteins unique to/{f=1}' "$gene_profile" \
  > "$result_dir/next_lines"
# Marker line and its following line, side by side.
paste -d' ' "$result_dir/pattern_files" "$result_dir/next_lines" \
  > "$result_dir/isolate_diff_unique_genes"
# Keep only whitespace-separated field 4
# (presumably the isolate name -- confirm against the report format).
awk '{print $4}' "$result_dir/isolate_diff_unique_genes" \
  > "$result_dir/isolate_diff_unique_genes_only_isolate"

# Pair up consecutive field-4 values: odd-numbered lines next to even ones.
awk 'NR%2==1' "$result_dir/isolate_diff_unique_genes_only_isolate" \
  > "$result_dir/isolate_diff_unique_genes_only_isolate_only_odd"
awk 'NR%2==0' "$result_dir/isolate_diff_unique_genes_only_isolate" \
  > "$result_dir/isolate_diff_unique_genes_only_isolate_only_even"
paste -d' ' \
  "$result_dir/isolate_diff_unique_genes_only_isolate_only_odd" \
  "$result_dir/isolate_diff_unique_genes_only_isolate_only_even" \
  > "$result_dir/merged_columns_isolates"

# Same odd/even pairing for the lines that followed each marker.
awk 'NR%2==1' "$result_dir/next_lines" > "$result_dir/only_odd"
awk 'NR%2==0' "$result_dir/next_lines" > "$result_dir/only_even"
paste -d' ' "$result_dir/only_odd" "$result_dir/only_even" \
  > "$result_dir/merged_columns"
# Append a tab and (field 1 - field 2) to every non-empty line.
awk 'NF > 0 { print $0 "\t" ($1 - $2) }' "$result_dir/merged_columns" \
  > "$result_dir/diff_columns"
# Paired field-4 values next to the paired numbers and their difference.
paste -d' ' "$result_dir/merged_columns_isolates" "$result_dir/diff_columns" \
  > "$result_dir/isolate_gene_diff"

# Print the report lines between each "Proteins unique to" marker and the
# next "Unique protein search" line, then (only if that awk succeeded) the
# "Unique protein search for" lines themselves.
awk '/Proteins unique to/ {flag=1;next} /Unique protein search/{flag=0} flag {print}' "$gene_profile" && awk '/Unique protein search for/' "$gene_profile"
# ------------------------------------------------
#! /usr/bin
#sh core_genes_analysis.sh |& tee core_proteins_in_all_isolates
#
# Prints, one keyword group after another, every line of the gene list that
# matches that group. Groups are searched in the order listed below and a
# line is printed once per group it matches, so overlapping groups (e.g.
# "ESX" / "ESX-1", "toxin" / "antitoxin") repeat lines in the output.
# Patterns are grep basic regular expressions; "\|" is alternation.
genes_file=/home/pseema/denovo_analysis/genes_common_to_all_70_isolates

for pattern in \
  "phage\|Phage" \
  "ribonuclease\|Ribonuclease" \
  "antitoxin\|Antitoxin" \
  "transposase\|Transposase" \
  "30S ribosomal" \
  "50S ribosomal" \
  "Universal stress protein" \
  "ESX" \
  "ESX-1" \
  "Multidrug resistance" \
  "phospholipase\|Phospholipase" \
  "chaperonin\|Chaperonin\| chaperone\| Chaperone" \
  "Serine/threonine" \
  "efflux\|Efflux\|transporter\|Transporter" \
  "trehalase\|Trehalase\|trehalose\|Trehalose" \
  "toxin\|Toxin" \
  "permease\|Permease" \
  "methyltransferase\|Methyltransferase"
do
  grep "$pattern" "$genes_file"
done
# No comments:
# Post a Comment