tayabsoomro · January 19, 2020 05:21
diff --git a/ShortListArabidopsisPeptides.sh b/ShortListArabidopsisPeptides.sh
 #!/bin/sh

 #####################################################################
 # SYNOPSIS
 #	The following script was used for the shortlist of
 #	the peptides sent for Arabidopsis-specific array
 #	synthesis
 #
 # 	DAPPLE2 Configuration
 #		Target Organism 		: {ANY}
 #		PTM Type			: Phosphorylation
 #		Database(s)			: All (379,247 sites)
 #		Search method			: BLAST
 #		Maximum results per PTM site	: 1
 #
 # IMPLEMENTATION
 #	The script was created by Tayab Soomro ([email protected])
 #
 # VERSION HISTORY
 # 	- 18/1/2020 -- Creation of script
 #####################################################################


 # Validate the command line: exactly two arguments are required.
 # On a wrong argument count the usage text is written to stderr and the
 # script stops with a non-zero status, so nothing below runs with an empty
 # input or output path. printf is used because echo's handling of "\t"
 # differs between sh implementations.
 if [ "$#" -ne 2 ]; then
 	{
 		printf 'USAGE: %s <INPUT_FILE> <OUTPUT_FILE>\n' "$0"
 		printf '\t- INPUT_FILE -- This file contains the output from DAPPLE2\n'
 		printf '\t- OUTPUT_FILE -- The file to output the shortlist to\n'
 	} >&2
 	exit 2
 fi

 input=$1	# first argument: the DAPPLE2 output file (per the usage text)
 output=$2	# second argument: the file the shortlist is written to

 # Printing the header
 #
 # print_header
 #	Writes the nine column names to stdout as a single tab-separated line.
 #	Each name is its own printf argument, so no continuation-line
 #	whitespace ends up between columns and the tabs do not depend on how
 #	the shell's echo treats "\t".
 print_header() {
 	printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
 		'Query Accession' \
 		'Query Organism' \
 		'Hit Site' \
 		'HitSequence' \
 		'Hit Protein Rank' \
 		'Hit Protein E-value' \
 		'RBH?' \
 		'High-throughput references' \
 		'Hit keywords'
 }

 # The header is appended, so an existing output file keeps its contents.
 print_header >> "$output"

 # Printing the phosphorylation sites and associated data.
 #
 # shortlist_peptides FILE
 #	Reads the tab-separated DAPPLE2 output in FILE and writes the
 #	shortlisted rows to stdout as nine tab-separated columns
 #	(input fields 1, 3, 6, 9, 14, 15, 16, 18 and 22).
 #
 #	sort is given an explicit tab separator so that its column 8 is the
 #	same column awk printed; by default sort splits on blanks, and a space
 #	inside any field would shift the key.
 #	No comment is followed by a line-continuation backslash: "#" would
 #	swallow the backslash and end the command early.
 shortlist_peptides() {
 	tab=$(printf '\t')
 	awk 'BEGIN { FS = OFS = "\t" }
 		{ print $1, $3, $6, $9, $14, $15, $16, $18, $22 }' "$1" |
 		grep "arabidopsis" |		# Only Arabidopsis results were retained
 		grep -v "N/A" |
 		sort -t "$tab" -rn -k8,8 |	# Sorted with highly referenced peptides on top
 		grep ";" |
 		head -n 1200 |			# Select up to 1200 peptides
 		# Drops the first remaining row.
 		# NOTE(review): presumably meant to skip a header line -- confirm.
 		tail -n +2 |
 		# Plain (non-numeric) reverse sort on column 8, as in the original.
 		sort -t "$tab" -r -k8,8 |
 		# Remove duplicate peptides: keeps the first row per column-9 value.
 		# NOTE(review): the header names column 4 as HitSequence -- confirm
 		# that column 9 is the intended de-duplication key.
 		awk -F '\t' '!seen[$9]++'
 }

 # Read the file given on the command line and append to the output file.
 shortlist_peptides "$input" >> "$output"
	#!/bin/sh

	#####################################################################
	# SYNOPSIS
	# The following script was used for the shortlist of
	# the peptides sent for Arabidopsis-specific array
	# synthesis
	#
	# DAPPLE2 Configuration
	# Target Organism : {ANY}
	# PTM Type : Phosphorylation
	# Database(s) : All (379,247 sites)
	# Search method : BLAST
	# Maximum results per PTM site : 1
	#
	# IMPLEMENTATION
	# The script was created by Tayab Soomro ([email protected])
	#
	# VERSION HISTORY
	# - 18/1/2020 -- Creation of script
	#####################################################################


	# Validate the command line: exactly two arguments are required.
	# On a wrong argument count the usage text is written to stderr and the
	# script stops with a non-zero status, so nothing below runs with an empty
	# input or output path. printf is used because echo's handling of "\t"
	# differs between sh implementations.
	if [ "$#" -ne 2 ]; then
		{
			printf 'USAGE: %s <INPUT_FILE> <OUTPUT_FILE>\n' "$0"
			printf '\t- INPUT_FILE -- This file contains the output from DAPPLE2\n'
			printf '\t- OUTPUT_FILE -- The file to output the shortlist to\n'
		} >&2
		exit 2
	fi

	input=$1	# first argument: the DAPPLE2 output file (per the usage text)
	output=$2	# second argument: the file the shortlist is written to

	# Printing the header
	#
	# print_header
	#	Writes the nine column names to stdout as a single tab-separated line.
	#	Each name is its own printf argument, so no continuation-line
	#	whitespace ends up between columns and the tabs do not depend on how
	#	the shell's echo treats "\t".
	print_header() {
		printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
			'Query Accession' \
			'Query Organism' \
			'Hit Site' \
			'HitSequence' \
			'Hit Protein Rank' \
			'Hit Protein E-value' \
			'RBH?' \
			'High-throughput references' \
			'Hit keywords'
	}

	# The header is appended, so an existing output file keeps its contents.
	print_header >> "$output"

	# Printing the phosphorylation sites and associated data.
	#
	# shortlist_peptides FILE
	#	Reads the tab-separated DAPPLE2 output in FILE and writes the
	#	shortlisted rows to stdout as nine tab-separated columns
	#	(input fields 1, 3, 6, 9, 14, 15, 16, 18 and 22).
	#
	#	The stages are joined with real, unescaped pipes; an escaped "\|" is
	#	passed to the command as a literal argument and forms no pipeline.
	#	sort is given an explicit tab separator so that its column 8 is the
	#	same column awk printed; by default sort splits on blanks, and a space
	#	inside any field would shift the key.
	#	No comment is followed by a line-continuation backslash: "#" would
	#	swallow the backslash and end the command early.
	shortlist_peptides() {
		tab=$(printf '\t')
		awk 'BEGIN { FS = OFS = "\t" }
			{ print $1, $3, $6, $9, $14, $15, $16, $18, $22 }' "$1" |
			grep "arabidopsis" |		# Only Arabidopsis results were retained
			grep -v "N/A" |
			sort -t "$tab" -rn -k8,8 |	# Sorted with highly referenced peptides on top
			grep ";" |
			head -n 1200 |			# Select up to 1200 peptides
			# Drops the first remaining row.
			# NOTE(review): presumably meant to skip a header line -- confirm.
			tail -n +2 |
			# Plain (non-numeric) reverse sort on column 8, as in the original.
			sort -t "$tab" -r -k8,8 |
			# Remove duplicate peptides: keeps the first row per column-9 value.
			# NOTE(review): the header names column 4 as HitSequence -- confirm
			# that column 9 is the intended de-duplication key.
			awk -F '\t' '!seen[$9]++'
	}

	# Read the file given on the command line and append to the output file.
	shortlist_peptides "$input" >> "$output"