# Gist: tayabsoomro/4be83b6d36a5ab9be116ec7ad611d489
# Last active: January 19, 2020 05:21
# Description: Shell script for creating a shortlist of phosphopeptides of Arabidopsis.
# Note: GitHub flagged this file as containing hidden or bidirectional Unicode
# text that may be interpreted or compiled differently than it appears; review
# it in an editor that reveals hidden Unicode characters.
#!/bin/sh
#####################################################################
# SYNOPSIS
#   The following script was used for the shortlist of
#   the peptides sent for Arabidopsis-specific array
#   synthesis.
#
# USAGE
#   <script> <INPUT_FILE> <OUTPUT_FILE>
#     INPUT_FILE  -- tab-separated output from DAPPLE2
#     OUTPUT_FILE -- file the shortlist (header + rows) is appended to
#
# DAPPLE2 Configuration
#   Target Organism              : {ANY}
#   PTM Type                     : Phosphorylation
#   Database(s)                  : All (379,247 sites)
#   Search method                : BLAST
#   Maximum results per PTM site : 1
#
# IMPLEMENTATION
#   The script was created by Tayab Soomro ([email protected])
#
# VERSION HISTORY
#   - 18/1/2020 -- Creation of script
#####################################################################
set -eu

# Exactly two arguments are required; print the usage text and stop
# instead of carrying on with unset file names.
if [ "$#" -ne 2 ]; then
  {
    printf 'USAGE: %s <INPUT_FILE> <OUTPUT_FILE>\n' "$0"
    printf '\t- INPUT_FILE -- This file contains the output from DAPPLE2\n'
    printf '\t- OUTPUT_FILE -- The file to output the shortlist to\n'
  } >&2
  exit 1
fi

input=$1
output=$2

if [ ! -r "$input" ]; then
  printf '%s: cannot read input file: %s\n' "$0" "$input" >&2
  exit 1
fi

# A literal tab, so that sort splits fields the same way awk does.
tab=$(printf '\t')

# Header and rows are written through a single redirection so that both
# are guaranteed to land in the output file (appending, as before).
{
  # Printing the header (printf, because echo's handling of "\t" differs
  # between /bin/sh implementations).
  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    'Query Accession' \
    'Query Organism' \
    'Hit Site' \
    'HitSequence' \
    'Hit Protein Rank' \
    'Hit Protein E-value' \
    'RBH?' \
    'High-throughput references' \
    'Hit keywords'

  # Printing the phosphorylation sites and associated data.
  # The first awk projects input columns 1,3,6,9,14,15,16,18,22 onto output
  # columns 1..9; every later stage refers to the OUTPUT column numbers.
  awk -F '\t' -v OFS='\t' \
    '{ print $1, $3, $6, $9, $14, $15, $16, $18, $22 }' "$input" |
    grep 'arabidopsis' |           # Only Arabidopsis results are retained
    grep -v 'N/A' |                # Drop rows containing "N/A"
    sort -t "$tab" -rn -k8,8 |     # Highly referenced peptides on top
    grep ';' |                     # Keep only rows containing a ";"
    head -n 1200 |                 # Select up to 1200 peptides
    # NOTE(review): this drops the first (top-ranked) remaining row;
    # presumably meant to skip a header line -- confirm before relying on it.
    tail -n +2 |
    sort -t "$tab" -r -k8,8 |
    # Remove duplicate peptides: key on output column 4 (HitSequence).
    # Column 9 is "Hit keywords", which would merge distinct peptides.
    awk -F '\t' '!seen[$4]++'
} >> "$output"
# (GitHub page footer, not part of the script: "Sign up for free to join this
# conversation on GitHub. Already have an account? Sign in to comment")