Last active
August 29, 2015 14:21
-
-
Save kate-crosby/36b011be0c644f92866b to your computer and use it in GitHub Desktop.
get_seeds_inv4.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Run tassel from the get_list.R | |
module load gcc jdk/1.8 tassel/5 | |
run_pipeline.pl -Xmx64g -fork1 -h All_SeeD_2.7_chr4_no_filter.unimputed.hmp.txt -export -exportType VCF -runfork1 | |
sed '11q;d' All_SeeD_2.7_chr4_no_filter.unimputed.vcf > header.vcf | |
#This cuts lines BEFORE header | |
sed '1,10!d' All_SeeD_2.7_chr4_no_filter.unimputed.vcf > first.vcf | |
#This cuts lines below the header | |
sed -e '1,11d' All_SeeD_2.7_chr4_no_filter.unimputed.vcf > last.vcf | |
# transpose the header and awk and transpose again - give it the proper naming | |
cat header.vcf | tr '\t' '\n'| awk -F":" '{print $1}'| tr '\n' '\t' > middle.vcf | |
#Get the list of dups | |
cat middle.vcf | tr '\t' '\n' > testheader.vcf | |
uniq -d testheader.vcf > dups.txt | |
# Middle has no line ending so add one | |
sed -i -e '$a\' middle.vcf | |
# Put it all back together, this is our new vcf file | |
cat first.vcf middle.vcf last.vcf > seeds.vcf | |
# Get the range | |
~/bin/vcftools_0.1.12b/bin/vcftools --vcf seeds.vcf --from-bp 172000000 --to-bp 177000000 --chr 4 --recode --out range_chr4.vcf | |
# Now use the filter list from R and pass to vcftools and pull out the seeds stuff | |
sed -e '/^M/d' temp_keep.txt > gone.txt | |
~/bin/vcftools_0.1.12b/bin/vcftools --vcf range_chr4.vcf.recode.vcf --keep gone.txt --recode --out inds_sites_filtered_chr4_inv.vcf | |
# Get rid of the duplicates defined above | |
~/bin/vcftools_0.1.12b/bin/vcftools --vcf inds_sites_filtered_chr4_inv.vcf.recode.vcf --remove dups.txt --recode --out inds_sites_nodups_chr4_inv.vcf | |
# Convert this to plink | |
~/bin/vcftools_0.1.12b/bin/vcftools --vcf inds_sites_nodups_chr4_inv.vcf.recode.vcf --plink --out inv4_plk | |
# Convert to plink binary, and exclude the SNPs that are not on any chromosome | |
module load plink/1.90 | |
plink --file inv4_plk --make-bed --out inv4_bplk | |
# Get the indIDs from plink file to associate with the metadata and altitude | |
cut -f2 inv4_plk.ped > out.inds.txt | |
# Run flashpca - install in your path | |
flashpca --bfile inv4_bplk --stand center --numthreads 8 --outvec eigenevector_inv4 R --outload SNPload_inv4R --outval eigenvalue_inv4R | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
srun -p bigmemh --mem 36000 --pty bash