Created
November 29, 2015 10:14
Revisions
-
ashenkin created this gist
Nov 29, 2015 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,40 @@ query_higher_taxa_classes <- function(species_list, known = "genus", order = c("dataframe", "unique_sp")) { # Pass in a character vector of species, genera, families, or whatever (the search is flexible) # Returns a dataframe with the columns: query, db, family, order, subdivision # The dataframe returned is guaranteed to be in the same order as the species list passed in if order is "dataframe" order = match.arg(order) library(taxize) library(plyr) species_list = sub("^([^ ]*).*$","\\1",species_list) # just take the top level name before the space # remove short names that clog the taxon query - replace later which_short = which(nchar(species_list) < 3) short_names = species_list[which_short] species_list[which_short] = "TEMPTAXON" # agglomerate to be more efficient species_list_uniq = unique(species_list) if (known == "genus") { highertaxa <- tax_name(species_list_uniq,get=c("family", "order", "subdivision"), db = "both") } else if (known == "family") { highertaxa <- tax_name(species_list_uniq,get=c("order", "subdivision"), db = "both") } highertaxa_ncbi = subset(highertaxa, db == "ncbi") highertaxa_itis = subset(highertaxa, db == "itis") merdat <- merge(highertaxa_ncbi, highertaxa_ncbi, by="query") # seems self-documenting merdat$family.y[ is.na(merdat$family.y) ] <- merdat$family.x[ is.na(merdat$family.y) ] merdat$order.y[ is.na(merdat$order.y) ] <- merdat$order.x[ is.na(merdat$order.y) ] merdat$subdivision.y[ is.na(merdat$subdivision.y) ] <- merdat$subdivision.x[ is.na(merdat$subdivision.y) ] merdat = merdat[,c(1,grep("\\.x",names(merdat)))] names(merdat) = sub("\\.x","",names(merdat)) highertaxa = merdat species_list[which_short] = short_names # replace short names removed earlier ret_df = join(data.frame(query = species_list), highertaxa, by = "query", type = "left") # NA's will be filled in for short names that don't have a match in highertaxa now if (order == "dataframe") { return(ret_df) } else if (order == "unique_sp") { return(ret_df[-duplicated(ret_df$query),]) } }