Created
July 20, 2016 12:59
-
-
Save janfait/2bbe44cd24f91d0e6503edcbf006aaa6 to your computer and use it in GitHub Desktop.
Custom weighted Jaccard index for comparing weighted sets in content + preference matching.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example inputs.
# A user's preferences are encoded as a multiset: each tag is repeated once
# per unit of weight, so here "mobile" has weight 6 and "email" has weight 4.
userPreferences <- c(rep("mobile", 6), rep("email", 4))
# Tags attached to a piece of content; each distinct tag counts with weight 1.
contentTags <- c("email", "mobile")
# Weighted Jaccard-style similarity between content tags and user preferences.
#
# Arguments:
#   content_tags      Vector of tags describing a piece of content. Only the
#                     distinct tags matter; each contributes weight 1.
#   user_preferences  Vector of preference tags in which a tag's weight is
#                     the number of times it is repeated.
#
# Returns:
#   A single number in [0, 1]: the summed preference weight of the tags that
#   also occur in content_tags, divided by the weighted union
#   (all preference entries plus the content tags the user does not have).
#   Returns 0 when there is no overlap, and also when both inputs are empty
#   (the union is empty, so the ratio would otherwise be 0/0).
similarityIndex <- function(content_tags = NULL, user_preferences = NULL) {
  # Weighted intersection: every preference entry whose tag also appears in
  # content_tags counts once. %in% yields logical(0) on empty input, so the
  # sum is 0 for disjoint or empty sets, and it never returns NA.
  wi <- sum(user_preferences %in% content_tags)

  # Weighted union: all preference entries, plus each distinct content tag
  # that the user does not already have (weight 1 apiece).
  wu <- length(user_preferences) +
    length(setdiff(content_tags, user_preferences))

  # Nothing to compare: avoid 0 / 0 producing NaN.
  if (wu == 0) {
    return(0)
  }

  wi / wu
}
# Example: both preference tags are covered by the content tags, so the
# weighted intersection equals the weighted union (10 / 10) and the index is 1.
similarityIndex(contentTags, userPreferences)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment