Created
July 22, 2025 12:21
-
-
Save otobrglez/f821b0287a5483eb3a25f8c5dc3516d0 to your computer and use it in GitHub Desktop.
Simple recommendation system with Scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Oto Brglez - July 2025 | |
/** The entity being recommended: a book, described only by its title.
  *
  * @param title the book's title
  */
final case class Book(
  title: String
)
// Sample catalogue of books.
//
// `books` is bound to the whole list, while `programmingInScala` and
// `jsGoodParts` are bound to its first and second element respectively.
// The pattern would not match a list with fewer than two elements, hence the
// `: @unchecked` ascription on the right-hand side.
val books @ List(programmingInScala, jsGoodParts, _*) = List(
  "Programming in Scala",
  "JavaScript: The Good Parts",
  "Designing Data-Intensive Applications",
  "Grokking Functional Programming",
  "Programming Scala",
  "Clean Architecture",
  "Hands-on Scala Programming",
  "Functional Programming in Scala",
  "The Clean Coder",
  "Clean Code"
).map(Book.apply): @unchecked
/** Ranks `items` by how close they are to `item`. Nothing fancy.
  *
  * Every element equal to `item` is dropped; each remaining element is paired
  * with `distanceF(item, element)` and the pairs are returned in ascending
  * order of that distance.
  *
  * @tparam I        type of the things being compared
  * @param item      the reference item
  * @param items     the candidates to rank
  * @param distanceF distance function, called as `distanceF(item, candidate)`
  * @return the candidates with their distances, nearest first
  */
def recommend[I](
  item: I,
  items: Seq[I],
  distanceF: (I, I) => Double
): Seq[(I, Double)] =
  val scored =
    for
      candidate <- items
      if candidate != item
    yield (candidate, distanceF(item, candidate))
  scored.sortBy { case (_, distance) => distance }
/** Extracts the set of unique words from a book title.
  *
  * A word is a run of at least three consecutive ASCII letters; shorter runs
  * (e.g. "in") are ignored. Words are lower-cased, so matching is
  * case-insensitive.
  *
  * @param book the book whose title is tokenised
  * @return the distinct lower-cased words of the title
  */
def words(book: Book): Set[String] =
  "[a-zA-Z]{3,}".r
    .findAllIn(book.title)
    // Locale.ROOT keeps the result independent of the JVM's default locale
    // (e.g. under a Turkish locale "I" would otherwise lower-case to "ı").
    .map(_.toLowerCase(java.util.Locale.ROOT))
    .toSet
/** Jaccard distance between the word sets of two book titles.
  *
  * Computed as `1 - |A ∩ B| / |A ∪ B|`, where `A` and `B` are the results of
  * `words` for each book: 0.0 means identical word sets, 1.0 means no word in
  * common.
  *
  * @param a first book
  * @param b second book
  * @return the distance, in the range [0.0, 1.0]
  */
def jaccardDistance(a: Book, b: Book): Double =
  val wordsA = words(a)
  val wordsB = words(b)
  val unionSize = (wordsA | wordsB).size
  // When neither title yields any word the ratio would be 0/0 (NaN). Two empty
  // sets are identical, so report distance 0.0 -- the same value
  // euclideanDistance gives for that case.
  if unionSize == 0 then 0.0
  else 1.0 - (wordsA & wordsB).size.toDouble / unionSize.toDouble
/** Euclidean distance between the word sets of two book titles.
  *
  * Each title is treated as a set of words (see `words`); the distance is the
  * square root of the number of words that occur in exactly one of the two
  * sets, i.e. the size of their symmetric difference.
  *
  * @param a first book
  * @param b second book
  * @return the distance; 0.0 when both titles have the same word set
  */
def euclideanDistance(a: Book, b: Book): Double =
  val left = words(a)
  val right = words(b)
  // Words present on one side only.
  val onlyInOne = left.diff(right) ++ right.diff(left)
  math.sqrt(onlyInOne.size.toDouble)
/** Sample usage: prints the five books closest to `programmingInScala`, first
  * ranked by Jaccard distance and then by Euclidean distance.
  */
@main def main(): Unit =
  // Prints a header followed by the top five recommendations for the given
  // distance function, one `(book, distance)` pair per line.
  def report(header: String, distanceF: (Book, Book) => Double): Unit =
    println(header)
    for suggestion <- recommend(programmingInScala, books, distanceF).take(5) do
      println(suggestion)

  report(s"Books related to $programmingInScala (jaccard distance):", jaccardDistance)
  report(s"\nBooks related to $programmingInScala (euclidean distance):", euclideanDistance)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment