Created
May 30, 2019 16:14
-
-
Save afranzi/294f42ccd95baff0e7f9266d73ade484 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install quinn>=0.3.1
from quinn.extensions.dataframe_ext import DataFrame | |
def with_idx(id_col: str, output_col: str) -> Callable[[DataFrame], DataFrame]:
    """Build a DataFrame transformation that numbers the distinct values of a column.

    The returned function selects the distinct values of ``id_col``, assigns
    each one a ``row_number()`` over a window ordered by ``id_col``, and joins
    that lookup back onto the input so every row gains ``output_col``.

    Args:
        id_col: Name of the column whose distinct values are numbered.
        output_col: Name of the column that receives the row number.

    Returns:
        A function taking a DataFrame and returning it joined with the
        per-value index column.
    """
    def inner(df: DataFrame) -> DataFrame:
        # The window has an ordering but no partitionBy, so the numbering runs
        # over all distinct ids as a single sequence.
        # NOTE(review): Spark typically warns that an un-partitioned window
        # moves the data to one partition; only the distinct ids go through it
        # here, but confirm this is acceptable for large id sets.
        ordering = Window.orderBy(id_col)
        distinct_ids = df.select(id_col).distinct()
        indexed_ids = distinct_ids.withColumn(
            output_col, F.row_number().over(ordering)
        )
        # Join with the default join type on the id column.
        # NOTE(review): presumably rows with a null id will not match and are
        # dropped by this join; verify that is intended.
        return df.join(indexed_ids, on=[id_col])

    return inner
def compute_int_users_and_activities(df: DataFrame) -> DataFrame:
    """Add ``activityIdx`` and ``userIdx`` columns to ``df``.

    Applies ``with_idx`` first for ``activityId`` -> ``activityIdx`` and then
    for ``userId`` -> ``userIdx``, chaining both through ``DataFrame.transform``.

    Args:
        df: DataFrame that has ``activityId`` and ``userId`` columns.

    Returns:
        The result of applying both index transformations in that order.
    """
    add_activity_idx = with_idx('activityId', 'activityIdx')
    add_user_idx = with_idx('userId', 'userIdx')
    return df.transform(add_activity_idx).transform(add_user_idx)
def compute_ratings_matrix(user_events_df: DataFrame, rec_features_df: DataFrame, factors: List[dict]) -> DataFrame:
    """Run the ratings pipeline over ``rec_features_df``.

    Three transformations are applied in order through ``DataFrame.transform``:

    1. the transformation returned by
       ``compute_rating_factors(user_events_df, factors)``,
    2. ``compute_relevance_scores``,
    3. ``compute_int_users_and_activities``.

    ``compute_rating_factors`` and ``compute_relevance_scores`` are defined
    outside this snippet, so their exact output columns are not described here.

    Args:
        user_events_df: DataFrame passed to ``compute_rating_factors``.
        rec_features_df: DataFrame the pipeline starts from.
        factors: List of dicts passed to ``compute_rating_factors``.

    Returns:
        The DataFrame produced by the final transformation.
    """
    add_rating_factors = compute_rating_factors(user_events_df, factors)
    with_factors = rec_features_df.transform(add_rating_factors)
    with_scores = with_factors.transform(compute_relevance_scores)
    return with_scores.transform(compute_int_users_and_activities)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment