Skip to content

Instantly share code, notes, and snippets.

@thangarajan8
Created September 20, 2024 08:03
Show Gist options
  • Save thangarajan8/351ae5e85cc1f23318c9696d7213a976 to your computer and use it in GitHub Desktop.
Save thangarajan8/351ae5e85cc1f23318c9696d7213a976 to your computer and use it in GitHub Desktop.
date_time_diff
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, expr, hour,format_number
from pyspark.sql.types import LongType
import pandas as pd
# Create (or reuse) the Spark session used by the rest of this script.
spark = (
    SparkSession.builder
    .appName("DateDifference")
    .getOrCreate()
)
def exclude_weekends_and_jan1(start_date, end_date):
    """Count business days from start_date to end_date, skipping January 1st.

    The range is built with pandas' business-day frequency (``freq='B'``,
    Monday through Friday) and includes both endpoints when they fall on a
    business day. January 1st of *every* year covered by the range is
    excluded, not only the one in ``start_date``'s year.

    Args:
        start_date: Range start; anything ``pd.date_range`` accepts
            (``datetime``, ``pd.Timestamp``, ISO string), or ``None``.
        end_date: Range end, same accepted types as ``start_date``.

    Returns:
        The number of remaining business days as a plain ``int`` (0 when
        ``start_date`` is after ``end_date``), or ``None`` when either input
        is ``None`` so that null inputs propagate as null through the UDF
        instead of raising.
    """
    if start_date is None or end_date is None:
        return None

    # Create the business-day range (weekends are dropped by freq='B').
    days = pd.date_range(start=start_date, end=end_date, freq='B')

    # Match on month/day rather than one exact timestamp so that Jan 1 is
    # removed for every year in the range and regardless of time-of-day.
    is_jan1 = (days.month == 1) & (days.day == 1)

    # len() yields a built-in int rather than a numpy integer.
    return len(days[~is_jan1])
# Wrap the business-day counter as a Spark UDF that yields a LongType column.
exclude_udf = udf(f=exclude_weekends_and_jan1, returnType=LongType())
def calculate_difference(df, timestamp_col, date_col):
    """Add business-day and hour-of-day difference columns to ``df``.

    Args:
        df: Input Spark DataFrame.
        timestamp_col: Name of the column holding the later timestamp.
        date_col: Name of the column holding the earlier date; it is cast to
            timestamp in the returned DataFrame.

    Returns:
        A DataFrame with these columns added:
        ``business_days_diff`` (result of ``exclude_udf`` over the two
        columns), ``hdiff`` (hour of ``timestamp_col`` minus hour of
        ``date_col``), ``h1`` (``business_days_diff + hdiff / 100`` as float)
        and ``formatted_result`` (``h1`` formatted to two decimals).
    """
    # Convert date column to timestamp
    df = df.withColumn(date_col, col(date_col).cast("timestamp"))

    # Add a new column for the difference in business days
    with_days = df.withColumn(
        "business_days_diff",
        exclude_udf(col(date_col), col(timestamp_col)),
    )

    # Use the caller-supplied column names here; hard-coding "timestamp" and
    # "date" would break for any other column names.
    with_hours = with_days.withColumn(
        "hdiff",
        hour(col(timestamp_col)) - hour(col(date_col)),
    )

    # Encode the result as <days>.<hours>: the hour difference is placed in
    # the two decimal digits after the business-day count.
    with_combined = with_hours.withColumn(
        "h1",
        (col("business_days_diff") + col("hdiff") / 100).cast("float"),
    )
    return with_combined.withColumn("formatted_result", format_number(col("h1"), 2))
# Example usage: sample rows of (id, timestamp string, date string).
data = [
    (1, '2024-09-20 10:00:00', '2024-09-15'),
    (2, '2024-09-22 15:30:00', '2024-01-18'),
]
columns = ["id", "timestamp", "date"]
df = spark.createDataFrame(data, schema=columns)
# The timestamp values start out as strings; cast them to a real timestamp type.
df = df.withColumn("timestamp", df["timestamp"].cast("timestamp"))
# Compute the difference columns and print every cell in full.
result_df = calculate_difference(df, "timestamp", "date")
result_df.show(truncate=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment