Created
July 11, 2024 15:42
-
-
Save Rajdave69/f14e473568f0a71f291f9a37d76f539d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import matplotlib.pyplot as plt | |
import mplcyberpunk | |
from datetime import datetime, timedelta | |
import pytz | |
# Define the function to create a dictionary of dates | |
def create_date_dict(starting_epoch, ending_epoch, timezone='Asia/Qatar'):
    """Build a zero-initialised counter for every calendar day in a range.

    Args:
        starting_epoch: Unix timestamp in seconds of the first moment to cover.
        ending_epoch: Unix timestamp in seconds of the last moment to cover.
        timezone: Zone name passed to ``pytz.timezone``; it decides which
            calendar day each timestamp falls on.

    Returns:
        A dict whose keys are the days from the start day to the end day
        (both inclusive), formatted as ``"%d/%m/%y"`` and inserted in
        chronological order, each mapped to 0. The dict is empty when the
        end day is before the start day.

    Raises:
        ValueError: If either timestamp cannot be converted to a datetime.
    """
    tz = pytz.timezone(timezone)
    try:
        first_day = datetime.fromtimestamp(starting_epoch, tz).date()
        last_day = datetime.fromtimestamp(ending_epoch, tz).date()
    except (OverflowError, OSError) as e:
        raise ValueError(f"Invalid epoch time: {e}") from e
    # Walk whole calendar days instead of adding 24 hours to the start instant:
    # with instants, the final day was skipped whenever the end time-of-day was
    # earlier than the start time-of-day.
    day_count = (last_day - first_day).days + 1
    return {
        (first_day + timedelta(days=offset)).strftime("%d/%m/%y"): 0
        for offset in range(day_count)
    }
# Define the User class to hold messages | |
class User:
    """A named sender together with the list of messages collected for them."""

    def __init__(self, sender):
        # Every instance starts with its own empty list; nothing is shared.
        self.sender, self.messages = sender, []
# Load messages from JSON files
# Raw string is required: in a normal literal the "\u" of "\user_1234" is read
# as the start of a unicode escape and the file does not even compile.
INSTAGRAM_DM_FOLDER = r"A:\Insta Packages\instagram-yourfavraj-2024-07-11-tkYpPey9\your_instagram_activity\messages\inbox\user_1234"
user_1 = None
user_2 = None
users = {}
for item in os.listdir(INSTAGRAM_DM_FOLDER):
    print(item)
    if not item.endswith(".json") or not item.startswith("message_"):
        continue
    # Close the handle deterministically and do not depend on the locale encoding.
    with open(os.path.join(INSTAGRAM_DM_FOLDER, item), encoding="utf-8") as json_file:
        file_data = json.load(json_file)
    # Extract participants; skip files that do not name at least two of them
    participants = file_data.get('participants', [])
    if len(participants) < 2:
        continue
    # Strip names the same way message senders are stripped below, so both
    # resolve to the same key in `users`.
    sender_names = [(participant.get('name') or '').strip() for participant in participants]
    if not all(sender_names):
        continue
    sender_name_1 = sender_names[0]
    sender_name_2 = sender_names[1]
    if user_1 is None or user_2 is None:
        # The first usable file decides who user_1 and user_2 are.
        user_1 = User(sender_name_1)
        user_2 = User(sender_name_2)
        users[sender_name_1] = user_1
        users[sender_name_2] = user_2
    for message in file_data.get('messages', []):
        sender_name = (message.get('sender_name') or '').strip()  # Clean up sender_name
        if not sender_name:
            continue
        if sender_name not in users:
            users[sender_name] = User(sender_name)
        users[sender_name].messages.append({
            'sender_name': sender_name,
            # timestamp_ms is divided by 1000 to get seconds
            'timestamp': message.get('timestamp_ms', 0) / 1000,
            'content': message.get('content', ''),
        })
if user_1 is None or user_2 is None:
    # Fail with an explanation instead of an AttributeError on None below.
    raise SystemExit(
        f"No message_*.json file with two named participants found in {INSTAGRAM_DM_FOLDER}"
    )
combined_messages = [*user_1.messages, *user_2.messages]
combined_messages.sort(key=lambda thing: thing['timestamp'])
print("Total number of messages:", len(combined_messages))
# Label the totals with the users being counted, not with whatever names the
# last file processed happened to leave in the loop variables.
print(f"Total number of messages by {user_1.sender}: ", len(user_1.messages))
print(f"Total number of messages by {user_2.sender}: ", len(user_2.messages))
# Create the date dictionary for plotting
if not combined_messages:
    # Nothing to chart; indexing [0] / [-1] below would raise IndexError.
    raise SystemExit("No messages found for the two participants; nothing to plot.")
# One zone for both building the day range and bucketing the messages.
TIMEZONE = 'Asia/Qatar'
start_epoch = combined_messages[0]['timestamp']
end_epoch = combined_messages[-1]['timestamp']
date_dict = create_date_dict(start_epoch, end_epoch, TIMEZONE)
# Populate the date dictionary with message counts
message_tz = pytz.timezone(TIMEZONE)
for message in combined_messages:
    # Passing the zone matters: without it fromtimestamp() uses the machine's
    # local zone, which can produce a day key the dictionary does not contain.
    date_key = datetime.fromtimestamp(message['timestamp'], message_tz).strftime("%d/%m/%y")
    # Fall back to 0 so a day absent from the pre-filled range is still counted
    # rather than raising KeyError.
    date_dict[date_key] = date_dict.get(date_key, 0) + 1
# Calculate message lengths for each user, ignoring messages with None or empty content | |
def calculate_average_length(messages):
    """Return the mean character count of the messages that have real text.

    A message counts only if its 'content' is not None and is not blank after
    stripping whitespace. The length measured is that of the unstripped
    content. Returns 0 when no message qualifies.
    """
    lengths = [
        len(entry['content'])
        for entry in messages
        if entry['content'] is not None and entry['content'].strip()
    ]
    if not lengths:
        return 0
    return sum(lengths) / len(lengths)
# Average message length: whole conversation first, then one figure per user
net_average_length = calculate_average_length(combined_messages)
average_length_user1, average_length_user2 = (
    calculate_average_length(participant.messages)
    for participant in (user_1, user_2)
)
print(f"Net Average Message Length: {net_average_length:.2f}")
print(f"Average Message Length for {user_1.sender}: {average_length_user1:.2f}")
print(f"Average Message Length for {user_2.sender}: {average_length_user2:.2f}")
# Plot the overall message data
plt.style.use("cyberpunk")
x_labels = list(date_dict)
y_values = list(date_dict.values())
# Landscape canvas: constant height, width grows with the number of dates
# but never drops below 20
fig_height = 10
fig_width = max(20, len(x_labels) // 4)
plt.figure(figsize=(fig_width, fig_height), dpi=100)
plt.plot(x_labels, y_values, marker='o', label="Total Messages")
# Title and axis captions
plt.title("Total Messages Per Day", fontsize=40)
plt.xlabel("Date", fontsize=36)
plt.ylabel("Number of Messages", fontsize=36)
# Tick labels: dates rotated vertically on x, larger numbers on y
plt.xticks(rotation=90, fontsize=14, ha='center')
plt.yticks(fontsize=32)
plt.grid(visible=True, linestyle='--', alpha=0.7)
plt.legend(fontsize=20)
# Trim the empty margins before the glow effects are added
plt.tight_layout()
# Cyberpunk effects
mplcyberpunk.make_lines_glow()
mplcyberpunk.add_underglow()
# Write the overall chart to disk
plt.savefig('./total_messages.png', dpi=100)
# Create separate date dictionaries for each user
# One zone for both building the day range and bucketing the messages.
TIMEZONE = 'Asia/Qatar'
date_dict_user1 = create_date_dict(start_epoch, end_epoch, TIMEZONE)
date_dict_user2 = create_date_dict(start_epoch, end_epoch, TIMEZONE)
# Populate the user-specific date dictionaries
user_message_tz = pytz.timezone(TIMEZONE)
for message in combined_messages:
    # Passing the zone matters: without it fromtimestamp() uses the machine's
    # local zone, which can produce a day key the dictionaries do not contain.
    date_key = datetime.fromtimestamp(message['timestamp'], user_message_tz).strftime("%d/%m/%y")
    # Register the day in BOTH dictionaries so they always share one key set
    # (one x-axis for both lines) and a missing day cannot raise KeyError.
    date_dict_user1.setdefault(date_key, 0)
    date_dict_user2.setdefault(date_key, 0)
    if message['sender_name'] == user_1.sender:
        date_dict_user1[date_key] += 1
    elif message['sender_name'] == user_2.sender:
        date_dict_user2[date_key] += 1
# Plot the user-specific message data
# Take the labels from the per-user dictionaries themselves so the x and y
# sequences are guaranteed to have the same length.
user_x_labels = list(date_dict_user1)
plt.figure(figsize=(fig_width, fig_height), dpi=100)
plt.plot(user_x_labels, list(date_dict_user1.values()), marker='o', label=user_1.sender)
plt.plot(user_x_labels, list(date_dict_user2.values()), marker='s', label=user_2.sender)
# Format the x-axis and y-axis
plt.xticks(rotation=90, fontsize=14, ha='center')  # Rotate and set font size for x-axis labels
plt.yticks(fontsize=32)  # Increase font size for y-axis labels
plt.xlabel("Date", fontsize=36)
plt.ylabel("Number of Messages", fontsize=36)
plt.title("Messages Per Day by User", fontsize=40)
plt.grid(visible=True, linestyle='--', alpha=0.7)
plt.legend(fontsize=20)
# Remove excess empty space around the plot
plt.tight_layout()
# Add cyberpunk effects
mplcyberpunk.make_lines_glow()
mplcyberpunk.add_underglow()
# Save the user-specific message plot to a file
plt.savefig('./user_specific_messages.png', dpi=100)
# Calculate messages per hour
# Use the same zone as the per-day charts; a bare fromtimestamp() would report
# hours in whatever zone the machine running the script happens to be in.
TIMEZONE = 'Asia/Qatar'
hourly_tz = pytz.timezone(TIMEZONE)
hour_dict = {hour: 0 for hour in range(24)}
for message in combined_messages:
    hour = datetime.fromtimestamp(message['timestamp'], hourly_tz).hour
    hour_dict[hour] += 1
# Plot the messages per hour
hours = list(hour_dict.keys())
messages_per_hour = list(hour_dict.values())
plt.figure(figsize=(16, 10), dpi=100)
plt.bar(hours, messages_per_hour, color='cyan', edgecolor='black')
# Format the x-axis and y-axis
plt.xticks(hours, fontsize=18)  # Set font size for x-axis labels (0-23)
plt.yticks(fontsize=32)  # Increase font size for y-axis labels
plt.xlabel("Hour of Day (0-23)", fontsize=36)
plt.ylabel("Number of Messages", fontsize=36)
plt.title("Messages Distribution by Hour of Day", fontsize=40)
plt.grid(visible=True, linestyle='--', alpha=0.7)
# Remove excess empty space around the plot (this call was missing, unlike in
# the two per-day charts)
plt.tight_layout()
# Save the hourly distribution plot to a file
plt.savefig('./hourly_messages.png', dpi=100)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment