Created
March 14, 2025 10:31
-
-
Save koorukuroo/8673bed264c44c4672fb046daf929107 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import pandas as pd | |
import plotly.graph_objects as go | |
def monty_hall_5doors_scenario(num_doors=5):
    """Generate one random Monty Hall scenario.

    The car and the player's first pick are drawn independently and
    uniformly from the doors ``0 .. num_doors - 1``. Monty then opens
    ``num_doors - 2`` doors that are neither the player's pick nor the
    prize door, leaving exactly one closed door the player could switch to.

    Args:
        num_doors: Total number of doors. Defaults to 5, which reproduces
            the original 5-door game. Must be at least 3.

    Returns:
        A tuple ``(prize_door, initial_choice, switch_to)`` of door indices.

    Raises:
        ValueError: If ``num_doors`` is smaller than 3.
    """
    if num_doors < 3:
        raise ValueError("num_doors must be at least 3")

    doors = list(range(num_doors))
    prize_door = random.randint(0, num_doors - 1)
    initial_choice = random.randint(0, num_doors - 1)

    # Monty may only open doors that are neither the player's pick nor the prize.
    openable = [d for d in doors if d != initial_choice and d != prize_door]
    # If the first pick is wrong, `openable` has exactly num_doors - 2 doors and
    # all of them get opened; if it is right, one goat door stays closed.
    monty_doors = set(random.sample(openable, num_doors - 2))

    # Exactly one door other than the initial pick is still closed.
    remaining = [
        d for d in doors if d not in monty_doors and d != initial_choice
    ]
    switch_to = remaining[0]
    return prize_door, initial_choice, switch_to
def generate_sankey_data_5doors(trials=1000):
    """Simulate 5-door Monty Hall scenarios and tabulate both strategies.

    Every simulated scenario contributes two rows, one for each strategy:

    1) ``did_switch = False`` -> the final choice is the initial choice
    2) ``did_switch = True``  -> the final choice is the ``switch_to`` door

    Args:
        trials: Number of scenarios to simulate. The result has
            ``2 * trials`` rows.

    Returns:
        A ``pandas.DataFrame`` with the columns:

        - ``initial_correct``: whether the initial choice was the prize door
        - ``did_switch``: whether this row models the switching strategy
        - ``win``: whether the final choice was the prize door

        The columns are present even when ``trials`` is 0, so downstream
        column lookups do not fail on an empty simulation.
    """
    rows = []
    for _ in range(trials):
        prize_door, initial_choice, switch_to = monty_hall_5doors_scenario()
        initial_correct = initial_choice == prize_door

        # Case 1: Stay - the final choice is the initial one, so staying
        # wins exactly when the initial choice was already correct.
        rows.append({
            "initial_correct": initial_correct,
            "did_switch": False,
            "win": initial_correct,
        })

        # Case 2: Switch - the final choice is the one other closed door.
        rows.append({
            "initial_correct": initial_correct,
            "did_switch": True,
            "win": switch_to == prize_door,
        })

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(rows, columns=["initial_correct", "did_switch", "win"])
def create_sankey_insight(df):
    """Create a three-stage Sankey diagram from simulation results.

    The stages and their node labels are:

    1) ``initial_correct`` -> "Initial: Correct" / "Initial: Wrong"
    2) ``did_switch``      -> "Strategy: Switch" / "Strategy: Stay"
    3) ``win``             -> "Result: Win" / "Result: Lose"

    Links are drawn from stage 1 to stage 2 and from stage 2 to stage 3,
    each weighted by the number of rows that share that pair of labels.

    Args:
        df: DataFrame with the columns ``initial_correct``, ``did_switch``
            and ``win``. It is not modified.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the Sankey diagram.
    """
    # 1) Convert to string labels. They live in their own frame so that the
    #    caller's DataFrame does not silently gain extra columns.
    labels = pd.DataFrame({
        "initial_correct_label": df["initial_correct"].apply(
            lambda x: "Initial: Correct" if x else "Initial: Wrong"
        ),
        "strategy_label": df["did_switch"].apply(
            lambda x: "Strategy: Switch" if x else "Strategy: Stay"
        ),
        "result_label": df["win"].apply(
            lambda x: "Result: Win" if x else "Result: Lose"
        ),
    })

    # 2) Gather all node labels (2 per stage = up to 6 total), in order of
    #    first appearance, and give each one a node index.
    stage_columns = ["initial_correct_label", "strategy_label", "result_label"]
    all_labels = pd.unique(labels[stage_columns].values.ravel()).tolist()
    label_to_index = {label: i for i, label in enumerate(all_labels)}

    # 3) Count rows for each pair of adjacent stages (A -> B, then B -> C)
    #    and turn every (from, to) pair into one Sankey link.
    source = []
    target = []
    value = []
    for from_col, to_col in zip(stage_columns, stage_columns[1:]):
        pair_counts = labels.groupby([from_col, to_col]).size()
        for (from_label, to_label), count in pair_counts.items():
            source.append(label_to_index[from_label])
            target.append(label_to_index[to_label])
            value.append(int(count))

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=all_labels,
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
        ),
    )])
    fig.update_layout(
        title_text="Why Switching Matters: 5-Door Monty Hall",
        font_size=12,
        width=1000,
        height=600,
    )
    return fig
# 1) Generate data: simulate 2000 scenarios; each one yields a "stay" row
#    and a "switch" row in the resulting DataFrame.
df_sankey = generate_sankey_data_5doors(trials=2000)
# 2) Create Sankey Diagram from the simulated outcomes
fig_insight = create_sankey_insight(df_sankey)
# 3) Display the figure
fig_insight.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment