koorukuroo · March 14, 2025 10:29
diff --git a/montyhall_5door_viz.py b/montyhall_5door_viz.py
 import random
 import pandas as pd
 import plotly.graph_objects as go

 def monty_hall_5doors(trials=1000, switch=True):
    """
    Simulate the five-door Monty Hall game ``trials`` times.

    In every round the prize door and the player's first pick are drawn
    independently from doors 0-4.  Monty then opens three doors that are
    neither the player's pick nor the prize door, which leaves exactly one
    other closed door for the player to switch to.

    Args:
        trials: Number of rounds to play.  Zero (or a negative number)
            plays no rounds.
        switch: If True the player moves to the one remaining closed door;
            if False the player keeps the first pick.

    Returns:
        A DataFrame with one row per round and the columns
        ``initial_choice``, ``prize_door`` and ``result`` ("Win" or
        "Lose").  The columns are present even when no round was played.
    """
    columns = ["initial_choice", "prize_door", "result"]
    doors = [0, 1, 2, 3, 4]  # identical for every round, so built once
    data = []
    for _ in range(trials):
        prize_door = random.randint(0, 4)
        initial_choice = random.randint(0, 4)

        # Monty opens 3 doors (not the player's choice, not the prize door).
        # There are 3 such doors when the first pick is wrong and 4 when it
        # is right, so sampling 3 of them always succeeds.
        available_doors = [d for d in doors if d != initial_choice and d != prize_door]
        monty_doors = random.sample(available_doors, 3)

        if switch:
            # 5 doors - 3 opened - 1 picked leaves a single door to move to.
            remaining = [d for d in doors if d not in monty_doors and d != initial_choice]
            final_choice = remaining[0]
        else:
            final_choice = initial_choice

        result = "Win" if final_choice == prize_door else "Lose"

        data.append({
            "initial_choice": initial_choice,
            "prize_door": prize_door,
            "result": result
        })

    # Passing the column list keeps the schema stable for an empty result;
    # without it pd.DataFrame([]) has no columns at all.
    return pd.DataFrame(data, columns=columns)

 def create_sankey(df, switch=True):
    """
    Build a three-stage Sankey diagram from Monty Hall simulation results.

    Stages:
      - Initial Choice
      - Prize Door
      - Result (Win/Lose)

    The value of every link is the number of rows of ``df`` that share the
    link's two endpoints.

    Args:
        df: DataFrame with the columns ``initial_choice``, ``prize_door``
            and ``result``.  It is only read; no columns are added to it.
        switch: Only selects the "SWITCH ON" / "SWITCH OFF" text of the
            title; the rows of ``df`` are plotted as given.

    Returns:
        A plotly ``go.Figure`` holding a single ``go.Sankey`` trace.
    """
    # 1) Derive the node label of every row on a private copy, so the
    #    caller's DataFrame keeps exactly the columns it came with.
    labeled = df[["initial_choice", "prize_door", "result"]].copy()
    labeled["initial_choice_label"] = labeled["initial_choice"].apply(lambda x: f"Initial: Door {x}")
    labeled["prize_door_label"] = labeled["prize_door"].apply(lambda x: f"Prize: Door {x}")
    labeled["result_label"] = labeled["result"].apply(lambda x: f"Result: {x}")

    # 2) Collect the node labels stage by stage, sorted within each stage.
    #    (At most 5 initial choices + 5 prize doors + 2 results = 12 labels.)
    #    Sorting makes the node list independent of the random row order.
    stage_columns = ["initial_choice_label", "prize_door_label", "result_label"]
    all_labels = []
    for column in stage_columns:
        all_labels.extend(sorted(labeled[column].unique()))
    label_to_index = {label: i for i, label in enumerate(all_labels)}

    # 3) Count the rows behind every link of
    #    Initial Choice --> Prize Door --> Result
    source = []
    target = []
    value = []
    stage_pairs = [
        ("initial_choice_label", "prize_door_label"),
        ("prize_door_label", "result_label"),
    ]
    for src_column, tgt_column in stage_pairs:
        counts = labeled.groupby([src_column, tgt_column]).size().reset_index(name="count")
        source.extend(counts[src_column].map(label_to_index).tolist())
        target.extend(counts[tgt_column].map(label_to_index).tolist())
        value.extend(counts["count"].tolist())

    # 4) Build the Sankey figure
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=all_labels,
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
        ),
    )])

    switch_state = "ON" if switch else "OFF"
    title_text = f"Monty Hall (5 Doors) - Sankey Diagram | SWITCH {switch_state}"
    fig.update_layout(title_text=title_text, font_size=12, width=1000, height=600)
    return fig

 # Script driver: run the simulation once per strategy and display one
 # Sankey diagram for each.
 #
 # 1) SWITCH = ON: 2000 rounds in which the player always switches.
 df_switch_on = monty_hall_5doors(trials=2000, switch=True)
 fig_switch_on = create_sankey(df_switch_on, switch=True)
 fig_switch_on.show()

 # 2) SWITCH = OFF: 2000 rounds in which the player keeps the first pick.
 df_switch_off = monty_hall_5doors(trials=2000, switch=False)
 fig_switch_off = create_sankey(df_switch_off, switch=False)
 fig_switch_off.show()
	import random
	import pandas as pd
	import plotly.graph_objects as go

	def monty_hall_5doors(trials=1000, switch=True):
	    """
	    Simulate the five-door Monty Hall game ``trials`` times.

	    In every round the prize door and the player's first pick are drawn
	    independently from doors 0-4.  Monty then opens three doors that are
	    neither the player's pick nor the prize door, which leaves exactly one
	    other closed door for the player to switch to.

	    Args:
	        trials: Number of rounds to play.  Zero (or a negative number)
	            plays no rounds.
	        switch: If True the player moves to the one remaining closed door;
	            if False the player keeps the first pick.

	    Returns:
	        A DataFrame with one row per round and the columns
	        ``initial_choice``, ``prize_door`` and ``result`` ("Win" or
	        "Lose").  The columns are present even when no round was played.
	    """
	    columns = ["initial_choice", "prize_door", "result"]
	    doors = [0, 1, 2, 3, 4]  # identical for every round, so built once
	    data = []
	    for _ in range(trials):
	        prize_door = random.randint(0, 4)
	        initial_choice = random.randint(0, 4)

	        # Monty opens 3 doors (not the player's choice, not the prize door).
	        # There are 3 such doors when the first pick is wrong and 4 when it
	        # is right, so sampling 3 of them always succeeds.
	        available_doors = [d for d in doors if d != initial_choice and d != prize_door]
	        monty_doors = random.sample(available_doors, 3)

	        if switch:
	            # 5 doors - 3 opened - 1 picked leaves a single door to move to.
	            remaining = [d for d in doors if d not in monty_doors and d != initial_choice]
	            final_choice = remaining[0]
	        else:
	            final_choice = initial_choice

	        result = "Win" if final_choice == prize_door else "Lose"

	        data.append({
	            "initial_choice": initial_choice,
	            "prize_door": prize_door,
	            "result": result
	        })

	    # Passing the column list keeps the schema stable for an empty result;
	    # without it pd.DataFrame([]) has no columns at all.
	    return pd.DataFrame(data, columns=columns)

	def create_sankey(df, switch=True):
	    """
	    Build a three-stage Sankey diagram from Monty Hall simulation results.

	    Stages:
	      - Initial Choice
	      - Prize Door
	      - Result (Win/Lose)

	    The value of every link is the number of rows of ``df`` that share the
	    link's two endpoints.

	    Args:
	        df: DataFrame with the columns ``initial_choice``, ``prize_door``
	            and ``result``.  It is only read; no columns are added to it.
	        switch: Only selects the "SWITCH ON" / "SWITCH OFF" text of the
	            title; the rows of ``df`` are plotted as given.

	    Returns:
	        A plotly ``go.Figure`` holding a single ``go.Sankey`` trace.
	    """
	    # 1) Derive the node label of every row on a private copy, so the
	    #    caller's DataFrame keeps exactly the columns it came with.
	    labeled = df[["initial_choice", "prize_door", "result"]].copy()
	    labeled["initial_choice_label"] = labeled["initial_choice"].apply(lambda x: f"Initial: Door {x}")
	    labeled["prize_door_label"] = labeled["prize_door"].apply(lambda x: f"Prize: Door {x}")
	    labeled["result_label"] = labeled["result"].apply(lambda x: f"Result: {x}")

	    # 2) Collect the node labels stage by stage, sorted within each stage.
	    #    (At most 5 initial choices + 5 prize doors + 2 results = 12 labels.)
	    #    Sorting makes the node list independent of the random row order.
	    stage_columns = ["initial_choice_label", "prize_door_label", "result_label"]
	    all_labels = []
	    for column in stage_columns:
	        all_labels.extend(sorted(labeled[column].unique()))
	    label_to_index = {label: i for i, label in enumerate(all_labels)}

	    # 3) Count the rows behind every link of
	    #    Initial Choice --> Prize Door --> Result
	    source = []
	    target = []
	    value = []
	    stage_pairs = [
	        ("initial_choice_label", "prize_door_label"),
	        ("prize_door_label", "result_label"),
	    ]
	    for src_column, tgt_column in stage_pairs:
	        counts = labeled.groupby([src_column, tgt_column]).size().reset_index(name="count")
	        source.extend(counts[src_column].map(label_to_index).tolist())
	        target.extend(counts[tgt_column].map(label_to_index).tolist())
	        value.extend(counts["count"].tolist())

	    # 4) Build the Sankey figure
	    fig = go.Figure(data=[go.Sankey(
	        node=dict(
	            pad=15,
	            thickness=20,
	            line=dict(color="black", width=0.5),
	            label=all_labels,
	        ),
	        link=dict(
	            source=source,
	            target=target,
	            value=value,
	        ),
	    )])

	    switch_state = "ON" if switch else "OFF"
	    title_text = f"Monty Hall (5 Doors) - Sankey Diagram | SWITCH {switch_state}"
	    fig.update_layout(title_text=title_text, font_size=12, width=1000, height=600)
	    return fig

	# Script driver: run the simulation once per strategy and display one
	# Sankey diagram for each.
	#
	# 1) SWITCH = ON: 2000 rounds in which the player always switches.
	df_switch_on = monty_hall_5doors(trials=2000, switch=True)
	fig_switch_on = create_sankey(df_switch_on, switch=True)
	fig_switch_on.show()

	# 2) SWITCH = OFF: 2000 rounds in which the player keeps the first pick.
	df_switch_off = monty_hall_5doors(trials=2000, switch=False)
	fig_switch_off = create_sankey(df_switch_off, switch=False)
	fig_switch_off.show()