koorukuroo · March 14, 2025 10:31
diff --git a/montyhall_5door_sankey.py b/montyhall_5door_sankey.py
 import random
 import pandas as pd
 import plotly.graph_objects as go

 def monty_hall_5doors_scenario(num_doors=5):
    """Simulate one round of the Monty Hall game and report the key doors.

    The prize door and the player's first pick are drawn independently and
    uniformly.  Monty then opens ``num_doors - 2`` doors that hide neither
    the prize nor the player's pick, which leaves exactly one closed door
    the player could switch to.

    Args:
        num_doors: Total number of doors in the game.  Defaults to 5, the
            variant this script visualises; must be at least 3.

    Returns:
        A ``(prize_door, initial_choice, switch_to)`` tuple of zero-based
        door indices.

    Raises:
        ValueError: If ``num_doors`` is smaller than 3.
    """
    if num_doors < 3:
        raise ValueError("num_doors must be at least 3")

    prize_door = random.randrange(num_doors)
    initial_choice = random.randrange(num_doors)

    # Monty may only open doors that are neither the pick nor the prize.
    openable = [
        door for door in range(num_doors)
        if door not in (initial_choice, prize_door)
    ]
    # He always leaves two doors closed: the pick and one other door.
    opened = set(random.sample(openable, num_doors - 2))

    # Wrong first pick: every openable door gets opened, so the prize is the
    # only door left.  Right first pick: one random non-prize door survives.
    switch_to = next(
        door for door in range(num_doors)
        if door != initial_choice and door not in opened
    )
    return prize_door, initial_choice, switch_to

 def generate_sankey_data_5doors(trials=1000):
    """Simulate ``trials`` games and tabulate both strategies for each one.

    Every simulated game contributes two rows -- one where the player stays
    with the first pick and one where the player switches -- so the frame
    has ``2 * trials`` rows.

    Args:
        trials: Number of independent games to simulate.

    Returns:
        A ``pandas.DataFrame`` with columns ``initial_correct`` (first pick
        hit the prize), ``did_switch`` (strategy of the row) and ``win``
        (final pick hit the prize).  The columns are present even when
        ``trials`` is 0.
    """
    rows = []
    for _ in range(trials):
        prize_door, initial_choice, switch_to = monty_hall_5doors_scenario()
        initial_correct = initial_choice == prize_door

        # Staying keeps the first pick, so it wins exactly when that pick
        # was already right.
        rows.append({
            "initial_correct": initial_correct,
            "did_switch": False,
            "win": initial_correct,
        })
        # Switching wins when the one remaining door hides the prize.
        rows.append({
            "initial_correct": initial_correct,
            "did_switch": True,
            "win": switch_to == prize_door,
        })

    # Naming the columns keeps the schema intact for trials == 0; a
    # column-less frame would raise KeyError when the columns are read.
    return pd.DataFrame(rows, columns=["initial_correct", "did_switch", "win"])

 def _flag_labels(flags, when_true, when_false):
    """Return a Series holding ``when_true``/``when_false`` per truthy/falsy flag."""
    return flags.apply(lambda flag: when_true if flag else when_false)

 def _count_links(labelled, source_col, target_col, label_to_index):
    """Count rows per (source, target) label pair and return Sankey link lists."""
    counts = (
        labelled.groupby([source_col, target_col])
        .size()
        .reset_index(name="count")
    )
    return (
        counts[source_col].map(label_to_index).tolist(),
        counts[target_col].map(label_to_index).tolist(),
        counts["count"].tolist(),
    )

 def create_sankey_insight(df):
    """Build a three-stage Sankey figure from simulated Monty Hall outcomes.

    Stages, left to right: whether the first pick was right, which strategy
    the row represents, and whether the final pick won.  Links are counted
    separately for stage 1 -> stage 2 and for stage 2 -> stage 3, so the
    diagram shows two pairwise breakdowns rather than full three-step paths.

    Args:
        df: DataFrame with truthy/falsy columns ``initial_correct``,
            ``did_switch`` and ``win``.  It is not modified.

    Returns:
        A ``plotly.graph_objects.Figure`` holding a single Sankey trace.
    """
    # Derive the display labels on a copy so the caller's frame keeps its
    # original columns.
    labelled = df.assign(
        initial_correct_label=_flag_labels(
            df["initial_correct"], "Initial: Correct", "Initial: Wrong"
        ),
        strategy_label=_flag_labels(
            df["did_switch"], "Strategy: Switch", "Strategy: Stay"
        ),
        result_label=_flag_labels(df["win"], "Result: Win", "Result: Lose"),
    )
    stage_cols = ["initial_correct_label", "strategy_label", "result_label"]

    # Node order is the order in which labels first appear, reading the
    # three label columns row by row (at most 6 distinct labels).
    all_labels = pd.unique(labelled[stage_cols].values.ravel()).tolist()
    label_to_index = {label: i for i, label in enumerate(all_labels)}

    source = []
    target = []
    value = []
    # One link layer per pair of adjacent stages:
    # initial_correct -> strategy, then strategy -> result.
    for source_col, target_col in zip(stage_cols, stage_cols[1:]):
        src, tgt, val = _count_links(
            labelled, source_col, target_col, label_to_index
        )
        source.extend(src)
        target.extend(tgt)
        value.extend(val)

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=all_labels,
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
        ),
    )])

    fig.update_layout(
        title_text="Why Switching Matters: 5-Door Monty Hall",
        font_size=12,
        width=1000,
        height=600,
    )
    return fig

 # Simulate 2000 games; each game yields one "stay" row and one "switch" row.
 df_sankey = generate_sankey_data_5doors(2000)

 # Turn the simulated outcomes into a Sankey figure and display it.
 fig_insight = create_sankey_insight(df=df_sankey)
 fig_insight.show()
	import random
	import pandas as pd
	import plotly.graph_objects as go

	def monty_hall_5doors_scenario(num_doors=5):
	    """Simulate one round of the Monty Hall game and report the key doors.

	    The prize door and the player's first pick are drawn independently and
	    uniformly.  Monty then opens ``num_doors - 2`` doors that hide neither
	    the prize nor the player's pick, which leaves exactly one closed door
	    the player could switch to.

	    Args:
	        num_doors: Total number of doors in the game.  Defaults to 5, the
	            variant this script visualises; must be at least 3.

	    Returns:
	        A ``(prize_door, initial_choice, switch_to)`` tuple of zero-based
	        door indices.

	    Raises:
	        ValueError: If ``num_doors`` is smaller than 3.
	    """
	    if num_doors < 3:
	        raise ValueError("num_doors must be at least 3")

	    prize_door = random.randrange(num_doors)
	    initial_choice = random.randrange(num_doors)

	    # Monty may only open doors that are neither the pick nor the prize.
	    openable = [
	        door for door in range(num_doors)
	        if door not in (initial_choice, prize_door)
	    ]
	    # He always leaves two doors closed: the pick and one other door.
	    opened = set(random.sample(openable, num_doors - 2))

	    # Wrong first pick: every openable door gets opened, so the prize is the
	    # only door left.  Right first pick: one random non-prize door survives.
	    switch_to = next(
	        door for door in range(num_doors)
	        if door != initial_choice and door not in opened
	    )
	    return prize_door, initial_choice, switch_to

	def generate_sankey_data_5doors(trials=1000):
	    """Simulate ``trials`` games and tabulate both strategies for each one.

	    Every simulated game contributes two rows -- one where the player stays
	    with the first pick and one where the player switches -- so the frame
	    has ``2 * trials`` rows.

	    Args:
	        trials: Number of independent games to simulate.

	    Returns:
	        A ``pandas.DataFrame`` with columns ``initial_correct`` (first pick
	        hit the prize), ``did_switch`` (strategy of the row) and ``win``
	        (final pick hit the prize).  The columns are present even when
	        ``trials`` is 0.
	    """
	    rows = []
	    for _ in range(trials):
	        prize_door, initial_choice, switch_to = monty_hall_5doors_scenario()
	        initial_correct = initial_choice == prize_door

	        # Staying keeps the first pick, so it wins exactly when that pick
	        # was already right.
	        rows.append({
	            "initial_correct": initial_correct,
	            "did_switch": False,
	            "win": initial_correct,
	        })
	        # Switching wins when the one remaining door hides the prize.
	        rows.append({
	            "initial_correct": initial_correct,
	            "did_switch": True,
	            "win": switch_to == prize_door,
	        })

	    # Naming the columns keeps the schema intact for trials == 0; a
	    # column-less frame would raise KeyError when the columns are read.
	    return pd.DataFrame(rows, columns=["initial_correct", "did_switch", "win"])

	def _flag_labels(flags, when_true, when_false):
	    """Return a Series holding ``when_true``/``when_false`` per truthy/falsy flag."""
	    return flags.apply(lambda flag: when_true if flag else when_false)

	def _count_links(labelled, source_col, target_col, label_to_index):
	    """Count rows per (source, target) label pair and return Sankey link lists."""
	    counts = (
	        labelled.groupby([source_col, target_col])
	        .size()
	        .reset_index(name="count")
	    )
	    return (
	        counts[source_col].map(label_to_index).tolist(),
	        counts[target_col].map(label_to_index).tolist(),
	        counts["count"].tolist(),
	    )

	def create_sankey_insight(df):
	    """Build a three-stage Sankey figure from simulated Monty Hall outcomes.

	    Stages, left to right: whether the first pick was right, which strategy
	    the row represents, and whether the final pick won.  Links are counted
	    separately for stage 1 -> stage 2 and for stage 2 -> stage 3, so the
	    diagram shows two pairwise breakdowns rather than full three-step paths.

	    Args:
	        df: DataFrame with truthy/falsy columns ``initial_correct``,
	            ``did_switch`` and ``win``.  It is not modified.

	    Returns:
	        A ``plotly.graph_objects.Figure`` holding a single Sankey trace.
	    """
	    # Derive the display labels on a copy so the caller's frame keeps its
	    # original columns.
	    labelled = df.assign(
	        initial_correct_label=_flag_labels(
	            df["initial_correct"], "Initial: Correct", "Initial: Wrong"
	        ),
	        strategy_label=_flag_labels(
	            df["did_switch"], "Strategy: Switch", "Strategy: Stay"
	        ),
	        result_label=_flag_labels(df["win"], "Result: Win", "Result: Lose"),
	    )
	    stage_cols = ["initial_correct_label", "strategy_label", "result_label"]

	    # Node order is the order in which labels first appear, reading the
	    # three label columns row by row (at most 6 distinct labels).
	    all_labels = pd.unique(labelled[stage_cols].values.ravel()).tolist()
	    label_to_index = {label: i for i, label in enumerate(all_labels)}

	    source = []
	    target = []
	    value = []
	    # One link layer per pair of adjacent stages:
	    # initial_correct -> strategy, then strategy -> result.
	    for source_col, target_col in zip(stage_cols, stage_cols[1:]):
	        src, tgt, val = _count_links(
	            labelled, source_col, target_col, label_to_index
	        )
	        source.extend(src)
	        target.extend(tgt)
	        value.extend(val)

	    fig = go.Figure(data=[go.Sankey(
	        node=dict(
	            pad=15,
	            thickness=20,
	            line=dict(color="black", width=0.5),
	            label=all_labels,
	        ),
	        link=dict(
	            source=source,
	            target=target,
	            value=value,
	        ),
	    )])

	    fig.update_layout(
	        title_text="Why Switching Matters: 5-Door Monty Hall",
	        font_size=12,
	        width=1000,
	        height=600,
	    )
	    return fig

	# Simulate 2000 games; each game yields one "stay" row and one "switch" row.
	df_sankey = generate_sankey_data_5doors(2000)

	# Turn the simulated outcomes into a Sankey figure and display it.
	fig_insight = create_sankey_insight(df=df_sankey)
	fig_insight.show()