- Python MultiIndex artifacts mean that `df.pivot()` is not the exact inverse of `df.melt()`
# ---- "load" wide data ----
# R equivalent:
#   df <- tibble::tribble(
#     ~country, ~pop, ~capital,
#     "USA",    330,  "Washington D.C.",
#     "AUS",    25,   "Canberra",
#     "JPN",    125,  "Tokyo"
#   )
df = pd.DataFrame(
    data={
        'country': ['USA', 'AUS', 'JPN'],
        'pop': [330, 25, 125],
        'capital': ['Washington D.C.', 'Canberra', 'Tokyo'],
    }
)
print("--- wide data ---\n", df)

# ---- wide to long format ----
# R equivalent:
#   df_long <- df %>%
#     tidyr::pivot_longer(names_to = "variable", values_to = "value",
#                         cols = -c(country))
# Every column other than 'country' is stacked into a 'variable'/'value' pair
# (these are melt's default output column names).
df_long = pd.melt(df, id_vars=['country'])
print("--- long format ---\n", df_long)

# ---- back to wide ----
# R equivalent:
#   df_wide <- df_long %>%
#     tidyr::pivot_wider(names_from = "variable", values_from = "value")
# The pivoted frame is not yet a plain table: 'country' ends up in the index
# and the column axis carries the label 'variable'.
df_wide = pd.pivot(
    df_long,
    index=['country'],
    columns='variable',
    values='value',
)
print("--- back to wide? ---\n", df_wide)

# Undo the pivot artifacts: move the 'country' index back into a regular
# column, then clear the 'variable' label from the column axis.
print("--- actually back to wide! ---")
print(df_wide.reset_index().rename_axis(None, axis=1))
Output
--- wide data ---
country pop capital
0 USA 330 Washington D.C.
1 AUS 25 Canberra
2 JPN 125 Tokyo
--- long format ---
country variable value
0 USA pop 330
1 AUS pop 25
2 JPN pop 125
3 USA capital Washington D.C.
4 AUS capital Canberra
5 JPN capital Tokyo
--- back to wide? ---
variable capital pop
country
AUS Canberra 25
JPN Tokyo 125
USA Washington D.C. 330
--- actually back to wide! ---
country capital pop
0 AUS Canberra 25
1 JPN Tokyo 125
2 USA Washington D.C. 330