Last active
January 15, 2022 19:53
-
-
Save chendaniely/93845857e671da9666ee80bd0eb7002e to your computer and use it in GitHub Desktop.
Pandas assign inplace example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tl;dr: you can't assign in place because inplace returns None
#
# Walk-through of why a boolean mask built from the *original* frame cannot be
# used after a chained `.assign()`, and which chain-friendly alternatives work.
import pandas as pd

# Built explicitly instead of via pd.util.testing.makeMixedDataFrame(): that
# private testing helper was deprecated and later removed from pandas.
dat = pd.DataFrame(
    {
        "A": [0.0, 1.0, 2.0, 3.0, 4.0],
        "B": [0.0, 1.0, 0.0, 1.0, 0.0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": pd.bdate_range("2009-01-01", periods=5),  # business days only
    }
)
print(dat)
#      A    B     C          D
# 0  0.0  0.0  foo1 2009-01-01
# 1  1.0  1.0  foo2 2009-01-02
# 2  2.0  0.0  foo3 2009-01-05
# 3  3.0  1.0  foo4 2009-01-06
# 4  4.0  0.0  foo5 2009-01-07

# individual assignment with [ ]
dat1 = dat.copy()
dat1["new_col_1"] = dat1["A"] + dat1["B"]  ## create new column
dat1["new_col_2"] = dat1["new_col_1"] * 10  ## use new column
dat1 = dat1.loc[dat1["new_col_1"] >= 2]  ## filter on existing column
print(dat1)
#      A    B     C          D  new_col_1  new_col_2
# 1  1.0  1.0  foo2 2009-01-02        2.0       20.0
# 2  2.0  0.0  foo3 2009-01-05        2.0       20.0
# 3  3.0  1.0  foo4 2009-01-06        4.0       40.0
# 4  4.0  0.0  foo5 2009-01-07        4.0       40.0

# assign back to dataframe works just fine
dat2 = dat.copy()
dat2 = dat2.assign(
    a=3,
    b=lambda x: x["a"] * 10,  ## using new column needs lambda notation
)
print(dat2)
#      A    B     C          D  a   b
# 0  0.0  0.0  foo1 2009-01-01  3  30
# 1  1.0  1.0  foo2 2009-01-02  3  30
# 2  2.0  0.0  foo3 2009-01-05  3  30
# 3  3.0  1.0  foo4 2009-01-06  3  30
# 4  4.0  0.0  foo5 2009-01-07  3  30

# case for "inplace"
# The mask inside .loc is built from the name `dat2`, which still refers to the
# plain copy (no new columns yet) while the chained expression is evaluated.
# The failure is caught so the rest of the script keeps running.
dat2 = dat.copy()
try:
    dat2 = dat2.assign(
        new_col_1=lambda x: x["A"] + x["B"],
        new_col_2=lambda x: x["new_col_1"] * 10,
    ).loc[dat2["new_col_1"] >= 2]
except KeyError as err:
    print("KeyError:", err)
# KeyError: 'new_col_1' ## from the .loc call

# you would need to re-write the above as such
dat2 = dat.copy()
dat2 = dat2.assign(
    new_col_1=lambda x: x["A"] + x["B"],
    new_col_2=lambda x: x["new_col_1"] * 10,
)
dat2 = dat2.loc[dat2["new_col_1"] >= 2]
print(dat2)
#      A    B     C          D  new_col_1  new_col_2
# 1  1.0  1.0  foo2 2009-01-02        2.0       20.0
# 2  2.0  0.0  foo3 2009-01-05        2.0       20.0
# 3  3.0  1.0  foo4 2009-01-06        4.0       40.0
# 4  4.0  0.0  foo5 2009-01-07        4.0       40.0

# what it would look like with a hypothetical "inplace_" parameter
# ("inplace_" instead of "inplace" just in case there's a column named "inplace"):
#
#     dat2 = (dat2
#         .assign(new_col_1 = lambda x: x["A"] + x["B"],
#                 new_col_2 = lambda x: x["new_col_1"]*10,
#                 inplace_ = True)
#         .loc[dat2["new_col_1"] >= 2]
#     )
#
# this wouldn't work: `assign` has no such parameter (every keyword becomes a
# column, so this would only add a column named "inplace_"), and normally
# inplace returns None, which leaves nothing to chain `.loc` onto.
# What does work in a chain is giving .loc a callable; it receives the frame
# produced by the previous step, so the new columns are visible to it.
dat2 = dat.copy()
dat2 = (
    dat2
    .assign(
        new_col_1=lambda x: x["A"] + x["B"],
        new_col_2=lambda x: x["new_col_1"] * 10,
    )
    .loc[lambda x: x["new_col_1"] >= 2]
)
print(dat2)
# the output is the same as dat1 above:
#      A    B     C          D  new_col_1  new_col_2
# 1  1.0  1.0  foo2 2009-01-02        2.0       20.0
# 2  2.0  0.0  foo3 2009-01-05        2.0       20.0
# 3  3.0  1.0  foo4 2009-01-06        4.0       40.0
# 4  4.0  0.0  foo5 2009-01-07        4.0       40.0

# Other methods that don't require direct reference would work
# below I show `.drop()`
dat3 = dat.copy()
dat3 = (
    dat3
    .assign(
        new_col_1=lambda x: x["A"] + x["B"],
        new_col_2=lambda x: x["new_col_1"] * 10,
    )
    .drop(columns=["A", "B", "C"])
)
print(dat3)
#            D  new_col_1  new_col_2
# 0 2009-01-01        0.0        0.0
# 1 2009-01-02        2.0       20.0
# 2 2009-01-05        2.0       20.0
# 3 2009-01-06        4.0       40.0
# 4 2009-01-07        4.0       40.0

# .loc with a mask built from `dat3` will still fail in this example, even if
# you put inplace within the drop call; this is because inplace returns None.
# Besides the callable form of .loc, you can fix all of this by using query,
# which evaluates the expression against the frame it is called on.
dat3 = dat.copy()
dat3 = (
    dat3
    .assign(
        new_col_1=lambda x: x["A"] + x["B"],
        new_col_2=lambda x: x["new_col_1"] * 10,
    )
    .drop(columns=["A", "B", "C"])
    .query("new_col_1 >= 2")
)
print(dat3)
#            D  new_col_1  new_col_2
# 1 2009-01-02        2.0       20.0
# 2 2009-01-05        2.0       20.0
# 3 2009-01-06        4.0       40.0
# 4 2009-01-07        4.0       40.0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment