From 6a5bd899833ee72678e6c61b9e22ba1b24c19878 Mon Sep 17 00:00:00 2001 From: TomAugspurger Date: Mon, 12 Jan 2015 21:13:15 -0600 Subject: [PATCH] ENH: Add assign method to DataFrame Creates a new method for DataFrame, based off dplyr's mutate. Closes https://github.com/pydata/pandas/issues/9229 --- doc/source/_static/whatsnew_assign.png | Bin 0 -> 22935 bytes doc/source/basics.rst | 2 + doc/source/dsintro.rst | 76 +++++++++++++++++++++++ doc/source/whatsnew/v0.16.0.txt | 41 +++++++++++++ pandas/core/frame.py | 82 +++++++++++++++++++++++++ pandas/tests/test_frame.py | 54 ++++++++++++++++ 6 files changed, 255 insertions(+) create mode 100644 doc/source/_static/whatsnew_assign.png diff --git a/doc/source/_static/whatsnew_assign.png b/doc/source/_static/whatsnew_assign.png new file mode 100644 index 0000000000000000000000000000000000000000..0e39e161dc6069400aaf01995133fcd50b925e86 GIT binary patch literal 22935 zcmeFZ2UJv9w=H}?5k--<6>W))f=EUI$w3Sxp~y&9kt`HZVgUxUMF|ol=bR-=Hj#6d ztdg^Yq9ng{Fm!+2-@SL-`@esT_r`dPfml_i&e>=0wbzT-OuHU~8-f68pu?&Cq+Dgi(?uQ@O{kQ*s*9WYmHEj`u;wJh_lp>yNiXguu zGM6u^I!4S6;;bUaaLk(vZBO64Fnso!xHpSC;jmLI898Hu`LF(m<>}8nnPRt?eK>0r zHsAcPaD;K5mkZ+%&^pJf`>S-1#j3?*30^beY3TTICr*(cQ!m}SmCN_s;~TBHEc-Q8SGYnl}rl5WjU5LKxh_TXzbF^kCfvo=M@zdbt04w zM@B_O4d4iaTx$i!(YV!TJL*|F#b5ToTE&==6?A@mdys4N33F5}$q60qk;Q{tW!%1E z>yvVBTN}eW+xZ0rV-dE^bufbNvM~;@--&Nt+HP=P<1#fht)tY?$%!#Quj&k6PR=hZY@S+f5m|1G za2gNo*l}BH(2v?fdU%w2`0FD-FfBzXvI1uNwWTo`w~9ESu`i$AY3AIjipa8`FP<>E zeLKO-02b@UCrUFW`#OIn&CKMY`w^h`BE;7*xKzv%fT?l;)y7irRePr_xc*~?ZvWb zwQN~`*LmCMhF;_7$e!|fd(GV2GV|qxA=d?$;f7=}e5$7Va86FnGanz$THCmDK@Ens zj|6$JDGz@RV6W%FR1IBwwZkyO;n)g6QVP1pz5?^*=f19ta5YQ+cyWO-8>i}?&33LW z<%(^5_5{q@RXxW~o;p@HI6Ah0Gk>)w!0V-H~0VF@WSu5NaC|()fsRyyyb;j6iCdA ztIV<|X-+n*4qlS+Z^8{uO_|8K$Q4(g-h-GZNV2we?vtv{HLQuccBNf+&%542Yn-#N zwz&`|;$f6HaKc1ugvD@UK9;7`NK2u;(nWfT3z6{tITgH_-GO<<*)G*#rYvUlMcXp=wNOK~Z2we{WbOwK{X%C|Q~Z8KTH0 z$tn?7b^?j{eO879W}f?@D$G3DCF%b^U!)oRoGZneJ~L4yX4_jK@VvnJ7jvbL{TahI 
zN;*2)UaY>b@s`?$NR%H@zUM>@6ZwMH~He|zwftUD1B!t=vz-g7dQ7le}ZkWzJnyIB451=oSVz#-+Jbd zqQ$`TVk5s$!ML7P%ScrHFoC*w;|mx%(T<_Gd%q$`5ZsX$xz97h1|QK5QQY6}NFVOV z>sfLjOokDSkjjtMM<|af6OF%d(?L9@e}xm|Cq;l&@qE_&W&lZxgajXjz+6JBqvb#` z7ytY~aQWkE9wCMdVx%!xG*zO?S*S19Vq|>%fW_S%{Pm=vE5O!%Qx@A&8XJjjhj_+G3)R^ql}6XIdk zE^y%Np_+nD`s|a4qJ<D7yMl_~QJLQ3^Rmqv=MCn^SB+3K&- zQMF7yd`X6U2p~K5Y(#4s!;~G0%RpY&?T0hdxai28w~u(@p>^99*Q8M3YY+CK-b55r zhmQ9EE562jAENp|G!wcVfUo zATc5S=1xwQ+cn6nJ5aD}ksxGOe(Z9OGp%7Z7YOz(*LgUYdOLgOGL(@R#oY`38)tI% z7Y520nf!k^lVeZ)$Xag)5{Qv2Mi|!Dr(jc_mWs;8bAv-^l4X5OOdWaF(CRjOKQrOJ zWz@jIzM)%Jjv^ncG7^Z{U%q*suhN8R3AK#lWSl z=x3vQu(M4(E#70H6V*%i_*cg_#81|JeYOYOu5{oAF?))#zrC*9^3Krq$kg2{1W~aF z@$zZIQ>@7A1J_Y&8)MXW`jX7EKJ_ACbMPtM9^~U$1~9T9g_pOT<7yBD5Z7^4^ZMR_ z9?^KX!`mwQMa!~8h{EeA)tHw_R0q3ULT08+8VbEep!HYL@aHc_5vIf&d~0CV`n z+gBX-r{B;lyrUWAewUFLuKon<;S;(a&@F$?vgfr`p@mNLP;+;fk9mcbibBm)viu`( zy$T6_F3Q;Au@^cHgZ9O%edf!bDBJ}~9TuYSJz`rc9ZowN!_gS`^^SuAk~d<%ISHSc zNBxy9wZ(A=A|GSLfcFueubnFzjv8Kz9Ii7KTMpH6U92kp^oaUAFYk)K{>E%x%eYGP zx@vWGwQ{@AQSkcn$2b#)oF*cR*Qe8MrxJscl9QPi+z6!|9UTFs3l$rI`urCzyr9OG zojajxr=+*h*OR1EIDC-xT$HlB2D|+`62!#PlPs01?2BZSDbgUF>i*kWxYCeLf>`qS1U1@4+U*3=g zFw)Y}YL+|WpY6Qh)D7!U5avkN$X3wSj*#4g!hmf+qh)1kwG6jI7$pRey@|QrAu|4e3)u*U3W4uX+$$Txullwu z_SbQ8+pc3{6%rPJraBZV5XA4;%YhTub$)oh`B+9N>a%#e{KWDr&#UfSBXN-$LlD)+ zeq{2N;9Rut&W8G2FWvVj4-Iv9o43n88!-;-LD&zCRkXgZR;dR##?y^_cY?SBDxcgY zUR&Kge8?DfCcw3H{7VFxj`AbxEUy~Eu%X|iVW14V{>>+xRu4iEM3#x~*saG^|IUgZ zUPe5e!Ts?TNBI z@c3LM|BN67H<&0xg3T6MBFxHo!?V?F4kE3N4^?jrnRU6!)NvpZBPf_kw_j!&D}=4} z4!oKf>tM7~+PK_Pr+G*M3z!p#nfapp{q`fT;{auhJo`70BFE}x!P!V>A|yrBMfnp` zV`BfE_>h+ER@te}gNVebgtszk?G}YcBFx+;yUEi}uxDWS^w&Q=;*;8me6l|UcBYiH zXFy7o*)5)pwQ%AGGGP<)kmnlZGE*wVgqAjS!}mJFiG@iY<=Ng4fCa7lSo`5VP8>nJ zWTX>Kr*vFwR5mVg={sEIgkSKZ;>F^2H+h%CA$4d>7zj?{2`^7lNq+)Hko-V}$38r= zkG=#2HY)P*R69<45JeYBR(az>7kT?|-7=r*rtKuayyDCW?W*JL#f817*}~7#YV%dj_{oy5G>)NQsoq?y1F+@%CF^ZCZ6DLQ$D1YrU+m z#$5c}ahLL|uC!ru0`W#J6^MlW)Vy4W=4^Ap74^BSl7qd84!j&EGvDcJHgy-j>W$(; 
z6md;!!GiLc&l_Z^+nz7JfKV~M(=Qsr=wz(#0Km=GdFC>ThF?pjR2m*g@$U|#&ds;u zOsbFe)g2DQtl1W12Qxl69lzG9v3k2_1Zv-k<_ zU0~sv>*NxjDF&C3q`pCSLN`xzxT^JRCHA_`u7e3lUtL|z&?-nv>rtIQD4*3g)>5D9 z>o6<&BMf@GVj78%8)6WqWzzJwb-ZT^j883yT%4N3;_XiuQQC_nYVz+jlNY2-jxKzC zn)NUf9+6-IaNyb9(H7NGVSw8<%C%c5S}(D+lyXrX@tbB>1Y~-XiIS>SN1fm`zta6A zqB4$;zQk2t%x*0V{lA>~K(E88#Ldy-Cj*OL?xDy+e6vovaP~t00FE%9gGkJg>oT7E z+H2EYuAE(nEU6S1J|9*1LOIH5-sJ4oM{1Ylxf-_@kW%!P+${K21MzVBMLG7~k3fFJ z^sj+DT?yVF3vdv7K~d?XE9Sg$x`VJ*QS;b#OPi}U>t1x1oh$4)iDE9^KNbUXafH@q zd}X`o3c1Rc5 zVe#ReI_d3|wBWG?x=mAACc;T1CJda>?|*SdXzC-N*3h}7+u?LoR)({z%#q+^wv*Fk zi^S+1NNx4l8|bO_extv(hJpxb0zhek_|Ow0RG!r`h<)?oB}C%%5#`wCXUU9?+-zeu zOO4NMt}-`5ymi~3s>lrKQLl@M2%4)f&kEiVvz_YfhvabQ<>J`Xj<~pt4R6f$R=jvaM~io&J3+C~^15#ABZ&T7Mog5` z9-@z%68+VdA-Qpj&Eu;qOX6V*E*#4n9ty{b+1z-3^$?;cs;(UC6Vy=WSRB~xPnmmF zc`D6GuD6Qylhb4$PtAd1mp%UmZ?E=}0wPhhg0HMi&AM4wq(i+Xq3U}Tm0d}euK1VF$$ zgD-{ZI;U^iHj{6>B8m~yl<^d5>sD?a6n8rV!|k>G@H$DosYR>rMN#?ZjIudOf|O z1E=xgoXvi=77QVGRf_^(tI}D5r_lL=;g>LX{_#ZkNw-T3186Cg=7KShGrUzPS0x>b2|L|82}t-Qu|M4lE&!AP=pGF&9l3*)y(V#Im_$P|BP z7PgcQH5&^ne{u}zq-IM+NaC+iA|^&aH@rEkLV|>a`jIIr%Bm817~RI8kVjIs@2 zeKR+$f22<4gJX?z zB|n6bqKQrZ@H6|KXBgrwek`EJg~UhzD|1T{gEPglBp2oJLAtGQzRSDUoCHC0Db;I} zsTlPVB9Bu?m8n~AMQBdxT=VHVyL=;d0uO1S;L_%O9hV%A)di#ycB9k&RPx*BnltP; z&v=`}PS~H`8em_wHBE3KMH+ql`XrK?Ga}YIEdF#@CF^uAKOA?~<8NZ@UG=qZjChT{gPl;au9+}9aV;5<2LlKJ=?mkRk+Jy)YKNnQo5+65)9l9CdekFW1(Q9NKxsV);! 
zBlKQfFV>u$XqP5fifvY)C=f8~UqZ?;WoAA}>B!If!>T?7JC3B@)@w=Jn01u>9$XTH z_GB?gwX$JiCdonxOw8c#hVLSlNS z<`0rwh*GYaxTrvq6$#{>Aa?{9)orWW8TF?l%2h;z+wwa%MH*$BM>}!Hk(l+21WBSM z2g-bD!<1xaP8Ir%HVj3wp7tw%v4%iq)!0XS1M#eQgn^PQ<$1jV2CtJOc6YqJ(DjRV zdGW&Hr>Pe0S<1RpwKm;7w$ztgTc%0*Gi09Pq9CJ{8etT;w-$lXl5gsJmaL;N?>2h_CAeHD zyp8SRRvpx_MI64jqrIh&>7dHB}a9^_z) zEUV)FOqS%pCuL(NS{fr=4mY0N!EK~Bb2fVE=4BW1Z!D7{$D@K&LwF0smp<^t$~H5P zO^iEZw3VAR<@#+z6Ex5f&yVb%$92P%q$Aim!Vgt zVVywYUh4nR{rH?aE`m{+;1aKCr*e^ylh+1G-jOC@| znnf#Z)uSsz6uP}%X{z_{4*TgPY0slyNC^YwoqAQpLmgGldjC$XK!IviI}zB^Z69;%YfUA~y#ZbeiRh7ImJwQr%SbGC&Zxh_z$ zp)8#&^wgSF2X(UGYwz^tFavePM`vxDUesJ$(n*XQ3l35xo4TE@V=mM`ot2TV`62x} zPPPwX1!ID;K(f{tnoJZ>9p{RTT4jyOkekD9D?8MW)wy^2`QJriLJy>NQcB%8HY{0w zxJyrM7Wf+D;T$n{o|Ajsx8CM0If%OoX;e;UXfDXB_O+nWN* zuJBrkKaZeHFsoq7VKxioi;8D^Rp7{BH%uas<1EBQeYQ~YQKze z=zy`lW&Q}RzY51AZ8C+6s2dimq4 z>3Y9vKins;-b;iWSN}xCJouk*SPA;et3{Qf;r!U9hC5zk`C#RjgHF?_ z$;YuD8|xfp?K?b7Vtq?e`I(*PlIX%qG!C|1OO`UXbNUMmn< zni`Z9J5g<}mnCDUplY_!>yXh>xiN0wv|gMuRg!%*^T7<1AY$WGrGl7N#RR_3vNaArwy|7T%$(6ERPi1;CfbSB1N_6 z71Yf(_)8xzfg=i8{Lne$lV8Zeb|FWBp#tQnjn z82&>Gu-tlyVl3_{XPDdOr^ocoOu;(mlg&keEHyO`-|7$sKI^KK)WnLYKmjn(Jv(1J zt<$mk;2+k4z&mTuJ3E)?ECcJdw3&CBkIK~WC&Dpki%98Qg{*aXpJIsh#jdNjZIk+jlf%L+h7@W49PIy)u1l^QPYSoJ*Qi|Y+MJXUuSX|; zjLTh6*kQg=O+!~LJ^78CK&5$VR*L=j<;22?yZI9Vm3J#Ri-1Ox|Fj)e_Mh>1G#n!? 
z)|&HddW&8IE(_&r?ZFj+VCg86sa<5*lo+h1^)&f)LI7WhQ%!nvvmotU@vT8Gb{6@| z8lCIhU2UrmsJm&l9M1S(YfNpJyA^qEnq9@@+QHuaD`!sfm+*<#aknsmA%9h(>QPIcyMOzO-EM$fo|nD$cs^mYF zVHcC}#fyoz^{~9`-Cojd>Uuv;n6|Z%tjW7k7JGh|nRHP$6&Yh3?sqt=kDHvD^5~nb zR!K`Ya8S6rx5?Fq!c5*t>Hf$q9Q}=(?C%%o5ifaZ!#DDC6e6pbCq{$xld}p4B2e&_ zPa*D{y!|8G;`VDJ^Dth4^LGb&Bk_v4Nk@(-vbpUbyz~q~SI%{vyj!SygSstMw%e?j z-=WauLU|j~=`}2lX4-~IPCcap<9!?*)Z{#rnG8pH&sVx5VURaGO)z0H4|+kz?{e1m z*uGAMyq#?#ljD-C>~V2{8I%Ppujo2wDsBNy$&m2&lya{6@Ix2a!4vzbLOji)&9!Fo zgw37U70V-!^iP)2Arfsskf@l9Zasdg>pZZZkZZExH4%SA4=UW}g6hX|E2YsD6OWIN zFQD5;l1FI$`fTfQ;%(=ho8M`rhEHl$x9n3zArEC%`7~ek$*u5H*4gDGB|Tpwr9h`k zxO0Car;}{-1)iwPiQh?Bs;xRJYMJ2TG+XicTlg%PSEr|*!XpTkuZ{jkfo23}I!}(g ze)|$4qf*1TWC5B0{=IznzD}dtsNXL6{-7ADP4NJe^0RY7&T&!qBwjY>TfE(|ALe|c5| zF`?Sp*rPdlN+B=!*Ao^)yc;u0TwWKjDHPvQ7&h;Kn5$;EEYs(WMN>X5n(XMrnLJee z{Rz$sc=LBnV zOO<}zX(-%+3E=H?#Gxu|jyDuN5Ki)f+s zC#L_LSUN@wyiuyjuDnLj_3IZIak2B~{h{uV`T62S9x9rX;nu5@S@%9^MvIaGr4|<# z)cKUFLI6cCbhwqa5wX?U)4rqO!<$BW%V9l@E>^AV<1VXb0Rww^sVirful8+zc`V*o zBet5O0|6T?P^fSQFL$b7#=!S0fZ=ArF=oD$ADOy3qVy2NM?*mq`$os zy!?6(#VDw5to`FB5-LV$7iv-IQ*(23+TmkVp?OjhSi87DpcwB&iySvz9?cP$%7<-FWEf{7)s0lgJoeEjT$m*p$>cS zXX$8eZceknOie9YvkmeJT!D~F{*ihEOhK{(>dLkT5G1TA8l(z9jj;r#FSb9HRJYlypAr8oLh#da?=Zq&`TTbo2hO_v@YW|Hxt5%dR;((-iyN@-4$v5##C zgANl=7u9=tanG{D9j95STYyw1Esu+vIk|6Gj4u6hQ}d=kJ=w%!xd58I($?RC4G0k4+rtd>!RgSu_-;zdfekysL7v)zZ?+ zwHd2N)ncgKr3<5_cAA~{sUh@I#T48Qd52T8-SFwl(;Ah|>%+l%&QsPPWHI4fEwW`D zuTh%c(-?*W$4rOdd|mHwXWPqS#h^O* zz3x{vh$GAohC?mX$k9;%6l%r=+M7aZD4o?NaB z`d{CkSbFUK^)af!V-{blGxLFBv&R`lCF#LDV+?2MY%U->CYh^OuO^AF_VrX;$adeF zF{aT#wPSfoO478{)D5P|kuTuK&d%=ka0QM>4EA27i1_9wiuCgma9Ehs6$X32nkVV# z6#UC(4Y?W|DwZS5p!D29?+&OwjYkMQO7@C(Kr^^Hk<||dGX6ys_{_CNW3d2V_stQf ztr`93*x1+s+ZIu$j_@JXu6sa4wti(LOWj%0-)^8m(fCqHPdVoTs3?~bxo|97TU(|U z7EK_0tKMA2?%Y))CF9L<7T`aOcvQi#%Rk;fXtcROyWl7_v|3m+1~xV|JA2LC+&s!{ zeJTQe)lM?DhWyY5{Z$_J6oe+N8L*L@-yST;zdcqTgkH$NSM-JWdc1gAcjCQ$eBTJn z+yAUuro;+9?yRAA7Xf3|-v){2xyOfTh2}?gzEJG=VO&2SJxRU%_>7g!cO?i(#V(zh 
z)Bs{B6R|u_0y^FD{DXm{bdERHa{zbW8F+PW!F577fYD*VT1sqZtz`mcsR?zP)4s=H zDIZ+`7lO_Rg~%6)p|92aY9Q~k})2gD04`U^+sW)>E*@2K)7sL%@L+P^bPH>I?~z{2o? zKj_i1ZHnO3VeRS}5&&y9!*(;+PqVA01fG^kYY_!7(AJnbaALgS`RT5S-SZn!Dpzg{ zE~wirHgJ|^#AU}@)4GgGpYQ5Y3kjnjK)!bMe7c`AWZi0Z*4_aEClgb;dL{{W(}zA> zbqCI^$r5tT5&E64lD>OBKBW3Y15V8d3-5J-XWD%)Vhnh4M@U&t2YeD*{eG>P^N72iSD=QUaQ&*Nhm-4iT@a);Mheyl}+{f*Oq4H&_ z-(Kfvsi=R33D^rhf=wf&I>$>;=9icsCsyM`u{CM^yuEqM-o{?QI$yq6t@q3Y&)_=MPZmGnj&h|-~&7p-Yy&-&-!`M;-m;N8;y54VZZSV*QY0cbJl=_*+ zKM*q5)o06gnhimfq+$*CS3S)Ysa8~Zoj(0Lf|wk7%fhV>H_eFJ_EHnp4K*RB^{M1M zr{(bs{Yo)kktONv?S&mNlmW{_?c4{|`|kskiG!eJvxV=OfT#fh%XVor05jei3lid? zp`ihAi!%O%@&#NMH49i3d3K!NIvQ%nYy{o6C)^=Ozd&)`{7TIWc1?fS{EdRBc)C&Z z%0$BUM^f?H?Ct67D7y!DFc{3EM~}w&K%0kzVIYRyx^)UQ96;J4A`yj!UPYy76ifhn zn(5ALK=X#9Zg=eLcuVij)i>y^<~ordf)S6aK)4$ZGhYH6Ic^)hp#wLyRVjmltBL6C zspzFeUw09-3?w|X=mXFgzEX_}5GZMl?7`^9(cvrs?rL^)1tq-`$kfAuSj=VJqr?8Q zcyy80`UBrL1=Dw#JhukmVCFeB)l@+&&DJ)dGWy=VdyI8b;_GdPC@74rthix5($zET z%>n+i>sOSeJ=hETNf2azPnx*P}PYUV1RcK#I=s z@BUKl_Y*c7#UEY8}1W@{gw^xTiH8&+TccNt@)-Ir zBzQtqq|4_D`)z=vjr?K|y|X#t>ylAfS(&Q@CA-!@7-V+oe2FIw9E>!IB~(DhruP~& zlf2oBf&PpDu^kza0{&zheFRB^s>*KWGAtU%2s+3}&@ttMyO3B0WiMg|HLhKam7X{x zqY)O=JrFtzl17MA5;f^B?m;B*--XM^zP<=pS)dE!+#xZ!`e#wMQ{tPD%AhHKS6sPI zc9}QIzsn@W0HjPvr}Vd^B6_>7S|17gB;{;Jr#^ir_sW&d7w_8bUsZr~^y z%_^O*hZc=I=%6W^5M4l*PDXqOwSXX$#zy-v&`bg)%TdA_xUZ5w;-SstMg>`s|IbdE zh0W%LQJ(0vYn&BdKMz>FBR@$+B{bEuOU<$AuhrifEa7usPj(N3{+t2F5$}_~{dT?V z9$;W@EHq;{3^^HRRjg0z00f$vnyM>>q#xz3=SvsF;3QAo7rB<29V&RT9jn;Th*>tH zyHKxb92rEv`H}P~zZGb2Z~jo*Q?jZSFF==#A9IxBDb#v^zW}H#vOXD~SJC|8!*xL7 z?QLyJa}30YqT)}%T|A}Z4@v%UsdmU=Ri#B;zgqd zLj8EUkQih^=HD$vjLH7bqBBTtLc-q$ZJd!YHy^8%h5-j@W6SyIzrH3#NnchUh&*vr ze^>LzHvcJpllu=@Nx#;8V^Lll8^N^lk#dAj*`f zL}3hwPKP(g`In{KSEQVvYe;Nj0r%zO8|bG|=d}z|`KhB)H2&7k^Ta5EAEqwP{Dz}5 zU%{@B1Qi5>a$UJ0w#yqnZN#{^xIjHct;_s4^iZLcyhg=R(182S7D~#XnU%=eXaI_6 z{23u##9Y7*MUc|FTCJOJFLCXhM=1}nFZU?WQ~~TNY?}g{aw`aAU}sJM#W{-7oM^km 
z)Y;CkL)-^FH&XvonI?@c9}ehUbONAtY#g&ait#Dcb9di~dq$0T^qKvH$_oAE$1?dc;F!H^m`5G|Vj={>e(*IQSorrwXTx;crsvcKR4Gc~EpQ<34Ga8B{svcEnS4xD( zlAcE&H2D16X(Tg&P2M7Kh%`pfBw{b3`1_CDJ9Q|Bm1UHj^R}+zEc|`Z12m%JId)w;69OA=1?X|no+=zjnQlX3 z3FB!WQqEeeMb1)z^?Ls^-YgHlOQ?|GaUe z5!Eie{P@osN4U@>JDBzNr9^x{UyD4w>coX`RiT0DziPFjss^PGCbj!rNS-sTL*8=9 zo3~bb$q_}u4?zD{_CPMjw=A{CnqN4Znw=f)IK}1L9k;BoWc*Y_7k1m}a3$*149=EW zzI+K{OvK(oB=HVJdxM^r^=NMhWy!I;@1H{<|Hl}owZ2Dg^7LItrlm3g?nmbFVCqqQje#Vmx}_FOPl7)gqb6c*Lx>f?o-<9G`SU4;VGP?1LGL(HkHcD!!nN}qPU?RtcnYxJ59X^h?)`98$z z8Oqr9<`|b?Xa2O19|t9Gutyr-TTcL6xzX+@+gsscQvK%CHx5P4)OcC2>#Nvud2?P% zCHKq>a_kA(N&2mMF#yWp;}!EK%|jCz!`DkXgo^b9!&_3T384%2FCq(XclQvT^;a&| zDZI&0_n)L5M%MCtr>3LtT+W|5<{hAb-@RaQv!HRb=@7TE+wd+d{zeb8Bq;o<9LiSt zK6SUXGrTXPkJ>pcsHqp76AV+&_mT_8W$CWL6UfKCMn|b8sM*u)z0J_+g2o?!%YSJF zjDb!Q-sEMYd$*t^3uqtit|`Zj+sPwMly|S~Gm`^<7sK#F6U&hRgwCHGdLG3}^B@Wz zr2du{PFm}H?`P|T2B;sdEXCztH&?Ttr;q_E3nY*IM}MBAF1jgA_WwrnTJF*BX6Bsb1 zIT}Hicd!dX58{Lt#C_b=Ilb5q#r%HI@iY;~MJCw{@1it6vj9~yTLwLU83TDc*?^>T zW)@fDP36mt+N+a-LX&d3=vklKDvKkw$S{cW54oJFRJP|-+-KQetOZk=Y--Eos81|2aD-Pl(B*M|l3(yGJjQ$bjG=37dxmY8w$b_CJhQZ0D#G?=BMA=)wStRGbxw&cfOgmFOC^*<9juHJ# z;^;Zf1igu|v9T#=^n@Sz##dt%>gj6jJsPUndHr)_hYsDz-`cltU;UKbg}znmiN(c5 zC28qDk{cv=E)Bn?T_GR1xcg^1{T$tOa~)|s9%kd2hYn-#d3VF*!NCtV~BqNh$2Sq1W4Y@0fHCCh&7MiQ0}oYj1DY z$=-oFg0%k``LIzkkQcyDo@T_rA1qT^wT8u9(+|eDzYp}sc(ae^fq#Xc*7Liu_kUt+ z|DUb(|F9Ey2)|a-v{O5d2__o2fW_=i&ug3p&2dWIwC|IJR&*J#kTP6R}2pGt&C3ce5Nn?t3e6ahQ}@ zbQ}J8;^$@Lwd)Lc4PZ2Iy~Gn6Qqt}2*)hK=wz=*usfRGq;9{NpaTosl?{F^vhb`Ct zGg);1{`X_V|Mq44vT8?E0C*?}7`e2iC7UZ(ubRz%$eHfVGk(wlycmLnRDmy}=C|y8 zL{7cZ$kSr7y}2eUE!{d$Y^$cM+#X|#9)yg8OaBjIO8?J8SQFIM)TT!30=#^Dvd#@< zYV%VbJI4Eg20ejC=rgj^yQRARqZ1Pv1_NdVX=P<)Z^qG&AcYux5CAX3Za4EgxlpfR zh#qLNA72AkUvh|-=Vvs4pb%3*bz16VP-BLAdO~)#kDhT+Kjm+~eYjz3c9e!jCt8_E zM%lz9C0R~_=kj0Uh)7e)7%)o}US3}9=+ifz(Tg}LLY>0mrQq%WcurC(GqJF+$N=%t zm8Dw>!8N6^@mi#iy(};QFBl<|vGDP!!81tGmv`?*V);g=jlK-kxmyos@0PZAbg1d+ zWu>H?l@C3C<m%Uq3#O 
zzs@I;S%F)YhXcD-raGWs*THmYw9Z?H<(l$v#oFhmT)gJp8R6BXc2jY~pl9{gWIgn` zd2M`YtN|4*tSq?iXhT?i;p^)Qe5-h>eE0=<)ooW8&!4nB%rZOYyB8xWn@g>rro^1zS;~Gzyh-_*0~Qkmrii$sX{E}1>jRv zUEOW!?qDZ9_{usS$`dh9jxw{JKd&q(C>SQ}kae6>C*|eKms%4vqRy)4&YfGY!??vk zDP#^5alrkN?cd)TbMZmg*Zm(4%&#=rQ&Lif@fiE0rKcO_Tn|gR74q@!y3tUXlUaXZ z=1f5mffi?0&Ht){UrkLdjL-Z9*iE%JbuyHMYb}c!YHE_7q(4rEj|)iJGBVg{h3ryt z8ax>Wrov(M4ULUwjKw!XPn|lYs;C&VvSR1tW~i2amJ2Ry{qzXWGt4 zvUjfuSO%yTX@xnCiVUM%e{9lc+-h$T?m&iE-o&LK(&^r zQCVAC=l0&Af=dhColW`t`EwE*4c(Z+u(YzheZg_|>nP&E&kt@SEi6|>HVsxh&(f{? zI*w6PR2(^R=roKSmQ+bX!XrsOJn7-Xy>OMP_wQ>P8q%l89!nK>@bgC*yW((w&XUox zo4kgANPT2QZ>dA}zC*voz_6}dzHIW^gJ`-ZI~r<-J$9b&MyC6PJv}|wS2{3ws7tVN zaw^aY-&Hj;OM_}*B1n^aOrsrUqi1#@3r|$X|uV3ViiDQR038v3<3|l8&DW7 z+gj~kT3+se;$9+58NX2|cjH_XHp*pQ(8>eWtPQ{i4-WHRlwP;O6@J0YW-tPqJh0brGXQP%eB*BmN$ zU9-VYwp{}ACcl0p?7{k~yn=#{tgLLlk2syh!^Z-&7!G(vXLP@3(W$*vqai+^)%ennnQ zEeJ}`wDM6Rnsa>xDo_Uu^z(~>@`MzKsXwK*ay%@Zhc6<9I%YnYGx6r)B+pla!`N-8 z3C_3-Z$!<=ifHX~SWhY`(E=~X&(U(|z=1Z198{U=A!B!21beb$Ky82B1-8txty`H9 zjJG55e)yip?_voOm!# zBXA7?gO)v@stg1_m7mWW^`&1DTwA7gk^0rEk08bDOp>Rb8!GR&A8x_VeQX{F|JWWS z>H>W#P;qjuv$x;NA%)whuBnld@}W_(w9M)&!lLHWBEF+@^5jV>UXx!MaN55(Gz-FB zLK8no9PiE#q!takXi>`if z?%Sge0{jck_-`{9e?_OZAmir@qwcB0k@+cKi8yqppM z3M!yN0}O0-QQbK9<8t0TzT(@C-h|DqEqhRE%{-T)Q@VETnfAtYc$CPkTes|>Fh0Z^ z6&jif&h{)PXLF_nJf;Sv?qjN0SXtYlKz<7B6R;dBY}%lppmmTf1O)`d!?W|G-JvQa zqHB~1PpYFEgd&oS01Yb(i;|vR6xenuIEE5RRURH5`~xTtCi#WICiCwmA|S4wnb=E@%}{5RXXFhE#l(I6 z7(2UsSZeA!E(p>|2^nbXXBu%glo{YBQI!GKqZ5c2-Rww(WORef&fWqCw~7udLrvW(0VU=7eXA>$8LX*WM0Xg}R4gAw1#zhz*c+uzX8&;jwxZgo}@$cMF! 
zjdW0$I*&e=GRvIkzkK=fS-?^dz4@w#aRl8SXanRMGzG2Idxve0NI6u?d3e;%|ALaqOvfO zbDbSLMfy3|xPnzxRV?i6*8tSBva%|}@B!Pez_S#k{lN)l16b?%^_C3T;lmYUZds%~ z;6=%)c%_+`n800zgIFyC5+In#^r3=1h}G#`yF#DxQsA(lhkAEW=M_b;$CA5qX(1uM z0lIr7MgX2o3feomfQrL|d~TQzO>oMgF$-{82pIb;6+QP251@Gx)HAb0MJh4TV(a*9 z_e^l#Y=HCp1O5G#<6nSQ3SA*=#ahzp!f-bDuXffOhSbb^r$jC^6N!uw<(8<@; z)#1~?iu+KOl0!jK@_rH~IxC1xgH>Eye{F5eAI7G|!oYA5HK(_4-{u?C9EP`30WJu+ ztY(;R<8L?vZAisX0DdORgH;AsG=>?0vgt`8r8JL8$v^1M?2gnw& zndP~;%HYo69p#%hV*w@|`{kDyC>y`3o`8*0;EXpLo0yn^lmwnpnKo1Y^aNKLWMB4M z6aI{IgQXf^4vRC-Yjw zI$-APWo0_GiW#p=D zyubgiGRlsQU*#9T>Whcq0W^0}Mu$TSAUNC0q=W<$cz_a`76DoLJGdyoPx2w$vE9YP zcg54~=IW>L>FE&h>RVbgY;1DDRu|SAKfQZAqXTSsc~C?IJA4Opec7-av+~dm6&{}6 z-mWYP?GRZm@=U4u`T5No#gq=j@Kh)~NMojZUn+18DRYzVzcsGm@tI5FYu zkHryG0I-4>N2X+TE2F)@MPm^4$IuNW=PzV88Ka@+bj5bOu?+-wXVSZ-`!K*zo9*q? zhMi_l76rml{{+ZLs4xt7g(4vjPzTm@P!KX;OXZE7Rst>F!bi(_+h zne&6CGZh>29bj|$AZ7>`17(n)#$?%i^~+$m7oZ^UbNw3aJ(%XL@%HNge<2Wj$Q+sc zkQ0wSgLE2|sDQzXC*cP#k*(8g-BqW0?c)o(EuJmAtEd^jO zR5iu8jk}TJ?s~ZO73-Pb5aiVn4Rl4C+v278Ec$pM9fAm=aPi{B-2S<~a+e|>TH&PR zC}&xC2Z#>Eo$ajx(~dK=f;N}o@lML%A??3@xe54A4m8`4nq|!y85&N__U56H8j@8M zqQG66L+`$S0tiOHr)|w)-Bkd(gXP47tbFJ^ch4A^akgIhN=NjzDx|GiWsW(}H=v*5 zzO(HNksiN3lZ~DLAhjxhm1g)-$0nVq^CUQ#WS|p(gb)^tRPd(an@(!c%v2Wzc?z16 zfC?uOlDr;`*2~a2G1(dmd+2`+m;~!VGp$i`fMr@Xu7uf-Z-3=PMa=@%B2$4e0bH?j;`Z%mV8c?%G;0blma}rc0~gd>113hGF0-w9 zccXx%iIi>Cme=zFf%_Lg4L)Ei_abo1;8Wl-U0}}<=sGW8sl9fiZuilm%X^n?pEPIA znnjD0fUP#a#Kc4|U*D_1v7oKM?6x*yqZ2SOf%=ooUqsqtbJX8Q6puZ zHwSp;tfD{g*kFgv$G~&wSXws+VG;$ZRGQi8`OmKWQp#J#EJzpBp7M0{b6Mw<&;$Sn CbA&4Z literal 0 HcmV?d00001 diff --git a/doc/source/basics.rst b/doc/source/basics.rst index dc43c1177f8c3..8e78ac597479b 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -11,6 +11,7 @@ from pandas.compat import lrange options.display.max_rows=15 + ============================== Essential Basic Functionality ============================== @@ -793,6 +794,7 @@ This is equivalent to the following result result.loc[:,:,'ItemA'] + .. 
_basics.reindexing: diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 44321375d31a2..6eb13ce722fff 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -450,6 +450,82 @@ available to insert at a particular location in the columns: df.insert(1, 'bar', df['one']) df +.. _dsintro.chained_assignment: + +Assigning New Columns in Method Chains +-------------------------------------- + +.. versionadded:: 0.16.0 + +Inspired by `dplyr's +`__ +``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` +method that allows you to easily create new columns that are potentially +derived from existing columns. + +.. ipython:: python + + iris = read_csv('data/iris.data') + iris.head() + + (iris.assign(sepal_ratio = iris['SepalWidth'] / iris['SepalLength']) + .head()) + +Above was an example of inserting a precomputed value. We can also pass in +a function of one argument to be evaluated on the DataFrame being assigned to. + +.. ipython:: python + + iris.assign(sepal_ratio = lambda x: (x['SepalWidth'] / + x['SepalLength'])).head() + +``assign`` **always** returns a copy of the data, leaving the original +DataFrame untouched. + +Passing a callable, as opposed to an actual value to be inserted, is +useful when you don't have a reference to the DataFrame at hand. This is +common when using ``assign`` in chains of operations. For example, +we can limit the DataFrame to just those observations with a Sepal Length +greater than 5, calculate the ratio, and plot: + +.. ipython:: python + + @savefig basics_assign.png + (iris.query('SepalLength > 5') + .assign(SepalRatio = lambda x: x.SepalWidth / x.SepalLength, + PetalRatio = lambda x: x.PetalWidth / x.PetalLength) + .plot(kind='scatter', x='SepalRatio', y='PetalRatio')) + +Since a function is passed in, the function is computed on the DataFrame +being assigned to. Importantly, this is the DataFrame that's been filtered +to those rows with sepal length greater than 5.
The filtering happens first, +and then the ratio calculations. This is an example where we didn't +have a reference to the *filtered* DataFrame available. + +The function signature for ``assign`` is simply ``**kwargs``. The keys +are the column names for the new fields, and the values are either a value +to be inserted (for example, a ``Series`` or NumPy array), or a function +of one argument to be called on the ``DataFrame``. A *copy* of the original +DataFrame is returned, with the new values inserted. + +.. warning:: + + Since the function signature of ``assign`` is ``**kwargs``, a dictionary, + the order of the new columns in the resulting DataFrame cannot be guaranteed. + + All expressions are computed first, and then assigned. So you can't refer + to another column being assigned in the same call to ``assign``. For example: + + .. ipython:: + :verbatim: + + In [1]: # Don't do this, bad reference to `C` + df.assign(C = lambda x: x['A'] + x['B'], + D = lambda x: x['A'] + x['C']) + In [2]: # Instead, break it into two assigns + (df.assign(C = lambda x: x['A'] + x['B']) + .assign(D = lambda x: x['A'] + x['C'])) + Indexing / Selection ~~~~~~~~~~~~~~~~~~~~ The basics of indexing are as follows: diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index ead3c79430bf9..b9c358f24f460 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -29,6 +29,47 @@ New features This method is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods. +- DataFrame assign method + +Inspired by `dplyr's +`__ ``mutate`` verb, DataFrame has a new +:meth:`~pandas.DataFrame.assign` method. +The function signature for ``assign`` is simply ``**kwargs``. The keys +are the column names for the new fields, and the values are either a value +to be inserted (for example, a ``Series`` or NumPy array), or a function +of one argument to be called on the ``DataFrame``. 
The new values are inserted, +and the entire DataFrame (with all original and new columns) is returned. + +.. ipython :: python + + iris = read_csv('data/iris.data') + iris.head() + + iris.assign(sepal_ratio=iris['SepalWidth'] / iris['SepalLength']).head() + +Above was an example of inserting a precomputed value. We can also pass in +a function to be evaluated. + +.. ipython :: python + + iris.assign(sepal_ratio = lambda x: (x['SepalWidth'] / + x['SepalLength'])).head() + +The power of ``assign`` comes when used in chains of operations. For example, +we can limit the DataFrame to just those with a Sepal Length greater than 5, +calculate the ratio, and plot + +.. ipython:: python + + (iris.query('SepalLength > 5') + .assign(SepalRatio = lambda x: x.SepalWidth / x.SepalLength, + PetalRatio = lambda x: x.PetalWidth / x.PetalLength) + .plot(kind='scatter', x='SepalRatio', y='PetalRatio')) + +.. image:: _static/whatsnew_assign.png + +See the :ref:`documentation <dsintro.chained_assignment>` for more. (:issue:`9229`) + .. _whatsnew_0160.api: .. _whatsnew_0160.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d64353db8cda6..97e3560e3fcb1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2220,6 +2220,88 @@ def insert(self, loc, column, value, allow_duplicates=False): self._data.insert( loc, column, value, allow_duplicates=allow_duplicates) + def assign(self, **kwargs): + """ + Assign new columns to a DataFrame, returning a new object + (a copy) with all the original columns in addition to the new ones. + + .. versionadded:: 0.16.0 + + Parameters + ---------- + kwargs : keyword, value pairs + keywords are the column names. If the values are + callable, they are computed on the DataFrame and + assigned to the new columns. If the values are + not callable, (e.g. a Series, scalar, or array), + they are simply assigned. + + Returns + ------- + df : DataFrame + A new DataFrame with the new columns in addition to + all the existing columns.
+ + Notes + ----- + Since ``kwargs`` is a dictionary, the order of your + arguments may not be preserved, and so the order of the + new columns is not well defined. Assigning multiple + columns within the same ``assign`` is possible, but you cannot + reference other columns created within the same ``assign`` call. + + Examples + -------- + >>> df = DataFrame({'A': range(1, 11), 'B': np.random.randn(10)}) + + Where the value is a callable, evaluated on `df`: + + >>> df.assign(ln_A = lambda x: np.log(x.A)) + A B ln_A + 0 1 0.426905 0.000000 + 1 2 -0.780949 0.693147 + 2 3 -0.418711 1.098612 + 3 4 -0.269708 1.386294 + 4 5 -0.274002 1.609438 + 5 6 -0.500792 1.791759 + 6 7 1.649697 1.945910 + 7 8 -1.495604 2.079442 + 8 9 0.549296 2.197225 + 9 10 -0.758542 2.302585 + + Where the value already exists and is inserted: + + >>> newcol = np.log(df['A']) + >>> df.assign(ln_A=newcol) + A B ln_A + 0 1 0.426905 0.000000 + 1 2 -0.780949 0.693147 + 2 3 -0.418711 1.098612 + 3 4 -0.269708 1.386294 + 4 5 -0.274002 1.609438 + 5 6 -0.500792 1.791759 + 6 7 1.649697 1.945910 + 7 8 -1.495604 2.079442 + 8 9 0.549296 2.197225 + 9 10 -0.758542 2.302585 + """ + data = self.copy() + + # do all calculations first... + results = {} + for k, v in kwargs.items(): + + if callable(v): + results[k] = v(data) + else: + results[k] = v + + # ... 
and then assign + for k, v in results.items(): + data[k] = v + + return data + def _sanitize_column(self, key, value): # Need to make sure new columns (which go into the BlockManager as new # blocks) are always copied diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 9ec890a1d1856..f7c91501b683b 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -13965,6 +13965,60 @@ def test_select_dtypes_bad_arg_raises(self): with tm.assertRaisesRegexp(TypeError, 'data type.*not understood'): df.select_dtypes(['blargy, blarg, blarg']) + def test_assign(self): + df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + original = df.copy() + result = df.assign(C=df.B / df.A) + expected = df.copy() + expected['C'] = [4, 2.5, 2] + assert_frame_equal(result, expected) + + # lambda syntax + result = df.assign(C=lambda x: x.B / x.A) + assert_frame_equal(result, expected) + + # original is unmodified + assert_frame_equal(df, original) + + # Non-Series array-like + result = df.assign(C=[4, 2.5, 2]) + assert_frame_equal(result, expected) + # original is unmodified + assert_frame_equal(df, original) + + result = df.assign(B=df.B / df.A) + expected = expected.drop('B', axis=1).rename(columns={'C': 'B'}) + assert_frame_equal(result, expected) + + # overwrite + result = df.assign(A=df.A + df.B) + expected = df.copy() + expected['A'] = [5, 7, 9] + assert_frame_equal(result, expected) + + # lambda + result = df.assign(A=lambda x: x.A + x.B) + assert_frame_equal(result, expected) + + def test_assign_multiple(self): + df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) + expected = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9], + 'D': [1, 2, 3], 'E': [4, 5, 6]}) + # column order isn't preserved + assert_frame_equal(result.reindex_like(expected), expected) + + def test_assign_bad(self): + df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + # non-keyword argument + with 
tm.assertRaises(TypeError): + df.assign(lambda x: x.A) + with tm.assertRaises(AttributeError): + df.assign(C=df.A, D=df.A + df.C) + with tm.assertRaises(KeyError): + df.assign(C=lambda df: df.A, D=lambda df: df['A'] + df['C']) + with tm.assertRaises(KeyError): + df.assign(C=df.A, D=lambda x: x['A'] + x['C']) def skip_if_no_ne(engine='numexpr'): if engine == 'numexpr':