*In[2]:*
#SERIES
#DATAFRAMES
#MISSING DATA
#GROUPBY
#MERGING, JOINING AND CONCATENATION
#OPERATIONS
#DATA INPUTS AND OUTPUTS*In[3]:*
#DATA FRAMES
import numpy as np
import pandas as py*In[4]:*
from numpy.random import randn*In[5]:*
np.random.seed(101) # use same random numbers*In[8]:*
df=py.DataFrame(randn(5,4),['A','B','C','D','E'],['W','X','Y','Z'])*In[9]:*
df*Out[9]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[10]:*
#Indexing and Selection*In[11]:*
df['W']*Out[11]:* ----A 2.706850 B 0.651118 C -2.018168 D 0.188695 E 0.190794 Name: W, dtype: float64----
*In[12]:*
type(df['W'])*Out[12]:* ----pandas.core.series.Series----
*In[13]:*
type(df)*Out[13]:* ----pandas.core.frame.DataFrame----
*In[14]:*
df['X']*Out[14]:* ----A 0.628133 B -0.319318 C 0.740122 D -0.758872 E 1.978757 Name: X, dtype: float64----
*In[16]:*
# A list of column labels selects several columns at once; the result is shown as a table.
df[['W','Z']]*Out[16]:*
[cols=",,",options="header",] |=== | |W |Z |A |2.706850 |0.503826 |B |0.651118 |0.605965 |C |-2.018168 |-0.589001 |D |0.188695 |0.955057 |E |0.190794 |0.683509 |===
*In[17]:*
df*Out[17]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[18]:*
# Add a column 'new' holding the row-wise sum of W and Y; this assignment changes df itself.
df['new']=df['W']+df['Y']*In[19]:*
df*Out[19]:*
[cols=",,,,,",options="header",] |=== | |W |X |Y |Z |new |A |2.706850 |0.628133 |0.907969 |0.503826 |3.614819 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |-0.196959 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |-1.489355 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |-0.744542 |E |0.190794 |1.978757 |2.605967 |0.683509 |2.796762 |===
*In[21]:*
# axis=1 targets column labels. The returned frame lacks 'new', but df itself still has it
# (the next display of df shows the column is still there).
df.drop('new',axis=1)*Out[21]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[22]:*
df*Out[22]:*
[cols=",,,,,",options="header",] |=== | |W |X |Y |Z |new |A |2.706850 |0.628133 |0.907969 |0.503826 |3.614819 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |-0.196959 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |-1.489355 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |-0.744542 |E |0.190794 |1.978757 |2.605967 |0.683509 |2.796762 |===
*In[23]:*
df.drop('new',axis=1,inplace=True)*In[24]:*
df*Out[24]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[25]:*
# axis=0 targets index labels, so the returned frame has no row 'E'; df itself is unchanged.
df.drop('E',axis=0)*Out[25]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |===
*In[26]:*
df*Out[26]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[28]:*
# With axis omitted, drop() removes index labels (rows): same result as axis=0.
df.drop('E')*Out[28]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |===
*In[29]:*
# shape is (number of rows, number of columns).
df.shape*Out[29]:* ----(5, 4)----
*In[30]:*
df*Out[30]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[31]:*
# .loc selects by index label; a single label returns that row as a Series.
df.loc['A'] #selecting Row*Out[31]:* ----W 2.706850 X 0.628133 Y 0.907969 Z 0.503826 Name: A, dtype: float64----
*In[32]:*
# .iloc selects by integer position (0-based): position 2 is the row labelled 'C'.
df.iloc[2]*Out[32]:* ----W -2.018168 X 0.740122 Y 0.528813 Z -0.589001 Name: C, dtype: float64----
*In[33]:*
# A row label plus a column label returns the single value at that cell.
df.loc['B','Y']*Out[33]:* -----0.8480769834036315----
*In[34]:*
df*Out[34]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[35]:*
# Lists of row labels and column labels return the matching sub-frame.
df.loc[['A','B'],['W','X']]*Out[35]:*
[cols=",,",options="header",] |=== | |W |X |A |2.706850 |0.628133 |B |0.651118 |-0.319318 |===
*In[36]:*
#Conditional Statements*In[37]:*
# Element-wise comparison: produces a frame of booleans with the same rows and columns as df.
df>0*Out[37]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |True |True |True |True |B |True |False |False |True |C |False |True |True |False |D |True |False |False |True |E |True |True |True |True |===
*In[38]:*
df*Out[38]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[39]:*
# Keep the boolean mask under a name so it can be reused for indexing.
booldf=df>0*In[40]:*
# Indexing with a boolean frame keeps values where the mask is True and shows NaN elsewhere.
df[booldf]*Out[40]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |NaN |NaN |0.605965 |C |NaN |0.740122 |0.528813 |NaN |D |0.188695 |NaN |NaN |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[41]:*
df[df>0]*Out[41]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |NaN |NaN |0.605965 |C |NaN |0.740122 |0.528813 |NaN |D |0.188695 |NaN |NaN |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[42]:*
df['W']>0*Out[42]:* ----A True B True C False D True E True Name: W, dtype: bool----
*In[43]:*
df['W']*Out[43]:* ----A 2.706850 B 0.651118 C -2.018168 D 0.188695 E 0.190794 Name: W, dtype: float64----
*In[44]:*
# A boolean Series filters whole rows: only rows where W > 0 are kept (row 'C' is left out).
df[df['W']>0]*Out[44]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[45]:*
df*Out[45]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |C |-2.018168 |0.740122 |0.528813 |-0.589001 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[46]:*
# Same row filter on another column: only row 'C' has a negative Z.
df[df['Z']<0]*Out[46]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |C |-2.018168 |0.740122 |0.528813 |-0.589001 |===
*In[47]:*
# Store the filtered rows (W > 0) under a name so further selections can be made on them.
resultdf=df[df['W']>0]*In[48]:*
resultdf*Out[48]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[50]:*
resultdf['X']*Out[50]:* ----A 0.628133 B -0.319318 D -0.758872 E 1.978757 Name: X, dtype: float64----
*In[52]:*
df[df['W']>0]['X']*Out[52]:* ----A 0.628133 B -0.319318 D -0.758872 E 1.978757 Name: X, dtype: float64----
*In[53]:*
df[df['W']>0][['Y','X']]*Out[53]:*
[cols=",,",options="header",] |=== | |Y |X |A |0.907969 |0.628133 |B |-0.848077 |-0.319318 |D |-0.933237 |-0.758872 |E |2.605967 |1.978757 |===
*In[54]:*
# Step-by-step version: build the row mask, filter the rows, then pick columns Y and X.
# Boolean Series, True where W is positive.
boolser=df['W']>0
# Rows of df where the mask is True.
result=df[boolser]
# Column labels to keep, in display order.
mycols=['Y','X']
result[mycols]*Out[54]:*
[cols=",,",options="header",] |=== | |Y |X |A |0.907969 |0.628133 |B |-0.848077 |-0.319318 |D |-0.933237 |-0.758872 |E |2.605967 |1.978757 |===
*In[55]:*
df[(df['W']>0) and (df['Y']>1)]*Out[55]:*
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-55-17d0fc17a94d> in <module>
----> 1 df[(df['W']>0) and (df['Y']>1)]
~/miniconda3/lib/python3.8/site-packages/pandas/core/generic.py in __nonzero__(self)
1440 @final
1441 def __nonzero__(self):
-> 1442 raise ValueError(
1443 f"The truth value of a {type(self).__name__} is ambiguous. "
1444 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
*In[57]:*
# `and` works on plain Python booleans, where each operand is a single truth value.
True and True*Out[57]:* ----True----
*In[58]:*
# `&` combines the two masks element-wise (logical AND); the parentheses keep each comparison
# grouped before `&` is applied. Only row 'E' satisfies both conditions.
df[(df['W']>0) & (df['Y']>1)]*Out[58]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[59]:*
# `|` combines the two masks element-wise (logical OR): rows where either condition holds.
df[(df['W']>0) | (df['Y']>1)]*Out[59]:*
[cols=",,,,",options="header",] |=== | |W |X |Y |Z |A |2.706850 |0.628133 |0.907969 |0.503826 |B |0.651118 |-0.319318 |-0.848077 |0.605965 |D |0.188695 |-0.758872 |-0.933237 |0.955057 |E |0.190794 |1.978757 |2.605967 |0.683509 |===
*In[ ]:*