import pandas as pd
import numpy as np
# Build a pandas Series from a NumPy array of single-character strings.
data = np.array(['p', 'a', 'n', 'd', 'a', 's'])
myseries = pd.Series(data)
# The print call must end at the closing parenthesis: a trailing invisible
# zero-width space (U+200B) on this line is a SyntaxError in Python.
print(myseries)
import pandas as pd
# Column-oriented input: each key names a column, each list holds its values.
d = {
    'col1': [1, 2],
    'col2': [3, 4],
}
df = pd.DataFrame(d)
print(df)
Output :
col1 col2
0 1 3
1 2 4
import pandas as pd
# Two plain dicts keyed by country name; each becomes a Series, and the two
# Series become the columns of one DataFrame.
country_population = {
    'India': 1600000000,
    'China': 1730000000,
    'USA': 390000000,
    'UK': 450000000,
}
country_land = {
    'India': '2547869 hectares',
    'China': '9543578 hectares',
    'USA': '5874658 hectares',
    'UK': '6354652 hectares',
}
population, area = pd.Series(country_population), pd.Series(country_land)
df = pd.DataFrame({'Population': population, 'SpaceOccupied': area})
print(df)
import pandas as pd
# Two integer indexes that share the labels 3, 5 and 7.
index_A, index_B = (
    pd.Index([1, 3, 5, 7, 9]),
    pd.Index([2, 3, 5, 7, 11]),
)
import pandas as pd
import numpy as np
# Number of rows in the sample frame.
N = 20
df = pd.DataFrame({
    'A': pd.date_range(start='2021-01-18', periods=N, freq='D'),
    'x': np.linspace(0, stop=N - 1, num=N),
    'y': np.random.rand(N),
    'C': np.random.choice(['Low', 'Medium', 'High'], N).tolist(),
    'D': np.random.normal(100, 10, size=(N)).tolist(),
})
# Reindex the DataFrame: keep only rows 0, 2 and 5 and columns A, C and B.
# 'B' is not a column of df, so it appears in the result filled with NaN.
df_reindexed = df.reindex(index=[0, 2, 5], columns=['A', 'C', 'B'])
# The print call must end at the closing parenthesis: a trailing invisible
# zero-width space (U+200B) on this line is a SyntaxError in Python.
print(df_reindexed)
Output :
A C B
0 2021-01-18 Medium NaN
2 2021-01-20 High NaN
5 2021-01-23 Low NaN
import pandas as pd
import numpy as np
# df1 has four rows and df2 only two; conforming df2 to df1's labels leaves
# the rows that df2 does not have filled with NaN.
column_names = ['col1', 'col2', 'col3']
df1 = pd.DataFrame(np.random.randn(4, 3), columns=column_names)
df2 = pd.DataFrame(np.random.randn(2, 3), columns=column_names)
aligned = df2.reindex_like(df1)
print(aligned)
Output :
col1 col2 col3
0 -0.641715 1.031070 -0.208415
1 -1.560385 -0.584403 0.291666
2 NaN NaN NaN
3 NaN NaN NaN
Reindexing using fill methods (bfill or ffill)
import pandas as pd
import numpy as np
# Same setup as a plain reindex_like, but method='ffill' fills the rows that
# df2 lacks by carrying its last available row forward.
shared_columns = ['col1', 'col2', 'col3']
df1 = pd.DataFrame(np.random.randn(4, 3), columns=shared_columns)
df2 = pd.DataFrame(np.random.randn(2, 3), columns=shared_columns)
forward_filled = df2.reindex_like(df1, method='ffill')
print(forward_filled)
Output :
col1 col2 col3
0 1.332612 -0.479218 -1.016999
1 -1.091319 -0.844934 -0.492755
2 -1.091319 -0.844934 -0.492755
3 -1.091319 -0.844934 -0.492755
import pandas as pd
import numpy as np
# With method='bfill' a missing row is filled from the next available row.
# df2 has no rows after index 1, so rows 2 and 3 stay NaN.
frame_columns = ['col1', 'col2', 'col3']
df1 = pd.DataFrame(np.random.randn(4, 3), columns=frame_columns)
df2 = pd.DataFrame(np.random.randn(2, 3), columns=frame_columns)
back_filled = df2.reindex_like(df1, method='bfill')
print(back_filled)
Output :
col1 col2 col3
0 0.526663 -0.450748 0.791112
1 -1.805287 0.641050 1.864871
2 NaN NaN NaN
3 NaN NaN NaN
pandas.concat()
function does all the heavy lifting of performing concatenation operations along an axis of pandas objects while performing optional set logic (union or intersection) of the indexes (if any) on the other axes.
import numpy as np
import pandas as pd
# Concatenation keeps each Series' own index labels, so 0 and 1 both appear
# twice in the printed result.
s1, s2 = pd.Series(['a', 'b']), pd.Series(['c', 'd'])
print(pd.concat([s1, s2]))
Output :
0 a
1 b
0 c
1 d
dtype: object
pandas.Series.copy
Series.copy(deep=True)
pandas.Series.copy
. Make a deep copy, including a copy of the data and the indices. With deep=False neither the indices nor the data are copied. Note that when deep=True the data is copied, but actual Python objects will not be copied recursively, only the reference to the object.
.loc ()
, .iloc ()
and .ix()
. The .loc ()
is label based, .iloc()
is integer based and .ix()
is both label and integer based. .loc ()
or .iloc ()
.
import pandas as pd
# Calling the constructor with no arguments gives an empty DataFrame
# (no columns, no index).
info = pd.DataFrame()
# The print call must end at the closing parenthesis: a trailing invisible
# zero-width space (U+200B) on this line is a SyntaxError in Python.
print(info)
Output :
Empty DataFrame
Columns: []
Index: []
# Count the missing values in each column of df (isna is the same check as isnull).
missing_data_count = df.isna().sum()
We can handle missing values by either replacing the values in the column with 0 as follows:
df['column_name'] = df['column_name'].fillna(0)
Or by replacing them with the mean value of the column:
df['column_name'] = df['column_name'].fillna((df['column_name'].mean()))
pandas.MultiIndex(levels=None, codes=None, sortorder=None, names=None, dtype=None, copy=False, name=None, verify_integrity=True)
pivot_table()
method in the pandas library can be used. This is an effective method for drafting these pivot tables in pandas.
pandas.pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', observed=False)