pandas 使用技巧汇总

时间:2022-06-20 15:02:26

查看类型

df.dtypes

转换类型

df["a"].astype("int")

描述显示数据快速统计摘要

df.describe()

按轴排序

df.sort_index(axis=1, ascending=False)

按值排序

# DataFrame.sort() was removed from pandas; sort_values(by=...) is the replacement.
df.sort_values(by='a', ascending=False)

整数片断

df.iloc[3:7,0:6]
df.iloc[[1,2,4],[0,2]]

显式获取一个值

df.iloc[1,1]
df.iat[1,1]

布尔索引

df[df.A > 0]

isin() 筛选

df[df['E'].isin(['two','four'])]

drop na

df.dropna(how='any')

替换 na

df.fillna(value=5)
df.fillna({1: 0, 2: 0.5})  # dict keys are column labels: NaN in column 1 becomes 0, NaN in column 2 becomes 0.5
df.ffill()  # forward fill: each NaN takes the previous valid value in its column (replaces the deprecated fillna(method='ffill'))

apply函数

# Create a new function:
def num_missing(x):
    """Return the number of missing (null) entries in ``x``.

    ``x`` must provide ``isnull()``; with ``DataFrame.apply`` it receives one
    column (``axis=0``) or one row (``axis=1``) as a Series.
    """
    # isnull() yields boolean flags; summing them counts the True entries.
    return x.isnull().sum()

# Applying per column:
print("Missing values per column:")
# axis=0 applies the function to each column
print(data.apply(num_missing, axis=0))

# Applying per row:
# The leading "\n" (lost in the original text) separates the two reports.
print("\nMissing values per row:")
# axis=1 applies the function to each row; head() limits the output to the first few rows
print(data.apply(num_missing, axis=1).head())

合并

pieces = [df[:3], df[3:7], df[7:]]
pd.concat(pieces)

连接

pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None,
left_index=False, right_index=False, sort=True,
suffixes=('_x', '_y'), copy=True, indicator=False,
validate=None)
left.join(right, on=['key1', 'key2'], how='inner')

groupby

df.groupby('A').sum()

数据透视

pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])

修改列名

df.rename(columns={"a":"A","b":"B"}, inplace=True) 
df.rename(mapper=None, index=None, columns=None, axis=None, copy=True, inplace=False, level=None)

换index

df.set_index("columns")
df.reset_index() #原始数字

去重

df.drop_duplicates()
df.drop_duplicates('a', keep=False) #重复全弃
df.drop_duplicates('a', keep='last') #保留最后一个

pandas数据存入MongoDB

# Store each DataFrame row as one MongoDB document, ordered by
# type_count and then type_mean (both descending).
sorted_frame = type_id_frame.sort_values(
    by=['type_count', 'type_mean'], ascending=False)
for _, row in sorted_frame.iterrows():
    # Collection.insert() was removed in PyMongo 4; insert_one() is the
    # single-document replacement.
    db[MONGO_TABLE].insert_one(row.to_dict())

插入

a = df.pop("a")
df.insert(0,"a",a)