查看类型
df.dtypes
转换类型
df["a"].astype("int")
显示数据的快速统计摘要(describe)
df.describe()
按轴排序
df.sort_index(axis=1, ascending=False)
按值排序
df.sort_values(by='a', ascending=False)  # DataFrame.sort() was removed in pandas 0.20; sort_values() is its replacement
按整数位置切片(iloc)
df.iloc[3:7,0:6]
df.iloc[[1,2,4],[0,2]]
显式获取一个值
df.iloc[1,1]
df.iat[1,1]
布尔索引
df[df.A > 0]
isin() 筛选
df[df['E'].isin(['two','four'])]
缺失值处理(dropna / fillna)
df.dropna(how='any')  # drop every row that contains at least one NaN
df.fillna(value=5)  # replace every NaN with 5
df.fillna({1: 0, 2: 0.5})  # per-column fill keyed by column label: column 1 -> 0, column 2 -> 0.5
df.ffill()  # forward-fill down each column; replaces fillna(method='ffill'), deprecated since pandas 2.1
apply函数
#Create a new function:
def num_missing(x):
    """Return the number of missing (NaN/None) entries in *x*.

    *x* is any object exposing ``isnull()`` that yields a boolean pandas
    object, e.g. the Series that ``DataFrame.apply`` passes for each row or
    column.
    """
    # Summing the boolean mask counts the True (missing) entries.
    return x.isnull().sum()
# Applying per column:
print("Missing values per column:")
print(data.apply(num_missing, axis=0))  # axis=0: the function is applied to each column
# Applying per row:
print("\nMissing values per row:")  # leading newline separates this from the per-column output
print(data.apply(num_missing, axis=1).head())  # axis=1: the function is applied to each row
合并
pieces = [df[:3], df[3:7], df[7:]]
pd.concat(pieces)
连接
pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None,
left_index=False, right_index=False, sort=True,
suffixes=('_x', '_y'), copy=True, indicator=False,
validate=None)
left.join(right, on=['key1', 'key2'], how='inner')
groupby
df.groupby('A').sum()
数据透视
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])
修改列名
df.rename(columns={"a":"A","b":"B"}, inplace=True)
df.rename(mapper=None, index=None, columns=None, axis=None, copy=True, inplace=False, level=None)
设置 / 重置索引(index)
# Use the column labelled "columns" as the index (returns a new DataFrame).
df.set_index("columns")
df.reset_index() # back to the default integer index; the old index becomes a column
# Remove duplicate rows (each call returns a new DataFrame).
df.drop_duplicates()
df.drop_duplicates('a', keep=False) # drop every row whose 'a' value occurs more than once
df.drop_duplicates('a', keep='last') # for each repeated 'a' value keep only the last row
pandas数据存入MongoDB
# Sort by type_count, then type_mean (both descending) and store every row
# as one MongoDB document.
ranked = type_id_frame.sort_values(
    by=['type_count', 'type_mean'], ascending=False)
for _, row in ranked.iterrows():
    # Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4;
    # insert_one() is the single-document replacement.
    db[MONGO_TABLE].insert_one(row.to_dict())
插入列(将列 a 移到最前面)
# Move column "a" to the front: pop() removes it from df and returns it,
# insert() puts it back at position 0. Both calls modify df in place.
a = df.pop("a")
df.insert(0,"a",a)