# two - python分組

## 將多個函數應用於多個groupby列 (2)

``````
In [28]: df
Out[28]:
          A         B         C         D         E  GRP
0  0.395670  0.219560  0.600644  0.613445  0.242893    0
1  0.323911  0.464584  0.107215  0.204072  0.927325    0
2  0.321358  0.076037  0.166946  0.439661  0.914612    1
3  0.133466  0.447946  0.014815  0.130781  0.268290    1

In [26]: f = {'A':['sum','mean'], 'B':['prod']}

In [27]: df.groupby('GRP').agg(f)
Out[27]:
            A                   B
          sum      mean      prod
GRP
0    0.719580  0.359790  0.102004
1    0.454824  0.227412  0.034060
``````

``````
In [67]: f = {'A':['sum','mean'], 'B':['prod'], 'D': lambda g: df.loc[g.index].E.sum()}

In [69]: df.groupby('GRP').agg(f)
Out[69]:
            A                   B         D
          sum      mean      prod  <lambda>
GRP
0    0.719580  0.359790  0.102004  1.170219
1    0.454824  0.227412  0.034060  1.182901
``````

``````
In [95]: cust = lambda g: g[df.loc[g.index]['C'] < 0.5].sum()

In [96]: f = {'A':['sum','mean'], 'B':['prod'], 'D': {'my name': cust}}

In [97]: df.groupby('GRP').agg(f)
Out[97]:
            A                   B         D
          sum      mean      prod   my name
GRP
0    0.719580  0.359790  0.102004  0.204072
1    0.454824  0.227412  0.034060  0.570441
``````

文檔（docs）展示了如何使用以輸出列名作為鍵的dict，一次在groupby對象上應用多個函數：

``````
In [563]: grouped['D'].agg({'result1' : np.sum,
   .....:                   'result2' : np.mean})
   .....:
Out[563]:
      result2   result1
A
bar -0.579846 -1.739537
foo -0.280588 -1.402938
``````

``````
grouped.agg({'C_sum' : lambda x: x['C'].sum(),
             'C_std': lambda x: x['C'].std(),
             'D_sum' : lambda x: x['D'].sum(),
             'D_sumifC3': lambda x: x['D'][x['C'] == 3].sum(), ...})
``````

``````
df = pd.DataFrame(np.random.rand(4,4), columns=list('abcd'))
df['group'] = [0, 0, 1, 1]
df

          a         b         c         d  group
0  0.418500  0.030955  0.874869  0.145641      0
1  0.446069  0.901153  0.095052  0.487040      0
2  0.843026  0.936169  0.926090  0.041722      1
3  0.635846  0.439175  0.828787  0.714123      1
``````

``````
df.groupby('group').agg({'a':['sum', 'max'],
                         'b':'mean',
                         'c':'sum',
                         'd': lambda x: x.max() - x.min()})

              a                   b         c         d
            sum       max      mean       sum  <lambda>
group
0      0.560541  0.507058  0.418546  1.707651  0.129667
1      0.187757  0.157958  0.887315  0.533531  0.652427
``````

``````
def max_min(x):
    return x.max() - x.min()

max_min.__name__ = 'Max minus Min'

df.groupby('group').agg({'a':['sum', 'max'],
                         'b':'mean',
                         'c':'sum',
                         'd': max_min})

              a                   b         c             d
            sum       max      mean       sum Max minus Min
group
0      0.560541  0.507058  0.418546  1.707651      0.129667
1      0.187757  0.157958  0.887315  0.533531      0.652427
``````

## 使用`apply`並返回一個Series

``````
def f(x):
    d = {}
    d['a_sum'] = x['a'].sum()
    d['a_max'] = x['a'].max()
    d['b_mean'] = x['b'].mean()
    d['c_d_prodsum'] = (x['c'] * x['d']).sum()
    return pd.Series(d, index=['a_sum', 'a_max', 'b_mean', 'c_d_prodsum'])

df.groupby('group').apply(f)
          a_sum     a_max    b_mean  c_d_prodsum
group
0      0.560541  0.507058  0.418546     0.118106
1      0.187757  0.157958  0.887315     0.276808
``````

``````
def f_mi(x):
    d = []
    d.append(x['a'].sum())
    d.append(x['a'].max())
    d.append(x['b'].mean())
    d.append((x['c'] * x['d']).sum())
    return pd.Series(d, index=[['a', 'a', 'b', 'c_d'],
                               ['sum', 'max', 'mean', 'prodsum']])

df.groupby('group').apply(f_mi)

              a                   b       c_d
            sum       max      mean   prodsum
group
0      0.560541  0.507058  0.418546  0.118106
1      0.187757  0.157958  0.887315  0.276808
``````