如何实现pandas中group by agg对多列调用多个函数批量计算指标

import pandas as pd
import numpy as np

def flatten_name(prefix, src_names):
    ret = []
    for c in src_names:
        if c[0] in ['time_id', 'stock_id']:
            ret.append(c[0])
        else:
            ret.append('_'.join([prefix] + list(c)))
    return ret

data=pd.DataFrame({'id':[1,2,1,2,1],'a':[1,2,3,4,5],'b':[1,2,3,4,5],'c':[1,2,3,4,5]})
agg = data.groupby('id').agg({'a':[np.mean, np.max],'b':[np.mean, np.max]}).reset_index(drop=False)
agg.columns = flatten_name('book', agg.columns)

原数据包含id, a, b, c列,通过字典可以分别对a,b列计算均值和最大值,如何在DolphinDB中实现这种批量生成因子。

请先 登录 后评论

1 个回答

Hao Jia

可以配合DolphinDB元编程实现:

https://www.dolphindb.cn/cn/help/200/FunctionsandCommands/FunctionReferences/s/sql.html

data = table(take(1 2, 10) as `id, 1..10 as `col1, 2..11 as `col2, 1..10 as `col3, 2..11 as `col4, 1..10 as `col5)
def createAggMetaCode(aggDict){
	metaCode = []
	for(colName in aggDict.keys()){
		for(funcName in aggDict[colName])
		{
			metaCode.append!(sqlCol(colName, funcName, colName + `_ + funcName))
		}
	}
	return metaCode
}
aggDict = {
	"col1" : [min, max, std],
	"col2" : [min, max, std],
	"col3" : [min, max, std],
	"col4" : [min, max, std],
	"col5" : [min, max, std]
}
createAggMetaCode(aggDict)
sql(select = createAggMetaCode(aggDict), from = data, groupBy = <id>).eval()

分别对col1, col2, col3, col4, col5 调用min, max, std函数

attachments-2022-06-pGjefxsm62a045f2edb9a.png

请先 登录 后评论