Commit 34e6f263 authored by stephen.wang's avatar stephen.wang

Merge remote-tracking branch 'origin/dev-dividend' into dev-dividend

parents 466c7595 f502aea2
import json
from typing import List
from py_jftech import autowired
from py_jftech import autowired, parse_date, prev_workday, format_date
from ai.dao.robo_datas import get_base_info
from ai.dao.robo_datas import get_base_info, get_index_list, get_fund_list
from ai.data_access import DataAccess
from ai.model_trainer import ModelTrainer
from ai.noticer import upload_predict
from ai.training_data_builder import TrainingDataBuilder
from api import DataSync
# Cut-off date for the prediction run; None means "run as of today".
max_date = '2023-12-15'
# Ids (indexes and funds) whose direction will be predicted.
PREDICT_LIST = [67, 121, 122, 123, 155, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 170, 171, 174, 175]
# Economic-indicator ids used as model features.
eco = [65, 66, 74, 134]
# Index ids used as model features.
index = [67, 68, 69, 70, 71, 72, 73, 75, 76, 77, 105, 106, 116, 117, 138, 139, 142, 143, 140, 141, 144, 145, 146]
# Fund ids used as model features.
fund = [121, 122, 123, 155, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 170, 171, 174, 175]
@autowired
......@@ -43,49 +54,51 @@ def predictionFromMoel(the_model, scaledX_forecast, predict_item, indexDict: dic
return prediction
def judgement(id, type, predict):
    """Check a past prediction against realized prices and append the outcome to predict.txt.

    :param id: index or fund id (name shadows builtin ``id``; kept for caller compatibility)
    :param type: 'INDEX' or 'FUND' (name shadows builtin ``type``; kept for caller compatibility)
    :param predict: dict of model name -> 1-element prediction array (positive => "up")
    """
    from datetime import datetime
    predict_term = 21  # business days ahead that the model predicted
    start = parse_date(max_date) if max_date else prev_workday(datetime.today())
    navs = []
    if type == 'INDEX':
        navs = [nav['rid_close'] for nav in get_index_list(index_ids=id, min_date=start, limit=predict_term)]
    elif type == 'FUND':
        navs = [nav['rfn_nav_cal'] for nav in get_fund_list(fund_ids=id, min_date=start, limit=predict_term)]
    # Only judge once the full horizon of realized prices is available.
    if len(navs) == predict_term:
        upper = navs[-1] >= navs[0]  # did the price actually go up over the horizon?
        # Each model is "correct" when its predicted direction matches reality.
        result = {model: (pred[0] > 0) == upper for model, pred in predict.items()}
        record = {
            'id': id,
            'date': format_date(start),
            'result': result
        }
        with open('predict.txt', 'a+') as file:
            file.write(json.dumps(record))
            file.write('\n')
########################################
if __name__ == '__main__':
    # Refresh raw market data before building training features.
    sync()
    toForecast = True  # False means test, True means forecast
    # define some parameters
    win1W = 5  # 1 week
    win1M = 21  # 1 Month
    win1Q = 63  # 1 Quarter
    numForecastDays = 21  # business days, 21 business days means one month
    theThreshold = 0.0
    # All ids the pipeline needs: prediction targets plus every feature series.
    ids = set(PREDICT_LIST) | set(eco) | set(index) | set(fund)
    infos = get_base_info(ids)
    # id -> datum type ('INDEX' / 'FUND'), used to pick the right nav table later.
    infos_type = {info['id']: info['type'] for info in infos}
    # id -> normalized ticker name: strip Bloomberg suffixes, spaces become underscores.
    indexDict = {info['id']: info['ticker'].replace(' Index', '').replace(' Equity', '').replace(' ', '_')
                 for info in infos}
    ###################
    # Step 1: Prepare X and y (features and labels)
    # Prepare the base data
......@@ -114,6 +127,11 @@ if __name__ == '__main__':
gbt_model = trainer.train_GBT(X_train, y_train, X_test, y_test)
svc_model = trainer.train_SVC(X_train, y_train, X_test, y_test)
ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model, X_train, y_train, X_test, y_test)
if (toForecast):
model_predict = {'forest': rf_model.predict(scaledX_forecast),
'gbt': gbt_model.predict(scaledX_forecast),
'svc': svc_model.predict(scaledX_forecast),
'ensemble': ensemble_model.predict(scaledX_forecast)}
print(f'预测结果:{model_predict}'.center(60, '+'))
judgement(pid, infos_type[pid], model_predict)
if toForecast:
predictionFromMoel(ensemble_model, scaledX_forecast, indexDict[pid], indexDict)
......@@ -2,7 +2,8 @@ from py_jftech import read, to_tuple, where
@read
def get_index_list(index_ids=None, min_date=None, max_date=None):
def get_index_list(index_ids=None, min_date=None, max_date=None, limit=None):
limit_sql = f'limit {limit}' if limit else ''
sqls = []
if min_date:
sqls.append(f"rid_date >= '{min_date}'")
......@@ -10,7 +11,7 @@ def get_index_list(index_ids=None, min_date=None, max_date=None):
sqls.append(f"rid_date <= '{max_date}'")
return f'''
select * from robo_index_datas
{where(*sqls, rid_index_id=to_tuple(index_ids))} order by rid_index_id, rid_date
{where(*sqls, rid_index_id=to_tuple(index_ids))} order by rid_index_id, rid_date {limit_sql}
'''
......@@ -27,7 +28,8 @@ def get_eco_list(eco_ids=None, min_date=None, max_date=None):
'''
@read
def get_fund_list(fund_ids=None, min_date=None, max_date=None):
def get_fund_list(fund_ids=None, min_date=None, max_date=None, limit=None):
limit_sql = f'limit {limit}' if limit else ''
sqls = []
if min_date:
sqls.append(f"rfn_date >= '{min_date}'")
......@@ -35,7 +37,7 @@ def get_fund_list(fund_ids=None, min_date=None, max_date=None):
sqls.append(f"rfn_date <= '{max_date}'")
return f'''
select * from robo_fund_navs
{where(*sqls, rfn_fund_id=to_tuple(fund_ids))} order by rfn_fund_id, rfn_date
{where(*sqls, rfn_fund_id=to_tuple(fund_ids))} order by rfn_fund_id, rfn_date {limit_sql}
'''
@read
......
......@@ -21,9 +21,16 @@ class DataAccess(ABC):
indexData = indexData[
["rid_index_id", "rid_date", "rid_high", "rid_open", "rid_low", "rid_close", "rid_pe", "rid_pb",
"rid_volume", "rid_frdpe", "rid_frdpes", "rid_pc"]]
# 数据替换和截取以后用105 和106,(都从2022.1.3开始)然后2022年以前的部分:105前边接76的数据 106前边接77的数据
condition1 = ((indexData['rid_index_id'] == 76) & (indexData['rid_date'] >= '2022-01-03'))
condition2 = ((indexData['rid_index_id'] == 77) & (indexData['rid_date'] >= '2022-01-03'))
condition3 = ((indexData['rid_index_id'] == 105) & (indexData['rid_date'] < '2022-01-03'))
condition4 = ((indexData['rid_index_id'] == 106) & (indexData['rid_date'] < '2022-01-03'))
indexData.drop(indexData[condition1 | condition2 | condition3 | condition4].index, inplace=True)
indexData.loc[indexData['rid_index_id'] == 76, 'rid_index_id'] = 105
indexData.loc[indexData['rid_index_id'] == 77, 'rid_index_id'] = 106
indexData.rename(columns={"rid_date": 'date'}, inplace=True) # please use 'date'
indexData["rid_index_id"] = indexData["rid_index_id"].map(self._indexDict)
indexData['rid_frdpe'].ffill(inplace=True)
return indexData
def get_eco_datas(self):
......@@ -45,16 +52,21 @@ class DataAccess(ABC):
def get_vix(self, indexData):
    """Extract the VIX (CBOE SPX volatility index) rows as a date-indexed frame.

    Selects price and valuation columns, renames them with a ``vix_`` prefix,
    and drops any column that still contains NaN values.

    :param indexData: frame of index rows with 'rid_index_id' already mapped to ticker names
    :return: DataFrame indexed by datetime with vix_* columns
    """
    vixData = indexData[indexData['rid_index_id'] == "VIX"].copy()
    vixData = vixData[
        ["date", "rid_high", "rid_open", "rid_low", "rid_close", "rid_pc", "rid_pb", "rid_pe", "rid_frdpe",
         "rid_frdpes"]]
    # NOTE(review): 'rid_volume' is mapped here but never selected above, so that
    # rename entry is a no-op — confirm whether volume should be included.
    vixData.rename(
        columns={"rid_high": 'vix_high', 'rid_open': 'vix_open', "rid_low": 'vix_low', "rid_volume": 'vix_volume',
                 "rid_close": 'vix_close', "rid_pc": 'vix_pc', "rid_pb": 'vix_pb', "rid_pe": 'vix_pe',
                 "rid_frdpe": 'vix_frdpe', "rid_frdpes": 'vix_frdpes'},
        inplace=True)
    vixData.set_index('date', inplace=True)
    vixData.index = pd.to_datetime(vixData.index)
    # Drop columns with missing data so downstream feature joins stay dense.
    vixData.dropna(axis=1, inplace=True)
    return vixData
def get_other_index(self, indexData):
other_index = ["USGG10YR", "USGG2YR", "CCMP", "US0001M", "US0012M", "COI_TOTL", "LEI_TOTL", "MID",
other_index = ["USGG10YR", "USGG2YR", "CCMP", "TSFR1M", "TSFR12M", "COI_TOTL", "LEI_TOTL", "MID",
"OE4EKLAC", "OEA5KLAC", "OECNKLAC", "OEJPKLAC", "OEOTGTAC", "OEUSKLAC", "USRINDEX", "SPX"]
cols = ['date', 'rid_close', 'rid_pe', 'rid_pb', 'rid_volume', 'rid_frdpe', 'rid_frdpes', 'rid_pc']
indexOtherData = pd.DataFrame()
......
{"id": 67, "date": "2023-10-06", "result": {"forest": false, "gbt": true, "svc": true, "ensemble": true}}
{"id": 121, "date": "2023-10-06", "result": {"forest": true, "gbt": true, "svc": false, "ensemble": true}}
{"id": 122, "date": "2023-10-06", "result": {"forest": true, "gbt": true, "svc": false, "ensemble": true}}
{"id": 123, "date": "2023-10-06", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 67, "date": "2023-10-13", "result": {"forest": false, "gbt": true, "svc": true, "ensemble": true}}
{"id": 121, "date": "2023-10-13", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 122, "date": "2023-10-13", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 123, "date": "2023-10-13", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 67, "date": "2023-10-20", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 121, "date": "2023-10-20", "result": {"forest": false, "gbt": true, "svc": false, "ensemble": true}}
{"id": 122, "date": "2023-10-20", "result": {"forest": true, "gbt": true, "svc": false, "ensemble": true}}
{"id": 123, "date": "2023-10-20", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 67, "date": "2023-10-27", "result": {"forest": false, "gbt": true, "svc": true, "ensemble": true}}
{"id": 121, "date": "2023-10-27", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 122, "date": "2023-10-27", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 123, "date": "2023-10-27", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 67, "date": "2023-11-03", "result": {"forest": true, "gbt": true, "svc": true, "ensemble": true}}
{"id": 121, "date": "2023-11-03", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 122, "date": "2023-11-03", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 123, "date": "2023-11-03", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 67, "date": "2023-11-17", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 121, "date": "2023-11-17", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 122, "date": "2023-11-17", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 123, "date": "2023-11-17", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 67, "date": "2023-11-24", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 121, "date": "2023-11-24", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 122, "date": "2023-11-24", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 123, "date": "2023-11-24", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 67, "date": "2023-12-01", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 121, "date": "2023-12-01", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 122, "date": "2023-12-01", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 123, "date": "2023-12-01", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 67, "date": "2023-12-08", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 121, "date": "2023-12-08", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 122, "date": "2023-12-08", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 123, "date": "2023-12-08", "result": {"forest": false, "gbt": false, "svc": false, "ensemble": false}}
{"id": 67, "date": "2023-12-15", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 121, "date": "2023-12-15", "result": {"forest": true, "gbt": true, "svc": true, "ensemble": true}}
{"id": 122, "date": "2023-12-15", "result": {"forest": false, "gbt": false, "svc": true, "ensemble": false}}
{"id": 123, "date": "2023-12-15", "result": {"forest": true, "gbt": true, "svc": true, "ensemble": true}}
......@@ -87,7 +87,7 @@ class TrainingDataBuilder(ABC):
predictData.rename(
columns={"rid_high": 'high', 'rid_open': 'open', "rid_low": 'low', "rid_close": 'close',
'rid_volume': 'volume',
"rid_pe": "SPX_pe", "rid_pb": "SPX_pb"},
"rid_pe": f"{self._indexDict[pid]}_pe", "rid_pb": f"{self._indexDict[pid]}_pb"},
inplace=True)
elif pid in self._fund:
predictData = indexData[indexData['rfn_fund_id'] == self._indexDict[pid]].copy()
......@@ -147,6 +147,11 @@ class TrainingDataBuilder(ABC):
DataAll[col].bfill(inplace=True)
DataAll[col].ffill(inplace=True)
###### clean NaN
DataAll.ffill(inplace=True)
DataAll.dropna(inplace=True)
DataAll.reset_index(inplace=True, drop=True)
if (self._toForecast):
# 处理CPI_YOY:美国城镇消费物价指数同比未经季 CPURNSA:美国消费者物价指数未经季调
DataAllCopy = DataAll.copy()
......@@ -157,13 +162,10 @@ class TrainingDataBuilder(ABC):
DataAllCopy.drop(['futureR', 'yLabel'], axis=1, inplace=True)
forecastDayIndex = DataAllCopy.index[DataAllCopy['date'] == forecastDay]
forecastData = DataAllCopy.iloc[forecastDayIndex.to_list(), 1:]
forecastData.dropna(inplace=True, axis=1)
X_forecast = forecastData.to_numpy()
del DataAllCopy
###### clean NaN
DataAll.dropna(inplace=True)
DataAll.reset_index(inplace=True, drop=True)
###### get X and y
y = DataAll['yLabel'].to_numpy(copy=True)
......
......@@ -51,6 +51,7 @@ class SolveType(Enum):
INFEASIBLE = 0
MPT = 1
POEM = 2
RISK_PARITY = 3
@unique
......@@ -365,6 +366,15 @@ class Solver(ABC):
'''
pass
@abstractmethod
def solve_risk_parity(self):
    '''
    Risk-parity optimization: solve for a portfolio in which assets
    contribute equally to overall risk.
    :return: the resulting portfolio (asset weights)
    '''
    pass
@abstractmethod
def reset_navs(self, day):
'''
......
......@@ -42,7 +42,7 @@ class DefaultDatum(Datum):
if DatumType(datum['type']) is DatumType.FUND:
return {
**datum,
'inceptDate': parse_date(datum['inceptDate'])
'inceptDate': parse_date(datum.get('inceptDate')) if datum.get('inceptDate') else None
}
return datum
......
......@@ -177,7 +177,9 @@ class IndexSync(JDCDataSync):
return next_workday(last['date']) if last else self.start_date
def build_urls(self, datum, start_date, page=0) -> str:
    """Build the JDC index-value query URL for one datum.

    Prefers the Bloomberg ticker; falls back to the THS ticker (with the
    matching sourceType) when no Bloomberg ticker is configured.

    :param datum: datum dict with 'bloombergTicker' and/or 'thsTicker'
    :param start_date: first date to fetch
    :param page: zero-based page number for the paged API
    :return: fully-qualified request URL
    """
    # .get() avoids a KeyError/TypeError when the Bloomberg ticker is absent,
    # and the ticker is URL-quoted only once.
    bloomberg_ticker = datum.get("bloombergTicker")
    if bloomberg_ticker:
        sourceCode, sourceType = quote(bloomberg_ticker), 'BLOOMBERG'
    else:
        sourceCode, sourceType = quote(datum["thsTicker"]), 'THS'
    return f'http://jdcprod.thiztech.com/api/datas/index-value?page={page}&size=200&sourceCode={sourceCode}&sourceType={sourceType}&startDate={format_date(start_date)}'
def store_date(self, datumid, datas: List[dict]):
# add frdpe,frdpes,erp,pc
......@@ -195,7 +197,8 @@ class IndexSync(JDCDataSync):
'frdpes': x['forwardEps'] if 'forwardEps' in x else None,
'erp': x['erp'] if 'erp' in x else None,
'pc': x['pcRatio'] if 'pcRatio' in x else None,
} for x in datas if is_workday(dt.fromtimestamp(x['date'] / 1000, tz=pytz.timezone('Asia/Shanghai'))) and 'close' in x]
} for x in datas if
is_workday(dt.fromtimestamp(x['date'] / 1000, tz=pytz.timezone('Asia/Shanghai'))) and 'close' in x]
if save_datas:
rid.batch_insert(save_datas)
......@@ -270,7 +273,6 @@ class FundNavSync(JDCDataSync):
} for x in response['body']['content'] if x['fundId'] in ft_tickers
}}
def store_date(self, datumid, datas: List[dict]):
save_navs = [{
'fund_id': datumid,
......
......@@ -40,7 +40,7 @@ py-jftech:
backtest: robo_executor.BacktestExecutor
datum: basic.datum.DefaultDatum
hold-report: portfolios.holder.DivHoldReportor
mpt: portfolios.builder.PoemARCPortfoliosBuilder
mpt: portfolios.builder.RiskParityARCPortfoliosBuilder
dividend-holder: portfolios.holder.InvTrustPortfoliosHolder
navs-sync: basic.sync.FundNavSync
email:
......@@ -115,7 +115,7 @@ portfolios: # 投组模块
checker: #投组检测模块
switch: on #是否开启检查
custom-type-priority: [3,2,1,4] # 检测优先级
month-fund-filter: {2:['TEMSCFA LX Equity'],12:['TEMHYAD LX Equity','TEMFIAI LX Equity']} # 根据月份删除某几档基金
month-fund-filter: {} # 根据月份删除某几档基金
reports: # 报告模块相关
navs:
type: FUND
......@@ -243,7 +243,7 @@ robo-executor: # 执行器相关
start-date: 2023-01-02 # 回测起始日期
end-date: 2023-10-31 # 回测截止日期
sealing-period: 10 #调仓封闭期
start-step: ${BACKTEST_START_STEP:1} # 回测从哪一步开始执行 1:计算资产池;2:计算最优投组:3:计算再平衡信号以及持仓投组
start-step: ${BACKTEST_START_STEP:2} # 回测从哪一步开始执行 1:计算资产池;2:计算最优投组:3:计算再平衡信号以及持仓投组
end-step: ${BACKTEST_END_STEP:3} # 回测从哪一步执行完成后结束执行 1:计算资产池;2:计算最优投组:3:计算再平衡信号以及持仓投组
clean-up: on
real: # 实盘执行器
......
......@@ -121,7 +121,7 @@ class MptARCPortfoliosBuilder(MptPortfoliosBuilder):
try:
portfolio = rmp.get_one(day, type, risk)
if not portfolio:
result, detail = self.build_portfolio(day, type)
result = self.build_portfolio(day, type)
for build_risk, datas in result.items():
datas['portfolio'] = self._checker.check(day, json.loads(datas['portfolio']))
try:
......@@ -141,7 +141,9 @@ class MptARCPortfoliosBuilder(MptPortfoliosBuilder):
return {int(x[0]): x[1] for x in result.items()}
return None
except Exception as e:
logger.exception(f"build protfolio of type[{type.name}] and risk[{risk.name}] with date[{format_date(day)}] failure.", exc_info=e)
logger.exception(
f"build protfolio of type[{type.name}] and risk[{risk.name}] with date[{format_date(day)}] failure.",
exc_info=e)
raise e
def build_portfolio(self, day, type: PortfoliosType):
......@@ -180,6 +182,7 @@ class MptARCPortfoliosBuilder(MptPortfoliosBuilder):
@component(bean_name='mpt')
class PoemARCPortfoliosBuilder(MptARCPortfoliosBuilder):
def build_portfolio(self, day, type: PortfoliosType):
result, detail = super(PoemARCPortfoliosBuilder, self).build_portfolio(day, type)
risk = PortfoliosRisk.FT3
......@@ -200,3 +203,24 @@ class PoemARCPortfoliosBuilder(MptARCPortfoliosBuilder):
}
detail[risk]['mpt_cvar'] = mpt_cvar
return result, detail
@component(bean_name='mpt')
class RiskParityARCPortfoliosBuilder(MptPortfoliosBuilder):
    """Portfolio builder that allocates via risk parity (FT3 risk level only)."""

    def build_portfolio(self, day, type: PortfoliosType):
        """Solve a risk-parity portfolio for the given day and portfolio type.

        :return: dict of PortfoliosRisk -> {'solve': ..., 'portfolio': json} entry;
                 INFEASIBLE entries carry no 'portfolio' key.
        """
        risk = PortfoliosRisk.FT3
        logger.info(
            f"start to build protfolio of type[{type.name}] and risk[{risk.name}] with date[{format_date(day)}]")
        # Prepare a solver primed with the nav history up to `day`.
        solver = self._factory.create_solver(risk, type)
        solver.reset_navs(day)
        weights = solver.solve_risk_parity()
        if weights:
            entry = {
                'solve': SolveType.RISK_PARITY,
                'portfolio': json.dumps(weights),
            }
        else:
            entry = {'solve': SolveType.INFEASIBLE}
        return {risk: entry}
......@@ -81,6 +81,10 @@ class DefaultSolver(Solver):
rtn = (self.navs / self.navs.shift(1) - 1)[1:]
return rtn.cov() * 252
@property
def risk_parity_sigma(self):
    # Covariance matrix consumed by the risk-parity objective.
    # NOTE(review): this is the covariance of the NAV *levels* (self.navs),
    # unlike the annualized *return* covariance computed just above from
    # (navs / navs.shift(1) - 1) — confirm levels (not returns) is intentional.
    return self.navs.cov()
@property
def rtn_history(self):
result = self.rtn_matrix * 12
......@@ -188,6 +192,14 @@ class DefaultSolver(Solver):
self.debug_solve_result(model)
return self.calc_port_weight(model), self.calc_port_cvar(model)
def solve_risk_parity(self):
    """Solve for risk-parity portfolio weights.

    Minimizes the sum, over all asset pairs (i, j), of the squared difference
    between their risk contributions z*w*(Sigma_row @ w); the objective is zero
    exactly when every asset contributes equally to portfolio risk.
    (model.z presumably acts as a selection/indicator variable — confirm.)

    :return: portfolio weights extracted from the solved model
    """
    model = self.create_model()
    # Pairwise squared differences of risk contributions; minimized to zero at parity.
    model.objective = Objective(expr=sum(
        [(model.z[i] * model.w[i] * (self.risk_parity_sigma.iloc[i] @ model.w) - model.z[j] * model.w[j] * (self.risk_parity_sigma.iloc[j] @ model.w)) ** 2
         for i in model.indices for j in model.indices]), sense=minimize)
    self._solver.solve(model)
    return self.calc_port_weight(model)
def calc_port_weight(self, model):
id_list = self.navs.columns
weight_list = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment