Commit ee438e4e authored by wenwen.tang's avatar wenwen.tang 😕

增加预测模型,变为5档预测

parent c7526068
import json
from typing import List
import numpy as np
from py_jftech import autowired, parse_date, prev_workday, format_date
from ai.config import LABEL_RANGE, LABEL_TAG
from ai.dao import robo_predict
from ai.dao.robo_datas import get_base_info, get_index_list, get_fund_list
from ai.data_access import DataAccess
from ai.model_trainer import ModelTrainer
......@@ -37,22 +40,21 @@ def sync(syncs: List[DataSync] = None):
s.do_sync()
def predictionFromMoel(the_model, scaledX_forecast, predict_item, indexDict: dict):
prediction = the_model.predict(scaledX_forecast)
predictionStr = 'DOWN'
if (prediction > 0.5):
predictionStr = 'UP'
def report_prediction(label, predict_item, indexDict: dict):
prediction = label
predictionStr = LABEL_TAG.get(prediction)
content = f"""\n On day {forecastDay.strftime("%m/%d/%Y")}, the model predicts {predict_item} to be {predictionStr} in {str(numForecastDays)} business days. \n"""
print(content)
# 上传预测结果
key = [k for k, v in indexDict.items() if v == predict_item]
key = [k for k, v in indexDict.items() if v == predict_item][0]
index_info = get_base_info(key)[0]
if uploadData:
from ai.noticer import upload_predict
upload_predict(index_info['ticker'], forecastDay, predictionStr)
if doReport:
from ai.reporter import do_reporter
do_reporter()
if len(LABEL_RANGE) > 2:
data = {"rbd_id": key, "date": forecastDay, "predict": prediction}
robo_predict.insert(data)
else:
from ai.noticer import upload_predict
upload_predict(index_info['ticker'], forecastDay, predictionStr)
send(content)
return prediction
......@@ -142,4 +144,17 @@ if __name__ == '__main__':
print(f'预测结果:{model_predict}'.center(60, '+'))
judgement(pid, infos_type[pid], model_predict)
if toForecast:
predictionFromMoel(ensemble_model, scaledX_forecast, indexDict[pid], indexDict)
if len(LABEL_RANGE) > 2:
average = round(np.mean(list(model_predict.values())))
report_prediction(average, indexDict[pid], indexDict)
else:
report_prediction(ensemble_model.predict(scaledX_forecast), indexDict[pid], indexDict)
if doReport:
if len(LABEL_RANGE) > 2:
from ai.reporter import do_reporter2
do_reporter2()
else:
from ai.reporter import do_reporter
do_reporter()
# Prediction labels.
from math import inf
# Maps each integer class label to its [lower, upper] bounds on the future log return.
# The labelling helper tests `lower <= ret < upper`, so each interval is half-open
# and adjacent classes share a boundary without overlapping.
LABEL_RANGE = {2: [0.05, inf], 1: [0.02, 0.05], 0: [-0.02, 0.02], -1: [-0.05, -0.02], -2: [-inf, -0.05]}
# Maps each integer class label to the tag shown in prediction messages and reports.
LABEL_TAG = {2: 'UPUP', 1: 'UP', 0: 'NEUTRAL', -1: 'DOWN', -2: 'DOWNDOWN'}
......@@ -27,6 +27,7 @@ def get_eco_list(eco_ids=None, min_date=None, max_date=None):
{where(*sqls, red_eco_id=to_tuple(eco_ids))} order by red_eco_id, red_date
'''
@read
def get_fund_list(fund_ids=None, min_date=None, max_date=None, limit=None):
limit_sql = f'limit {limit}' if limit else ''
......@@ -40,10 +41,11 @@ def get_fund_list(fund_ids=None, min_date=None, max_date=None, limit=None):
{where(*sqls, rfn_fund_id=to_tuple(fund_ids))} order by rfn_fund_id, rfn_date {limit_sql}
'''
@read
def get_base_info(ids=None):
sqls = []
return f"""
SELECT rbd_id id,v_rbd_bloomberg_ticker ticker,v_rbd_type type, rbd_datas datas FROM `robo_base_datum`
{where(*sqls,rbd_id=to_tuple(ids))}
"""
\ No newline at end of file
{where(*sqls, rbd_id=to_tuple(ids))}
"""
from py_jftech import read, write, format_date, to_tuple, where, mapper_columns
# Maps robo_predict table column names (keys) to the field names used on the
# Python side (values). get_list() selects each column under its alias, and
# insert() passes this mapping to mapper_columns() before building its SQL.
__COLUMNS__ = {
'rp_rbd_id': 'rbd_id',
'rp_date': 'date',
'rp_predict': 'predict',
'rp_remark': 'remark',
'rp_create_time': 'create_time'
}
@write
def insert(datas):
    """Build the ``replace into robo_predict`` statement for one prediction row.

    @param datas: mapping of field names to values for the row; it is passed
        through ``mapper_columns`` with ``__COLUMNS__`` first (presumably to
        rename fields to table column names - confirm against py_jftech).
    @return: the SQL text; it is handed to the ``write`` decorator.

    NOTE(review): every value is interpolated as a single-quoted string, so a
    ``None`` value is written as the text 'None' and values are not escaped.
    NOTE(review): in MySQL, REPLACE only overwrites an existing row when the
    table has a PRIMARY KEY or UNIQUE index on the conflicting columns.
    """
    datas = mapper_columns(datas=datas, columns=__COLUMNS__)
    column_sql = ','.join(datas.keys())
    value_sql = ','.join(f"'{value}'" for value in datas.values())
    return f'''
    replace into robo_predict({column_sql})
    values ({value_sql})
    '''
@read
def get_list(index_ids: object = None, min_date: object = None, max_date: object = None) -> object:
    """Build the query that lists stored predictions from ``robo_predict``.

    @param index_ids: optional id or ids to filter on ``rp_rbd_id``; passed
        through ``to_tuple`` before being handed to ``where``.
    @param min_date: optional inclusive lower bound on ``rp_date``.
    @param max_date: optional inclusive upper bound on ``rp_date``.
    @return: the SQL text, handed to the ``read`` decorator. Every column in
        ``__COLUMNS__`` is selected under its alias, ordered by id then date.
    """
    sqls = []
    if min_date:
        sqls.append(f"rp_date >= '{format_date(min_date)}'")
    if max_date:
        sqls.append(f"rp_date <= '{format_date(max_date)}'")
    select_sql = ','.join(f"{column} as {alias}" for column, alias in __COLUMNS__.items())
    # Filter on rp_rbd_id: robo_predict has no rid_index_id column, so the
    # previous keyword produced invalid SQL whenever index_ids was supplied.
    return f'''
    select {select_sql} from robo_predict
    {where(*sqls, rp_rbd_id=to_tuple(index_ids))} order by rp_rbd_id, rp_date
    '''
-- Stores one model prediction per base datum (index/fund) and forecast date.
--   rp_rbd_id      id of the predicted index/fund (robo_base_datum.rbd_id)
--   rp_date        date the forecast was made
--   rp_predict     integer class label of the prediction
--   rp_remark      optional free-form JSON remark
--   rp_create_time / rp_update_time  row bookkeeping timestamps
-- The (rp_rbd_id, rp_date) index is UNIQUE so that the DAO's `replace into`
-- overwrites the existing row for the same id and date instead of appending
-- a duplicate: REPLACE only replaces on a PRIMARY KEY / UNIQUE index conflict.
CREATE TABLE IF NOT EXISTS robo_predict (
    `rp_rbd_id` bigint(20) NOT NULL,
    `rp_date` datetime NOT NULL,
    `rp_predict` int(11) NOT NULL,
    `rp_remark` json NULL,
    `rp_create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `rp_update_time` datetime NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP,
    UNIQUE INDEX `rp_rbd_id`(`rp_rbd_id`, `rp_date`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
\ No newline at end of file
......@@ -8,6 +8,8 @@ from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from ai.config import LABEL_RANGE
class ModelTrainer(ABC):
"""
......@@ -25,7 +27,8 @@ class ModelTrainer(ABC):
print(strMethod + " ====== test results ======")
y_pred = classifier.predict(X_test)
result0 = confusion_matrix(y_test, y_pred, labels=[0, 1])
labels = list(LABEL_RANGE.keys())[::-1]
result0 = confusion_matrix(y_test, y_pred, labels=labels)
print(strMethod + " Confusion Matrix:")
print(result0)
......@@ -35,7 +38,7 @@ class ModelTrainer(ABC):
result2 = accuracy_score(y_test, y_pred)
print(strMethod + " Accuracy:", result2)
cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=['Down', 'Up'])
cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
cm_display.plot()
plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
plt.show()
......
......@@ -25,4 +25,5 @@ def upload_predict(ticker, predictDate, predict):
headers = {"X-AUTH-token": "rt7297LwQvyAYTke2iD8Vg"}
response = requests.post(url=f'{jrp_domain}/ai/predict', json=predict_data, headers=headers)
if response.status_code != 200:
print(response.text)
print("上传ai预测结果失败,请重试")
......@@ -4,6 +4,10 @@ import json
import pandas as pd
import requests
from ai.config import LABEL_TAG
from ai.dao import robo_predict
from ai.dao.robo_datas import get_base_info
symbols = ['ACWI', 'EWJ', 'MCHI', 'EEM', 'BKF', 'INDA', 'AAXJ', 'VGK', 'QQQ', 'SPY', 'SPX', 'IWN',
'IUSG', 'IWD', 'DON', 'GDX', 'TOLZ', 'XLU', 'XBI', 'ESGD', 'IGE', 'EMLC', 'IGAA',
'LQD', 'HYG', 'SHY', 'IEI', 'IEF', 'GLD', 'IYR', 'UUP', 'CEW', 'TLT']
......@@ -29,5 +33,24 @@ def do_reporter(start='2023-10-01', end=datetime.date.today()):
pf.to_excel("Forcast_Report.xlsx", index=False)
def do_reporter2():
    """Export the stored multi-class predictions to ``Forcast_Report.xlsx``.

    Reads all base-info rows and all rows of ``robo_predict``, builds one
    report line per prediction (forecast date, short ticker, label tag and
    Chinese name), orders the lines by the ticker's position in ``symbols``
    and writes them to an Excel file in the working directory.

    Predictions whose ``rbd_id`` has no base-info row are skipped with a
    printed warning, and tickers that are not listed in ``symbols`` are placed
    after all known ones, so a single unexpected record no longer aborts the
    whole report.
    """
    info = {x['id']: x for x in get_base_info()}
    symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)}
    datas = []
    for item in robo_predict.get_list():
        base = info.get(item['rbd_id'])
        if base is None:
            # Without base info there is no ticker or name to report.
            print(f"skip prediction without base info, rbd_id={item['rbd_id']}")
            continue
        datas.append({
            'Forcast On Date': item['date'],
            'Ticker': base['ticker'].replace(' Index', '').replace(' Equity', ''),
            'In 21 business days': LABEL_TAG.get(item['predict']),
            'Ticker Name': json.loads(base['datas'])['chineseName'],
        })
    # Unknown tickers rank after every known symbol; sorted() is stable, so
    # they keep their query order among themselves.
    unknown_rank = len(symbols)
    sorted_data = sorted(datas, key=lambda x: symbol_index_dict.get(x['Ticker'].split(' ')[0], unknown_rank))
    pf = pd.DataFrame(sorted_data)
    pf.to_excel("Forcast_Report.xlsx", index=False)
if __name__ == '__main__':
do_reporter()
do_reporter2()
......@@ -6,6 +6,8 @@ from finta import TA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from ai.config import LABEL_RANGE
def imp():
print(TA)
......@@ -73,6 +75,10 @@ class TrainingDataBuilder(ABC):
return data
def build_predict_data(self, indexData, pid):
def map_to_label(ret):
    """Return the first LABEL_RANGE key whose interval [lower, upper) holds ``ret``.

    Returns None when no interval matches; this includes NaN, because every
    comparison with NaN is false.
    """
    matching = (
        label
        for label, (lower, upper) in LABEL_RANGE.items()
        if float(lower) <= ret < float(upper)
    )
    return next(matching, None)
"""
@param pid: 需要预测的指数或基金id
@return:
......@@ -114,8 +120,7 @@ class TrainingDataBuilder(ABC):
# The following uses future info for the y label, to be deleted later
predictData['futureR'] = np.log(predictData['close'].shift(-self._numForecastDays) / predictData['close'])
# predictData = predictData[predictData['futureR'].notna()]
predictData['yLabel'] = (predictData['futureR'] >= self._theThreshold).astype(int)
spxDataCloseSave = predictData[['date', 'close']]
predictData['yLabel'] = predictData['futureR'].apply(lambda r: map_to_label(r))
del (predictData['close'])
return predictData
......@@ -178,7 +183,8 @@ class TrainingDataBuilder(ABC):
###################
# scale data
scaler = MinMaxScaler(feature_range=(0, 1))
labels = list(LABEL_RANGE.keys())
scaler = MinMaxScaler(feature_range=(labels[-1], labels[0]))
# scaledX = scaler.fit_transform(X)
DataScaler = scaler.fit(X)
scaledX = DataScaler.transform(X)
......
......@@ -248,8 +248,8 @@ reports: # 报告模块相关
subject: "SVROBO6-实盘版-每日监测_{today}"
content: "Dear All: 附件是今天生成的监测数据,請驗收,謝謝! 注>:該郵件為自動發送,如有問題請聯繫矽谷團隊 telan_qian@chifufund.com"
robo-executor: # 执行器相关
use: ${ROBO_EXECUTOR:backtest} # 执行哪个执行器,优先取系统环境变量ROBO_EXECUTOR的值,默认backtest
sync-data: ${SYNC_DATA:off} # 是否开启同步资料数据
use: ${ROBO_EXECUTOR:real} # 执行哪个执行器,优先取系统环境变量ROBO_EXECUTOR的值,默认real
sync-data: ${SYNC_DATA:on} # 是否开启同步资料数据
backtest: # 回测执行器相关
start-date: 2024-03-02 # 回测起始日期
end-date: 2024-04-11 # 回测截止日期
......@@ -258,7 +258,7 @@ robo-executor: # 执行器相关
end-step: ${BACKTEST_END_STEP:3} # 回测从哪一步执行完成后结束执行 1:计算资产池;2:计算最优投组:3:计算再平衡信号以及持仓投组
clean-up: on
real: # 实盘执行器
export: ${EXPORT_ENABLE:off} # 是否开启报告
export: ${EXPORT_ENABLE:on} # 是否开启报告
start-date: 2023-05-08 # 实盘开始时间
include-date: []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment