Commit 36280db7 authored by 吕先亚

ai 通用性改动

parent 80567726
......@@ -16,7 +16,7 @@ from api import DataSync
# 截止日期
max_date = None
toForecast = True # False means test, True means forecast
toForecast = False # False means test, True means forecast
syncData = False # 开启会同步数据库指数及基金数据
uploadData = False # 开启会上传预测结果
doReport = False # 开启会生成Excel报告
......@@ -135,15 +135,15 @@ if __name__ == '__main__':
ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test)
ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model,
knn_model, ada_model, X_train, y_train, X_test, y_test)
model_predict = {'forest': rf_model.predict(scaledX_forecast),
'gbt': gbt_model.predict(scaledX_forecast),
'svc': svc_model.predict(scaledX_forecast),
'knn': knn_model.predict(scaledX_forecast),
'adaboost': ada_model.predict(scaledX_forecast),
'ensemble': ensemble_model.predict(scaledX_forecast)}
print(f'预测结果:{model_predict}'.center(60, '+'))
judgement(pid, infos_type[pid], model_predict)
if toForecast:
model_predict = {'forest': rf_model.predict(scaledX_forecast),
'gbt': gbt_model.predict(scaledX_forecast),
'svc': svc_model.predict(scaledX_forecast),
'knn': knn_model.predict(scaledX_forecast),
'adaboost': ada_model.predict(scaledX_forecast),
'ensemble': ensemble_model.predict(scaledX_forecast)}
print(f'预测结果:{model_predict}'.center(60, '+'))
judgement(pid, infos_type[pid], model_predict)
if len(LABEL_RANGE) > 2:
average = round(np.mean(list(model_predict.values())))
report_prediction(average, indexDict[pid], indexDict)
......
......@@ -13,7 +13,7 @@ __COLUMNS__ = {
def insert(datas):
datas = mapper_columns(datas=datas, columns=__COLUMNS__)
return f'''
replace into robo_predict({','.join([x for x in datas.keys()])})
replace into robo_predict_chu({','.join([x for x in datas.keys()])})
values ({','.join([f"'{x[1]}'" for x in datas.items()])})
'''
......@@ -26,7 +26,6 @@ def get_list(index_ids: object = None, min_date: object = None, max_date: object
if max_date:
sqls.append(f"rp_date <= '{format_date(max_date)}'")
return f'''
select {','.join([f"{x[0]} as {x[1]}" for x in __COLUMNS__.items()])} from robo_predict
select {','.join([f"{x[0]} as {x[1]}" for x in __COLUMNS__.items()])} from robo_predict_chu
{where(*sqls, rid_index_id=to_tuple(index_ids))} order by rp_rbd_id, rp_date
'''
......@@ -29,19 +29,17 @@ class ModelTrainer(ABC):
labels = list(LABEL_RANGE.keys())[::-1]
result0 = confusion_matrix(y_test, y_pred, labels=labels)
print(strMethod + " Confusion Matrix:")
print(result0)
print(strMethod + " Confusion Matrix:", result0)
result1 = classification_report(y_test, y_pred, zero_division=1.0)
print(strMethod + " Classification Report:")
print(result1)
print(strMethod + " Classification Report:", result1)
result2 = accuracy_score(y_test, y_pred)
print(strMethod + " Accuracy:", result2)
cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
cm_display.plot()
plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
plt.show()
# cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
# cm_display.plot()
# plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
# plt.show()
def train_random_forest(self, X_train, y_train, X_test, y_test):
classifier = RandomForestClassifier()
......
import datetime
import json
import random
import pandas as pd
import requests
from py_jftech import format_date
from ai.EstimateMarketTrendV20 import sync
from ai.config import LABEL_TAG, LABEL_RANGE
from ai.dao import robo_predict
from ai.dao.robo_datas import get_base_info, get_index_list, get_fund_list
......@@ -42,16 +42,23 @@ def do_reporter2():
symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)}
records = robo_predict.get_list()
for item in records:
result, rtn, real = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict'])
labels = list(LABEL_RANGE.keys())[::-1]
data = {
'Forcast On Date': item['date'],
'Ticker': info.get(item['rbd_id'])['ticker'].replace(' Index', '').replace(' Equity', ''),
'In 21 business days': LABEL_TAG.get(item['predict']),
'real outcome label': LABEL_TAG.get(real),
'random variable label': LABEL_TAG.get(random.randint(labels[0], labels[-1])),
'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
'Run On Time': item['create_time'],
'return in 21 business days': rtn,
'result': result
}
datas.append(data)
sorted_data = sorted(datas, key=lambda x: symbol_index_dict[x['Ticker'].split(' ')[0]])
pf = pd.DataFrame(sorted_data)
pf.to_excel("Forcast_Report.xlsx", index=False)
pf.to_excel("Forcast_Report_chu.xlsx", index=False)
def map_to_label(ret):
......@@ -61,7 +68,6 @@ def map_to_label(ret):
def is_right(id, type, start, predict):
from datetime import datetime
predict_term = 21
navs = []
if type == 'INDEX':
......@@ -74,30 +80,10 @@ def is_right(id, type, start, predict):
rtn = navs[-1] / navs[0] - 1
real = map_to_label(rtn)
result = True if predict == real or (rtn < 0 and predict < 0) or (rtn > 0 and predict > 0) else False
return result
return None
def statistics():
datas = []
index_info = get_base_info()
info = {x['id']: x for x in index_info}
records = robo_predict.get_list()
for item in records:
result = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict'])
data = {
'Forcast On Date': format_date(item['date']),
'Ticker': info.get(item['rbd_id'])['ticker'].replace(' Index', '').replace(' Equity', ''),
'In 21 business days': LABEL_TAG.get(item['predict']),
'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
'result': result
}
datas.append(data)
pf = pd.DataFrame(datas)
pf.to_excel("result.xlsx", index=False)
return result, round(rtn * 100, 2), real
return None, None, None
if __name__ == '__main__':
do_reporter2()
# sync()
# statistics()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment