Commit 36280db7 authored by 吕先亚

ai 通用性改动

parent 80567726
...@@ -16,7 +16,7 @@ from api import DataSync ...@@ -16,7 +16,7 @@ from api import DataSync
# 截止日期 # 截止日期
max_date = None max_date = None
toForecast = True # False means test, True means forecast toForecast = False # False means test, True means forecast
syncData = False # 开启会同步数据库指数及基金数据 syncData = False # 开启会同步数据库指数及基金数据
uploadData = False # 开启会上传预测结果 uploadData = False # 开启会上传预测结果
doReport = False # 开启会生成Excel报告 doReport = False # 开启会生成Excel报告
...@@ -135,15 +135,15 @@ if __name__ == '__main__': ...@@ -135,15 +135,15 @@ if __name__ == '__main__':
ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test) ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test)
ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model, ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model,
knn_model, ada_model, X_train, y_train, X_test, y_test) knn_model, ada_model, X_train, y_train, X_test, y_test)
model_predict = {'forest': rf_model.predict(scaledX_forecast),
'gbt': gbt_model.predict(scaledX_forecast),
'svc': svc_model.predict(scaledX_forecast),
'knn': knn_model.predict(scaledX_forecast),
'adaboost': ada_model.predict(scaledX_forecast),
'ensemble': ensemble_model.predict(scaledX_forecast)}
print(f'预测结果:{model_predict}'.center(60, '+'))
judgement(pid, infos_type[pid], model_predict)
if toForecast: if toForecast:
model_predict = {'forest': rf_model.predict(scaledX_forecast),
'gbt': gbt_model.predict(scaledX_forecast),
'svc': svc_model.predict(scaledX_forecast),
'knn': knn_model.predict(scaledX_forecast),
'adaboost': ada_model.predict(scaledX_forecast),
'ensemble': ensemble_model.predict(scaledX_forecast)}
print(f'预测结果:{model_predict}'.center(60, '+'))
judgement(pid, infos_type[pid], model_predict)
if len(LABEL_RANGE) > 2: if len(LABEL_RANGE) > 2:
average = round(np.mean(list(model_predict.values()))) average = round(np.mean(list(model_predict.values())))
report_prediction(average, indexDict[pid], indexDict) report_prediction(average, indexDict[pid], indexDict)
......
...@@ -13,7 +13,7 @@ __COLUMNS__ = { ...@@ -13,7 +13,7 @@ __COLUMNS__ = {
def insert(datas): def insert(datas):
datas = mapper_columns(datas=datas, columns=__COLUMNS__) datas = mapper_columns(datas=datas, columns=__COLUMNS__)
return f''' return f'''
replace into robo_predict({','.join([x for x in datas.keys()])}) replace into robo_predict_chu({','.join([x for x in datas.keys()])})
values ({','.join([f"'{x[1]}'" for x in datas.items()])}) values ({','.join([f"'{x[1]}'" for x in datas.items()])})
''' '''
...@@ -26,7 +26,6 @@ def get_list(index_ids: object = None, min_date: object = None, max_date: object ...@@ -26,7 +26,6 @@ def get_list(index_ids: object = None, min_date: object = None, max_date: object
if max_date: if max_date:
sqls.append(f"rp_date <= '{format_date(max_date)}'") sqls.append(f"rp_date <= '{format_date(max_date)}'")
return f''' return f'''
select {','.join([f"{x[0]} as {x[1]}" for x in __COLUMNS__.items()])} from robo_predict select {','.join([f"{x[0]} as {x[1]}" for x in __COLUMNS__.items()])} from robo_predict_chu
{where(*sqls, rid_index_id=to_tuple(index_ids))} order by rp_rbd_id, rp_date {where(*sqls, rid_index_id=to_tuple(index_ids))} order by rp_rbd_id, rp_date
''' '''
...@@ -29,19 +29,17 @@ class ModelTrainer(ABC): ...@@ -29,19 +29,17 @@ class ModelTrainer(ABC):
labels = list(LABEL_RANGE.keys())[::-1] labels = list(LABEL_RANGE.keys())[::-1]
result0 = confusion_matrix(y_test, y_pred, labels=labels) result0 = confusion_matrix(y_test, y_pred, labels=labels)
print(strMethod + " Confusion Matrix:") print(strMethod + " Confusion Matrix:", result0)
print(result0)
result1 = classification_report(y_test, y_pred, zero_division=1.0) result1 = classification_report(y_test, y_pred, zero_division=1.0)
print(strMethod + " Classification Report:") print(strMethod + " Classification Report:", result1)
print(result1)
result2 = accuracy_score(y_test, y_pred) result2 = accuracy_score(y_test, y_pred)
print(strMethod + " Accuracy:", result2) print(strMethod + " Accuracy:", result2)
cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels) # cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
cm_display.plot() # cm_display.plot()
plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}') # plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
plt.show() # plt.show()
def train_random_forest(self, X_train, y_train, X_test, y_test): def train_random_forest(self, X_train, y_train, X_test, y_test):
classifier = RandomForestClassifier() classifier = RandomForestClassifier()
......
import datetime import datetime
import json import json
import random
import pandas as pd import pandas as pd
import requests import requests
from py_jftech import format_date from py_jftech import format_date
from ai.EstimateMarketTrendV20 import sync
from ai.config import LABEL_TAG, LABEL_RANGE from ai.config import LABEL_TAG, LABEL_RANGE
from ai.dao import robo_predict from ai.dao import robo_predict
from ai.dao.robo_datas import get_base_info, get_index_list, get_fund_list from ai.dao.robo_datas import get_base_info, get_index_list, get_fund_list
...@@ -42,16 +42,23 @@ def do_reporter2(): ...@@ -42,16 +42,23 @@ def do_reporter2():
symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)} symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)}
records = robo_predict.get_list() records = robo_predict.get_list()
for item in records: for item in records:
result, rtn, real = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict'])
labels = list(LABEL_RANGE.keys())[::-1]
data = { data = {
'Forcast On Date': item['date'], 'Forcast On Date': item['date'],
'Ticker': info.get(item['rbd_id'])['ticker'].replace(' Index', '').replace(' Equity', ''), 'Ticker': info.get(item['rbd_id'])['ticker'].replace(' Index', '').replace(' Equity', ''),
'In 21 business days': LABEL_TAG.get(item['predict']), 'In 21 business days': LABEL_TAG.get(item['predict']),
'real outcome label': LABEL_TAG.get(real),
'random variable label': LABEL_TAG.get(random.randint(labels[0], labels[-1])),
'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'], 'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
'Run On Time': item['create_time'],
'return in 21 business days': rtn,
'result': result
} }
datas.append(data) datas.append(data)
sorted_data = sorted(datas, key=lambda x: symbol_index_dict[x['Ticker'].split(' ')[0]]) sorted_data = sorted(datas, key=lambda x: symbol_index_dict[x['Ticker'].split(' ')[0]])
pf = pd.DataFrame(sorted_data) pf = pd.DataFrame(sorted_data)
pf.to_excel("Forcast_Report.xlsx", index=False) pf.to_excel("Forcast_Report_chu.xlsx", index=False)
def map_to_label(ret): def map_to_label(ret):
...@@ -61,7 +68,6 @@ def map_to_label(ret): ...@@ -61,7 +68,6 @@ def map_to_label(ret):
def is_right(id, type, start, predict): def is_right(id, type, start, predict):
from datetime import datetime
predict_term = 21 predict_term = 21
navs = [] navs = []
if type == 'INDEX': if type == 'INDEX':
...@@ -74,30 +80,10 @@ def is_right(id, type, start, predict): ...@@ -74,30 +80,10 @@ def is_right(id, type, start, predict):
rtn = navs[-1] / navs[0] - 1 rtn = navs[-1] / navs[0] - 1
real = map_to_label(rtn) real = map_to_label(rtn)
result = True if predict == real or (rtn < 0 and predict < 0) or (rtn > 0 and predict > 0) else False result = True if predict == real or (rtn < 0 and predict < 0) or (rtn > 0 and predict > 0) else False
return result return result, round(rtn * 100, 2), real
return None return None, None, None
def statistics():
datas = []
index_info = get_base_info()
info = {x['id']: x for x in index_info}
records = robo_predict.get_list()
for item in records:
result = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict'])
data = {
'Forcast On Date': format_date(item['date']),
'Ticker': info.get(item['rbd_id'])['ticker'].replace(' Index', '').replace(' Equity', ''),
'In 21 business days': LABEL_TAG.get(item['predict']),
'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
'result': result
}
datas.append(data)
pf = pd.DataFrame(datas)
pf.to_excel("result.xlsx", index=False)
if __name__ == '__main__': if __name__ == '__main__':
do_reporter2() do_reporter2()
# sync() # sync()
# statistics()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment