Commit 0074a743 authored by 吕先亚

ai 通用性改动

parent d80afae8
...@@ -16,10 +16,10 @@ from api import DataSync ...@@ -16,10 +16,10 @@ from api import DataSync
# 截止日期 # 截止日期
max_date = None max_date = None
toForecast = False # False means test, True means forecast toForecast = True # False means test, True means forecast
syncData = False # 开启会同步数据库指数及基金数据 syncData = False # 开启会同步数据库指数及基金数据
uploadData = False # 开启会上传预测结果 uploadData = False # 开启会上传预测结果
doReport = False # 开启会生成Excel报告 doReport = True # 开启会生成Excel报告
# 待预测指数 # 待预测指数
PREDICT_LIST = [156] PREDICT_LIST = [156]
...@@ -125,16 +125,16 @@ if __name__ == '__main__': ...@@ -125,16 +125,16 @@ if __name__ == '__main__':
for pid in PREDICT_LIST: for pid in PREDICT_LIST:
print(f'{indexDict[pid]} start '.center(50, '=')) print(f'{indexDict[pid]} start '.center(50, '='))
t_data = indexData if pid in index else fundData t_data = indexData if pid in index else fundData
X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay = \ X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay, date_index = \
builder.build_train_test(pid, t_data, vixData, indexOtherData, cpiData, FDTRData, NAPMPMIData,TTM) builder.build_train_test(pid, t_data, vixData, indexOtherData, cpiData, FDTRData, NAPMPMIData, TTM)
trainer = ModelTrainer(toForecast) trainer = ModelTrainer(toForecast, pid)
rf_model = trainer.train_random_forest(X_train, y_train, X_test, y_test) rf_model = trainer.train_random_forest(X_train, y_train, X_test, y_test, date_index)
gbt_model = trainer.train_GBT(X_train, y_train, X_test, y_test) gbt_model = trainer.train_GBT(X_train, y_train, X_test, y_test, date_index)
svc_model = trainer.train_SVC(X_train, y_train, X_test, y_test) svc_model = trainer.train_SVC(X_train, y_train, X_test, y_test, date_index)
knn_model = trainer.train_nearest_neighbors(X_train, y_train, X_test, y_test) knn_model = trainer.train_nearest_neighbors(X_train, y_train, X_test, y_test, date_index)
ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test) ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test, date_index)
ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model, ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model,
knn_model, ada_model, X_train, y_train, X_test, y_test) knn_model, ada_model, X_train, y_train, X_test, y_test, date_index)
if toForecast: if toForecast:
model_predict = {'forest': rf_model.predict(scaledX_forecast), model_predict = {'forest': rf_model.predict(scaledX_forecast),
'gbt': gbt_model.predict(scaledX_forecast), 'gbt': gbt_model.predict(scaledX_forecast),
......
from abc import ABC from abc import ABC
from datetime import datetime
from lightgbm import LGBMClassifier from lightgbm import LGBMClassifier
from sklearn import svm from sklearn import svm
...@@ -7,6 +8,7 @@ from sklearn.ensemble import RandomForestClassifier, VotingClassifier ...@@ -7,6 +8,7 @@ from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier from sklearn.neighbors import KNeighborsClassifier
from ai import reporter
from ai.config import LABEL_RANGE from ai.config import LABEL_RANGE
...@@ -15,14 +17,15 @@ class ModelTrainer(ABC): ...@@ -15,14 +17,15 @@ class ModelTrainer(ABC):
模型训练类 模型训练类
""" """
def __init__(self, toForecast) -> None: def __init__(self, toForecast, pid) -> None:
super().__init__() super().__init__()
self._toForecast = toForecast self._toForecast = toForecast
self._pid = pid
################### ###################
# Step 3: Train the model # Step 3: Train the model
def test_model(self, strMethod, classifier, X_test, y_test): def test_model(self, strMethod, classifier, X_test, y_test, date_index):
print(strMethod + " ====== test results ======") print(strMethod + " ====== test results ======")
y_pred = classifier.predict(X_test) y_pred = classifier.predict(X_test)
...@@ -34,51 +37,59 @@ class ModelTrainer(ABC): ...@@ -34,51 +37,59 @@ class ModelTrainer(ABC):
print(strMethod + " Classification Report:", result1) print(strMethod + " Classification Report:", result1)
result2 = accuracy_score(y_test, y_pred) result2 = accuracy_score(y_test, y_pred)
print(strMethod + " Accuracy:", result2) print(strMethod + " Accuracy:", result2)
from ai.EstimateMarketTrendV20 import doReport
if doReport:
if strMethod == "Ensemble Model":
datas = []
for predict, date in zip(y_pred, date_index):
datas.append(
{'predict': predict, 'date': date, 'rbd_id': self._pid, 'create_time': datetime.now()})
reporter.do_reporter2(records=datas, excel_name='Backtest_Report_chu.xlsx')
# cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels) # cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
# cm_display.plot() # cm_display.plot()
# plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}') # plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
# plt.show() # plt.show()
def train_random_forest(self, X_train, y_train, X_test, y_test): def train_random_forest(self, X_train, y_train, X_test, y_test, date_index):
classifier = RandomForestClassifier() classifier = RandomForestClassifier()
classifier.fit(X_train, y_train) classifier.fit(X_train, y_train)
if not self._toForecast: if not self._toForecast:
self.test_model('Random Forest', classifier, X_test, y_test) self.test_model('Random Forest', classifier, X_test, y_test, date_index)
return classifier return classifier
def train_GBT(self, X_train, y_train, X_test, y_test): def train_GBT(self, X_train, y_train, X_test, y_test, date_index):
# Gradient Boosted Tree # Gradient Boosted Tree
classifierGBT = LGBMClassifier() classifierGBT = LGBMClassifier()
classifierGBT.fit(X_train, y_train) classifierGBT.fit(X_train, y_train)
if not self._toForecast: if not self._toForecast:
self.test_model('Gradient Boosted Tree', classifierGBT, X_test, y_test) self.test_model('Gradient Boosted Tree', classifierGBT, X_test, y_test, date_index)
return classifierGBT return classifierGBT
def train_SVC(self, X_train, y_train, X_test, y_test): def train_SVC(self, X_train, y_train, X_test, y_test, date_index):
# Support Vector Machines # Support Vector Machines
classifierSVC = svm.SVC() classifierSVC = svm.SVC()
classifierSVC.fit(X_train, y_train) classifierSVC.fit(X_train, y_train)
if not self._toForecast: if not self._toForecast:
self.test_model('Support Vector Machines', classifierSVC, X_test, y_test) self.test_model('Support Vector Machines', classifierSVC, X_test, y_test, date_index)
return classifierSVC return classifierSVC
def train_nearest_neighbors(self, X_train, y_train, X_test, y_test): def train_nearest_neighbors(self, X_train, y_train, X_test, y_test, date_index):
classifier = KNeighborsClassifier() classifier = KNeighborsClassifier()
classifier.fit(X_train, y_train) classifier.fit(X_train, y_train)
if not self._toForecast: if not self._toForecast:
self.test_model('K-Nearest Neighbors', classifier, X_test, y_test) self.test_model('K-Nearest Neighbors', classifier, X_test, y_test, date_index)
return classifier return classifier
def train_AdaBoost(self, X_train, y_train, X_test, y_test): def train_AdaBoost(self, X_train, y_train, X_test, y_test, date_index):
classifier = AdaBoostClassifier() classifier = AdaBoostClassifier()
classifier.fit(X_train, y_train) classifier.fit(X_train, y_train)
if not self._toForecast: if not self._toForecast:
self.test_model('AdaBoost', classifier, X_test, y_test) self.test_model('AdaBoost', classifier, X_test, y_test, date_index)
return classifier return classifier
def ensemble_model(self, rf_model, gbt_model, svc_model, knn_model, def ensemble_model(self, rf_model, gbt_model, svc_model, knn_model,
ada_model, X_train, y_train, X_test, y_test): ada_model, X_train, y_train, X_test, y_test, date_index):
# Create a dictionary of our models # Create a dictionary of our models
estimators = [('rf', rf_model), ('gbt', gbt_model), ('svc', svc_model), estimators = [('rf', rf_model), ('gbt', gbt_model), ('svc', svc_model),
('knn', knn_model), ('AdaBoost', ada_model)] ('knn', knn_model), ('AdaBoost', ada_model)]
...@@ -87,5 +98,5 @@ class ModelTrainer(ABC): ...@@ -87,5 +98,5 @@ class ModelTrainer(ABC):
# fit model to training data # fit model to training data
ensemble.fit(X_train, y_train) ensemble.fit(X_train, y_train)
if not self._toForecast: if not self._toForecast:
self.test_model('Ensemble Model', ensemble, X_test, y_test) self.test_model('Ensemble Model', ensemble, X_test, y_test, date_index)
return ensemble return ensemble
...@@ -35,14 +35,15 @@ def do_reporter(start='2023-10-01', end=datetime.date.today()): ...@@ -35,14 +35,15 @@ def do_reporter(start='2023-10-01', end=datetime.date.today()):
pf.to_excel("Forcast_Report.xlsx", index=False) pf.to_excel("Forcast_Report.xlsx", index=False)
def do_reporter2(): def do_reporter2(records=None, excel_name=None):
datas = []
index_info = get_base_info() index_info = get_base_info()
info = {x['id']: x for x in index_info} info = {x['id']: x for x in index_info}
symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)} datas = []
records = robo_predict.get_list() if not records:
records = robo_predict.get_list()
for item in records: for item in records:
result, rtn, real = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict']) result, rtn, real = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'],
item['predict'])
labels = list(LABEL_RANGE.keys())[::-1] labels = list(LABEL_RANGE.keys())[::-1]
data = { data = {
'Forcast On Date': item['date'], 'Forcast On Date': item['date'],
...@@ -51,14 +52,15 @@ def do_reporter2(): ...@@ -51,14 +52,15 @@ def do_reporter2():
'real outcome label': LABEL_TAG.get(real), 'real outcome label': LABEL_TAG.get(real),
'random variable label': LABEL_TAG.get(random.randint(labels[0], labels[-1])), 'random variable label': LABEL_TAG.get(random.randint(labels[0], labels[-1])),
'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'], 'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
'Run On Time': item['create_time'], 'Run On Time': item['create_time'].replace(minute=0, second=0),
'return in 21 business days': rtn, 'return in 21 business days': rtn,
'result': result 'result': result
} }
datas.append(data) datas.append(data)
symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)}
sorted_data = sorted(datas, key=lambda x: symbol_index_dict[x['Ticker'].split(' ')[0]]) sorted_data = sorted(datas, key=lambda x: symbol_index_dict[x['Ticker'].split(' ')[0]])
pf = pd.DataFrame(sorted_data) pf = pd.DataFrame(sorted_data)
pf.to_excel("Forcast_Report_chu.xlsx", index=False) pf.to_excel(excel_name if excel_name else "Forcast_Report_chu.xlsx", index=False)
def map_to_label(ret): def map_to_label(ret):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment