Commit 95202e88 authored by 吕先亚

ai 通用性改动

parent c71416e0
......@@ -16,10 +16,10 @@ from api import DataSync
# 截止日期
max_date = None
toForecast = False # False means test, True means forecast
toForecast = True # False means test, True means forecast
syncData = False # 开启会同步数据库指数及基金数据
uploadData = False # 开启会上传预测结果
doReport = False # 开启会生成Excel报告
doReport = True # 开启会生成Excel报告
# 待预测指数
# PREDICT_LIST = [67, 121, 122, 123]
......@@ -123,16 +123,16 @@ if __name__ == '__main__':
for pid in PREDICT_LIST:
print(f'{indexDict[pid]} start '.center(50, '='))
t_data = indexData if pid in index else fundData
X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay = \
X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay, date_index = \
builder.build_train_test(pid, t_data, vixData, indexOtherData, cpiData, FDTRData, NAPMPMIData)
trainer = ModelTrainer(toForecast)
rf_model = trainer.train_random_forest(X_train, y_train, X_test, y_test)
gbt_model = trainer.train_GBT(X_train, y_train, X_test, y_test)
svc_model = trainer.train_SVC(X_train, y_train, X_test, y_test)
knn_model = trainer.train_nearest_neighbors(X_train, y_train, X_test, y_test)
ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test)
trainer = ModelTrainer(toForecast, pid)
rf_model = trainer.train_random_forest(X_train, y_train, X_test, y_test, date_index)
gbt_model = trainer.train_GBT(X_train, y_train, X_test, y_test, date_index)
svc_model = trainer.train_SVC(X_train, y_train, X_test, y_test, date_index)
knn_model = trainer.train_nearest_neighbors(X_train, y_train, X_test, y_test, date_index)
ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test, date_index)
ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model,
knn_model, ada_model, X_train, y_train, X_test, y_test)
knn_model, ada_model, X_train, y_train, X_test, y_test, date_index)
if toForecast:
model_predict = {'forest': rf_model.predict(scaledX_forecast),
'gbt': gbt_model.predict(scaledX_forecast),
......
from abc import ABC
from datetime import datetime
from lightgbm import LGBMClassifier
from sklearn import svm
......@@ -7,6 +8,7 @@ from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from ai import reporter
from ai.config import LABEL_RANGE
......@@ -15,14 +17,15 @@ class ModelTrainer(ABC):
模型训练类
"""
def __init__(self, toForecast, pid=None) -> None:
    """Store the run mode and the id of the series being modelled.

    Args:
        toForecast: When truthy, the ``train_*`` methods skip the test-set
            evaluation step and only fit the model.
        pid: Identifier written as ``'rbd_id'`` into the backtest report
            records built by ``test_model``. Optional so that code still
            constructing ``ModelTrainer(toForecast)`` keeps working.
    """
    super().__init__()
    self._toForecast = toForecast
    self._pid = pid
###################
# Step 3: Train the model
def test_model(self, strMethod, classifier, X_test, y_test):
def test_model(self, strMethod, classifier, X_test, y_test, date_index):
print(strMethod + " ====== test results ======")
y_pred = classifier.predict(X_test)
......@@ -34,51 +37,59 @@ class ModelTrainer(ABC):
print(strMethod + " Classification Report:", result1)
result2 = accuracy_score(y_test, y_pred)
print(strMethod + " Accuracy:", result2)
from ai.EstimateMarketTrendV20 import doReport
if doReport:
if strMethod == "Ensemble Model":
datas = []
for predict, date in zip(y_pred, date_index):
datas.append(
{'predict': predict, 'date': date, 'rbd_id': self._pid, 'create_time': datetime.now()})
reporter.do_reporter2(records=datas, excel_name='Backtest_Report_chu.xlsx')
# cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
# cm_display.plot()
# plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
# plt.show()
def train_random_forest(self, X_train, y_train, X_test, y_test, date_index=None):
    """Fit a random-forest classifier and, in test mode, report its metrics.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Held-out features, only used when not forecasting.
        y_test: Held-out labels, only used when not forecasting.
        date_index: Forwarded unchanged to ``test_model``, which pairs it
            with the test-set predictions when building report records.
            Optional so the earlier five-argument call form keeps working.

    Returns:
        The fitted ``RandomForestClassifier`` (default hyper-parameters).
    """
    classifier = RandomForestClassifier()
    classifier.fit(X_train, y_train)
    # Evaluation is skipped in forecast mode; only the fitted model is needed.
    if not self._toForecast:
        self.test_model('Random Forest', classifier, X_test, y_test, date_index)
    return classifier
def train_GBT(self, X_train, y_train, X_test, y_test, date_index=None):
    """Fit a gradient-boosted-tree classifier and, in test mode, report its metrics.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Held-out features, only used when not forecasting.
        y_test: Held-out labels, only used when not forecasting.
        date_index: Forwarded unchanged to ``test_model``, which pairs it
            with the test-set predictions when building report records.
            Optional so the earlier five-argument call form keeps working.

    Returns:
        The fitted ``LGBMClassifier`` (default hyper-parameters).
    """
    # Gradient Boosted Tree
    classifierGBT = LGBMClassifier()
    classifierGBT.fit(X_train, y_train)
    # Evaluation is skipped in forecast mode; only the fitted model is needed.
    if not self._toForecast:
        self.test_model('Gradient Boosted Tree', classifierGBT, X_test, y_test, date_index)
    return classifierGBT
def train_SVC(self, X_train, y_train, X_test, y_test, date_index=None):
    """Fit a support-vector classifier and, in test mode, report its metrics.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Held-out features, only used when not forecasting.
        y_test: Held-out labels, only used when not forecasting.
        date_index: Forwarded unchanged to ``test_model``, which pairs it
            with the test-set predictions when building report records.
            Optional so the earlier five-argument call form keeps working.

    Returns:
        The fitted ``svm.SVC`` (default hyper-parameters).
    """
    # Support Vector Machines
    classifierSVC = svm.SVC()
    classifierSVC.fit(X_train, y_train)
    # Evaluation is skipped in forecast mode; only the fitted model is needed.
    if not self._toForecast:
        self.test_model('Support Vector Machines', classifierSVC, X_test, y_test, date_index)
    return classifierSVC
def train_nearest_neighbors(self, X_train, y_train, X_test, y_test, date_index=None):
    """Fit a k-nearest-neighbours classifier and, in test mode, report its metrics.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Held-out features, only used when not forecasting.
        y_test: Held-out labels, only used when not forecasting.
        date_index: Forwarded unchanged to ``test_model``, which pairs it
            with the test-set predictions when building report records.
            Optional so the earlier five-argument call form keeps working.

    Returns:
        The fitted ``KNeighborsClassifier`` (default hyper-parameters).
    """
    classifier = KNeighborsClassifier()
    classifier.fit(X_train, y_train)
    # Evaluation is skipped in forecast mode; only the fitted model is needed.
    if not self._toForecast:
        self.test_model('K-Nearest Neighbors', classifier, X_test, y_test, date_index)
    return classifier
def train_AdaBoost(self, X_train, y_train, X_test, y_test, date_index=None):
    """Fit an AdaBoost classifier and, in test mode, report its metrics.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Held-out features, only used when not forecasting.
        y_test: Held-out labels, only used when not forecasting.
        date_index: Forwarded unchanged to ``test_model``, which pairs it
            with the test-set predictions when building report records.
            Optional so the earlier five-argument call form keeps working.

    Returns:
        The fitted ``AdaBoostClassifier`` (default hyper-parameters).
    """
    classifier = AdaBoostClassifier()
    classifier.fit(X_train, y_train)
    # Evaluation is skipped in forecast mode; only the fitted model is needed.
    if not self._toForecast:
        self.test_model('AdaBoost', classifier, X_test, y_test, date_index)
    return classifier
def ensemble_model(self, rf_model, gbt_model, svc_model, knn_model,
ada_model, X_train, y_train, X_test, y_test):
ada_model, X_train, y_train, X_test, y_test, date_index):
# Create a dictionary of our models
estimators = [('rf', rf_model), ('gbt', gbt_model), ('svc', svc_model),
('knn', knn_model), ('AdaBoost', ada_model)]
......@@ -87,5 +98,5 @@ class ModelTrainer(ABC):
# fit model to training data
ensemble.fit(X_train, y_train)
if not self._toForecast:
self.test_model('Ensemble Model', ensemble, X_test, y_test)
self.test_model('Ensemble Model', ensemble, X_test, y_test, date_index)
return ensemble
......@@ -35,14 +35,15 @@ def do_reporter(start='2023-10-01', end=datetime.date.today()):
pf.to_excel("Forcast_Report.xlsx", index=False)
def do_reporter2():
datas = []
def do_reporter2(records=None, excel_name=None):
index_info = get_base_info()
info = {x['id']: x for x in index_info}
symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)}
records = robo_predict.get_list()
datas = []
if not records:
records = robo_predict.get_list()
for item in records:
result, rtn, real = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict'])
result, rtn, real = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'],
item['predict'])
labels = list(LABEL_RANGE.keys())[::-1]
data = {
'Forcast On Date': item['date'],
......@@ -51,14 +52,15 @@ def do_reporter2():
'real outcome label': LABEL_TAG.get(real),
'random variable label': LABEL_TAG.get(random.randint(labels[0], labels[-1])),
'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
'Run On Time': item['create_time'],
'Run On Time': item['create_time'].replace(minute=0, second=0),
'return in 21 business days': rtn,
'result': result
}
datas.append(data)
symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)}
sorted_data = sorted(datas, key=lambda x: symbol_index_dict[x['Ticker'].split(' ')[0]])
pf = pd.DataFrame(sorted_data)
pf.to_excel("Forcast_Report_chu.xlsx", index=False)
pf.to_excel(excel_name if excel_name else "Forcast_Report_chu.xlsx", index=False)
def map_to_label(ret):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment