ai 通用性改动

b4255cbb · 吕先亚 · bfec2eee · b4255cbb · b4255cbb · b4255cbb
Commit b4255cbb authored Oct 25, 2024 by 吕先亚
Showing with 31 additions and 50 deletions

EstimateMarketTrendV20.py ai/EstimateMarketTrendV20.py +12 -14

robo_predict.py ai/dao/robo_predict.py +2 -3

model_trainer.py ai/model_trainer.py +6 -8

reporter.py ai/reporter.py +11 -25

No files found.
--- a/ai/EstimateMarketTrendV20.py
+++ b/ai/EstimateMarketTrendV20.py
@@ -15,13 +15,11 @@ from api import DataSync

 # 截止日期
 max_date = None
-# max_date = '2024-03-20'
-# max_date = '2024-01-11'

-toForecast = True  # False means test, True means forecast
-syncData = True  # 开启会同步数据库指数及基金数据
-uploadData = True  # 开启会上传预测结果
-doReport = True  # 开启会生成Excel报告
+toForecast = False  # False means test, True means forecast
+syncData = False  # 开启会同步数据库指数及基金数据
+uploadData = False  # 开启会上传预测结果
+doReport = False  # 开启会生成Excel报告

 # 待预测指数
 # PREDICT_LIST = [67, 121, 122, 123]
@@ -135,6 +133,7 @@ if __name__ == '__main__':
        ada_model = trainer.train_AdaBoost(X_train, y_train, X_test, y_test)
        ensemble_model = trainer.ensemble_model(rf_model, gbt_model, svc_model,
                                                knn_model, ada_model, X_train, y_train, X_test, y_test)
+        if toForecast:
            model_predict = {'forest': rf_model.predict(scaledX_forecast),
                             'gbt': gbt_model.predict(scaledX_forecast),
                             'svc': svc_model.predict(scaledX_forecast),
@@ -143,7 +142,6 @@ if __name__ == '__main__':
                             'ensemble': ensemble_model.predict(scaledX_forecast)}
            print(f'预测结果：{model_predict}'.center(60, '+'))
            judgement(pid, infos_type[pid], model_predict)
-        if toForecast:
            if len(LABEL_RANGE) > 2:
                average = round(np.mean(list(model_predict.values())))
                report_prediction(average, indexDict[pid], indexDict)

--- a/ai/dao/robo_predict.py
+++ b/ai/dao/robo_predict.py
@@ -13,7 +13,7 @@ __COLUMNS__ = {
 def insert(datas):
    datas = mapper_columns(datas=datas, columns=__COLUMNS__)
    return f'''
-    replace into robo_predict({','.join([x for x in datas.keys()])})
+    replace into robo_predict_chu({','.join([x for x in datas.keys()])})
    values ({','.join([f"'{x[1]}'" for x in datas.items()])})
    '''

@@ -26,7 +26,6 @@ def get_list(index_ids: object = None, min_date: object = None, max_date: object
    if max_date:
        sqls.append(f"rp_date <= '{format_date(max_date)}'")
    return f'''
-    select {','.join([f"{x[0]} as {x[1]}" for x in __COLUMNS__.items()])} from robo_predict 
+    select {','.join([f"{x[0]} as {x[1]}" for x in __COLUMNS__.items()])} from robo_predict_chu 
    {where(*sqls, rid_index_id=to_tuple(index_ids))} order by rp_rbd_id, rp_date
    '''
-
--- a/ai/model_trainer.py
+++ b/ai/model_trainer.py
@@ -29,19 +29,17 @@ class ModelTrainer(ABC):

        labels = list(LABEL_RANGE.keys())[::-1]
        result0 = confusion_matrix(y_test, y_pred, labels=labels)
-        print(strMethod + " Confusion Matrix:")
-        print(result0)
+        print(strMethod + " Confusion Matrix:", result0)

        result1 = classification_report(y_test, y_pred, zero_division=1.0)
-        print(strMethod + " Classification Report:")
-        print(result1)
+        print(strMethod + " Classification Report:", result1)
        result2 = accuracy_score(y_test, y_pred)
        print(strMethod + " Accuracy:", result2)

-        cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
-        cm_display.plot()
-        plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
-        plt.show()
+        # cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
+        # cm_display.plot()
+        # plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
+        # plt.show()

    def train_random_forest(self, X_train, y_train, X_test, y_test):
        classifier = RandomForestClassifier()

--- a/ai/reporter.py
+++ b/ai/reporter.py
 import datetime
 import json
+import random

 import pandas as pd
 import requests
 from py_jftech import format_date

-from ai.EstimateMarketTrendV20 import sync
 from ai.config import LABEL_TAG, LABEL_RANGE
 from ai.dao import robo_predict
 from ai.dao.robo_datas import get_base_info, get_index_list, get_fund_list
@@ -42,16 +42,23 @@ def do_reporter2():
    symbol_index_dict = {symbol: index for index, symbol in enumerate(symbols)}
    records = robo_predict.get_list()
    for item in records:
+        result, rtn, real = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict'])
+        labels = list(LABEL_RANGE.keys())[::-1]
        data = {
            'Forcast On Date': item['date'],
            'Ticker': info.get(item['rbd_id'])['ticker'].replace(' Index', '').replace(' Equity', ''),
            'In 21 business days': LABEL_TAG.get(item['predict']),
+            'real outcome label': LABEL_TAG.get(real),
+            'random variable label': LABEL_TAG.get(random.randint(labels[0], labels[-1])),
            'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
+            'Run On Time': item['create_time'],
+            'return in 21 business days': rtn,
+            'result': result
        }
        datas.append(data)
    sorted_data = sorted(datas, key=lambda x: symbol_index_dict[x['Ticker'].split(' ')[0]])
    pf = pd.DataFrame(sorted_data)
-    pf.to_excel("Forcast_Report.xlsx", index=False)
+    pf.to_excel("Forcast_Report_chu.xlsx", index=False)


 def map_to_label(ret):
@@ -61,7 +68,6 @@ def map_to_label(ret):


 def is_right(id, type, start, predict):
-    from datetime import datetime
    predict_term = 21
    navs = []
    if type == 'INDEX':
@@ -74,30 +80,10 @@ def is_right(id, type, start, predict):
        rtn = navs[-1] / navs[0] - 1
        real = map_to_label(rtn)
        result = True if predict == real or (rtn < 0 and predict < 0) or (rtn > 0 and predict > 0) else False
-        return result
-    return None
-
-
-def statistics():
-    datas = []
-    index_info = get_base_info()
-    info = {x['id']: x for x in index_info}
-    records = robo_predict.get_list()
-    for item in records:
-        result = is_right(item['rbd_id'], info.get(item['rbd_id'])['type'], item['date'], item['predict'])
-        data = {
-            'Forcast On Date': format_date(item['date']),
-            'Ticker': info.get(item['rbd_id'])['ticker'].replace(' Index', '').replace(' Equity', ''),
-            'In 21 business days': LABEL_TAG.get(item['predict']),
-            'Ticker Name': json.loads(info.get(item['rbd_id'])['datas'])['chineseName'],
-            'result': result
-        }
-        datas.append(data)
-    pf = pd.DataFrame(datas)
-    pf.to_excel("result.xlsx", index=False)
+        return result, round(rtn * 100, 2), real
+    return None, None, None


 if __name__ == '__main__':
    do_reporter2()
    # sync()
-    # statistics()