import datetime
import json
import math
import os
import random

import pandas as pd
import requests

from ai.config import LABEL_TAG, LABEL_RANGE
from ai.dao import robo_predict
from ai.dao.robo_datas import get_base_info, get_index_list, get_fund_list

# Tickers in the order they should appear in the generated reports.
symbols = ['ACWI', 'EWJ', 'MCHI', 'EEM', 'BKF', 'INDA', 'AAXJ', 'VGK', 'QQQ', 'SPY', 'SPX', 'IWN', 'IUSG', 'IWD',
           'DON', 'GDX', 'TOLZ', 'XLU', 'XBI', 'ESGD', 'IGE', 'EMLC', 'IGAA', 'LQD', 'HYG', 'SHY', 'IEI', 'IEF',
           'GLD', 'IYR', 'UUP', 'CEW', 'TLT']

PREDICT_API_URL = "https://jrp.jfquant.com/api/v1.0/ai/predict"
# Upper bound (seconds) for the prediction API call so a hung server cannot
# block the report forever.
REQUEST_TIMEOUT_SECONDS = 30


def _short_ticker(bloomberg_ticker):
    """Strip the ' Index' / ' Equity' suffixes from a Bloomberg ticker."""
    return bloomberg_ticker.replace(' Index', '').replace(' Equity', '')


def _sort_by_ticker(rows):
    """Return *rows* ordered by the position of their ticker in ``symbols``.

    Rows whose ticker is not listed in ``symbols`` are placed last (in their
    original relative order) instead of aborting the report with a KeyError.
    """
    order = {symbol: position for position, symbol in enumerate(symbols)}
    unknown = len(order)
    return sorted(rows, key=lambda row: order.get(row['Ticker'].split(' ')[0], unknown))


def do_reporter(start='2023-10-01', end=None):
    """
    Download AI predictions from the remote API and write "Forcast_Report.xlsx".

    The sorted rows are also printed to stdout as JSON.

    @param start: first prediction date, formatted as YYYY-MM-DD
    @param end: last prediction date (a date/datetime or a YYYY-MM-DD string);
                defaults to today, evaluated at call time
    @return: None
    """
    # Resolve the default here rather than in the signature: a default of
    # datetime.date.today() would be frozen at import time.
    if end is None:
        end = datetime.date.today()
    end_text = end if isinstance(end, str) else end.strftime('%Y-%m-%d')

    resp = requests.get(
        PREDICT_API_URL,
        params={'startTime': start, 'endTime': end_text},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    resp.raise_for_status()

    datas = []
    for value in resp.json()['body'].values():
        for item in value:
            datas.append({
                'Forcast On Date': item['aiPredict']['predictDate'],
                'Ticker': _short_ticker(item['bloombergTicker']),
                'In 21 business days': 'UP' if item['aiPredict']['predict'] == 1 else 'DOWN',
                'Ticker Name': item['indexName'],
            })

    sorted_data = _sort_by_ticker(datas)
    print(json.dumps(sorted_data, ensure_ascii=False))
    pf = pd.DataFrame(sorted_data)
    pf.to_excel("Forcast_Report.xlsx", index=False)


def do_reporter2(records=None, excel_name=None):
    """
    Evaluate stored predictions against realised returns and export them to Excel.

    One row is produced per prediction record; the three RMSE figures are
    computed over all rows and repeated in every row of the sheet. The sheet is
    named after the forecast period of the last processed record.

    @param records: prediction records to evaluate; when empty or None they are
                    loaded through robo_predict.get_list()
    @param excel_name: target workbook; defaults to "Forcast_Report_chu.xlsx"
    @return: None
    """
    index_info = get_base_info()
    info = {x['id']: x for x in index_info}
    if not records:
        records = robo_predict.get_list()
    if not records:
        # Nothing to report: there is no forecast period to name the sheet
        # after and the RMSE figures would be undefined.
        return

    # Bounds for the random reference label; min/max keeps this independent of
    # the key order of LABEL_RANGE.
    lowest_label, highest_label = min(LABEL_RANGE), max(LABEL_RANGE)

    datas = []
    predict_term = None
    for item in records:
        meta = info.get(item['rbd_id'])
        predict_term = json.loads(item['remark'])['period']
        result, rtn, real = is_right(item['rbd_id'], meta['type'], item['date'], item['predict'], predict_term)
        datas.append({
            'Forcast On Date': item['date'],
            'Ticker': _short_ticker(meta['ticker']),
            'Forecast Business Days After': predict_term,
            'Forecast Label': LABEL_TAG.get(item['predict']),
            'real outcome label': LABEL_TAG.get(real),
            'random variable label': LABEL_TAG.get(random.randint(lowest_label, highest_label)),
            'Ticker Name': json.loads(meta['datas'])['chineseName'],
            'Run On Time': item['create_time'].replace(minute=0, second=0),
            'return on Forecast Label': rtn,
            'result': result
        })

    RMSE = get_RMSE(datas)
    RefRMSE = get_RefRMSE(datas)
    RevisedReferenceRMSE = get_RevisedReferenceRMSE(datas)

    pf = pd.DataFrame(_sort_by_ticker(datas))
    pf['RMSE'] = RMSE
    pf['RefRMSE'] = RefRMSE
    pf['RevisedReferenceRMSE'] = RevisedReferenceRMSE

    excel_name = excel_name if excel_name else "Forcast_Report_chu.xlsx"
    # Append mode requires an existing workbook (and the openpyxl engine), and
    # if_sheet_exists is only accepted in append mode; create the file on the
    # first run instead of failing with FileNotFoundError.
    if os.path.isfile(excel_name):
        writer_options = {'mode': 'a', 'if_sheet_exists': 'replace'}
    else:
        writer_options = {'mode': 'w'}
    with pd.ExcelWriter(excel_name, engine='openpyxl', **writer_options) as writer:
        pf.to_excel(writer, sheet_name=f'{predict_term} business days', index=False)


def map_to_label(ret):
    """
    Map a return to the label whose half-open range [lower, upper) contains it.

    @param ret: return as a fraction (e.g. 0.05 for +5%)
    @return: the matching key of LABEL_RANGE, or None when no range matches
    """
    for label, (lower, upper) in LABEL_RANGE.items():
        if float(lower) <= ret < float(upper):
            return label
    return None


def _rmse_between(datas, reference_key, candidate_key):
    """RMSE between the numeric labels stored under two row keys.

    Rows without a real outcome label are ignored; 0 is returned when no row
    remains.
    """
    rows = [row for row in datas if row.get('real outcome label') is not None]
    if not rows:
        return 0
    # Invert LABEL_TAG so that display tags map back to numeric labels.
    tags = {v: k for k, v in LABEL_TAG.items()}
    squared_errors = sum((tags.get(row[reference_key]) - tags.get(row[candidate_key])) ** 2 for row in rows)
    return (squared_errors / len(rows)) ** 0.5


def get_RMSE(datas):
    """
    Root Mean Square Error (RMSE) between real outcome and forecast labels.

    @param datas: report rows as built by do_reporter2
    @return: the RMSE, or 0 when no row has a real outcome label
    """
    return _rmse_between(datas, 'real outcome label', 'Forecast Label')


def get_RefRMSE(datas):
    """
    Reference RMSE between real outcome labels and the random reference labels.

    @param datas: report rows as built by do_reporter2
    @return: the reference RMSE, or 0 when no row has a real outcome label
    """
    return _rmse_between(datas, 'real outcome label', 'random variable label')


def get_RevisedReferenceRMSE(datas):
    """
    Revised Reference RMSE: error of a uniform guess over the labels -2..2.

    For every row with a real outcome label, the squared distance from the
    true label to each of the five candidate labels is accumulated and
    averaged over 5 * N.

    @param datas: report rows as built by do_reporter2
    @return: the revised reference RMSE, or 0 when no row has a real outcome
             label (consistent with get_RMSE / get_RefRMSE)
    """
    rows = [row for row in datas if row.get('real outcome label') is not None]
    if not rows:
        # Avoid dividing by zero when nothing can be scored yet.
        return 0
    tags = {v: k for k, v in LABEL_TAG.items()}

    total_sum = 0
    for row in rows:
        true_label = tags.get(row['real outcome label'])
        # Candidate labels are hard-coded to -2..2.
        # NOTE(review): presumably mirrors the keys of LABEL_RANGE - confirm.
        for deviation in range(-2, 3):
            total_sum += (true_label - deviation) ** 2

    return math.sqrt(total_sum / (5 * len(rows)))


def is_right(id, type, start, predict, predict_term):
    """
    Check a prediction against the return realised over the forecast period.

    The parameter names ``id`` and ``type`` shadow builtins but are kept for
    compatibility with existing keyword callers.

    @param id: identifier of the index or fund
    @param type: 'INDEX' or 'FUND'; any other value yields no result
    @param start: date the prediction was made; the first price on or after it
                  is presumably the base price (depends on the DAO ordering)
    @param predict: predicted label
    @param predict_term: number of price steps in the forecast period
    @return: (result, return in percent rounded to 2 decimals, real label), or
             (None, None, None) when the price history is incomplete or the
             base price is zero/missing
    """
    if type == 'INDEX':
        rows = get_index_list(index_ids=id, min_date=start, limit=predict_term + 1)
        navs = [row['rid_close'] for row in rows]
    elif type == 'FUND':
        rows = get_fund_list(fund_ids=id, min_date=start, limit=predict_term + 1)
        navs = [row['rfn_nav_cal'] for row in rows]
    else:
        navs = []

    # The full window must be available, and the base price must be usable as
    # a divisor.
    if len(navs) != predict_term + 1 or not navs[0]:
        return None, None, None

    rtn = navs[-1] / navs[0] - 1
    real = map_to_label(rtn)
    # Correct when the exact label matches or the predicted direction does.
    result = bool(predict == real or (rtn < 0 and predict < 0) or (rtn > 0 and predict > 0))
    return result, round(rtn * 100, 2), real


if __name__ == '__main__':
    do_reporter2()
    # sync()