Commit 3f656b0a authored by 吕先亚

ai 加入eps_ttm_yoy

parent 0074a743
......@@ -142,7 +142,7 @@ class TrainingDataBuilder(ABC):
DataAll.reset_index(inplace=True)
DataAll.ffill(inplace=True)
DataAll['EPS_TTM_YOY'] = (DataAll['JIFU_SPX_OPEPS_CURRQ_TTM'] / DataAll['JIFU_SPX_OPEPS_CURRQ_TTM'].shift(
-252) - 1.0)
252) - 1.0)
if (self._toForecast):
# 处理CPI_YOY:美国城镇消费物价指数同比未经季 CPURNSA:美国消费者物价指数未经季调
DataAllCopy = DataAll.copy()
......@@ -166,30 +166,28 @@ class TrainingDataBuilder(ABC):
# delete future information
DataAll.drop(['futureR', 'yLabel'], axis=1, inplace=True)
X = DataAll.iloc[:, 1:].values
###################
# scale data
labels = list(LABEL_RANGE.keys())
scaler = MinMaxScaler(feature_range=(labels[-1], labels[0]))
# scaledX = scaler.fit_transform(X)
DataScaler = scaler.fit(X)
scaledX = DataScaler.transform(X)
scaledX_forecast = None
if (self._toForecast):
if self._toForecast:
scaledX_forecast = DataScaler.transform(X_forecast)
X_train = scaledX
y_train = y
X_test = []
y_test = []
date_index = []
else:
# Step 2: Split data into train set and test set
X_train, X_test, y_train, y_test = train_test_split(scaledX, y, test_size=0.02, shuffle=False)
date_index = DataAll['date'][-len(X_test):-self._numForecastDays].to_numpy()
# To avoid data leak, test set should start from numForecastDays later
X_test = X_test[self._numForecastDays:]
y_test = y_test[self._numForecastDays:]
return X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay
X_test = X_test[:-self._numForecastDays]
y_test = y_test[:-self._numForecastDays]
return X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay, date_index
......@@ -6,6 +6,7 @@ import pandas as pd
import pytz
import requests
from openpyxl.reader.excel import load_workbook
from py_jftech import sendmail
def is_dst():
......@@ -78,7 +79,7 @@ def list_files_sorted_by_name(directory, max_day=None):
def fetch_sp500():
temp_file = Path(__file__).parent/'resources/sp-500.xlsx'
temp_file = Path(__file__).parent / 'resources/sp-500.xlsx'
response = requests.get("https://www.spglobal.com/spdji/en/documents/additional-material/sp-500-eps-est.xlsx")
# 确保请求成功
if response.status_code == 200:
......@@ -91,7 +92,7 @@ def fetch_sp500():
def save_sp500():
fetch_sp500()
files = list_files_sorted_by_name(Path(__file__).parent/'resources')[-2:]
files = list_files_sorted_by_name(Path(__file__).parent / 'resources')[-2:]
compare_day = None
for file in files:
# 使用openpyxl加载Excel文件
......@@ -103,13 +104,15 @@ def save_sp500():
compare_day = report_day
else:
if compare_day != report_day:
wb.save(Path(__file__).parent/f'resources/sp-500-eps-est_USA{usa_close_day()}.xlsx')
new_path = Path(__file__).parent / f'resources/sp-500-eps-est_USA{usa_close_day()}.xlsx'
wb.save(new_path)
send_sp500('download sp500.', [new_path])
# 关闭工作簿
wb.close()
def sync_sp500(day):
file = Path(__file__).parent/'resources/sp-500-eps-est_USA20241014.xlsx'
file = Path(__file__).parent / 'resources/sp-500-eps-est_USA20241014.xlsx'
if day:
files = list_files_sorted_by_name(Path(__file__).parent / 'resources', day)
if files:
......@@ -138,14 +141,23 @@ def sync_sp500(day):
date_value = datetime.strptime(str(ws[f'A{i}'].value).split(' ')[0].strip(), '%m/%d/%Y') if type(
ws[f'A{i}'].value) == str else ws[f'A{i}'].value
if date_value < report_day:
# 日期只要是季度首日也设置red_date = red_release_date
data = {'date': date_value,
'eps': ws[f'C{i}'].value}
data["releaseDate"] = data['date'] + timedelta(days=1)
data["date"] = data['releaseDate']
datas.append(data)
elif date_value == get_quarter_end_date(report_day):
data = {'date': report_day,
'eps': ws[f'C{i}'].value,
'releaseDate': datetime.strptime(str(file)[-13:-5], "%Y%m%d")}
# 如果发布日是季度末,则red_date = red_release_date
if report_day == get_quarter_end_date(report_day):
data = {'date': date_value,
'eps': ws[f'C{i}'].value}
data["releaseDate"] = data['date'] + timedelta(days=1)
data["date"] = data['releaseDate']
else:
data = {'date': report_day,
'eps': ws[f'C{i}'].value,
'releaseDate': datetime.strptime(str(file)[-13:-5], "%Y%m%d")}
datas.append(data)
for i in range(actuals_row + 1, ws.max_row):
if ws[f'A{i}'].value is None:
......@@ -154,6 +166,7 @@ def sync_sp500(day):
ws[f'A{i}'].value) == str else ws[f'A{i}'].value,
'eps': ws[f'C{i}'].value}
data["releaseDate"] = data['date'] + timedelta(days=1)
data["date"] = data['releaseDate']
datas.append(data)
wb.close()
datas = pd.DataFrame(datas[::-1])
......@@ -162,6 +175,12 @@ def sync_sp500(day):
return datas.to_dict(orient="records")[-1::] if day else datas.to_dict(orient="records")
def send_sp500(content, attach_paths):
    """Email a notification about a downloaded S&P 500 EPS workbook.

    Passes ``content`` and ``attach_paths`` straight through to ``sendmail``
    together with a fixed recipient list, a fixed subject and an empty CC
    list.

    Args:
        content: Message body handed to ``sendmail``.
        attach_paths: Paths handed to ``sendmail`` as ``attach_paths``.
    """
    mail_subject = 'sp500 eps download'
    recipients = ['Tony.Wu.Home@gmail.com']
    sendmail(
        receives=recipients,
        copies=[],
        attach_paths=attach_paths,
        subject=mail_subject,
        content=content,
    )
if __name__ == '__main__':
# print(list_files_sorted_by_name(Path(__file__).parent / 'resources'))
# save_sp500()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment