Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
R
robo-dividend
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenwen.tang
robo-dividend
Commits
3f656b0a
Commit
3f656b0a
authored
Oct 30, 2024
by
吕先亚
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ai 加入eps_ttm_yoy
parent
0074a743
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
16 deletions
+33
-16
training_data_builder.py
ai/training_data_builder.py
+7
-9
sp500.py
basic/sp500.py
+26
-7
No files found.
ai/training_data_builder.py
View file @
3f656b0a
...
...
@@ -142,7 +142,7 @@ class TrainingDataBuilder(ABC):
DataAll
.
reset_index
(
inplace
=
True
)
DataAll
.
ffill
(
inplace
=
True
)
DataAll
[
'EPS_TTM_YOY'
]
=
(
DataAll
[
'JIFU_SPX_OPEPS_CURRQ_TTM'
]
/
DataAll
[
'JIFU_SPX_OPEPS_CURRQ_TTM'
]
.
shift
(
-
252
)
-
1.0
)
252
)
-
1.0
)
if
(
self
.
_toForecast
):
# 处理CPI_YOY:美国城镇消费物价指数同比未经季 CPURNSA:美国消费者物价指数未经季调
DataAllCopy
=
DataAll
.
copy
()
...
...
@@ -166,30 +166,28 @@ class TrainingDataBuilder(ABC):
# delete future information
DataAll
.
drop
([
'futureR'
,
'yLabel'
],
axis
=
1
,
inplace
=
True
)
X
=
DataAll
.
iloc
[:,
1
:]
.
values
###################
# scale data
labels
=
list
(
LABEL_RANGE
.
keys
())
scaler
=
MinMaxScaler
(
feature_range
=
(
labels
[
-
1
],
labels
[
0
]))
# scaledX = scaler.fit_transform(X)
DataScaler
=
scaler
.
fit
(
X
)
scaledX
=
DataScaler
.
transform
(
X
)
scaledX_forecast
=
None
if
(
self
.
_toForecast
)
:
if
self
.
_toForecast
:
scaledX_forecast
=
DataScaler
.
transform
(
X_forecast
)
X_train
=
scaledX
y_train
=
y
X_test
=
[]
y_test
=
[]
date_index
=
[]
else
:
# Step 2: Split data into train set and test set
X_train
,
X_test
,
y_train
,
y_test
=
train_test_split
(
scaledX
,
y
,
test_size
=
0.02
,
shuffle
=
False
)
date_index
=
DataAll
[
'date'
][
-
len
(
X_test
):
-
self
.
_numForecastDays
]
.
to_numpy
()
# To avoid data leak, test set should start from numForecastDays later
X_test
=
X_test
[
self
.
_numForecastDays
:
]
y_test
=
y_test
[
self
.
_numForecastDays
:
]
return
X_train
,
X_test
,
y_train
,
y_test
,
scaledX_forecast
,
forecastDay
X_test
=
X_test
[
:
-
self
.
_numForecastDays
]
y_test
=
y_test
[
:
-
self
.
_numForecastDays
]
return
X_train
,
X_test
,
y_train
,
y_test
,
scaledX_forecast
,
forecastDay
,
date_index
basic/sp500.py
View file @
3f656b0a
...
...
@@ -6,6 +6,7 @@ import pandas as pd
import
pytz
import
requests
from
openpyxl.reader.excel
import
load_workbook
from
py_jftech
import
sendmail
def
is_dst
():
...
...
@@ -78,7 +79,7 @@ def list_files_sorted_by_name(directory, max_day=None):
def
fetch_sp500
():
temp_file
=
Path
(
__file__
)
.
parent
/
'resources/sp-500.xlsx'
temp_file
=
Path
(
__file__
)
.
parent
/
'resources/sp-500.xlsx'
response
=
requests
.
get
(
"https://www.spglobal.com/spdji/en/documents/additional-material/sp-500-eps-est.xlsx"
)
# 确保请求成功
if
response
.
status_code
==
200
:
...
...
@@ -91,7 +92,7 @@ def fetch_sp500():
def
save_sp500
():
fetch_sp500
()
files
=
list_files_sorted_by_name
(
Path
(
__file__
)
.
parent
/
'resources'
)[
-
2
:]
files
=
list_files_sorted_by_name
(
Path
(
__file__
)
.
parent
/
'resources'
)[
-
2
:]
compare_day
=
None
for
file
in
files
:
# 使用openpyxl加载Excel文件
...
...
@@ -103,13 +104,15 @@ def save_sp500():
compare_day
=
report_day
else
:
if
compare_day
!=
report_day
:
wb
.
save
(
Path
(
__file__
)
.
parent
/
f
'resources/sp-500-eps-est_USA{usa_close_day()}.xlsx'
)
new_path
=
Path
(
__file__
)
.
parent
/
f
'resources/sp-500-eps-est_USA{usa_close_day()}.xlsx'
wb
.
save
(
new_path
)
send_sp500
(
'download sp500.'
,
[
new_path
])
# 关闭工作簿
wb
.
close
()
def
sync_sp500
(
day
):
file
=
Path
(
__file__
)
.
parent
/
'resources/sp-500-eps-est_USA20241014.xlsx'
file
=
Path
(
__file__
)
.
parent
/
'resources/sp-500-eps-est_USA20241014.xlsx'
if
day
:
files
=
list_files_sorted_by_name
(
Path
(
__file__
)
.
parent
/
'resources'
,
day
)
if
files
:
...
...
@@ -138,14 +141,23 @@ def sync_sp500(day):
date_value
=
datetime
.
strptime
(
str
(
ws
[
f
'A{i}'
]
.
value
)
.
split
(
' '
)[
0
]
.
strip
(),
'
%
m/
%
d/
%
Y'
)
if
type
(
ws
[
f
'A{i}'
]
.
value
)
==
str
else
ws
[
f
'A{i}'
]
.
value
if
date_value
<
report_day
:
# 日期只要是季度首日也设置red_date = red_release_date
data
=
{
'date'
:
date_value
,
'eps'
:
ws
[
f
'C{i}'
]
.
value
}
data
[
"releaseDate"
]
=
data
[
'date'
]
+
timedelta
(
days
=
1
)
data
[
"date"
]
=
data
[
'releaseDate'
]
datas
.
append
(
data
)
elif
date_value
==
get_quarter_end_date
(
report_day
):
data
=
{
'date'
:
report_day
,
'eps'
:
ws
[
f
'C{i}'
]
.
value
,
'releaseDate'
:
datetime
.
strptime
(
str
(
file
)[
-
13
:
-
5
],
"
%
Y
%
m
%
d"
)}
# 如果发布日是季度末,则red_date = red_release_date
if
report_day
==
get_quarter_end_date
(
report_day
):
data
=
{
'date'
:
date_value
,
'eps'
:
ws
[
f
'C{i}'
]
.
value
}
data
[
"releaseDate"
]
=
data
[
'date'
]
+
timedelta
(
days
=
1
)
data
[
"date"
]
=
data
[
'releaseDate'
]
else
:
data
=
{
'date'
:
report_day
,
'eps'
:
ws
[
f
'C{i}'
]
.
value
,
'releaseDate'
:
datetime
.
strptime
(
str
(
file
)[
-
13
:
-
5
],
"
%
Y
%
m
%
d"
)}
datas
.
append
(
data
)
for
i
in
range
(
actuals_row
+
1
,
ws
.
max_row
):
if
ws
[
f
'A{i}'
]
.
value
is
None
:
...
...
@@ -154,6 +166,7 @@ def sync_sp500(day):
ws
[
f
'A{i}'
]
.
value
)
==
str
else
ws
[
f
'A{i}'
]
.
value
,
'eps'
:
ws
[
f
'C{i}'
]
.
value
}
data
[
"releaseDate"
]
=
data
[
'date'
]
+
timedelta
(
days
=
1
)
data
[
"date"
]
=
data
[
'releaseDate'
]
datas
.
append
(
data
)
wb
.
close
()
datas
=
pd
.
DataFrame
(
datas
[::
-
1
])
...
...
@@ -162,6 +175,12 @@ def sync_sp500(day):
return
datas
.
to_dict
(
orient
=
"records"
)[
-
1
::]
if
day
else
datas
.
to_dict
(
orient
=
"records"
)
def
send_sp500
(
content
,
attach_paths
):
receives
=
[
'Tony.Wu.Home@gmail.com'
]
subject
=
'sp500 eps download'
sendmail
(
receives
=
receives
,
copies
=
[],
attach_paths
=
attach_paths
,
subject
=
subject
,
content
=
content
)
if
__name__
==
'__main__'
:
# print(list_files_sorted_by_name(Path(__file__).parent / 'resources'))
# save_sp500()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment