Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
R
robo-dividend
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenwen.tang
robo-dividend
Commits
95202e88
Commit
95202e88
authored
Oct 30, 2024
by
吕先亚
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ai 通用性改动
parent
c71416e0
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
31 deletions
+44
-31
EstimateMarketTrendV20.py
ai/EstimateMarketTrendV20.py
+10
-10
model_trainer.py
ai/model_trainer.py
+25
-14
reporter.py
ai/reporter.py
+9
-7
No files found.
ai/EstimateMarketTrendV20.py
View file @
95202e88
...
...
@@ -16,10 +16,10 @@ from api import DataSync
# 截止日期
max_date
=
None
toForecast
=
False
# False means test, True means forecast
toForecast
=
True
# False means test, True means forecast
syncData
=
False
# 开启会同步数据库指数及基金数据
uploadData
=
False
# 开启会上传预测结果
doReport
=
False
# 开启会生成Excel报告
doReport
=
True
# 开启会生成Excel报告
# 待预测指数
# PREDICT_LIST = [67, 121, 122, 123]
...
...
@@ -123,16 +123,16 @@ if __name__ == '__main__':
for
pid
in
PREDICT_LIST
:
print
(
f
'{indexDict[pid]} start '
.
center
(
50
,
'='
))
t_data
=
indexData
if
pid
in
index
else
fundData
X_train
,
X_test
,
y_train
,
y_test
,
scaledX_forecast
,
forecastDay
=
\
X_train
,
X_test
,
y_train
,
y_test
,
scaledX_forecast
,
forecastDay
,
date_index
=
\
builder
.
build_train_test
(
pid
,
t_data
,
vixData
,
indexOtherData
,
cpiData
,
FDTRData
,
NAPMPMIData
)
trainer
=
ModelTrainer
(
toForecast
)
rf_model
=
trainer
.
train_random_forest
(
X_train
,
y_train
,
X_test
,
y_test
)
gbt_model
=
trainer
.
train_GBT
(
X_train
,
y_train
,
X_test
,
y_test
)
svc_model
=
trainer
.
train_SVC
(
X_train
,
y_train
,
X_test
,
y_test
)
knn_model
=
trainer
.
train_nearest_neighbors
(
X_train
,
y_train
,
X_test
,
y_test
)
ada_model
=
trainer
.
train_AdaBoost
(
X_train
,
y_train
,
X_test
,
y_test
)
trainer
=
ModelTrainer
(
toForecast
,
pid
)
rf_model
=
trainer
.
train_random_forest
(
X_train
,
y_train
,
X_test
,
y_test
,
date_index
)
gbt_model
=
trainer
.
train_GBT
(
X_train
,
y_train
,
X_test
,
y_test
,
date_index
)
svc_model
=
trainer
.
train_SVC
(
X_train
,
y_train
,
X_test
,
y_test
,
date_index
)
knn_model
=
trainer
.
train_nearest_neighbors
(
X_train
,
y_train
,
X_test
,
y_test
,
date_index
)
ada_model
=
trainer
.
train_AdaBoost
(
X_train
,
y_train
,
X_test
,
y_test
,
date_index
)
ensemble_model
=
trainer
.
ensemble_model
(
rf_model
,
gbt_model
,
svc_model
,
knn_model
,
ada_model
,
X_train
,
y_train
,
X_test
,
y_test
)
knn_model
,
ada_model
,
X_train
,
y_train
,
X_test
,
y_test
,
date_index
)
if
toForecast
:
model_predict
=
{
'forest'
:
rf_model
.
predict
(
scaledX_forecast
),
'gbt'
:
gbt_model
.
predict
(
scaledX_forecast
),
...
...
ai/model_trainer.py
View file @
95202e88
from
abc
import
ABC
from
datetime
import
datetime
from
lightgbm
import
LGBMClassifier
from
sklearn
import
svm
...
...
@@ -7,6 +8,7 @@ from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from
sklearn.metrics
import
classification_report
,
confusion_matrix
,
accuracy_score
from
sklearn.neighbors
import
KNeighborsClassifier
from
ai
import
reporter
from
ai.config
import
LABEL_RANGE
...
...
@@ -15,14 +17,15 @@ class ModelTrainer(ABC):
模型训练类
"""
def
__init__
(
self
,
toForecast
)
->
None
:
def
__init__
(
self
,
toForecast
,
pid
)
->
None
:
super
()
.
__init__
()
self
.
_toForecast
=
toForecast
self
.
_pid
=
pid
###################
# Step 3: Train the model
def
test_model
(
self
,
strMethod
,
classifier
,
X_test
,
y_test
):
def
test_model
(
self
,
strMethod
,
classifier
,
X_test
,
y_test
,
date_index
):
print
(
strMethod
+
" ====== test results ======"
)
y_pred
=
classifier
.
predict
(
X_test
)
...
...
@@ -34,51 +37,59 @@ class ModelTrainer(ABC):
print
(
strMethod
+
" Classification Report:"
,
result1
)
result2
=
accuracy_score
(
y_test
,
y_pred
)
print
(
strMethod
+
" Accuracy:"
,
result2
)
from
ai.EstimateMarketTrendV20
import
doReport
if
doReport
:
if
strMethod
==
"Ensemble Model"
:
datas
=
[]
for
predict
,
date
in
zip
(
y_pred
,
date_index
):
datas
.
append
(
{
'predict'
:
predict
,
'date'
:
date
,
'rbd_id'
:
self
.
_pid
,
'create_time'
:
datetime
.
now
()})
reporter
.
do_reporter2
(
records
=
datas
,
excel_name
=
'Backtest_Report_chu.xlsx'
)
# cm_display = ConfusionMatrixDisplay(confusion_matrix=result0, display_labels=labels)
# cm_display.plot()
# plt.title(strMethod + ' Accuracy: ' + f'{result2:.0%}')
# plt.show()
def
train_random_forest
(
self
,
X_train
,
y_train
,
X_test
,
y_test
):
def
train_random_forest
(
self
,
X_train
,
y_train
,
X_test
,
y_test
,
date_index
):
classifier
=
RandomForestClassifier
()
classifier
.
fit
(
X_train
,
y_train
)
if
not
self
.
_toForecast
:
self
.
test_model
(
'Random Forest'
,
classifier
,
X_test
,
y_test
)
self
.
test_model
(
'Random Forest'
,
classifier
,
X_test
,
y_test
,
date_index
)
return
classifier
def
train_GBT
(
self
,
X_train
,
y_train
,
X_test
,
y_test
):
def
train_GBT
(
self
,
X_train
,
y_train
,
X_test
,
y_test
,
date_index
):
# Gradient Boosted Tree
classifierGBT
=
LGBMClassifier
()
classifierGBT
.
fit
(
X_train
,
y_train
)
if
not
self
.
_toForecast
:
self
.
test_model
(
'Gradient Boosted Tree'
,
classifierGBT
,
X_test
,
y_test
)
self
.
test_model
(
'Gradient Boosted Tree'
,
classifierGBT
,
X_test
,
y_test
,
date_index
)
return
classifierGBT
def
train_SVC
(
self
,
X_train
,
y_train
,
X_test
,
y_test
):
def
train_SVC
(
self
,
X_train
,
y_train
,
X_test
,
y_test
,
date_index
):
# Support Vector Machines
classifierSVC
=
svm
.
SVC
()
classifierSVC
.
fit
(
X_train
,
y_train
)
if
not
self
.
_toForecast
:
self
.
test_model
(
'Support Vector Machines'
,
classifierSVC
,
X_test
,
y_test
)
self
.
test_model
(
'Support Vector Machines'
,
classifierSVC
,
X_test
,
y_test
,
date_index
)
return
classifierSVC
def
train_nearest_neighbors
(
self
,
X_train
,
y_train
,
X_test
,
y_test
):
def
train_nearest_neighbors
(
self
,
X_train
,
y_train
,
X_test
,
y_test
,
date_index
):
classifier
=
KNeighborsClassifier
()
classifier
.
fit
(
X_train
,
y_train
)
if
not
self
.
_toForecast
:
self
.
test_model
(
'K-Nearest Neighbors'
,
classifier
,
X_test
,
y_test
)
self
.
test_model
(
'K-Nearest Neighbors'
,
classifier
,
X_test
,
y_test
,
date_index
)
return
classifier
def
train_AdaBoost
(
self
,
X_train
,
y_train
,
X_test
,
y_test
):
def
train_AdaBoost
(
self
,
X_train
,
y_train
,
X_test
,
y_test
,
date_index
):
classifier
=
AdaBoostClassifier
()
classifier
.
fit
(
X_train
,
y_train
)
if
not
self
.
_toForecast
:
self
.
test_model
(
'AdaBoost'
,
classifier
,
X_test
,
y_test
)
self
.
test_model
(
'AdaBoost'
,
classifier
,
X_test
,
y_test
,
date_index
)
return
classifier
def
ensemble_model
(
self
,
rf_model
,
gbt_model
,
svc_model
,
knn_model
,
ada_model
,
X_train
,
y_train
,
X_test
,
y_test
):
ada_model
,
X_train
,
y_train
,
X_test
,
y_test
,
date_index
):
# Create a dictionary of our models
estimators
=
[(
'rf'
,
rf_model
),
(
'gbt'
,
gbt_model
),
(
'svc'
,
svc_model
),
(
'knn'
,
knn_model
),
(
'AdaBoost'
,
ada_model
)]
...
...
@@ -87,5 +98,5 @@ class ModelTrainer(ABC):
# fit model to training data
ensemble
.
fit
(
X_train
,
y_train
)
if
not
self
.
_toForecast
:
self
.
test_model
(
'Ensemble Model'
,
ensemble
,
X_test
,
y_test
)
self
.
test_model
(
'Ensemble Model'
,
ensemble
,
X_test
,
y_test
,
date_index
)
return
ensemble
ai/reporter.py
View file @
95202e88
...
...
@@ -35,14 +35,15 @@ def do_reporter(start='2023-10-01', end=datetime.date.today()):
pf
.
to_excel
(
"Forcast_Report.xlsx"
,
index
=
False
)
def
do_reporter2
():
datas
=
[]
def
do_reporter2
(
records
=
None
,
excel_name
=
None
):
index_info
=
get_base_info
()
info
=
{
x
[
'id'
]:
x
for
x
in
index_info
}
symbol_index_dict
=
{
symbol
:
index
for
index
,
symbol
in
enumerate
(
symbols
)}
records
=
robo_predict
.
get_list
()
datas
=
[]
if
not
records
:
records
=
robo_predict
.
get_list
()
for
item
in
records
:
result
,
rtn
,
real
=
is_right
(
item
[
'rbd_id'
],
info
.
get
(
item
[
'rbd_id'
])[
'type'
],
item
[
'date'
],
item
[
'predict'
])
result
,
rtn
,
real
=
is_right
(
item
[
'rbd_id'
],
info
.
get
(
item
[
'rbd_id'
])[
'type'
],
item
[
'date'
],
item
[
'predict'
])
labels
=
list
(
LABEL_RANGE
.
keys
())[::
-
1
]
data
=
{
'Forcast On Date'
:
item
[
'date'
],
...
...
@@ -51,14 +52,15 @@ def do_reporter2():
'real outcome label'
:
LABEL_TAG
.
get
(
real
),
'random variable label'
:
LABEL_TAG
.
get
(
random
.
randint
(
labels
[
0
],
labels
[
-
1
])),
'Ticker Name'
:
json
.
loads
(
info
.
get
(
item
[
'rbd_id'
])[
'datas'
])[
'chineseName'
],
'Run On Time'
:
item
[
'create_time'
],
'Run On Time'
:
item
[
'create_time'
]
.
replace
(
minute
=
0
,
second
=
0
)
,
'return in 21 business days'
:
rtn
,
'result'
:
result
}
datas
.
append
(
data
)
symbol_index_dict
=
{
symbol
:
index
for
index
,
symbol
in
enumerate
(
symbols
)}
sorted_data
=
sorted
(
datas
,
key
=
lambda
x
:
symbol_index_dict
[
x
[
'Ticker'
]
.
split
(
' '
)[
0
]])
pf
=
pd
.
DataFrame
(
sorted_data
)
pf
.
to_excel
(
"Forcast_Report_chu.xlsx"
,
index
=
False
)
pf
.
to_excel
(
excel_name
if
excel_name
else
"Forcast_Report_chu.xlsx"
,
index
=
False
)
def
map_to_label
(
ret
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment