Commit d7197a54 authored by 吕先亚

To avoid data leakage, the test set should start numForecastDays later

parent cf35d207
......@@ -199,6 +199,6 @@ class TrainingDataBuilder(ABC):
X_train, X_test, y_train, y_test = train_test_split(scaledX, y, test_size=0.04, shuffle=False)
date_index = DataAll['date'][-len(X_test):-self._numForecastDays].to_numpy()
# To avoid data leak, test set should start from numForecastDays later
X_test = X_test[:-self._numForecastDays]
y_test = y_test[:-self._numForecastDays]
X_test = X_test[self._numForecastDays:]
y_test = y_test[self._numForecastDays:]
return X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay, date_index
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment