Commit 5529e737 authored by 吕先亚

To avoid data leakage, the test set should start numForecastDays later.

parent d7197a54
......@@ -197,8 +197,8 @@ class TrainingDataBuilder(ABC):
else:
# Step 2: Split data into train set and test set
X_train, X_test, y_train, y_test = train_test_split(scaledX, y, test_size=0.04, shuffle=False)
date_index = DataAll['date'][-len(X_test):-self._numForecastDays].to_numpy()
# To avoid data leak, test set should start from numForecastDays later
X_test = X_test[self._numForecastDays:]
y_test = y_test[self._numForecastDays:]
date_index = DataAll['date'][:-len(X_test)].to_numpy()
return X_train, X_test, y_train, y_test, scaledX_forecast, forecastDay, date_index
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment