---
title: "Tidymodels Workflow with Functional Keras Models (Multi-Input)"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Tidymodels Workflow with Functional Keras Models (Multi-Input)}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE # Set to TRUE to run all chunks when knitting
)
# Suppress verbose Keras output for the vignette
options(keras.fit_verbose = 0)
set.seed(123)
```

## Introduction

This vignette demonstrates a complete `tidymodels` workflow for a regression task using a Keras functional model defined with `kerasnip`. We will use the Ames Housing dataset to predict house prices. A key feature of this example is the use of a multi-input Keras model, where numerical and categorical features are processed through separate input branches.

`kerasnip` allows you to define complex Keras architectures, including those with multiple inputs, and integrate them seamlessly into the `tidymodels` ecosystem for robust modeling and tuning.

## Setup

First, we load the necessary packages.

```{r load-packages}
library(kerasnip)
library(tidymodels)
library(keras3)
library(dplyr)    # For data manipulation
library(ggplot2)  # For plotting
library(future)   # For parallel processing
library(finetune) # For racing
```

## Data Preparation

We'll use the Ames Housing dataset from the `modeldata` package, which is loaded automatically with `tidymodels`. We select a subset of predictors, then split the data into training and testing sets and create cross-validation folds for tuning.

```{r data-prep}
# Select relevant columns and remove rows with missing values
ames_df <- ames |>
  select(
    Sale_Price, Gr_Liv_Area, Year_Built, Neighborhood,
    Bldg_Type, Overall_Cond, Total_Bsmt_SF, contains("SF")
  ) |>
  na.omit()

# Split data into training and testing sets
set.seed(123)
ames_split <- initial_split(ames_df, prop = 0.8, strata = Sale_Price)
ames_train <- training(ames_split)
ames_test <- testing(ames_split)

# Create cross-validation folds for tuning
ames_folds <- vfold_cv(ames_train, v = 5, strata = Sale_Price)
```

## Recipe for Preprocessing

We create a `recipes` object to preprocess the data. This recipe will:

* Predict `Sale_Price` using all other variables.
* Normalize all numeric predictors.
* Create dummy variables for the categorical predictors.
* Collapse each group of related predictors into a single matrix column with `step_collapse()`.

The collapse step is crucial for the multi-input Keras model: the `kerasnip` functional API expects a list of matrices, one per input layer, so each collapsed column supplies the data for one distinct Keras input. The quick check after the recipe below shows the resulting structure.

```{r create-recipe}
ames_recipe <- recipe(Sale_Price ~ ., data = ames_train) |>
  step_normalize(all_numeric_predictors()) |>
  step_collapse(all_numeric_predictors(), new_col = "numerical_input") |>
  step_dummy(Neighborhood) |>
  step_collapse(starts_with("Neighborhood"), new_col = "neighborhood_input") |>
  step_dummy(Bldg_Type) |>
  step_collapse(starts_with("Bldg_Type"), new_col = "bldg_input") |>
  step_dummy(Overall_Cond) |>
  step_collapse(starts_with("Overall_Cond"), new_col = "condition_input")

# You can prep and bake the recipe to see the processed data
# prep(ames_recipe) |> bake(new_data = ames_train)
```
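
To make the expected structure concrete, here is a minimal check (a sketch; it assumes `step_collapse()` yields ordinary matrix columns, as described above). Each `*_input` column of the baked data should be a single matrix column whose width equals the number of predictors collapsed into it.

```{r check-baked-structure}
# Sketch: each collapsed `*_input` column should be a matrix column,
# corresponding to one Keras input layer
baked <- prep(ames_recipe) |> bake(new_data = ames_train)
sapply(baked, function(col) paste(class(col), collapse = "/"))
dim(baked$numerical_input) # rows x number of collapsed numeric predictors
```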

## Define Keras Functional Model with `kerasnip`

Now, we define our Keras functional model using `kerasnip`'s layer blocks. This model will have four distinct input layers: one for numerical features and three for categorical features. These branches will be processed separately and then concatenated before the final output layer.

```{r define-kerasnip-model}
# Define layer blocks for the multi-input functional model

# Input blocks for numerical and categorical features
input_numerical <- function(input_shape) {
  layer_input(shape = input_shape, name = "numerical_input")
}

input_neighborhood <- function(input_shape) {
  layer_input(shape = input_shape, name = "neighborhood_input")
}

input_bldg <- function(input_shape) {
  layer_input(shape = input_shape, name = "bldg_input")
}

input_condition <- function(input_shape) {
  layer_input(shape = input_shape, name = "condition_input")
}

# Processing blocks for each input type
dense_numerical <- function(tensor, units = 32, activation = "relu") {
  tensor |>
    layer_dense(units = units, activation = activation)
}

dense_categorical <- function(tensor, units = 16, activation = "relu") {
  tensor |>
    layer_dense(units = units, activation = activation)
}

# Concatenation block
concatenate_features <- function(numeric, neighborhood, bldg, condition) {
  layer_concatenate(list(numeric, neighborhood, bldg, condition))
}

# Output block for regression
output_regression <- function(tensor) {
  layer_dense(tensor, units = 1, name = "output")
}

# Create the kerasnip model specification function
create_keras_functional_spec(
  model_name = "ames_functional_mlp",
  layer_blocks = list(
    numerical_input = input_numerical,
    neighborhood_input = input_neighborhood,
    bldg_input = input_bldg,
    condition_input = input_condition,
    processed_numerical = inp_spec(dense_numerical, "numerical_input"),
    processed_neighborhood = inp_spec(dense_categorical, "neighborhood_input"),
    processed_bldg = inp_spec(dense_categorical, "bldg_input"),
    processed_condition = inp_spec(dense_categorical, "condition_input"),
    combined_features = inp_spec(
      concatenate_features,
      c(
        processed_numerical = "numeric",
        processed_neighborhood = "neighborhood",
        processed_bldg = "bldg",
        processed_condition = "condition"
      )
    ),
    output = inp_spec(output_regression, "combined_features")
  ),
  mode = "regression"
)

# Clean up the spec when the vignette is done knitting
on.exit(remove_keras_spec("ames_functional_mlp"), add = TRUE)
```

## Model Specification

We'll define our `ames_functional_mlp` model specification and set some hyperparameters to `tune()`. Note how the arguments are prefixed with their corresponding block names (e.g., `processed_numerical_units`).

```{r define-tune-spec}
# Define the tunable model specification
functional_mlp_spec <- ames_functional_mlp(
  # Tunable parameters for the numerical branch
  processed_numerical_units = tune(),
  # Tunable parameters for the categorical branches
  processed_neighborhood_units = tune(),
  processed_bldg_units = tune(),
  processed_condition_units = tune(),
  # Fixed compilation and fitting parameters
  compile_loss = "mean_squared_error",
  compile_optimizer = "adam",
  compile_metrics = c("mean_absolute_error"),
  fit_epochs = 50,
  fit_batch_size = 32,
  fit_validation_split = 0.2,
  fit_callbacks = list(
    callback_early_stopping(monitor = "val_loss", patience = 5)
  )
) |>
  set_engine("keras")

print(functional_mlp_spec)
```

## Create Workflow

A `workflow` combines the recipe and the model specification.

```{r create-workflow}
ames_wf <- workflow() |>
  add_recipe(ames_recipe) |>
  add_model(functional_mlp_spec)

print(ames_wf)
```

## Define Tuning Grid

We create a regular grid over the tunable hyperparameters. With four parameters at three levels each, this yields 3^4 = 81 candidate models; the racing method used in the next section eliminates poor candidates early, so most are never fit on every fold.

```{r create-tuning-grid}
# Define the tuning grid
params <- extract_parameter_set_dials(ames_wf) |>
  update(
    processed_numerical_units = hidden_units(range = c(32, 128)),
    processed_neighborhood_units = hidden_units(range = c(16, 64)),
    processed_bldg_units = hidden_units(range = c(16, 64)),
    processed_condition_units = hidden_units(range = c(16, 64))
  )
functional_mlp_grid <- grid_regular(params, levels = 3)

print(functional_mlp_grid)
```

## Tune Model

Now we use `tune_race_anova()` from the `finetune` package to evaluate the candidates with cross-validation, discarding clearly inferior hyperparameter combinations after each resampling stage.

```{r tune-model}
# Note: Parallel processing with `plan(multisession)` is currently not working
# with Keras models due to backend conflicts.
# plan(multisession)

set.seed(123)
ames_tune_results <- tune_race_anova(
  ames_wf,
  resamples = ames_folds,
  grid = functional_mlp_grid,
  metrics = metric_set(rmse, mae, rsq), # Evaluate regression metrics
  control = control_race(save_pred = TRUE, save_workflow = TRUE)
)
```

## Inspect Tuning Results

We can inspect the tuning results to see which hyperparameter combinations performed best.

```{r inspect-results}
# Show the best performing models based on RMSE
show_best(ames_tune_results, metric = "rmse", n = 5)

# Autoplot the results
# autoplot(ames_tune_results) # Currently does not work due to a label issue.

# Select the best hyperparameters
best_functional_mlp_params <- select_best(ames_tune_results, metric = "rmse")
print(best_functional_mlp_params)
```
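
Because we tuned with a racing method, `finetune::plot_race()` offers another view of the results, showing which candidates survived each elimination stage (a sketch; it assumes the racing plot is unaffected by the label issue noted above).

```{r plot-race}
# One line per candidate; lines that stop early were eliminated during racing
plot_race(ames_tune_results)
```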

## Finalize Workflow and Fit Model

Once we have the best hyperparameters, we finalize the workflow and fit the model on the entire training dataset.

```{r finalize-fit}
# Finalize the workflow with the best hyperparameters
final_ames_wf <- finalize_workflow(ames_wf, best_functional_mlp_params)

# Fit the final model on the full training data
final_ames_fit <- fit(final_ames_wf, data = ames_train)

print(final_ames_fit)
```

### Inspect Final Model

You can extract the underlying Keras model and its training history for further inspection.

```{r inspect-final-keras-model-summary}
# Print the Keras model summary
final_ames_fit |>
  extract_fit_parsnip() |>
  extract_keras_model() |>
  summary()
```

```{r inspect-final-keras-model-plot}
# Plot the Keras model architecture
final_ames_fit |>
  extract_fit_parsnip() |>
  extract_keras_model() |>
  plot()
```

```{r inspect-final-keras-model-history}
# Plot the training history
final_ames_fit |>
  extract_fit_parsnip() |>
  extract_keras_history() |>
  plot()
```

## Make Predictions and Evaluate

Finally, we make predictions on the test set and evaluate the model's performance.

```{r predict-evaluate}
# Make predictions on the test set
ames_test_pred <- predict(final_ames_fit, new_data = ames_test)

# Combine predictions with the observed values
ames_results <- tibble::tibble(
  Sale_Price = ames_test$Sale_Price,
  .pred = ames_test_pred$.pred
)

print(head(ames_results))

# Evaluate performance using yardstick metrics
reg_metrics <- metric_set(rmse, mae, rsq)
metrics_results <- reg_metrics(ames_results, truth = Sale_Price, estimate = .pred)

print(metrics_results)
```
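
Since `ggplot2` is loaded above, a predicted-versus-observed plot gives a quick visual check of fit quality; a minimal sketch using the `ames_results` tibble built in the previous chunk:

```{r plot-predictions}
# Predicted vs. observed sale prices; points near the dashed diagonal
# indicate accurate predictions
ggplot(ames_results, aes(x = Sale_Price, y = .pred)) +
  geom_point(alpha = 0.4) +
  geom_abline(linetype = "dashed", color = "red") +
  labs(
    x = "Observed Sale Price",
    y = "Predicted Sale Price",
    title = "Predicted vs. Observed Sale Prices (Test Set)"
  )
```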