Source code for tabensemb.model.widedeep

from .base import AbstractWrapper
from torch import nn
from tabensemb.utils import *
from tabensemb.model import AbstractModel
from skopt.space import Integer, Categorical, Real
from packaging import version

# Compatibility shim for torch < 2.0.0: pytorch_widedeep > 1.2.2 imports
# ``LRScheduler`` instead of ``_LRScheduler``, so the old class is exposed under
# the new name on ``torch.optim.lr_scheduler``.
# NOTE(review): ``torch`` is not imported explicitly in this file; presumably it
# is provided by ``from tabensemb.utils import *`` -- confirm.
if version.parse(torch.__version__) < version.parse("2.0.0"):
    from torch.optim.lr_scheduler import _LRScheduler

    torch.optim.lr_scheduler.LRScheduler = _LRScheduler


class WideDeep(AbstractModel):
    """
    Model base that trains the tabular models of ``pytorch_widedeep`` (see
    :meth:`_get_model_names` for the supported names) through the
    ``pytorch_widedeep`` trainer.
    """

    def __init__(self, *args, **kwargs):
        """
        Forward all arguments to the parent constructor, then reject
        multi-target tasks.

        Raises
        ------
        Exception
            If the trainer defines more than one label.
        """
        super().__init__(*args, **kwargs)
        if len(self.trainer.label_name) > 1:
            raise Exception(
                "pytorch-widedeep does not support multi-target tasks. "
                "See https://github.com/jrzaurin/pytorch-widedeep/issues/152"
            )

    def _get_program_name(self):
        """Return the name of this model base."""
        return "WideDeep"

    def _space(self, model_name):
        """
        Return the hyperparameter search space of ``model_name``.

        Spaces are selected around default parameters. Each model-specific
        space is followed by the trainer-level space ``self.trainer.SPACE``.

        Raises
        ------
        KeyError
            If ``model_name`` is not one of the supported model names.
        """

        def dropout(name):
            # Every dropout-like rate shares the same uniform [0.0, 0.3] range.
            return Real(low=0.0, high=0.3, prior="uniform", name=name)

        def integer(low, high, name):
            return Integer(low=low, high=high, prior="uniform", name=name, dtype=int)

        def choice(categories, name):
            return Categorical(categories=categories, name=name)

        trainer_space = self.trainer.SPACE
        _space_dict = {
            "TabMlp": [
                dropout("cat_embed_dropout"),
                dropout("mlp_dropout"),
            ]
            + trainer_space,
            "TabResnet": [
                dropout("cat_embed_dropout"),
                dropout("mlp_dropout"),
                dropout("blocks_dropout"),
            ]
            + trainer_space,
            "TabTransformer": [
                dropout("cat_embed_dropout"),
                dropout("mlp_dropout"),
                choice([8, 16, 32], "input_dim"),
                choice([2, 4, 8], "n_heads"),
                integer(4, 8, "n_blocks"),
                dropout("attn_dropout"),
                dropout("ff_dropout"),
            ]
            + trainer_space,
            "TabNet": [
                dropout("cat_embed_dropout"),
                integer(1, 6, "n_steps"),
                integer(4, 16, "step_dim"),
                integer(4, 16, "attn_dim"),
                dropout("dropout"),
                integer(1, 4, "n_glu_step_dependent"),
                integer(1, 4, "n_glu_shared"),
                Real(low=1.0, high=1.5, prior="uniform", name="gamma"),
            ]
            + trainer_space,
            "SAINT": [
                dropout("cat_embed_dropout"),
                dropout("mlp_dropout"),
                choice([4, 8, 16], "input_dim"),
                choice([1, 2, 4], "n_heads"),
                integer(1, 4, "n_blocks"),
                dropout("attn_dropout"),
                dropout("ff_dropout"),
            ]
            + trainer_space,
            "ContextAttentionMLP": [
                dropout("cat_embed_dropout"),
                choice([8, 16, 32], "input_dim"),
                integer(2, 4, "n_blocks"),
                dropout("attn_dropout"),
            ]
            + trainer_space,
            "SelfAttentionMLP": [
                dropout("cat_embed_dropout"),
                choice([8, 16, 32], "input_dim"),
                choice([2, 4, 8], "n_heads"),
                integer(2, 4, "n_blocks"),
                dropout("attn_dropout"),
            ]
            + trainer_space,
            "FTTransformer": [
                dropout("cat_embed_dropout"),
                dropout("mlp_dropout"),
                choice([8, 16, 32], "input_dim"),
                choice([2, 4, 8], "n_heads"),
                integer(2, 4, "n_blocks"),
                dropout("attn_dropout"),
                dropout("ff_dropout"),
                Real(low=0.4, high=0.6, prior="uniform", name="kv_compression_factor"),
            ]
            + trainer_space,
            "TabPerceiver": [
                dropout("cat_embed_dropout"),
                dropout("mlp_dropout"),
                choice([8, 16, 32], "input_dim"),
                choice([2, 4], "n_cross_attn_heads"),
                choice([2, 4, 8], "n_latents"),
                choice([16, 32, 64], "latent_dim"),
                choice([2, 4], "n_latent_heads"),
                integer(2, 4, "n_latent_blocks"),
                integer(2, 4, "n_perceiver_blocks"),
                dropout("attn_dropout"),
                dropout("ff_dropout"),
            ]
            + trainer_space,
            "TabFastFormer": [
                dropout("cat_embed_dropout"),
                dropout("mlp_dropout"),
                choice([8, 16, 32], "input_dim"),
                choice([2, 4, 8], "n_heads"),
                integer(2, 4, "n_blocks"),
                dropout("attn_dropout"),
                dropout("ff_dropout"),
            ]
            + trainer_space,
        }
        return _space_dict[model_name]

    def _initial_values(self, model_name):
        """
        Return the initial hyperparameters of ``model_name``, overridden and
        extended by ``self.trainer.chosen_params``.

        Raises
        ------
        KeyError
            If ``model_name`` is not one of the supported model names.
        """
        _value_dict = {
            "TabMlp": {
                "cat_embed_dropout": 0.1,
                "mlp_dropout": 0.1,
            },
            "TabResnet": {
                "cat_embed_dropout": 0.1,
                "mlp_dropout": 0.1,
                "blocks_dropout": 0.1,
            },
            "TabTransformer": {
                "cat_embed_dropout": 0.1,
                "mlp_dropout": 0.1,
                "input_dim": 32,
                "n_heads": 8,
                "n_blocks": 6,
                "attn_dropout": 0.1,
                "ff_dropout": 0.1,
            },
            "TabNet": {
                "cat_embed_dropout": 0.1,
                "n_steps": 3,
                "step_dim": 8,
                "attn_dim": 8,
                "dropout": 0.0,
                "n_glu_step_dependent": 2,
                "n_glu_shared": 2,
                "gamma": 1.3,
            },
            "SAINT": {
                "cat_embed_dropout": 0.1,
                "mlp_dropout": 0.1,
                "input_dim": 16,
                "n_heads": 4,
                "n_blocks": 2,
                "attn_dropout": 0.2,
                "ff_dropout": 0.1,
            },
            "ContextAttentionMLP": {
                "cat_embed_dropout": 0.1,
                "input_dim": 32,
                "n_blocks": 3,
                "attn_dropout": 0.2,
            },
            "SelfAttentionMLP": {
                "cat_embed_dropout": 0.1,
                "input_dim": 32,
                "n_heads": 8,
                "n_blocks": 3,
                "attn_dropout": 0.2,
            },
            "FTTransformer": {
                "cat_embed_dropout": 0.1,
                "mlp_dropout": 0.1,
                "input_dim": 32,
                "n_heads": 8,
                "n_blocks": 4,
                "attn_dropout": 0.1,
                "ff_dropout": 0.1,
                "kv_compression_factor": 0.5,
            },
            "TabPerceiver": {
                "cat_embed_dropout": 0.1,
                "mlp_dropout": 0.1,
                "input_dim": 32,
                "n_cross_attn_heads": 4,
                "n_latents": 8,  # 16 by default in widedeep.
                "latent_dim": 64,  # 128 by default in widedeep.
                "n_latent_heads": 4,
                "n_latent_blocks": 4,
                "n_perceiver_blocks": 4,
                "attn_dropout": 0.2,
                "ff_dropout": 0.1,
            },
            "TabFastFormer": {
                "cat_embed_dropout": 0.1,
                "mlp_dropout": 0.1,
                "input_dim": 32,
                "n_heads": 8,
                "n_blocks": 4,
                "attn_dropout": 0.2,
                "ff_dropout": 0.1,
            },
        }
        # Only the requested entry is returned, so only that one needs the
        # trainer-level parameters merged in.
        values = _value_dict[model_name]
        values.update(self.trainer.chosen_params)
        return values

    def _new_model(self, model_name, verbose, **kwargs):
        """
        Build a ``pytorch_widedeep`` trainer wrapping a freshly created
        ``model_name`` tabular model.

        Parameters
        ----------
        model_name
            One of the names returned by :meth:`_get_model_names`.
        verbose
            Not used here; the ``pytorch_widedeep`` trainer is always created
            with ``verbose=0``.
        **kwargs
            Hyperparameters. ``lr``, ``batch_size``, ``original_batch_size`` and
            every key consumed by the optimizer or the lr scheduler are
            excluded from the arguments passed to the tabular model.

        Returns
        -------
        pytorch_widedeep.Trainer
            The trainer holding the model, optimizer and lr scheduler.
        """
        # The library class is aliased because its name collides with this class.
        from pytorch_widedeep.models import (
            WideDeep as WDModel,
            TabMlp,
            TabResnet,
            TabTransformer,
            TabNet,
            SAINT,
            ContextAttentionMLP,
            SelfAttentionMLP,
            FTTransformer,
            TabPerceiver,
            TabFastFormer,
        )
        from pytorch_widedeep import Trainer as wd_Trainer

        cont_feature_names = self.trainer.cont_feature_names
        cat_feature_names = self.trainer.cat_feature_names
        has_categorical = len(cat_feature_names) != 0

        (
            opt_name,
            opt_params,
            lrs_name,
            lrs_params,
        ) = self._update_optimizer_lr_scheduler_params(model_name=model_name, **kwargs)

        non_model_keys = ("lr", "batch_size", "original_batch_size")
        model_args = {
            key: value
            for key, value in kwargs.items()
            if key not in non_model_keys
            and key not in opt_params
            and key not in lrs_params
        }
        args = dict(
            column_idx=self.tab_preprocessor.column_idx,
            continuous_cols=cont_feature_names,
            cat_embed_input=(
                self.tab_preprocessor.cat_embed_input if has_categorical else None
            ),
            **model_args,
        )
        if model_name == "TabTransformer":
            # Continuous features are embedded only when there is no
            # categorical feature.
            args["embed_continuous"] = not has_categorical

        mapping = {
            "TabMlp": TabMlp,
            "TabResnet": TabResnet,
            "TabTransformer": TabTransformer,
            "TabNet": TabNet,
            "SAINT": SAINT,
            "ContextAttentionMLP": ContextAttentionMLP,
            "SelfAttentionMLP": SelfAttentionMLP,
            "FTTransformer": FTTransformer,
            "TabPerceiver": TabPerceiver,
            "TabFastFormer": TabFastFormer,
        }

        task = self.trainer.datamodule.task
        loss = self.trainer.datamodule.loss
        if task == "binary" and loss == "cross_entropy":
            loss = "binary_cross_entropy"
        # Remembered so that _pred_single_model can pick the prediction method.
        self.task = task

        tab_model = mapping[model_name](**args)
        if task == "multiclass":
            model = WDModel(
                deeptabular=tab_model, pred_dim=self.trainer.datamodule.n_classes[0]
            )
        else:
            model = WDModel(deeptabular=tab_model)

        optimizer = getattr(torch.optim, opt_name)(model.parameters(), **opt_params)
        lr_scheduler = getattr(torch.optim.lr_scheduler, lrs_name)(
            optimizer, **lrs_params
        )

        wd_trainer = wd_Trainer(
            model,
            objective=loss,
            verbose=0,
            device="cpu" if self.trainer.device == "cpu" else "cuda",
            num_workers=0,
            optimizers=optimizer,
            lr_schedulers=lr_scheduler,
        )
        return wd_trainer

    def _train_data_preprocess(self, model_name, warm_start=False):
        """
        Transform the train/validation/test sets of the datamodule with a
        ``TabPreprocessor``.

        When ``warm_start`` is False a new preprocessor is fitted on the
        training set and stored in ``self.tab_preprocessor``; otherwise the
        stored preprocessor is reused without refitting.

        Returns
        -------
        dict
            ``X_train``/``X_val``/``X_test`` (transformed features) and
            ``y_train``/``y_val``/``y_test`` (flattened targets).
        """
        import pytorch_widedeep
        from pytorch_widedeep.preprocessing import TabPreprocessor
        from pandas import option_context

        data = self.trainer.datamodule

        if warm_start:
            tab_preprocessor = self.tab_preprocessor
        else:
            cont_feature_names = self.trainer.cont_feature_names
            cat_feature_names = self.trainer.cat_feature_names
            preprocessor_kwargs = dict(
                continuous_cols=cont_feature_names,
                cat_embed_cols=(
                    cat_feature_names if len(cat_feature_names) != 0 else None
                ),
            )
            if version.parse(pytorch_widedeep.__version__) >= version.parse("1.2.3"):
                # https://github.com/jrzaurin/pytorch-widedeep/commit/cc0d1ad59c447dabd29072a552194ece12173778#diff-2f6e79eedee796c7edeac4fffc29ef35ecbfb8c234ff63313509e412a8d3ed42L108
                preprocessor_kwargs["cols_to_scale"] = cont_feature_names
            tab_preprocessor = TabPreprocessor(**preprocessor_kwargs)

        # Silences pandas' chained-assignment warning while transforming.
        with option_context("mode.chained_assignment", None):
            if warm_start:
                X_tab_train = tab_preprocessor.transform(data.X_train)
            else:
                X_tab_train = tab_preprocessor.fit_transform(data.X_train)
            X_tab_val = tab_preprocessor.transform(data.X_val)
            X_tab_test = tab_preprocessor.transform(data.X_test)

        if not warm_start:
            # Stored only after all three sets were transformed successfully.
            self.tab_preprocessor = tab_preprocessor

        return {
            "X_train": X_tab_train,
            "y_train": data.y_train.flatten(),
            "X_val": X_tab_val,
            "y_val": data.y_val.flatten(),
            "X_test": X_tab_test,
            "y_test": data.y_test.flatten(),
        }

    def _train_single_model(
        self,
        model,
        model_name,
        epoch,
        X_train,
        y_train,
        X_val,
        y_val,
        verbose,
        warm_start,
        in_bayes_opt,
        **kwargs,
    ):
        """
        Fit the ``pytorch_widedeep`` trainer ``model`` and record its training
        history.

        pytorch_widedeep uses an approximated loss calculation procedure that
        calculates the average loss across batches, which is not what we do
        (in a precise way for MSE) at the end of training and makes results
        from the callback differ from our final metrics.

        After fitting, ``self.train_losses``, ``self.val_losses`` and
        ``self.restored_epochs`` are updated for ``model_name``. When
        ``warm_start`` is True the model is fine-tuned and fitted for a single
        epoch instead of ``epoch`` epochs. ``kwargs`` must contain
        ``batch_size``.
        """
        from ._widedeep.widedeep_callback import WideDeepCallback, EarlyStopping

        es_callback = EarlyStopping(
            patience=self.trainer.static_params["patience"],
            verbose=1 if verbose else 0,
            restore_best_weights=True,
        )
        model._set_callbacks_and_metrics(
            callbacks=[
                es_callback,
                WideDeepCallback(total_epoch=epoch, verbose=verbose),
            ],
            metrics=None,
        )

        if warm_start:
            # The model is stored in cpu after loaded from disk. And widedeep does not make model and data on the
            # same device. Also note that when _finetune and cuda is available, data.cuda() is called.
            # NOTE(review): this reads ``self.device`` while _new_model reads
            # ``self.trainer.device`` -- presumably equivalent; confirm.
            from pytorch_widedeep.training import _finetune

            _finetune.use_cuda = self.device == "cuda"

        model.fit(
            X_train={"X_tab": X_train, "target": y_train},
            X_val={"X_tab": X_val, "target": y_val},
            n_epochs=1 if warm_start else epoch,
            batch_size=int(kwargs["batch_size"]),
            finetune=warm_start,
            finetune_epochs=10,
        )

        self.train_losses[model_name] = model.history["train_loss"]
        self.val_losses[model_name] = model.history["val_loss"]
        self.restored_epochs[model_name] = es_callback.best_epoch

    def _pred_single_model(self, model, X_test, verbose, **kwargs):
        """
        Predict on ``X_test`` in a single batch.

        Returns
        -------
        Predictions reshaped to ``(-1, 1)`` for regression, the second
        probability column reshaped to ``(-1, 1)`` for binary tasks, and the
        full ``predict_proba`` output otherwise.
        """
        original_batch_size = model.batch_size
        # The stored batch size is removed for the duration of the prediction.
        # NOTE(review): presumably the trainer would otherwise use it instead
        # of the ``batch_size`` passed below -- confirm against
        # pytorch_widedeep.
        delattr(model, "batch_size")
        try:
            if self.task == "regression":
                res = model.predict(X_tab=X_test, batch_size=len(X_test)).reshape(
                    -1, 1
                )
            elif self.task == "binary":
                res = model.predict_proba(X_tab=X_test, batch_size=len(X_test))[
                    :, 1
                ].reshape(-1, 1)
            else:
                res = model.predict_proba(X_tab=X_test, batch_size=len(X_test))
        finally:
            # Restore the training batch size even if the prediction fails, so
            # the trainer is never left without its ``batch_size`` attribute.
            setattr(model, "batch_size", original_batch_size)
        return res

    def _data_preprocess(self, df, derived_data, model_name):
        """
        Transform ``df`` with the fitted ``self.tab_preprocessor``.

        ``derived_data`` and ``model_name`` are not used.
        """
        # SettingWithCopyWarning in TabPreprocessor.transform
        # i.e. df_cont[self.standardize_cols] = self.scaler.transform(df_std.values)
        from pandas import option_context

        with option_context("mode.chained_assignment", None):
            X_df = self.tab_preprocessor.transform(df)
        return X_df

    @staticmethod
    def _get_model_names():
        """Return the names of all supported ``pytorch_widedeep`` models."""
        return [
            "TabMlp",
            "TabResnet",
            "TabTransformer",
            "TabNet",
            "SAINT",
            "ContextAttentionMLP",
            "SelfAttentionMLP",
            "FTTransformer",
            "TabPerceiver",
            "TabFastFormer",
        ]
def widedeep_forward(self, input):
    """
    Replacement ``forward`` for the ``nn.Sequential`` stored in
    ``WideDeep.deeptabular`` (the WideDeep model is a ``nn.Module`` whose
    ``deeptabular`` is a ``nn.Sequential`` ending with a linear layer).

    Modules are applied in order; the tensor entering the last module is
    stored on ``self`` as ``_hidden_representation`` before that module runs.
    The output of the last module is returned.
    """
    last_position = len(self) - 1
    for position, layer in enumerate(self):
        if position == last_position:
            # Input of the final layer, i.e. the hidden representation.
            self._hidden_representation = input
        input = layer(input)
    return input
class WideDeepWrapper(AbstractWrapper):
    """
    Wrapper giving access to the hidden representation (the input of the last
    module of ``deeptabular``) of a :class:`WideDeep` model base.
    """

    def __init__(self, model: WideDeep):
        """
        Wrap ``model``.

        Raises
        ------
        Exception
            If the wrapped model is ``"TabNet"``.
        """
        super().__init__(model=model)
        if self.model_name == "TabNet":
            raise Exception("Wrapping TabNet is not supported.")

    def wrap_forward(self):
        """
        Swap the ``forward`` of the ``deeptabular`` component for
        ``widedeep_forward``, keeping the original in ``self.original_forward``.
        """
        deeptabular = self.wrapped_model.model[self.model_name].model.deeptabular
        self.original_forward = deeptabular.forward
        # Bind the function to this instance so it is called like a method.
        bound_forward = widedeep_forward.__get__(deeptabular, nn.Sequential)
        deeptabular.forward = bound_forward

    def reset_forward(self):
        """Put back the ``forward`` saved by :meth:`wrap_forward`."""
        deeptabular = self.wrapped_model.model[self.model_name].model.deeptabular
        deeptabular.forward = self.original_forward

    @property
    def hidden_rep_dim(self):
        """
        Output dimension of the first module of ``deeptabular``.

        In pytorch_widedeep.models.wide_deep, see WideDeep_add_pred_layer().
        A warning is issued when the model has a deeptext or deepimage
        component.
        """
        wd_model = self.wrapped_model.model[self.model_name].model
        only_tabular = wd_model.deeptext is None and wd_model.deepimage is None
        if not only_tabular:
            warnings.warn(
                "The WideDeep model has deeptext or deepimage component, which is not supported for "
                "hidden representation extraction."
            )
        return wd_model.deeptabular[0].output_dim

    @property
    def hidden_representation(self):
        """Value stored as ``_hidden_representation`` on ``deeptabular``."""
        deeptabular = self.wrapped_model.model[self.model_name].model.deeptabular
        return deeptabular._hidden_representation