Compute two-way feature interaction strength based on Friedman's H-statistic.
interaction_strength(pd_2d, pd_1d)
pd_2d | Data frame containing the 2D partial dependence as returned by
|
---|---|
pd_1d | List of data frames containing the 1D partial dependence for
var1 and var2 as returned by |
A numeric value between 0 and 1 indicating interaction strength.
if (FALSE) { data('mtpl_be') features <- setdiff(names(mtpl_be), c('id', 'nclaims', 'expo', 'long', 'lat')) set.seed(12345) gbm_fit <- gbm::gbm(as.formula(paste('nclaims ~', paste(features, collapse = ' + '))), distribution = 'poisson', data = mtpl_be, n.trees = 50, interaction.depth = 3, shrinkage = 0.1) gbm_fun <- function(object, newdata) mean(predict(object, newdata, n.trees = object$n.trees, type = 'response')) pd_2d <- get_pd(mfit = gbm_fit, var = 'ageph_coverage', grid = tidyr::expand_grid('ageph' %>% get_grid(data = mtpl_be), 'coverage' %>% get_grid(data = mtpl_be)), data = mtpl_be, subsample = 10000, fun = gbm_fun) pd_1d <- list(get_pd(mfit = gbm_fit, var = 'ageph', grid = 'ageph' %>% get_grid(data = mtpl_be), data = mtpl_be, subsample = 10000, fun = gbm_fun), get_pd(mfit = gbm_fit, var = 'coverage', grid = 'coverage' %>% get_grid(data = mtpl_be), data = mtpl_be, subsample = 10000, fun = gbm_fun)) interaction_strength(pd_2d, pd_1d) }