Resource | GitHub Actions | Code Coverage |
---|---|---|
Platforms | Windows, macOS, Linux | codecov |
R CMD check |
Gaussian Process (GP) and nearest neighbor Gaussian Process (nnGP) approaches for nonparametric modeling.
# Install GPCERF from GitHub at the `develop` ref, then attach it.
library(devtools)
install_github(repo = "NSAPH-Software/GPCERF", ref = "develop")
library(GPCERF)
library(GPCERF)

# Fix the RNG seed so the synthetic data are reproducible.
set.seed(781)
sim_data <- generate_synthetic_data(sample_size = 500, gps_spec = 1)

# Number of threads; used as the default for the SuperLearner wrappers below
# and passed as `nthread` to estimate_cerf_gp().
n_core <- 1
# Wrapper around SuperLearner::SL.xgboost that sets the thread count.
# `nthread` defaults to the global `n_core`; any further arguments are
# forwarded unchanged through `...`. The wrapper is referenced by name
# ("m_xgboost") in the `sl_lib` argument of estimate_gps().
m_xgboost <- function(nthread = n_core, ...) {
  SuperLearner::SL.xgboost(nthread = nthread, ...)
}
# Wrapper around SuperLearner::SL.ranger that sets the thread count.
# `num.threads` defaults to the global `n_core`; any further arguments are
# forwarded unchanged through `...`. The wrapper is referenced by name
# ("m_ranger") in the `sl_lib` argument of estimate_gps().
m_ranger <- function(num.threads = n_core, ...) {
  SuperLearner::SL.ranger(num.threads = num.threads, ...)
}
# Estimate GPS function
# The covariate matrix is `sim_data` without its first two columns
# (presumably outcome and treatment; verify against generate_synthetic_data()).
gps_m <- estimate_gps(cov_mt = sim_data[, -(1:2)],
                      w_all = sim_data$treat,
                      sl_lib = c("m_xgboost", "m_ranger"),
                      dnorm_log = TRUE)

# exposure values
# Grid of exposure levels from the 5th to the 95th percentile of the observed
# treatment, in steps of 1.
q1 <- stats::quantile(sim_data$treat, 0.05)
q2 <- stats::quantile(sim_data$treat, 0.95)

w_all <- seq(q1, q2, 1)

# Hyperparameter candidates: 10 log-spaced values in [1e-2, 1e2] for each of
# alpha and beta, and 3 values for g_sigma (10 * 10 * 3 = 300 combinations).
params_lst <- list(alpha = 10^seq(-2, 2, length.out = 10),
                   beta = 10^seq(-2, 2, length.out = 10),
                   g_sigma = c(0.1, 1, 10),
                   tune_app = "all")

cerf_gp_obj <- estimate_cerf_gp(sim_data,
                                w_all,
                                gps_m,
                                params = params_lst,
                                nthread = n_core)
summary(cerf_gp_obj)
plot(cerf_gp_obj)
GPCERF standard Gaussian process exposure response function object
Optimal hyper parameters(#trial: 300):
alpha = 12.9154966501488 beta = 12.9154966501488 g_sigma = 0.1
Optimal covariate balance:
cf1 = 0.072
cf2 = 0.082
cf3 = 0.062
cf4 = 0.068
cf5 = 0.056
cf6 = 0.082
Original covariate balance:
cf1 = 0.222
cf2 = 0.112
cf3 = 0.175
cf4 = 0.318
cf5 = 0.198
cf6 = 0.257
----***----
# Fix the RNG seed so the synthetic data are reproducible.
set.seed(781)
sim_data <- generate_synthetic_data(sample_size = 5000, gps_spec = 1)
# Wrapper around SuperLearner::SL.xgboost that sets the thread count.
# `nthread` defaults to 12 here; lower it if the machine has fewer cores.
# Any further arguments are forwarded unchanged through `...`. The wrapper is
# referenced by name ("m_xgboost") in the `sl_lib` argument of estimate_gps().
m_xgboost <- function(nthread = 12, ...) {
  SuperLearner::SL.xgboost(nthread = nthread, ...)
}
# Wrapper around SuperLearner::SL.ranger that sets the thread count.
# `num.threads` defaults to 12 here; lower it if the machine has fewer cores.
# Any further arguments are forwarded unchanged through `...`. The wrapper is
# referenced by name ("m_ranger") in the `sl_lib` argument of estimate_gps().
m_ranger <- function(num.threads = 12, ...) {
  SuperLearner::SL.ranger(num.threads = num.threads, ...)
}
# Estimate GPS function
# The covariate matrix is `sim_data` without its first two columns
# (presumably outcome and treatment; verify against generate_synthetic_data()).
gps_m <- estimate_gps(cov_mt = sim_data[, -(1:2)],
                      w_all = sim_data$treat,
                      sl_lib = c("m_xgboost", "m_ranger"),
                      dnorm_log = TRUE)

# exposure values
# Grid of exposure levels from the 5th to the 95th percentile of the observed
# treatment, in steps of 1.
q1 <- stats::quantile(sim_data$treat, 0.05)
q2 <- stats::quantile(sim_data$treat, 0.95)

w_all <- seq(q1, q2, 1)

# Hyperparameter candidates: 10 log-spaced values in [1e-2, 1e2] for each of
# alpha and beta, and 3 values for g_sigma (10 * 10 * 3 = 300 combinations),
# plus the nnGP-specific settings n_neighbor and block_size.
params_lst <- list(alpha = 10^seq(-2, 2, length.out = 10),
                   beta = 10^seq(-2, 2, length.out = 10),
                   g_sigma = c(0.1, 1, 10),
                   tune_app = "all",
                   n_neighbor = 50,
                   block_size = 1e3)

cerf_nngp_obj <- estimate_cerf_nngp(sim_data,
                                    w_all,
                                    gps_m,
                                    params = params_lst,
                                    nthread = 12)
summary(cerf_nngp_obj)
plot(cerf_nngp_obj)
GPCERF nearest neighbor Gaussian process exposure response function object summary
Optimal hyper parameters(#trial: 300):
alpha = 0.0278255940220712 beta = 0.215443469003188 g_sigma = 0.1
Optimal covariate balance:
cf1 = 0.058
cf2 = 0.071
cf3 = 0.087
cf4 = 0.066
cf5 = 0.076
cf6 = 0.088
Original covariate balance:
cf1 = 0.115
cf2 = 0.137
cf3 = 0.145
cf4 = 0.296
cf5 = 0.208
cf6 = 0.225
----***----
Ren, B., Wu, X., Braun, D., Pillai, N. and Dominici, F., 2021. Bayesian modeling for exposure response curve via gaussian processes: Causal effects of exposure to air pollution on health outcomes. arXiv preprint arXiv:2105.03454.