library(SplitKnockoff)
Author : Haoxue Wang, Yang Cao, Xinwei Sun, Yuan Yao
Split Knockoff is a novel method for controlling the false discovery rate (FDR) in structural sparsity setting. This proposed scheme relaxes the linear subspace constraint to its neighborhood, often known as variable splitting in optimization. This vignette illustrates the usage of Splitknockoff with simulation experiments and will help you apply the split knockoff method in a light way. On top of that, an application to Alzheimer’s Disease study with MRI data demonstrates that the split knockoff method can disclose important lesion regions in brains associated with the disease and connections between neighboring regions of high contrast variations during disease progression.
install.packages("SplitKnockoff") # just one line code to install our package
There is another Matlab package available. Simulation experiment of both packages verify the conclusion of the paper. This R package is more convenient when applied to new dataset as glmnet can be used directly by
install.packages("glmnet'") # just one line code to install the glmnet tool
Please update your glmnet package to the latest version for smooth usage of this package, this simulation uses glmnet 4.2
For more information, please see the manual inside this package.
splitknockoff.filter(X, D, y, option) : Split Knockoff filter for structural sparsity problem.
splitknockoff.select(W, q): this function is for variable selection based on W statistics.
private.hittingpoint(coef, lambda_vec): calculate the hitting time and the sign of respective variable in a path.
simu_unit(n, p, D, A, c, k, option): the simulation unit for simulation experiments.
split_knockoffs.create(X, y, D, nu, option): create a knockoff copy for A_gamma
split_knockoffs.statistics.pathorder.W_path(X, D, y, nu, option): generate the knockoff statistics W for beta from a split LASSO path in the intercepetion assignment step, using the method of path order.
Figure 1b of the paper, comparing the different distributions of Knockoff statistics and Split Knockoff statistics.
Figure 2a of the paper, FDR in \(D_1\)
Figure 2b of the paper, Power in \(D_1\)
Figure 2b of the paper, FDR in \(D_2\)
Figure 2b of the paper, Power in \(D_2\)
Figure 2b of the paper, FDR in \(D_3\)
Figure 2b of the paper, Power in \(D_3\)
Figure 3a of the paper, Effect of Feature Correlation: FDR for q = 0.2
Figure 3b of the paper, Effect of Feature Correlation: Power for q = 0.2
Figure 4a of the paper, Effect of SNR: FDR for q = 0.2
Figure 4b of the paper, Effect of SNR: Power for q = 0.2
Figure 5a of the paper, Effect of Sparsity Level: FDR for q = 0.2
Figure 5b of the paper, Effect of Sparsity Level: Power and FDR for q = 0.2
install.packages("SplitKnockoff") # install our package
library(latex2exp)
library(ggplot2)
library(Matrix)
library(glmnet)
library(MASS)
library(SplitKnockoff)
<- 20 # sparsity level
k <- 1 # magnitude
A <- 350 # sample size
n <- 100 # dimension of variables
p <- 0.5 # feature correlation
c <-1 # noise level
sigma
<- array(data = NA, dim = length(data), dimnames = NULL)
option $q <- 0.2
option$eta <- 0.1
option$method <- 'knockoff'
option$stage0 <- 'path'
option$normalize <- 'true'
option$cv_rule <- 'min'
option$lambda <- 10.^seq(0, -6, by=-0.01)
option$nu <- 10
option$copy <- 'true'
option<- option[-1] option
<- diag(p)
D <- nrow(D)
m
# generate X
= matrix(0, p, p)
Sigma for( i in 1: p){
for(j in 1: p){
<- c^(abs(i - j))
Sigma[i, j]
} }
library(mvtnorm) # package mvtnorm needed for this generation
set.seed(100)
<- rmvnorm(n,matrix(0, p, 1), Sigma) # generate X X
<- matrix(0, p, 1)
beta_true for( i in 1: k){
1] = A
beta_true[i, if ( i%%3 == 1){
1] = -A
beta_true[i,
}
}<- D %*% beta_true
gamma_true
<- which(gamma_true!=0) S0
generate varepsilon and noise and y
# generate varepsilon
set.seed(1)
# generate noise and y
<- rnorm(n) * sqrt(sigma)
varepsilon <- X %*% beta_true + varepsilon y
use the key function filter to get the Z_path and t_Z_path
## Split Knockoff
<- splitknockoff.filter(X, D, y, option)
filter_result <- filter_result$Z
Z_path <- filter_result$t_Z t_Z_path
plot the figure
<- sub("TRUE","0",is.element(1:m, S0))
a <- sub("FALSE","1",a)
b <- data.frame(x=Z_path[[1]],y= t_Z_path[[1]], feature=b)
df save.image("C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/figure_1/result/figure_1.RData")
png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/figure_1/plot/figure_1b.png', height=1000, width=1000)
<- ggplot(data=df,aes(x=x, y=y, color=feature))+geom_point(size=2)
p <-p + labs(title = "Split Knockoff with Lasso Statistic")
p <-p + labs(x = TeX("Value of $Z_i$"))
p <-p + labs(y = TeX("Value of $\\tilde{Z_i}$"))
p <-p + scale_fill_discrete(breaks=c("Null feature","Non-null feature"))
p <-p + geom_abline(intercept=0,slope=1 )
p <-p + scale_colour_discrete(labels=c("Null feature", "Non-null feature"))
p <-p + scale_y_continuous(limits = c(0, Z_path[[1]]))
p print(p)
dev.off()
here we see the result
# 1 June, 2021
# revised 7 July, 2021
# author:Haoxue Wang (haoxwang@student.ethz.ch)
# This script reproduces the figures of FDR and power in Section 5.2, 5.3, 5.4
# of the paper, comparing the performance of Split Knockoffs and Knockoffs.
#
# The intermediate result will be automatically saved to
# '../result/temp.RData’ with the proceeding of the calculation. The final
# result will be saved to '../result/examples.mat'. The whole calculation
# may takes hours to finish.
install.packages("SplitKnockoff") # install our package
library(latex2exp)
library(ggplot2)
library(Matrix)
library(glmnet)
library(MASS)
library(SplitKnockoff)
<- 20 # sparsity level
k <- 1 # magnitude
A <- 350 # sample size
n <- 100 # dimension of variables
p <- 0.5 # feature correlation
c
<- array(data=NA, dim = length(data), dimnames = NULL)
option $q <- 0.2
option$eta <- 0.1
option$stage0 <- 'path'
option$normalize <- 'true'
option$lambda <- 10.^seq(0, -6, by=-0.01)
option$copy <- 'true'
option<- option[-1]
option
# settings for nu
<- seq(-1, 3, by=0.2)
expo $nu <- 10.^expo
option<- length(option$nu)
num_nu ## calculation
# generate D1, D2, and D3
= matrix(0,p-1, p)
D_G
for (i in 1:p-1){
<- 1
D_G[i, i] +1] <- -1
D_G[i, i
}
<-diag(p)
D_1 <- D_G
D_2 <- rbind(diag(p), D_G)
D_3 <- array(data = NA, dim = length(data), dimnames = NULL)
D_s $D_1 <- D_1
D_s$D_2 <- D_2
D_s$D_3 <- D_3
D_s<- D_s[-1]
D_s
<- array(0,c(3, num_nu, 2))
fdr_split <- array(0,c(3, num_nu,2))
power_split
# fdr_knock <- matrix(0, 2, 2)
# power_knock <- matrix(0, 2, 2)
<- 2
num_method <- c('knockoff', 'knockoff+')
method_s
# attention! for quick speed, please set the meth manually 1 and 2
# when meth =2, set the ratio calculation offset=1 in function select
= 1
meth for (i in 1: 3){
# choose the respective D for each example
sprintf('Running example for D_%d', i)
<- D_s[[i]]
D <- simu_unit(n, p, D, A, c, k, option)
simu_data <- simu_data$fdr_split
fdr_split[i, , meth ] <- simu_data$power_split
power_split[i, , meth] # if (i < 3){
# fdr_knock[meth, i] <- simu_data$fdr_knock
# power_knock[meth, i] <- simu_data$power_knock
# }
}save.image(file = "C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/result/temp.RData")
# save results
save.image("C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/result/examples.RData")
<- fdr_split
fdr_split_result <- power_split
power_split_result
## for D_1
## plot for FDR
<- expo
x <- fdr_split_result[1, ,1]
fdr_split <- array(fdr_split, dim=c(num_nu, 1))
fdr_split <- fdr_split_result[1,,2]
fdr_split_plus <- array(fdr_split_plus, dim=c(num_nu, 1))
fdr_split_plus
<- data.frame(x,fdr_split,fdr_split_plus)
fdr is.na(fdr)] <- 0
fdr[
png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/plot/figure_11.png', height=1000, width=1000)
<- ggplot()+geom_line(data=fdr,aes(x=x, y=fdr_split, color='fdr_split'))
p <- p +geom_line(data=fdr,aes(x=x, y=fdr_split_plus, color='fdr_split_plus'))
p <-p + labs(x = TeX("$\\log_{10} (\\nu)$"))
p <-p + labs(y = TeX("FDR"))
p <-p + geom_abline(intercept=0.2,slope=0,linetype="dashed" )
p <-p + scale_fill_discrete(breaks=c("split Knockoff", "split Knockoff+"))
p <-p + scale_colour_discrete(labels=c("split Knockoff", "split Knockoff+"))
p <-p + scale_y_continuous(limits = c(0,1))
p print(p)
dev.off()
## plot for Power
<- expo
x <- power_split_result[1,,1]
power_split <- array(power_split, dim=c(num_nu, 1))
power_split <- power_split_result[1,,2]
power_split_plus <- array(power_split_plus, dim=c(num_nu, 1))
power_split_plus
<- data.frame(x, power_split,power_split_plus)
power png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/plot/figure_12.png', height=1000, width=1000)
<- ggplot()+geom_line(data=power,aes(x=x, y=power_split, color='split Knockoff'))
q <- q + geom_line(data=power,aes(x=x, y=power_split_plus, color='split Knockoff+'))
q <-q + labs(x = TeX("$\\log_{10} (\\nu)$"))
q <-q + labs(y = TeX("Power"))
q <-q + scale_fill_discrete(breaks=c("split Knockoff", "split Knockoff+"))
q <-q + scale_colour_discrete(labels=c("split Knockoff", "split Knockoff+"))
q <-q+ scale_y_continuous(limits = c(0,1))
q print(q)
dev.off()
## for D_2
## plot for FDR
<- expo
x <- fdr_split_result[2,,1]
fdr_split <- array(fdr_split, dim=c(num_nu, 1))
fdr_split <- fdr_split_result[2, ,2]
fdr_split_plus <- array(fdr_split_plus, dim=c(num_nu, 1))
fdr_split_plus
<- data.frame(x,fdr_split,fdr_split_plus)
fdr is.na(fdr)] <- 0
fdr[# save results
png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/plot/figure_21.png', height=1000, width=1000)
<- ggplot()+geom_line(data=fdr,aes(x=x, y=fdr_split, color='fdr_split'))
p <- p +geom_line(data=fdr,aes(x=x, y=fdr_split_plus, color='fdr_split_plus'))
p <-p + labs(x = TeX("$\\log_{10} (\\nu)$"))
p <-p + labs(y = TeX("FDR"))
p <-p + geom_abline(intercept=0.2,slope=0,linetype="dashed" )
p <-p + scale_fill_discrete(breaks=c("split Knockoff", "split Knockoff+"))
p <-p + scale_colour_discrete(labels=c("split Knockoff", "split Knockoff+"))
p <-p + scale_y_continuous(limits = c(0,1))
p print(p)
dev.off()
## plot for Power
<- expo
x <- power_split_result[2,,1 ]
power_split <- array(power_split, dim=c(num_nu, 1))
power_split <- power_split_result[2,,2]
power_split_plus <- array(power_split_plus, dim=c(num_nu, 1))
power_split_plus # power_knock_ <- power_knock[1]
# power_knock_plus_ <- power_knock[2]
# power_knock_ <- rep(power_knock_, num_nu)
# power_knock_plus_ <- rep(power_knock_plus_, num_nu)
<- data.frame(x,power_split,power_split_plus)
power png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/plot/figure_22.png', height=1000, width=1000)
<- ggplot()+geom_line(data=power,aes(x=x, y=power_split, color='power_split'))
q <- q + geom_line(data=power,aes(x=x, y=power_split_plus, color='power_split_plus'))
q <-q + labs(x = TeX("$\\log_{10} (\\nu)$"))
q <-q + labs(y = TeX("Power"))
q <-q + scale_fill_discrete(breaks=c("split Knockoff", "split Knockoff+"))
q <-q+ scale_colour_discrete(labels=c("split Knockoff", "split Knockoff+"))
q <-q+ scale_y_continuous(limits = c(0,1))
q print(q)
dev.off()
## for D_3
## plot for FDR
<- expo
x <- fdr_split_result[3,,1 ]
fdr_split <- array(fdr_split, dim=c(num_nu, 1))
fdr_split <- fdr_split_result[3,,2]
fdr_split_plus <- array(fdr_split_plus, dim=c(num_nu, 1))
fdr_split_plus
<- data.frame(x,fdr_split,fdr_split_plus)
fdr is.na(fdr)] <- 0
fdr[# save results
png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/plot/figure_31.png', height=1000, width=1000)
<- ggplot()+geom_line(data=fdr,aes(x=x, y=fdr_split, color='fdr_split'))
p <- p +geom_line(data=fdr,aes(x=x, y=fdr_split_plus, color='fdr_split_plus'))
p <-p + labs(x = TeX("$\\log_{10} (\\nu)$"))
p <-p + labs(y = TeX("FDR"))
p <-p + geom_abline(intercept=0.2,slope=0,linetype="dashed" )
p <-p + scale_fill_discrete(breaks=c("split Knockoff", "split Knockoff+"))
p <-p + scale_colour_discrete(labels=c("split Knockoff", "split Knockoff+"))
p <-p + scale_y_continuous(limits = c(0,1))
p print(p)
dev.off()
## plot for Power
<- expo
x <- power_split_result[3,,1 ]
power_split <- array(power_split, dim=c(num_nu, 1))
power_split <- power_split_result[3,,2]
power_split_plus <- array(power_split_plus, dim=c(num_nu, 1))
power_split_plus
<- data.frame(x,power_split,power_split_plus)
power png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/plot/figure_32.png', height=1000, width=1000)
<- ggplot()+geom_line(data=power,aes(x=x, y=power_split, color='power_split'))
q <- q + geom_line(data=power,aes(x=x, y=power_split_plus, color='power_split_plus'))
q <-q + labs(x = TeX("$\\log_{10} (\\nu)$"))
q <-q + labs(y = TeX("Power"))
q <-q + scale_fill_discrete(breaks=c("split Knockoff", "split Knockoff+"))
q <-q+ scale_colour_discrete(labels=c("split Knockoff ", "split Knockoff+"))
q <-q+ scale_y_continuous(limits = c(0,1))
q print(q)
dev.off()
save.image(file = "C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/examples/result/final.RData")
# simulation experinment
# 1 June, 2021
# revised 7 July, 2021
# author:Haoxue Wang (haoxwang@student.ethz.ch)
# This script reproduces Figure 3 of the paper, comparing the performance
# of Knockoff and Split Knockoff in extreme settings of feature
# correlation.
# The final workspace is saved to '../result/correlation.RData'.
install.packages("SplitKnockoff") # install our package
library(latex2exp)
library(ggplot2)
library(Matrix)
library(glmnet)
library(MASS)
library(SplitKnockoff)
## choose parameters
<- 20 # sparsity level
k <- 1 # magnitude
A <- 350 # sample size
n <- 100 # dimention of variables
p <- c(0, 0.9) # feature correlation
c_s
<- array(data = NA, dim = length(data), dimnames = NULL)
option $q <- 0.2
option$eta <- 0.1
option$method <- 'knockoff'
option$stage0 <- 'path'
option$normalize = 'true'
option$lambda <- 10.^seq(0, -6, by=-0.01)
option$copy = 'true'
option
## settings for nu
= seq(-1, 3, by=0.2)
expo $nu = 10.^expo
option= length(option$nu)
num_nu
## calculation
# generate D
<- matrix(0,p-1, p)
D_G
for(i in 1:p-1){
<- 1
D_G[i, i] +1] <- -1
D_G[i, i
}
<- D_G
D
<- matrix(0, 2, num_nu)
fdr_glmnet <- matrix(0, 2, num_nu)
power_glmnet
# fdr_knock <- matrix(0, 2, 1)
# power_knock <- matrix(0, 2, 1)
for(i in 1: 2){
<- c_s[i]
c <- simu_unit(n, p, D, A, c, k,option)
simu_data <- simu_data$fdr_split
fdr_glmnet[i, ] <- simu_data$power_split
power_glmnet[i, ] # fdr_knock[i,] <- simu_data$fdr_knock
# power_knock[i,] <- simu_data$power_knock
}
## plot for FDR
<- expo
x <- fdr_glmnet[1, ]
fdr_split <- array(fdr_split, dim=c(num_nu, 1))
fdr_split <- fdr_glmnet[2, ]
fdr_split_plus <- array(fdr_split_plus, dim=c(num_nu, 1))
fdr_split_plus # fdr_knock_ <- fdr_knock[1]
# fdr_knock_plus_ <- fdr_knock[2]
# fdr_knock_ <- rep(fdr_knock_, num_nu)
# fdr_knock_plus_ <- rep(fdr_knock_plus_, num_nu)
<- data.frame(x,fdr_split,fdr_split_plus)
fdr is.na(fdr)] <- 0
fdr[# save results
save.image("C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/result/correlation.RData")
png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/plot/figure_3a.png', height=1000, width=1000)
<- ggplot()+geom_line(data=fdr,aes(x=x, y=fdr_split, color='fdr_split'))
p <- p +geom_line(data=fdr,aes(x=x, y=fdr_split_plus, color='fdr_split_plus'))
p <-p + labs(x = TeX("$\\log_{10} (\\nu)$"))
p <-p + labs(y = TeX("FDR"))
p <-p + geom_abline(intercept=0.2,slope=0,linetype="dashed" )
p <-p + scale_fill_discrete(breaks=c("split Knockoff (c = 0)", "split Knockoff (c = 0.9)"))
p <-p + scale_colour_discrete(labels=c("split Knockoff (c = 0)", "split Knockoff (c = 0.9)"))
p <-p + scale_y_continuous(limits = c(0,1))
p print(p)
dev.off()
## plot for Power
<- expo
x <- power_glmnet[1, ]
power_split <- array(power_split, dim=c(num_nu, 1))
power_split <- power_glmnet[2, ]
power_split_plus <- array(power_split_plus, dim=c(num_nu, 1))
power_split_plus # power_knock_ <- power_knock[1]
# power_knock_plus_ <- power_knock[2]
# power_knock_ <- rep(power_knock_, num_nu)
# power_knock_plus_ <- rep(power_knock_plus_, num_nu)
<- data.frame(x,power_split,power_split_plus)
power png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/plot/figure_3b.png', height=1000, width=1000)
<- ggplot()+geom_line(data=power,aes(x=x, y=power_split, color='power_split'))
q <- q + geom_line(data=power,aes(x=x, y=power_split_plus, color='power_split_plus'))
q <-q + labs(x = TeX("$\\log_{10} (\\nu)$"))
q <-q + labs(y = TeX("Power"))
q <-q + scale_fill_discrete(breaks=c("split Knockoff (c = 0)", "split Knockoff (c = 0.9)"))
q <-q+ scale_colour_discrete(labels=c("split Knockoff (c = 0)", "split Knockoff (c = 0.9)"))
q <-q+ scale_y_continuous(limits = c(0,1))
q print(q)
dev.off()
install.packages("SplitKnockoff") # install our package
library(latex2exp)
library(ggplot2)
library(Matrix)
library(glmnet)
library(MASS)
library(SplitKnockoff)
## choose parameters
<- 20 # sparsity level
k <- c(0.25, 4) #magnitude
A_s <- 350 # sample size
n <- 100 # dimention of variables
p <- 0.5 # feature correlation
c
<- array(data = NA, dim = length(data), dimnames = NULL)
option $q <- 0.2
option$eta <- 0.1
option$method <- 'knockoff'
option$stage0 <- 'path'
option$normalize <- 'true'
option$lambda <- 10.^seq(0, -6, by=-0.01)
option$copy = 'true'
option
# settings for nu
<- seq(-1, 3, by=0.2)
expo $nu <- 10.^expo
option<- length(option$nu)
num_nu
# generate D
<- matrix(0,p-1, p)
D_G
for(i in 1:p-1){
<- 1
D_G[i, i] +1] <- -1
D_G[i, i
}
<- D_G
D
<- matrix(0, 2, num_nu)
fdr_glmnet <- matrix(0, 2, num_nu)
power_glmnet
# fdr_knock <- matrix(0, 2, 1)
# power_knock <- matrix(0, 2, 1)
for(i in 1: 2){
<- A_s[i]
A <- simu_unit(n, p, D, A, c, k, option)
simu_data <- simu_data$fdr_split
fdr_glmnet[i, ] <- simu_data$power_split
power_glmnet[i, ] # fdr_knock[i] <- simu_data$fdr_knock
# power_knock[i] <- simu_data$power_knock
}
## plot for FDR
<- expo
x <- fdr_glmnet[1, ]
fdr_split <- array(fdr_split, dim=c(num_nu, 1))
fdr_split <- fdr_glmnet[2, ]
fdr_split_plus <- array(fdr_split_plus, dim=c(num_nu, 1))
fdr_split_plus # fdr_knock_ <- fdr_knock[1]
# fdr_knock_plus_ <- fdr_knock[2]
# fdr_knock_ <- rep(fdr_knock_, num_nu)
# fdr_knock_plus_ <- rep(fdr_knock_plus_, num_nu)
<- data.frame(x,fdr_split,fdr_split_plus)
fdr is.na(fdr)] <- 0
fdr[# save results
save.image("C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/result/snr.RData")
png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/plot/figure_4a.png', height=1000, width=1000)
<- ggplot()+geom_line(data=fdr,aes(x=x, y=fdr_split, color='fdr_split'))
p <- p +geom_line(data=fdr,aes(x=x, y=fdr_split_plus, color='fdr_split_plus'))
p <-p + labs(x = TeX("$\\log_{10} (\\nu)$"))
p <-p + labs(y = TeX("FDR"))
p <-p + geom_abline(intercept=0.2,slope=0,linetype="dashed" )
p <-p + scale_fill_discrete(breaks=c("split Knockoff (SNR = 0.25)", "split Knockoff (SNR = 4)"))
p <-p + scale_colour_discrete(labels=c("split Knockoff (SNR = 0.25)", "split Knockoff (SNR = 4)"))
p <-p + geom_abline(intercept=0.2,slope=0,linetype="dashed" )
p <-p + scale_y_continuous(limits = c(0,1))
p print(p)
dev.off()
## plot for Power
<- expo
x <- power_glmnet[1, ]
power_split <- array(power_split, dim=c(num_nu, 1))
power_split <- power_glmnet[2, ]
power_split_plus <- array(power_split_plus, dim=c(num_nu, 1))
power_split_plus # power_knock_ <- power_knock[1]
# power_knock_plus_ <- power_knock[2]
# power_knock_ <- rep(power_knock_, num_nu)
# power_knock_plus_ <- rep(power_knock_plus_, num_nu)
<- data.frame(power_split,power_split_plus)
power png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/plot/figure_4b.png', height=1000, width=1000)
<- ggplot()+geom_line(data=power,aes(x=x, y=power_split, color='power_split'))
q <- q + geom_line(data=power,aes(x=x, y=power_split_plus, color='power_split_plus'))
q <-q + labs(x = TeX("$\\log_{10} (\\nu)$"))
q <-q + labs(y = TeX("Power"))
q <-q + scale_fill_discrete(breaks=c("split Knockoff (SNR = 0.25)", "split Knockoff (SNR = 4)"))
q <-q+ scale_colour_discrete(labels=c("split Knockoff (SNR = 0.25)", "split Knockoff (SNR = 4)"))
q <-q+ scale_y_continuous(limits = c(0,1))
q print(q)
dev.off()
install.packages("SplitKnockoff") # install our package
library(latex2exp)
library(ggplot2)
library(Matrix)
library(glmnet)
library(MASS)
library(SplitKnockoff)
## choose parameters
<- c(10, 100) # sparsity level
k_s <- 1 # magnitude
A <- 350 # sample size
n <- 100 # dimention of variables
p <- 0.5 # feature correlation
c
<- array(data = NA, dim = length(data), dimnames = NULL)
option $q <- 0.2
option$eta <- 0.1
option$method <- 'knockoff'
option$stage0 <- 'path'
option$normalize <- 'true'
option$lambda <- 10.^seq(0, -6, by=-0.01)
option$copy = 'true'
option
# settings for nu
<- seq(-1, 3, by=0.2)
expo $nu <- 10.^expo
option<- length(option$nu)
num_nu
## calculation
# generate D
<- matrix(0,p-1, p)
D_G
for(i in 1:p-1){
<- 1
D_G[i, i] +1] <- -1
D_G[i, i
}
<- D_G
D
<- matrix(0, 2, num_nu)
fdr_glmnet <- matrix(0, 2, num_nu)
power_glmnet
# fdr_knock <- matrix(0, 2, 1)
# power_knock <- matrix(0, 2, 1)
for(i in 1: 2){
<- k_s[i]
k <- simu_unit(n, p, D, A, c, k, option)
simu_data <- simu_data$fdr_split
fdr_glmnet[i, ] <- simu_data$power_split
power_glmnet[i, ] # fdr_knock[i] <- simu_data$fdr_knock
# power_knock[i] <- simu_data$power_knock
}
## plot for FDR
<- expo
x <- fdr_glmnet[1, ]
fdr_split <- array(fdr_split, dim=c(num_nu, 1))
fdr_split <- fdr_glmnet[2, ]
fdr_split_plus <- array(fdr_split_plus, dim=c(num_nu, 1))
fdr_split_plus # fdr_knock_ <- fdr_knock[1]
# fdr_knock_plus_ <- fdr_knock[2]
# fdr_knock_ <- rep(fdr_knock_, num_nu)
# fdr_knock_plus_ <- rep(fdr_knock_plus_, num_nu)
<- data.frame(x,fdr_split,fdr_split_plus)
fdr is.na(fdr)] <- 0
fdr[# save results
save.image("C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/result/sparsity.RData")
png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/plot/figure_5a.png', height=1000, width=1000)
<- ggplot()+geom_line(data=fdr,aes(x=x, y=fdr_split, color='fdr_split'))
p <- p +geom_line(data=fdr,aes(x=x, y=fdr_split_plus, color='fdr_split_plus'))
p <-p + labs(x = TeX("$\\log_{10} (\\nu)$"))
p <-p + labs(y = TeX("FDR"))
p <-p + geom_abline(intercept=0.2,slope=0,linetype="dashed" )
p <-p + scale_fill_discrete(breaks=c("split Knockoff (k = 10)", "split Knockoff (k = 100)"))
p <-p + scale_colour_discrete(labels=c("split Knockoff (k = 10)", "split Knockoff (k = 100)"))
p <-p + scale_y_continuous(limits = c(0,1))
p print(p)
dev.off()
## plot for Power
<- expo
x <- power_glmnet[1, ]
power_split <- array(power_split, dim=c(num_nu, 1))
power_split <- power_glmnet[2, ]
power_split_plus <- array(power_split_plus, dim=c(num_nu, 1))
power_split_plus # power_knock_ <- power_knock[1]
# power_knock_plus_ <- power_knock[2]
# power_knock_ <- rep(power_knock_, num_nu)
# power_knock_plus_ <- rep(power_knock_plus_, num_nu)
<- data.frame(power_split,power_split_plus)
power png(file='C:/Users/v-haoxuewang/Desktop/SplitKnockoff/simu_experiments/param_ablation/plot/figure_5b.png', height=1000, width=1000)
<- ggplot()+geom_line(data=power,aes(x=x, y=power_split, color='power_split'))
q <- q + geom_line(data=power,aes(x=x, y=power_split_plus, color='power_split_plus'))
q <-q + labs(x = TeX("$\\log_{10} (\\nu)$"))
q <-q + labs(y = TeX("Power"))
q <-q + scale_fill_discrete(breaks=c("split Knockoff (k = 10)", "split Knockoff (k = 100)"))
q <-q+ scale_colour_discrete(labels=c("split Knockoff (k = 10)", "split Knockoff (k = 100)"))
q <-q+ scale_y_continuous(limits = c(0,1))
q <-q+ scale_y_continuous(limits = c(0,1))
q print(q)
dev.off()
library(latex2exp)
library(ggplot2)
library(Matrix)
library(glmnet)
library(MASS)
library(SplitKnockoff)
<- array(data=NA, dim = length(data), dimnames = NULL)
option $q <- 0.2
option$eta <- 0.1
option$stage0 <- 'path'
option$normalize <- 'true'
option$lambda <- 10.^seq(0, -6, by=-0.01)
option$copy <- 'true'
option<- option[-1]
option
# settings for nu
<- seq(-1, 1, by=0.1)
expo $nu <- 10.^expo
option<- length(option$nu)
num_nu
= getwd()
root setwd(sprintf('%s/data/', root))
= read.csv('X.csv')
X =as.matrix(X)
X = read.csv('y.csv')
y = as.matrix(y)
y = read.csv('D.csv')
D = as.matrix(D)
D = diag(90)
D
<- splitknockoff.filter(X, D, y, option)
filter_result <- filter_result$results
results save(results, file="results/region.RData")
results of the regions
each number specifies a region
library(latex2exp)
library(ggplot2)
library(Matrix)
library(glmnet)
library(MASS)
library(SplitKnockoff)
<- array(data=NA, dim = length(data), dimnames = NULL)
option $q <- 0.2
option$eta <- 0.1
option$stage0 <- 'path'
option$normalize <- 'true'
option$lambda <- 10.^seq(0, -6, by=-0.01)
option$copy <- 'true'
option<- option[-1]
option
# settings for nu
<- seq(-1, 1, by=0.1)
expo $nu <- 10.^expo
option<- length(option$nu)
num_nu
# original .mat data
= getwd()
root setwd(sprintf('%s/data/', root))
= readMat('X.mat')
X = X$X
X = readMat('y.mat')
y = y$y
y = readMat('D.mat')
D = D$D
D #
# root = getwd()
# setwd(sprintf('%s/data/', root))
# X = read.csv('X.csv')
# X =as.matrix(X)
# y = read.csv('y.csv')
# y = as.matrix(y)
# D = read.csv('D.csv')
# D = as.matrix(D)
<- splitknockoff.filter(X, D, y, option)
filter_result <- filter_result$results
results save(results, file="results/connection.RData")
results of connection
each number specifies a connection
It usually takes 50min-80min to simulate each experiment. If you want to see the result of the experiment directly, you can reload our generated result by
load("...../SplitKnockoff/simu_experiments/figure_1/result/figure_1.RData")
load("...../SplitKnockoff/simu_experiments/param_ablation/result/correlation.RData")
load("...../SplitKnockoff/simu_experiments/param_ablation/result/snr.RData")
load("...../SplitKnockoff/simu_experiments/param_ablation/result/sparsity.RData")
load("...../SplitKnockoff/simu_experiments/examples/result/final.RData")
If you have any question or idea, please contact the author of this R package Haoxue Wang (haoxwang@student.ethz.ch).