X<-c(94,197,16,38,99,141,23)
Y<-c(52,104,146,10,51,30,40,27,46)lab05a: Bootstrap
Este documento ilustra el uso de Bootstrap.
Se tienen datos de sobrevivencia de 16 ratones luego de una cirugía de prueba: 9 ratones en el grupo control y 7 ratones en el grupo de tratamiento.
| Grupo | Tiempo de sovrevivencia(días) | Media |
|---|---|---|
| Tratamiento | 94,197,16,38,99,141,23 | 86.86 |
| Control | 52,104,146,10,51,30,40,27,46 | 56.22 |
¿Podemos decir que el tratamiento es efectivo?
En estadística, resolvemos esa pregunta estimando \(\bar{X}- \bar{Y} = 30.63\). El problema es cómo calcular la variabilidad, ¿podemos suponer lo mismo de siempre?
1 Datos
2 Estimación de \(\bar{X}\).
2.1 Estimación por intervalo del tiempo promedio de sobrevivencia del tratamiento.
(estimacion<-mean(X))[1] 86.85714
sd(X)[1] 66.76683
n<-length(X)Asumiendo normalidad de la población, tenemos que el intervalo de confianza de 95% es dado por:
estimacion+c(-1,1)*qt(p=0.975,df=(n-1))*sd(X)/sqrt(n)[1] 25.10812 148.60616
2.2 IC estándar de Bootstrap
B <- 1000
Tboot_b <- NULL
for(b in 1:B) {
xb <- sample(X, size = n, replace = TRUE)
Tboot_b[b] <- mean(xb)
}
Tboot_b[1:10] [1] 88.57143 62.42857 83.00000 82.28571 47.28571 106.71429 76.00000
[8] 84.85714 95.85714 60.00000
hist(Tboot_b)
2.2.1 Sesgo del bootstrap
(media_bootstrap <- mean(Tboot_b))[1] 86.49543
estimacion[1] 86.85714
(sesgo_bootstrap <- media_bootstrap-estimacion)[1] -0.3617143
2.2.2 Intervalos de confianza de 95%
(cuantil_z <- qnorm(1 - 0.05 / 2))[1] 1.959964
(sdboot <- sd(Tboot_b))[1] 22.97013
estimacion + c(-1,1)*cuantil_z * sdboot[1] 41.83651 131.87777
2.2.3 IC bootstrap t
B <- 1000
Tboot_b <- NULL
Tboot_bm <- NULL
sdboot_b <- NULL
for (b in 1:B) {
xb <- sample(X, size = n, replace = TRUE)
Tboot_b[b] <- mean(xb)
for (m in 1:B) {
xbm <- sample(xb, size = n, replace = TRUE)
Tboot_bm[m] <- mean(xbm)
}
sdboot_b[b] <- sd(Tboot_bm)
}
z_star <- (Tboot_b - estimacion) / sdboot_b
hist(z_star)
summary(z_star) Min. 1st Qu. Median Mean 3rd Qu. Max.
-19.66985 -0.80053 0.01485 -0.12324 0.71915 5.59375
cuantil_t_empirico <- quantile(z_star, c(0.05/2, 1-0.05/2))
estimacion +cuantil_t_empirico* sdboot 2.5% 97.5%
11.0173 142.4652
2.2.4 IC percentil de bootstrap
B <- 1000
Tboot_b <- NULL
for(b in 1:B) {
xb <- sample(X, size = n, replace = TRUE)
Tboot_b[b] <- mean(xb)
}
hist(Tboot_b)
quantile(Tboot_b,0.05 / 2) 2.5%
45.56429
quantile(Tboot_b, 1 - 0.05 / 2) 97.5%
137.5714
2.2.5 Con el paquete boots de R.
library(boot)
mean_fun=function(datos,indice){
m=mean(datos[indice])
v=var(datos[indice])
c(m,v)
}
X.boot<-boot(X,mean_fun,R=1000)
(results<-boot.ci(X.boot,type=c("basic","stud","perc")))BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = X.boot, type = c("basic", "stud", "perc"))
Intervals :
Level Basic Studentized Percentile
95% ( 41.45, 130.85 ) ( 35.75, 166.95 ) ( 42.86, 132.27 )
Calculations and Intervals on Original Scale
3 Estimación de \(\bar{X}\) - \(\bar{Y}\).
3.1 Ilustración a mano
(estimacion<-mean(X)-mean(Y))[1] 30.63492
n<-length(X)
m<-length(Y)
B <- 1000
Tboot_b <- NULL
for(b in 1:B) {
xb <- sample(X, size = n, replace = TRUE)
yb <- sample(Y, size = m, replace = TRUE)
Tboot_b[b] <- mean(xb)-mean(yb)
}
Tboot_b[1:10] [1] 10.730159 42.857143 74.317460 -2.444444 30.095238 59.253968
[7] 49.253968 -14.746032 3.650794 9.555556
hist(Tboot_b)
(cuantil_z <- qnorm(1 - 0.05 / 2))[1] 1.959964
(sdboot <- sd(Tboot_b))[1] 27.65406
estimacion + c(-1,1)*cuantil_z * sdboot[1] -23.56605 84.83589
3.2 Con el paquete simpleboot
library(simpleboot)Simple Bootstrap Routines (1.1-8)
bootstrap_diff <- two.boot(X, Y, mean, R = 1000, student = TRUE, M = 1000)
(bootstrap_diff_res<-boot.ci(bootstrap_diff,type=c("basic","stud","perc")))BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = bootstrap_diff, type = c("basic", "stud",
"perc"))
Intervals :
Level Basic Studentized Percentile
95% (-24.07, 80.53 ) (-31.78, 103.07 ) (-19.26, 85.34 )
Calculations and Intervals on Original Scale