# Survival times in days after surgery.
# X holds the 7 treatment-group mice, Y the 9 control-group mice.
X <- c(94, 197, 16, 38, 99, 141, 23)
Y <- c(52, 104, 146, 10, 51, 30, 40, 27, 46)
lab05a: Bootstrap
XS3310 Teoría Estadística
Este documento ilustra el uso de Bootstrap.
Se tienen datos de sobrevivencia de 16 ratones luego de una cirugía de prueba: 9 ratones en el grupo control y 7 ratones en el grupo de tratamiento.
Grupo | Tiempo de sobrevivencia (días) | Media |
---|---|---|
Tratamiento | 94,197,16,38,99,141,23 | 86.86 |
Control | 52,104,146,10,51,30,40,27,46 | 56.22 |
¿Podemos decir que el tratamiento es efectivo?
En estadística, resolvemos esa pregunta estimando \(\bar{X}- \bar{Y} = 30.63\). El problema es cómo calcular la variabilidad, ¿podemos suponer lo mismo de siempre?
1 Datos
2 Estimación de \(\bar{X}\).
2.1 Estimación por intervalo del tiempo promedio de sobrevivencia del tratamiento.
# Point estimate: sample mean of X. The outer parentheses print the value.
(estimacion <- mean(X))
[1] 86.85714
# Sample standard deviation of X.
sd(X)
[1] 66.76683
# Sample size of X.
n <- length(X)
Asumiendo normalidad de la población, tenemos que el intervalo de confianza de 95% es dado por:
# Classical 95% t interval for the mean: estimate -/+ t quantile * standard error.
estimacion + c(-1, 1) * qt(p = 0.975, df = (n - 1)) * sd(X) / sqrt(n)
[1] 25.10812 148.60616
2.2 IC estándar de Bootstrap
# Standard bootstrap: draw B resamples of X (with replacement, same size n)
# and store the mean of each resample.
B <- 1000
Tboot_b <- numeric(B)  # preallocated instead of growing from NULL
for (b in seq_len(B)) {
  xb <- sample(X, size = n, replace = TRUE)
  Tboot_b[b] <- mean(xb)
}
Tboot_b[1:10]  # show the first ten bootstrap replicates
[1] 84.00000 86.57143 99.57143 91.28571 90.00000 83.00000 105.42857
[8] 88.14286 115.57143 72.14286
# Histogram of the bootstrap replicates stored in Tboot_b.
hist(Tboot_b)
2.2.1 Sesgo del bootstrap
# Mean of the bootstrap replicates. The outer parentheses print the value.
(media_bootstrap <- mean(Tboot_b))
[1] 86.69243
# Print the point estimate so it can be compared with the bootstrap mean.
estimacion
[1] 86.85714
# Bootstrap bias estimate: mean of the replicates minus the point estimate.
(sesgo_bootstrap <- media_bootstrap - estimacion)
[1] -0.1647143
2.2.2 Intervalos de confianza de 95%
# Standard normal quantile for a two-sided 95% interval.
(cuantil_z <- qnorm(1 - 0.05 / 2))
[1] 1.959964
# Bootstrap standard error: standard deviation of the bootstrap replicates.
(sdboot <- sd(Tboot_b))
[1] 22.59721
# Standard (normal-approximation) bootstrap interval:
# estimate -/+ z quantile * bootstrap standard error.
estimacion + c(-1, 1) * cuantil_z * sdboot
[1] 42.56742 131.14687
2.2.3 IC bootstrap t
# Bootstrap-t: for every first-level resample of X, run a second-level
# bootstrap to estimate the standard error of that resample's mean, then
# studentize the first-level means.
B <- 1000
Tboot_b <- numeric(B)   # first-level bootstrap means
Tboot_bm <- numeric(B)  # second-level means, overwritten on each outer pass
sdboot_b <- numeric(B)  # standard error estimated for each first-level resample
for (b in seq_len(B)) {
  xb <- sample(X, size = n, replace = TRUE)
  Tboot_b[b] <- mean(xb)
  for (m in seq_len(B)) {
    # Resample from the first-level resample xb, not from X.
    xbm <- sample(xb, size = n, replace = TRUE)
    Tboot_bm[m] <- mean(xbm)
  }
  sdboot_b[b] <- sd(Tboot_bm)
}
# Studentized replicates: (bootstrap mean - estimate) / its estimated SE.
z_star <- (Tboot_b - estimacion) / sdboot_b
hist(z_star)
summary(z_star)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-16.08554 -0.82876 0.02683 -0.10469 0.76720 10.80232
# Empirical 2.5% and 97.5% quantiles of the studentized replicates.
cuantil_t_empirico <- quantile(z_star, c(0.05 / 2, 1 - 0.05 / 2))
# Interval built as estimate + quantile * sdboot (sdboot is defined earlier in
# the script).
# NOTE(review): the usual bootstrap-t interval is reflected, i.e.
# estimacion - rev(cuantil_t_empirico) * se. Left unchanged here so the code
# agrees with the printed output; confirm which form is intended.
estimacion + cuantil_t_empirico * sdboot
2.5% 97.5%
10.24223 134.78094
2.2.4 IC percentil de bootstrap
# Percentile bootstrap: regenerate B bootstrap means of X and read the
# interval limits directly from their empirical quantiles.
B <- 1000
Tboot_b <- numeric(B)  # preallocated instead of growing from NULL
for (b in seq_len(B)) {
  xb <- sample(X, size = n, replace = TRUE)
  Tboot_b[b] <- mean(xb)
}
hist(Tboot_b)
quantile(Tboot_b, 0.05 / 2)  # lower limit (2.5% quantile)
2.5%
43.84643
# 97.5% quantile of the bootstrap replicates in Tboot_b.
quantile(Tboot_b, 1 - 0.05 / 2)
97.5%
134.4714
2.2.5 Con el paquete boot de R.
library(boot)
Warning: package 'boot' was built under R version 4.3.3
# Statistic function for boot(): returns the mean of the resample together
# with an estimate of the variance of that mean, which boot.ci() uses for
# the studentized interval.
#   datos  - numeric vector with the original sample
#   indice - indices selecting the bootstrap resample (supplied by boot())
# Returns a length-2 numeric vector: c(mean, variance of the mean).
mean_fun <- function(datos, indice) {
  remuestra <- datos[indice]
  m <- mean(remuestra)
  # Variance of the statistic itself (the sample mean), i.e. var / n,
  # rather than the raw sample variance.
  v <- var(remuestra) / length(remuestra)
  c(m, v)
}
# Run 1000 bootstrap replicates of mean_fun on X, then compute and print the
# basic, studentized and percentile confidence intervals.
X.boot <- boot(X, mean_fun, R = 1000)
(results <- boot.ci(X.boot, type = c("basic", "stud", "perc")))
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = X.boot, type = c("basic", "stud", "perc"))
Intervals :
Level Basic Studentized Percentile
95% ( 39.59, 130.29 ) ( 31.93, 165.10 ) ( 43.43, 134.12 )
Calculations and Intervals on Original Scale
3 Estimación de \(\bar{X} - \bar{Y}\).
3.1 Ilustración a mano
# Point estimate of the difference in means, X minus Y.
# The outer parentheses print the value.
(estimacion <- mean(X) - mean(Y))
[1] 30.63492
# Two-sample bootstrap of the difference in means: each group is resampled
# independently, with replacement, at its own sample size.
n <- length(X)
m <- length(Y)
B <- 1000
Tboot_b <- numeric(B)  # preallocated instead of growing from NULL
for (b in seq_len(B)) {
  xb <- sample(X, size = n, replace = TRUE)
  yb <- sample(Y, size = m, replace = TRUE)
  Tboot_b[b] <- mean(xb) - mean(yb)
}
Tboot_b[1:10]  # show the first ten bootstrap replicates
[1] 32.1269841 35.4920635 42.6190476 33.0952381 32.0158730 6.5555556
[7] 33.4920635 0.6031746 4.0793651 10.5714286
# Histogram of the bootstrap replicates stored in Tboot_b.
hist(Tboot_b)
# Standard normal quantile for a two-sided 95% interval.
(cuantil_z <- qnorm(1 - 0.05 / 2))
[1] 1.959964
# Bootstrap standard error: standard deviation of the bootstrap replicates.
(sdboot <- sd(Tboot_b))
[1] 27.28183
# Standard (normal-approximation) bootstrap interval:
# estimate -/+ z quantile * bootstrap standard error.
estimacion + c(-1, 1) * cuantil_z * sdboot
[1] -22.83648 84.10632
3.2 Con el paquete simpleboot
library(simpleboot)
Warning: package 'simpleboot' was built under R version 4.3.3
Simple Bootstrap Routines (1.1-7)
# Two-sample bootstrap of mean(X) - mean(Y) with simpleboot::two.boot().
# student = TRUE with M = 1000 is passed so that the studentized interval can
# be requested from boot.ci() below.
bootstrap_diff <- two.boot(X, Y, mean, R = 1000, student = TRUE, M = 1000)
# Compute and print the basic, studentized and percentile intervals.
(bootstrap_diff_res <- boot.ci(bootstrap_diff, type = c("basic", "stud", "perc")))
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = bootstrap_diff, type = c("basic", "stud",
"perc"))
Intervals :
Level Basic Studentized Percentile
95% (-23.24, 80.38 ) (-29.88, 102.24 ) (-19.11, 84.51 )
Calculations and Intervals on Original Scale