Supuestos del modelo
En este ejemplo analizaremos el cumplimiento de los supuestos vistos en clase para el modelo de regresión lineal simple
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(nortest)
library(car)
## Loading required package: carData
Veamos los primeros datos
# Read the "good" dataset and fit a simple linear regression y ~ x.
# `TRUE` is spelled out: `T` is a reassignable binding, not a reserved word.
bueno <- read.table("good.txt", header = TRUE)
plot(bueno)
model <- lm(y ~ x, bueno)
abline(model)  # overlay the fitted least-squares line on the scatterplot
par(mfrow = c(2, 2))  # 2x2 grid for the four base diagnostic plots
plot(model)
Veamos los supuestos
# Homoscedasticity: residual variance should be constant across x.
plot(model$residuals)
# Breusch-Pagan test (H0: homoscedastic errors)
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 0.83147, df = 1, p-value = 0.3618
# Split observations into two groups (x < 3 vs x >= 3) to compare variances.
grupos <- factor(bueno$x < 3)
# Use the full component name: `model$res` only works via `$` partial matching.
bartlett.test(model$residuals, grupos)
##
## Bartlett test of homogeneity of variances
##
## data: model$residuals and grupos
## Bartlett's K-squared = 1.8206, df = 1, p-value = 0.1772
# Fligner-Killeen test across bins of x (robust to departures from normality)
fligner.test(model$residuals, cut(bueno$x, c(0, 1, 2, 3, 4, 5)))
##
## Fligner-Killeen test of homogeneity of variances
##
## data: model$residuals and cut(bueno$x, c(0, 1, 2, 3, 4, 5))
## Fligner-Killeen:med chi-squared = 3.2789, df = 3, p-value = 0.3506
# Autocorrelation: Durbin-Watson test (H0: no first-order autocorrelation;
# a large p-value here is consistent with independent errors)
dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 1.9294, p-value = 0.3492
## alternative hypothesis: true autocorrelation is greater than 0
# Normality of the residuals: histogram with kernel density, then three tests.
hist(model$residuals, probability = TRUE)  # TRUE, not the reassignable alias T
lines(density(model$residuals), col = 2)  # kernel density overlay (red)
# Lilliefors (Kolmogorov-Smirnov) test (H0: residuals are normal)
lillie.test(model$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: model$residuals
## D = 0.042555, p-value = 0.8575
# Full component name instead of `model$res` ($ partial matching footgun)
shapiro.test(model$residuals)
##
## Shapiro-Wilk normality test
##
## data: model$residuals
## W = 0.99262, p-value = 0.7788
# Anderson-Darling test (nortest package)
ad.test(model$residuals)
##
## Anderson-Darling normality test
##
## data: model$residuals
## A = 0.17543, p-value = 0.9222
Paréntesis para hacer la gráfica en ggplot2
par(mfrow=c(1,1))  # restore single-panel base-graphics layout
library(ggplot2)
# Scatterplot with the fitted least-squares line (confidence band shown) and
# the estimated equation annotated from the model coefficients.
ggplot(bueno,aes(x,y))+
geom_point(color="darkblue")+
geom_smooth(method="lm",se=TRUE,color="darkred")+
ggtitle("Ejemplo RLS")+
annotate("text", x=2,y=13,label=paste("y=",round(summary(model)$coef[1,1],3),"+",round(summary(model)$coef[2,1],3),"x"))
## `geom_smooth()` using formula 'y ~ x'
library(ggfortify)
# ggplot2-styled version of the lm diagnostic plots
autoplot(model)+theme_bw()
A continuación veamos cómo se ve el siguiente conjunto
# Read the "bad" dataset and fit the same simple linear model.
malo <- read.table("bad.txt", header = TRUE)  # TRUE, not the alias T
plot(malo)
model2 <- lm(y ~ x, malo)
abline(model2, col = "red")  # fitted line over the scatterplot
par(mfrow = c(2, 2))  # 2x2 grid for the diagnostic plots
plot(model2)
Y analicemos los supuestos para este caso
# Homoscedasticity checks for the "bad" model
# Breusch-Pagan test (H0: homoscedastic errors)
bptest(model2)
##
## studentized Breusch-Pagan test
##
## data: model2
## BP = 3.0949, df = 1, p-value = 0.07854
# Full component name instead of `model2$res` ($ partial matching footgun)
fligner.test(model2$residuals, cut(malo$x, c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)))
##
## Fligner-Killeen test of homogeneity of variances
##
## data: model2$residuals and cut(malo$x, c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
## Fligner-Killeen:med chi-squared = 16.57, df = 9, p-value = 0.05589
# Levene's test (car package), grouping residuals by bins of x
leveneTest(model2$residuals, cut(malo$x, c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)))
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 9 1.9281 0.0551 .
## 110
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Autocorrelation: Durbin-Watson test for the "bad" model.
# (lmtest is already attached at the top of the script, so the redundant
# library(lmtest) call was removed.)
dwtest(model2)
##
## Durbin-Watson test
##
## data: model2
## DW = 2.1006, p-value = 0.7077
## alternative hypothesis: true autocorrelation is greater than 0
# Normality of the residuals for the "bad" model: both tests reject H0,
# so the normality assumption fails here.
lillie.test(model2$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: model2$residuals
## D = 0.10764, p-value = 0.001634
shapiro.test(model2$residuals)
##
## Shapiro-Wilk normality test
##
## data: model2$residuals
## W = 0.94471, p-value = 9.023e-05
hist(model2$residuals, probability = TRUE)  # TRUE, not the alias T
lines(density(model2$residuals))  # kernel density overlay
Último ejemplo
# Last example: fit the same simple model to the "esp" dataset.
datos<-read.table("esp.txt", header=TRUE)
# NOTE(review): this overwrites the earlier `model` fitted on `bueno`;
# consider a distinct name (e.g. model3) if the first fit is still needed.
model<-lm(y~x,datos)
par(mfrow=c(2,2))  # 2x2 grid for the diagnostic plots
plot(model)
¿Qué supuesto podría no cumplir?
par(mfrow=c(1,1))  # back to a single plotting panel
plot(datos$x,datos$y)
abline(model)  # fitted line from the model refit on `datos` above