# Outliers
################################################################
# Ejemplo. Outliers #
################################################################
library("faraway")
library("ggplot2")
library("ggfortify")
# Load the `star` data set and preview its first six rows.
data("star")
head(star, n = 6L)
## index temp light
## 1 1 4.37 5.23
## 2 2 4.56 5.74
## 3 3 4.26 4.93
## 4 4 4.56 5.74
## 5 5 4.30 5.19
## 6 6 4.46 5.46
# Fit the simple linear regression of light on temp using every row of
# `star`, then print the usual coefficient table and fit statistics.
modelo <- lm(formula = light ~ temp, data = star)
summary(modelo)
##
## Call:
## lm(formula = light ~ temp, data = star)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1052 -0.5067 0.1327 0.4423 0.9390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.7935 1.2365 5.494 1.75e-06 ***
## temp -0.4133 0.2863 -1.444 0.156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5646 on 45 degrees of freedom
## Multiple R-squared: 0.04427, Adjusted R-squared: 0.02304
## F-statistic: 2.085 on 1 and 45 DF, p-value: 0.1557
# Scatter plot of light against temp with the least-squares line of `modelo`
# and the fitted equation written on the panel.
# coef() is used instead of summary(modelo)$coef, which only worked through
# `$` partial matching of the "coefficients" component.
coefs <- round(unname(coef(modelo)), 3)
# Render a negative slope as "- b" instead of "+ -b".
signo <- if (isTRUE(coefs[2] < 0)) "-" else "+"
etiqueta <- paste("y=", coefs[1], signo, abs(coefs[2]), "x")
ggplot(star, aes(temp, light)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  ggtitle("Estrellas") +
  annotate("text", x = 4, y = 5, label = etiqueta)
## `geom_smooth()` using formula 'y ~ x'
# Diagnostic plots for the fitted model (autoplot method for lm objects
# provided by ggfortify); label.size sets the size of the point labels.
autoplot(object = modelo, label.size = 3)
# Cook's distance: keep only the observations whose distance is below the
# 4/n cutoff and refit the regression on the remaining rows.
distancia <- cooks.distance(modelo)
# n comes from the distances themselves rather than the hard-coded 47, so the
# cutoff stays correct if the data set changes.
umbral <- 4 / length(distancia)
star2 <- subset(star, distancia < umbral)
modelo2 <- lm(light ~ temp, data = star2)
# Alternative approach: treat the boxplot outliers of temp as the anomalous
# observations, remove them and refit the regression.
atipicos <- boxplot.stats(star$temp)$out
atipicos
## [1] 3.84 3.49 3.49 3.48 3.49
# Drop exactly the values flagged above instead of a hand-typed cutoff
# (previously temp > 3.85), so the filter follows the data.
star3 <- subset(star, !(temp %in% atipicos))
modelo3 <- lm(light ~ temp, data = star3)
# Plot: compare the three fitted lines on top of the full data set.
#   dark red   - modelo  (fitted on star)
#   dark blue  - modelo2 (fitted on star2)
#   dark green - modelo3 (fitted on star3)

# Build the "y= a + b x" label for a fitted simple regression.
# coef() avoids the `$coef` partial matching of "coefficients", and a negative
# slope is shown as "- b" instead of "+ -b".
etiqueta_recta <- function(fit) {
  b <- round(unname(coef(fit)), 3)
  signo <- if (isTRUE(b[2] < 0)) "-" else "+"
  paste("y=", b[1], signo, abs(b[2]), "x")
}

ggplot(star, aes(temp, light)) +
  geom_point(color = "black") +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  ggtitle("Estrellas") +
  annotate("text", x = 4, y = 5, label = etiqueta_recta(modelo),
           color = "darkred") +
  # `size` is kept for the line width to stay compatible with ggplot2
  # releases that predate the `linewidth` argument.
  geom_abline(intercept = coef(modelo2)[1], slope = coef(modelo2)[2],
              color = "darkblue", linetype = "dashed", size = 1.3) +
  annotate("text", x = 4, y = 4.5, label = etiqueta_recta(modelo2),
           color = "darkblue") +
  geom_abline(intercept = coef(modelo3)[1], slope = coef(modelo3)[2],
              color = "darkgreen", linetype = "dashed", size = 1.3) +
  annotate("text", x = 4.29, y = 4.05, label = etiqueta_recta(modelo3),
           color = "darkgreen")
## `geom_smooth()` using formula 'y ~ x'