################################################################
#                 Ejemplo. Outliers                           #
################################################################
library("faraway")
library("ggplot2")
library("ggfortify")

# Load the `star` data set (shipped with the faraway package attached above)
# and show its first rows as a quick look at the columns.
data("star")
head(star, n = 6L)
##   index temp light
## 1     1 4.37  5.23
## 2     2 4.56  5.74
## 3     3 4.26  4.93
## 4     4 4.56  5.74
## 5     5 4.30  5.19
## 6     6 4.46  5.46
# Simple linear regression of light on temp, fitted to the complete data set;
# the summary is printed for inspection.
modelo <- lm(formula = light ~ temp, data = star)
summary(modelo)
## 
## Call:
## lm(formula = light ~ temp, data = star)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1052 -0.5067  0.1327  0.4423  0.9390 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.7935     1.2365   5.494 1.75e-06 ***
## temp         -0.4133     0.2863  -1.444    0.156    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5646 on 45 degrees of freedom
## Multiple R-squared:  0.04427,    Adjusted R-squared:  0.02304 
## F-statistic: 2.085 on 1 and 45 DF,  p-value: 0.1557
# Scatter plot of the complete data with the least-squares line of `modelo`,
# annotated with the fitted equation.
# The coefficients are read once through coef() (no `$coef` partial matching,
# no repeated summary() calls) and the sign of the slope is written
# explicitly so a negative slope does not render as "+ -0.413".
coef_modelo <- round(unname(coef(modelo)), 3)
signo_modelo <- if (coef_modelo[2] < 0) "-" else "+"
etiqueta_modelo <- paste(
  "y=", coef_modelo[1], signo_modelo, abs(coef_modelo[2]), "x"
)
ggplot(star, aes(temp, light)) +
  geom_point(color = "darkblue") +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  ggtitle("Estrellas") +
  annotate("text", x = 4, y = 5, label = etiqueta_modelo)
## `geom_smooth()` using formula 'y ~ x'

# Diagnostic plots for the fitted model `modelo`; label.size = 3 is passed
# through to autoplot() as given.
autoplot(object = modelo, label.size = 3)

# Cook's distance: keep only the observations whose distance is below the
# 4/n cut-off and refit the line on the remaining rows.
distancia <- cooks.distance(modelo)
# n comes from the distances themselves (one per fitted observation) instead
# of a hard-coded 47, so the cut-off stays right if the data change.
star2 <- subset(star, distancia < 4 / length(distancia))
modelo2 <- lm(light ~ temp, data = star2)

# Alternative approach: drop the temperatures that the boxplot rule flags as
# outliers and refit the line on the remaining rows.
atipicos_temp <- boxplot.stats(star$temp)$out
atipicos_temp
## [1] 3.84 3.49 3.49 3.48 3.49
# Filter on the flagged values themselves rather than on a cut-off copied by
# hand from the printed output above.
star3 <- subset(star, !(temp %in% atipicos_temp))
modelo3 <- lm(light ~ temp, data = star3)

# Plot: complete data with the three fitted lines and their equations.
#   dark red   - modelo  (all observations, drawn by geom_smooth)
#   dark blue  - modelo2 (observations kept by the Cook's distance filter)
#   dark green - modelo3 (observations kept by the boxplot filter)

# Build the "y= a + b x" label for a fitted simple regression.
# Coefficients are rounded to 3 decimals; the sign of the slope is written
# explicitly so a negative slope does not render as "+ -0.413".
ecuacion_recta <- function(ajuste) {
  b <- round(unname(coef(ajuste)), 3)
  signo <- if (b[2] < 0) "-" else "+"
  paste("y=", b[1], signo, abs(b[2]), "x")
}

# coef() is used instead of `$coef`, which only works through partial
# matching of the `coefficients` component.
coef_modelo2 <- unname(coef(modelo2))
coef_modelo3 <- unname(coef(modelo3))

# NOTE: `size` is kept for the line width because the recorded output of this
# script suggests an older ggplot2; from ggplot2 3.4.0 on, `linewidth` is the
# preferred argument for lines.
ggplot(star, aes(temp, light)) +
  geom_point(color = "black") +
  geom_smooth(method = "lm", se = FALSE, color = "darkred") +
  ggtitle("Estrellas") +
  annotate("text", x = 4, y = 5,
           label = ecuacion_recta(modelo), color = "darkred") +
  geom_abline(intercept = coef_modelo2[1], slope = coef_modelo2[2],
              color = "darkblue", linetype = "dashed", size = 1.3) +
  annotate("text", x = 4, y = 4.5,
           label = ecuacion_recta(modelo2), color = "darkblue") +
  geom_abline(intercept = coef_modelo3[1], slope = coef_modelo3[2],
              color = "darkgreen", linetype = "dashed", size = 1.3) +
  annotate("text", x = 4.29, y = 4.05,
           label = ecuacion_recta(modelo3), color = "darkgreen")
## `geom_smooth()` using formula 'y ~ x'