Transformaciones en el modelo de RLS

Ejemplo 1

data<-read.table("s017.txt")
names(data)<-c('Body wt kg', 'Brain wt kg', 'log Body', 'log Brain')

Suponga que un investigador tiene la teoría de que existe una relación lineal entre el peso cerebral y peso corporal de los animales. Para ello, obtiene una muestra de distintas especies: ¿Qué puede decir sobre el modelo ajustado?

plot(data$`Body wt kg`, data$`Brain wt kg`, xlab = "Peso corporal", ylab="Peso del cerebro",col="blue4",  pch = 16, cex = .7)
model<-lm(data$`Brain wt kg`~ data$`Body wt kg`)
abline(model, col="darkred")

par(mfrow=c(2,2))
plot(model)

En casos como este es posible aplicar una transformación a las variables:

#transformación
plot(log(data$`Body wt kg`), log(data$`Brain wt kg`),xlab = "log(Peso corporal)", ylab="log(Peso del cerebro)",col="blue4",  pch = 16, cex = .7)
model2<-lm(log(data$`Brain wt kg`)~ log(data$`Body wt kg`))
abline(model2, col="darkred")

par(mfrow=c(2,2))
plot(model2)

¿Que ocurre cuando se aplica la tranaformación logaritmo? ¿Cual sería la interpretacion de los coeficientes estimado?¿Es cierto que existe una relación lineal entre el peso corporal y peso cerebral?

Ejemplo 2

A continuacion investigaremos la relación entre la dureza y densidad de distontos árboles

data<-read.table("hardness.txt")
data

##      V1   V2   V3   V4   V5   V6
## 1  24.7  484 39.4 1210 53.4 1880
## 2  24.8  427 39.9  989 56.0 1980
## 3  27.3  413 40.3 1160 56.5 1820
## 4  28.4  517 40.6 1010 57.3 2020
## 5  28.4  549 40.7 1100 57.6 1980
## 6  29.0  648 40.7 1130 59.2 2310
## 7  30.3  587 42.9 1270 59.8 1940
## 8  32.7  704 45.8 1180 66.0 3260
## 9  35.6  979 46.9 1400 67.4 2700
## 10 38.5  914 48.2 1760 68.8 2890
## 11 38.8 1070 51.5 1710 69.1 2740
## 12 39.3 1020 51.5 2010 69.1 3140

data2<-cbind(c(data[,1], data[,3],data[,5]),c(data[,2], data[,4], data[,6]))
data2

##       [,1] [,2]
##  [1,] 24.7  484
##  [2,] 24.8  427
##  [3,] 27.3  413
##  [4,] 28.4  517
##  [5,] 28.4  549
##  [6,] 29.0  648
##  [7,] 30.3  587
##  [8,] 32.7  704
##  [9,] 35.6  979
## [10,] 38.5  914
## [11,] 38.8 1070
## [12,] 39.3 1020
## [13,] 39.4 1210
## [14,] 39.9  989
## [15,] 40.3 1160
## [16,] 40.6 1010
## [17,] 40.7 1100
## [18,] 40.7 1130
## [19,] 42.9 1270
## [20,] 45.8 1180
## [21,] 46.9 1400
## [22,] 48.2 1760
## [23,] 51.5 1710
## [24,] 51.5 2010
## [25,] 53.4 1880
## [26,] 56.0 1980
## [27,] 56.5 1820
## [28,] 57.3 2020
## [29,] 57.6 1980
## [30,] 59.2 2310
## [31,] 59.8 1940
## [32,] 66.0 3260
## [33,] 67.4 2700
## [34,] 68.8 2890
## [35,] 69.1 2740
## [36,] 69.1 3140

plot(data2,xlab = "Densidad", ylab="Dureza",col="blue4",  pch = 16, cex = .7)
model<-lm(data2[,2]~data2[,1])
abline(model,col="darkred")

par(mfrow=c(2,2))
plot(model)

summary(model)

## 
## Call:
## lm(formula = data2[, 2] ~ data2[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -338.40  -96.98  -15.71   92.71  625.06 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1160.500    108.580  -10.69 2.07e-12 ***
## data2[, 1]     57.507      2.279   25.24  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 183.1 on 34 degrees of freedom
## Multiple R-squared:  0.9493, Adjusted R-squared:  0.9478 
## F-statistic:   637 on 1 and 34 DF,  p-value: < 2.2e-16

¿Qué trasnformación aplicaria para mejorar este modelo?

#transformación 1
plot(data2[,1], log(data2[,2]),xlab = "Densidad", ylab="log(Dureza)",col="blue4",  pch = 16, cex = .7)
model2<-lm(log(data2[,2])~data2[,1])
abline(model2,col="darkred")

par(mfrow=c(2,2))
plot(model2)

summary(model2)

## 
## Call:
## lm(formula = log(data2[, 2]) ~ data2[, 1])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33578 -0.08690  0.00215  0.08458  0.23294 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.208584   0.080497   64.71   <2e-16 ***
## data2[, 1]  0.042148   0.001689   24.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1357 on 34 degrees of freedom
## Multiple R-squared:  0.9482, Adjusted R-squared:  0.9467 
## F-statistic: 622.6 on 1 and 34 DF,  p-value: < 2.2e-16

#tranformacion 2
par(mfrow=c(1,1))
plot(data2[,1], sqrt(data2[,2]),xlab = "Densidad", ylab="sqrt(Dureza)",col="blue4",  pch = 16, cex = .7)
model3<-lm(sqrt(data2[,2])~data2[,1])
abline(model3,col="darkred")

par(mfrow=c(1,1))
plot(model3)

summary(model3)

## 
## Call:
## lm(formula = sqrt(data2[, 2]) ~ data2[, 1])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5387 -1.0912 -0.2802  1.0259  4.8130 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.25867    1.09397   2.065   0.0466 *  
## data2[, 1]   0.75795    0.02296  33.016   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.844 on 34 degrees of freedom
## Multiple R-squared:  0.9698, Adjusted R-squared:  0.9689 
## F-statistic:  1090 on 1 and 34 DF,  p-value: < 2.2e-16

############
data<-read.table("wordrecall.txt", header=TRUE)
plot(data)

plot(log(data$time),data$prop)
model<-lm(data$prop ~log(data$time))
abline(model)

plot(model)

data<-read.csv("energia.csv", header=TRUE)
plot(data$uso.kWh.,data$demanda.kW.)

model<-lm(sqrt(data$demanda.kW.) ~ data$uso.kWh.)
plot(data$uso.kWh.,sqrt(data$demanda.kW.))
abline(model)

Transformaciones en el modelo de RLS

31/marzo/2022

Ejemplo 1

Ejemplo 2