# setting the directory
# The folder below only exists on the author's machine. Switch to it when it
# is present; otherwise keep the current working directory, which must then
# contain NonLinear_R.xlsx for the read_excel() call further down to succeed.
class_dir <- '/Users/stp48131/Library/CloudStorage/Dropbox/WKU/Teaching/ECON_307/Class_Materials/Honors/Non-Linear'
if (dir.exists(class_dir)) {
  setwd(class_dir)
} else {
  message("Class directory not found; using the current working directory: ",
          getwd())
}

# loading readxl and ggplot2 (both are installed as part of the tidyverse)
# if you have not installed them, enter install.packages('tidyverse') in the console
library(readxl)
library(ggplot2) # for graphics

# Read the Excel workbook that holds the data set.
# Electricity is measured in kWh, not dollars.
data <- read_excel(
  path = 'NonLinear_R.xlsx',
  .name_repair = 'universal' # repairs column names (later code uses Average.Temp)
)


# Attach the data frame so its columns can be referenced by bare name,
# without writing data$ in front of them each time.
# NOTE: attach() exposes a copy of the columns as they are at this point;
# columns added to `data` afterwards are not visible by bare name.
attach(what = data)

Notice the obvious non-linear relationship.

## scatter plot of electricity use (kWh) against average temperature
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point()

I am estimating all of the models even though they may not be ideal for this data.

## linear model: Electricity regressed on Average.Temp
linear <- lm(formula = Electricity ~ Average.Temp, data = data)
summary(object = linear)
## 
## Call:
## lm(formula = Electricity ~ Average.Temp, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -271.242  -69.331   -8.261   91.444  265.470 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -187.392    101.840  -1.840   0.0745 .  
## Average.Temp   11.274      1.635   6.894 6.09e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 143.7 on 34 degrees of freedom
## Multiple R-squared:  0.583,  Adjusted R-squared:  0.5707 
## F-statistic: 47.53 on 1 and 34 DF,  p-value: 6.094e-08

On average, electricity usage increases by 11.27 kWh for each one degree increase in average temperature.

# overlaying the fitted values of the linear model on the scatter plot
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = linear$fitted.values))

## quadratic
# generating average temp squared
# The column is read from `data` explicitly rather than through the attach()ed
# copy, so the squared term always comes from the same data frame the model
# is fit to and cannot pick up an unrelated object named Average.Temp.
data$temp_sq <- data$Average.Temp^2

# Electricity regressed on Average.Temp and its square
quad <- lm(Electricity ~ Average.Temp + temp_sq, data = data)
summary(quad)
## 
## Call:
## lm(formula = Electricity ~ Average.Temp + temp_sq, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -184.216  -48.695    8.296   53.785  128.399 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1895.14143  236.40201   8.017 3.00e-09 ***
## Average.Temp  -63.92740    8.34679  -7.659 8.11e-09 ***
## temp_sq         0.63671    0.07027   9.061 1.80e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 78.11 on 33 degrees of freedom
## Multiple R-squared:  0.8804, Adjusted R-squared:  0.8732 
## F-statistic: 121.5 on 2 and 33 DF,  p-value: 6.03e-16

On average, kWh of electricity usage changes by -63.93 + 2(0.6367)Temp = -63.93 + 1.2734 Temp for each one degree increase in average temperature. You can plug in different values of temperature to determine the slope at that point. For example, when the average temperature is 70 degrees, electricity usage increases by -63.93 + 2(0.6367)(70) = 25.21 kWh when the average temperature increases by 1 degree. 25.21 is the slope of the fitted regression equation at temperature = 70.

# overlaying the fitted values of the quadratic model on the scatter plot
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = quad$fitted.values))

## log dependent: log(Electricity) regressed on Average.Temp
## log() computes the natural log by default, which is what is needed here
## the log is taken inside the formula, so no new variable has to be created
lny <- lm(formula = log(Electricity) ~ Average.Temp, data = data)
summary(object = lny)
## 
## Call:
## lm(formula = log(Electricity) ~ Average.Temp, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54261 -0.16358 -0.00796  0.17887  0.48318 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.882341   0.191913  25.440  < 2e-16 ***
## Average.Temp 0.020447   0.003082   6.635 1.31e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2708 on 34 degrees of freedom
## Multiple R-squared:  0.5642, Adjusted R-squared:  0.5514 
## F-statistic: 44.02 on 1 and 34 DF,  p-value: 1.306e-07

On average, each one degree increase in average temperature increases electricity usage by approximately 2% (100 x 0.0204).

# overlaying the fitted regression on the scatter plot
# exp() converts the fitted values from the log scale back to kWh
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = exp(lny$fitted.values)))

## log independent: Electricity regressed on log(Average.Temp)
## the log is taken inside the formula, so no new variable has to be created
lnx <- lm(formula = Electricity ~ log(Average.Temp), data = data)
summary(object = lnx)
## 
## Call:
## lm(formula = Electricity ~ log(Average.Temp), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -273.19  -90.90  -18.55  117.40  304.49 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -1867.7      417.2  -4.477 8.10e-05 ***
## log(Average.Temp)    580.4      102.3   5.675 2.28e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 159.5 on 34 degrees of freedom
## Multiple R-squared:  0.4864, Adjusted R-squared:  0.4713 
## F-statistic:  32.2 on 1 and 34 DF,  p-value: 2.279e-06

On average, a 1% increase in average temperature increases electricity usage by 5.8 kWh. The plot below confirms that this regression equation is not appropriate for this data.

# overlaying the fitted values of the log-independent model on the scatter plot
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = lnx$fitted.values))