# Point R at the folder holding the class materials so the relative
# workbook path used below resolves.
setwd('/Users/stp48131/Library/CloudStorage/Dropbox/WKU/Teaching/ECON_307/Class_Materials/Honors/Non-Linear')
# Packages used in this script. If either one is not installed yet, run
# install.packages(c('readxl', 'ggplot2')) in the console first.
library(readxl) # for read_excel()
library(ggplot2) # for graphics
# Read the Excel workbook that contains the data.
# Electricity is kWh, not dollars.
data <- read_excel(
  path = 'NonLinear_R.xlsx',
  .name_repair = 'universal' # cleans variable names
)
# Put the columns of data on the search path so that variables can be
# referenced by bare name, without the data$ prefix each time.
attach(data)
Notice the obvious non-linear relationship.
## scatter plot of electricity usage against average temperature
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point()
I am estimating all of the models even though they may not be ideal for this data.
## linear model: Electricity regressed on Average.Temp
linear <- lm(formula = Electricity ~ Average.Temp, data = data)
# print coefficients, standard errors and fit statistics
summary(linear)
##
## Call:
## lm(formula = Electricity ~ Average.Temp, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -271.242 -69.331 -8.261 91.444 265.470
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -187.392 101.840 -1.840 0.0745 .
## Average.Temp 11.274 1.635 6.894 6.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 143.7 on 34 degrees of freedom
## Multiple R-squared: 0.583, Adjusted R-squared: 0.5707
## F-statistic: 47.53 on 1 and 34 DF, p-value: 6.094e-08
On average, electricity usage increases by 11.27 kWh for each one degree increase in average temperature.
# overlay the fitted values of the linear model on the scatter plot
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = fitted(linear)))
## quadratic
# Generate average temperature squared as a new column of data. The square is
# computed from data$Average.Temp rather than from the bare name Average.Temp,
# so it always uses the column in data and does not depend on attach() or on
# any other object called Average.Temp in the workspace.
data$temp_sq <- data$Average.Temp^2
# Regress Electricity on average temperature and its square.
quad <- lm(Electricity ~ Average.Temp + temp_sq, data = data)
summary(quad)
##
## Call:
## lm(formula = Electricity ~ Average.Temp + temp_sq, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -184.216 -48.695 8.296 53.785 128.399
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1895.14143 236.40201 8.017 3.00e-09 ***
## Average.Temp -63.92740 8.34679 -7.659 8.11e-09 ***
## temp_sq 0.63671 0.07027 9.061 1.80e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 78.11 on 33 degrees of freedom
## Multiple R-squared: 0.8804, Adjusted R-squared: 0.8732
## F-statistic: 121.5 on 2 and 33 DF, p-value: 6.03e-16
On average, kWh of electricity usage changes by -63.93 + 2(0.6367)Temp for each one degree increase in average temperature (this is the derivative of the fitted quadratic with respect to temperature). You can plug in different values of temperature to determine the slope at that point. For example, when the average temperature is 70 degrees, electricity usage increases by about -63.93 + 2(0.6367)(70) = 25.21 kWh when the average temperature increases by 1 degree. 25.21 is the slope of the fitted regression equation at temperature=70.
# overlay the fitted values of the quadratic model on the scatter plot
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = fitted(quad)))
## log dependent
## log() is the natural log by default, and it can be applied to the
## dependent variable inside the formula, so no new variable is needed
lny <- lm(formula = log(Electricity) ~ Average.Temp, data = data)
summary(lny)
##
## Call:
## lm(formula = log(Electricity) ~ Average.Temp, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54261 -0.16358 -0.00796 0.17887 0.48318
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.882341 0.191913 25.440 < 2e-16 ***
## Average.Temp 0.020447 0.003082 6.635 1.31e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2708 on 34 degrees of freedom
## Multiple R-squared: 0.5642, Adjusted R-squared: 0.5514
## F-statistic: 44.02 on 1 and 34 DF, p-value: 1.306e-07
On average, each one degree increase in average temperature increases electricity usage by approximately 2% (the coefficient, 0.0204, multiplied by 100).
# overlay the log-dependent fit on the scatter plot; the fitted values are
# in logs, so exp() converts them back to the units of Electricity
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = exp(fitted(lny))))
## log independent
## log() is applied to the regressor inside the formula, so no new
## variable is needed
lnx <- lm(formula = Electricity ~ log(Average.Temp), data = data)
summary(lnx)
##
## Call:
## lm(formula = Electricity ~ log(Average.Temp), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -273.19 -90.90 -18.55 117.40 304.49
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1867.7 417.2 -4.477 8.10e-05 ***
## log(Average.Temp) 580.4 102.3 5.675 2.28e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 159.5 on 34 degrees of freedom
## Multiple R-squared: 0.4864, Adjusted R-squared: 0.4713
## F-statistic: 32.2 on 1 and 34 DF, p-value: 2.279e-06
On average, a 1% increase in average temperature increases electricity usage by about 5.8 kWh (the coefficient, 580.4, divided by 100). The plot below confirms that this regression equation is not appropriate for this data.
# overlay the fitted values of the log-independent model on the scatter plot
ggplot(data = data, mapping = aes(x = Average.Temp, y = Electricity)) +
  geom_point() +
  geom_line(mapping = aes(y = fitted(lnx)))