# loading packages
library(tidyverse)
library(ggplot2)
library(readxl)
library(stargazer)
# loading data and generating variables
# look at the hdma documentation for more information
loandata<- data.frame(read_excel('loandata.xlsx')) %>%
mutate(deny=ifelse(s7==3,1,0), # =1 if the person's loan app was denied
black=ifelse(s13==3,1,0), # =1 if the applicant was black, 0 if white
pi_ratio=s46/100, # applicant's payment to income ratio
married=ifelse(s23a=='M',1,0) ) %>% # =1 if applicant is married
filter(s23a!='NA') %>% # removing missing values for married
select('deny', 'black','pi_ratio', 'married') # keeping necessary variables
# plotting the data
ggplot(loandata,aes(x=pi_ratio, y=deny)) +
geom_point() +
theme_minimal() +
labs(x="Payment/Income Ratio",
y='=1 if Denied')
# estimating the models
# probit
probit <- glm(deny~pi_ratio+black+married,
family=binomial(link='probit'),
data=loandata)
# logit
logit <- glm(deny~pi_ratio+black+married,
family=binomial(link='logit'),
data=loandata)
# linear probability model
lpm <- lm(deny~pi_ratio+black+married,data=loandata)
# combining the results
stargazer(probit, logit, lpm, type='text')
##
## ================================================================
## Dependent variable:
## --------------------------------------------
## deny
## probit logistic OLS
## (1) (2) (3)
## ----------------------------------------------------------------
## pi_ratio 2.720*** 5.296*** 0.556***
## (0.382) (0.728) (0.060)
##
## black 0.686*** 1.229*** 0.172***
## (0.084) (0.147) (0.018)
##
## married -0.183*** -0.324** -0.035***
## (0.071) (0.133) (0.013)
##
## Constant -2.142*** -3.906*** -0.068***
## (0.144) (0.280) (0.023)
##
## ----------------------------------------------------------------
## Observations 2,378 2,378 2,378
## R2 0.079
## Adjusted R2 0.077
## Log Likelihood -793.651 -792.632
## Akaike Inf. Crit. 1,595.303 1,593.265
## Residual Std. Error 0.312 (df = 2374)
## F Statistic 67.518*** (df = 3; 2374)
## ================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# predicted probabilities when black equals zero and one
# you can do the same thing for the logit results
# this is not necessary for the lpm model since the coefficients can be
# interpreted as the change in the predicted probability without additional steps
# set all variables except the variable of interest to their mean value
probit_predict <- predict(probit,
newdata = data.frame('black' = c(0,1),
'pi_ratio' = mean(loandata$pi_ratio),
'married'=mean(loandata$married)
),
type = "response")
# difference in the predicted probabilities
# compare this to the coefficient on black in the lpm
diff(probit_predict)
## 2
## 0.1642222