rm(list=ls())
library(haven)
TEDS_2016 <- read_stata("https://github.com/datageneration/home/blob/master/DataProgramming/data/TEDS_2016.dta?raw=true")
Create subset
TEDS_subset <- subset(TEDS_2016, select = c("Tondu","female","DPP","age","income","edu","Taiwanese","Econ_worse"))
Change labels
TEDS_subset$Tondu<-as.numeric(TEDS_subset$Tondu,labels=c("Unification now",
"Status quo, unif. in future",
"Status quo, decide later",
"Status quo forever",
"Status quo, indep. in future",
"Independence now",
"No response"))
Regression
fit= lm(Tondu ~ age + income + edu, data = TEDS_subset)
summary(fit)
##
## Call:
## lm(formula = Tondu ~ age + income + edu, data = TEDS_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7780 -1.1841 -0.4322 1.1079 5.4157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.302529 0.257369 20.603 < 2e-16 ***
## age -0.004205 0.003194 -1.316 0.1882
## income -0.031855 0.016357 -1.948 0.0516 .
## edu -0.244608 0.037579 -6.509 9.96e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.725 on 1676 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.04287, Adjusted R-squared: 0.04115
## F-statistic: 25.02 on 3 and 1676 DF, p-value: 7.771e-16
Answer:
7 categories in the dependent variable (Tondu). To improve: turn it into logistic regression.
fit=lm(Tondu~age+edu+income,data=TEDS_subset)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
ggplot(data = TEDS_2016) +
geom_point(mapping = aes(x = Party , y = income )) +
facet_wrap(~ female, nrow = 2)
## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.