rm(list=ls())
library(haven)

TEDS_2016 <- read_stata("https://github.com/datageneration/home/blob/master/DataProgramming/data/TEDS_2016.dta?raw=true")

Create subset

TEDS_subset <- subset(TEDS_2016, select = c("Tondu","female","DPP","age","income","edu","Taiwanese","Econ_worse"))

Change labels

TEDS_subset$Tondu<-as.numeric(TEDS_subset$Tondu,labels=c("Unification now",
                                                         "Status quo, unif. in future",
                                                         "Status quo, decide later", 
                                                         "Status quo forever",
                                                         "Status quo, indep. in future", 
                                                         "Independence now",
                                                         "No response"))

Regression

fit= lm(Tondu ~ age + income + edu, data = TEDS_subset)
summary(fit)

## 
## Call:
## lm(formula = Tondu ~ age + income + edu, data = TEDS_subset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7780 -1.1841 -0.4322  1.1079  5.4157 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.302529   0.257369  20.603  < 2e-16 ***
## age         -0.004205   0.003194  -1.316   0.1882    
## income      -0.031855   0.016357  -1.948   0.0516 .  
## edu         -0.244608   0.037579  -6.509 9.96e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.725 on 1676 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.04287,    Adjusted R-squared:  0.04115 
## F-statistic: 25.02 on 3 and 1676 DF,  p-value: 7.771e-16

Regplot

(regplot= plot(lm(Tondu ~ age + income + edu, data = TEDS_subset)))

## NULL

Answer:

7 categories in the dependent variable (Tondu). To improve: turn it into logistic regression.

fit=lm(Tondu~age+edu+income,data=TEDS_subset)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.3

ggplot(data = TEDS_2016) + 
  geom_point(mapping = aes(x = Party , y = income )) + 
  facet_wrap(~ female, nrow = 2)

## Don't know how to automatically pick scale for object of type haven_labelled/vctrs_vctr/double. Defaulting to continuous.

Assignment_3

Federico Ferrero

2/10/2021

Create subset

Change labels

Regression

Regplot

Answer:

7 categories in the dependent variable (Tondu). To improve: turn it into logistic regression.