handout

library(haven)
library(tidyverse)
library(knitr)
library(here)

library(tidyverse) # data wrangling and visualization
library(lme4)      # "golden standard" for mixed-effects modelling in R (no p-values)
library(lmerTest)  # p-values for MEMs based on the Satterthwaite approximation
library(psycho)    # mainly for an "analyze()" function
library(broom)     # for tidy results
library(knitr)     # beautifying tables
library(sjPlot)    # for visualising MEMs
library(effects)   # for visualising MEMs
library(report)    # for describing models
library(emmeans)   # for post-hoc analysis
#pisa <- read_sav("CY07_MSU_STU_QQQ.sav")
#pisa_idn <- pisa %>%  filter(CNTRYID==360)
#write_csv(pisa_idn, "pisa_idn.csv")
pisa_idn <- read_csv(here("dataset", "pisa_idn.csv"))
skor <- pisa_idn %>% select(ST184Q01HA, ST185Q01HA, ST185Q02HA, ST185Q03HA, ST034Q01TA,PV1MATH,PV2MATH,PV3MATH,PV4MATH,PV5MATH,
                        PV6MATH, PV7MATH, PV8MATH, PV9MATH, PV10MATH, CNTSTUID, CNTSCHID, CNTRYID) %>%
  filter(CNTSCHID == 36000271 | 
        CNTSCHID == 36000272 |
        CNTSCHID == 36000273 |
        CNTSCHID == 36000274 |
        CNTSCHID == 36000275 |
        CNTSCHID == 36000276 |
        CNTSCHID == 36000277|
        CNTSCHID == 36000278 |
        CNTSCHID == 36000279 |
        CNTSCHID == 36000280 |
        CNTSCHID == 36000285 |
        CNTSCHID == 36000286 |
        CNTSCHID == 36000287 |
        CNTSCHID == 36000288 |
        CNTSCHID == 36000289 |
        CNTSCHID == 36000290 |
        CNTSCHID == 36000291 |
        CNTSCHID == 36000292 |
        CNTSCHID == 36000293 |
        CNTSCHID == 36000294) %>%
  mutate(math=(PV1MATH+PV2MATH+PV3MATH+PV4MATH+PV5MATH+
                 PV6MATH+PV7MATH+PV8MATH+PV9MATH+PV10MATH)/10) %>%
  mutate(idsekolah=CNTSCHID) %>%
  rename("lonely"=ST034Q01TA) 

skor <- skor %>% filter(ST184Q01HA != "NA")
count(skor, CNTSCHID) %>% print(n=20)

kabel

knitr::kable(count(skor, CNTSCHID) %>% print(n=20))
skor %>% 
  group_by(CNTSCHID) %>% 
  summarise(mean = mean(math, na.rm = T), 
            SD = sd(math, na.rm = T), 
            miss = mean(is.na(math))) %>% 
  mutate_if(is.numeric, ~round(., 2)) %>% 
  print(n = 50)
skor %>% 
  ggplot(aes(ST184Q01HA)) + 
  geom_bar() +
  facet_wrap(~CNTSCHID)

skor %>% 
  ggplot(aes(math)) + 
  geom_density() +
  facet_wrap(~CNTSCHID)
# load package for MLM
library(lme4)
# empty multilevel model
m0 <- lmer(math~ 1 + (1 | CNTSCHID), data = skor)
# details of results
summary(m0)

tab_model(m0)
mx <- lm(math~ST184Q01HA, data=skor)
tab_model(mx)

4089/(4089+2614)
skor$m0 <- predict(m0)
# graph with predicted country level support for immigration
skor %>% 
  ggplot(aes(math, m0, color = CNTSCHID, group = CNTSCHID)) + 
  geom_smooth(se = F, method = lm) +
  geom_jitter() +
  theme_bw() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  labs(x = "x", y = "Skor Matematika", color = "Sekolah")
# dotplot using lattice package
library(lattice)
qqmath(ranef(m0, condVar = TRUE))

# random intercept with a control variable
m1 <- lmer(math ~ 1 + ST184Q01HA + (1 | CNTSCHID), data = skor)
summary(m1)

# save predicted scores
skor$m1 <- predict(m1)
# plot lines based on our model
skor %>% 
  ggplot(aes(ST184Q01HA, m1, color = CNTSCHID, group = CNTSCHID)) + 
  geom_smooth(se = F, method = lm) +
  geom_jitter() +
  theme_bw() +
  labs(x = "Growth Mindset", 
       y = "Skor Matematika", 
       color = "CNTSCHID")
########
# save predicted scores
skor$m1 <- predict(m1)
# plot lines based on our model
skor %>% 
  ggplot(aes(ST184Q01HA, m1, color = CNTSCHID, group = CNTSCHID)) + 
  geom_smooth(se = F, method = lm) +
  geom_jitter() +
  theme_bw() +
  labs(x = "Growth Mindset", 
       y = "Skor Matematika", 
       color = "Sekolah")
# model with random slope
m2 <- lmer(math ~ 1 + ST184Q01HA + 
             (1 + ST184Q01HA | CNTSCHID), 
           data = skor)
# print results
summary(m2)

# another way to see random effect
qqmath(ranef(m2, condVar = TRUE))
# predict results based on our model
skor$m2 <- predict(m2)
# visualize the predictions based on our model
skor %>% 
  ggplot(aes(ST184Q01HA, m2)) + 
  geom_smooth(se = F, method = lm, size = 2) +
  stat_smooth(aes(color = CNTSCHID, group = CNTSCHID),
              geom = "line", alpha = 0.4, size = 1) +
  theme_bw() +
  guides(color = F) +
  labs(x = "loneliness", 
       y = "Skor Matematika", 
       color = "Country")


# yet another way to look at the random effects
# save coefficients
coefs_m2 <- coef(m2)
# print random effects and best line

coefs_m2$CNTSCHID %>%
  mutate(CNTSCHID = rownames(coefs_m2$CNTSCHID))  %>% 
  ggplot(aes(ST184Q01HA, `(Intercept)`, label = CNTSCHID)) + 
  geom_point() + 
  geom_smooth(se = F, method = lm) +
  geom_label(nudge_y = 0.15, alpha = 0.5) +
  theme_bw() +
  labs(x = "Slope", y = "Intercept")