SPHC-2014.Rmd

---
title: "Sexual identity and health outcomes in Stockholm County, SPHC 2014"
author: Guoqiang Zhang
email: guoqiang.zhang@ki.se
output: html_notebook
editor_options: 
  chunk_output_type: console
---

### 1. Load Packages
```{r}
library(haven)
library(tidyr)
library(tidyverse)
library(dplyr)
library(finalfit)
library(stringr)
library(ggplot2)
library(naniar)
library(jomo)
library(mitml)
library(mitools)
library(survey)
library(svyVGAM)
library(vcd)
library(rcompanion)
source("/Users/guoqiang.zhang/Library/CloudStorage/OneDrive-KarolinskaInstitutet/Karolinska Institutet/Research Projects/Sexual Identity and Health Outcomes in Stockholm County/Sexual_identity_and_health_outcomes_in_Stockholm_County/Helper_functions.R") # helper function
```

### 2. Import and Prepare Data
```{r}
# Load the saved workspace; the code below expects it to define `d_2014`.
load("/Volumes/LGBT Project data/d_2014.RData")

# sampling strata
# Count missing stratum codes (n_miss() is provided by naniar).
n_miss( d_2014$stratum )
# Store the stratum code as a factor so it can be relabelled and used for grouping below.
d_2014$sampling_strata <- as.factor( d_2014$stratum )
length( unique( d_2014$sampling_strata ) ) # 39 strata

# Region name (municipality or Stockholm city district) for each stratum code.
strata_region_names <- c(
  "3101" = "Upplands-Väsby",
  "3102" = "Vallentuna",
  "3103" = "Österåker",
  "3104" = "Värmdö",
  "3105" = "Järfälla",
  "3106" = "Ekerö",
  "3107" = "Huddinge",
  "3108" = "Botkyrka",
  "3109" = "Salem",
  "3110" = "Haninge",
  "3111" = "Tyresö",
  "3112" = "Upplands-Bro",
  "3113" = "Nykvarn",
  "3114" = "Täby",
  "3115" = "Danderyd",
  "3116" = "Sollentuna",
  "3117" = "Södertälje",
  "3118" = "Nacka",
  "3119" = "Sundbyberg",
  "3120" = "Solna",
  "3121" = "Lidingö",
  "3122" = "Vaxholm",
  "3123" = "Norrtälje",
  "3124" = "Sigtuna",
  "3125" = "Nynäshamn",
  "3201" = "Rinkeby-Kista",
  "3202" = "Spånga-Tensta",
  "3203" = "Hässelby-Vällingby",
  "3204" = "Bromma",
  "3205" = "Kungsholmen",
  "3206" = "Norrmalm",
  "3207" = "Östermalm",
  "3208" = "Södermalm",
  "3209" = "Enskede-Årsta-Vantör",
  "3210" = "Skarpnäck",
  "3211" = "Farsta",
  "3212" = "Älvsjö",
  "3213" = "Hägersten-Liljeholmen",
  "3214" = "Skärholmen"
)

# Relabel the stratum factor; `!!!` splices the lookup into recode() as
# `old = new` pairs, exactly as if each pair had been typed as an argument.
d_2014 <- mutate(
  d_2014,
  sampling_strata_region = recode( sampling_strata, !!!strata_region_names )
)

# Convert a numeric sexual-identity answer code into a labelled factor:
# 1 = Heterosexual, 2 = Homosexual, 3 = Bisexual, any other non-missing code =
# "None of the above". Missing answers stay NA.
label_sexual_identity <- function( answer_code ) {
  identity_levels <- c( "Heterosexual", "Homosexual", "Bisexual", "None of the above" )
  identity_label <- case_when(
    is.na( answer_code ) ~ NA_character_, # keep missing answers missing
    answer_code == 1 ~ identity_levels[ 1 ],
    answer_code == 2 ~ identity_levels[ 2 ],
    answer_code == 3 ~ identity_levels[ 3 ],
    TRUE ~ identity_levels[ 4 ]
  )
  factor( identity_label, levels = identity_levels )
}

# sexual identity in 2014
table( d_2014$F14U90G82, useNA = "always" )
d_2014$sexual_identity_2014 <- label_sexual_identity( d_2014$F14U90G82 )
table( d_2014$sexual_identity_2014, useNA = "always" )

# sexual identity in 2021
table( d_2014$F21F91, useNA = "always" )
d_2014$sexual_identity_2021 <- label_sexual_identity( d_2014$F21F91 )
table( d_2014$sexual_identity_2021, useNA = "always" )

# any change in sexual identity in 2014-2021
# 1 = the two answers differ, 0 = identical, NA = either answer is missing
d_2014$sexual_identity_fluidity <- as.numeric( d_2014$sexual_identity_2014 != d_2014$sexual_identity_2021 )

# sex
# Code 1 in `kon` is labelled "Male"; every other non-missing code is labelled "Female".
table( d_2014$kon, useNA = "always" )
is_male <- d_2014$kon == 1
d_2014$sex <- factor( if_else( is_male, "Male", "Female" ),
                      levels = c( "Male", "Female" ) )
table( d_2014$sex, useNA = "always" )

# age
summary( d_2014$F14alder )
d_2014$age <- d_2014$F14alder

# Age groups as contiguous right-closed intervals: (-Inf, 29], (29, 44],
# (44, 59], (59, Inf). Whole-year ages are classified exactly as before; the
# previous nested ifelse() left gaps between 29-30, 44-45 and 59-60, so any
# non-integer age in a gap fell through to ">=60".
d_2014$age_cat <- cut( as.numeric( d_2014$age ),
                       breaks = c( -Inf, 29, 44, 59, Inf ),
                       labels = c( "16-29", "30-44", "45-59", ">=60" ),
                       right = TRUE )
table( d_2014$age_cat, useNA = "always" )

# interaction term between age and sex
d_2014$age_sex <- interaction( d_2014$age_cat, d_2014$sex )

# generation
# Birth year is approximated as survey year (2014) minus age at the survey.
d_2014$birth_year <- 2014 - d_2014$age

# Generation labels, youngest first; the same vector provides the factor level order.
generation_levels <- c( "Generation Z (1997–2012)",
                        "Millennials (1981–1996)",
                        "Generation X (1965–1980)",
                        "Baby Boomers (1946–1964)",
                        "Silent Generation (1928–1945)",
                        "Greatest Generation (1901–1927)" )

# Birth years outside every range below (or missing) yield NA.
d_2014$generation <- factor(
  case_when(
    d_2014$birth_year >= 1997 & d_2014$birth_year <= 2012 ~ generation_levels[ 1 ],
    d_2014$birth_year >= 1981 & d_2014$birth_year <= 1996 ~ generation_levels[ 2 ],
    d_2014$birth_year >= 1965 & d_2014$birth_year <= 1980 ~ generation_levels[ 3 ],
    d_2014$birth_year >= 1946 & d_2014$birth_year <= 1964 ~ generation_levels[ 4 ],
    d_2014$birth_year >= 1928 & d_2014$birth_year <= 1945 ~ generation_levels[ 5 ],
    d_2014$birth_year >= 1901 & d_2014$birth_year <= 1927 ~ generation_levels[ 6 ]
  ),
  levels = generation_levels )
summary( d_2014$generation )

# country of birth
# "Sverige" -> Sweden, "Europa" -> Europe, any other non-missing value -> Outside Europe.
table( d_2014$fodelseland, useNA = "always" )
born_in_sweden <- d_2014$fodelseland == "Sverige"
born_in_europe <- d_2014$fodelseland == "Europa"
d_2014$country_of_birth <- factor(
  if_else( born_in_sweden, "Sweden",
           if_else( born_in_europe, "Europe", "Outside Europe" ) ),
  levels = c( "Sweden", "Europe", "Outside Europe" ) )
table( d_2014$country_of_birth, useNA = "always" )

# education
# Level codes <= 2 -> "<=9 years", codes 3-4 -> "10-12 years", higher codes -> ">=13 years".
table( d_2014$utbniva2014, useNA = "always" )
education_code <- d_2014$utbniva2014
d_2014$education <- factor(
  if_else( education_code <= 2, "<=9 years",
           if_else( education_code <= 4, "10-12 years", ">=13 years" ) ),
  levels = c( "<=9 years", "10-12 years", ">=13 years" ) )
table( d_2014$education, useNA = "always" )

# occupation
table( d_2014$SSYK_kl, useNA = "always" )

# SSYK occupation classes collapsed into three broader groups.
manual_field_classes <- c(
  "Yrken inom byggverksamhet och tillverkning",
  "Yrken inom lantbruk, trädgård, skogsbruk och fiske",
  "Yrken inom maskinell tillverkning och transport m.m."
)
service_support_classes <- c(
  "Service-, omsorgs- och försäljningsyrken",
  "Yrken inom administration och kundtjänst",
  "Yrken med krav på kortare utbildning eller introduktion"
)
expertise_leadership_classes <- c(
  "Yrken med krav på fördjupad högskolekompetens",
  "Yrken med krav på högskolekompetens eller motsvarande",
  "Chefsyrken",
  "Militära yrken"
)

# Values that match none of the classes above (including missing values) become NA.
d_2014$occupation <- factor(
  case_when(
    d_2014$SSYK_kl %in% manual_field_classes ~ "Manual and field trades",
    d_2014$SSYK_kl %in% service_support_classes ~ "Service and support",
    d_2014$SSYK_kl %in% expertise_leadership_classes ~ "Expertise and leadership"
  ),
  levels = c( "Manual and field trades", "Service and support", "Expertise and leadership" )
)
table( d_2014$occupation, useNA = "always" )

# disposable income
# summary() already reports the NA count; it has no `useNA` argument (that
# belongs to table()), so the argument previously passed here was silently ignored.
summary( d_2014$dispink2014 )
# Negative disposable income is treated as missing.
d_2014$dispink2014[ d_2014$dispink2014 < 0 ] <- NA
# NOTE: this rescales the raw column in place, so re-running this line without
# reloading the data would apply the adjustment twice.
d_2014$dispink2014 <- d_2014$dispink2014*( 1960/1790 ) # Consumer Price Index (CPI)-adjusted income
# Right-closed bins: (-Inf, 2500], (2500, 3500], (3500, 4500], (4500, Inf).
d_2014$income <- cut( as.numeric( d_2014$dispink2014 ),
                      breaks = c( -Inf, 2500, 3500, 4500, Inf ),
                      labels = c( "<=2,500", "(2,500, 3,500]", "(3,500, 4,500]", ">4,500" ),
                      right = TRUE )
table( d_2014$income, useNA = "always" )

# marital status
# "G" or "RP" -> currently married, "OG" -> never married, any other non-missing code -> other.
table( d_2014$civil2014, useNA = "always" )
currently_married <- d_2014$civil2014 == "G" | d_2014$civil2014 == "RP"
never_married <- d_2014$civil2014 == "OG"
d_2014$marital_status <- factor(
  if_else( currently_married, "Currently married",
           if_else( never_married, "Never married", "Other" ) ),
  levels = c( "Never married", "Currently married", "Other" ) )
table( d_2014$marital_status, useNA = "always" )

# living alone
# Answer code 1 is labelled "no"; every other non-missing code is labelled "yes".
table( d_2014$F14U52aG58a, useNA = "always" )
d_2014$living_alone <- factor(
  if_else( d_2014$F14U52aG58a == 1, "no", "yes" ),
  levels = c( "yes", "no" ) )
table( d_2014$living_alone, useNA = "always" )

# personal support
# Answer codes 1-2 are labelled "yes"; higher codes are labelled "no".
table( d_2014$F14U56G66, useNA = "always" )
d_2014$personal_support <- factor(
  if_else( d_2014$F14U56G66 <= 2, "yes", "no" ),
  levels = c( "yes", "no" ) )
table( d_2014$personal_support, useNA = "always" )


##### demographic characteristics #####

# among respondents (n = 22,250)
# make characteristics table by sexual identity
# Row variables (explanatory) are cross-tabulated against the column variable (dependent).
explanatory <- c( "sex", "age_cat", "country_of_birth", "education", "occupation",
                  "income", "marital_status", "living_alone", "personal_support" )
dependent <- "sexual_identity_2014"

# Missing values are shown as their own category for both rows and columns.
d_2014_table_1 <- summary_factorlist( d_2014,
                                      dependent,
                                      explanatory,
                                      na_include = TRUE,
                                      na_include_dependent = TRUE,
                                      total_col = TRUE,
                                      add_col_totals = TRUE,
                                      column = FALSE )

# Fisher's test
# Compare the first column of a contingency table (Heterosexual) with one other
# sexual-identity column and return the p-value formatted to three decimals.
# `simulate = TRUE` requests a Monte Carlo p-value from fisher.test().
pairwise_fisher_p <- function( counts, comparison_col, simulate = FALSE ) {
  pair_counts <- counts[ , c( 1, comparison_col ) ]
  p_value <- fisher.test( pair_counts, simulate.p.value = simulate )$p.value
  format( round( p_value, 3 ), nsmall = 3 )
}

x1 <- table( d_2014$sex, d_2014$sexual_identity_2014 )
x1
pairwise_fisher_p( x1, 2 )
pairwise_fisher_p( x1, 3 )
pairwise_fisher_p( x1, 4 )

x2 <- table( d_2014$age_cat, d_2014$sexual_identity_2014 )
x2
pairwise_fisher_p( x2, 2 )
pairwise_fisher_p( x2, 3, simulate = TRUE )
pairwise_fisher_p( x2, 4, simulate = TRUE )

x3 <- table( d_2014$country_of_birth, d_2014$sexual_identity_2014 )
x3
pairwise_fisher_p( x3, 2, simulate = TRUE )
pairwise_fisher_p( x3, 3, simulate = TRUE )
pairwise_fisher_p( x3, 4, simulate = TRUE )

x4 <- table( d_2014$education, d_2014$sexual_identity_2014 )
x4
pairwise_fisher_p( x4, 2 )
pairwise_fisher_p( x4, 3 )
pairwise_fisher_p( x4, 4, simulate = TRUE )

x5 <- table( d_2014$income, d_2014$sexual_identity_2014 )
x5
pairwise_fisher_p( x5, 2 )
pairwise_fisher_p( x5, 3, simulate = TRUE )
pairwise_fisher_p( x5, 4, simulate = TRUE )

x6 <- table( d_2014$marital_status, d_2014$sexual_identity_2014 )
x6
pairwise_fisher_p( x6, 2 )
pairwise_fisher_p( x6, 3, simulate = TRUE )
pairwise_fisher_p( x6, 4, simulate = TRUE )

x7 <- table( d_2014$living_alone, d_2014$sexual_identity_2014 )
x7
pairwise_fisher_p( x7, 2 )
pairwise_fisher_p( x7, 3 )
pairwise_fisher_p( x7, 4 )

x8 <- table( d_2014$personal_support, d_2014$sexual_identity_2014 )
x8
pairwise_fisher_p( x8, 2 )
pairwise_fisher_p( x8, 3 )
pairwise_fisher_p( x8, 4 )

x9 <- table( d_2014$occupation, d_2014$sexual_identity_2014 )
x9
pairwise_fisher_p( x9, 2 )
pairwise_fisher_p( x9, 3 )
pairwise_fisher_p( x9, 4 )

# make characteristics table by change of sexual identity
explanatory <- c( "sex", "age_cat", "country_of_birth", "education", "occupation",
                  "income", "marital_status", "living_alone", "personal_support",
                  "sexual_identity_2014" )
dependent <- "sexual_identity_fluidity"

# The 0/1 fluidity indicator is labelled only inside this table; the column in
# d_2014 itself is left numeric.
d_2014_fluidity_labelled <- mutate(
  d_2014,
  sexual_identity_fluidity = factor( sexual_identity_fluidity,
                                     levels = c( 0, 1 ),
                                     labels = c( "no change", "change" ) )
)

d_2014_table_2 <- summary_factorlist( d_2014_fluidity_labelled,
                                      dependent,
                                      explanatory,
                                      na_include = TRUE,
                                      na_include_dependent = TRUE,
                                      total_col = TRUE,
                                      add_col_totals = TRUE,
                                      column = FALSE )

# Fisher's test
# Run Fisher's exact test on a whole contingency table and return the p-value
# formatted to three decimals; `simulate = TRUE` requests a Monte Carlo p-value.
fluidity_fisher_p <- function( counts, simulate = TRUE ) {
  p_value <- fisher.test( counts, simulate.p.value = simulate )$p.value
  format( round( p_value, 3 ), nsmall = 3 )
}

x1 <- table( d_2014$sex, d_2014$sexual_identity_fluidity )
x1
fluidity_fisher_p( x1, simulate = FALSE )

x2 <- table( d_2014$age_cat, d_2014$sexual_identity_fluidity )
x2
fluidity_fisher_p( x2 )

x3 <- table( d_2014$country_of_birth, d_2014$sexual_identity_fluidity )
x3
fluidity_fisher_p( x3 )

x4 <- table( d_2014$education, d_2014$sexual_identity_fluidity )
x4
fluidity_fisher_p( x4 )

x5 <- table( d_2014$occupation, d_2014$sexual_identity_fluidity )
x5
fluidity_fisher_p( x5 )

x6 <- table( d_2014$income, d_2014$sexual_identity_fluidity )
x6
fluidity_fisher_p( x6 )

x7 <- table( d_2014$marital_status, d_2014$sexual_identity_fluidity )
x7
fluidity_fisher_p( x7 )

x8 <- table( d_2014$living_alone, d_2014$sexual_identity_fluidity )
x8
fluidity_fisher_p( x8 )

x9 <- table( d_2014$personal_support, d_2014$sexual_identity_fluidity )
x9
fluidity_fisher_p( x9 )

x10 <- table( d_2014$sexual_identity_2014, d_2014$sexual_identity_fluidity )
x10
fluidity_fisher_p( x10 )


##### design weights #####

# Inspect the distribution of the supplied F14dvikt column before using it.
summary( d_2014$F14dvikt )
# Copy it under a descriptive name; `design_weight` is later passed to svydesign() as the sampling weight.
d_2014$design_weight <- d_2014$F14dvikt


##### calibrated weights #####
# Copy F14kalvikt under a descriptive name (presumably the calibrated weight delivered with the data -- confirm against the survey documentation).
d_2014$calibrated_weight <- d_2014$F14kalvikt


##### non-response #####

# unit non-response
d_2014$design_weight_unit_nonresponse <- d_2014$F14dbvikt # weights calculated assuming Missing Completely At Random (MCAR) within each stratum
summary( d_2014$design_weight_unit_nonresponse )
sum( d_2014$design_weight_unit_nonresponse ) # No. of source population = 1,764,671

# Per stratum: unit response probability (design weight / non-response-adjusted
# weight), population size (sum of adjusted weights) and drawn sample size
# (population size / design weight). unique() assumes both weights are constant
# within a stratum.
unitresponse_prob <- d_2014 %>% 
  group_by( sampling_strata_region ) %>% 
  summarise( unitresponse_prob = unique( design_weight ) / unique( design_weight_unit_nonresponse ),
             no.of.population = sum( design_weight_unit_nonresponse ),
             sample_size = unique( no.of.population )/unique( design_weight ) ) # calculate overall unit response rate, and no. of population and sample size within each stratum

# `relationship = "many-to-one"` makes the join fail loudly if a stratum ever
# has more than one summary row (i.e. weights were not constant within it);
# without it such a stratum would silently duplicate its respondents.
d_2014 <- d_2014 %>% 
  left_join( unitresponse_prob, by = "sampling_strata_region", relationship = "many-to-one" )

# item non-response
# Share of respondents in each stratum who answered the 2014 sexual identity item.
itemresponse_prob <- d_2014 %>% 
  group_by( sampling_strata_region ) %>% 
  summarise( itemresponse_prob = sum( !is.na( sexual_identity_2014 ) ) / n() ) # calculate item response rate

# Respondents who skipped the item get probability 0; this marks them as item
# non-respondents for the stratum summary further down.
d_2014 <- d_2014 %>% 
  left_join( itemresponse_prob, by = "sampling_strata_region", relationship = "many-to-one" ) %>% 
  mutate( itemresponse_prob = ifelse( is.na( sexual_identity_2014 ), 0, itemresponse_prob ) )


##### summary of stratified sampling #####

# One row per stratum: population size, drawn sample size, number of unit
# respondents (rows present in d_2014) and number of item respondents (rows with
# a non-zero item response probability), followed by the derived rates and
# "count (percent)" display labels.
sampling_frame_2014 <- d_2014 %>%
  group_by( sampling_strata_region ) %>%
  reframe( no.of.population = unique( no.of.population ),
           sample_size = unique( no.of.population/design_weight ),
           unitresponse = n(),
           itemresponse = sum( itemresponse_prob != 0 ) ) %>%
  as.data.frame() %>%
  mutate( unitresponse_rate = unitresponse/sample_size,
          itemresponse_rate = itemresponse/unitresponse,
          overallresponse_rate = itemresponse/sample_size,
          unitresponse_label = paste0( unitresponse, " (",
                                       sprintf( "%.1f", unitresponse_rate*100 ), "%)" ),
          overallresponse_label = paste0( itemresponse, " (",
                                          sprintf( "%.1f", overallresponse_rate*100 ), "%)" ) )

# County-wide rates pooled over all strata
with( sampling_frame_2014, round( sum( unitresponse )/sum( sample_size ), 3 ) ) # overall unit response rate
with( sampling_frame_2014, round( sum( itemresponse )/sum( unitresponse ), 3 ) ) # overall item response rate
with( sampling_frame_2014, round( sum( itemresponse )/sum( sample_size ), 3 ) ) # overall response rate

writexl::write_xlsx( sampling_frame_2014, "sampling_frame_2014.xlsx" )


##### replicating non-respondent rows for sampling design #####

# d_2014 holds unit respondents only. To make the dataset reflect the full drawn
# sample, each stratum gets one placeholder row per non-respondent carrying only
# the design variables (stratum, population size, sample size, design weight).
rows_to_add <- d_2014 %>%
  group_by( sampling_strata_region ) %>%
  summarise( to_add = unique( sample_size ) - n(),
             no.of.population = unique( no.of.population ),
             sample_size = unique( sample_size ),
             design_weight = unique( design_weight ) )

sum( rows_to_add$to_add ) # 30,787 non-respondents

rows_to_add$to_add <- round( rows_to_add$to_add ) # to resolve floating-point precision issues

# uncount() repeats each stratum row `to_add` times and drops the `to_add` column.
replicated_rows <- uncount( rows_to_add, to_add )

d_2014_complete <- bind_rows( d_2014, replicated_rows ) # 53,037 individuals in the original sample
```

### 3. Self-Reported Health Outcomes
#### 3.1. Complete-case analysis
##### 3.1.1. Prepare dataset
```{r}
##### mental health outcomes #####

# Collapse a four-category answer into Never (code 1) versus Ever (codes 2-4);
# any other value, including missing, becomes NA.
code_never_ever <- function( answer_code ) {
  never_ever <- case_when(
    answer_code == 1 ~ "Never",
    answer_code %in% c( 2, 3, 4 ) ~ "Ever"
  )
  factor( never_ever, levels = c( "Never", "Ever" ) )
}

# recent psychological distress
summary( d_2014_complete$F14GHQ12 )
d_2014_complete$psychological_distress_continuous <- d_2014_complete$F14GHQ12
# psychological distress defined as a score >= 3
d_2014_complete$psychological_distress_binary <- factor(
  if_else( d_2014_complete$F14GHQ12 >= 3, "Yes", "No" ),
  levels = c( "Yes", "No" ) )
table( d_2014_complete$psychological_distress_binary, useNA = "always" )

# suicidal thoughts
table( d_2014_complete$F14U50G56, useNA = "always" )
d_2014_complete$suicidal_thoughts <- code_never_ever( d_2014_complete$F14U50G56 )
table( d_2014_complete$suicidal_thoughts, useNA = "always" )

# suicidal attempts
table( d_2014_complete$F14U51G57, useNA = "always" )
d_2014_complete$suicidal_attempts <- code_never_ever( d_2014_complete$F14U51G57 )
table( d_2014_complete$suicidal_attempts, useNA = "always" )
```

```{r}
##### physical health outcomes #####


```

##### 3.1.2. Calculate prevalence by sexual identity
```{r}
# create survey design
# Complete-case analysis works on a copy of the full-sample dataset.
d_2014_complete_cc <- d_2014_complete

# Stratified design without clustering (ids = ~ 1): strata are the sampling
# regions, weights are the design weights, and the stratum population size is
# supplied as the finite population correction.
survey_design_cc <- svydesign( ids = ~ 1,
                               strata = ~ sampling_strata_region,
                               weights = ~ design_weight,
                               fpc = ~ no.of.population,
                               data = d_2014_complete_cc )

# the following analyses assume MCAR

# Describe one outcome: the column holding it, the factor level that counts as
# a case, and the name passed along for labelling (defaults to the column name).
outcome_spec <- function( variable, condition, name = variable ) {
  list( variable = variable, condition = condition, name = name )
}

variables_list <- list(
  outcome_spec( "psychological_distress_binary", "Yes", name = "psychological_distress" ),
  outcome_spec( "suicidal_thoughts", "Ever" ),
  outcome_spec( "suicidal_attempts", "Ever" )
)

# by sexual identity
results_list <- calculate_svyby_proportion(
  variables_list = variables_list,
  design = survey_design_cc,
  year = 2014,
  group_var = "sexual_identity_2014" )

# Join the per-outcome result tables into one table keyed on sexual identity.
merge_by_sexual_identity <- function( left, right ) {
  merge( left, right, by = "sexual_identity_2014" )
}
self_report_health_outcomes_2014 <- Reduce( merge_by_sexual_identity, results_list )

# overall sample
results_list_overall <- calculate_svyby_proportion_overall(
  variables_list = variables_list,
  design = survey_design_cc,
  year = 2014 )

# Place the per-outcome overall estimates side by side. The argument names x and
# y are kept on purpose: cbind() can derive column names from them.
bind_estimate_columns <- function( x, y ) {
  cbind( x, y )
}
self_report_health_outcomes_2014_overall <- rownames_to_column(
  as.data.frame( Reduce( bind_estimate_columns, results_list_overall ) ),
  var = "sexual_identity_2014" )
# Label the overall row so it can be stacked under the sexual identity rows.
self_report_health_outcomes_2014_overall[ 1, 1 ] <- "Stockholm County"

writexl::write_xlsx( rbind( self_report_health_outcomes_2014, self_report_health_outcomes_2014_overall ), "prevalence_cc_self_report_health_outcomes_2014.xlsx" )

# Build a two-argument merge function for Reduce() that joins result tables on
# the given key columns.
merge_on <- function( key_columns ) {
  function( left, right ) {
    merge( left, right, by = key_columns )
  }
}

# by sex
results_list_by_sex <- calculate_svyby_proportion_subgroup(
  variables_list = variables_list,
  design = survey_design_cc,
  year = 2014,
  group_var = "sexual_identity_2014 + sex" )

self_report_health_outcomes_2014_by_sex <- Reduce(
  merge_on( c( "sexual_identity_2014", "sex" ) ),
  results_list_by_sex )
writexl::write_xlsx( self_report_health_outcomes_2014_by_sex, "prevalence_cc_self_report_health_outcomes_2014_by_sex.xlsx" )

# by age
results_list_by_age <- calculate_svyby_proportion_subgroup(
  variables_list = variables_list,
  design = survey_design_cc,
  year = 2014,
  group_var = "sexual_identity_2014 + age_cat" )

self_report_health_outcomes_2014_by_age <- Reduce(
  merge_on( c( "sexual_identity_2014", "age_cat" ) ),
  results_list_by_age )
writexl::write_xlsx( self_report_health_outcomes_2014_by_age, "prevalence_cc_self_report_health_outcomes_2014_by_age.xlsx" )

# by country of birth
results_list_by_country_of_birth <- calculate_svyby_proportion_subgroup(
  variables_list = variables_list,
  design = survey_design_cc,
  year = 2014,
  group_var = "sexual_identity_2014 + country_of_birth" )

self_report_health_outcomes_2014_by_country_of_birth <- Reduce(
  merge_on( c( "sexual_identity_2014", "country_of_birth" ) ),
  results_list_by_country_of_birth )
writexl::write_xlsx( self_report_health_outcomes_2014_by_country_of_birth, "prevalence_cc_self_report_health_outcomes_2014_by_country_of_birth.xlsx" )
```