6  Consolidation

L’objectif de ce document est de combiner les clusters de 1997, 2008, 2011, 2013, 2016 et 2021 avec leurs caractéristiques géophysiques et leurs variables de résultat respectives dans un seul dataframe.

6.1 Méthodes

Les fichiers contenant les données du Household Recode (HR) pour les années 1997, 2008, 2011, 2013, 2016 et 2021, fournis par DHS, sont d’abord chargés. Pour chaque année, on ne sélectionne que les variables nécessaires à l’analyse, à savoir les identifiants des ménages (hv001 et hv002) et les wealth index. On fusionne ces données avec les données géophysiques des ménages, permettant ainsi de constituer un seul dataframe consolidé pour chaque année.

Code
library(tidyverse)
library(sf)
library(haven)
library(lubridate)

# Names of the covariate columns that are stored as nested (list) columns;
# build_year() un-nests exactly these columns.
vars_to_nest <- c("treecover_area", "slope", "elevation",
                  "population_count", "traveltime_2000", "spei_wc")

# Spatio-temporal covariates, reduced to the identifiers, the urban/rural flag
# and the nested covariate columns
spatial_covars_spei <- readRDS("data/derived/spatial_covars_spei_staggered.rds")
all_covars <- select(
  spatial_covars_spei,
  DHSYEAR, DHSCLUST, URBAN_RURA,
  all_of(vars_to_nest)
)

# Treatment classification of the clusters
all_class <- read.csv("data/derived/cluster_treatment_classification_staggered.csv")

# Helper: builds the final table for a given survey year

#' Build the household-level table for one DHS survey year
#'
#' Joins the prepared household table read from `hh_rural_path` with the HR
#' variables hv219 and hv220, the cluster-level covariates and the treatment
#' classification for `year`, then flattens the nested covariate time series
#' into one column per indicator and calendar year (`<indicator>_<year>`).
#'
#' Depends on the script-level objects `all_covars`, `all_class` and
#' `vars_to_nest`.
#'
#' @param hr_object Household Recode table; must contain hv001, hv002, hv219
#'   and hv220.
#' @param year Survey year; used to filter `all_covars` and `all_class` on
#'   DHSYEAR and written to the DHSYEAR column of the result.
#' @param hh_rural_path Path to an .rds file holding the household table;
#'   must contain hv001 and hv002 (expected to also carry the precomputed
#'   wealth variables, per the original note -- confirm against the producer).
#' @param spei_years Calendar years of `spei_wc` observations to keep.
#'   Defaults to the survey year and the two preceding years.
#' @return A data frame with one row per (hv001, hv002): DHSYEAR first, the
#'   joined household/cluster columns without the nested covariates, and one
#'   numeric column per indicator-year. Indicator-years without any
#'   observation for a household are NA.
build_year <- function(hr_object,
                       year,
                       hh_rural_path,
                       spei_years = (year - 2):year) {

  # HR: household identifiers plus hv219/hv220
  hr <- hr_object %>%
    dplyr::select(hv001, hv002, hv219, hv220)

  hh_rural <- read_rds(hh_rural_path)

  # `.env$year` forces the function argument: a bare `year` inside a
  # data-masked verb would silently pick up a column named `year` if one
  # existed in the table being filtered.
  covars_year <- all_covars %>%
    filter(DHSYEAR == .env$year) %>%
    select(-DHSYEAR)

  class_year <- all_class %>%
    filter(DHSYEAR == .env$year) %>%
    select(-DHSYEAR)

  # Attach HR variables, cluster covariates and treatment classification
  base <- hh_rural %>%
    left_join(hr, by = c("hv001", "hv002")) %>%
    left_join(covars_year, by = c("hv001" = "DHSCLUST")) %>%
    left_join(class_year, by = c("hv001" = "DHSCLUST")) %>%
    mutate(DHSYEAR = .env$year) %>%
    relocate(DHSYEAR, .before = everything())

  # Un-nest the covariate time series; only spei_wc is restricted to the
  # requested window, every other indicator keeps all of its years.
  # lubridate::year() is namespaced because the `year` argument shares its name.
  df_long <- base %>%
    select(hv001, hv002, any_of(vars_to_nest)) %>%
    pivot_longer(cols = any_of(vars_to_nest),
                 names_to = "indicator", values_to = "data") %>%
    unnest(data) %>%
    filter(indicator != "spei_wc" |
             lubridate::year(datetime) %in% .env$spei_years) %>%
    mutate(
      year_indicator = paste0(indicator, "_", lubridate::year(datetime))
    ) %>%
    select(hv001, hv002, year_indicator, value)

  # Average while widening. Aggregating after pivot_wider() does not work when
  # a household has several values for one indicator-year: pivot_wider() then
  # returns list-columns, and mean() on a list yields NA with a warning.
  df_wide <- df_long %>%
    pivot_wider(names_from = year_indicator,
                values_from = value,
                values_fn = function(x) mean(x, na.rm = TRUE))

  # Final table: one row per household (hv001, hv002)
  out <- base %>%
    select(-any_of(vars_to_nest)) %>%
    distinct(hv001, hv002, .keep_all = TRUE) %>%
    left_join(df_wide, by = c("hv001", "hv002"))

  out
}

# Run build_year() on the Household Recode file of each survey wave

hr_1997_final <- build_year(
  hr_object = read_dta("data/raw/dhs/DHS_1997/MDHR31DT/MDHR31FL.DTA"),
  year = 1997,
  hh_rural_path = "data/derived/hh_1997_rural_simpler.rds",
  spei_years = 1995:1997
)

hr_2008_final <- build_year(
  hr_object = read_dta("data/raw/dhs/DHS_2008/MDHR51DT/MDHR51FL.DTA"),
  year = 2008,
  hh_rural_path = "data/derived/hh_2008_rural_simpler.rds",
  spei_years = 2006:2008
)

hr_2011_final <- build_year(
  hr_object = read_dta("data/raw/dhs/DHS_2011/MDHR61DT/MDHR61FL.DTA"),
  year = 2011,
  hh_rural_path = "data/derived/hh_2011_rural_simpler.rds",
  spei_years = 2009:2011
)

hr_2013_final <- build_year(
  hr_object = read_dta("data/raw/dhs/DHS_2013/MDHR6ADT/MDHR6AFL.DTA"),
  year = 2013,
  hh_rural_path = "data/derived/hh_2013_rural_simpler.rds",
  spei_years = 2011:2013
)

hr_2016_final <- build_year(
  hr_object = read_dta("data/raw/dhs/DHS_2016/MDHR71DT/MDHR71FL.DTA"),
  year = 2016,
  hh_rural_path = "data/derived/hh_2016_rural_simpler.rds",
  spei_years = 2014:2016
)

hr_2021_final <- build_year(
  hr_object = read_dta("data/raw/dhs/DHS_2021/MDHR81DT/MDHR81FL.DTA"),
  year = 2021,
  hh_rural_path = "data/derived/hh_2021_rural_simpler.rds",
  spei_years = 2019:2021
)

# Stack the six yearly tables into one data frame

hr_consolidated <- list(
  hr_1997_final,
  hr_2008_final,
  hr_2011_final,
  hr_2013_final,
  hr_2016_final,
  hr_2021_final
) %>%
  bind_rows()

# Row count per survey year
count(hr_consolidated, DHSYEAR)
# A tibble: 6 × 2
  DHSYEAR     n
    <dbl> <int>
1    1997  5124
2    2008 13364
3    2011  6025
4    2013  6375
5    2016  9295
6    2021 15364

Enregistrement des résultats

Code
# Write one file per survey year; each list element is named by its target path
iwalk(
  list(
    "data/derived/hr_1997_final.rds" = hr_1997_final,
    "data/derived/hr_2008_final.rds" = hr_2008_final,
    "data/derived/hr_2011_final.rds" = hr_2011_final,
    "data/derived/hr_2013_final.rds" = hr_2013_final,
    "data/derived/hr_2016_final.rds" = hr_2016_final,
    "data/derived/hr_2021_final.rds" = hr_2021_final
  ),
  function(tbl, path) write_rds(tbl, path)
)

# Write the stacked table covering all survey years
write_rds(
  hr_consolidated,
  "data/derived/hr_consolidated_1997_2008_2011_2013_2016_2021.rds"
)
cat("Données enregistrées\n")
Données enregistrées