Analysis

Set up

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(rnaturalearth)
library(scales)

Attaching package: 'scales'
The following object is masked from 'package:purrr':

    discard
The following object is masked from 'package:readr':

    col_factor
library(sf)
Linking to GEOS 3.13.1, GDAL 3.10.2, PROJ 9.5.1; sf_use_s2() is TRUE

Load Data

# Load cleaned data
# load() restores the objects stored in the .RData file into the current
# environment and invisibly returns their names. The analysis below reads
# `df_hiv`, so stop right away if the file did not provide it.
loaded_objects <- load("data/hivdata.RData")
stopifnot(
  "data/hivdata.RData must contain an object named df_hiv" =
    "df_hiv" %in% loaded_objects
)

Q1: Top 10 Countries with Highest Number of Adults Living with HIV In 2022

# Keep the four columns used below, restricted to the
# "Adults (ages 15+) living with HIV" series
df_adult <- df_hiv |>
  select(country, year, series, hiv) |>
  filter(series == "Adults (ages 15+) living with HIV")

# Show the ten rows with the largest `hiv` value for 2022
df_adult |>
  filter(year == 2022, !is.na(hiv)) |> # skip rows with a missing value
  arrange(desc(hiv)) |>
  slice_head(n = 10)
# A tibble: 10 × 4
   country       year series                                hiv
   <chr>        <int> <chr>                               <dbl>
 1 South Africa  2022 Adults (ages 15+) living with HIV 7400000
 2 India         2022 Adults (ages 15+) living with HIV 2400000
 3 Mozambique    2022 Adults (ages 15+) living with HIV 2300000
 4 Tanzania      2022 Adults (ages 15+) living with HIV 1600000
 5 Uganda        2022 Adults (ages 15+) living with HIV 1400000
 6 Kenya         2022 Adults (ages 15+) living with HIV 1300000
 7 Zambia        2022 Adults (ages 15+) living with HIV 1300000
 8 Zimbabwe      2022 Adults (ages 15+) living with HIV 1200000
 9 Malawi        2022 Adults (ages 15+) living with HIV  950000
10 Ethiopia      2022 Adults (ages 15+) living with HIV  570000

Data Visualization

# The ten countries with the most adults living with HIV in 2022
adult_list <- c(
  "South Africa", "India", "Mozambique", "Tanzania", "Uganda",
  "Kenya", "Zambia", "Zimbabwe", "Malawi", "Ethiopia"
)

# One fill colour per country in adult_list
colors <- c(
  "South Africa" = "#67000d",
  "India" = "#a50f15",
  "Mozambique" = "#cb181d",
  "Tanzania" = "#ef3b2c",
  "Uganda" = "#fb6a4a",
  "Kenya" = "#fcbba1",
  "Zambia" = "#fc9272",
  "Zimbabwe" = "#fee0d2",
  "Malawi" = "#fde0dd",
  "Ethiopia" = "#fff5f0"
)

# Horizontal bar chart of the 2022 values for the countries in adult_list,
# with bars ordered by value
p1 <- df_adult |>
  filter(year == 2022, country %in% adult_list) |>
  ggplot(aes(x = reorder(country, hiv), y = hiv, fill = country)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = colors) + # named vector maps country -> colour
  scale_y_continuous(labels = comma_format()) + # thousands separators
  labs(
    title = "Top 10 Countries with Highest Number of Adults Living with HIV In 2022",
    x = "Country",
    y = "Number of Adults Living with HIV",
    fill = "Country",
    caption = "Source: World Bank | Author: Cindy Xie"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

# Display the chart as an interactive plotly widget
ggplotly(p1)
library(htmlwidgets)

# Convert to an interactive plot. From here on `p1` holds the plotly widget
# rather than the ggplot object it was built from.
# NOTE(review): the plotly converter may not carry over the text set with
# labs(caption = ...); confirm the source/author line shows in out/p1.html.
p1 <- ggplotly(p1)

# Make sure the output directory exists so the write below cannot fail on a
# fresh checkout; this is a no-op when "out" is already there.
dir.create("out", showWarnings = FALSE, recursive = TRUE)

# Save the interactive plot as an HTML file
saveWidget(p1, file = "out/p1.html")

Q3: Top 10 Countries with Highest Number of AIDS Deaths In 2022

# Keep the four columns used below, restricted to the
# "AIDS estimated deaths (UNAIDS estimates)" series
df_death <- df_hiv |>
  select(country, year, series, hiv) |>
  filter(series == "AIDS estimated deaths (UNAIDS estimates)")

# Show the ten rows with the largest `hiv` value for 2022
df_death |>
  filter(year == 2022, !is.na(hiv)) |> # skip rows with a missing value
  arrange(desc(hiv)) |>
  slice_head(n = 10)
# A tibble: 10 × 4
   country       year series                                     hiv
   <chr>        <int> <chr>                                    <dbl>
 1 Mozambique    2022 AIDS estimated deaths (UNAIDS estimates) 48000
 2 South Africa  2022 AIDS estimated deaths (UNAIDS estimates) 45000
 3 India         2022 AIDS estimated deaths (UNAIDS estimates) 40000
 4 Indonesia     2022 AIDS estimated deaths (UNAIDS estimates) 26000
 5 Tanzania      2022 AIDS estimated deaths (UNAIDS estimates) 22000
 6 Zimbabwe      2022 AIDS estimated deaths (UNAIDS estimates) 20000
 7 Zambia        2022 AIDS estimated deaths (UNAIDS estimates) 19000
 8 Kenya         2022 AIDS estimated deaths (UNAIDS estimates) 18000
 9 Uganda        2022 AIDS estimated deaths (UNAIDS estimates) 17000
10 Angola        2022 AIDS estimated deaths (UNAIDS estimates) 13000

Data Visualization

# The ten countries with the highest number of estimated AIDS deaths in 2022
death_list <- c(
  "Mozambique", "South Africa", "India", "Indonesia", "Tanzania",
  "Zimbabwe", "Zambia", "Kenya", "Uganda", "Angola"
)

# One fill colour per country in death_list
# (this reassigns the `colors` vector defined for the earlier chart)
colors <- c(
  "Mozambique" = "#800026",
  "South Africa" = "#bd0026",
  "India" = "#e31a1c",
  "Indonesia" = "#fc4e2a",
  "Tanzania" = "#fd8d3c",
  "Zimbabwe" = "#feb24c",
  "Zambia" = "#fed976",
  "Kenya" = "#ffeda0",
  "Uganda" = "#fff7bc",
  "Angola" = "#ffffcc"
)

# Horizontal bar chart of the 2022 values for the countries in death_list,
# with bars ordered by value
p4 <- df_death |>
  filter(year == 2022, country %in% death_list) |>
  ggplot(aes(x = reorder(country, hiv), y = hiv, fill = country)) +
  geom_col() +
  # Value label anchored at the end of each bar, rounded to a whole number
  # and written with thousands separators to match the axis
  geom_text(
    aes(label = comma(hiv, accuracy = 1)),
    vjust = 0.3, hjust = 1, size = 3
  ) +
  scale_fill_manual(values = colors) + # named vector maps country -> colour
  # Same comma-separated axis labels as the adults-living-with-HIV chart
  scale_y_continuous(labels = comma_format()) +
  coord_flip() +
  labs(
    title = "AIDS Estimated Deaths In 2022",
    x = "Country",
    y = "Number of AIDS estimated deaths",
    fill = "Country",
    caption = "Source: World Bank | Author: Cindy Xie"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )

# Make sure the output directory exists so the write below cannot fail on a
# fresh checkout; this is a no-op when "out" is already there.
dir.create("out", showWarnings = FALSE, recursive = TRUE)

# Save the plot as a file
ggsave("out/p4.png", p4, width = 11, height = 6, dpi = 300)

p4