Load Libraries
# Load required packages for data manipulation and visualization
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.6
## ✔ ggplot2 4.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Load Data from Github and Append Data
github_url <- "https://github.com/FundamentalsSid601/FinalHW-Ex1/raw/main/data/"
# List of all Excel file names to read
file_names <- c("fsi-2006.xlsx", "fsi-2007.xlsx", "fsi-2008.xlsx", "fsi-2009.xlsx",
"fsi-2010.xlsx", "fsi-2011.xlsx", "fsi-2012.xlsx", "fsi-2013.xlsx",
"fsi-2014.xlsx", "fsi-2015.xlsx", "fsi-2016.xlsx", "fsi-2017.xlsx",
"fsi-2018.xlsx", "fsi-2019.xlsx", "fsi-2020.xlsx", "fsi-2021.xlsx",
"fsi-2022-download.xlsx", "FSI-2023-DOWNLOAD.xlsx")
# Loop through each file, download from GitHub, read it, and combine all into one dataframe
all_data <- map_df(file_names, function(file) {
url <- paste0(github_url, file) # Create full URL
temp_file <- tempfile(fileext = ".xlsx") # Create temporary file
download.file(url, temp_file, mode = "wb", quiet = TRUE) # Download file
df <- read_excel(temp_file) # Read Excel file into dataframe
df <- df %>% mutate(Year = as.numeric(Year)) # Convert Year column to numeric
return(df)
})
Inspect Data
# View first 6 rows of data
head(all_data)
## # A tibble: 6 × 17
## Country Year Rank Total C1: Security Apparat…¹ C2: Factionalized El…²
## <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 Sudan 1.14e9 1st 112. 9.8 9.1
## 2 Congo Democr… 1.14e9 2nd 110. 9.8 9.6
## 3 Cote d'Ivoire 1.14e9 3rd 109. 9.8 9.8
## 4 Iraq 1.14e9 4th 109 9.8 9.7
## 5 Zimbabwe 1.14e9 5th 109. 9.4 8.5
## 6 Chad 1.14e9 6th 106. 9.4 9.5
## # ℹ abbreviated names: ¹​`C1: Security Apparatus`, ²​`C2: Factionalized Elites`
## # ℹ 11 more variables: `C3: Group Grievance` <dbl>, `E1: Economy` <dbl>,
## # `E2: Economic Inequality` <dbl>, `E3: Human Flight and Brain Drain` <dbl>,
## # `P1: State Legitimacy` <dbl>, `P2: Public Services` <dbl>,
## # `P3: Human Rights` <dbl>, `S1: Demographic Pressures` <dbl>,
## # `S2: Refugees and IDPs` <dbl>, `X1: External Intervention` <dbl>,
## # `Change from Previous Year` <dbl>
# View all column names
colnames(all_data)
## [1] "Country" "Year"
## [3] "Rank" "Total"
## [5] "C1: Security Apparatus" "C2: Factionalized Elites"
## [7] "C3: Group Grievance" "E1: Economy"
## [9] "E2: Economic Inequality" "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy" "P2: Public Services"
## [13] "P3: Human Rights" "S1: Demographic Pressures"
## [15] "S2: Refugees and IDPs" "X1: External Intervention"
## [17] "Change from Previous Year"
Keep Only Required Data
# Select only the 16 required columns and rename them to use underscores instead of colons/spaces
all_data <- all_data %>% select(Country, Year, Rank, Total,
"C1: Security Apparatus", "C2: Factionalized Elites",
"C3: Group Grievance", "E1: Economy",
"E2: Economic Inequality", "E3: Human Flight and Brain Drain",
"P1: State Legitimacy", "P2: Public Services",
"P3: Human Rights", "S1: Demographic Pressures",
"S2: Refugees and IDPs", "X1: External Intervention") %>%
rename(C1_Security_Apparatus = "C1: Security Apparatus",
C2_Factionalized_Elites = "C2: Factionalized Elites",
C3_Group_Grievance = "C3: Group Grievance",
E1_Economy = "E1: Economy",
E2_Economic_Inequality = "E2: Economic Inequality",
E3_Human_Flight_and_Brain_Drain = "E3: Human Flight and Brain Drain",
P1_State_Legitimacy = "P1: State Legitimacy",
P2_Public_Services = "P2: Public Services",
P3_Human_Rights = "P3: Human Rights",
S1_Demographic_Pressures = "S1: Demographic Pressures",
S2_Refugees_and_IDPs = "S2: Refugees and IDPs",
X1_External_Intervention = "X1: External Intervention")
Clean and Format Data
# Remove non-numeric characters from Rank (e.g., "1st" becomes 1) and convert to number
all_data <- all_data %>% mutate(Rank = as.numeric(gsub("[^0-9]", "", Rank)))
# Convert Year to integer
all_data <- all_data %>% mutate(Year = as.integer(Year))
# Convert Year from Unix timestamp to actual year (some files store Year as seconds since 1970)
all_data <- all_data %>% mutate(Year = lubridate::year(as.POSIXct(Year, origin = "1970-01-01", tz = "UTC")))
# Check unique years to verify conversion
unique(all_data$Year)
## [1] 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
## [16] 1970 2022
# Fix any 1970 values (these are from 2023 data where Year was stored as 0)
all_data <- all_data %>% mutate(Year = if_else(Year == 1970, 2023, Year))
# Ensure Year is stored as integer
all_data <- all_data %>% mutate(Year = as.integer(Year))
# Verify final data structure and values
head(all_data)
## # A tibble: 6 × 16
## Country Year Rank Total C1_Security_Apparatus C2_Factionalized_Eli…¹
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Sudan 2006 1 112. 9.8 9.1
## 2 Congo Democrat… 2006 2 110. 9.8 9.6
## 3 Cote d'Ivoire 2006 3 109. 9.8 9.8
## 4 Iraq 2006 4 109 9.8 9.7
## 5 Zimbabwe 2006 5 109. 9.4 8.5
## 6 Chad 2006 6 106. 9.4 9.5
## # ℹ abbreviated name: ¹​C2_Factionalized_Elites
## # ℹ 10 more variables: C3_Group_Grievance <dbl>, E1_Economy <dbl>,
## # E2_Economic_Inequality <dbl>, E3_Human_Flight_and_Brain_Drain <dbl>,
## # P1_State_Legitimacy <dbl>, P2_Public_Services <dbl>, P3_Human_Rights <dbl>,
## # S1_Demographic_Pressures <dbl>, S2_Refugees_and_IDPs <dbl>,
## # X1_External_Intervention <dbl>
str(all_data)
## tibble [3,170 × 16] (S3: tbl_df/tbl/data.frame)
## $ Country : chr [1:3170] "Sudan" "Congo Democratic Republic" "Cote d'Ivoire" "Iraq" ...
## $ Year : int [1:3170] 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
## $ Rank : num [1:3170] 1 2 3 4 5 6 6 8 9 10 ...
## $ Total : num [1:3170] 112 110 109 109 109 ...
## $ C1_Security_Apparatus : num [1:3170] 9.8 9.8 9.8 9.8 9.4 9.4 10 9.4 9.1 8.2 ...
## $ C2_Factionalized_Elites : num [1:3170] 9.1 9.6 9.8 9.7 8.5 9.5 9.8 9.6 9.1 8 ...
## $ C3_Group_Grievance : num [1:3170] 9.7 9.1 9.8 9.8 8.5 8.5 8 8.8 8.6 9.1 ...
## $ E1_Economy : num [1:3170] 7.5 8.1 9 8.2 9.8 7.9 8.5 8.4 7 7.5 ...
## $ E2_Economic_Inequality : num [1:3170] 9.2 9 8 8.7 9.2 9 7.5 8.3 8.9 8 ...
## $ E3_Human_Flight_and_Brain_Drain: num [1:3170] 9.1 8 8.5 9.1 9 8 7 8 8.1 7 ...
## $ P1_State_Legitimacy : num [1:3170] 9.5 9 10 8.5 8.9 9.5 10 9.4 8.5 8.3 ...
## $ P2_Public_Services : num [1:3170] 9.5 9 8.5 8.3 9.5 9 10 9.3 7.5 8 ...
## $ P3_Human_Rights : num [1:3170] 9.8 9.5 9.4 9.7 9.5 9.1 9.5 9.6 8.5 8.2 ...
## $ S1_Demographic_Pressures : num [1:3170] 9.6 9.5 8.8 8.9 9.7 9 9 8.8 9.3 7.9 ...
## $ S2_Refugees_and_IDPs : num [1:3170] 9.7 9.5 7.6 8.3 8.9 9 8.1 5 9.3 9.6 ...
## $ X1_External_Intervention : num [1:3170] 9.8 10 10 10 8 8 8.5 10 9.2 10 ...
unique(all_data$Year)
## [1] 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
## [16] 2023 2022
Boxplot: Total Score by Year
# Create boxplot showing distribution of Total FSI score for each year
ggplot(all_data, aes(x = factor(Year), y = Total)) +
geom_boxplot(fill = "steelblue") +
labs(title = "Distribution of Fragile States Index Total Score by Year",
subtitle = "Higher scores indicate greater state fragility",
x = "Year",
y = "Total Score",
caption = "Source: Fragile States Index Data (2006-2023)") +
theme_minimal()

Histograms: Cohesion Indicators (2013 vs 2023)
# Filter for 2013 and 2023, reshape data to long format, and create faceted histograms
all_data %>% filter(Year %in% c(2013, 2023)) %>%
pivot_longer(cols = c(C1_Security_Apparatus, C2_Factionalized_Elites, C3_Group_Grievance),
names_to = "Variable", values_to = "Value") %>%
ggplot(aes(x = Value, fill = factor(Year))) +
geom_histogram(bins = 15, alpha = 0.7, position = "identity") +
facet_wrap(~Variable, scales = "free", ncol = 1) +
labs(title = "Comparison of Cohesion Indicators: 2013 vs 2023",
subtitle = "Higher scores indicate greater fragility",
x = "Score",
y = "Number of Countries",
fill = "Year",
caption = "Source: Fragile States Index Data") +
theme_minimal()
