This is a visualization representing my engagement with the UNDP Libya Stabilization Facility. For the purpose of this exercise and general sharing, the names of individual projects (activities) have been generalized and the financial figures have been altered from the actual data.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(viridis)
## Loading required package: viridisLite
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:viridis':
##
## viridis_pal
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# Use the minimal theme for every plot and load the activity tracker.
theme_set(theme_minimal())
sfl <- read.csv("tracker.csv")

# Parse the three date columns using the month/day/four-digit-year format.
date_columns <- c("Contract.Signing", "Start.Date", "Actual.completion.date")
sfl[date_columns] <- lapply(sfl[date_columns], as.Date, format = "%m/%d/%Y")

# Fix the order in which the status categories appear on plots.
status_levels <- c("Completed", "Underway", "With Procurement", "In Development")
sfl$Overall.Status <- factor(sfl$Overall.Status, levels = status_levels)
# Bar chart: number of projects in each overall status, with the count
# printed above each bar.
ggplot(sfl, aes(x = Overall.Status, fill = Overall.Status)) +
  geom_bar(width = 0.5) +
  # after_stat() replaces stat(), which was deprecated in ggplot2 3.4.0.
  geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5) +
  scale_fill_brewer(type = "qual", palette = 7, guide = NULL) +
  labs(
    title = "Overall implementation progress status",
    x = "Status",
    y = "Number of Projects"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.title.x = element_text(margin = margin(t = 10))
  )
## Warning: `stat(count)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Column chart: amount per overall status, with axis labels scaled to millions.
ggplot(sfl, aes(x = Overall.Status, y = Amount, fill = Overall.Status)) +
  geom_col(width = 0.5) +
  scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6)) +
  # Sum the amounts within each status and print the total above the bar.
  # after_stat(y) replaces the `..y..` notation deprecated in ggplot2 3.4.0.
  stat_summary(
    fun = sum, geom = "text",
    aes(label = scales::unit_format(unit = "", scale = 1e-6)(after_stat(y))),
    vjust = -0.5
  ) +
  scale_fill_brewer(type = "qual", palette = 7, guide = NULL) +
  labs(title = "Overall fund status", x = "Status", y = "Amount (in millions)") +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.title.x = element_text(margin = margin(t = 10))
  )
## Warning: The dot-dot notation (`..y..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(y)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing missing values (`position_stack()`).
# Count activities per sector and draw them as bars, largest count first.
sector_count <- sfl %>% count(Sector)

# Shorter two-line axis labels for the long sector names.
sector_axis_labels <- c(
  "Public Infrastructure and Services" = "Public\nInfrastructure",
  "Solid Waste Management" = "Solid Waste\nManagement"
)

ggplot(
  sector_count,
  aes(x = fct_reorder(Sector, n, .desc = TRUE), y = n, fill = Sector)
) +
  geom_col() +
  # Print each count just above its bar.
  geom_text(aes(label = n), vjust = -0.5) +
  scale_x_discrete(labels = sector_axis_labels) +
  scale_fill_viridis_d(guide = NULL) +
  labs(
    title = "Number of activities per sector",
    x = "Sector",
    y = "Number of Activities"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10))
  )
Summary: Compared to other sectors, the education and health sectors have far more activities.
# Total amount per sector, ignoring missing amounts.
sector_amount <- sfl %>%
  group_by(Sector) %>%
  summarise(total_amount = sum(Amount, na.rm = TRUE))

# Shorter two-line axis labels for the long sector names.
sector_axis_labels <- c(
  "Public Infrastructure and Services" = "Public\nInfrastructure",
  "Solid Waste Management" = "Solid Waste\nManagement"
)

# Bars ordered from the largest total to the smallest, labelled in millions.
ggplot(
  sector_amount,
  aes(
    x = fct_reorder(Sector, total_amount, .desc = TRUE),
    y = total_amount,
    fill = Sector
  )
) +
  geom_col() +
  geom_text(aes(label = sprintf("%.2f", total_amount / 1e6)), vjust = -0.5) +
  scale_x_discrete(labels = sector_axis_labels) +
  # The upper limit is fixed at 9 million; the lower limit is left to ggplot2.
  scale_y_continuous(
    limits = c(NA, 9000000),
    n.breaks = 6,
    labels = scales::unit_format(unit = "M", scale = 1e-6)
  ) +
  scale_fill_viridis_d(guide = NULL) +
  labs(
    title = "Sector-wise amount distribution",
    x = "Sector",
    y = "Amount (in millions)"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10))
  )
Summary: The budget is highest for the education sector, but unlike the number of activities, there isn’t a huge difference in budget distribution across sectors, apart from Covid-19 interventions.
# Same sector totals drawn as points joined by a single line, largest first.
sector_axis_labels <- c(
  "Public Infrastructure and Services" = "Public\nInfrastructure",
  "Solid Waste Management" = "Solid Waste\nManagement"
)

ggplot(
  sector_amount,
  aes(
    x = fct_reorder(Sector, total_amount, .desc = TRUE),
    y = total_amount,
    group = 1 # one group so the line connects all sectors
  )
) +
  geom_line() +
  geom_point(aes(color = Sector), size = 4) +
  geom_text(aes(label = sprintf("%.2f", total_amount / 1e6)), vjust = -0.7) +
  scale_color_viridis_d(guide = NULL) +
  scale_x_discrete(labels = sector_axis_labels) +
  scale_y_continuous(
    limits = c(NA, 9000000),
    n.breaks = 6,
    labels = scales::unit_format(unit = "M", scale = 1e-6)
  ) +
  labs(
    title = "Sector-wise amount distribution",
    x = "Sector",
    y = "Amount (in millions)"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10))
  )
# Horizontal boxplots of activity amounts per sector, ordered by median
# amount, with the individual activities overlaid as faint points.
ggplot(
  sfl,
  aes(
    y = fct_reorder(Sector, Amount, .fun = median, .na_rm = TRUE),
    x = Amount
  )
) +
  geom_boxplot(aes(fill = Sector), alpha = 0.5) +
  # Sectors are on the y axis, so jitter only vertically (height). A non-zero
  # `width` would perturb the Amount values themselves. The seed keeps the
  # jitter reproducible.
  geom_point(
    position = position_jitter(width = 0, height = 0.2, seed = 1),
    alpha = 0.1
  ) +
  scale_fill_viridis_d(guide = NULL) +
  scale_y_discrete(labels = c(
    "Public Infrastructure and Services" = "Public\nInfrastructure",
    "Solid Waste Management" = "Solid Waste\nManagement"
  )) +
  scale_x_continuous(labels = unit_format(unit = "M", scale = 1e-6)) +
  labs(
    title = "Distribution of activities by values per sector ",
    x = "Amount (in millions)",
    y = "Sector"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.title.x = element_text(margin = margin(t = 10))
  )
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
Summary: Here we can see that, in general, the values of individual activities in the health and education sectors are much lower, while those in solid waste management and energy are higher.
# Histogram of implementation duration using 30-unit-wide bins
# (the axis label states the unit is days).
ggplot(sfl, aes(x = Implementation_duration)) +
  geom_histogram(
    binwidth = 30,
    fill = "cornflowerblue",
    color = "black",
    linewidth = 0.1
  ) +
  scale_x_continuous(n.breaks = 10) +
  scale_y_continuous(n.breaks = 6) +
  labs(
    title = "Number of activities by implementation duration",
    x = "Implementation Duration (in days)",
    y = "Number of Activities"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    panel.grid.minor = element_blank()
  )
## Warning: Removed 45 rows containing non-finite values (`stat_bin()`).
Summary: Most implementations fall within a period of one year, with some interesting outliers: one bin shows an implementation period of over 2.5 years, and a chunk of nearly 70 activities took nearly 2 years to implement. ***
# Number of contracts signed in each calendar year; rows without a signing
# date are dropped first.
contract_year <- sfl %>%
  filter(!is.na(Contract.Signing)) %>%
  group_by(year = year(Contract.Signing)) %>%
  summarise(count = n())

# Line-and-point chart of the yearly counts; point shade follows the count.
ggplot(contract_year, aes(x = year, y = count)) +
  geom_line(color = "darkgrey") +
  geom_point(aes(color = count), size = 4, alpha = .7) +
  geom_text(aes(label = count), vjust = -0.7) +
  scale_color_gradient(low = "#56B1F7", high = "#132B43", guide = NULL) +
  scale_y_continuous(limits = c(NA, 130), n.breaks = 6) +
  labs(
    title = "Contracts issued per year",
    x = "Year",
    y = "Number of Activities"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    panel.grid.minor.y = element_blank()
  )
Summary: Contract issuance was on an increasing trend until 2019, when it was at its highest. It declined from 2020 onward, a period marred by the pandemic. ***
# Number of activities completed in each calendar year; rows without an
# actual completion date are dropped first.
complete_year <- sfl %>%
  filter(!is.na(Actual.completion.date)) %>%
  group_by(year = year(Actual.completion.date)) %>%
  summarise(count = n())

# Line-and-point chart of the yearly counts; point shade follows the count.
ggplot(complete_year, aes(x = year, y = count)) +
  geom_line() +
  geom_point(aes(color = count), size = 4, alpha = 0.7) +
  geom_text(aes(label = count), vjust = -0.7) +
  scale_color_gradient(low = "lightgreen", high = "darkgreen", guide = NULL) +
  scale_y_continuous(limits = c(NA, 130), n.breaks = 6) +
  labs(
    title = "Activities completed per year",
    x = "Year",
    y = "Number of Activities"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    panel.grid.minor.y = element_blank()
  )
Summary: This shows alignment with the contract issuance trend. One thing to note is that, even during the pandemic, implementation was not affected that much. ***
# Number of activities per location, drawn as thin bars topped with points
# and ordered from the most activities to the fewest.
ggplot(
  sfl,
  aes(
    # Reorder locations by how many rows each has; spelled-out TRUE replaces
    # the reassignable shorthand T.
    x = fct_reorder(Location, Location,
      .fun = length, .desc = TRUE, .na_rm = TRUE
    ),
    fill = Location
  )
) +
  geom_bar(width = 0.04) +
  geom_point(aes(color = Location), stat = "count", size = 6) +
  # after_stat() replaces stat(), which was deprecated in ggplot2 3.4.0.
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", color = "black", nudge_y = 10
  ) +
  scale_fill_viridis_d(guide = NULL) +
  scale_color_viridis_d(guide = NULL) +
  labs(
    title = "Number of activities per location",
    x = "Location",
    y = "Number of Activities"
  ) +
  theme(
    plot.title = element_text(size = 15),
    axis.title = element_text(size = 12),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.text.x = element_text(angle = 90)
  )
Summary: Tripoli has far more activities than any other location. A large share of the education and health activities, which were relatively small in value, were also implemented here.
# Total amount per location, drawn as thin bars topped with points and
# ordered from the largest total to the smallest.
ggplot(
  sfl,
  aes(
    # Order locations by their summed amount; spelled-out TRUE replaces the
    # reassignable shorthand T.
    x = fct_reorder(Location, Amount,
      .fun = sum, .desc = TRUE, .na_rm = TRUE
    ),
    y = Amount,
    fill = Location
  )
) +
  geom_col(width = 0.04) +
  # Point at the summed amount of each location.
  stat_summary(aes(color = Location), fun = sum, geom = "point", size = 6) +
  scale_fill_viridis_d(guide = NULL) +
  scale_color_viridis_d(guide = NULL) +
  scale_y_continuous(
    limits = c(NA, 9000000),
    n.breaks = 6,
    labels = unit_format(unit = "M", scale = 1e-6)
  ) +
  # Text label with the summed amount in millions, to one decimal place.
  # after_stat(y) replaces the `..y..` notation deprecated in ggplot2 3.4.0.
  stat_summary(
    fun = sum, geom = "text",
    aes(label = scales::unit_format(
      unit = "", scale = 1e-6, accuracy = 0.1
    )(after_stat(y))),
    color = "black", vjust = -1.2, na.rm = TRUE
  ) +
  labs(
    title = "Amount distribution per location",
    x = "Location",
    y = "Amount (in millions)"
  ) +
  theme(
    plot.title = element_text(size = 15, margin = margin(b = 10)),
    axis.title = element_text(size = 12),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.text.x = element_text(angle = 90)
  )
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing missing values (`position_stack()`).
Summary: Here too, Tripoli has the highest value. Along with the highest number of activities, it also had other high-value activities.