library(dplyr)
library(ggplot2)
library(lubridate)
library(stringr)
library(tidyr)
library(forcats)
library(scales)
library(tidytext)
library(ggalluvial)
library(plotly)
#| label: parcats-311-borough-color
#| echo: true
# Load the saved data frame. The pipeline below reads the columns `borough`,
# `complaint_bucket`, `agency_name` and `status` from it.
df_clean_final <- readRDS("complaint_bucket_borough.rds")

# Reduce each of the four categorical columns to its most frequent levels
# (5 boroughs, 10 complaint buckets, 10 agencies, 6 statuses). Everything else
# is lumped into "Other", and rows that are missing or "Other" in any column
# are then discarded so only the dominant categories remain.
d <- df_clean_final |>
  transmute(
    borough = fct_lump_n(factor(borough), 5, other_level = "Other"),
    complaint_bucket = fct_lump_n(
      factor(complaint_bucket), 10,
      other_level = "Other"
    ),
    agency_name = fct_lump_n(factor(agency_name), 10, other_level = "Other"),
    status = fct_lump_n(factor(status), 6, other_level = "Other")
  ) |>
  tidyr::drop_na(borough, complaint_bucket, agency_name, status) |>
  filter(
    borough != "Other",
    complaint_bucket != "Other",
    agency_name != "Other",
    status != "Other"
  ) |>
  # filter() removes the "Other" rows but not the "Other" level itself. Drop
  # the now-empty levels so that levels()/nlevels() on these columns only
  # report categories that still have data.
  droplevels()
# Tally every borough / complaint bucket / agency / status combination,
# largest first. `borough_id` is the integer code of the borough and is what
# the figure uses to colour each ribbon.
d_counts <- d |>
  count(borough, complaint_bucket, agency_name, status, name = "n") |>
  arrange(desc(n)) |>
  mutate(
    # Subsetting rows never removes factor levels, so `borough` may still
    # carry levels with no rows left (e.g. a lumped "Other"). Drop them first
    # so the codes run 1..K without gaps and K counts only plotted boroughs.
    borough = droplevels(borough),
    borough_id = as.integer(borough)
  )

borough_levels <- levels(d_counts$borough)
K <- length(borough_levels)

# Base palette: one colour per borough, assigned in level order.
pal5 <- c("#0072B2", "#D55E00", "#009E73", "#CC79A7", "#F0E442")

# Colours actually needed. Use the first K palette entries; if there are ever
# more boroughs than palette entries, interpolate extra colours rather than
# silently reusing one.
borough_cols <- if (K <= length(pal5)) {
  pal5[seq_len(max(K, 1L))]
} else {
  grDevices::colorRampPalette(pal5)(K)
}

# Plotly colour scale as a list of (position, colour) stops on [0, 1].
# With cmin = 1 and cmax = K, borough i is normalised to (i - 1) / (K - 1),
# so placing stop i exactly there gives every borough its own palette colour
# with no interpolation. A colour scale needs at least two stops, so the
# single-borough case repeats its one colour at 0 and 1.
n_stops <- max(K, 2L)
discrete_scale <- unname(Map(
  function(pos, col) list(pos, col),
  seq(0, 1, length.out = n_stops),
  rep_len(borough_cols, n_stops)
))
# Parallel-categories diagram of the 311 complaint flow. Each row of
# `d_counts` is one path across the four categorical axes, weighted by its
# count `n`.
fig_parcats <- plot_ly(
  type = "parcats",
  arrangement = "freeform",
  dimensions = list(
    list(label = "Borough", values = d_counts$borough),
    list(label = "Complaint Bucket", values = d_counts$complaint_bucket),
    list(label = "Agency", values = d_counts$agency_name),
    list(label = "Status", values = d_counts$status)
  ),
  counts = d_counts$n,
  line = list(
    # Numeric borough code, mapped through `discrete_scale` over [cmin, cmax].
    color = d_counts$borough_id,
    colorscale = discrete_scale,
    cmin = 1, cmax = K,
    showscale = FALSE
  )
) |>
  # One namespaced layout() call: the bare name `layout` also exists in
  # base R's graphics package, so do not rely on masking order.
  plotly::layout(
    title = list(
      text = "311 Complaint Flow: Borough → Type → Agency → Status (Colored by Borough)",
      x = 0.02
    ),
    margin = list(l = 40, r = 40, t = 60, b = 30),
    autosize = TRUE
  )

fig_parcats