Chapter 18 Cutting room floor

It’s worth remembering that most graphs end up on the proverbial cutting room floor. Some graph types in particular are truly hit or miss: parallel coordinate plots are at the top of the list in this category. I’m including some of the “misses” here so you’ll realize you’re not alone if you create a graph that does not show anything worthwhile.

18.1 Parallel coordinate plots

library(dplyr)
library(forcats)
library(ggplot2)
library(readr)
library(stringr)
library(tibble)
library(tidyr)

# Read the OEDI building data; the object is reused by the next plot.
oedi_building <- read_csv("data/oedi_building.csv")

# Static parallel coordinate plot of the rows whose building type contains
# "Office": columns 1-4 form the axes and column 5 defines the line groups.
GGally::ggparcoord(
  filter(oedi_building, str_detect(in.building_type, "Office")),
  columns = 1:4,
  groupColumn = 5,
  alphaLines = 0.5,
  splineFactor = 10
)

# Hand-built parallel coordinate plot: one line per row across every column
# whose name starts with "in.week", colored by heating fuel.
oedi_building |>
  select(starts_with("in.week"), in.heating_fuel) |>
  # Turn the row names into an "ID" column so each row is drawn as its own
  # line (used as the `group` aesthetic below).
  rownames_to_column(var = "ID") |>
  pivot_longer(
    cols = starts_with("in.week"),
    names_to = "variable",
    values_to = "value"
  ) |>
  ggplot(
    aes(x = variable, y = value, group = ID, color = in.heating_fuel)
  ) +
  geom_line(lwd = 0.1) +
  theme_bw() +
  coord_flip()

# https://collegescorecard.ed.gov/data
df <- read_csv("data/college_scorecard.csv")

# Parallel coordinate plot of columns 1-4 on a shared (global min-max) scale,
# with lines grouped by column 5.
df |>
  # NOTE(review): na.omit() runs before the numeric conversion below, so any
  # NA that as.numeric() introduces is not removed by this step -- confirm
  # that this ordering is intended.
  na.omit() |>
  mutate(
    COMP_ORIG_YR4_RT = as.numeric(COMP_ORIG_YR4_RT),
    # WOMENONLY is a 0/1 flag in which 1 marks a women-only institution.
    # fct_recode() takes `new label` = "old level", so "1" must map to
    # "Women only" (the labels were previously swapped).
    WOMENONLY = fct_recode(
      factor(WOMENONLY),
      `Not women only` = "0",
      `Women only` = "1"
    )
  ) |>
  GGally::ggparcoord(
    columns = 1:4,
    alphaLines = 0.5,
    scale = "globalminmax",
    groupColumn = 5
  ) +
  theme_bw() +
  theme(legend.position = "bottom", legend.title = element_blank())

# Read the UK universities table with an explicit compact column
# specification; `uk` is used again by a later plot.
uk <- read_csv(
  file = "data/uk_universities.csv",
  col_types = "ccdcddddddnncccccdddc"
)

# Parallel coordinate plot over columns 5, 7 and 9-12, grouped by column 2,
# drawn with flipped coordinates.
GGally::ggparcoord(
  uk,
  columns = c(5, 7, 9:12),
  groupColumn = 2,
  alphaLines = 0.5
) +
  coord_flip()

# https://www.strava.com/clubs/537620/leaderboard
# "--" entries in the file are read as missing values.
run <- read_csv("data/leaderboard.csv", na = "--")

# parse_number() extracts the numeric part of each string. Longest and
# Distance are converted in place; Pace and Gain are new columns derived from
# `Avg. Pace` and `Elev. Gain`.
run <- run |>
  mutate(
    Longest = parse_number(Longest),
    Distance = parse_number(Distance),
    Pace = parse_number(`Avg. Pace`),
    Gain = parse_number(`Elev. Gain`)
  )

# Interactive parallel coordinates for rows with Rank <= 50 and
# Distance < 1000, with reorderable axes and per-axis brushing.
run |>
  filter(Distance < 1000, Rank <= 50) |>
  select(Athlete, Distance, Runs, Longest, Pace, Gain) |>
  parcoords::parcoords(
    rownames = FALSE, # spelled out: `F` is an ordinary, reassignable variable
    reorderable = TRUE,
    brushMode = "1D-axes"
  )
library(parcoords)

# Interactive parallel coordinates for the universities whose Region contains
# "England". The long fee and cost-of-living column names are shortened in
# select(), and lines are colored by Campus_setting with an ordinal D3 scale.
uk |>
  filter(str_detect(Region, "England")) |>
  select(
    University_name,
    UK_rank,
    UGfees = `UG_average_fees_(in_pounds)`,
    PGfees = `PG_average_fees_(in_pounds)`,
    International_students,
    Student_satisfaction,
    COL = `Estimated_cost_of_living_per_year_(in_pounds)`,
    Campus_setting
  ) |>
  parcoords(
    rownames = FALSE,
    reorderable = TRUE,
    brushMode = "1D-axes",
    color = list(
      colorBy = "Campus_setting",
      colorScale = "scaleOrdinal",
      colorScheme = "schemeCategory10"
    ),
    withD3 = TRUE,
    width = 770,
    height = 600
  )