This is just some derping around with Game of Thrones user ratings from trakt.tv. The code is here if anyone cares.

Episode Ratings

# Mean rating per episode, one facet per season. Point colour encodes the
# largest `votes.x` value found among the rows of each episode.
ggplot(
  data = got_eps_looong %>%
    group_by(Season, episode) %>%
    summarise(rating = mean(rating), votes = max(votes.x)),
  mapping = aes(x = episode, y = rating, color = votes)
) +
  geom_point(size = 3) +
  facet_wrap(~Season, nrow = 1, scales = "free_x", labeller = label_both) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  # The y-axis is restricted to the 5-10 range.
  scale_y_continuous(
    limits = c(5, 10),
    breaks = seq(0, 10, 1),
    minor_breaks = seq(0, 10, .5)
  ) +
  scale_color_viridis_c(
    breaks = scales::pretty_breaks(),
    labels = scales::comma_format(),
    guide = guide_colorbar(barwidth = 20)
  ) +
  theme(legend.position = "bottom") +
  labs(
    title = "Game of Thrones on trakt.tv",
    subtitle = "Average episode ratings by trakt.tv users by Season",
    x = "Episode Number",
    y = "Rating (1-10)",
    color = "# of Votes",
    caption = caption
  )

Zoomed in y-Axis

This plot is meant to emphasize relative differences in ratings through the narrow y-axis range.

# Same per-episode means as a faceted scatter plot, but without fixed y-axis
# limits, so the axis range follows the data.
ggplot(
  data = got_eps_looong %>%
    group_by(Season, episode) %>%
    summarise(rating = mean(rating), votes = max(votes.x)),
  mapping = aes(x = episode, y = rating, color = votes)
) +
  geom_point(size = 3) +
  facet_wrap(~Season, nrow = 1, scales = "free_x", labeller = label_both) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_y_continuous(
    breaks = seq(0, 10, .5),
    minor_breaks = seq(0, 10, .25)
  ) +
  scale_color_viridis_c(
    breaks = scales::pretty_breaks(),
    labels = scales::comma_format(),
    guide = guide_colorbar(barwidth = 20)
  ) +
  theme(legend.position = "bottom") +
  labs(
    title = "Game of Thrones on trakt.tv",
    subtitle = "Average episode ratings by trakt.tv users by Season",
    x = "Episode Number",
    y = "Rating (1-10)",
    color = "# of Votes",
    caption = caption
  )

IMDb ratings

# IMDb rating (`rating_imdb`) per episode, one facet per season.
# The title and subtitle previously described trakt.tv user ratings (copied
# from the trakt.tv plots) although this chart shows the IMDb column. The
# unused `color` label was dropped because no colour aesthetic is mapped.
got_eps %>%
  ggplot(aes(x = episode, y = rating_imdb)) +
  geom_point(size = 3) +
  facet_wrap(~Season, nrow = 1, labeller = label_both, scales = "free_x") +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_y_continuous(breaks = seq(0, 10, .5), minor_breaks = seq(0, 10, .25)) +
  labs(
    title = "Game of Thrones: IMDb Ratings",
    subtitle = "Per-episode ratings on imdb.com by Season",
    x = "Episode Number", y = "Rating (1-10)",
    caption = caption
  ) +
  theme(legend.position = "bottom")

# IMDb rating against episode number with all seasons in one panel: a linear
# trend line (no confidence band) plus black-outlined points, both coloured
# by Season via the same "Set2" palette.
got_eps %>%
  ggplot(
    mapping = aes(x = episode, y = rating_imdb, color = Season, fill = Season)
  ) +
  # geom_path(size = .3) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(shape = 21, size = 3, stroke = .4, alpha = .75, color = "black") +
  scale_x_continuous(breaks = 1:10) +
  scale_y_continuous(breaks = 1:10, minor_breaks = seq(1, 10, .5)) +
  # One brewer scale serves both the colour and the fill aesthetic.
  scale_fill_brewer(aesthetics = c("color", "fill"), palette = "Set2") +
  theme(legend.position = "bottom") +
  labs(
    title = "Game of Thrones: IMDb Ratings",
    subtitle = "Per-episode ratings on imdb.com",
    x = "Episode #",
    y = "Rating (1 - 10)",
    color = "Season",
    fill = "Season",
    caption = caption
  )

Comparison of trakt.tv and IMDb votes

# Compare trakt.tv and IMDb per episode. The rating columns and the vote
# columns of `got_eps` are each reshaped to long format, every row is tagged
# with its source (column names containing "imdb" become "IMDb", all others
# "trakt.tv"), and the two long tables are joined so that each
# (Season, episode, source) row carries both a rating and a vote count.
left_join(
  got_eps %>%
    select(Season, episode, starts_with("rating")) %>%
    gather(source, rating, starts_with("rating")) %>%
    mutate(source = ifelse(str_detect(source, "imdb"), "IMDb", "trakt.tv")),
  got_eps %>%
    select(Season, episode, starts_with("votes")) %>%
    gather(source, votes, starts_with("votes")) %>%
    mutate(source = ifelse(str_detect(source, "imdb"), "IMDb", "trakt.tv")),
  by = c("Season", "episode", "source")
) %>%
  ggplot(aes(x = episode, y = rating, fill = votes)) +
  geom_point(
    aes(shape = source),
    size = 3, color = "black", stroke = .4, alpha = .75
  ) +
  # The labeller key has to match the facet variable name exactly (`Season`).
  # The previous lowercase `season` key never matched, so the "S" prefix was
  # silently not applied to the strip labels.
  facet_wrap(
    ~Season,
    nrow = 1,
    labeller = labeller(Season = function(x) paste0("S", x)),
    scales = "free_x"
  ) +
  scale_x_continuous(breaks = scales::pretty_breaks()) +
  scale_y_continuous(breaks = seq(0, 10, .5), minor_breaks = seq(0, 10, .25)) +
  # Shapes 21 and 22 are filled shapes, so the `fill = votes` mapping shows.
  scale_shape_manual(values = c("trakt.tv" = 21, "IMDb" = 22)) +
  scale_fill_viridis_c(
    guide = guide_colorbar(barwidth = 10),
    breaks = scales::pretty_breaks(),
    labels = scales::comma_format()
  ) +
  # Axis/legend labels and caption added for consistency with the other plots.
  labs(
    title = "Game of Thrones: trakt.tv vs. IMDb",
    subtitle = "Per-episode ratings by source and Season",
    x = "Episode Number", y = "Rating (1-10)",
    fill = "# of Votes", shape = "Source",
    caption = caption
  ) +
  theme(legend.position = "bottom")

Season Averages

trakt.tv allows users to rate both episodes and seasons directly. However, it seems reasonable to assume that the average of the per-episode ratings within a season is a better estimate of how the season was perceived than the (arguably smaller) number of direct season ratings, so I’ll focus on the episode ratings.

# Distribution of per-episode mean ratings within each season: a boxplot with
# hidden outlier markers, overlaid with one beeswarm point per episode.
got_eps_looong %>%
  group_by(Season, episode) %>%
  summarize(rating = mean(rating), votes = max(votes.x)) %>%
  ungroup() %>%
  # Season is converted to a factor so it acts as a discrete axis and palette.
  mutate(Season = factor(Season)) %>%
  ggplot(aes(x = Season, y = rating, color = Season, fill = Season)) +
  # Outliers are made transparent because the beeswarm layer already draws
  # every episode as a point.
  geom_boxplot(alpha = .25, outlier.alpha = 0) +
  geom_beeswarm(shape = 21) +
  #facet_wrap(~Season, nrow = 1, labeller = label_both, scales = "free_x") +
  scale_y_continuous(breaks = seq(0, 10, .5), minor_breaks = seq(0, 10, .25)) +
  # `guide = "none"` replaces the deprecated `guide = FALSE`.
  scale_color_brewer(palette = "Set2", guide = "none") +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  labs(title = "Game of Thrones on trakt.tv",
       subtitle = "Average episode ratings by trakt.tv users by Season",
       x = "Season", y = "Rating (1-10)",
       caption = caption) +
  theme(legend.position = "bottom")

Per-Episode Distribution

Boxplots

# Rating distribution per episode as boxplots, faceted by season, with the
# mean of each episode overlaid as a point.
ggplot(
  data = got_eps_looong,
  aes(x = factor(episode), y = rating, fill = factor(Season))
) +
  geom_boxplot(alpha = .75) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 21, color = "black") +
  facet_wrap(~Season, nrow = 1, scales = "free_x", labeller = label_both) +
  scale_y_continuous(
    breaks = seq(0, 10, 1),
    minor_breaks = seq(0, 10, .25)
  ) +
  # `guide = "none"` replaces the deprecated `guide = FALSE`.
  scale_fill_brewer(palette = "Set2", guide = "none") +
  labs(
    title = "Game of Thrones on trakt.tv",
    subtitle = "User rating distribution: Boxplots with mean (dot)",
    x = "Episode", y = "Rating (1-10)",
    caption = caption
  )

95% CIs

# Mean rating per episode with error bars computed by `mean_cl_normal`,
# faceted by season.
ggplot(
  data = got_eps_looong,
  aes(x = factor(episode), y = rating, fill = factor(Season))
) +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar") +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 21, color = "black") +
  facet_wrap(~Season, nrow = 1, scales = "free_x", labeller = label_both) +
  scale_y_continuous(
    breaks = seq(0, 10, 1),
    minor_breaks = seq(0, 10, .25)
  ) +
  # `guide = "none"` replaces the deprecated `guide = FALSE`.
  scale_fill_brewer(palette = "Set2", guide = "none") +
  labs(
    title = "Game of Thrones on trakt.tv",
    subtitle = "User rating distribution: 95% CIs",
    x = "Episode", y = "Rating (1-10)",
    caption = caption
  )

Within-Season Variation

Linear Scale

# Histogram of ratings (bin width 1) per season on a linear count axis,
# arranged in a two-column facet grid.
ggplot(
  data = got_eps_looong,
  aes(x = rating, fill = factor(Season))
) +
  geom_histogram(binwidth = 1, color = "black") +
  facet_wrap(~Season, ncol = 2, labeller = label_both) +
  scale_x_continuous(
    breaks = seq(0, 10, 1),
    minor_breaks = seq(0, 10, .25)
  ) +
  scale_y_comma() +
  # `guide = "none"` replaces the deprecated `guide = FALSE`.
  scale_fill_brewer(palette = "Set2", guide = "none") +
  labs(
    title = "Game of Thrones on trakt.tv",
    subtitle = "Per-season user rating distribution",
    x = "Rating (1-10)", y = "Frequency",
    caption = caption
  )

Log Scale

# Histogram of ratings (bin width 1) per season with a log10-transformed
# count axis, arranged in a two-column facet grid.
ggplot(
  data = got_eps_looong,
  aes(x = rating, fill = factor(Season))
) +
  geom_histogram(binwidth = 1, color = "black") +
  facet_wrap(~Season, ncol = 2, labeller = label_both) +
  scale_x_continuous(
    breaks = seq(0, 10, 1),
    minor_breaks = seq(0, 10, .25)
  ) +
  scale_y_comma(trans = "log10") +
  # `guide = "none"` replaces the deprecated `guide = FALSE`.
  scale_fill_brewer(palette = "Set2", guide = "none") +
  labs(
    title = "Game of Thrones on trakt.tv",
    subtitle = "Per-season user rating distribution",
    x = "Rating (1-10)", y = "Frequency (log10 scale)",
    caption = caption
  )

Runtime and Rating

# Episode rating against runtime with a single overall linear trend line.
# Episodes rated below 7.5 or running longer than 80 are highlighted and
# labelled as "s0<Season>: <title>".
got_eps %>%
  ggplot(mapping = aes(x = runtime, y = rating)) +
  # `group = 1` forces one regression line across all episodes.
  geom_smooth(aes(group = 1), method = "lm", color = "red", se = FALSE) +
  geom_point() +
  gghighlight(
    rating < 7.5 | runtime > 80,
    n = 1,
    use_group_by = FALSE,
    label_key = paste0("s0", Season, ": ", title)
  ) +
  labs(
    title = "Game of Thrones on trakt.tv",
    subtitle = "Episode Ratings by Runtime",
    x = "Runtime (mins)",
    y = "Rating (1-10)",
    caption = caption
  )