Intro

This notebook analyzes data that was collected by Disconnect and Mozilla to compare the page load speed of different browsers.

Browsers under test: Chrome, Chrome Incognito, Firefox Quantum, and Firefox Quantum Private Browsing.

library(dplyr, warn.conflicts = FALSE)
library(ggplot2, warn.conflicts = FALSE)
library(tidyr, warn.conflicts = FALSE)
library(scales, warn.conflicts = FALSE)
library(data.table, warn.conflicts = FALSE)

# Palette whose colors remain distinguishable for colorblind readers
cbPalette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

# Load the study results from the CSV file. Columns 3 to 25 (the timestamps)
# are read as character so that they stay readable.
tp_data <- fread("pageloadstudy.csv", colClasses = list(character = 3:25))

# Swap the raw browser identifiers for human-readable display names.
# Each `:=` updates the matching rows and hands the table on to the next step.
tp_data <- tp_data[
  Browser == "chrome_normal", Browser := "Chrome"
][
  Browser == "chrome_private", Browser := "Chrome Incognito"
][
  Browser == "firefox_normal", Browser := "Firefox Quantum"
][
  Browser == "firefox_private", Browser := "Firefox Quantum Private Browsing"
]

In our study, we collected performance.timing data for 200 news websites that were loaded 10 times each in every browser. Here is a sample of the data as an example:

# Express every performance.timing value as milliseconds elapsed since
# navigationStart, so that timings are comparable across page loads.
#
# Steps:
#   1. For every column except Domain, Browser and `Load Time`, compute
#      value - navigationStart into a new "<name>_diff" column. The suffix
#      keeps the original navigationStart column untouched while the
#      remaining columns are still being computed.
#   2. Keep only the identifying columns and the freshly computed differences.
#   3. Negative differences are replaced by NA.
#   4. Drop columns that contain no usable value at all.
perf_timings <- tp_data %>%
  mutate(
    across(
      -c(Domain, Browser, `Load Time`),
      ~ as.numeric(.x) - as.numeric(navigationStart),
      .names = "{.col}_diff"
    )
  ) %>%
  select(Domain, Browser, ends_with("_diff")) %>%
  mutate(across(where(is.numeric), ~ replace(.x, .x < 0, NA))) %>%
  select(where(~ any(!is.na(.x))))

# Strip the helper suffix again; anchored so only the trailing "_diff" is cut
names(perf_timings) <- sub("_diff$", "", names(perf_timings))

# display a random sample of perf_timings in the notebook
sample_n(perf_timings, 3)
##                               Domain                          Browser
## 5342          http://news.google.com                 Chrome Incognito
## 6710           http://Denverpost.com                 Chrome Incognito
## 5156 http://Sandiegouniontribune.com Firefox Quantum Private Browsing
##      navigationStart fetchStart domainLookupStart domainLookupEnd
## 5342               0        753               753             753
## 6710               0        145               146             154
## 5156               0         67                67              67
##      connectStart connectEnd secureConnectionStart requestStart
## 5342          753        753                    NA          754
## 6710          154        160                    NA          160
## 5156           67         67                     0           84
##      responseStart responseEnd domLoading domInteractive
## 5342           843        1388        845           1654
## 6710           169         184        180           1045
## 5156            96         119        101            186
##      domContentLoadedEventStart domContentLoadedEventEnd domComplete
## 5342                       1699                     1699        2107
## 6710                       1045                     1073        3664
## 5156                        186                      187        3085
##      loadEventStart loadEventEnd timeToNonBlankPaint unloadEventStart
## 5342           2109         2109                  NA               NA
## 6710           3664         3700                  NA               NA
## 5156           3090         3221                 182               NA
##      unloadEventEnd redirectStart redirectEnd
## 5342             NA            NA          NA
## 6710             NA            NA          NA
## 5156             NA            NA          NA

Comparing means per browser

To analyze the data, let’s look at the mean page load time for each browser and compare the differences.

# Mean time from navigationStart to loadEventEnd (in ms) for each browser.
# perf_timings contains NA where a timing was negative, so missing values are
# excluded explicitly; otherwise one NA would turn a browser's mean into NA.
by_browser <- perf_timings %>%
  group_by(Browser) %>%
  summarize(mean_page_load_time = mean(loadEventEnd, na.rm = TRUE))

by_browser
## # A tibble: 4 x 2
##                            Browser mean_page_load_time
##                              <chr>               <dbl>
## 1                           Chrome            7686.065
## 2                 Chrome Incognito            7667.695
## 3                  Firefox Quantum            7303.077
## 4 Firefox Quantum Private Browsing            3186.758
# Distribution of the time to loadEventEnd per browser on a log10 axis.
# The orange dot marks the arithmetic mean of each browser.
#
# scale_y_log10() transforms the data before any stat is computed, so
# stat_summary() receives log10 values. Taking a plain mean() of those would
# plot the geometric mean; the summary function therefore converts back to
# milliseconds, averages, and returns the result on the log10 scale again.
ggplot(data = perf_timings, aes(Browser, loadEventEnd)) +
  geom_boxplot() +
  stat_summary(
    fun = function(log_ms) log10(mean(10^log_ms)),
    colour = "#E69F00",
    geom = "point",
    shape = 16,
    size = 5,
    show.legend = FALSE
  ) +
  scale_y_log10(breaks = c(100, 500, 1000, 5000, 10000)) +
  theme_minimal() +
  theme(text = element_text(size = 16)) +
  # line breaks keep the long browser names from overlapping on the x axis
  scale_x_discrete(
    labels = c(
      "Chrome" = "Chrome",
      "Chrome Incognito" = "Chrome\nIncognito",
      "Firefox Quantum" = "Firefox\nQuantum",
      "Firefox Quantum Private Browsing" = "Firefox\nQuantum\nPrivate\nBrowsing"
    )
  ) +
  labs(y = "time to loadEventEnd in ms")

The difference between Chrome’s Incognito mode and Firefox Quantum Private Browsing is about 4.5 seconds in mean page load time (roughly 7,668 ms versus 3,187 ms). It is also visible that there is virtually no difference between Chrome’s normal and Incognito mode. This shows that the differences between Firefox Quantum and its Private Browsing mode, which is similar to Chrome’s Incognito mode + Tracking Protection, come from Tracking Protection.

Putting simulated measurements into context

One important final step when doing automated performance measurements is to find related results that can be used to put your measurements into context.

SOASTA published results about predicting user bounce rate based on performance timings that they measured in the wild. Average session load time was one of the best performing predictors in their study.

Their results show that an average session load time of 6 seconds already leads to a 70% bounce rate. Let’s look at the share of page loads in our data that have a load time longer than 6 seconds and compare across browsers.

# Share of page loads per private-mode browser that finished within, or took
# longer than, 6 seconds (6000 ms) until loadEventEnd.
#
# Only the two private modes (Chrome Incognito, Firefox Quantum Private
# Browsing) are compared. Loads without a usable loadEventEnd are dropped
# first, because a single NA would otherwise make the counts NA. Percentages
# are computed against the number of loads actually observed per browser
# instead of assuming exactly 200 sites x 10 loads = 2000 rows.
long_loading <- perf_timings %>%
  filter(grepl("Incog|Private", Browser), !is.na(loadEventEnd)) %>%
  group_by(Browser) %>%
  summarize(
    total = n(),
    slow = sum(loadEventEnd > 6000),
    fast = total - slow
  ) %>%
  pivot_longer(
    cols = c(slow, fast),
    names_to = "page_load_time",
    values_to = "n"
  ) %>%
  mutate(perc = n / total * 100)

# Fix the stacking order and give the two categories readable legend labels
long_loading$page_load_time <- factor(
  long_loading$page_load_time,
  levels = c("fast", "slow"),
  labels = c("within 6 sec", "longer than 6 sec")
)

# Stacked bar per browser; position = "fill" rescales each bar to 0-1 so the
# y axis can be formatted as percent. The text label shows the share in %.
ggplot(
  long_loading,
  aes(x = Browser, y = perc, fill = page_load_time, label = round(perc, 2))
) +
  geom_bar(stat = "identity", position = "fill", width = 0.5) +
  scale_y_continuous(labels = percent) +
  theme_minimal() +
  theme(text = element_text(size = 16)) +
  scale_x_discrete(
    labels = c(
      "Chrome" = "Chrome",
      "Chrome Incognito" = "Chrome\nIncognito",
      "Firefox Quantum" = "Firefox\nQuantum",
      "Firefox Quantum Private Browsing" = "Firefox\nQuantum\nPrivate\nBrowsing"
    )
  ) +
  labs(y = "Share of pages loaded in %", fill = "Page load time") +
  scale_fill_manual(values = cbPalette) +
  geom_text(size = 4, position = position_fill(vjust = 0.5))