library(gt)
library(tidyverse)
#setwd("C:/Users/AlexThomson/OneDrive - Statistic For Sustainable Development/June Seminar - tables from R/gt/")
load("imdb.Rdata")
CC3_farm <- readRDS("CC3_farm.rds")

IMDB entry tables

basic table with no formatting

You can create a very basic table by simply calling the gt() function and specifying your data frame. You can also pipe the function directly from using dplyr, tidyr etc. to manipulate your data just like you would any other tidyverese package. In the example below the function has even automatically detected that the first column can be used as a group name for rows column in the stub.

Remember that in gt terminology the stub is the row names and the spanner is a name for a group of columns.

imdb_animation_avgs <- imdb %>%
  filter(type != "videoGame")%>%
  group_by(type, animation)%>%
  summarise(Mean.Votes = mean(numVotes, na.rm = TRUE),
            Mean.Rating = mean(averageRating, na.rm = TRUE),
            Number.Of.Entries = n())

gt(imdb_animation_avgs) # automatically detected row groupings
animation Mean.Votes Mean.Rating Number.Of.Entries
movie
FALSE 17337.709 6.254202 41085
TRUE 36393.630 6.656642 1197
short
FALSE 1576.763 6.823500 1183
TRUE 1822.737 7.363987 933
tvMiniSeries
FALSE 5779.093 7.452980 906
TRUE 2829.885 7.411458 96
tvMovie
FALSE 1857.997 6.093077 3019
TRUE 2162.584 7.100000 125
tvSeries
FALSE 12953.661 7.208266 4682
TRUE 7238.361 7.269250 1174
tvShort
FALSE 1929.167 7.422917 48
TRUE 2902.484 7.179121 91
tvSpecial
FALSE 1830.817 7.427378 431
TRUE 5492.600 7.820000 5
video
FALSE 2556.080 5.559368 1425
TRUE 5036.685 6.431738 397

include a title and subtitle

You can easily specify a title and subtitle directly unlike when using Flextable. This is done using the tab_header() function and then specifying a couple strings for your titles.

top_20_movies <- imdb %>%
  filter(type == "movie" & numVotes > 99999)%>%
  arrange(desc(averageRating))%>%
  select(title, year, averageRating, numVotes)%>%
  slice(1:20)

tab1<-top_20_movies%>%
  gt()%>%
  tab_header(title = "Top 20 Movies of all time", # add a title
             subtitle = "IMDB Entries with at least 100,000 votes") # add a subtitle
tab1
Top 20 Movies of all time
IMDB Entries with at least 100,000 votes
title year averageRating numVotes
The Shawshank Redemption 1994 9.3 2138866
The Godfather 1972 9.2 1468315
The Godfather: Part II 1974 9.0 1021652
The Dark Knight 2008 9.0 2102907
The Mountain II 2016 9.0 101445
12 Angry Men 1957 8.9 610262
Schindler's List 1993 8.9 1109973
Pulp Fiction 1994 8.9 1678715
The Lord of the Rings: The Return of the King 2003 8.9 1520719
The Good, the Bad and the Ugly 1966 8.8 635197
Forrest Gump 1994 8.8 1646150
The Lord of the Rings: The Fellowship of the Ring 2001 8.8 1536315
Fight Club 1999 8.8 1709679
Inception 2010 8.8 1875664
One Flew Over the Cuckoo's Nest 1975 8.7 845858
Star Wars: Episode V - The Empire Strikes Back 1980 8.7 1070168
Goodfellas 1990 8.7 924279
The Matrix 1999 8.7 1540221
The Lord of the Rings: The Two Towers 2002 8.7 1375568
It's a Wonderful Life 1946 8.6 363682

change column labels and include number formatting

Changing column names works basically in the same way as flextable by writing variable.name = "new column name" within the function cols_label().

There are many formatting functions provided by gt including ones for currencies, dates, times, date-times, etc.

The fmt_number() function allows you to edit numeric values. This allows you to set many features including; decimal places, trailing zeros, scaling, suffixing (K(thousand),M(million),B(billion)), separation markers.

`random_movies` <- imdb %>%
  filter(type == "movie" & numVotes > 99999)
random_movies_5 <- `random_movies`[sample(nrow(`random_movies`), 5),]
random_movies_5 <- random_movies_5%>%
  arrange(title)%>%
  select(title,year,length, numVotes, averageRating, director)

gt(random_movies_5,
   rowname_col = "title")%>%
    tab_header(title = md("**5 Movies**"),
             subtitle = md("IMDB Entries with *at least* 100,000 votes"))%>%
  cols_label(year = "Year",
             length = "Length (Minutes)",
             numVotes = "Total Votes",
             averageRating = "Rating",
             director = "Director")%>% # set new column names
  fmt_number(columns = vars(numVotes), # choose column to format
             suffixing = TRUE) # add suffixing K = thousand, M = Million etc.
5 Movies
IMDB Entries with at least 100,000 votes
Year Length (Minutes) Total Votes Rating Director
American Gangster 2007 157 371.33K 7.8 Ridley Scott
Blades of Glory 2007 93 149.22K 6.3 Josh Gordon
From Dusk Till Dawn 1996 108 267.53K 7.2 Robert Rodriguez
Me, Myself & Irene 2000 116 211.90K 6.6 Bobby Farrelly
Scent of a Woman 1992 156 236.91K 8.0 Martin Brest

add footnotes and a background colour

Using the tab_options() function allows you to set many, many aesthetic characteristics including borders, font, headings, column labels, footnotes, colours etc. If you see the help page for this function you will find a very long list of all the changes you can make using this function. For demonstration I have just showed you that i can set the background colour for the entire table.

You can add footnotes using the tab_footnote() function which requires a few arguments including the location of the footnote (i.e. in cells, the stub or the header) and to which column/row should it be applied to. You can set these conditionally such as setting the footnote to apply to the row in a column where the column is at its minimum. You can use the tab_options() to also set the appearance of the footnote itself including font size, padding, separation markers and what marks should be used for the footnotes (numbers, alphabet etc.)

Similarly you can also add a source for your data using source_note().

tab1 <- imdb %>%
  filter(type == "movie" & numVotes > 99999)%>%
  arrange(desc(averageRating))%>%
  select(title, year, averageRating, numVotes)%>%
  slice(1:20)%>%
  gt()%>% # can directly pipe on from other  functions
    cols_label(title = "Movie",
             year = "Year of Release",
             averageRating = "Average Rating",
             numVotes = "Number of Votes")%>%
  tab_header(title = md("**Top 20 Movies of all time**"), # can use markdown notation to make text bold or italic
             subtitle = md("IMDB Entries with *at least* 100,000 votes"))%>%
  tab_source_note( #create a source note for data
    source_note = "Source : https://www.imdb.com/search/title/?count=100&groups=top_1000&sort=user_rating (2020)" # add source
  )%>% 
  tab_footnote(
    footnote = "Christopher Nolan Films",
    locations = cells_body( # set where this footnote applies (see help for where as can go basically anywhere in the table)
      columns = vars(title), #column it applies to
      rows = c(4,14) # row it applies to
    ) # create a footnote
    )%>%
  tab_footnote(
    footnote = "Peter Jackson Films",
    locations = cells_body(
      columns = vars(title),
      rows = c(9,12,19)
    )
    
  )%>%
  fmt_number(columns = vars(numVotes),
             suffixing = TRUE)%>% # format numeric columns
    tab_footnote(
    footnote = "Least Voted film",
    locations = cells_body(
      columns = vars(numVotes),
      rows = numVotes == min(numVotes)
    )
    )%>%
  tab_options(table.background.color = "blue") #change many many options about the appearance of the table including colours

tab1
Top 20 Movies of all time
IMDB Entries with at least 100,000 votes
Movie Year of Release Average Rating Number of Votes
The Shawshank Redemption 1994 9.3 2.14M
The Godfather 1972 9.2 1.47M
The Godfather: Part II 1974 9.0 1.02M
The Dark Knight1 2008 9.0 2.10M
The Mountain II 2016 9.0 2 101.45K
12 Angry Men 1957 8.9 610.26K
Schindler's List 1993 8.9 1.11M
Pulp Fiction 1994 8.9 1.68M
The Lord of the Rings: The Return of the King3 2003 8.9 1.52M
The Good, the Bad and the Ugly 1966 8.8 635.20K
Forrest Gump 1994 8.8 1.65M
The Lord of the Rings: The Fellowship of the Ring3 2001 8.8 1.54M
Fight Club 1999 8.8 1.71M
Inception1 2010 8.8 1.88M
One Flew Over the Cuckoo's Nest 1975 8.7 845.86K
Star Wars: Episode V - The Empire Strikes Back 1980 8.7 1.07M
Goodfellas 1990 8.7 924.28K
The Matrix 1999 8.7 1.54M
The Lord of the Rings: The Two Towers3 2002 8.7 1.38M
It's a Wonderful Life 1946 8.6 363.68K
Source : https://www.imdb.com/search/title/?count=100&groups=top_1000&sort=user_rating (2020)
1 Christopher Nolan Films
2 Least Voted film
3 Peter Jackson Films

Cotton Farming example

The cotton farming example shown in the seminar works similarly to the flextable example in that a column of group names was required on which the data could be grouped by. However rather than needing a separate function to do the grouping, instead you can specify the gropname_col = x within the call to gt().

tab1 <- gt(A, groupname_col = "Group")%>% # create table object and set groupnames for rows
  cols_label(`Sub-group` = "Sub-group",
              `Mean (Farm)` = "Farm Area (Mean)",
              `Median (Farm)` = "Farm Area (Median)",
              `Mean (Cottton)` = "Cotton Area (Mean)",
              `Median (Cotton)` = "Cotton Area (Median)",
             `Mean (%Cotton)` = "Cotton Area as % of Farm Area (Mean)",
             `Median (%Cotton)` = "Cotton Area as % of Farm Area (Median)",
             N = "N")%>% #set column names
  tab_header("Farm Area (Acre), Cotton Area (Acre), Cotton Area as % of Farm Area") #create a table header

tab1
Farm Area (Acre), Cotton Area (Acre), Cotton Area as % of Farm Area
Sub-group Farm Area (Mean) Farm Area (Median) Cotton Area (Mean) Cotton Area (Median) Cotton Area as % of Farm Area (Mean) Cotton Area as % of Farm Area (Median) N
State
Gujarat 4.97 5.00 4.72 4.50 95.3 100.0 100
Madhya Pradesh 4.24 4.00 3.32 3.00 77.9 83.8 100
Maharashtra 2.12 1.00 2.04 1.00 98.5 100.0 100
Rajasthan 4.80 4.30 2.01 1.50 43.1 48.2 50
Partner
1 5.31 5.05 4.87 5.00 91.6 100.0 50
2 4.64 4.05 4.57 4.00 99.0 100.0 50
3 1.00 1.00 1.00 1.00 100.0 100.0 50
4 3.24 3.00 3.08 2.25 97.1 100.0 50
5 4.69 5.05 3.91 3.00 82.9 100.0 50
6 3.79 4.00 2.72 3.00 72.9 75.0 50
7 4.80 4.30 2.01 1.50 43.1 48.2 50
Overall
Total 3.92 4.00 3.16 3.00 83.8 100.0 350

Modelling Examples

movies <- imdb%>%
  filter(type == "movie")%>%
  mutate(num_Votes_10000 = numVotes/10000)

model1 <- aov(averageRating ~ length + num_Votes_10000 + animation, movies)

m1 <- broom::tidy(model1)

gt(m1)%>%
  fmt_number(columns = c(3:4),
             suffixing = T)%>%
  fmt_number(columns = 5,
             decimals = 1,
             )%>%
  fmt_scientific(columns = 6,
                 decimals = 2)%>%
  fmt_missing(columns = 5:6,
              missing_text = "")
term df sumsq meansq statistic p.value
length 1 3.71K 3.71K 2,834.6 0.00
num_Votes_10000 1 1.17K 1.17K 893.8 2.34 × 10−194
animation 1 339.00 339.00 259.0 4.16 × 10−58
Residuals 42278 55.33K 1.31

lm(averageRating ~ length + num_Votes_10000 + animation, movies) %>%
  broom::tidy()%>%
  gt()%>%
  fmt_number(columns = 2:3,
             decimals = 3)%>%
  fmt_number(columns = 4,
             decimals = 1)%>%
  fmt_scientific(columns = 5,
                 decimals = 2)
term estimate std.error statistic p.value
(Intercept) 4.925 0.026 187.8 0.00
length 0.012 0.000 50.0 0.00
num_Votes_10000 0.023 0.001 28.9 2.42 × 10−181
animationTRUE 0.544 0.034 16.1 4.16 × 10−58