gt examples

library(gt)
library(tidyverse)
#setwd("C:/Users/AlexThomson/OneDrive - Statistic For Sustainable Development/June Seminar - tables from R/gt/")
load("imdb.Rdata")
CC3_farm <- readRDS("CC3_farm.rds")

IMDB entry tables

basic table with no formatting

You can create a very basic table by simply calling the gt() function and specifying your data frame. You can also pipe the function directly from using dplyr, tidyr etc. to manipulate your data just like you would any other tidyverese package. In the example below the function has even automatically detected that the first column can be used as a group name for rows column in the stub.

Remember that in gt terminology the stub is the row names and the spanner is a name for a group of columns.

imdb_animation_avgs <- imdb %>%
  filter(type != "videoGame")%>%
  group_by(type, animation)%>%
  summarise(Mean.Votes = mean(numVotes, na.rm = TRUE),
            Mean.Rating = mean(averageRating, na.rm = TRUE),
            Number.Of.Entries = n())

gt(imdb_animation_avgs) # automatically detected row groupings

animation	Mean.Votes	Mean.Rating	Number.Of.Entries
movie
FALSE	17337.709	6.254202	41085
TRUE	36393.630	6.656642	1197
short
FALSE	1576.763	6.823500	1183
TRUE	1822.737	7.363987	933
tvMiniSeries
FALSE	5779.093	7.452980	906
TRUE	2829.885	7.411458	96
tvMovie
FALSE	1857.997	6.093077	3019
TRUE	2162.584	7.100000	125
tvSeries
FALSE	12953.661	7.208266	4682
TRUE	7238.361	7.269250	1174
tvShort
FALSE	1929.167	7.422917	48
TRUE	2902.484	7.179121	91
tvSpecial
FALSE	1830.817	7.427378	431
TRUE	5492.600	7.820000	5
video
FALSE	2556.080	5.559368	1425
TRUE	5036.685	6.431738	397

include a title and subtitle

You can easily specify a title and subtitle directly unlike when using Flextable. This is done using the tab_header() function and then specifying a couple strings for your titles.

top_20_movies <- imdb %>%
  filter(type == "movie" & numVotes > 99999)%>%
  arrange(desc(averageRating))%>%
  select(title, year, averageRating, numVotes)%>%
  slice(1:20)

tab1<-top_20_movies%>%
  gt()%>%
  tab_header(title = "Top 20 Movies of all time", # add a title
             subtitle = "IMDB Entries with at least 100,000 votes") # add a subtitle
tab1

title	year	averageRating	numVotes
Top 20 Movies of all time
IMDB Entries with at least 100,000 votes
The Shawshank Redemption	1994	9.3	2138866
The Godfather	1972	9.2	1468315
The Godfather: Part II	1974	9.0	1021652
The Dark Knight	2008	9.0	2102907
The Mountain II	2016	9.0	101445
12 Angry Men	1957	8.9	610262
Schindler's List	1993	8.9	1109973
Pulp Fiction	1994	8.9	1678715
The Lord of the Rings: The Return of the King	2003	8.9	1520719
The Good, the Bad and the Ugly	1966	8.8	635197
Forrest Gump	1994	8.8	1646150
The Lord of the Rings: The Fellowship of the Ring	2001	8.8	1536315
Fight Club	1999	8.8	1709679
Inception	2010	8.8	1875664
One Flew Over the Cuckoo's Nest	1975	8.7	845858
Star Wars: Episode V - The Empire Strikes Back	1980	8.7	1070168
Goodfellas	1990	8.7	924279
The Matrix	1999	8.7	1540221
The Lord of the Rings: The Two Towers	2002	8.7	1375568
It's a Wonderful Life	1946	8.6	363682

change column labels and include number formatting

Changing column names works basically in the same way as flextable by writing variable.name = "new column name" within the function cols_label().

There are many formatting functions provided by gt including ones for currencies, dates, times, date-times, etc.

The fmt_number() function allows you to edit numeric values. This allows you to set many features including; decimal places, trailing zeros, scaling, suffixing (K(thousand),M(million),B(billion)), separation markers.

`random_movies` <- imdb %>%
  filter(type == "movie" & numVotes > 99999)
random_movies_5 <- `random_movies`[sample(nrow(`random_movies`), 5),]
random_movies_5 <- random_movies_5%>%
  arrange(title)%>%
  select(title,year,length, numVotes, averageRating, director)

gt(random_movies_5,
   rowname_col = "title")%>%
    tab_header(title = md("**5 Movies**"),
             subtitle = md("IMDB Entries with *at least* 100,000 votes"))%>%
  cols_label(year = "Year",
             length = "Length (Minutes)",
             numVotes = "Total Votes",
             averageRating = "Rating",
             director = "Director")%>% # set new column names
  fmt_number(columns = vars(numVotes), # choose column to format
             suffixing = TRUE) # add suffixing K = thousand, M = Million etc.

	Year	Length (Minutes)	Total Votes	Rating	Director
5 Movies
IMDB Entries with at least 100,000 votes
American Gangster	2007	157	371.33K	7.8	Ridley Scott
Blades of Glory	2007	93	149.22K	6.3	Josh Gordon
From Dusk Till Dawn	1996	108	267.53K	7.2	Robert Rodriguez
Me, Myself & Irene	2000	116	211.90K	6.6	Bobby Farrelly
Scent of a Woman	1992	156	236.91K	8.0	Martin Brest

add footnotes and a background colour

Using the tab_options() function allows you to set many, many aesthetic characteristics including borders, font, headings, column labels, footnotes, colours etc. If you see the help page for this function you will find a very long list of all the changes you can make using this function. For demonstration I have just showed you that i can set the background colour for the entire table.

You can add footnotes using the tab_footnote() function which requires a few arguments including the location of the footnote (i.e. in cells, the stub or the header) and to which column/row should it be applied to. You can set these conditionally such as setting the footnote to apply to the row in a column where the column is at its minimum. You can use the tab_options() to also set the appearance of the footnote itself including font size, padding, separation markers and what marks should be used for the footnotes (numbers, alphabet etc.)

Similarly you can also add a source for your data using source_note().

tab1 <- imdb %>%
  filter(type == "movie" & numVotes > 99999)%>%
  arrange(desc(averageRating))%>%
  select(title, year, averageRating, numVotes)%>%
  slice(1:20)%>%
  gt()%>% # can directly pipe on from other  functions
    cols_label(title = "Movie",
             year = "Year of Release",
             averageRating = "Average Rating",
             numVotes = "Number of Votes")%>%
  tab_header(title = md("**Top 20 Movies of all time**"), # can use markdown notation to make text bold or italic
             subtitle = md("IMDB Entries with *at least* 100,000 votes"))%>%
  tab_source_note( #create a source note for data
    source_note = "Source : https://www.imdb.com/search/title/?count=100&groups=top_1000&sort=user_rating (2020)" # add source
  )%>% 
  tab_footnote(
    footnote = "Christopher Nolan Films",
    locations = cells_body( # set where this footnote applies (see help for where as can go basically anywhere in the table)
      columns = vars(title), #column it applies to
      rows = c(4,14) # row it applies to
    ) # create a footnote
    )%>%
  tab_footnote(
    footnote = "Peter Jackson Films",
    locations = cells_body(
      columns = vars(title),
      rows = c(9,12,19)
    )
    
  )%>%
  fmt_number(columns = vars(numVotes),
             suffixing = TRUE)%>% # format numeric columns
    tab_footnote(
    footnote = "Least Voted film",
    locations = cells_body(
      columns = vars(numVotes),
      rows = numVotes == min(numVotes)
    )
    )%>%
  tab_options(table.background.color = "blue") #change many many options about the appearance of the table including colours

tab1

Movie	Year of Release	Average Rating	Number of Votes
Top 20 Movies of all time
IMDB Entries with at least 100,000 votes
The Shawshank Redemption	1994	9.3	2.14M
The Godfather	1972	9.2	1.47M
The Godfather: Part II	1974	9.0	1.02M
The Dark Knight¹	2008	9.0	2.10M
The Mountain II	2016	9.0	² 101.45K
12 Angry Men	1957	8.9	610.26K
Schindler's List	1993	8.9	1.11M
Pulp Fiction	1994	8.9	1.68M
The Lord of the Rings: The Return of the King³	2003	8.9	1.52M
The Good, the Bad and the Ugly	1966	8.8	635.20K
Forrest Gump	1994	8.8	1.65M
The Lord of the Rings: The Fellowship of the Ring³	2001	8.8	1.54M
Fight Club	1999	8.8	1.71M
Inception¹	2010	8.8	1.88M
One Flew Over the Cuckoo's Nest	1975	8.7	845.86K
Star Wars: Episode V - The Empire Strikes Back	1980	8.7	1.07M
Goodfellas	1990	8.7	924.28K
The Matrix	1999	8.7	1.54M
The Lord of the Rings: The Two Towers³	2002	8.7	1.38M
It's a Wonderful Life	1946	8.6	363.68K
Source : https://www.imdb.com/search/title/?count=100&groups=top_1000&sort=user_rating (2020)
¹ Christopher Nolan Films
² Least Voted film
³ Peter Jackson Films

Cotton Farming example

The cotton farming example shown in the seminar works similarly to the flextable example in that a column of group names was required on which the data could be grouped by. However rather than needing a separate function to do the grouping, instead you can specify the gropname_col = x within the call to gt().

tab1 <- gt(A, groupname_col = "Group")%>% # create table object and set groupnames for rows
  cols_label(`Sub-group` = "Sub-group",
              `Mean (Farm)` = "Farm Area (Mean)",
              `Median (Farm)` = "Farm Area (Median)",
              `Mean (Cottton)` = "Cotton Area (Mean)",
              `Median (Cotton)` = "Cotton Area (Median)",
             `Mean (%Cotton)` = "Cotton Area as % of Farm Area (Mean)",
             `Median (%Cotton)` = "Cotton Area as % of Farm Area (Median)",
             N = "N")%>% #set column names
  tab_header("Farm Area (Acre), Cotton Area (Acre), Cotton Area as % of Farm Area") #create a table header

tab1

Sub-group	Farm Area (Mean)	Farm Area (Median)	Cotton Area (Mean)	Cotton Area (Median)	Cotton Area as % of Farm Area (Mean)	Cotton Area as % of Farm Area (Median)	N
Farm Area (Acre), Cotton Area (Acre), Cotton Area as % of Farm Area
State
Gujarat	4.97	5.00	4.72	4.50	95.3	100.0	100
Madhya Pradesh	4.24	4.00	3.32	3.00	77.9	83.8	100
Maharashtra	2.12	1.00	2.04	1.00	98.5	100.0	100
Rajasthan	4.80	4.30	2.01	1.50	43.1	48.2	50
Partner
1	5.31	5.05	4.87	5.00	91.6	100.0	50
2	4.64	4.05	4.57	4.00	99.0	100.0	50
3	1.00	1.00	1.00	1.00	100.0	100.0	50
4	3.24	3.00	3.08	2.25	97.1	100.0	50
5	4.69	5.05	3.91	3.00	82.9	100.0	50
6	3.79	4.00	2.72	3.00	72.9	75.0	50
7	4.80	4.30	2.01	1.50	43.1	48.2	50
Overall
Total	3.92	4.00	3.16	3.00	83.8	100.0	350

Modelling Examples

movies <- imdb%>%
  filter(type == "movie")%>%
  mutate(num_Votes_10000 = numVotes/10000)

model1 <- aov(averageRating ~ length + num_Votes_10000 + animation, movies)

m1 <- broom::tidy(model1)

gt(m1)%>%
  fmt_number(columns = c(3:4),
             suffixing = T)%>%
  fmt_number(columns = 5,
             decimals = 1,
             )%>%
  fmt_scientific(columns = 6,
                 decimals = 2)%>%
  fmt_missing(columns = 5:6,
              missing_text = "")

term	df	sumsq	meansq	statistic	p.value
length	1	3.71K	3.71K	2,834.6	0.00
num_Votes_10000	1	1.17K	1.17K	893.8	2.34 × 10⁻¹⁹⁴
animation	1	339.00	339.00	259.0	4.16 × 10⁻⁵⁸
Residuals	42278	55.33K	1.31

lm(averageRating ~ length + num_Votes_10000 + animation, movies) %>%
  broom::tidy()%>%
  gt()%>%
  fmt_number(columns = 2:3,
             decimals = 3)%>%
  fmt_number(columns = 4,
             decimals = 1)%>%
  fmt_scientific(columns = 5,
                 decimals = 2)

term	estimate	std.error	statistic	p.value
(Intercept)	4.925	0.026	187.8	0.00
length	0.012	0.000	50.0	0.00
num_Votes_10000	0.023	0.001	28.9	2.42 × 10⁻¹⁸¹
animationTRUE	0.544	0.034	16.1	4.16 × 10⁻⁵⁸