# Read the raw inaugural-speech data in.
inaug_dat <- read_csv("inaug_speeches.csv")

# Clean it: keep one row per speech with a normalised president key, the
# ordinal of the address, the parsed date and year, the speech length and
# the speech text itself.
dat <- inaug_dat %>%
  transmute(
    # Lower-case the name and replace spaces with underscores.
    president = str_replace_all(str_to_lower(Name), " ", "_"),
    # Map the free-text address label to an ordinal; labels that mention
    # none of First/Second/Third/Fourth are treated as a first address.
    address = case_when(
      str_detect(`Inaugural Address`, "First") ~ "first",
      str_detect(`Inaugural Address`, "Second") ~ "second",
      str_detect(`Inaugural Address`, "Third") ~ "third",
      str_detect(`Inaugural Address`, "Fourth") ~ "fourth",
      TRUE ~ "first"
    ),
    # NOTE: %A and %B match weekday/month names in the current locale, so
    # this parse assumes a locale compatible with the names in the file.
    date = as.Date(Date, format = "%A, %B %d, %Y"),
    year = lubridate::year(date),
    # str_count() with its default pattern counts characters, not words.
    length = str_count(text),
    text = text
  )

# Adjust for one problematic date: overwrite the date and year of Bill
# Clinton's second address with fixed values. which() drops NA comparisons
# so the assignment only ever touches rows that really match.
clinton_second <- which(
  dat$president == "bill_clinton" & dat$address == "second"
)
dat$date[clinton_second] <- as.Date("1997-01-20")
dat$year[clinton_second] <- 1997

head(dat)
Treat these as stop words that are particular to inaugural speeches (i.e. every president uses these words).
Note: Don’t consider FDR’s third and fourth inaugural speeches.
Set k (the number of topics you’re looking for) to 5. Try to interpret the output.