Introduction to text analysis using R and quanteda

Purpose

This post introduces the capabilities of the R package quanteda for managing and analyzing textual data. quanteda has been developed by Kenneth Benoit, Kohei Watanabe, and other contributors (Benoit, Kenneth, Kohei Watanabe, Haiyan Wang, Paul Nulty, Adam Obeng, Stefan Müller, and Akitaka Matsuo. (2018) “quanteda: An R package for the quantitative analysis of textual data”. Journal of Open Source Software. 3(30), 774. https://doi.org/10.21105/joss.00774; see also: http://quanteda.io/index.html).

This post builds heavily on the quanteda tutorial: https://tutorials.quanteda.io/

Data

For demonstration, we will use the corpus of United States Presidential State of the Union Addresses available through the sotu package.

# install packages
# install.packages("quanteda")
# install.packages("quanteda.textstats")
# install.packages("quanteda.textplots")
# install.packages("rvest")
# install.packages("stringr")
# install.packages("devtools")
# devtools::install_github("quanteda/quanteda.tidy")
#United States Presidential State of the Union Addresses package sotu
#install.packages("sotu")
# load packages
library("quanteda")
library("rvest")
library("stringr")
library("quanteda.textstats")
library("quanteda.textplots")
library("quanteda.tidy")
library(sotu)
library(dplyr)

Acquiring text

# Metadata for the addresses, as shipped with the sotu package
meta <- sotu::sotu_meta
dplyr::glimpse(meta)
## Rows: 240
## Columns: 6
## $ X            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ president    <chr> "George Washington", "George Washington", "George Washing…
## $ year         <int> 1790, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 179…
## $ years_active <chr> "1789-1793", "1789-1793", "1789-1793", "1789-1793", "1793…
## $ party        <chr> "Nonpartisan", "Nonpartisan", "Nonpartisan", "Nonpartisan…
## $ sotu_type    <chr> "speech", "speech", "speech", "speech", "speech", "speech…
# Full text of the addresses, a character vector of the same length
text <- sotu::sotu_text
dplyr::glimpse(text)
##  chr [1:240] "Fellow-Citizens of the Senate and House of Representatives: \n\nI embrace with great satisfaction the opportuni"| __truncated__ ...
# Attach the texts to the metadata as an additional column named `text`
state_of_union <- cbind(meta, text)
dplyr::glimpse(state_of_union)
## Rows: 240
## Columns: 7
## $ X            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ president    <chr> "George Washington", "George Washington", "George Washing…
## $ year         <int> 1790, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 179…
## $ years_active <chr> "1789-1793", "1789-1793", "1789-1793", "1789-1793", "1793…
## $ party        <chr> "Nonpartisan", "Nonpartisan", "Nonpartisan", "Nonpartisan…
## $ sotu_type    <chr> "speech", "speech", "speech", "speech", "speech", "speech…
## $ text         <chr> "Fellow-Citizens of the Senate and House of Representativ…
# Keep only the addresses given by Barack Obama and Donald Trump
data_sotu <- state_of_union %>%
  filter(president %in% c("Barack Obama", "Donald Trump"))

glimpse(data_sotu)
## Rows: 12
## Columns: 7
## $ X            <int> 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240
## $ president    <chr> "Barack Obama", "Barack Obama", "Barack Obama", "Barack O…
## $ year         <int> 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 201…
## $ years_active <chr> "2009-2013", "2009-2013", "2009-2013", "2009-2013", "2013…
## $ party        <chr> "Democratic", "Democratic", "Democratic", "Democratic", "…
## $ sotu_type    <chr> "speech", "speech", "speech", "speech", "speech", "speech…
## $ text         <chr> "Madam Speaker, Mr. Vice President, Members of Congress, …
data_sotu[["president"]]
##  [1] "Barack Obama" "Barack Obama" "Barack Obama" "Barack Obama" "Barack Obama"
##  [6] "Barack Obama" "Barack Obama" "Barack Obama" "Donald Trump" "Donald Trump"
## [11] "Donald Trump" "Donald Trump"

Creating a text corpus

# Build a quanteda corpus; the `text` column supplies the documents
data_corpus <- data_sotu %>%
  corpus(text_field = "text")

# number of documents in the corpus
ndoc(data_corpus)
## [1] 12

Tokenizing a corpus

Next, we tokenize our text corpus. Typically, tokenization involves separating texts by white spaces. We tokenize the text corpus without any pre-processing using tokens().

# Tokenize with the default settings (no pre-processing)
toks_sotu <- data_corpus %>%
  tokens()

# show the first six tokens of each of the first four documents
print(toks_sotu, max_ntoken = 6, max_ndoc = 4)
## Tokens consisting of 12 documents and 6 docvars.
## text1 :
## [1] "Madam"   "Speaker" ","       "Mr"      "."       "Vice"   
## [ ... and 6,737 more ]
## 
## text2 :
## [1] "Madam"     "Speaker"   ","         "Vice"      "President" "Biden"    
## [ ... and 8,145 more ]
## 
## text3 :
## [1] "Mr"      "."       "Speaker" ","       "Mr"      "."      
## [ ... and 7,735 more ]
## 
## text4 :
## [1] "Mr"      "."       "Speaker" ","       "Mr"      "."      
## [ ... and 7,830 more ]
## 
## [ reached max_ndoc ... 8 more documents ]
data_corpus %>% tokens()
## Tokens consisting of 12 documents and 6 docvars.
## text1 :
##  [1] "Madam"     "Speaker"   ","         "Mr"        "."         "Vice"     
##  [7] "President" ","         "Members"   "of"        "Congress"  ","        
## [ ... and 6,731 more ]
## 
## text2 :
##  [1] "Madam"         "Speaker"       ","             "Vice"         
##  [5] "President"     "Biden"         ","             "Members"      
##  [9] "of"            "Congress"      ","             "distinguished"
## [ ... and 8,139 more ]
## 
## text3 :
##  [1] "Mr"        "."         "Speaker"   ","         "Mr"        "."        
##  [7] "Vice"      "President" ","         "Members"   "of"        "Congress" 
## [ ... and 7,729 more ]
## 
## text4 :
##  [1] "Mr"        "."         "Speaker"   ","         "Mr"        "."        
##  [7] "Vice"      "President" ","         "Members"   "of"        "Congress" 
## [ ... and 7,824 more ]
## 
## text5 :
##  [1] "Please"    ","         "everybody" ","         "have"      "a"        
##  [7] "seat"      "."         "Mr"        "."         "Speaker"   ","        
## [ ... and 7,568 more ]
## 
## text6 :
##  [1] "The"       "President" "."         "Mr"        "."         "Speaker"  
##  [7] ","         "Mr"        "."         "Vice"      "President" ","        
## [ ... and 7,896 more ]
## 
## [ reached max_ndoc ... 6 more documents ]
# total number of tokens across all documents
sum(ntoken(toks_sotu))
## [1] 86965
# number of types, counted within each document and then summed
sum(ntype(toks_sotu))
## [1] 21696

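Without any pre-processing, the corpus consists of 86,965 tokens and 21,696 types. Note that the type count is the sum of the per-document counts returned by ntype(), so a word that occurs in several speeches is counted once for each speech.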

Pre-processing

# drop punctuation, then lowercase the remaining tokens
toks_sotu_pros <- tokens_tolower(tokens(toks_sotu, remove_punct = TRUE))

# token and type counts after pre-processing
sum(ntoken(toks_sotu_pros))
## [1] 76779
sum(ntype(toks_sotu_pros))
## [1] 20299

Keywords-in-context

We can use tokens objects to identify the occurrence of keywords and their immediate context.

# every match of "america" together with two tokens of context on each side
kw_america <- toks_sotu %>%
  kwic(pattern = "america", window = 2)

# number of mentions
nrow(kw_america)
## [1] 291
# first 6 mentions of America with their ±2-word context
head(kw_america, 6)
## Keyword-in-context with 6 matches.                                                     
##   [text1, 255]     States of | America | will emerge 
##   [text1, 325]     have made | America | the greatest
##  [text1, 1067] households in | America | will receive
##  [text1, 2698]    vision for | America | , as        
##  [text1, 3164]      time for | America | to lead     
##  [text1, 3339]     energy in | America | . That's

Text processing

We remove punctuation and very frequent features (English stopwords) and transform all words to lowercase. The code below shows how to adjust the object accordingly.

# tokenize without punctuation, drop English stopwords, then lowercase
toks_sotu_pros <- tokens_tolower(
  tokens_remove(
    tokens(data_corpus, remove_punct = TRUE),
    pattern = stopwords("en")
  )
)

New token object

Let’s inspect if the changes have been implemented as we expect by calling kwic() on the new tokens object.

# repeat the keyword search on the pre-processed tokens
kw_america_pros <- toks_sotu_pros %>%
  kwic(pattern = "america", window = 2)

# first 6 mentions of America with their ±2-word context
head(kw_america_pros, 6)
## Keyword-in-context with 6 matches.                                                              
##   [text1, 109]      united states | america | emerge stronger 
##   [text1, 136]     qualities made | america | greatest force  
##   [text1, 458] working households | america | receive tax     
##  [text1, 1184]         see vision | america | blueprint future
##  [text1, 1404]        either time | america | lead thanks     
##  [text1, 1494]   renewable energy | america | need support
# render all keyword-in-context matches as a formatted table
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
kw_america_pros %>%
  data.frame() %>%
  # keep only the context columns and give them display names
  dplyr::select(
    Pre = pre,
    Keyword = keyword,
    Post = post,
    Pattern = pattern
  ) %>%
  # TRUE/FALSE are spelled out because T and F can be reassigned
  kbl(booktabs = TRUE) %>%
  kable_styling(
    latex_options = c("striped", "scale_down"),
    html_font = "Source Sans Pro",
    full_width = FALSE
  )
Pre Keyword Post Pattern
united states america emerge stronger america
qualities made america greatest force america
working households america receive tax america
see vision america blueprint future america
either time america lead thanks america
renewable energy america need support america
built right america speaking auto america
cost easy america easy necessary america
causes bankruptcy america every 30 america
promise education america global economy america
make children america already made america
goal 2020 america highest proportion america
powerful example america ordered closing america
united states america torture can america
begun know america meet threats america
meet without america shun negotiating america
enduring spirit america quit someday america
united states america thank america
progress inevitable america always destined america
hesitations fears america prevailed chose america
like across america 2 years america
united states america now house america
wait long america put future america
united states america hard may america
kind energy america now grateful america
global economy america must nation america
support right america tonight set america
million jobs america help meet america
just competitors america sits sidelines america
united states america one go america
year 2000 america budget surplus america
hopeful future america world work america
60 years america takes actions america
corruption guinea america must always america
values built america values allowed america
united states america america
leadership made america just place america
predicting decline america still largest america
world make america best place america
revolution can america better anyone america
google facebook america innovation just america
produce jobs america overseas also america
many nations america fallen ninth america
nation builders america time treated america
end decade america highest proportion america
future rebuilding america attract new america
d better america nation built america
every part america digital age america
infrastructure make america better place america
goal competitive america submit proposal america
united states america stands people america
days founding america story ordinary america
things idea america endures destiny america
united states america america
example think america within reach america
educating people america attracts new america
china meanwhile america productive weeks america
choose stay america get hit america
stay hire america third american america
jobs right america send tax america
level promise america always win america
every family america able afford america
jobs innovation america always new america
can last america nearly 100 america
chemicals use america develop resource america
already positioned america world's leading america
next decade america less pollution america
infrastructure much america needs rebuilt america
owner rural america selling products america
great depression america built hoover america
handouts copouts america built last america
reduce deficit america built last america
united states america achieve lesson america
united states america position strength america
source attacks america tide war america
let doubt america determined prevent america
made clear america pacific power america
moral example america back anyone america
anyone tells america decline influence america
rio opinions america higher years america
every event america remains one america
cops firefighters america strong defend america
watching back america time look america
united states america america
can know america moves forward america
united states america tax reform america
united states america american people america
priority making america magnet new america
making macs america things can america
made right america can get america
power capacity america generate even america
new goal america cut half america
partnership rebuild america attracts private america
united states america start right america
responsible homeowner america chance save america
single child america something able america
make sure america remains place america
right away america better get america
first job america place chance america
hit towns america get communities america
communities stronger america kind prosperity- america
say confidence america complete mission america
al qaida america continue lead america
meet obligations america must also america
reach see america must remain america
voting experience america definitely needs america
united states america america
americans today america teacher spent america
part help america wean foreign america
communities across america fathers mothers america
place invest america believe can america
breakthrough year america 5 years america
get ahead america now face america
eager work america stand still america
believe- believe- america success depend america
can help america lead world america
new jobs america past 5 america
tomorrow edge america surrender federally america
working today america closer energy america
selling truck america knew make america
business leader america join us america
us stronger america fields full america
need right america need get america
women succeed america succeeds now america
incredible success america americans overwhelmingly america
economy good america every mayor america
state legislator america say wait america
yes give america raise give america
half parents america point lives america
choice tell america differently see america
foreign partners america must move america
state knows america always side america
strong confident america can negotiate america
advantage opportunities america alliance europe america
god bless america things help america
loves like america serves sergeant america
remind us america never come america
every citizen america want kids- america
kids- rising america honest work america
united states america america
breakthrough year america economy growing america
grateful service america endured grit america
always propelled america forward 2 america
young love america get much america
hard times america rebekah ben's america
planet today america number one america
oil gas america number one america
young children america creating slots america
every worker america opportunity earn america
working people america raise now america
upgrade skills america thrived 20th america
idea across america 2 years america
free universal america high school america
every ceo america let repeat america
since 2010 america put people america
know want america know third america
reward invest america use savings america
growth competitiveness- america needs go america
united states america question whether america
question whether america leads world america
terrorists threaten america iraq syria america
well today america stands strong america
economy tatters america leads bluster america
iran secures america allies including america
fails alienating america allies making america
said liberal america conservative america america
america conservative america black america america
america black america white america america
america white america united states america
united states america said seen america
office seen america best seen america
mission building america going arguments america
believe best america share broad america
united states america want grow america
work remaking america laid new america
change accelerate america big changes america
idea threatening america control time america
third keep america safe lead america
united states america right now america
trends unique america offend uniquely america
say people america going work america
tackling poverty america giving everybody america
practices across america part brighter america
discovery dna america thomas edison america
washington carver america grace hopper america
sally ride america every immigrant america
new moonshot america can cure america
save make america country cures america
together keep america safe strong america
getting stronger america getting weaker america
united states america powerful nation america
united states america help remake america
exactly year america led coalition america
parts central america africa asia america
power says america always act america
products made america support good america
good jobs america tpp china america
back latin america restored diplomatic america
trying weaken america democracy grinds america
live now america want make america
kindness helped america travel far america
right worth america know country america
united states america thank america
states citizens america tonight mark america
allies find america ready lead america
foe find america strong america america
america strong america proud america america
america proud america free 9 america
history world america look like america
crucial demand america must put america
truly make america great dying america
loved one america refused uphold america
form inside america allow nation america
ship products america many countries america
ship products america charge nothing america
going let america great companies america
financially yet america enforce rule america
national rebuilding america spent approximately america
finally keep america safe must america
braver fight america uniform blessed america
kind friend america look heroes america
expressing people america respects right america
united states america know america america
america know america better less america
process rebuilding america willing find america
can found america friends today america
250th year america see world america
ask made america greater ever america
action now america empowered aspirations america
future believe america thank god america
mission make america great americans america
strong proud america since election america
350 billion america hire another america
believe believe america can dream america
american way america know faith america
united states america want exciting america
substantially watch america also finally america
crumbling infrastructure america nation builders america
family leave america regains strength america
praying everyone america grieving please america
security future america recent weeks america
bill puts america first come america
bring best america see vivid america
sergeant peck america salutes terrorists america
go friends america enemies america america
america enemies america strengthen friendships america
stay silent america stands people america
threat pose america allies otto america
labor returning america last june america
place called america small cluster america
people making america great long america
god bless america goodnight america
applause year america recognize two america
20th century america saved freedom america
can compete america applause now america
earth far america applause america america
america applause america winning every america
following lead america nation believes america
show world america committed ending america
financial wellbeing america moral duty america
truly make america safe work america
defeat aids america beyond applause america
wealthiest south america state abject america
booo president america founded liberty america
renew resolve america never socialist america
chants death america threatens genocide america
old knew america prevail cause america
yet unborn america us everything america
must keep america first hearts america
god bless america thank much america
3 years america now energy america
two administrations america now gained america
united states america indeed place america
went serve america korea vietnam america
job put america first next america
high school america expand equal america
aids epidemic america end decade america
sick know america constantly achieving america
new trees america around world america
especially rural america better tomorrow america
us keep america safe means america
many cities america radical politicians america
united states america sanctuary law-abiding america
public schools america punish prayer america
preachers pastors america celebrate faith america
must remember america always frontier america
pad ensure america first nation america
witness tonight america land heroes america
almighty god america place anything america
can happen america place anyone america
god bless america thank much america

Identifying multiword expressions

The package quanteda.textstats includes the function textstat_collocations() that automatically retrieves common multiword expressions.

# score two- and three-word sequences that occur at least five times;
# stopwords are removed with padding = TRUE before scoring
tstat_coll <- textstat_collocations(
  tokens_remove(
    tokens(data_corpus, remove_punct = TRUE),
    pattern = stopwords("en"),
    padding = TRUE
  ),
  size = 2:3,
  min_count = 5
)

# first 20 collocations
head(tstat_coll, n = 20)
##           collocation count count_nested length   lambda        z
## 1         health care    55           44      2 8.096432 29.84438
## 2       united states    87           17      2 9.290649 29.57031
## 3           last year    46           36      2 5.714933 28.44516
## 4     american people    55           22      2 4.318872 26.54055
## 5           right now    41           29      2 4.946390 25.08424
## 6         high school    24           19      2 8.175795 22.86750
## 7           years ago    31           29      2 6.007038 22.75534
## 8           make sure    35           14      2 7.225183 22.35350
## 9        middle class    35           24      2 9.703782 22.22763
## 10         first time    27            6      2 5.187307 22.01583
## 11       clean energy    28           18      2 7.747080 21.62456
## 12           new jobs    36           24      2 4.073386 21.24003
## 13        took office    17            5      2 8.221849 20.39640
## 14   health insurance    19           16      2 7.193143 19.98062
## 15   small businesses    17           11      2 7.005867 19.52964
## 16           tax cuts    17           13      2 6.561422 19.20325
## 17          every day    18           16      2 5.335121 18.33243
## 18   working families    15            8      2 5.458811 17.86163
## 19     every american    27           17      2 3.837339 17.85800
## 20 immigration system    12            9      2 6.652498 17.73959

Document-feature matrix

Next, we transform our tokens object into a document-feature matrix (dfm). A dfm counts the occurrences of tokens in each document.

# count how often each feature occurs in each document
dfmat <- toks_sotu_pros %>%
  quanteda::dfm()

# the ten most frequent features overall
topfeatures(dfmat, 10)
##  american       new   america       can       now    people        us     years 
##       310       301       291       280       279       275       246       225 
## americans      jobs 
##       220       214
# the ten most frequent features for each president
topfeatures(dfmat, n = 10, groups = president)
## $`Barack Obama`
##      can      new      now  america   people       us     jobs american 
##      222      217      214      203      196      184      179      177 
##    years     work 
##      161      155 
## 
## $`Donald Trump`
##  american  applause     thank   america       new    people       one   country 
##       133       117        92        88        84        79        78        77 
## americans   tonight 
##        76        73

Keyness

Keyness analysis allows us to compare the frequencies of words between target and reference documents. textstat_keyness() identifies features that occur differentially across different categories – in our case, Obama’s and Trump’s speeches. The function textplot_keyness() provides a way to visualize the results of the keyness analysis.

Keyness Figure

# pool the speeches of each president, then score every feature with
# Donald Trump's speeches as the target group
tstat_key <- quanteda.textstats::textstat_keyness(
  quanteda::dfm_group(dfmat, groups = president),
  target = "Donald Trump"
)

textplot_keyness(tstat_key)
Scaling document positions

Wordfish is an unsupervised one-dimensional text scaling method (a Poisson scaling model of one-dimensional document positions), estimating the positions of documents solely based on the observed word frequencies. Here we estimate the ideological positions of speeches from Barack Obama and Donald Trump.

library(quanteda.textmodels)
library(quanteda.textplots)

# fit the Wordfish model; `dir` names the pair of documents used to fix the
# direction of the scale (see ?textmodel_wordfish)
tmod_wf <- dfmat %>%
  textmodel_wordfish(dir = c(2, 1))

# plot the estimated document positions, grouped by president
textplot_scale1d(
  tmod_wf,
  groups = docvars(dfmat, "president")
)

Topic model

We estimate a separate topic model for Barack Obama and for Donald Trump.

library(topicmodels)

# split the document-feature matrix by president
dfmat_obama <- quanteda::dfm_subset(dfmat, president %in% c("Barack Obama"))
dfmat_trump <- quanteda::dfm_subset(dfmat, president %in% c("Donald Trump"))

# 10-topic LDA on Obama's speeches, fitted by Gibbs sampling with a fixed seed
tmod_lda_obama <- LDA(
  dfmat_obama,
  k = 10,
  method = "Gibbs",
  control = list(seed = 1948)
)
# ten highest-ranked terms of each topic
terms(tmod_lda_obama, 10)
##       Topic 1       Topic 2          Topic 3       Topic 4        Topic 5     
##  [1,] "ones"        "plan"           "pay"         "unemployment" "going"     
##  [2,] "skills"      "budget"         "control"     "important"    "lot"       
##  [3,] "students"    "health"         "built"       "cory"         "political" 
##  [4,] "development" "banks"          "regulations" "workforce"    "love"      
##  [5,] "prepare"     "cost"           "higher"      "u.s.a"        "democracy" 
##  [6,] "doubt"       "care"           "receive"     "son"          "especially"
##  [7,] "poverty"     "recovery"       "return"      "depend"       "alone"     
##  [8,] "efforts"     "responsibility" "gas"         "ask"          "isil"      
##  [9,] "others"      "crisis"         "brought"     "worker"       "gives"     
## [10,] "process"     "largest"        "dignity"     "iran's"       "planet"    
##       Topic 6         Topic 7    Topic 8    Topic 9      Topic 10   
##  [1,] "wage"          "spending" "can"      "none"       "past"     
##  [2,] "opportunities" "race"     "new"      "week"       "want"     
##  [3,] "achieve"       "goal"     "now"      "incentives" "sick"     
##  [4,] "5"             "come"     "america"  "trillion"   "rate"     
##  [5,] "need"          "level"    "people"   "exports"    "diplomacy"
##  [6,] "15"            "dream"    "us"       "human"      "issues"   
##  [7,] "lower"         "remember" "jobs"     "wall"       "networks" 
##  [8,] "rising"        "nation"   "american" "union"      "earth"    
##  [9,] "changing"      "internet" "years"    "income"     "forces"   
## [10,] "skills"        "willing"  "work"     "hundred"    "childcare"
# 10-topic LDA on Trump's speeches, same sampler and seed as for Obama
tmod_lda_trump <- LDA(
  dfmat_trump,
  k = 10,
  method = "Gibbs",
  control = list(seed = 1948)
)
# ten highest-ranked terms of each topic
terms(tmod_lda_trump, 10)
##       Topic 1       Topic 2      Topic 3   Topic 4    Topic 5     Topic 6     
##  [1,] "learn"       "vice"       "space"   "applause" "days"      "national"  
##  [2,] "honor"       "soon"       "deliver" "usa"      "sanctuary" "foreign"   
##  [3,] "rebecca"     "countries"  "got"     "put"      "3"         "obamacare" 
##  [4,] "greatest"    "republican" "may"     "secure"   "reached"   "department"
##  [5,] "well"        "signed"     "lead"    "wall"     "aliens"    "insurance" 
##  [6,] "credit"      "crime"      "making"  "confront" "thanks"    "allow"     
##  [7,] "perhaps"     "continue"   "poverty" "whether"  "best"      "truly"     
##  [8,] "legislation" "outdated"   "york"    "century"  "lowest"    "community" 
##  [9,] "senator"     "steps"      "tariffs" "prison"   "u.s"       "ryan"      
## [10,] "increases"   "change"     "culture" "grace"    "take"      "us"        
##       Topic 7     Topic 8     Topic 9    Topic 10   
##  [1,] "american"  "invest"    "building" "look"     
##  [2,] "thank"     "crossings" "ryan"     "taking"   
##  [3,] "america"   "restoring" "veterans" "longer"   
##  [4,] "new"       "israel"    "job"      "walls"    
##  [5,] "country"   "heroic"    "serve"    "something"
##  [6,] "one"       "250th"     "kind"     "liberty"  
##  [7,] "tonight"   "women"     "stands"   "capitol"  
##  [8,] "people"    "energy"    "ms-13"    "happen"   
##  [9,] "americans" "helping"   "live"     "childhood"
## [10,] "now"       "thrive"    "strength" "black"

Visualize topic model

cf. https://www.tidytextmining.com/topicmodeling.html

library(tidytext)
library(ggplot2)
library(dplyr)

# per-topic term probabilities (beta) of the fitted Obama model, one row per
# topic-term pair
obama_topics <- tidy(tmod_lda_obama, matrix = "beta")

terms_per_topic <- 10

# Keep exactly `terms_per_topic` highest-beta terms per topic.
# slice_max(with_ties = FALSE) replaces the superseded top_n(), which returns
# extra rows when betas are tied and therefore needed a manual slice().
obama_top_terms <- obama_topics %>%
  # filter(topic == 6 | topic == 8) %>%
  group_by(topic) %>%
  slice_max(beta, n = terms_per_topic, with_ties = FALSE) %>%
  ungroup() %>%
  arrange(topic, desc(beta))

obama_top_terms$topic <- factor(obama_top_terms$topic)

# reorder_within() orders terms by beta separately inside every topic; a plain
# reorder() would use one global ordering, which mis-sorts the bars of any
# term that appears in more than one topic. scale_x_reordered() strips the
# suffix that reorder_within() appends to the axis labels.
obama_top_terms %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta)) +
  geom_col() +
  facet_wrap(~ topic, scales = "free") +
  scale_x_reordered() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  coord_flip()
Bernard Enjolras
Research Professor at the Institute for Social Research in Oslo and the Director of the Center for Research on Civil Society and Voluntary Sector.

My research interests include volunteering, voluntary organizations, governance, social capital and trust, civic engagement as well as the digitization of the public sphere, social media and freedom of expression.