library(rtweet)
## Warning: package 'rtweet' was built under R version 4.0.4
library(igraph)
## Warning: package 'igraph' was built under R version 4.0.4
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.4
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'readr' was built under R version 4.0.3
## Warning: package 'dplyr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose() masks igraph::compose()
## x tidyr::crossing() masks igraph::crossing()
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks rtweet::flatten()
## x dplyr::groups() masks igraph::groups()
## x dplyr::lag() masks stats::lag()
## x purrr::simplify() masks igraph::simplify()
library(ggraph)
## Warning: package 'ggraph' was built under R version 4.0.4
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.4
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
# Check https://datageneration.org/adp/twitter/ for detail
# Create token for direct authentication to access Twitter data.
# The keys and tokens come from the Twitter developer account
# (Developer Portal --> Projects & Apps --> under Apps, Keys and tokens).
# They are read from environment variables (e.g. set in ~/.Renviron) so that
# secrets are never stored in this script:
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET
# NOTE(review): credentials that were previously committed in this file should
# be regenerated in the developer portal.
twitter_token <- local({
  credential_vars <- c(
    "TWITTER_CONSUMER_KEY",
    "TWITTER_CONSUMER_SECRET",
    "TWITTER_ACCESS_TOKEN",
    "TWITTER_ACCESS_SECRET"
  )
  credentials <- Sys.getenv(credential_vars, unset = NA)

  # Fail early with a clear message instead of sending empty keys to the API.
  is_missing <- is.na(credentials) | !nzchar(credentials)
  if (any(is_missing)) {
    stop(
      "Missing Twitter credential environment variable(s): ",
      paste(credential_vars[is_missing], collapse = ", "),
      call. = FALSE
    )
  }

  # Arguments are passed by name with `=`; using `<-` inside the call would
  # match them by position only and create stray global variables.
  rtweet::create_token(
    app = "Data methods Web data",
    consumer_key = credentials[["TWITTER_CONSUMER_KEY"]],
    consumer_secret = credentials[["TWITTER_CONSUMER_SECRET"]],
    access_token = credentials[["TWITTER_ACCESS_TOKEN"]],
    access_secret = credentials[["TWITTER_ACCESS_SECRET"]]
  )
})
# Search for English-language tweets matching "JoeBiden" (n = 1000 requested),
# then print the user-level columns of the result.
jb <- rtweet::search_tweets(
  q = "JoeBiden",
  n = 1000,
  lang = "en",
  token = twitter_token
)
rtweet::users_data(jb)
## # A tibble: 943 x 20
## user_id screen_name name location description url protected
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
## 1 29472026~ heavyed65 "Eddie DiF~ "" "Activist for a~ <NA> FALSE
## 2 29472026~ heavyed65 "Eddie DiF~ "" "Activist for a~ <NA> FALSE
## 3 12270465~ MikeRecon_0~ "<U+270A>Foxy Mik~ "Pomona~ "U.S. Marine, U~ <NA> FALSE
## 4 13310139~ Dana32019207 "Dana" "" "I promote trut~ <NA> FALSE
## 5 465316012 JoniMangare~ "INTERPRET" "Brookl~ "" https~ FALSE
## 6 878511020 shay_nurse "Sharon De~ "Newnan~ "Love walking, ~ <NA> FALSE
## 7 12664027~ kiranjohnso~ "Kiran" "" "" <NA> FALSE
## 8 23465332~ JWC3D "James Cox" "" "" <NA> FALSE
## 9 23465332~ JWC3D "James Cox" "" "" <NA> FALSE
## 10 23465332~ JWC3D "James Cox" "" "" <NA> FALSE
## # ... with 933 more rows, and 13 more variables: followers_count <int>,
## # friends_count <int>, listed_count <int>, statuses_count <int>,
## # favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## # profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# Fetch the @JoeBiden timeline (n = 1000 statuses requested) and plot the
# tweet frequency aggregated by day. The token is passed explicitly, matching
# the search_tweets() calls in this script.
jb_timeline <- rtweet::get_timelines(
  "JoeBiden",
  n = 1000,
  token = twitter_token
)
rtweet::ts_plot(jb_timeline, by = "days") +
  theme_bw()
# Broader query: tweets matching any of the three search terms.
jb1 <- rtweet::search_tweets(
  q = "JoeBiden OR president OR potus",
  n = 100,
  retryonratelimit = TRUE
)
# Note: creating the igraph object can take some time on large result sets.
# Suggestion: start from a smaller data file.
# Credit: Russell, Matthew. 2018. 21 Recipes for Mining Twitter Data with rtweet
# https://rud.is/books/21-recipes/visualizing-a-graph-of-retweet-relationships.html

# Edge list: one row per (screen_name, mentioned screen name) pair, taken from
# tweets whose retweet_count is above zero; rows without a mention are dropped.
jb_g <- jb %>%
  filter(retweet_count > 0) %>%
  select(screen_name, mentions_screen_name) %>%
  unnest(mentions_screen_name) %>%
  filter(!is.na(mentions_screen_name)) %>%
  graph_from_data_frame()

# Only vertices with degree above 20 get a label and a non-zero size.
# degree() is computed once; unname() drops the vertex names that ifelse()
# carries over from the named degree vector.
jb_degree <- degree(jb_g)
V(jb_g)$node_label <- unname(ifelse(jb_degree > 20, names(V(jb_g)), ""))
V(jb_g)$node_size <- unname(ifelse(jb_degree > 20, jb_degree, 0))
# Plot the jb_g network with the Davidson-Harel layout ("dh").
# Other ggraph layouts to try: 'star', 'circle', 'gem', 'graphopt', 'grid',
# 'mds', 'randomly', 'fr' (Fruchterman-Reingold), 'kk', 'drl', 'lgl'.
ggraph(jb_g, layout = "dh") +
  geom_edge_arc(aes(alpha = ..index..), edge_width = 0.1) +
  geom_node_label(
    aes(label = node_label, size = node_size),
    repel = TRUE,
    color = "red",
    fill = "#ffffff66",
    label.size = 0,
    segment.colour = "light blue"
  ) +
  scale_size_area(trans = "sqrt") +
  coord_fixed() +
  labs(
    title = "Joe Biden Twitter Plot",
    subtitle = "Edges=volume of retweets. Screenname size=influence"
  ) +
  # theme_bw() must come before theme() so the legend setting is not reset.
  theme_bw() +
  theme(legend.position = "none")
## Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Search for English-language tweets matching "KamalaHarris" (n = 100
# requested), then print the user-level columns of the result.
kh <- rtweet::search_tweets(
  q = "KamalaHarris",
  n = 100,
  lang = "en",
  token = twitter_token
)
rtweet::users_data(kh)
## # A tibble: 96 x 20
## user_id screen_name name location description url protected
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
## 1 8725453~ midwifeMary~ "Mary L~ "\U0001f1ee\~ "<U+2618><U+FE0F>\U0001f1ee\U~ <NA> FALSE
## 2 1267183~ phortuenti "bread ~ "" "We will know o~ <NA> FALSE
## 3 4632389~ fbravo56 "Fbravo~ "" "" <NA> FALSE
## 4 2454936~ rolltak "awkwar~ "" "In the words o~ <NA> FALSE
## 5 1369946~ Ihaveadream~ "Countr~ "" "Freedom!!" <NA> FALSE
## 6 1247640~ bestbiafra "Bestbi~ "" "BIAFRAN *Freed~ <NA> FALSE
## 7 1004141~ LuisaColomb~ "LuisaM~ "USAQUEN" "#IamJewis #Rev~ <NA> FALSE
## 8 1004141~ LuisaColomb~ "LuisaM~ "USAQUEN" "#IamJewis #Rev~ <NA> FALSE
## 9 1004141~ LuisaColomb~ "LuisaM~ "USAQUEN" "#IamJewis #Rev~ <NA> FALSE
## 10 9744193~ cipherEqual~ "Cipher~ "Now" "Celebrating th~ <NA> FALSE
## # ... with 86 more rows, and 13 more variables: followers_count <int>,
## # friends_count <int>, listed_count <int>, statuses_count <int>,
## # favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## # profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# Fetch the @KamalaHarris timeline (n = 100 statuses requested) and plot the
# tweet frequency aggregated by day. The token is passed explicitly, matching
# the search_tweets() calls in this script.
kh_timeline <- rtweet::get_timelines(
  "KamalaHarris",
  n = 100,
  token = twitter_token
)
rtweet::ts_plot(kh_timeline, by = "days") +
  theme_bw()
# Edge list: one row per (screen_name, mentioned screen name) pair, taken from
# tweets whose retweet_count is above zero; rows without a mention are dropped.
kh_g <- kh %>%
  filter(retweet_count > 0) %>%
  select(screen_name, mentions_screen_name) %>%
  unnest(mentions_screen_name) %>%
  filter(!is.na(mentions_screen_name)) %>%
  graph_from_data_frame()

# Only vertices with degree above 20 get a label and a non-zero size.
# degree() is computed once; unname() drops the vertex names that ifelse()
# carries over from the named degree vector.
kh_degree <- degree(kh_g)
V(kh_g)$node_label <- unname(ifelse(kh_degree > 20, names(V(kh_g)), ""))
V(kh_g)$node_size <- unname(ifelse(kh_degree > 20, kh_degree, 0))
# Plot the kh_g network with the Davidson-Harel layout ("dh").
# Other ggraph layouts to try: 'star', 'circle', 'gem', 'graphopt', 'grid',
# 'mds', 'randomly', 'fr' (Fruchterman-Reingold), 'kk', 'drl', 'lgl'.
ggraph(kh_g, layout = "dh") +
  geom_edge_arc(aes(alpha = ..index..), edge_width = 0.1) +
  geom_node_label(
    aes(label = node_label, size = node_size),
    repel = TRUE,
    color = "red",
    fill = "#ffffff66",
    label.size = 0,
    segment.colour = "light blue"
  ) +
  scale_size_area(trans = "sqrt") +
  coord_fixed() +
  labs(
    title = "Kamala Harris Twitter Plot",
    subtitle = "Edges=volume of retweets. Screenname size=influence"
  ) +
  # theme_bw() must come before theme() so the legend setting is not reset.
  theme_bw() +
  theme(legend.position = "none")