Sentiment Analysis on Twitter Data

Analyzing Twitter data
1. Extract tweets and followers from the Twitter website with R and the twitteR package
2. With the tm package, clean the text by removing punctuation, numbers, hyperlinks and stop words, followed by stemming and stem completion
3. Build a term-document matrix
4. Analyse topics with the topicmodels package (see the sketch after the term-document matrix below)
5. Analyse sentiment with the syuzhet package
6. Analyse following/followed and retweeting relationships with the igraph package (see the sketch at the end)
**********************************************************************************************************
install.packages("twitteR", dependencies = TRUE)
library("twitteR")
library("plyr")
library("httr")
library("stringr")
library("maps")
library("tm")
## This creates a one-time authorization with the Twitter API
options(httr_oauth_cache = TRUE)
## The following credentials come from your Twitter app settings
api_key <- "DWZXXXXXXXXXU74rCth4jr2etg"
api_secret <- "JIdtf3CJblIsHTwq3oBfikeGONV0CldRqXXXXXXXX3VnE"
access_token <- "172317191-ZtOZMfrED5ICXXXXXXXXXXjactKGvyDx2"
access_token_secret <- "ElEwGFJxZ3WnDJXXXXXXXX2AaYfhkoQD1mOePWGqrj"
***********************************************************************************************************
## Pass the credentials created above
setup_twitter_oauth(api_key, api_secret, access_token, access_token_secret)
tweet <- searchTwitter("#EUref", n = 5000)   ## the Twitter hashtag to be analysed
class(tweet)
tweet_EUreferendum <- searchTwitter("#EUreferendum", n = 5000)
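Both calls return lists of status objects. To work with the two result sets together, one option (a minimal sketch; the combined variable name is mine, not from the original) is to merge them and drop duplicate status IDs:
# Optional: combine both result sets and drop duplicate tweets by status ID
all_tweets.df <- twListToDF(c(tweet, tweet_EUreferendum))
all_tweets.df <- all_tweets.df[!duplicated(all_tweets.df$id), ]
nrow(all_tweets.df)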
## Optional
# Use the searchTwitter function to only get tweets within 100 miles of the given point (51.278236, -0.95171109, in southern England)
tweets_geolocated <- searchTwitter("#EUref OR #EUreferendum OR #VoteLeave OR #Brexit", n = 5000, lang = "en", geocode = "51.278236,-0.95171109,100mi", since = "2016-05-10")
# lang, geocode and since are optional; tweets can also be fetched without these parameters
tweets_geolocated.df <- twListToDF(tweets_geolocated)
# Extract the tweet text
tweets.text <- lapply(tweets_geolocated, function(t) t$getText())
class(tweets.text)
# Write the data frame to a local CSV file
write.csv(tweets_geolocated.df, file = 'C:/Users/XXX/Desktop/tweets_geolocated.df.csv', row.names = TRUE)
#- Cleaning the data with the tm package
mycorpus <- Corpus(VectorSource(tweets_geolocated.df$text))
# Keep the raw tweet text as a character vector; the sentiment step below uses it
x <- tweets_geolocated.df$text
str(mycorpus)
# Remove extra white space
mycorpus1 <- tm_map(mycorpus, stripWhitespace)
# Convert text to lower case (wrapped in content_transformer() so the documents keep their class)
mycorpus2 <- tm_map(mycorpus1, content_transformer(tolower))
# Remove stop words
mycorpus3 <- tm_map(mycorpus2, removeWords, stopwords("english"))
# Remove punctuation
mycorpus4 <- tm_map(mycorpus3, removePunctuation)
# Remove numbers
mycorpus5 <- tm_map(mycorpus4, removeNumbers)
# Ensure plain text documents (retained from the original recipe; with content_transformer() above this step is optional)
mycorpus6 <- tm_map(mycorpus5, PlainTextDocument)
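The outline above also mentions removing hyperlinks and stemming with stem completion, which the chain so far skips. A minimal sketch of both steps, assuming the SnowballC package is installed; the removeURL helper and its regex are illustrative, not part of the original:
# Optional: remove hyperlinks, then stem and complete stems
removeURL <- function(x) gsub("http[[:alnum:][:punct:]]*", "", x)
mycorpus6 <- tm_map(mycorpus6, content_transformer(removeURL))
library(SnowballC)
mycorpus_copy <- mycorpus6                   # dictionary for stem completion
mycorpus6 <- tm_map(mycorpus6, stemDocument)
# stemCompletion() maps each stem back to its most frequent completion in the unstemmed copy (can be slow on large corpora)
mycorpus6 <- tm_map(mycorpus6, content_transformer(function(x)
  paste(stemCompletion(strsplit(x, " ")[[1]], dictionary = mycorpus_copy), collapse = " ")))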
# Write the cleaned text out as a CSV file (convert the corpus to a character vector first)
write.csv(data.frame(text = sapply(mycorpus6, as.character)), file = "C:/Users/XXX/Desktop/mycorpus6.csv", row.names = FALSE)
data_dtm1 <- DocumentTermMatrix(mycorpus6)
inspect(data_dtm1)
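Step 4 of the outline calls for topic analysis with the topicmodels package, which the walkthrough does not otherwise show. A minimal sketch on the matrix just built; k = 5 topics and the seed are arbitrary choices, not from the original:
##### Optional: topic analysis with topicmodels #####
install.packages("topicmodels", dependencies = TRUE)
library(topicmodels)
# LDA cannot handle empty documents, so drop rows with no remaining terms
dtm_nonempty <- data_dtm1[slam::row_sums(data_dtm1) > 0, ]
lda_model <- LDA(dtm_nonempty, k = 5, control = list(seed = 123))
terms(lda_model, 10)   # top 10 terms per topic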
#### Frequent words in the document #####
frequent <- findFreqTerms(data_dtm1, lowfreq = 100, highfreq = Inf)
frequent
##### Generating Word Cloud #######
#############################
install.packages("wordcloud", dependencies = TRUE)
install.packages("stringr", dependencies = TRUE)
library(stringr)
library(wordcloud)
wordcloud::wordcloud(mycorpus6, max.words = 50, random.order = FALSE)
# Use a sequential palette, dropping the four lightest shades
pal <- brewer.pal(9, "YlGnBu")
pal <- pal[-(1:4)]
set.seed(123)
wordcloud::wordcloud(words = mycorpus6, scale = c(5, 0.1), max.words = 100,
                     random.order = FALSE, rot.per = 0.35, use.r.layout = FALSE, colors = pal)
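To keep a copy of the cloud on disk, the call can be wrapped in a graphics device; the file path here is illustrative:
# Optional: save the word cloud as a PNG
png("C:/Users/XXX/Desktop/wordcloud.png", width = 800, height = 800)
wordcloud::wordcloud(words = mycorpus6, scale = c(5, 0.1), max.words = 100,
                     random.order = FALSE, rot.per = 0.35, colors = pal)
dev.off()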

### Counting the occurrences of each word in the document
install.packages("slam", dependencies = TRUE)
library(slam)
freq <- colapply_simple_triplet_matrix(data_dtm1, FUN = sum)
freq
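For a quick overview, the counts can be sorted; this line is my addition, not part of the original:
head(sort(unlist(freq), decreasing = TRUE), 20)   # 20 most frequent terms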
### Writing the results out as CSV
write.csv(frequent, file = "C:/Users/XXX/Desktop/frequent_words_final.csv")
write.csv(freq, file = "C:/Users/XXX/Desktop/total_words_count_final.csv")

################ Sentiment Analysis ###############
############################################
library(syuzhet)
library(lubridate)
library(ggplot2)
library(scales)
library(reshape2)
library(dplyr)
# Score each tweet against the NRC emotion lexicon (x is the raw tweet text saved earlier)
sentiment <- get_nrc_sentiment(x)
sentiment
sentiment_matrix <- as.matrix(sentiment)   # renamed from t to avoid masking base::t()
write.csv(sentiment, "sentiment_score.csv")
getwd()
######## Visualizing the sentiment score #########
#######################################
comments <- cbind(tweets_geolocated.df$text, sentiment)
sentimentTotals <- data.frame(colSums(sentiment[, c(1:8)]))
names(sentimentTotals) <- "count"
sentimentTotals <- cbind("sentiment" = rownames(sentimentTotals), sentimentTotals)
rownames(sentimentTotals) <- NULL
ggplot(data = sentimentTotals, aes(x = sentiment, y = count)) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  theme(legend.position = "none") +
  xlab("Sentiment") + ylab("Total Count") + ggtitle("Total Sentiment Score for all Tweets")
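Step 6 of the outline (following/followed and retweeting relationships with the igraph package) is not covered by the code above. A minimal sketch, assuming retweet text follows the usual "RT @user" convention; the regex and the edge-list construction are mine:
################ Optional: retweet network with igraph ###############
install.packages("igraph", dependencies = TRUE)
library(igraph)
# Keep only tweets that look like retweets ("RT @user ...")
is_rt <- grepl("^RT @[A-Za-z0-9_]+", tweets_geolocated.df$text)
edges <- data.frame(
  from = tweets_geolocated.df$screenName[is_rt],                                   # the retweeter
  to   = sub("^RT @([A-Za-z0-9_]+).*$", "\\1", tweets_geolocated.df$text[is_rt])   # the original author
)
g <- graph_from_data_frame(edges, directed = TRUE)
# The most retweeted accounts have the highest in-degree
head(sort(degree(g, mode = "in"), decreasing = TRUE), 10)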
