Graphing Real Time Twitter Streams

Below is a script I put together. Some of the code is my own, and some comes from the teachers/trainers at datacamp.com.

The script takes two terms and follows the real-time conversations on Twitter that mention them.  It counts the tweets that mention each term and produces a bar chart comparing user interest in the two.

import tweepy, json
import pandas as pd
import mystreamlistener as ms
import matplotlib.pyplot as plt
import seaborn as sns
import re

# Twitter API credentials. The values below are placeholders.
# NOTE: avoid committing real keys to source control.
consumer_key = 'xxxxxxxxxxxxxxx'
consumer_secret = 'xxxxxxxxxxxxxxxxxx'
access_token = 'xxxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxx'

# Build the OAuth handler from the consumer pair, attach the access pair,
# and create the API client from it.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

## SUBJECTS
# The two terms that are tracked, counted and plotted.
subject1, subject2 = "cnn", "fox"

# Stream tweets to flat file:
def tweet_track(val1, val2):
    """Start a filtered Twitter stream that tracks two terms.

    Args:
        val1: First term to track.
        val2: Second term to track.

    Incoming tweets are delivered to an ``ms.MyStreamListener`` instance;
    how they are handled is defined in the ``mystreamlistener`` module.
    """
    listener = ms.MyStreamListener()
    tweepy.Stream(auth, listener).filter(track=[val1, val2])

# Start the stream for both subjects.
# NOTE(review): presumably this call blocks until the listener stops the
# stream - confirm in MyStreamListener.
tweet_track(subject1, subject2)

# Load flat file
# NOTE(review): presumably written by MyStreamListener, one JSON tweet per
# line - confirm.
tweets_data_path = 'tweets.txt'

# Read in tweets and store in list: tweets_data.
# The context manager closes the file even if a line fails to parse, and
# blank lines are skipped because json.loads() rejects empty input.
with open(tweets_data_path, "r") as tweets_file:
    tweets_data = [json.loads(line) for line in tweets_file if line.strip()]

# Print the keys of the first tweet dict (skipped when nothing was captured)
if tweets_data:
    print(tweets_data[0].keys())

# Build DataFrame of tweet texts and languages
df = pd.DataFrame(tweets_data, columns=['text','lang'])

# Print head of DataFrame
print(df.head())


## GETTING COUNTS ON TWEETS

def word_in_text(word, tweet):
    """Return True if *word* occurs in *tweet*, ignoring case.

    Args:
        word: Term to look for. It is lower-cased and then used as a
            regular-expression pattern.
        tweet: Tweet text to search. Non-string values (e.g. ``None`` or
            NaN for a record that has no text) never match.

    Returns:
        bool: True when the pattern is found in the text, False otherwise.
    """
    # Records without a text field cannot contain the word.
    if not isinstance(tweet, str):
        return False

    # Search the lower-cased text: searching the raw tweet would make the
    # comparison case-sensitive and miss e.g. "CNN" when looking for "cnn".
    return re.search(word.lower(), tweet.lower()) is not None

# Count the tweets that mention each subject.
# word_in_text returns a bool, so summing the results gives the number of
# matching tweets (0 when the DataFrame is empty).
a = sum(word_in_text(subject1, tweet_text) for tweet_text in df['text'])
b = sum(word_in_text(subject2, tweet_text) for tweet_text in df['text'])

    
## PLOTTING BAR CHART
# Set seaborn style
sns.set(color_codes=True)

# Bar labels, one per tracked subject
cd = [subject1, subject2]

# Plot a bar chart of the tweet count for each subject.
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them as
# positional arguments and raises TypeError otherwise.
ax = sns.barplot(x=cd, y=[a, b])
ax.set(ylabel="count")
plt.show()

# Also print the raw counts, in the same order as the bars
print([a, b])

Output