Created
October 28, 2019 17:01
-
-
Save GlinZachariah/808338016d445dd3ae996890ce09ebdb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.cloud import firestore | |
import firebase_admin | |
from firebase_admin import credentials | |
from firebase_admin import firestore | |
import json | |
import datetime | |
import os,csv,re,emoji | |
# Export tweets stored in Firestore to a '^'-delimited CSV file under output/.
#
# Pure helpers (text cleaning, timestamp splitting, row assembly) are kept
# separate from the Firestore / file I/O in main() so they can be reasoned
# about and exercised on their own.

CSV_DELIMITER = '^'
# Timestamp layout parsed from the 'Tweet Timestamp' field,
# e.g. "Mon Oct 28 17:01:00 +0000 2019".
TIMESTAMP_FORMAT = '%a %b %d %H:%M:%S %z %Y'


def output_path(filekey, companyname):
    """Return the CSV path (relative to the working directory) for this run.

    Runs with ``filekey == "key2"`` are written to ``output/key2/``; every
    other value (including an empty string) goes to ``output/`` directly.
    """
    if filekey == "key2":
        document_name = "key2/dataset_" + companyname + ".csv"
    else:
        document_name = "final_dataset_" + companyname + ".csv"
    return "output/" + document_name


def normalize_text(text):
    """Return *text* lower-cased with punctuation noise, digits and links removed.

    Steps, in order: commas become spaces; the text is lower-cased;
    " #", " & ", newlines and tabs become a single space; remaining "#" and
    "." characters are dropped; runs of digits are dropped; finally every
    space-separated token containing "https://" is discarded.
    Consecutive spaces are NOT collapsed.
    """
    text = re.sub(',', ' ', text)
    text = text.lower()
    text = re.sub(r" #| & |\n|\t", " ", text)
    # All newlines/tabs are already gone after the previous substitution, so
    # only '#' and '.' can still match here.
    text = re.sub(r"[#.]", "", text)
    text = re.sub(r'\d+', '', text)
    kept_words = [word for word in text.split(" ") if "https://" not in word]
    return " ".join(kept_words)


def split_timestamp(raw_timestamp):
    """Parse *raw_timestamp* and return it as a ``(YYYY-MM-DD, HH:MM:SS)`` pair.

    Raises:
        ValueError: if *raw_timestamp* does not match ``TIMESTAMP_FORMAT``.
    """
    parsed = datetime.datetime.strptime(raw_timestamp, TIMESTAMP_FORMAT)
    return parsed.strftime("%Y-%m-%d"), parsed.strftime("%H:%M:%S")


def tweet_row(tweet_id, tweet, tweet_text):
    """Return the CSV columns for one tweet as a list of strings.

    Args:
        tweet_id: identifier written in the first column.
        tweet: dict holding the tweet attributes read below.
        tweet_text: already cleaned tweet text; it is wrapped in single
            quotes, as the original export format did.

    Raises:
        KeyError: if *tweet* lacks one of the attributes read below.
        ValueError: if the timestamp cannot be parsed.
    """
    tweet_date, tweet_time = split_timestamp(tweet['Tweet Timestamp'])
    columns = [
        tweet_id,
        tweet['Retweet count'],
        # NOTE(review): the favorite-count column reads the same 'Favorited'
        # key as the favorited flag further down -- confirm whether a separate
        # count field exists in the stored documents.
        tweet['Favorited'],
        "'" + str(tweet_text) + "'",
        tweet_date,
        tweet_time,
        tweet['Tweet Sensitivity'],
        tweet['Verified User'],
        tweet['User followers_count'],
        tweet['Favorited'],
        tweet['Retweeted'],
    ]
    # Explicit str() keeps None/bool values rendered as "None"/"True"/"False";
    # csv.writer on its own would write None as an empty field.
    return [str(value) for value in columns]


def main():
    """Prompt for run parameters, read the tweets from Firestore and append them to the CSV."""
    # The original prompt for `filekey` was commented out while the variable
    # was still read later, which raised NameError on the first document.
    filekey = input("Enter key file name: ")
    companyname = input("Enter company name: ")

    # firestore authentication
    cred = credentials.Certificate('firebase-adminsdk-.json')
    firebase_admin.initialize_app(cred)
    db = firestore.client()
    # NOTE(review): the collection name is fixed even though a company name is
    # prompted for -- confirm whether it should follow `companyname`.
    docs = db.collection(u'dataset-netflix').get()

    path = output_path(filekey, companyname)
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Open the file once for the whole export instead of once per document.
    # newline='' is what the csv module requires for files handed to a writer.
    with open(path, mode='a', newline='', encoding='utf-8') as tweets_file:
        tweet_writer = csv.writer(
            tweets_file,
            delimiter=CSV_DELIMITER,
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        for doc in docs:
            tweet_id = doc.id
            tweet = doc.to_dict()
            print(tweet_id)
            tweet_text = normalize_text(emoji.demojize(tweet['Tweet text']))
            # Pass the columns as separate fields: writing one pre-joined
            # string made the writer quote the entire record, because the
            # single field contained the delimiter.
            tweet_writer.writerow(tweet_row(tweet_id, tweet, tweet_text))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment