m-sean · March 30, 2019 16:04
diff --git a/getquery.py b/getquery.py
 ### Downloads tweets containing a user-specified query.
 ### The downloaded tweets are collected into a list and written to disk as JSON.

 import TwitterAPI
 import yaml
 import json

 # User parameters.
 # Total number of tweets to scrape (generally works up to ~15000).
 TOTAL_COUNT = 15000
 # Number of tweets requested per API call.
 # NOTE(review): this was chosen as "Twitter API allows 200 per scrape"; the
 # standard search/tweets endpoint is believed to cap count at 100 -- confirm.
 COUNT = 200
 # YAML file containing developer credentials.
 CREDENTIALS = "credentials.yaml"
 # Output JSON file.
 SINK = "data.json"
 # Search term for the scrape. Further cleaning may be necessary, since the
 # term may not be in the actual text of every tweet.
 QUERY = "search query"

 def main():
    """Download tweets matching QUERY and write them to SINK as JSON.

    Reads the developer credentials from the CREDENTIALS YAML file, then
    pages backwards through the ``search/tweets`` endpoint until at least
    TOTAL_COUNT tweets have been collected or a request returns nothing.
    Whole batches are appended, so slightly more than TOTAL_COUNT tweets
    may be collected. The collected tweets are dumped to SINK as one JSON
    list; if nothing was found, an empty list is written.
    """
    # Creates API connection.
    # safe_load only builds plain Python objects. Calling yaml.load() without
    # an explicit Loader is deprecated since PyYAML 5.1 and fails on PyYAML 6.
    with open(CREDENTIALS, "r", encoding="utf-8") as source:
        credentials = yaml.safe_load(source)
    api = TwitterAPI.TwitterAPI(
        consumer_key=credentials["app_key"],
        consumer_secret=credentials["app_secret"],
        access_token_key=credentials["oauth_token"],
        access_token_secret=credentials["oauth_token_secret"],
    )

    # Downloads data.
    kwargs = dict(
        q=QUERY,
        lang="en",
        count=COUNT,
        tweet_mode="extended",
    )
    data = []
    # The first request carries no max_id; every later request is bounded by
    # the previous batch, so one loop covers both the first and later pages.
    while len(data) < TOTAL_COUNT:
        batch = list(api.request('search/tweets', kwargs))
        if not batch:  # no tweets returned (also covers an empty first page)
            break
        data.extend(batch)
        # This is the lowest ID for the batch, since they are given in reverse
        # chronological order. Since ID filtering is inclusive, we subtract one.
        kwargs["max_id"] = batch[-1]["id"] - 1

    # Writes all tweets to disk.
    print(f"{len(data)} tweets obtained")
    with open(SINK, "w", encoding="utf-8") as sink:
        json.dump(data, sink)
    print(f"Data written to {SINK}")


 if __name__ == "__main__":
    main()
	### Downloads tweets containing a user-specified query.
	### The downloaded tweets are collected into a list and written to disk as JSON.

	import TwitterAPI
	import yaml
	import json

	# User parameters.
	# Total number of tweets to scrape (generally works up to ~15000).
	TOTAL_COUNT = 15000
	# Number of tweets requested per API call.
	# NOTE(review): this was chosen as "Twitter API allows 200 per scrape"; the
	# standard search/tweets endpoint is believed to cap count at 100 -- confirm.
	COUNT = 200
	# YAML file containing developer credentials.
	CREDENTIALS = "credentials.yaml"
	# Output JSON file.
	SINK = "data.json"
	# Search term for the scrape. Further cleaning may be necessary, since the
	# term may not be in the actual text of every tweet.
	QUERY = "search query"

	def main():
	    """Download tweets matching QUERY and write them to SINK as JSON.

	    Reads the developer credentials from the CREDENTIALS YAML file, then
	    pages backwards through the ``search/tweets`` endpoint until at least
	    TOTAL_COUNT tweets have been collected or a request returns nothing.
	    Whole batches are appended, so slightly more than TOTAL_COUNT tweets
	    may be collected. The collected tweets are dumped to SINK as one JSON
	    list; if nothing was found, an empty list is written.
	    """
	    # Creates API connection.
	    # safe_load only builds plain Python objects. Calling yaml.load() without
	    # an explicit Loader is deprecated since PyYAML 5.1 and fails on PyYAML 6.
	    with open(CREDENTIALS, "r", encoding="utf-8") as source:
	        credentials = yaml.safe_load(source)
	    api = TwitterAPI.TwitterAPI(
	        consumer_key=credentials["app_key"],
	        consumer_secret=credentials["app_secret"],
	        access_token_key=credentials["oauth_token"],
	        access_token_secret=credentials["oauth_token_secret"],
	    )

	    # Downloads data.
	    kwargs = dict(
	        q=QUERY,
	        lang="en",
	        count=COUNT,
	        tweet_mode="extended",
	    )
	    data = []
	    # The first request carries no max_id; every later request is bounded by
	    # the previous batch, so one loop covers both the first and later pages.
	    while len(data) < TOTAL_COUNT:
	        batch = list(api.request('search/tweets', kwargs))
	        if not batch:  # no tweets returned (also covers an empty first page)
	            break
	        data.extend(batch)
	        # This is the lowest ID for the batch, since they are given in reverse
	        # chronological order. Since ID filtering is inclusive, we subtract one.
	        kwargs["max_id"] = batch[-1]["id"] - 1

	    # Writes all tweets to disk.
	    print(f"{len(data)} tweets obtained")
	    with open(SINK, "w", encoding="utf-8") as sink:
	        json.dump(data, sink)
	    print(f"Data written to {SINK}")


	if __name__ == "__main__":
	    main()