Created
April 8, 2024 20:45
-
-
Save Duartemartins/439a5dc41a501d0bcb1933d382d44767 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import necessary libraries | |
from http.client import HTTPSConnection | |
from base64 import b64encode | |
from json import loads | |
from json import dumps | |
import os | |
from dotenv import load_dotenv | |
import json | |
import pandas as pd | |
# Define path to environment variables file
# NOTE(review): this resolves to "./env" (no leading dot) — confirm the
# secrets file is really named "env" and not the conventional ".env".
env_path = os.path.join(".", "env")
# Load environment variables
# override=True lets values from the file replace variables already set
# in the process environment.
load_dotenv(env_path, override=True)
# API credentials read from the environment. os.getenv returns None for any
# name that is unset. Only the DataForSEO pair is used in this script section;
# the OpenAI/Google values are loaded but not referenced below.
dataforseo_username = os.getenv("DATAFORSEO_USERNAME")
dataforseo_password = os.getenv("DATAFORSEO_PASSWORD")
openai_api_key = os.getenv("OPENAI_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")
google_project_id = os.getenv("GOOGLE_PROJECT_ID")
# Minimal HTTPS client for the DataForSEO REST API using Basic authentication.
class RestClient:
    # All requests go to this host.
    domain = "api.dataforseo.com"

    def __init__(self, username, password):
        # Credentials are folded into a Basic auth header on every request.
        self.username = username
        self.password = password

    def request(self, path, method, data=None):
        """Send one HTTPS request to *path* and return the decoded JSON body."""
        connection = HTTPSConnection(self.domain)
        try:
            credentials = "%s:%s" % (self.username, self.password)
            token = b64encode(credentials.encode("ascii")).decode("ascii")
            # NOTE(review): 'Content-Encoding: gzip' is advertised although the
            # body is not gzip-compressed — the API appears to tolerate it;
            # confirm before changing.
            headers = {
                'Authorization': 'Basic %s' % token,
                'Content-Encoding': 'gzip',
            }
            connection.request(method, path, headers=headers, body=data)
            raw = connection.getresponse().read()
            return loads(raw.decode())
        finally:
            # Release the socket even when the request raises.
            connection.close()

    def get(self, path):
        """Issue a GET request to *path*."""
        return self.request(path, 'GET')

    def post(self, path, data):
        """Issue a POST; *data* may be a pre-encoded JSON string or any JSON-serializable object."""
        payload = data if isinstance(data, str) else dumps(data)
        return self.request(path, 'POST', payload)
# Create the DataForSEO client from the credentials loaded above.
seodata_client = RestClient(dataforseo_username, dataforseo_password)

# Build the POST payload: a keyword-ideas request for the seed keywords,
# filtered to search volume > 400 AND keyword difficulty < 25.
# (The API expects tasks keyed by index; the original built key 0 via
# post_data_ki[len(post_data_ki)] — this literal is equivalent.)
post_data_ki = {
    0: dict(
        keywords=[
            "list",
            "of",
            "keywords"
        ],
        location_name="United States",
        language_name="English",
        filters=[
            ["keyword_info.search_volume", ">", 400],
            "and",
            ["keyword_properties.keyword_difficulty", "<", 25],
        ],
        include_serp_info=True,
        closely_variants=True,
        limit=200  # maximum number of rows returned by the API
    )
}

# Destination for the raw JSON response.
filename_ki = 'keyword_ideas'
file_path = os.path.join("response", f'{filename_ki}.json')
# Fix: ensure the output directory exists — open(file_path, "w") below
# raises FileNotFoundError if "response/" is missing.
os.makedirs("response", exist_ok=True)

# Clear any stale response so a failed request never leaves old data behind.
if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
    print('Deleting existing file contents')
    open(file_path, 'w').close()

# Make POST request and save response
print('Get data from DataForSEO')
response_ki = seodata_client.post(
    "/v3/dataforseo_labs/google/keyword_ideas/live", post_data_ki
)
# 20000 is DataForSEO's success status; anything else is an error.
if response_ki["status_code"] == 20000:
    with open(file_path, "w") as f:
        json.dump(response_ki, f)
else:
    print(
        "error. Code: %d Message: %s"
        % (response_ki["status_code"], response_ki["status_message"])
    )
    # Fix: stop here — the code after this section indexes into the
    # response payload and would crash on a failed request.
    raise SystemExit(1)
# Extract the keyword items from the first task's first result.
# (Reached only on a successful request; the structure follows DataForSEO's
# tasks -> result -> items nesting.)
results_ki = response_ki['tasks'][0]["result"][0]['items']

# Stable column order, used both for the row dicts and as an explicit
# schema so an empty result set still yields the expected columns.
_columns = [
    "keywords",
    "cpc",
    "search_volume",
    "low_top_of_page_bid",
    "high_top_of_page_bid",
    "competition",
    "competition_level",
    "keyword_difficulty",
]

# Build one row per keyword item directly. This replaces eight parallel
# append-lists (the original also initialized keyword_difficulty twice —
# a redundant duplicate line, now removed).
df_ki = pd.DataFrame(
    [
        {
            "keywords": result['keyword'],
            "cpc": result["keyword_info"]["cpc"],
            "search_volume": result["keyword_info"]['search_volume'],
            "low_top_of_page_bid": result["keyword_info"]["low_top_of_page_bid"],
            "high_top_of_page_bid": result["keyword_info"]["high_top_of_page_bid"],
            "competition": result["keyword_info"]["competition"],
            "competition_level": result["keyword_info"]["competition_level"],
            "keyword_difficulty": result["keyword_properties"]["keyword_difficulty"],
        }
        for result in results_ki
    ],
    columns=_columns,
)

# Print DataFrame and number of records
print(df_ki.to_string())
num_records = len(df_ki)
print(num_records)

# Save DataFrame to CSV
df_ki.to_csv('output.csv', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment