Skip to content

Instantly share code, notes, and snippets.

@Duartemartins
Created April 8, 2024 20:45
Show Gist options
  • Save Duartemartins/439a5dc41a501d0bcb1933d382d44767 to your computer and use it in GitHub Desktop.
Save Duartemartins/439a5dc41a501d0bcb1933d382d44767 to your computer and use it in GitHub Desktop.
# Import necessary libraries
from http.client import HTTPSConnection
from base64 import b64encode
from json import loads
from json import dumps
import os
from dotenv import load_dotenv
import json
import pandas as pd
# Location of the dotenv-style file that holds the API credentials
# NOTE(review): this resolves to "./env", not the conventional "./.env" — confirm the file name
env_path = os.path.join(os.curdir, "env")
# Load the file into the process environment; override=True lets values from
# the file take precedence over variables that are already set
load_dotenv(env_path, override=True)
# Each value is None when the corresponding variable is not defined
dataforseo_username = os.environ.get("DATAFORSEO_USERNAME")
dataforseo_password = os.environ.get("DATAFORSEO_PASSWORD")
openai_api_key = os.environ.get("OPENAI_API_KEY")
google_api_key = os.environ.get("GOOGLE_API_KEY")
google_project_id = os.environ.get("GOOGLE_PROJECT_ID")
# Define RestClient class for making HTTP requests
class RestClient:
    """Minimal HTTPS client for the DataForSEO REST API using Basic auth.

    Every call opens a fresh connection to ``domain``, sends a single request
    and returns the JSON-decoded response body.
    """

    domain = "api.dataforseo.com"

    def __init__(self, username, password):
        """Store the credentials used to build the ``Authorization`` header."""
        self.username = username
        self.password = password

    def _auth_header(self):
        """Return the ``Authorization`` header value for the stored credentials."""
        # UTF-8 instead of ASCII: byte-identical for ASCII credentials, but no
        # UnicodeEncodeError when a credential contains other characters.
        credentials = ("%s:%s" % (self.username, self.password)).encode("utf-8")
        return "Basic %s" % b64encode(credentials).decode("ascii")

    @staticmethod
    def _encode_body(data):
        """Return *data* in a form that is safe to pass as a request body.

        ``http.client`` encodes ``str`` bodies as ISO-8859-1, which raises on
        characters outside Latin-1 and mis-encodes JSON text. Strings are
        therefore encoded as UTF-8 here; ``None`` and bytes pass through.
        """
        if isinstance(data, str):
            return data.encode("utf-8")
        return data

    # Define method for making HTTP requests
    def request(self, path, method, data=None):
        """Send one HTTP request and return the parsed JSON response.

        Args:
            path: Request path, e.g. ``"/v3/..."``.
            method: HTTP method name such as ``"GET"`` or ``"POST"``.
            data: Optional request body (``str`` or bytes).

        Returns:
            The response body decoded with ``json.loads``.

        Raises:
            OSError / http.client.HTTPException: on connection problems.
            ValueError: if the response body is not valid JSON.
        """
        connection = HTTPSConnection(self.domain)
        try:
            headers = {
                'Authorization': self._auth_header(),
                # NOTE(review): the body is sent uncompressed although this
                # header announces gzip; kept unchanged — confirm against the
                # API documentation before removing.
                'Content-Encoding': 'gzip',
            }
            connection.request(
                method, path, headers=headers, body=self._encode_body(data)
            )
            response = connection.getresponse()
            return loads(response.read().decode())
        finally:
            # Always release the socket, even when the request or parsing fails
            connection.close()

    # Define methods for GET and POST requests
    def get(self, path):
        """Issue a GET request to *path* and return the parsed JSON response."""
        return self.request(path, 'GET')

    def post(self, path, data):
        """Issue a POST request to *path* and return the parsed JSON response.

        *data* may be an already-serialized JSON string, which is sent as is,
        or any JSON-serializable object, which is serialized with ``dumps``.
        """
        data_str = data if isinstance(data, str) else dumps(data)
        return self.request(path, 'POST', data_str)
# Client used for every DataForSEO call below, authenticated with the
# credentials read from the environment
seodata_client = RestClient(
    username=dataforseo_username,
    password=dataforseo_password,
)
# Payload for the keyword-ideas request: a mapping of task index -> task
# parameters, holding a single task under index 0
post_data_ki = {
    0: {
        # Seed keywords the ideas are generated from
        "keywords": ["list", "of", "keywords"],
        "location_name": "United States",
        "language_name": "English",
        # Two conditions joined with "and": search volume above 400 and
        # keyword difficulty below 25
        "filters": [
            ["keyword_info.search_volume", ">", 400],
            "and",
            ["keyword_properties.keyword_difficulty", "<", 25],
        ],
        "include_serp_info": True,
        "closely_variants": True,
        "limit": 200,
    },
}
# Define filename and path for saving response
filename_ki = 'keyword_ideas'
file_path = os.path.join("response", f'{filename_ki}.json')
# Create the target directory up front; without it the open() calls below
# fail with FileNotFoundError on a fresh checkout
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# If file already exists and is not empty, clear its contents so stale data
# from an earlier run cannot be mistaken for the result of this one
if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
    print('Deleting existing file contents')
    with open(file_path, 'w'):
        pass
# Make POST request and save response
print('Get data from DataForSEO')
response_ki = seodata_client.post(
    "/v3/dataforseo_labs/google/keyword_ideas/live", post_data_ki
)
# If response is successful, save it to file
if response_ki["status_code"] == 20000:
    with open(file_path, "w") as f:
        json.dump(response_ki, f)
else:
    print(
        "error. Code: %d Message: %s"
        % (response_ki["status_code"], response_ki["status_message"])
    )
    # Stop here: the rest of the script reads task results out of the
    # response, which a failed request does not provide
    raise SystemExit(1)
# Extract results from response
task_ki = response_ki['tasks'][0]
# Guard against a null/missing "result" or "items" (e.g. a failed task or a
# query without matches): treat both as "no rows" rather than raising
# TypeError on None[0] or on iterating None
task_result_ki = task_ki.get("result") or []
results_ki = (task_result_ki[0].get("items") if task_result_ki else None) or []
if not results_ki:
    print(
        "no keyword ideas returned. Task code: %s Message: %s"
        % (task_ki.get("status_code"), task_ki.get("status_message"))
    )
# Per-item sub-objects; when one is absent or null every value read from it
# below becomes None, which shows up as an empty cell in the output
keyword_infos = [result.get("keyword_info") or {} for result in results_ki]
keyword_props = [result.get("keyword_properties") or {} for result in results_ki]
# Build one list per output column, all in the same item order
words = [result['keyword'] for result in results_ki]
cpc = [info.get("cpc") for info in keyword_infos]
search_volume = [info.get("search_volume") for info in keyword_infos]
low_top_of_page_bid = [info.get("low_top_of_page_bid") for info in keyword_infos]
high_top_of_page_bid = [info.get("high_top_of_page_bid") for info in keyword_infos]
competition = [info.get("competition") for info in keyword_infos]
competition_level = [info.get("competition_level") for info in keyword_infos]
keyword_difficulty = [props.get("keyword_difficulty") for props in keyword_props]
# Assemble the per-column lists into a table; keyword order in the dict call
# fixes the column order of the DataFrame and of the CSV
df_ki = pd.DataFrame(
    dict(
        keywords=words,
        cpc=cpc,
        search_volume=search_volume,
        low_top_of_page_bid=low_top_of_page_bid,
        high_top_of_page_bid=high_top_of_page_bid,
        competition=competition,
        competition_level=competition_level,
        keyword_difficulty=keyword_difficulty,
    )
)
# Show the full table followed by its row count
print(df_ki.to_string())
num_records = df_ki.shape[0]
print(num_records)
# Write the table to output.csv without the index column
df_ki.to_csv('output.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment