Last active
November 20, 2020 14:58
-
-
Save jsstoni/0b57278c2c076d26c7aa70f4b7c328f0 to your computer and use it in GitHub Desktop.
Obtener de una lista de URL las imágenes de los resultados de pagespeed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, requests, json, xlsxwriter, base64 | |
import pandas as pd | |
from time import sleep | |
""" | |
@df: DataFrame read from the Excel file that contains all the URLs
@dfx: array holding all the values read from df (df.values)
@key: API key generated in Google
""" | |
# Load the list of URLs. The sheet has no header row, so header=None keeps
# the first spreadsheet row as data instead of consuming it as column names.
# `index=False` was removed: it is a to_excel() option, not a read_excel()
# parameter, and pandas versions whose read_excel() accepts no extra keyword
# arguments raise TypeError for it.
df = pd.read_excel('urls.xlsx', sheet_name='Hoja1', header=None)
# Plain array of rows; each row is indexed positionally by the main loop.
dfx = df.values
# NOTE(review): an API key is committed in source. It can now be overridden
# through the PAGESPEED_API_KEY environment variable; the literal is kept only
# as a backward-compatible fallback and should be revoked and removed.
key = os.environ.get('PAGESPEED_API_KEY', 'AIzaSyAvjt5RQk_KFHeXeGpo72wM1_NP16ZVyMo')
""" | |
get_pagespeed(): sends a request to the PageSpeed API for the given URL and
returns the screenshot image data encoded in base64
""" | |
def get_pagespeed(key, url, strategy):
    """Fetch the final screenshot reported by PageSpeed Insights for a URL.

    Sends a GET request to the PageSpeed Insights v5 ``runPagespeed``
    endpoint and extracts
    ``lighthouseResult -> audits -> final-screenshot -> details -> data``
    from the JSON response.

    Args:
        key: API key sent as the ``key`` query parameter.
        url: Page to analyse, sent as the ``url`` query parameter.
        strategy: Value of the ``strategy`` query parameter (the script
            calls this with ``'desktop'``).

    Returns:
        The screenshot ``data`` value from the response (the caller treats
        it as a ``data:image/jpeg;base64,...`` string), or ``None`` when the
        request fails or the response lacks the expected fields.
    """
    endpoint = 'https://www.googleapis.com/pagespeedonline/v5/runPagespeed'
    # Let requests build the query string so characters such as '&', '?' or
    # '#' inside the target URL are percent-encoded instead of corrupting
    # the API request.
    query = {
        'url': url,
        'key': key,
        'screenshot': 'true',
        'strategy': strategy,
    }
    try:
        page = requests.get(endpoint, params=query)
        response = page.json()
        return response['lighthouseResult']['audits']['final-screenshot']['details']['data']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or a response without the
        # screenshot fields (e.g. an API error payload).
        print("Error al cargar")
    # Only reached on failure (success returns inside the try): pause briefly
    # before the caller moves on to the next URL.
    sleep(1)
    return None
""" | |
@outputFile: working variable holding the handle used to write the new
links to an .xlsx Excel file
""" | |
# Workbook that will receive the results, and a new worksheet inside it.
outputFile = xlsxwriter.Workbook('images.xlsx')
outputWriter = outputFile.add_worksheet()
# Rows to be written to the worksheet; the first entry is the header row.
info_pagespeed = [['URL', 'DESKTOP']]
# Process every spreadsheet row: fetch the desktop screenshot, record it for
# the output workbook and, when one was returned, save it as a JPEG file.
for urls in dfx:
    # Column 0 of the row holds the URL to analyse.
    url = urls[0]
    # Single-argument print() is valid on both Python 2 and 3, unlike the
    # bare `print url` statement, and matches the call a few lines below.
    print(url)
    desktop_pagespeed = get_pagespeed(key, url, 'desktop')
    print('termino url: '+url)
    # The raw value (including any data-URI prefix) is what gets recorded.
    info_pagespeed.append([url, desktop_pagespeed])
    if desktop_pagespeed:
        # Build the image file: strip the "data:image/jpeg;base64," prefix,
        # base64-decode the remainder into raw bytes, and write those bytes
        # to "<column 1 of the row>-desktop.jpg".
        desktop_pagespeed = desktop_pagespeed.replace('data:image/jpeg;base64,', '')
        blob_image = base64.b64decode(desktop_pagespeed)
        with open(urls[1]+"-desktop.jpg", "wb") as fh:
            fh.write(blob_image)
# Flatten the collected rows into (row, column, value) triples and write each
# one to its worksheet cell, then close the workbook.
cells = (
    (row_num, col_num, col_data)
    for row_num, row_data in enumerate(info_pagespeed)
    for col_num, col_data in enumerate(row_data)
)
for row_num, col_num, col_data in cells:
    outputWriter.write(row_num, col_num, col_data)
outputFile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment