Last active
June 30, 2017 13:46
-
-
Save baamenabar/444e667b76fff8fbc8dc8c5931d01604 to your computer and use it in GitHub Desktop.
Un script scraper para extraer listas de vehículos de las estadísticas del Registro Civil de Chile.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scraper script for the Registro Civil statistics pages.

// Year to fetch: it replaces the "AFab" placeholder in every collected link and
// is also used to cut the brand name out of each response URL.
var elAno = '2017';

// One XMLHttpRequest object is reused for every page, so pages load one at a time.
var request = new XMLHttpRequest();

// Links appended to the page when a response asks for the session to be
// reactivated. They live at file scope so removeAndRestart() can remove them.
var nuevoLin = undefined;
var saltarLin = undefined;
/**
 * State-change handler for the shared `request`.
 *
 * Once a request has finished (readyState 4) with status 200, the response text
 * is classified:
 *  - It contains 't_ingreso_sistema.jpg': treated as "session must be
 *    reactivated". Two links (retry / skip) and a note with the brand are added
 *    to the page, and the queue is deliberately NOT advanced; the links call
 *    removeAndRestart() to resume.
 *  - It contains one of the two incident notices: only the brand name is
 *    written to the page.
 *  - Otherwise: the third <center> element of the response is moved into the
 *    page and every <th> inside it is prefixed with "<brand>-".
 * Every finished request except the session case then calls
 * continuaConSiguiente(), whatever its status.
 */
request.onreadystatechange = function () {
  // Carried over unchanged: reports a 404 status observed while readyState is 3.
  if (request.readyState === 3 && request.status === 404) {
    console.error('WW_ERROR:', 'js', '_before', 'loadHeader()', 'XMLHttpRequest');
  }
  if (request.readyState !== 4) {
    return;
  }

  if (request.status === 200) {
    //console.log('volvió! con:',request.responseText)
    const respuesta = request.responseText;
    const eldiv = document.createElement('div');
    // Brand = text between "/PrimerasBUS_" and "_<year>." in the response URL.
    const laMarca = request.responseURL.split('/PrimerasBUS_').pop().split(`_${elAno}.`)[0];

    if (respuesta.includes('t_ingreso_sistema.jpg')) {
      nuevoLin = document.createElement('a');
      nuevoLin.href = 'javascript:removeAndRestart()';
      nuevoLin.innerHTML = '<h2>Reactive la sesión en otra pestaña y reinicie la carga aquí</h2>';
      document.body.appendChild(nuevoLin);

      saltarLin = document.createElement('a');
      saltarLin.href = 'javascript:removeAndRestart(true)';
      saltarLin.innerHTML = '<h2>... saltando...</h2>';
      document.body.appendChild(saltarLin);

      // The brand comes from a URL, so it is inserted as text, not as markup.
      eldiv.textContent = laMarca + ' (requiere acceso)';
      document.body.appendChild(eldiv);
      // Wait for the user: the queue resumes from removeAndRestart().
      return;
    }

    const hayIncidente =
      respuesta.includes('<td>Ha alcanzado el m') ||
      respuesta.includes('<td>Se ha detectado un incumplimiento');

    if (hayIncidente) {
      console.log('Fallò! hay un incidente');
      eldiv.textContent = laMarca;
      document.body.appendChild(eldiv);
    } else {
      // Parse the response in a detached container; it never needs to be
      // attached to the page, so nothing is left behind if the table is missing.
      eldiv.innerHTML = respuesta;
      const tabla = eldiv.querySelectorAll('center')[2];
      if (tabla) {
        document.body.appendChild(tabla);
        Array.prototype.forEach.call(tabla.querySelectorAll('th'), (ele) => {
          ele.textContent = laMarca + '-' + ele.textContent;
        });
      } else {
        // Previously this case threw inside appendChild and stalled the queue.
        console.warn('No se encontró la tabla de resultados para:', laMarca);
      }
    }
  }

  continuaConSiguiente();
};
/**
 * Advances `conteo` to the next entry of `listadePaginas` and requests it
 * after a 2 second pause.
 *
 * Does nothing when the current page is already the last one. The earlier
 * bound (`conteo <= listadePaginas.length`) ran past the end of the list and
 * requested `undefined` as a URL.
 */
function continuaConSiguiente() {
  console.log('conteo:', conteo);
  if (conteo + 1 >= listadePaginas.length) {
    return;
  }
  conteo++;
  window.setTimeout(() => {
    console.log('llamando despues de 2 segundos a:', listadePaginas[conteo]);
    llamarPagHueona(listadePaginas[conteo]);
  }, 2000);
}
/**
 * Removes the retry/skip links (`nuevoLin`, `saltarLin`) from the page and
 * resumes loading through iniciar().
 *
 * @param {boolean} [skipCurrent] - When truthy, `conteo` is incremented first
 *   so the current page is skipped instead of being requested again.
 */
function removeAndRestart(skipCurrent) {
  // Only detach links that exist and are still attached, so a repeated call
  // (or a call before any link was created) does not throw.
  [nuevoLin, saltarLin].forEach((enlace) => {
    if (enlace && enlace.parentNode) {
      enlace.parentNode.removeChild(enlace);
    }
  });
  if (skipCurrent) {
    conteo++;
  }
  iniciar();
}
/**
 * Sends an asynchronous GET for `url` through the shared `request` object;
 * the response is handled by `request.onreadystatechange`.
 *
 * @param {string} url - Address of the page to fetch. A missing or empty value
 *   is ignored: XMLHttpRequest would otherwise stringify it and request a
 *   bogus relative URL such as "undefined".
 */
function llamarPagHueona(url) {
  if (!url) {
    console.warn('llamarPagHueona: url vacía, no se hace la llamada');
    return;
  }
  request.open('GET', url, true);
  request.send();
}
// Queue of page URLs to fetch: one per link found in the table header cells of
// the current document, with the "AFab" placeholder replaced by the year.
var listadePaginas = Array.prototype.map.call(
  document.querySelectorAll('tr > th a'),
  (ele) => ele.href.replace(/AFab/, elAno)
);

// Index into listadePaginas of the page currently being fetched.
var conteo = 0;

listadePaginas.forEach((unaPagina) => {
  console.log(unaPagina);
});

// Newline-terminated dump of every URL, logged once for easy copying.
var mira = listadePaginas.map((unaPagina) => unaPagina + '\n').join('');
console.log('-------ESTA ES LA LISTA DE URLs -------\n' + mira);
console.log('llamando.... tuuut tuuuu t');
/**
 * Requests the page at the current `conteo` position of `listadePaginas`.
 *
 * When `conteo` is past the end of the list (empty list, or the last page was
 * skipped) nothing is requested; previously `undefined` was passed on as a URL.
 */
function iniciar() {
  if (conteo >= listadePaginas.length) {
    console.log('no quedan páginas por cargar');
    return;
  }
  console.log('llamando a:', listadePaginas[conteo]);
  llamarPagHueona(listadePaginas[conteo]);
}
// Inject a small stylesheet for the collected tables: hide any <b> that directly
// follows a table inside <center>, and make those tables 90% wide.
var estilos = document.createElement('style');
// The CSS is plain text, so it is set through textContent instead of innerHTML.
estilos.textContent = 'center>table+b{display:none;} center>table{width:90%;}';
document.head.appendChild(estilos);

// Kick off the scrape with the first queued page.
iniciar();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment