// Gist jamlfy/2c1b14f64a75f59a0a0b62ede0180adb (last active January 3, 2017 21:20).
// Note: GitHub flagged this file as containing hidden or bidirectional Unicode text
// that may be interpreted or compiled differently than what appears below. To review,
// open the file in an editor that reveals hidden Unicode characters.
// PhantomJS `webpage` module; startPage() uses it to create one page per URL.
var webPage = require('webpage');

// Base URL of the site to crawl. 'index.html' is appended to it directly,
// so it has to end with a slash.
var HOME = 'http://my.domain.com/';

// Shared cursor: incremented on every startPage() call and used as the index
// into both the top menu and the POST list.
var stepIndex = 0;

// jQuery selectors (placeholders to be replaced with real selectors).
var HOME_CONTENT = 'Where ara the info in home'; // container parsed by getHome()
var POST_MENU = 'Go to menu';                    // links collected by getPostMenu()
var TOP_MENU = 'Top Menu';                       // links walked by getMenu()
var CONTENT_POST = 'Where are the info in pages'; // container parsed by getPostOrPages()

// Everything scraped so far, keyed by URL; dumped as JSON when the crawl ends.
var PAGES = {};

// URLs of the posts found on the home page (filled by getPostMenu()).
var POST = [];
/**
 * Relays a message written to the scraped page's console to the PhantomJS
 * console, prefixed with '>'. Assigned to `page.onConsoleMessage`.
 * @param  {String} msg    Text logged inside the page
 * @param  {Number} line   Line number reported by PhantomJS (unused)
 * @param  {String} source Source identifier reported by PhantomJS (unused)
 */
function posibleError (msg, line, source) {
	console.log('>', msg);
}
/**
 * Relays an `alert()` raised by the scraped page to the PhantomJS console,
 * prefixed with '<'. Assigned to `page.onAlert`.
 * @param  {String} msg Text passed to alert() inside the page
 */
function posibleAlert (msg) {
	console.log('<', msg);
}
/**
 * Extracts one post/page from the current document.
 *
 * Meant to be passed to `page.evaluate`, so it is self-contained: it only uses
 * its argument and the page's global jQuery (`$`); helpers and constants live
 * inside the function body on purpose.
 *
 * @param  {String} name Selector of the element that wraps the content
 * @return {Object}      Post/Pages: `{ text: String[], title?: String, autor?: String }`
 *                       where `text` holds the outerHTML of each content element,
 *                       `title` comes from the H1 and `autor` from the first I element
 */
function getPostOrPages (name) {
	// Whitelist of content tags. Anchored so that e.g. 'I' does not also match
	// SPAN, SCRIPT or INPUT just because their names contain the letter.
	var CONTENT_TAG = /^(?:P|H[1-6]|UL|I|TABLE|LI|IMG)$/i;

	/**
	 * Flattens a node list: DIVs are unwrapped (recursively) and only
	 * whitelisted content elements are kept, in document order.
	 */
	function clean (nodes) {
		var kept = [];
		for (var i = 0; i < nodes.length; i++) {
			var node = nodes[i];
			// Skip holes, document-like objects (they expose `.body`) and non-elements.
			if (!node || node.body || !node.tagName) {
				continue;
			}
			if (node.tagName === 'DIV') {
				// Collapse whitespace and turn <br> into paragraph boundaries
				// before re-parsing the DIV's markup with jQuery.
				var html = node.innerHTML
					.replace(/\t|\n|\s{2,}/gim, '')
					.replace(/(\<br\>)/gim, '</p><p>');
				var inner = clean($(html));
				for (var j = 0; j < inner.length; j++) {
					kept.push(inner[j]);
				}
			} else if (CONTENT_TAG.test(node.tagName)) {
				kept.push(node);
			}
		}
		return kept;
	}

	var post = { text : [] };
	var elements = clean($(name).children());
	for (var k = 0; k < elements.length; k++) {
		var el = elements[k];
		// NOTE(review): the innerHTML check also drops IMG elements (their
		// innerHTML is empty) although IMG is whitelisted above — confirm intent.
		if (!el || !el.tagName || el.body || !el.innerHTML.length) {
			continue;
		}
		// Title and author have to be tested before the generic branch,
		// otherwise the author branch can never be reached.
		if (el.tagName === 'H1') {
			post.title = el.innerText;
		} else if (el.tagName === 'I' && !post.autor) {
			post.autor = el.innerText;
		} else if (el.tagName !== 'BR') {
			post.text.push(el.outerHTML);
		}
	}
	return post;
}
/**
 * Splits the home page content into entries, one per H2 heading.
 *
 * Meant to be passed to `page.evaluate`; it only uses its argument and the
 * page's global jQuery (`$`).
 *
 * Every H2 starts a new entry. The siblings that follow it (up to the next H2)
 * are stored as cleaned-up outerHTML in `text`, except BR elements (ignored)
 * and I elements (their text becomes `autor`). Anything before the first H2
 * is ignored, and an entry whose heading text is empty is dropped.
 *
 * @param  {String} name Selector of the element that wraps the home content
 * @return {Array}       Element list: `{ title: String, text: String[], autor?: String }`
 */
function getHome(name) {
	var entries = [];
	var current = null; // entry being filled; null until the first H2 is seen
	var nodes = $(name).children();

	for (var i = 0; i < nodes.length; i++) {
		var node = nodes[i];

		if (node.tagName === 'H2') {
			if (current && current.title) {
				entries.push(current);
			}
			current = { title : node.innerText, text : [] };
			continue;
		}

		if (!current || node.tagName === 'BR') {
			continue;
		}

		if (node.tagName === 'I') {
			current.autor = node.innerText;
		} else {
			// Strip tabs, newlines, whitespace runs and repeated <br> tags.
			current.text.push(node
				.outerHTML
				.replace(/\t|\n|(\s){2,}|(\<br\>){2,}/gim, ''));
		}
	}

	// Flush the last entry, which has no following H2 to trigger the push.
	if (current && current.title) {
		entries.push(current);
	}
	return entries;
}
/**
 * Collects the `href` of every element matched by the selector, in document
 * order.
 *
 * Meant to be passed to `page.evaluate`; it only uses its argument and the
 * page's global jQuery (`$`).
 *
 * @param  {String} name Selector of the post links
 * @return {Array}       Array URL (one entry per matched element)
 */
function getPostMenu (name) {
	// The jQuery result is array-like, so borrow Array.prototype.map.
	return Array.prototype.map.call($(name), function (link) {
		return link.href;
	});
}
/**
 * Returns the `href` of the i-th element matched by the selector.
 *
 * Meant to be passed to `page.evaluate`; it only uses its arguments and the
 * page's global jQuery (`$`).
 *
 * @param  {Number} i   Number in array (zero-based index into the matched set)
 * @param  {String} top Selector of the top menu links
 * @return {String}     Go to: the URL to visit next, or null when the index is
 *                      past the end of the menu (the caller treats a falsy
 *                      result as "menu exhausted")
 */
function getMenu (i, top) {
	var item = $(top)[i];
	// Guard the lookup: reading `.href` of a missing item would throw.
	return item ? item.href : null;
}
/**
 * Visits one URL, stores what was scraped from it in PAGES and then recurses
 * into the next URL. The crawl has two phases:
 *
 *   1. menu phase (`isPost` falsy): the home page ('index.html') is parsed with
 *      getHome() and its post links are stored in POST; every other page is
 *      parsed with getPostOrPages(). The next URL is the top-menu entry at
 *      `stepIndex`.
 *   2. post phase (`isPost` truthy): entered once the top menu is exhausted;
 *      walks POST from index 0, parsing each URL with getPostOrPages().
 *
 * When there is no URL left, PAGES is printed as JSON and PhantomJS exits.
 *
 * @param  {String}  url    Go to (falsy means "crawl finished")
 * @param  {Boolean} isPost True while walking the POST list
 */
function startPage(url, isPost) {
	// Advanced on every call so that it always points at the *next* entry.
	stepIndex++;

	if (!url) {
		console.log(JSON.stringify(PAGES, null, '\t'));
		// Without an explicit exit the PhantomJS process never terminates.
		phantom.exit();
		return;
	}

	var page = webPage.create();
	page.onConsoleMessage = posibleError;
	page.onAlert = posibleAlert;

	page.open(url, function (status) {
		if (status !== 'success') {
			// The next URL is derived from the loaded page, so the crawl
			// cannot continue; report it instead of hanging silently.
			console.log('Failed to load :', url);
			phantom.exit(1);
			return;
		}

		console.log('Start :', url);
		page.injectJs('jquery.min.js');

		// NOTE(review): `phantom.state` is never assigned in this file;
		// presumably an external hook — confirm what it is expected to be.
		if (phantom.state) {
			phantom.state();
			return;
		}

		var isHome = url.indexOf('index.html') >= 0;
		var nextIsPost = isPost;
		var newUrl;

		if (isPost || !isHome) {
			PAGES[ url ] = page.evaluate(getPostOrPages, CONTENT_POST);
		}

		if (isPost) {
			PAGES[ url ].post = true;
			newUrl = POST[stepIndex];
		} else {
			if (isHome) {
				POST = page.evaluate(getPostMenu, POST_MENU);
				PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT);
				// NOTE(review): getHome() returns an array, and JSON.stringify
				// does not serialize non-index properties of arrays, so this
				// flag is missing from the final dump.
				PAGES[ url ].list = true;
			} else {
				PAGES[ url ].page = true;
			}
			newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU);
		}

		// Top menu exhausted: switch to the post phase, starting at POST[0].
		// Only done from the menu phase — once the posts run out as well,
		// `newUrl` stays empty so the crawl ends instead of restarting.
		if (!newUrl && !isPost) {
			stepIndex = 0;
			newUrl = POST[stepIndex];
			nextIsPost = true;
		}

		startPage(newUrl, nextIsPost);
	});
}
// Entry point: start the crawl at the site's home page (menu phase).
startPage(HOME + 'index.html');
// Sign up for free to join this conversation on GitHub.
// Already have an account? Sign in to comment.