Skip to content

Instantly share code, notes, and snippets.

@jamlfy
Last active January 3, 2017 21:20
Show Gist options
  • Save jamlfy/2c1b14f64a75f59a0a0b62ede0180adb to your computer and use it in GitHub Desktop.
Save jamlfy/2c1b14f64a75f59a0a0b62ede0180adb to your computer and use it in GitHub Desktop.
/*** HOME: root URL of the site being scraped ***/
var HOME = 'http://www.plotandesign.net/';

/*** WHERE: jQuery selectors evaluated inside the loaded pages ***/
// Container whose children make up the home page listing (see getHome).
var HOME_CONTENT = '#content .row2 > .main > .wrapper > .col1.cols';
// Links to the individual posts (see getPostMenu).
var POST_MENU = 'ul.left a';
// Links of the top navigation menu (see getMenu).
var TOP_MENU = '#header .menu li > a';
// Container of a single post or static page (see getPostOrPages).
var CONTENT_POST = '#content .row2 .main > .wrapper';

/*** DATA: state shared by every startPage call ***/
// Scraped result of every visited page, keyed by its URL.
var PAGES = {};
// URLs of the posts, filled in while the home page is processed.
var POST = [];
// Position in the menu / post list of the next URL to visit.
var stepIndex = 0;
/**
 * Console-message handler (assigned to page.onConsoleMessage in startPage).
 * Echoes the message to the PhantomJS console, prefixed with '>'.
 * @param {String} msg    Text that was logged
 * @param {Number} line   Unused
 * @param {String} source Unused
 */
function posibleJS (msg, line, source) {
  var prefix = '>';
  console.log(prefix, msg);
}
/**
 * Alert handler (assigned to page.onAlert in startPage).
 * Echoes the alert text to the PhantomJS console, prefixed with '<'.
 * @param {String} msg Text of the alert
 */
function posibleAlert (msg) {
  var prefix = '<';
  console.log(prefix, msg);
}
/**
 * Resource-error handler (assigned to page.onResourceError in startPage).
 * Writes the error description to the PhantomJS error console.
 * @param {Object} resourceError Error details; only `errorString` is read
 */
function posibleError (resourceError) {
  var description = resourceError.errorString;
  console.error(description);
}
/**
 * Scrape a single post or static page out of the current document.
 *
 * This function is handed to page.evaluate, so it executes inside the loaded
 * page and can only use that page's globals (here the injected jQuery `$`).
 *
 * The children of the container are flattened (DIVs are unwrapped) and
 * reduced to a whitelist of tags. From the remaining elements:
 *   - every H1 sets the title (the last one wins),
 *   - the first I element is taken as the author,
 *   - everything else is stored as outerHTML in `text`.
 *
 * @param  {String} name Selector of the container to scrape
 * @return {Object}      { text : Array of HTML strings, title : String, autor : String }
 *                       (`title` / `autor` are absent when not found)
 */
function getPostOrPages (name) {
  // Anchored on purpose: an unanchored pattern also accepts every tag whose
  // name merely contains "P" or "I" (SPAN, SCRIPT, INPUT, ...).
  var KEEP = /^(?:P|H[1-6]|UL|I|TABLE|LI|IMG)$/i;

  /**
   * Flatten a list of nodes: DIVs are replaced by their (cleaned) content,
   * whitelisted elements are kept, anything else is discarded.
   */
  function clean (node) {
    var newNode = [];
    for (var i = 0; i < node.length; i++) {
      // Skip empty slots, text nodes (no tagName) and document-like objects.
      if(node[i] && !node[i].body && node[i].tagName ){
        if(node[i].tagName == 'DIV'){
          // Collapse whitespace and turn <br> into paragraph breaks before
          // re-parsing the DIV's content with jQuery.
          var z = clean($(node[i].innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>')));
          for (var w = 0; w < z.length; w++) {
            newNode.push(z[w]);
          }
        } else if( KEEP.test(node[i].tagName) ){
          newNode.push(node[i]);
        }
      }
    }
    return newNode;
  }

  var child = clean($(name).children());
  var post = { text : [] };
  for (var i = 0; i < child.length; i++) {
    var el = child[i];
    var tag = el.tagName;
    // Drop empty elements, except IMG which never has inner HTML.
    if( !el.innerHTML.length && tag != 'IMG' ){
      continue;
    }
    if( tag == 'H1' ){
      post.title = el.innerText;
    } else if( tag == 'I' && !post.autor ){
      // Must be tested before the generic branch, otherwise the I element
      // is swallowed as plain text and the author is never extracted.
      post.autor = el.innerText;
    } else {
      post.text.push(el.outerHTML);
    }
  }
  return post;
}
/**
 * Scrape the home page listing into a list of post summaries.
 *
 * This function is handed to page.evaluate, so it executes inside the loaded
 * page and relies on the injected jQuery `$`.
 *
 * The container's children are read in order: every H2 starts a new entry,
 * an I element sets that entry's author, BR elements are ignored and any
 * other element is appended to the entry's `text` as cleaned-up outerHTML.
 * Elements that appear before the first H2 are ignored, and an entry whose
 * title is empty is not emitted.
 *
 * @param  {String} name Selector of the container to scrape
 * @return {Array}       Entries shaped { title : String, text : Array, autor : String }
 */
function getHome(name) {
  var data = [],
    post = {},
    child = $(name).children();
  for (var i = 0; i < child.length; i++) {
    if(child[i].tagName === 'H2'){
      // A new heading closes the entry collected so far.
      if(post.title){
        data.push(post);
      }
      post = { title : child[i].innerText, text : [] };
    } else {
      // post.text only exists once the first H2 has been seen.
      if(post.text && child[i].tagName != 'BR' ){
        if(child[i].tagName != 'I'){
          // Collapse whitespace and turn <br> into paragraph breaks.
          post.text.push(child[i].outerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>'));
        } else {
          post.autor = child[i].innerText;
        }
      }
    }
  }
  // Flush the last entry, which has no following H2 to close it.
  if(post.title){
    data.push(post);
  }
  return data;
}
/**
 * Collect the target of every link matched by a selector.
 *
 * This function is handed to page.evaluate, so it executes inside the loaded
 * page and relies on the injected jQuery `$`.
 *
 * @param  {String} name Selector matching the links
 * @return {Array}       The `href` of each matched element, in document order
 */
function getPostMenu (name) {
  return Array.prototype.map.call($(name), function (anchor) {
    return anchor.href;
  });
}
/**
 * Look up the target of the i-th link matched by a selector.
 *
 * This function is handed to page.evaluate, so it executes inside the loaded
 * page and relies on the injected jQuery `$`.
 *
 * @param  {Number} i   Zero-based position of the link
 * @param  {String} top Selector matching the menu links
 * @return {String}     The link's `href`, or undefined when there is no
 *                      link at that position
 */
function getMenu (i, top) {
  var link = $(top)[i];
  // startPage treats a falsy result as "menu exhausted"; returning undefined
  // here avoids a TypeError when i points past the last link.
  return link ? link.href : undefined;
}
/**
 * Open one URL, scrape it into PAGES and continue with the next URL.
 *
 * Every call increments the shared stepIndex. The crawl first walks the top
 * menu (next URL = getMenu(stepIndex, TOP_MENU) evaluated in the page); once
 * the menu yields nothing it switches to the POST list collected on the home
 * page, starting again from index 0. When there is no URL left, PAGES is
 * printed as tab-indented JSON and PhantomJS exits.
 *
 * NOTE(review): for the home page PAGES[url] is the array returned by
 * getHome; the `list` flag set on it is a non-index property and therefore
 * does not appear in the JSON.stringify output.
 *
 * @param {String}  url    Address to open; a falsy value ends the crawl
 * @param {Boolean} isPost Truthy when `url` comes from the POST list
 */
function startPage(url, isPost) {
  stepIndex++;

  if(!url){
    // Nothing left to visit: dump everything that was collected and stop.
    console.log(JSON.stringify(PAGES, null, '\t'));
    phantom.exit();
    return;
  }

  var webPage = require('webpage');
  var page = webPage.create(),
    newUrl;
  page.onConsoleMessage = posibleJS;
  page.onAlert = posibleAlert;
  page.onResourceError = posibleError;

  page.open(url, function(status){
    if (status !== 'success') {
      console.log('mierda');
      // Without an explicit exit PhantomJS would keep running forever
      // after a failed load, since no further page is ever scheduled.
      page.close();
      phantom.exit(1);
      return;
    }

    console.log('Start :', url);
    // The scraping functions below rely on jQuery being present in the page.
    page.injectJs('jquery.min.js');

    // NOTE(review): phantom.state is never assigned in this file, so this
    // branch looks unreachable - confirm before removing it.
    if(phantom.state){
      phantom.state();
      return;
    }

    var isHome = url.indexOf('index.html') >= 0;

    if(isPost || !isHome){
      PAGES[ url ] = page.evaluate(getPostOrPages, CONTENT_POST);
    }

    if(isPost){
      PAGES[ url ].post = true;
      newUrl = POST[stepIndex];
    } else {
      if(isHome){
        POST = page.evaluate(getPostMenu, POST_MENU);
        PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT);
        PAGES[ url ].list = true;
      } else {
        PAGES[ url ].page = true;
      }
      newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU);
    }

    if(!newUrl && !isPost){
      // Top menu exhausted: restart the counter and walk the posts.
      stepIndex = 0;
      newUrl = POST[stepIndex];
      isPost = true;
    }

    // Everything needed has been read from this page; release it so that
    // one webpage per visited URL does not pile up in memory.
    page.close();
    startPage(newUrl, isPost);
  });
}
// Entry point: announce the target site, then start the crawl at its home
// page. isPost is omitted, so the first URL is handled as a menu page.
console.log('Start :', HOME);
startPage(HOME + 'index.html');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment