Skip to content

Instantly share code, notes, and snippets.

@jamlfy
Last active January 3, 2017 21:20
Show Gist options
  • Save jamlfy/2c1b14f64a75f59a0a0b62ede0180adb to your computer and use it in GitHub Desktop.
Save jamlfy/2c1b14f64a75f59a0a0b62ede0180adb to your computer and use it in GitHub Desktop.
// PhantomJS 'webpage' module; startPage() creates one page object per visited URL.
var webPage = require('webpage');
// Base URL of the site to crawl; 'index.html' is appended to it for the first request.
var HOME = 'http://my.domain.com/';
// Shared crawl cursor: incremented on every startPage() call and used as the
// index into the top menu and, later, into POST.
var stepIndex = 0;
// Placeholder — replace with the value handed to $() in getHome (the home page content container).
var HOME_CONTENT = 'Where ara the info in home';
// Placeholder — replace with the value handed to $() in getPost (the post links on the home page).
var POST_MENU = 'Go to menu';
// Placeholder — replace with the value handed to $() in getMenu (the top menu links).
var TOP_MENU = 'Top Menu';
// Placeholder — replace with the value handed to $() in getPostOrPages (a page/post content container).
var CONTENT_POST = 'Where are the info in pages';
// Scraped results, keyed by the URL they were read from; dumped as JSON at the end.
var PAGES = {};
// Post URLs collected from the home page by getPost.
var POST = [];
/**
 * Prints a message to the console behind a '>' marker.
 * startPage installs this as the page's `onConsoleMessage` handler.
 *
 * @param {*} msg - Text to print.
 * @param {*} line - Accepted but not used.
 * @param {*} source - Accepted but not used.
 */
function posibleError (msg, line, source) {
  var marker = '>';
  console.log(marker, msg);
}
/**
 * Prints a message to the console behind a '<' marker.
 * startPage installs this as the page's `onAlert` handler.
 *
 * @param {*} msg - Text to print.
 */
function posibleAlert (msg) {
  var marker = '<';
  console.log(marker, msg);
}
/**
 * Builds a post/page record from the children of the element(s) matched by
 * `content`. Passed to `page.evaluate`, so it runs inside the loaded page and
 * relies on the injected jQuery (`$`).
 *
 * @param {string} content - Value handed to `$()` to locate the content container.
 * @returns {{text: string[], title: (string|undefined), autor: (string|undefined)}}
 *   `text` holds the outerHTML of every kept element, `title` the text of the
 *   last H1 seen and `autor` the text of the first non-empty I element.
 */
function getPostOrPages (content) {
  // Whole-name whitelist. The previous unanchored pattern matched any tag that
  // merely contained one of these names (SPAN, SCRIPT, LINK, INPUT, ...).
  var KEEP_TAG = /^(?:P|H[1-6]|UL|I|TABLE|LI|IMG)$/i;

  // Flattens a node list: DIVs are replaced by their own (recursively cleaned)
  // children, whitelisted tags are kept, everything else is dropped.
  function clean (node) {
    var newNode = [];
    for (var i = 0; i < node.length; i++) {
      // NOTE(review): the `.body` test presumably skips document-like nodes — confirm.
      if (node[i] && !node[i].body && node[i].tagName) {
        if (node[i].tagName == 'DIV') {
          // Re-parse the DIV's markup with whitespace collapsed and every
          // <br> turned into a paragraph boundary.
          var z = clean($(node[i].innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/(\<br\>)/gim, '</p><p>')));
          for (var w = 0; w < z.length; w++) {
            newNode.push(z[w]);
          }
        } else if (KEEP_TAG.test(node[i].tagName)) {
          newNode.push(node[i]);
        }
      }
    }
    return newNode;
  }

  var post = { text : [] };
  var child = clean($(content).children());
  for (var i = 0; i < child.length; i++) {
    var el = child[i];
    if (!el) {
      continue;
    }
    if (el.tagName == 'H1') {
      post.title = el.innerText;
    } else if (el.tagName == 'I' && !post.autor) {
      // Checked before the generic text branch; otherwise any non-empty <i>
      // would be stored as body text and the author could never be set.
      post.autor = el.innerText;
    } else if (el.tagName != 'BR' && !el.body && (el.tagName == 'IMG' || el.innerHTML.length)) {
      // IMG never has innerHTML, so it is exempt from the "non-empty" rule.
      post.text.push(el.outerHTML);
    }
  }
  return post;
}
/**
 * Splits the home page content into posts. Every H2 child starts a new post;
 * the children that follow it (until the next H2) become that post's body.
 * Passed to `page.evaluate`, so it runs inside the loaded page and relies on
 * the injected jQuery (`$`).
 *
 * @param {string} content - Value handed to `$()` to locate the content container.
 * @returns {Array<{title: string, text: string[], autor: (string|undefined)}>}
 *   One entry per H2 with a non-empty title, in document order.
 */
function getHome(content) {
  var data = [];
  var post = {};
  var children = $(content).children();
  for (var i = 0; i < children.length; i++) {
    if (children[i].tagName === 'H2') {
      // A new heading closes the post collected so far.
      if (post.title) {
        data.push(post);
      }
      post = { title : children[i].innerText, text : [] };
    } else {
      // `post.text` only exists once an H2 was seen, so anything before the
      // first heading is ignored.
      if (post.text && children[i].tagName != 'BR') {
        if (children[i].tagName != 'I') {
          post.text.push(children[i].outerHTML.replace(/\t|\n|(\s){2,}|(\<br\>){2,}/gim, ''));
        } else {
          post.autor = children[i].innerText;
        }
      }
    }
  }
  // Flush the last post, which no following H2 closed.
  if (post.title) {
    data.push(post);
  }
  // The original copy stopped here without returning or closing the function,
  // which left the rest of the file nested inside it.
  return data;
}
/**
 * Prints a message to the console behind a '>' marker.
 * startPage installs this as the page's `onConsoleMessage` handler.
 *
 * @param {*} msg - Text to print.
 * @param {*} line - Accepted but not used.
 * @param {*} source - Accepted but not used.
 */
function posibleError (msg, line, source) {
  var marker = '>';
  console.log(marker, msg);
}
/**
 * Prints a message to the console behind a '<' marker.
 * startPage installs this as the page's `onAlert` handler.
 *
 * @param {*} msg - Text to print.
 */
function posibleAlert (msg) {
  var marker = '<';
  console.log(marker, msg);
}
/**
 * Builds a post/page record from the children of the element(s) matched by
 * `name`. Passed to `page.evaluate`, so it runs inside the loaded page and
 * relies on the injected jQuery (`$`).
 *
 * @param {string} name - Value handed to `$()` to locate the content container.
 * @returns {{text: string[], title: (string|undefined), autor: (string|undefined)}}
 *   `text` holds the outerHTML of every kept element, `title` the text of the
 *   last H1 seen and `autor` the text of the first non-empty I element.
 */
function getPostOrPages (name) {
  // Whole-name whitelist. The previous unanchored pattern matched any tag that
  // merely contained one of these names (SPAN, SCRIPT, LINK, INPUT, ...).
  var KEEP_TAG = /^(?:P|H[1-6]|UL|I|TABLE|LI|IMG)$/i;

  // Flattens a node list: DIVs are replaced by their own (recursively cleaned)
  // children, whitelisted tags are kept, everything else is dropped.
  function clean (node) {
    var newNode = [];
    for (var i = 0; i < node.length; i++) {
      // NOTE(review): the `.body` test presumably skips document-like nodes — confirm.
      if (node[i] && !node[i].body && node[i].tagName) {
        if (node[i].tagName == 'DIV') {
          // Re-parse the DIV's markup with whitespace collapsed and every
          // <br> turned into a paragraph boundary.
          var z = clean($(node[i].innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/(\<br\>)/gim, '</p><p>')));
          for (var w = 0; w < z.length; w++) {
            newNode.push(z[w]);
          }
        } else if (KEEP_TAG.test(node[i].tagName)) {
          newNode.push(node[i]);
        }
      }
    }
    return newNode;
  }

  var post = { text : [] };
  var child = clean($(name).children());
  for (var i = 0; i < child.length; i++) {
    var el = child[i];
    if (!el) {
      continue;
    }
    if (el.tagName == 'H1') {
      post.title = el.innerText;
    } else if (el.tagName == 'I' && !post.autor) {
      // Checked before the generic text branch; otherwise any non-empty <i>
      // would be stored as body text and the author could never be set.
      post.autor = el.innerText;
    } else if (el.tagName != 'BR' && !el.body && (el.tagName == 'IMG' || el.innerHTML.length)) {
      // IMG never has innerHTML, so it is exempt from the "non-empty" rule.
      post.text.push(el.outerHTML);
    }
  }
  return post;
}
/**
 * Groups the children of the element(s) matched by `name` into posts: each H2
 * opens a new post and the following siblings fill it until the next H2.
 * Passed to `page.evaluate`, so it runs inside the loaded page and relies on
 * the injected jQuery (`$`).
 *
 * @param {string} name - Value handed to `$()` to locate the content container.
 * @returns {Array<{title: string, text: string[], autor: (string|undefined)}>}
 *   One entry per H2 with a non-empty title, in document order.
 */
function getHome(name) {
  var posts = [];
  var current = {};
  var nodes = $(name).children();
  var el;

  for (var idx = 0; idx < nodes.length; idx++) {
    el = nodes[idx];

    if (el.tagName === 'H2') {
      // Heading: store the post gathered so far and begin the next one.
      if (current.title) {
        posts.push(current);
      }
      current = { title : el.innerText, text : [] };
      continue;
    }

    // Nothing is collected before the first heading, and BR is never kept.
    if (!current.text || el.tagName == 'BR') {
      continue;
    }

    if (el.tagName == 'I') {
      current.autor = el.innerText;
    } else {
      current.text.push(el.outerHTML.replace(/\t|\n|(\s){2,}|(\<br\>){2,}/gim, ''));
    }
  }

  // The trailing post has no following heading to flush it.
  if (current.title) {
    posts.push(current);
  }
  return posts;
}
/**
 * Collects the `href` of every element matched by `name`.
 * Passed to `page.evaluate`, so it runs inside the loaded page and relies on
 * the injected jQuery (`$`).
 *
 * @param {string} name - Value handed to `$()` to locate the link elements.
 * @returns {Array} The `href` values, in match order.
 */
function getPost (name) {
  return Array.prototype.map.call($(name), function (item) {
    return item.href;
  });
}
/**
 * Returns the `href` of the i-th element matched by `top`.
 * Passed to `page.evaluate`, so it runs inside the loaded page and relies on
 * the injected jQuery (`$`).
 *
 * @param {number} i - Zero-based index into the matched elements.
 * @param {string} top - Value handed to `$()` to locate the menu links.
 * @returns {*} The element's `href`, or null when there is no i-th element.
 */
function getMenu (i, top) {
  var link = $(top)[i];
  // startPage treats a falsy result as "menu exhausted", so report a missing
  // entry as null instead of throwing on `undefined.href`.
  return link ? link.href : null;
}
/**
 * Crawls one URL and then recurses into the next one.
 *
 * Order of the crawl: the home page ('index.html'), then the top-menu pages
 * (indexed by stepIndex), then every URL in POST. When it is called without a
 * URL the crawl is over: PAGES is printed as JSON and PhantomJS exits.
 *
 * Reads and writes the shared `stepIndex`, `PAGES` and `POST` variables.
 *
 * @param {string} [url] - Page to load; a falsy value ends the crawl.
 * @param {boolean} [isPost] - True while walking the POST list.
 */
function startPage(url, isPost) {
  stepIndex++;

  if (!url) {
    console.log(JSON.stringify(PAGES, null, '\t'));
    // `phantom.exit()` is the documented way to stop PhantomJS.
    phantom.exit();
    return;
  }

  var page = webPage.create();
  page.onConsoleMessage = posibleError;
  page.onAlert = posibleAlert;

  page.open(url, function (status) {
    if (status !== 'success') {
      // Without this the script would sit idle forever after a failed load.
      console.log('Failed to load :', url);
      phantom.exit(1);
      return;
    }
    console.log('Start :', url);

    // The extractors below all depend on `$` being present in the page.
    if (!page.injectJs('jquery.min.js')) {
      console.log('Could not inject jquery.min.js');
      phantom.exit(1);
      return;
    }

    // NOTE(review): `phantom.state` is not set anywhere in this file; kept as
    // an optional hook that, when present, replaces the default crawl step.
    if (phantom.state) {
      phantom.state();
      return;
    }

    var isHome = url.indexOf('index.html') >= 0;
    var newUrl;

    if (isPost || !isHome) {
      PAGES[ url ] = page.evaluate(getPostOrPages, CONTENT_POST);
    }

    if (isPost) {
      PAGES[ url ].post = true;
      newUrl = POST[stepIndex];
    } else {
      if (isHome) {
        POST = page.evaluate(getPost, POST_MENU);
        PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT);
        // NOTE(review): getHome returns an array, so this flag is not part of
        // the JSON dump — confirm whether that is intended.
        PAGES[ url ].list = true;
      } else {
        PAGES[ url ].page = true;
      }
      newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU);
    }

    // Menu exhausted: switch to the post list exactly once. Without the
    // `!isPost` guard the end of the post list restarted at POST[0] forever.
    if (!newUrl && !isPost) {
      stepIndex = 0;
      newUrl = POST[stepIndex];
      isPost = true;
    }

    startPage(newUrl, isPost);
  });
}
// Entry point: begin the crawl at the site's index page.
startPage(HOME + 'index.html');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment