Last active
January 3, 2017 21:20
Revisions
-
AlejoNext revised this gist
Jan 3, 2017 . 1 changed file with 41 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,41 @@ 2017-01-03T16:17:16 [DEBUG] CookieJar - Created but will not store cookies (use option '--cookies-file=<filename>' to enable persistent cookie storage) 2017-01-03T16:17:22 [DEBUG] Set "http" proxy to: "" : 1080 2017-01-03T16:17:22 [DEBUG] Phantom - execute: Configuration 2017-01-03T16:17:22 [DEBUG] 0 objectName : "" 2017-01-03T16:17:22 [DEBUG] 1 cookiesFile : "" 2017-01-03T16:17:22 [DEBUG] 2 diskCacheEnabled : "false" 2017-01-03T16:17:22 [DEBUG] 3 maxDiskCacheSize : "-1" 2017-01-03T16:17:22 [DEBUG] 4 diskCachePath : "" 2017-01-03T16:17:22 [DEBUG] 5 ignoreSslErrors : "false" 2017-01-03T16:17:22 [DEBUG] 6 localUrlAccessEnabled : "true" 2017-01-03T16:17:22 [DEBUG] 7 localToRemoteUrlAccessEnabled : "false" 2017-01-03T16:17:22 [DEBUG] 8 outputEncoding : "UTF-8" 2017-01-03T16:17:22 [DEBUG] 9 proxyType : "http" 2017-01-03T16:17:22 [DEBUG] 10 proxy : ":1080" 2017-01-03T16:17:22 [DEBUG] 11 proxyAuth : ":" 2017-01-03T16:17:22 [DEBUG] 12 scriptEncoding : "UTF-8" 2017-01-03T16:17:22 [DEBUG] 13 webSecurityEnabled : "true" 2017-01-03T16:17:22 [DEBUG] 14 offlineStoragePath : "" 2017-01-03T16:17:22 [DEBUG] 15 localStoragePath : "" 2017-01-03T16:17:22 [DEBUG] 16 localStorageDefaultQuota : "-1" 2017-01-03T16:17:22 [DEBUG] 17 offlineStorageDefaultQuota : "-1" 2017-01-03T16:17:22 [DEBUG] 18 printDebugMessages : "true" 2017-01-03T16:17:22 [DEBUG] 19 javascriptCanOpenWindows : "true" 2017-01-03T16:17:22 [DEBUG] 20 javascriptCanCloseWindows : "true" 2017-01-03T16:17:22 [DEBUG] 21 sslProtocol : "default" 2017-01-03T16:17:22 [DEBUG] 22 sslCiphers : 
"ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES128-SHA:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-RC4-SHA:ECDHE-RSA-RC4-SHA:DHE-RSA-AES128-SHA:DHE-DSS-AES128-SHA:DHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:RC4-SHA:RC4-MD5" 2017-01-03T16:17:22 [DEBUG] 23 sslCertificatesPath : "" 2017-01-03T16:17:22 [DEBUG] 24 sslClientCertificateFile : "" 2017-01-03T16:17:22 [DEBUG] 25 sslClientKeyFile : "" 2017-01-03T16:17:22 [DEBUG] 26 sslClientKeyPassphrase : "" 2017-01-03T16:17:22 [DEBUG] 27 webdriver : ":" 2017-01-03T16:17:22 [DEBUG] 28 webdriverLogFile : "" 2017-01-03T16:17:22 [DEBUG] 29 webdriverLogLevel : "INFO" 2017-01-03T16:17:22 [DEBUG] 30 webdriverSeleniumGridHub : "" 2017-01-03T16:17:22 [DEBUG] Phantom - execute: Script & Arguments 2017-01-03T16:17:22 [DEBUG] script: "import/getText.js" 2017-01-03T16:17:22 [DEBUG] Phantom - execute: Starting normal mode 2017-01-03T16:17:22 [DEBUG] WebPage - setupFrame "" 2017-01-03T16:17:22 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r"))) 2017-01-03T16:17:23 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r"))) 2017-01-03T16:17:23 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r"))) -
AlejoNext revised this gist
Jan 3, 2017 . 1 changed file with 51 additions and 43 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,23 +1,29 @@ /*** HOME ***/ var HOME = 'http://www.plotandesign.net/'; /*** WHERE ***/ var HOME_CONTENT = '#content .row2 > .main > .wrapper > .col1.cols', POST_MENU = 'ul.left a', TOP_MENU = '#header .menu li > a', CONTENT_POST = '#content .row2 .main > .wrapper'; /*** DATA ***/ var PAGES = {}, POST = [], stepIndex = 0; function posibleJS (msg, line, source) { console.log('>', msg); } function posibleAlert (msg) { console.log('<', msg); } function posibleError (resourceError) { console.error(resourceError.errorString) } /** * [getPostOrPages description] * @param {String} name Selector @@ -29,15 +35,11 @@ function getPostOrPages (name) { for (var i = 0; i < node.length; i++) { if(node[i] && !node[i].body && node[i].tagName ){ if(node[i].tagName == 'DIV'){ var z = clean($(node[i].innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>'))); for (var w = 0; w < z.length; w++) { newNode.push(z[w]); } } else if( /P|H[1-6]|UL|I|TABLE|LI|IMG/i.test(node[i].tagName) ){ newNode.push(node[i]); } } @@ -46,16 +48,16 @@ function getPostOrPages (name) { return newNode; } var child = clean($(name).children()); var post = { text : [] }; for (var i = 0; i < child.length; i++) { if( child[i] && child[i].tagName && !child[i].body && child[i].innerHTML.length ){ if( child[i].tagName != 'BR' && child[i].tagName != 'H1' ){ post.text.push(child[i].outerHTML); } else if ( child[i].tagName == 'H1' ) { post.title = child[i].innerText; } else if ( child[i].tagName == 'I' && !post.autor ) { post.autor = child[i].innerText; } } @@ -70,27 +72,27 @@ function getPostOrPages (name) { * @return {Array} Element */ function getHome(name) { var data = [], post = {}, child 
= $(name).children(); for (var i = 0; i < child.length; i++) { if(child[i].tagName === 'H2'){ if(post.title){ data.push(post); } post = { title : child[i].innerText, text : [] }; } else { if(post.text && child[i].tagName != 'BR' ){ if(child[i].tagName != 'I'){ post.text.push(child[i].outerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>'))); } else { post.autor = child[i].innerText; } } } } if(post.title){ data.push(post); } @@ -104,11 +106,13 @@ function getHome(name) { * @return {Array} Array URL */ function getPostMenu (name) { var urs = [], items = $(name); for (var i = 0; i < items.length; i++) { urs.push(items[i].href); } return urs; } @@ -128,11 +132,14 @@ function getMenu (i, top) { * @param {Boolean} isPost [description] */ function startPage(url, isPost) { var webPage = require('webpage'); var page = webPage.create(), newUrl; page.onConsoleMessage = posibleJS; page.onAlert = posibleAlert; page.onResourceError = posibleError; stepIndex++; if(url){ @@ -142,8 +149,6 @@ function startPage(url, isPost) { page.injectJs('jquery.min.js'); if(!phantom.state){ if(isPost || url.indexOf('index.html') < 0){ PAGES[ url ] = page.evaluate(getPostOrPages, CONTENT_POST); @@ -161,10 +166,10 @@ function startPage(url, isPost) { PAGES[ url ].page = true; } newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU); } if(!newUrl && !isPost){ stepIndex = 0; newUrl = POST[stepIndex]; isPost = true; @@ -174,12 +179,15 @@ function startPage(url, isPost) { } else { phantom.state(); } } else { console.log('mierda'); } }); } else { console.log(JSON.stringify(PAGES, null, '\t')); phantom.exit(); } } console.log('Start :', HOME); startPage(HOME + 'index.html'); -
AlejoNext revised this gist
Jan 3, 2017 . 1 changed file with 45 additions and 12 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,7 +10,6 @@ var CONTENT_POST = 'Where are the info in pages'; var PAGES = {}; var POST = []; function posibleError (msg, line, source) { console.log('>', msg); } @@ -19,13 +18,22 @@ function posibleAlert (msg) { console.log('<', msg); } /** * [getPostOrPages description] * @param {String} name Selector * @return {Object} Post/Pages */ function getPostOrPages (name) { function clean (node) { var newNode = []; for (var i = 0; i < node.length; i++) { if(node[i] && !node[i].body && node[i].tagName ){ if(node[i].tagName == 'DIV'){ var z = clean($(node[i] .innerHTML .replace(/\t|\n|\s{2,}/gim, '') .replace(/(\<br\>)/gim, '</p><p>'))); for (var w = 0; w < z.length; w++) { newNode.push(z[w]); } @@ -42,18 +50,25 @@ function getPostOrPages (name) { var child = clean($(name).children()); for (var i = 0; i < child.length; i++) { if(child[i] && child[i].tagName && !child[i].body && child[i].innerHTML.length){ if( child[i].tagName != 'BR' && child[i].tagName != 'H1' ){ post.text.push(child[i].outerHTML); } else if ( child[i].tagName == 'H1' ) { post.title = child[i].innerText; } else if ( child[i].tagName == 'I' && !post.autor ) { post.autor = child[i].innerText; } } } return post; } /** * [getHome description] * @param {String} name Selector * @return {Array} Element */ function getHome(name) { var data = []; var post = {}; @@ -67,7 +82,9 @@ function getHome(name) { } else { if(post.text && cild[i].tagName != 'BR' ){ if(cild[i].tagName != 'I'){ post.text.push(cild[i] .outerHTML .replace(/\t|\n|(\s){2,}|(\<br\>){2,}/gim, '')); } else { post.autor = cild[i].innerText; } @@ -81,7 +98,12 @@ function getHome(name) { return data; } /** * [getPostMenu description] * 
@param {String} name Selector * @return {Array} Array URL */ function getPostMenu (name) { var urs = []; var items = $(name); for (var i = 0; i < items.length; i++) { @@ -90,10 +112,21 @@ function getPost (name) { return urs; } /** * [getMenu description] * @param {Number} i Number in array * @param {String} top Selector * @return {String} Go to */ function getMenu (i, top) { return $(top)[i].href; } /** * [startPage description] * @param {String} url Go to * @param {Boolean} isPost [description] */ function startPage(url, isPost) { var page = webPage.create(); @@ -121,7 +154,7 @@ function startPage(url, isPost) { newUrl = POST[stepIndex]; } else { if(url.indexOf('index.html') >= 0){ POST = page.evaluate(getPostMenu, POST_MENU); PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT); PAGES[ url ].list = true; } else { @@ -145,8 +178,8 @@ function startPage(url, isPost) { }); } else { console.log(JSON.stringify(PAGES, null, '\t')); } } startPage(HOME + 'index.html'); -
AlejoNext revised this gist
Jan 3, 2017 . 1 changed file with 1 addition and 68 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,73 +10,6 @@ var CONTENT_POST = 'Where are the info in pages'; var PAGES = {}; var POST = []; function posibleError (msg, line, source) { console.log('>', msg); @@ -216,4 +149,4 @@ function startPage(url, isPost) { } } startPage(HOME + 'index.html'); -
AlejoNext revised this gist
Jan 3, 2017 . 1 changed file with 86 additions and 15 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -77,30 +77,103 @@ function getHome(content) { data.push(post); } function posibleError (msg, line, source) { console.log('>', msg); } function posibleAlert (msg) { console.log('<', msg); } function getPostOrPages (name) { function clean (node) { var newNode = []; for (var i = 0; i < node.length; i++) { if(node[i] && !node[i].body && node[i].tagName ){ if(node[i].tagName == 'DIV'){ var z = clean($(node[i].innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/(\<br\>)/gim, '</p><p>'))); for (var w = 0; w < z.length; w++) { newNode.push(z[w]); } } else if(/P|H[1-6]|UL|I|TABLE|LI|IMG/i.test(node[i].tagName)){ newNode.push(node[i]); } } } return newNode; } var post = { text : [] }; var child = clean($(name).children()); for (var i = 0; i < child.length; i++) { if(child[i] && child[i].tagName != 'BR' && child[i].tagName != 'H1' && !child[i].body && child[i].innerHTML.length ){ post.text.push(child[i].outerHTML); } else if ( child[i] && child[i].tagName == 'H1' ) { post.title = child[i].innerText; } else if ( child[i] && child[i].tagName == 'I' && !post.autor ) { post.autor = child[i].innerText; } } return post; } function getHome(name) { var data = []; var post = {}; var cild = $(name).children(); for (var i = 0; i < cild.length; i++) { if(cild[i].tagName === 'H2'){ if(post.title){ data.push(post); } post = { title : cild[i].innerText, text : [] }; } else { if(post.text && cild[i].tagName != 'BR' ){ if(cild[i].tagName != 'I'){ post.text.push(cild[i].outerHTML.replace(/\t|\n|(\s){2,}|(\<br\>){2,}/gim, '')); } else { post.autor = cild[i].innerText; } } } } if(post.title){ data.push(post); } return data; } function getPost (name) { var urs = []; var items = 
$(name); for (var i = 0; i < items.length; i++) { urs.push(items[i].href); } return urs; } function getMenu (i, top) { return $(top)[i].href; } function startPage(url, isPost) { var page = webPage.create(); page.onConsoleMessage = posibleError; page.onAlert = posibleAlert; stepIndex++; if(url){ page.open(url, function(status){ if (status === 'success') { console.log('Start :', url); page.injectJs('jquery.min.js'); var newUrl; @@ -115,16 +188,20 @@ function startPage(url, isPost) { newUrl = POST[stepIndex]; } else { if(url.indexOf('index.html') >= 0){ POST = page.evaluate(getPost, POST_MENU); PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT); PAGES[ url ].list = true; } else { PAGES[ url ].page = true; } newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU)); } if(!newUrl){ stepIndex = 0; newUrl = POST[stepIndex]; isPost = true; } startPage(newUrl, isPost); @@ -134,14 +211,8 @@ function startPage(url, isPost) { } }); } else { console.log(JSON.stringify(PAGES, null, '\t')); phantom.kill(); } } -
AlejoNext renamed this gist
Jan 3, 2017 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
AlejoNext created this gist
Jan 3, 2017 .There are no files selected for viewing
/**
 * PhantomJS site scraper (runs under PhantomJS, so ES5 syntax only).
 *
 * Starting at HOME + 'index.html' it follows the links of the top menu,
 * then the links collected from the post menu, extracts title / author /
 * HTML fragments from every page with jQuery inside the page context, and
 * finally prints everything gathered in PAGES as tab-indented JSON.
 *
 * getPostOrPages, getHome, getPost and getMenu are handed to page.evaluate():
 * they execute inside the loaded web page, so they must be self-contained and
 * may only use `$` (the injected jQuery) and their own arguments.
 */
var webPage = require('webpage');

/*** HOME ***/
var HOME = 'http://my.domain.com/';

/*** WHERE (jQuery selectors; placeholders to be replaced per site) ***/
var HOME_CONTENT = 'Where ara the info in home';
var POST_MENU = 'Go to menu';
var TOP_MENU = 'Top Menu';
var CONTENT_POST = 'Where are the info in pages';

/*** DATA ***/
var stepIndex = 0;   // index of the next menu entry / post URL to visit
var PAGES = {};      // url -> extracted content
var POST = [];       // post URLs collected from the home page

/**
 * Handler for page.onConsoleMessage: echoes console output of the page.
 * @param {String} msg    Message text
 * @param {Number} line   Line number (unused)
 * @param {String} source Source identifier (unused)
 */
function posibleError (msg, line, source) {
    console.log('>', msg);
}

/**
 * Handler for page.onAlert: echoes alert() calls made by the page.
 * @param {String} msg Alert text
 */
function posibleAlert (msg) {
    console.log('<', msg);
}

/**
 * Extracts a single post/page. Runs in the page context.
 * @param  {String} content Selector of the content wrapper
 * @return {Object}         { text: [outerHTML, ...], title: String, autor: String }
 */
function getPostOrPages (content) {
    /**
     * Flattens a node list: DIVs are re-parsed (with <br> turned into
     * paragraph breaks) and replaced by their whitelisted children; other
     * nodes are kept only when their tag is whitelisted.
     */
    function clean (node) {
        var newNode = [];

        for (var i = 0; i < node.length; i++) {
            if (node[i] && !node[i].body && node[i].tagName) {
                if (node[i].tagName == 'DIV') {
                    var z = clean($(node[i].innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/(\<br\>)/gim, '</p><p>')));

                    for (var w = 0; w < z.length; w++) {
                        newNode.push(z[w]);
                    }
                // Anchored on purpose: the unanchored pattern also matched
                // SPAN, SCRIPT, IFRAME, LINK... through the 'P' / 'I' alternatives.
                } else if (/^(?:P|H[1-6]|UL|I|TABLE|LI|IMG)$/i.test(node[i].tagName)) {
                    newNode.push(node[i]);
                }
            }
        }

        return newNode;
    }

    var post = {
        text : []
    };
    var child = clean($(content).children());

    for (var i = 0; i < child.length; i++) {
        var el = child[i];

        if (!el) {
            continue;
        }

        // Title and author are tested before the generic branch; otherwise
        // the generic branch swallows every <i> and the author is never set.
        if (el.tagName == 'H1') {
            post.title = el.innerText;
        } else if (el.tagName == 'I' && !post.autor) {
            post.autor = el.innerText;
        } else if (el.tagName != 'BR' && !el.body && el.innerHTML.length) {
            post.text.push(el.outerHTML);
        }
    }

    return post;
}

/**
 * Extracts the list of posts shown on the home page. Runs in the page context.
 * Every H2 starts a new post; following siblings are appended to it. Elements
 * that appear before the first H2 are ignored.
 * @param  {String} content Selector of the home content wrapper
 * @return {Array}          [{ title: String, text: [html, ...], autor: String }, ...]
 */
function getHome (content) {
    var data = [];
    var post = {};
    var child = $(content).children();

    for (var i = 0; i < child.length; i++) {
        if (child[i].tagName === 'H2') {
            // A new heading closes the post that was being collected.
            if (post.title) {
                data.push(post);
            }

            post = {
                title : child[i].innerText,
                text : []
            };
        } else if (post.text && child[i].tagName != 'BR') {
            if (child[i].tagName != 'I') {
                post.text.push(child[i].outerHTML.replace(/\t|\n|(\s){2,}|(\<br\>){2,}/gim, ''));
            } else {
                post.autor = child[i].innerText;
            }
        }
    }

    // Flush the last post.
    if (post.title) {
        data.push(post);
    }

    return data;
}

/**
 * Collects the href of every element matched by the selector.
 * Runs in the page context.
 * @param  {String} post Selector of the post links
 * @return {Array}       Array of URLs
 */
function getPost (post) {
    var urs = [];
    var items = $(post);

    for (var i = 0; i < items.length; i++) {
        urs.push(items[i].href);
    }

    return urs;
}

/**
 * Returns the URL of the i-th top-menu link. Runs in the page context.
 * @param  {Number} i   Index in the menu
 * @param  {String} top Selector of the menu links
 * @return {String}     URL, or null when the menu has no such entry
 */
function getMenu (i, top) {
    var item = $(top)[i];

    return item ? item.href : null;
}

/**
 * Sets a boolean flag on the data stored for a URL, if any was extracted.
 * NOTE(review): for the home page the stored value is the array returned by
 * getHome; JSON.stringify ignores non-index properties of arrays, so the
 * `list` flag does not show up in the printed output.
 * @param {String} url  Key in PAGES
 * @param {String} flag Property name to set to true
 */
function flagPage (url, flag) {
    if (PAGES[url]) {
        PAGES[url][flag] = true;
    }
}

/**
 * Opens a URL, stores what was extracted from it in PAGES and moves on to
 * the next URL. Top-menu pages are visited first; once the menu is exhausted
 * the URLs in POST are visited; once those are exhausted the result is
 * printed and PhantomJS exits.
 * @param {String}  url    Page to open; a falsy value means "current list exhausted"
 * @param {Boolean} isPost True while walking the POST list
 */
function startPage (url, isPost) {
    stepIndex++;

    if (!url) {
        if (isPost) {
            // Both lists are done: dump the result and stop.
            console.log(JSON.stringify(PAGES, null, '\t'));
            phantom.exit();
        } else {
            // Top menu exhausted: switch to the post list.
            stepIndex = 0;
            startPage(POST[stepIndex], true);
        }
        return;
    }

    var page = webPage.create();

    page.onConsoleMessage = posibleError;
    page.onAlert = posibleAlert;

    page.open(url, function (status) {
        console.log(status);

        if (status !== 'success') {
            // Without this the script would wait forever after a failed load.
            console.error('Unable to load: ' + url);
            phantom.exit(1);
            return;
        }

        console.log('Start :', url);

        if (!page.injectJs('jquery.min.js')) {
            console.error('Could not inject jquery.min.js into: ' + url);
        }

        // NOTE(review): `phantom.state` is never assigned in this script;
        // guard kept from the original - confirm whether it is still needed.
        if (phantom.state) {
            phantom.state();
            return;
        }

        var isIndex = url.indexOf('index.html') >= 0;
        var newUrl;

        if (isPost || !isIndex) {
            PAGES[ url ] = page.evaluate(getPostOrPages, CONTENT_POST);
        }

        if (isPost) {
            flagPage(url, 'post');
            newUrl = POST[stepIndex];
        } else {
            if (isIndex) {
                POST = page.evaluate(getPost, POST_MENU) || [];
                PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT);
                flagPage(url, 'list');
            } else {
                flagPage(url, 'page');
            }

            newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU);
        }

        startPage(newUrl, isPost);
    });
}

startPage(HOME + 'index.html');