Created
August 7, 2018 07:48
-
-
Save BonsoirDiep/ae58fc62777ba0bc8509b4f8b2640513 to your computer and use it in GitHub Desktop.
Puppeteer test 1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party and built-in modules used by the scraper below.
const puppeteer = require('puppeteer');
// Placeholder result store; only referenced by the (currently disabled) save() call at the end of the script.
const database = {};
const fs = require('fs');
const cheerio = require('cheerio');
/* utils */

/**
 * Serializes `data` as JSON and writes it synchronously to a file located
 * relative to this script's directory.
 *
 * @param {string} path - Target file path, resolved against `__dirname`.
 * @param {*} data - Any JSON-serializable value.
 */
function save(path, data) {
  // The parameter is called `path`, so the path module is required under a
  // different local name to avoid shadowing.
  const pathModule = require('path');
  const target = pathModule.join(__dirname, path);
  fs.writeFileSync(target, JSON.stringify(data));
}
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Reads a single query-string parameter from this URL-like string and
 * returns its decoded value.
 *
 * The search covers everything after the first "?"; when the string has no
 * "?", the whole string is treated as the query.
 *
 * @param {string} para - Parameter name, matched literally.
 * @returns {string|null} The decoded value, or null when the parameter is absent.
 */
String.prototype.GetValue = function (para) {
  // Escape regex metacharacters so names such as "a.b" or "ids[]" are matched
  // literally instead of being interpreted as a pattern.
  const name = String(para).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const reg = new RegExp('(^|&)' + name + '=([^&]*)(&|$)');
  const r = this.slice(this.indexOf('?') + 1).match(reg);
  if (r == null) return null;
  try {
    // Percent-escapes in URLs are UTF-8; unescape() would turn each byte into
    // a separate Latin-1 character.
    return decodeURIComponent(r[2]);
  } catch (err) {
    // Malformed UTF-8 or legacy %uXXXX sequences: fall back to the legacy decoder.
    return unescape(r[2]);
  }
};
/**
 * Reads a single query-string parameter from this URL-like string and
 * returns its value exactly as it appears (no percent-decoding).
 *
 * The search covers everything after the first "?"; when the string has no
 * "?", the whole string is treated as the query.
 *
 * @param {string} para - Parameter name, matched literally.
 * @returns {string|null} The raw value, or null when the parameter is absent.
 */
String.prototype.GetValue2 = function (para) {
  // Escape regex metacharacters so names such as "a.b" or "ids[]" are matched
  // literally instead of being interpreted as a pattern.
  const name = String(para).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const reg = new RegExp('(^|&)' + name + '=([^&]*)(&|$)');
  const r = this.slice(this.indexOf('?') + 1).match(reg);
  return r == null ? null : r[2];
};
// Start page of the crawl.
const meUrl = 'https://www.1688.com/';
// Scraped product records; each record's `itemMore` initially holds a lookup
// key (string) and is later replaced by the detail object via addMore().
const datas = [];

/**
 * Replaces the lookup key stored in `itemMore` of the first record whose
 * `itemMore` equals `key` with the given detail object.
 *
 * Calls with a non-object or null `newVal`, or with a key that matches no
 * record, are ignored.
 *
 * @param {string} key - Value currently stored in the record's `itemMore`.
 * @param {object} newVal - Detail payload to store in place of the key.
 */
function addMore(key, newVal) {
  // typeof null is 'object'; without the explicit null check a null payload
  // would wipe the record's key.
  if (newVal === null || typeof newVal !== 'object') return;
  const record = datas.find((el) => el.itemMore === key);
  if (record) record.itemMore = newVal;
}
// | |
/**
 * Script entry point.
 *
 * Steps, in order:
 *  1. Launch a visible Chromium, block image requests, and listen to responses.
 *  2. Open the start page, dismiss the identity popup if present, and search
 *     for a fixed keyword.
 *  3. Scroll the result page down in 50px steps so lazily-loaded items render.
 *  4. Parse the result list with cheerio into `datas`.
 *  5. For every record, trigger an in-page JSONP request for extra details;
 *     the response listener stores the parsed payload through addMore().
 *  6. Print every record whose details were filled in.
 *
 * The browser is intentionally left open at the end (closing is disabled).
 */
(async () => {
  const browser = await puppeteer.launch({
    // slowMo: 250, // slow down by 250ms
    headless: false,
    args: ['--no-sandbox', '--disable-web-security', '--user-data-dir=data'],
  });
  const page = await browser.newPage();
  await page.setRequestInterception(true);

  // Prints every record whose `itemMore` key has already been replaced by a
  // detail object.
  const logEnriched = () => {
    datas.forEach((el) => {
      if (el.itemMore && typeof el.itemMore !== 'string') console.log(el);
    });
  };

  // Response bodies can be unavailable (e.g. redirects); log instead of
  // leaving an unhandled rejection.
  const logError = (err) => console.log(err.message);

  page.on('close', (evt) => {
    console.log('close: ', evt);
  });
  page.on('domcontentloaded', () => {
    console.log('domcontentloaded: #');
  });

  // With interception enabled every request must be explicitly aborted or
  // continued; images are dropped, everything else passes through.
  page.on('request', (request) => {
    if (request.resourceType() === 'image') {
      request.abort();
    } else {
      request.continue();
    }
  });

  page.on('response', (res) => {
    const url = res.url();
    if (url.includes('/ajax/member_bsr_indexs_json.do')) {
      // Detail request (issued by step 5 below, or by hovering a product).
      res.text().then((data) => {
        const callback = url.GetValue2('callback');
        if (data && callback === 'diep12') {
          // Strip the JSONP wrapper: keep what lies between the first "(" and
          // the first "})" (inclusive of the closing brace).
          const payload = data.substring(data.indexOf('(') + 1, data.indexOf('})') + 1);
          try {
            const parsed = JSON.parse(payload);
            const rawLoginId = url.GetValue2('loginid');
            if (parsed.success && rawLoginId !== null) {
              // The URL carries the key percent-encoded, while `datas` holds
              // the raw attribute value, so decode before matching.
              let loginId = rawLoginId;
              try {
                loginId = decodeURIComponent(rawLoginId);
              } catch (decodeErr) {
                // Not valid percent-encoding: match on the raw value instead.
              }
              addMore(loginId, parsed);
            }
          } catch (ex) {
            console.log(ex.message);
          }
        } else if (callback === 'bug') {
          logEnriched();
        }
      }).catch(logError);
    } else if (url.includes('rpc_async_render.jsonp')) {
      console.log('reason: scroll mouse');
      res.text().then(() => {
        console.log({ url: url });
      }).catch(logError);
    }
  });

  // timeout: 0 disables the navigation timeout.
  await page.goto(meUrl, { timeout: 0 });

  await page.evaluate(() => {
    // Close the identity popup, if the page showed one.
    const closeButton = document.getElementsByClassName('identity-close')[0];
    if (closeButton) closeButton.click();
    window.addEventListener('beforeunload', function () {
      return console.log('agagagag');
    });
  });

  await sleep(1500);
  await page.type('.searchfollow_keywords', '壁橱');
  await page.click('button#alisearch-submit');
  await sleep(3000);

  // Scroll down step by step; the page height is re-read after every step
  // because it can grow while further items load.
  let bodyHeight = await page.evaluate(() => document.body.scrollHeight);
  for (let offset = 0; offset <= bodyHeight; offset += 50) {
    bodyHeight = await page.evaluate((y) => {
      const overlayClose = document.getElementsByClassName('s-overlay-close-b')[0];
      if (overlayClose) overlayClose.click();
      window.scrollTo(0, y);
      return document.body.scrollHeight;
    }, offset);
    await sleep(300);
  }

  // The third "sw-layout-box" element is the one read for the result list;
  // null is returned when it is missing so the fallback branch below can run.
  const listHtml = await page.evaluate(() => {
    const box = document.getElementsByClassName('sw-layout-box')[2];
    return box ? box.innerHTML : null;
  });

  if (listHtml) {
    const $ = cheerio.load(listHtml);
    $('li').each((idx, el) => {
      const item = $(el);
      const photo = item.find('div.sw-dpl-offer-photo a img');
      const title = photo.attr('alt');
      // List entries without a titled photo are skipped.
      if (!title) return;
      const companyLink = item.find('div.sm-offer-company a').eq(0);
      datas.push({
        title: title,
        img: photo.attr('src'),
        company: {
          name: companyLink.text().trim(),
          href: companyLink.attr('href'),
          offerid: companyLink.attr('offerid'),
          memberid: companyLink.attr('memberid'),
          gotodetail: companyLink.attr('gotodetail'),
        },
        price: item.find('.sm-offer-price').eq(0).text().trim(),
        location: item.find('.sm-offer-location').eq(0).text().trim(),
        // Lookup key for the detail request; replaced by the detail object later.
        itemMore: item.find('span.sw-ui-flaticon-ww-off-s').eq(0).attr('data-nick'),
      });
    });

    for (const record of datas) {
      if (!record.itemMore) continue;
      // Fire the detail request from inside the page; the fetch is not awaited
      // here, the 'response' listener above picks up the result.
      await page.evaluate((loginId) => {
        fetch('https://member.1688.com/member/ajax/member_bsr_indexs_json.do?_input_charset=utf-8&callback=diep12&loginid=' + encodeURIComponent(loginId))
          .then((r) => r.text())
          .then((data) => console.log(data))
          .catch((err) => console.log(err.message));
      }, record.itemMore);
      await sleep(1200);
    }
  } else {
    console.log('capcha or something wrong...');
  }

  logEnriched();
  // await browser.close();
  // save('./data.json', database);
  console.log('closed!!!!!!!!!');
})().catch((err) => {
  // Surface any failure of the crawl instead of an unhandled rejection.
  console.error(err);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Các API có thể sử dụng, cần đọc thêm:
Các URL có thể cần cookie hay gì đó, nhưng phải request với header referer https://s.1688.com/ ...
load list sản phẩm:
https://s.1688.com/selloffer/rpc_async_render.jsonp?keywords=<từ_tìm_kiếm>&n=y&sug=1_0&uniqfield=pic_tag_id&templateConfigName=marketOfferresult&offset=2&pageSize=60&asyncCount=20&startIndex=20&async=true&enableAsync=true&leftP4PIds=569025065725&rpcflag=new&pageName=market&callback=<tên_hàm_xử_lý_JSON>
load các chi tiết thêm về sản phẩm:
https://member.1688.com/member/ajax/member_bsr_indexs_json.do?_input_charset=utf-8&callback=<tên_hàm_xử_lý_JSON>&loginid=<id_của_sản_phẩm>