Skip to content

Instantly share code, notes, and snippets.

@carmel
Last active December 20, 2024 12:40
Show Gist options
  • Save carmel/7a58bfe42e5b77a13159a281018498ea to your computer and use it in GitHub Desktop.
Save carmel/7a58bfe42e5b77a13159a281018498ea to your computer and use it in GitHub Desktop.
chromedp example: open each list-page item's detail page in a new tab, then close that tab
// TestList2Detail drives a visible (non-headless) Chrome instance through a
// list page, collects the detail-page link of every list item, and then opens
// each link in its own tab, prints a fragment of the detail page, and closes
// the tab again.
//
// A detail page that fails to load is logged and skipped. A failure to load
// the list page or to parse a fetched document aborts the test via t.Fatalf
// rather than log.Fatalf: log.Fatalf exits the process immediately, which
// skips the deferred cancel functions below and takes the whole test binary
// down with it.
func TestList2Detail(t *testing.T) {
	// Start from chromedp's default allocator options and override a few flags.
	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.Flag("headless", false),       // disable headless mode so the browser window is shown
		chromedp.Flag("disable-gpu", false),    // leave GPU acceleration enabled (optional)
		chromedp.Flag("start-maximized", true), // maximize the window on startup (optional)
	)

	// Create the exec allocator with the custom options.
	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
	defer cancelAlloc()

	// Main browser context; also set up a custom logger.
	ctx, cancelBrowser := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
	defer cancelBrowser()

	// Target URL; replace this with the URL of your own list page.
	targetURL := "https://quotes.toscrape.com"

	// Detail-page links collected from the list page.
	var listLinks []string

	if err := chromedp.Run(ctx,
		// Open the list page.
		chromedp.Navigate(targetURL),
		// Wait until list items are visible; adjust the selector to the real page.
		chromedp.WaitVisible(`div.container div.row div.quote`, chromedp.ByQuery),
		// Collect the href of every list item's link.
		chromedp.Evaluate(`
Array.from(document.querySelectorAll('div.container div.row div.quote > span:nth-child(2) > a')).map(a => a.href);
`, &listLinks),
	); err != nil {
		t.Fatalf("Failed to navigate and get list links: %v", err)
	}
	fmt.Printf("Found %d list links\n", len(listLinks))

	// Visit every collected link.
	for _, link := range listLinks {
		if link == "" {
			continue
		}

		// A child context of the browser context operates on a new tab.
		tabCtx, tabCancel := chromedp.NewContext(ctx)

		if err := chromedp.Run(tabCtx, chromedp.Navigate(link)); err != nil {
			log.Printf("Failed to navigate to list link: %v, error: %v", link, err)
			tabCancel() // cancel the child context, which closes the new tab
			continue
		}

		// Declared per iteration so a failed read can never leave the previous
		// page's HTML behind.
		var htmlContent string
		err := chromedp.Run(tabCtx,
			// Read the HTML content of the new tab.
			chromedp.InnerHTML("html", &htmlContent),
		)
		// The tab is no longer needed once its HTML has been read (or the read failed).
		tabCancel()
		if err != nil {
			log.Printf("Failed to get inner html, error:%v", err)
			continue
		}

		doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
		if err != nil {
			t.Fatalf("Failed to parse HTML: %v", err)
		}
		fmt.Println(doc.Find("div.author-details > p:nth-child(2)").Text())
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment