Last active
December 20, 2024 12:40
-
-
Save carmel/7a58bfe42e5b77a13159a281018498ea to your computer and use it in GitHub Desktop.
chromedp example: go to a detail page via a list-page item, then close the current tab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
func TestList2Detail(t *testing.T) { | |
// 创建一个主上下文 | |
opts := append(chromedp.DefaultExecAllocatorOptions[:], | |
chromedp.Flag("headless", false), // 禁用无头模式 | |
chromedp.Flag("disable-gpu", false), // 启用 GPU 加速(可选) | |
chromedp.Flag("start-maximized", true), // 启动时窗口最大化(可选) | |
) | |
// 创建带有自定义选项的 ExecAllocator | |
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) | |
defer cancel() | |
// also set up a custom logger | |
ctx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf)) | |
defer cancel() | |
// 目标 URL | |
targetURL := "https://quotes.toscrape.com" // 将这里替换为你的列表页面的 URL | |
// 定义用于存储 HTML 内容的变量 | |
var htmlContent string | |
// 定义用于存储列表项链接的变量 | |
var listLinks []string | |
// 执行 chromedp 操作 | |
err := chromedp.Run(ctx, | |
// 打开目标页面 | |
chromedp.Navigate(targetURL), | |
// 等待列表项加载完成,需要根据实际情况选择选择器 | |
chromedp.WaitVisible(`div.container div.row div.quote`, chromedp.ByQuery), | |
// 获取所有列表项的超链接 | |
chromedp.Evaluate(` | |
Array.from(document.querySelectorAll('div.container div.row div.quote > span:nth-child(2) > a')).map(a => a.href); | |
`, &listLinks), | |
) | |
if err != nil { | |
log.Fatalf("Failed to navigate and get list links: %v", err) | |
} | |
fmt.Printf("Found %d list links\n", len(listLinks)) | |
// 遍历列表项链接 | |
for _, link := range listLinks { | |
if link == "" { | |
continue | |
} | |
// 创建一个子上下文用于操作新标签页 | |
tabCtx, tabCancel := chromedp.NewContext(ctx) | |
// 执行新标签页的操作 | |
err = chromedp.Run(tabCtx, | |
chromedp.Navigate(link), | |
) | |
if err != nil { | |
log.Printf("Failed to navigate to list link: %v, error: %v", link, err) | |
tabCancel() // 取消子上下文,关闭新标签页 | |
continue | |
} | |
err = chromedp.Run(tabCtx, | |
// 获取新标签页的 HTML 内容 | |
chromedp.ActionFunc(func(ctx context.Context) error { | |
err = chromedp.InnerHTML("html", &htmlContent).Do(ctx) | |
return err | |
}), | |
) | |
tabCancel() | |
if err != nil { | |
log.Printf("Failed to get inner html, error:%v", err) | |
continue | |
} | |
doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent)) | |
if err != nil { | |
log.Fatalf("Failed to parse HTML: %v", err) | |
} | |
fmt.Println(doc.Find("div.author-details > p:nth-child(2)").Text()) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment