Skip to content

Instantly share code, notes, and snippets.

@moniquelive
Created June 19, 2021 04:46
Show Gist options
  • Save moniquelive/f0f7788938d00a956d22ed0e0c06706e to your computer and use it in GitHub Desktop.
Save moniquelive/f0f7788938d00a956d22ed0e0c06706e to your computer and use it in GitHub Desktop.
Crawler in Golang
package main
import (
"fmt"
"os"
"strings"
"github.com/anaskhan96/soup"
)
// main fetches the Amazon Brazil best-sellers page and, for every category
// widget found on it, prints the category heading, a separator line, and then
// the (price, title) pair returned by parseProduct for each listed item.
//
// A failure to download or parse the landing page is reported on stderr and
// terminates the program with exit status 1.
func main() {
	url := "https://www.amazon.com.br/gp/bestsellers/"
	resp, err := soup.Get(url)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	doc := soup.HTMLParse(resp)
	if doc.Error != nil {
		// Without a parsed root there is nothing to search; bail out instead
		// of calling FindAll on an unusable node.
		fmt.Fprintln(os.Stderr, doc.Error)
		os.Exit(1)
	}

	//----------------------------------------------------------- categories ---
	divs := doc.FindAll("div", "class", "zg_homeWidget")
	for _, div := range divs {
		// A widget without an <h3> still has its items listed; only the
		// heading is left blank. Calling Text() on a failed lookup would
		// dereference a nil node.
		heading := ""
		if h3 := div.Find("h3"); h3.Error == nil {
			heading = h3.Text()
		}
		fmt.Println("\n", heading)
		fmt.Println("------------------------------------------------------------")

		items := div.FindAll("div", "class", "zg_item")
		for _, item := range items {
			a := item.Find("a")
			if a.Error != nil {
				// No link in this item, so there is no product page to visit.
				continue
			}
			fmt.Println(parseProduct(a))
		}
	}
}
//------------------------------------------------------------------ product ---

// parseProduct downloads the product page referenced by the href attribute of
// the anchor a and returns the product's price and title, in that order, each
// with surrounding whitespace trimmed.
//
// A relative href is resolved against https://www.amazon.com.br; an href that
// is already an absolute http(s) URL is used unchanged. When the anchor is
// unusable or carries no href, both results are empty and no request is made.
// When the title or the price cannot be located on the page, that result is
// the empty string.
//
// A failure to download or parse the product page is reported on stderr and
// terminates the program with exit status 1.
func parseProduct(a soup.Root) (string, string) {
	if a.Error != nil || a.Pointer == nil {
		return "", ""
	}
	href := a.Attrs()["href"]
	if href == "" {
		// Prefixing an empty href would just fetch the site's home page.
		return "", ""
	}
	url := href
	if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") {
		url = "https://www.amazon.com.br" + href
	}

	resp, err := soup.Get(url)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	doc := soup.HTMLParse(resp)
	if doc.Error != nil {
		fmt.Fprintln(os.Stderr, doc.Error)
		os.Exit(1)
	}

	var title string
	if titleEl := doc.Find("span", "id", "productTitle"); titleEl.Error == nil {
		title = strings.TrimSpace(titleEl.Text())
	}

	// Candidate <span> locations for the price, tried in order; the first one
	// present on the page wins. Presumably different product page layouts
	// expose the price under different attributes.
	priceSelectors := [][2]string{
		{"id", "price_inside_buybox"},
		{"class", "priceBlockSavingsString"},
		{"class", "header-price"},
		{"class", "priceBlockBuyingPriceString"},
	}
	var price string
	for _, sel := range priceSelectors {
		if priceEl := doc.Find("span", sel[0], sel[1]); priceEl.Error == nil {
			price = strings.TrimSpace(priceEl.Text())
			break
		}
	}

	return price, title
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment