pablospizzamiglio · July 18, 2020 12:55
diff --git a/web_scraper.py b/web_scraper.py
 from contextlib import closing

 # pip install beautifulsoup4 requests
 from bs4 import BeautifulSoup
 from requests import get
 from requests.exceptions import RequestException


 def simple_get(url, timeout=30):
    """
    Fetch `url` with an HTTP GET request and return the raw response body.

    The body is returned as bytes (`response.content`) only when
    `is_good_response` accepts the response; otherwise None is returned.
    Any `RequestException` raised while requesting or reading the body
    (a timeout included) is passed to `log_error` and also yields None.

    `timeout` is forwarded to `requests.get`. Without a timeout, requests
    waits indefinitely for a server that never answers.
    """
    try:
        with closing(get(url, stream=True, timeout=timeout)) as response:
            if not is_good_response(response):
                return None
            return response.content
    except RequestException as e:
        log_error("Error during requests to {0} : {1}".format(url, str(e)))
        return None


 def is_good_response(response):
    """
    Return True if `response` has status 200 and an HTML content type.

    A response without a Content-Type header is treated as not HTML
    (False is returned) instead of raising KeyError.
    """
    # .get() instead of [] so a missing header cannot raise; `or ""` also
    # covers a header that is present but holds None.
    content_type = (response.headers.get("Content-Type") or "").lower()
    return response.status_code == 200 and "html" in content_type


 def log_error(e):
    """
    Report an error by printing it to standard output.

    Error reporting lives in its own function so that the mechanism can
    be replaced in one place.
    """
    message = str(e)
    print(message)


 def get_product_metadata(url):
    """
    Download the product detail page at `url` and return its metadata.

    Returns a dictionary mapping the `name` attribute of each matching
    <meta> tag to its `content` attribute. Only tags whose name is listed
    in META_NAMES are included; a matching tag without a `content`
    attribute is skipped.

    Raises Exception if `simple_get` returns None for `url`.
    """
    response = simple_get(url)

    if response is None:
        # Raise an exception if we failed to get any data from the url
        raise Exception("Error retrieving contents at {}".format(url))

    html = BeautifulSoup(response, "html.parser")

    # Every entry needs its own trailing comma: adjacent string literals
    # are silently concatenated into one (never matching) name.
    META_NAMES = (
        "description",
        "productcode",
        "productid",
        "productprice",
        "productsaleprice",
        "productstatus",
    )

    return {
        meta.attrs["name"]: meta.attrs["content"]
        for meta in html.select("meta")
        if "name" in meta.attrs
        and meta.attrs["name"] in META_NAMES
        and "content" in meta.attrs
    }


 if __name__ == "__main__":
    # Script entry point: fetch the metadata of one sample product page.
    # The returned dictionary is not used here.
    get_product_metadata(
        url="https://www.lenovo.com/us/en/laptops/thinkpad/thinkpad-x/ThinkPad-X1-Carbon-6th-Gen/p/22TP2TXX16G"
    )
	from contextlib import closing

	# pip install beautifulsoup4 requests
	from bs4 import BeautifulSoup
	from requests import get
	from requests.exceptions import RequestException


	def simple_get(url, timeout=30):
	    """
	    Fetch `url` with an HTTP GET request and return the raw response body.

	    The body is returned as bytes (`response.content`) only when
	    `is_good_response` accepts the response; otherwise None is returned.
	    Any `RequestException` raised while requesting or reading the body
	    (a timeout included) is passed to `log_error` and also yields None.

	    `timeout` is forwarded to `requests.get`. Without a timeout, requests
	    waits indefinitely for a server that never answers.
	    """
	    try:
	        with closing(get(url, stream=True, timeout=timeout)) as response:
	            if not is_good_response(response):
	                return None
	            return response.content
	    except RequestException as e:
	        log_error("Error during requests to {0} : {1}".format(url, str(e)))
	        return None


	def is_good_response(response):
	    """
	    Return True if `response` has status 200 and an HTML content type.

	    A response without a Content-Type header is treated as not HTML
	    (False is returned) instead of raising KeyError.
	    """
	    # .get() instead of [] so a missing header cannot raise; `or ""` also
	    # covers a header that is present but holds None.
	    content_type = (response.headers.get("Content-Type") or "").lower()
	    return response.status_code == 200 and "html" in content_type


	def log_error(e):
	    """
	    Report an error by printing it to standard output.

	    Error reporting lives in its own function so that the mechanism can
	    be replaced in one place.
	    """
	    message = str(e)
	    print(message)


	def get_product_metadata(url):
	    """
	    Download the product detail page at `url` and return its metadata.

	    Returns a dictionary mapping the `name` attribute of each matching
	    <meta> tag to its `content` attribute. Only tags whose name is listed
	    in META_NAMES are included; a matching tag without a `content`
	    attribute is skipped.

	    Raises Exception if `simple_get` returns None for `url`.
	    """
	    response = simple_get(url)

	    if response is None:
	        # Raise an exception if we failed to get any data from the url
	        raise Exception("Error retrieving contents at {}".format(url))

	    html = BeautifulSoup(response, "html.parser")

	    # Every entry needs its own trailing comma: adjacent string literals
	    # are silently concatenated into one (never matching) name.
	    META_NAMES = (
	        "description",
	        "productcode",
	        "productid",
	        "productprice",
	        "productsaleprice",
	        "productstatus",
	    )

	    return {
	        meta.attrs["name"]: meta.attrs["content"]
	        for meta in html.select("meta")
	        if "name" in meta.attrs
	        and meta.attrs["name"] in META_NAMES
	        and "content" in meta.attrs
	    }


	if __name__ == "__main__":
	    # Script entry point: fetch the metadata of one sample product page.
	    # The returned dictionary is not used here.
	    get_product_metadata(
	        url="https://www.lenovo.com/us/en/laptops/thinkpad/thinkpad-x/ThinkPad-X1-Carbon-6th-Gen/p/22TP2TXX16G"
	    )