Last active: January 10, 2022 03:48
-
-
Save AndrewPardoe/8701800c118e1945a4ea63a65f7f7acb to your computer and use it in GitHub Desktop.
Scrape flat list of UserVoice Ideas with links from a category's UserVoice pages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# UserVoice doesn't have a search capability that will filter on category. I only care about my category (C++) in a huge
# Visual Studio database. This script scrapes all UserVoice suggestions in my category with links into an HTML document.
# Improvements welcome from those who actually know Python--this is the first Python script I've ever needed to write.
import re
import urllib.request

import requests
from bs4 import BeautifulSoup
# Whack any Unicode characters when printing to file. Not correct, but not crashing. | |
def safewrite(file, string): | |
try: | |
file.write(string) | |
except UnicodeEncodeError: | |
for char in string: | |
try: | |
file.write(char) | |
except UnicodeEncodeError: | |
file.write("?") | |
# Specifics of my UserVoice page and category | |
prefix = 'https://visualstudio.uservoice.com' | |
firstPage = prefix + '/forums/121579-visual-studio-ide/category/30937-languages-c' | |
nextPage = prefix + '/forums/121579-visual-studio-ide/category/30937-languages-c/filters/top?page={}' | |
# Ideas are split across many pages. Find all page numbers from the Pagination control. | |
soup = BeautifulSoup(requests.get(firstPage).content, "lxml") | |
pagination = soup.find("div", attrs={'class':'uvPagination'}) | |
def pageeq(href): | |
return href and re.compile("page=").search(href) | |
redigits=re.compile('\d+') | |
# Loop through and find the last page number. Could probably write a better regex above. | |
i = 0 | |
for pageref in pagination.find_all(href=pageeq): | |
x = redigits.findall(pageref.string) | |
if x: | |
i = x[0] | |
upper = int(i) + 1 | |
# Create a local HTML page with a list of my category's UserVoice links | |
outfile = open('UserVoice.html', 'w') | |
outfile.write("<html>\n<head>\n<title>UserVoice items from {0}</title>\n</head>\n".format(firstPage)) | |
outfile.write("<body>\n<h2>UserVoice items from {0}</h2>\n<ul>\n".format(firstPage)) | |
# Run through every page, find the links, print the prefix, link, and link title | |
for page in range(1, upper): | |
print ("Processing page {0} of {1}".format(page, upper - 1)) | |
soup = BeautifulSoup(requests.get(nextPage.format(page)).content, "lxml") | |
for header in soup.find_all("h2", class_="uvIdeaTitle"): | |
outfile.write("\t<li><a href=\"{0}{1}\">".format(prefix, header.a.get('href'))) | |
safewrite(outfile, header.a.string) | |
outfile.write("</a/></li>\n") | |
# Close out the HTML page | |
outfile.write("</ul>\n</body>\n</html>\n") | |
outfile.close() |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.