blakev · July 19, 2024 06:39 · Nov 10, 2015 · Nov 10, 2015
diff --git a/ksl.py b/ksl.py
@@ -0,0 +1,181 @@
+import re
+import argparse
+import concurrent.futures
+import string
+
+from collections import namedtuple
+from urllib.request import urlopen
+from urllib.parse import urlencode
+
+from bs4 import BeautifulSoup
+
+# one scraped classified ad; `age` holds the posting-age text shown on the page
+Listing = namedtuple('Listing', ['title', 'city', 'state', 'age', 'price'])
+
+class KSL(object):
+	"""Thin client for searching KSL classifieds and scraping result pages.
+
+	Builds one search URL per comma-separated term, fetches the pages
+	concurrently on a thread pool, and parses the returned HTML into
+	`Listing` tuples.
+	"""
+
+	URL = 'http://ksl.com/?nid=231'
+
+	# querystring values applied to every search; these take precedence
+	# over same-named values computed in build_qs()
+	URL_QS = {
+		'sold': 0,  # do not list sold items
+		'nocache': 1,  # don't cache results, FRESH!
+		'viewNumResults': 20,  # maximum results per "page"
+		'sort': 1,  # newest first
+	}
+
+	def __init__(self):
+		"""Create the worker pool shared by every call to `search`."""
+		self.thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=4)
+
+	def __fix_query(self, q):
+		"""Split a comma-separated query string into stripped search terms."""
+		return [item.strip() for item in q.split(',')]
+
+	def __do_request(self, args):
+		"""Fetch a single search URL.
+
+		`args` is a `(query, url)` or `(query, url, timeout)` tuple; the
+		timeout is in seconds and defaults to 5 when omitted.
+
+		Returns a `(query, body)` tuple where `body` is the raw response
+		bytes.
+		"""
+		if len(args) == 2:
+			query, url = args
+			timeout = 5  # seconds
+		else:
+			query, url, timeout = args
+
+		# close the response as soon as the body has been read instead of
+		# leaving the connection open until garbage collection
+		with urlopen(url, timeout=timeout) as response:
+			return (query, response.read())
+
+	def search(self, query, **etc):
+		"""Yield a `(term, html)` pair for each comma-separated term in `query`.
+
+		The requests run on the shared thread pool. Extra keyword
+		arguments are forwarded to `build_qs`.
+		"""
+		# the pool is deliberately NOT used as a context manager here:
+		# leaving a `with` block shuts the executor down, which made every
+		# search after the first one raise RuntimeError
+		requests = self.build_qs(query, **etc)
+		yield from self.thread_pool.map(self.__do_request, requests)
+
+	def find_elements(self, html):
+		"""Parse a results page and yield a `Listing` for each usable ad.
+
+		Ads that lack a title link, a parseable price, or a location
+		element are skipped rather than aborting the whole page.
+		"""
+		soup = BeautifulSoup(html, 'html.parser')
+
+		for ad_box in soup.find_all('div', class_='adBox'):
+			links = ad_box.find_all('a', class_='listlink')
+			if not links:
+				continue
+
+			# clean up the title: trim surrounding punctuation, drop words
+			# of three characters or fewer, capitalize what is left
+			title = links[0].text.strip(string.punctuation)
+			title = ' '.join(
+				t.capitalize() for t in title.split() if len(t) > 3
+			)
+
+			# get the price, ignoring prices that "don't exist"
+			price_box = ad_box.find('div', class_='priceBox')
+			if not price_box or price_box.text.count('-') >= 6:
+				continue
+
+			price_span = price_box.find('span')
+			if price_span is None:
+				continue
+
+			price = price_span.text.strip('$').replace(',', '').strip()
+			try:
+				# NOTE(review): presumably the scraped figure carries the
+				# cents without a decimal point, hence the division by
+				# 100 -- confirm against the live markup
+				price = int(float(price) / 100)
+			except (ValueError, OverflowError):
+				# non-numeric price text; treat it like a missing price
+				continue
+
+			# get the location
+			ad_time = ad_box.find('div', class_='adTime')
+			location_span = None if ad_time is None else ad_time.find('span')
+			if location_span is None:
+				continue
+
+			# drop any non-ASCII characters, then split on the comma
+			location = location_span.text.encode('ascii', 'ignore')
+			location = location.decode('ascii').split(',')
+
+			# first piece is the city; the state is the first two letters
+			# of the last piece (stripped first so that leading whitespace
+			# does not eat one of the two letters)
+			city = ' '.join(p.capitalize() for p in location[0].split())
+			state = location[-1].strip()[:2].upper()
+
+			# get the age of the posting: the text after the last '|'
+			lifespan = ad_time.text
+			lifespan = lifespan.encode('ascii', 'ignore').split(b'|')[-1].strip()
+			lifespan = lifespan.decode('ascii')
+
+			yield Listing(title, city, state, lifespan, price)
+
+	def build_qs(self, query, **etc):
+		"""Yield a `(term, url)` pair for each comma-separated term in `query`.
+
+		Recognized keyword arguments are `min_price`, `max_price`,
+		`zipcode` (sent as `zip`) and `distance`. Any other keyword is
+		lower-cased and added to the querystring unless a field of that
+		name is already set. `None` values are sent as empty strings.
+
+		Raises ValueError if a supplied price cannot be converted to int.
+		"""
+		# the price bounds are identical for every term, so compute them
+		# once; the minimum price is always at least $0
+		minp = max(0, int(etc.get('min_price', None) or 0))
+
+		maxp = etc.get('max_price', None)
+		if maxp is not None:
+			maxp = max(0, int(maxp))
+			# if we have a minimum and maximum price
+			# then we want to make sure the lower value is set to `minp`
+			minp, maxp = sorted([minp, maxp])
+
+		for term in self.__fix_query(query):
+			qs = {
+				'min_price': minp,
+				'max_price': maxp,
+				'zip': etc.get('zipcode', None),
+				'distance': etc.get('distance', None),
+				'search': term,
+			}
+
+			# apply defaults
+			qs.update(self.URL_QS)
+
+			# fill in any additional parameters
+			# that were passed, but not explicitly handled
+			for key, value in etc.items():
+				if key == 'zipcode':
+					# already sent as `zip`; do not duplicate it
+					continue
+				qs.setdefault(key.lower(), value)
+
+			# make all `None` values blank for our querystring
+			qs = {k: ('' if v is None else v) for k, v in qs.items()}
+
+			yield (term, '{}&{}'.format(self.URL, urlencode(qs)))
+
+	def listing(self, id):
+		"""Placeholder for looking up a single listing by id; not implemented."""
+		pass
+
+
+def main(args):
+	"""Run every search named in `args['query']` and print the results.
+
+	`args` is a dict of parsed command-line options. Every key other than
+	`query` is forwarded to `KSL.search` as a keyword argument. Returns
+	without doing anything when there is no query.
+	"""
+	if args.get('query') is None:
+		return
+
+	# work on a copy so the caller's dict is left untouched
+	options = dict(args)
+	query = options.pop('query')
+
+	# create the thin object
+	ksl = KSL()
+
+	# find our results
+	for term, data in ksl.search(query, **options):
+		for index, result in enumerate(ksl.find_elements(data)):
+			if index == 0:
+				print('==== {}'.format(term))
+
+			# the title column is 35 characters wide: titles that fit are
+			# shown in full, longer ones are cut to 33 characters + '..'
+			title = result.title
+			if len(title) > 35:
+				title = title[:33] + '..'
+
+			f = '{0: >2}. {2: >7} - {1: <35} : {3: <23} {4} - {5: <8}'.format(
+				index + 1,
+				title,
+				'${}'.format(result.price),
+				result.city,
+				result.state,
+				result.age
+			)
+
+			print(f)
+		print()
+
+
+if __name__ == '__main__':
+	# command-line entry point: parse the options and hand them to main()
+	p = argparse.ArgumentParser(
+		description='ksl - command line utility to query KSL classifieds'
+	)
+
+	p.add_argument('query', action='store', default=None,
+		help='search terms; separate multiple searches with commas')
+	# type=int makes argparse reject a non-numeric price with a usage
+	# error instead of a traceback later on when the URL is built
+	p.add_argument('-m', '--min-price', action='store', default=0, type=int,
+		dest='min_price', help='minimum price')
+	p.add_argument('-M', '--max-price', action='store', default=None, type=int,
+		dest='max_price', help='maximum price')
+	p.add_argument('-z', '--zip-code', action='store', default=None,
+		dest='zipcode', help='value sent as the "zip" search parameter')
+	p.add_argument('-d', '--distance', action='store', default=None,
+		dest='distance', help='value sent as the "distance" search parameter')
+
+	args = p.parse_args()
+
+	# run the searches
+	main(vars(args))
+