Created
December 14, 2015 19:04
-
-
Save DenisCarriere/1a56ac970f91e90e9f46 to your computer and use it in GitHub Desktop.
MLS Scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time
from collections import deque

import geocoder
import requests
import unicodecsv as csv
# Scraped listings keyed by MLS number. The two halves of a split price range
# share their midpoint, so the same listing can come back from both queries;
# keying on the MLS number keeps a single row per listing.
container = {}

# Endpoint every search request is POSTed to.
url = "https://www.realtor.ca/api/Listing.svc/PropertySearch_Post"

# Values sent as "PropertySearchTypeId", mapped to the label used in the
# progress messages and in the "PropertySearch" output column.
PropertySearchType = {
    1: "Residential",
    2: "Recreational",
    3: "Condo/Strata",
    8: "Multi Family",
    4: "Agriculture",
    5: "Parking",
    6: "Vacant Land",
}

# NOTE(review): not referenced anywhere in this script (every request sends
# "LandSizeRange": "0-0"); kept in case other code relies on the name.
LandSizeRange = ["0-10", "10-50", "50-100", "100-320", "320-640", "640-1000", "1000-0"]

interval = 250000  # width of each initial price range
intervals = 2  # number of initial price ranges per property search type

# Sent as both "RecordsPerPage" and "MaximumResults". A response holding
# exactly this many results is treated as possibly truncated, so its price
# range is split in two and queried again.
max_results = 350

# Output columns, in the order the row dictionaries are built.
FIELDNAMES = (
    'lng',
    'lat',
    'address',
    'postal',
    'property_type',
    'price',
    'mls',
    'bathrooms',
    'bedrooms',
    'PriceMin',
    'PriceMax',
    'PropertySearch',
    'LandSize',
    'url',
)

REQUEST_TIMEOUT = 30  # seconds to wait for the server before giving up on one attempt
MAX_ATTEMPTS = 5  # connection attempts per price range before it is skipped


def initial_price_ranges(step, count):
    """Return a queue of ``count`` consecutive ``(low, high)`` ranges of width ``step``."""
    return deque((i * step, (i + 1) * step) for i in range(count))


def split_price_range(price_min, price_max):
    """Split a price range at its integer midpoint.

    Returns a list with the two halves ``(price_min, mid)`` and
    ``(mid, price_max)``, or an empty list when the range is too narrow to be
    split (a narrower range would otherwise be re-queued forever).
    """
    mid = price_min + (price_max - price_min) // 2
    if mid <= price_min or mid >= price_max:
        return []
    return [(price_min, mid), (mid, price_max)]


def build_payload(bounds, search_id, price_min, price_max):
    """Build the form data for one search request.

    ``bounds`` is any object exposing ``west``, ``east``, ``south``, ``north``,
    ``lng`` and ``lat`` attributes (the geocoder result in ``main``). Only the
    search type, the price bounds and the bounding box vary between requests;
    every other field is a fixed value.
    """
    return {
        "CultureId": "1",
        "ApplicationId": "1",
        "RecordsPerPage": max_results,
        "MaximumResults": max_results,
        "PropertySearchTypeId": search_id,
        "PriceMin": price_min,
        "PriceMax": price_max,
        "LandSizeRange": "0-0",
        "TransactionTypeId": "2",
        "StoreyRange": "0-0",
        "BedRange": "0-0",
        "BathRange": "0-0",
        "LongitudeMin": bounds.west,
        "LongitudeMax": bounds.east,
        "LatitudeMin": bounds.south,
        "LatitudeMax": bounds.north,
        "SortOrder": "A",
        "SortBy": "1",
        "viewState": "m",
        "Longitude": bounds.lng,
        "Latitude": bounds.lat,
        "ZoomLevel": "8",
    }


def post_with_retry(payload):
    """POST ``payload`` to ``url``, retrying on request errors.

    Returns the response, or ``None`` once ``MAX_ATTEMPTS`` attempts have all
    failed. Only ``requests.RequestException`` is caught, so Ctrl-C and
    programming errors are no longer swallowed, and a permanent failure
    (e.g. an SSL error) no longer retries forever.
    """
    for _ in range(MAX_ATTEMPTS):
        try:
            return requests.post(url, data=payload, timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            print(payload['PriceMin'], payload['PriceMax'])
            time.sleep(1)
            print('Connectin Fail...')
    return None


def flatten_listing(result, price_min, price_max, search_name):
    """Flatten one entry of the response's ``Results`` list into a CSV row dict.

    ``price_min``/``price_max`` and ``search_name`` record which query returned
    the listing. Bathroom and bedroom counts default to 0 and the land size to
    ``None`` when the listing does not provide them.
    """
    prop = result['Property']
    address = prop['Address']
    building = result['Building']
    return {
        'lng': address['Longitude'],
        'lat': address['Latitude'],
        'address': address['AddressText'],
        'postal': result['PostalCode'],
        'property_type': prop['Type'],
        'price': prop['Price'],
        'mls': result['MlsNumber'],
        'bathrooms': building.get('BathroomTotal', 0),
        'bedrooms': building.get('Bedrooms', 0),
        'PriceMin': price_min,
        'PriceMax': price_max,
        'PropertySearch': search_name,
        'LandSize': result['Land'].get('SizeTotal'),
        'url': 'https://www.realtor.ca' + result['RelativeDetailsURL'],
    }


def write_listings(path, rows):
    """Write ``rows`` (dicts keyed by ``FIELDNAMES``) to ``path`` as CSV.

    The header is written even when ``rows`` is empty. The file is opened in
    binary mode because it is handed to the ``unicodecsv`` writer.
    """
    with open(path, 'wb') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES, dialect='excel')
        writer.writeheader()
        for row in rows:
            writer.writerow(row)


def main():
    """Scrape listings for every property type and write them to ``mls.txt``.

    For each property search type the price axis is walked as a work queue of
    ranges; a range whose response is full (``max_results`` entries) is split
    in half and both halves are queued for another query.
    """
    # Geocoded here rather than at import time so that importing this module
    # does not perform network calls.
    g = geocoder.google("New Brunswick, Canada")

    for search_id, search_name in PropertySearchType.items():
        pending = initial_price_ranges(interval, intervals)
        while pending:
            price_min, price_max = pending.popleft()
            print('Looking for {} ({}$-{}$)...'.format(search_name, price_min, price_max))

            payload = build_payload(g, search_id, price_min, price_max)
            r = post_with_retry(payload)
            if r is None:
                print('Giving up on {}$-{}$ after {} attempts'.format(
                    price_min, price_max, MAX_ATTEMPTS))
                continue
            if not r.ok:
                print('Request failed with HTTP status {}'.format(r.status_code))
                continue

            results = r.json()['Results']
            print('Found {} results!'.format(len(results)))

            if len(results) == max_results:
                halves = split_price_range(price_min, price_max)
                if halves:
                    pending.extend(halves)
                    print('Price split {}$-{}$'.format(price_min, price_max))
                else:
                    print('Cannot split {}$-{}$ any further'.format(price_min, price_max))

            # Results of a full page are still recorded; the re-query of the
            # halves simply overwrites the same MLS numbers.
            for result in results:
                container[result['MlsNumber']] = flatten_listing(
                    result, price_min, price_max, search_name)

    write_listings('mls.txt', container.values())


if __name__ == "__main__":
    main()
This script most likely doesn’t work anymore
This script most likely doesn’t work anymore
Hi Denis, do you know why? Is it because "https://www.realtor.ca/api/Listing.svc/PropertySearch_Post" doesn't work anymore?
You can check out https://github.com/Froren/realtorca for an up-to-date script.
I've created an up-to-date wrapper for this in Python. Check here https://github.com/harry-s-grewal/realtorca
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am getting a `requests.exceptions.SSLError`. Is it working for anyone else?