index.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. #!/usr/bin/python3
  2. import mysql.connector
  3. import requests
  4. from bs4 import BeautifulSoup
  5. import urllib.parse
  6. import datetime
  7. import custom_email
  8. from tabulate import tabulate
  9. from configparser import ConfigParser
  10. from os import path
  11. import logging
  12. from email.mime.text import MIMEText
  13. ### TO DO ###
  14. #
  15. # Check if db entries no longer appear online (mark expired)
  16. # When checking online from various sites, check if address already exists in db
  17. # - if so, warn user and do not add
  18. # Check results against database for changes
  19. # - update and add/change date_modified
  20. # Add database column to hold parcel number. Make links to GIS servers
  21. #
  22. #############
  23. class Property:
  24. """Description of a proerty"""
  25. def __init__(self, site_name, type, MLS, address, city, st, zip, county, price, acres, title='', sqft=0, bedrooms=0,
  26. baths=0, description='', link=''):
  27. self.site_name = site_name
  28. self.type = type
  29. self.MLS = MLS
  30. self.title = title
  31. self.sqft = sqft
  32. self.bedrooms = bedrooms
  33. self.baths = baths
  34. self.address = address
  35. self.city = city
  36. self.st = st
  37. self.zip = zip
  38. self.county = county
  39. self.price = price
  40. self.acres = acres
  41. self.description = description
  42. self.link = link
  43. class Parameters:
  44. '''Parameters taken from config file'''
  45. def __init__(self, file='landsearch.conf'):
  46. # print(path.dirname(__file__))
  47. self.file = path.join(path.dirname(__file__), file)
  48. if not path.exists(self.file):
  49. raise FileNotFoundError("The config file cannot be opened", self.file)
  50. try:
  51. self.config = ConfigParser()
  52. self.config.read(self.file)
  53. self.search_params = self.config['Search']
  54. self.log_params = self.config['Logging']
  55. self.email_params = self.config['Email']
  56. except Exception as err:
  57. print(err, "Using default search Parameters")
  58. class Mylogger:
  59. ''' Logging tool for this session'''
  60. def __init__(self):
  61. log_params = Parameters().log_params
  62. filename = log_params.get('log_file')
  63. level = int(log_params.get('logging_level', str('30')))
  64. format = '%(asctime)s %(levelname)-8s %(message)s'
  65. datefmt = '%Y-%m-%d %H:%M:%S'
  66. class Search:
  67. '''Universal Search Criteria'''
  68. def checktype(self, attribute):
  69. '''Fixes string None in config file and converts to '' '''
  70. if not attribute == 'None':
  71. return attribute
  72. else:
  73. return ''
  74. def __init__(self, file='landsearch.conf'):
  75. params = Parameters()
  76. search_params = params.search_params
  77. log_params = params.log_params
  78. logging.basicConfig(filename=log_params.get('log_file'),
  79. level=int(log_params.get('logging_level', str('30'))),
  80. format='%(asctime)s %(levelname)-8s %(message)s',
  81. datefmt='%Y-%m-%d %H:%M:%S') ## Default log level WARNING (30)
  82. logging.getLogger("urllib3").setLevel(logging.WARNING) ## Supress Requests method logging
  83. logging.debug("Log level set to %s", logging.root.level)
  84. Mylogger()
  85. county = search_params.get('county', ['Gwinnett', 'Hall', 'Jackson', 'Walton', 'Barrow'])
  86. if isinstance(county, str):
  87. county = county.split(", ")
  88. type = search_params.get('type', ['farm', 'house', 'land'])
  89. if isinstance(type, str):
  90. type = type.split(", ")
  91. self.types = ['land', 'farm', 'home', 'house']
  92. self.county = county
  93. self.lower_price = self.checktype(search_params.get('lower_price', '0'))
  94. self.upper_price = self.checktype(search_params.get('upper_price', '525000'))
  95. self.lower_acres = self.checktype(search_params.get('lower_acres', '5'))
  96. self.upper_acres = self.checktype(search_params.get('upper_acres', '15'))
  97. self.type = type ##accept list!
  98. self.lower_sqft = self.checktype(search_params.get('lower_sqft', ''))
  99. self.upper_sqft = self.checktype(search_params.get('upper_sqft', ''))
  100. self.lower_bedrooms = self.checktype(search_params.get('lower_bedrooms', ''))
  101. self.upper_bedrooms = self.checktype(search_params.get('upper_bedrooms', ''))
  102. for property_type in self.type:
  103. assert property_type in self.types, (
  104. "Unknown type '" + property_type + "'. Property Type must be of type: " + str(self.types))
  105. ## FOR TESTING, PRINT ALL ATTRIBUTES OF SEARCH ##
  106. logging.debug(vars(self))
  107. class ImproperSearchError(Exception):
  108. def __init__(self, search, message="Improper Search. Must use instance of Search class"):
  109. self.search = search
  110. self.message = message
  111. super().__init__(self.message)
  112. class MLSDATA:
  113. """Fetches and stores MLS Data
  114. Currently only supports GeorgiaMLS.com (GMLS)"""
  115. counties = ['Gwinnett', 'Barrow', 'Hall', 'Jackson', 'Walton']
  116. GoogleAPIKey = 'AIzaSyAXAnpBtjv760W8YIPqKZ0dFXpwAaZN7Es'
  117. # live_google = False
  118. def __init__(self, mlstype):
  119. self.parameters = Parameters()
  120. self.help = "This is a class that will retrieve MLS data from various sources, store the info in a database, and run queries on the data."
  121. self.mlstype = mlstype.lower() ## Determines what kind of data is to be retreieve (gmls, Zillow, etc)
  122. self.cursor = ''
  123. self.cnx = ''
  124. self.new_listings = []
  125. self.email = self.parameters.search_params.getboolean('email')
  126. self.live_google = self.parameters.search_params.getboolean('live_google')
  127. self.recipients = self.parameters.email_params['recipients']
  128. print('Email ' + str(self.email))
  129. def stringbuilder(self, search: Search, county):
  130. """ Takes Search class and build appropriate URL query based on mlstype. Currently only supports gmls."""
  131. if self.mlstype == 'gmls':
  132. base_addr = 'https://www.georgiamls.com/real-estate/search-action.cfm?'
  133. params = [('cnty', county),
  134. ('lpl', search.lower_price), ('lph', search.upper_price),
  135. ('acresL', search.lower_acres), ('acresH', search.upper_acres),
  136. ('sqftl', search.lower_sqft), ('sqfth', search.upper_sqft),
  137. ('orderBy', 'b'),
  138. ('scat', '1'),
  139. ('sdsp', 'g')]
  140. for type in search.type:
  141. if 'land' in type.lower():
  142. params.append(('typ', 'll'))
  143. if 'farm' in type.lower():
  144. params.append(('typ', 'af'))
  145. if 'home' in type.lower():
  146. params.append(('typ', 'sd'))
  147. if 'house' in type.lower():
  148. params.append(('typ', 'sd'))
  149. search_string = base_addr + urllib.parse.urlencode(params)
  150. print(search_string)
  151. logging.debug(search_string)
  152. return search_string
  153. def break_address(self, address):
  154. """Takes an address string in the form 'street address|city, state zip' and returns a list"""
  155. street = address[:address.find('|')]
  156. csz = address[address.find('|') + 1:]
  157. city = csz[:csz.find(',')]
  158. st = csz[csz.find(',') + 1:].split(' ')[1]
  159. zip = csz[csz.find(',') + 1:].split(' ')[2]
  160. split_address = [street, city, st, zip]
  161. return split_address
  162. def gmlsparser(self, URL, county, pages=''):
  163. """ Retrieve the website for georgiamls.com and returns a list of Property objects.
  164. UNIQUE TO GEORGIAMLS.COM ONLY!!"""
  165. properties_list = []
  166. r = requests.get(URL)
  167. soup = BeautifulSoup(r.content, 'html5lib')
  168. if pages == '':
  169. try:
  170. pages = soup.find("div", {'class': 'small listing-pagination-count'}).getText().strip().split(" ")[-1]
  171. current_page = \
  172. soup.find("div", {'class': 'small listing-pagination-count'}).getText().strip().split(" ")[-3]
  173. except AttributeError as err:
  174. print("No Results Found.")
  175. return
  176. else:
  177. print('pages already set to: ' + str(pages))
  178. for page in range(0, int(pages)):
  179. print('Processing Page: ' + str(page + 1) + ' of ' + str(pages))
  180. if not page == 0:
  181. next_URL = URL + '&start=' + str(((12 * page) + 1))
  182. soup = BeautifulSoup(requests.get(next_URL).content, 'html5lib')
  183. raw_listings = soup.findAll("div", {'class': 'col-xs-12 col-sm-6 col-lg-4 text-center listing-gallery'})
  184. for listing in raw_listings:
  185. items = listing.findAll("p") ##
  186. site_name = self.mlstype
  187. MLS = " ".join(items[3].getText().strip()[6:15].split()) ## MLS NUMBER
  188. title = '' ## Listing Title (address if no title)
  189. price = items[0].string.strip() ## Price
  190. if self.mlstype == 'gmls':
  191. link = 'https://www.georgiamls.com' + listing.a['href']
  192. detail_request = requests.get(link)
  193. detail_soup = BeautifulSoup(detail_request.content, 'html5lib')
  194. details = detail_soup.findAll('tr')
  195. bedbath = details[1].findAll('td')[1].getText().strip().split('/')
  196. br = bedbath[0][:-3]
  197. ba = bedbath[1][:-3]
  198. baths = ba ## IF House is present
  199. bedrooms = br ## IF House is present
  200. address = ''
  201. for element in details:
  202. if 'sqft' in element.getText():
  203. sqft = element.findAll('td')[1].getText().strip()[:-5].replace(',', '')
  204. if 'lot size' in element.getText().lower():
  205. acres = element.findAll('td')[1].getText().strip()[:-6]
  206. if 'Property Type' in element.getText():
  207. ptype = element.findAll('td')[1].getText().strip()
  208. if 'acreage' in ptype.lower():
  209. type = 'af'
  210. elif 'land lot' in ptype.lower():
  211. type = 'll'
  212. elif 'single family home' in ptype.lower():
  213. type = 'sf'
  214. else:
  215. type = 'unknown'
  216. if 'Address' in element.getText():
  217. if not address: # Prevents finding the word 'address' elsewhere in the listings
  218. address = element.findAll('td')[1]
  219. # 7 print("TEST ADDRESS: ", element)
  220. street_address = list(address)[0].strip()
  221. csz = list(address)[2].strip()
  222. split_address = self.break_address(street_address + '|' + csz)
  223. description = detail_soup.find('div', {'id': 'listing-remarks'}).getText().strip().replace('\t', '')
  224. data = Property(site_name=self.mlstype,
  225. type=type,
  226. MLS=MLS,
  227. bedrooms=bedrooms,
  228. baths=baths,
  229. sqft=sqft,
  230. address=split_address[0],
  231. city=split_address[1].title(),
  232. st=split_address[2].upper(),
  233. zip=split_address[3],
  234. county=county.title(),
  235. price=price.replace('$', '').replace(',', ''),
  236. acres=acres,
  237. description=description,
  238. link=link)
  239. properties_list.append(data)
  240. logging.debug('Scanned: ' + data.address)
  241. print('Scanned: ' + data.address)
  242. return properties_list
  243. def getmlsdata(self, search: Search, county):
  244. """This is the main entrypoint. Takes arguments to pass to stringbuilder to create the URL.
  245. Selects appropriate parser based on self.mlstype from class intance.
  246. Needs any modifications from the standard search ($0 to $500,000, 5 to 15 acres, etc)
  247. See class search for more information.
  248. --> 9/1/20 - takes Search class as argument. All properties are handled by the class <--"""
  249. logging.info('getgmlsdata starting.')
  250. logging.debug('Scanning: ' + county + " county")
  251. if isinstance(search, Search):
  252. if not county in self.counties: ### FIX for lower()
  253. print("County " + county + " not regognized. Exiting")
  254. else:
  255. print("Scanning for results in " + county + " using the " + self.mlstype.upper() + " database.")
  256. if self.mlstype == 'gmls':
  257. list = self.gmlsparser(self.stringbuilder(search, county), county)
  258. logging.info(
  259. "Completed search in " + county + " county. " + str(len(list)) + " total properties scanned.")
  260. return list
  261. else:
  262. raise ImproperSearchError(search)
  263. def check_db(self, criteria_dict):
  264. """Check dictionary of critera against database.
  265. Currently accepts keys: MLS, title, address (street number/name, not city/state/zip).
  266. Returns True if records exists."""
  267. if not self.cursor: ## Check if DB is connected
  268. try:
  269. self.connect_db()
  270. logging.debug("No Database Connection. Connecting to DB in check_db function.")
  271. except Exception as err:
  272. print("Could not connect to Database. " + str(err))
  273. logging.warning("Could not connect to Database. " + str(err))
  274. return 0
  275. for criteria in criteria_dict:
  276. ## Determine criteria passed, and execute queries for each
  277. if criteria == 'MLS':
  278. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE MLS = %(MLS)s GROUP BY id",
  279. {criteria: criteria_dict[criteria]})
  280. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  281. elif criteria == 'title':
  282. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE title = %(title)s GROUP BY id",
  283. {criteria: criteria_dict[criteria]})
  284. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  285. elif criteria == 'address':
  286. self.cursor.execute("SELECT COUNT(*) FROM properties WHERE address = %(address)s GROUP BY id",
  287. {criteria: criteria_dict[criteria]})
  288. if self.cursor.rowcount > 0: return self.cursor.rowcount # stop for loop if match already found.
  289. else:
  290. print("Cannot search on parameter: " + criteria)
  291. return self.cursor.rowcount
  292. def get_google(self, property):
  293. """Supplies date from Google Distance Matrix API to populate
  294. distance_to_work
  295. time_to_work
  296. distance_to_school
  297. time_to_school
  298. Costs money, so it should only be called when inserting a new db record.
  299. Returns distance in METERS (1m = 0.000621371 mi) and time in SECONDS
  300. returns fully populated Propery object."""
  301. print("Fetching live Google Data. $$")
  302. logging.warning("Calling Google API. $$")
  303. destination1 = 'Hebron Christian Acadamy' ## Working query for Hebron Christian Acadamy
  304. destination2 = 'JHRJ+FJ Atlanta, Georgia' ## Plus code for Hourly parking at Int'l Terminal, KATL
  305. params = {}
  306. params['units'] = 'imperial'
  307. params['origins'] = property.address + ', ' + property.city + ' ' + property.st
  308. params['destinations'] = 'Hebron Christian Acadamy|JHRJ+FJ Atlanta, Georgia'
  309. params['key'] = self.GoogleAPIKey
  310. baseURL = 'https://maps.googleapis.com/maps/api/distancematrix/json?'
  311. API_URL = baseURL + urllib.parse.urlencode(params)
  312. # print(API_URL)
  313. # Send Request and capture result as json
  314. try:
  315. google_result = requests.get(API_URL).json()
  316. if google_result['status'] == 'OK':
  317. property.distance_to_school = google_result['rows'][0]['elements'][0]['distance']['value']
  318. property.time_to_school = google_result['rows'][0]['elements'][0]['duration']['value']
  319. property.distance_to_work = google_result['rows'][0]['elements'][1]['distance']['value']
  320. property.time_to_work = google_result['rows'][0]['elements'][1]['duration']['value']
  321. except:
  322. print("ERROR: Failed to obtain Google API data")
  323. def insertrecord(self, property, work_address=None, school_address=None):
  324. """Inserts record into database. Takes argument Property class object."""
  325. if not self.cursor:
  326. print("not self.cursor")
  327. logging.debug("MYSQL connection not established. Trying to connect...")
  328. try:
  329. self.connect_db()
  330. logging.debug("Connecting to DB in insertrecord fucntion.")
  331. except Exception as err:
  332. print("Could not connect to Database. " + str(err))
  333. logging.warning("Could not connect to Database. " + str(err))
  334. return
  335. if self.cursor:
  336. criteria_dict = property.__dict__
  337. criteria_dict['Date_Added'] = str(datetime.date.today())
  338. placeholder_columns = ", ".join(criteria_dict.keys())
  339. placeholder_values = ", ".join([":{0}".format(col) for col in criteria_dict.keys()])
  340. qry = "INSERT INTO properties ({placeholder_columns}) VALUES {placeholder_values}".format(
  341. placeholder_columns=placeholder_columns, placeholder_values=tuple(criteria_dict.values()))
  342. try:
  343. self.cursor.execute(qry)
  344. self.cnx.commit()
  345. print("Inserted " + criteria_dict['MLS'] + " | " + criteria_dict['address'] + " into database.")
  346. logging.info("Inserted " + criteria_dict['MLS'] + " | " + criteria_dict['address'] + " into database.")
  347. except Exception as e:
  348. print("Could not insert " + criteria_dict['address'] + " into database. Database connection error.")
  349. logging.warning("Could not insert " + criteria_dict['address'] + "into database. Database connection "
  350. "error.")
  351. logging.warning(str(e))
  352. else:
  353. print("Database is not connected or cursor not filled. Use function 'connectdb()' to establish")
  354. print(str(self.cursor))
  355. def connect_db(self, host='192.168.100.26', user='landsearchuser', password='1234', database='landsearch'):
  356. """Connects to database and returns a cursor object"""
  357. self.cnx = mysql.connector.connect(host=host, user=user, password=password, database=database, buffered=True)
  358. self.cursor = self.cnx.cursor()
  359. return self.cursor
  360. def close_db(self):
  361. """Cleanly close the db."""
  362. self.cursor.close()
  363. self.cnx.close()
  364. def db_insert(self, properties: list):
  365. """Inserts records into database. Takes list of Property class objects"""
  366. if not properties == None:
  367. if not isinstance(properties, list):
  368. raise TypeError('type list required')
  369. for property in properties:
  370. if not self.check_db({'MLS': property.MLS, 'address': property.address}):
  371. if self.live_google: self.get_google(
  372. property) ## <- This will populate distance and time fields if set TRUE
  373. else:
  374. print("NOT fetching google data. Suppressed by settings in landsearch.conf")
  375. logging.warning("NOT fetching google data for " + property.address + ". Suppressed by "
  376. "settings in "
  377. "landsearch.conf")
  378. self.insertrecord(property)
  379. self.new_listings.append(property)
  380. else:
  381. print(property.MLS + ' | ' + property.address + ' is already in db. Not inserted.')
  382. ##REMOVE FOR TESTING###
  383. # self.new_listings.append(property)
  384. #######################
  385. else:
  386. print("Empty dataset. No records to insert.")
  387. logging.info("Database Update Complete.")
  388. logging.info(str(len(self.new_listings)) + " new listings found.")
  389. def email_results(self):
  390. global mymail, html
  391. html = ''
  392. # sendto = ['M_Stagl@hotmail.com', 'stagl.mike@gmail.com']
  393. sendto = self.recipients.split(',')
  394. # print(type(self.recipients))
  395. # print(type(sendto))
  396. if self.email:
  397. ''' Send some kind of email! '''
  398. # If there are new listings, populate email ##
  399. if len(self.new_listings) > 0:
  400. body = ''
  401. data = []
  402. html = '<html><head></head><body>'
  403. html += '<p>Daily Real Estate Search Report.</p>'
  404. html += '<p>The following properties have been found which may be of interest:</p>'
  405. html += '<table>'
  406. subj = str(len(self.new_listings)) + " New Real Estate Listings for " + str(datetime.date.today())
  407. for listing in self.new_listings:
  408. row = []
  409. row.append(listing.MLS)
  410. row.append(listing.address)
  411. row.append('{:0,.2f}'.format(float(listing.acres)))
  412. row.append(listing.sqft)
  413. row.append('${:0,.0f}'.format(int(listing.price)))
  414. row.append(listing.time_to_school / 60 if hasattr(listing, 'time_to_school') else 'NA')
  415. row.append(listing.link)
  416. data.append(row)
  417. html += '<tr><td colspan=100%><hr></td></tr>'
  418. html += '<tr><th>MLS</th><td>' + listing.MLS + '</td></tr>'
  419. html += '<tr><th>Address</th><td>' + listing.address + '</td></tr>'
  420. html += '<tr><th>Acres</th><td>' + '{:0,.2f}'.format(float(listing.acres)) + '</td></tr>'
  421. html += '<tr><th>Time To School</th><td>' + (str(int(round(listing.time_to_school/60))) if hasattr(listing, 'time_to_school') else 'NA') + '</td></tr>'
  422. html += '<tr><th>Price</th><td>' + '${:0,.0f}'.format(int(listing.price)) + '</td></tr>'
  423. html += '<tr><th>Link</th><td><a href=' + listing.link + '>Link</a></td></tr>'
  424. body = """Daily Real Estate Search Report.\n
  425. The following properties have been found which may be of interest.\n
  426. """
  427. results = tabulate(data, headers=['MLS', 'Address', 'Acres', 'sqft', 'Price', 'Time to School', 'link'])
  428. body += results
  429. html += '<tr><td colspan=100%><hr></td></tr>'
  430. html += '</table></body></html>'
  431. # htmlformat = MIMEText(html, 'html')
  432. # mymail = custom_email.simplemail(subj, body, sendto)
  433. # mymail = custom_email.simplemail(subj, htmlformat, sendto)
  434. # print(body)
  435. # print(html)
  436. else:
  437. body = 'No new listings found'
  438. html = 'No new listings found.'
  439. subj = '0 New Real Estate Listings for ' + str(datetime.date.today())
  440. try:
  441. mymail = custom_email.simplemail(subj, body, sendto, html=html)
  442. # mymail = custom_email.simplemail(subj, body, sendto)
  443. mymail.sendhtml()
  444. print("Email sent.")
  445. logging.info('Emails sent to: ' + str(sendto))
  446. except Exception as e:
  447. print("Error sending email. " + str(e))
  448. logging.warning("Error sending email. " + str(e))
  449. else:
  450. print("Suppressing email based on landsearch.conf preferences.")
  451. logging.warning("Suppressing email based on landsearch.conf preferences.")
  452. if __name__ == '__main__':
  453. gmls = MLSDATA('GMLS') # Create MLSDATA object
  454. mysearch = Search() # Create a custom search object
  455. myresults = []
  456. ## Create function in MLSDATA module:
  457. # - takes counties from configparser and calls getmlsdata for each county.
  458. # - Compiles results into single list and returns that list
  459. # - User code would look something like this:
  460. # _ mysearch = Search()
  461. # _ mydata = gmls.findalllistings(mysearch) # This would control the looping of counties and return a list like normal
  462. # _ gmls.dbinsert(myresults) # This would automate db opening and closing
  463. for county in mysearch.county:
  464. print("local search: ", county)
  465. mysearch = Search() ## Search used to take county as parameter, so this loop would work. Now Search class contains list. loop must occur in getmlsdata module
  466. mydata = gmls.getmlsdata(mysearch, county)
  467. if mydata: # Avoids a crash is there is no data
  468. for listing in mydata:
  469. myresults.append(listing)
  470. #gmls.connectdb()
  471. gmls.db_insert(myresults)
  472. #gmls.closedb()
  473. gmls.email_results()