From 3ef55c05904842f0aff5286c62eac08f80d30e4f Mon Sep 17 00:00:00 2001 From: Mathew Guest Date: Wed, 2 Mar 2022 08:20:22 -0700 Subject: [PATCH] moved browser instance creation to factory, more cleaning and appdirs --- wabot/api.py | 193 +++++++++++----------------------------- wabot/create_browser.py | 175 ++++++++++++++++++++++++++++++++++++ 2 files changed, 229 insertions(+), 139 deletions(-) create mode 100644 wabot/create_browser.py diff --git a/wabot/api.py b/wabot/api.py index 1984f54..d1aa8b6 100644 --- a/wabot/api.py +++ b/wabot/api.py @@ -1,4 +1,6 @@ +from .create_browser import * +import appdirs import logging # import pickle import dill as pickle @@ -7,32 +9,62 @@ import selenium.webdriver import sys import time import traceback - -LOGGER = logging.getLogger('wabot') +import os USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)" REFUSE_AFTER_EXCEPTION = True -PICKLE_FILENAME = '/tmp/nhsnwebdriverdump' EXECUTOR_PORT = 4444 REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub' +# PICKLE_FILENAME = '/tmp/nhsnwebdriverdump' +# PICKLE_FILENAME = os.path.join( +# appdirs.user_data_dir('wabot'), +# 'saved_browser_instances.pickle' +# ) + +LOGGER = logging.getLogger('wabot') + +DEFAULT_WEBDRIVER_TYPE = 'firefox1' + class BrowserProxy: def __init__( self, session_name='webdriver', - pickle_filename=PICKLE_FILENAME, - phantom=False + pickle_filename=None, + phantom=False, + webdriver_type=None # remote_chromium2 ): - self._pickle_filename = pickle_filename - LOGGER.info('creating selenium browser: session name = %s', session_name) + """ + BrowserProxy wraps a selenium webdriver instance and provides utility + functions for automation webpages. + """ + LOGGER.info('requesting selenium browser instance (%s): instance_name = %s', webdriver_type, session_name) + + # if pickle_filename is None: + # pickle_filename = PICKLE_FILENAME + + # self._pickle_filename = pickle_filename + + if webdriver_type is None: + webdriver_type = 'firefox1' + + assert webdriver_type in ( + 'firefox1', + 'firefox2', + 'chromium2', + 'remote_chromium2', + 'phantomjs' + ), 'webdriver_type must be firefox1, firefox2, chromium2, remote_chromium2, or phantomjs' + + + + try: - driver_type = 'remote_chromium2' - self.driver_type = driver_type - print('session_name:', session_name) + self.driver_type = webdriver_type if phantom: pass # driver_type = "phantomjs" - self.driver = self.get_driver(driver_type, session_name) + self.driver = self.get_driver(webdriver_type, session_name) if not self.driver: LOGGER.error('failed to get selenium webdriver') self.good = False @@ -40,6 +72,7 @@ class BrowserProxy: except Exception as ex: print('caught exception at BrowserProxy().__init__') print(type(ex), ex) + raise # self.page = nhsn_lo.pages.Login(self) # self.good = True @@ -174,23 +207,16 @@ class BrowserProxy: Returns: The selenium webdriver handle. """ - # nhsn actively rejects http requests that do not request with an - # IE user agent. All of these browser instances have to change the - # user agent in various ways. - LOGGER.debug('creating selenium driver: %s' % (browser)) - # todo(mathew guest) turn USER_AGENT into local variable so that it - # could be parameterized someday - # user_agent = USER_AGENT - - # todo(mathew guest) fallback drivers if one doesn't exist? + LOGGER.debug('requesting selenium browser instance: type = %s' % (browser)) driver = None - + browser_factory = CreateBrowser() if browser == 'chromium2': # Selenium 2 - Chrome driver = self._create_driver_chromium2() elif browser == 'remote_chromium2': - driver = self._create_driver_remote_chromium2(session_name) + driver = browser_factory._create_driver_remote_chromium2(session_name) + # driver = self._create_driver_remote_chromium2(session_name) elif browser == 'chromium1': # Selenium 1 - Chrome without working user agent switch driver = self._create_driver_chromium1() @@ -204,124 +230,13 @@ class BrowserProxy: elif browser == 'phantomjs': driver = self._create_driver_phantomjs() else: - LOGGER.error('an attempt was made to request an '\ - 'unsupported (by this product) selenium '\ - 'webdriver; refusing. requested = %s'\ - % (browser)) + LOGGER.error( + 'an attempt was made to request an '\ + 'unsupported (by this product) selenium '\ + 'webdriver; refusing. requested = %s'\ + % (browser) + ) driver.implicitly_wait(10) return driver - def _create_driver_remote_chromium2(self, session_name): - fp = None - drivers = {} - try: - fp = open(self._pickle_filename, 'rb') - drivers = pickle.load(fp) - if not drivers: - raise Exception - LOGGER.debug('found pickled drivers: %s', drivers.keys()) - driver = drivers.get(session_name) - if not driver: - raise Exception - LOGGER.debug('connected to pickled webdriver instance: %s', session_name) - url = driver.current_url # throw error if driver isn't reliable anymore - LOGGER.info('webdriver instance is ready') - self.driver = driver - return driver - except (FileNotFoundError, IOError) as ex: - self.driver = None - LOGGER.error('unable to connect to existing webdriver: no pickled drivers found') - except Exception as ex: - self.driver = None - LOGGER.error('unable to connect to existing webdriver: %s' % ex) - - if self.driver is None: - print('drivers', drivers) - LOGGER.info('creating new webdriver') - opt = selenium.webdriver.chrome.options.Options() - opt.add_argument("--user-agent=" + USER_AGENT) - opt.add_argument("--kiosk-printing") - opt.add_argument("--focus-existing-tab-on-open=false") - driver = selenium.webdriver.Remote( - command_executor=REMOTE_EXECUTOR % (EXECUTOR_PORT), - desired_capabilities = opt.to_capabilities()) - print(REMOTE_EXECUTOR % EXECUTOR_PORT) - fp = open(self._pickle_filename, 'wb') - drivers[session_name] = driver - print('b4 pickle') - print(drivers) - # print('#skipping pickle') - pickle.dump(drivers, fp) - # print('after pickle') - self.driver = driver - return driver - - - def _create_driver_chromium2(self): - opt = selenium.webdriver.chrome.options.Options() - opt.add_argument("--user-agent=" + USER_AGENT) - opt.add_argument("--kiosk-printing") - driver = selenium.webdriver.Chrome(chrome_options = opt) - self.driver = driver - return driver - - def _create_driver_chromium1(self): - # Selenium 1 - Chrome without working user agent switch - # These two methods of creation ChromeOptions are equivalent objects - options = selenium.webdriver.ChromeOptions() - options.add_argument("--user-agent=" + USER_AGENT) - driver = selenium.webdriver.Remote(desired_capabilities = options.to_capabilities()) - driver = selenium.webdriver.Remote(SELENIUM1_SERVER_PATH, - selenium.webdriver.DesiredCapabilities.CHROME.copy()) - return driver - - def _create_driver_firefox2(self): - # tmp = selenium.webdriver.FirefoxProfile() - # tmp = None - profile = None - # filename = "/tmp/firefox_profile" - # try: - # fp = open(filename, "rb") - # profile = pickle.load(fp) - # except: - # pass - - if not profile: - profile = selenium.webdriver.FirefoxProfile(profile_directory = "/home/mathew/firefox_prof") - profile.set_preference("general.useragent.override", USER_AGENT ) - profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv"); - profile.set_preference("network.http.redirection-limit", "0" ) - # profile.set_preference("javascript.enabled", False ) - # profile.set_preference("print.always_print_silent", True) - profile.set_preference("print.print_to_file", True) - profile.set_preference("print.print_to_filename", "/tmp/print.pdf") - profile.update_preferences() - profile.set_preference("network.http.redirection-limit", "0" ) - # with open("/tmp/firefox_profile", "wb") as fp: - # pickle.dump(profile, fp, pickle.HIGHEST_PROTOCOL) - - # driver = selenium.webdriver.Firefox() - driver = selenium.webdriver.Firefox(profile) - return driver - - def _create_driver_firefox1(self): - profile = selenium.webdriver.FirefoxProfile() - profile.set_preference("general.useragent.override", USER_AGENT ) - driver = selenium.webdriver.Remote(SELENIUM1_SERVER_PATH, - selenium.webdriver.DesiredCapabilities.FIREFOX.copy(), browser_profile = profile ) - return driver - - def _create_driver_phantomjs(self): - # Note(MG): Selenium support for PhantomJS has been deprecated, please use headless - # driver = selenium.webdriver.PhantomJS() - # return driver - opt = selenium.webdriver.chrome.options.Options() - opt.add_argument("--user-agent=" + USER_AGENT) - opt.add_argument("--kiosk-printing") - opt.add_argument("--headless") - driver = selenium.webdriver.Chrome(chrome_options = opt) - driver.set_window_size(838, 907) - self.driver = driver - return driver - diff --git a/wabot/create_browser.py b/wabot/create_browser.py new file mode 100644 index 0000000..47df9b3 --- /dev/null +++ b/wabot/create_browser.py @@ -0,0 +1,175 @@ + +import appdirs +import logging +import os +import selenium +import pickle + + +LOGGER = logging.getLogger('wabot') + +USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)" +REFUSE_AFTER_EXCEPTION = True +EXECUTOR_PORT = 4444 +REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub' + +# Factory Creator +class CreateBrowser: + """ + Creates and instantiates selenium webbrowser instances. + + Two strategies: remote via selenium server or local. + + The advantage of using remote is the browser instance can stay + open after the process closes. This allows you to disconnect + and re-connect to the same browser instance with different + processes. + """ + def __init__(self, pickle_filename=None): + self._pickle_filename = None + if pickle_filename is not None: + self.pickle_filename = pickle_filename + + @property + def pickle_filename(self): + if self._pickle_filename is None: + self._pickle_filename = os.path.join( + appdirs.user_data_dir('wabot'), + 'saved_browser_instances.pickle' + ) + return self._pickle_filename + else: + return self._pickle_filename + + @pickle_filename.setter + def pickle_filename(self, value): + self._pickle_filename = value + + def _create_driver_remote_chromium2(self, session_name): + p = self.pickle_filename + final_name = '{}-{}'.format('rchromium2', session_name) + driver = None + + # Definitely no browser instance already, we must instantiate + if not os.path.exists(p): + LOGGER.debug('no pickled file for saved browser instances (nothing saved yet)') + + # There MAY be an open browser or an invalidated reference to a once-open browser + if os.path.exists(p): + LOGGER.debug('found pickled file for saved browser instances: %s', p) + # First, see if existing session_name browser instance exists + fp = None + drivers = {} + try: + fp = open(p, 'rb') + drivers = pickle.load(fp) + if not drivers: + raise Exception + LOGGER.debug('found saved browser instances: %s', list(drivers.keys())) + driver = drivers.get(final_name) + if not driver: + raise Exception + LOGGER.debug('connected to pickled webdriver instance: %s', final_name) + url = driver.current_url # throw error if driver isn't reliable anymore + LOGGER.info('webdriver instance is ready') + # self.driver = driver + return driver + except (FileNotFoundError, IOError) as ex: + LOGGER.error('unable to connect to existing webdriver: no pickled drivers found') + except Exception as ex: + self.driver = None + + # At this point, need to instantiate a new browser instance + sel_host = REMOTE_EXECUTOR % (EXECUTOR_PORT) + LOGGER.info('instantianting new browser instance (chromium2)') + LOGGER.info('remote selenium: %s', sel_host) + + opt = selenium.webdriver.chrome.options.Options() + opt.add_argument("--user-agent=" + USER_AGENT) + opt.add_argument("--kiosk-printing") + opt.add_argument("--focus-existing-tab-on-open=false") + driver = selenium.webdriver.Remote( + command_executor=sel_host, + desired_capabilities = opt.to_capabilities() + ) + + # Save to pickle + fp = open(p, 'wb') + drivers[final_name] = driver + LOGGER.info('saving browser instance to pickle: %s', final_name) + pickle.dump(drivers, fp) + self.driver = driver + return driver + + + + def _create_driver_chromium2(self): + opt = selenium.webdriver.chrome.options.Options() + opt.add_argument("--user-agent=" + USER_AGENT) + opt.add_argument("--kiosk-printing") + driver = selenium.webdriver.Chrome(chrome_options = opt) + self.driver = driver + return driver + + def _create_driver_chromium1(self): + # Selenium 1 - Chrome without working user agent switch + # These two methods of creation ChromeOptions are equivalent objects + options = selenium.webdriver.ChromeOptions() + options.add_argument("--user-agent=" + USER_AGENT) + driver = selenium.webdriver.Remote(desired_capabilities = options.to_capabilities()) + driver = selenium.webdriver.Remote(SELENIUM1_SERVER_PATH, + selenium.webdriver.DesiredCapabilities.CHROME.copy()) + return driver + + def _create_driver_firefox2(self): + # tmp = selenium.webdriver.FirefoxProfile() + # tmp = None + profile = None + # filename = "/tmp/firefox_profile" + # try: + # fp = open(filename, "rb") + # profile = pickle.load(fp) + # except: + # pass + + if not profile: + profile = selenium.webdriver.FirefoxProfile(profile_directory = "/home/mathew/firefox_prof") + profile.set_preference("general.useragent.override", USER_AGENT ) + profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv"); + profile.set_preference("network.http.redirection-limit", "0" ) + # profile.set_preference("javascript.enabled", False ) + # profile.set_preference("print.always_print_silent", True) + profile.set_preference("print.print_to_file", True) + profile.set_preference("print.print_to_filename", "/tmp/print.pdf") + profile.update_preferences() + profile.set_preference("network.http.redirection-limit", "0" ) + # with open("/tmp/firefox_profile", "wb") as fp: + # pickle.dump(profile, fp, pickle.HIGHEST_PROTOCOL) + + # driver = selenium.webdriver.Firefox() + driver = selenium.webdriver.Firefox(profile) + return driver + + def _create_driver_firefox1(self): + profile = selenium.webdriver.FirefoxProfile() + profile.set_preference("general.useragent.override", USER_AGENT) + driver = selenium.webdriver.Remote( + # SELENIUM1_SERVER_PATH, + REMOTE_EXECUTOR % (EXECUTOR_PORT), + selenium.webdriver.DesiredCapabilities.FIREFOX.copy(), + browser_profile = profile + ) + return driver + + def _create_driver_phantomjs(self): + # Note(MG): Selenium support for PhantomJS has been deprecated, please use headless + # driver = selenium.webdriver.PhantomJS() + # return driver + opt = selenium.webdriver.chrome.options.Options() + opt.add_argument("--user-agent=" + USER_AGENT) + opt.add_argument("--kiosk-printing") + opt.add_argument("--headless") + driver = selenium.webdriver.Chrome(chrome_options = opt) + driver.set_window_size(838, 907) + self.driver = driver + return driver