moved browser instance creation to factory, more cleaning and appdirs

This commit is contained in:
Mathew Guest 2022-03-02 08:20:22 -07:00
parent e0e70cc539
commit 3ef55c0590
2 changed files with 229 additions and 139 deletions

@ -1,4 +1,6 @@
from .create_browser import *
import appdirs
import logging import logging
# import pickle # import pickle
import dill as pickle import dill as pickle
@ -7,32 +9,62 @@ import selenium.webdriver
import sys import sys
import time import time
import traceback import traceback
import os
LOGGER = logging.getLogger('wabot')
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)" USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)"
REFUSE_AFTER_EXCEPTION = True REFUSE_AFTER_EXCEPTION = True
PICKLE_FILENAME = '/tmp/nhsnwebdriverdump'
EXECUTOR_PORT = 4444 EXECUTOR_PORT = 4444
REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub' REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub'
# PICKLE_FILENAME = '/tmp/nhsnwebdriverdump'
# PICKLE_FILENAME = os.path.join(
# appdirs.user_data_dir('wabot'),
# 'saved_browser_instances.pickle'
# )
LOGGER = logging.getLogger('wabot')
DEFAULT_WEBDRIVER_TYPE = 'firefox1'
class BrowserProxy: class BrowserProxy:
def __init__( def __init__(
self, self,
session_name='webdriver', session_name='webdriver',
pickle_filename=PICKLE_FILENAME, pickle_filename=None,
phantom=False phantom=False,
webdriver_type=None # remote_chromium2
): ):
self._pickle_filename = pickle_filename """
LOGGER.info('creating selenium browser: session name = %s', session_name) BrowserProxy wraps a selenium webdriver instance and provides utility
functions for automation webpages.
"""
LOGGER.info('requesting selenium browser instance (%s): instance_name = %s', webdriver_type, session_name)
# if pickle_filename is None:
# pickle_filename = PICKLE_FILENAME
# self._pickle_filename = pickle_filename
if webdriver_type is None:
webdriver_type = 'firefox1'
assert webdriver_type in (
'firefox1',
'firefox2',
'chromium2',
'remote_chromium2',
'phantomjs'
), 'webdriver_type must be firefox1, firefox2, chromium2, remote_chromium2, or phantomjs'
try: try:
driver_type = 'remote_chromium2' self.driver_type = webdriver_type
self.driver_type = driver_type
print('session_name:', session_name)
if phantom: if phantom:
pass pass
# driver_type = "phantomjs" # driver_type = "phantomjs"
self.driver = self.get_driver(driver_type, session_name) self.driver = self.get_driver(webdriver_type, session_name)
if not self.driver: if not self.driver:
LOGGER.error('failed to get selenium webdriver') LOGGER.error('failed to get selenium webdriver')
self.good = False self.good = False
@ -40,6 +72,7 @@ class BrowserProxy:
except Exception as ex: except Exception as ex:
print('caught exception at BrowserProxy().__init__') print('caught exception at BrowserProxy().__init__')
print(type(ex), ex) print(type(ex), ex)
raise
# self.page = nhsn_lo.pages.Login(self) # self.page = nhsn_lo.pages.Login(self)
# self.good = True # self.good = True
@ -174,23 +207,16 @@ class BrowserProxy:
Returns: Returns:
The selenium webdriver handle. The selenium webdriver handle.
""" """
# nhsn actively rejects http requests that do not request with an LOGGER.debug('requesting selenium browser instance: type = %s' % (browser))
# IE user agent. All of these browser instances have to change the
# user agent in various ways.
LOGGER.debug('creating selenium driver: %s' % (browser))
# todo(mathew guest) turn USER_AGENT into local variable so that it
# could be parameterized someday
# user_agent = USER_AGENT
# todo(mathew guest) fallback drivers if one doesn't exist?
driver = None driver = None
browser_factory = CreateBrowser()
if browser == 'chromium2': # Selenium 2 - Chrome if browser == 'chromium2': # Selenium 2 - Chrome
driver = self._create_driver_chromium2() driver = self._create_driver_chromium2()
elif browser == 'remote_chromium2': elif browser == 'remote_chromium2':
driver = self._create_driver_remote_chromium2(session_name) driver = browser_factory._create_driver_remote_chromium2(session_name)
# driver = self._create_driver_remote_chromium2(session_name)
elif browser == 'chromium1': # Selenium 1 - Chrome without working user agent switch elif browser == 'chromium1': # Selenium 1 - Chrome without working user agent switch
driver = self._create_driver_chromium1() driver = self._create_driver_chromium1()
@ -204,124 +230,13 @@ class BrowserProxy:
elif browser == 'phantomjs': elif browser == 'phantomjs':
driver = self._create_driver_phantomjs() driver = self._create_driver_phantomjs()
else: else:
LOGGER.error('an attempt was made to request an '\ LOGGER.error(
'an attempt was made to request an '\
'unsupported (by this product) selenium '\ 'unsupported (by this product) selenium '\
'webdriver; refusing. requested = %s'\ 'webdriver; refusing. requested = %s'\
% (browser)) % (browser)
)
driver.implicitly_wait(10) driver.implicitly_wait(10)
return driver return driver
def _create_driver_remote_chromium2(self, session_name):
    """
    Re-attach to a saved remote Chrome session, or start a new one.

    Drivers are kept in a dict that is pickled to ``self._pickle_filename``
    and keyed by session name.  If an entry for ``session_name`` exists and
    still answers a ``current_url`` query it is reused.  Otherwise a new
    remote driver is requested from the selenium server at
    ``REMOTE_EXECUTOR % EXECUTOR_PORT`` and the dict is written back to the
    pickle file.

    Args:
        session_name: key under which the driver is looked up and saved.

    Returns:
        The selenium webdriver handle (also stored on ``self.driver``).
    """
    drivers = {}
    self.driver = None
    try:
        # NOTE(security): unpickling can execute arbitrary code; this file
        # must only ever be written by this program.
        with open(self._pickle_filename, 'rb') as fp:
            drivers = pickle.load(fp)
        if not drivers:
            raise Exception('pickle file holds no saved webdrivers')
        LOGGER.debug('found pickled drivers: %s', drivers.keys())
        driver = drivers.get(session_name)
        if not driver:
            raise Exception('no saved webdriver named %s' % session_name)
        LOGGER.debug('connected to pickled webdriver instance: %s', session_name)
        # Touch the session: raises if the saved driver is no longer usable.
        driver.current_url
        LOGGER.info('webdriver instance is ready')
        self.driver = driver
        return driver
    except (FileNotFoundError, IOError):
        LOGGER.error('unable to connect to existing webdriver: no pickled drivers found')
    except Exception as ex:
        # Best effort: any failure to reuse a saved driver falls through to
        # creating a fresh one below.
        LOGGER.error('unable to connect to existing webdriver: %s' % ex)

    # A corrupt or foreign pickle payload must not break the save below.
    if not isinstance(drivers, dict):
        drivers = {}

    LOGGER.debug('saved drivers before creating a new one: %s', drivers)
    LOGGER.info('creating new webdriver')
    opt = selenium.webdriver.chrome.options.Options()
    opt.add_argument("--user-agent=" + USER_AGENT)
    opt.add_argument("--kiosk-printing")
    opt.add_argument("--focus-existing-tab-on-open=false")
    executor_url = REMOTE_EXECUTOR % (EXECUTOR_PORT)
    LOGGER.debug('remote selenium: %s', executor_url)
    driver = selenium.webdriver.Remote(
        command_executor=executor_url,
        desired_capabilities=opt.to_capabilities())

    # Remember the new driver so a later process can re-attach to it.
    drivers[session_name] = driver
    with open(self._pickle_filename, 'wb') as fp:
        pickle.dump(drivers, fp)
    self.driver = driver
    return driver
def _create_driver_chromium2(self):
    """
    Start a local Chrome driver (Selenium 2) using the module USER_AGENT.

    Returns:
        The selenium webdriver handle (also stored on ``self.driver``).
    """
    chrome_opts = selenium.webdriver.chrome.options.Options()
    for flag in ("--user-agent=" + USER_AGENT, "--kiosk-printing"):
        chrome_opts.add_argument(flag)
    self.driver = selenium.webdriver.Chrome(chrome_options=chrome_opts)
    return self.driver
def _create_driver_chromium1(self):
    """
    Request a Chrome driver from the Selenium 1 server.

    Selenium 1 - Chrome without a working user agent switch, so the plain
    ``DesiredCapabilities.CHROME`` capabilities are sent.

    The previous implementation first opened an additional remote session
    (built from ChromeOptions) and immediately overwrote the handle, leaving
    an orphaned browser on the server.  Only the session that is actually
    returned is created now.

    Returns:
        The selenium webdriver handle.
    """
    # NOTE(review): SELENIUM1_SERVER_PATH is not defined in the visible part
    # of this module - confirm it exists at module level.
    driver = selenium.webdriver.Remote(
        SELENIUM1_SERVER_PATH,
        selenium.webdriver.DesiredCapabilities.CHROME.copy())
    return driver
def _create_driver_firefox2(self):
    """
    Start a local Firefox driver from the profile in /home/mathew/firefox_prof.

    The profile gets the module USER_AGENT as user-agent override plus the
    download, redirection and print-to-file preferences listed below.

    Returns:
        The selenium webdriver handle.
    """
    ff_profile = selenium.webdriver.FirefoxProfile(profile_directory = "/home/mathew/firefox_prof")
    preferences = (
        ("general.useragent.override", USER_AGENT),
        ("browser.helperApps.neverAsk.saveToDisk", "text/csv"),
        ("network.http.redirection-limit", "0"),
        ("print.print_to_file", True),
        ("print.print_to_filename", "/tmp/print.pdf"),
    )
    for pref_name, pref_value in preferences:
        ff_profile.set_preference(pref_name, pref_value)
    ff_profile.update_preferences()
    # Applied a second time after update_preferences(), as before.
    ff_profile.set_preference("network.http.redirection-limit", "0")
    return selenium.webdriver.Firefox(ff_profile)
def _create_driver_firefox1(self):
    """
    Request a Firefox driver from the Selenium 1 server.

    A fresh Firefox profile carrying the module USER_AGENT as user-agent
    override is sent along with the stock FIREFOX capabilities.

    Returns:
        The selenium webdriver handle.
    """
    ff_profile = selenium.webdriver.FirefoxProfile()
    ff_profile.set_preference("general.useragent.override", USER_AGENT)
    capabilities = selenium.webdriver.DesiredCapabilities.FIREFOX.copy()
    return selenium.webdriver.Remote(
        SELENIUM1_SERVER_PATH,
        capabilities,
        browser_profile=ff_profile,
    )
def _create_driver_phantomjs(self):
    """
    Start a headless local Chrome driver in place of PhantomJS.

    Note(MG): Selenium support for PhantomJS has been deprecated, please
    use headless.

    Returns:
        The selenium webdriver handle (also stored on ``self.driver``),
        with its window sized to 838x907.
    """
    headless_opts = selenium.webdriver.chrome.options.Options()
    for flag in ("--user-agent=" + USER_AGENT, "--kiosk-printing", "--headless"):
        headless_opts.add_argument(flag)
    browser = selenium.webdriver.Chrome(chrome_options=headless_opts)
    browser.set_window_size(838, 907)
    self.driver = browser
    return browser

175
wabot/create_browser.py Normal file

@ -0,0 +1,175 @@
import appdirs
import logging
import os
import selenium
import pickle
# Logger named 'wabot', used for all log output of this module.
LOGGER = logging.getLogger('wabot')
# User-agent string (Internet Explorer 7 on Windows) that the driver-creation
# methods in this module hand to the browser they start.
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)"
# NOTE(review): not referenced anywhere in the visible part of this module -
# confirm whether it is still needed.
REFUSE_AFTER_EXCEPTION = True
# Port of the selenium server; substituted into REMOTE_EXECUTOR.
EXECUTOR_PORT = 4444
# URL template of the selenium server endpoint; %s is filled with EXECUTOR_PORT.
REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub'
# Factory Creator
class CreateBrowser:
    """
    Factory that creates selenium webdriver instances.

    Two strategies: remote via a selenium server, or local.
    The advantage of using remote is that the browser instance can stay
    open after the process closes.  This allows you to disconnect and
    re-connect to the same browser instance with different processes.

    Remote chromium2 drivers are remembered between processes by pickling
    them into the file named by ``pickle_filename``.
    """

    def __init__(self, pickle_filename=None):
        """
        Args:
            pickle_filename: file in which remote drivers are saved.  When
                omitted, a file inside the appdirs user data directory for
                'wabot' is used (resolved on first access).
        """
        self._pickle_filename = None
        # Last driver handed out by this factory; None until one is created.
        self.driver = None
        if pickle_filename is not None:
            self.pickle_filename = pickle_filename

    @property
    def pickle_filename(self):
        """Path of the pickle file holding the saved remote drivers."""
        if self._pickle_filename is None:
            # Resolved lazily so appdirs is only consulted when needed.
            self._pickle_filename = os.path.join(
                appdirs.user_data_dir('wabot'),
                'saved_browser_instances.pickle'
            )
        return self._pickle_filename

    @pickle_filename.setter
    def pickle_filename(self, value):
        self._pickle_filename = value

    def _load_saved_drivers(self):
        """Return the dict of pickled drivers, or {} if none can be read."""
        path = self.pickle_filename
        # Definitely no browser instance saved yet; not an error.
        if not os.path.exists(path):
            LOGGER.debug('no pickled file for saved browser instances (nothing saved yet)')
            return {}
        # There MAY be an open browser or an invalidated reference to a
        # once-open browser.
        LOGGER.debug('found pickled file for saved browser instances: %s', path)
        try:
            # NOTE(security): unpickling can execute arbitrary code; this
            # file must only ever be written by this program.
            with open(path, 'rb') as fp:
                drivers = pickle.load(fp)
        except Exception as ex:
            # Best effort: an unreadable file just means nothing is reused.
            LOGGER.error('unable to connect to existing webdriver: %s', ex)
            return {}
        if not isinstance(drivers, dict):
            LOGGER.error('unable to connect to existing webdriver: unexpected pickle content')
            return {}
        return drivers

    def _save_drivers(self, drivers):
        """Pickle ``drivers`` to ``pickle_filename``, creating its directory."""
        path = self.pickle_filename
        directory = os.path.dirname(path)
        if directory:
            # The appdirs data directory does not exist on a fresh machine.
            os.makedirs(directory, exist_ok=True)
        with open(path, 'wb') as fp:
            pickle.dump(drivers, fp)

    def _create_driver_remote_chromium2(self, session_name):
        """
        Re-attach to a saved remote Chrome session, or start a new one.

        Saved drivers are keyed by ``'rchromium2-<session_name>'``.  A saved
        driver is reused only if it still answers a ``current_url`` query;
        otherwise a new driver is requested from the selenium server at
        ``REMOTE_EXECUTOR % EXECUTOR_PORT`` and saved under that key.

        Args:
            session_name: name identifying the browser instance.

        Returns:
            The selenium webdriver handle (also stored on ``self.driver``).
        """
        final_name = '{}-{}'.format('rchromium2', session_name)
        self.driver = None

        # First, see if an existing session_name browser instance exists.
        drivers = self._load_saved_drivers()
        if drivers:
            LOGGER.debug('found saved browser instances: %s', list(drivers.keys()))
        saved = drivers.get(final_name)
        if saved is not None:
            LOGGER.debug('connected to pickled webdriver instance: %s', final_name)
            try:
                # Raises if the saved driver is not usable anymore.
                saved.current_url
            except Exception as ex:
                LOGGER.error('unable to connect to existing webdriver: %s', ex)
            else:
                LOGGER.info('webdriver instance is ready')
                self.driver = saved
                return saved

        # At this point, need to instantiate a new browser instance.
        sel_host = REMOTE_EXECUTOR % (EXECUTOR_PORT)
        LOGGER.info('instantianting new browser instance (chromium2)')
        LOGGER.info('remote selenium: %s', sel_host)
        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        opt.add_argument("--focus-existing-tab-on-open=false")
        driver = selenium.webdriver.Remote(
            command_executor=sel_host,
            desired_capabilities=opt.to_capabilities()
        )

        # Save to pickle so a later process can re-attach.
        drivers[final_name] = driver
        LOGGER.info('saving browser instance to pickle: %s', final_name)
        self._save_drivers(drivers)
        self.driver = driver
        return driver

    def _create_driver_chromium2(self):
        """
        Start a local Chrome driver (Selenium 2) using USER_AGENT.

        Returns:
            The selenium webdriver handle (also stored on ``self.driver``).
        """
        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        driver = selenium.webdriver.Chrome(chrome_options=opt)
        self.driver = driver
        return driver

    def _create_driver_chromium1(self):
        """
        Request a Chrome driver from the selenium server (Selenium 1 style).

        The previous implementation opened one remote session, discarded the
        handle, and then tried to open a second one against
        SELENIUM1_SERVER_PATH, a name this module never defines.  A single
        session is now opened at ``REMOTE_EXECUTOR % EXECUTOR_PORT``, the
        same endpoint ``_create_driver_firefox1`` uses.

        Returns:
            The selenium webdriver handle.
        """
        # Selenium 1 - Chrome without working user agent switch
        options = selenium.webdriver.ChromeOptions()
        options.add_argument("--user-agent=" + USER_AGENT)
        driver = selenium.webdriver.Remote(
            command_executor=REMOTE_EXECUTOR % (EXECUTOR_PORT),
            desired_capabilities=options.to_capabilities()
        )
        return driver

    def _create_driver_firefox2(self, profile_directory="/home/mathew/firefox_prof"):
        """
        Start a local Firefox driver from an on-disk profile.

        Args:
            profile_directory: Firefox profile directory to load; defaults
                to the path that used to be hard-coded here.

        Returns:
            The selenium webdriver handle.
        """
        profile = selenium.webdriver.FirefoxProfile(profile_directory=profile_directory)
        profile.set_preference("general.useragent.override", USER_AGENT)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
        profile.set_preference("network.http.redirection-limit", "0")
        profile.set_preference("print.print_to_file", True)
        profile.set_preference("print.print_to_filename", "/tmp/print.pdf")
        profile.update_preferences()
        driver = selenium.webdriver.Firefox(profile)
        return driver

    def _create_driver_firefox1(self):
        """
        Request a Firefox driver from the selenium server.

        A fresh profile carrying USER_AGENT as user-agent override is sent
        along with the stock FIREFOX capabilities.

        Returns:
            The selenium webdriver handle.
        """
        profile = selenium.webdriver.FirefoxProfile()
        profile.set_preference("general.useragent.override", USER_AGENT)
        driver = selenium.webdriver.Remote(
            REMOTE_EXECUTOR % (EXECUTOR_PORT),
            selenium.webdriver.DesiredCapabilities.FIREFOX.copy(),
            browser_profile=profile
        )
        return driver

    def _create_driver_phantomjs(self):
        """
        Start a headless local Chrome driver in place of PhantomJS.

        Note(MG): Selenium support for PhantomJS has been deprecated, please
        use headless.

        Returns:
            The selenium webdriver handle (also stored on ``self.driver``),
            with its window sized to 838x907.
        """
        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        opt.add_argument("--headless")
        driver = selenium.webdriver.Chrome(chrome_options=opt)
        driver.set_window_size(838, 907)
        self.driver = driver
        return driver