mirror of
https://git.zavage.net/Zavage-Software/wabot.git
synced 2024-12-22 10:29:20 -07:00
moved browser instance creation to factory, more cleaning and appdirs
This commit is contained in:
parent
e0e70cc539
commit
3ef55c0590
193
wabot/api.py
193
wabot/api.py
@ -1,4 +1,6 @@
|
||||
from .create_browser import *
|
||||
|
||||
import appdirs
|
||||
import logging
|
||||
# import pickle
|
||||
import dill as pickle
|
||||
@ -7,32 +9,62 @@ import selenium.webdriver
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
LOGGER = logging.getLogger('wabot')
|
||||
import os
|
||||
|
||||
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)"
|
||||
REFUSE_AFTER_EXCEPTION = True
|
||||
PICKLE_FILENAME = '/tmp/nhsnwebdriverdump'
|
||||
EXECUTOR_PORT = 4444
|
||||
REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub'
|
||||
|
||||
# PICKLE_FILENAME = '/tmp/nhsnwebdriverdump'
|
||||
# PICKLE_FILENAME = os.path.join(
|
||||
# appdirs.user_data_dir('wabot'),
|
||||
# 'saved_browser_instances.pickle'
|
||||
# )
|
||||
|
||||
LOGGER = logging.getLogger('wabot')
|
||||
|
||||
DEFAULT_WEBDRIVER_TYPE = 'firefox1'
|
||||
|
||||
class BrowserProxy:
|
||||
def __init__(
|
||||
self,
|
||||
session_name='webdriver',
|
||||
pickle_filename=PICKLE_FILENAME,
|
||||
phantom=False
|
||||
pickle_filename=None,
|
||||
phantom=False,
|
||||
webdriver_type=None # remote_chromium2
|
||||
):
|
||||
self._pickle_filename = pickle_filename
|
||||
LOGGER.info('creating selenium browser: session name = %s', session_name)
|
||||
"""
|
||||
BrowserProxy wraps a selenium webdriver instance and provides utility
|
||||
functions for automation webpages.
|
||||
"""
|
||||
LOGGER.info('requesting selenium browser instance (%s): instance_name = %s', webdriver_type, session_name)
|
||||
|
||||
# if pickle_filename is None:
|
||||
# pickle_filename = PICKLE_FILENAME
|
||||
|
||||
# self._pickle_filename = pickle_filename
|
||||
|
||||
if webdriver_type is None:
|
||||
webdriver_type = 'firefox1'
|
||||
|
||||
assert webdriver_type in (
|
||||
'firefox1',
|
||||
'firefox2',
|
||||
'chromium2',
|
||||
'remote_chromium2',
|
||||
'phantomjs'
|
||||
), 'webdriver_type must be firefox1, firefox2, chromium2, remote_chromium2, or phantomjs'
|
||||
|
||||
|
||||
|
||||
|
||||
try:
|
||||
driver_type = 'remote_chromium2'
|
||||
self.driver_type = driver_type
|
||||
print('session_name:', session_name)
|
||||
self.driver_type = webdriver_type
|
||||
if phantom:
|
||||
pass
|
||||
# driver_type = "phantomjs"
|
||||
self.driver = self.get_driver(driver_type, session_name)
|
||||
self.driver = self.get_driver(webdriver_type, session_name)
|
||||
if not self.driver:
|
||||
LOGGER.error('failed to get selenium webdriver')
|
||||
self.good = False
|
||||
@ -40,6 +72,7 @@ class BrowserProxy:
|
||||
except Exception as ex:
|
||||
print('caught exception at BrowserProxy().__init__')
|
||||
print(type(ex), ex)
|
||||
raise
|
||||
|
||||
# self.page = nhsn_lo.pages.Login(self)
|
||||
# self.good = True
|
||||
@ -174,23 +207,16 @@ class BrowserProxy:
|
||||
Returns:
|
||||
The selenium webdriver handle.
|
||||
"""
|
||||
# nhsn actively rejects http requests that do not request with an
|
||||
# IE user agent. All of these browser instances have to change the
|
||||
# user agent in various ways.
|
||||
LOGGER.debug('creating selenium driver: %s' % (browser))
|
||||
# todo(mathew guest) turn USER_AGENT into local variable so that it
|
||||
# could be parameterized someday
|
||||
# user_agent = USER_AGENT
|
||||
|
||||
# todo(mathew guest) fallback drivers if one doesn't exist?
|
||||
LOGGER.debug('requesting selenium browser instance: type = %s' % (browser))
|
||||
|
||||
driver = None
|
||||
|
||||
browser_factory = CreateBrowser()
|
||||
if browser == 'chromium2': # Selenium 2 - Chrome
|
||||
driver = self._create_driver_chromium2()
|
||||
|
||||
elif browser == 'remote_chromium2':
|
||||
driver = self._create_driver_remote_chromium2(session_name)
|
||||
driver = browser_factory._create_driver_remote_chromium2(session_name)
|
||||
# driver = self._create_driver_remote_chromium2(session_name)
|
||||
|
||||
elif browser == 'chromium1': # Selenium 1 - Chrome without working user agent switch
|
||||
driver = self._create_driver_chromium1()
|
||||
@ -204,124 +230,13 @@ class BrowserProxy:
|
||||
elif browser == 'phantomjs':
|
||||
driver = self._create_driver_phantomjs()
|
||||
else:
|
||||
LOGGER.error('an attempt was made to request an '\
|
||||
'unsupported (by this product) selenium '\
|
||||
'webdriver; refusing. requested = %s'\
|
||||
% (browser))
|
||||
LOGGER.error(
|
||||
'an attempt was made to request an '\
|
||||
'unsupported (by this product) selenium '\
|
||||
'webdriver; refusing. requested = %s'\
|
||||
% (browser)
|
||||
)
|
||||
|
||||
driver.implicitly_wait(10)
|
||||
return driver
|
||||
|
||||
def _create_driver_remote_chromium2(self, session_name):
|
||||
fp = None
|
||||
drivers = {}
|
||||
try:
|
||||
fp = open(self._pickle_filename, 'rb')
|
||||
drivers = pickle.load(fp)
|
||||
if not drivers:
|
||||
raise Exception
|
||||
LOGGER.debug('found pickled drivers: %s', drivers.keys())
|
||||
driver = drivers.get(session_name)
|
||||
if not driver:
|
||||
raise Exception
|
||||
LOGGER.debug('connected to pickled webdriver instance: %s', session_name)
|
||||
url = driver.current_url # throw error if driver isn't reliable anymore
|
||||
LOGGER.info('webdriver instance is ready')
|
||||
self.driver = driver
|
||||
return driver
|
||||
except (FileNotFoundError, IOError) as ex:
|
||||
self.driver = None
|
||||
LOGGER.error('unable to connect to existing webdriver: no pickled drivers found')
|
||||
except Exception as ex:
|
||||
self.driver = None
|
||||
LOGGER.error('unable to connect to existing webdriver: %s' % ex)
|
||||
|
||||
if self.driver is None:
|
||||
print('drivers', drivers)
|
||||
LOGGER.info('creating new webdriver')
|
||||
opt = selenium.webdriver.chrome.options.Options()
|
||||
opt.add_argument("--user-agent=" + USER_AGENT)
|
||||
opt.add_argument("--kiosk-printing")
|
||||
opt.add_argument("--focus-existing-tab-on-open=false")
|
||||
driver = selenium.webdriver.Remote(
|
||||
command_executor=REMOTE_EXECUTOR % (EXECUTOR_PORT),
|
||||
desired_capabilities = opt.to_capabilities())
|
||||
print(REMOTE_EXECUTOR % EXECUTOR_PORT)
|
||||
fp = open(self._pickle_filename, 'wb')
|
||||
drivers[session_name] = driver
|
||||
print('b4 pickle')
|
||||
print(drivers)
|
||||
# print('#skipping pickle')
|
||||
pickle.dump(drivers, fp)
|
||||
# print('after pickle')
|
||||
self.driver = driver
|
||||
return driver
|
||||
|
||||
|
||||
def _create_driver_chromium2(self):
|
||||
opt = selenium.webdriver.chrome.options.Options()
|
||||
opt.add_argument("--user-agent=" + USER_AGENT)
|
||||
opt.add_argument("--kiosk-printing")
|
||||
driver = selenium.webdriver.Chrome(chrome_options = opt)
|
||||
self.driver = driver
|
||||
return driver
|
||||
|
||||
def _create_driver_chromium1(self):
|
||||
# Selenium 1 - Chrome without working user agent switch
|
||||
# These two methods of creation ChromeOptions are equivalent objects
|
||||
options = selenium.webdriver.ChromeOptions()
|
||||
options.add_argument("--user-agent=" + USER_AGENT)
|
||||
driver = selenium.webdriver.Remote(desired_capabilities = options.to_capabilities())
|
||||
driver = selenium.webdriver.Remote(SELENIUM1_SERVER_PATH,
|
||||
selenium.webdriver.DesiredCapabilities.CHROME.copy())
|
||||
return driver
|
||||
|
||||
def _create_driver_firefox2(self):
|
||||
# tmp = selenium.webdriver.FirefoxProfile()
|
||||
# tmp = None
|
||||
profile = None
|
||||
# filename = "/tmp/firefox_profile"
|
||||
# try:
|
||||
# fp = open(filename, "rb")
|
||||
# profile = pickle.load(fp)
|
||||
# except:
|
||||
# pass
|
||||
|
||||
if not profile:
|
||||
profile = selenium.webdriver.FirefoxProfile(profile_directory = "/home/mathew/firefox_prof")
|
||||
profile.set_preference("general.useragent.override", USER_AGENT )
|
||||
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv");
|
||||
profile.set_preference("network.http.redirection-limit", "0" )
|
||||
# profile.set_preference("javascript.enabled", False )
|
||||
# profile.set_preference("print.always_print_silent", True)
|
||||
profile.set_preference("print.print_to_file", True)
|
||||
profile.set_preference("print.print_to_filename", "/tmp/print.pdf")
|
||||
profile.update_preferences()
|
||||
profile.set_preference("network.http.redirection-limit", "0" )
|
||||
# with open("/tmp/firefox_profile", "wb") as fp:
|
||||
# pickle.dump(profile, fp, pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
# driver = selenium.webdriver.Firefox()
|
||||
driver = selenium.webdriver.Firefox(profile)
|
||||
return driver
|
||||
|
||||
def _create_driver_firefox1(self):
|
||||
profile = selenium.webdriver.FirefoxProfile()
|
||||
profile.set_preference("general.useragent.override", USER_AGENT )
|
||||
driver = selenium.webdriver.Remote(SELENIUM1_SERVER_PATH,
|
||||
selenium.webdriver.DesiredCapabilities.FIREFOX.copy(), browser_profile = profile )
|
||||
return driver
|
||||
|
||||
def _create_driver_phantomjs(self):
|
||||
# Note(MG): Selenium support for PhantomJS has been deprecated, please use headless
|
||||
# driver = selenium.webdriver.PhantomJS()
|
||||
# return driver
|
||||
opt = selenium.webdriver.chrome.options.Options()
|
||||
opt.add_argument("--user-agent=" + USER_AGENT)
|
||||
opt.add_argument("--kiosk-printing")
|
||||
opt.add_argument("--headless")
|
||||
driver = selenium.webdriver.Chrome(chrome_options = opt)
|
||||
driver.set_window_size(838, 907)
|
||||
self.driver = driver
|
||||
return driver
|
||||
|
||||
|
175
wabot/create_browser.py
Normal file
175
wabot/create_browser.py
Normal file
@ -0,0 +1,175 @@
|
||||
|
||||
import appdirs
|
||||
import logging
|
||||
import os
|
||||
import selenium
|
||||
import pickle
|
||||
|
||||
|
||||
LOGGER = logging.getLogger('wabot')
|
||||
|
||||
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)"
|
||||
REFUSE_AFTER_EXCEPTION = True
|
||||
EXECUTOR_PORT = 4444
|
||||
REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub'
|
||||
|
||||
# Factory Creator
|
||||
class CreateBrowser:
    """
    Creates and instantiates selenium webbrowser instances.

    Two strategies: remote via selenium server or local.

    The advantage of using remote is the browser instance can stay
    open after the process closes. This allows you to disconnect
    and re-connect to the same browser instance with different
    processes. Remote chromium instances are remembered in a pickle
    file (see ``pickle_filename``), keyed by ``rchromium2-<session_name>``.

    NOTE(review): this module only does ``import selenium``; the
    ``selenium.webdriver...`` attribute lookups below rely on the
    ``selenium.webdriver`` submodule having been imported by some other
    module first (api.py does so) - consider importing it here as well.
    """

    def __init__(self, pickle_filename=None):
        """
        Args:
            pickle_filename: Path of the file used to persist remote browser
                instances. When None, a default below the per-user
                application data directory is chosen lazily.
        """
        self._pickle_filename = pickle_filename
        # Most recent driver handed out by this factory (None until one
        # of the _create_driver_* methods succeeds).
        self.driver = None

    @property
    def pickle_filename(self):
        """Path of the pickle file holding saved browser instances."""
        if self._pickle_filename is None:
            self._pickle_filename = os.path.join(
                appdirs.user_data_dir('wabot'),
                'saved_browser_instances.pickle'
            )
        return self._pickle_filename

    @pickle_filename.setter
    def pickle_filename(self, value):
        self._pickle_filename = value

    def _load_saved_drivers(self):
        """
        Read the saved-instances mapping from the pickle file.

        Returns:
            The unpickled dict, or an empty dict when the file does not
            exist or does not contain a dict.

        Raises:
            Whatever ``open``/``pickle.load`` raise for an unreadable or
            corrupt file; the caller decides how to recover.
        """
        path = self.pickle_filename
        if not os.path.exists(path):
            return {}
        # SECURITY NOTE: unpickling can execute arbitrary code. Only point
        # pickle_filename at files written by this application.
        with open(path, 'rb') as fp:
            saved = pickle.load(fp)
        return saved if isinstance(saved, dict) else {}

    def _save_drivers(self, drivers):
        """Write the saved-instances mapping, creating its directory if needed."""
        path = self.pickle_filename
        parent = os.path.dirname(path)
        if parent:
            # The default user data directory does not exist on first run.
            os.makedirs(parent, exist_ok=True)
        with open(path, 'wb') as fp:
            pickle.dump(drivers, fp)

    def _create_driver_remote_chromium2(self, session_name):
        """
        Return a remote chromium driver for ``session_name``.

        A previously pickled driver saved under the same session name is
        re-used when it still responds; otherwise a new browser is requested
        from the remote selenium server and saved to the pickle file.

        Args:
            session_name: Caller-chosen name identifying the browser instance.

        Returns:
            The selenium webdriver handle.
        """
        path = self.pickle_filename
        final_name = '{}-{}'.format('rchromium2', session_name)
        # Always defined, so the save step below works on a first run too.
        drivers = {}

        if not os.path.exists(path):
            # Definitely no browser instance already, we must instantiate
            LOGGER.debug('no pickled file for saved browser instances (nothing saved yet)')
        else:
            # There MAY be an open browser or an invalidated reference to a
            # once-open browser
            LOGGER.debug('found pickled file for saved browser instances: %s', path)
            try:
                drivers = self._load_saved_drivers()
            except Exception as ex:
                # Best effort: an unreadable file just means we start over.
                LOGGER.error('unable to read saved browser instances from %s: %s', path, ex)
                drivers = {}

            if drivers:
                LOGGER.debug('found saved browser instances: %s', list(drivers.keys()))

            driver = drivers.get(final_name)
            if driver is not None:
                LOGGER.debug('connected to pickled webdriver instance: %s', final_name)
                try:
                    # Probe the session: raises if the driver isn't
                    # reliable anymore (browser closed, server restarted).
                    driver.current_url
                except Exception as ex:
                    LOGGER.info(
                        'saved browser instance %s is no longer usable: %s',
                        final_name, ex
                    )
                else:
                    LOGGER.info('webdriver instance is ready')
                    self.driver = driver
                    return driver

        # At this point, need to instantiate a new browser instance
        sel_host = REMOTE_EXECUTOR % (EXECUTOR_PORT)
        LOGGER.info('instantianting new browser instance (chromium2)')
        LOGGER.info('remote selenium: %s', sel_host)

        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        opt.add_argument("--focus-existing-tab-on-open=false")
        driver = selenium.webdriver.Remote(
            command_executor=sel_host,
            desired_capabilities=opt.to_capabilities()
        )

        # Save to pickle
        drivers[final_name] = driver
        LOGGER.info('saving browser instance to pickle: %s', final_name)
        self._save_drivers(drivers)
        self.driver = driver
        return driver

    def _create_driver_chromium2(self):
        """Return a local Chrome driver with the overridden user agent."""
        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        driver = selenium.webdriver.Chrome(chrome_options=opt)
        self.driver = driver
        return driver

    def _create_driver_chromium1(self):
        """
        Return a remote Chrome driver using plain default capabilities.

        Selenium 1 - Chrome without working user agent switch, so no
        user-agent option is passed.
        """
        # The original referenced an undefined SELENIUM1_SERVER_PATH and
        # opened an extra Remote session that was immediately discarded;
        # use the same executor address as _create_driver_firefox1.
        driver = selenium.webdriver.Remote(
            REMOTE_EXECUTOR % (EXECUTOR_PORT),
            selenium.webdriver.DesiredCapabilities.CHROME.copy()
        )
        self.driver = driver
        return driver

    def _create_driver_firefox2(self):
        """Return a local Firefox driver built from a customised profile."""
        # NOTE(review): hard-coded, machine-specific profile directory -
        # should probably become a parameter.
        profile = selenium.webdriver.FirefoxProfile(
            profile_directory="/home/mathew/firefox_prof"
        )
        profile.set_preference("general.useragent.override", USER_AGENT)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
        profile.set_preference("network.http.redirection-limit", "0")
        profile.set_preference("print.print_to_file", True)
        profile.set_preference("print.print_to_filename", "/tmp/print.pdf")
        profile.update_preferences()
        # Re-applied after update_preferences(), exactly as the original did;
        # presumably a guard against the value being reset - TODO confirm.
        profile.set_preference("network.http.redirection-limit", "0")

        driver = selenium.webdriver.Firefox(profile)
        self.driver = driver
        return driver

    def _create_driver_firefox1(self):
        """Return a remote Firefox driver with the overridden user agent."""
        profile = selenium.webdriver.FirefoxProfile()
        profile.set_preference("general.useragent.override", USER_AGENT)
        driver = selenium.webdriver.Remote(
            REMOTE_EXECUTOR % (EXECUTOR_PORT),
            selenium.webdriver.DesiredCapabilities.FIREFOX.copy(),
            browser_profile=profile
        )
        self.driver = driver
        return driver

    def _create_driver_phantomjs(self):
        """
        Return a headless Chrome driver.

        Note(MG): Selenium support for PhantomJS has been deprecated, so
        headless Chrome is used instead.
        """
        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        opt.add_argument("--headless")
        driver = selenium.webdriver.Chrome(chrome_options=opt)
        driver.set_window_size(838, 907)
        self.driver = driver
        return driver
|
Loading…
Reference in New Issue
Block a user