mirror of
https://git.zavage.net/Zavage-Software/wabot.git
synced 2024-12-22 18:39:20 -07:00
moved browser instance creation to factory, more cleaning and appdirs
This commit is contained in:
parent
e0e70cc539
commit
3ef55c0590
193
wabot/api.py
193
wabot/api.py
@ -1,4 +1,6 @@
|
|||||||
|
from .create_browser import *
|
||||||
|
|
||||||
|
import appdirs
|
||||||
import logging
|
import logging
|
||||||
# import pickle
|
# import pickle
|
||||||
import dill as pickle
|
import dill as pickle
|
||||||
@ -7,32 +9,62 @@ import selenium.webdriver
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
import os
|
||||||
LOGGER = logging.getLogger('wabot')
|
|
||||||
|
|
||||||
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)"
|
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)"
|
||||||
REFUSE_AFTER_EXCEPTION = True
|
REFUSE_AFTER_EXCEPTION = True
|
||||||
PICKLE_FILENAME = '/tmp/nhsnwebdriverdump'
|
|
||||||
EXECUTOR_PORT = 4444
|
EXECUTOR_PORT = 4444
|
||||||
REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub'
|
REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub'
|
||||||
|
|
||||||
|
# PICKLE_FILENAME = '/tmp/nhsnwebdriverdump'
|
||||||
|
# PICKLE_FILENAME = os.path.join(
|
||||||
|
# appdirs.user_data_dir('wabot'),
|
||||||
|
# 'saved_browser_instances.pickle'
|
||||||
|
# )
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger('wabot')
|
||||||
|
|
||||||
|
DEFAULT_WEBDRIVER_TYPE = 'firefox1'
|
||||||
|
|
||||||
class BrowserProxy:
|
class BrowserProxy:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
session_name='webdriver',
|
session_name='webdriver',
|
||||||
pickle_filename=PICKLE_FILENAME,
|
pickle_filename=None,
|
||||||
phantom=False
|
phantom=False,
|
||||||
|
webdriver_type=None # remote_chromium2
|
||||||
):
|
):
|
||||||
self._pickle_filename = pickle_filename
|
"""
|
||||||
LOGGER.info('creating selenium browser: session name = %s', session_name)
|
BrowserProxy wraps a selenium webdriver instance and provides utility
|
||||||
|
functions for automation webpages.
|
||||||
|
"""
|
||||||
|
LOGGER.info('requesting selenium browser instance (%s): instance_name = %s', webdriver_type, session_name)
|
||||||
|
|
||||||
|
# if pickle_filename is None:
|
||||||
|
# pickle_filename = PICKLE_FILENAME
|
||||||
|
|
||||||
|
# self._pickle_filename = pickle_filename
|
||||||
|
|
||||||
|
if webdriver_type is None:
|
||||||
|
webdriver_type = 'firefox1'
|
||||||
|
|
||||||
|
assert webdriver_type in (
|
||||||
|
'firefox1',
|
||||||
|
'firefox2',
|
||||||
|
'chromium2',
|
||||||
|
'remote_chromium2',
|
||||||
|
'phantomjs'
|
||||||
|
), 'webdriver_type must be firefox1, firefox2, chromium2, remote_chromium2, or phantomjs'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
driver_type = 'remote_chromium2'
|
self.driver_type = webdriver_type
|
||||||
self.driver_type = driver_type
|
|
||||||
print('session_name:', session_name)
|
|
||||||
if phantom:
|
if phantom:
|
||||||
pass
|
pass
|
||||||
# driver_type = "phantomjs"
|
# driver_type = "phantomjs"
|
||||||
self.driver = self.get_driver(driver_type, session_name)
|
self.driver = self.get_driver(webdriver_type, session_name)
|
||||||
if not self.driver:
|
if not self.driver:
|
||||||
LOGGER.error('failed to get selenium webdriver')
|
LOGGER.error('failed to get selenium webdriver')
|
||||||
self.good = False
|
self.good = False
|
||||||
@ -40,6 +72,7 @@ class BrowserProxy:
|
|||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print('caught exception at BrowserProxy().__init__')
|
print('caught exception at BrowserProxy().__init__')
|
||||||
print(type(ex), ex)
|
print(type(ex), ex)
|
||||||
|
raise
|
||||||
|
|
||||||
# self.page = nhsn_lo.pages.Login(self)
|
# self.page = nhsn_lo.pages.Login(self)
|
||||||
# self.good = True
|
# self.good = True
|
||||||
@ -174,23 +207,16 @@ class BrowserProxy:
|
|||||||
Returns:
|
Returns:
|
||||||
The selenium webdriver handle.
|
The selenium webdriver handle.
|
||||||
"""
|
"""
|
||||||
# nhsn actively rejects http requests that do not request with an
|
LOGGER.debug('requesting selenium browser instance: type = %s' % (browser))
|
||||||
# IE user agent. All of these browser instances have to change the
|
|
||||||
# user agent in various ways.
|
|
||||||
LOGGER.debug('creating selenium driver: %s' % (browser))
|
|
||||||
# todo(mathew guest) turn USER_AGENT into local variable so that it
|
|
||||||
# could be parameterized someday
|
|
||||||
# user_agent = USER_AGENT
|
|
||||||
|
|
||||||
# todo(mathew guest) fallback drivers if one doesn't exist?
|
|
||||||
|
|
||||||
driver = None
|
driver = None
|
||||||
|
browser_factory = CreateBrowser()
|
||||||
if browser == 'chromium2': # Selenium 2 - Chrome
|
if browser == 'chromium2': # Selenium 2 - Chrome
|
||||||
driver = self._create_driver_chromium2()
|
driver = self._create_driver_chromium2()
|
||||||
|
|
||||||
elif browser == 'remote_chromium2':
|
elif browser == 'remote_chromium2':
|
||||||
driver = self._create_driver_remote_chromium2(session_name)
|
driver = browser_factory._create_driver_remote_chromium2(session_name)
|
||||||
|
# driver = self._create_driver_remote_chromium2(session_name)
|
||||||
|
|
||||||
elif browser == 'chromium1': # Selenium 1 - Chrome without working user agent switch
|
elif browser == 'chromium1': # Selenium 1 - Chrome without working user agent switch
|
||||||
driver = self._create_driver_chromium1()
|
driver = self._create_driver_chromium1()
|
||||||
@ -204,124 +230,13 @@ class BrowserProxy:
|
|||||||
elif browser == 'phantomjs':
|
elif browser == 'phantomjs':
|
||||||
driver = self._create_driver_phantomjs()
|
driver = self._create_driver_phantomjs()
|
||||||
else:
|
else:
|
||||||
LOGGER.error('an attempt was made to request an '\
|
LOGGER.error(
|
||||||
'unsupported (by this product) selenium '\
|
'an attempt was made to request an '\
|
||||||
'webdriver; refusing. requested = %s'\
|
'unsupported (by this product) selenium '\
|
||||||
% (browser))
|
'webdriver; refusing. requested = %s'\
|
||||||
|
% (browser)
|
||||||
|
)
|
||||||
|
|
||||||
driver.implicitly_wait(10)
|
driver.implicitly_wait(10)
|
||||||
return driver
|
return driver
|
||||||
|
|
||||||
def _create_driver_remote_chromium2(self, session_name):
|
|
||||||
fp = None
|
|
||||||
drivers = {}
|
|
||||||
try:
|
|
||||||
fp = open(self._pickle_filename, 'rb')
|
|
||||||
drivers = pickle.load(fp)
|
|
||||||
if not drivers:
|
|
||||||
raise Exception
|
|
||||||
LOGGER.debug('found pickled drivers: %s', drivers.keys())
|
|
||||||
driver = drivers.get(session_name)
|
|
||||||
if not driver:
|
|
||||||
raise Exception
|
|
||||||
LOGGER.debug('connected to pickled webdriver instance: %s', session_name)
|
|
||||||
url = driver.current_url # throw error if driver isn't reliable anymore
|
|
||||||
LOGGER.info('webdriver instance is ready')
|
|
||||||
self.driver = driver
|
|
||||||
return driver
|
|
||||||
except (FileNotFoundError, IOError) as ex:
|
|
||||||
self.driver = None
|
|
||||||
LOGGER.error('unable to connect to existing webdriver: no pickled drivers found')
|
|
||||||
except Exception as ex:
|
|
||||||
self.driver = None
|
|
||||||
LOGGER.error('unable to connect to existing webdriver: %s' % ex)
|
|
||||||
|
|
||||||
if self.driver is None:
|
|
||||||
print('drivers', drivers)
|
|
||||||
LOGGER.info('creating new webdriver')
|
|
||||||
opt = selenium.webdriver.chrome.options.Options()
|
|
||||||
opt.add_argument("--user-agent=" + USER_AGENT)
|
|
||||||
opt.add_argument("--kiosk-printing")
|
|
||||||
opt.add_argument("--focus-existing-tab-on-open=false")
|
|
||||||
driver = selenium.webdriver.Remote(
|
|
||||||
command_executor=REMOTE_EXECUTOR % (EXECUTOR_PORT),
|
|
||||||
desired_capabilities = opt.to_capabilities())
|
|
||||||
print(REMOTE_EXECUTOR % EXECUTOR_PORT)
|
|
||||||
fp = open(self._pickle_filename, 'wb')
|
|
||||||
drivers[session_name] = driver
|
|
||||||
print('b4 pickle')
|
|
||||||
print(drivers)
|
|
||||||
# print('#skipping pickle')
|
|
||||||
pickle.dump(drivers, fp)
|
|
||||||
# print('after pickle')
|
|
||||||
self.driver = driver
|
|
||||||
return driver
|
|
||||||
|
|
||||||
|
|
||||||
def _create_driver_chromium2(self):
|
|
||||||
opt = selenium.webdriver.chrome.options.Options()
|
|
||||||
opt.add_argument("--user-agent=" + USER_AGENT)
|
|
||||||
opt.add_argument("--kiosk-printing")
|
|
||||||
driver = selenium.webdriver.Chrome(chrome_options = opt)
|
|
||||||
self.driver = driver
|
|
||||||
return driver
|
|
||||||
|
|
||||||
def _create_driver_chromium1(self):
|
|
||||||
# Selenium 1 - Chrome without working user agent switch
|
|
||||||
# These two methods of creation ChromeOptions are equivalent objects
|
|
||||||
options = selenium.webdriver.ChromeOptions()
|
|
||||||
options.add_argument("--user-agent=" + USER_AGENT)
|
|
||||||
driver = selenium.webdriver.Remote(desired_capabilities = options.to_capabilities())
|
|
||||||
driver = selenium.webdriver.Remote(SELENIUM1_SERVER_PATH,
|
|
||||||
selenium.webdriver.DesiredCapabilities.CHROME.copy())
|
|
||||||
return driver
|
|
||||||
|
|
||||||
def _create_driver_firefox2(self):
|
|
||||||
# tmp = selenium.webdriver.FirefoxProfile()
|
|
||||||
# tmp = None
|
|
||||||
profile = None
|
|
||||||
# filename = "/tmp/firefox_profile"
|
|
||||||
# try:
|
|
||||||
# fp = open(filename, "rb")
|
|
||||||
# profile = pickle.load(fp)
|
|
||||||
# except:
|
|
||||||
# pass
|
|
||||||
|
|
||||||
if not profile:
|
|
||||||
profile = selenium.webdriver.FirefoxProfile(profile_directory = "/home/mathew/firefox_prof")
|
|
||||||
profile.set_preference("general.useragent.override", USER_AGENT )
|
|
||||||
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv");
|
|
||||||
profile.set_preference("network.http.redirection-limit", "0" )
|
|
||||||
# profile.set_preference("javascript.enabled", False )
|
|
||||||
# profile.set_preference("print.always_print_silent", True)
|
|
||||||
profile.set_preference("print.print_to_file", True)
|
|
||||||
profile.set_preference("print.print_to_filename", "/tmp/print.pdf")
|
|
||||||
profile.update_preferences()
|
|
||||||
profile.set_preference("network.http.redirection-limit", "0" )
|
|
||||||
# with open("/tmp/firefox_profile", "wb") as fp:
|
|
||||||
# pickle.dump(profile, fp, pickle.HIGHEST_PROTOCOL)
|
|
||||||
|
|
||||||
# driver = selenium.webdriver.Firefox()
|
|
||||||
driver = selenium.webdriver.Firefox(profile)
|
|
||||||
return driver
|
|
||||||
|
|
||||||
def _create_driver_firefox1(self):
|
|
||||||
profile = selenium.webdriver.FirefoxProfile()
|
|
||||||
profile.set_preference("general.useragent.override", USER_AGENT )
|
|
||||||
driver = selenium.webdriver.Remote(SELENIUM1_SERVER_PATH,
|
|
||||||
selenium.webdriver.DesiredCapabilities.FIREFOX.copy(), browser_profile = profile )
|
|
||||||
return driver
|
|
||||||
|
|
||||||
def _create_driver_phantomjs(self):
|
|
||||||
# Note(MG): Selenium support for PhantomJS has been deprecated, please use headless
|
|
||||||
# driver = selenium.webdriver.PhantomJS()
|
|
||||||
# return driver
|
|
||||||
opt = selenium.webdriver.chrome.options.Options()
|
|
||||||
opt.add_argument("--user-agent=" + USER_AGENT)
|
|
||||||
opt.add_argument("--kiosk-printing")
|
|
||||||
opt.add_argument("--headless")
|
|
||||||
driver = selenium.webdriver.Chrome(chrome_options = opt)
|
|
||||||
driver.set_window_size(838, 907)
|
|
||||||
self.driver = driver
|
|
||||||
return driver
|
|
||||||
|
|
||||||
|
175
wabot/create_browser.py
Normal file
175
wabot/create_browser.py
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
|
||||||
|
import appdirs
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import selenium
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger('wabot')
|
||||||
|
|
||||||
|
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)"
|
||||||
|
REFUSE_AFTER_EXCEPTION = True
|
||||||
|
EXECUTOR_PORT = 4444
|
||||||
|
REMOTE_EXECUTOR = 'http://127.0.0.1:%s/wd/hub'
|
||||||
|
|
||||||
|
# Factory Creator
|
||||||
|
class CreateBrowser:
    """
    Creates and instantiates selenium webbrowser instances.

    Two strategies: remote via selenium server or local.

    The advantage of using remote is the browser instance can stay
    open after the process closes. This allows you to disconnect
    and re-connect to the same browser instance with different
    processes. Handles to remote instances are remembered between
    processes in a pickle file (see ``pickle_filename``).

    Attributes:
        driver: The driver most recently created by
            ``_create_driver_remote_chromium2`` (new sessions only),
            ``_create_driver_chromium2`` or ``_create_driver_phantomjs``;
            ``None`` until one of them has run.
    """

    def __init__(self, pickle_filename=None):
        """
        Args:
            pickle_filename: Path of the file used to remember remote
                browser instances. ``None`` selects the default location
                (resolved lazily by the ``pickle_filename`` property).
        """
        self._pickle_filename = pickle_filename
        self.driver = None

    @property
    def pickle_filename(self):
        """
        Path of the pickle file holding saved browser instances.

        When no path was supplied, defaults to
        ``saved_browser_instances.pickle`` inside
        ``appdirs.user_data_dir('wabot')``; the default is computed on
        first access and then cached.
        """
        if self._pickle_filename is None:
            self._pickle_filename = os.path.join(
                appdirs.user_data_dir('wabot'),
                'saved_browser_instances.pickle'
            )
        return self._pickle_filename

    @pickle_filename.setter
    def pickle_filename(self, value):
        self._pickle_filename = value

    def _load_saved_drivers(self, path):
        """Return the dict of saved drivers pickled at ``path``, or ``{}`` if none can be read."""
        # NOTE(security): unpickling executes code embedded in the file, so
        # ``path`` must only ever point at a file written by this program.
        try:
            with open(path, 'rb') as fp:
                LOGGER.debug('found pickled file for saved browser instances: %s', path)
                saved = pickle.load(fp)
        except FileNotFoundError:
            LOGGER.debug('no pickled file for saved browser instances (nothing saved yet)')
            return {}
        except Exception as ex:
            # Deliberately broad: an unreadable or corrupt pickle must not
            # prevent a fresh browser from being started.
            LOGGER.error('unable to connect to existing webdriver: %s', ex)
            return {}

        if not isinstance(saved, dict):
            LOGGER.error(
                'ignoring saved browser instances: unexpected pickle content (%s)',
                type(saved).__name__
            )
            return {}
        return saved

    def _save_drivers(self, path, drivers):
        """Pickle the ``drivers`` dict to ``path``, creating parent directories as needed."""
        directory = os.path.dirname(path)
        if directory:
            # The default user data directory does not exist until something
            # creates it, so opening the file for writing would fail.
            os.makedirs(directory, exist_ok=True)
        with open(path, 'wb') as fp:
            pickle.dump(drivers, fp)

    def _create_driver_remote_chromium2(self, session_name):
        """
        Return a Chrome driver running on the remote selenium server.

        A previously saved driver registered under ``session_name`` is
        re-used when it still responds; otherwise a new remote session is
        started, recorded in the pickle file and stored on ``self.driver``.

        Args:
            session_name: Caller-chosen name identifying the browser
                instance; it is stored under the key
                ``'rchromium2-<session_name>'``.

        Returns:
            The selenium webdriver handle.
        """
        p = self.pickle_filename
        final_name = '{}-{}'.format('rchromium2', session_name)

        # Always a dict, so the save step below works whether or not a
        # pickle file existed beforehand.
        drivers = self._load_saved_drivers(p)
        if drivers:
            LOGGER.debug('found saved browser instances: %s', list(drivers))

        # First, see if an existing session_name browser instance exists
        driver = drivers.get(final_name)
        if driver is not None:
            LOGGER.debug('connected to pickled webdriver instance: %s', final_name)
            try:
                _ = driver.current_url  # throws if driver isn't reliable anymore
            except Exception as ex:
                # Deliberately broad: any failure means the saved handle is
                # stale and a replacement has to be created.
                LOGGER.warning('saved webdriver instance is no longer usable: %s', ex)
            else:
                LOGGER.info('webdriver instance is ready')
                return driver

        # At this point, need to instantiate a new browser instance
        sel_host = REMOTE_EXECUTOR % (EXECUTOR_PORT)
        LOGGER.info('instantianting new browser instance (chromium2)')
        LOGGER.info('remote selenium: %s', sel_host)

        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        opt.add_argument("--focus-existing-tab-on-open=false")
        driver = selenium.webdriver.Remote(
            command_executor=sel_host,
            desired_capabilities=opt.to_capabilities()
        )

        # Save to pickle
        drivers[final_name] = driver
        LOGGER.info('saving browser instance to pickle: %s', final_name)
        self._save_drivers(p, drivers)
        self.driver = driver
        return driver

    def _create_driver_chromium2(self):
        """
        Start a local Chrome driver with the overridden user agent and
        kiosk printing, store it on ``self.driver`` and return it.
        """
        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        driver = selenium.webdriver.Chrome(chrome_options=opt)
        self.driver = driver
        return driver

    def _create_driver_chromium1(self):
        """
        Start a Chrome session on the remote selenium server and return it.

        Exactly one remote session is opened (opening two and discarding
        the first would leave an orphaned browser on the server). The
        server address is built from REMOTE_EXECUTOR / EXECUTOR_PORT, the
        same way ``_create_driver_firefox1`` does.
        """
        # Selenium 1 - Chrome without working user agent switch
        options = selenium.webdriver.ChromeOptions()
        options.add_argument("--user-agent=" + USER_AGENT)
        driver = selenium.webdriver.Remote(
            REMOTE_EXECUTOR % (EXECUTOR_PORT),
            options.to_capabilities()
        )
        return driver

    def _create_driver_firefox2(self, profile_directory="/home/mathew/firefox_prof"):
        """
        Start a local Firefox driver from a customised profile.

        The profile overrides the user agent, saves ``text/csv`` downloads
        without asking, sets the http redirection limit preference to "0"
        and prints to the file ``/tmp/print.pdf``.

        Args:
            profile_directory: Passed through to
                ``selenium.webdriver.FirefoxProfile`` as the profile to
                start from.

        Returns:
            The selenium webdriver handle.
        """
        profile = selenium.webdriver.FirefoxProfile(profile_directory=profile_directory)
        profile.set_preference("general.useragent.override", USER_AGENT)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
        profile.set_preference("network.http.redirection-limit", "0")
        profile.set_preference("print.print_to_file", True)
        profile.set_preference("print.print_to_filename", "/tmp/print.pdf")
        profile.update_preferences()
        return selenium.webdriver.Firefox(profile)

    def _create_driver_firefox1(self):
        """
        Start a Firefox session on the remote selenium server, using a
        fresh profile with the overridden user agent, and return it.
        """
        profile = selenium.webdriver.FirefoxProfile()
        profile.set_preference("general.useragent.override", USER_AGENT)
        driver = selenium.webdriver.Remote(
            REMOTE_EXECUTOR % (EXECUTOR_PORT),
            selenium.webdriver.DesiredCapabilities.FIREFOX.copy(),
            browser_profile=profile
        )
        return driver

    def _create_driver_phantomjs(self):
        """
        Start a headless local Chrome driver, store it on ``self.driver``
        and return it.

        Note(MG): Selenium support for PhantomJS has been deprecated, so
        headless Chrome is used in its place.
        """
        opt = selenium.webdriver.chrome.options.Options()
        opt.add_argument("--user-agent=" + USER_AGENT)
        opt.add_argument("--kiosk-printing")
        opt.add_argument("--headless")
        driver = selenium.webdriver.Chrome(chrome_options=opt)
        driver.set_window_size(838, 907)
        self.driver = driver
        return driver
|
Loading…
Reference in New Issue
Block a user