mirror of
https://git.zavage.net/Zavage-Software/wikicrawl.git
synced 2024-11-23 00:30:25 -07:00
start of new app structure for wiki
This commit is contained in:
parent
f093fb9ecc
commit
6ae2a8dd06
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
build/
|
||||||
|
dist/
|
||||||
|
__pycache__
|
||||||
|
*.egg-info
|
||||||
|
|
11
launcher.py
11
launcher.py
@ -1,11 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import wikicrawl
|
|
||||||
import settings
|
|
||||||
|
|
||||||
# Inject the settings.DefaultSettings object into the
|
|
||||||
# app and start running the program.
|
|
||||||
wikicrawl.init(settings.DefaultSettings)
|
|
||||||
wikicrawl.main()
|
|
||||||
input('<enter> to exit')
|
|
||||||
|
|
60
lib/app_skellington/README.md
Normal file
60
lib/app_skellington/README.md
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
app_skellington
|
||||||
|
===============
|
||||||
|
Application framework for Python, features include:
|
||||||
|
* Pain-free multi-level command menu: Register classes
|
||||||
|
* Simple to define services and automatic dependency injection based on name (with custom invocation as an option)
|
||||||
|
* INI-style config and validation (provided through ConfigObj)
|
||||||
|
* Colored logging (provided through colorlog)
|
||||||
|
* Works on Linux, Windows, and Mac
|
||||||
|
|
||||||
|
Principles:
|
||||||
|
* Lend to creating beautiful, easy to read and understand code in the application.
|
||||||
|
* Minimize coupling of applications to this framework.
|
||||||
|
* Compatible with Linux, Windows, and Mac. Try to be as compatible as possible otherwise.
|
||||||
|
* Try to be compatible with alternate Python runtimes such as PyPy.
|
||||||
|
|
||||||
|
Application Configuration
|
||||||
|
-------------------------
|
||||||
|
Site configurations are supported through configobj. There is a config.spec
|
||||||
|
in the src directory which is a validation file; it contains the accepted
|
||||||
|
parameter names, types, and limits for configurable options in the
|
||||||
|
application which is built on app_skellington. The format is multi-level .ini syntax.
|
||||||
|
|
||||||
|
See the configobj documentation for more information.
|
||||||
|
|
||||||
|
Site configuration files (config.ini) are created if they don't exist. The
|
||||||
|
file always contains the full specification of parameters; i.e. even default
|
||||||
|
parameters are added into the config file.
|
||||||
|
|
||||||
|
Linux:
|
||||||
|
|
||||||
|
/home/\<user\>/.config/\<app_name\>/config.ini
|
||||||
|
|
||||||
|
/home/\<user\>/.cache/\<app_name\>/log/\<app_name\>.log
|
||||||
|
|
||||||
|
Windows:
|
||||||
|
|
||||||
|
C:\Users\\\<user>\\AppData\\Local\\\<app_name\>\\config.ini
|
||||||
|
|
||||||
|
C:\Users\\\<user>\\AppData\\Local\\\<app_name\>\\Logs\\\<app_name\>.log
|
||||||
|
|
||||||
|
Application configuration can be overridden ad-hoc through the --config <filename>
|
||||||
|
argument.
|
||||||
|
|
||||||
|
Debug - Turn on Logging
|
||||||
|
---------------------------
|
||||||
|
Set 'APPSKELLINGTON_ENABLE_LOGGING' environment variable to any value which turns
|
||||||
|
on AppSkellington-level logging. For example,
|
||||||
|
|
||||||
|
APPSKELLINGTON_ENABLE_LOGGING=true ./runme
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
export APPSKELLINGTON_ENABLE_LOGGING=1
|
||||||
|
./runme
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
See official website: https://zavage-software.com
|
||||||
|
Please report bugs, improvements, or feedback! <contact>
|
||||||
|
|
11
lib/app_skellington/__init__.py
Normal file
11
lib/app_skellington/__init__.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
APP_CONFIG_FILENAME = 'config.ini' # Relative to user directory on machine
|
||||||
|
APP_CONFIGSPEC_FILENAME = 'config.spec' # Relative to module source directory
|
||||||
|
|
||||||
|
from .app_container import *
|
||||||
|
from .cfg import *
|
||||||
|
from .cli import *
|
||||||
|
from .log import *
|
||||||
|
|
31
lib/app_skellington/_bootstrap.py
Normal file
31
lib/app_skellington/_bootstrap.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Check and gracefully fail if the user needs to install a 3rd-party dep.
|
||||||
|
libnames = ['appdirs', 'configobj', 'colorlog']
|
||||||
|
def check_env_has_dependencies(libnames):
    """
    Check that every library named in libnames can be imported.

    Each library is imported in turn; for every one that cannot be found a
    message is written to STDERR. All names are checked even after a
    failure so the user sees the complete list of missing libraries.

    Args:
        libnames: iterable of importable module names (strings).

    Returns:
        True if every library was imported, False if at least one is
        missing.
    """
    rc = True
    for libname in libnames:
        try:
            __import__(libname)
        except ModuleNotFoundError as ex:
            print('missing third-party library: ', ex, file=sys.stderr)
            rc = False
    return rc
|
||||||
|
if not check_env_has_dependencies(libnames):
|
||||||
|
print('refusing to load program without installed dependencies', file=sys.stderr)
|
||||||
|
raise ImportError('python environment needs third-party dependencies installed')
|
||||||
|
|
||||||
|
# Bootstrap logger: available before the application's own logging
# configuration has been loaded.
_logger_name = 'app_skellington'
_log_fmt = '%(levelname)-7s:%(message)s'

_bootstrap_logger = logging.getLogger(_logger_name)
# Keep records away from ancestor loggers.
_bootstrap_logger.propagate = False
# 1000 is above logging.CRITICAL (50), so the standard levels are all
# filtered out by this logger.
_bootstrap_logger.setLevel(1000)

# NOTE(MG) Pretty sure the logger has the default handler too at this point.
# It's been related to some issues with the logger double-printing messages.
_bootstrap_logger.addHandler(logging.NullHandler())
|
||||||
|
|
116
lib/app_skellington/_util.py
Normal file
116
lib/app_skellington/_util.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from __future__ import print_function
|
||||||
|
import inspect
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from . import _util
|
||||||
|
|
||||||
|
def eprint(*args, **kwargs):
    """
    Write a message to the STDERR stream.

    Takes the same positional and keyword arguments as print(), except
    that the 'file' keyword is supplied by this function.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
||||||
|
|
||||||
|
def filename_to_abspath(filename):
    """
    Return the absolute form of filename.

    A relative name is resolved by os.path.abspath; a name that is already
    absolute comes back normalized.
    """
    absolute = os.path.abspath(filename)
    return absolute
|
||||||
|
|
||||||
|
def does_file_exist(filepath):
    """
    Report whether filepath could be opened for reading at this instant.

    Because the file can be deleted or created immediately after execution of
    this function, there cannot be guarantees made around the existence of
    said file (race condition). This merely says if the file existed at this
    instant in execution.

    Args:
        filepath: path of the file to probe, or None.

    Returns:
        True if the file was opened successfully; False if it was not found
        or if filepath is None.
    """
    # Callers may hand over an unset (None) path; treat it as "no file"
    # rather than letting open() raise TypeError.
    if filepath is None:
        return False
    try:
        # The context manager closes the probe handle straight away so the
        # check does not leak a file descriptor.
        with open(filepath, 'r'):
            return True
    except FileNotFoundError:
        return False
|
||||||
|
|
||||||
|
def ensure_dir_exists(dirpath):
    """
    Create the directory dirpath, including missing parents.

    Nothing happens when dirpath is None or the empty string, or when the
    directory is already there.
    """
    if dirpath is not None and dirpath != '':
        os.makedirs(dirpath, exist_ok=True)
|
||||||
|
|
||||||
|
def get_root_asset(filepath):
    """
    Build the path of a resource or asset shipped with the application.

    The path is taken relative to the directory that holds the root module
    (__main__), which should be the python file initially invoked.
    """
    main_file = sys.modules['__main__'].__file__
    main_dir = os.path.dirname(main_file)
    return os.path.join(os.path.abspath(main_dir), filepath)
|
||||||
|
|
||||||
|
def get_asset(module, filepath):
    """
    Builds the path of a resource or asset shipped with the application.

    The input filename is taken relative to the directory containing the
    given module's source file. If that source file is a symlink, the link
    is resolved first so the asset is looked up next to the real file.

    The existence of the resulting path is not checked.

    Args:
        module: the module object, or the name of an already-imported
            module (e.g. __name__), to search relative to.
        filepath: the relative filepath of the file to look for in the
            package directory.

    Returns:
        The absolute path formed by joining the module's directory and
        filepath.

    Raises:
        KeyError: module is a name that is not present in sys.modules.
        TypeError: module is neither a string nor a module object.
    """
    if isinstance(module, str):
        module_file = sys.modules[module].__file__
    elif inspect.ismodule(module):
        # The previous check, isinstance(module, module), passed the argument
        # itself as the type and therefore raised for every module object.
        module_file = module.__file__
    else:
        raise TypeError('Invalid Usage')

    root = module_file
    if os.path.islink(root):
        root = os.path.realpath(root)
    root = os.path.dirname(os.path.abspath(root))

    return os.path.join(root, filepath)
|
||||||
|
|
||||||
|
def register_class_as_commands(app, submenu, cls_object):
    """
    Turn every public method of a class into a command-line menu entry.

    Each function found on cls_object whose name does not begin with an
    underscore is registered on submenu under the method's name, together
    with its signature and cleaned-up docstring. The registered callable
    builds a new object through app's dependency injection and then calls
    the method on it.

    IMPORTANT: Currently, you need to pass in only a class and not
    an object/instance of a class.

    Args:
        app: object providing _inject_service_dependencies(cls).
        submenu: object providing register_command(func, name, sig, doc).
        cls_object: the class whose methods become commands.
    """
    for name, member in inspect.getmembers(cls_object):
        if name.startswith('_') or not inspect.isfunction(member):
            continue
        factory = app._inject_service_dependencies(cls_object)
        signature = inspect.signature(member)
        command = create_func(factory, member)
        submenu.register_command(
            command, name, signature, inspect.getdoc(member))
|
||||||
|
|
||||||
|
def create_func(constructor, cls_method):
    """
    Bind an unbound method to an object that is built on demand.

    Returns a function that, each time it is called, calls constructor()
    to get a new object and invokes cls_method on that object with the
    arguments it received.
    """
    def func(*call_args, **call_kwargs):
        instance = constructor()
        result = cls_method(instance, *call_args, **call_kwargs)
        return result

    return func
|
||||||
|
|
202
lib/app_skellington/app_container.py
Normal file
202
lib/app_skellington/app_container.py
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
import appdirs
|
||||||
|
import collections
|
||||||
|
import functools
|
||||||
|
import inspect
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Application scaffolding:
|
||||||
|
from ._bootstrap import _bootstrap_logger
|
||||||
|
from . import log
|
||||||
|
from . import _util
|
||||||
|
from . import cli
|
||||||
|
from . import cfg
|
||||||
|
|
||||||
|
DEFAULT_APP_NAME = 'python-app'
|
||||||
|
DEFAULT_APP_AUTHOR = 'John Doe'
|
||||||
|
|
||||||
|
|
||||||
|
# OPTIONAL: classes can sub-class from this?
|
||||||
|
class Components:
    """
    Placeholder base class; every method is currently a no-op stub.
    """

    def inject_dependencies_based_on_names_in_args(self):
        """Not implemented yet; does nothing and returns None."""
        return None

    def inject_dependency(self, name):
        """Not implemented yet; does nothing and returns None."""
        return None

    def register_dependency(self, service, name):
        """Not implemented yet; does nothing and returns None."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class ApplicationContext:
    """
    Holder for application-wide state: the configuration, the logging
    layer, and the parsed command-line arguments.
    """

    def __init__(self, config, log):
        # Parsed arguments are unknown until the command line is parsed.
        self.parsed_argv_unknown = None
        self.parsed_argv = None
        self.log = log
        self.config = config
|
||||||
|
|
||||||
|
class ApplicationContainer:
    """
    Generalized application functionality. Used for linking components and modules of the application
    together. Invokes runtime configuration reading from file, maintains the
    object instances for services, passes off to the cli to determine what to
    do, and then injects any necessary dependencies (e.g. database module)
    and kicks off the functionality requested in the cli.

    Sub-classes may define the optional hooks _cli_options(), _services()
    and _command_menu(); each one that exists is called, in that order, at
    the end of construction.
    """
    def __init__(
        self,
        configspec_filepath=None,
        config_filepath=None,
        *args, **kwargs
    ):
        """
        Args:
            configspec_filepath: path of the config.spec validation file;
                defaults to config.spec next to the __main__ module.
            config_filepath: path of the config.ini file; defaults to
                config.ini in the per-user config directory for this app.
            appname (keyword): application name, default DEFAULT_APP_NAME.
            appauthor (keyword): application author, default
                DEFAULT_APP_AUTHOR.
        """
        self.appname = kwargs.get('appname') or DEFAULT_APP_NAME
        self.appauthor = kwargs.get('appauthor') or DEFAULT_APP_AUTHOR

        if configspec_filepath is None:
            configspec_filepath = self._get_configspec_filepath()

        # Without an explicit config file, fall back to the per-user
        # location; passing None on to the config loader is not usable.
        if config_filepath is None:
            config_filepath = self._get_config_filepath(
                self.appname, self.appauthor)

        self._dependencies = {}

        config = cfg.Config(configspec_filepath)
        config.load_config_from_file(config_filepath)

        logger = log.LoggingLayer(self.appname, self.appauthor)

        # added here, is this okay to do twice?
        logger.configure_logging()

        # Instantiate root application context (container for globals)
        self.ctx = ApplicationContext(config, logger)
        self['ctx'] = lambda: self.ctx

        self.cli = cli.CommandTree()  # Command-line interface

        # Optional hooks supplied by sub-classes.
        for hook_name in ('_cli_options', '_services', '_command_menu'):
            hook = getattr(self, hook_name, None)
            if callable(hook):
                hook()

    def __delitem__(self, service_name):
        """
        Deletes a service or dependency from the available dependencies.
        Deleting a name that is not registered is silently ignored.
        """
        self._dependencies.pop(service_name, None)

    def __getitem__(self, service_name):
        """
        Returns an instance of a service or dependency. The registered
        factory function is looked up by name and called to produce the
        instance of the service object.

            app_container['netezza'] => returns the netezza service instance

        Raises:
            ServiceNotFound: no factory is registered under service_name.
        """
        try:
            service_factory = self._dependencies[service_name]  # Retrieve factory function
        except KeyError as ex:
            msg = 'failed to inject service: {}'.format(service_name)
            _bootstrap_logger.critical(msg)
            _util.eprint(msg)
            raise ServiceNotFound(msg) from ex
        # Called outside the try block so that a KeyError raised inside the
        # factory itself is not misreported as a missing service.
        return service_factory()  # Call factory() to return instance of service

    def __setitem__(self, service_name, value):
        """
        Register a service or dependency factory to return a service.

        The factory function is called to return an instance of a service object.
        """
        self._dependencies[service_name] = value

    def _construct_model(self, model_constructor, *args):
        """
        Performs dependency resolution and instantiates an object of given type.

        This takes in the reference to a class constructor and a list of names
        of the dependencies that need passed into it, constructs that object and
        returns it. Models contain business logic and application functionality.

        Args:
            model_constructor: reference to object constructor.
            *args: names of the registered services to pass, in order, to
                model_constructor.
        """
        dep_references = [self[dep_name] for dep_name in args]
        return model_constructor(*dep_references)

    def _get_config_filepath(self, app_name, app_author, config_filename='config.ini'):
        """
        Attempt to find config.ini in the user's config directory.

        On Linux, this will be /home/<user>/.config/<app>/config.ini
        On Windows, this will be C:\\Users\\<user>\\AppData\\Local\\<app>\\config.ini
        """
        dirname = appdirs.user_config_dir(app_name, app_author)
        filepath = os.path.join(dirname, config_filename)
        _bootstrap_logger.info('default config filepath calculated to be: %s', filepath)
        return filepath

    def _get_configspec_filepath(self, configspec_filename='config.spec'):
        """
        Attempt to find config.spec next to the root (__main__) module.
        """
        return _util.get_root_asset(configspec_filename)

    def _inject_service_dependencies(self, constructor):
        """
        Returns a function that, when called, constructs a new object for
        business/application logic with the listed dependencies.

        The dependencies are the parameter names of constructor.__init__
        (excluding the first, 'self'); each name is resolved as a registered
        service at the time the returned function is called.

        Args:
            constructor: service class to be created object.
        """
        sig = inspect.signature(constructor.__init__)
        param_names = [param.name for param in sig.parameters.values()]
        cls_dependencies = param_names[1:]  # Skip 'self' parameter on class methods.

        return functools.partial(self._construct_model, constructor, *cls_dependencies)

    def load_command(self):
        """
        Parse the command line and store the result on the context.

        Returns:
            True if parsing succeeded, False otherwise.
        """
        args, unk, success = self.cli.parse()
        if not success:
            return False
        self.ctx.parsed_argv = args
        self.ctx.parsed_argv_unknown = unk
        return True

    def invoke_command(self):
        """
        Parse the command line and run the selected command.

        Returns:
            False if the command line could not be parsed, otherwise None.
        """
        rc = self.load_command()
        if not rc:
            return False
        try:
            self.cli.run_command()
        except NoCommandSpecified:
            print('Failure: No command specified.')

    def interactive_shell(self):
        """Not implemented yet."""
        pass

    def invoke_from_cli(self):
        """Entry point for command-line use; delegates to invoke_command()."""
        self.invoke_command()

    def usage(self):
        """Not implemented yet."""
        pass
        # Applications need a default usage
|
||||||
|
|
||||||
|
class ServiceNotFound(Exception):
    """
    Application framework error: a dependency could not be found and
    injected.
    """
|
||||||
|
|
||||||
|
class NoCommandSpecified(Exception):
    """Error type for a command line that names no command to run."""
|
||||||
|
|
184
lib/app_skellington/cfg.py
Normal file
184
lib/app_skellington/cfg.py
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
import appdirs
|
||||||
|
|
||||||
|
from . import _util
|
||||||
|
import argparse
|
||||||
|
import configobj
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import validate
|
||||||
|
|
||||||
|
from ._bootstrap import _bootstrap_logger
|
||||||
|
from . import _util
|
||||||
|
|
||||||
|
class Config:
    """
    Structure to store application runtime configuration. Also contains
    functionality to load configuration from local site file.

    Item access (config[key], config[key] = value, del config[key]) is
    forwarded to the underlying ConfigObj held in config_obj, which is None
    until load_config_from_file() has succeeded in building it.
    """
    def __init__(self, configspec_filepath=None):
        self.config_obj = None
        self._config_filepaths = []
        self._configspec_filepath = None
        self.configspec_filepath = configspec_filepath

    def __delitem__(self, key):
        """
        Deletes the configuration item identified by <key> in the internal
        configuration storage. A missing key is silently ignored.
        """
        try:
            # Operate on config_obj; 'del self[key]' would call this method
            # again and recurse without end.
            del self.config_obj[key]
        except KeyError:
            pass

    def __getitem__(self, key):
        """
        Returns the value of the configuration item identified by <key>.

        Values that offer a dict() method (configuration sections) are
        returned as plain dictionaries; other values are returned as stored.

        Raises:
            KeyError: no item is stored under <key>.
        """
        value = self.config_obj[key]
        to_dict = getattr(value, 'dict', None)
        if callable(to_dict):
            return to_dict()
        return value

    def __setitem__(self, key, value):
        """
        Assigns the value of the configuration item
        identified by <key> as <value>.
        """
        # Operate on config_obj; 'self[key] = value' would call this method
        # again and recurse without end.
        self.config_obj[key] = value

    @property
    def config_filepath(self, idx=0):
        """
        Returns the config filepath, or None if no config file has been
        recorded.

        NOTE: because this is a property, idx cannot be supplied through
        attribute access and is always 0 (the first recorded file).
        """
        if idx < 0:
            raise ValueError('invalid idx argument: index must be 0 or greater')
        try:
            return self._config_filepaths[idx]
        except IndexError:
            return None

    @config_filepath.setter
    def config_filepath(self, value, idx=0):
        """
        Assigns <value> as the config filepath. If no filepath has been
        recorded at position idx yet, <value> is appended instead.

        NOTE: because this is a property, idx cannot be supplied through
        attribute assignment and is always 0 (the first recorded file).
        """
        if idx < 0:
            raise ValueError('invalid idx argument: index must be 0 or greater')
        if idx < len(self._config_filepaths):
            self._config_filepaths[idx] = value
        else:
            self._config_filepaths.append(value)

    @property
    def configspec_filepath(self):
        """Path of the config.spec file in use, or None if there is none."""
        return self._configspec_filepath

    @configspec_filepath.setter
    def configspec_filepath(self, filepath):
        """
        Sets the config.spec path. None clears it. A path that cannot be
        found is rejected: an error is logged and the previous value kept.
        """
        if filepath is None:
            self._configspec_filepath = None
            return
        if _util.does_file_exist(filepath):
            self._configspec_filepath = filepath
        else:
            _bootstrap_logger.error(
                'failed to set config.spec: file not found '
                '(%s)', filepath)

    def load_config_from_file(self, config_filepath):
        """
        Loads configuration settings from file, overwriting all configuration.

        When a config.spec is set, the loaded values are validated against it
        and defaults from the spec are copied into the configuration. If a
        config_filepath was given, the resulting configuration is then
        written back to that file (creating the file and its directory when
        needed) so the file always holds the full set of parameters.

        Args:
            config_filepath: path of the config.ini file, or None to build
                the configuration from the spec defaults only, without
                reading or writing a file.

        Returns:
            True on success; False if the file could not be read or parsed,
            or if it failed validation.
        """
        # Record all config.ini files passed in
        if config_filepath is not None and config_filepath not in self._config_filepaths:
            self._config_filepaths.append(config_filepath)

        # Check for config.spec
        configspec_filepath = self.configspec_filepath
        if configspec_filepath:
            _bootstrap_logger.info('using config.spec: %s', configspec_filepath)
        else:
            _bootstrap_logger.info('config.spec not defined')
        _bootstrap_logger.info('using config file: %s', config_filepath)

        # Pre-check for config.ini existence
        if config_filepath is not None and _util.does_file_exist(config_filepath):
            _bootstrap_logger.info('existing config file found')
        else:
            _bootstrap_logger.info('no config file found: using defaults')

        # interpolation='template' changes config file variable replacement to
        # use the form $var instead of %(var)s, which is useful to enable
        # literal %(text)s values in the config.
        options = {'interpolation': 'template'}
        if configspec_filepath:
            options['configspec'] = configspec_filepath

        try:
            self.config_obj = configobj.ConfigObj(config_filepath, **options)
        except configobj.ConfigObjError as ex:
            # ConfigObjError is the base of configobj's parsing errors, so
            # this also covers failures that are not a plain ParseError.
            msg = 'failed to load config: error parsing config or config.spec: {} ({})'.format(
                config_filepath, ex)
            _bootstrap_logger.error(msg)
            _util.eprint(msg)
            return False
        except OSError as ex:
            msg = 'failed to load config: unable to read config or config.spec file: {}'.format(ex)
            _bootstrap_logger.error(msg)
            _util.eprint(msg)
            return False

        # Hack the configobj module to alter the interpolation for validate.py:
        configobj.DEFAULT_INTERPOLATION = 'template'
        self.config_obj.filename = config_filepath

        if configspec_filepath:
            # Validate config.ini against config.spec
            _bootstrap_logger.info('validating config file against spec')
            try:
                val = validate.Validator()
                test = self.config_obj.validate(val, copy=True)
            except ValueError:
                _bootstrap_logger.error('failed validating configspec')
                return False
            if test is not True:
                _bootstrap_logger.critical('config file failed validation')
                _bootstrap_logger.critical('config file errors: %s', test)
                return False

        # Write the config file back out (this also creates it if it
        # doesn't exist). Skipped when there is no file to write to.
        if config_filepath is not None:
            _bootstrap_logger.info('writing new config file: %s', config_filepath)
            dirname = os.path.dirname(config_filepath)
            _util.ensure_dir_exists(dirname)
            self.config_obj.write()

        _bootstrap_logger.info('done loading config file')
        return True

    def print_config(self):
        """
        Print configuration to stdout.
        """
        print('config:')

        self.config_obj.walk(print)
        for section in self.config_obj.sections:
            print(section)
            for key in self.config_obj[section]:
                print('    ', self.config_obj[section][key])
|
||||||
|
|
||||||
|
class EnvironmentVariables:
    """Placeholder: creating an instance raises NotImplementedError."""

    def __init__(self):
        raise NotImplementedError
|
||||||
|
|
||||||
|
class ConfigurationItemNotFoundError(Exception):
    """Error type for a configuration item that could not be found."""
|
||||||
|
|
540
lib/app_skellington/cli.py
Normal file
540
lib/app_skellington/cli.py
Normal file
@ -0,0 +1,540 @@
|
|||||||
|
import argparse
|
||||||
|
import inspect
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import app_skellington
|
||||||
|
from ._bootstrap import _bootstrap_logger
|
||||||
|
from . import app_container
|
||||||
|
|
||||||
|
# If explicit fail is enabled, any command with at least one unknown
|
||||||
|
# argument will be rejected entirely. If not enabled, unknown arguments
|
||||||
|
# will be ignored.
|
||||||
|
EXPLICIT_FAIL_ON_UNKNOWN_ARGS = True
|
||||||
|
|
||||||
|
class CommandTree:
|
||||||
|
"""
|
||||||
|
Command-line interface to hold a menu of commands. You can register
|
||||||
|
commands (functions or methods) in a CommandTree which will generate
|
||||||
|
a corresponding argparse.ArgumentParser (and nested SubParsers) that
|
||||||
|
map function/method arguments into argparse Parameters. Then, you
|
||||||
|
can translate command-line arguments into invoking the function.
|
||||||
|
|
||||||
|
Commands must be registered before being invoked. You create nested
|
||||||
|
SubMenu(s). If function parameters have defaults, those will be
|
||||||
|
available for override else they use the function defaults.
|
||||||
|
|
||||||
|
Print helpful information:
|
||||||
|
|
||||||
|
./scriptname -h # View tier-0 help and usage doc
|
||||||
|
./scriptname [submenu] -h # View submenu help and usage doc
|
||||||
|
./scriptname [submenu] [command] -h # View command documentation and parameters
|
||||||
|
|
||||||
|
argparse is finicky about argument placement:
|
||||||
|
|
||||||
|
./scriptname
|
||||||
|
[application arguments]
|
||||||
|
[submenu] [submenu arguments]
|
||||||
|
[command] [command arguments]
|
||||||
|
|
||||||
|
For example,
|
||||||
|
|
||||||
|
./scriptname --option="value" [submenu] [command]
|
||||||
|
|
||||||
|
is different than
|
||||||
|
|
||||||
|
./scriptname [submenu] [command] --option="value"
|
||||||
|
|
||||||
|
in that option is being applied to the application in the first example and
|
||||||
|
applied to the refresh_datasets command (under the nhsn command group) in
|
||||||
|
the second. In the same way the -h, --help options print different docs
|
||||||
|
depending on where the help option was passed.
|
||||||
|
"""
|
||||||
|
def __init__(self):
|
||||||
|
self.root_parser = argparse.ArgumentParser()
|
||||||
|
self.submenu_param = None # submenu_param is the variable name
|
||||||
|
# of the root submenu argument, i.e. the arg
|
||||||
|
# in root_parser which selects the submenu.
|
||||||
|
self.entries = {}
|
||||||
|
# NOTE(MG) Implementation note:
|
||||||
|
# CommandTree uses only one of these internal structures (i.e. mutually exclusive),
|
||||||
|
# 'entries' is used when there is a submenu linked to multiple commands.
|
||||||
|
# '_cmd_tree_is_single_command' and '_single_command' instead are used
|
||||||
|
# when the CommandTree is linked to one and only one command.
|
||||||
|
self._cmd_tree_is_single_command = False
|
||||||
|
self._single_command = None
|
||||||
|
|
||||||
|
def print_tree(self):
|
||||||
|
import pprint
|
||||||
|
pprint.pprint(self.entries)
|
||||||
|
|
||||||
|
def add_argument(self, *args, **kwargs):
    """
    Add an argument to the root parser.

    All positional and keyword arguments are handed unchanged to the root
    ArgumentParser's add_argument().
    """
    _bootstrap_logger.info('adding argument to root parser: %s and %s', args, kwargs)
    parser = self.root_parser
    parser.add_argument(*args, **kwargs)
|
||||||
|
|
||||||
|
def init_submenu(self, param_name, is_required=False):
    """
    Create the root-level submenu, initially without entries.

    A sub-parser slot named param_name is added to the root parser, and
    the SubMenu node wrapping it is stored in entries under that name.

    Returns:
        The new SubMenu node, which can have submenus and commands
        attached to it.
    """
    # Creates an argument as a slot in the underlying argparse.
    subparser_options = dict(
        dest=param_name,
        metavar=param_name,
        required=is_required,
    )
    subparsers = self.root_parser.add_subparsers(**subparser_options)

    root_menu = SubMenu(self, subparsers, param_name)
    root_menu.submenu_path = ''
    root_menu.var_name = param_name

    _bootstrap_logger.info('Initialized root-level submenu: Parameter = \'%s\'', param_name)
    self.entries[param_name] = root_menu
    self.submenu_param = param_name

    return root_menu
|
||||||
|
|
||||||
|
def register_command(
    self, func, cmd_name=None, func_signature=None,
    docstring=None
):
    """
    When no submenu functionality is desired, this links a single
    command into underlying argparse options.

    Every parameter of the command (except 'self') becomes a positional
    argument on the root parser. Parameters that have a default value are
    optional on the command line and fall back to that default.

    Args:
        func: function or bound method to run for the command.
        cmd_name: name of the command; defaults to func.__name__.
        func_signature: inspect.Signature describing the parameters;
            defaults to the signature of func.
        docstring: documentation for the command; defaults to func.__doc__.

    Raises:
        Exception: func is neither a function nor a method.
    """
    # begin copy-paste from SubMenu.register_command
    if not (inspect.isfunction(func) or inspect.ismethod(func)):
        raise Exception('bad value passed in for function')

    if not cmd_name:
        cmd_name = func.__name__

    if func_signature is None:
        func_signature = inspect.signature(func)

    if docstring is None:
        docstring = func.__doc__

    sig = func_signature
    params = sig.parameters

    # help is displayed next to the command in the submenu enumeration or
    # list of commands:
    # NOTE(review): help_text and description_text are computed but not
    # used anywhere in this method.
    help_text = HelpGenerator.generate_help_from_sig(docstring)
    # description is displayed when querying help for the specific command:
    description_text = HelpGenerator.generate_description_from_sig(docstring)
    # end copy-paste from SubMenu.register_command

    # begin copy-paste then edited from SubMenu.register_command
    # For each parameter in the function create an argparse argument in
    # the root ArgumentParser:
    for key, param in params.items():
        if key == 'self':
            continue

        # Compare against the 'empty' sentinel rather than searching the
        # parameter's text for '=': an annotation may itself contain '='.
        if param.default is not inspect.Parameter.empty:
            if param.default is None:
                helptext = 'default provided'
            else:
                helptext = "default = '{}'".format(param.default)
            self.root_parser.add_argument(
                key,
                help=helptext,
                nargs='?',
                default=param.default)
        else:
            helptext = 'required'
            self.root_parser.add_argument(
                key,
                help=helptext)

    # Build the CommandEntry structure
    cmd = CommandEntry()
    cmd.argparse_node = self.root_parser
    cmd.cmd_name = cmd_name
    cmd.func_signature = sig
    cmd.callback = func

    registered_name = cmd_name
    _bootstrap_logger.info('registered command: %s', registered_name)
    # end copy-paste then edited from SubMenu.register_command

    self._cmd_tree_is_single_command = True
    self._single_command = cmd
    # NOTE(review): this assigns '_entries', while the constructor
    # initializes 'entries' — confirm which attribute is intended.
    self._entries = None
|
||||||
|
|
||||||
|
# def _validate(self):
|
||||||
|
# pass
|
||||||
|
# # TODO(MG):
|
||||||
|
# # subparser can not be empty, needs to have parsers attached
|
||||||
|
|
||||||
|
def parse(self, args=None):
    """
    Parse command-line arguments with the root parser.

    Arguments
    ---------
    args (optional):
        Sequence of argument strings to parse. When None, the arguments
        the process was started with (sys.argv[1:]) are used.

    Returns
    -------
    A 3-tuple (pargs, unk, success):
        * (namespace, unrecognized-argument list, True) when parsing
          succeeds. Unrecognized arguments are handed back to the caller,
          they are not treated as an error here.
        * (None, None, False) when the parser raised SystemExit.
    """
    if args is None:
        args = sys.argv[1:]

    try:
        # On error the parser prints its own messages and raises SystemExit.
        # Note: SystemExit is also raised when the '-h' argument is supplied.
        pargs, unk = self.root_parser.parse_known_args(args)
    except SystemExit:
        return None, None, False

    return pargs, unk, True
|
||||||
|
|
||||||
|
def run_command(self, args=None):
    """
    Parse the arguments, look up the selected command and invoke it.

    Arguments
    ---------
    args (optional):
        Argument strings, passed straight through to self.parse().

    Returns
    -------
    None when parsing did not succeed, False when parsing or the command
    lookup failed, otherwise the value returned by the invoked command.
    """
    parsed, unknown, parsed_ok = self.parse(args)

    if not parsed_ok:
        _bootstrap_logger.info('SystemExit: Perhaps user invoked --help')
        return

    if parsed is False and unknown is False:
        _bootstrap_logger.error('failed parsing args')
        return False

    _bootstrap_logger.info('received args from shell: %s', parsed)

    # The lookup and invocation below work on a plain dictionary of the
    # parsed values rather than on the namespace object.
    arg_map = vars(parsed)

    command = self._lookup_command(arg_map)
    if command is not None:
        return self._invoke_command(command, arg_map)

    print('cmd is None')
    _bootstrap_logger.error('failed to find command')
    return False
|
||||||
|
|
||||||
|
def _lookup_command(self, args):
    """
    Resolve parsed argument values to the CommandEntry they select.

    Arguments
    ---------
    args:
        Dictionary of parsed argument values. Every submenu-selection key
        consumed while walking down the menus is removed from it in-place.

    Returns
    -------
    The selected CommandEntry.

    Raises
    ------
    app_container.NoCommandSpecified when the values do not lead to a
    registered command.
    """
    # In the case there is at-most one command registered in
    # the CommandTree with no SubMenu (submenu will be disabled
    # in this case):
    if self._cmd_tree_is_single_command:
        assert self._cmd_tree_is_single_command is True, 'corrupt data structure in CommandMenu'
        assert self._entries is None, 'corrupt data structure in CommandMenu'
        assert isinstance(self._single_command, CommandEntry), 'corrupt data structure in CommandMenu'
        return self._single_command

    # There is at least one submenu we need to go down:
    assert self._single_command is None, 'corrupt data structure in CommandMenu'
    assert self._cmd_tree_is_single_command == False, 'corrupt data structure in CommandMenu'

    # Key or variable name used by argparse to store the submenu options
    argparse_param = self.submenu_param  # e.g.: submenu_root
    submenu = self.entries[argparse_param]

    while True:
        if argparse_param not in args:
            # Previously this printed a message, blocked on a debugging
            # input() prompt and then failed with KeyError when deleting
            # the missing key. Report the condition as "no command".
            _bootstrap_logger.error(
                'root menu parameter not found in args: %s', argparse_param)
            raise app_container.NoCommandSpecified('No command specified.')

        # Pop the selection so that only the command's own parameters
        # remain in args once the command has been found.
        val = args.pop(argparse_param)
        _bootstrap_logger.debug('argparse command is \'{}\' = {}'.format(argparse_param, val))

        lookup = submenu.entries.get(val)
        _bootstrap_logger.debug('lookup, entries[{}] = {}'.format(val, lookup))

        if isinstance(lookup, SubMenu):
            # Descend one level; the next selection is stored under the
            # child menu's own parameter name.
            submenu = lookup
            argparse_param = submenu.var_name
        elif isinstance(lookup, CommandEntry):
            return lookup
        else:
            raise app_container.NoCommandSpecified('No command specified.')
|
||||||
|
|
||||||
|
def _invoke_command(self, cmd, args):
    """
    Call a command's callback with values taken from the parsed arguments.

    Arguments
    ---------
    cmd:
        Entry providing `callback` and `func_signature`.

    args:
        Dictionary of parsed argument values keyed by parameter name.

    Returns
    -------
    Whatever the callback returns.
    """
    callback = cmd.callback

    # Walk the signature in declaration order and pick up the value of every
    # parameter that is present in args; the values are passed positionally.
    call_args = [
        args[name]
        for name in cmd.func_signature.parameters
        if name in args
    ]

    _bootstrap_logger.info('function: %s', callback)
    _bootstrap_logger.info('function args: %s', call_args)
    return callback(*call_args)
|
||||||
|
|
||||||
|
def _get_subparser(self):
    """
    Return the second action registered on the root parser.

    NOTE(review): relies on private argparse attributes; index 1 is
    presumably the subparsers action that follows the automatic help
    action - confirm against how root_parser is built.
    """
    registered_actions = self.root_parser._subparsers._actions
    return registered_actions[1]
|
||||||
|
|
||||||
|
class SubMenu:
    """
    One level of the command menu, wrapping an argparse subparsers action.

    self.entries maps each user-facing entry name to either a CommandEntry
    (see register_command) or a nested SubMenu (see create_submenu).
    """
    def __init__(self, parent, subparsers_obj, name):
        self.parent = parent # Reference to root CommandTree
        self.subparsers_obj = subparsers_obj
        self.name = name
        self.submenu_path = None
        # Name of the argparse parameter that stores the entry chosen in this
        # menu. create_submenu() assigns it on the menus it creates; it is
        # initialised here so the attribute always exists.
        self.var_name = None

        self.entries = {}

    def register_command(
        self, func, cmd_name=None, func_signature=None,
        docstring=None
    ):
        """
        Registers a command as an entry in this submenu. Provided function is
        converted into argparse arguments and made available to the user.

        Arguments
        ---------
        func:
            Callback function which will be mapped
            to the submenu entry.

        cmd_name (optional):
            User-facing entry name. By default will be the function name.
            The user will be able to use [cmd_name] [arg, ...] to
            invoke the callback function.

        func_signature: optionally, you can pass in the
            inspect.signature(). If None, will inspect the
            incoming func. Note on internals: This is used
            to pass the function signature of the command
            function while having the callback point to a
            function partial which executes some other code.
            This hook is used to inject dependencies and then
            execute the command function.

        docstring (optional):
            Text the help and description of the entry are generated
            from. By default will be the docstring of func.

        Raises
        ------
        TypeError when func is neither a function nor a method.
        """
        if not (inspect.isfunction(func) or inspect.ismethod(func)):
            raise TypeError('bad value passed in for function')

        if not cmd_name:
            cmd_name = func.__name__

        if func_signature is None:
            func_signature = inspect.signature(func)

        if docstring is None:
            docstring = func.__doc__

        sig = func_signature
        params = sig.parameters

        # help is displayed next to the command in the submenu enumeration or
        # list of commands:
        help_text = HelpGenerator.generate_help_from_sig(docstring)
        # description is displayed when querying help for the specific command:
        description_text = HelpGenerator.generate_description_from_sig(docstring)

        # Entry in local argparse._SubParsersAction
        # type = ArgumentParser
        child_node = self.subparsers_obj.add_parser(
            cmd_name, # Note: cmd_name here will be the VALUE
                      # passed into the argparse arg VARIABLE NAME
                      # created when the SubMenu/argparse.add_subparsers()
                      # was created.
            help=help_text,
            description=description_text
        )

        # For each parameter in the function create an argparse argument in
        # the child ArgumentParser created for this menu entry:
        for key, param in params.items():
            if key == 'self':
                continue

            # A parameter has a default exactly when its default is not the
            # Parameter.empty sentinel. (Searching str(param) for '=' also
            # matched annotations whose text contains '='.)
            if param.default is not inspect.Parameter.empty:
                if param.default is None:
                    helptext = 'default provided'
                else:
                    helptext = "default = '{}'".format(param.default)
                child_node.add_argument(
                    key,
                    help=helptext,
                    nargs='?',
                    default=param.default)
            else:
                helptext = 'required'
                child_node.add_argument(
                    key,
                    help=helptext)

        # Build the CommandEntry structure
        cmd = CommandEntry()
        cmd.argparse_node = child_node
        cmd.cmd_name = cmd_name
        cmd.func_signature = sig
        cmd.callback = func

        registered_name = '{}.{}'.format(
            self.submenu_path,
            cmd_name)
        _bootstrap_logger.info('registered command: %s', registered_name)
        self.entries[cmd_name] = cmd

    def create_submenu(
        self, var_name, cmd_entry_name=None, is_required=False
    ):
        """
        Creates a child-submenu.

        Arguments
        ---------
        var_name:
            A code-facing argparse parameter used to store the
            value/entry chosen by the user.

        cmd_entry_name:
            A user-facing name used to select created submenu.
            If not provided, the user-facing command name defaults
            to the same name as the code-facing argparse parameter

        is_required:
            Switches if a value must be selected in the created submenu.
            If not, it's an optional positional argument.

        Returns
        -------
        The created SubMenu, which is also stored in self.entries under
        cmd_entry_name.
        """
        if cmd_entry_name is None:
            cmd_entry_name = var_name

        # Create an entry in self's submenu:
        # type = ArgumentParser
        entry_node = self.subparsers_obj.add_parser(
            cmd_entry_name,
            help='sub-submenu help',
            description='sub-sub description')

        # Turn entry into a submenu of its own:
        # type = _SubParsersAction
        subp_node = entry_node.add_subparsers(
            dest = var_name,
            metavar = var_name,
            required = is_required)

        submenu = SubMenu(
            self.parent,
            subp_node,
            cmd_entry_name)

        submenu.var_name = var_name

        submenu.submenu_path = '{}.{}'.format(self.submenu_path, cmd_entry_name)
        submenu_name = submenu.submenu_path

        _bootstrap_logger.info('registered submenu: %s', submenu_name)
        self.entries[cmd_entry_name] = submenu
        return submenu

    def __repr__(self):
        # List the registered entry names (previously a fixed placeholder).
        return 'SubMenu({})<{}>'.format(
            self.name,
            ','.join(str(entry_name) for entry_name in self.entries)
        )
|
||||||
|
|
||||||
|
class CommandEntry:
    """
    Record describing a single command in the CLI.

    It keeps the command name, the signature of the function that is to be
    invoked (i.e. its original parameters), a reference to that function and
    a callback wrapper which, by convention, builds the objects the function
    needs (injecting dependencies, etc.) before running it.

    The CLI module translates the function parameters into argparse options
    (generating the documentation as well) and, in the other direction,
    turns parsed argparse options back into a function call.
    """
    def __init__(self):
        # All fields start out empty and are filled in by whoever registers
        # the command.
        self.argparse_node = None

        self.cmd_name = None # Don't think we need. And needs to be changed
                             # from SubMenu
        self.menu_path = None

        self.func_signature = None
        self.func_ref = None
        self.callback = None

    def __repr__(self):
        return 'CommandEntry<{name}>'.format(name=self.cmd_name)
|
||||||
|
|
||||||
|
class HelpGenerator:
    """
    Builds the help and description texts of a command from its docstring.
    """
    # Lazily matches everything up to (not including) the first '.', '?'
    # or '!'; DOTALL lets the match run across line breaks.
    _FIRST_SENTENCE = re.compile(r'(.*?)[.?!]', re.MULTILINE | re.DOTALL)

    def __init__(self):
        pass

    @staticmethod
    def _first_sentence(doctext):
        """
        Return the text before the first '.', '?' or '!' with a '.' appended.

        None is returned unchanged, and so is text that contains none of
        the three terminators.
        """
        if doctext is None:
            return doctext
        match = HelpGenerator._FIRST_SENTENCE.match(doctext)
        if match:
            return match.group(1) + '.'
        return doctext

    @staticmethod
    def generate_help_from_sig(doctext):
        """
        The 'help' text is displayed next to the command when enumerating
        the submenu commands.

        Returns the first sentence of doctext (see _first_sentence).
        """
        return HelpGenerator._first_sentence(doctext)

    @staticmethod
    def generate_description_from_sig(doctext):
        """
        The 'description' paragraph is provided when the user requests help
        on a specific command.

        Currently returns the same first sentence as generate_help_from_sig.
        """
        return HelpGenerator._first_sentence(doctext)
|
||||||
|
|
165
lib/app_skellington/log.py
Normal file
165
lib/app_skellington/log.py
Normal file
@ -0,0 +1,165 @@
|
|||||||
|
import appdirs
|
||||||
|
import colorlog
|
||||||
|
import logging
|
||||||
|
import logging.config
|
||||||
|
import os
|
||||||
|
|
||||||
|
from ._bootstrap import _bootstrap_logger
|
||||||
|
from . import _util
|
||||||
|
|
||||||
|
# Logging configuration used when the application does not supply one.
# Level names are written as lowercase strings here; they are replaced by
# the numeric logging levels before the dictionary is handed to the logging
# module (see LoggingLayer.transform_config).
DEFAULT_LOG_SETTINGS = {
    # The logging dictionary schema requires a version, and 1 is the only
    # valid value.
    'version': 1,

    'formatters': {
        'colored': {
            'class': 'colorlog.ColoredFormatter',
            # 'format': '%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(white)s%(message)s'
            'format': '%(white)s%(name)7s%(reset)s|%(log_color)s%(message)s',
        }
    },

    'handlers': {
        'stderr': {
            'class': 'logging.StreamHandler',
            'level': 'debug',
            'formatter': 'colored'
        }
    },

    'loggers': {
        'root': {
            'handlers': ['stderr',],
            'level': 'debug'
        },
        'app_skellington': {
            # 'handlers': ['stderr',],
            'level': 'critical',
            # Must be a real boolean: the string 'false' is truthy and
            # would leave propagation switched on.
            'propagate': False
        }
    }
}
|
||||||
|
|
||||||
|
class LoggingLayer:
    """
    Hands out named loggers and configures the logging module from a
    configuration dictionary.
    """
    # Level names accepted in the configuration, mapped to the numeric
    # levels of the logging module.
    _LEVELS_BY_NAME = {
        'critical': logging.CRITICAL,
        'error': logging.ERROR,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG,
        'all': logging.NOTSET,
    }

    def __init__(self, appname, appauthor, config=None):
        # NOTE: config is accepted but not used by the constructor; the
        # configuration is passed to configure_logging() instead.
        self.appname = appname
        self.appauthor = appauthor
        self.loggers = {}

    def __getitem__(self, k):
        """
        Returns Logger object named <k>.

        Example:
            log = LoggingLayer(...)
            log['db'].info('loaded database module')

        Args:
            k: the name of the logger to retrieve (k, i.e. key)
        """
        logger = self.loggers.get(k)
        if logger is None:
            logger = logging.getLogger(k)
            self.loggers[k] = logger
        return logger

    def configure_logging(self, config_dict=None):
        """
        Set the logging level for the process. Verbosity is controlled by a
        parameter in the config.

        Advice: While DEBUG verbosity is useful to debug, it can produce too much
        noise for typical operation.

        Args:
            config_dict: logging configuration; it is transformed in-place
                (see transform_config). When None, a copy of
                DEFAULT_LOG_SETTINGS is used.
        """
        if config_dict is None:
            import copy

            _bootstrap_logger.debug('No application logging configuration provided. Using default')
            # Work on a copy: transform_config() rewrites the dictionary
            # in-place, which would otherwise corrupt the shared defaults
            # and break the next call that relies on them.
            config_dict = copy.deepcopy(DEFAULT_LOG_SETTINGS)

        self.transform_config(config_dict)

        try:
            # TODO(MG) switch to pretty-print, as it'd be more human readable
            _bootstrap_logger.debug('Log configuration: %s', config_dict)
            logging.config.dictConfig(config_dict)
        except Exception as ex:
            # Best effort: a broken logging configuration must not stop the
            # application, so the problem is only reported.
            print('unable to configure logging:', ex, type(ex))

    def transform_config(self, config_dict):
        """
        Fix some incompatibilities and differences between the config-file logging
        parameters and the final config dictionary passed into the logging module.

        The dictionary is modified in-place. It must contain the 'handlers'
        and 'loggers' sections, and each of their entries must have a
        'level' (KeyError otherwise).
        """
        # Version should be hard-coded 1, per Python docs
        if config_dict.get('version', 1) != 1:
            _bootstrap_logger.warning("logging['version'] must be '1' per Python docs")
        config_dict['version'] = 1

        self._add_own_logconfig(config_dict)

        # Replace handler level strings with value integers from module
        for handler_cfg in config_dict['handlers'].values():
            self._convert_str_to_loglevel(handler_cfg, 'level')

        # Replace logger level strings with value integers from module
        for logger_cfg in config_dict['loggers'].values():
            self._convert_str_to_loglevel(logger_cfg, 'level')

        # Replace 'root' logger with '', logging module convention for root handler
        # Note: '' is disallowed in ConfigObj (hence the reason for this replacement)
        loggers = config_dict['loggers']
        if 'root' in loggers:
            loggers[''] = loggers.pop('root')

        # Evaluate the full filepath of the file handler
        if 'file' not in config_dict['handlers']:
            return

        file_handler = config_dict['handlers']['file']
        filename = file_handler['filename']
        if not os.path.isabs(filename):
            # Relative names are placed in the per-user log directory of
            # the application.
            dirname = appdirs.user_log_dir(self.appname, self.appauthor)
            _util.ensure_dir_exists(dirname)
            file_handler['filename'] = os.path.join(dirname, filename)

    def _add_own_logconfig(self, config_dict):
        """
        Switch the 'app_skellington' logger to debug level when the
        APPSKELLINGTON_ENABLE_LOGGING environment variable is set to a
        non-empty value. Otherwise config_dict is left untouched.
        """
        if os.environ.get('APPSKELLINGTON_ENABLE_LOGGING', None):
            if 'app_skellington' not in config_dict['loggers']:
                config_dict['loggers']['app_skellington'] = {
                    # propagate has to be a boolean; the string 'false'
                    # would be truthy.
                    'level': 'debug', 'propagate': False
                }
            else:
                config_dict['loggers']['app_skellington']['level'] = 'debug'

    def _convert_str_to_loglevel(self, dict_, key):
        """
        Convert a dictionary value from a string representation of a log level
        into the numeric value of that log level. The value is modified in-place
        and is passed in by a dictionary reference and a key name.

        Values that are not one of the known level names are left as they
        are. KeyError is raised when key is missing from dict_.

        For example,
            d = {'loggers': {'cas': {'level': 'critical'}}}
            convert_str_to_loglevel(d['loggers']['cas'], 'level')
            =>
            d is now {'loggers': {'cas': {'level': logging.CRITICAL}}}
        """
        level_name = dict_[key]
        if isinstance(level_name, str) and level_name in self._LEVELS_BY_NAME:
            dict_[key] = self._LEVELS_BY_NAME[level_name]
|
||||||
|
|
45
lib/setup.py
Normal file
45
lib/setup.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
#
|
||||||
|
# First, enable the python environment you want to install to, or if installing
|
||||||
|
# system-wide then ensure you're logged in with sufficient permissions
|
||||||
|
# (admin or root to install to system directories)
|
||||||
|
#
|
||||||
|
# installation:
|
||||||
|
#
|
||||||
|
# $ ./setup.py install
|
||||||
|
#
|
||||||
|
# de-installation:
|
||||||
|
#
|
||||||
|
# $ pip uninstall <app>
|
||||||
|
|
||||||
|
|
||||||
|
from setuptools import setup
|
||||||
|
|
||||||
|
__project__ = 'app_skellington'
__version__ = '0.1.0'

setup(
    name = __project__,
    version = __version__,
    description = 'A high-powered 2-level CLI framework',
    author = 'Mathew Guest',
    author_email = 'mathewguest@gmail.com',
    url = 'https://git-mirror.zavage-software.com',

    # Third-party dependencies; will be automatically installed.
    # Note: pprint is part of the Python standard library and therefore
    # must not be listed here (the name would be resolved against the
    # package index instead).
    install_requires = [
        'appdirs',
        'configobj',
        'colorlog',
    ],

    # Local packages to be installed (our packages)
    packages = [
        'app_skellington',
    ],

)
|
||||||
|
|
6
road2philosophy.py
Executable file
6
road2philosophy.py
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import wikicrawl
|
||||||
|
import settings
|
||||||
|
# Start the wikicrawl application through its exported entry point.
wikicrawl.start_app()
# Wait for <enter> before the script ends (presumably so that a console
# window opened just for this script stays visible - confirm).
input('<enter> to exit')
|
||||||
|
|
31
setup.py
Normal file → Executable file
31
setup.py
Normal file → Executable file
@ -3,23 +3,42 @@
|
|||||||
# required third-party dependencies and package the app. You can also
|
# required third-party dependencies and package the app. You can also
|
||||||
# install the application system-wide.
|
# install the application system-wide.
|
||||||
|
|
||||||
from setuptools import setup
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
__project__ = 'wikicrawl'
|
__project__ = 'wikicrawl'
|
||||||
# If you're looking for a versioning scheme, one revered pattern
|
# If you're looking for a versioning scheme, one revered pattern
|
||||||
# can be read about at http://semver.org
|
# can be read about at http://semver.org
|
||||||
__version__ = '0.9.0'
|
__version__ = '0.9.0'
|
||||||
|
|
||||||
setup(name = __project__,
|
setup(
|
||||||
|
name = __project__,
|
||||||
version = __version__,
|
version = __version__,
|
||||||
description = '',
|
description = '',
|
||||||
author = '',
|
author = '',
|
||||||
author_email = '',
|
author_email = '',
|
||||||
url = '',
|
url = '',
|
||||||
install_requires = ('yandex.translate',
|
install_requires = (
|
||||||
|
'yandex.translate',
|
||||||
'selenium',
|
'selenium',
|
||||||
'colorlog',
|
'colorlog',
|
||||||
'baker'
|
'baker',
|
||||||
),
|
|
||||||
packages = ('wikicrawl',))
|
## Additional dependencies required from app_skellington:
|
||||||
|
'appdirs',
|
||||||
|
'configobj',
|
||||||
|
'colorlog',
|
||||||
|
'pprint'
|
||||||
|
),
|
||||||
|
packages = find_packages(
|
||||||
|
where='.',
|
||||||
|
include=('*',),
|
||||||
|
exclude=()
|
||||||
|
),
|
||||||
|
package_dir = {
|
||||||
|
'app_skellington': 'lib'
|
||||||
|
},
|
||||||
|
scripts = (
|
||||||
|
'road2philosophy.py',
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
@ -7,6 +7,5 @@
|
|||||||
# We export config.init() as a reference to wikicrawl.config.init() and
|
# We export config.init() as a reference to wikicrawl.config.init() and
|
||||||
# wikicrawl.main as a reference to wikicrawl.cli.main
|
# wikicrawl.main as a reference to wikicrawl.cli.main
|
||||||
|
|
||||||
from .config import init
|
from .app import start_app
|
||||||
from .main import main
|
|
||||||
|
|
||||||
|
87
wikicrawl/app.py
Normal file
87
wikicrawl/app.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# The command-line interface module creates an interface for
|
||||||
|
# interacting with the python program (wikicrawl). This is an implementation
|
||||||
|
# of the baker demo shown previously. The user can type in commands to
|
||||||
|
# make the program do things.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
if sys.platform == 'linux':
|
||||||
|
import readline # Needed for command history <up> and <down> arrows to work
|
||||||
|
|
||||||
|
from . import browser
|
||||||
|
from . import dal
|
||||||
|
from . import model
|
||||||
|
|
||||||
|
from app_skellington import _util
|
||||||
|
import app_skellington
|
||||||
|
from app_skellington._util import register_class_as_commands
|
||||||
|
|
||||||
|
# Problem pages:
|
||||||
|
# Decision (from politics)
|
||||||
|
# Malaysia (goes inside parenthesis)
|
||||||
|
# Soft-sediment_deformation_structures (doesn't find link)
|
||||||
|
# Chemicals (loops at philosophical)
|
||||||
|
|
||||||
|
class InteractiveInterface(app_skellington.ApplicationContainer):
    """
    The wikicrawl application: an app_skellington application container
    with the wikicrawl services and the commands of model.Model registered.
    """
    def __init__(self, *args, **kwargs):
        # Both file locations are resolved before the base class is
        # initialised, because they are arguments of its constructor.
        spec_path = _util.get_asset(__name__, 'config.spec')
        ini_path = self._get_config_filepath(
            'road2philosophy', '', 'config.ini'
        )

        super().__init__(
            *args,
            configspec_filepath=spec_path,
            config_filepath=ini_path,
            **kwargs
        )

        # Configure logging:
        # log_config = self.ctx.config['logging']
        # self.ctx.log.configure_logging(log_config)
        # self.ctx.log.configure_logging()

    def invoke_from_cli(self):
        """
        Load the requested command and invoke it; print a hint and return
        when loading the command did not succeed.
        """
        loaded = self.load_command()
        if loaded:
            # load config
            self.invoke_command()
            return
        print('Invalid command. Try -h for usage')

    def usage(self):
        """Print the (placeholder) usage text."""
        s = '''
...usage info to come ;)...
'''
        print(s)

    def _cli_options(self):
        """Register the application's own command-line switches."""
        self.cli.add_argument(
            '--usage',
            action='store_true',
            help='Prints program usage information'
        )

    def _services(self):
        """
        Create the services and register each one under its name.

        The order matters: the model is built from the browser and the
        data layer.
        """
        self['ctx'] = lambda: self.ctx

        self.dal = dal.DataLayer(self.ctx)
        self['dal'] = lambda: self.dal

        self.browser = browser.Browser(self.ctx)
        self['browser'] = lambda: self.browser

        self.model = model.Model(self.ctx, self.browser, self.dal)
        self['model'] = lambda: self.model

    def _command_menu(self):
        """Create the root 'command' submenu and register model.Model in it."""
        self.sm_root = self.cli.init_submenu('command')

        register_class_as_commands(
            self, self.sm_root,
            model.Model
        )
|
||||||
|
|
||||||
|
def start_app(config=None):
    """
    Create the InteractiveInterface application and hand control to its
    invoke_from_cli() method.

    config is accepted but not used by this function.
    """
    app = InteractiveInterface()
    app.invoke_from_cli()
|
||||||
|
|
@ -12,39 +12,44 @@
|
|||||||
import selenium
|
import selenium
|
||||||
import selenium.webdriver
|
import selenium.webdriver
|
||||||
|
|
||||||
from . import config
|
class Browser:
|
||||||
from . import log
|
def __init__(self, ctx):
|
||||||
|
self.ctx = ctx
|
||||||
|
|
||||||
# This function has a parameter (driver) that passes in a value. In this case,
|
# This function has a parameter (driver) that passes in a value. In this case,
|
||||||
# this driver variable defaults to the string 'chrome'. The code can call
|
# this driver variable defaults to the string 'chrome'. The code can call
|
||||||
# create_webdriver() which is the same as create_webdriver('chrome') but
|
# create_webdriver() which is the same as create_webdriver('chrome') but
|
||||||
# can alternatively call create_webdriver('firefox') and get different
|
# can alternatively call create_webdriver('firefox') and get different
|
||||||
# functionality.
|
# functionality.
|
||||||
def create_webdriver(driver='chrome'):
|
def create_webdriver(self, driver='chrome'):
|
||||||
if driver == 'chrome':
|
if driver == 'chrome':
|
||||||
return create_webdriver_chrome()
|
return self.create_webdriver_chrome()
|
||||||
elif driver == 'firefox':
|
elif driver == 'firefox':
|
||||||
return create_webdriver_firefox()
|
return self.create_webdriver_firefox()
|
||||||
elif driver == 'phantom':
|
elif driver == 'phantom':
|
||||||
return create_webdriver_phantom()
|
return self.create_webdriver_phantom()
|
||||||
else:
|
else:
|
||||||
log.LOGGER('browser').error('unable to handle webdriver request: %s' % driver)
|
log.LOGGER('browser').error('unable to handle webdriver request: %s' % driver)
|
||||||
return
|
return
|
||||||
|
|
||||||
def create_webdriver_firefox():
|
def create_webdriver_firefox(self):
|
||||||
profile = selenium.webdriver.FirefoxProfile()
|
profile = selenium.webdriver.FirefoxProfile()
|
||||||
profile.set_preference("general.useragent.override", config.obj.WEBDRIVER_USER_AGENT)
|
profile.set_preference(
|
||||||
|
"general.useragent.override",
|
||||||
|
self.ctx.config['app']['webdriver_user_agent'])
|
||||||
driver = selenium.webdriver.Firefox(profile)
|
driver = selenium.webdriver.Firefox(profile)
|
||||||
return driver
|
return driver
|
||||||
|
|
||||||
def create_webdriver_chrome():
|
def create_webdriver_chrome(self):
|
||||||
opt = selenium.webdriver.chrome.options.Options()
|
opt = selenium.webdriver.chrome.options.Options()
|
||||||
opt.add_argument('--user-agent=' + config.obj.WEBDRIVER_USER_AGENT)
|
opt.add_argument('--user-agent=' + self.ctx.config['app']['webdriver_user_agent'])
|
||||||
driver = selenium.webdriver.Chrome(executable_path=config.obj.CHROMEDRIVER_EXE,
|
driver = selenium.webdriver.Chrome(
|
||||||
chrome_options=opt)
|
executable_path=self.ctx.config['app']['chromedriver_exe'],
|
||||||
|
chrome_options=opt
|
||||||
|
)
|
||||||
return driver
|
return driver
|
||||||
|
|
||||||
def create_webdriver_phantom():
|
def create_webdriver_phantom(self):
|
||||||
driver = selenium.webdriver.PhantomJS()
|
driver = selenium.webdriver.PhantomJS()
|
||||||
return driver
|
return driver
|
||||||
|
|
||||||
|
203
wikicrawl/cli.py
203
wikicrawl/cli.py
@ -1,203 +0,0 @@
|
|||||||
# The command-line interface module creates an interface for
|
|
||||||
# interacting with the python program (wikicrawl). This is an implementation
|
|
||||||
# of the baker demo shown previously. The user can type in commands to
|
|
||||||
# make the program do things.
|
|
||||||
|
|
||||||
import baker
|
|
||||||
import sys
|
|
||||||
|
|
||||||
if sys.platform == 'linux':
|
|
||||||
import readline # Needed for command history <up> and <down> arrows to work
|
|
||||||
|
|
||||||
from . import log
|
|
||||||
from . import model
|
|
||||||
from . import config
|
|
||||||
|
|
||||||
# Problem pages:
|
|
||||||
# Decision (from politics)
|
|
||||||
# Malaysia (goes inside parenthesis)
|
|
||||||
# Soft-sediment_deformation_structures (doesn't find link)
|
|
||||||
# Chemicals (loops at philosophical)
|
|
||||||
|
|
||||||
commander = baker.Baker()
|
|
||||||
|
|
||||||
class InteractiveInterface:
|
|
||||||
def __init__(self):
|
|
||||||
# Instantiate the variable self.model as an object
|
|
||||||
# of instance of the Model class defined in the model
|
|
||||||
# module. model.Model refers to the Model class in the
|
|
||||||
# model module and this line creates a new variable (self.model)
|
|
||||||
# which is a variable that is an instance of Model, i.e.
|
|
||||||
# it has the type Model and has Model.methods() available
|
|
||||||
# to it.
|
|
||||||
#
|
|
||||||
# self.model is a variable that is attached to the instance/object
|
|
||||||
# returned by this constructor that has the type InteractiveInterface.
|
|
||||||
self.model = model.Model()
|
|
||||||
|
|
||||||
def run_command(self, args, main=True):
|
|
||||||
"""
|
|
||||||
Runs the command-line interface for a single command.
|
|
||||||
|
|
||||||
If called by InteractiveInterface.run(sys.argv), this method
|
|
||||||
will execute the commands and arguments specified on command
|
|
||||||
line when running this program. Alternatively, the code could
|
|
||||||
pass in a different set of arguments to specify what to do.
|
|
||||||
See start_command_loop() for more information.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
commander.run(argv=args, main=True, help_on_error=True,
|
|
||||||
instance=self)
|
|
||||||
except baker.CommandError as ex:
|
|
||||||
log.LOGGER['cli'].warn('incorrect user input: %s' % ex)
|
|
||||||
commander.usage()
|
|
||||||
except baker.TopHelp as ex:
|
|
||||||
commander.usage()
|
|
||||||
except Exception as ex:
|
|
||||||
log.LOGGER['cli'].error('caught general exception!!')
|
|
||||||
log.LOGGER['cli'].error(ex)
|
|
||||||
|
|
||||||
def start_command_loop(self):
    """
    Repeatedly asks the user what command to run until they exit.

    This method calls InteractiveInterface.run_command(args) a little
    bit differently. Instead of passing the arguments from the
    command-line that were passed in when invoking the python wikicrawl
    app, this asks the user for a line of textual input and passes
    those strings to run_command() as the arguments. This way, the user
    can access an interactive shell and repeatedly issue different
    commands while the application is running.
    """
    commander.usage()
    self.model.open_browser()
    while True:
        # Display to the user a command prompt. The dollar-sign is a
        # common indication of a shell that communicates to the user
        # that we are waiting for their textual input. input() writes
        # the prompt WITHOUT dropping to a newline, so the user types
        # their command on the same line as the printed '$ '.
        try:
            inp = input('$ ')
        except EOFError:  # <ctrl>+D sends "end of file" and exits the command loop
            break

        # Note on "arguments" (mg):
        # Whenever a program is run in windows or *nix, the operating
        # system passes in the command string that was used to invoke
        # the program. You can append data in that command to configure
        # switches or values going into the program on the fly. For
        # example, you can invoke this wikicrawl app in more than one
        # way. You can of course run "python launcher.py" to run the
        # software but you can also pass in an argument. You can
        # alternatively run "python launcher.py <argument> <argument>..."
        # and the operating system will provide the <argument> values into
        # the process that is running as variables.
        #
        # In a real world use case, many commands provide switches to
        # adjust what the program does. For example,
        #
        # The command:
        #     find music -name "*justin*bieber*"
        # runs the "find" program and asks to find all the filenames that match the
        # pattern *justin*bieber* in the "music" directory.
        # (music, -name, "*justin*bieber*") are argument parameters
        # that are passed into the program. The program is coded to
        # parse and interpret these values and execute differently based
        # on the values passed in. This is one way to pass in information
        # into a running program. Some other ways are to read from a file
        # (such as how we read from settings.py to load the runtime
        # configuration), from something called environment variables
        # (won't get into but another set of values provided to programs
        # from the operating system), or they can be hard-coded into
        # the application.
        #
        # Side note: arguments are not unique to python (almost all
        # programming languages implement arguments). The functionality
        # is defined by the application: some programs require arguments,
        # some are optional, and the syntax for sending in argument
        # parameters is different and defined by the individual programs.
        # Lastly, the first argument sent in is the script name or
        # filename of the script. In our case, the first argument is
        # the string "launcher.py". If the user invoked the command
        # as C:\Users\mguest\launcher.py then the first argument
        # would be C:\Users\mguest\launcher.py.

        # What this method (start_command_loop()) does is provide a
        # REPL shell which is a
        # read-eval-print-loop. It repeatedly asks the user for an
        # input (read), evaluates that input into an action (evaluate),
        # gives the user some feedback (print), and starts the process
        # over again (loop). When you call just "python", you are loading a
        # program that gives you a REPL interactive shell. The way
        # this wikicrawl app is implemented gives the user a REPL
        # that has commands to interact with wikipedia pages.

        # Because we take in the input as a single string, we do
        # a transformation to turn something like "play_multiple 5"
        # into ["launcher.py", "play_multiple", "5"] which is how
        # the arguments array would have been created if it were
        # passed in the initial command instead of typed and interpreted
        # as input as is done here.
        args = [sys.argv[0], ] + inp.split()

        # The user can at any point in the command pass the argument
        # switch "--help". If doing this, the command line interface
        # will instead print out the inline documentation associated
        # with this command and run nothing. For example, the user can
        # type "play_random_page --help" and the program will spit out
        # the generated documentation for the play_random_page command.
        # In our case, this documentation is created by the baker
        # library and will print out the docstring associated with the
        # method. Try it out in your shell (cmd.exe or powershell.exe)
        # by invoking
        #     python launcher.py play_random_page --help
        # You will see the program spit out the docstring of the
        # play_random_page method.
        if '--help' in args:
            args.remove('--help')
            try:
                print('command usage:')
                if len(args) > 1:
                    commander.usage(args[1])
                else:
                    # "--help" on its own names no command, so show
                    # the top-level usage instead of indexing past
                    # the end of the argument list.
                    commander.usage()
            except Exception as ex:
                # Best effort: e.g. help for an unknown command must
                # not end the shell, so report it and prompt again.
                print(type(ex), ex)
            continue

        self.run_command(args, main=False)
|
|
||||||
|
|
||||||
@commander.command
|
|
||||||
def play_specific_page(self, title):
|
|
||||||
pass
|
|
||||||
|
|
||||||
@commander.command
|
|
||||||
def play_random_page(self):
|
|
||||||
"""
|
|
||||||
Instructs the wikicrawl application to play the game on a random
|
|
||||||
article.
|
|
||||||
"""
|
|
||||||
self.model.play_random_page()
|
|
||||||
|
|
||||||
@commander.command
|
|
||||||
def play_multiple(self, n):
    """
    Plays the wikicrawl game <n>-times.
    """
    # n arrives as text typed by the user, so convert it before use.
    # Returns False when n cannot be read as a whole number; otherwise
    # falls through and returns None after playing.
    try:
        rounds = int(n)
    except (TypeError, ValueError):
        log.LOGGER['cli'].warning('failed to process "%s" as a parameter', n)
        return False
    for _ in range(rounds):
        self.model.play_random_page()
|
|
||||||
|
|
||||||
@commander.command
|
|
||||||
def exit(self):
|
|
||||||
"""
|
|
||||||
Immediately exit the program.
|
|
||||||
"""
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
|||||||
# config module defines a place to store the external configuration/settings
|
|
||||||
# and is used to provide an interface to the runtime configuration for the
|
|
||||||
# program.
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from . import log
|
|
||||||
|
|
||||||
obj = {}
|
|
||||||
|
|
||||||
def init(settings_obj):
|
|
||||||
global obj
|
|
||||||
obj = settings_obj
|
|
||||||
|
|
||||||
find_chromedriver_path()
|
|
||||||
|
|
||||||
|
|
||||||
log.init_logging()
|
|
||||||
|
|
||||||
def find_chromedriver_path():
|
|
||||||
print(__file__)
|
|
||||||
|
|
76
wikicrawl/config.spec
Normal file
76
wikicrawl/config.spec
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
[app]
|
||||||
|
# Filepath parameters - THESE MUST EXIST OR PROGRAM WILL NOT RUN!!
|
||||||
|
log_filename = string(min=0, max=255, default='/tmp/wikicrawl.log')
|
||||||
|
sqlite_dbfile = string(min=0, max=255, default='/home/mathew/.wikicrawler.db')
|
||||||
|
chromedriver_exe = string(min=0, max=255, default='/usr/bin/chromedriver')
|
||||||
|
# CHROMEDRIVER_EXE = 'C:\\Users\\mathew\\windows-share\\dev\\wikicrawl\\chromedriver.exe'
|
||||||
|
|
||||||
|
# Application Parameters
|
||||||
|
do_breakpoints = boolean(default=False)
|
||||||
|
page_delay = integer(min=0, max=2000, default=0)
|
||||||
|
|
||||||
|
# Web Driver Parameters
|
||||||
|
webdriver_user_agent = string(min=0, max=255, default='Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)')
|
||||||
|
|
||||||
|
# Requested browser and webdriver dependencies are required for this to work.
|
||||||
|
# This means you need to have installed on your system:
|
||||||
|
# Chrome + WebDriver for Chrome
|
||||||
|
# Firefox + geckodriver for Firefox
|
||||||
|
# phantomjs for phantom
|
||||||
|
# Options are 'chrome', 'firefox', 'phantom'
|
||||||
|
webdriver_browser = string(min=1, max=63, default='chrome')
|
||||||
|
|
||||||
|
# Wikipedia Parameters
|
||||||
|
page_base_url = string(min=1, max=255, default='https://www.wikipedia.org/')
|
||||||
|
|
||||||
|
# Supported Languages so far:
|
||||||
|
# German, English, Spanish, French, Italian, Portuguese, Polish, Russian
|
||||||
|
# 'de', 'en', 'es', 'fr', 'it', 'pl', 'pt', 'ru'
|
||||||
|
page_language = string(min=2, max=22, default='en')
|
||||||
|
|
||||||
|
# API Keys
|
||||||
|
# Yandex is a web REST API for translating between different languages.
|
||||||
|
yandex_api_key = string(min=0, max=1023, default='trnsl.1.1.20170825T194642Z.26862b9dd4c1a755.9490ed28de448ff67522c2854f262eff05ec0dc3')
|
||||||
|
|
||||||
|
[logging]
|
||||||
|
log_file = string(max=255, default='')
|
||||||
|
log_level = option('critical', 'error', 'warning', 'info', 'debug', default='info')
|
||||||
|
log_fmt = string(max=255, default='')
|
||||||
|
|
||||||
|
[[formatters]]
|
||||||
|
[[[colored]]]
|
||||||
|
() = string(default='colorlog.ColoredFormatter')
|
||||||
|
format = string(max=255, default='%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(white)s%(message)s')
|
||||||
|
|
||||||
|
[[[basic]]]
|
||||||
|
() = string(max=255, default='logging.Formatter')
|
||||||
|
format = string(max=255, default='%(levelname)s:%(name)s:%(asctime)s:%(message)s')
|
||||||
|
|
||||||
|
[[[forstorage]]]
|
||||||
|
() = string(max=255, default='logging.Formatter')
|
||||||
|
format = string(max=255, default='%(levelname)s:%(name)s:%(asctime)s:%(message)s')
|
||||||
|
|
||||||
|
[[handlers]]
|
||||||
|
[[[stderr]]]
|
||||||
|
class = string(max=255, default='logging.StreamHandler')
|
||||||
|
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
|
||||||
|
formatter = string(max=255, default='colored')
|
||||||
|
|
||||||
|
[[[file]]]
|
||||||
|
class = string(max=255, default='logging.handlers.RotatingFileHandler')
|
||||||
|
level = option('critical', 'error', 'warning', 'info', 'debug', default='warning')
|
||||||
|
formatter = string(max=255, default='forstorage')
|
||||||
|
filename = string(max=255, default='road2philosophy.log')
|
||||||
|
maxBytes = integer(min=0, max=33554432, default=33554432)
|
||||||
|
backupCount = integer(min=0, max=3, default=1)
|
||||||
|
|
||||||
|
[[loggers]]
|
||||||
|
[[[root]]]
|
||||||
|
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
|
||||||
|
handlers = string_list(max=8, default=list('file',))
|
||||||
|
|
||||||
|
[[[r2p]]]
|
||||||
|
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
|
||||||
|
handlers = string_list(max=8, default=list('stderr',))
|
||||||
|
propagate = boolean(default=False)
|
||||||
|
|
@ -4,9 +4,7 @@
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from . import config
|
|
||||||
|
|
||||||
class DataLayer:
|
class DataLayer:
|
||||||
def __init__(self):
|
def __init__(self, ctx):
|
||||||
pass
|
self.ctx = ctx
|
||||||
|
|
||||||
|
@ -1,39 +0,0 @@
|
|||||||
# log module is a wrapper around third-party colorlog library
|
|
||||||
# and provides an application-level interface to a logging system.
|
|
||||||
|
|
||||||
import colorlog
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from . import config
|
|
||||||
|
|
||||||
# Default python log severity levels:
|
|
||||||
# CRITICAL
|
|
||||||
# ERROR
|
|
||||||
# WARNING
|
|
||||||
# INFO
|
|
||||||
# DEBUG
|
|
||||||
|
|
||||||
LOGGER = None
|
|
||||||
|
|
||||||
class LoggingLayer:
    """
    Application-level access point to the configured loggers.

    The constructor applies a logging configuration dictionary and the
    instance then hands out loggers by name through the [] operator,
    remembering each one in self.loggers.
    """

    def __init__(self, config):
        """
        config -- dictionary passed as-is to logging.config.dictConfig().
        """
        # logging.config is a submodule that a plain "import logging"
        # does not load, so import it explicitly before using it.
        import logging.config

        self.loggers = {}
        logging.config.dictConfig(config)

    # Note on __getitem__:
    # __getitem__ overrides the functionality of the [] operator.
    # That means this code:
    #     objinstance = LoggingLayer(...)
    #     objinstance[foo] calls LoggingLayer.__getitem__(foo)
    # and returns the result.
    def __getitem__(self, k):
        """
        Returns the logger named k, fetching it from the logging module
        on first use and serving it from self.loggers afterwards.
        """
        logger = self.loggers.get(k)
        if logger is None:
            logger = logging.getLogger(k)
            self.loggers[k] = logger
        return logger
|
|
||||||
|
|
||||||
def init_logging():
|
|
||||||
global LOGGER
|
|
||||||
LOGGER = LoggingLayer(config.obj.LOG_SETTINGS)
|
|
||||||
|
|
@ -1,23 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from . import cli
|
|
||||||
from . import config
|
|
||||||
from . import util
|
|
||||||
|
|
||||||
def main():
|
|
||||||
user_interface = cli.InteractiveInterface()
|
|
||||||
|
|
||||||
if len(sys.argv) > 1: # Command line arguments were passed in
|
|
||||||
# command-line when invoking python
|
|
||||||
user_interface.run_command(sys.argv)
|
|
||||||
else:
|
|
||||||
user_interface.start_command_loop()
|
|
||||||
|
|
||||||
def verify_config_is_valid():
|
|
||||||
pass
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
||||||
|
|
@ -9,15 +9,15 @@
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from . import browser
|
|
||||||
from . import config
|
|
||||||
from . import dal
|
|
||||||
from . import log
|
|
||||||
from . import pages
|
from . import pages
|
||||||
from . import util
|
from . import util
|
||||||
|
|
||||||
class Model:
|
class Model:
|
||||||
def __init__(self):
|
def __init__(self, ctx, browser, dal):
|
||||||
|
self.ctx = ctx
|
||||||
|
self.browser = browser
|
||||||
|
self.dal = dal
|
||||||
|
|
||||||
self._webdriver = None
|
self._webdriver = None
|
||||||
self._translated_philosophy = None
|
self._translated_philosophy = None
|
||||||
|
|
||||||
@ -33,7 +33,7 @@ class Model:
|
|||||||
# always existing with or without knowing if it exists because if it
|
# always existing with or without knowing if it exists because if it
|
||||||
# hasn't been created yet then it will be created on-the-fly.
|
# hasn't been created yet then it will be created on-the-fly.
|
||||||
if not self._webdriver:
|
if not self._webdriver:
|
||||||
self._webdriver = browser.create_webdriver(config.obj.WEBDRIVER_BROWSER)
|
self._webdriver = self.browser.create_webdriver(self.ctx.config['app']['webdriver_browser'])
|
||||||
page_api = pages.LandingPage(self.webdriver)
|
page_api = pages.LandingPage(self.webdriver)
|
||||||
page_api.goto_landing_page()
|
page_api.goto_landing_page()
|
||||||
return self._webdriver
|
return self._webdriver
|
||||||
@ -41,10 +41,10 @@ class Model:
|
|||||||
@property
|
@property
|
||||||
def translated_philosophy(self):
|
def translated_philosophy(self):
|
||||||
# This translates 'philosophy' to the target language with only 1 api call.
|
# This translates 'philosophy' to the target language with only 1 api call.
|
||||||
if config.obj.PAGE_LANGUAGE == 'en':
|
if self.ctx.config['app']['page_language'] == 'en':
|
||||||
self._translated_philosophy = 'philosophy'
|
self._translated_philosophy = 'philosophy'
|
||||||
elif not self._translated_philosophy:
|
elif not self._translated_philosophy:
|
||||||
text = util.translate_text('en', config.obj.PAGE_LANGUAGE, 'philosophy')
|
text = util.translate_text('en', self.ctx.config['app']['page_language'], 'philosophy')
|
||||||
self._translated_philosophy = text
|
self._translated_philosophy = text
|
||||||
return self._translated_philosophy
|
return self._translated_philosophy
|
||||||
|
|
||||||
@ -59,6 +59,19 @@ class Model:
|
|||||||
# creates it and then it is re-used later
|
# creates it and then it is re-used later
|
||||||
# in the application.
|
# in the application.
|
||||||
|
|
||||||
|
def play_multiple(self, n):
    """
    Plays the wikicrawl game <n>-times.

    n -- number of games to play; anything int() accepts, typically
         the text the user typed.

    Returns False (after logging a warning) when n cannot be converted
    to a whole number, otherwise None once all games have been played.
    """
    try:
        rounds = int(n)
    except (TypeError, ValueError):
        self.ctx.log['cli'].warning('failed to process "%s" as a parameter', n)
        return False
    for _ in range(rounds):
        # play_random_page() is a method of this same object; there is
        # no separate "model" attribute to go through here.
        self.play_random_page()
|
||||||
|
|
||||||
|
|
||||||
def play_random_page(self):
|
def play_random_page(self):
|
||||||
"""
|
"""
|
||||||
Select a random page and repeatedly click the first link until
|
Select a random page and repeatedly click the first link until
|
||||||
@ -86,7 +99,7 @@ class Model:
|
|||||||
# In this case, we have made the language a parameter
|
# In this case, we have made the language a parameter
|
||||||
# that you can pass into the program, i.e. you can run it
|
# that you can pass into the program, i.e. you can run it
|
||||||
# for English or Spanish or Russian or what have you.
|
# for English or Spanish or Russian or what have you.
|
||||||
page_api.select_language(config.obj.PAGE_LANGUAGE)
|
page_api.select_language(self.ctx.config['app']['page_language'])
|
||||||
|
|
||||||
# Main page: next 2 lines
|
# Main page: next 2 lines
|
||||||
|
|
||||||
@ -114,20 +127,20 @@ class Model:
|
|||||||
|
|
||||||
# Get the article title (and translate if necessary)
|
# Get the article title (and translate if necessary)
|
||||||
title = page_api.get_title()
|
title = page_api.get_title()
|
||||||
if config.obj.PAGE_LANGUAGE != 'en':
|
if self.ctx.config['app']['page_language'] != 'en':
|
||||||
translated_title = util.translate_text(config.obj.PAGE_LANGUAGE, 'en', title)
|
translated_title = util.translate_text(self.ctx.config['app']['page_language'], 'en', title)
|
||||||
log.LOGGER['model'].info('visited page: %s (%s)' % (title, translated_title))
|
self.ctx.log['model'].info('visited page: %s (%s)' % (title, translated_title))
|
||||||
else:
|
else:
|
||||||
log.LOGGER['model'].info('visited page: %s' % title)
|
self.ctx.log['model'].info('visited page: %s' % title)
|
||||||
|
|
||||||
# Check for page loops (have we already visited this page?)
|
# Check for page loops (have we already visited this page?)
|
||||||
if title in pages_visited:
|
if title in pages_visited:
|
||||||
log.LOGGER['model'].info('encountered loop at page = %s' % title)
|
self.ctx.log['model'].info('encountered loop at page = %s' % title)
|
||||||
break
|
break
|
||||||
|
|
||||||
# Check if we reached the article on philosophy
|
# Check if we reached the article on philosophy
|
||||||
if self._is_article_on_philosophy(title, translated_title):
|
if self._is_article_on_philosophy(title, translated_title):
|
||||||
log.LOGGER['model'].info('made it to philosophy in %s pages' % len(pages_visited))
|
self.ctx.log['model'].info('made it to philosophy in %s pages' % len(pages_visited))
|
||||||
pages_visited.append(title)
|
pages_visited.append(title)
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -136,7 +149,7 @@ class Model:
|
|||||||
|
|
||||||
rc = page_api.click_first_link()
|
rc = page_api.click_first_link()
|
||||||
if not rc:
|
if not rc:
|
||||||
log.LOGGER['model'].warn('failure: unable to continue (perhaps no valid links?)')
|
self.ctx.log['model'].warn('failure: unable to continue (perhaps no valid links?)')
|
||||||
break
|
break
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -2,15 +2,15 @@
|
|||||||
# There are separate classes defined for each page with their own
|
# There are separate classes defined for each page with their own
|
||||||
# defined methods for performing certain actions.
|
# defined methods for performing certain actions.
|
||||||
|
|
||||||
|
from . import browser
|
||||||
|
from . import util
|
||||||
|
from .assets.languages import LANGUAGES
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import selenium
|
import selenium
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from . import browser
|
PAGE_BASE_URL = 'https://www.wikipedia.org/'
|
||||||
from . import config
|
|
||||||
from . import log
|
|
||||||
from . import util
|
|
||||||
from .assets.languages import LANGUAGES
|
|
||||||
|
|
||||||
class PageRootObject:
|
class PageRootObject:
|
||||||
"""
|
"""
|
||||||
@ -43,7 +43,8 @@ class PageRootObject:
|
|||||||
html link in the webpage.
|
html link in the webpage.
|
||||||
"""
|
"""
|
||||||
self.highlight(el, 'red')
|
self.highlight(el, 'red')
|
||||||
time.sleep(config.obj.PAGE_DELAY)
|
DELAY=0
|
||||||
|
time.sleep(DELAY)
|
||||||
util.breakpoint()
|
util.breakpoint()
|
||||||
el.click()
|
el.click()
|
||||||
|
|
||||||
@ -97,7 +98,7 @@ class LandingPage(PageRootObject):
|
|||||||
"""
|
"""
|
||||||
Navigates the browser to www.wikipedia.org
|
Navigates the browser to www.wikipedia.org
|
||||||
"""
|
"""
|
||||||
self.driver.get(config.obj.PAGE_BASE_URL)
|
self.driver.get(PAGE_BASE_URL)
|
||||||
|
|
||||||
def select_language(self, language):
|
def select_language(self, language):
|
||||||
lang_text = LANGUAGES.get(language)
|
lang_text = LANGUAGES.get(language)
|
||||||
@ -219,13 +220,13 @@ class ArticlePage(PageRootObject):
|
|||||||
if len(links) == 0:
|
if len(links) == 0:
|
||||||
return False
|
return False
|
||||||
for link in links:
|
for link in links:
|
||||||
log.LOGGER['pages'].debug('processing link: %s' % link.text)
|
self.ctx.log['pages'].debug('processing link: %s' % link.text)
|
||||||
if not self._is_valid_link(p, link):
|
if not self._is_valid_link(p, link):
|
||||||
log.LOGGER['pages'].debug('skipping link inside parenthesis: %s' % link.text)
|
self.ctx.log['pages'].debug('skipping link inside parenthesis: %s' % link.text)
|
||||||
self.highlight(link, 'blue')
|
self.highlight(link, 'blue')
|
||||||
continue
|
continue
|
||||||
self.highlight(link, 'red')
|
self.highlight(link, 'red')
|
||||||
log.LOGGER['pages'].info('selected link: %s' % link.text)
|
self.ctx.log['pages'].info('selected link: %s' % link.text)
|
||||||
self.click(link)
|
self.click(link)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -3,14 +3,14 @@
|
|||||||
|
|
||||||
import yandex_translate
|
import yandex_translate
|
||||||
|
|
||||||
from . import config
|
DO_BREAKPOINTS = False
|
||||||
|
|
||||||
def breakpoint():
|
def breakpoint():
|
||||||
"""
|
"""
|
||||||
If DO_BREAKPOINTS is switched on, this will pause program
|
If DO_BREAKPOINTS is switched on, this will pause program
|
||||||
execution and wait for the user to press enter to continue.
|
execution and wait for the user to press enter to continue.
|
||||||
"""
|
"""
|
||||||
if config.obj.DO_BREAKPOINTS:
|
if DO_BREAKPOINTS:
|
||||||
input('BREAKPOINT hit. <Enter> to continue...')
|
input('BREAKPOINT hit. <Enter> to continue...')
|
||||||
|
|
||||||
def translate_text(source_language, target_language, text):
|
def translate_text(source_language, target_language, text):
|
||||||
|
Loading…
Reference in New Issue
Block a user