mirror of
https://git.zavage.net/Zavage-Software/wikicrawl.git
synced 2025-01-17 23:57:56 -07:00
start of new app structure for wiki
This commit is contained in:
parent
f093fb9ecc
commit
6ae2a8dd06
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
build/
|
||||
dist/
|
||||
__pycache__
|
||||
*.egg-info
|
||||
|
11
launcher.py
11
launcher.py
@ -1,11 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import wikicrawl
|
||||
import settings
|
||||
|
||||
# Inject the settings.DefaultSettings object into the
|
||||
# app and start running the program.
|
||||
wikicrawl.init(settings.DefaultSettings)
|
||||
wikicrawl.main()
|
||||
input('<enter> to exit')
|
||||
|
60
lib/app_skellington/README.md
Normal file
60
lib/app_skellington/README.md
Normal file
@ -0,0 +1,60 @@
|
||||
app_skellington
|
||||
===============
|
||||
Application framework for Python, features include:
|
||||
* Pain-free multi-level command menu: Register classes
|
||||
* Simple to define services and automatic dependency injection based on name (with custom invocation as an option)
|
||||
* INI-style config and validation (provided through ConfigObj)
|
||||
* Colored logging (provided through colorlog)
|
||||
* Works on Linux, Windows, and Mac
|
||||
|
||||
Principles:
|
||||
* Lend to creating beautiful, easy to read and understand code in the application.
|
||||
* Minimize coupling of applications to this framework.
|
||||
* Compatible with Linux, Windows, and Mac. Try to be as compatible as possible otherwise.
|
||||
* Try to be compatible with alternate Python runtimes such as PyPy.
|
||||
|
||||
Application Configuration
|
||||
-------------------------
|
||||
Site configurations are supported through configobj. There is a config.spec
|
||||
in the src directory which is a validation file; it contains the accepted
|
||||
parameter names, types, and limits for configurable options in the
|
||||
application which is built on app_skellington. The format is multi-level .ini syntax.
|
||||
|
||||
See the configobj documentation for more information.
|
||||
|
||||
Site configuration files (config.ini) are created if they don't exist. The
|
||||
file always contains the full specification of parameters; i.e. even default
|
||||
parameters are added into the config file.
|
||||
|
||||
Linux:
|
||||
|
||||
/home/\<user\>/.config/\<app_name\>/config.ini
|
||||
|
||||
/home/\<user\>/.cache/\<app_name\>/log/\<app_name\>.log
|
||||
|
||||
Windows:
|
||||
|
||||
C:\Users\\\<user>\\AppData\\Local\\\<app_name\>\\config.ini
|
||||
|
||||
C:\Users\\\<user>\\AppData\\Local\\\<app_name\>\\Logs\\\<app_name\>.log
|
||||
|
||||
Application configuration can be overridden ad-hoc through the --config <filename>
|
||||
argument.
|
||||
|
||||
Debug - Turn on Logging
|
||||
---------------------------
|
||||
Set 'APPSKELLINGTON_ENABLE_LOGGING' environment variable to any value which turns
|
||||
on AppSkellington-level logging. For example,
|
||||
|
||||
APPSKELLINGTON_ENABLE_LOGGING=true ./runme
|
||||
|
||||
or
|
||||
|
||||
export APPSKELLINGTON_ENABLE_LOGGING=1
|
||||
./runme
|
||||
|
||||
Notes
|
||||
-----
|
||||
See official website: https://zavage-software.com
|
||||
Please report bugs, improvements, or feedback! <contact>
|
||||
|
11
lib/app_skellington/__init__.py
Normal file
11
lib/app_skellington/__init__.py
Normal file
@ -0,0 +1,11 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
APP_CONFIG_FILENAME = 'config.ini' # Relative to user directory on machine
|
||||
APP_CONFIGSPEC_FILENAME = 'config.spec' # Relative to module source directory
|
||||
|
||||
from .app_container import *
|
||||
from .cfg import *
|
||||
from .cli import *
|
||||
from .log import *
|
||||
|
31
lib/app_skellington/_bootstrap.py
Normal file
31
lib/app_skellington/_bootstrap.py
Normal file
@ -0,0 +1,31 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Check and gracefully fail if the user needs to install a 3rd-party dep.
|
||||
libnames = ['appdirs', 'configobj', 'colorlog']
|
||||
def check_env_has_dependencies(libnames):
    """
    Check that every named library can be imported.

    Each name in ``libnames`` is imported in turn. Every library that fails
    to import is reported on STDERR; all names are checked even after the
    first failure so the user sees the complete list in one run.

    Args:
        libnames: iterable of importable module names.

    Returns:
        True if every library imported, False if at least one did not.
    """
    rc = True
    for libname in libnames:
        try:
            __import__(libname)
        except ImportError as ex:
            # ImportError (the parent of ModuleNotFoundError) also covers a
            # dependency that is installed but fails during its own import.
            print('missing third-party library: ', ex, file=sys.stderr)
            rc = False
    return rc
|
||||
if not check_env_has_dependencies(libnames):
|
||||
print('refusing to load program without installed dependencies', file=sys.stderr)
|
||||
raise ImportError('python environment needs third-party dependencies installed')
|
||||
|
||||
# Bootstrap logger: carries framework messages emitted before the
# application's own logging configuration has been loaded.
_logger_name = 'app_skellington'
_log_fmt = '%(levelname)-7s:%(message)s'

_bootstrap_logger = logging.getLogger(_logger_name)

# NOTE(MG) Pretty sure the logger has the default handler too at this point.
# It's been related to some issues with the logger double-printing messages.
_bootstrap_logger.addHandler(logging.NullHandler())

# Do not hand records to ancestor loggers, and use a very high threshold
# (above logging.CRITICAL) so the standard levels are filtered out.
_bootstrap_logger.propagate = False
_bootstrap_logger.setLevel(1000)
|
||||
|
116
lib/app_skellington/_util.py
Normal file
116
lib/app_skellington/_util.py
Normal file
@ -0,0 +1,116 @@
|
||||
from __future__ import print_function
|
||||
import inspect
|
||||
import os
|
||||
import sys
|
||||
|
||||
from . import _util
|
||||
|
||||
def eprint(*args, **kwargs):
    """
    Same as the built-in print(), but the output goes to STDERR.

    All positional and keyword arguments are forwarded to print().
    """
    return print(*args, file=sys.stderr, **kwargs)
|
||||
|
||||
def filename_to_abspath(filename):
    """
    Return the absolute form of ``filename``.

    A path that is already absolute comes back normalized; a relative one
    is resolved against the current working directory.
    """
    absolute_path = os.path.abspath(filename)
    return absolute_path
|
||||
|
||||
def does_file_exist(filepath):
    """
    Report whether ``filepath`` could be opened for reading right now.

    Because the file can be deleted or created immediately after execution
    of this function, there cannot be guarantees made around the existence
    of said file (race condition). This merely says if the file existed at
    this instant in execution.

    Args:
        filepath: path of the file to probe. ``None`` is treated as
            "no such file".

    Returns:
        True if the file was opened successfully, False if it was not found
        or if ``filepath`` is None. Other errors raised by open() (for
        example a permission error) propagate to the caller.
    """
    if filepath is None:
        return False
    try:
        # The context manager closes the probe handle straight away.
        with open(filepath, 'r'):
            return True
    except FileNotFoundError:
        return False
|
||||
|
||||
def ensure_dir_exists(dirpath):
    """
    Create ``dirpath`` (including missing parents) unless it already exists.

    Nothing is done when ``dirpath`` is None or the empty string.
    """
    if dirpath is not None and dirpath != '':
        os.makedirs(dirpath, exist_ok=True)
|
||||
|
||||
def get_root_asset(filepath):
    """
    Build the path of a resource shipped next to the program entry point.

    The directory of the root module (__main__), i.e. the python file that
    was initially invoked, is made absolute and joined with ``filepath``.
    The resulting path is not checked for existence.
    """
    main_module = sys.modules['__main__']
    entry_dir = os.path.dirname(main_module.__file__)
    return os.path.join(os.path.abspath(entry_dir), filepath)
|
||||
|
||||
def get_asset(module, filepath):
    """
    Build the path of a resource or asset shipped alongside a module.

    The directory containing the module's source file is used as the base
    (a symlinked module file is resolved to its real location first), and
    ``filepath`` is joined onto it.

    Args:
        module: the module object, or its name (e.g. ``__name__``) as found
            in ``sys.modules``, to search relative to.
        filepath: the relative filepath of the file to look for in the
            module's directory.

    Returns:
        The absolute path ``<module directory>/<filepath>``. The path is
        not checked for existence.

    Raises:
        KeyError: ``module`` is a name that is not in ``sys.modules``.
        TypeError: ``module`` is neither a string nor a module object.
    """
    # Function-scope import keeps this fix self-contained.
    import types

    if isinstance(module, str):
        module_file = sys.modules[module].__file__
    elif isinstance(module, types.ModuleType):
        module_file = module.__file__
    else:
        raise TypeError('Invalid Usage')

    root = module_file
    if os.path.islink(root):
        root = os.path.realpath(root)
    root = os.path.dirname(os.path.abspath(root))

    return os.path.join(root, filepath)
|
||||
|
||||
def register_class_as_commands(app, submenu, cls_object):
    """
    Expose every public method of a class as a command in ``submenu``.

    For each plain function found on ``cls_object`` whose name does not
    start with an underscore, a wrapper is registered that first builds an
    instance of the class (through the app's dependency injection) and
    then calls the method on it.

    IMPORTANT: Currently, you need to pass in only a class and not
    an object/instance of a class.
    """
    for name, member in inspect.getmembers(cls_object):
        if name.startswith('_') or not inspect.isfunction(member):
            continue

        # Factory that builds the class with its dependencies injected.
        constructor = app._inject_service_dependencies(cls_object)
        # Register with the method's own signature, not the wrapper's.
        signature = inspect.signature(member)
        wrapper = create_func(constructor, member)
        submenu.register_command(
            wrapper, name, signature, inspect.getdoc(member))
|
||||
|
||||
def create_func(constructor, cls_method):
    """
    Wrap an unbound method so it runs on a freshly constructed object.

    Each call of the returned function invokes ``constructor()`` to build a
    new instance and then calls ``cls_method`` on it with the given
    arguments, returning the method's result.
    """
    def func(*args, **kwargs):
        return cls_method(constructor(), *args, **kwargs)
    return func
|
||||
|
202
lib/app_skellington/app_container.py
Normal file
202
lib/app_skellington/app_container.py
Normal file
@ -0,0 +1,202 @@
|
||||
import appdirs
|
||||
import collections
|
||||
import functools
|
||||
import inspect
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Application scaffolding:
|
||||
from ._bootstrap import _bootstrap_logger
|
||||
from . import log
|
||||
from . import _util
|
||||
from . import cli
|
||||
from . import cfg
|
||||
|
||||
DEFAULT_APP_NAME = 'python-app'
|
||||
DEFAULT_APP_AUTHOR = 'John Doe'
|
||||
|
||||
|
||||
# OPTIONAL: classes can sub-class from this?
|
||||
class Components:
    """
    Placeholder base class; every method is currently a no-op stub.
    """

    def inject_dependencies_based_on_names_in_args(self):
        """Stub: does nothing and returns None."""
        return None

    def inject_dependency(self, name):
        """Stub: does nothing and returns None."""
        return None

    def register_dependency(self, service, name):
        """Stub: does nothing and returns None."""
        return None
|
||||
|
||||
|
||||
class ApplicationContext:
    """
    Holder for application-wide state: the configuration, the logging
    layer and the parsed command-line arguments.
    """

    def __init__(self, config, log):
        # Parsed argv slots stay None until something fills them in.
        self.parsed_argv = self.parsed_argv_unknown = None
        self.log = log
        self.config = config
|
||||
|
||||
class ApplicationContainer:
    """
    Generalized application functionality. Used for linking components and
    modules of the application together. Invokes runtime configuration
    reading from file, maintains the factories for services, passes off to
    the cli to determine what to do, and then injects any necessary
    dependencies (e.g. database module) and kicks off the functionality
    requested in the cli.

    Services are registered and retrieved with item syntax:

        container['name'] = factory      # register a zero-argument factory
        instance = container['name']     # calls the factory
    """

    def __init__(
            self,
            configspec_filepath=None,
            config_filepath=None,
            *args, **kwargs
    ):
        """
        Build the application context, the command tree and run the
        optional subclass hooks.

        Args:
            configspec_filepath: path of the config.spec validation file.
                Defaults to 'config.spec' next to the program entry point.
            config_filepath: path of the config.ini file. Defaults to
                config.ini in the per-user config directory.
            **kwargs: 'appname' and 'appauthor' override the default
                application name and author.
        """
        self.appname = kwargs.get('appname') or DEFAULT_APP_NAME
        self.appauthor = kwargs.get('appauthor') or DEFAULT_APP_AUTHOR

        if configspec_filepath is None:
            configspec_filepath = self._get_configspec_filepath()
        if config_filepath is None:
            # Fall back to the per-user location instead of handing None
            # to the config loader.
            config_filepath = self._get_config_filepath(
                self.appname, self.appauthor)

        self._dependencies = {}

        config = cfg.Config(configspec_filepath)
        config.load_config_from_file(config_filepath)

        logger = log.LoggingLayer(self.appname, self.appauthor)

        # added here, is this okay to do twice?
        logger.configure_logging()

        # Root application context (container for globals); it is also
        # registered as the injectable service named 'ctx'.
        self.ctx = ApplicationContext(config, logger)
        self['ctx'] = lambda: self.ctx

        self.cli = cli.CommandTree()  # Command-line interface

        # Optional hooks a subclass may define; they run in this order.
        for hook_name in ('_cli_options', '_services', '_command_menu'):
            hook = getattr(self, hook_name, None)
            if callable(hook):
                hook()

    def __delitem__(self, service_name):
        """
        Deletes a service or dependency from the available dependencies.
        Deleting a name that is not registered is silently ignored.
        """
        self._dependencies.pop(service_name, None)

    def __getitem__(self, service_name):
        """
        Looks up the factory registered under ``service_name``, calls it
        and returns the resulting service instance.

        Raises:
            ServiceNotFound: no factory is registered under that name.
        """
        try:
            service_factory = self._dependencies[service_name]
        except KeyError as ex:
            msg = 'failed to inject service: {}'.format(service_name)
            _bootstrap_logger.critical(msg)
            _util.eprint(msg)
            raise ServiceNotFound(msg) from ex
        # Called outside the try block so an error raised by the factory
        # itself is not misreported as a missing service.
        return service_factory()

    def __setitem__(self, service_name, value):
        """
        Register a service or dependency factory to return a service.

        The factory function is called to return an instance of a service
        object.
        """
        self._dependencies[service_name] = value

    def _construct_model(self, model_constructor, *args):
        """
        Performs dependency resolution and instantiates an object of given
        type.

        Args:
            model_constructor: reference to object constructor.
            *args: names of the services to resolve; the instances are
                passed positionally, in this order, to the constructor.

        Returns:
            The object returned by ``model_constructor``.
        """
        dep_references = [self[dep_name] for dep_name in args]
        return model_constructor(*dep_references)

    def _get_config_filepath(self, app_name, app_author, config_filename='config.ini'):
        """
        Compute the config.ini path inside the user's config directory, as
        reported by appdirs.

        On Linux, this will be /home/<user>/.config/<app>/config.ini
        On Windows, this will be C:\\Users\\<user>\\AppData\\Local\\<app>\\config.ini
        """
        dirname = appdirs.user_config_dir(app_name, app_author)
        filepath = os.path.join(dirname, config_filename)
        _bootstrap_logger.info('default config filepath calculated to be: %s', filepath)
        return filepath

    def _get_configspec_filepath(self, configspec_filename='config.spec'):
        """
        Compute the config.spec path relative to the program entry point.
        """
        return _util.get_root_asset(configspec_filename)

    def _inject_service_dependencies(self, constructor):
        """
        Returns a function that, when called, constructs a new object for
        business/application logic with the listed dependencies.

        The dependency names are the parameter names of
        ``constructor.__init__`` (excluding 'self'); each is resolved as a
        service of the same name when the returned function is called.

        Args:
            constructor: service class to be created object.
        """
        parameters = list(
            inspect.signature(constructor.__init__).parameters.values())
        variadic = (inspect.Parameter.VAR_POSITIONAL,
                    inspect.Parameter.VAR_KEYWORD)
        # Skip 'self', and skip *args/**kwargs catch-alls: they are not
        # service names (a class without its own __init__ reports exactly
        # those two through object.__init__).
        cls_dependencies = [
            param.name for param in parameters[1:]
            if param.kind not in variadic
        ]
        return functools.partial(self._construct_model, constructor, *cls_dependencies)

    def load_command(self):
        """
        Parse the command line and store the result on the context.

        Returns:
            True when parsing succeeded, False otherwise.
        """
        args, unk, success = self.cli.parse()
        if not success:
            return False
        self.ctx.parsed_argv = args
        self.ctx.parsed_argv_unknown = unk
        return True

    def invoke_command(self):
        """
        Parse the command line and run the selected command.

        Returns:
            False when the command line could not be parsed; None
            otherwise (including when no command was specified, which is
            reported on stdout).
        """
        rc = self.load_command()
        if not rc:
            return False
        try:
            self.cli.run_command()
        except NoCommandSpecified:
            print('Failure: No command specified.')

    def interactive_shell(self):
        """Placeholder: not implemented."""
        pass

    def invoke_from_cli(self):
        """Entry point for command-line use; runs invoke_command()."""
        self.invoke_command()

    def usage(self):
        """Placeholder: not implemented."""
        pass
        # Applications need a default usage
|
||||
|
||||
class ServiceNotFound(Exception):
    """
    Raised by the application framework when a requested service or
    dependency cannot be found for injection.
    """
|
||||
|
||||
class NoCommandSpecified(Exception):
    """Raised when the command line does not select any command."""
|
||||
|
184
lib/app_skellington/cfg.py
Normal file
184
lib/app_skellington/cfg.py
Normal file
@ -0,0 +1,184 @@
|
||||
import appdirs
|
||||
|
||||
from . import _util
|
||||
import argparse
|
||||
import configobj
|
||||
import os
|
||||
import sys
|
||||
import validate
|
||||
|
||||
from ._bootstrap import _bootstrap_logger
|
||||
from . import _util
|
||||
|
||||
class Config:
    """
    Structure to store application runtime configuration. Also contains
    functionality to load configuration from local site file.

    Item access (``config[key]``) is delegated to the underlying ConfigObj
    stored in ``config_obj``, which is None until a config file is loaded.
    """

    def __init__(self, configspec_filepath=None):
        self.config_obj = None
        self._config_filepaths = []
        self._configspec_filepath = None
        self.configspec_filepath = configspec_filepath

    def __delitem__(self, key):
        """
        Deletes the configuration item identified by <key> in the internal
        configuration storage. A missing key is silently ignored.
        """
        try:
            # Operate on the storage; 'del self[key]' would recurse forever.
            del self.config_obj[key]
        except KeyError:
            pass

    def __getitem__(self, key):
        """
        Returns the value of the configuration item identified by <key>.

        When the stored value offers a ``dict()`` method (as configobj
        sections do) its plain-dict form is returned; any other value is
        returned unchanged.

        Raises:
            KeyError: no item is stored under <key>.
        """
        value = self.config_obj[key]
        to_dict = getattr(value, 'dict', None)
        if callable(to_dict):
            return to_dict()
        return value

    def __setitem__(self, key, value):
        """
        Assigns the value of the configuration item
        identified by <key> as <value>.
        """
        # Operate on the storage; 'self[key] = value' would recurse forever.
        self.config_obj[key] = value

    @property
    def config_filepath(self, idx=0):
        """
        Returns the recorded config filepath, or None when no config file
        has been recorded.

        NOTE: attribute access cannot supply ``idx``, so this always reads
        the first recorded filepath.
        """
        assert idx >= 0, 'invalid idx argument: index must not be negative'
        try:
            return self._config_filepaths[idx]
        except IndexError:
            return None

    @config_filepath.setter
    def config_filepath(self, value, idx=0):
        """
        Assigns <value> as the config filepath. The existing entry is
        replaced; when nothing has been recorded yet, <value> is appended.
        """
        assert idx >= 0, 'invalid idx argument: index must not be negative'
        if idx < len(self._config_filepaths):
            self._config_filepaths[idx] = value
        else:
            self._config_filepaths.append(value)

    @property
    def configspec_filepath(self):
        """Path of the config.spec file in use, or None."""
        return self._configspec_filepath

    @configspec_filepath.setter
    def configspec_filepath(self, filepath):
        """
        Sets the config.spec path. None clears it. A path that does not
        exist is rejected: an error is logged and the previous value kept.
        """
        if filepath is None:
            self._configspec_filepath = None
            return
        if _util.does_file_exist(filepath):
            self._configspec_filepath = filepath
        else:
            _bootstrap_logger.error(
                'failed to set config.spec: file not found '
                '(%s)', filepath)

    def load_config_from_file(self, config_filepath):
        """
        Loads configuration settings from file, overwriting all
        configuration.

        When a config.spec is set, the loaded values are validated against
        it. Afterwards the configuration is written back to
        ``config_filepath`` (creating the directory and file if needed).

        Args:
            config_filepath: path of the config.ini file; None loads an
                empty configuration and skips the write-back.

        Returns:
            True on success, False when loading or validation failed.
        """
        # Record all config.ini files passed in
        if config_filepath not in self._config_filepaths:
            self._config_filepaths.append(config_filepath)

        # Check for config.spec
        configspec_filepath = self.configspec_filepath
        if configspec_filepath:
            _bootstrap_logger.info('using config.spec: %s', configspec_filepath)
        else:
            _bootstrap_logger.info('config.spec not defined')
        _bootstrap_logger.info('using config file: %s', config_filepath)

        # Pre-check for config.ini existence
        if config_filepath is not None and _util.does_file_exist(config_filepath):
            _bootstrap_logger.info('existing config file found')
        else:
            _bootstrap_logger.info('no config file found: using defaults')

        # interpolation='template' changes config file variable replacement
        # to use the form $var instead of %(var)s, which is useful to enable
        # literal %(text)s values in the config.
        configobj_kwargs = {'interpolation': 'template'}
        if configspec_filepath:
            configobj_kwargs['configspec'] = configspec_filepath
        try:
            self.config_obj = configobj.ConfigObj(
                config_filepath, **configobj_kwargs)
        except configobj.ConfigObjError:
            # Base class of ParseError; also raised on its own when a file
            # contains several parse errors.
            msg = 'failed to load config: error in config.spec configuration: {}'.format(config_filepath)
            _bootstrap_logger.error(msg)
            _util.eprint(msg)
            return False
        except OSError:
            msg = 'failed to load config: config.spec file not found'
            _bootstrap_logger.error(msg)
            _util.eprint(msg)
            return False

        # Hack the configobj module to alter the interpolation for
        # validate.py:
        configobj.DEFAULT_INTERPOLATION = 'template'
        self.config_obj.filename = config_filepath

        if configspec_filepath:
            # Validate config.ini against config.spec
            try:
                _bootstrap_logger.info('validating config file against spec')
                val = validate.Validator()
                # copy=True copies spec defaults into the config so the
                # written file contains them.
                test = self.config_obj.validate(val, copy=True)
                if test is not True:
                    _bootstrap_logger.critical('config file failed validation')
                    _bootstrap_logger.critical('config file errors: %s', test)
                    return False
            except ValueError:
                _bootstrap_logger.error('failed validating configspec')
                return False

        # The config file is (re)written on every load, not only when it
        # is missing.
        if config_filepath is not None:
            _bootstrap_logger.info('writing new config file: %s', config_filepath)
            dirname = os.path.dirname(config_filepath)
            _util.ensure_dir_exists(dirname)
            self.config_obj.write()

        _bootstrap_logger.info('done loading config file')
        return True

    def print_config(self):
        """
        Print configuration to stdout.
        """
        print('config:')

        self.config_obj.walk(print)
        for section in self.config_obj.sections:
            print(section)
            for key in self.config_obj[section]:
                print(' ', self.config_obj[section][key])
|
||||
|
||||
class EnvironmentVariables:
    """Placeholder; instantiating it raises NotImplementedError."""

    def __init__(self):
        raise NotImplementedError()
|
||||
|
||||
class ConfigurationItemNotFoundError(Exception):
    """Error type for a configuration item that cannot be found."""
|
||||
|
540
lib/app_skellington/cli.py
Normal file
540
lib/app_skellington/cli.py
Normal file
@ -0,0 +1,540 @@
|
||||
import argparse
|
||||
import inspect
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
|
||||
import app_skellington
|
||||
from ._bootstrap import _bootstrap_logger
|
||||
from . import app_container
|
||||
|
||||
# If explicit fail is enabled, any command with at least one unknown
|
||||
# argument will be rejected entirely. If not enabled, unknown arguments
|
||||
# will be ignored.
|
||||
EXPLICIT_FAIL_ON_UNKNOWN_ARGS = True
|
||||
|
||||
class CommandTree:
|
||||
"""
|
||||
Command-line interface to hold a menu of commands. You can register
|
||||
commands (functions or methods) in a CommandTree which will generate
|
||||
a corresponding argparse.ArgumentParser (and nested SubParsers) that
|
||||
map function/method arguments into argparse Parameters. Then, you
|
||||
can translate command-line arguments into invoking the function.
|
||||
|
||||
Commands must be registered before being invoked. You create nested
|
||||
SubMenu(s). If function parameters have defaults, those will be
|
||||
available for override else they use the function defaults.
|
||||
|
||||
Print helpful information:
|
||||
|
||||
./scriptname -h # View tier-0 help and usage doc
|
||||
./scriptname [submenu] -h # View submenu help and usage doc
|
||||
./scriptname [submenu] [command] -h # View command documentation and parameters
|
||||
|
||||
argparse is finicky about argument placement:
|
||||
|
||||
./scriptname
|
||||
[application arguments]
|
||||
[submenu] [submenu arguments]
|
||||
[command] [command arguments]
|
||||
|
||||
For example,
|
||||
|
||||
./scriptname --option="value" [submenu] [command]
|
||||
|
||||
is different than
|
||||
|
||||
./scriptname [submenu] [command] --option="value"
|
||||
|
||||
in that option is being applied to the application in the first example and
|
||||
applied to the refresh_datasets command (under the nhsn command group) in
|
||||
the second. In the same way the -h, --help options print different docs
|
||||
depending on where the help option was passed.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.root_parser = argparse.ArgumentParser()
|
||||
self.submenu_param = None # submenu_param is the variable name
|
||||
# of the root submenu argument, i.e. the arg
|
||||
# in root_parser which selects the submenu.
|
||||
self.entries = {}
|
||||
# NOTE(MG) Implementation note:
|
||||
# CommandTree uses only one of these internal structures (i.e. mutually exclusive),
|
||||
# 'entries' is used when there is a submenu linked to multiple commands.
|
||||
# '_cmd_tree_is_single_command' and '_single_command' instead are used
|
||||
# when the CommandTree is linked to one and only one command.
|
||||
self._cmd_tree_is_single_command = False
|
||||
self._single_command = None
|
||||
|
||||
def print_tree(self):
|
||||
import pprint
|
||||
pprint.pprint(self.entries)
|
||||
|
||||
def add_argument(self, *args, **kwargs):
|
||||
"""
|
||||
Adds an argument to the root parser.
|
||||
"""
|
||||
_bootstrap_logger.info('adding argument to root parser: %s and %s', args, kwargs)
|
||||
self.root_parser.add_argument(*args, **kwargs)
|
||||
|
||||
def init_submenu(self, param_name, is_required=False):
|
||||
"""
|
||||
Creates a root-level submenu with no entries. SubMenu node is
|
||||
returned which can have submenus and commands attached to it.
|
||||
"""
|
||||
# Creates an argument as a slot in the underlying argparse.
|
||||
subparsers = self.root_parser.add_subparsers(
|
||||
dest = param_name,
|
||||
metavar = param_name,
|
||||
required = is_required
|
||||
)
|
||||
|
||||
submenu = SubMenu(self, subparsers, param_name)
|
||||
submenu.submenu_path = ''
|
||||
submenu.var_name = param_name
|
||||
|
||||
_bootstrap_logger.info('Initialized root-level submenu: Parameter = \'%s\'', param_name)
|
||||
self.entries[param_name] = submenu
|
||||
self.submenu_param = param_name
|
||||
|
||||
return submenu
|
||||
|
||||
def register_command(
|
||||
self, func, cmd_name=None, func_signature=None,
|
||||
docstring=None
|
||||
):
|
||||
"""
|
||||
When no submenu functionality is desired, this links a single
|
||||
command into underlying argparse options.
|
||||
"""
|
||||
# begin copy-paste from SubMenu.register_command
|
||||
if inspect.isfunction(func):
|
||||
# print('func is function')
|
||||
pass
|
||||
elif inspect.ismethod(func):
|
||||
pass
|
||||
# print('func is method')
|
||||
else:
|
||||
raise Exception('bad value passed in for function')
|
||||
|
||||
if not cmd_name:
|
||||
# safe try/except
|
||||
cmd_name = func.__name__
|
||||
|
||||
if func_signature is None:
|
||||
func_signature = inspect.signature(func)
|
||||
|
||||
if docstring is None:
|
||||
docstring = func.__doc__
|
||||
|
||||
sig = func_signature
|
||||
params = sig.parameters
|
||||
|
||||
# help is displayed next to the command in the submenu enumeration or
|
||||
# list of commands:
|
||||
help_text = HelpGenerator.generate_help_from_sig(docstring)
|
||||
# description is displayed when querying help for the specific command:
|
||||
description_text = HelpGenerator.generate_description_from_sig(docstring)
|
||||
# end copy-paste from SubMenu.register_command
|
||||
|
||||
# begin copy-paste then editted from SubMenu.register_command
|
||||
# For each paramter in the function create an argparse argument in
|
||||
# the child ArgumentParser created for this menu entry:
|
||||
for key in params:
|
||||
if key == 'self':
|
||||
continue
|
||||
param = params[key]
|
||||
|
||||
if '=' in str(param):
|
||||
if param.default is None:
|
||||
helptext = 'default provided'
|
||||
else:
|
||||
helptext = "default = '{}'".format(param.default)
|
||||
self.root_parser.add_argument(
|
||||
key,
|
||||
help=helptext,
|
||||
nargs='?',
|
||||
default=param.default)
|
||||
else:
|
||||
helptext = 'required'
|
||||
self.root_parser.add_argument(
|
||||
key,
|
||||
help=helptext)
|
||||
|
||||
# # Wrapper function that instantiates an object and runs a method
|
||||
# # on-demand. The object is created, injected with necessary
|
||||
# # dependencies or services, and the method is invoked.
|
||||
# def func(*args, **kwargs):
|
||||
# obj = constructor()
|
||||
# return cls_method(obj, *args, **kwargs)
|
||||
|
||||
# Build the CommandEntry structure
|
||||
cmd = CommandEntry()
|
||||
cmd.argparse_node = self.root_parser
|
||||
cmd.cmd_name = cmd_name
|
||||
cmd.func_signature = sig
|
||||
# cmd.func_ref = None
|
||||
cmd.callback = func
|
||||
|
||||
registered_name = cmd_name
|
||||
_bootstrap_logger.info('registered command: %s', registered_name)
|
||||
# end copy-paste then editted from SubMenu.register_command
|
||||
|
||||
self._cmd_tree_is_single_command = True
|
||||
self._single_command = cmd
|
||||
self._entries = None
|
||||
|
||||
# def _validate(self):
|
||||
# pass
|
||||
# # TODO(MG):
|
||||
# # subparser can not be empty, needs to have parsers attached
|
||||
|
||||
def parse(self, args=None):
|
||||
if args is None:
|
||||
args = sys.argv[1:]
|
||||
|
||||
try:
|
||||
# on error, prints some argparse error messages:
|
||||
pargs, unk = self.root_parser.parse_known_args(args)
|
||||
|
||||
# if len(unk) > 0:
|
||||
# _bootstrap_logger.error(
|
||||
# 'failed to interpret argument(s) or command-line switch from shell: %s',
|
||||
# unk)
|
||||
|
||||
# if EXPLICIT_FAIL_ON_UNKNOWN_ARGS:
|
||||
# _bootstrap_logger.warn(
|
||||
# 'failed to parse arguments: explicitly failing to be safe')
|
||||
# return False, False
|
||||
|
||||
if hasattr(pargs, 'usage'):
|
||||
pass
|
||||
# print('found usage in app_skellington')
|
||||
|
||||
return pargs, unk, True
|
||||
|
||||
# Note: SystemExit is raised when '-h' argument is supplied.
|
||||
except SystemExit as ex:
|
||||
return None, None, False
|
||||
|
||||
def run_command(self, args=None):
|
||||
args, unk, success = self.parse(args)
|
||||
if not success:
|
||||
_bootstrap_logger.info('SystemExit: Perhaps user invoked --help')
|
||||
return
|
||||
|
||||
if args is False and unk is False:
|
||||
_bootstrap_logger.error('failed parsing args')
|
||||
return False
|
||||
_bootstrap_logger.info('received args from shell: %s', args)
|
||||
|
||||
args = vars(args)
|
||||
|
||||
cmd = self._lookup_command(args)
|
||||
if cmd is None:
|
||||
print('cmd is None')
|
||||
_bootstrap_logger.error('failed to find command')
|
||||
return False
|
||||
|
||||
return self._invoke_command(cmd, args)
|
||||
|
||||
def _lookup_command(self, args):
|
||||
keys = list(args.keys())
|
||||
|
||||
# In the case there is at-most one command registered in
|
||||
# the CommandTree with no SubMenu (submenu will be disabled
|
||||
# in this case):
|
||||
if self._cmd_tree_is_single_command:
|
||||
assert self._cmd_tree_is_single_command is True, 'corrupt data structure in CommandMenu'
|
||||
assert self._entries is None, 'corrupt data structure in CommandMenu'
|
||||
assert isinstance(self._single_command, CommandEntry), 'corrupt data structure in CommandMenu'
|
||||
return self._single_command
|
||||
|
||||
# There is at least one submenu we need to go down:
|
||||
else:
|
||||
|
||||
assert self._single_command is None, 'corrupt data structure in CommandMenu'
|
||||
assert self._cmd_tree_is_single_command == False, 'corrupt data structure in CommandMenu'
|
||||
|
||||
# Key or variable name used by argparse to store the submenu options
|
||||
argparse_param = self.submenu_param # e.g.: submenu_root
|
||||
submenu = self.entries[argparse_param]
|
||||
|
||||
while True:
|
||||
if argparse_param not in keys:
|
||||
print('root menu parameter not found in args:', argparse_param)
|
||||
input('<broken>')
|
||||
|
||||
val = args.get(argparse_param)
|
||||
_bootstrap_logger.debug('argparse command is \'{}\' = {}'.format(argparse_param, val))
|
||||
|
||||
lookup = submenu.entries.get(val)
|
||||
_bootstrap_logger.debug('lookup, entries[{}] = {}'.format(val, lookup))
|
||||
# print(submenu.entries)
|
||||
|
||||
# pop value
|
||||
del args[argparse_param]
|
||||
|
||||
if isinstance(lookup, SubMenu):
|
||||
submenu = lookup
|
||||
argparse_param = submenu.var_name
|
||||
elif isinstance(lookup, CommandEntry):
|
||||
return lookup
|
||||
# return self._invoke_command(lookup, args)
|
||||
|
||||
else:
|
||||
raise app_container.NoCommandSpecified('No command specified.')
|
||||
|
||||
def _invoke_command(self, cmd, args):
    """
    Call the callback of a resolved command entry.

    The parameters declared in cmd.func_signature are visited in
    declaration order; for each one whose name is present in args, the
    matching value is collected. The collected values are passed to
    cmd.callback positionally and its result is returned.
    """
    callback = cmd.callback

    # Parameters that argparse did not supply are simply left out.
    positional = [
        args[declared.name]
        for declared in cmd.func_signature.parameters.values()
        if declared.name in args
    ]

    _bootstrap_logger.info('function: %s', callback)
    _bootstrap_logger.info('function args: %s', positional)
    return callback(*positional)
|
||||
|
||||
def _get_subparser(self):
    """
    Return the second action registered on the root parser.

    NOTE(review): presumably this is the subparsers action (index 0 being
    argparse's built-in help action) -- confirm against how root_parser
    is constructed.
    """
    registered_actions = self.root_parser._subparsers._actions
    return registered_actions[1]
|
||||
|
||||
class SubMenu:
    """
    One level of the command menu, wrapping an argparse subparsers action.

    Entries are kept in self.entries keyed by their user-facing name; each
    value is either a CommandEntry (a runnable command) or another SubMenu
    (a nested menu).
    """
    def __init__(self, parent, subparsers_obj, name):
        self.parent = parent # Reference to root CommandTree
        self.subparsers_obj = subparsers_obj
        self.name = name
        self.submenu_path = None

        # argparse destination that stores the entry chosen in this menu.
        # Assigned by the code that creates the menu (see create_submenu());
        # initialised here so the attribute always exists.
        self.var_name = None

        self.entries = {}

    def register_command(
        self, func, cmd_name=None, func_signature=None,
        docstring=None
    ):
        """
        Registers a command as an entry in this submenu. Provided function is
        converted into argparse arguments and made available to the user.

        Arguments
        ---------
        func:
            Callback function which will be mapped
            to the submenu entry.

        cmd_name (optional):
            User-facing entry name. By default will be the function name.
            The user will be able to use [cmd_name] [arg, ...] to
            invoke the callback function.

        func_signature: optionally, you can pass in the
            inspect.signature(). If None, will inspect the
            incoming func. Note on internals: This is used
            to pass the function signature of the command
            function while having the callback point to a
            function partial which executes some other code.
            This hook is used to inject dependencies and then
            execute the command function.

        docstring (optional):
            Text used to build the help and description of the entry.
            By default the docstring of func is used.

        Raises
        ------
        TypeError:
            If func is neither a function nor a bound method.
        """
        if not (inspect.isfunction(func) or inspect.ismethod(func)):
            raise TypeError('bad value passed in for function')

        if not cmd_name:
            cmd_name = func.__name__

        if func_signature is None:
            func_signature = inspect.signature(func)

        if docstring is None:
            docstring = func.__doc__

        sig = func_signature
        params = sig.parameters

        # help is displayed next to the command in the submenu enumeration or
        # list of commands:
        help_text = HelpGenerator.generate_help_from_sig(docstring)
        # description is displayed when querying help for the specific command:
        description_text = HelpGenerator.generate_description_from_sig(docstring)

        # Entry in local argparse._SubParsersAction
        # type = ArgumentParser
        child_node = self.subparsers_obj.add_parser(
            cmd_name, # Note: cmd_name here will be the VALUE
                      # passed into the argparse arg VARIABLE NAME
                      # created when the SubMenu/argparse.add_subparsers()
                      # was created.
            help=help_text,
            description=description_text
        )

        # For each parameter in the function create an argparse argument in
        # the child ArgumentParser created for this menu entry:
        for key, param in params.items():
            if key == 'self':
                continue

            # A parameter has a default exactly when its default is not the
            # inspect sentinel. (Searching str(param) for '=' also matched
            # annotations that merely contain an equals sign.)
            if param.default is not inspect.Parameter.empty:
                if param.default is None:
                    helptext = 'default provided'
                else:
                    helptext = "default = '{}'".format(param.default)
                # Optional positional: nargs='?' lets the user omit it.
                child_node.add_argument(
                    key,
                    help=helptext,
                    nargs='?',
                    default=param.default)
            else:
                helptext = 'required'
                child_node.add_argument(
                    key,
                    help=helptext)

        # Build the CommandEntry structure
        cmd = CommandEntry()
        cmd.argparse_node = child_node
        cmd.cmd_name = cmd_name
        cmd.func_signature = sig
        cmd.callback = func

        registered_name = '{}.{}'.format(
            self.submenu_path,
            cmd_name)
        _bootstrap_logger.info('registered command: %s', registered_name)
        self.entries[cmd_name] = cmd

    def create_submenu(
        self, var_name, cmd_entry_name=None, is_required=False
    ):
        """
        Creates a child-submenu.

        Arguments
        ---------
        var_name:
            A code-facing argparse parameter used to store the
            value/entry chosen by the user.

        cmd_entry_name:
            A user-facing name used to select created submenu.
            If not provided, the user-facing command name defaults
            to the same name as the code-facing argparse parameter

        is_required:
            Switches if a value must be selected in the created submenu.
            If not, it's an optional positional argument.

        Returns
        -------
        The newly created SubMenu, which is also stored in self.entries
        under cmd_entry_name.
        """
        if cmd_entry_name is None:
            cmd_entry_name = var_name

        # Create an entry in self's submenu:
        # type = ArgumentParser
        entry_node = self.subparsers_obj.add_parser(
            cmd_entry_name,
            help='sub-submenu help',
            description='sub-sub description')

        # Turn entry into a submenu of its own:
        # type = _SubParsersAction
        subp_node = entry_node.add_subparsers(
            dest = var_name,
            metavar = var_name,
            required = is_required)

        submenu = SubMenu(
            self.parent,
            subp_node,
            cmd_entry_name)

        submenu.var_name = var_name

        submenu.submenu_path = '{}.{}'.format(self.submenu_path, cmd_entry_name)
        submenu_name = submenu.submenu_path

        _bootstrap_logger.info('registered submenu: %s', submenu_name)
        self.entries[cmd_entry_name] = submenu
        return submenu

    def __repr__(self):
        # List the names of the registered entries (commands and nested
        # menus) in registration order.
        return 'SubMenu({})<{}>'.format(
            self.name,
            ','.join(self.entries)
        )
|
||||
|
||||
class CommandEntry:
    """
    Record describing one runnable command of the CLI.

    It keeps the command name, the signature carrying the original
    parameters of the function to be invoked, a reference to that original
    function, and a callback wrapper which, by convention, builds the
    objects it needs (injecting dependencies, etc.) and then runs the
    original function.

    The CLI module translates the original function arguments into argparse
    options (generating the documentation as well) and, in the other
    direction, turns parsed argparse options back into a function call.
    """
    def __init__(self):
        # Every field starts out unset; the registering code fills them in.
        self.argparse_node = None

        self.cmd_name = None # Don't think we need. And needs to be changed
                             # from SubMenu
        self.menu_path = None
        self.func_signature = None
        self.func_ref = None
        self.callback = None

    def __repr__(self):
        return f'CommandEntry<{self.cmd_name}>'
|
||||
|
||||
class HelpGenerator:
    """
    Derives argparse help and description strings from a docstring.
    """
    def __init__(self):
        pass

    @staticmethod
    def _leading_sentence(doctext):
        """
        Return the text up to the first '.', '?' or '!' with a '.' appended.

        None is passed through unchanged, and text that contains none of
        those terminators is returned as-is.
        """
        if doctext is None:
            return doctext
        # DOTALL lets the sentence span several lines of the docstring.
        found = re.match('(.*?)[.?!]', doctext, re.MULTILINE | re.DOTALL)
        if not found:
            return doctext
        return found.group(1) + '.'

    @staticmethod
    def generate_help_from_sig(doctext):
        """
        Build the short 'help' text shown beside a command when the
        commands of a submenu are listed.
        """
        return HelpGenerator._leading_sentence(doctext)

    @staticmethod
    def generate_description_from_sig(doctext):
        """
        Build the 'description' shown when the user asks for help on one
        specific command.
        """
        return HelpGenerator._leading_sentence(doctext)
|
||||
|
165
lib/app_skellington/log.py
Normal file
165
lib/app_skellington/log.py
Normal file
@ -0,0 +1,165 @@
|
||||
import appdirs
|
||||
import colorlog
|
||||
import logging
|
||||
import logging.config
|
||||
import os
|
||||
|
||||
from ._bootstrap import _bootstrap_logger
|
||||
from . import _util
|
||||
|
||||
# Logging configuration used when the application supplies none.
# Level names are lower-case strings here; LoggingLayer.transform_config()
# converts them to the numeric levels of the logging module and renames the
# 'root' logger to '' before the dict is handed to logging.config.dictConfig.
DEFAULT_LOG_SETTINGS = {
    'formatters': {
        'colored': {
            'class': 'colorlog.ColoredFormatter',
            'format': '%(white)s%(name)7s%(reset)s|%(log_color)s%(message)s',
        }
    },

    'handlers': {
        'stderr': {
            'class': 'logging.StreamHandler',
            'level': 'debug',
            'formatter': 'colored'
        }
    },

    'loggers': {
        'root': {
            'handlers': ['stderr',],
            'level': 'debug'
        },
        'app_skellington': {
            # 'handlers': ['stderr',],
            'level': 'critical',
            # Must be a real boolean: dictConfig assigns the value verbatim
            # to Logger.propagate, and the string 'false' is truthy.
            'propagate': False
        }
    }
}
|
||||
|
||||
class LoggingLayer:
    """
    Access point for named loggers and for configuring the logging module.

    Loggers are retrieved with subscript syntax (layer['db']) and cached in
    self.loggers. configure_logging() accepts a config-file style dict,
    adapts it with transform_config() and passes it to
    logging.config.dictConfig.
    """
    # Lower-case level names accepted in configuration, mapped to the
    # numeric levels of the logging module.
    _LEVELS_BY_NAME = {
        'critical': logging.CRITICAL,
        'error': logging.ERROR,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG,
        'all': logging.NOTSET,
    }

    def __init__(self, appname, appauthor, config=None):
        # NOTE(review): config is accepted but not used by this constructor.
        self.appname = appname
        self.appauthor = appauthor
        self.loggers = {}

    def __getitem__(self, k):
        """
        Returns Logger object named <k>.

        Example:
            log = LoggingLayer(...)
            log['db'].info('loaded database module')

        Args:
            k: the name of the logger to retrieve (k, i.e. key)
        """
        logger = self.loggers.get(k)
        if logger is None:
            logger = logging.getLogger(k)
            self.loggers[k] = logger
        return logger

    def configure_logging(self, config_dict=None):
        """
        Set the logging level for the process. Verbosity is controlled by a
        parameter in the config.

        Advice: While DEBUG verbosity is useful to debug, it can produce too much
        noise for typical operation.

        Args:
            config_dict: logging configuration in config-file form. It is
                transformed in place. When None, a private copy of
                DEFAULT_LOG_SETTINGS is used.

        A failure inside logging.config.dictConfig is reported on stdout
        and otherwise ignored.
        """
        if config_dict is None:
            import copy
            _bootstrap_logger.debug('No application logging configuration provided. Using default')
            # transform_config() rewrites the dict in place (e.g. it removes
            # the 'root' key), so never hand it the shared module default.
            config_dict = copy.deepcopy(DEFAULT_LOG_SETTINGS)

        self.transform_config(config_dict)

        try:
            # TODO(MG) switch to pretty-print, as it'd be more human readable
            _bootstrap_logger.debug('Log configuration: %s', config_dict)
            logging.config.dictConfig(config_dict)
        except Exception as ex:
            print('unable to configure logging:', ex, type(ex))

    def transform_config(self, config_dict):
        """
        Fix some incompatibilities and differences between the config-file logging
        parameters and the final config dictionary passed into the logging module.

        The dictionary is modified in place: 'version' is forced to 1, level
        names of handlers and loggers become numeric levels, the 'root'
        logger is renamed to '', and a relative filename of a handler named
        'file' is resolved inside the per-user log directory.
        """
        # Version should be hard-coded 1, per Python docs
        if 'version' in config_dict and config_dict['version'] != 1:
            _bootstrap_logger.warning("logging['version'] must be '1' per Python docs")
        config_dict['version'] = 1

        self._add_own_logconfig(config_dict)

        # Replace handler level strings with value integers from module
        for handler in config_dict['handlers']:
            self._convert_str_to_loglevel(config_dict['handlers'][handler], 'level')

        # Replace logger level strings with value integers from module
        for logger in config_dict['loggers']:
            self._convert_str_to_loglevel(config_dict['loggers'][logger], 'level')

        # Replace 'root' logger with '', logging module convention for root handler
        # Note: '' is disallowed in ConfigObj (hence the reason for this replacement)
        # Guarded so a dict that was already transformed can be passed again.
        if 'root' in config_dict['loggers']:
            config_dict['loggers'][''] = config_dict['loggers']['root']
            del config_dict['loggers']['root']

        # Evaluate the full filepath of the file handler
        if 'file' not in config_dict['handlers']:
            return

        file_handler = config_dict['handlers']['file']
        filename = file_handler['filename']
        if not os.path.isabs(filename):
            # Relative names are placed in the per-user log directory.
            dirname = appdirs.user_log_dir(self.appname, self.appauthor)
            _util.ensure_dir_exists(dirname)
            file_handler['filename'] = os.path.join(dirname, filename)

    def _add_own_logconfig(self, config_dict):
        """
        Switch the 'app_skellington' logger to debug level when the
        APPSKELLINGTON_ENABLE_LOGGING environment variable is set to a
        non-empty value; otherwise leave config_dict untouched.
        """
        if os.environ.get('APPSKELLINGTON_ENABLE_LOGGING', None):
            if 'app_skellington' not in config_dict['loggers']:
                config_dict['loggers']['app_skellington'] = {
                    # A real boolean: the string 'false' would be truthy.
                    'level': 'debug', 'propagate': False
                }
            else:
                config_dict['loggers']['app_skellington']['level'] = 'debug'

    def _convert_str_to_loglevel(self, dict_, key):
        """
        Convert a dictionary value from a string representation of a log level
        into the numeric value of that log level. The value is modified in-place
        and is passed in by a dictionary reference and a key name.

        For example,
            d = {'loggers': {'cas': {'level': 'critical'}}}
            convert_str_to_loglevel(d['loggers']['cas'], 'level')
            =>
            d is now {'loggers': {'cas': {'level': logging.CRITICAL}}}

        Values that are not one of the known lower-case names are left
        unchanged. Raises KeyError if key is missing from dict_.
        """
        level = dict_[key]
        if isinstance(level, str) and level in self._LEVELS_BY_NAME:
            dict_[key] = self._LEVELS_BY_NAME[level]
|
||||
|
45
lib/setup.py
Normal file
45
lib/setup.py
Normal file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# First, enable the python environment you want to install to, or if installing
|
||||
# system-wide then ensure you're logged in with sufficient permissions
|
||||
# (admin or root to install to system directories)
|
||||
#
|
||||
# installation:
|
||||
#
|
||||
# $ ./setup.py install
|
||||
#
|
||||
# de-installation:
|
||||
#
|
||||
# $ pip uninstall <app>
|
||||
|
||||
|
||||
from setuptools import setup
|
||||
|
||||
__project__ = 'app_skellington'
__version__ = '0.1.0'

setup(
    name = __project__,
    version = __version__,
    description = 'A high-powered 2-level CLI framework',
    author = 'Mathew Guest',
    author_email = 'mathewguest@gmail.com',
    url = 'https://git-mirror.zavage-software.com',

    # Third-party dependencies; will be automatically installed.
    # pprint is deliberately absent: it ships with the Python standard
    # library, and requiring it would pull an unrelated distribution of
    # the same name from the package index.
    install_requires = (
        'appdirs',
        'configobj',
        'colorlog',
    ),

    # Local packages to be installed (our packages)
    packages = (
        'app_skellington',
    ),

)
|
||||
|
6
road2philosophy.py
Executable file
6
road2philosophy.py
Executable file
@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
import wikicrawl
|
||||
import settings
|
||||
# Only start the application when this file is executed as a script, so
# that importing it has no side effects.
if __name__ == '__main__':
    wikicrawl.start_app()
    # Wait for the user to press <enter> before the process exits.
    input('<enter> to exit')
|
||||
|
45
setup.py
Normal file → Executable file
45
setup.py
Normal file → Executable file
@ -3,23 +3,42 @@
|
||||
# required third-party dependencies and package the app. You can also
|
||||
# install the application system-wide.
|
||||
|
||||
from setuptools import setup
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
__project__ = 'wikicrawl'
|
||||
# If you're looking for a versioning scheme, one revered pattern
|
||||
# can be read about at http://semver.org
|
||||
__version__ = '0.9.0'
|
||||
|
||||
setup(name = __project__,
|
||||
version = __version__,
|
||||
description = '',
|
||||
author = '',
|
||||
author_email = '',
|
||||
url = '',
|
||||
install_requires = ('yandex.translate',
|
||||
'selenium',
|
||||
'colorlog',
|
||||
'baker'
|
||||
),
|
||||
packages = ('wikicrawl',))
|
||||
setup(
    name = __project__,
    version = __version__,
    description = '',
    author = '',
    author_email = '',
    url = '',
    install_requires = (
        'yandex.translate',
        'selenium',
        'colorlog',
        'baker',

        ## Additional dependencies required from app_skellington
        ## (colorlog is already listed above; pprint is part of the
        ## standard library and must not be requested from the index):
        'appdirs',
        'configobj',
    ),
    packages = find_packages(
        where='.',
        include=('*',),
        exclude=()
    ),
    # NOTE(review): this commit places the package under
    # lib/app_skellington -- confirm that mapping the package to 'lib'
    # resolves it as intended.
    package_dir = {
        'app_skellington': 'lib'
    },
    scripts = (
        'road2philosophy.py',
    )
)
|
||||
|
||||
|
@ -7,6 +7,5 @@
|
||||
# We export config.init() as a reference to wikicrawl.config.init() and
|
||||
# wikicrawl.main as a reference to wikicrawl.cli.main
|
||||
|
||||
from .config import init
|
||||
from .main import main
|
||||
from .app import start_app
|
||||
|
||||
|
87
wikicrawl/app.py
Normal file
87
wikicrawl/app.py
Normal file
@ -0,0 +1,87 @@
|
||||
# The command-line interface module creates an interface for
|
||||
# interacting with the python program (wikicrawl). This is an implementation
|
||||
# of the baker demo shown previously. The user can type in commands to
|
||||
# make the program do things.
|
||||
|
||||
import sys
|
||||
if sys.platform == 'linux':
|
||||
import readline # Needed for command history <up> and <down> arrows to work
|
||||
|
||||
from . import browser
|
||||
from . import dal
|
||||
from . import model
|
||||
|
||||
from app_skellington import _util
|
||||
import app_skellington
|
||||
from app_skellington._util import register_class_as_commands
|
||||
|
||||
# Problem pages:
|
||||
# Decision (from politics)
|
||||
# Malaysia (goes inside parenthesis)
|
||||
# Soft-sediment_deformation_structures (doesn't find link)
|
||||
# Chemicals (loops at philosophical)
|
||||
|
||||
class InteractiveInterface(app_skellington.ApplicationContainer):
    """
    Application container of the road2philosophy command-line program.

    It points the base container at the bundled config.spec and the
    'config.ini' config file, registers the ctx, dal, browser and model
    services, and exposes model.Model as commands under the 'command'
    submenu.
    """
    def __init__(self, *args, **kwargs):
        spec_path = _util.get_asset(__name__, 'config.spec')
        ini_path = self._get_config_filepath('road2philosophy', '', 'config.ini')

        super().__init__(
            *args,
            configspec_filepath=spec_path,
            config_filepath=ini_path,
            **kwargs
        )

        # Configure logging (currently disabled):
        # log_config = self.ctx.config['logging']
        # self.ctx.log.configure_logging(log_config)
        # self.ctx.log.configure_logging()

    def invoke_from_cli(self):
        """
        Load the command given on the command line and run it. If loading
        reports failure, print a hint and return without running anything.
        """
        if not self.load_command():
            print('Invalid command. Try -h for usage')
            return
        # load config
        self.invoke_command()

    def usage(self):
        """Print the (placeholder) usage text."""
        print('''
...usage info to come ;)...
''')

    def _cli_options(self):
        """Declare the --usage switch on the command-line parser."""
        self.cli.add_argument(
            '--usage',
            help='Prints program usage information',
            action='store_true'
        )

    def _services(self):
        """
        Create the service objects and register a provider for each.

        Every provider is a lambda that reads the attribute at call time.
        The model is built last because it receives the browser and the
        data layer.
        """
        self['ctx'] = lambda: self.ctx

        self.dal = dal.DataLayer(self.ctx)
        self['dal'] = lambda: self.dal

        self.browser = browser.Browser(self.ctx)
        self['browser'] = lambda: self.browser

        self.model = model.Model(self.ctx, self.browser, self.dal)
        self['model'] = lambda: self.model

    def _command_menu(self):
        """Create the root 'command' submenu and register model.Model on it."""
        self.sm_root = self.cli.init_submenu('command')
        register_class_as_commands(self, self.sm_root, model.Model)
|
||||
|
||||
def start_app(config=None):
    """
    Build the InteractiveInterface and run the command given on the
    command line.

    The config argument is accepted but not used.
    """
    app = InteractiveInterface()
    app.invoke_from_cli()
|
||||
|
@ -12,39 +12,44 @@
|
||||
import selenium
|
||||
import selenium.webdriver
|
||||
|
||||
from . import config
|
||||
from . import log
|
||||
class Browser:
|
||||
def __init__(self, ctx):
|
||||
self.ctx = ctx
|
||||
|
||||
# This function has a parameter (driver) that passes in a value. In this case,
|
||||
# this driver variable defaults to the string 'chrome'. The code can call
|
||||
# create_webdriver() which is the same as create_webdriver('chrome') but
|
||||
# can alternatively call create_webdriver('firefox') and get different
|
||||
# functionality.
|
||||
def create_webdriver(driver='chrome'):
|
||||
if driver == 'chrome':
|
||||
return create_webdriver_chrome()
|
||||
elif driver == 'firefox':
|
||||
return create_webdriver_firefox()
|
||||
elif driver == 'phantom':
|
||||
return create_webdriver_phantom()
|
||||
else:
|
||||
log.LOGGER('browser').error('unable to handle webdriver request: %s' % driver)
|
||||
return
|
||||
# This function has a parameter (driver) that passes in a value. In this case,
|
||||
# this driver variable defaults to the string 'chrome'. The code can call
|
||||
# create_webdriver() which is the same as create_webdriver('chrome') but
|
||||
# can alternatively call create_webdriver('firefox') and get different
|
||||
# functionality.
|
||||
def create_webdriver(self, driver='chrome'):
|
||||
if driver == 'chrome':
|
||||
return self.create_webdriver_chrome()
|
||||
elif driver == 'firefox':
|
||||
return self.create_webdriver_firefox()
|
||||
elif driver == 'phantom':
|
||||
return self.create_webdriver_phantom()
|
||||
else:
|
||||
log.LOGGER('browser').error('unable to handle webdriver request: %s' % driver)
|
||||
return
|
||||
|
||||
def create_webdriver_firefox():
|
||||
profile = selenium.webdriver.FirefoxProfile()
|
||||
profile.set_preference("general.useragent.override", config.obj.WEBDRIVER_USER_AGENT)
|
||||
driver = selenium.webdriver.Firefox(profile)
|
||||
return driver
|
||||
def create_webdriver_firefox(self):
|
||||
profile = selenium.webdriver.FirefoxProfile()
|
||||
profile.set_preference(
|
||||
"general.useragent.override",
|
||||
self.ctx.config['app']['webdriver_user_agent'])
|
||||
driver = selenium.webdriver.Firefox(profile)
|
||||
return driver
|
||||
|
||||
def create_webdriver_chrome():
|
||||
opt = selenium.webdriver.chrome.options.Options()
|
||||
opt.add_argument('--user-agent=' + config.obj.WEBDRIVER_USER_AGENT)
|
||||
driver = selenium.webdriver.Chrome(executable_path=config.obj.CHROMEDRIVER_EXE,
|
||||
chrome_options=opt)
|
||||
return driver
|
||||
def create_webdriver_chrome(self):
|
||||
opt = selenium.webdriver.chrome.options.Options()
|
||||
opt.add_argument('--user-agent=' + self.ctx.config['app']['webdriver_user_agent'])
|
||||
driver = selenium.webdriver.Chrome(
|
||||
executable_path=self.ctx.config['app']['chromedriver_exe'],
|
||||
chrome_options=opt
|
||||
)
|
||||
return driver
|
||||
|
||||
def create_webdriver_phantom():
|
||||
driver = selenium.webdriver.PhantomJS()
|
||||
return driver
|
||||
def create_webdriver_phantom(self):
|
||||
driver = selenium.webdriver.PhantomJS()
|
||||
return driver
|
||||
|
||||
|
203
wikicrawl/cli.py
203
wikicrawl/cli.py
@ -1,203 +0,0 @@
|
||||
# The command-line interface module creates an interface for
|
||||
# interacting with the python program (wikicrawl). This is an implementation
|
||||
# of the baker demo shown previously. The user can type in commands to
|
||||
# make the program do things.
|
||||
|
||||
import baker
|
||||
import sys
|
||||
|
||||
if sys.platform == 'linux':
|
||||
import readline # Needed for command history <up> and <down> arrows to work
|
||||
|
||||
from . import log
|
||||
from . import model
|
||||
from . import config
|
||||
|
||||
# Problem pages:
|
||||
# Decision (from politics)
|
||||
# Malaysia (goes inside parenthesis)
|
||||
# Soft-sediment_deformation_structures (doesn't find link)
|
||||
# Chemicals (loops at philosophical)
|
||||
|
||||
commander = baker.Baker()
|
||||
|
||||
class InteractiveInterface:
|
||||
def __init__(self):
|
||||
# Instantiate the variable self.model as an object
|
||||
# of instance of the Model class defined in the model
|
||||
# module. model.Model refers to the Model class in the
|
||||
# model module and this line creates a new variable (self.model)
|
||||
# which is a variable that is an instance of Model, i.e.
|
||||
# it has the type Model and has Model.methods() available
|
||||
# to it.
|
||||
#
|
||||
# self.model is a variable that is attached to the instance/object
|
||||
# returned by this constructor that has the type InteractiveInterface.
|
||||
self.model = model.Model()
|
||||
|
||||
def run_command(self, args, main=True):
|
||||
"""
|
||||
Runs the command-line interface for a single command.
|
||||
|
||||
If called by InteractiveInterface.run(sys.argv), this method
|
||||
will execute the commands and arguments specified on command
|
||||
line when running this program. Alternatively, the code could
|
||||
pass in a different set of arguments to specify what to do.
|
||||
See start_command_loop() for more information.
|
||||
"""
|
||||
try:
|
||||
commander.run(argv=args, main=True, help_on_error=True,
|
||||
instance=self)
|
||||
except baker.CommandError as ex:
|
||||
log.LOGGER['cli'].warn('incorrect user input: %s' % ex)
|
||||
commander.usage()
|
||||
except baker.TopHelp as ex:
|
||||
commander.usage()
|
||||
except Exception as ex:
|
||||
log.LOGGER['cli'].error('caught general exception!!')
|
||||
log.LOGGER['cli'].error(ex)
|
||||
|
||||
def start_command_loop(self):
|
||||
"""
|
||||
Repeatedly asks the user what command to run until they exit.
|
||||
|
||||
This method calls InteractiveInterface.run(args) a little bit
|
||||
differently. Instead of passing the arguments from the command-line
|
||||
that were passed in when invoking the python wikicrawl app,
|
||||
this asks the user for a line of textual input and passes
|
||||
those strings to run() as the arguments. This way, the user can
|
||||
access an interactive shell and repeatedly issue different
|
||||
commands while the application is running.
|
||||
"""
|
||||
commander.usage()
|
||||
self.model.open_browser()
|
||||
while True:
|
||||
print('$ ', end = '') # Display to the user a command prompt
|
||||
# The dollar-sign is a common indication
|
||||
# of a shell that communicates to the user
|
||||
# that we are waiting for their textual
|
||||
# input. The end = '' indicates to python
|
||||
# to NOT drop to a newline after printing
|
||||
# in the terminal. Instead, let the user
|
||||
# type their command on the same line as
|
||||
# the printed '$ '.
|
||||
try:
|
||||
inp = input()
|
||||
except EOFError: # <ctrl>+D will send "End Line" and exit the command loop
|
||||
break
|
||||
|
||||
# Note on "arguments" (mg):
|
||||
# Whenever a program is run in windows or *nix, the operating
|
||||
# system passes in the command string that was used to invoke
|
||||
# the program. You can append data in that command to configure
|
||||
# switches or values going into the program on the fly. For
|
||||
# example, you can invoke this wikicrawl app in more than one
|
||||
# way. You can of course run "python launcher.py" to run the
|
||||
# software but you can also pass in an argument. You can
|
||||
# alternatively run "python launcher.py <argument> <argument>..."
|
||||
# and the operating system will provide the <argument> values into
|
||||
# the process that is running as variables.
|
||||
#
|
||||
# In a real world use case, many commands provide switches to
|
||||
# adjust what the program does. For example,
|
||||
#
|
||||
# The command:
|
||||
# find music -name "*justin*bieber*"
|
||||
# runs the "find" program and asks to find all the filenames that match the
|
||||
# pattern *justin*bieber* in the "music" directory.
|
||||
# (music, -name, "*justin*bieber*") are argument parameters
|
||||
# that are passed into the program. The program is coded to
|
||||
# parse and interpret these values and execute differently based
|
||||
# on the values passed in. This is one way to pass in information
|
||||
# into a running program. Some other ways are to read from a file
|
||||
# (such as how we read from settings.py to load the runtime
|
||||
# configuration), from something called environment variables
|
||||
# (won't get into but another set of values provided to programs
|
||||
# from the operating system), or they can be hard-coded into
|
||||
# the application.
|
||||
#
|
||||
# Side note: arguments are not unique to python (almost all
|
||||
# programming languages implement arguments), the functionality
|
||||
# is defined by the application (some programs require arguments,
|
||||
# some are optional, and the syntax for sending in argument
|
||||
# parameters are different and defined by the individual programs,
|
||||
# and lastly, the first argument sent in is the script name or
|
||||
# filename of the script. In our case, the first argument is
|
||||
# the string "launcher.py". If the user invoked the command
|
||||
# as C:\Users\mguest\launcher.py then the first argument
|
||||
# would be C:\Users\mguest\launcher.py.
|
||||
|
||||
# What this method (start_command_loop()) does is provide a
|
||||
# REPL shell which is a
|
||||
# read-eval-print-loop. It repeatedly asks the user for an
|
||||
# input (read), evaluates that input into an action (evaluate),
|
||||
# give the user some feedback (print), and start the process
|
||||
# over again (loop). When you call just "python", you are loading a
|
||||
# program that gives you a REPL interactive shell. The way
|
||||
# this wikicrawl app is implemented gives the user a REPL
|
||||
# that has commands to interact with wikipedia pages.
|
||||
|
||||
# Because we take in the input as a single string, we do
|
||||
# a transformation to turn something like "do_random_page 5"
|
||||
# into ["launcher.py", "do_random_page", "5"] which is how
|
||||
# the arguments array would have been created if it were
|
||||
# passed in the initial command instead of typed and interpreted
|
||||
# as input as is done here.
|
||||
args = [sys.argv[0], ] + inp.split()
|
||||
|
||||
# The user can at any point in the command pass the argument
|
||||
# switch "--help". If doing this, the command line interface
|
||||
# will instead print out the inline documentation associated
|
||||
# with this command and quit after doing so. For example,
|
||||
# the user can type "python launcher.py do_random_page --help"
|
||||
# and the program will spit out the generated documentation
|
||||
# for the do_random_page command and run nothing. In our case,
|
||||
# this documentation is created by the baker library and will
|
||||
# print out the docstring associated with the method. Try it
|
||||
# out in your shell (cmd.exe or powershell.exe) by invoking
|
||||
# python launcher.py do_random_page --help
|
||||
# You will see the program spit out the heredoc below the
|
||||
# do_random_page method defined below.
|
||||
if '--help' in args:
|
||||
args.remove('--help')
|
||||
try:
|
||||
print('command usage:')
|
||||
commander.usage(args[1])
|
||||
except Exception as ex:
|
||||
print(type(ex), ex)
|
||||
continue
|
||||
|
||||
self.run_command(args, main=False)
|
||||
|
||||
@commander.command
|
||||
def play_specific_page(self, title):
|
||||
pass
|
||||
|
||||
@commander.command
|
||||
def play_random_page(self):
|
||||
"""
|
||||
Instructs the wikicrawl application to play the game on a random
|
||||
article.
|
||||
"""
|
||||
self.model.play_random_page()
|
||||
|
||||
@commander.command
|
||||
def play_multiple(self, n):
|
||||
"""
|
||||
Plays the wikicrawl game <n>-times.
|
||||
"""
|
||||
try:
|
||||
n = int(n)
|
||||
except ValueError as ex:
|
||||
log.LOGGER['cli'].warn('failed to process "%s" as a parameter' % n)
|
||||
return False
|
||||
for i in range(n):
|
||||
self.model.play_random_page()
|
||||
|
||||
@commander.command
|
||||
def exit(self):
|
||||
"""
|
||||
Immediately exit the program.
|
||||
"""
|
||||
sys.exit(0)
|
||||
|
@ -1,22 +0,0 @@
|
||||
# config module defines a place to store the external configuration/settings
|
||||
# and is used to provide an interface to the runtime configuration for the
|
||||
# program.
|
||||
|
||||
import sys
|
||||
|
||||
from . import log
|
||||
|
||||
obj = {}
|
||||
|
||||
def init(settings_obj):
    """
    Install settings_obj as the module-wide configuration object, then
    run the chromedriver path lookup and initialize logging.
    """
    global obj
    obj = settings_obj

    # Both calls run after obj has been assigned.
    find_chromedriver_path()
    log.init_logging()
|
||||
|
||||
def find_chromedriver_path():
    """
    Print the path of this module file.

    No chromedriver lookup is performed here; the function only prints
    __file__ and returns None.
    """
    print(__file__)
|
||||
|
76
wikicrawl/config.spec
Normal file
76
wikicrawl/config.spec
Normal file
@ -0,0 +1,76 @@
|
||||
[app]
|
||||
# Filepath parameters - THESE MUST EXIST OR PROGRAM WILL NOT RUN!!
|
||||
log_filename = string(min=0, max=255, default='/tmp/wikicrawl.log')
|
||||
sqlite_dbfile = string(min=0, max=255, default='/home/mathew/.wikicrawler.db')
|
||||
chromedriver_exe = string(min=0, max=255, default='/usr/bin/chromedriver')
|
||||
# CHROMEDRIVER_EXE = 'C:\\Users\\mathew\\windows-share\\dev\\wikicrawl\\chromedriver.exe'
|
||||
|
||||
# Application Parameters
|
||||
do_breakpoints = boolean(default=False)
|
||||
page_delay = integer(min=0, max=2000, default=0)
|
||||
|
||||
# Web Driver Parameters
|
||||
webdriver_user_agent = string(min=0, max=255, default='Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)')
|
||||
|
||||
# Requested browser and webdriver dependencies are required for this to work.
|
||||
# This means you need to have installed on your system:
|
||||
# Chrome + WebDriver for Chrome
|
||||
# Firefox + geckodriver for Firefox
|
||||
# phantomjs for phantom
|
||||
# Options are 'chrome', 'firefox', 'phantom'
|
||||
webdriver_browser = string(min=1, max=63, default='chrome')
|
||||
|
||||
# Wikipedia Parameters
|
||||
page_base_url = string(min=1, max=255, default='https://www.wikipedia.org/')
|
||||
|
||||
# Supported Languages so far:
|
||||
# German, English, Spanish, French, Italian, Portuguese, Polish, Russian
|
||||
# 'de', 'en', 'es', 'fr', 'it', 'pl', 'pt', 'ru'
|
||||
page_language = string(min=2, max=22, default='en')
|
||||
|
||||
# API Keys
|
||||
# Yandex is a web REST API for translating between different languages.
|
||||
yandex_api_key = string(min=0, max=1023, default='trnsl.1.1.20170825T194642Z.26862b9dd4c1a755.9490ed28de448ff67522c2854f262eff05ec0dc3')
|
||||
|
||||
[logging]
|
||||
log_file = string(max=255, default='')
|
||||
log_level = option('critical', 'error', 'warning', 'info', 'debug', default='info')
|
||||
log_fmt = string(max=255, default='')
|
||||
|
||||
[[formatters]]
|
||||
[[[colored]]]
|
||||
() = string(default='colorlog.ColoredFormatter')
|
||||
format = string(max=255, default='%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(white)s%(message)s')
|
||||
|
||||
[[[basic]]]
|
||||
() = string(max=255, default='logging.Formatter')
|
||||
format = string(max=255, default='%(levelname)s:%(name)s:%(asctime)s:%(message)s')
|
||||
|
||||
[[[forstorage]]]
|
||||
() = string(max=255, default='logging.Formatter')
|
||||
format = string(max=255, default='%(levelname)s:%(name)s:%(asctime)s:%(message)s')
|
||||
|
||||
[[handlers]]
|
||||
[[[stderr]]]
|
||||
class = string(max=255, default='logging.StreamHandler')
|
||||
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
|
||||
formatter = string(max=255, default='colored')
|
||||
|
||||
[[[file]]]
|
||||
class = string(max=255, default='logging.handlers.RotatingFileHandler')
|
||||
level = option('critical', 'error', 'warning', 'info', 'debug', default='warning')
|
||||
formatter = string(max=255, default='forstorage')
|
||||
filename = string(max=255, default='road2philosophy.log')
|
||||
maxBytes = integer(min=0, max=33554432, default=33554432)
|
||||
backupCount = integer(min=0, max=3, default=1)
|
||||
|
||||
[[loggers]]
|
||||
[[[root]]]
|
||||
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
|
||||
handlers = string_list(max=8, default=list('file',))
|
||||
|
||||
[[[r2p]]]
|
||||
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
|
||||
handlers = string_list(max=8, default=list('stderr',))
|
||||
propagate = boolean(default=False)
|
||||
|
@ -4,9 +4,7 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
from . import config
|
||||
|
||||
class DataLayer:
|
||||
def __init__(self):
|
||||
pass
|
||||
def __init__(self, ctx):
|
||||
self.ctx = ctx
|
||||
|
||||
|
@ -1,39 +0,0 @@
|
||||
# log module is a wrapper around third-party colorlog library
|
||||
# and provides an application-level interface to a logging system.
|
||||
|
||||
import colorlog
|
||||
import logging
|
||||
|
||||
from . import config
|
||||
|
||||
# Default python log severity levels:
|
||||
# CRITICAL
|
||||
# ERROR
|
||||
# WARNING
|
||||
# INFO
|
||||
# DEBUG
|
||||
|
||||
LOGGER = None
|
||||
|
||||
class LoggingLayer:
    """
    Application-level access point to the logging system.

    Constructing an instance applies the given dictionary through
    logging.config.dictConfig. Loggers are then fetched by name with
    the [] operator and remembered in self.loggers.
    """

    def __init__(self, config):
        """
        Apply the logging configuration.

        config is handed unchanged to logging.config.dictConfig, which
        raises if the dictionary is not a valid logging configuration.
        """
        # "import logging" alone does not load the logging.config
        # submodule, so import it explicitly before using it.
        import logging.config

        self.loggers = {}
        logging.config.dictConfig(config)

    # Note on __getitem__:
    # __getitem__ overrides the functionality of the [] operator.
    # That means this code:
    # objinstance = LoggingLayer(...)
    # objinstance[foo] calls LoggingLayer.__getitem__(foo)
    # and returns the result.
    def __getitem__(self, k):
        """
        Return the logger named k, creating and caching it on first use.
        """
        logger = self.loggers.get(k)
        if logger is None:
            logger = logging.getLogger(k)
            self.loggers[k] = logger
        return logger
|
||||
|
||||
def init_logging():
    """
    Build the module-wide LOGGER from the LOG_SETTINGS attribute of the
    configuration object held in config.obj.
    """
    global LOGGER
    settings = config.obj.LOG_SETTINGS
    LOGGER = LoggingLayer(settings)
|
||||
|
@ -1,23 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
|
||||
from . import cli
|
||||
from . import config
|
||||
from . import util
|
||||
|
||||
def main():
    """
    Program entry point.

    Creates the interactive interface. With no extra command-line
    arguments it starts the command loop; otherwise it runs the command
    given on the command line (sys.argv) once.
    """
    user_interface = cli.InteractiveInterface()

    if len(sys.argv) <= 1:
        # Nothing was passed besides the program name itself.
        user_interface.start_command_loop()
        return

    # Command line arguments were passed in when invoking python.
    user_interface.run_command(sys.argv)
|
||||
|
||||
def verify_config_is_valid():
    """
    Placeholder for configuration validation.

    Performs no checks at present and always returns None.
    """
    return None
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -9,15 +9,15 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
from . import browser
|
||||
from . import config
|
||||
from . import dal
|
||||
from . import log
|
||||
from . import pages
|
||||
from . import util
|
||||
|
||||
class Model:
|
||||
def __init__(self):
|
||||
def __init__(self, ctx, browser, dal):
|
||||
self.ctx = ctx
|
||||
self.browser = browser
|
||||
self.dal = dal
|
||||
|
||||
self._webdriver = None
|
||||
self._translated_philosophy = None
|
||||
|
||||
@ -33,7 +33,7 @@ class Model:
|
||||
# always existing with or without knowing if it exists because if it
|
||||
# hasn't been created yet then it will be created on-the-fly.
|
||||
if not self._webdriver:
|
||||
self._webdriver = browser.create_webdriver(config.obj.WEBDRIVER_BROWSER)
|
||||
self._webdriver = self.browser.create_webdriver(self.ctx.config['app']['webdriver_browser'])
|
||||
page_api = pages.LandingPage(self.webdriver)
|
||||
page_api.goto_landing_page()
|
||||
return self._webdriver
|
||||
@ -41,10 +41,10 @@ class Model:
|
||||
@property
|
||||
def translated_philosophy(self):
|
||||
# This translates 'philosophy' to the target language with only 1 api call.
|
||||
if config.obj.PAGE_LANGUAGE == 'en':
|
||||
if self.ctx.config['app']['page_language'] == 'en':
|
||||
self._translated_philosophy = 'philosophy'
|
||||
elif not self._translated_philosophy:
|
||||
text = util.translate_text('en', config.obj.PAGE_LANGUAGE, 'philosophy')
|
||||
text = util.translate_text('en', self.ctx.config['app']['page_language'], 'philosophy')
|
||||
self._translated_philosophy = text
|
||||
return self._translated_philosophy
|
||||
|
||||
@ -59,6 +59,19 @@ class Model:
|
||||
# creates it and then it is re-used later
|
||||
# in the application.
|
||||
|
||||
def play_multiple(self, n):
    """
    Plays the wikicrawl game <n>-times.

    n is converted with int() first, so it may be given as a string.
    If n cannot be converted, a warning is logged on the 'model' logger
    and False is returned. Otherwise play_random_page() is called n
    times on this object and None is returned; a zero or negative n
    plays nothing.
    """
    try:
        rounds = int(n)
    except (TypeError, ValueError):
        # warning() replaces the deprecated warn() alias; the 'model'
        # channel matches the other log calls made by this class.
        self.ctx.log['model'].warning('failed to process "%s" as a parameter', n)
        return False
    for _ in range(rounds):
        # play_random_page lives on this object; there is no
        # self.model attribute to go through here.
        self.play_random_page()
|
||||
|
||||
|
||||
def play_random_page(self):
|
||||
"""
|
||||
Select a random page and repeatedly click the first link until
|
||||
@ -86,7 +99,7 @@ class Model:
|
||||
# In this case, we have made the language a parameter
|
||||
# that you can pass into the program, i.e. you can run it
|
||||
# for English or Spanish or Russian or what have you.
|
||||
page_api.select_language(config.obj.PAGE_LANGUAGE)
|
||||
page_api.select_language(self.ctx.config['app']['page_language'])
|
||||
|
||||
# Main page: next 2 lines
|
||||
|
||||
@ -114,20 +127,20 @@ class Model:
|
||||
|
||||
# Get the article title (and translate if necessary)
|
||||
title = page_api.get_title()
|
||||
if config.obj.PAGE_LANGUAGE != 'en':
|
||||
translated_title = util.translate_text(config.obj.PAGE_LANGUAGE, 'en', title)
|
||||
log.LOGGER['model'].info('visited page: %s (%s)' % (title, translated_title))
|
||||
if self.ctx.config['app']['page_language'] != 'en':
|
||||
translated_title = util.translate_text(self.ctx.config['app']['page_language'], 'en', title)
|
||||
self.ctx.log['model'].info('visited page: %s (%s)' % (title, translated_title))
|
||||
else:
|
||||
log.LOGGER['model'].info('visited page: %s' % title)
|
||||
self.ctx.log['model'].info('visited page: %s' % title)
|
||||
|
||||
# Check for page loops (have we already visited this page?)
|
||||
if title in pages_visited:
|
||||
log.LOGGER['model'].info('encountered loop at page = %s' % title)
|
||||
self.ctx.log['model'].info('encountered loop at page = %s' % title)
|
||||
break
|
||||
|
||||
# Check if we reached the article on philosophy
|
||||
if self._is_article_on_philosophy(title, translated_title):
|
||||
log.LOGGER['model'].info('made it to philosophy in %s pages' % len(pages_visited))
|
||||
self.ctx.log['model'].info('made it to philosophy in %s pages' % len(pages_visited))
|
||||
pages_visited.append(title)
|
||||
break
|
||||
|
||||
@ -136,7 +149,7 @@ class Model:
|
||||
|
||||
rc = page_api.click_first_link()
|
||||
if not rc:
|
||||
log.LOGGER['model'].warn('failure: unable to continue (perhaps no valid links?)')
|
||||
self.ctx.log['model'].warn('failure: unable to continue (perhaps no valid links?)')
|
||||
break
|
||||
print()
|
||||
|
||||
|
@ -2,15 +2,15 @@
|
||||
# There are separate classes defined for each page with their own
|
||||
# defined methods for performing certain actions.
|
||||
|
||||
from . import browser
|
||||
from . import util
|
||||
from .assets.languages import LANGUAGES
|
||||
|
||||
import re
|
||||
import selenium
|
||||
import time
|
||||
|
||||
from . import browser
|
||||
from . import config
|
||||
from . import log
|
||||
from . import util
|
||||
from .assets.languages import LANGUAGES
|
||||
PAGE_BASE_URL = 'https://www.wikipedia.org/'
|
||||
|
||||
class PageRootObject:
|
||||
"""
|
||||
@ -43,7 +43,8 @@ class PageRootObject:
|
||||
html link in the webpage.
|
||||
"""
|
||||
self.highlight(el, 'red')
|
||||
time.sleep(config.obj.PAGE_DELAY)
|
||||
DELAY=0
|
||||
time.sleep(DELAY)
|
||||
util.breakpoint()
|
||||
el.click()
|
||||
|
||||
@ -97,7 +98,7 @@ class LandingPage(PageRootObject):
|
||||
"""
|
||||
Navigates the browser to www.wikipedia.org
|
||||
"""
|
||||
self.driver.get(config.obj.PAGE_BASE_URL)
|
||||
self.driver.get(PAGE_BASE_URL)
|
||||
|
||||
def select_language(self, language):
|
||||
lang_text = LANGUAGES.get(language)
|
||||
@ -219,13 +220,13 @@ class ArticlePage(PageRootObject):
|
||||
if len(links) == 0:
|
||||
return False
|
||||
for link in links:
|
||||
log.LOGGER['pages'].debug('processing link: %s' % link.text)
|
||||
self.ctx.log['pages'].debug('processing link: %s' % link.text)
|
||||
if not self._is_valid_link(p, link):
|
||||
log.LOGGER['pages'].debug('skipping link inside parenthesis: %s' % link.text)
|
||||
self.ctx.log['pages'].debug('skipping link inside parenthesis: %s' % link.text)
|
||||
self.highlight(link, 'blue')
|
||||
continue
|
||||
self.highlight(link, 'red')
|
||||
log.LOGGER['pages'].info('selected link: %s' % link.text)
|
||||
self.ctx.log['pages'].info('selected link: %s' % link.text)
|
||||
self.click(link)
|
||||
return True
|
||||
|
||||
|
@ -3,14 +3,14 @@
|
||||
|
||||
import yandex_translate
|
||||
|
||||
from . import config
|
||||
DO_BREAKPOINTS = False
|
||||
|
||||
def breakpoint():
|
||||
"""
|
||||
If DO_BREAKPOINTS is switched on, this will pause program
|
||||
execution and wait for the user to press enter to continue.
|
||||
"""
|
||||
if config.obj.DO_BREAKPOINTS:
|
||||
if DO_BREAKPOINTS:
|
||||
input('BREAKPOINT hit. <Enter> to continue...')
|
||||
|
||||
def translate_text(source_language, target_language, text):
|
||||
|
Loading…
Reference in New Issue
Block a user