start of new app structure for wiki

This commit is contained in:
Mathew Guest 2020-01-24 03:01:45 -07:00
parent f093fb9ecc
commit 6ae2a8dd06
25 changed files with 1641 additions and 376 deletions

5
.gitignore vendored Normal file

@ -0,0 +1,5 @@
build/
dist/
__pycache__
*.egg-info

@ -1,11 +0,0 @@
#!/usr/bin/env python
# Launcher script: configures the wikicrawl package and runs it.
import wikicrawl
import settings
# Inject the settings.DefaultSettings object into the
# app and start running the program.
wikicrawl.init(settings.DefaultSettings)
wikicrawl.main()
# Wait for the user to press <enter> once main() has returned.
input('<enter> to exit')

@ -0,0 +1,60 @@
app_skellington
===============
Application framework for Python, features include:
* Pain-free multi-level command menu: Register classes
* Simple to define services and automatic dependency injection based on name (with custom invocation as an option)
* INI-style config and validation (provided through ConfigObj)
* Colored logging (provided through colorlog)
* Works on Linux, Windows, and Mac
Principles:
* Lend to creating beautiful, easy to read and understand code in the application.
* Minimize coupling of applications to this framework.
* Compatible with Linux, Windows, and Mac. Try to be as compatible as possible with other platforms.
* Try to be compatible with alternate Python runtimes such as PyPy.
Application Configuration
-------------------------
Site configurations are supported through configobj. There is a config.spec
in the src directory which is a validation file; it contains the accepted
parameter names, types, and limits for configurable options in the
application which is built on app_skellington. The format is multi-level .ini syntax.
See the configobj documentation for more information.
Site configuration files (config.ini) are created if they don't exist. The
file always contains the full specification of parameters; i.e. even default
parameters are added into the config file.
Linux:
/home/\<user\>/.config/\<app_name\>/config.ini
/home/\<user\>/.cache/\<app_name\>/log/\<app_name\>.log
Windows:
C:\Users\\\<user\>\\AppData\\Local\\\<app_name\>\\config.ini
C:\Users\\\<user\>\\AppData\\Local\\\<app_name\>\\Logs\\\<app_name\>.log
Application configuration can be overridden ad-hoc through the --config <filename>
argument.
Debug - Turn on Logging
---------------------------
Set 'APPSKELLINGTON_ENABLE_LOGGING' environment variable to any value which turns
on AppSkellington-level logging. For example,
APPSKELLINGTON_ENABLE_LOGGING=true ./runme
or
export APPSKELLINGTON_ENABLE_LOGGING=1
./runme
Notes
-----
See official website: https://zavage-software.com
Please report bugs, improvements, or feedback! <contact>

@ -0,0 +1,11 @@
import logging
import sys
APP_CONFIG_FILENAME = 'config.ini' # Relative to user directory on machine
APP_CONFIGSPEC_FILENAME = 'config.spec' # Relative to module source directory
from .app_container import *
from .cfg import *
from .cli import *
from .log import *

@ -0,0 +1,31 @@
import logging
import os
import sys
# Check and gracefully fail if the user needs to install a 3rd-party dep.
libnames = ['appdirs', 'configobj', 'colorlog']
def check_env_has_dependencies(libnames):
    """
    Verify that every named library can be imported.

    Each failing library is reported on STDERR. The loop does not stop at
    the first failure, so the user sees every problem in a single run.

    Args:
        libnames: iterable of importable module names.

    Returns:
        True if every module imported successfully, otherwise False.
    """
    rc = True
    for libname in libnames:
        try:
            __import__(libname)
        except ImportError as ex:
            # ImportError is the parent of ModuleNotFoundError, so this also
            # covers a library that is installed but cannot be loaded.
            print('missing third-party library: ', ex, file=sys.stderr)
            rc = False
    return rc
# Refuse to load the package when a required third-party library is missing.
if not check_env_has_dependencies(libnames):
    sys.stderr.write('refusing to load program without installed dependencies\n')
    raise ImportError('python environment needs third-party dependencies installed')
# Logger for before the application and logging config is loaded
# - used to log before logging is configured
# NOTE(review): _log_fmt is not referenced anywhere in the visible code -
# confirm whether it is still needed.
_log_fmt = '%(levelname)-7s:%(message)s'
_logger_name = 'app_skellington'
_bootstrap_logger = logging.getLogger(_logger_name)
# A threshold of 1000 is far above logging.CRITICAL (50), so records logged
# at any of the standard levels are discarded by this logger.
_bootstrap_logger.setLevel(1000)
# Keep records from being handed to the handlers of ancestor loggers.
_bootstrap_logger.propagate = False
# NOTE(MG) Pretty sure the logger has the default handler too at this point.
# It's been related to some issues with the logger double-printing messages.
_bootstrap_logger.addHandler(logging.NullHandler())

@ -0,0 +1,116 @@
from __future__ import print_function
import inspect
import os
import sys
from . import _util
def eprint(*args, **kwargs):
    """
    Write to the STDERR stream.

    Positional and keyword arguments are forwarded to print() unchanged.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
def filename_to_abspath(filename):
    """
    Return the absolute form of <filename>, as computed by os.path.abspath.
    A relative name is resolved against the current working directory.
    """
    absolute_path = os.path.abspath(filename)
    return absolute_path
def does_file_exist(filepath):
    """
    Report whether <filepath> could be opened for reading at this instant.

    Because the file can be deleted or created immediately after execution of
    this function, there cannot be guarantees made around the existence of
    said file (race condition). This merely says if the file existed at this
    instant in execution.

    Args:
        filepath: path of the file to probe. None is treated as "no file".

    Returns:
        True if the file was opened successfully, False if it was not found
        or if <filepath> is None. Other errors raised by open() (for example
        a permission error) are not caught.
    """
    if filepath is None:
        # open(None) raises TypeError; a missing path simply does not exist.
        return False
    try:
        # The context manager closes the probe handle straight away.
        with open(filepath, 'r'):
            return True
    except FileNotFoundError:
        return False
def ensure_dir_exists(dirpath):
    """
    Create the directory <dirpath> together with any missing parents.
    Nothing happens when <dirpath> is None or the empty string, or when
    the directory is already present.
    """
    if dirpath is None or dirpath == '':
        return
    os.makedirs(dirpath, exist_ok=True)
def get_root_asset(filepath):
    """
    Build the path of a resource or asset shipped with the application.
    <filepath> is joined onto the absolute directory that holds the file of
    the root module (__main__), which should be the python file initially
    invoked. The result is not checked for existence.
    """
    main_module = sys.modules['__main__']
    entry_dir = os.path.dirname(main_module.__file__)
    return os.path.join(os.path.abspath(entry_dir), filepath)
def get_asset(module, filepath):
    """
    Build the path of a resource or asset shipped next to a module.

    <filepath> is joined onto the directory that contains the module's
    source file (a symlinked module file is resolved first). The returned
    path is not checked for existence.

    Args:
        module: the module object, or its name (e.g. __name__) as found in
            sys.modules, to search relative to.
        filepath: the relative filepath of the file to look for in the
            package directory.

    Returns:
        The absolute path of <filepath> inside the module's directory.

    Raises:
        TypeError: <module> is neither a string nor a module object.
        KeyError: <module> is a name that is not present in sys.modules.
    """
    # Function-scope import so the block does not rely on a file-level change.
    import types

    if isinstance(module, str):
        module_file = sys.modules[module].__file__
    elif isinstance(module, types.ModuleType):
        module_file = module.__file__
    else:
        raise TypeError('Invalid Usage')

    root = module_file
    if os.path.islink(root):
        root = os.path.realpath(root)
    root = os.path.dirname(os.path.abspath(root))
    return os.path.join(root, filepath)
def register_class_as_commands(app, submenu, cls_object):
    """
    Registers one command per public function of a class: pass in the
    application container, the target submenu, and the class, and every
    function member whose name does not start with an underscore becomes
    a command-line menu item in <submenu>.

    IMPORTANT: Currently, you need to pass in only a class and not
    an object/instance of a class.
    """
    for member_name, member in inspect.getmembers(cls_object):
        if member_name.startswith('_') or not inspect.isfunction(member):
            continue
        # Factory that builds an instance of the class with its
        # dependencies injected by the application container.
        factory = app._inject_service_dependencies(cls_object)
        submenu.register_command(
            create_func(factory, member),
            member_name,
            inspect.signature(member),
            inspect.getdoc(member))
def create_func(constructor, cls_method):
    """
    Return a wrapper that, on every call, builds a fresh object with
    constructor() and invokes cls_method on it with the call's arguments.
    """
    def func(*args, **kwargs):
        instance = constructor()
        return cls_method(instance, *args, **kwargs)
    return func

@ -0,0 +1,202 @@
import appdirs
import collections
import functools
import inspect
import os
import sys
# Application scaffolding:
from ._bootstrap import _bootstrap_logger
from . import log
from . import _util
from . import cli
from . import cfg
DEFAULT_APP_NAME = 'python-app'
DEFAULT_APP_AUTHOR = 'John Doe'
# OPTIONAL: classes can sub-class from this?
class Components:
    """
    Optional base class with dependency-injection hooks. None of the
    hooks is implemented yet; each one does nothing and returns None.
    """

    def inject_dependencies_based_on_names_in_args(self):
        """Not implemented: does nothing."""
        return None

    def inject_dependency(self, name):
        """Not implemented: does nothing."""
        return None

    def register_dependency(self, service, name):
        """Not implemented: does nothing."""
        return None
class ApplicationContext:
    """
    Holder for application-wide state: the configuration object, the
    logging layer, and the parsed command-line arguments (both None until
    they are assigned by the application container).
    """

    def __init__(self, config, log):
        self.config, self.log = config, log
        self.parsed_argv = self.parsed_argv_unknown = None
class ApplicationContainer:
    """
    Generalized application functionality. Used for linking components and modules of the application
    together. Invokes runtime configuration reading from file, maintains the
    object instances for services, passes off to the cli to determine what to
    do, and then injects any necessary dependencies (e.g. database module)
    and kicks off the functionality requested in the cli.

    Subclasses may define any of the optional hooks _cli_options(),
    _services() and _command_menu(); when present they are called, in that
    order, at the end of __init__.
    """

    def __init__(
            self,
            configspec_filepath=None,
            config_filepath=None,
            *args, **kwargs
    ):
        """
        Args:
            configspec_filepath: path of the config.spec validation file.
                Defaults to config.spec next to the __main__ module.
            config_filepath: path of the site config.ini. Defaults to
                config.ini inside the per-user config directory.
            appname (keyword): application name, default DEFAULT_APP_NAME.
            appauthor (keyword): application author, default
                DEFAULT_APP_AUTHOR.
        """
        self.appname = kwargs.get('appname') or DEFAULT_APP_NAME
        self.appauthor = kwargs.get('appauthor') or DEFAULT_APP_AUTHOR
        self._dependencies = {}

        if configspec_filepath is None:
            configspec_filepath = self._get_configspec_filepath()
        if config_filepath is None:
            # Without this default the config loader is handed None and
            # fails; fall back to the per-user location instead.
            config_filepath = self._get_config_filepath(
                self.appname, self.appauthor)

        # Instantiate root application context (container for globals)
        config = cfg.Config(configspec_filepath)
        config.load_config_from_file(config_filepath)

        logger = log.LoggingLayer(self.appname, self.appauthor)
        # added here, is this okay to do twice?
        logger.configure_logging()

        self.ctx = ApplicationContext(config, logger)
        # Expose the context itself as an injectable dependency named 'ctx'.
        self['ctx'] = lambda: self.ctx

        self.cli = cli.CommandTree()  # Command-line interface

        # Optional subclass hooks.
        for hook_name in ('_cli_options', '_services', '_command_menu'):
            hook = getattr(self, hook_name, None)
            if callable(hook):
                hook()

    def __delitem__(self, service_name):
        """
        Deletes a service or dependency from the available dependencies.
        Deleting an unknown name is silently ignored.
        """
        self._dependencies.pop(service_name, None)

    def __getitem__(self, service_name):
        """
        Returns an instance of a service or dependency by calling the
        factory function that was registered under <service_name>.

        app_container['netezza'] => returns the netezza service instance

        Raises:
            ServiceNotFound: no factory is registered under <service_name>,
                or the factory itself raised KeyError.
        """
        try:
            service_factory = self._dependencies[service_name]  # Retrieve factory function
            return service_factory()  # Call factory() to return instance of service
        except KeyError as ex:
            msg = 'failed to inject service: {}'.format(service_name)
            _bootstrap_logger.critical(msg)
            _util.eprint(msg)
            raise ServiceNotFound(msg) from ex

    def __setitem__(self, service_name, value):
        """
        Register a service or dependency factory to return a service.

        The factory function is called to return an instance of a service object.
        """
        self._dependencies[service_name] = value

    def _construct_model(self, model_constructor, *args):
        """
        Performs dependency resolution and instantiates an object of given type.

        This takes in the reference to a class constructor and the names
        of the dependencies that need passed into it, constructs that object and
        returns it. Models contain business logic and application functionality.

        Args:
            model_constructor: reference to object constructor.
            *args: names of the registered dependencies, in the positional
                order expected by the constructor.
        """
        dep_references = [self[dep_name] for dep_name in args]
        return model_constructor(*dep_references)

    def _get_config_filepath(self, app_name, app_author, config_filename='config.ini'):
        """
        Build the path of config.ini inside the user's config directory.

        On Linux, this will be /home/<user>/.config/<app>/config.ini
        On Windows, this will be C:\\Users\\<user>\\AppData\\Local\\<app>\\config.ini
        """
        dirname = appdirs.user_config_dir(app_name, app_author)
        filepath = os.path.join(dirname, config_filename)
        _bootstrap_logger.info('default config filepath calculated to be: %s', filepath)
        return filepath

    def _get_configspec_filepath(self, configspec_filename='config.spec'):
        """
        Build the path of config.spec next to the __main__ module.
        """
        return _util.get_root_asset(configspec_filename)

    def _inject_service_dependencies(self, constructor):
        """
        Returns a function that, when called, constructs a new object for
        business/application logic with the listed dependencies.

        The dependency names are the parameter names of the class's
        __init__ (excluding 'self'); each is resolved through this
        container at the time the returned function is called.

        Args:
            constructor: service class to be created object.
        """
        sig = inspect.signature(constructor.__init__)
        param_names = [param.name for param in sig.parameters.values()]
        cls_dependencies = param_names[1:]  # Skip 'self' parameter on class methods.
        return functools.partial(self._construct_model, constructor, *cls_dependencies)

    def load_command(self):
        """
        Parse the command line and store the result on the context.

        Returns:
            True on success, False when parsing did not succeed.
        """
        args, unk, success = self.cli.parse()
        if not success:
            return False
        self.ctx.parsed_argv = args
        self.ctx.parsed_argv_unknown = unk
        return True

    def invoke_command(self):
        """
        Parse the command line and run the selected command.

        Returns:
            False when parsing fails or no command was specified; None
            otherwise.
        """
        if not self.load_command():
            return False
        try:
            # NOTE: run_command() parses the command line again itself.
            self.cli.run_command()
        except NoCommandSpecified:
            print('Failure: No command specified.')
            return False

    def interactive_shell(self):
        """Not implemented: does nothing."""
        pass

    def invoke_from_cli(self):
        """Entry point for command-line use; delegates to invoke_command()."""
        self.invoke_command()

    def usage(self):
        """Not implemented: does nothing."""
        pass
        # Applications need a default usage
class ServiceNotFound(Exception):
    """
    Application framework error: a dependency could not be found and
    injected.
    """
class NoCommandSpecified(Exception):
    """
    Raised when the command line does not select a command to run.
    """

184
lib/app_skellington/cfg.py Normal file

@ -0,0 +1,184 @@
import appdirs
from . import _util
import argparse
import configobj
import os
import sys
import validate
from ._bootstrap import _bootstrap_logger
from . import _util
class Config:
    """
    Structure to store application runtime configuration. Also contains
    functionality to load configuration from local site file.

    Item access (config[key], config[key] = value, del config[key]) is
    forwarded to the underlying configobj object in self.config_obj, which
    is None until load_config_from_file() has succeeded.
    """

    def __init__(self, configspec_filepath=None):
        self.config_obj = None
        self._config_filepaths = []
        self._configspec_filepath = None
        self.configspec_filepath = configspec_filepath

    def __delitem__(self, key):
        """
        Deletes the configuration item identified by <key> in the internal
        configuration storage. Deleting an unknown key is silently ignored.
        """
        try:
            # Operate on the storage object; 'del self[key]' would re-enter
            # this method forever.
            del self.config_obj[key]
        except KeyError:
            pass

    def __getitem__(self, key):
        """
        Returns the value of the configuration item identified by <key>,
        converted with its .dict() method.

        NOTE(review): this presumably expects <key> to name a section, since
        the value must provide .dict() - confirm against callers.

        Raises:
            KeyError: <key> is not present in the configuration.
        """
        return self.config_obj[key].dict()

    def __setitem__(self, key, value):
        """
        Assigns the value of the configuration item
        identified by <key> as <value>.
        """
        # Operate on the storage object; 'self[key] = value' would re-enter
        # this method forever.
        self.config_obj[key] = value

    @property
    def config_filepath(self, idx=0):
        """
        Returns the config filepath (optionally specified by index
        when using multiple config files), or None when no filepath is
        recorded at that index.
        """
        assert idx >= 0, 'invalid idx argument: index must be greater than 0'
        try:
            return self._config_filepaths[idx]
        except IndexError:
            return None

    @config_filepath.setter
    def config_filepath(self, value, idx=0):
        """
        Assigns <value> as the config filepath (optionally specified by index
        when using multiple config files). When nothing is recorded at that
        index yet, the value is appended instead.
        """
        assert idx >= 0, 'invalid idx argument: index must be greater than 0'
        if idx < len(self._config_filepaths):
            self._config_filepaths[idx] = value
        else:
            self._config_filepaths.append(value)

    @property
    def configspec_filepath(self):
        """Path of the config.spec validation file, or None when unset."""
        return self._configspec_filepath

    @configspec_filepath.setter
    def configspec_filepath(self, filepath):
        """
        Sets the config.spec path. None clears it. A path that does not
        exist is rejected: an error is logged and the previous value kept.
        """
        if filepath is None:
            self._configspec_filepath = None
        elif _util.does_file_exist(filepath):
            self._configspec_filepath = filepath
        else:
            _bootstrap_logger.error(
                'failed to set config.spec: file not found '
                '(%s)', filepath)

    def load_config_from_file(self, config_filepath):
        """
        Loads configuration settings from file, overwriting all configuration.

        When a config.spec is set, the loaded values are validated against
        it. Afterwards the configuration is written back to
        <config_filepath>; nothing is written when the path is None.

        Args:
            config_filepath: path of the config.ini to read, or None to
                build the configuration without a file.

        Returns:
            True on success, False when loading or validation failed.
        """
        # Record all config.ini files passed in
        if (config_filepath is not None
                and config_filepath not in self._config_filepaths):
            self._config_filepaths.append(config_filepath)

        # Check for config.spec
        configspec_filepath = self.configspec_filepath
        if configspec_filepath:
            _bootstrap_logger.info('using config.spec: %s', configspec_filepath)
        else:
            _bootstrap_logger.info('config.spec not defined')
        _bootstrap_logger.info('using config file: %s', config_filepath)

        # Pre-check for config.ini existence
        if config_filepath is not None and _util.does_file_exist(config_filepath):
            _bootstrap_logger.info('existing config file found')
        else:
            _bootstrap_logger.info('no config file found: using defaults')

        # interpolation='template' changes config file variable replacement to
        # use the form $var instead of %(var)s, which is useful to enable
        # literal %(text)s values in the config.
        options = {'interpolation': 'template'}
        if configspec_filepath:
            options['configspec'] = configspec_filepath
        try:
            self.config_obj = configobj.ConfigObj(config_filepath, **options)
        except configobj.ParseError:
            msg = 'failed to load config: error in config.spec configuration: {}'.format(config_filepath)
            _bootstrap_logger.error(msg)
            _util.eprint(msg)
            return False
        except OSError:
            msg = 'failed to load config: config.spec file not found'
            _bootstrap_logger.error(msg)
            _util.eprint(msg)
            return False

        # Hack the configobj module to alter the interpolation for validate.py:
        configobj.DEFAULT_INTERPOLATION = 'template'
        self.config_obj.filename = config_filepath

        if configspec_filepath:
            # Validate config.ini against config.spec
            try:
                _bootstrap_logger.info('validating config file against spec')
                val = validate.Validator()
                test = self.config_obj.validate(val, copy=True)
                if test is not True:
                    _bootstrap_logger.critical('config file failed validation')
                    _bootstrap_logger.critical('config file errors: %s', test)
                    return False
            except ValueError:
                _bootstrap_logger.error('failed validating configspec')
                return False

        # The file is written on every load (not only when it is missing).
        # Skipped when there is no path to write to.
        if config_filepath is not None:
            _bootstrap_logger.info('writing new config file: %s', config_filepath)
            dirname = os.path.dirname(config_filepath)
            _util.ensure_dir_exists(dirname)
            self.config_obj.write()

        _bootstrap_logger.info('done loading config file')
        return True

    def print_config(self):
        """
        Print configuration to stdout.
        """
        print('config:')
        self.config_obj.walk(print)
        for section in self.config_obj.sections:
            print(section)
            for key in self.config_obj[section]:
                print(' ', self.config_obj[section][key])
class EnvironmentVariables:
    """
    Placeholder that is not implemented yet: instantiating it always
    raises NotImplementedError.
    """

    def __init__(self):
        raise NotImplementedError()
class ConfigurationItemNotFoundError(Exception):
    """
    Error type for a configuration item that cannot be found.
    """

540
lib/app_skellington/cli.py Normal file

@ -0,0 +1,540 @@
import argparse
import inspect
import logging
import re
import sys
import app_skellington
from ._bootstrap import _bootstrap_logger
from . import app_container
# If explicit fail is enabled, any command with at least one unknown
# argument will be rejected entirely. If not enabled, unknown arguments
# will be ignored.
EXPLICIT_FAIL_ON_UNKNOWN_ARGS = True
class CommandTree:
    """
    Command-line interface to hold a menu of commands. You can register
    commands (functions or methods) in a CommandTree which will generate
    a corresponding argparse.ArgumentParser (and nested SubParsers) that
    map function/method arguments into argparse Parameters. Then, you
    can translate command-line arguments into invoking the function.

    Commands must be registered before being invoked. You create nested
    SubMenu(s). If function parameters have defaults, those will be
    available for override else they use the function defaults.

    Print helpful information:

        ./scriptname -h                      # View tier-0 help and usage doc
        ./scriptname [submenu] -h            # View submenu help and usage doc
        ./scriptname [submenu] [command] -h  # View command documentation and parameters

    argparse is finicky about argument placement:

        ./scriptname
            [application arguments]
            [submenu] [submenu arguments]
            [command] [command arguments]

    For example,

        ./scriptname --option="value" [submenu] [command]

    is different than

        ./scriptname [submenu] [command] --option="value"

    in that option is being applied to the application in the first example and
    applied to the refresh_datasets command (under the nhsn command group) in
    the second. In the same way the -h, --help options print different docs
    depending on where the help option was passed.
    """

    def __init__(self):
        self.root_parser = argparse.ArgumentParser()
        # submenu_param is the variable name of the root submenu argument,
        # i.e. the arg in root_parser which selects the submenu.
        self.submenu_param = None
        self.entries = {}
        # NOTE(MG) Implementation note:
        # CommandTree uses only one of these internal structures (i.e. mutually exclusive),
        # 'entries' is used when there is a submenu linked to multiple commands.
        # '_cmd_tree_is_single_command' and '_single_command' instead are used
        # when the CommandTree is linked to one and only one command.
        self._cmd_tree_is_single_command = False
        self._single_command = None

    def print_tree(self):
        """Pretty-print the registered root-level entries."""
        import pprint
        pprint.pprint(self.entries)

    def add_argument(self, *args, **kwargs):
        """
        Adds an argument to the root parser.
        """
        _bootstrap_logger.info('adding argument to root parser: %s and %s', args, kwargs)
        self.root_parser.add_argument(*args, **kwargs)

    def init_submenu(self, param_name, is_required=False):
        """
        Creates a root-level submenu with no entries. SubMenu node is
        returned which can have submenus and commands attached to it.
        """
        # Creates an argument as a slot in the underlying argparse.
        subparsers = self.root_parser.add_subparsers(
            dest=param_name,
            metavar=param_name,
            required=is_required
        )
        submenu = SubMenu(self, subparsers, param_name)
        submenu.submenu_path = ''
        submenu.var_name = param_name
        _bootstrap_logger.info('Initialized root-level submenu: Parameter = \'%s\'', param_name)
        self.entries[param_name] = submenu
        self.submenu_param = param_name
        return submenu

    def register_command(
            self, func, cmd_name=None, func_signature=None,
            docstring=None
    ):
        """
        When no submenu functionality is desired, this links a single
        command into underlying argparse options.

        Args:
            func: function or method to invoke for the command.
            cmd_name: name of the command; defaults to func.__name__.
            func_signature: inspect.Signature describing the parameters to
                expose; defaults to the signature of <func>.
            docstring: accepted for parity with SubMenu.register_command;
                it has no effect on the root parser.

        Raises:
            Exception: <func> is neither a function nor a method.
        """
        if not (inspect.isfunction(func) or inspect.ismethod(func)):
            raise Exception('bad value passed in for function')
        if not cmd_name:
            cmd_name = func.__name__
        if func_signature is None:
            func_signature = inspect.signature(func)
        sig = func_signature

        # For each parameter in the function create an argparse argument in
        # the root ArgumentParser:
        for key, param in sig.parameters.items():
            if key == 'self':
                continue
            if param.default is inspect.Parameter.empty:
                self.root_parser.add_argument(key, help='required')
                continue
            if param.default is None:
                helptext = 'default provided'
            else:
                helptext = "default = '{}'".format(param.default)
            self.root_parser.add_argument(
                key,
                help=helptext,
                nargs='?',
                default=param.default)

        # Build the CommandEntry structure
        cmd = CommandEntry()
        cmd.argparse_node = self.root_parser
        cmd.cmd_name = cmd_name
        cmd.func_signature = sig
        cmd.callback = func
        _bootstrap_logger.info('registered command: %s', cmd_name)

        self._cmd_tree_is_single_command = True
        self._single_command = cmd
        self._entries = None

    def parse(self, args=None):
        """
        Parse command-line arguments with the root parser.

        Unknown arguments are returned to the caller, not rejected.

        Args:
            args: list of argument strings; defaults to sys.argv[1:].

        Returns:
            (namespace, unknown_args, True) on success, or
            (None, None, False) when argparse raised SystemExit.
        """
        if args is None:
            args = sys.argv[1:]
        try:
            # on error, prints some argparse error messages:
            pargs, unk = self.root_parser.parse_known_args(args)
        # Note: SystemExit is raised when '-h' argument is supplied.
        except SystemExit:
            return None, None, False
        return pargs, unk, True

    def run_command(self, args=None):
        """
        Parse <args>, look up the selected command and invoke it.

        Returns:
            The return value of the command; None when parsing did not
            succeed; False when no command could be found.

        Raises:
            NoCommandSpecified: the arguments do not select a command.
        """
        args, unk, success = self.parse(args)
        if not success:
            _bootstrap_logger.info('SystemExit: Perhaps user invoked --help')
            return
        _bootstrap_logger.info('received args from shell: %s', args)
        args = vars(args)
        cmd = self._lookup_command(args)
        if cmd is None:
            _bootstrap_logger.error('failed to find command')
            return False
        return self._invoke_command(cmd, args)

    def _lookup_command(self, args):
        """
        Find the CommandEntry selected by the parsed arguments.

        Each submenu argument consumed on the way down is removed from
        <args>, so that only command parameters remain.

        Raises:
            NoCommandSpecified: no submenu is registered, a submenu argument
                is missing from <args>, or its value does not name an entry.
        """
        # In the case there is at-most one command registered in
        # the CommandTree with no SubMenu (submenu will be disabled
        # in this case):
        if self._cmd_tree_is_single_command:
            assert self._entries is None, 'corrupt data structure in CommandMenu'
            assert isinstance(self._single_command, CommandEntry), 'corrupt data structure in CommandMenu'
            return self._single_command

        # There is at least one submenu we need to go down:
        assert self._single_command is None, 'corrupt data structure in CommandMenu'

        # Key or variable name used by argparse to store the submenu options
        argparse_param = self.submenu_param  # e.g.: submenu_root
        submenu = self.entries.get(argparse_param)
        if submenu is None:
            raise app_container.NoCommandSpecified('No command specified.')

        while True:
            if argparse_param not in args:
                raise app_container.NoCommandSpecified('No command specified.')
            val = args.pop(argparse_param)
            _bootstrap_logger.debug('argparse command is \'{}\' = {}'.format(argparse_param, val))
            lookup = submenu.entries.get(val)
            _bootstrap_logger.debug('lookup, entries[{}] = {}'.format(val, lookup))

            if isinstance(lookup, SubMenu):
                submenu = lookup
                argparse_param = submenu.var_name
            elif isinstance(lookup, CommandEntry):
                return lookup
            else:
                raise app_container.NoCommandSpecified('No command specified.')

    def _invoke_command(self, cmd, args):
        """
        Call the command's callback, passing positionally the value of every
        signature parameter that is present in <args>.
        """
        func = cmd.callback
        func_args = [
            args[param.name]
            for param in cmd.func_signature.parameters.values()
            if param.name in args
        ]
        _bootstrap_logger.info('function: %s', func)
        _bootstrap_logger.info('function args: %s', func_args)
        return func(*func_args)

    def _get_subparser(self):
        # NOTE(review): relies on private argparse attributes; presumably
        # index 1 is the subparsers action after the help action - confirm.
        return self.root_parser._subparsers._actions[1]
class SubMenu:
    """
    A node in the command menu: wraps an argparse subparsers object and
    maps entry names to CommandEntry objects or nested SubMenu objects.
    """

    def __init__(self, parent, subparsers_obj, name):
        self.parent = parent  # Reference to root CommandTree
        self.subparsers_obj = subparsers_obj
        self.name = name
        self.submenu_path = None
        # Name of the argparse parameter that stores the entry chosen in
        # this submenu; assigned by whoever creates the submenu.
        self.var_name = None
        self.entries = {}

    def register_command(
            self, func, cmd_name=None, func_signature=None,
            docstring=None
    ):
        """
        Registers a command as an entry in this submenu. Provided function is
        converted into argparse arguments and made available to the user.

        Arguments
        ---------
        func:
            Callback function which will be mapped
            to the submenu entry.

        cmd_name (optional):
            User-facing entry name. By default will be the function name.
            The user will be able to use [cmd_name] [arg, ...] to
            invoke the callback function.

        func_signature: optionally, you can pass in the
            inspect.signature(). If None, will inspect the
            incoming func. Note on internals: This is used
            to pass the function signature of the command
            function while having the callback point to a
            function partial which executes some other code.
            This hook is used to inject dependencies and then
            execute the command function.

        docstring (optional):
            Text used to generate the help and description of the entry.
            By default will be the docstring of func.

        Raises
        ------
        Exception: func is neither a function nor a method.
        """
        if not (inspect.isfunction(func) or inspect.ismethod(func)):
            raise Exception('bad value passed in for function')
        if not cmd_name:
            cmd_name = func.__name__
        if func_signature is None:
            func_signature = inspect.signature(func)
        if docstring is None:
            docstring = func.__doc__
        sig = func_signature

        # help is displayed next to the command in the submenu enumeration or
        # list of commands:
        help_text = HelpGenerator.generate_help_from_sig(docstring)
        # description is displayed when querying help for the specific command:
        description_text = HelpGenerator.generate_description_from_sig(docstring)

        # Entry in local argparse._SubParsersAction
        # type = ArgumentParser
        child_node = self.subparsers_obj.add_parser(
            cmd_name,  # Note: cmd_name here will be the VALUE
                       # passed into the argparse arg VARIABLE NAME
                       # created when the SubMenu/argparse.add_subparsers()
                       # was created.
            help=help_text,
            description=description_text
        )

        # For each parameter in the function create an argparse argument in
        # the child ArgumentParser created for this menu entry:
        for key, param in sig.parameters.items():
            if key == 'self':
                continue
            if param.default is inspect.Parameter.empty:
                child_node.add_argument(key, help='required')
                continue
            if param.default is None:
                helptext = 'default provided'
            else:
                helptext = "default = '{}'".format(param.default)
            child_node.add_argument(
                key,
                help=helptext,
                nargs='?',
                default=param.default)

        # Build the CommandEntry structure
        cmd = CommandEntry()
        cmd.argparse_node = child_node
        cmd.cmd_name = cmd_name
        cmd.func_signature = sig
        cmd.callback = func

        registered_name = '{}.{}'.format(
            self.submenu_path,
            cmd_name)
        _bootstrap_logger.info('registered command: %s', registered_name)
        self.entries[cmd_name] = cmd

    def create_submenu(
            self, var_name, cmd_entry_name=None, is_required=False
    ):
        """
        Creates a child-submenu.

        Arguments
        ---------
        var_name:
            A code-facing argparse parameter used to store the
            value/entry chosen by the user.

        cmd_entry_name:
            A user-facing name used to select created submenu.
            If not provided, the user-facing command name defaults
            to the same name as the code-facing argparse parameter

        is_required:
            Switches if a value must be selected in the created submenu.
            If not, it's an optional positional argument.
        """
        if cmd_entry_name is None:
            cmd_entry_name = var_name

        # Create an entry in self's submenu:
        # type = ArgumentParser
        entry_node = self.subparsers_obj.add_parser(
            cmd_entry_name,
            help='sub-submenu help',
            description='sub-sub description')

        # Turn entry into a submenu of its own:
        # type = _SubParsersAction
        subp_node = entry_node.add_subparsers(
            dest=var_name,
            metavar=var_name,
            required=is_required)

        submenu = SubMenu(
            self.parent,
            subp_node,
            cmd_entry_name)
        submenu.var_name = var_name
        submenu.submenu_path = '{}.{}'.format(self.submenu_path, cmd_entry_name)
        _bootstrap_logger.info('registered submenu: %s', submenu.submenu_path)
        self.entries[cmd_entry_name] = submenu
        return submenu

    def __repr__(self):
        return 'SubMenu({})<{}>'.format(
            self.name,
            ','.join(['cmds'])
        )
class CommandEntry:
    """
    Record describing one command in the CLI.

    It holds the command name, the argparse parser node created for the
    command, the signature carrying the original parameters of the
    function-to-be-invoked, a reference to the original function, and a
    callback wrapper which, by convention, instantiates the necessary
    objects (injecting dependencies, etc.) and executes the original
    function.

    The CLI module has functionality to translate the original function
    arguments into argparse options (creating the documentation also).
    Similarly, it can convert from argparse options into a function call.

    Every field starts out as None and is filled in by the code that
    registers the command.
    """

    def __init__(self):
        self.argparse_node = None
        # Don't think we need. And needs to be changed from SubMenu
        self.cmd_name = None
        self.menu_path = None
        self.func_signature = None
        self.func_ref = None
        self.callback = None

    def __repr__(self):
        return 'CommandEntry<{}>'.format(self.cmd_name)
class HelpGenerator:
    """
    Derives argparse help and description texts from a docstring.
    """

    def __init__(self):
        pass

    @staticmethod
    def _first_sentence(doctext):
        """
        Return the text of <doctext> up to its first '.', '?' or '!',
        always terminated with '.'. None and text without any of those
        characters are returned unchanged.
        """
        if doctext is None:
            return doctext
        # Lazy match so that only the first sentence is captured; DOTALL
        # lets that sentence span several lines.
        match = re.match(r'(.*?)[.?!]', doctext, re.MULTILINE | re.DOTALL)
        if match:
            return match.group(1) + '.'
        return doctext

    @staticmethod
    def generate_help_from_sig(doctext):
        """
        The 'help' text is displayed next to the command when enumerating
        the submenu commands.
        """
        return HelpGenerator._first_sentence(doctext)

    @staticmethod
    def generate_description_from_sig(doctext):
        """
        The 'description' paragraph is provided when the user requests help
        on a specific command.
        """
        return HelpGenerator._first_sentence(doctext)

165
lib/app_skellington/log.py Normal file

@ -0,0 +1,165 @@
import appdirs
import colorlog
import logging
import logging.config
import os
from ._bootstrap import _bootstrap_logger
from . import _util
# Logging configuration used when the application supplies none.
#
# The layout follows the config-file conventions rather than the final
# logging.config schema: levels are lower-case names and the root logger is
# keyed 'root'. LoggingLayer.transform_config() converts both before the
# dictionary is handed to logging.config.dictConfig().
DEFAULT_LOG_SETTINGS = {
    'formatters': {
        'colored': {
            'class': 'colorlog.ColoredFormatter',
            # 'format': '%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(white)s%(message)s'
            'format': '%(white)s%(name)7s%(reset)s|%(log_color)s%(message)s',
        }
    },

    'handlers': {
        'stderr': {
            'class': 'logging.StreamHandler',
            'level': 'debug',
            'formatter': 'colored'
        }
    },

    'loggers': {
        'root': {
            'handlers': ['stderr',],
            'level': 'debug'
        },
        'app_skellington': {
            # 'handlers': ['stderr',],
            'level': 'critical',
            # Must be a real boolean: the logging module only tests the
            # truthiness of this value, and the string 'false' is truthy,
            # which left propagation switched on.
            'propagate': False
        }
    }
}
class LoggingLayer:
    """
    Application-level front end to the logging module.

    Hands out named loggers through the [] operator and translates the
    config-file flavour of a logging configuration into the dictionary
    expected by logging.config.dictConfig().
    """

    # Lower-case level names accepted in the configuration, mapped to the
    # numeric levels of the logging module. 'all' means "no threshold".
    _LEVELS_BY_NAME = {
        'critical': logging.CRITICAL,
        'error': logging.ERROR,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG,
        'all': logging.NOTSET,
    }

    def __init__(self, appname, appauthor, config=None):
        """
        Args:
            appname: application name, used to locate the user log directory
            appauthor: application author, used to locate the user log directory
            config: accepted for interface compatibility; not used here
        """
        self.appname = appname
        self.appauthor = appauthor
        self.loggers = {}

    def __getitem__(self, k):
        """
        Returns Logger object named <k>.

        Example:
            log = LoggingLayer(...)
            log['db'].info('loaded database module')

        Args:
            k: the name of the logger to retrieve (k, i.e. key)
        """
        logger = self.loggers.get(k)
        if logger is None:
            logger = logging.getLogger(k)
            self.loggers[k] = logger
        return logger

    def configure_logging(self, config_dict=None):
        """
        Set the logging level for the process. Verbosity is controlled by a
        parameter in the config.

        Advice: While DEBUG verbosity is useful to debug, it can produce too much
        noise for typical operation.

        Args:
            config_dict: logging configuration in config-file form, or None
                to use DEFAULT_LOG_SETTINGS. A caller-supplied dictionary
                is transformed in place.
        """
        import copy  # local import: only needed on the defaults path

        if config_dict is None:
            _bootstrap_logger.debug('No application logging configuration provided. Using default')
            # transform_config() rewrites its argument in place (for
            # example it removes the 'root' key), so work on a private copy
            # of the module-level defaults. Otherwise a second call with
            # the defaults would fail on the already-transformed dict.
            config_dict = copy.deepcopy(DEFAULT_LOG_SETTINGS)

        self.transform_config(config_dict)

        try:
            # TODO(MG) switch to pretty-print, as it'd be more human readable
            _bootstrap_logger.debug('Log configuration: %s', config_dict)
            logging.config.dictConfig(config_dict)
        except Exception as ex:
            # Best effort: a broken logging setup must not stop the program.
            print('unable to configure logging:', ex, type(ex))

    def transform_config(self, config_dict):
        """
        Fix some incompatibilities and differences between the config-file logging
        parameters and the final config dictionary passed into the logging module.

        The dictionary is modified in place. Applying the transformation to
        an already-transformed dictionary leaves it unchanged.
        """
        # Version should be hard-coded 1, per Python docs
        if 'version' in config_dict and config_dict['version'] != 1:
            _bootstrap_logger.warning("logging['version'] must be '1' per Python docs")
        config_dict['version'] = 1

        handlers = config_dict.setdefault('handlers', {})
        loggers = config_dict.setdefault('loggers', {})

        self._add_own_logconfig(config_dict)

        # Replace level strings with value integers from module, for both
        # handlers and loggers
        for section in (handlers, loggers):
            for entry in section.values():
                self._convert_str_to_loglevel(entry, 'level')

        # Replace 'root' logger with '', logging module convention for root handler
        # Note: '' is disallowed in ConfigObj (hence the reason for this replacement)
        if 'root' in loggers:
            loggers[''] = loggers.pop('root')

        # Evaluate the full filepath of the file handler
        file_handler = handlers.get('file')
        if file_handler is None:
            return
        filename = file_handler['filename']
        if os.path.isabs(filename):
            # Path is already absolute
            return
        dirname = appdirs.user_log_dir(self.appname, self.appauthor)
        _util.ensure_dir_exists(dirname)
        file_handler['filename'] = os.path.join(dirname, filename)

    def _add_own_logconfig(self, config_dict):
        """
        Switch the framework's own 'app_skellington' logger to debug level
        when the APPSKELLINGTON_ENABLE_LOGGING environment variable is set
        to a non-empty value.
        """
        if not os.environ.get('APPSKELLINGTON_ENABLE_LOGGING', None):
            return
        loggers = config_dict.setdefault('loggers', {})
        if 'app_skellington' not in loggers:
            loggers['app_skellington'] = {
                # propagate must be a real boolean; the string 'false' is
                # truthy and would leave propagation switched on.
                'level': 'debug', 'propagate': False
            }
        else:
            loggers['app_skellington']['level'] = 'debug'

    def _convert_str_to_loglevel(self, dict_, key):
        """
        Convert a dictionary value from a string representation of a log level
        into the numeric value of that log level. The value is modified in-place
        and is passed in by a dictionary reference and a key name.

        A missing key, a non-string value (e.g. an already converted level)
        and an unrecognized name are all left untouched. Names are matched
        case-insensitively.

        For example,
            d = {'loggers': {'cas': {'level': 'critical'}}}
            convert_str_to_loglevel(d['loggers']['cas'], 'level')
            =>
            d is now {'loggers': {'cas': {'level': logging.CRITICAL}}}
        """
        value = dict_.get(key)
        if not isinstance(value, str):
            return
        level = self._LEVELS_BY_NAME.get(value.lower())
        # Compare against None explicitly: logging.NOTSET is 0.
        if level is not None:
            dict_[key] = level

45
lib/setup.py Normal file

@ -0,0 +1,45 @@
#!/usr/bin/env python
#
# Usage:
#
# First, enable the python environment you want to install to, or if installing
# system-wide then ensure you're logged in with sufficient permissions
# (admin or root to install to system directories)
#
# installation:
#
# $ ./setup.py install
#
# de-installation:
#
# $ pip uninstall <app>
from setuptools import setup

__project__ = 'app_skellington'
__version__ = '0.1.0'

# Package metadata and dependencies for the app_skellington library.
setup(
    name = __project__,
    version = __version__,
    description = 'A high-powered 2-level CLI framework',
    author = 'Mathew Guest',
    author_email = 'mathewguest@gmail.com',
    url = 'https://git-mirror.zavage-software.com',

    # Third-party dependencies; will be automatically installed.
    # Note: pprint is part of the Python standard library and therefore
    # must not be listed here (a requirement of that name would pull in an
    # unrelated distribution from the package index).
    install_requires = (
        'appdirs',
        'configobj',
        'colorlog',
    ),

    # Local packages to be installed (our packages)
    packages = (
        'app_skellington',
    ),
)

6
road2philosophy.py Executable file

@ -0,0 +1,6 @@
#!/usr/bin/env python
# Launcher script: starts the wikicrawl application and then waits for
# the user to press <enter> before the process ends.
import wikicrawl
import settings  # NOTE(review): not referenced in this script; presumably kept for import side effects - confirm

wikicrawl.start_app()

# Block until <enter> is pressed; presumably keeps a console window open
# after the application has finished - confirm.
input('<enter> to exit')

45
setup.py Normal file → Executable file

@ -3,23 +3,42 @@
# required third-party dependencies and package the app. You can also # required third-party dependencies and package the app. You can also
# install the application system-wide. # install the application system-wide.
from setuptools import setup from setuptools import setup, find_packages
__project__ = 'wikicrawl' __project__ = 'wikicrawl'
# If you're looking for a versioning scheme, one revered pattern # If you're looking for a versioning scheme, one revered pattern
# can be read about at http://semver.org # can be read about at http://semver.org
__version__ = '0.9.0' __version__ = '0.9.0'
setup(name = __project__, setup(
version = __version__, name = __project__,
description = '', version = __version__,
author = '', description = '',
author_email = '', author = '',
url = '', author_email = '',
install_requires = ('yandex.translate', url = '',
'selenium', install_requires = (
'colorlog', 'yandex.translate',
'baker' 'selenium',
), 'colorlog',
packages = ('wikicrawl',)) 'baker',
## Additional dependencies required from app_skellington:
'appdirs',
'configobj',
'colorlog',
'pprint'
),
packages = find_packages(
where='.',
include=('*',),
exclude=()
),
package_dir = {
'app_skellington': 'lib'
},
scripts = (
'road2philosophy.py',
)
)

@ -7,6 +7,5 @@
# We export config.init() as a reference to wikicrawl.config.init() and # We export config.init() as a reference to wikicrawl.config.init() and
# wikicrawl.main as a reference to wikicrawl.cli.main # wikicrawl.main as a reference to wikicrawl.cli.main
from .config import init from .app import start_app
from .main import main

87
wikicrawl/app.py Normal file

@ -0,0 +1,87 @@
# The command-line interface module creates an interface for
# interacting with the python program (wikicrawl). This is an implementation
# of the baker demo shown previously. The user can type in commands to
# make the program do things.
import sys
if sys.platform == 'linux':
import readline # Needed for command history <up> and <down> arrows to work
from . import browser
from . import dal
from . import model
from app_skellington import _util
import app_skellington
from app_skellington._util import register_class_as_commands
# Problem pages:
# Decision (from politics)
# Malaysia (goes inside parenthesis)
# Soft-sediment_deformation_structures (doesn't find link)
# Chemicals (loops at philosophical)
class InteractiveInterface(app_skellington.ApplicationContainer):
    """
    Application container of the wikicrawl command-line program.

    Supplies the config locations to the app_skellington base class and
    implements its hooks: extra CLI options, the service objects and the
    command menu.
    """

    def __init__(self, *args, **kwargs):
        """
        Resolve the config spec bundled with this package and the
        'road2philosophy' config.ini location, then initialize the base
        container with them. Extra arguments are passed through untouched.
        """
        spec_path = _util.get_asset(__name__, 'config.spec')
        ini_path = self._get_config_filepath('road2philosophy', '', 'config.ini')
        super().__init__(
            *args,
            configspec_filepath=spec_path,
            config_filepath=ini_path,
            **kwargs
        )

        # Configure logging:
        # log_config = self.ctx.config['logging']
        # self.ctx.log.configure_logging(log_config)
        # self.ctx.log.configure_logging()

    def invoke_from_cli(self):
        """
        Load the command given on the command line and invoke it; print a
        hint instead when no valid command could be loaded.
        """
        if self.load_command():
            # load config
            self.invoke_command()
            return
        print('Invalid command. Try -h for usage')

    def usage(self):
        """Print the (placeholder) program usage text."""
        print('\n...usage info to come ;)...\n')

    def _cli_options(self):
        """Register the program-level command-line switches."""
        self.cli.add_argument(
            '--usage',
            action='store_true',
            help='Prints program usage information'
        )

    def _services(self):
        """
        Create the service objects and register each one on the container
        under its name. Every registration is a zero-argument callable
        returning the shared instance.
        """
        self['ctx'] = lambda: self.ctx

        # Data layer
        self.dal = dal.DataLayer(self.ctx)
        self['dal'] = lambda: self.dal

        # Web browser access
        self.browser = browser.Browser(self.ctx)
        self['browser'] = lambda: self.browser

        # The model is built from the two services created above
        self.model = model.Model(self.ctx, self.browser, self.dal)
        self['model'] = lambda: self.model

    def _command_menu(self):
        """
        Create the root 'command' submenu and register model.Model on it
        as commands.
        """
        root_menu = self.cli.init_submenu('command')
        self.sm_root = root_menu
        register_class_as_commands(self, root_menu, model.Model)
def start_app(config=None):
    """
    Entry point of the program: build the InteractiveInterface container
    and run the command given on the command line.

    Args:
        config: currently not used by this function.
    """
    InteractiveInterface().invoke_from_cli()

@ -12,39 +12,44 @@
import selenium import selenium
import selenium.webdriver import selenium.webdriver
from . import config class Browser:
from . import log def __init__(self, ctx):
self.ctx = ctx
# This function has a parameter (driver) that passes in a value. In this case, # This function has a parameter (driver) that passes in a value. In this case,
# this driver variable defaults to the string 'chrome'. The code can call # this driver variable defaults to the string 'chrome'. The code can call
# create_webdriver() which is the same as create_webdriver('chrome') but # create_webdriver() which is the same as create_webdriver('chrome') but
# can alternatively call create_webdriver('firefox') and get different # can alternatively call create_webdriver('firefox') and get different
# functionality. # functionality.
def create_webdriver(driver='chrome'): def create_webdriver(self, driver='chrome'):
if driver == 'chrome': if driver == 'chrome':
return create_webdriver_chrome() return self.create_webdriver_chrome()
elif driver == 'firefox': elif driver == 'firefox':
return create_webdriver_firefox() return self.create_webdriver_firefox()
elif driver == 'phantom': elif driver == 'phantom':
return create_webdriver_phantom() return self.create_webdriver_phantom()
else: else:
log.LOGGER('browser').error('unable to handle webdriver request: %s' % driver) log.LOGGER('browser').error('unable to handle webdriver request: %s' % driver)
return return
def create_webdriver_firefox(): def create_webdriver_firefox(self):
profile = selenium.webdriver.FirefoxProfile() profile = selenium.webdriver.FirefoxProfile()
profile.set_preference("general.useragent.override", config.obj.WEBDRIVER_USER_AGENT) profile.set_preference(
driver = selenium.webdriver.Firefox(profile) "general.useragent.override",
return driver self.ctx.config['app']['webdriver_user_agent'])
driver = selenium.webdriver.Firefox(profile)
return driver
def create_webdriver_chrome(): def create_webdriver_chrome(self):
opt = selenium.webdriver.chrome.options.Options() opt = selenium.webdriver.chrome.options.Options()
opt.add_argument('--user-agent=' + config.obj.WEBDRIVER_USER_AGENT) opt.add_argument('--user-agent=' + self.ctx.config['app']['webdriver_user_agent'])
driver = selenium.webdriver.Chrome(executable_path=config.obj.CHROMEDRIVER_EXE, driver = selenium.webdriver.Chrome(
chrome_options=opt) executable_path=self.ctx.config['app']['chromedriver_exe'],
return driver chrome_options=opt
)
return driver
def create_webdriver_phantom(): def create_webdriver_phantom(self):
driver = selenium.webdriver.PhantomJS() driver = selenium.webdriver.PhantomJS()
return driver return driver

@ -1,203 +0,0 @@
# The command-line interface module creates an interface for
# interacting with the python program (wikicrawl). This is an implementation
# of the baker demo shown previously. The user can type in commands to
# make the program do things.
import baker
import sys
if sys.platform == 'linux':
import readline # Needed for command history <up> and <down> arrows to work
from . import log
from . import model
from . import config
# Problem pages:
# Decision (from politics)
# Malaysia (goes inside parenthesis)
# Soft-sediment_deformation_structures (doesn't find link)
# Chemicals (loops at philosophical)
# Module-level baker command registry. The methods of InteractiveInterface
# register themselves on it through the @commander.command decorator, and
# it is used to run commands and print usage.
commander = baker.Baker()
class InteractiveInterface:
    """
    Command-line front end of wikicrawl, built on the baker library.

    Commands are the methods decorated with @commander.command. They can be
    run once from the process arguments (run_command) or repeatedly from an
    interactive prompt (start_command_loop).
    """

    def __init__(self):
        # Instantiate the variable self.model as an object
        # of instance of the Model class defined in the model
        # module. model.Model refers to the Model class in the
        # model module and this line creates a new variable (self.model)
        # which is a variable that is an instance of Model, i.e.
        # it has the type Model and has Model.methods() available
        # to it.
        #
        # self.model is a variable that is attached to the instance/object
        # returned by this constructor that has the type InteractiveInterface.
        self.model = model.Model()

    def run_command(self, args, main=True):
        """
        Runs the command-line interface for a single command.

        If called by InteractiveInterface.run(sys.argv), this method
        will execute the commands and arguments specified on command
        line when running this program. Alternatively, the code could
        pass in a different set of arguments to specify what to do.
        See start_command_loop() for more information.

        Args:
            args: argument list, with the script name as first element.
            main: forwarded to baker's run(). The interactive loop passes
                False so that input errors are raised to the handlers
                below; per baker's documentation, with True baker reports
                the error itself and exits the process.
        """
        try:
            # Forward the caller's choice; this used to be hard-coded to
            # True, which ignored the main=False of the command loop.
            commander.run(argv=args, main=main, help_on_error=True,
                          instance=self)
        except baker.CommandError as ex:
            log.LOGGER['cli'].warning('incorrect user input: %s' % ex)
            commander.usage()
        except baker.TopHelp as ex:
            commander.usage()
        except Exception as ex:
            log.LOGGER['cli'].error('caught general exception!!')
            log.LOGGER['cli'].error(ex)

    def start_command_loop(self):
        """
        Repeatedly asks the user what command to run until they exit.

        This method calls InteractiveInterface.run(args) a little bit
        differently. Instead of passing the arguments from the command-line
        that were passed in when invoking the python wikicrawl app,
        this asks the user for a line of textual input and passes
        those strings to run() as the arguments. This way, the user can
        access an interactive shell and repeatedly issue different
        commands while the application is running.
        """
        commander.usage()
        self.model.open_browser()
        while True:
            print('$ ', end = '') # Display to the user a command prompt
                                  # The dollar-sign is a common indication
                                  # of a shell that communicates to the user
                                  # that we are waiting for their textual
                                  # input. The end = '' indicates to python
                                  # to NOT drop to a newline after printing
                                  # in the terminal. Instead, let the user
                                  # type their command on the same line as
                                  # the printed '$ '.
            try:
                inp = input()
            except EOFError: # <ctrl>+D will send "End Line" and exit the command loop
                break

            # Note on "arguments" (mg):
            # Whenever a program is run in windows or *nix, the operating
            # system passes in the command string that was used to invoke
            # the program. You can append data in that command to configure
            # switches or values going into the program on the fly. For
            # example, you can invoke this wikicrawl app in more than one
            # way. You can of course run "python launcher.py" to run the
            # software but you can also pass in an argument. You can
            # alternatively run "python launcher.py <argument> <argument>..."
            # and the operating system will provide the <argument> values into
            # the process that is running as variables.
            #
            # In a real world use case, many commands provide switches to
            # adjust what the program does. For example,
            #
            # The command:
            #     find music -name "*justin*bieber*"
            # runs the "find" program and asks to find all the filenames that match the
            # pattern *justin*bieber* in the "music" directory.
            # (music, -name, "*justin*bieber*") are argument parameters
            # that are passed into the program. The program is coded to
            # parse and interpret these values and execute differently based
            # on the values passed in. This is one way to pass in information
            # into a running program. Some other ways are to read from a file
            # (such as how we read from settings.py to load the runtime
            # configuration), from something called environment variables
            # (won't get into but another set of values provided to programs
            # from the operating system), or they can be hard-coded into
            # the application.
            #
            # Side note: arguments are not unique to python (almost all
            # programming languages implement arguments), the functionality
            # is defined by the application (some programs require arguments,
            # some are optional, and the syntax for sending in argument
            # parameters are different and defined by the individual programs,
            # and lastly, the first argument sent in is the script name or
            # filename of the script. In our case, the first argument is
            # the string "launcher.py". If the user invoked the command
            # as C:\Users\mguest\launcher.py then the first argument
            # would be C:\Users\mguest\launcher.py.

            # What this method (start_command_loop()) does is provide a
            # REPL shell which is a
            # read-eval-print-loop. It repeatedly asks the user for an
            # input (read), evaluates that input into an action (evaluate),
            # give the user some feedback (print), and start the process
            # over again (loop). When you call just "python", you are loading a
            # program that gives you a REPL interactive shell. The way
            # this wikicrawl app is implemented gives the user a REPL
            # that has commands to interact with wikipedia pages.

            # Because we take in the input as a single string, we do
            # a transformation to turn something like "do_random_page 5"
            # into ["launcher.py", "do_random_page", "5"] which is how
            # the arguments array would have been created if it were
            # passed in the initial command instead of typed and interpreted
            # as input as is done here.
            args = [sys.argv[0], ] + inp.split()

            # The user can at any point in the command pass the argument
            # switch "--help". If doing this, the command line interface
            # will instead print out the inline documentation associated
            # with this command and quit after doing so. For example,
            # the user can type "python launcher.py do_random_page --help"
            # and the program will spit out the generated documentation
            # for the do_random_page command and run nothing. In our case,
            # this documentation is created by the baker library and will
            # print out the docstring associated with the method. Try it
            # out in your shell (cmd.exe or powershell.exe) by invoking
            #     python launcher.py do_random_page --help
            # You will see the program spit out the heredoc below the
            # do_random_page method defined below.
            if '--help' in args:
                args.remove('--help')
                try:
                    print('command usage:')
                    if len(args) > 1:
                        commander.usage(args[1])
                    else:
                        # "--help" on its own: there is no command name to
                        # look up, so show the top-level usage instead.
                        commander.usage()
                except Exception as ex:
                    print(type(ex), ex)
                continue

            self.run_command(args, main=False)

    @commander.command
    def play_specific_page(self, title):
        # Not implemented yet.
        pass

    @commander.command
    def play_random_page(self):
        """
        Instructs the wikicrawl application to play the game on a random
        article.
        """
        self.model.play_random_page()

    @commander.command
    def play_multiple(self, n):
        """
        Plays the wikicrawl game <n>-times.
        """
        try:
            n = int(n)
        except ValueError as ex:
            log.LOGGER['cli'].warning('failed to process "%s" as a parameter' % n)
            return False
        for i in range(n):
            self.model.play_random_page()

    @commander.command
    def exit(self):
        """
        Immediately exit the program.
        """
        sys.exit(0)

@ -1,22 +0,0 @@
# config module defines a place to store the external configuration/settings
# and is used to provide an interface to the runtime configuration for the
# program.
import sys
from . import log
obj = {}
def init(settings_obj):
    """
    Install settings_obj as the module-wide configuration and start logging.

    Rebinds the module-level name `obj` to settings_obj, calls
    find_chromedriver_path(), and then calls log.init_logging(). The
    configuration is assigned before logging is initialized.

    Args:
        settings_obj: the settings object to expose as config.obj.
    """
    global obj
    obj = settings_obj
    find_chromedriver_path()
    log.init_logging()
def find_chromedriver_path():
    """
    Print the path of this module file.

    NOTE(review): despite the name, no chromedriver lookup is performed
    here; this looks like an unfinished placeholder - confirm.
    """
    print(__file__)

76
wikicrawl/config.spec Normal file

@ -0,0 +1,76 @@
[app]
# Filepath parameters - THESE MUST EXIST OR PROGRAM WILL NOT RUN!!
log_filename = string(min=0, max=255, default='/tmp/wikicrawl.log')
sqlite_dbfile = string(min=0, max=255, default='/home/mathew/.wikicrawler.db')
chromedriver_exe = string(min=0, max=255, default='/usr/bin/chromedriver')
# CHROMEDRIVER_EXE = 'C:\\Users\\mathew\\windows-share\\dev\\wikicrawl\\chromedriver.exe'
# Application Parameters
do_breakpoints = boolean(default=False)
page_delay = integer(min=0, max=2000, default=0)
# Web Driver Parameters
webdriver_user_agent = string(min=0, max=255, default='Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)')
# Requested browser and webdriver dependencies are required for this to work.
# This means you need to have installed on your system:
# Chrome + WebDriver for Chrome
# Firefox + geckodriver for Firefox
# phantomjs for phantom
# Options are 'chrome', 'firefox', 'phantom'
webdriver_browser = string(min=1, max=63, default='chrome')
# Wikipedia Parameters
page_base_url = string(min=1, max=255, default='https://www.wikipedia.org/')
# Supported Languages so far:
# German, English, Spanish, French, Italian, Portuguese, Polish, Russian
# 'de', 'en', 'es', 'fr', 'it', 'pl', 'pt', 'ru'
page_language = string(min=2, max=22, default='en')
# API Keys
# Yandex is a web REST API for translating between different languages.
yandex_api_key = string(min=0, max=1023, default='trnsl.1.1.20170825T194642Z.26862b9dd4c1a755.9490ed28de448ff67522c2854f262eff05ec0dc3')
[logging]
log_file = string(max=255, default='')
log_level = option('critical', 'error', 'warning', 'info', 'debug', default='info')
log_fmt = string(max=255, default='')
[[formatters]]
[[[colored]]]
() = string(default='colorlog.ColoredFormatter')
format = string(max=255, default='%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(white)s%(message)s')
[[[basic]]]
() = string(max=255, default='logging.Formatter')
format = string(max=255, default='%(levelname)s:%(name)s:%(asctime)s:%(message)s')
[[[forstorage]]]
() = string(max=255, default='logging.Formatter')
format = string(max=255, default='%(levelname)s:%(name)s:%(asctime)s:%(message)s')
[[handlers]]
[[[stderr]]]
class = string(max=255, default='logging.StreamHandler')
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
formatter = string(max=255, default='colored')
[[[file]]]
class = string(max=255, default='logging.handlers.RotatingFileHandler')
level = option('critical', 'error', 'warning', 'info', 'debug', default='warning')
formatter = string(max=255, default='forstorage')
filename = string(max=255, default='road2philosophy.log')
maxBytes = integer(min=0, max=33554432, default=33554432)
backupCount = integer(min=0, max=3, default=1)
[[loggers]]
[[[root]]]
# Root logger: by default it writes to the 'file' handler only.
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
handlers = string_list(max=8, default=list('file',))
[[[r2p]]]
level = option('critical', 'error', 'warning', 'info', 'debug', default='debug')
handlers = string_list(max=8, default=list('stderr',))
propagate = boolean(default=False)

@ -4,9 +4,7 @@
import sqlite3 import sqlite3
import os import os
from . import config
class DataLayer: class DataLayer:
def __init__(self): def __init__(self, ctx):
pass self.ctx = ctx

@ -1,39 +0,0 @@
# log module is a wrapper around third-party colorlog library
# and provides an application-level interface to a logging system.
import colorlog
import logging
from . import config
# Default python log severity levels:
# CRITICAL
# ERROR
# WARNING
# INFO
# DEBUG
LOGGER = None
class LoggingLayer:
    """
    Applies a logging configuration and hands out named loggers through
    the [] operator.
    """

    def __init__(self, config):
        """
        Args:
            config: configuration dictionary passed to
                logging.config.dictConfig().
        """
        # 'import logging' alone does not load the logging.config
        # submodule; import it explicitly so the dictConfig call below does
        # not depend on some other module having imported it first.
        import logging.config

        self.loggers = {}
        logging.config.dictConfig(config)

    # Note on __getitem__:
    # __getitem__ overrides the functionality of the [] operator.
    # That means this code:
    #     objinstance = LoggingLayer(...)
    #     objinstance[foo] calls LoggingLayer.__getitem__(foo)
    # and returns the result.
    def __getitem__(self, k):
        """Return the logger named k, remembering it in self.loggers."""
        logger = self.loggers.get(k)
        if logger is None:
            logger = logging.getLogger(k)
            self.loggers[k] = logger
        return logger
def init_logging():
    """
    Create the module-wide LoggingLayer and store it in LOGGER.

    The layer is built from config.obj.LOG_SETTINGS, so config.obj has to
    hold the settings object before this is called.
    """
    global LOGGER
    LOGGER = LoggingLayer(config.obj.LOG_SETTINGS)

@ -1,23 +0,0 @@
#!/usr/bin/env python
import sys
from . import cli
from . import config
from . import util
def main():
    """
    Start the command-line interface.

    When the program was invoked without any arguments the interactive
    command loop is started; otherwise the arguments given on the command
    line are run as a single command.
    """
    user_interface = cli.InteractiveInterface()
    if len(sys.argv) <= 1:
        # Only the script name is present: no command was passed in.
        user_interface.start_command_loop()
        return
    # Command line arguments were passed in command-line when invoking python
    user_interface.run_command(sys.argv)
def verify_config_is_valid():
    """Placeholder: performs no validation yet and returns None."""
    pass
if __name__ == '__main__':
main()

@ -9,15 +9,15 @@
import os import os
import time import time
from . import browser
from . import config
from . import dal
from . import log
from . import pages from . import pages
from . import util from . import util
class Model: class Model:
def __init__(self): def __init__(self, ctx, browser, dal):
self.ctx = ctx
self.browser = browser
self.dal = dal
self._webdriver = None self._webdriver = None
self._translated_philosophy = None self._translated_philosophy = None
@ -33,7 +33,7 @@ class Model:
# always existing with or without knowing if it exists because if it # always existing with or without knowing if it exists because if it
# hasn't been created yet then it will be created on-the-fly. # hasn't been created yet then it will be created on-the-fly.
if not self._webdriver: if not self._webdriver:
self._webdriver = browser.create_webdriver(config.obj.WEBDRIVER_BROWSER) self._webdriver = self.browser.create_webdriver(self.ctx.config['app']['webdriver_browser'])
page_api = pages.LandingPage(self.webdriver) page_api = pages.LandingPage(self.webdriver)
page_api.goto_landing_page() page_api.goto_landing_page()
return self._webdriver return self._webdriver
@ -41,10 +41,10 @@ class Model:
@property @property
def translated_philosophy(self): def translated_philosophy(self):
# This translates 'philosophy' to the target language with only 1 api call. # This translates 'philosophy' to the target language with only 1 api call.
if config.obj.PAGE_LANGUAGE == 'en': if self.ctx.config['app']['page_language'] == 'en':
self._translated_philosophy = 'philosophy' self._translated_philosophy = 'philosophy'
elif not self._translated_philosophy: elif not self._translated_philosophy:
text = util.translate_text('en', config.obj.PAGE_LANGUAGE, 'philosophy') text = util.translate_text('en', self.ctx.config['app']['page_language'], 'philosophy')
self._translated_philosophy = text self._translated_philosophy = text
return self._translated_philosophy return self._translated_philosophy
@ -59,6 +59,19 @@ class Model:
# creates it and then it is re-used later # creates it and then it is re-used later
# in the application. # in the application.
def play_multiple(self, n):
    """
    Plays the wikicrawl game <n>-times.

    Args:
        n: number of games to play; anything int() accepts, e.g. the
            string typed on the command line.

    Returns:
        False when n cannot be converted to an integer, otherwise None.
    """
    try:
        count = int(n)
    except (TypeError, ValueError):
        self.ctx.log['cli'].warning('failed to process "%s" as a parameter', n)
        return False
    for _ in range(count):
        # This method lives on the model itself, so play_random_page() is
        # called on self. The former self.model.play_random_page() was
        # carried over from the old CLI class and fails here because this
        # object has no 'model' attribute.
        self.play_random_page()
def play_random_page(self): def play_random_page(self):
""" """
Select a random page and repeatedly click the first link until Select a random page and repeatedly click the first link until
@ -86,7 +99,7 @@ class Model:
# In this case, we have made the language a parameter # In this case, we have made the language a parameter
# that you can pass into the program, i.e. you can run it # that you can pass into the program, i.e. you can run it
# for English or Spanish or Russian or what have you. # for English or Spanish or Russian or what have you.
page_api.select_language(config.obj.PAGE_LANGUAGE) page_api.select_language(self.ctx.config['app']['page_language'])
# Main page: next 2 lines # Main page: next 2 lines
@ -114,20 +127,20 @@ class Model:
# Get the article title (and translate if necessary) # Get the article title (and translate if necessary)
title = page_api.get_title() title = page_api.get_title()
if config.obj.PAGE_LANGUAGE != 'en': if self.ctx.config['app']['page_language'] != 'en':
translated_title = util.translate_text(config.obj.PAGE_LANGUAGE, 'en', title) translated_title = util.translate_text(self.ctx.config['app']['page_language'], 'en', title)
log.LOGGER['model'].info('visited page: %s (%s)' % (title, translated_title)) self.ctx.log['model'].info('visited page: %s (%s)' % (title, translated_title))
else: else:
log.LOGGER['model'].info('visited page: %s' % title) self.ctx.log['model'].info('visited page: %s' % title)
# Check for page loops (have we already visisted this page?) # Check for page loops (have we already visisted this page?)
if title in pages_visited: if title in pages_visited:
log.LOGGER['model'].info('encountered loop at page = %s' % title) self.ctx.log['model'].info('encountered loop at page = %s' % title)
break break
# Check if we reached the article on philosophy # Check if we reached the article on philosophy
if self._is_article_on_philosophy(title, translated_title): if self._is_article_on_philosophy(title, translated_title):
log.LOGGER['model'].info('made it to philosophy in %s pages' % len(pages_visited)) self.ctx.log['model'].info('made it to philosophy in %s pages' % len(pages_visited))
pages_visited.append(title) pages_visited.append(title)
break break
@ -136,7 +149,7 @@ class Model:
rc = page_api.click_first_link() rc = page_api.click_first_link()
if not rc: if not rc:
log.LOGGER['model'].warn('failure: unable to continue (perhaps no valid links?)') self.ctx.log['model'].warn('failure: unable to continue (perhaps no valid links?)')
break break
print() print()

@ -2,15 +2,15 @@
# There are separate classes defined for each page with their own # There are separate classes defined for each page with their own
# defined methods for performing certain actions. # defined methods for performing certain actions.
from . import browser
from . import util
from .assets.languages import LANGUAGES
import re import re
import selenium import selenium
import time import time
from . import browser PAGE_BASE_URL = 'https://www.wikipedia.org/'
from . import config
from . import log
from . import util
from .assets.languages import LANGUAGES
class PageRootObject: class PageRootObject:
""" """
@ -43,7 +43,8 @@ class PageRootObject:
html link in the webpage. html link in the webpage.
""" """
self.highlight(el, 'red') self.highlight(el, 'red')
time.sleep(config.obj.PAGE_DELAY) DELAY=0
time.sleep(DELAY)
util.breakpoint() util.breakpoint()
el.click() el.click()
@ -97,7 +98,7 @@ class LandingPage(PageRootObject):
""" """
Navigates the browser to www.wikipedia.org Navigates the browser to www.wikipedia.org
""" """
self.driver.get(config.obj.PAGE_BASE_URL) self.driver.get(PAGE_BASE_URL)
def select_language(self, language): def select_language(self, language):
lang_text = LANGUAGES.get(language) lang_text = LANGUAGES.get(language)
@ -219,13 +220,13 @@ class ArticlePage(PageRootObject):
if len(links) == 0: if len(links) == 0:
return False return False
for link in links: for link in links:
log.LOGGER['pages'].debug('processing link: %s' % link.text) self.ctx.log['pages'].debug('processing link: %s' % link.text)
if not self._is_valid_link(p, link): if not self._is_valid_link(p, link):
log.LOGGER['pages'].debug('skipping link inside parenthesis: %s' % link.text) self.ctx.log['pages'].debug('skipping link inside parenthesis: %s' % link.text)
self.highlight(link, 'blue') self.highlight(link, 'blue')
continue continue
self.highlight(link, 'red') self.highlight(link, 'red')
log.LOGGER['pages'].info('selected link: %s' % link.text) self.ctx.log['pages'].info('selected link: %s' % link.text)
self.click(link) self.click(link)
return True return True

@ -3,14 +3,14 @@
import yandex_translate import yandex_translate
from . import config DO_BREAKPOINTS = False
def breakpoint(): def breakpoint():
""" """
If DO_BREAKPOINTS is switched on, this will pause program If DO_BREAKPOINTS is switched on, this will pause program
execution and wait for the user to press enter to continue. execution and wait for the user to press enter to continue.
""" """
if config.obj.DO_BREAKPOINTS: if DO_BREAKPOINTS:
input('BREAKPOINT hit. <Enter> to continue...') input('BREAKPOINT hit. <Enter> to continue...')
def translate_text(source_language, target_language, text): def translate_text(source_language, target_language, text):