wikicrawl/settings.py
2017-10-20 17:39:00 -06:00

115 lines
4.2 KiB
Python

# Application run-time configuration/settings. This contains variables
# that control how the program works but are kept separate from the
# program. It makes sense for certain parameters to be adjustable but
# not hard-coded into the application. For example, some users may want
# to run this program in English while others may want to run in Spanish.
# The way this works is that we define those variables outside of the
# application (here) and pass them into the application through the
# wikicrawl.config module. The application then reads them back from
# wikicrawl.config.obj.
import colorlog
import logging
import logging.config
class DefaultSettings:
    """Default run-time settings for the crawler.

    Every setting is a plain class attribute. ``LOG_SETTINGS`` is a
    dictionary in the ``logging.config.dictConfig`` schema (version 1).
    """

    # Filepath parameters - THESE MUST EXIST OR PROGRAM WILL NOT RUN!!
    LOG_FILENAME = '/tmp/wikicrawl.log'
    SQLITE_DBFILE = '/home/mathew/.wikicrawler.db'
    CHROMEDRIVER_EXE = '/usr/bin/chromedriver'
    # Windows example:
    # CHROMEDRIVER_EXE = 'C:\\Users\\mathew\\windows-share\\dev\\wikicrawl\\chromedriver.exe'

    # Application Parameters
    DO_BREAKPOINTS = False
    PAGE_DELAY = 0

    # Web Driver Parameters
    WEBDRIVER_USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)'
    # Requested browser and webdriver dependencies are required for this to
    # work. This means you need to have installed on your system:
    #   Chrome + WebDriver for Chrome
    #   Firefox + geckodriver for Firefox
    #   phantomjs for phantom
    WEBDRIVER_BROWSER = 'chrome'  # Options are 'chrome', 'firefox', 'phantom'

    # Wikipedia Parameters
    PAGE_BASE_URL = 'https://www.wikipedia.org/'
    # Supported languages so far:
    #   German, English, Spanish, French, Italian, Polish, Portuguese, Russian
    #   'de', 'en', 'es', 'fr', 'it', 'pl', 'pt', 'ru'
    PAGE_LANGUAGE = 'en'

    # API Keys
    # Yandex is a web REST API for translating between different languages.
    # NOTE(review): this key is hard-coded in the source file; consider
    # loading it from the environment or an untracked local settings file.
    YANDEX_API_KEY = 'trnsl.1.1.20170825T194642Z.26862b9dd4c1a755.9490ed28de448ff67522c2854f262eff05ec0dc3'

    # Logging Parameters
    LOG_SETTINGS = {
        'version': 1,  # version is always 1
        'formatters': {
            'colored': {
                '()': 'colorlog.ColoredFormatter',
                'format': '%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(white)s%(message)s'
            },
            'basic': {
                '()': 'logging.Formatter',
                'format': '%(levelname)s:%(name)s:%(asctime)s:%(message)s'
            }
        },
        'handlers': {
            'stderr': {
                'class': 'logging.StreamHandler',
                # The handler level will override the logger level if
                # higher. That is, if the logger level is set to pass
                # through DEBUG and higher and the handler is set to only
                # pass through WARNING and higher, DEBUG messages will not
                # pass through to this logger's handler. You can configure
                # multiple handlers for any logger, so for example you
                # could log WARNINGS and ERRORS to a file but not save all
                # the DEBUG messages.
                'level': logging.DEBUG,
                'formatter': 'colored'
            },
            'file': {
                'class': 'logging.handlers.RotatingFileHandler',
                'level': logging.INFO,
                'formatter': 'basic',
                'filename': LOG_FILENAME,
                'maxBytes': 32768,
                'backupCount': 3,
                # Pin the file encoding: without it the handler uses the
                # platform default, which can fail on non-ASCII text (the
                # supported languages include Russian and Polish).
                'encoding': 'utf-8'
            }
        },
        'loggers': {
            # Root Logger
            '': {
                'level': logging.DEBUG,
                'handlers': ['file'],
            },
            'main': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False
            },
            'model': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': True  # Send to root logger
            },
            'cli': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False
            },
            'pages': {
                'level': logging.INFO,
                'handlers': ['stderr'],
                'propagate': False
            }
        }
    }