# Application run-time configuration/settings. This contains variables
# that control how the program works but are kept separate from the
# program. It makes sense for certain parameters to be adjustable but
# not hard-coded into the application. For example, some users may want
# to run this program in English while others may want to run in Spanish.
#
# The way this works is we specify those variables externally to the
# application (here) and pass them into the application
# (wikicrawl.config module). The application then references
# wikicrawl.config.obj to access the variables passed in from here.

import logging
import logging.config
import os

# colorlog is not called directly in this module; it is referenced by its
# dotted name ('colorlog.ColoredFormatter') in LOG_SETTINGS below.
import colorlog


class DefaultSettings:
    """Default run-time settings for the application.

    Every setting is a plain class attribute. LOG_SETTINGS is a dictionary
    laid out in the logging.config.dictConfig schema (version 1).
    """

    # Filepath parameters - THESE MUST EXIST OR PROGRAM WILL NOT RUN!!
    LOG_FILENAME = '/tmp/wikicrawl.log'
    SQLITE_DBFILE = '/home/mathew/.wikicrawler.db'
    CHROMEDRIVER_EXE = '/usr/bin/chromedriver'
    # Windows example:
    # CHROMEDRIVER_EXE = 'C:\\Users\\mathew\\windows-share\\dev\\wikicrawl\\chromedriver.exe'

    # Application Parameters
    DO_BREAKPOINTS = False
    PAGE_DELAY = 0

    # Web Driver Parameters
    WEBDRIVER_USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)'

    # Requested browser and webdriver dependencies are required for this
    # to work. This means you need to have installed on your system:
    #   Chrome + WebDriver for Chrome
    #   Firefox + geckodriver for Firefox
    #   phantomjs for phantom
    WEBDRIVER_BROWSER = 'chrome'  # Options are 'chrome', 'firefox', 'phantom'

    # Wikipedia Parameters
    PAGE_BASE_URL = 'https://www.wikipedia.org/'

    # Supported languages so far:
    #   German, English, Spanish, French, Italian, Polish, Portuguese, Russian
    #   'de', 'en', 'es', 'fr', 'it', 'pl', 'pt', 'ru'
    PAGE_LANGUAGE = 'en'

    # API Keys
    # Yandex is a web REST API for translating between different languages.
    # The key can be supplied through the YANDEX_API_KEY environment
    # variable; when that variable is unset the historical default below is
    # used, so existing setups keep working.
    # NOTE(review): the fallback key is committed to source control. Consider
    # rotating it and supplying the replacement via the environment only.
    YANDEX_API_KEY = os.environ.get(
        'YANDEX_API_KEY',
        'trnsl.1.1.20170825T194642Z.26862b9dd4c1a755.9490ed28de448ff67522c2854f262eff05ec0dc3',
    )

    # Logging Parameters
    LOG_SETTINGS = {
        'version': 1,  # version is always 1
        'formatters': {
            'colored': {
                '()': 'colorlog.ColoredFormatter',
                'format': '%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(white)s%(message)s',
            },
            'basic': {
                '()': 'logging.Formatter',
                'format': '%(levelname)s:%(name)s:%(asctime)s:%(message)s',
            },
        },
        'handlers': {
            'stderr': {
                'class': 'logging.StreamHandler',
                # The handler level will override the logger level if
                # higher. That is, if the logger level is set to pass
                # through DEBUG and higher and the handler is set to only
                # pass through WARNING and higher, DEBUG messages will not
                # pass through to this logger's handler. You can configure
                # multiple handlers for any logger so, for example, you
                # could log WARNINGS and ERRORS to a file but not save all
                # the DEBUG messages.
                'level': logging.DEBUG,
                'formatter': 'colored',
            },
            'file': {
                'class': 'logging.handlers.RotatingFileHandler',
                'level': logging.INFO,
                'formatter': 'basic',
                'filename': LOG_FILENAME,
                'maxBytes': 32768,  # roll over once the file reaches 32 KiB
                'backupCount': 3,   # keep at most three rolled-over files
            },
        },
        'loggers': {
            # Root Logger
            '': {
                'level': logging.DEBUG,
                'handlers': ['file'],
            },
            'main': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False,
            },
            'model': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': True,  # Send to root logger
            },
            'cli': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False,
            },
            'pages': {
                'level': logging.INFO,
                'handlers': ['stderr'],
                'propagate': False,
            },
        },
    }