wikicrawl/settings.py

111 lines
4.0 KiB
Python
Raw Normal View History

# Application run-time configuration/settings. This contains variables
# that control how the program works but are kept separate from the
# program. It makes sense for certain parameters to be adjustable but
# not hard-coded into the application. For example, some users may want
# to run this program in English while others may want to run in Spanish.
# The way this works is that we specify those variables outside the
# application (here) and pass them into the application (wikicrawl.config
# module). The application then references wikicrawl.config.obj to access
# the variables passed in from here.
import colorlog
2017-08-17 01:27:05 -06:00
import logging
import logging.config
2017-08-17 01:27:05 -06:00
class DefaultSettings:
    """Default run-time settings, exposed as plain class attributes.

    The values are grouped by concern: file paths, application behaviour,
    web driver, Wikipedia, API keys and logging. ``LOG_SETTINGS`` is laid
    out in the dictionary schema accepted by ``logging.config.dictConfig``
    (schema version 1).
    """

    # Filepath parameters - THESE MUST EXIST OR PROGRAM WILL NOT RUN!!
    LOG_FILENAME = '/tmp/wikicrawl.log'
    # NOTE(review): this path is tied to one user's home directory; other
    # users presumably need to override it - confirm how overrides are applied.
    SQLITE_DBFILE = '/home/mathew/.wikicrawler.db'

    # Application Parameters
    DO_BREAKPOINTS = False
    PAGE_DELAY = 0

    # Web Driver Parameters
    WEBDRIVER_USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)'
    # Requested browser and webdriver dependencies are required for this to
    # work. This means you need to have installed on your system:
    #   Chrome + WebDriver for Chrome
    #   Firefox + geckodriver for Firefox
    #   phantomjs for phantom
    WEBDRIVER_BROWSER = 'chrome'  # Options are 'chrome', 'firefox', 'phantom'

    # Wikipedia Parameters
    PAGE_BASE_URL = 'https://www.wikipedia.org/'
    # Supported Languages so far:
    #   German, English, Spanish, French, Italian, Portuguese, Polish, Russian
    #   'de', 'en', 'es', 'fr', 'it', 'pl', 'pt', 'ru'
    PAGE_LANGUAGE = 'en'

    # API Keys
    # NOTE(review): a credential is committed in source here. The value is
    # kept unchanged so behaviour is preserved; consider rotating it and
    # loading it from outside the repository.
    YANDEX_API_KEY = 'trnsl.1.1.20170825T194642Z.26862b9dd4c1a755.9490ed28de448ff67522c2854f262eff05ec0dc3'

    # Logging Parameters
    LOG_SETTINGS = {
        'version': 1,  # version is always 1
        'formatters': {
            # Colorized output; the factory is named by its dotted path.
            'colored': {
                '()': 'colorlog.ColoredFormatter',
                'format': '%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(blue)s%(message)s'
            },
            # Plain, timestamped output.
            'basic': {
                '()': 'logging.Formatter',
                'format': '%(levelname)s:%(name)s:%(asctime)s:%(message)s'
            }
        },
        'handlers': {
            'stderr': {
                'class': 'logging.StreamHandler',
                # The handler level will override the logger level if higher.
                # That is, if the logger level is set to pass through DEBUG
                # and higher and the handler is set to only pass through
                # WARNING and higher, DEBUG messages will not pass through to
                # this logger's handler. You can configure multiple handlers
                # for any logger so for example you could log WARNINGS and
                # ERRORS to a file but not save all the DEBUG messages.
                'level': logging.DEBUG,
                'formatter': 'colored'
            },
            'file': {
                'class': 'logging.handlers.RotatingFileHandler',
                'level': logging.INFO,
                'formatter': 'basic',
                # Evaluated while the class body runs, so this picks up the
                # LOG_FILENAME defined above.
                'filename': LOG_FILENAME,
                'maxBytes': 32768,
                'backupCount': 3
            }
        },
        'loggers': {
            # Root Logger
            '': {
                'level': logging.DEBUG,
                'handlers': ['file'],
            },
            'main': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False
            },
            'model': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': True  # Send to root logger
            },
            'cli': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False
            },
            'pages': {
                'level': logging.INFO,
                'handlers': ['stderr'],
                'propagate': False
            }
        }
    }
2017-08-17 01:27:05 -06:00