2017-08-22 17:50:33 -06:00
|
|
|
# Application run-time configuration/settings. This contains variables
|
|
|
|
# that control how the program works but are kept separate from the
|
|
|
|
# program. It makes sense for certain parameters to be adjustable but
|
|
|
|
# not hard-coded into the application. For example, some users may want
|
|
|
|
# to run this program in English while others may want to run in Spanish.
|
|
|
|
# The way this works is we specify those variables externally to the
|
2017-08-25 18:09:46 -06:00
|
|
|
# application (here) and pass them into the application (wikicrawl.config module).
|
|
|
|
# The application then references wikicrawl.config.obj to access the variables
|
2017-08-22 17:50:33 -06:00
|
|
|
# passed in from here.
|
|
|
|
|
2017-08-25 18:09:46 -06:00
|
|
|
import colorlog
|
2017-08-17 01:27:05 -06:00
|
|
|
import logging
|
2017-08-25 18:09:46 -06:00
|
|
|
import logging.config
|
2017-08-17 01:27:05 -06:00
|
|
|
|
2017-08-22 17:50:33 -06:00
|
|
|
class DefaultSettings:
    """Default run-time settings for the application.

    Every setting is a plain class attribute. The attributes are grouped
    below by what they configure: file paths, application behaviour, the
    web driver, Wikipedia access, API keys and logging.
    """

    # Filepath parameters - THESE MUST EXIST OR PROGRAM WILL NOT RUN!!
    LOG_FILENAME = '/tmp/wikicrawl.log'
    # NOTE(review): absolute path inside one user's home directory; anyone
    # else running the program presumably has to override this value.
    SQLITE_DBFILE = '/home/mathew/.wikicrawler.db'

    # Application Parameters
    DO_BREAKPOINTS = False
    PAGE_DELAY = 0

    # Web Driver Parameters
    WEBDRIVER_USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)'

    # Requested browser and webdriver dependencies are required for this to work.
    # This means you need to have installed on your system:
    #   Chrome + WebDriver for Chrome
    #   Firefox + geckodriver for Firefox
    #   phantomjs for phantom
    WEBDRIVER_BROWSER = 'chrome'  # Options are 'chrome', 'firefox', 'phantom'

    # Wikipedia Parameters
    PAGE_BASE_URL = 'https://www.wikipedia.org/'

    # Supported languages so far (names listed in the same order as codes):
    #   German, English, Spanish, French, Italian, Polish, Portuguese, Russian
    #   'de',   'en',    'es',    'fr',   'it',    'pl',   'pt',       'ru'
    PAGE_LANGUAGE = 'en'

    # API Keys
    # NOTE(review): this key is committed in source control. Consider
    # rotating it and supplying it from outside the repository instead.
    YANDEX_API_KEY = 'trnsl.1.1.20170825T194642Z.26862b9dd4c1a755.9490ed28de448ff67522c2854f262eff05ec0dc3'

    # Logging Parameters
    # Dictionary in the schema understood by logging.config.dictConfig.
    LOG_SETTINGS = {
        'version': 1,  # version is always 1
        'formatters': {
            # Colourised output; the formatter class comes from colorlog.
            'colored': {
                '()': 'colorlog.ColoredFormatter',
                'format': '%(log_color)s%(levelname)-8s%(reset)s:%(log_color)s%(name)-5s%(reset)s:%(blue)s%(message)s',
            },
            # Plain text output with a timestamp.
            'basic': {
                '()': 'logging.Formatter',
                'format': '%(levelname)s:%(name)s:%(asctime)s:%(message)s',
            },
        },
        'handlers': {
            'stderr': {
                'class': 'logging.StreamHandler',
                # A handler's level filters on top of the logger's level.
                # If the logger lets DEBUG and higher through but the
                # handler only accepts WARNING and higher, DEBUG messages
                # never reach that handler. A logger may have several
                # handlers, so you could, for example, write WARNINGs and
                # ERRORs to a file without saving every DEBUG message.
                'level': logging.DEBUG,
                'formatter': 'colored',
            },
            'file': {
                'class': 'logging.handlers.RotatingFileHandler',
                'level': logging.INFO,
                'formatter': 'basic',
                'filename': LOG_FILENAME,
                'maxBytes': 32768,
                'backupCount': 3,
            },
        },
        'loggers': {
            # Root Logger
            '': {
                'level': logging.DEBUG,
                'handlers': ['file'],
            },
            'main': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False,
            },
            'model': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': True,  # Send to root logger
            },
            'cli': {
                'level': logging.DEBUG,
                'handlers': ['stderr'],
                'propagate': False,
            },
            'pages': {
                'level': logging.INFO,
                'handlers': ['stderr'],
                'propagate': False,
            },
        },
    }
|
|
|
|
|
|
|
|
|
2017-08-17 01:27:05 -06:00
|
|
|
|