mirror of
https://git.zavage.net/Zavage-Software/wikicrawl.git
synced 2024-11-21 16:00:24 -07:00
final polish for 1.0
This commit is contained in:
parent
b851be0d03
commit
4a9609023b
@ -91,7 +91,7 @@ class DefaultSettings:
|
|||||||
'model': {
|
'model': {
|
||||||
'level': logging.DEBUG,
|
'level': logging.DEBUG,
|
||||||
'handlers': ['stderr'],
|
'handlers': ['stderr'],
|
||||||
'propagate': True
|
'propagate': True # Send to root logger
|
||||||
},
|
},
|
||||||
'cli': {
|
'cli': {
|
||||||
'level': logging.DEBUG,
|
'level': logging.DEBUG,
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
import baker
|
import baker
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
if sys.platform == 'linux':
|
if sys.platform == 'linux':
|
||||||
import readline # Needed for command history <up> and <down> arrows to work
|
import readline # Needed for command history <up> and <down> arrows to work
|
||||||
|
|
||||||
@ -56,7 +55,7 @@ class InteractiveInterface:
|
|||||||
commander.usage()
|
commander.usage()
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
log.LOGGER['cli'].error('caught general exception!!')
|
log.LOGGER['cli'].error('caught general exception!!')
|
||||||
log.LOGGER['cli'].error(type(ex), ex)
|
log.LOGGER['cli'].error(ex)
|
||||||
|
|
||||||
def start_command_loop(self):
|
def start_command_loop(self):
|
||||||
"""
|
"""
|
||||||
@ -170,6 +169,10 @@ class InteractiveInterface:
|
|||||||
|
|
||||||
self.run_command(args, main=False)
|
self.run_command(args, main=False)
|
||||||
|
|
||||||
|
@commander.command
|
||||||
|
def play_specific_page(self, title):
|
||||||
|
pass
|
||||||
|
|
||||||
@commander.command
|
@commander.command
|
||||||
def play_random_page(self):
|
def play_random_page(self):
|
||||||
"""
|
"""
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
# Nothing is implemented here yet... This is intended to be a future
|
||||||
|
# exercise.
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import pycurl
|
import pycurl
|
||||||
import os
|
import os
|
||||||
|
@ -221,6 +221,8 @@ class ArticlePage(PageRootObject):
|
|||||||
return False
|
return False
|
||||||
if self._is_link_audio(el):
|
if self._is_link_audio(el):
|
||||||
return False
|
return False
|
||||||
|
if self._is_not_wikipedia(el):
|
||||||
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _is_link_in_parenthesis(self, p, el):
|
def _is_link_in_parenthesis(self, p, el):
|
||||||
@ -299,3 +301,9 @@ class ArticlePage(PageRootObject):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _is_not_wikipedia(self, el):
|
||||||
|
href = el.get_attribute('href')
|
||||||
|
if 'wikipedia.org' not in href:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user