forked from Ponysearch/Ponysearch
[fix] pep8 compatibility
This commit is contained in:
parent
28fed5d9ad
commit
b0fd71b7b3
2 changed files with 22 additions and 18 deletions
|
@ -1,8 +1,9 @@
|
||||||
## Yahoo (News)
|
# Yahoo (News)
|
||||||
#
|
#
|
||||||
# @website https://news.yahoo.com
|
# @website https://news.yahoo.com
|
||||||
# @provide-api yes (https://developer.yahoo.com/boss/search/), $0.80/1000 queries
|
# @provide-api yes (https://developer.yahoo.com/boss/search/)
|
||||||
#
|
# $0.80/1000 queries
|
||||||
|
#
|
||||||
# @using-api no (because pricing)
|
# @using-api no (because pricing)
|
||||||
# @results HTML (using search portal)
|
# @results HTML (using search portal)
|
||||||
# @stable no (HTML can change)
|
# @stable no (HTML can change)
|
||||||
|
@ -22,7 +23,7 @@ paging = True
|
||||||
language_support = True
|
language_support = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
search_url = 'https://news.search.yahoo.com/search?{query}&b={offset}&fl=1&vl=lang_{lang}'
|
search_url = 'https://news.search.yahoo.com/search?{query}&b={offset}&fl=1&vl=lang_{lang}' # noqa
|
||||||
|
|
||||||
# specific xpath variables
|
# specific xpath variables
|
||||||
results_xpath = '//div[@class="res"]'
|
results_xpath = '//div[@class="res"]'
|
||||||
|
@ -41,7 +42,7 @@ def request(query, params):
|
||||||
language = 'en'
|
language = 'en'
|
||||||
else:
|
else:
|
||||||
language = params['language'].split('_')[0]
|
language = params['language'].split('_')[0]
|
||||||
|
|
||||||
params['url'] = search_url.format(offset=offset,
|
params['url'] = search_url.format(offset=offset,
|
||||||
query=urlencode({'p': query}),
|
query=urlencode({'p': query}),
|
||||||
lang=language)
|
lang=language)
|
||||||
|
|
|
@ -45,11 +45,9 @@ def load_single_https_ruleset(filepath):
|
||||||
# get root node
|
# get root node
|
||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
|
|
||||||
#print(etree.tostring(tree))
|
|
||||||
|
|
||||||
# check if root is a node with the name ruleset
|
# check if root is a node with the name ruleset
|
||||||
# TODO improve parsing
|
# TODO improve parsing
|
||||||
if root.tag != 'ruleset':
|
if root.tag != 'ruleset':
|
||||||
return ()
|
return ()
|
||||||
|
|
||||||
# check if rule is deactivated by default
|
# check if rule is deactivated by default
|
||||||
|
@ -68,36 +66,39 @@ def load_single_https_ruleset(filepath):
|
||||||
for ruleset in root:
|
for ruleset in root:
|
||||||
# this child defines a target
|
# this child defines a target
|
||||||
if ruleset.tag == 'target':
|
if ruleset.tag == 'target':
|
||||||
# check if required tags available
|
# check if required tags available
|
||||||
if not ruleset.attrib.get('host'):
|
if not ruleset.attrib.get('host'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# convert host-rule to valid regex
|
# convert host-rule to valid regex
|
||||||
host = ruleset.attrib.get('host').replace('.', '\.').replace('*', '.*')
|
host = ruleset.attrib.get('host')\
|
||||||
|
.replace('.', '\.').replace('*', '.*')
|
||||||
|
|
||||||
# append to host list
|
# append to host list
|
||||||
hosts.append(host)
|
hosts.append(host)
|
||||||
|
|
||||||
# this child defines a rule
|
# this child defines a rule
|
||||||
elif ruleset.tag == 'rule':
|
elif ruleset.tag == 'rule':
|
||||||
# check if required tags available
|
# check if required tags available
|
||||||
if not ruleset.attrib.get('from')\
|
if not ruleset.attrib.get('from')\
|
||||||
or not ruleset.attrib.get('to'):
|
or not ruleset.attrib.get('to'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# TODO hack, which converts a JavaScript regex group into a valid Python regex group
|
# TODO hack, which converts a JavaScript regex group
|
||||||
|
# into a valid Python regex group
|
||||||
rule_from = ruleset.attrib.get('from').replace('$', '\\')
|
rule_from = ruleset.attrib.get('from').replace('$', '\\')
|
||||||
rule_to = ruleset.attrib.get('to').replace('$', '\\')
|
rule_to = ruleset.attrib.get('to').replace('$', '\\')
|
||||||
|
|
||||||
# TODO, not working yet because of the hack above, currently doing that in webapp.py
|
# TODO, not working yet because of the hack above,
|
||||||
#rule_from_rgx = re.compile(rule_from, re.I)
|
# currently doing that in webapp.py
|
||||||
|
# rule_from_rgx = re.compile(rule_from, re.I)
|
||||||
|
|
||||||
# append rule
|
# append rule
|
||||||
rules.append((rule_from, rule_to))
|
rules.append((rule_from, rule_to))
|
||||||
|
|
||||||
# this child defines an exclusion
|
# this child defines an exclusion
|
||||||
elif ruleset.tag == 'exclusion':
|
elif ruleset.tag == 'exclusion':
|
||||||
# check if required tags available
|
# check if required tags available
|
||||||
if not ruleset.attrib.get('pattern'):
|
if not ruleset.attrib.get('pattern'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -124,7 +125,9 @@ def load_https_rules(rules_path):
|
||||||
rules_path += '/'
|
rules_path += '/'
|
||||||
|
|
||||||
# search all xml files which are stored in the https rule directory
|
# search all xml files which are stored in the https rule directory
|
||||||
xml_files = [ join(rules_path,f) for f in listdir(rules_path) if isfile(join(rules_path,f)) and f[-4:] == '.xml' ]
|
xml_files = [join(rules_path, f)
|
||||||
|
for f in listdir(rules_path)
|
||||||
|
if isfile(join(rules_path, f)) and f[-4:] == '.xml']
|
||||||
|
|
||||||
# load xml-files
|
# load xml-files
|
||||||
for ruleset_file in xml_files:
|
for ruleset_file in xml_files:
|
||||||
|
@ -137,5 +140,5 @@ def load_https_rules(rules_path):
|
||||||
|
|
||||||
# append ruleset
|
# append ruleset
|
||||||
https_rules.append(ruleset)
|
https_rules.append(ruleset)
|
||||||
|
|
||||||
print(' * {n} https-rules loaded'.format(n=len(https_rules)))
|
print(' * {n} https-rules loaded'.format(n=len(https_rules)))
|
||||||
|
|
Loading…
Reference in a new issue