From bd22e9a3363090a5e4f851670b0650349a528749 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 18 Jan 2016 12:47:31 +0100 Subject: [PATCH 1/2] [fix] pep8 compatibility --- manage.sh | 5 ++++- requirements-dev.txt | 2 +- searx/autocomplete.py | 6 ++---- searx/engines/blekko_images.py | 2 +- searx/engines/btdigg.py | 2 +- searx/engines/deviantart.py | 2 +- searx/engines/digg.py | 2 +- searx/engines/faroo.py | 2 +- searx/engines/searchcode_code.py | 4 ++-- searx/engines/searchcode_doc.py | 4 ++-- searx/engines/stackoverflow.py | 2 +- searx/engines/startpage.py | 8 ++++---- searx/engines/wikidata.py | 4 ++-- searx/engines/www1x.py | 2 +- searx/engines/xpath.py | 4 ++-- searx/engines/yandex.py | 2 +- searx/plugins/https_rewrite.py | 4 ++-- searx/poolrequests.py | 2 +- 18 files changed, 30 insertions(+), 29 deletions(-) diff --git a/manage.sh b/manage.sh index 0cff4b352..89d1ddb0e 100755 --- a/manage.sh +++ b/manage.sh @@ -16,7 +16,10 @@ update_dev_packages() { pep8_check() { echo '[!] Running pep8 check' - pep8 --max-line-length=120 "$SEARX_DIR" "$BASE_DIR/tests" + # ignored rules: + # E402 module level import not at top of file + # W503 line break before binary operator + pep8 --max-line-length=120 --ignore "E402,W503" "$SEARX_DIR" "$BASE_DIR/tests" } unit_tests() { diff --git a/requirements-dev.txt b/requirements-dev.txt index d9ec779b3..38be888e0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,7 @@ babel==2.2.0 -flake8==2.5.1 mock==1.0.1 nose2[coverage-plugin] +pep8==1.7.0 plone.testing==4.0.15 robotframework-selenium2library==1.7.4 robotsuite==1.7.0 diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 264d0cc1f..d92dc4246 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -114,8 +114,7 @@ def dbpedia(query): # dbpedia autocompleter, no HTTPS autocomplete_url = 'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' 
- response = get(autocomplete_url - + urlencode(dict(QueryString=query))) + response = get(autocomplete_url + urlencode(dict(QueryString=query))) results = [] @@ -141,8 +140,7 @@ def google(query): # google autocompleter autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&' - response = get(autocomplete_url - + urlencode(dict(q=query))) + response = get(autocomplete_url + urlencode(dict(q=query))) results = [] diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py index 93ac6616b..c0664f390 100644 --- a/searx/engines/blekko_images.py +++ b/searx/engines/blekko_images.py @@ -37,7 +37,7 @@ def request(query, params): c=c) if params['pageno'] != 1: - params['url'] += '&page={pageno}'.format(pageno=(params['pageno']-1)) + params['url'] += '&page={pageno}'.format(pageno=(params['pageno'] - 1)) # let Blekko know we wan't have profiling params['cookies']['tag_lesslogging'] = '1' diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 192ed6ee9..c2b22f003 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -29,7 +29,7 @@ search_url = url + '/search?q={search_term}&p={pageno}' # do search-request def request(query, params): params['url'] = search_url.format(search_term=quote(query), - pageno=params['pageno']-1) + pageno=params['pageno'] - 1) return params diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 60c8d7ea7..135aeb324 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -24,7 +24,7 @@ paging = True # search-url base_url = 'https://www.deviantart.com/' -search_url = base_url+'browse/all/?offset={offset}&{query}' +search_url = base_url + 'browse/all/?offset={offset}&{query}' # do search-request diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 000f66ba2..a10b38bb6 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -22,7 +22,7 @@ paging = True # search-url base_url = 'https://digg.com/' -search_url = 
base_url+'api/search/{query}.json?position={position}&format=html' +search_url = base_url + 'api/search/{query}.json?position={position}&format=html' # specific xpath variables results_xpath = '//article' diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py index 43df14eef..9fa244e77 100644 --- a/searx/engines/faroo.py +++ b/searx/engines/faroo.py @@ -88,7 +88,7 @@ def response(resp): for result in search_res['results']: if result['news']: # timestamp (milliseconds since 1970) - publishedDate = datetime.datetime.fromtimestamp(result['date']/1000.0) # noqa + publishedDate = datetime.datetime.fromtimestamp(result['date'] / 1000.0) # noqa # append news result results.append({'url': result['url'], diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index bd5eb71d2..de8cd43be 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -20,7 +20,7 @@ paging = True # search-url url = 'https://searchcode.com/' -search_url = url+'api/codesearch_I/?{query}&p={pageno}' +search_url = url + 'api/codesearch_I/?{query}&p={pageno}' # special code-endings which are not recognised by the file ending code_endings = {'cs': 'c#', @@ -32,7 +32,7 @@ code_endings = {'cs': 'c#', # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), - pageno=params['pageno']-1) + pageno=params['pageno'] - 1) # Disable SSL verification # error: (60) SSL certificate problem: unable to get local issuer diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index 9453f31a4..f24fe6f90 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -19,13 +19,13 @@ paging = True # search-url url = 'https://searchcode.com/' -search_url = url+'api/search_IV/?{query}&p={pageno}' +search_url = url + 'api/search_IV/?{query}&p={pageno}' # do search-request def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), - 
pageno=params['pageno']-1) + pageno=params['pageno'] - 1) # Disable SSL verification # error: (60) SSL certificate problem: unable to get local issuer diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index 34ecabae7..fdd3711a9 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -22,7 +22,7 @@ paging = True # search-url url = 'https://stackoverflow.com/' -search_url = url+'search?{query}&page={pageno}' +search_url = url + 'search?{query}&page={pageno}' # specific xpath variables results_xpath = '//div[contains(@class,"question-summary")]' diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index a91cafa00..52dd0b92f 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -90,8 +90,8 @@ def response(resp): # check if search result starts with something like: "2 Sep 2014 ... " if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): - date_pos = content.find('...')+4 - date_string = content[0:date_pos-5] + date_pos = content.find('...') + 4 + date_string = content[0:date_pos - 5] published_date = parser.parse(date_string, dayfirst=True) # fix content string @@ -99,8 +99,8 @@ def response(resp): # check if search result starts with something like: "5 days ago ... " elif re.match("^[0-9]+ days? ago \.\.\. 
", content): - date_pos = content.find('...')+4 - date_string = content[0:date_pos-5] + date_pos = content.find('...') + 4 + date_string = content[0:date_pos - 5] # calculate datetime published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index fc840d47c..9f3496b72 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -295,7 +295,7 @@ def get_geolink(claims, propertyName, defaultValue=''): if precision < 0.0003: zoom = 19 else: - zoom = int(15 - precision*8.8322 + precision*precision*0.625447) + zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447) url = url_map\ .replace('{latitude}', str(value.get('latitude', 0)))\ @@ -318,6 +318,6 @@ def get_wikilink(result, wikiid): def get_wiki_firstlanguage(result, wikipatternid): for k in result.get('sitelinks', {}).keys(): - if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)): + if k.endswith(wikipatternid) and len(k) == (2 + len(wikipatternid)): return k[0:2] return None diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index ddb79bfea..1269a5422 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -22,7 +22,7 @@ paging = False # search-url base_url = 'https://1x.com' -search_url = base_url+'/backend/search.php?{query}' +search_url = base_url + '/backend/search.php?{query}' # do search-request diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 1a599dc0a..f51634be0 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -43,7 +43,7 @@ def extract_url(xpath_results, search_url): if url.startswith('//'): # add http or https to this kind of url //example.com/ parsed_search_url = urlparse(search_url) - url = parsed_search_url.scheme+url + url = parsed_search_url.scheme + url elif url.startswith('/'): # fix relative url to the search engine url = urljoin(search_url, url) @@ -69,7 +69,7 @@ def normalize_url(url): p = 
parsed_url.path mark = p.find('/**') if mark != -1: - return unquote(p[mark+3:]).decode('utf-8') + return unquote(p[mark + 3:]).decode('utf-8') return url diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py index edc6ad5f2..938fdd184 100644 --- a/searx/engines/yandex.py +++ b/searx/engines/yandex.py @@ -38,7 +38,7 @@ content_xpath = './/div[@class="serp-item__text"]//text()' def request(query, params): lang = params['language'].split('_')[0] host = base_url.format(tld=language_map.get(lang) or default_tld) - params['url'] = host + search_url.format(page=params['pageno']-1, + params['url'] = host + search_url.format(page=params['pageno'] - 1, query=urlencode({'text': query})) return params diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py index a24f15a28..0a58cc85d 100644 --- a/searx/plugins/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -103,10 +103,10 @@ def load_single_https_ruleset(rules_path): # into a valid python regex group rule_from = ruleset.attrib['from'].replace('$', '\\') if rule_from.endswith('\\'): - rule_from = rule_from[:-1]+'$' + rule_from = rule_from[:-1] + '$' rule_to = ruleset.attrib['to'].replace('$', '\\') if rule_to.endswith('\\'): - rule_to = rule_to[:-1]+'$' + rule_to = rule_to[:-1] + '$' # TODO, not working yet because of the hack above, # currently doing that in webapp.py diff --git a/searx/poolrequests.py b/searx/poolrequests.py index 4761f6ae8..13c6a906e 100644 --- a/searx/poolrequests.py +++ b/searx/poolrequests.py @@ -92,7 +92,7 @@ def head(url, **kwargs): return request('head', url, **kwargs) -def post(url, data=None, **kwargs): +def post(url, data=None, **kwargs): return request('post', url, data=data, **kwargs) From 09b7673fbd271349b6878959bd2e1ae846981e13 Mon Sep 17 00:00:00 2001 From: Adam Tauber Date: Mon, 18 Jan 2016 13:08:46 +0100 Subject: [PATCH 2/2] [fix] temporarily disable Google's inner links - #491 --- searx/engines/google.py | 44 ++++++++++++++++++++--------------------- 1 
file changed, 22 insertions(+), 22 deletions(-) diff --git a/searx/engines/google.py b/searx/engines/google.py index e6dacc3a8..dbca205a1 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -209,29 +209,29 @@ def response(resp): parsed_url = urlparse(url, google_hostname) # map result - if ((parsed_url.netloc == google_hostname and parsed_url.path.startswith(maps_path)) - or (parsed_url.netloc.startswith(map_hostname_start))): - x = result.xpath(map_near) - if len(x) > 0: - # map : near the location - results = results + parse_map_near(parsed_url, x, google_hostname) - else: - # map : detail about a location - results = results + parse_map_detail(parsed_url, result, google_hostname) + if parsed_url.netloc == google_hostname: + # TODO fix inside links + continue + # if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start): + # print "yooooo"*30 + # x = result.xpath(map_near) + # if len(x) > 0: + # # map : near the location + # results = results + parse_map_near(parsed_url, x, google_hostname) + # else: + # # map : detail about a location + # results = results + parse_map_detail(parsed_url, result, google_hostname) + # # google news + # elif parsed_url.path == search_path: + # # skipping news results + # pass - # google news - elif (parsed_url.netloc == google_hostname - and parsed_url.path == search_path): - # skipping news results - pass - - # images result - elif (parsed_url.netloc == google_hostname - and parsed_url.path == images_path): - # only thumbnail image provided, - # so skipping image results - # results = results + parse_images(result, google_hostname) - pass + # # images result + # elif parsed_url.path == images_path: + # # only thumbnail image provided, + # # so skipping image results + # # results = results + parse_images(result, google_hostname) + # pass else: # normal result