forked from Ponysearch/Ponysearch
parent
e4d46d21c7
commit
805fb02ed1
5 changed files with 54 additions and 9 deletions
File diff suppressed because one or more lines are too long
|
@ -20,6 +20,7 @@ from searx.utils import html_to_text
|
||||||
categories = None
|
categories = None
|
||||||
paging = True
|
paging = True
|
||||||
language_support = True
|
language_support = True
|
||||||
|
supported_languages_url = 'https://qwant.com/region'
|
||||||
|
|
||||||
category_to_keyword = {'general': 'web',
|
category_to_keyword = {'general': 'web',
|
||||||
'images': 'images',
|
'images': 'images',
|
||||||
|
@ -46,6 +47,13 @@ def request(query, params):
|
||||||
|
|
||||||
# add language tag if specified
|
# add language tag if specified
|
||||||
if params['language'] != 'all':
|
if params['language'] != 'all':
|
||||||
|
if params['language'].find('-') < 0:
|
||||||
|
# tries to get a country code from language
|
||||||
|
for lang in supported_languages:
|
||||||
|
lc = lang.split('-')
|
||||||
|
if params['language'] == lc[0]:
|
||||||
|
params['language'] = lang
|
||||||
|
break
|
||||||
params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
|
params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
@ -96,5 +104,21 @@ def response(resp):
|
||||||
'publishedDate': published_date,
|
'publishedDate': published_date,
|
||||||
'content': content})
|
'content': content})
|
||||||
|
|
||||||
# return results
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
# get supported languages from their site
|
||||||
|
def _fetch_supported_languages(resp):
    """Extract the supported ``language-COUNTRY`` codes from the region page.

    The list of regions is embedded in the page as a JavaScript object
    literal that follows the first occurrence of the word
    ``regionalisation``. That object is parsed as JSON and every language is
    combined with each of its countries.

    :param resp: response object whose ``text`` attribute holds the page
        source.
    :returns: list of strings of the form ``<language code>-<country>``,
        one per (language, country) pair found under the ``languages`` key.
    :raises ValueError: if the ``regionalisation`` marker or the opening
        brace cannot be found, or if the embedded object is not valid JSON.
    :raises KeyError: if the parsed object lacks the ``languages``, ``code``
        or ``countries`` keys.
    """
    # Local import keeps this function independent of the module's import list.
    from json import JSONDecoder

    page = resp.text

    marker_pos = page.find('regionalisation')
    if marker_pos < 0:
        raise ValueError("'regionalisation' marker not found in response")

    json_start = page.find('{', marker_pos)
    if json_start < 0:
        raise ValueError("no JSON object found after 'regionalisation' marker")

    # raw_decode() stops at the end of the first complete JSON value, so the
    # result does not depend on where the next ');' happens to be (that
    # sequence may legitimately occur inside a string value).
    regions_json, _ = JSONDecoder().raw_decode(page[json_start:])

    supported_languages = []
    for lang in regions_json['languages'].values():
        supported_languages.extend(lang['code'] + '-' + country
                                   for country in lang['countries'])

    return supported_languages
|
||||||
|
|
|
@ -5,9 +5,6 @@
|
||||||
language_codes = (
|
language_codes = (
|
||||||
(u"ar-SA", u"العربية", u"", u"Arabic"),
|
(u"ar-SA", u"العربية", u"", u"Arabic"),
|
||||||
(u"bg-BG", u"Български", u"", u"Bulgarian"),
|
(u"bg-BG", u"Български", u"", u"Bulgarian"),
|
||||||
(u"ca", u"Català", u"", u"Catalan"),
|
|
||||||
(u"ca-CT", u"Català", u"", u"Catalan"),
|
|
||||||
(u"ca-ES", u"Català", u"Espanya", u"Catalan"),
|
|
||||||
(u"cs-CZ", u"Čeština", u"", u"Czech"),
|
(u"cs-CZ", u"Čeština", u"", u"Czech"),
|
||||||
(u"da-DK", u"Dansk", u"", u"Danish"),
|
(u"da-DK", u"Dansk", u"", u"Danish"),
|
||||||
(u"de", u"Deutsch", u"", u"German"),
|
(u"de", u"Deutsch", u"", u"German"),
|
||||||
|
@ -18,7 +15,9 @@ language_codes = (
|
||||||
(u"en", u"English", u"", u"English"),
|
(u"en", u"English", u"", u"English"),
|
||||||
(u"en-AU", u"English", u"Australia", u"English"),
|
(u"en-AU", u"English", u"Australia", u"English"),
|
||||||
(u"en-CA", u"English", u"Canada", u"English"),
|
(u"en-CA", u"English", u"Canada", u"English"),
|
||||||
|
(u"en-CY", u"English", u"Cyprus", u"English"),
|
||||||
(u"en-GB", u"English", u"United Kingdom", u"English"),
|
(u"en-GB", u"English", u"United Kingdom", u"English"),
|
||||||
|
(u"en-GD", u"English", u"Grenada", u"English"),
|
||||||
(u"en-ID", u"English", u"Indonesia", u"English"),
|
(u"en-ID", u"English", u"Indonesia", u"English"),
|
||||||
(u"en-IE", u"English", u"Ireland", u"English"),
|
(u"en-IE", u"English", u"Ireland", u"English"),
|
||||||
(u"en-IN", u"English", u"India", u"English"),
|
(u"en-IN", u"English", u"India", u"English"),
|
||||||
|
@ -54,10 +53,10 @@ language_codes = (
|
||||||
(u"ko-KR", u"한국어", u"", u"Korean"),
|
(u"ko-KR", u"한국어", u"", u"Korean"),
|
||||||
(u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
|
(u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
|
||||||
(u"lv-LV", u"Latviešu", u"", u"Latvian"),
|
(u"lv-LV", u"Latviešu", u"", u"Latvian"),
|
||||||
|
(u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
|
||||||
(u"nl", u"Nederlands", u"", u"Dutch"),
|
(u"nl", u"Nederlands", u"", u"Dutch"),
|
||||||
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
|
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
|
||||||
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
|
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
|
||||||
(u"no-NO", u"Norsk", u"", u"Norwegian"),
|
|
||||||
(u"pl-PL", u"Polski", u"", u"Polish"),
|
(u"pl-PL", u"Polski", u"", u"Polish"),
|
||||||
(u"pt", u"Português", u"", u"Portuguese"),
|
(u"pt", u"Português", u"", u"Portuguese"),
|
||||||
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
|
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
|
||||||
|
@ -69,7 +68,6 @@ language_codes = (
|
||||||
(u"sv-SE", u"Svenska", u"", u"Swedish"),
|
(u"sv-SE", u"Svenska", u"", u"Swedish"),
|
||||||
(u"th-TH", u"ไทย", u"", u"Thai"),
|
(u"th-TH", u"ไทย", u"", u"Thai"),
|
||||||
(u"tr-TR", u"Türkçe", u"", u"Turkish"),
|
(u"tr-TR", u"Türkçe", u"", u"Turkish"),
|
||||||
(u"uk-UA", u"Українська", u"", u"Ukrainian"),
|
|
||||||
(u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
|
(u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
|
||||||
(u"zh", u"中文", u"", u"Chinese"),
|
(u"zh", u"中文", u"", u"Chinese"),
|
||||||
(u"zh-CN", u"中文", u"中国", u"Chinese"),
|
(u"zh-CN", u"中文", u"中国", u"Chinese"),
|
||||||
|
|
|
@ -25,6 +25,11 @@ class TestQwantEngine(SearxTestCase):
|
||||||
self.assertFalse('fr' in params['url'])
|
self.assertFalse('fr' in params['url'])
|
||||||
self.assertIn('news', params['url'])
|
self.assertIn('news', params['url'])
|
||||||
|
|
||||||
|
qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
|
||||||
|
dicto['language'] = 'fr'
|
||||||
|
params = qwant.request(query, dicto)
|
||||||
|
self.assertIn('fr_fr', params['url'])
|
||||||
|
|
||||||
def test_response(self):
|
def test_response(self):
|
||||||
self.assertRaises(AttributeError, qwant.response, None)
|
self.assertRaises(AttributeError, qwant.response, None)
|
||||||
self.assertRaises(AttributeError, qwant.response, [])
|
self.assertRaises(AttributeError, qwant.response, [])
|
||||||
|
@ -315,3 +320,19 @@ class TestQwantEngine(SearxTestCase):
|
||||||
results = qwant.response(response)
|
results = qwant.response(response)
|
||||||
self.assertEqual(type(results), list)
|
self.assertEqual(type(results), list)
|
||||||
self.assertEqual(len(results), 0)
|
self.assertEqual(len(results), 0)
|
||||||
|
|
||||||
|
def test_fetch_supported_languages(self):
    """Check that language-country codes are extracted from a region page.

    A fake page embedding two languages (three and two countries) is fed
    to ``qwant._fetch_supported_languages``; the result must be a list with
    exactly the five expected ``language-COUNTRY`` combinations.
    """
    page = """some code...
config_set('project.regionalisation', {"continents":{},"languages":
{"de":{"code":"de","name":"Deutsch","countries":["DE","CH","AT"]},
"it":{"code":"it","name":"Italiano","countries":["IT","CH"]}}});
some more code..."""
    fake_resp = mock.Mock(text=page)

    found = qwant._fetch_supported_languages(fake_resp)

    self.assertEqual(type(found), list)
    self.assertEqual(len(found), 5)
    # every language/country pair present in the fake page must be reported
    for expected_code in ('de-DE', 'de-CH', 'de-AT', 'it-IT', 'it-CH'):
        self.assertIn(expected_code, found)
|
||||||
|
|
|
@ -14,7 +14,8 @@ from json import loads, dumps
|
||||||
import io
|
import io
|
||||||
from sys import path
|
from sys import path
|
||||||
path.append('../searx') # noqa
|
path.append('../searx') # noqa
|
||||||
from searx.engines import engines
|
from searx import settings
|
||||||
|
from searx.engines import initialize_engines, engines
|
||||||
|
|
||||||
# Geonames API for country names.
|
# Geonames API for country names.
|
||||||
geonames_user = '' # ADD USER NAME HERE
|
geonames_user = '' # ADD USER NAME HERE
|
||||||
|
@ -77,6 +78,7 @@ def get_country_name(locale):
|
||||||
|
|
||||||
# Fetches supported languages for each engine and writes a json file with them.
|
# Fetches supported languages for each engine and writes a json file with them.
|
||||||
def fetch_supported_languages():
|
def fetch_supported_languages():
|
||||||
|
initialize_engines(settings['engines'])
|
||||||
for engine_name in engines:
|
for engine_name in engines:
|
||||||
if hasattr(engines[engine_name], 'fetch_supported_languages'):
|
if hasattr(engines[engine_name], 'fetch_supported_languages'):
|
||||||
try:
|
try:
|
||||||
|
@ -117,7 +119,7 @@ def join_language_lists():
|
||||||
languages[lang]['counter'].append(engine_name)
|
languages[lang]['counter'].append(engine_name)
|
||||||
|
|
||||||
# filter list to include only languages supported by most engines
|
# filter list to include only languages supported by most engines
|
||||||
min_supported_engines = int(0.75 * len(engines_languages))
|
min_supported_engines = int(0.70 * len(engines_languages))
|
||||||
languages = {code: lang for code, lang
|
languages = {code: lang for code, lang
|
||||||
in languages.iteritems()
|
in languages.iteritems()
|
||||||
if len(lang.get('counter', [])) >= min_supported_engines or
|
if len(lang.get('counter', [])) >= min_supported_engines or
|
||||||
|
|
Loading…
Reference in a new issue