add language support for qwant

closes issue #863
This commit is contained in:
marc 2017-02-24 20:21:48 -06:00 committed by Adam Tauber
parent e4d46d21c7
commit 805fb02ed1
5 changed files with 54 additions and 9 deletions

File diff suppressed because one or more lines are too long

View file

@ -20,6 +20,7 @@ from searx.utils import html_to_text
categories = None categories = None
paging = True paging = True
language_support = True language_support = True
supported_languages_url = 'https://qwant.com/region'
category_to_keyword = {'general': 'web', category_to_keyword = {'general': 'web',
'images': 'images', 'images': 'images',
@ -46,6 +47,13 @@ def request(query, params):
# add language tag if specified # add language tag if specified
if params['language'] != 'all': if params['language'] != 'all':
if params['language'].find('-') < 0:
# tries to get a country code from language
for lang in supported_languages:
lc = lang.split('-')
if params['language'] == lc[0]:
params['language'] = lang
break
params['url'] += '&locale=' + params['language'].replace('-', '_').lower() params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
return params return params
@ -96,5 +104,21 @@ def response(resp):
'publishedDate': published_date, 'publishedDate': published_date,
'content': content}) 'content': content})
# return results
return results return results
# get supported languages from their site
def _fetch_supported_languages(resp):
# list of regions is embedded in page as a js object
response_text = resp.text
response_text = response_text[response_text.find('regionalisation'):]
response_text = response_text[response_text.find('{'):response_text.find(');')]
regions_json = loads(response_text)
supported_languages = []
for lang in regions_json['languages'].values():
for country in lang['countries']:
supported_languages.append(lang['code'] + '-' + country)
return supported_languages

View file

@ -5,9 +5,6 @@
language_codes = ( language_codes = (
(u"ar-SA", u"العربية", u"", u"Arabic"), (u"ar-SA", u"العربية", u"", u"Arabic"),
(u"bg-BG", u"Български", u"", u"Bulgarian"), (u"bg-BG", u"Български", u"", u"Bulgarian"),
(u"ca", u"Català", u"", u"Catalan"),
(u"ca-CT", u"Català", u"", u"Catalan"),
(u"ca-ES", u"Català", u"Espanya", u"Catalan"),
(u"cs-CZ", u"Čeština", u"", u"Czech"), (u"cs-CZ", u"Čeština", u"", u"Czech"),
(u"da-DK", u"Dansk", u"", u"Danish"), (u"da-DK", u"Dansk", u"", u"Danish"),
(u"de", u"Deutsch", u"", u"German"), (u"de", u"Deutsch", u"", u"German"),
@ -18,7 +15,9 @@ language_codes = (
(u"en", u"English", u"", u"English"), (u"en", u"English", u"", u"English"),
(u"en-AU", u"English", u"Australia", u"English"), (u"en-AU", u"English", u"Australia", u"English"),
(u"en-CA", u"English", u"Canada", u"English"), (u"en-CA", u"English", u"Canada", u"English"),
(u"en-CY", u"English", u"Cyprus", u"English"),
(u"en-GB", u"English", u"United Kingdom", u"English"), (u"en-GB", u"English", u"United Kingdom", u"English"),
(u"en-GD", u"English", u"Grenada", u"English"),
(u"en-ID", u"English", u"Indonesia", u"English"), (u"en-ID", u"English", u"Indonesia", u"English"),
(u"en-IE", u"English", u"Ireland", u"English"), (u"en-IE", u"English", u"Ireland", u"English"),
(u"en-IN", u"English", u"India", u"English"), (u"en-IN", u"English", u"India", u"English"),
@ -54,10 +53,10 @@ language_codes = (
(u"ko-KR", u"한국어", u"", u"Korean"), (u"ko-KR", u"한국어", u"", u"Korean"),
(u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
(u"lv-LV", u"Latviešu", u"", u"Latvian"), (u"lv-LV", u"Latviešu", u"", u"Latvian"),
(u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
(u"nl", u"Nederlands", u"", u"Dutch"), (u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"), (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"no-NO", u"Norsk", u"", u"Norwegian"),
(u"pl-PL", u"Polski", u"", u"Polish"), (u"pl-PL", u"Polski", u"", u"Polish"),
(u"pt", u"Português", u"", u"Portuguese"), (u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"), (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
@ -69,7 +68,6 @@ language_codes = (
(u"sv-SE", u"Svenska", u"", u"Swedish"), (u"sv-SE", u"Svenska", u"", u"Swedish"),
(u"th-TH", u"ไทย", u"", u"Thai"), (u"th-TH", u"ไทย", u"", u"Thai"),
(u"tr-TR", u"Türkçe", u"", u"Turkish"), (u"tr-TR", u"Türkçe", u"", u"Turkish"),
(u"uk-UA", u"Українська", u"", u"Ukrainian"),
(u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
(u"zh", u"中文", u"", u"Chinese"), (u"zh", u"中文", u"", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"), (u"zh-CN", u"中文", u"中国", u"Chinese"),

View file

@ -25,6 +25,11 @@ class TestQwantEngine(SearxTestCase):
self.assertFalse('fr' in params['url']) self.assertFalse('fr' in params['url'])
self.assertIn('news', params['url']) self.assertIn('news', params['url'])
qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
dicto['language'] = 'fr'
params = qwant.request(query, dicto)
self.assertIn('fr_fr', params['url'])
def test_response(self): def test_response(self):
self.assertRaises(AttributeError, qwant.response, None) self.assertRaises(AttributeError, qwant.response, None)
self.assertRaises(AttributeError, qwant.response, []) self.assertRaises(AttributeError, qwant.response, [])
@ -315,3 +320,19 @@ class TestQwantEngine(SearxTestCase):
results = qwant.response(response) results = qwant.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
    # Page fragment with the region object embedded as a js call argument:
    # two languages with three and two countries respectively.
    page = """some code...
config_set('project.regionalisation', {"continents":{},"languages":
{"de":{"code":"de","name":"Deutsch","countries":["DE","CH","AT"]},
"it":{"code":"it","name":"Italiano","countries":["IT","CH"]}}});
some more code..."""
    fake_response = mock.Mock(text=page)

    languages = qwant._fetch_supported_languages(fake_response)

    # One entry per (language, country) pair: 3 + 2 = 5.
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 5)
    for expected_code in ('de-DE', 'de-CH', 'de-AT', 'it-IT', 'it-CH'):
        self.assertIn(expected_code, languages)

View file

@ -14,7 +14,8 @@ from json import loads, dumps
import io import io
from sys import path from sys import path
path.append('../searx') # noqa path.append('../searx') # noqa
from searx.engines import engines from searx import settings
from searx.engines import initialize_engines, engines
# Geonames API for country names. # Geonames API for country names.
geonames_user = '' # ADD USER NAME HERE geonames_user = '' # ADD USER NAME HERE
@ -77,6 +78,7 @@ def get_country_name(locale):
# Fetches supported languages for each engine and writes json file with those. # Fetches supported languages for each engine and writes json file with those.
def fetch_supported_languages(): def fetch_supported_languages():
initialize_engines(settings['engines'])
for engine_name in engines: for engine_name in engines:
if hasattr(engines[engine_name], 'fetch_supported_languages'): if hasattr(engines[engine_name], 'fetch_supported_languages'):
try: try:
@ -117,7 +119,7 @@ def join_language_lists():
languages[lang]['counter'].append(engine_name) languages[lang]['counter'].append(engine_name)
# filter list to include only languages supported by most engines # filter list to include only languages supported by most engines
min_supported_engines = int(0.75 * len(engines_languages)) min_supported_engines = int(0.70 * len(engines_languages))
languages = {code: lang for code, lang languages = {code: lang for code, lang
in languages.iteritems() in languages.iteritems()
if len(lang.get('counter', [])) >= min_supported_engines or if len(lang.get('counter', [])) >= min_supported_engines or