Merge pull request #748 from a01200356/languages

[mod] Allow users to search in most engine supported languages
This commit is contained in:
Adam Tauber 2016-12-28 20:09:57 +01:00 committed by GitHub
commit 9743bde25e
54 changed files with 983 additions and 153 deletions

View file

@ -43,7 +43,7 @@ generally made searx better:
- Kang-min Liu
- Kirill Isakov
- Guilhem Bonnefille
- Marc Abonce Seguin
- Marc Abonce Seguin @a01200356
- @jibe-b
- Christian Pietsch @pietsch
- @Maxqia
@ -55,7 +55,6 @@ generally made searx better:
- Ammar Najjar @ammarnajjar
- @stepshal
- François Revol @mmuman
- marc @a01200356
- Harry Wood @harry-wood
- Thomas Renard @threnard
- Pydo `<https://github.com/pydo>`_

View file

@ -81,17 +81,17 @@ def searx_bang(full_query):
engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc)
lang_id, lang_name, country, english_name = map(str.lower, lc)
# check if query starts with language-id
if lang_id.startswith(engine_query):
if len(engine_query) <= 2:
results.append(':{lang_id}'.format(lang_id=lang_id.split('_')[0]))
results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
else:
results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name
if lang_name.startswith(engine_query):
if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country

File diff suppressed because one or more lines are too long

View file

@ -20,6 +20,8 @@ from os.path import realpath, dirname
import sys
from flask_babel import gettext
from operator import itemgetter
from json import loads
from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
@ -33,10 +35,13 @@ engines = {}
categories = {'general': []}
languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
engine_shortcuts = {}
engine_default_args = {'paging': False,
'categories': ['general'],
'language_support': True,
'supported_languages': [],
'safesearch': False,
'timeout': settings['outgoing']['request_timeout'],
'shortcut': '-',
@ -85,6 +90,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr))
sys.exit(1)
# assign supported languages from json file
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
setattr(engine, 'fetch_supported_languages',
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
engine.stats = {
'result_count': 0,
'search_count': 0,

View file

@ -29,8 +29,8 @@ xpath_link = './/div[@class="mw-search-result-heading"]/a'
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
def locale_to_lang_code(locale):
if locale.find('_') >= 0:
locale = locale.split('_')[0]
if locale.find('-') >= 0:
locale = locale.split('-')[0]
return locale
@ -95,6 +95,7 @@ main_langs = {
'uk': 'Українська',
'zh': '简体中文'
}
supported_languages = dict(lang_urls, **main_langs)
# do search-request

View file

@ -21,6 +21,7 @@ from searx.engines.xpath import extract_text
categories = ['general']
paging = True
language_support = True
supported_languages_url = 'https://www.bing.com/account/general'
# search-url
base_url = 'https://www.bing.com/'
@ -32,7 +33,7 @@ def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
if params['language'] != 'all':
query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
query = u'language:{} {}'.format(params['language'].split('-')[0].upper(),
query.decode('utf-8')).encode('utf-8')
search_path = search_string.format(
@ -81,3 +82,15 @@ def response(resp):
# return results
return results
# get supported languages from their site
def _fetch_supported_languages(resp):
    """Scrape Bing's account/general settings page for its language codes."""
    dom = html.fromstring(resp.text)
    inputs = dom.xpath('//div[@id="limit-languages"]//input')
    # each checkbox id holds a language code using '_' as the separator
    return [node.xpath('./@id')[0].replace('_', '-') for node in inputs]

View file

@ -19,6 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['images']
@ -53,7 +54,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
language = params['language'].replace('_', '-')
language = params['language']
search_path = search_string.format(
query=urlencode({'q': query}),

View file

@ -17,6 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['news']
@ -74,7 +75,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en-US'
else:
language = params['language'].replace('_', '-')
language = params['language']
params['url'] = _get_url(query, language, offset, params['time_range'])

View file

@ -15,6 +15,7 @@
from urllib import urlencode
from json import loads
from datetime import datetime
from requests import get
# engine dependent config
categories = ['videos']
@ -27,6 +28,8 @@ search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,descr
embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\
'data-src="//www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>'
supported_languages_url = 'https://api.dailymotion.com/languages'
# do search-request
def request(query, params):
@ -74,3 +77,22 @@ def response(resp):
# return results
return results
# get supported languages from their site
def _fetch_supported_languages(resp):
supported_languages = {}
response_json = loads(resp.text)
for language in response_json['list']:
supported_languages[language['code']] = {}
name = language['native_name']
if name:
supported_languages[language['code']]['name'] = name
english_name = language['name']
if english_name:
supported_languages[language['code']]['english_name'] = english_name
return supported_languages

View file

@ -15,13 +15,15 @@
from urllib import urlencode
from lxml.html import fromstring
from requests import get
from json import loads
from searx.engines.xpath import extract_text
from searx.languages import language_codes
# engine dependent config
categories = ['general']
paging = True
language_support = True
supported_languages_url = 'https://duckduckgo.com/d2030.js'
time_range_support = True
# search-url
@ -46,19 +48,31 @@ def request(query, params):
offset = (params['pageno'] - 1) * 30
# custom fixes for languages
if params['language'] == 'all':
locale = None
elif params['language'][:2] == 'ja':
locale = 'jp-jp'
elif params['language'][:2] == 'sl':
locale = 'sl-sl'
elif params['language'] == 'zh-TW':
locale = 'tw-tzh'
elif params['language'] == 'zh-HK':
locale = 'hk-tzh'
elif params['language'][-2:] == 'SA':
locale = 'xa-' + params['language'].split('-')[0]
elif params['language'][-2:] == 'GB':
locale = 'uk-' + params['language'].split('-')[0]
else:
locale = params['language'].split('_')
locale = params['language'].split('-')
if len(locale) == 2:
# country code goes first
locale = locale[1].lower() + '-' + locale[0].lower()
else:
# tries to get a country code from language
locale = locale[0].lower()
lang_codes = [x[0] for x in language_codes]
for lc in lang_codes:
lc = lc.split('_')
for lc in supported_languages:
lc = lc.split('-')
if locale == lc[0]:
locale = lc[1].lower() + '-' + lc[0].lower()
break
@ -102,3 +116,17 @@ def response(resp):
# return results
return results
# get supported languages from their site
def _fetch_supported_languages(resp):
# response is a js file with regions as an embedded object
response_page = resp.text
response_page = response_page[response_page.find('regions:{') + 8:]
response_page = response_page[:response_page.find('}') + 1]
regions_json = loads(response_page)
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
return supported_languages

View file

@ -4,6 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@ -23,7 +24,7 @@ def result_to_text(url, text, htmlResult):
def request(query, params):
params['url'] = url.format(query=urlencode({'q': query}))
params['headers']['Accept-Language'] = params['language']
params['headers']['Accept-Language'] = params['language'].split('-')[0]
return params

View file

@ -14,6 +14,7 @@ from json import loads
from random import randint
from time import time
from urllib import urlencode
from lxml.html import fromstring
# engine dependent config
categories = ['general']
@ -40,6 +41,8 @@ url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'
supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
# do search-request
def request(query, params):
@ -48,7 +51,9 @@ def request(query, params):
if params['language'] == 'all':
language = 'xx'
else:
language = params['language'][0:2]
language = params['language'].replace('-', '_').lower()
if language.split('-')[0] != 'zh':
language = language.split('-')[0]
if params['safesearch'] >= 1:
safesearch = 1
@ -82,3 +87,16 @@ def response(resp):
# return results
return results
# get supported languages from their site
def _fetch_supported_languages(resp):
    """Collect the language codes offered in Gigablast's language menu."""
    codes = []
    dom = fromstring(resp.text)
    for anchor in dom.xpath('//span[@id="menu2"]/a'):
        # the last two characters of the href hold the language code
        code = anchor.xpath('./@href')[0][-2:]
        # 'xx' is the "any language" entry; also drop duplicates
        if code != 'xx' and code not in codes:
            codes.append(code)
    return codes

View file

@ -103,6 +103,7 @@ map_hostname_start = 'maps.google.'
maps_path = '/maps'
redirect_path = '/url'
images_path = '/images'
supported_languages_url = 'https://www.google.com/preferences?#languages'
# specific xpath variables
results_xpath = '//div[@class="g"]'
@ -167,8 +168,12 @@ def request(query, params):
language = 'en'
country = 'US'
url_lang = ''
elif params['language'][:2] == 'jv':
language = 'jw'
country = 'ID'
url_lang = 'lang_jw'
else:
language_array = params['language'].lower().split('_')
language_array = params['language'].lower().split('-')
if len(language_array) == 2:
country = language_array[1]
else:
@ -355,3 +360,16 @@ def attributes_to_html(attributes):
retval = retval + '<tr><th>' + a.get('label') + '</th><td>' + value + '</td></tr>'
retval = retval + '</table>'
return retval
# get supported languages from their site
def _fetch_supported_languages(resp):
    """Scrape Google's preferences page for its interface languages."""
    dom = html.fromstring(resp.text)
    spans = dom.xpath('//table//td/font/label/span')
    # span id is the code prefixed by one character; span text is the name
    return {span.xpath('./@id')[0][1:]: {"name": span.text.title()}
            for span in spans}

View file

@ -12,6 +12,8 @@
from lxml import html
from urllib import urlencode
from json import loads
from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url
categories = ['news']
@ -50,7 +52,7 @@ def request(query, params):
search_options=urlencode(search_options))
if params['language'] != 'all':
language_array = params['language'].lower().split('_')
language_array = params['language'].lower().split('-')
params['url'] += '&lr=lang_' + language_array[0]
return params

View file

@ -46,7 +46,7 @@ def request(query, params):
if params['language'] == 'all':
language = 'en'
else:
language = params['language'].split('_')[0]
language = params['language'].split('-')[0]
# format_string [('https://', 'language', '', None), ('.wikipedia.org/', None, None, None)]
if any(x[1] == 'language' for x in format_strings):

View file

@ -26,7 +26,7 @@ search_string = 'api/?{query}&limit={limit}'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# list of supported languages
allowed_languages = ['de', 'en', 'fr', 'it']
supported_languages = ['de', 'en', 'fr', 'it']
# do search-request
@ -37,7 +37,7 @@ def request(query, params):
if params['language'] != 'all':
language = params['language'].split('_')[0]
if language in allowed_languages:
if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
# using searx User-Agent

View file

@ -46,7 +46,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&locale=' + params['language'].lower()
params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
return params

View file

@ -47,7 +47,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
params['data']['with_language'] = ('lang_' + params['language'].split('-')[0])
return params

View file

@ -22,7 +22,7 @@ language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
search_url = url + 'search/TITLES/{query}&p={pageno}'
search_url = url + 'search/TITLES/{query}?p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
@ -43,10 +43,16 @@ def response(resp):
search_lang = ""
if resp.search_params['language'] != 'all':
search_lang = [lc[1]
# dirty fix for languages named differently on their site
if resp.search_params['language'][:2] == 'fa':
search_lang = 'Farsi'
elif resp.search_params['language'] == 'pt-BR':
search_lang = 'Brazilian'
elif resp.search_params['language'] != 'all':
search_lang = [lc[3]
for lc in language_codes
if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
if lc[0].split('-')[0] == resp.search_params['language'].split('-')[0]]
search_lang = search_lang[0].split(' (')[0]
# parse results
for result in dom.xpath(results_xpath):

View file

@ -13,6 +13,7 @@
from json import loads
from urllib import urlencode, unquote
import re
from lxml.html import fromstring
# engine dependent config
categories = ['general', 'images']
@ -23,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'
supported_languages_url = base_url
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
@ -35,9 +38,11 @@ def request(query, params):
if params['language'] == 'all':
ui_language = 'browser'
region = 'browser'
elif params['language'].split('-')[0] == 'no':
region = 'nb-NO'
else:
region = params['language'].replace('_', '-')
ui_language = params['language'].split('_')[0]
region = params['language']
ui_language = params['language'].split('-')[0]
search_path = search_string.format(
query=urlencode({'query': query,
@ -106,3 +111,15 @@ def response(resp):
# return results
return results
# get supported languages from their site
def _fetch_supported_languages(resp):
    """List the region codes available in Swisscows' region selector."""
    dom = fromstring(resp.text)
    entries = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
    # each entry carries its region code in the data-val attribute
    return [entry.xpath('./@data-val')[0] for entry in entries]

View file

@ -40,7 +40,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('_')[0]
params['cookies']['lang'] = params['language'].split('-')[0]
else:
params['cookies']['lang'] = 'en'

View file

@ -14,6 +14,8 @@
from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads
from lxml.html import fromstring
@ -55,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
def request(query, params):
language = params['language'].split('_')[0]
language = params['language'].split('-')[0]
if language == 'all':
language = 'en'
@ -70,7 +72,7 @@ def response(resp):
html = fromstring(resp.content)
wikidata_ids = html.xpath(wikidata_ids_xpath)
language = resp.search_params['language'].split('_')[0]
language = resp.search_params['language'].split('-')[0]
if language == 'all':
language = 'en'

View file

@ -12,6 +12,8 @@
from json import loads
from urllib import urlencode, quote
from lxml.html import fromstring
# search-url
base_url = 'https://{language}.wikipedia.org/'
@ -24,14 +26,16 @@ search_postfix = 'w/api.php?'\
'&explaintext'\
'&pithumbsize=300'\
'&redirects'
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# set language in base_url
def url_lang(lang):
if lang == 'all':
lang = lang.split('-')[0]
if lang == 'all' or lang not in supported_languages:
language = 'en'
else:
language = lang.split('_')[0]
language = lang
return base_url.format(language=language)
@ -111,3 +115,24 @@ def response(resp):
'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]})
return results
# get supported languages from their site
def _fetch_supported_languages(resp):
    """Parse the meta.wikimedia List_of_Wikipedias page.

    Returns a dict of language code -> {'name', 'english_name', 'articles'},
    restricted to wikis with a substantial number of articles.
    """
    languages = {}
    dom = fromstring(resp.text)
    for table in dom.xpath('//table[contains(@class,"sortable")]'):
        # skip the header row of each table
        for row in table.xpath('.//tr')[1:]:
            cells = row.xpath('./td')
            code = cells[3].xpath('./a')[0].text
            native = cells[2].xpath('./a')[0].text
            english = cells[1].xpath('./a')[0].text
            articles = int(cells[4].xpath('./a/b')[0].text.replace(',', ''))
            # exclude languages with too few articles
            if articles >= 100000:
                languages[code] = {"name": native,
                                   "english_name": english,
                                   "articles": articles}
    return languages

View file

@ -53,7 +53,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&lr=lang_' + params['language'].split('_')[0]
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
return params

View file

@ -27,6 +27,8 @@ base_url = 'https://search.yahoo.com/'
search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'
supported_languages_url = 'https://search.yahoo.com/web/advanced'
# specific xpath variables
results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
url_xpath = './/h3/a/@href'
@ -72,7 +74,13 @@ def _get_url(query, offset, language, time_range):
def _get_language(params):
if params['language'] == 'all':
return 'en'
return params['language'].split('_')[0]
elif params['language'][:2] == 'zh':
if params['language'] == 'zh' or params['language'] == 'zh-CH':
return 'szh'
else:
return 'tzh'
else:
return params['language'].split('-')[0]
# do search-request
@ -132,3 +140,15 @@ def response(resp):
# return results
return results
# get supported languages from their site
def _fetch_supported_languages(resp):
    """Scrape Yahoo's advanced-search page for supported language codes."""
    dom = html.fromstring(resp.text)
    inputs = dom.xpath('//div[@id="yschlang"]/span/label/input')
    # input values look like 'lang_en_US': strip the prefix, normalize separator
    return [node.xpath('./@value')[0][5:].replace('_', '-') for node in inputs]

View file

@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser

View file

@ -22,7 +22,9 @@ language_support = True # TODO
default_tld = 'com'
language_map = {'ru': 'ru',
'ua': 'uk',
'ua': 'ua',
'be': 'by',
'kk': 'kz',
'tr': 'com.tr'}
# search-url
@ -36,7 +38,7 @@ content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m orga
def request(query, params):
lang = params['language'].split('_')[0]
lang = params['language'].split('-')[0]
host = base_url.format(tld=language_map.get(lang) or default_tld)
params['url'] = host + search_url.format(page=params['pageno'] - 1,
query=urlencode({'text': query}))

View file

@ -36,7 +36,7 @@ def request(query, params):
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0]
params['url'] += '&relevanceLanguage=' + params['language'].split('-')[0]
return params

View file

@ -1,78 +1,131 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
# -*- coding: utf-8 -*-
# list of language codes
# this file is generated automatically by utils/update_search_languages.py
language_codes = (
("ar_XA", "Arabic", "Arabia"),
("bg_BG", "Bulgarian", "Bulgaria"),
("cs_CZ", "Czech", "Czech Republic"),
("da_DK", "Danish", "Denmark"),
("de_AT", "German", "Austria"),
("de_CH", "German", "Switzerland"),
("de_DE", "German", "Germany"),
("el_GR", "Greek", "Greece"),
("en_AU", "English", "Australia"),
("en_CA", "English", "Canada"),
("en_GB", "English", "United Kingdom"),
("en_ID", "English", "Indonesia"),
("en_IE", "English", "Ireland"),
("en_IN", "English", "India"),
("en_MY", "English", "Malaysia"),
("en_NZ", "English", "New Zealand"),
("en_PH", "English", "Philippines"),
("en_SG", "English", "Singapore"),
("en_US", "English", "United States"),
("en_XA", "English", "Arabia"),
("en_ZA", "English", "South Africa"),
("es_AR", "Spanish", "Argentina"),
("es_CL", "Spanish", "Chile"),
("es_ES", "Spanish", "Spain"),
("es_MX", "Spanish", "Mexico"),
("es_US", "Spanish", "United States"),
("es_XL", "Spanish", "Latin America"),
("et_EE", "Estonian", "Estonia"),
("fi_FI", "Finnish", "Finland"),
("fr_BE", "French", "Belgium"),
("fr_CA", "French", "Canada"),
("fr_CH", "French", "Switzerland"),
("fr_FR", "French", "France"),
("he_IL", "Hebrew", "Israel"),
("hr_HR", "Croatian", "Croatia"),
("hu_HU", "Hungarian", "Hungary"),
("it_IT", "Italian", "Italy"),
("ja_JP", "Japanese", "Japan"),
("ko_KR", "Korean", "Korea"),
("lt_LT", "Lithuanian", "Lithuania"),
("lv_LV", "Latvian", "Latvia"),
("nb_NO", "Norwegian", "Norway"),
("nl_BE", "Dutch", "Belgium"),
("nl_NL", "Dutch", "Netherlands"),
("oc_OC", "Occitan", "Occitan"),
("pl_PL", "Polish", "Poland"),
("pt_BR", "Portuguese", "Brazil"),
("pt_PT", "Portuguese", "Portugal"),
("ro_RO", "Romanian", "Romania"),
("ru_RU", "Russian", "Russia"),
("sk_SK", "Slovak", "Slovak Republic"),
("sl_SL", "Slovenian", "Slovenia"),
("sv_SE", "Swedish", "Sweden"),
("th_TH", "Thai", "Thailand"),
("tr_TR", "Turkish", "Turkey"),
("uk_UA", "Ukrainian", "Ukraine"),
("zh_CN", "Chinese", "China"),
("zh_HK", "Chinese", "Hong Kong SAR"),
("zh_TW", "Chinese", "Taiwan"))
(u"af", u"Afrikaans", u"", u""),
(u"am", u"አማርኛ", u"", u"Amharic"),
(u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
(u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
(u"be", u"Беларуская", u"", u"Belarusian"),
(u"bg-BG", u"Български", u"България", u"Bulgarian"),
(u"bn", u"বাংলা", u"", u"Bengali"),
(u"br", u"Brezhoneg", u"", u"Breton"),
(u"bs", u"Bosnian", u"", u"Bosnian"),
(u"ca", u"Català", u"", u"Catalan"),
(u"ca-CT", u"Català", u"", u"Catalan"),
(u"ca-ES", u"Català", u"Espanya", u"Catalan"),
(u"ce", u"Нохчийн", u"", u"Chechen"),
(u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
(u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
(u"cy", u"Cymraeg", u"", u"Welsh"),
(u"da-DK", u"Dansk", u"Danmark", u"Danish"),
(u"de", u"Deutsch", u"", u"German"),
(u"de-AT", u"Deutsch", u"Österreich", u"German"),
(u"de-CH", u"Deutsch", u"Schweiz", u"German"),
(u"de-DE", u"Deutsch", u"Deutschland", u"German"),
(u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
(u"en", u"English", u"", u"English"),
(u"en-AU", u"English", u"Australia", u"English"),
(u"en-CA", u"English", u"Canada", u"English"),
(u"en-GB", u"English", u"United Kingdom", u"English"),
(u"en-ID", u"English", u"Indonesia", u"English"),
(u"en-IE", u"English", u"Ireland", u"English"),
(u"en-IN", u"English", u"India", u"English"),
(u"en-MY", u"English", u"Malaysia", u"English"),
(u"en-NZ", u"English", u"New Zealand", u"English"),
(u"en-PH", u"English", u"Philippines", u"English"),
(u"en-SG", u"English", u"Singapore", u"English"),
(u"en-US", u"English", u"United States", u"English"),
(u"en-ZA", u"English", u"South Africa", u"English"),
(u"eo", u"Esperanto", u"", u"Esperanto"),
(u"es", u"Español", u"", u"Spanish"),
(u"es-AR", u"Español", u"Argentina", u"Spanish"),
(u"es-CL", u"Español", u"Chile", u"Spanish"),
(u"es-CO", u"Español", u"Colombia", u"Spanish"),
(u"es-ES", u"Español", u"España", u"Spanish"),
(u"es-MX", u"Español", u"México", u"Spanish"),
(u"es-PE", u"Español", u"Perú", u"Spanish"),
(u"es-US", u"Español", u"Estados Unidos", u"Spanish"),
(u"et-EE", u"Eesti", u"Eesti", u"Estonian"),
(u"eu", u"Euskara", u"", u"Basque"),
(u"fa", u"فارسی", u"", u"Persian"),
(u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
(u"fr", u"Français", u"", u"French"),
(u"fr-BE", u"Français", u"Belgique", u"French"),
(u"fr-CA", u"Français", u"Canada", u"French"),
(u"fr-CH", u"Français", u"Suisse", u"French"),
(u"fr-FR", u"Français", u"France", u"French"),
(u"ga", u"Gaeilge", u"", u"Irish"),
(u"gl", u"Galego", u"", u"Galician"),
(u"gu", u"ગુજરાતી", u"", u"Gujarati"),
(u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
(u"hi", u"हिन्दी", u"", u"Hindi"),
(u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
(u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
(u"hy", u"Հայերեն", u"", u"Armenian"),
(u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
(u"is", u"Íslenska", u"", u""),
(u"it", u"Italiano", u"", u"Italian"),
(u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
(u"it-IT", u"Italiano", u"Italia", u"Italian"),
(u"iw", u"עברית", u"", u""),
(u"ja-JP", u"日本語", u"日本", u"Japanese"),
(u"ka", u"ქართული", u"", u"Georgian"),
(u"kk", u"Қазақша", u"", u"Kazakh"),
(u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
(u"ko-KR", u"한국어", u"대한민국", u"Korean"),
(u"la", u"Latina", u"", u"Latin"),
(u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
(u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
(u"mi", u"Reo Māori", u"", u"Maori"),
(u"min", u"Minangkabau", u"", u"Minangkabau"),
(u"mk", u"Македонски", u"", u"Macedonian"),
(u"mn", u"Монгол", u"", u"Mongolian"),
(u"mr", u"मराठी", u"", u"Marathi"),
(u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
(u"mt", u"Malti", u"", u"Maltese"),
(u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
(u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"nn", u"Nynorsk", u"", u"Norwegian"),
(u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
(u"oc", u"Occitan", u"", u"Occitan"),
(u"or", u"Oriya", u"", u"Oriya"),
(u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
(u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
(u"ps", u"Pushto", u"", u"Pushto"),
(u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
(u"ro-RO", u"Română", u"România", u"Romanian"),
(u"ru-RU", u"Русский", u"Россия", u"Russian"),
(u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
(u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
(u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
(u"sl", u"Slovenščina", u"", u"Slovenian"),
(u"sr", u"Српски / Srpski", u"", u"Serbian"),
(u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
(u"sw", u"Kiswahili", u"", u""),
(u"ta", u"தமிழ்", u"", u"Tamil"),
(u"th-TH", u"ไทย", u"ไทย", u"Thai"),
(u"ti", u"ትግርኛ", u"", u"Tigrinya"),
(u"tl-PH", u"Filipino", u"Pilipinas", u""),
(u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
(u"tt", u"Татарча", u"", u"Tatar"),
(u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
(u"ur", u"اردو", u"", u"Urdu"),
(u"uz", u"Ozbek", u"", u"Uzbek"),
(u"ve", u"Venda", u"", u"Venda"),
(u"vi-VN", u"Tiếng Việt", u"Công Hòa Xã Hội Chủ Nghĩa Việt Nam", u"Vietnamese"),
(u"vo", u"Volapük", u"", u"Volapük"),
(u"wa", u"Walon", u"", u"Walloon"),
(u"war", u"Winaray", u"", u"Waray-Waray"),
(u"xh", u"Xhosa", u"", u"Xhosa"),
(u"zh", u"中文", u"", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"),
(u"zh-HK", u"中文", u"香港", u"Chinese"),
(u"zh-TW", u"中文", u"台湾", u"Chinese"),
(u"zu", u"Isi-Zulu", u"", u"Zulu")
)

View file

@ -95,6 +95,25 @@ class MultipleChoiceSetting(EnumStringSetting):
resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE)
class SearchLanguageSetting(EnumStringSetting):
    """Available choices may change, so user's value may not be in choices anymore"""

    def parse(self, data):
        # Keep the incoming value when it is already a valid choice or equals
        # the current value; otherwise try to map it to a known choice.
        if data not in self.choices and data != self.value:
            # backwards compatibility with old-style cookies ('en_US' -> 'en-US')
            data = str(data).replace('_', '-')
            base_lang = data.split('-')[0]
            if data not in self.choices:
                if base_lang in self.choices:
                    # fall back to the bare language code
                    data = base_lang
                elif data == 'ar-XA':
                    # old Arabic pseudo-region, mapped to Saudi Arabia
                    data = 'ar-SA'
                else:
                    # unknown value: keep the current setting
                    data = self.value

        self.value = data
class MapSetting(Setting):
"""Setting of a value that has to be translated in order to be storable"""
@ -216,8 +235,8 @@ class Preferences(object):
super(Preferences, self).__init__()
self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
'language': EnumStringSetting(settings['search']['language'],
choices=LANGUAGE_CODES),
'language': SearchLanguageSetting(settings['search']['language'],
choices=LANGUAGE_CODES),
'locale': EnumStringSetting(settings['ui']['default_locale'],
choices=settings['locales'].keys() + ['']),
'autocomplete': EnumStringSetting(settings['search']['autocomplete'],

View file

@ -71,21 +71,24 @@ class RawTextQuery(object):
# check if any language-code is equal with
# declared language-codes
for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc)
lang_id, lang_name, country, english_name = map(unicode.lower, lc)
# if correct language-code is found
# set it as new search-language
if lang == lang_id\
or lang_id.startswith(lang)\
or lang == lang_name\
or lang == english_name\
or lang.replace('_', ' ') == country:
parse_next = True
self.languages.append(lang)
break
self.languages.append(lang_id)
# to ensure best match (first match is not necessarily the best one)
if lang == lang_id:
break
# this force a engine or category
if query_part[0] == '!' or query_part[0] == '?':
prefix = query_part[1:].replace('_', ' ')
prefix = query_part[1:].replace('-', ' ')
# check if prefix is equal with engine shortcut
if prefix in engine_shortcuts:

View file

@ -211,10 +211,14 @@ def get_search_query_from_webapp(preferences, form):
# set query
query = raw_text_query.getSearchQuery()
# get last selected language in query, if possible
# set specific language if set on request, query or preferences
# TODO support search with multiple languages
if len(raw_text_query.languages):
query_lang = raw_text_query.languages[-1]
elif 'language' in form:
query_lang = form.get('language')
else:
query_lang = preferences.get_value('language')
query_time_range = form.get('time_range')

View file

@ -15,5 +15,10 @@ $(document).ready(function() {
$('#search_form').submit();
}
});
$('#language').change(function(e) {
if($('#q').val()) {
$('#search_form').submit();
}
});
}
});

View file

@ -13,9 +13,9 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
</select>
</p>

View file

@ -14,9 +14,9 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
</select>
</p>

View file

@ -6,4 +6,5 @@
<div id="advanced-search-container">
{% include 'oscar/categories.html' %}
{% include 'oscar/time-range.html' %}
{% include 'oscar/languages.html' %}
</div>

View file

@ -0,0 +1,12 @@
{% if preferences %}
<select class="form-control" name='language'>
{% else %}
<select class="time_range" id='language' name='language'>
{% endif %}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>
{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}
</option>
{% endfor %}
</select>

View file

@ -40,12 +40,7 @@
{% set language_label = _('Search language') %}
{% set language_info = _('What language do you prefer for search?') %}
{{ preferences_item_header(language_info, language_label, rtl) }}
<select class="form-control" name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
{% endfor %}
</select>
{% include 'oscar/languages.html' %}
{{ preferences_item_footer(language_info, language_label, rtl) }}
{% set locale_label = _('Interface language') %}
@ -153,6 +148,7 @@
<th>{{ _("Allow") }}</th>
<th>{{ _("Engine name") }}</th>
<th>{{ _("Shortcut") }}</th>
<th>{{ _("Language support") }}</th>
<th>{{ _("SafeSearch") }}</th>
<th>{{ _("Time range") }}</th>
<th>{{ _("Avg. time") }}</th>
@ -161,6 +157,7 @@
<th>{{ _("Max time") }}</th>
<th>{{ _("Avg. time") }}</th>
<th>{{ _("SafeSearch") }}</th>
<th>{{ _("Language support") }}</th>
<th>{{ _("Shortcut") }}</th>
<th>{{ _("Engine name") }}</th>
<th>{{ _("Allow") }}</th>
@ -175,6 +172,7 @@
</td>
<th>{{ search_engine.name }}</th>
<td>{{ shortcuts[search_engine.name] }}</td>
<td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@ -183,6 +181,7 @@
<td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if current_language == 'all' or current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages else ""}} readonly="readonly" disabled="disabled"></td>
<td>{{ shortcuts[search_engine.name] }}</td>
<th>{{ search_engine.name }}</th>
<td class="onoff-checkbox">

View file

@ -9,9 +9,9 @@
<legend>{{ _('Search language') }}</legend>
<p>
<select name='language'>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Automatic') }}</option>
{% for lang_id,lang_name,country_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} ({{ country_name }}) - {{ lang_id }}</option>
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option>
{% for lang_id,lang_name,country_name,english_name in language_codes | sort(attribute=1) %}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{{ lang_name }} {% if country_name %}({{ country_name }}) {% endif %}- {{ lang_id }}</option>
{% endfor %}
</select>
</p>

View file

@ -330,6 +330,10 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
kwargs['language_codes'] = language_codes
if 'current_language' not in kwargs:
kwargs['current_language'] = request.preferences.get_value('language')
# override url_for function in templates
kwargs['url_for'] = url_for_theme
@ -510,6 +514,7 @@ def index():
answers=result_container.answers,
infoboxes=result_container.infoboxes,
paging=result_container.paging,
current_language=search_query.lang,
base_url=get_base_url(),
theme=get_current_theme_name(),
favicons=global_favicons[themes.index(get_current_theme_name())]
@ -552,7 +557,7 @@ def autocompleter():
if not language or language == 'all':
language = 'en'
else:
language = language.split('_')[0]
language = language.split('-')[0]
# run autocompletion
raw_results.extend(completer(raw_text_query.getSearchQuery(), language))
@ -615,9 +620,7 @@ def preferences():
return render('preferences.html',
locales=settings['locales'],
current_locale=get_locale(),
current_language=lang,
image_proxy=image_proxy,
language_codes=language_codes,
engines_by_category=categories,
stats=stats,
answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers],
@ -627,7 +630,8 @@ def preferences():
themes=themes,
plugins=plugins,
allowed_plugins=allowed_plugins,
theme=get_current_theme_name())
theme=get_current_theme_name(),
preferences=True)
@app.route('/image_proxy', methods=['GET'])

View file

@ -101,11 +101,11 @@ Change search language
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be language Automatic
Select From List language Turkish (Turkey) - tr_TR
List Selection Should Be language Default language
Select From List language Türkçe (Türkiye) - tr-TR
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be language Turkish (Turkey) - tr_TR
List Selection Should Be language Türkçe (Türkiye) - tr-TR
Change autocomplete
Page Should Contain about

View file

@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This should be the title')
self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = bing._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<body>
<form>
<div id="limit-languages">
<div>
<div><input id="es" value="es"></input></div>
</div>
<div>
<div><input id="pt_BR" value="pt_BR"></input></div>
<div><input id="pt_PT" value="pt_PT"></input></div>
</div>
</div>
</form>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = bing._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('es', languages)
self.assertIn('pt-BR', languages)
self.assertIn('pt-PT', languages)

View file

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import dailymotion
@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
results = dailymotion.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
json = r"""
{"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
"localized_name":"Afrikaans","display_name":"Afrikaans"},
{"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
"localized_name":"Arabic","display_name":"Arabic"},
{"code":"la","name":"Latin","native_name":null,
"localized_name":"Latin","display_name":"Latin"}
]}
"""
response = mock.Mock(text=json)
languages = dailymotion._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('af', languages)
self.assertIn('ar', languages)
self.assertIn('la', languages)
self.assertEqual(type(languages['af']), dict)
self.assertEqual(type(languages['ar']), dict)
self.assertEqual(type(languages['la']), dict)
self.assertIn('name', languages['af'])
self.assertIn('name', languages['ar'])
self.assertNotIn('name', languages['la'])
self.assertIn('english_name', languages['af'])
self.assertIn('english_name', languages['ar'])
self.assertIn('english_name', languages['la'])
self.assertEqual(languages['af']['name'], 'Afrikaans')
self.assertEqual(languages['af']['english_name'], 'Afrikaans')
self.assertEqual(languages['ar']['name'], u'العربية')
self.assertEqual(languages['ar']['english_name'], 'Arabic')
self.assertEqual(languages['la']['english_name'], 'Latin')

View file

@ -11,7 +11,7 @@ class TestDuckduckgoEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
dicto['language'] = 'de_CH'
dicto['language'] = 'de-CH'
dicto['time_range'] = ''
params = duckduckgo.request(query, dicto)
self.assertIn('url', params)
@ -19,6 +19,17 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertIn('duckduckgo.com', params['url'])
self.assertIn('ch-de', params['url'])
# when ddg uses non standard code
dicto['language'] = 'en-GB'
params = duckduckgo.request(query, dicto)
self.assertIn('uk-en', params['url'])
# no country given
duckduckgo.supported_languages = ['de-CH', 'en-US']
dicto['language'] = 'de'
params = duckduckgo.request(query, dicto)
self.assertIn('ch-de', params['url'])
def test_no_url_in_request_year_time_range(self):
dicto = defaultdict(dict)
query = 'test_query'
@ -73,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
js = """some code...regions:{
"wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
}some more code..."""
response = mock.Mock(text=js)
languages = duckduckgo._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 5)
self.assertIn('wt-WT', languages)
self.assertIn('es-AR', languages)
self.assertIn('en-AU', languages)
self.assertIn('de-AT', languages)
self.assertIn('fr-BE', languages)

View file

@ -21,10 +21,14 @@ class TestDDGDefinitionsEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
dicto['language'] = 'es'
params = duckduckgo_definitions.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('duckduckgo.com', params['url'])
self.assertIn('headers', params)
self.assertIn('Accept-Language', params['headers'])
self.assertIn('es', params['headers']['Accept-Language'])
def test_response(self):
self.assertRaises(AttributeError, duckduckgo_definitions.response, None)

View file

@ -15,6 +15,12 @@ class TestGigablastEngine(SearxTestCase):
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
self.assertTrue('gigablast.com' in params['url'])
self.assertTrue('xx' in params['url'])
dicto['language'] = 'en-US'
params = gigablast.request(query, dicto)
self.assertTrue('en' in params['url'])
self.assertFalse('en-US' in params['url'])
def test_response(self):
self.assertRaises(AttributeError, gigablast.response, None)
@ -83,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'South by Southwest 2016')
self.assertEqual(results[0]['url'], 'www.sxsw.com')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = gigablast._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<body>
<span id="menu2">
<a href="/search?&rxikd=1&qlang=xx"></a>
<a href="/search?&rxikd=1&qlang=en"></a>
<a href="/search?&rxikd=1&qlang=fr"></a>
</span>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = gigablast._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 2)
self.assertIn('en', languages)
self.assertIn('fr', languages)

View file

@ -18,7 +18,7 @@ class TestGoogleEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
dicto['language'] = 'fr_FR'
dicto['language'] = 'fr-FR'
dicto['time_range'] = ''
params = google.request(query, dicto)
self.assertIn('url', params)
@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(results[0]['title'], '')
self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
languages = google._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 0)
html = u"""
<html>
<body>
<table>
<tbody>
<tr>
<td>
<font>
<label>
<span id="ten">English</span>
</label>
</font>
</td>
<td>
<font>
<label>
<span id="tzh-CN">中文 (简体)</span>
</label>
<label>
<span id="tzh-TW">中文 (繁體)</span>
</label>
</font>
</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = google._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('en', languages)
self.assertIn('zh-CN', languages)
self.assertIn('zh-TW', languages)
self.assertEquals(type(languages['en']), dict)
self.assertEquals(type(languages['zh-CN']), dict)
self.assertEquals(type(languages['zh-TW']), dict)
self.assertIn('name', languages['en'])
self.assertIn('name', languages['zh-CN'])
self.assertIn('name', languages['zh-TW'])
self.assertEquals(languages['en']['name'], 'English')
self.assertEquals(languages['zh-CN']['name'], u'中文 (简体)')
self.assertEquals(languages['zh-TW']['name'], u'中文 (繁體)')

View file

@ -10,7 +10,7 @@ class TestQwantEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
dicto['language'] = 'fr_FR'
dicto['language'] = 'fr-FR'
qwant.categories = ['']
params = qwant.request(query, dicto)
self.assertIn('url', params)

View file

@ -10,6 +10,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
dicto['language'] = 'fr-FR'
params = subtitleseeker.request(query, dicto)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
@ -17,7 +18,7 @@ class TestSubtitleseekerEngine(SearxTestCase):
def test_response(self):
dicto = defaultdict(dict)
dicto['language'] = 'fr_FR'
dicto['language'] = 'fr-FR'
response = mock.Mock(search_params=dicto)
self.assertRaises(AttributeError, subtitleseeker.response, None)
@ -68,6 +69,10 @@ class TestSubtitleseekerEngine(SearxTestCase):
self.assertIn('1039 Subs', results[0]['content'])
self.assertIn('Alternative Title', results[0]['content'])
dicto['language'] = 'pt-BR'
results = subtitleseeker.response(response)
self.assertEqual(results[0]['url'], 'http://this.is.the.url/Brazilian/')
html = """
<div class="boxRows">
<div class="boxRowsInner" style="width:600px;">

View file

@ -10,7 +10,7 @@ class TestSwisscowsEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
dicto['language'] = 'de_DE'
dicto['language'] = 'de-DE'
params = swisscows.request(query, dicto)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
self.assertEqual(results[2]['template'], 'images.html')
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
languages = swisscows._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 0)
html = """
<html>
<div id="regions-popup">
<div>
<ul>
<li><a data-val="browser"></a></li>
<li><a data-val="de-CH"></a></li>
<li><a data-val="fr-CH"></a></li>
</ul>
</div>
</div>
</html>
"""
response = mock.Mock(text=html)
languages = swisscows._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('de-CH', languages)
self.assertIn('fr-CH', languages)

View file

@ -8,9 +8,11 @@ from searx.testing import SearxTestCase
class TestWikipediaEngine(SearxTestCase):
def test_request(self):
wikipedia.supported_languages = ['fr', 'en']
query = 'test_query'
dicto = defaultdict(dict)
dicto['language'] = 'fr_FR'
dicto['language'] = 'fr-FR'
params = wikipedia.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
@ -27,6 +29,10 @@ class TestWikipediaEngine(SearxTestCase):
params = wikipedia.request(query, dicto)
self.assertIn('en', params['url'])
dicto['language'] = 'xx'
params = wikipedia.request(query, dicto)
self.assertIn('en', params['url'])
def test_response(self):
dicto = defaultdict(dict)
dicto['language'] = 'fr'
@ -158,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
self.assertEqual(len(results), 2)
self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
self.assertIn(u'披头士乐队...', results[1]['content'])
def test_fetch_supported_languages(self):
html = u"""<html></html>"""
response = mock.Mock(text=html)
languages = wikipedia._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 0)
html = u"""
<html>
<body>
<div>
<div>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Swedish</a></td>
<td><a>Svenska</a></td>
<td><a>sv</a></td>
<td><a><b>3000000</b></a></td>
</tr>
<tr>
<td>3</td>
<td><a>Cebuano</a></td>
<td><a>Sinugboanong Binisaya</a></td>
<td><a>ceb</a></td>
<td><a><b>3000000</b></a></td>
</tr>
</tbody>
</table>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Norwegian (Bokmål)</a></td>
<td><a>Norsk (Bokmål)</a></td>
<td><a>no</a></td>
<td><a><b>100000</b></a></td>
</tr>
</tbody>
</table>
</div>
</div>
</body>
</html>
"""
response = mock.Mock(text=html)
languages = wikipedia._fetch_supported_languages(response)
self.assertEqual(type(languages), dict)
self.assertEqual(len(languages), 3)
self.assertIn('sv', languages)
self.assertIn('ceb', languages)
self.assertIn('no', languages)
self.assertEqual(type(languages['sv']), dict)
self.assertEqual(type(languages['ceb']), dict)
self.assertEqual(type(languages['no']), dict)
self.assertIn('name', languages['sv'])
self.assertIn('english_name', languages['sv'])
self.assertIn('articles', languages['sv'])
self.assertEqual(languages['sv']['name'], 'Svenska')
self.assertEqual(languages['sv']['english_name'], 'Swedish')
self.assertEqual(languages['sv']['articles'], 3000000)
self.assertEqual(languages['ceb']['name'], 'Sinugboanong Binisaya')
self.assertEqual(languages['ceb']['english_name'], 'Cebuano')
self.assertEqual(languages['ceb']['articles'], 3000000)
self.assertEqual(languages['no']['name'], u'Norsk (Bokmål)')
self.assertEqual(languages['no']['english_name'], u'Norwegian (Bokmål)')
self.assertEqual(languages['no']['articles'], 100000)

View file

@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
results = yahoo.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
html = """<html></html>"""
response = mock.Mock(text=html)
results = yahoo._fetch_supported_languages(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
<html>
<div>
<div id="yschlang">
<span>
<label><input value="lang_ar"></input></label>
</span>
<span>
<label><input value="lang_zh_chs"></input></label>
<label><input value="lang_zh_cht"></input></label>
</span>
</div>
</div>
</html>
"""
response = mock.Mock(text=html)
languages = yahoo._fetch_supported_languages(response)
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('ar', languages)
self.assertIn('zh-chs', languages)
self.assertIn('zh-cht', languages)

View file

@ -1,4 +1,4 @@
from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException,
from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting,
MultipleChoiceSetting, PluginsSetting, ValidationException)
from searx.testing import SearxTestCase
@ -88,6 +88,27 @@ class TestSettings(SearxTestCase):
setting.parse('2')
self.assertEquals(setting.get_value(), ['2'])
# search language settings
def test_lang_setting_valid_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
setting.parse('de')
self.assertEquals(setting.get_value(), 'de')
def test_lang_setting_invalid_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
setting.parse('xx')
self.assertEquals(setting.get_value(), 'all')
def test_lang_setting_old_cookie_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
setting.parse('es_XA')
self.assertEquals(setting.get_value(), 'es')
def test_lang_setting_old_cookie_format(self):
setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
setting.parse('es_ES')
self.assertEquals(setting.get_value(), 'es-ES')
# plugins settings
def test_plugins_setting_all_default_enabled(self):
plugin1 = PluginStub('plugin1', True)

171
utils/fetch_languages.py Normal file
View file

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
# This script generates languages.py from intersecting each engine's supported languages.
#
# The country names are obtained from http://api.geonames.org which requires registering as a user.
#
# Output files (engines_languages.json and languages.py)
# are written in current directory to avoid overwriting in case something goes wrong.
from requests import get
from urllib import urlencode
from lxml.html import fromstring
from json import loads, dumps
import io
from sys import path
path.append('../searx') # noqa
from searx.engines import engines
# Geonames API for country names.
geonames_user = '' # ADD USER NAME HERE
country_names_url = 'http://api.geonames.org/countryInfoJSON?{parameters}'
# Output files.
engines_languages_file = 'engines_languages.json'
languages_file = 'languages.py'
engines_languages = {}
languages = {}
# To filter out invalid codes and dialects.
def valid_code(lang_code):
# filter invalid codes
# sl-SL is technically not invalid, but still a mistake
invalid_codes = ['sl-SL', 'wt-WT', 'jw']
invalid_countries = ['UK', 'XA', 'XL']
if lang_code[:2] == 'xx'\
or lang_code in invalid_codes\
or lang_code[-2:] in invalid_countries\
or is_dialect(lang_code):
return False
return True
# Language codes with any additional tags other than language and country.
def is_dialect(lang_code):
lang_code = lang_code.split('-')
if len(lang_code) > 2 or len(lang_code[0]) > 3:
return True
if len(lang_code) == 2 and len(lang_code[1]) > 2:
return True
return False
# Get country name in specified language.
def get_country_name(locale):
if geonames_user is '':
return ''
locale = locale.split('-')
if len(locale) != 2:
return ''
url = country_names_url.format(parameters=urlencode({'lang': locale[0],
'country': locale[1],
'username': geonames_user}))
response = get(url)
json = loads(response.text)
content = json.get('geonames', None)
if content is None or len(content) != 1:
print "No country name found for " + locale[0] + "-" + locale[1]
return ''
return content[0].get('countryName', '')
# Fetchs supported languages for each engine and writes json file with those.
def fetch_supported_languages():
for engine_name in engines:
if hasattr(engines[engine_name], 'fetch_supported_languages'):
try:
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
except Exception as e:
print e
# write json file
with io.open(engines_languages_file, "w", encoding="utf-8") as f:
f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
# Join all language lists.
# Iterate all languages supported by each engine.
def join_language_lists():
# include wikipedia first for more accurate language names
languages.update({code: lang for code, lang
in engines_languages['wikipedia'].iteritems()
if valid_code(code)})
for engine_name in engines_languages:
for locale in engines_languages[engine_name]:
if not valid_code(locale):
continue
# if language is not on list or if it has no name yet
if locale not in languages or not languages[locale].get('name'):
if isinstance(engines_languages[engine_name], dict):
languages[locale] = engines_languages[engine_name][locale]
else:
languages[locale] = {}
# get locales that have no name or country yet
for locale in languages.keys():
# try to get language names
if not languages[locale].get('name'):
name = languages.get(locale.split('-')[0], {}).get('name', None)
if name:
languages[locale]['name'] = name
else:
# filter out locales with no name
del languages[locale]
continue
# try to get language name in english
if not languages[locale].get('english_name'):
languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
# try to get country name
if locale.find('-') > 0 and not languages[locale].get('country'):
languages[locale]['country'] = get_country_name(locale) or ''
# Remove countryless language if language is featured in only one country.
def filter_single_country_languages():
prev_lang = None
for code in sorted(languages):
lang = code.split('-')[0]
if lang == prev_lang:
countries += 1
else:
if prev_lang is not None and countries == 1:
del languages[prev_lang]
countries = 0
prev_lang = lang
# Write languages.py.
def write_languages_file():
new_file = open(languages_file, 'w')
file_content = '# -*- coding: utf-8 -*-\n'\
+ '# list of language codes\n'\
+ '# this file is generated automatically by utils/update_search_languages.py\n'\
+ '\nlanguage_codes = ('
for code in sorted(languages):
file_content += '\n (u"' + code + '"'\
+ ', u"' + languages[code]['name'].split(' (')[0] + '"'\
+ ', u"' + languages[code].get('country', '') + '"'\
+ ', u"' + languages[code].get('english_name', '').split(' (')[0] + '"),'
# remove last comma
file_content = file_content[:-1]
file_content += '\n)\n'
new_file.write(file_content.encode('utf8'))
new_file.close()
if __name__ == "__main__":
fetch_supported_languages()
join_language_lists()
filter_single_country_languages()
write_languages_file()