Add tests for _fetch_supported_languages in engines

Also refactor the method so it can be tested without making network requests.
This commit is contained in:
marc 2016-12-15 00:34:43 -06:00
parent e0c270bd72
commit af35eee10b
27 changed files with 387 additions and 3388 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.

View file

@ -21,6 +21,7 @@ import sys
from flask_babel import gettext
from operator import itemgetter
from json import loads
from requests import get
from searx import settings
from searx import logger
from searx.utils import load_module
@ -79,9 +80,6 @@ def load_engine(engine_data):
if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value)
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# checking required variables
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
@ -91,6 +89,15 @@ def load_engine(engine_data):
.format(engine.name, engine_attr))
sys.exit(1)
# assign supported languages from json file
if engine_data['name'] in languages:
setattr(engine, 'supported_languages', languages[engine_data['name']])
# assign language fetching method if auxiliary method exists
if hasattr(engine, '_fetch_supported_languages'):
setattr(engine, 'fetch_supported_languages',
lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
engine.stats = {
'result_count': 0,
'search_count': 0,

View file

@ -15,7 +15,6 @@
from urllib import urlencode
from lxml import html
from requests import get
from searx.engines.xpath import extract_text
# engine dependent config
@ -86,10 +85,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = html.fromstring(response.text)
dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="limit-languages"]//input')
for option in options:
code = option.xpath('./@id')[0].replace('_', '-')

View file

@ -19,7 +19,7 @@ from urllib import urlencode
from lxml import html
from json import loads
import re
from searx.engines.bing import fetch_supported_languages
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['images']

View file

@ -17,7 +17,7 @@ from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
from searx.engines.bing import fetch_supported_languages
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
# engine dependent config
categories = ['news']

View file

@ -80,11 +80,10 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
response_json = loads(response.text)
response_json = loads(resp.text)
for language in response_json['list']:
supported_languages[language['code']] = {}

View file

@ -119,11 +119,10 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
response = get(supported_languages_url)
def _fetch_supported_languages(resp):
# response is a js file with regions as an embedded object
response_page = response.text
response_page = resp.text
response_page = response_page[response_page.find('regions:{') + 8:]
response_page = response_page[:response_page.find('}') + 1]

View file

@ -4,7 +4,7 @@ from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import fetch_supported_languages
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'

View file

@ -14,7 +14,6 @@ from json import loads
from random import randint
from time import time
from urllib import urlencode
from requests import get
from lxml.html import fromstring
# engine dependent config
@ -91,10 +90,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
links = dom.xpath('//span[@id="menu2"]/a')
for link in links:
code = link.xpath('./@href')[0][-2:]

View file

@ -12,7 +12,6 @@ import re
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html, etree
from requests import get
from searx.engines.xpath import extract_text, extract_url
from searx.search import logger
@ -364,14 +363,13 @@ def attributes_to_html(attributes):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
dom = html.fromstring(response.text)
options = dom.xpath('//select[@name="hl"]/option')
dom = html.fromstring(resp.text)
options = dom.xpath('//table//td/font/label/span')
for option in options:
code = option.xpath('./@value')[0].split('-')[0]
name = option.text[:-1].title()
code = option.xpath('./@id')[0][1:]
name = option.text.title()
supported_languages[code] = {"name": name}
return supported_languages

View file

@ -13,7 +13,7 @@
from lxml import html
from urllib import urlencode
from json import loads
from searx.engines.google import fetch_supported_languages
from searx.engines.google import _fetch_supported_languages, supported_languages_url
# search-url
categories = ['news']

View file

@ -13,7 +13,6 @@
from json import loads
from urllib import urlencode, unquote
import re
from requests import get
from lxml.html import fromstring
# engine dependent config
@ -25,6 +24,8 @@ language_support = True
base_url = 'https://swisscows.ch/'
search_string = '?{query}&page={page}'
supported_languages_url = base_url
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
@ -113,10 +114,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(base_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
for option in options:
code = option.xpath('./@data-val')[0]

View file

@ -15,7 +15,7 @@ from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
from searx.engines.wikipedia import fetch_supported_languages
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from json import loads
from lxml.html import fromstring

View file

@ -12,7 +12,6 @@
from json import loads
from urllib import urlencode, quote
from requests import get
from lxml.html import fromstring
@ -119,10 +118,9 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = {}
response = get(supported_languages_url)
dom = fromstring(response.text)
dom = fromstring(resp.text)
tables = dom.xpath('//table[contains(@class,"sortable")]')
for table in tables:
# exclude header row

View file

@ -14,7 +14,6 @@
from urllib import urlencode
from urlparse import unquote
from lxml import html
from requests import get
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
@ -144,13 +143,12 @@ def response(resp):
# get supported languages from their site
def fetch_supported_languages():
def _fetch_supported_languages(resp):
supported_languages = []
response = get(supported_languages_url)
dom = html.fromstring(response.text)
dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="yschlang"]/span/label/input')
for option in options:
code = option.xpath('./@value')[0][5:]
code = option.xpath('./@value')[0][5:].replace('_', '-')
supported_languages.append(code)
return supported_languages

View file

@ -12,7 +12,7 @@
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url, fetch_supported_languages
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser

View file

@ -3,36 +3,27 @@
# this file is generated automatically by utils/update_search_languages.py
language_codes = (
(u"ach", u"Acoli", u"", u""),
(u"af", u"Afrikaans", u"", u""),
(u"ak", u"Akan", u"", u""),
(u"am", u"አማርኛ", u"", u""),
(u"am", u"አማርኛ", u"", u"Amharic"),
(u"ar-SA", u"العربية", u"المملكة العربية السعودية", u"Arabic"),
(u"az", u"Azərbaycanca", u"", u"Azerbaijani"),
(u"ban", u"Balinese", u"", u""),
(u"be", u"Беларуская", u"", u"Belarusian"),
(u"bem", u"Ichibemba", u"", u""),
(u"bg-BG", u"Български", u"България", u"Bulgarian"),
(u"bn", u"বাংলা", u"", u""),
(u"br", u"Brezhoneg", u"", u""),
(u"bs", u"Bosanski", u"", u""),
(u"bn", u"বাংলা", u"", u"Bengali"),
(u"br", u"Brezhoneg", u"", u"Breton"),
(u"bs", u"Bosnian", u"", u"Bosnian"),
(u"ca", u"Català", u"", u"Catalan"),
(u"ca-CT", u"Català", u"", u"Catalan"),
(u"ca-ES", u"Català", u"Espanya", u"Catalan"),
(u"ce", u"Нохчийн", u"", u"Chechen"),
(u"ceb", u"Sinugboanong Binisaya", u"", u"Cebuano"),
(u"chr", u"ᏣᎳᎩ", u"", u""),
(u"ckb", u"Central Kurdish", u"", u""),
(u"co", u"Corsican", u"", u""),
(u"crs", u"Seychellois Creole", u"", u""),
(u"cs-CZ", u"Čeština", u"Česko", u"Czech"),
(u"cy", u"Cymraeg", u"", u""),
(u"cy", u"Cymraeg", u"", u"Welsh"),
(u"da-DK", u"Dansk", u"Danmark", u"Danish"),
(u"de", u"Deutsch", u"", u"German"),
(u"de-AT", u"Deutsch", u"Österreich", u"German"),
(u"de-CH", u"Deutsch", u"Schweiz", u"German"),
(u"de-DE", u"Deutsch", u"Deutschland", u"German"),
(u"ee", u"Eʋegbe", u"", u""),
(u"el-GR", u"Ελληνικά", u"Ελλάδα", u"Greek"),
(u"en", u"English", u"", u"English"),
(u"en-AU", u"English", u"Australia", u"English"),
@ -60,30 +51,20 @@ language_codes = (
(u"eu", u"Euskara", u"", u"Basque"),
(u"fa", u"فارسی", u"", u"Persian"),
(u"fi-FI", u"Suomi", u"Suomi", u"Finnish"),
(u"fo", u"Føroyskt", u"", u""),
(u"fr", u"Français", u"", u"French"),
(u"fr-BE", u"Français", u"Belgique", u"French"),
(u"fr-CA", u"Français", u"Canada", u"French"),
(u"fr-CH", u"Français", u"Suisse", u"French"),
(u"fr-FR", u"Français", u"France", u"French"),
(u"fy", u"West-Frysk", u"", u""),
(u"ga", u"Gaeilge", u"", u""),
(u"gaa", u"Ga", u"", u""),
(u"gd", u"Gàidhlig", u"", u""),
(u"ga", u"Gaeilge", u"", u"Irish"),
(u"gl", u"Galego", u"", u"Galician"),
(u"gn", u"Guarani", u"", u""),
(u"gu", u"ગુજરાતી", u"", u""),
(u"ha", u"Hausa", u"", u""),
(u"haw", u"ʻŌlelo HawaiʻI", u"", u""),
(u"gu", u"ગુજરાતી", u"", u"Gujarati"),
(u"he-IL", u"עברית", u"ישראל", u"Hebrew"),
(u"hi", u"हिन्दी", u"", u"Hindi"),
(u"hr-HR", u"Hrvatski", u"Hrvatska", u"Croatian"),
(u"ht", u"Haitian Creole", u"", u""),
(u"hu-HU", u"Magyar", u"Magyarország", u"Hungarian"),
(u"hy", u"Հայերեն", u"", u"Armenian"),
(u"ia", u"Interlingua", u"", u""),
(u"id-ID", u"Bahasa Indonesia", u"Indonesia", u"Indonesian"),
(u"ig", u"Igbo", u"", u""),
(u"is", u"Íslenska", u"", u""),
(u"it", u"Italiano", u"", u"Italian"),
(u"it-CH", u"Italiano", u"Svizzera", u"Italian"),
@ -91,86 +72,48 @@ language_codes = (
(u"iw", u"עברית", u"", u""),
(u"ja-JP", u"日本語", u"日本", u"Japanese"),
(u"ka", u"ქართული", u"", u"Georgian"),
(u"kg", u"Kongo", u"", u""),
(u"kk", u"Қазақша", u"", u"Kazakh"),
(u"km", u"ខ្មែរ", u"", u""),
(u"kn", u"ಕನ್ನಡ", u"", u""),
(u"kn", u"ಕನ್ನಡ", u"", u"Kannada"),
(u"ko-KR", u"한국어", u"대한민국", u"Korean"),
(u"kri", u"Krio", u"", u""),
(u"ky", u"Кыргызча", u"", u""),
(u"la", u"Latina", u"", u"Latin"),
(u"lg", u"Luganda", u"", u""),
(u"ln", u"Lingála", u"", u""),
(u"lo", u"ລາວ", u"", u""),
(u"loz", u"Lozi", u"", u""),
(u"lt-LT", u"Lietuvių", u"Lietuva", u"Lithuanian"),
(u"lua", u"Luba-Lulua", u"", u""),
(u"lv-LV", u"Latviešu", u"Latvijas Republika", u""),
(u"mfe", u"Kreol Morisien", u"", u""),
(u"mg", u"Malagasy", u"", u""),
(u"mi", u"Maori", u"", u""),
(u"mi", u"Reo Māori", u"", u"Maori"),
(u"min", u"Minangkabau", u"", u"Minangkabau"),
(u"mk", u"Македонски", u"", u""),
(u"ml", u"മലയാളം", u"", u""),
(u"mn", u"Монгол", u"", u""),
(u"mr", u"मराठी", u"", u""),
(u"mk", u"Македонски", u"", u"Macedonian"),
(u"mn", u"Монгол", u"", u"Mongolian"),
(u"mr", u"मराठी", u"", u"Marathi"),
(u"ms-MY", u"Bahasa Melayu", u"Malaysia", u"Malay"),
(u"mt", u"Malti", u"", u""),
(u"my", u"ဗမာ", u"", u""),
(u"mt", u"Malti", u"", u"Maltese"),
(u"nb-NO", u"Norwegian Bokmål", u"Norge", u"Norwegian Bokmål"),
(u"ne", u"नेपाली", u"", u""),
(u"nl", u"Nederlands", u"", u"Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
(u"nn", u"Nynorsk", u"", u"Norwegian"),
(u"no-NO", u"Norsk", u"Norge", u"Norwegian"),
(u"nso", u"Northern Sotho", u"", u""),
(u"ny", u"Nyanja", u"", u""),
(u"nyn", u"Runyankore", u"", u""),
(u"oc", u"Occitan", u"", u""),
(u"om", u"Oromoo", u"", u""),
(u"or", u"ଓଡ଼ିଆ", u"", u""),
(u"pa", u"ਪੰਜਾਬੀ", u"", u""),
(u"pcm", u"Nigerian Pidgin", u"", u""),
(u"oc", u"Occitan", u"", u"Occitan"),
(u"or", u"Oriya", u"", u"Oriya"),
(u"pa", u"ਪੰਜਾਬੀ", u"", u"Panjabi"),
(u"pl-PL", u"Polski", u"Rzeczpospolita Polska", u"Polish"),
(u"ps", u"پښتو", u"", u""),
(u"ps", u"Pushto", u"", u"Pushto"),
(u"pt", u"Português", u"", u"Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
(u"qu", u"Runasimi", u"", u""),
(u"rm", u"Rumantsch", u"", u""),
(u"rn", u"Ikirundi", u"", u""),
(u"ro-RO", u"Română", u"România", u"Romanian"),
(u"ru-RU", u"Русский", u"Россия", u"Russian"),
(u"rw", u"Kinyarwanda", u"", u""),
(u"sd", u"Sindhi", u"", u""),
(u"rw", u"Ikinyarwanda", u"", u"Kinyarwanda"),
(u"sh", u"Srpskohrvatski / Српскохрватски", u"", u"Serbo-Croatian"),
(u"si", u"සිංහල", u"", u""),
(u"sk-SK", u"Slovenčina", u"Slovenská republika", u"Slovak"),
(u"sl", u"Slovenščina", u"", u"Slovenian"),
(u"sn", u"Chishona", u"", u""),
(u"so", u"Soomaali", u"", u""),
(u"sq", u"Shqip", u"", u""),
(u"sr", u"Српски / Srpski", u"", u"Serbian"),
(u"st", u"Southern Sotho", u"", u""),
(u"su", u"Sundanese", u"", u""),
(u"sv-SE", u"Svenska", u"Sverige", u"Swedish"),
(u"sw", u"Kiswahili", u"", u""),
(u"ta", u"தமிழ்", u"", u""),
(u"te", u"తెలుగు", u"", u""),
(u"tg", u"Tajik", u"", u""),
(u"ta", u"தமிழ்", u"", u"Tamil"),
(u"th-TH", u"ไทย", u"ไทย", u"Thai"),
(u"ti", u"ትግርኛ", u"", u""),
(u"tk", u"Turkmen", u"", u""),
(u"ti", u"ትግርኛ", u"", u"Tigrinya"),
(u"tl-PH", u"Filipino", u"Pilipinas", u""),
(u"tlh", u"Klingon", u"", u""),
(u"tn", u"Tswana", u"", u""),
(u"to", u"Lea Fakatonga", u"", u""),
(u"tr-TR", u"Türkçe", u"Türkiye", u"Turkish"),
(u"tt", u"Tatar", u"", u""),
(u"tum", u"Tumbuka", u"", u""),
(u"tw", u"Twi", u"", u""),
(u"ug", u"ئۇيغۇرچە", u"", u""),
(u"tt", u"Татарча", u"", u"Tatar"),
(u"uk-UA", u"Українська", u"Україна", u"Ukrainian"),
(u"ur", u"اردو", u"", u"Urdu"),
(u"uz", u"Ozbek", u"", u"Uzbek"),
@ -179,13 +122,10 @@ language_codes = (
(u"vo", u"Volapük", u"", u"Volapük"),
(u"wa", u"Walon", u"", u"Walloon"),
(u"war", u"Winaray", u"", u"Waray-Waray"),
(u"wo", u"Wolof", u"", u""),
(u"xh", u"Xhosa", u"", u""),
(u"yi", u"ייִדיש", u"", u""),
(u"yo", u"Èdè Yorùbá", u"", u""),
(u"xh", u"Xhosa", u"", u"Xhosa"),
(u"zh", u"中文", u"", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"),
(u"zh-CN", u"中文", u"中国", u""),
(u"zh-HK", u"中文", u"香港", u"Chinese"),
(u"zh-TW", u"中文", u"台湾", u"Chinese"),
(u"zu", u"Isizulu", u"", u"")
(u"zh-TW", u"中文", u"台湾", u""),
(u"zu", u"Isi-Zulu", u"", u"Zulu")
)

View file

@ -86,3 +86,35 @@ class TestBingEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This should be the title')
self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
    """Check bing._fetch_supported_languages against canned responses.

    A bare page is expected to give an empty list; a page carrying a
    "limit-languages" block is expected to give one code per input,
    with underscores turned into hyphens (pt_BR -> pt-BR).
    """
    # page without any language inputs
    blank_page = mock.Mock(text="""<html></html>""")
    no_languages = bing._fetch_supported_languages(blank_page)
    self.assertEqual(type(no_languages), list)
    self.assertEqual(len(no_languages), 0)

    # page with three language inputs
    markup = """
<html>
<body>
<form>
<div id="limit-languages">
<div>
<div><input id="es" value="es"></input></div>
</div>
<div>
<div><input id="pt_BR" value="pt_BR"></input></div>
<div><input id="pt_PT" value="pt_PT"></input></div>
</div>
</div>
</form>
</body>
</html>
"""
    languages = bing._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 3)
    for expected_code in ('es', 'pt-BR', 'pt-PT'):
        self.assertIn(expected_code, languages)

View file

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import dailymotion
@ -72,3 +73,39 @@ class TestDailymotionEngine(SearxTestCase):
results = dailymotion.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
    """Check dailymotion._fetch_supported_languages on a canned JSON body.

    Every code in the "list" array is expected as a key of the returned
    dict, each mapped to a dict with an 'english_name'. The entry whose
    native_name is null (Latin) is expected to have no 'name' key.
    """
    payload = r"""
{"list":[{"code":"af","name":"Afrikaans","native_name":"Afrikaans",
"localized_name":"Afrikaans","display_name":"Afrikaans"},
{"code":"ar","name":"Arabic","native_name":"\u0627\u0644\u0639\u0631\u0628\u064a\u0629",
"localized_name":"Arabic","display_name":"Arabic"},
{"code":"la","name":"Latin","native_name":null,
"localized_name":"Latin","display_name":"Latin"}
]}
"""
    languages = dailymotion._fetch_supported_languages(mock.Mock(text=payload))
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 3)

    codes = ('af', 'ar', 'la')
    for code in codes:
        self.assertIn(code, languages)
    for code in codes:
        self.assertEqual(type(languages[code]), dict)

    # native name is only expected where the response provides one
    self.assertIn('name', languages['af'])
    self.assertIn('name', languages['ar'])
    self.assertNotIn('name', languages['la'])

    for code in codes:
        self.assertIn('english_name', languages[code])

    af, ar, la = (languages[code] for code in codes)
    self.assertEqual(af['name'], 'Afrikaans')
    self.assertEqual(af['english_name'], 'Afrikaans')
    self.assertEqual(ar['name'], u'العربية')
    self.assertEqual(ar['english_name'], 'Arabic')
    self.assertEqual(la['english_name'], 'Latin')

View file

@ -84,3 +84,17 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'This is the title')
self.assertEqual(results[0]['url'], u'http://this.should.be.the.link/ű')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
    """Check duckduckgo._fetch_supported_languages on a canned js snippet.

    The snippet embeds a ``regions:{...}`` object; each region key such
    as "ar-es" is expected back as a code of the form 'es-AR'.
    """
    script = """some code...regions:{
"wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
}some more code..."""
    languages = duckduckgo._fetch_supported_languages(mock.Mock(text=script))
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 5)
    for expected_code in ('wt-WT', 'es-AR', 'en-AU', 'de-AT', 'fr-BE'):
        self.assertIn(expected_code, languages)

View file

@ -89,3 +89,28 @@ class TestGigablastEngine(SearxTestCase):
self.assertEqual(results[0]['title'], 'South by Southwest 2016')
self.assertEqual(results[0]['url'], 'www.sxsw.com')
self.assertEqual(results[0]['content'], 'This should be the content.')
def test_fetch_supported_languages(self):
    """Check gigablast._fetch_supported_languages against canned responses.

    A bare page is expected to give an empty list. Of the three menu
    links in the sample page only 'en' and 'fr' are expected back; the
    ``qlang=xx`` link is not expected to contribute a language.
    """
    # page without the language menu
    blank_page = mock.Mock(text="""<html></html>""")
    no_languages = gigablast._fetch_supported_languages(blank_page)
    self.assertEqual(type(no_languages), list)
    self.assertEqual(len(no_languages), 0)

    # page with a language menu of three links
    markup = """
<html>
<body>
<span id="menu2">
<a href="/search?&rxikd=1&qlang=xx"></a>
<a href="/search?&rxikd=1&qlang=en"></a>
<a href="/search?&rxikd=1&qlang=fr"></a>
</span>
</body>
</html>
"""
    languages = gigablast._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 2)
    for expected_code in ('en', 'fr'):
        self.assertIn(expected_code, languages)

View file

@ -177,3 +177,60 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(results[0]['title'], '')
self.assertEqual(results[0]['content'], '')
self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')
def test_fetch_supported_languages(self):
    """Check google._fetch_supported_languages against canned responses.

    A bare page is expected to give an empty dict. For the sample
    preferences table, every ``<span>`` is expected to become one entry:
    the key is the span id without its first character ("ten" -> 'en')
    and the value is a dict whose 'name' is the span text.
    """
    # page without any language table
    html = """<html></html>"""
    response = mock.Mock(text=html)
    languages = google._fetch_supported_languages(response)
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 0)

    # page with three language labels
    html = u"""
<html>
<body>
<table>
<tbody>
<tr>
<td>
<font>
<label>
<span id="ten">English</span>
</label>
</font>
</td>
<td>
<font>
<label>
<span id="tzh-CN">中文 (简体)</span>
</label>
<label>
<span id="tzh-TW">中文 (繁體)</span>
</label>
</font>
</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
    response = mock.Mock(text=html)
    languages = google._fetch_supported_languages(response)
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 3)

    self.assertIn('en', languages)
    self.assertIn('zh-CN', languages)
    self.assertIn('zh-TW', languages)

    # assertEqual is used throughout; assertEquals is a deprecated alias
    self.assertEqual(type(languages['en']), dict)
    self.assertEqual(type(languages['zh-CN']), dict)
    self.assertEqual(type(languages['zh-TW']), dict)

    self.assertIn('name', languages['en'])
    self.assertIn('name', languages['zh-CN'])
    self.assertIn('name', languages['zh-TW'])

    self.assertEqual(languages['en']['name'], 'English')
    self.assertEqual(languages['zh-CN']['name'], u'中文 (简体)')
    self.assertEqual(languages['zh-TW']['name'], u'中文 (繁體)')

View file

@ -126,3 +126,30 @@ class TestSwisscowsEngine(SearxTestCase):
self.assertEqual(results[2]['url'], 'http://de.wikipedia.org/wiki/Datei:This should.svg')
self.assertEqual(results[2]['img_src'], 'http://ts2.mm.This/should.png')
self.assertEqual(results[2]['template'], 'images.html')
def test_fetch_supported_languages(self):
    """Check swisscows._fetch_supported_languages against canned responses.

    A bare page is expected to give an empty list. The sample page has
    three region links, so three entries are expected in total; 'de-CH'
    and 'fr-CH' are additionally checked by name.
    """
    # page without the regions popup
    blank_page = mock.Mock(text="""<html></html>""")
    languages = swisscows._fetch_supported_languages(blank_page)
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 0)

    # page with a regions popup of three links
    markup = """
<html>
<div id="regions-popup">
<div>
<ul>
<li><a data-val="browser"></a></li>
<li><a data-val="de-CH"></a></li>
<li><a data-val="fr-CH"></a></li>
</ul>
</div>
</div>
</html>
"""
    languages = swisscows._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 3)
    for expected_code in ('de-CH', 'fr-CH'):
        self.assertIn(expected_code, languages)

View file

@ -164,3 +164,96 @@ class TestWikipediaEngine(SearxTestCase):
self.assertEqual(len(results), 2)
self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
self.assertIn(u'披头士乐队...', results[1]['content'])
def test_fetch_supported_languages(self):
    """Check wikipedia._fetch_supported_languages against canned responses.

    A bare page is expected to give an empty dict. The sample page holds
    two sortable tables (two rows and one row); every row is expected to
    become one entry keyed by its wiki code, carrying the local name,
    the English name and the article count as an integer.
    """
    # page without any table
    blank_page = mock.Mock(text=u"""<html></html>""")
    languages = wikipedia._fetch_supported_languages(blank_page)
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 0)

    # page with two tables listing three wikis in total
    markup = u"""
<html>
<body>
<div>
<div>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Swedish</a></td>
<td><a>Svenska</a></td>
<td><a>sv</a></td>
<td><a><b>3000000</b></a></td>
</tr>
<tr>
<td>3</td>
<td><a>Cebuano</a></td>
<td><a>Sinugboanong Binisaya</a></td>
<td><a>ceb</a></td>
<td><a><b>3000000</b></a></td>
</tr>
</tbody>
</table>
<h3>Table header</h3>
<table class="sortable jquery-tablesorter">
<thead>
<tr>
<th>N</th>
<th>Language</th>
<th>Language (local)</th>
<th>Wiki</th>
<th>Articles</th>
</tr>
</thead>
<tbody>
<tr>
<td>2</td>
<td><a>Norwegian (Bokmål)</a></td>
<td><a>Norsk (Bokmål)</a></td>
<td><a>no</a></td>
<td><a><b>100000</b></a></td>
</tr>
</tbody>
</table>
</div>
</div>
</body>
</html>
"""
    languages = wikipedia._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(languages), dict)
    self.assertEqual(len(languages), 3)

    codes = ('sv', 'ceb', 'no')
    for code in codes:
        self.assertIn(code, languages)
    for code in codes:
        self.assertEqual(type(languages[code]), dict)

    # key presence is only spot-checked on the first entry
    for key in ('name', 'english_name', 'articles'):
        self.assertIn(key, languages['sv'])

    # (code, local name, English name, article count)
    expected_rows = (
        ('sv', 'Svenska', 'Swedish', 3000000),
        ('ceb', 'Sinugboanong Binisaya', 'Cebuano', 3000000),
        ('no', u'Norsk (Bokmål)', u'Norwegian (Bokmål)', 100000),
    )
    for code, name, english_name, articles in expected_rows:
        entry = languages[code]
        self.assertEqual(entry['name'], name)
        self.assertEqual(entry['english_name'], english_name)
        self.assertEqual(entry['articles'], articles)

View file

@ -147,3 +147,33 @@ class TestYahooEngine(SearxTestCase):
results = yahoo.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
def test_fetch_supported_languages(self):
    """Check yahoo._fetch_supported_languages against canned responses.

    A bare page is expected to give an empty list. For the sample page,
    each input value is expected back without its "lang_" prefix and
    with underscores turned into hyphens (lang_zh_chs -> zh-chs).
    """
    # page without the language block
    blank_page = mock.Mock(text="""<html></html>""")
    no_languages = yahoo._fetch_supported_languages(blank_page)
    self.assertEqual(type(no_languages), list)
    self.assertEqual(len(no_languages), 0)

    # page with three language inputs
    markup = """
<html>
<div>
<div id="yschlang">
<span>
<label><input value="lang_ar"></input></label>
</span>
<span>
<label><input value="lang_zh_chs"></input></label>
<label><input value="lang_zh_cht"></input></label>
</span>
</div>
</div>
</html>
"""
    languages = yahoo._fetch_supported_languages(mock.Mock(text=markup))
    self.assertEqual(type(languages), list)
    self.assertEqual(len(languages), 3)
    for expected_code in ('ar', 'zh-chs', 'zh-cht'):
        self.assertIn(expected_code, languages)

View file

@ -84,7 +84,7 @@ def fetch_supported_languages():
# write json file
f = io.open(engines_languages_file, "w", encoding="utf-8")
f.write(unicode(dumps(engines_languages, indent=4, ensure_ascii=False, encoding="utf-8")))
f.write(unicode(dumps(engines_languages, ensure_ascii=False, encoding="utf-8")))
f.close()
@ -110,18 +110,22 @@ def join_language_lists():
else:
languages[locale] = {}
# get locales that have no name yet
# get locales that have no name or country yet
for locale in languages.keys():
if not languages[locale].get('name'):
# try to get language and country names
# try to get language names
name = languages.get(locale.split('-')[0], {}).get('name', None)
if name:
languages[locale]['name'] = name
languages[locale]['country'] = get_country_name(locale) or ''
languages[locale]['english_name'] = languages.get(locale.split('-')[0], {}).get('english_name', '')
else:
# filter out locales with no name
del languages[locale]
continue
# try to get country name
if locale.find('-') > 0 and not languages[locale].get('country'):
languages[locale]['country'] = get_country_name(locale) or ''
# Remove countryless language if language is featured in only one country.