Merge pull request #746 from kvch/moar-time-range-support

Support time range search in more engines
This commit is contained in:
Adam Tauber 2016-11-01 20:01:51 +01:00 committed by GitHub
commit e23c8f954b
9 changed files with 53 additions and 8 deletions

View file

@@ -24,11 +24,16 @@ import re
categories = ['images'] categories = ['images']
paging = True paging = True
safesearch = True safesearch = True
time_range_support = True
# search-url # search-url
base_url = 'https://www.bing.com/' base_url = 'https://www.bing.com/'
search_string = 'images/search?{query}&count=10&first={offset}' search_string = 'images/search?{query}&count=10&first={offset}'
time_range_string = '&qft=+filterui:age-lt{interval}'
thumb_url = "https://www.bing.com/th?id={ihk}" thumb_url = "https://www.bing.com/th?id={ihk}"
time_range_dict = {'day': '1440',
'week': '10080',
'month': '43200'}
# safesearch definitions # safesearch definitions
safesearch_types = {2: 'STRICT', safesearch_types = {2: 'STRICT',
@@ -58,6 +63,8 @@ def request(query, params):
'&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
params['url'] = base_url + search_path params['url'] = base_url + search_path
if params['time_range'] in time_range_dict:
params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])
return params return params

View file

@@ -22,10 +22,15 @@ from searx.utils import list_get
categories = ['news'] categories = ['news']
paging = True paging = True
language_support = True language_support = True
time_range_support = True
# search-url # search-url
base_url = 'https://www.bing.com/' base_url = 'https://www.bing.com/'
search_string = 'news/search?{query}&first={offset}&format=RSS' search_string = 'news/search?{query}&first={offset}&format=RSS'
search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
time_range_dict = {'day': '7',
'week': '8',
'month': '9'}
# remove click # remove click
@@ -46,6 +51,19 @@ def image_url_cleanup(url_string):
return url_string return url_string
def _get_url(query, language, offset, time_range):
    """Build the full bing-news request URL.

    Picks the time-filtered search template when *time_range* is one of
    the supported ranges, the plain template otherwise.
    """
    format_args = {
        'query': urlencode({'q': query, 'setmkt': language}),
        'offset': offset,
    }
    if time_range in time_range_dict:
        template = search_string_with_time
        format_args['interval'] = time_range_dict[time_range]
    else:
        template = search_string
    return base_url + template.format(**format_args)
# do search-request # do search-request
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1 offset = (params['pageno'] - 1) * 10 + 1
@@ -55,11 +73,7 @@ def request(query, params):
else: else:
language = params['language'].replace('_', '-') language = params['language'].replace('_', '-')
search_path = search_string.format( params['url'] = _get_url(query, language, offset, params['time_range'])
query=urlencode({'q': query, 'setmkt': language}),
offset=offset)
params['url'] = base_url + search_path
return params return params

View file

@@ -14,6 +14,7 @@
from urllib import urlencode from urllib import urlencode
from json import loads from json import loads
from time import time
import re import re
from searx.engines import logger from searx.engines import logger
@@ -24,21 +25,31 @@ categories = ['images']
url = 'https://www.flickr.com/' url = 'https://www.flickr.com/'
search_url = url + 'search?{query}&page={page}' search_url = url + 'search?{query}&page={page}'
time_range_url = '&min_upload_date={start}&max_upload_date={end}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
paging = True paging = True
time_range_support = True
time_range_dict = {'day': 60 * 60 * 24,
'week': 60 * 60 * 24 * 7,
'month': 60 * 60 * 24 * 7 * 4}
def build_flickr_url(user_id, photo_id): def build_flickr_url(user_id, photo_id):
return photo_url.format(userid=user_id, photoid=photo_id) return photo_url.format(userid=user_id, photoid=photo_id)
def _get_time_range_url(time_range):
    """Return the min/max upload-date URL suffix for *time_range*.

    Returns '' for unsupported / empty ranges so it can be appended
    unconditionally to the search URL.
    """
    if time_range in time_range_dict:
        now = int(time())
        # BUGFIX: min_upload_date (start) must be the *earlier* bound and
        # max_upload_date (end) the current time; the original had them
        # swapped, producing an inverted (empty) date range. Also use an
        # int for both bounds instead of a float for one of them.
        return time_range_url.format(start=now - time_range_dict[time_range],
                                     end=now)
    return ''


def request(query, params):
    """Build the flickr search request URL (with optional time filter)."""
    params['url'] = (search_url.format(query=urlencode({'text': query}),
                                       page=params['pageno'])
                     + _get_time_range_url(params['time_range']))
    return params
return params return params

View file

@@ -17,10 +17,15 @@ from searx.utils import list_get
categories = ['videos', 'music'] categories = ['videos', 'music']
paging = True paging = True
language_support = False language_support = False
time_range_support = True
# search-url # search-url
base_url = 'https://www.youtube.com/results' base_url = 'https://www.youtube.com/results'
search_url = base_url + '?search_query={query}&page={page}' search_url = base_url + '?search_query={query}&page={page}'
time_range_url = '&sp=EgII{time_range}%253D%253D'
time_range_dict = {'day': 'Ag',
'week': 'Aw',
'month': 'BA'}
embedded_url = '<iframe width="540" height="304" ' +\ embedded_url = '<iframe width="540" height="304" ' +\
'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\ 'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
@@ -47,6 +52,8 @@ def extract_text_from_dom(result, xpath):
def request(query, params): def request(query, params):
params['url'] = search_url.format(query=quote_plus(query), params['url'] = search_url.format(query=quote_plus(query),
page=params['pageno']) page=params['pageno'])
if params['time_range'] in time_range_dict:
params['url'] += time_range_url.format(time_range=time_range_dict[params['time_range']])
return params return params

View file

@@ -158,6 +158,7 @@
<th>{{ _("Engine name") }}</th> <th>{{ _("Engine name") }}</th>
<th>{{ _("Shortcut") }}</th> <th>{{ _("Shortcut") }}</th>
<th>{{ _("SafeSearch") }}</th> <th>{{ _("SafeSearch") }}</th>
<th>{{ _("Time range") }}</th>
<th>{{ _("Avg. time") }}</th> <th>{{ _("Avg. time") }}</th>
<th>{{ _("Max time") }}</th> <th>{{ _("Max time") }}</th>
{% else %} {% else %}
@@ -179,6 +180,7 @@
<th>{{ search_engine.name }}</th> <th>{{ search_engine.name }}</th>
<td>{{ shortcuts[search_engine.name] }}</td> <td>{{ shortcuts[search_engine.name] }}</td>
<td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td> <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
<td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
<td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td> <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
{% else %} {% else %}

View file

@@ -13,6 +13,7 @@ class TestBingImagesEngine(SearxTestCase):
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'fr_FR' dicto['language'] = 'fr_FR'
dicto['safesearch'] = 1 dicto['safesearch'] = 1
dicto['time_range'] = ''
params = bing_images.request(query, dicto) params = bing_images.request(query, dicto)
self.assertTrue('url' in params) self.assertTrue('url' in params)
self.assertTrue(query in params['url']) self.assertTrue(query in params['url'])

View file

@@ -12,6 +12,7 @@ class TestBingNewsEngine(SearxTestCase):
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['language'] = 'fr_FR' dicto['language'] = 'fr_FR'
dicto['time_range'] = ''
params = bing_news.request(query, dicto) params = bing_news.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])

View file

@@ -15,6 +15,7 @@ class TestFlickrNoapiEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 1 dicto['pageno'] = 1
dicto['time_range'] = ''
params = flickr_noapi.request(query, dicto) params = flickr_noapi.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])

View file

@@ -11,6 +11,7 @@ class TestYoutubeNoAPIEngine(SearxTestCase):
query = 'test_query' query = 'test_query'
dicto = defaultdict(dict) dicto = defaultdict(dict)
dicto['pageno'] = 0 dicto['pageno'] = 0
dicto['time_range'] = ''
params = youtube_noapi.request(query, dicto) params = youtube_noapi.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])