Merge pull request #746 from kvch/moar-time-range-support
Support time range search in more engines
Commit e23c8f954b
9 changed files with 53 additions and 8 deletions
searx/engines/bing_images.py

@@ -24,11 +24,16 @@ import re
 categories = ['images']
 paging = True
 safesearch = True
+time_range_support = True

 # search-url
 base_url = 'https://www.bing.com/'
 search_string = 'images/search?{query}&count=10&first={offset}'
+time_range_string = '&qft=+filterui:age-lt{interval}'
 thumb_url = "https://www.bing.com/th?id={ihk}"
+time_range_dict = {'day': '1440',
+                   'week': '10080',
+                   'month': '43200'}

 # safesearch definitions
 safesearch_types = {2: 'STRICT',
@@ -58,6 +63,8 @@ def request(query, params):
         '&ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

     params['url'] = base_url + search_path
+    if params['time_range'] in time_range_dict:
+        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])

     return params
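For reference, the bing_images intervals are minute counts: 1440 is one day, 10080 one week, 43200 thirty days. A minimal standalone sketch of the committed suffix logic (the helper name time_filter_suffix is illustrative, not in the patch):

# Illustrative sketch of the bing_images time filter; reproduces the
# committed format string and interval table, nothing more.
time_range_string = '&qft=+filterui:age-lt{interval}'
time_range_dict = {'day': '1440',      # 24 * 60 minutes
                   'week': '10080',    # 7 * 24 * 60 minutes
                   'month': '43200'}   # 30 * 24 * 60 minutes

def time_filter_suffix(time_range):
    # Return the Bing qft suffix for a supported range, else ''.
    if time_range in time_range_dict:
        return time_range_string.format(interval=time_range_dict[time_range])
    return ''

assert time_filter_suffix('week') == '&qft=+filterui:age-lt10080'
assert time_filter_suffix('') == ''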
searx/engines/bing_news.py

@@ -22,10 +22,15 @@ from searx.utils import list_get
 categories = ['news']
 paging = True
 language_support = True
+time_range_support = True

 # search-url
 base_url = 'https://www.bing.com/'
 search_string = 'news/search?{query}&first={offset}&format=RSS'
+search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
+time_range_dict = {'day': '7',
+                   'week': '8',
+                   'month': '9'}


 # remove click
@@ -46,6 +51,19 @@ def image_url_cleanup(url_string):
     return url_string


+def _get_url(query, language, offset, time_range):
+    if time_range in time_range_dict:
+        search_path = search_string_with_time.format(
+            query=urlencode({'q': query, 'setmkt': language}),
+            offset=offset,
+            interval=time_range_dict[time_range])
+    else:
+        search_path = search_string.format(
+            query=urlencode({'q': query, 'setmkt': language}),
+            offset=offset)
+    return base_url + search_path
+
+
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
@@ -55,11 +73,7 @@ def request(query, params):
     else:
         language = params['language'].replace('_', '-')

-    search_path = search_string.format(
-        query=urlencode({'q': query, 'setmkt': language}),
-        offset=offset)
-
-    params['url'] = base_url + search_path
+    params['url'] = _get_url(query, language, offset, params['time_range'])

     return params
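The news engine switches between two whole format strings rather than appending a suffix, because the qft interval sits in the middle of the RSS query string. A standalone sketch of _get_url, ported to Python 3 for demonstration (the committed code targets Python 2's urllib.urlencode):

# Standalone reproduction of the new bing_news URL builder.
from urllib.parse import urlencode  # Python 3 equivalent of the patch's import

base_url = 'https://www.bing.com/'
search_string = 'news/search?{query}&first={offset}&format=RSS'
search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS'
time_range_dict = {'day': '7', 'week': '8', 'month': '9'}  # Bing's opaque interval codes

def _get_url(query, language, offset, time_range):
    if time_range in time_range_dict:
        search_path = search_string_with_time.format(
            query=urlencode({'q': query, 'setmkt': language}),
            offset=offset,
            interval=time_range_dict[time_range])
    else:
        search_path = search_string.format(
            query=urlencode({'q': query, 'setmkt': language}),
            offset=offset)
    return base_url + search_path

print(_get_url('searx', 'en-US', 1, 'week'))
# https://www.bing.com/news/search?q=searx&setmkt=en-US&first=1&qft=interval%3d"8"&format=RSS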
searx/engines/flickr_noapi.py

@@ -14,6 +14,7 @@

 from urllib import urlencode
 from json import loads
+from time import time
 import re
 from searx.engines import logger

@@ -24,21 +25,31 @@ categories = ['images']

 url = 'https://www.flickr.com/'
 search_url = url + 'search?{query}&page={page}'
+time_range_url = '&min_upload_date={start}&max_upload_date={end}'
 photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
 regex = re.compile(r"\"search-photos-lite-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL)
 image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')

 paging = True
+time_range_support = True
+time_range_dict = {'day': 60 * 60 * 24,
+                   'week': 60 * 60 * 24 * 7,
+                   'month': 60 * 60 * 24 * 7 * 4}


 def build_flickr_url(user_id, photo_id):
     return photo_url.format(userid=user_id, photoid=photo_id)


-def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'text': query}),
-                                      page=params['pageno'])
+def _get_time_range_url(time_range):
+    if time_range in time_range_dict:
+        return time_range_url.format(start=time(), end=str(int(time()) - time_range_dict[time_range]))
+    return ''
+
+
+def request(query, params):
+    params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno'])
+                     + _get_time_range_url(params['time_range']))
     return params
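Flickr's filter bounds the upload date with Unix timestamps, and the range lengths are in seconds ('month' is four weeks, not a calendar month). Note that as committed, start is filled with the current time and end with the earlier timestamp. A standalone sketch:

# Standalone reproduction of the flickr_noapi time filter.
from time import time

time_range_url = '&min_upload_date={start}&max_upload_date={end}'
time_range_dict = {'day': 60 * 60 * 24,
                   'week': 60 * 60 * 24 * 7,
                   'month': 60 * 60 * 24 * 7 * 4}  # four weeks, not a calendar month

def _get_time_range_url(time_range):
    # As committed: 'start' carries the current time, 'end' the earlier
    # timestamp (now minus the range length, as an integer string).
    if time_range in time_range_dict:
        return time_range_url.format(start=time(),
                                     end=str(int(time()) - time_range_dict[time_range]))
    return ''

print(_get_time_range_url('day'))  # e.g. &min_upload_date=<now>&max_upload_date=<now - 86400>
print(_get_time_range_url(''))     # '' for unsupported ranges, leaving the URL unchanged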
searx/engines/youtube_noapi.py

@@ -17,10 +17,15 @@ from searx.utils import list_get
 categories = ['videos', 'music']
 paging = True
 language_support = False
+time_range_support = True

 # search-url
 base_url = 'https://www.youtube.com/results'
 search_url = base_url + '?search_query={query}&page={page}'
+time_range_url = '&sp=EgII{time_range}%253D%253D'
+time_range_dict = {'day': 'Ag',
+                   'week': 'Aw',
+                   'month': 'BA'}

 embedded_url = '<iframe width="540" height="304" ' +\
     'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\
@@ -47,6 +52,8 @@ def extract_text_from_dom(result, xpath):
 def request(query, params):
     params['url'] = search_url.format(query=quote_plus(query),
                                       page=params['pageno'])
+    if params['time_range'] in time_range_dict:
+        params['url'] += time_range_url.format(time_range=time_range_dict[params['time_range']])

     return params
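YouTube's time filter rides on the sp parameter: the committed values 'Ag', 'Aw', 'BA' are fragments of YouTube's encoded filter token, and '%253D%253D' is a double-URL-escaped '=='. A standalone sketch of the resulting URL (build_url is an illustrative name, not in the patch):

# Standalone reproduction of the youtube_noapi URL construction.
from urllib.parse import quote_plus

base_url = 'https://www.youtube.com/results'
search_url = base_url + '?search_query={query}&page={page}'
time_range_url = '&sp=EgII{time_range}%253D%253D'
time_range_dict = {'day': 'Ag', 'week': 'Aw', 'month': 'BA'}

def build_url(query, pageno, time_range):
    url = search_url.format(query=quote_plus(query), page=pageno)
    if time_range in time_range_dict:
        url += time_range_url.format(time_range=time_range_dict[time_range])
    return url

print(build_url('searx', 1, 'month'))
# https://www.youtube.com/results?search_query=searx&page=1&sp=EgIIBA%253D%253D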
preferences.html (engine table template)

@@ -158,6 +158,7 @@
 <th>{{ _("Engine name") }}</th>
 <th>{{ _("Shortcut") }}</th>
 <th>{{ _("SafeSearch") }}</th>
+<th>{{ _("Time range") }}</th>
 <th>{{ _("Avg. time") }}</th>
 <th>{{ _("Max time") }}</th>
 {% else %}
@@ -179,6 +180,7 @@
 <th>{{ search_engine.name }}</th>
 <td>{{ shortcuts[search_engine.name] }}</td>
 <td><input type="checkbox" {{ "checked" if search_engine.safesearch==True else ""}} readonly="readonly" disabled="disabled"></td>
+<td><input type="checkbox" {{ "checked" if search_engine.time_range_support==True else ""}} readonly="readonly" disabled="disabled"></td>
 <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
 <td class="{{ 'danger' if stats[search_engine.name]['warn_timeout'] else '' }}">{{ search_engine.timeout }}</td>
 {% else %}
test_bing_images.py

@@ -13,6 +13,7 @@ class TestBingImagesEngine(SearxTestCase):
         dicto['pageno'] = 1
         dicto['language'] = 'fr_FR'
         dicto['safesearch'] = 1
+        dicto['time_range'] = ''
         params = bing_images.request(query, dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])

test_bing_news.py

@@ -12,6 +12,7 @@ class TestBingNewsEngine(SearxTestCase):
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
         dicto['language'] = 'fr_FR'
+        dicto['time_range'] = ''
         params = bing_news.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])

test_flickr_noapi.py

@@ -15,6 +15,7 @@ class TestFlickrNoapiEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
+        dicto['time_range'] = ''
         params = flickr_noapi.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])

test_youtube_noapi.py

@@ -11,6 +11,7 @@ class TestYoutubeNoAPIEngine(SearxTestCase):
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
+        dicto['time_range'] = ''
         params = youtube_noapi.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
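The updated tests only exercise the empty time_range path. A hypothetical follow-up case, not part of this patch, that checks a supported range actually reaches the URL (import paths assumed from the existing suite):

# Hypothetical follow-up test; mirrors the style of the existing cases and
# assumes the suite's searx.testing / searx.engines imports.
from collections import defaultdict
from searx.engines import bing_images
from searx.testing import SearxTestCase

class TestBingImagesTimeRange(SearxTestCase):

    def test_request_with_time_range(self):
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['language'] = 'fr_FR'
        dicto['safesearch'] = 1
        dicto['time_range'] = 'day'
        params = bing_images.request(query, dicto)
        # the 'day' interval is 1440 minutes
        self.assertIn('filterui:age-lt1440', params['url'])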