Merge pull request #542 from ukwt/fix538

[fix] incorrect URLs in Reddit image search results - closes #538
This commit is contained in:
Adam Tauber 2016-04-16 13:29:12 +02:00
commit 817c74e523
2 changed files with 11 additions and 5 deletions

View file

@@ -13,7 +13,7 @@
import json import json
from cgi import escape from cgi import escape
from urllib import urlencode from urllib import urlencode
from urlparse import urlparse from urlparse import urlparse, urljoin
from datetime import datetime from datetime import datetime
# engine dependent config # engine dependent config
@@ -21,7 +21,8 @@ categories = ['general', 'images', 'news', 'social media']
page_size = 25 page_size = 25
# search-url # search-url
search_url = 'https://www.reddit.com/search.json?{query}' base_url = 'https://www.reddit.com/'
search_url = base_url + 'search.json?{query}'
# do search-request # do search-request
@@ -52,7 +53,7 @@ def response(resp):
# extract post information # extract post information
params = { params = {
'url': data['url'], 'url': urljoin(base_url, data['permalink']),
'title': data['title'] 'title': data['title']
} }
@@ -61,6 +62,7 @@ def response(resp):
url_info = urlparse(thumbnail) url_info = urlparse(thumbnail)
# netloc & path # netloc & path
if url_info[1] != '' and url_info[2] != '': if url_info[1] != '' and url_info[2] != '':
params['img_src'] = data['url']
params['thumbnail_src'] = thumbnail params['thumbnail_src'] = thumbnail
params['template'] = 'images.html' params['template'] = 'images.html'
img_results.append(params) img_results.append(params)

View file

@@ -25,7 +25,8 @@ class TestRedditEngine(SearxTestCase):
"data": { "data": {
"children": [{ "children": [{
"data": { "data": {
"url": "http://google.com/", "url": "http://google2.com/",
"permalink": "http://google.com/",
"title": "Title number one", "title": "Title number one",
"selftext": "Sample", "selftext": "Sample",
"created_utc": 1401219957.0, "created_utc": 1401219957.0,
@@ -33,7 +34,8 @@ class TestRedditEngine(SearxTestCase):
} }
}, { }, {
"data": { "data": {
"url": "https://reddit.com/", "url": "https://reddit2.com/",
"permalink": "https://reddit.com/",
"title": "Title number two", "title": "Title number two",
"selftext": "Dominus vobiscum", "selftext": "Dominus vobiscum",
"created_utc": 1438792533.0, "created_utc": 1438792533.0,
@@ -55,6 +57,7 @@ class TestRedditEngine(SearxTestCase):
self.assertEqual(r['url'], 'http://google.com/') self.assertEqual(r['url'], 'http://google.com/')
self.assertEqual(r['title'], 'Title number one') self.assertEqual(r['title'], 'Title number one')
self.assertEqual(r['template'], 'images.html') self.assertEqual(r['template'], 'images.html')
self.assertEqual(r['img_src'], 'http://google2.com/')
self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg') self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg')
# testing second result (self-post) # testing second result (self-post)
@@ -65,3 +68,4 @@ class TestRedditEngine(SearxTestCase):
created = datetime.fromtimestamp(1438792533.0) created = datetime.fromtimestamp(1438792533.0)
self.assertEqual(r['publishedDate'], created) self.assertEqual(r['publishedDate'], created)
self.assertTrue('thumbnail_src' not in r) self.assertTrue('thumbnail_src' not in r)
self.assertTrue('img_src' not in r)