Add 500px and Searchcode engines

Allow searching for images on 500px. It doesn't use the official API; it parses the search result page instead. That is less stable, but it means one fewer API key to obtain...

Two engines were necessary for Searchcode because there are two search modes: searching for documentation or searching for code examples. Both use open APIs.
This commit is contained in:
Cqoicebordel 2014-12-20 07:07:32 +01:00
parent 667f4d5cfc
commit 56399cf1ea
4 changed files with 183 additions and 0 deletions

57
searx/engines/500px.py Normal file
View file

@ -0,0 +1,57 @@
## 500px (Images)
#
# @website https://500px.com
# @provide-api yes (https://developers.500px.com/)
#
# @using-api no
# @results HTML
# @stable no (HTML can change)
# @parse url, title, thumbnail, img_src, content
#
# @todo rewrite to api
from urllib import urlencode
from urlparse import urljoin
from lxml import html
# engine dependent config
categories = ['images']
paging = True

# search-url
# Exactly one '?' separates the path from the query string, so that 'page',
# 'type' and the encoded query are each parsed as separate parameters.
base_url = 'https://500px.com'
search_url = base_url + '/search?page={pageno}&type=photos&{query}'
# do search-request
def request(query, params):
    """Store the 500px search URL for ``query`` in ``params['url']``.

    The page number is taken from ``params['pageno']`` and the query is
    URL-encoded as the ``q`` parameter. ``params`` is updated in place and
    returned.
    """
    page_number = params['pageno']
    encoded_query = urlencode({'q': query})

    params['url'] = search_url.format(pageno=page_number,
                                      query=encoded_query)
    return params
# get response from search-request
def response(resp):
    """Parse a 500px search result page into a list of image results.

    ``resp.text`` is parsed as HTML and every ``<div class="photo">`` element
    is turned into a dict with the keys ``url``, ``title``, ``img_src``,
    ``content`` and ``template``.

    The page layout is not a stable interface, so incomplete tiles are
    tolerated: a tile without a link or without an image source is skipped,
    and a missing title or info text becomes an empty string. One malformed
    tile therefore no longer discards the whole result page.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath('//div[@class="photo"]'):
        links = result.xpath('.//a')
        if not links:
            continue
        link = links[0]

        images = link.xpath('.//img')
        img_src = images[0].attrib.get('src') if images else None
        if not img_src:
            # an image result without an image is of no use
            continue

        titles = result.xpath('.//div[@class="title"]//text()')
        infos = result.xpath('.//div[@class="info"]//text()')

        # append result
        results.append({'url': urljoin(base_url, link.attrib.get('href')),
                        'title': titles[0] if titles else '',
                        'img_src': img_src,
                        'content': infos[0] if infos else '',
                        'template': 'images.html'})

    # return results
    return results

View file

@ -0,0 +1,65 @@
## Searchcode (It)
#
# @website https://searchcode.com/
# @provide-api yes (https://searchcode.com/api/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content
from urllib import urlencode
from json import loads
import cgi
import re
# search-url: site root and the URL template of the code-search API endpoint
url = 'https://searchcode.com/'
search_url = url + 'api/codesearch_I/?{query}&p={pageno}'

# engine dependent config
paging = True
categories = ['it']
# do search-request
def request(query, params):
    """Store the searchcode code-search URL for ``query`` in ``params['url']``.

    The query is URL-encoded as the ``q`` parameter. The ``p`` parameter is
    ``params['pageno']`` minus one (presumably because the API counts pages
    from zero -- confirm against the API documentation). ``params`` is
    updated in place and returned.
    """
    encoded_query = urlencode({'q': query})
    page_index = params['pageno'] - 1

    params['url'] = search_url.format(query=encoded_query, pageno=page_index)
    return params
# get response from search-request
def response(resp):
    """Convert a searchcode code-search JSON reply into a list of results.

    ``resp.text`` is decoded as JSON. Every entry of its ``results`` list
    becomes a dict with the keys ``url``, ``title`` and ``content``, where
    ``content`` is an HTML fragment: the repository, a line break and a table
    holding the matching source lines ordered by line number.

    A reply without a ``results`` key yields an empty list.
    """
    # cgi.escape is not available on every interpreter (it was removed in
    # Python 3.8), so prefer html.escape and fall back to cgi on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    results = []

    search_results = loads(resp.text)

    # parse results
    for result in search_results.get('results', []):
        href = result['url']
        title = "" + result['name'] + " - " + result['filename']

        # JSON object keys are strings; convert the line numbers to int so
        # the snippet is ordered numerically rather than lexicographically
        lines = dict((int(line), code)
                     for line, code in result['lines'].items())

        # the repository comes from the remote service and is placed inside
        # markup, so it is escaped like the code itself
        parts = [escape(result['repo'], quote=False),
                 "<br />",
                 '<pre class="code-formatter"><table class="code">']

        for line, code in sorted(lines.items()):
            # Replace every two spaces with '&nbsp; ' (and a leftover pair
            # with ' &nbsp;') to keep the indentation while still allowing
            # the browser to break the line if necessary. Single spaces are
            # left alone; a tab is rendered as four spaces.
            snippet = (escape(code, quote=False)
                       .replace('\t', '    ')
                       .replace('  ', '&nbsp; ')
                       .replace('  ', ' &nbsp;'))

            parts.append('<tr><td class="line-number" '
                         'style="padding-right:5px;">')
            parts.append(str(line))
            parts.append('</td><td class="code-snippet">')
            parts.append(snippet)
            parts.append("</td></tr>")

        parts.append("</table></pre>")

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': ''.join(parts)})

    # return results
    return results

View file

@ -0,0 +1,49 @@
## Searchcode (It)
#
# @website https://searchcode.com/
# @provide-api yes (https://searchcode.com/api/)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse url, title, content
from urllib import urlencode
from json import loads
# search-url: site root and the URL template of the documentation-search
# API endpoint
url = 'https://searchcode.com/'
search_url = url + 'api/search_IV/?{query}&p={pageno}'

# engine dependent config
paging = True
categories = ['it']
# do search-request
def request(query, params):
    """Store the searchcode documentation-search URL in ``params['url']``.

    ``query`` is URL-encoded as the ``q`` parameter and the ``p`` parameter
    is set to ``params['pageno']`` minus one (presumably a zero-based page
    index -- confirm against the API documentation). The same ``params``
    dict is returned after being updated in place.
    """
    query_string = urlencode({'q': query})
    page_index = params['pageno'] - 1

    target = search_url.format(query=query_string, pageno=page_index)
    params['url'] = target
    return params
# get response from search-request
def response(resp):
    """Convert a searchcode documentation-search JSON reply into results.

    ``resp.text`` is decoded as JSON. Every entry of its ``results`` list
    becomes a dict with the keys ``url``, ``title`` and ``content``:

    * ``title`` is ``"[<type>] <namespace> <name>"``
    * ``content`` is an HTML fragment with a highlighted
      ``"[<type>] <name> <synopsis>"`` line followed by the description

    A reply without a ``results`` key yields an empty list.
    """
    results = []

    search_results = loads(resp.text)

    # parse results
    for result in search_results.get('results', []):
        href = result['url']
        kind = "[" + result['type'] + "] "
        title = kind + result['namespace'] + " " + result['name']

        # NOTE(review): synopsis and description are inserted into the markup
        # as received -- confirm whether the API returns plain text that
        # should be escaped here.
        content = ('<span class="highlight">' + kind + result['name'] + " "
                   + result['synopsis'] + "</span><br />"
                   + result['description'])

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content})

    # return results
    return results

View file

@ -64,6 +64,10 @@ engines:
# engine : filecrop
# categories : files
# shortcut : fc
- name : 500px
engine : 500px
shortcut : px
- name : flickr
engine : flickr
@ -114,6 +118,14 @@ engines:
- name : stackoverflow
engine : stackoverflow
shortcut : st
- name : searchcode doc
engine : searchcode_doc
shortcut : scd
- name : searchcode code
engine : searchcode_code
shortcut : scc
- name : startpage
engine : startpage