forked from Ponysearch/Ponysearch
Merge pull request #639 from kvch/digbt-engine
add digbt engine - fixes #638
This commit is contained in:
commit
13bed1f872
5 changed files with 144 additions and 15 deletions
|
@ -16,6 +16,7 @@ from urllib import quote
|
|||
from lxml import html
|
||||
from operator import itemgetter
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import get_torrent_size
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos', 'music', 'files']
|
||||
|
@ -68,20 +69,7 @@ def response(resp):
|
|||
leech = 0
|
||||
|
||||
# convert filesize to byte if possible
|
||||
try:
|
||||
filesize = float(filesize)
|
||||
|
||||
# convert filesize to byte
|
||||
if filesize_multiplier == 'TB':
|
||||
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
|
||||
elif filesize_multiplier == 'GB':
|
||||
filesize = int(filesize * 1024 * 1024 * 1024)
|
||||
elif filesize_multiplier == 'MB':
|
||||
filesize = int(filesize * 1024 * 1024)
|
||||
elif filesize_multiplier == 'KB':
|
||||
filesize = int(filesize * 1024)
|
||||
except:
|
||||
filesize = None
|
||||
filesize = get_torrent_size(filesize, filesize_multiplier)
|
||||
|
||||
# convert files to int if possible
|
||||
if files.isdigit():
|
||||
|
|
58
searx/engines/digbt.py
Normal file
58
searx/engines/digbt.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
"""
|
||||
DigBT (Videos, Music, Files)
|
||||
|
||||
@website https://digbt.org
|
||||
@provide-api no
|
||||
|
||||
@using-api no
|
||||
@results HTML (using search portal)
|
||||
@stable no (HTML can change)
|
||||
@parse url, title, content, filesize, magnetlink
|
||||
"""
|
||||
|
||||
from urlparse import urljoin
|
||||
from lxml import html
|
||||
from searx.engines.xpath import extract_text
|
||||
from searx.utils import get_torrent_size
|
||||
|
||||
# Engine configuration.
# NOTE(review): presumably the searx result categories this engine is listed
# under, and the flag telling searx that the engine accepts a page number --
# confirm against the engine loader.
categories = ['videos', 'music', 'files']
paging = True

# Site root; relative result links are joined against it in response().
URL = 'https://digbt.org'
# Search page template; request() fills in the query and params['pageno'].
SEARCH_URL = URL + '/search/{query}-time-{pageno}'
# Positions of the size value and of its unit inside the whitespace-split
# text of a result's "tail" div, e.g. "Files: 1 Size: 2.9 GB Downloads: 1 ..."
# -> token 3 is "2.9", token 4 is "GB".
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
|
||||
|
||||
|
||||
def request(query, params):
    """Point ``params['url']`` at the DigBT search page for *query*.

    The page number is taken from ``params['pageno']``. The ``params``
    mapping is updated in place and returned.
    """
    page_number = params['pageno']
    params['url'] = SEARCH_URL.format(pageno=page_number, query=query)
    return params
|
||||
|
||||
|
||||
def response(resp):
    """Parse a DigBT search results page into a list of torrent results.

    Every ``<td class="x-item">`` cell of the page is one result. Each
    returned dict carries ``url``, ``title``, ``content``, ``filesize``,
    ``magnetlink``, ``seed``, ``leech`` and ``template``.

    Cells that lack the detail link, its text or the magnet link are
    skipped, so one malformed row no longer aborts the whole page with an
    ``IndexError``.

    :param resp: response object; only its ``content`` attribute is read
    :return: list of result dicts, empty when the page has no result cells
    """
    dom = html.fromstring(resp.content)

    results = []
    for item in dom.xpath('.//td[@class="x-item"]'):
        hrefs = item.xpath('.//a[@title]/@href')
        titles = item.xpath('.//a[@title]/text()')
        magnets = item.xpath('.//div[@class="tail"]//a[@class="title"]/@href')

        if not (hrefs and titles and magnets):
            # incomplete row: drop it rather than fail on the [0] lookups below
            continue

        content = extract_text(item.xpath('.//div[@class="files"]'))

        # The tail text looks like "Files: 1 Size: 2.9 GB Downloads: ...";
        # the size value and its unit are picked by token position.
        tail_tokens = extract_text(item.xpath('.//div[@class="tail"]')).split()
        if len(tail_tokens) > max(FILESIZE, FILESIZE_MULTIPLIER):
            filesize = get_torrent_size(tail_tokens[FILESIZE],
                                        tail_tokens[FILESIZE_MULTIPLIER])
        else:
            # tail too short to hold a size: report it as unknown
            filesize = None

        results.append({'url': urljoin(URL, hrefs[0]),
                        'title': titles[0],
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnets[0],
                        # the result page exposes no seeder/leecher counts
                        'seed': 'N/A',
                        'leech': 'N/A',
                        'template': 'torrent.html'})

    return results
|
|
@ -87,7 +87,7 @@ engines:
|
|||
- name : btdigg
|
||||
engine : btdigg
|
||||
shortcut : bt
|
||||
|
||||
|
||||
- name : crossref
|
||||
engine : json_engine
|
||||
paging : True
|
||||
|
@ -118,6 +118,12 @@ engines:
|
|||
weight : 2
|
||||
disabled : True
|
||||
|
||||
- name : digbt
|
||||
engine : digbt
|
||||
shortcut : dbt
|
||||
timeout : 6.0
|
||||
disabled : True
|
||||
|
||||
- name : digg
|
||||
engine : digg
|
||||
shortcut : dg
|
||||
|
|
|
@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
|
|||
return a_list[index]
|
||||
else:
|
||||
return default
|
||||
|
||||
|
||||
def get_torrent_size(filesize, filesize_multiplier):
    """Convert a torrent size given as value plus unit into bytes.

    :param filesize: the numeric part, as a number or a string accepted by
                     ``float()`` (e.g. ``'2.9'``)
    :param filesize_multiplier: the unit; ``'TB'``, ``'GB'``, ``'MB'`` and
                                ``'KB'`` are recognised (1024-based)
    :return: the size in bytes as a truncated ``int`` for a recognised unit,
             the plain ``float`` value for any other unit, or ``None`` when
             ``filesize`` cannot be converted
    """
    try:
        filesize = float(filesize)

        if filesize_multiplier == 'TB':
            filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
        elif filesize_multiplier == 'GB':
            filesize = int(filesize * 1024 * 1024 * 1024)
        elif filesize_multiplier == 'MB':
            filesize = int(filesize * 1024 * 1024)
        elif filesize_multiplier == 'KB':
            filesize = int(filesize * 1024)
    except (TypeError, ValueError, OverflowError):
        # TypeError: non-numeric object such as None; ValueError: unparsable
        # text or NaN; OverflowError: infinity. Anything else (for example
        # KeyboardInterrupt) is deliberately left to propagate.
        filesize = None

    return filesize
|
||||
|
|
59
tests/unit/engines/test_digbt.py
Normal file
59
tests/unit/engines/test_digbt.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
from collections import defaultdict
|
||||
import mock
|
||||
from searx.engines import digbt
|
||||
from searx.testing import SearxTestCase
|
||||
|
||||
|
||||
class TestDigBTEngine(SearxTestCase):
    """Unit tests for the request and response hooks of the digbt engine."""

    def test_request(self):
        # request() must store a digbt.org URL containing the query
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 0

        params = digbt.request(query, request_params)

        self.assertIn('url', params)
        url = params['url']
        self.assertIn(query, url)
        self.assertIn('digbt.org', url)

    def test_response(self):
        # objects without a ``content`` attribute are rejected
        for invalid in (None, [], '', '[]'):
            self.assertRaises(AttributeError, digbt.response, invalid)

        # a page without result cells yields no results
        empty_page = mock.Mock(content='<html></html>')
        self.assertEqual(digbt.response(empty_page), [])

        # a page with exactly one result row
        page = """
        <table class="table">
            <tr><td class="x-item">
            <div>
                <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a>
                <span class="ctime"><span style="color:red;">4 hours ago</span></span>
            </div>
            <div class="files">
                <ul>
                    <li>The Big Bang Theory 2.9 GB</li>
                    <li>....</li>
                </ul>
            </div>
            <div class="tail">
                Files: 1 Size: 2.9 GB Downloads: 1 Updated: <span style="color:red;">4 hours ago</span>

                <a class="title" href="magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory">
                    <span class="glyphicon glyphicon-magnet"></span> magnet-link
                </a>

            </div>
            </td></tr>
        </table>
        """
        results = digbt.response(mock.Mock(content=page))

        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)

        first = results[0]
        self.assertEqual(first['title'], 'The Big Bang Theory')
        self.assertEqual(first['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html')
        self.assertEqual(first['content'], 'The Big Bang Theory 2.9 GB ....')
        self.assertEqual(first['filesize'], 3113851289)
        self.assertEqual(first['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory')
|
Loading…
Reference in a new issue