[mod] restore btdigg engine as btdig.com (#1515)

This commit is contained in:
volth 2019-07-25 06:40:48 +00:00 committed by Alexandre Flament
parent 3b1122c5fa
commit eb182df132
6 changed files with 94 additions and 369 deletions

View file

@ -1,7 +1,7 @@
""" """
BTDigg (Videos, Music, Files) BTDigg (Videos, Music, Files)
@website https://btdigg.org @website https://btdig.com
@provide-api yes (on demand) @provide-api yes (on demand)
@using-api no @using-api no
@ -21,7 +21,7 @@ categories = ['videos', 'music', 'files']
paging = True paging = True
# search-url # search-url
url = 'https://btdigg.org' url = 'https://btdig.com'
search_url = url + '/search?q={search_term}&p={pageno}' search_url = url + '/search?q={search_term}&p={pageno}'
@ -39,7 +39,7 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
search_res = dom.xpath('//div[@id="search_res"]/table/tr') search_res = dom.xpath('//div[@class="one_result"]')
# return empty array if nothing is found # return empty array if nothing is found
if not search_res: if not search_res:
@ -47,46 +47,39 @@ def response(resp):
# parse results # parse results
for result in search_res: for result in search_res:
link = result.xpath('.//td[@class="torrent_name"]//a')[0] link = result.xpath('.//div[@class="torrent_name"]//a')[0]
href = urljoin(url, link.attrib.get('href')) href = urljoin(url, link.attrib.get('href'))
title = extract_text(link) title = extract_text(link)
content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
content = "<br />".join(content.split("\n"))
filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0] excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
filesize_multiplier = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[1] content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False)
files = result.xpath('.//span[@class="attr_val"]/text()')[1] # it is better to emit <br/> instead of |, but html tags are verboten
seed = result.xpath('.//span[@class="attr_val"]/text()')[2] content = content.strip().replace('\n', ' | ')
content = ' '.join(content.split())
# convert seed to int if possible filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[0]
if seed.isdigit(): filesize_multiplier = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[1]
seed = int(seed) files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0]
else:
seed = 0
leech = 0
# convert filesize to byte if possible # convert filesize to byte if possible
filesize = get_torrent_size(filesize, filesize_multiplier) filesize = get_torrent_size(filesize, filesize_multiplier)
# convert files to int if possible # convert files to int if possible
if files.isdigit(): try:
files = int(files) files = int(files)
else: except:
files = None files = None
magnetlink = result.xpath('.//td[@class="ttth"]//a')[0].attrib['href'] magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href']
# append result # append result
results.append({'url': href, results.append({'url': href,
'title': title, 'title': title,
'content': content, 'content': content,
'seed': seed,
'leech': leech,
'filesize': filesize, 'filesize': filesize,
'files': files, 'files': files,
'magnetlink': magnetlink, 'magnetlink': magnetlink,
'template': 'torrent.html'}) 'template': 'torrent.html'})
# return results sorted by seeder # return results sorted by seeder
return sorted(results, key=itemgetter('seed'), reverse=True) return results

View file

@ -115,6 +115,10 @@ engines:
disabled : True disabled : True
shortcut : bb shortcut : bb
- name : btdigg
engine : btdigg
shortcut : bt
- name : ccc-tv - name : ccc-tv
engine : xpath engine : xpath
paging : False paging : False

View file

@ -4,7 +4,7 @@
{% endif %} {% endif %}
<h3 class="result_title"><a href="{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ result.title|safe }}</a></h3> <h3 class="result_title"><a href="{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ result.title|safe }}</a></h3>
{% if result.content %}<span class="content">{{ result.content|safe }}</span><br />{% endif %} {% if result.content %}<span class="content">{{ result.content|safe }}</span><br />{% endif %}
<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span><br /> {% if result.seed %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span><br />{% endif %}
<span> <span>
{% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %} {% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %}
{% if result.torrentfile %}<a href="{{ result.torrentfile }}" class="torrentfile" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('torrent file') }}</a>{% endif %} {% if result.torrentfile %}<a href="{{ result.torrentfile }}" class="torrentfile" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('torrent file') }}</a>{% endif %}

View file

@ -8,6 +8,6 @@
<p> <p>
{% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %} {% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %}
{% if result.torrentfile %}<a href="{{ result.torrentfile }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %} class="torrentfile">{{ _('torrent file') }}</a>{% endif %} - {% if result.torrentfile %}<a href="{{ result.torrentfile }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %} class="torrentfile">{{ _('torrent file') }}</a>{% endif %} -
<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span> {% if result.seed %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span>{% endif %}
</p> </p>
</div> </div>

View file

@ -3,7 +3,7 @@
{{ result_header(result, favicons) }} {{ result_header(result, favicons) }}
{{ result_sub_header(result) }} {{ result_sub_header(result) }}
<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span> {% if result.seed %}<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span>{% endif %}
{% if result.filesize %}<br />{{ icon('floppy-disk') }} {{ _('Filesize') }} {% if result.filesize %}<br />{{ icon('floppy-disk') }} {{ _('Filesize') }}
<span class="badge"> <span class="badge">
{% if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }} {% if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }}

View file

@ -14,7 +14,7 @@ class TestBtdiggEngine(SearxTestCase):
params = btdigg.request(query, dicto) params = btdigg.request(query, dicto)
self.assertIn('url', params) self.assertIn('url', params)
self.assertIn(query, params['url']) self.assertIn(query, params['url'])
self.assertIn('btdigg.org', params['url']) self.assertIn('btdig.com', params['url'])
def test_response(self): def test_response(self):
self.assertRaises(AttributeError, btdigg.response, None) self.assertRaises(AttributeError, btdigg.response, None)
@ -26,359 +26,87 @@ class TestBtdiggEngine(SearxTestCase):
self.assertEqual(btdigg.response(response), []) self.assertEqual(btdigg.response(response), [])
html = u""" html = u"""
<div id="search_res"> <div class="one_result" style="display:table-row;background-color:#e8e8e8">
<table> <div style="display:table-cell;color:rgb(0, 0, 0)">
<tr> <div style="display:table">
<td class="idx">1</td> <div style="display:table-row">
<td> <div class="torrent_name" style="display:table-cell">
<table class="torrent_name_tbl"> <a style="color:rgb(0, 0, 204);text-decoration:underline;font-size:150%"
<tr> href="http://btdig.com/a72f35b7ee3a10928f02bb799e40ae5db701ed1c/pdf?q=pdf&amp;p=1&amp;order=0"
<td class="torrent_name"> >3.9GBdeLibrosByHuasoFromHell(3de4)</a>
<a href="/url">Should be the title</a> </div>
</td> </div>
</tr> </div>
</table> <div style="display:table">
<table class="torrent_name_tbl"> <div style="display:table-row">
<tr> <div style="display:table-cell">
<td class="ttth"> <span class="torrent_files" style="color:#666;padding-left:10px">4217</span> files <span
<a onclick="fclck(this.href)" href="magnet:?xt=urn:btih:magnet&amp;dn=Test" class="torrent_size" style="color:#666;padding-left:10px">1 GB</span><span
title="Télécharger des liens Magnet">[magnet]</a> class="torrent_age" style="color:rgb(0, 102, 0);padding-left:10px;margin: 0px 4px"
</td> >found 3 years ago</span>
<td class="ttth"> </div>
<a href="https://btcloud.io/manager?cmd=add&amp;info_hash=hash" </div>
target="_blank" title="Ajouter à BTCloud">[cloud]</a> </div>
</td> <div style="display:table;width:100%;padding:10px">
<td> <div style="display:table-row">
<span class="attr_name">Taille:</span> <div class="torrent_magnet" style="display:table-cell">
<span class="attr_val">8 B</span> <div class="fa fa-magnet" style="color:#cc0000">
</td> <a href="magnet:?xt=urn:btih:a72f35b7ee3a10928f02bb799e40ae5db701ed1c&amp;dn=3.9GBdeLibrosBy..."
<td> title="Download via magnet-link"> magnet:?xt=urn:btih:a72f35b7ee...</a>
<span class="attr_name">Fichiers:</span> </div>
<span class="attr_val">710</span> </div>
</td> <div style="display:table-cell;color:rgb(0, 0, 0);text-align:right">
<td> <span style="color:rgb(136, 136, 136);margin: 0px 0px 0px 4px"></span><span
<span class="attr_name">Téléchargements:</span> style="color:rgb(0, 102, 0);margin: 0px 4px">found 3 years ago</span>
<span class="attr_val">5</span> </div>
</td> </div>
<td> </div>
<span class="attr_name">Temps:</span> <div class="torrent_excerpt" style="display:table;padding:10px;white-space:nowrap">
<span class="attr_val">417.8&nbsp;jours</span> <div class="fa fa-folder-open" style="padding-left:0em"> 3.9GBdeLibrosByHuasoFromHell(3de4)</div><br/>
</td> <div class="fa fa-folder-open" style="padding-left:1em"> Libros H-Z</div><br/>
<td> <div class="fa fa-folder-open" style="padding-left:2em"> H</div><br/><div class="fa fa-file-archive-o"
<span class="attr_name">Dernière&nbsp;mise&nbsp;à&nbsp;jour:</span> style="padding-left:3em"> H.H. Hollis - El truco de la espada-<b
<span class="attr_val">5.3&nbsp;jours</span> style="color:red; background-color:yellow">pdf</b>.zip</div><span
</td> style="color:#666;padding-left:10px">17 KB</span><br/>
<td> <div class="fa fa-file-archive-o" style="padding-left:3em"> Hagakure - El Libro del Samurai-<b
<span class="attr_name">Faux:</span> style="color:red; background-color:yellow">pdf</b>.zip</div><span
<span class="attr_val">Aucun</span> style="color:#666;padding-left:10px">95 KB</span><br/>
</td> <div class="fa fa-folder-open" style="padding-left:3em"> Hamsun, Knut (1859-1952)</div><br/>
</tr> <div class="fa fa-file-archive-o" style="padding-left:4em"> Hamsun, Knut - Hambre-<b
</table> style="color:red; background-color:yellow">pdf</b>.zip</div><span
<pre class="snippet"> style="color:#666;padding-left:10px">786 KB</span><br/>
Content <div class="fa fa-plus-circle"><a
</pre> href="http://btdig.com/a72f35b7ee3a10928f02bb799e40ae5db701ed1c/pdf?q=pdf&amp;p=1&amp;order=0"
</td> > 4214 hidden files<span style="color:#666;padding-left:10px">1 GB</span></a></div>
</tr> </div>
</table> </div>
</div> </div>
""" """
response = mock.Mock(text=html.encode('utf-8')) response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 1) self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'Should be the title') self.assertEqual(results[0]['title'], '3.9GBdeLibrosByHuasoFromHell(3de4)')
self.assertEqual(results[0]['url'], 'https://btdigg.org/url') self.assertEqual(results[0]['url'],
self.assertEqual(results[0]['content'], 'Content') 'http://btdig.com/a72f35b7ee3a10928f02bb799e40ae5db701ed1c/pdf?q=pdf&p=1&order=0')
self.assertEqual(results[0]['seed'], 5) self.assertEqual(results[0]['content'],
self.assertEqual(results[0]['leech'], 0) '3.9GBdeLibrosByHuasoFromHell(3de4) | ' +
self.assertEqual(results[0]['filesize'], 8) 'Libros H-Z | ' +
self.assertEqual(results[0]['files'], 710) 'H H.H. Hollis - El truco de la espada-pdf.zip17 KB | ' +
self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:magnet&dn=Test') 'Hagakure - El Libro del Samurai-pdf.zip95 KB | ' +
'Hamsun, Knut (1859-1952) | Hamsun, Knut - Hambre-pdf.zip786 KB | ' +
'4214 hidden files1 GB')
self.assertEqual(results[0]['filesize'], 1 * 1024 * 1024 * 1024)
self.assertEqual(results[0]['files'], 4217)
self.assertEqual(results[0]['magnetlink'],
'magnet:?xt=urn:btih:a72f35b7ee3a10928f02bb799e40ae5db701ed1c&dn=3.9GBdeLibrosBy...')
html = """ html = """
<div id="search_res"> <div style="display:table-row;background-color:#e8e8e8">
<table>
</table>
</div> </div>
""" """
response = mock.Mock(text=html.encode('utf-8')) response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response) results = btdigg.response(response)
self.assertEqual(type(results), list) self.assertEqual(type(results), list)
self.assertEqual(len(results), 0) self.assertEqual(len(results), 0)
html = u"""
<div id="search_res">
<table>
<tr>
<td class="idx">1</td>
<td>
<table class="torrent_name_tbl">
<tr>
<td class="torrent_name">
<a href="/url">Should be the title</a>
</td>
</tr>
</table>
<table class="torrent_name_tbl">
<tr>
<td class="ttth">
<a onclick="fclck(this.href)" href="magnet:?xt=urn:btih:magnet&amp;dn=Test"
title="Télécharger des liens Magnet">[magnet]</a>
</td>
<td class="ttth">
<a href="https://btcloud.io/manager?cmd=add&amp;info_hash=hash"
target="_blank" title="Ajouter à BTCloud">[cloud]</a>
</td>
<td>
<span class="attr_name">Taille:</span>
<span class="attr_val">1 KB</span>
</td>
<td>
<span class="attr_name">Fichiers:</span>
<span class="attr_val">710</span>
</td>
<td>
<span class="attr_name">Téléchargements:</span>
<span class="attr_val">5</span>
</td>
<td>
<span class="attr_name">Temps:</span>
<span class="attr_val">417.8&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Dernière&nbsp;mise&nbsp;à&nbsp;jour:</span>
<span class="attr_val">5.3&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Faux:</span>
<span class="attr_val">Aucun</span>
</td>
</tr>
</table>
<pre class="snippet">
Content
</pre>
</td>
</tr>
<tr>
<td class="idx">1</td>
<td>
<table class="torrent_name_tbl">
<tr>
<td class="torrent_name">
<a href="/url">Should be the title</a>
</td>
</tr>
</table>
<table class="torrent_name_tbl">
<tr>
<td class="ttth">
<a onclick="fclck(this.href)" href="magnet:?xt=urn:btih:magnet&amp;dn=Test"
title="Télécharger des liens Magnet">[magnet]</a>
</td>
<td class="ttth">
<a href="https://btcloud.io/manager?cmd=add&amp;info_hash=hash"
target="_blank" title="Ajouter à BTCloud">[cloud]</a>
</td>
<td>
<span class="attr_name">Taille:</span>
<span class="attr_val">1 MB</span>
</td>
<td>
<span class="attr_name">Fichiers:</span>
<span class="attr_val">a</span>
</td>
<td>
<span class="attr_name">Téléchargements:</span>
<span class="attr_val">4</span>
</td>
<td>
<span class="attr_name">Temps:</span>
<span class="attr_val">417.8&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Dernière&nbsp;mise&nbsp;à&nbsp;jour:</span>
<span class="attr_val">5.3&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Faux:</span>
<span class="attr_val">Aucun</span>
</td>
</tr>
</table>
<pre class="snippet">
Content
</pre>
</td>
</tr>
<tr>
<td class="idx">1</td>
<td>
<table class="torrent_name_tbl">
<tr>
<td class="torrent_name">
<a href="/url">Should be the title</a>
</td>
</tr>
</table>
<table class="torrent_name_tbl">
<tr>
<td class="ttth">
<a onclick="fclck(this.href)" href="magnet:?xt=urn:btih:magnet&amp;dn=Test"
title="Télécharger des liens Magnet">[magnet]</a>
</td>
<td class="ttth">
<a href="https://btcloud.io/manager?cmd=add&amp;info_hash=hash"
target="_blank" title="Ajouter à BTCloud">[cloud]</a>
</td>
<td>
<span class="attr_name">Taille:</span>
<span class="attr_val">1 GB</span>
</td>
<td>
<span class="attr_name">Fichiers:</span>
<span class="attr_val">710</span>
</td>
<td>
<span class="attr_name">Téléchargements:</span>
<span class="attr_val">3</span>
</td>
<td>
<span class="attr_name">Temps:</span>
<span class="attr_val">417.8&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Dernière&nbsp;mise&nbsp;à&nbsp;jour:</span>
<span class="attr_val">5.3&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Faux:</span>
<span class="attr_val">Aucun</span>
</td>
</tr>
</table>
<pre class="snippet">
Content
</pre>
</td>
</tr>
<tr>
<td class="idx">1</td>
<td>
<table class="torrent_name_tbl">
<tr>
<td class="torrent_name">
<a href="/url">Should be the title</a>
</td>
</tr>
</table>
<table class="torrent_name_tbl">
<tr>
<td class="ttth">
<a onclick="fclck(this.href)" href="magnet:?xt=urn:btih:magnet&amp;dn=Test"
title="Télécharger des liens Magnet">[magnet]</a>
</td>
<td class="ttth">
<a href="https://btcloud.io/manager?cmd=add&amp;info_hash=hash"
target="_blank" title="Ajouter à BTCloud">[cloud]</a>
</td>
<td>
<span class="attr_name">Taille:</span>
<span class="attr_val">1 TB</span>
</td>
<td>
<span class="attr_name">Fichiers:</span>
<span class="attr_val">710</span>
</td>
<td>
<span class="attr_name">Téléchargements:</span>
<span class="attr_val">2</span>
</td>
<td>
<span class="attr_name">Temps:</span>
<span class="attr_val">417.8&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Dernière&nbsp;mise&nbsp;à&nbsp;jour:</span>
<span class="attr_val">5.3&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Faux:</span>
<span class="attr_val">Aucun</span>
</td>
</tr>
</table>
<pre class="snippet">
Content
</pre>
</td>
</tr>
<tr>
<td class="idx">1</td>
<td>
<table class="torrent_name_tbl">
<tr>
<td class="torrent_name">
<a href="/url">Should be the title</a>
</td>
</tr>
</table>
<table class="torrent_name_tbl">
<tr>
<td class="ttth">
<a onclick="fclck(this.href)" href="magnet:?xt=urn:btih:magnet&amp;dn=Test"
title="Télécharger des liens Magnet">[magnet]</a>
</td>
<td class="ttth">
<a href="https://btcloud.io/manager?cmd=add&amp;info_hash=hash"
target="_blank" title="Ajouter à BTCloud">[cloud]</a>
</td>
<td>
<span class="attr_name">Taille:</span>
<span class="attr_val">a TB</span>
</td>
<td>
<span class="attr_name">Fichiers:</span>
<span class="attr_val">710</span>
</td>
<td>
<span class="attr_name">Téléchargements:</span>
<span class="attr_val">z</span>
</td>
<td>
<span class="attr_name">Temps:</span>
<span class="attr_val">417.8&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Dernière&nbsp;mise&nbsp;à&nbsp;jour:</span>
<span class="attr_val">5.3&nbsp;jours</span>
</td>
<td>
<span class="attr_name">Faux:</span>
<span class="attr_val">Aucun</span>
</td>
</tr>
</table>
<pre class="snippet">
Content
</pre>
</td>
</tr>
</table>
</div>
"""
response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 5)
self.assertEqual(results[0]['title'], 'Should be the title')
self.assertEqual(results[0]['url'], 'https://btdigg.org/url')
self.assertEqual(results[0]['content'], 'Content')
self.assertEqual(results[0]['seed'], 5)
self.assertEqual(results[0]['leech'], 0)
self.assertEqual(results[0]['files'], 710)
self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:magnet&dn=Test')
self.assertEqual(results[0]['filesize'], 1024)
self.assertEqual(results[1]['filesize'], 1048576)
self.assertEqual(results[2]['filesize'], 1073741824)
self.assertEqual(results[3]['filesize'], 1099511627776)