From 6042f2bc53d2b6f0d03e6b882db83377b27029be Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date: Sun, 1 Feb 2015 11:27:28 +0100
Subject: [PATCH 1/3] [enh] add 1x.com engine

* Deactivated by default because of the large number of results
---
 searx/engines/www1x.py | 81 ++++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml     |  5 +++
 2 files changed, 86 insertions(+)
 create mode 100644 searx/engines/www1x.py

diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
new file mode 100644
index 000000000..d10c4ca37
--- /dev/null
+++ b/searx/engines/www1x.py
@@ -0,0 +1,81 @@
+## 1x (Images)
+#
+# @website     http://1x.com/
+# @provide-api no
+#
+# @using-api   no
+# @results     HTML
+# @stable      no (HTML can change)
+# @parse       url, title, thumbnail, img_src, content
+
+
+from urllib import urlencode
+from urlparse import urljoin
+from lxml import html
+import string
+import re
+
+# engine dependent config
+categories = ['images']
+paging = False
+
+# search-url
+base_url = 'http://1x.com'
+search_url = base_url+'/backend/search.php?{query}'
+
+
+# do search-request
+def request(query, params):
+    params['url'] = search_url.format(query=urlencode({'q': query}))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    # get links from result-text
+    results_parts = re.split(r'(</a>|<a)', resp.text)
+
+    cur_element = ''
+
+    # iterate over link parts
+    for result_part in results_parts:
+        # processed start and end of link
+        if result_part == '<a':
+            cur_element = result_part
+            continue
+        elif result_part != '</a>':
+            cur_element += result_part
+            continue
+
+        cur_element += result_part
+
+        # fix xml-error
+        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+
+        dom = html.fromstring(cur_element)
+        link = dom.xpath('//a')[0]
+
+        url = urljoin(base_url, link.attrib.get('href'))
+        title = link.attrib.get('title', '')
+
+        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
+        # TODO: get image with higher resolution
+        img_src = thumbnail_src
+
+        # check if url points to a photo
+        if '/photo/' not in url:
+            continue
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'content': '',
+                        'thumbnail_src': thumbnail_src,
+                        'template': 'images.html'})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index ebae8af62..f4fca8985 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -83,6 +83,11 @@ engines:
     engine : www500px
     shortcut : px
 
+  - name : 1x
+    engine : www1x
+    shortcut : 1x
+    disabled : True
+
   - name : flickr
     categories : images
     shortcut : fl

From 7e30633edd823d7692836320004e7a920210fdc0 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date: Sun, 1 Feb 2015 11:48:15 +0100
Subject: [PATCH 2/3] [enh] improve yacy engine

---
 searx/engines/yacy.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 4c4fac7df..17e2a7aab 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -68,9 +68,18 @@ def response(resp):
 
     search_results = raw_search_results.get('channels', {})[0].get('items', [])
 
-    if resp.search_params['category'] == 'general':
+    for result in search_results:
+        # parse image results
+        if result.get('image'):
+            # append result
+            results.append({'url': result['url'],
+                            'title': result['title'],
+                            'content': '',
+                            'img_src': result['image'],
+                            'template': 'images.html'})
+
         # parse general results
-        for result in search_results:
+        else:
             publishedDate = parser.parse(result['pubDate'])
 
             # append result
@@ -79,17 +88,7 @@ def response(resp):
                             'content': result['description'],
                             'publishedDate': publishedDate})
 
-    elif resp.search_params['category'] == 'images':
-        # parse image results
-        for result in search_results:
-            # append result
-            results.append({'url': result['url'],
-                            'title': result['title'],
-                            'content': '',
-                            'img_src': result['image'],
-                            'template': 'images.html'})
-
-    #TODO parse video, audio and file results
+        #TODO parse video, audio and file results
 
     # return results
     return results

From a605d0ae698e8a5555935780d83df50b06727f24 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date: Sun, 1 Feb 2015 13:52:43 +0100
Subject: [PATCH 3/3] [enh] add test for 1x.com

---
 searx/engines/www1x.py            |  3 +-
 searx/tests/engines/test_www1x.py | 57 +++++++++++++++++++++++++++++++
 searx/tests/test_engines.py       |  1 +
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 searx/tests/engines/test_www1x.py

diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index d10c4ca37..a68c105ce 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -36,7 +36,8 @@ def response(resp):
     results = []
 
     # get links from result-text
-    results_parts = re.split(r'(</a>|<a)', resp.text)
+    regex = re.compile('(</a>|<a)')
+    results_parts = re.split(regex, resp.text)
 
     cur_element = ''
 
diff --git a/searx/tests/engines/test_www1x.py b/searx/tests/engines/test_www1x.py
new file mode 100644
index 000000000..ab4f282c1
--- /dev/null
+++ b/searx/tests/engines/test_www1x.py
@@ -0,0 +1,57 @@
+from collections import defaultdict
+import mock
+from searx.engines import www1x
+from searx.testing import SearxTestCase
+
+
+class TestWww1xEngine(SearxTestCase):
+
+    def test_request(self):
+        query = 'test_query'
+        params = www1x.request(query, defaultdict(dict))
+        self.assertTrue('url' in params)
+        self.assertTrue(query in params['url'])
+        self.assertTrue('1x.com' in params['url'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, www1x.response, None)
+        self.assertRaises(AttributeError, www1x.response, [])
+        self.assertRaises(AttributeError, www1x.response, '')
+        self.assertRaises(AttributeError, www1x.response, '[]')
+
+        response = mock.Mock(text='<html></html>')
+        self.assertEqual(www1x.response(response), [])
+        html = """
+        <?xml version="1.0" encoding="UTF-8"?><!DOCTYPE characters
+        [
+        <!ELEMENT characters (character*) >
+        <!ELEMENT character  (#PCDATA   ) >
+
+        <!ENTITY iexcl   "&#161;" >
+        <!ENTITY cent    "&#162;" >
+        <!ENTITY pound   "&#163;" >
+        ]
+        ><root><searchresult><![CDATA[<table border="0" cellpadding="0" cellspacing="0" width="100%">
+        <tr>
+            <td style="min-width: 220px;" valign="top">
+                <div style="font-size: 30px; margin: 0px 0px 20px 0px;">Photos</div>
+                <div>
+                    <a href="/photo/123456" class="dynamiclink">
+<img border="0" class="searchresult" src="/images/user/testimage-123456.jpg" style="width: 125px; height: 120px;">
+                    </a>
+                    <a title="sjoerd lammers street photography" href="/member/sjoerdlammers" class="dynamiclink">
+<img border="0" class="searchresult" src="/images/profile/60c48b394c677d2fa4d9e7d263aabf44-square.jpg">
+                    </a>
+                </div>
+            </td>
+        </table>
+        ]]></searchresult></root>
+        """
+        response = mock.Mock(text=html)
+        results = www1x.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['url'], 'http://1x.com/photo/123456')
+        self.assertEqual(results[0]['thumbnail_src'], 'http://1x.com/images/user/testimage-123456.jpg')
+        self.assertEqual(results[0]['content'], '')
+        self.assertEqual(results[0]['template'], 'images.html')
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
index 1ffdbe529..cfd7fa26a 100644
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -1,2 +1,3 @@
 from searx.tests.engines.test_dummy import *  # noqa
 from searx.tests.engines.test_github import *  # noqa
+from searx.tests.engines.test_www1x import *  # noqa