Ponysearch/searx/engines/vimeo.py

from urllib import urlencode
from HTMLParser import HTMLParser
from xpath import extract_text
from lxml import html

# Root of the Vimeo site; result links are built by prefixing this to the
# path extracted from each search hit.
base_url = 'http://vimeo.com'

# Search endpoint; the {query} placeholder receives the url-encoded query.
search_url = base_url + '/search?{query}'

# Vimeo sets several cookies on a browser session. Sample values observed:
#   vuid   = 918282893.1027205400
#   ab_bs  = %7B%223%22%3A279%7D
#   __utmb = 18302654.1.10.1388942090
#   __utmc = 18302654
#   __utmz = 18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)
#   __utml = search
# Only __utma seems to be required, so it is the only cookie sent.
Cookie = {
    '__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0',
}

def request(query, params):
    """Fill *params* with the Vimeo search request for *query*.

    Sets ``params['url']`` to ``search_url`` with the url-encoded query
    substituted in, sets ``params['cookies']`` to the module-level
    ``Cookie`` dict, and returns the same ``params`` mapping.
    """
    params['url'] = search_url.format(query=urlencode({'q': query}))
    params['cookies'] = Cookie
    return params

def response(resp):
    """Parse a Vimeo search page into a list of result dicts.

    ``resp.text`` is parsed with lxml; every node matched by
    ``results_xpath`` yields a dict with the keys ``url``, ``title`` and
    ``content`` (an HTML snippet wrapping a thumbnail image in a link to
    the video).

    Result nodes that lack a link or a thumbnail are skipped, so one
    malformed hit does not abort the whole page with an IndexError.

    NOTE(review): ``results_xpath``, ``url_xpath``, ``title_xpath`` and
    ``content_xpath`` are not defined in this module; presumably they are
    injected from the engine configuration -- confirm against the loader.
    """
    results = []
    dom = html.fromstring(resp.text)

    p = HTMLParser()

    for result in dom.xpath(results_xpath):
        links = result.xpath(url_xpath)
        thumbnails = result.xpath(content_xpath)
        if not links or not thumbnails:
            # Incomplete hit: nothing usable to link to or to display.
            continue

        url = base_url + links[0]
        title = p.unescape(extract_text(result.xpath(title_xpath)))
        # The template only references {0} (url) and {2} (thumbnail); title
        # is passed positionally but not rendered.
        # NOTE(review): url and thumbnail are interpolated into HTML
        # unescaped -- confirm how the caller renders 'content'.
        content = '<a href="{0}">  <img src="{2}"/> </a>'.format(url, title, extract_text(thumbnails[0]))
        results.append({'url': url, 'title': title, 'content': content})

    return results
[enh] 1st version of vimeo search engine (need improvments) 2014-01-05 22:10:46 +01:00			`from urllib import urlencode`
			`from HTMLParser import HTMLParser`
[enh] Improved vimeo search engines, and add the configuration in the sample 2014-01-06 22:15:46 +01:00			`from xpath import extract_text`
			`from lxml import html`
[enh] 1st version of vimeo search engine (need improvments) 2014-01-05 22:10:46 +01:00
[enh] Improved vimeo search engines, and add the configuration in the sample 2014-01-06 22:15:46 +01:00			`base_url = 'http://vimeo.com'`
			`search_url = base_url + '/search?{query}'`
[enh] 1st version of vimeo search engine (need improvments) 2014-01-05 22:10:46 +01:00
[enh] Improved vimeo search engines, and add the configuration in the sample 2014-01-06 22:15:46 +01:00			`# the cookie set by vime contains all the following values, but only __utma seems to be requiered`
[enh] 1st version of vimeo search engine (need improvments) 2014-01-05 22:10:46 +01:00			`Cookie = {`
[enh] Improved vimeo search engines, and add the configuration in the sample 2014-01-06 22:15:46 +01:00			`#'vuid':'918282893.1027205400'`
			`# 'ab_bs':'%7B%223%22%3A279%7D'`
			`'__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0'`
			`# '__utmb':'18302654.1.10.1388942090'`
			`#, '__utmc':'18302654'`
			`#, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)\|utmccn=(direct)\|utmcmd=(none)'`
			`#, '__utml':'search'`
[enh] 1st version of vimeo search engine (need improvments) 2014-01-05 22:10:46 +01:00			`}`

			`def request(query, params):`
			`params['url'] = search_url.format(query=urlencode({'q' :query}))`
			`print params['url']`
			`params['cookies'] = Cookie`
			`return params`

			`def response(resp):`
			`results = []`
			`dom = html.fromstring(resp.text)`
[enh] Improved vimeo search engines, and add the configuration in the sample 2014-01-06 22:15:46 +01:00
			`p = HTMLParser()`
[enh] 1st version of vimeo search engine (need improvments) 2014-01-05 22:10:46 +01:00
[enh] Improved vimeo search engines, and add the configuration in the sample 2014-01-06 22:15:46 +01:00			`for result in dom.xpath(results_xpath):`
			`url = base_url + result.xpath(url_xpath)[0]`
			`title = p.unescape(extract_text(result.xpath(title_xpath)))`
			`content = '<a href="{0}"> <img src="{2}"/> </a>'.format(url, title, extract_text(result.xpath(content_xpath)[0]))`
			`results.append({'url': url, 'title': title, 'content': content})`
[enh] 1st version of vimeo search engine (need improvments) 2014-01-05 22:10:46 +01:00
			`return results`