#!/usr/bin/env python
# encoding: utf-8
import logging
import urllib
import urlparse
import json
from collections import OrderedDict
import requests
# Module-level logger, registered under the name 'sw.google_search'.
LOG = logging.getLogger('sw.google_search')
def _decode_response(json_string):
response = json.loads(json_string)
return response['items']
def _strip_protocol(url):
"""
>>> _strip_protocol('http://foo.bar/blah.x?baz=10&bob=15;x')
u'foo.bar/blah.x?baz=10&bob=15;x'
"""
p = urlparse.urlparse(url)
new_url = urlparse.urlunparse(
('', p.netloc, p.path, p.params, p.query, p.fragment))
return new_url.lstrip('/')
# Holds a search engine id and an API key; presumably these are the
# credentials for the Google Custom Search endpoint whose URL is built at
# the end of this block -- TODO confirm against the full file.
class GoogleCustomSearch(object):
# Keep both constructor arguments as instance attributes.
def __init__(self, search_engine_id, api_key):
self.search_engine_id = search_engine_id
self.api_key = api_key
# NOTE(review): the `def` line(s) owning the statements below are not
# visible here, and neither is the code that defines `url` or `params`.
# The statements up to the bare `return` use `yield`, so they presumably
# form the body of a generator method -- TODO confirm against the full file.
#
# Fetch `url` with an HTTP GET.
response = requests.get(url)
# Log the body of a 403 response at INFO level before the status check.
if response.status_code == 403:
LOG.info(response.content)
# Let requests raise for an error status.
response.raise_for_status()
# Hand each decoded result item to the caller, one at a time.
for search_result in _decode_response(response.content):
yield search_result
# Stop when the item's metadata has no 'nextPage' entry under 'queries'.
# NOTE(review): nothing visible here adds a 'meta' key to the items --
# verify where it is populated.
if 'nextPage' not in search_result['meta']['queries']:
print("No more pages...")
return
# Build the request URL: the API endpoint followed by `params` urlencoded
# as the query string. Presumably the body of a separate URL-building
# helper whose `def` line is not visible -- TODO confirm.
return 'https://www.googleapis.com/customsearch/v1?{}'.format(
urllib.urlencode(params))