|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
在使用pyspider爬取网页信息时会出现如下错误提示, 在网上查了一下,说是mongodb未开启,但网上说的都很笼统,不知如何开启mongodb,希望大神能帮忙指点下,小弟先行谢过。
[E 170827 10:16:31 base_handler:203] localhost:27017: [WinError 10061] 由于目标计算机积极拒绝,无法连接。
Traceback (most recent call last):
File "d:\download\anconda3\lib\site-packages\pyspider\libs\base_handler.py", line 201, in run_task
self._run_func(self.on_result, result, response, task)
File "d:\download\anconda3\lib\site-packages\pyspider\libs\base_handler.py", line 155, in _run_func
ret = function(*arguments[:len(args) - 1])
File "<tripadvisor>", line 43, in on_result
File "<tripadvisor>", line 46, in save_to_mongo
File "d:\download\anconda3\lib\site-packages\pymongo\collection.py", line 2467, in insert
with self._socket_for_writes() as sock_info:
File "d:\download\anconda3\lib\contextlib.py", line 82, in __enter__
return next(self.gen)
File "d:\download\anconda3\lib\site-packages\pymongo\mongo_client.py", line 823, in _get_socket
server = self._get_topology().select_server(selector)
File "d:\download\anconda3\lib\site-packages\pymongo\topology.py", line 214, in select_server
address))
File "d:\download\anconda3\lib\site-packages\pymongo\topology.py", line 189, in select_servers
self._error_message(selector))
pymongo.errors.ServerSelectionTimeoutError: localhost:27017: [WinError 10061] 由于目标计算机积极拒绝,无法连接。
以下是代码部分,
from pyspider.libs.base_handler import *
import pymongo
class Handler(BaseHandler):
    """pyspider handler that scrapes TripAdvisor attraction pages into MongoDB.

    The crawl starts from a London attractions listing page, follows every
    listing link to its detail page, extracts a few fields, and stores each
    result as one document in the ``London`` collection of the ``trip``
    database.

    A MongoDB server (``mongod``) must be running and listening on
    ``localhost:27017`` before results are saved; otherwise the write in
    ``save_to_mongo`` raises ``pymongo.errors.ServerSelectionTimeoutError``
    (connection refused).
    """

    crawl_config = {
    }

    # Shared client/database handles. Only the host is given, so the driver
    # uses its default port (27017).
    client = pymongo.MongoClient('localhost')
    db = client['trip']

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the London attractions listing page."""
        self.crawl('https://www.tripadvisor.cn/Attractions-g186338-Activities-c47-t163-London_England.html', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue the detail page of every attraction linked from the listing."""
        for each in response.doc('.listing_title > a').items():
            self.crawl(each.attr.href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract the attraction's fields from its detail page.

        Returns:
            dict with the keys ``url``, ``name``, ``rating``, ``address`` and
            ``phone``; every value is the text of the matching CSS selector
            (``url`` is the response URL).
        """
        url = response.url
        name = response.doc('.heading_title').text()
        rating = response.doc('div > .more').text()
        address = response.doc('.address.colCnt2').text()
        phone = response.doc('.headerBL .phone > span').text()
        return {
            'url': url,
            'name': name,
            'rating': rating,
            'address': address,
            'phone': phone,
        }

    def on_result(self, result):
        """Persist a non-empty result; empty/None results are ignored."""
        if result:
            self.save_to_mongo(result)

    def save_to_mongo(self, result):
        """Insert ``result`` as one document into the ``London`` collection.

        Raises:
            pymongo.errors.ServerSelectionTimeoutError: if no MongoDB server
                is reachable on localhost:27017.
        """
        # insert_one replaces Collection.insert, which is deprecated since
        # PyMongo 3.0 and removed in 4.0. The returned InsertOneResult is
        # truthy, so the success message is still printed after the write.
        if self.db['London'].insert_one(result):
            print('save to mongo', result)
|
|