新手刚开始学爬虫,不知道问题出在哪,requests已经是最新版本,用get获取公司内部网页不会报错,是否是防火墙的问题?
安装了Anaconda3后,在Jupyter notebook里写入如下代码:
import requests
res = requests.get('http://www.sina.com.cn/china')
res.encoding='utf-8'
print(res.text)
ConnectionRefusedError Traceback (most recent call last)
D:Anaconda3libsite-packagesrequestspackagesurllib3connection.py in _new_conn(self)
140 conn = connection.create_connection(
--> 141 (self.host, self.port), self.timeout, **extra_kw)
142
D:Anaconda3libsite-packagesrequestspackagesurllib3utilconnection.py in create_connection(address, timeout, source_address, socket_options)
82 if err is not None:
---> 83 raise err
84
D:Anaconda3libsite-packagesrequestspackagesurllib3utilconnection.py in create_connection(address, timeout, source_address, socket_options)
72 sock.bind(source_address)
---> 73 sock.connect(sa)
74 return sock
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
D:Anaconda3libsite-packagesrequestspackagesurllib3connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
D:Anaconda3libsite-packagesrequestspackagesurllib3connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
355 else:
--> 356 conn.request(method, url, **httplib_request_kw)
357
D:Anaconda3libhttpclient.py in request(self, method, url, body, headers, encode_chunked)
1238 """Send a complete request to the server."""
-> 1239 self._send_request(method, url, body, headers, encode_chunked)
1240
D:Anaconda3libhttpclient.py in _send_request(self, method, url, body, headers, encode_chunked)
1284 body = _encode(body, 'body')
-> 1285 self.endheaders(body, encode_chunked=encode_chunked)
1286
D:Anaconda3libhttpclient.py in endheaders(self, message_body, encode_chunked)
1233 raise CannotSendHeader()
-> 1234 self._send_output(message_body, encode_chunked=encode_chunked)
1235
D:Anaconda3libhttpclient.py in _send_output(self, message_body, encode_chunked)
1025 del self._buffer[:]
-> 1026 self.send(msg)
1027
D:Anaconda3libhttpclient.py in send(self, data)
963 if self.auto_open:
--> 964 self.connect()
965 else:
D:Anaconda3libsite-packagesrequestspackagesurllib3connection.py in connect(self)
165 def connect(self):
--> 166 conn = self._new_conn()
167 self._prepare_conn(conn)
D:Anaconda3libsite-packagesrequestspackagesurllib3connection.py in _new_conn(self)
149 raise NewConnectionError(
--> 150 self, "Failed to establish a new connection: %s" % e)
151
NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x00000000065376A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
D:Anaconda3libsite-packagesrequestsadapters.py in send(self, request, stream, timeout, verify, cert, proxies)
422 retries=self.max_retries,
--> 423 timeout=timeout
424 )
D:Anaconda3libsite-packagesrequestspackagesurllib3connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
648 retries = retries.increment(method, url, error=e, _pool=self,
--> 649 _stacktrace=sys.exc_info()[2])
650 retries.sleep()
D:Anaconda3libsite-packagesrequestspackagesurllib3utilretry.py in increment(self, method, url, response, error, _pool, _stacktrace)
375 if new_retry.is_exhausted():
--> 376 raise MaxRetryError(_pool, url, error or ResponseError(cause))
377
MaxRetryError: HTTPConnectionPool(host='www.sina.com.cn', port=80): Max retries exceeded with url: /china (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000065376A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-12-03b138755a7e> in <module>()
1 import requests
----> 2 res = requests.get('http://www.sina.com.cn/china')
3 res.encoding='utf-8'
4 print(res.text)
D:Anaconda3libsite-packagesrequestsapi.py in get(url, params, **kwargs)
68
69 kwargs.setdefault('allow_redirects', True)
---> 70 return request('get', url, params=params, **kwargs)
71
72
D:Anaconda3libsite-packagesrequestsapi.py in request(method, url, **kwargs)
54 # cases, and look like a memory leak in others.
55 with sessions.Session() as session:
---> 56 return session.request(method=method, url=url, **kwargs)
57
58
D:Anaconda3libsite-packagesrequestssessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
486 }
487 send_kwargs.update(settings)
--> 488 resp = self.send(prep, **send_kwargs)
489
490 return resp
D:Anaconda3libsite-packagesrequestssessions.py in send(self, request, **kwargs)
607
608 # Send the request
--> 609 r = adapter.send(request, **kwargs)
610
611 # Total elapsed time of the request (approximately)
D:Anaconda3libsite-packagesrequestsadapters.py in send(self, request, stream, timeout, verify, cert, proxies)
485 raise ProxyError(e, request=request)
486
--> 487 raise ConnectionError(e, request=request)
488
489 except ClosedPoolError as e:
ConnectionError: HTTPConnectionPool(host='www.sina.com.cn', port=80): Max retries exceeded with url: /china (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000065376A0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))
If the connection fails to be established, it may be that your IP is in the blacklist, it may be that you have violated the robot.txt, or it may be that your UA is not allowed to establish a connection.
Judging from the error message, access is denied.
It is recommended to use official Python and not use third-party installation packages.