采集内容时,常常需要先获取网页返回的 HTTP 状态码,再据此做进一步处理。
下面是一段用 Python 编写的脚本,用于获取网页的 HTTP 状态码:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Filename: states_code.py
"""Fetch a web page and report the HTTP status code of the response.

Runs on Python 2 (``urllib2``) as well as Python 3 (``urllib.request``).
"""

try:  # Python 2
    from urllib2 import URLError, urlopen
except ImportError:  # Python 3 split urllib2 into urllib.request / urllib.error
    from urllib.error import URLError
    from urllib.request import urlopen

# Page whose status code is checked when the file is run as a script.
url = 'http://www.bitsCN.com/'


def describe_error(error):
    """Return a one-line description of a failed request.

    Args:
        error: The ``URLError`` (or ``HTTPError`` subclass) raised by
            ``urlopen``.

    Returns:
        ``'Error code: <code>'`` when the error carries an HTTP status
        code, ``'Reason: <reason>'`` when it only carries a failure
        reason, or ``None`` when it carries neither.
    """
    # An HTTPError has both attributes; the status code is the more
    # specific piece of information, so it is checked first.
    if hasattr(error, 'code'):
        return 'Error code: %s' % (error.code,)
    if hasattr(error, 'reason'):
        return 'Reason: %s' % (error.reason,)
    return None


def check_status(target, timeout=5):
    """Request *target* and describe the outcome.

    Args:
        target: URL to open.
        timeout: Seconds to wait before the request is abandoned.

    Returns:
        ``'Status code: <code>'`` when the request succeeds, otherwise
        the text produced by ``describe_error`` for the ``URLError``
        that was raised.
    """
    response = None
    try:
        response = urlopen(target, timeout=timeout)
        return 'Status code: %s' % (response.getcode(),)
    except URLError as e:
        return describe_error(e)
    finally:
        # Release the connection even when reading the status fails.
        if response is not None:
            response.close()


if __name__ == '__main__':
    message = check_status(url)
    if message:
        print(message)