首页 > 后端开发 > Python教程 > 使用python爬虫模拟12306登录方法

使用python爬虫模拟12306登录方法

高洛峰
发布: 2017-03-22 13:08:35
原创
3220 人浏览过

试了好久登录的时候总是显示:系统忙,请刷新,,,太折磨人了,搞了半天才想到是请求头部的问题.....

验证码还是要人工识图..

#!/bin/env python
# -*- coding=utf-8 -*-
import ssl
import sys
import urllib2
import random
import httplib
import json
from cookielib import LWPCookieJar
import urllib
import re
import getpass
 
reload(sys)
sys.setdefaultencoding('UTF8')
cookiejar = LWPCookieJar()
cookiesuppor = urllib2.HTTPCookieProcessor(cookiejar)
opener = urllib2.build_opener(cookiesuppor, urllib2.HTTPHandler)
urllib2.install_opener(opener)
ssl._create_default_https_context = ssl._create_unverified_context
codeimg = 'https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&%s' % random.random()
 
baner = """
##################################
    12306登录脚本,作者Mr RJL
    python版本:2.7,适用于linux
    验证码输入方式:
    输入问题对应的图片序号,1-8;
    多个以','分隔.如:1,2,3
##################################
"""
def get(url):
    try:
        request = urllib2.Request(url=url)
        # req.add_header('User-Agent', 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0')
        request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
        request.add_header('X-Requested-With', 'xmlHttpRequest')
        request.add_header('User-Agent',
                           'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36')
        request.add_header('Referer', 'https://kyfw.12306.cn/otn/login/init')
        request.add_header('Accept', '*/*')
        result = urllib2.urlopen(request).read()
        assert isinstance(result, object)
        return result
    except httplib.error as e:
        print e
        pass
    except urllib2.URLError as e:
        print e
        pass
    except urllib2.HTTPBasicAuthHandler, urllib2.HTTPError:
        print 'error'
        pass
 
 
def Post(url, data):
    try:
        request = urllib2.Request(url=url, data=urllib.urlencode(data))
        # req.add_header('User-Agent', 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0')
        # request = urllib2.Request(ajax_url, urllib.urlencode(dc))
        request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
        request.add_header('X-Requested-With', 'xmlHttpRequest')
        request.add_header('User-Agent',
                           'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36')
        request.add_header('Referer', 'https://kyfw.12306.cn/otn/login/init')
        request.add_header('Accept', '*/*')
        # request.add_header('Accept-Encoding', 'gzip, deflate')
        result = urllib2.urlopen(request).read()
        return result
    except httplib.error as e:
        return e
    except urllib2.URLError as e:
        return e
    except urllib2.HTTPBasicAuthHandler, urllib2.HTTPError:
        return 'error'
 
 
def cookietp():
    stoidinput("获取Cookie")
    Url = "https://kyfw.12306.cn/otn/login/init"
    get(Url)
    for index, c in enumerate(cookiejar):
        stoidinput(c)
 
 
def getImg():
    stoidinput("下载验证码...")
    result = get(codeimg)
    try:
        if open('/tmp/tkcode', 'wb').write(result) :
            import os
            os.system("oeg /tmp/tkcode &")
        else:
            return False
    except OSError as e:
        print e
        pass
 
 
def stoidinput(text):
    """
    正常信息输出
    :param text: 
    :return: 
    """
    print "\033[34m[*]\033[0m %s " % text
 
 
def errorinput(text):
    """
    错误信息输出
    :param text: 
    :return: 
    """
    print "\033[32m[!]\033[0m %s " % text
    return False
 
 
def codexy():
    """
    获取验证码
    :return: str
    """
     
    Ofset = raw_input("[*] 请输入验证码: ")
    select = Ofset.split(',')
    global randCode
    post = []
    offsetsX = 0  # 选择的答案的left值,通过浏览器点击8个小图的中点得到的,这样基本没问题
    offsetsY = 0  # 选择的答案的top值
    for ofset in select:
        if ofset == '1':
            offsetsY = 46
            offsetsX = 42
        elif ofset == '2':
            offsetsY = 46
            offsetsX = 105
        elif ofset == '3':
            offsetsY = 45
            offsetsX = 184
        elif ofset == '4':
            offsetsY = 48
            offsetsX = 256
        elif ofset == '5':
            offsetsY = 36
            offsetsX = 117
        elif ofset == '6':
            offsetsY = 112
            offsetsX = 115
        elif ofset == '7':
            offsetsY = 114
            offsetsX = 181
        elif ofset == '8':
            offsetsY = 111
            offsetsX = 252
        else:
            pass
        post.append(offsetsX)
        post.append(offsetsY)
    randCode = str(post).replace(']', '').replace('[', '').replace("'", '').replace(' ', '')
 
 
def login(user, passwd):
    randurl = 'https://kyfw.12306.cn/otn/passcodeNew/checkRandCodeAnsyn'
    logurl = 'https://kyfw.12306.cn/otn/login/loginAysnSuggest'
    surl = 'https://kyfw.12306.cn/otn/login/userLogin'
    geturl = 'https://kyfw.12306.cn/otn/index/initMy12306'
    randdata = {
        "randCode": randCode,
        "rand": "sjrand"
    }
    logdata = {
        "loginUserDTO.user_name": user,
        "userDTO.password": passwd,
        "randCode": randCode
    }
    ldata = {
        "_json_att": None
    }
    fresult = json.loads(Post(randurl, randdata), encoding='utf8')
    checkcode = fresult['data']['msg']
    if checkcode == 'FALSE':
        errorinput("验证码有误,请重试")
    else:
        stoidinput("验证码通过,开始登录..")
        try:
            tresult = json.loads(Post(logurl, logdata), encoding='utf8')
            if tresult['data'].__len__() == 0:
                errorinput("登录失败: %s" % tresult['messages'][0])
            else:
 
                stoidinput("登录成功")
                sult = Post(surl, ldata)
                getUserinfo()
        except ValueError as e:
            errorinput(e)
 
 
def getUserinfo():
    """
    登录成功后,显示用户名
    :return:
    """
    url = 'https://kyfw.12306.cn/otn/modifyUser/initQueryUserInfo'
    data = dict(_json_att=None)
    result = Post(url, data)
    userinfo = result
    name = r&#39;<input name="userDTO.loginUserDTO.user_name" style="display:none;" type="text" value="(\S+)" />&#39;
    try:
        stoidinput("欢迎 %s 登录" % re.search(name, result).group(1))
    except AttributeError:
        pass
 
 
def main():
    user = raw_input("[+] 用户名(用户名/邮箱/手机): ")
    passwd = getpass.getpass("[+] 密码: ")
    if user == &#39;&#39; or passwd == &#39;&#39;:
        errorinput("用户名或密码不能为空!")
    else:
        cookietp()
        getImg()
        codexy()
        login(user, passwd)
def logout():
    url = &#39;https://kyfw.12306.cn/otn/login/loginOut&#39;
    result = get(url)
    if result:
        stoidinput("已退出")
    else:
        errorinput("退出失败")
 
if __name__ == "__main__":
    print baner
    main()
    logout()
登录后复制

以上是使用python爬虫模拟12306登录方法的详细内容。更多信息请关注PHP中文网其他相关文章!

相关标签:
来源:php.cn
本站声明
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn
热门教程
更多>
最新下载
更多>
网站特效
网站源码
网站素材
前端模板