Python爬虫-抓取手机APP数据-Tutoriel Python-php.cn

Python爬虫-抓取手机APP数据

大家讲道理

Publié le : 2016-11-09 11:18:42

original

2537 Les gens l'ont consulté

抓取超级课程表话题数据。

#!/usr/local/bin/python2.7
# -*- coding: utf8 -*-
"""
  超级课程表话题抓取
"""
import urllib2
from cookielib import CookieJar
import json
 
 
def fetch_data(json_data):
    """Read the paging timestamp and the topic messages out of a response.

    ``json_data`` is the decoded JSON response. Its ``'data'`` entry must
    hold ``'timestampLong'`` and ``'messageBOs'`` (an iterable of message
    dicts). Messages whose ``'content'`` is missing or empty are skipped;
    for every other message the school name and content are printed and a
    summary dict is collected.

    Returns a ``(timestampLong, topicList)`` tuple, where each element of
    ``topicList`` has the keys ``content``, ``schoolName``, ``messageId``,
    ``gender`` and ``time``.

    Raises ``KeyError`` when a required key is absent from the response or
    from a message that has content.
    """
    data = json_data['data']
    timestampLong = data['timestampLong']
    topicList = []
    for each in data['messageBOs']:
        # Guard clause: messages without text are not collected.
        if not each.get('content', False):
            continue
        topicDict = {
            'content': each['content'],
            'schoolName': each['schoolName'],
            'messageId': each['messageId'],
            'gender': each['studentBO']['gender'],
            'time': each['issueTime'],
        }
        # Single-argument print(): same output as the Python 2 statement
        # "print a, b", and also valid syntax on Python 3.
        print('%s %s' % (each['schoolName'], each['content']))
        topicList.append(topicDict)
    return timestampLong, topicList
 
 
def load(timestamp, headers, url):
    """Keep requesting further pages of topic messages ("load more").

    Starting from ``timestamp``, POSTs the paging form to ``url`` through
    the module-level ``opener``, hands each successful response to
    ``fetch_data`` (which prints the messages), and continues with the
    timestamp that response carries.

    ``headers`` is the base header dict; it is copied per request and is
    not modified. ``Content-Length`` is derived from the actual body, so
    it stays correct whatever the number of digits in ``timestamp``.

    Paging stops when the server answers with a ``status`` other than 1
    (the raw response is printed) or when the returned timestamp does not
    advance. Always returns ``False``; the messages themselves are only
    printed, not returned.
    """
    # Iterative rather than recursive: one stack frame per page would
    # eventually exceed the interpreter's recursion limit.
    while True:
        loadData = 'timestamp=%s&phoneBrand=Meizu&platform=1&genderType=-1&topicId=19&phoneVersion=16&selectType=3&channel=MXMarket&phoneModel=M040&versionNumber=7.2.1&' % timestamp
        request_headers = dict(headers)
        request_headers['Content-Length'] = str(len(loadData))
        req = urllib2.Request(url, loadData, request_headers)
        loadResult = opener.open(req).read()
        loadJson = json.loads(loadResult)  # parsed once, reused below
        if loadJson.get('status', False) != 1:
            print('load fail')
            print(loadResult)
            return False
        print('load successful!')
        next_timestamp, _topics = fetch_data(loadJson)
        if next_timestamp == timestamp:
            # The next request would be identical to this one; presumably
            # the server has no further pages, so stop rather than loop
            # forever.
            return False
        timestamp = next_timestamp
 
# Script body: log in, fetch the first page of the topic, then let load()
# page through the rest. Everything runs at import time.
loginUrl = 'http://120.55.151.61/V2/StudentSkip/loginCheckV4.action'
topicUrl = 'http://120.55.151.61/V2/Treehole/Message/getMessageByTopicIdV3.action'
# NOTE(review): urllib2 does not decompress responses by itself; if the
# server honours 'Accept-Encoding: gzip', json.loads() below would fail on
# the compressed bytes - confirm against the live service.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'User-Agent': 'Dalvik/1.6.0 (Linux; U; Android 4.1.1; M040 Build/JRO03H)',
    'Host': '120.55.151.61',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip',
    }

# --- Login ---
# NOTE(review): the device code, account and password values are embedded
# in the source; consider loading them from configuration instead.
loginData = 'phoneBrand=Meizu&platform=1&deviceCode=868033014919494&account=FCF030E1F2F6341C1C93BE5BBC422A3D&phoneVersion=16&password=A55B48BB75C79200379D82A18C5F47D6&channel=MXMarket&phoneModel=M040&versionNumber=7.2.1&'
# Derived from the body instead of a hard-coded number, so it cannot go
# stale when the form data is edited.
headers['Content-Length'] = str(len(loginData))
cookieJar = CookieJar()
# The cookie-aware opener is shared by every later request, including the
# ones made inside load().
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
req = urllib2.Request(loginUrl, loginData, headers)
loginResult = opener.open(req).read()
loginStatus = json.loads(loginResult).get('data', False)
# Test the parsed 'data' field: the raw response text is truthy whenever
# it is non-empty, so checking it would report success on any reply.
if loginStatus:
    print('login successful!')
else:
    print('login fail')
    print(loginResult)

# --- Fetch the topic ---
topicData = 'timestamp=0&phoneBrand=Meizu&platform=1&genderType=-1&topicId=19&phoneVersion=16&selectType=3&channel=MXMarket&phoneModel=M040&versionNumber=7.2.1&'
headers['Content-Length'] = str(len(topicData))
topicRequest = urllib2.Request(topicUrl, topicData, headers)
topicHtml = opener.open(topicRequest).read()
topicJson = json.loads(topicHtml)
topicStatus = topicJson.get('status', False)
print(topicJson)
if topicStatus == 1:
    print('fetch topic success!')
    timestamp, topicList = fetch_data(topicJson)
    # load() prints the remaining pages itself and never returns page
    # data, so there is nothing further to process here.
    load(timestamp, headers, topicUrl)

Copier après la connexion