About the use of Scrapy under Python3
import re
import scrapy
from bs4 import BeautifulSoup
from scrapy.http import Request
from ..items import ZhibobaItem
import json
import lxml.html
import requests
import json
class Myspider(scrapy.Spider):
name = 'zhiboba'
allowed_domains = ['zhibo8.cc']
json_url = 'https://bifen4pc.qiumibao.com/json/list.htm?85591'
bash_url = 'https://www.zhibo8.cc/'
def start_requests(self):
yield Request(self.bash_url, self.parse_index)
def parse_index(self, response):
print("enter the parse_index")
print(self.bash_url)
ps = BeautifulSoup(response.text, 'lxml').find_all(label=re.compile("足球"))
item = ZhibobaItem()
for single_p in ps:
item['label'] = single_p.get('label')
item['sdate'] = single_p.get('data-time')
item['linkurl'] = self.bash_url + single_p.find('a')['href']
home_team = single_p.get_text().split()[2]
item['home_team'] = home_team
visit_team = single_p.get_text().split()[4]
item['visit_team'] = visit_team
print("quit the parse_index")
print(self.json_url)
yield Request(self.json_url, callback=self.get_score, meta={'home_team': home_team,
'visit_team': visit_team
})
def get_score(self, response):
print("enter the get_score")
json_url = self.json_url
wbdata = response.get(json_url).text
data = json.loads(wbdata)
news = data['list']
print(wbdata)
print("quit the get_score")
When I execute the above code, I cannot successfully call json_url and the corresponding response function get_score. What's wrong?
Try to modify
allow_domains = []
.