Ini ialah mesej ralat:
Traceback (most recent call last):
File "D:\py\pic_downfrom2255ok.py", line 45, in <module>
html = getHtml(url_all[i])
File "D:\py\pic_downfrom2255ok.py", line 32, in getHtml
html = response.read().decode()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb3 in position 184: invalid start byte
Banyak yang telah diubah Sebab utama mungkin tapak web sasaran dikodkan dalam gb2312
Program ini boleh memuat turun gambar secara normal di laman web lain, tetapi akan ada masalah apabila beralih ke laman web semasa beberapa nasihat. Saya mencuba beberapa kaedah tetapi tiada yang berjaya Kod sumber adalah seperti berikut:
#coding=utf-8
import urllib.request
from urllib.request import urlopen, urlretrieve
import urllib
import urllib.parse
import re
import os
from bs4 import BeautifulSoup
url_all =[
'http://www.shop2255.com/showpro/2603.html',
'http://www.shop2255.com/showpro/1558.html',
'http://www.shop2255.com/showpro/1564.html',
'http://www.shop2255.com/showpro/2411.html',
'http://www.shop2255.com/showpro/2409.html',
'http://www.shop2255.com/showpro/1561.html',
'http://www.shop2255.com/showpro/2414.html',
'http://www.shop2255.com/showpro/2609.html',
'http://www.shop2255.com/showpro/2413.html',
'http://www.shop2255.com/showpro/2604.html',
'http://www.shop2255.com/showpro/2605.html',
'http://www.shop2255.com/showpro/2606.html',
'http://www.shop2255.com/showpro/2608.html',
'http://www.shop2255.com/showpro/2607.html',
'http://www.shop2255.com/showpro/2610.html']
def getHtml(url):
response = urlopen(url)
html = response.read().decode("gbk")
return html
def getImg(html):
reg = 'src="(.+?\.jpg)"'
imgre = re.compile(reg)
imglist = re.findall(imgre,html)
return imglist
for i in range(len(url_all)):
html = getHtml(url_all[i])
list=getImg(html.decode())
x = 0
for imgurl in list:
print(x)
file_path = url_all[i]
(filepath,tempfilename) = os.path.split(file_path)
(filename,extension) = os.path.splitext(tempfilename)
if not os.path.exists('d:\%s' % filename):
os.mkdir('d:\%s' % filename)
# os.mkdir('D:\%s' % filename2)
local=r'D:\%s\%s.jpg' % (filename,imgurl.splite("/")[-1])
urllib.request.urlretrieve(imgurl,local)
x+=1
print("done")
Pertama, dalam kod anda
local=r'D:%s%s.jpg' % (nama fail,imgurl.splite("/")[-1])
split ditulis sebagai
splite
local=r'D:%s%s.jpg' % (filename,imgurl.splite("/")[-1])
中split
写成了splite
.还有
Jugaurllib.request.urlretrieve(imgurl,local)
这个imgurl
不是一个合法的url,只是一个相对 url, 要改成绝对 url,需要加上
base_url = 'http://www.shop2255.com/'
urllib.request.urlretrieve(imgurl,local)
imgurl
ini bukan urlsah, hanya url relatif, Kepada tukar kepada url mutlak, anda perlu menambah
base_url = 'http://www.shop2255.com/'