# coding: utf-8
"""Two small image-scraper scripts kept in one file.

Script 1 downloads every ``.jpg`` referenced by a single Baidu Tieba thread
into the current working directory.  Script 2 walks the numbered pages of
qiubaichengren.com and saves every ``.jpg`` it finds into a target directory.

Both scripts define ``get_html`` and ``get_image``.  While they live in one
module, the Script 2 definitions replace the Script 1 ones once the module
has finished loading.  Each script has its own ``__main__`` guard, so running
the file executes them in order while importing it performs no network
access.

The code runs on Python 2 and Python 3.
"""
import os
import re
import urllib  # noqa: F401  (imported by the original scripts)

import pyquery  # noqa: F401  (imported by the original script, unused)
import requests
from bs4 import BeautifulSoup  # noqa: F401  (imported by the original, unused)

try:  # Python 3 moved these helpers into urllib.request
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlopen, urlretrieve

# ``[^"]*`` keeps a match inside a single attribute value, and the escaped
# dot makes ``.jpg`` a literal extension rather than "any character + jpg".
_JPG_SRC = re.compile(r'src="([^"]*\.jpg)"')


# ---------------------------------------------------------------------------
# Script 1: download the images of one Tieba thread.
# ---------------------------------------------------------------------------

def get_html(url):
    """Fetch *url* and return the raw, undecoded response body.

    The connection is closed even when reading fails.
    """
    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def get_image(html):
    """Download every ``.jpg`` referenced in *html* to the working directory.

    Files are named ``0.jpg``, ``1.jpg``, ... in the order they are found.

    Args:
        html: Page source, either as text or as the undecoded bytes
            returned by ``get_html``.

    Returns:
        The number of images downloaded.
    """
    if isinstance(html, bytes):
        # A text pattern cannot search bytes on Python 3.  The extracted
        # URLs are expected to be ASCII, so replacing undecodable bytes
        # elsewhere in the page does not affect them.
        html = html.decode('utf-8', 'replace')
    img_urls = _JPG_SRC.findall(html)
    for index, img_url in enumerate(img_urls):
        urlretrieve(img_url, '%s.jpg' % index)
    return len(img_urls)


if __name__ == '__main__':
    html = get_html('http://tieba.baidu.com/p/2166231880')
    print(get_image(html))


# ---------------------------------------------------------------------------
# Script 2: crawl the numbered pages of a site and save their images.
# ---------------------------------------------------------------------------

url = 'http://www.qiubaichengren.com/'


def get_html(url):
    """Return the decoded text of the page at *url*.

    A ``requests`` response exposes ``.text``; it has no ``read()`` method.
    """
    return requests.get(url).text


def get_image(url, save_dir='/home/vincebye/Pictures', first_page=1,
              last_page=678):
    """Download every ``.jpg`` found on pages ``<url><n>.html``.

    Images are numbered consecutively across all pages and stored as
    ``<save_dir>/<index>.jpg``.

    Args:
        url: Base URL; the page number and ``.html`` are appended to it.
        save_dir: Directory that receives the images.  It is created when
            it does not exist yet.
        first_page: Number of the first page to fetch.
        last_page: Number of the last page to fetch (inclusive).

    Returns:
        The number of images downloaded.
    """
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    count = 0
    for page in range(first_page, last_page + 1):
        page_url = url + '%d.html' % page
        for img_url in _JPG_SRC.findall(get_html(page_url)):
            # Announce the download before it starts, in cyan on black,
            # then reset the terminal colours.
            print('\033[0;36;40m')
            print("正在下载第" + str(count) + "张图片,请稍等.......")
            print('\033[0m')
            urlretrieve(img_url, os.path.join(save_dir, '%s.jpg' % count))
            count += 1
    return count


if __name__ == '__main__':
    get_image(url)
耗时:1小时30分钟
错误原因: IndentationError: unexpected indent —— Python 极其要求代码缩进对齐
requests.get() 返回的 Response 对象没有 read() 方法,应使用 .text 或 .content
应该先用 read() 读出网页内容,再把读到的内容拿去做正则匹配
正则表达式中,括号 () 里面是我们要提取的目标(捕获组),括号之外的部分只是用来描述匹配位置的上下文