返回信息流
#coding=utf-8
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time
import traceback
import sys
# Python 2-only workaround: make UTF-8 the interpreter-wide default encoding
# used for implicit str <-> unicode conversions. sys.setdefaultencoding is
# removed from the sys module at interpreter start-up, so the module has to
# be reloaded before the call is possible.
# NOTE(review): presumably needed because the scraped page text is non-ASCII
# and is mixed with byte-string literals below -- confirm. This does not
# work on Python 3 (no builtin reload, no setdefaultencoding).
reload(sys)
sys.setdefaultencoding('utf8')
# web manual: http://selenium-python.readthedocs.org/en/latest/
if __name__ == '__main__':
    # Scrape the episode tables of the anime page with a Chrome WebDriver,
    # echo every row to stdout and save all rows as a GBK-encoded CSV file
    # (op-series.csv) with the columns: id, name, date.
    #
    # The page is organised as tabs: clicking the j-th <li> under
    # div.main is followed by reading the j-th <ul> under div.main0.
    # Tabs and rows are both walked from last to first.
    from selenium.common.exceptions import NoSuchElementException

    url = 'http://www.dilidili.com/anime/onepiece/'
    browser = webdriver.Chrome()
    rows = []
    try:
        browser.get(url)
        browser.implicitly_wait(5)
        for j in range(3, 0, -1):
            browser.find_element_by_xpath('//div[@class="main"]/div/ul/li[' + str(j) + ']').click()
            # XPath shared by every row lookup of this tab.
            table_xpath = '//div[@class="main0"]/ul[' + str(j) + ']/table/tbody/tr'
            series_list = browser.find_elements_by_xpath(table_xpath)
            for i in range(len(series_list), 0, -1):
                row_xpath = table_xpath + '[' + str(i) + ']'
                try:
                    series_id = browser.find_element_by_xpath(row_xpath + '/td[1]').text
                    series_name = browser.find_element_by_xpath(row_xpath + '/td[2]/a').text
                    series_date = browser.find_element_by_xpath(row_xpath + '/td[3]').text
                    # Record the row before printing so that a console
                    # encoding error cannot drop it from the CSV.
                    rows.append(series_id + ',' + series_name + ',' + series_date + '\n')
                    print('id:' + series_id + '\tname:' + series_name + '\tdate:' + series_date)
                except NoSuchElementException:
                    # Row without the expected cells/link (presumably a
                    # header or separator row): skip it, as before.
                    continue
                except Exception:
                    # Stay best-effort for a single bad row, but report the
                    # problem instead of hiding it. Unlike a bare except,
                    # this still lets Ctrl-C / SystemExit stop the script.
                    traceback.print_exc()
                    continue
    finally:
        # Always shut the browser down, even if scraping failed, so no
        # Chrome/driver process is left behind.
        browser.quit()

    total_content = ''.join(rows)
    with open('op-series.csv', 'w') as f:
        # Re-encode as GBK; without this the file content shows up garbled
        # (original author's note).
        f.write(total_content.decode('utf-8').encode('gbk'))
LZ把内容保存在csv文件中,可以用Excel打开哦,见下图:
http://img2.ph.126.net/O-G72CQ9jVoOpPJPJmO8vA==/6630113389606949198.jpg
如果觉得有趣,还可以把数据存储在数据库中,比如redis,mongodb等等。
Python新手,望大牛轻拍。。
这是一条镜像帖。来源:北邮人论坛 / python / #7729,同步于 2015/7/6
该镜像源已超过 30 天没有更新,可能在源站已被删除。
Python机器人发帖
Python新手尝试用selenium爬取海贼王动画目录,分享一下
betough
2015/7/6 镜像同步 · 1 回复
订阅后,新回复会通过你的通知中心匿名送达。
1 条回复