Python爬虫 Day 3

 2023-09-19 阅读 28 评论 0

摘要:一、Selenium 剩余部分。1. 元素交互操作:点击与清除(click、clear);ActionChains 动作链对象(需要把 driver 驱动传给它,可以按顺序执行一系列设定好的动作);frame 的切换;执行 JS 代码。
一.Selenium剩余部分
1.元素交互操作
- 点击、清除
click
clear

- ActionChains
是一个动作链对象,需要把driver驱动传给它
动作链对象可以操作一系列设定好的动作

- frame的切换

- 执行js代码

'''
Element interaction: click and clear
'''
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver import ActionChains  # chained actions, e.g. dragging a slider-captcha image
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

driver = webdriver.Chrome()
try:
    # Let element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.jd.com/')

    # Type the first query into the search box (id="key").
    # Named search_box rather than `input` so the builtin is not shadowed.
    search_box = driver.find_element(By.ID, 'key')
    search_box.send_keys('围城')

    # Locate the search button by its class name and click it.
    search_button = driver.find_element(By.CLASS_NAME, 'button')
    search_button.click()
    time.sleep(3)

    # Look the search box up again after the search was submitted,
    # then empty it with clear().
    search_box = driver.find_element(By.ID, 'key')
    search_box.clear()
    time.sleep(1)

    # Type the second query and submit it with the ENTER key.
    search_box.send_keys('墨菲定律')
    search_box.send_keys(Keys.ENTER)
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()

'''
ActionChains
Instant move
'''
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver import ActionChains  # chained actions, e.g. dragging a slider-captcha image
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

driver = webdriver.Chrome()
try:
    # Let element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # driver.switch_to_frame() is the deprecated spelling; use
    # driver.switch_to.frame() to enter the frame named 'iframeResult'.
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    # An ActionChains object queues actions for the driver it is given.
    action = ActionChains(driver)

    # Source square: id="draggable"; target square: id="droppable".
    source = driver.find_element(By.ID, 'draggable')
    target = driver.find_element(By.ID, 'droppable')

    # Approach 1: instant move. drag_and_drop() only queues the action;
    # perform() is what actually executes it.
    action.drag_and_drop(source, target).perform()
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()

'''
ActionChains
Move step by step
'''
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver import ActionChains  # chained actions, e.g. dragging a slider-captcha image
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

driver = webdriver.Chrome()
try:
    # Let element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)

    # driver.switch_to_frame() is the deprecated spelling; use
    # driver.switch_to.frame() to enter the frame named 'iframeResult'.
    driver.switch_to.frame('iframeResult')
    time.sleep(1)

    # Source square: id="draggable"; target square: id="droppable".
    source = driver.find_element(By.ID, 'draggable')
    target = driver.find_element(By.ID, 'droppable')

    print(source.size)      # size
    print(source.tag_name)  # tag name
    print(source.text)      # text
    print(source.location)  # coordinates: x and y

    # Horizontal distance between the two squares.
    distance = target.location['x'] - source.location['x']

    # Press and hold the source square.
    ActionChains(driver).click_and_hold(source).perform()

    # Approach 2: move a little at a time. Each iteration performs one
    # 2-pixel horizontal offset until the travelled distance reaches
    # `distance`.
    step = 2
    moved = 0
    while moved < distance:
        ActionChains(driver).move_by_offset(xoffset=step, yoffset=0).perform()
        moved += step
        time.sleep(0.1)

    # Release the held square.
    ActionChains(driver).release().perform()
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()

'''
Executing JavaScript
'''
import time

from selenium import webdriver  # drives the browser

driver = webdriver.Chrome()
try:
    # Let element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')

    # Run a JavaScript snippet in the page; this one opens an alert dialog.
    driver.execute_script('''alert("浙江万里学院是浙江最牛逼的学院")''')
    time.sleep(5)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()

其他操作

# Simulate the browser's back and forward navigation.
import time

from selenium import webdriver

browser = webdriver.Chrome()
try:
    # Visit three pages in a row so there is history to navigate.
    browser.get('https://www.baidu.com')
    browser.get('https://www.taobao.com')
    browser.get('http://www.sina.com.cn/')

    # Go back one entry in the history.
    browser.back()
    time.sleep(5)

    # Go forward again.
    browser.forward()
    time.sleep(3)
finally:
    # Always end the session, even if one of the page loads fails.
    browser.quit()

爬取京东商品信息

'''
Beginner version
'''
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

driver = webdriver.Chrome()
try:
    # Let element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)

    # Open JD and submit the search query with the ENTER key.
    driver.get('https://www.jd.com/')
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')

    # Open the output file once, in append mode, for the whole result list.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        # enumerate() supplies the running item number; the original
        # formatted an undefined `num` and failed with NameError.
        for num, good in enumerate(good_list, start=1):
            print(good)

            # Product name
            good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
            print(good_name)

            # Product link
            good_url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
            print(good_url)

            # Product price
            good_price = good.find_element(By.CLASS_NAME, 'p-price').text
            print(good_price)

            # Product review text
            good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text
            print(good_commit)

            good_content = '''num:{}商品名称:{}商品链接:{}商品价格:{}商品评价:{}\n'''.format(
                num, good_name, good_url, good_price, good_commit)
            print(good_content)
            f.write(good_content)

    print('商品信息写入成功!')
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()
'''
Intermediate version
'''
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants

driver = webdriver.Chrome()

# Running item number written in front of every record.
num = 1

try:
    # Let element lookups poll for up to 10 seconds before failing.
    driver.implicitly_wait(10)

    # Open JD and submit the search query with the ENTER key.
    driver.get('https://www.jd.com/')
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    # Scroll down 5000px, then wait 5 seconds for the products to load.
    js_code = '''window.scrollTo(0,5000)'''
    driver.execute_script(js_code)
    time.sleep(5)

    good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')
    for good in good_list:
        print(good)

        # Product name
        good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
        print(good_name)

        # Product link
        good_url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
        print(good_url)

        # Product price
        good_price = good.find_element(By.CLASS_NAME, 'p-price').text
        print(good_price)

        # Product review text
        good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text
        print(good_commit)

        good_content = '''num:{}商品名称:{}商品链接:{}商品价格:{}商品评价:{}\n'''.format(
            num, good_name, good_url, good_price, good_commit)
        print(good_content)

        with open('jd.txt', 'a', encoding='utf-8') as f:
            f.write(good_content)
        num += 1

    print('商品信息写入成功!')

    # Find the "next page" control and click it. This must be the singular
    # find_element(): the plural lookup returns a list, which has no click().
    next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
    next_tag.click()
    time.sleep(10)
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()
'''
Advanced version: keep following the "next page" control
'''
import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element()
from selenium.webdriver.common.keys import Keys  # keyboard key constants


def get_good(driver, *, max_pages=None):
    """Append the products of the current results page, and following pages, to jd.txt.

    For each page: wait, scroll down so more products load, record every
    'gl-item' element, then click the 'pn-next' control to move on.

    The driver is NOT closed here; the caller that created it owns it. The
    original closed it in a ``finally`` at every recursion level, so it was
    closed repeatedly during unwinding and once more by the caller.

    Args:
        driver: A WebDriver already showing a results page.
        max_pages: Optional upper bound on the number of pages to process.
            ``None`` (the default) keeps going until no 'pn-next' element
            is found.
    """
    # One counter for the whole crawl, so item numbers keep increasing
    # across pages instead of restarting at 1 on each page.
    num = 1
    page = 0

    # A loop replaces the original self-recursion, which grew the call stack
    # by one frame per page and only ended through an uncaught exception.
    while max_pages is None or page < max_pages:
        page += 1
        time.sleep(5)

        # Scroll down 5000px, then wait 5 seconds for the products to load.
        js_code = '''window.scrollTo(0,5000)'''
        driver.execute_script(js_code)
        time.sleep(5)

        good_list = driver.find_elements(By.CLASS_NAME, 'gl-item')

        # Open the output file once per page, in append mode.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                # Product name
                good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
                # Product link
                good_url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
                # Product price
                good_price = good.find_element(By.CLASS_NAME, 'p-price').text
                # Product review text
                good_commit = good.find_element(By.CLASS_NAME, 'p-commit').text

                good_content = '''num:{}商品名称:{}商品链接:{}商品价格:{}商品评价:{}\n'''.format(
                    num, good_name, good_url, good_price, good_commit)
                print(good_content)
                f.write(good_content)
                num += 1

        print('商品信息写入成功!')

        # The plural lookup returns an empty list instead of raising when
        # there is no "next page" control, which gives a clean stop condition.
        next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_tags:
            break
        # NOTE(review): if the site keeps a non-functional 'pn-next' element
        # on the last page, this loop will not stop by itself; pass max_pages
        # in that case. Confirm against the live page.
        next_tags[0].click()
        time.sleep(5)


if __name__ == '__main__':
    driver = webdriver.Chrome()
    try:
        # Let element lookups poll for up to 10 seconds before failing.
        driver.implicitly_wait(10)

        # Open JD, type the query into the home-page search box and press ENTER.
        driver.get('https://www.jd.com/')
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        # Collect the product information.
        get_good(driver)
    finally:
        # The driver is shut down exactly once, by the code that created it.
        driver.quit()

 

python 爬虫库。转载于:https://www.cnblogs.com/merliah/p/11125388.html

版权声明:本站所有资料均为网友推荐收集整理而来,仅供学习和研究交流使用。

原文链接:https://hbdhgg.com/4/78056.html

发表评论:

本站为非赢利网站,部分文章来源或改编自互联网及其他公众平台,主要目的在于分享信息,版权归原作者所有,内容仅供读者参考,如有侵权请联系我们删除!

Copyright © 2022 匯編語言學習筆記 Inc. 保留所有权利。

底部版权信息