Python Selenium的使用
代码内容
废话少说,先上代码块,复制粘贴可以直接使用的那种
# Instructions printed for the user: Chrome has to be started by hand with
# remote debugging on port 9222 before this script can attach to it.
# The literal is built from adjacent pieces so that it is valid Python (the
# original was split across two physical lines inside single quotes), the
# Windows path is a raw string (its backslashes are not escape sequences), and
# the path is wrapped in plain ASCII double quotes so the command can be
# pasted into cmd as-is.
cmd_port = (
    '请打开cmd,并输入以下内容,调开浏览器 \n'
    r'chrome.exe --remote-debugging-port=9222 '
    r'--user-data-dir="E:\IT\PyDOC\selenum\AutomationProfile"'
    '\n'
)
print(cmd_port)
import os
import requests
import fake_useragent
import os
import threading
import time
import random
def down_parse(main_url='', image_urls=''):
    """Download every URL in ``image_urls`` into a folder named after ``main_url``.

    The folder name is the last ``/``-separated segment of ``main_url`` up to
    its first dot. The folder lives under the hard-coded ``trade`` directory
    on drive E: and is created when missing. Each image is fetched by
    ``save_img`` on its own thread and stored as
    ``img-<counter>-<image id>.jpg``, where the counter starts at 101 and the
    image id is the last URL segment up to its first dot.

    :param main_url: page URL the images belong to; only used to derive the
        folder name.
    :param image_urls: iterable of image URLs to download.
    :return: None
    """
    url_name = main_url.split('/')[-1].split('.')[0]
    # Raw string: '\图' is not a valid escape sequence in a normal literal.
    file_path = os.path.join(r'E:\图片', 'trade', url_name)
    if os.path.exists(file_path):
        print(file_path, '即将存在到这里')
    else:
        print(file_path, '不存在,现在创建文件夹路径, 然后保存到这里')
        # exist_ok: the folder may appear between the check above and this call.
        os.makedirs(file_path, exist_ok=True)

    for c, url in enumerate(image_urls, start=101):
        image_id = url.split('/')[-1].split('.')[0]
        image_path = os.path.join(file_path, f'img-{c}-{image_id}.jpg')
        try:
            threading.Thread(target=save_img, args=(url, image_path)).start()
        except Exception as exp:
            # Best effort: a thread that cannot be started must not stop the
            # remaining downloads.
            print(exp)
        if c == 103:
            # After the third download has been started, give it a moment and
            # open the target folder in Windows Explorer. The path is quoted
            # so spaces or shell metacharacters in the folder name cannot
            # split or alter the command.
            time.sleep(0.7)
            os.system(f'explorer "{file_path}"')
        elif c > 103:
            # Throttle the following requests.
            time.sleep(0.3)
def save_img(url, image_path, timeout=30):
    """Download ``url`` and write the response body to ``image_path``.

    The request is sent with a random User-Agent string produced by
    ``fake_useragent``.

    :param url: URL of the image to fetch.
    :param image_path: destination file; opened in binary write mode, so an
        existing file is overwritten.
    :param timeout: seconds passed to ``requests.get`` as its timeout, so a
        stalled server cannot block the calling thread forever.
    :raises requests.RequestException: on connection problems, timeout, or a
        4xx/5xx response (nothing is written in that case).
    """
    user_agent = fake_useragent.UserAgent().random
    img_response = requests.get(
        url=url,
        headers={"User-Agent": user_agent},
        timeout=timeout,
    )
    # Do not store an HTTP error page under an image file name.
    img_response.raise_for_status()
    with open(image_path, 'wb') as file:
        file.write(img_response.content)
# import pyautogui
import time
import selenium
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = Options()
# Point the driver at the debugger address 127.0.0.1:9222, i.e. the Chrome
# instance the user was asked to start with --remote-debugging-port=9222.
options.add_experimental_option("debuggerAddress", '127.0.0.1:9222')
driver = selenium.webdriver.Chrome(options=options)
# Wait setup: wait up to 3 s, polling every 0.3 s, until elements matching
# the XPath //head/title are present in the page.
WebDriverWait(driver, 3, 0.3).until(EC.presence_of_all_elements_located((By.XPATH, '//head/title')))
# Drive a browser window that is already open, through the module-level driver.
def web(url=''):
    """Load ``url`` in the first browser tab and scroll down through the page.

    Prints the page title before and after navigation, then scrolls the
    window 19 times by 2000 px with a 0.2 s pause after each step so that
    lazily loaded images get a chance to appear.

    :param url: address to open.
    :return: None
    """
    print(driver.title, '\n\n')

    # Index 0 picks the first window handle; the original note says -1 would
    # address the newest tab.
    first_tab = driver.window_handles[0]
    driver.switch_to.window(first_tab)
    driver.get(url)
    # An implicit wait or a WebDriverWait on //head/title could be used here
    # instead of relying on the fixed pauses below.
    print(driver.title)

    for _ in range(19):
        # Scrolling triggers loading of dynamically inserted images.
        driver.execute_script('window.scrollBy(0,2000)')
        time.sleep(0.2)  # fixed pause between scroll steps
# Download the product images of an Alibaba.com (international site) page.
def web_img_alibaba(main_url=''):
    """Collect image URLs from the current page and hand them to ``down_parse``.

    Only ``<img>`` elements inside the element with id ``block-tab-product``
    are considered. The query string (everything from the first ``?``) is
    removed from each ``src`` value.

    :param main_url: URL of the product page; forwarded to ``down_parse``.
    :return: None
    """
    case = driver.find_element(By.ID, "block-tab-product")
    img_urls = []
    for img in case.find_elements(By.TAG_NAME, 'img'):
        src = img.get_attribute("src")
        if not src:
            # get_attribute returns None when the <img> has no src (for
            # example a placeholder that has not been loaded yet); skip it
            # instead of failing on None.split().
            continue
        img_urls.append(src.split('?')[0])
    down_parse(main_url=main_url, image_urls=img_urls)
# Upload an image (or another file) through the page currently shown.
def web_upload(file_path):
    """Click the page's upload control and submit ``file_path`` to its file input.

    NOTE(review): the class name ``sttb`` and the id ``stfile`` presumably
    belong to one specific site's upload widget; confirm which page this
    targets.

    :param file_path: path of the file to upload, typed into the file input.
    :return: None
    """
    upload_button = driver.find_element(By.CLASS_NAME, 'sttb')
    upload_button.click()
    time.sleep(2)  # fixed pause after the click

    file_input = driver.find_element(By.ID, 'stfile')
    file_input.send_keys(file_path)
    time.sleep(2)  # fixed pause after the path has been sent
# Add a watermark to an image.
def img_water_mark(img_path):
    """Placeholder for watermarking an image; not implemented yet.

    Tutorial: (no link recorded)

    :param img_path: path of the image to watermark; currently ignored.
    :return: None
    """
    return None
if __name__ == "__main__":
    # The original first statement, print(urls), was removed: `urls` is only
    # assigned further down the file, so it raised NameError when this guard
    # ran.
    url = input('请输入要下载图片的链接\n\n')
    # Open the page and scroll through it, then download its product images.
    web(url)
    web_img_alibaba(url)
    # Sample path for trying out web_upload(). Raw string: '\图' and '\F' are
    # not valid escape sequences in a normal literal.
    file_path = r'E:\图片/trade\Free-Shipping-USB-2-0-to_60822096819/img-103-HTB13EkcJ4SYBuNjSsphq6zGvVXaS.jpg'
    # web_upload(file_path)
知识汇总
# Sample URLs for manual testing. The string is delimited with ASCII triple
# quotes; the typographic quotes used before are not valid Python.
urls = """test urls
https://image.baidu.com/
https://www.alibaba.com/product-detail/Custom-Logo-High-Quality-Colorblocking-Pullover_1600557026124.html?
"""
"""
教程 selenium上传图片:
https://blog.csdn.net/yuxuan6699/article/details/81331625?
教程 pywinauto教程 多图上传
https://blog.csdn.net/fallenjency/article/details/118573156?
教程 AutoIt Windows Info
https://blog.csdn.net/weixin_42763696/article/details/105781884?
"""
"""
selenium 下载图片教程
https://blog.csdn.net/qq_47733923/article/details/124963682?
selenium 选择器
https://blog.csdn.net/weixin_56349063/article/details/121798449?
"""
"""
# url = 'https://httpbin.org/get'
# url = 'https://www.alibaba.com/product-detail/OA-3D-rose-dress-flower-girls_1600194694686.html'
# url = 'https://www.alibaba.com/product-detail/OA-60-Days-OEM-ODM-Hot_1600247501688.html'
# url = input('请输入产品链接 \n')
pyautogui 教程
https://blog.csdn.net/weixin_41659822/article/details/117406950?
https://blog.csdn.net/wblylh/article/details/114533120?
https://blog.csdn.net/weixin_41659822/article/details/117406950?
企查查爬取
https://blog.csdn.net/luluzsa/article/details/124333480?
selenium 标签控制
https://blog.csdn.net/xhscxj/article/details/124628926?
selenium 等待时间
https://blog.csdn.net/caicau/article/details/120741971?
selenium 翻页滚动
https://blog.csdn.net/weixin_42205056/article/details/121381228
selenium 综合教程
https://zmeng.blog.csdn.net/article/details/115395331
"""
# chrome.exe --remote-debugging-port=9222 --user-data-dir="E:\IT\PyDOC\selenum\AutomationProfile"
评论效果
第一次写代码相关的内容,不懂,请多多谅解
测试回复