Python 的 requests 發送請求後,無法執行頁面裡的 JavaScript 代碼,因此很多由 JavaScript 動態生成的信息會爬取不到。這裡可以使用自動化測試工具 selenium,它通過驅動瀏覽器來模擬網頁打開的過程。使用 pip install selenium 即可安裝。
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Scrape a paginated product listing with headless Chrome: open the search
# page, then repeatedly click the "next page" link and read the text of the
# result container.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("start-maximized")
# chromedriver has to be downloaded from the official site; replace the `...`
# placeholder below with the path to that binary.
# NOTE(review): Selenium 4 deprecates `executable_path` in favour of a
# `Service` object -- confirm which Selenium version this snippet targets.
driver = webdriver.Chrome(executable_path=..., options=options)
try:
    # Implicit wait: element lookups keep polling for up to 5 seconds before
    # giving up. This is a lookup timeout, not a fixed pause.
    driver.implicitly_wait(5)
    driver.get("https://www.jst-purple.com.cn/purple/index.php#searchProduct")
    for page in range(1, 247):
        # Click the "next page" link. Wait until it is actually clickable,
        # not merely present in the DOM, so the click is not sent too early.
        # NOTE(review): the click happens before the content is read, so the
        # first page loaded by `driver.get` is never processed -- confirm
        # that this is intended.
        next_page = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.LINK_TEXT, '下一頁'))
        )
        next_page.click()
        # `find_element(By.ID, ...)` replaces `find_element_by_id`, which was
        # removed in Selenium 4.
        element = driver.find_element(By.ID, 'div_content_sub')
        for i in element.text.split('\n'):
            ...  # per-line processing was elided in the original snippet
finally:
    # Always shut the browser down so no headless Chrome process is leaked,
    # even when a wait times out or a lookup fails.
    driver.quit()