Use of xpath:
Note: Install the xpath plugin in advance
pip install lxml -i https://pypi.douban.com/simple
from lxml import etree
html_tree = etree.parse('XX.html')
html_tree = etree.HTML(response.read().decode('utf-8'))
Path query
//: Find all descendant nodes, regardless of hierarchy
/: Find direct child nodes only
Predicate query
//div[@id]
//div[@id="maincontent"]
Attribute query
//@class
Fuzzy query
//div[contains(@id, "he")]
//div[starts-with(@id, "he")]
Content query
//div/h1/text()
Logical operations
//div[@id="head" and @class="s_down"]
//title | //price
from lxml import etree

# XPath parsing can start from two kinds of input:
#   1. A local file          -> etree.parse(path)
#   2. Server response data  -> etree.HTML(response.read().decode('utf-8'))
#
# NOTE: etree.parse() uses lxml's XML parser unless another parser is passed,
# so the local HTML file has to be well-formed (every tag closed).
# The file name contains an apostrophe, hence the double-quoted literal.
tree = etree.parse("1.xpath's basic use.html")

# General form of a query: tree.xpath('xpath path')
# The commented-out queries below are alternative examples; enable one at a
# time to try it. Only the last query is active.

# Find every li anywhere below body.
# li_list = tree.xpath('//body//li')

# Text of the li tags (children of a ul) that have an id attribute.
# li_list = tree.xpath('//ul/li[@id]/text()')

# Text of the li tag whose id is "l1" -- note the quotation marks around
# the attribute value inside the predicate.
# li_list = tree.xpath('//ul/li[@id="l1"]/text()')

# Value of the class attribute of the li tag whose id is "l1".
# li = tree.xpath('//ul/li[@id="l1"]/@class')

# Text of the li tags whose id contains "l".
# li_list = tree.xpath('//ul/li[contains(@id,"l")]/text()')

# Text of the li tags whose class value starts with "c".
# li_list = tree.xpath('//ul/li[starts-with(@class,"c")]/text()')

# Text of the li tags whose id is "l1" AND whose class is "c1".
li_list = tree.xpath('//ul/li[@id="l1" and @class="c1"]/text()')

# Show the matches and how many there were.
print(li_list)
print(len(li_list))