import scrapy


class ThreedmSpider(scrapy.Spider):
    """Crawl a 3dmgame.com listing page and print each article's text.

    ``parse`` handles the listing page named in ``start_urls``: for every
    list entry it prints the entry's title text and schedules a request
    for the entry's detail page.  ``parse_detail`` handles each detail
    page and prints its concatenated text content.
    """

    name = 'threedm'
    # allowed_domains = ['www.3xxxdm.com']
    start_urls = ['https://www.3dmgame.com/bagua_62_1/']

    # Generic URL template for further listing pages (pagination is
    # currently disabled, see the end of ``parse``).
    # url = "https://www.3dmgame.com/bagua_62_%d/"
    # page_num = 2

    def parse(self, response):
        """Print every listing title and request the linked detail page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: One request per list entry that has a link,
            with ``parse_detail`` as its callback.
        """
        # Select the individual <li> entries rather than the enclosing
        # <ul>.  Iterating over the <ul> and calling extract_first() on
        # "./li/..." only ever returned the first entry of each list.
        entries = response.xpath("/html/body/div[3]/div[2]/div[2]/ul/li")
        for entry in entries:
            text = entry.xpath("./a/div[2]/text()").extract_first()
            print(text)

            detail_url = entry.xpath("./a/@href").extract_first()
            if not detail_url:
                # No link in this entry; Request(url=None) would raise.
                continue

            # urljoin leaves absolute URLs unchanged and resolves
            # relative ones against the listing page's URL.
            yield scrapy.Request(
                url=response.urljoin(detail_url),
                callback=self.parse_detail,
            )

        # Pagination (disabled).  Requests are sent manually; the callback
        # is the function used to parse the downloaded page.
        # if self.page_num <= 2:
        #     new_url = format(self.url % self.page_num)
        #     self.page_num += 1
        #     yield scrapy.Request(url=new_url, callback=self.parse)

    def parse_detail(self, response):
        """Print the text content of a detail page.

        Args:
            response: The downloaded detail page.
        """
        fragments = response.xpath(
            "/html/body/div[2]/div[2]/div[3]//text()"
        ).extract()
        # Join all text nodes under the container and trim the
        # surrounding whitespace before printing.
        detail = ''.join(fragments).strip()
        print(detail)
(venv) PS C:\Users\Administrator\Desktop\douban\threedmPro> scrapy crawl threedm
Stephen Chow 《 God of food 》 Location: Treasure seafood restaurant Officially bid farewell to Hong Kong
Stephen Chow movie 《 God of food 》 Location —— Treasure seafood restaurant , Officially bid farewell to Hong Kong , Transfer to Southeast Asia .