import datetime
import sys
import requests
import scrapy
import time
import json
import scrapy.http
from peilv.items import PeilvItem
from lxml import etree

# Listing of current/upcoming fixtures, e.g. https://live.leisu.com/saicheng?date=20190620
wl_url = 'https://live.leisu.com/saicheng?date='
# Listing of finished (historical) fixtures, e.g. https://live.leisu.com/wanchang?date=20190606
ls_url = 'https://live.leisu.com/wanchang?date='


class LiveJiangSpider(scrapy.Spider):
    """Spider that scrapes football match odds from live.leisu.com.

    It first fetches a date's fixture list (historical via ``parseLs`` or
    upcoming via ``parseWl``), then issues one follow-up request per lottery
    match whose odds are extracted in ``parse``.
    """

    name = 'FBP'
    allowed_domains = ['leisu.com']

    @staticmethod
    def _first_text(selector, query):
        """Return the first node matched by *query*, or None when nothing matches.

        Replaces the repeated ``str(x.extract()) == "[]"`` emptiness test and
        avoids running the same XPath extraction twice.
        """
        extracted = selector.xpath(query).extract()
        return extracted[0] if extracted else None

    def start_requests(self):
        d1 = '20190606'  # date of historical matches
        # d1 = '20190914'  # date of future matches
        # NOTE: FormRequest without formdata issues a plain GET request.
        request = scrapy.http.FormRequest(ls_url + d1, callback=self.parseLs,
                                          meta={'d1': d1})  # historical matches
        # request = scrapy.http.FormRequest(wl_url + d1, callback=self.parseWl,
        #                                   meta={'d1': d1})  # future matches
        yield request

    def parseLs(self, response):
        """Parse the finished-match listing; schedule one odds request per match."""
        d2 = response.meta['d1']  # the date string passed from start_requests
        sel = response.xpath
        # data-status="8" marks finished matches; each data-id is e.g. "2674547"
        racelist = [e5.split("'") for e5 in
                    sel('//li[@data-status="8"]/@data-id').extract()]
        for raceid in racelist:  # raceid=['2674547']; raceid[0]='2674547'
            item = PeilvItem()
            sel_div = sel('//li[@data-id=' + str(raceid[0]) +
                          ']/div[@class="find-table layout-grid-tbody hide"]'
                          '/div[@class="clearfix-row"]')
            cc = self._first_text(
                sel_div,
                'span[@class="lab-lottery"]/span[@class="text-jc"]/text()')
            # cc is the lottery label (e.g. weekday + issue number); prefix the date.
            item['cc'] = "" if cc is None else str(d2) + str(cc)
            if " Zhou " in item['cc']:  # only lottery matches carry this weekday marker
                plurl = 'https://live.leisu.com/3in1-' + raceid[0]
                # yield (not return): scrapy queues the request and keeps
                # iterating the remaining matches; parse() runs later from the queue.
                yield scrapy.http.FormRequest(plurl, callback=self.parse,
                                              meta={'item': item})

    def parseWl(self, response):
        """Parse the upcoming-match listing; schedule one odds request per match."""
        d2 = response.meta['d1']  # the date string passed from start_requests
        sel = response.xpath
        # data-status="1" marks not-yet-played matches.
        racelist = [e5.split("'") for e5 in
                    sel('//li[@data-status="1"]/@data-id').extract()]
        for raceid in racelist:  # raceid=['2674547']; raceid[0]='2674547'
            item = PeilvItem()
            sel_div = sel('//*[@data-id=' + str(raceid[0]) +
                          ']/div[@class="find-table layout-grid-tbody hide"]'
                          '/div[@class="clearfix-row"]')
            changci = self._first_text(
                sel_div,
                'span[@class="lab-lottery"]/span[@class="text-jc"]/text()')
            changci = "" if changci is None else str(changci)
            if " Zhou " in changci:  # only lottery matches carry this weekday marker
                item['cc'] = str(d2) + changci
                plurl = 'https://live.leisu.com/3in1-' + raceid[0]
                # yield (not return): scrapy queues the request and keeps
                # iterating; parse() runs later from the queue.
                yield scrapy.http.FormRequest(plurl, callback=self.parse,
                                              meta={'item': item})

    def parse(self, response):
        """Extract the two odds columns from a match detail page into the item."""
        print('--------------into parse----------------------')
        item = response.meta['item']
        pv = response.xpath
        # Common XPath tail selecting the opening-odds cell of a bookmaker row.
        pl_str = ('/td[@class="bd-left"]/div[@class="begin float-left w-bar-100'
                  ' bd-bottom p-b-8 color-999 m-b-8"]'
                  '/span[@class="float-left col-3"]/text()')
        # data-id="5" and data-id="2" identify the two bookmaker rows scraped.
        li = pv('//*[@data-id="5"]' + pl_str).extract()
        item['li'] = li[0] if li else ''
        b5 = pv('//*[@data-id="2"]' + pl_str).extract()
        item['b5'] = b5[0] if b5 else ''
        # Items are yielded only after the queued requests reach this callback.
        yield item