import scrapy
import re
class GithubSpider(scrapy.Spider):
    """Log in to GitHub by rebuilding the login form POST by hand.

    ``parse`` receives the login page, copies the hidden form inputs out of
    it, adds the credentials and posts everything to ``session_url``.
    ``after_login`` then reports the occurrences of "Issues" in the page
    returned for that POST.

    ``login`` and ``password`` are ordinary class attributes, so they can be
    overridden per run with spider arguments, e.g.
    ``scrapy crawl github -a login=me@example.com -a password=secret``.
    """

    name = 'github'
    allowed_domains = ['github.com']
    # The login page URL
    start_urls = ['https://github.com/login']

    # URL the login form is posted to.
    session_url = "https://github.com/session"

    # Default credentials submitted with the form.
    login = "[email protected]"
    password = "xxx"

    # Names of the hidden <input> elements copied from the login page.
    # NOTE(review): the form also appears to carry a `required_field_<suffix>`
    # input whose name varies; it is deliberately not submitted here --
    # confirm the login still succeeds without it.
    hidden_fields = (
        "commit",
        "utf8",
        "authenticity_token",
        "ga_id",
        "webauthn-support",
        "webauthn-iuvpaa-support",
        "timestamp",
        "timestamp_secret",
    )

    def parse(self, response):
        """Build the login POST from the login page and submit it.

        Args:
            response: The downloaded login page.

        Yields:
            A ``scrapy.FormRequest`` to ``session_url`` whose callback is
            ``after_login``.
        """
        post_data = {}
        for field in self.hidden_fields:
            value = response.xpath(
                "//input[@name='{}']/@value".format(field)
            ).extract_first()
            if value is None:
                if field != "ga_id":
                    # FormRequest cannot encode None, so a field the page
                    # does not provide is left out instead of crashing.
                    continue
                # ga_id is always sent, falling back to an empty string.
                value = ""
            post_data[field] = value

        post_data["login"] = self.login
        post_data["password"] = self.password

        # Log only the field names: the values include the password.
        self.logger.debug("Submitting login form fields: %s", sorted(post_data))

        yield scrapy.FormRequest(
            self.session_url,
            formdata=post_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Report every occurrence of "Issues" in the post-login page.

        Args:
            response: The response to the login POST.
        """
        # response.text decodes with the response's own encoding, whereas
        # body.decode() assumes UTF-8.
        matches = re.findall("Issues", response.text)
        self.logger.info("Matches for 'Issues' after login: %s", matches)
# -*- coding: utf-8 -*-
import scrapy
import re
class Github2Spider(scrapy.Spider):
    """Log in to GitHub by letting Scrapy fill in the login form.

    ``parse`` hands the login page to ``FormRequest.from_response``, which
    locates a form in the page and pre-populates its existing inputs; only
    the credentials are supplied here. ``after_login`` then reports the
    occurrences of "Issues" in the page returned for that submission.

    ``login`` and ``password`` are ordinary class attributes, so they can be
    overridden per run with spider arguments, e.g.
    ``scrapy crawl github2 -a login=me@example.com -a password=secret``.
    """

    name = 'github2'
    allowed_domains = ['github.com']
    # Request the login page over HTTPS directly instead of via plain HTTP.
    start_urls = ['https://github.com/login']

    # Default credentials submitted with the form.
    login = "[email protected]"
    password = "xxx"

    def parse(self, response):
        """Submit the login form found in the login page.

        Args:
            response: The downloaded login page.

        Yields:
            A ``scrapy.FormRequest`` built from the page's form whose
            callback is ``after_login``.
        """
        credentials = {"login": self.login, "password": self.password}
        yield scrapy.FormRequest.from_response(
            response,  # the form is looked up in this response automatically
            formdata=credentials,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Report every occurrence of "Issues" in the post-login page.

        Args:
            response: The response to the form submission.
        """
        # response.text decodes with the response's own encoding, whereas
        # body.decode() assumes UTF-8.
        matches = re.findall("Issues", response.text)
        self.logger.info("Matches for 'Issues' after login: %s", matches)
# -*- coding: utf-8 -*-
import scrapy
import re
class RenrenSpider(scrapy.Spider):
    """Fetch a Renren profile page using cookies from a logged-in session.

    Instead of performing a login, ``start_requests`` attaches a fixed set of
    cookies to the first request. ``parse`` then reports the occurrences of
    a keyword in the returned page.
    """

    name = 'renren'
    allowed_domains = ['renren.com']
    # URL of the personal center (profile) page
    start_urls = ['http://www.renren.com/972990680/profile']

    @staticmethod
    def _parse_cookie_header(header):
        """Turn a ``"k1=v1; k2=v2"`` cookie string into a dict.

        Each pair is split on its first ``=`` only, so values that themselves
        contain ``=`` are kept whole. Empty segments are skipped. When a name
        occurs more than once, the last value wins.
        """
        cookies = {}
        for pair in header.split("; "):
            key, _, value = pair.partition("=")
            if key:
                cookies[key] = value
        return cookies

    def start_requests(self):
        """Request the profile page with the session cookies attached.

        Yields:
            A ``scrapy.Request`` for ``start_urls[0]`` carrying the cookies,
            with ``parse`` as its callback.
        """
        # Cookie string taken from the request shown in Chrome's debug tools
        # after logging in.
        cookiesstr = "anonymid=k3miegqc-hho317; depovince=ZGQT; _r01_=1; JSESSIONID=abcDdtGp7yEtG91r_U-6w; ick_login=d2631ff6-7b2d-4638-a2f5-c3a3f46b1595; ick=5499cd3f-c7a3-44ac-9146-60ac04440cb7; t=d1b681e8b5568a8f6140890d4f05c30f0; societyguester=d1b681e8b5568a8f6140890d4f05c30f0; id=972990680; xnsid=404266eb; XNESSESSIONID=62de8f52d318; jebecookies=4205498d-d0f7-4757-acd3-416f7aa0ae98|||||; ver=7.0; loginfrom=null; jebe_key=8800dc4d-e013-472b-a6aa-552ebfc11486%7Cb1a400326a5d6b2877f8c884e4fe9832%7C1575175011619%7C1%7C1575175011639; jebe_key=8800dc4d-e013-472b-a6aa-552ebfc11486%7Cb1a400326a5d6b2877f8c884e4fe9832%7C1575175011619%7C1%7C1575175011641; wp_fold=0"
        yield scrapy.Request(
            self.start_urls[0],
            callback=self.parse,
            cookies=self._parse_cookie_header(cookiesstr),
        )

    def parse(self, response):
        """Report every occurrence of the keyword in the profile page.

        Args:
            response: The downloaded profile page.
        """
        # response.text decodes with the response's own encoding, whereas
        # body.decode() assumes UTF-8.
        matches = re.findall(" Leisure and joy ", response.text)
        self.logger.info("Keyword matches on profile page: %s", matches)