scrapy模拟用户登录

scrapy框架编写模拟用户登录的三种方式：

方式一：携带cookie登录，携带cookie一般请求的url为登录后的页面，获取cookie信息应在登录后的页面获取，cookie参数应转成字典形式

# -*- coding: utf-8 -*-

import re

import scrapy

class RenrenSpider(scrapy.Spider):
    """Log in to renren.com by attaching previously captured session cookies.

    The spider requests a page that is only meaningful when logged in (the
    profile URL in ``start_urls``) and sends the browser's cookies with that
    first request instead of submitting a login form.
    """

    name = 'renren'
    allowed_domains = ['renren.com']
    start_urls = ['http://www.renren.com/966403607/profile']

    def start_requests(self):
        """Yield the initial request for ``start_urls[0]`` with cookies attached.

        ``start_requests`` is overridden because the first request has to
        carry the cookies.
        """
        # Raw ``Cookie`` header value copied from a logged-in browser session.
        raw_cookies = 'anonymid=joz9buh7-q7cfyi; depovince=GUZ; _r01_=1; _de=A10BB6D966D15FBA1F90E79AB0D2FDF8; ln_uact=18520877258; ln_hurl=http://head.xiaonei.com/photos/0/0/men_main.gif; jebe_key=b605eb90-21b9-4072-9d48-b75b233c1cea%7Cb156ee0bfd56183e1b8eb9e5994eb5ef%7C1543293909743%7C1%7C1543293910671; jebecookies=9700aefc-77a1-49a7-8d74-882aa173e271|||||; JSESSIONID=abcxtZqTI1hOu4WzN0tDw; ick_login=21361cc0-986f-49bc-95f9-de3a9ed54a69; p=29e3cce85947859ee0e1d6264160539f7; first_login_flag=1; t=b6e6ac604c66019acf93cb471550349e7; societyguester=b6e6ac604c66019acf93cb471550349e7; id=966403607; xnsid=ac2d6a1a; loginfrom=syshome; wp_fold=0'

        # Turn "k1=v1; k2=v2" into {"k1": "v1", "k2": "v2"}. Splitting only on
        # the first "=" keeps values that themselves contain "=" intact.
        cookies = dict(pair.split("=", 1) for pair in raw_cookies.split("; "))

        # Send the cookies along with the request; the response goes to parse().
        yield scrapy.Request(
            self.start_urls[0],
            cookies=cookies,
            callback=self.parse,
        )

    def parse(self, response):
        """Yield one item listing every occurrence of "尚学堂" in the page body.

        A non-empty ``item["name"]`` list means the text was found in the
        response; presumably it is the logged-in account's display name, so
        finding it indicates the cookies were accepted (confirm).
        """
        item = {}
        name = re.findall("尚学堂", response.body.decode())
        item["name"] = name
        yield item

为了确认cookie确实在不同的解析函数之间传递，可以在settings.py中设置如下参数：
# 该设置可以知道cookie确实是在不同的解析函数中传递
COOKIES_DEBUG=True

方式二：携带表单数据formdata发送post请求：

# -*- coding: utf-8 -*-

import re

import scrapy

class GithubSpider(scrapy.Spider):
    """Log in to GitHub by POSTing the login form fields explicitly.

    The login page is fetched first so the hidden ``authenticity_token``
    field can be read from it and sent back with the credentials.
    """

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Read the hidden token from the login page and submit the form."""
        token = response.xpath(
            '//input[@name="authenticity_token"]/@value'
        ).extract_first()

        # Fields posted with the login form.
        payload = {
            "commit": "Sign in",
            "utf8": "✓",
            "authenticity_token": token,
            "login": "sxtpython",
            "password": "sxt123456",
        }

        # POST to the URL the login form submits to.
        login_request = scrapy.FormRequest(
            url='https://github.com/session',
            formdata=payload,
            callback=self.parse_item,
        )
        yield login_request

    def parse_item(self, response):
        """Yield one item listing every occurrence of the login name in the body."""
        page_text = response.body.decode()
        yield {"name": re.findall('sxtpython', page_text)}

方式三：自动从响应中找到form表单进行登录

# -*- coding: utf-8 -*-

import re

import scrapy

# 方式三

class Github2Spider(scrapy.Spider):
    """Log in to GitHub by letting Scrapy build the POST from the page's form.

    Only the login name and password are supplied here; the form itself is
    taken from the login page response by ``FormRequest.from_response``.
    """

    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Submit the login form found in ``response`` with the credentials."""
        credentials = {
            'login': 'sxtpython',
            'password': 'sxt123456',
        }
        # from_response locates the form in the response automatically.
        login_request = scrapy.FormRequest.from_response(
            response,
            formdata=credentials,
            callback=self.parse_item,
        )
        yield login_request

    def parse_item(self, response):
        """Yield one item listing every occurrence of the login name in the body."""
        page_text = response.body.decode()
        yield {"name": re.findall('sxtpython', page_text)}

秒客网

scrapy模拟用户登录

相关文章