999 status code when trying to log in to LinkedIn with Scrapy

Question

I am trying to use the Scrapy framework to extract some information from LinkedIn. This is my scraper for crawling a LinkedIn profile. When I run this code, the response received in check_login_response has a 999 status code, so I can't log in to LinkedIn.

How can I solve this problem?

================= Code =====================

import scrapy
import requests
import re
from scrapy.http import Request, FormRequest
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors.sgml import SgmlLinkExtractor
from scrapy.linkextractors import LinkExtractor
from scrapy.conf import settings

class LinkedinProfileItem(scrapy.Item):
    """Item declaring the fields collected for one LinkedIn profile.

    Every attribute is a plain ``scrapy.Field()`` with no metadata; nothing
    in this listing assigns values to them yet.
    """

    # Field names suggest the profile picture URL, display name, summary,
    # headline, location and industry -- presumably text taken from the
    # profile page; confirm against the parsing code once it exists.
    Profile_Image_Link = scrapy.Field()
    Name = scrapy.Field()
    Profile_Summary = scrapy.Field()
    Headline = scrapy.Field()
    Location = scrapy.Field()
    Industry = scrapy.Field()

class LinkedinProfilesSpider(CrawlSpider):
    """Crawl a LinkedIn profile after obtaining a session token.

    Request chain:

    1. ``start_requests`` fetches ``login_url``.
    2. ``login`` extracts a CSRF token from that page and requests the first
       start URL with the token sent as the ``JSESSIONID`` cookie.
    3. ``check_login_response`` looks for the configured e-mail address in
       the response and, when found, re-requests every start URL without an
       explicit callback.

    The account is read from the Scrapy settings keys ``myemail`` and
    ``password``. NOTE(review): ``password`` is stored but no request in this
    class ever sends it.
    """

    name = 'linkedin_scrapy'
    allowed_domains = ['www.linkedin.com']
    # NOTE(review): ``login_page`` is not referenced anywhere in this class.
    login_page = 'https://www.linkedin.com/uas/login'
    login_url = "https://www.linkedin.com"
    start_urls = ['https://www.linkedin.com/in/luca-candela-a83738143']

    # Let responses with status 999 reach the callbacks so they can be
    # inspected instead of being filtered out as HTTP errors.
    handle_httpstatus_list = [999]

    # Scoped to this spider through ``custom_settings`` rather than by
    # mutating the process-wide settings object while the class is defined.
    custom_settings = {
        'ROBOTSTXT_OBEY': False,
    }

    # Browser-like headers, sent only with the profile request in ``login``.
    headers = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        'upgrade-insecure-requests': '1',
        "accept-encoding": "gzip, deflate, sdch, br",
        "accept-language": "en-US,en;q=0.8,ms;q=0.6",
        "X-Requested-With": "XMLHttpRequest",
        'referer': 'https://www.linkedin.com',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    # NOTE(review): as a plain class attribute this is presumably not read by
    # Scrapy; it would have to live in ``custom_settings`` (or the project
    # settings) to disable the middleware. Left untouched because enabling it
    # would change which User-Agent the header-less first request sends.
    DOWNLOADER_MIDDLEWARES = {'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None, }

    # Follow every extracted link and pass each response to ``parse_item``.
    rules = (
        Rule(
            LinkExtractor(allow=[r'.*']),
            callback='parse_item',
            follow=True,
        ),
    )

    def start_requests(self):
        """Load the credentials from settings and request the landing page.

        Yields:
            A single unfiltered ``Request`` for ``login_url`` whose response
            is handled by ``login``.
        """
        self.username = self.settings['myemail']
        self.password = self.settings['password']

        yield Request(
            url=self.login_url,
            callback=self.login,
            dont_filter=True,
        )

    def login(self, response):
        """Extract the CSRF token and request the profile page with it.

        Args:
            response: Response for ``login_url``.

        Returns:
            A ``Request`` for the first start URL carrying the token as the
            ``JSESSIONID`` cookie, or ``None`` when no token could be found,
            which ends the chain.
        """
        token = response.xpath("//meta[@name='lnkd-track-error']/@content").extract()
        try:
            real_token = re.search('csrfToken=(.*)', token[0]).group(1)
        except (IndexError, AttributeError):
            # IndexError: the meta tag is missing from the page.
            # AttributeError: the tag is present but has no "csrfToken=" part.
            self.log("Error Token")
            return None

        if not real_token:
            return None

        return Request(
            url=self.start_urls[0],
            headers=self.headers,
            cookies={'JSESSIONID': real_token},
            callback=self.check_login_response,
        )

    def check_login_response(self, response):
        """Decide whether the session is authenticated and start the crawl.

        The check is a substring search for the configured username in the
        raw response body.

        Args:
            response: Response for the profile request issued by ``login``.

        Returns:
            A list of unfiltered ``Request`` objects, one per start URL, or
            ``None`` when the username is not present in the body.
        """
        # ``response.body`` is bytes, so a text username must be encoded
        # before the membership test; a value that is already bytes is used
        # as it is.
        needle = self.username
        if not isinstance(needle, bytes):
            needle = needle.encode('utf-8')

        if needle not in response.body:
            self.log("Login failed")
            return None
        self.log("Successfully logged in")

        return [Request(url=url, dont_filter=True) for url in self.start_urls]

    def parse_item(self, response):
        """Return an item for a crawled page.

        Currently a stub: the returned ``LinkedinProfileItem`` is empty
        because nothing is extracted from ``response`` yet.
        """
        item = LinkedinProfileItem()

        return item

What's in the response's body? Also, could [this](https://doc.scrapy.org/en/latest/faq.html#what-does-the-response-status-code-999-means) help? — Tomáš Linhart, Jan 22 '18 at 06:24
This sounds like a LinkedIn-related question, more than a scrapy one. Have you checked questions like https://stackoverflow.com/questions/27571419/how-to-avoid-http-1-1-999-request-denied-response-from-linkedin ? — Andrea Corbellini, May 16 '18 at 17:01

999 status code when trying to log in to LinkedIn with Scrapy

0 Answers