
This code is supposed to loop through the URLs scraped by the scrapeProductPage function. Before looping, it needs to log in so that it can obtain the prices, which are only displayed to logged-in users. Instead of looping through the URLs, it just returns the scraped data from one page. The warning I get is "MaxListenersExceededWarning: Possible EventEmitter memory leak detected".

const request = require("request-promise");
const cheerio = require("cheerio");
const ObjectsToCsv = require("objects-to-csv");
const puppeteer = require('puppeteer');


const url = "https://www.example.com";

const scrapeResults = [];

async function scrapeProductPage() {
  try {
    const htmlResult = await request.get(url);
    const $ = cheerio.load(htmlResult);

    $("td.productListing-data > a[style='position:relative;float:left;']").each((index, element) => {
      let url = $(element).attr("href");
      url = "https://www.example.com/" + url;
      const scrapeResult = { url };
      scrapeResults.push(scrapeResult);
    });
    return scrapeResults;
  } catch (err) {
    console.error(err);
  }
}

async function scrapeDescription(productsWithImages) {
  process.setMaxListeners(0);
  const browser = await puppeteer.launch({
    headless: false
  });

  const page = await browser.newPage();
  await page.goto('https://www.example.com/login');

  await page.waitFor(500);

  await page.waitFor('input[name="email_address"]');
  await page.type('input[name="email_address"]', 'example@gmail.com');
  await page.type('input[name="password"]', '123test');
  await page.click('#btnLogin');

  return await Promise.all(
    productsWithImages.map(async job => {
      try {
        await page.goto(job.url, { waitUntil: "load" });
        const content = await page.content();
        const $ = cheerio.load(content);

        job.main_img = $('img#main_img').attr('src');
        job.name = $('h2').text();
        job.price = $("td.products_info_price").text();

        return job;
      } catch (error) {
        console.error(error);
      }
    })
  );
}



async function saveDataToCsv(data) {
  const csv = new ObjectsToCsv(data);
  console.log(csv);
}

async function scrapeWona() {
  const productsWithImages = await scrapeProductPage();
  const wonaFullData = await scrapeDescription(productsWithImages);
  await saveDataToCsv(productsWithImages);
}

scrapeWona();
Maureen Moore
  • Added an answer about the warning message. As for why you're not scraping correctly, could you post the sample HTML or the webpages in question? – razki Apr 07 '20 at 16:48

1 Answer


You're getting the warning because of process.setMaxListeners(0).

The warning indicates that you have a memory leak somewhere in the code.
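
For context, Node prints this warning as soon as more than the default of 10 listeners are attached to a single emitter for the same event. A rough illustration, not taken from your code:

const EventEmitter = require('events');

const emitter = new EventEmitter();

// The default limit is 10 listeners per event name; attaching an 11th one
// prints "MaxListenersExceededWarning: Possible EventEmitter memory leak detected".
for (let i = 0; i < 11; i++) {
  emitter.on('data', () => {});
}

// emitter.setMaxListeners(0) removes the limit and silences the warning,
// but the listeners themselves still accumulate, which is the real problem.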

You can also take a look at the setMaxListeners documentation: https://nodejs.org/docs/latest/api/events.html#events_emitter_setmaxlisteners_n

Also take a look at the answer here: node.js - request - How to "emitter.setMaxListeners()"?
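
In this particular script the listeners most likely pile up because Promise.all fires page.goto for every product URL on the same Puppeteer page at once. A rough sketch of a sequential version of scrapeDescription, reusing the URL, selectors and login details from your question, so treat it as a starting point rather than a drop-in fix:

async function scrapeDescription(productsWithImages) {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  // Log in once so the prices become visible.
  await page.goto('https://www.example.com/login');
  await page.waitForSelector('input[name="email_address"]');
  await page.type('input[name="email_address"]', 'example@gmail.com');
  await page.type('input[name="password"]', '123test');
  await Promise.all([
    page.waitForNavigation(),
    page.click('#btnLogin'),
  ]);

  const results = [];

  // Visit the product pages one after another on the same page object,
  // so only one navigation (and its listeners) is active at a time.
  for (const job of productsWithImages) {
    try {
      await page.goto(job.url, { waitUntil: 'load' });
      const $ = cheerio.load(await page.content());

      job.main_img = $('img#main_img').attr('src');
      job.name = $('h2').text();
      job.price = $('td.products_info_price').text();

      results.push(job);
    } catch (error) {
      console.error(error);
    }
  }

  await browser.close();
  return results;
}

If you do want the product pages scraped in parallel, open a separate page per URL (browser.newPage() inside the map) instead of sharing one page; that avoids the listener build-up at the cost of more memory.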

razki