Expected Behavior
I currently have a page that needs to be scrolled down to fully load the elements I'm trying to scrape. When I run my scraping code locally, it scrolls fine and successfully loads all the elements.
Snapshot after a local run, where all elements are successfully loaded:
Current Behavior
However, when I run this in Lambda, the scrolling does not work. The elements never fully load: only the ~50 elements present initially appear, versus the 400+ that load in when I run it locally. I've tried using different selectors as targets to scroll to, but none seem to work (see the sketch below for the kind of variants tried).
Snapshot after the Lambda run, after scrolling is called:
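For reference, the variants were all along these lines (a sketch only; the selector names are placeholders, not the site's real ones):

```js
// Runs inside the Lambda handler's async function. Same scrollIntoView
// call each time, just with different target selectors (placeholders here).
for (const sel of ['.grid-footer', '.grid-event-wrapper', 'body']) {
  await page.evaluate((s) => {
    const el = document.querySelector(s);
    if (el) el.scrollIntoView({ behavior: 'smooth', block: 'end' });
  }, sel);
}
```

My Lambda handler: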
```js
const AWS = require('aws-sdk')
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });
const chromium = require('chrome-aws-lambda');
const pageURL = process.env.TARGET_URL
const agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
const Bwin = require('./scrapers/bwin.js')
const db = require('./db.js')

exports.handler = async (event, context) => {
  let result = null;
  let browser = null;

  try {
    browser = await chromium.puppeteer.launch({
      args: chromium.args,
      defaultViewport: chromium.defaultViewport,
      executablePath: await chromium.executablePath,
      headless: chromium.headless,
      ignoreHTTPSErrors: true,
    });

    await db.connectToDb()

    let page = await browser.newPage();
    await page.setUserAgent(agent)
    console.log('Navigating to page: ', pageURL)
    await page.goto(pageURL, { waitUntil: 'networkidle2' })

    // Run the scraper, which scrolls the page until no new elements load
    let bwin = new Bwin()
    let events = await bwin.scrapeEvents(page)
    console.log('length: ', events.length)

    // Upload a debug screenshot using the current timestamp as filename
    const buffer = await page.screenshot()
    const s3result = await s3.upload({
      Bucket: 'mybucket',
      Key: `${Date.now()}-${events.length}.png`,
      Body: buffer,
      ContentType: 'image/png',
      ACL: 'public-read'
    }).promise()
    console.log('S3 image URL:', s3result.Location)

    console.log('URL: ', page.url())
    await page.close();
    await browser.close();
  } catch (error) {
    console.log(error)
  } finally {
    if (browser !== null) {
      await browser.close();
    }
  }

  return result
}
```
The Scraper base class:

```js
const db = require('../db.js')
const AWS = require('aws-sdk')
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });

class Scraper {
  async scrapeEvents(page) {
    await this.loadPage(page)
    page.on('console', msg => console.log('PAGE LOG:', msg.text()));
    await this.loadAllElements(page, this.eventSelector)
    let events = await page.$$eval(this.eventSelector, this.getEventInfo)
    events = events.map(event => this.parseEventInfo(event))
    return events
  }

  async loadPage(page) {
    await this.closePopUp(page);
    await this.loadElements(page, this.eventSelector, 0)
  }

  // Waits until the number of matching elements differs from elementCount,
  // uploading a debug screenshot to S3 first
  async loadElements(page, elementSelector, elementCount) {
    console.log('count: ', elementCount)
    const buffer = await page.screenshot()
    // upload the image using the current timestamp as filename
    const s3result = await s3.upload({
      Bucket: 'mybucket',
      Key: `${Date.now()}-elementCount:${elementCount}.png`,
      Body: buffer,
      ContentType: 'image/png',
      ACL: 'public-read'
    }).promise()
    console.log('S3 image URL:', s3result.Location)

    try {
      await page.waitForFunction((elementSelector, elementCount) => {
        return document.querySelectorAll(elementSelector).length != elementCount;
      }, { timeout: 30000 }, elementSelector, elementCount);
    } catch (error) {
      throw error
    }
  }

  // Scrolls and waits in a loop; exits when waitForFunction times out
  // because no new elements appeared
  async loadAllElements(page, elementSelector) {
    try {
      while (true) {
        let elementCount = await page.evaluate(this.scrollToBottom, this.scrollableSelector, elementSelector)
        await this.loadElements(page, elementSelector, elementCount)
      }
    } catch (error) {
      console.error(error)
    }
  }

  async closePopUp(page) {
    await this.loadElements(page, this.popupSelector, 0)
    await page.evaluate((sel) => document.querySelector(sel).click(), this.popupSelector)
    console.log('popup closed')
  }

  // Runs in the browser context: records the current element count,
  // then scrolls the target element into view
  scrollToBottom(scrollableSelector, elementSelector) {
    let elementCount = document.querySelectorAll(elementSelector).length;
    document.querySelector(scrollableSelector).scrollIntoView({ behavior: "smooth", block: "end" })
    console.log('SCROLLED TO BOTTOM')
    return elementCount;
  }
}

module.exports = Scraper;
```
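`Scraper` leaves `eventSelector`, `scrollableSelector`, and `popupSelector` to the subclass. The real `bwin.js` isn't shown here, but a hypothetical sketch of its shape would be (selector values are placeholders):

```js
// Hypothetical sketch of scrapers/bwin.js -- the actual selector values are
// not included in this issue, so the ones below are placeholders only.
const Scraper = require('./scraper.js') // assumed filename for the base class

class Bwin extends Scraper {
  constructor() {
    super()
    this.eventSelector = '.grid-event'       // placeholder
    this.scrollableSelector = '.grid-footer' // placeholder
    this.popupSelector = '.popup-close'      // placeholder
  }
}

module.exports = Bwin
```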
Environment
chrome-aws-lambda
Version: chrome-aws-lambda:22
puppeteer / puppeteer-core
Version: 21.5.2
Steps to Reproduce
URL: https://sports.bwin.pt/pt/sports/futebol-4/apostar
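A minimal, self-contained sketch of the repro (assumes chrome-aws-lambda is installed; `.grid-event` is a placeholder selector, not verified against the site):

```js
const chromium = require('chrome-aws-lambda');

exports.handler = async () => {
  const browser = await chromium.puppeteer.launch({
    args: chromium.args,
    defaultViewport: chromium.defaultViewport,
    executablePath: await chromium.executablePath,
    headless: chromium.headless,
  });
  const page = await browser.newPage();
  await page.goto('https://sports.bwin.pt/pt/sports/futebol-4/apostar', { waitUntil: 'networkidle2' });

  // Count matching elements before and after scrolling (placeholder selector)
  const count = () => page.$$eval('.grid-event', (els) => els.length);
  const before = await count();
  await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
  await new Promise((r) => setTimeout(r, 5000)); // give lazy content time to load
  const after = await count();
  console.log({ before, after }); // locally `after` grows past `before`; on Lambda it does not

  await browser.close();
};
```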
Has anyone experienced this?