installation

Installation

npx crawlee create your-folder-name

Test your example code

cd your-folder-name
npm start

// For more information, see https://crawlee.dev/
import { PlaywrightCrawler, Dataset } from 'crawlee';

// PlaywrightCrawler crawls the web using a headless
// browser controlled by the Playwright library.
const crawler = new PlaywrightCrawler({
    // Use the requestHandler to process each of the crawled pages.
    // The handler destructures `request`, `page`, `enqueueLinks` and `log`
    // from the single context object it is called with.
    async requestHandler({ request, page, enqueueLinks, log }) {
        // Read the page title and log it together with the loaded URL.
        const title = await page.title();
        log.info(`Title of ${request.loadedUrl} is '${title}'`);

        // Save results as JSON to ./storage/datasets/default
        await Dataset.pushData({ title, url: request.loadedUrl });

        // Extract links from the current page
        // and add them to the crawling queue.
        await enqueueLinks();
    },
    // Uncomment this option to see the browser window.
    // headless: false,
});

// Add first URL to the queue and start the crawl.
// Top-level `await` is used here, so this file must run as an ES module.
await crawler.run(['https://crawlee.dev']);

Code explanation

// Start crawling from the website crawlee.dev
await crawler.run(['https://crawlee.dev']);

// Get the title of the web page
const title = await page.title();

// Save the data to the folder ./storage/datasets/default
await Dataset.pushData({ title, url: request.loadedUrl });

Previous: Apify | Next: functions

Last updated 2 years ago

Was this helpful?