diff --git a/docs/api.md b/docs/api.md index 9862a0f704a..34ef02d644e 100644 --- a/docs/api.md +++ b/docs/api.md @@ -136,6 +136,7 @@ * [page.content()](#pagecontent) * [page.cookies([...urls])](#pagecookiesurls) * [page.coverage](#pagecoverage) + * [page.createPDFStream([options])](#pagecreatepdfstreamoptions) * [page.deleteCookie(...cookies)](#pagedeletecookiecookies) * [page.emulate(options)](#pageemulateoptions) * [page.emulateCPUThrottling(factor)](#pageemulatecputhrottlingfactor) @@ -1489,6 +1490,50 @@ If URLs are specified, only cookies for those URLs are returned. - returns: <[Coverage]> +#### page.createPDFStream([options]) +- `options` <[Object]> Options object which might have the following properties: + - `path` <[string]> The file path to save the PDF to. If `path` is a relative path, then it is resolved relative to [current working directory](https://nodejs.org/api/process.html#process_process_cwd). If no path is provided, the PDF won't be saved to the disk. + - `scale` <[number]> Scale of the webpage rendering. Defaults to `1`. Scale amount must be between 0.1 and 2. + - `displayHeaderFooter` <[boolean]> Display header and footer. Defaults to `false`. + - `headerTemplate` <[string]> HTML template for the print header. Should be valid HTML markup with following classes used to inject printing values into them: + - `date` formatted print date + - `title` document title + - `url` document location + - `pageNumber` current page number + - `totalPages` total pages in the document + - `footerTemplate` <[string]> HTML template for the print footer. Should use the same format as the `headerTemplate`. + - `printBackground` <[boolean]> Print background graphics. Defaults to `false`. + - `landscape` <[boolean]> Paper orientation. Defaults to `false`. + - `pageRanges` <[string]> Paper ranges to print, e.g., '1-5, 8, 11-13'. Defaults to the empty string, which means print all pages. + - `format` <[string]> Paper format. 
If set, takes priority over `width` or `height` options. Defaults to 'Letter'. + - `width` <[string]|[number]> Paper width, accepts values labeled with units. + - `height` <[string]|[number]> Paper height, accepts values labeled with units. + - `margin` <[Object]> Paper margins, defaults to none. + - `top` <[string]|[number]> Top margin, accepts values labeled with units. + - `right` <[string]|[number]> Right margin, accepts values labeled with units. + - `bottom` <[string]|[number]> Bottom margin, accepts values labeled with units. + - `left` <[string]|[number]> Left margin, accepts values labeled with units. + - `preferCSSPageSize` <[boolean]> Give any CSS `@page` size declared in the page priority over what is declared in `width` and `height` or `format` options. Defaults to `false`, which will scale the content to fit the paper size. + - `omitBackground` <[boolean]> Hides default white background and allows generating PDFs with transparency. Defaults to `false`. +- returns: <[Promise]<[Readable]>> Promise which resolves with a Node.js stream for the PDF file. + +> **NOTE** This method is identical to [page.pdf](#pagepdfoptions), except it returns the PDF as a readable stream of binary data. If you are generating very large PDFs, it may be useful to use a stream to avoid high memory usage. This version will ignore the `path` option. + +```js +const fs = require('fs'); +const puppeteer = require('puppeteer'); + +(async () => { + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + // Stream a PDF into a file, closing the browser once it is fully written + const pdfStream = await page.createPDFStream(); + const writeStream = fs.createWriteStream('test.pdf'); + pdfStream.pipe(writeStream); + writeStream.on('finish', () => browser.close()); +})(); +``` + #### page.deleteCookie(...cookies) - `...cookies` <...[Object]> @@ -2000,7 +2045,6 @@ Page is guaranteed to have a main frame which persists during navigations.
- returns: <[Mouse]> #### page.pdf([options]) - - `options` <[Object]> Options object which might have the following properties: - `path` <[string]> The file path to save the PDF to. If `path` is a relative path, then it is resolved relative to [current working directory](https://nodejs.org/api/process.html#process_process_cwd). If no path is provided, the PDF won't be saved to the disk. - `scale` <[number]> Scale of the webpage rendering. Defaults to `1`. Scale amount must be between 0.1 and 2. @@ -2033,6 +2077,8 @@ Page is guaranteed to have a main frame which persists during navigations. > **NOTE** By default, `page.pdf()` generates a pdf with modified colors for printing. Use the [`-webkit-print-color-adjust`](https://developer.mozilla.org/en-US/docs/Web/CSS/-webkit-print-color-adjust) property to force rendering of exact colors. +> **NOTE** If you are generating very large PDFs, it may be useful to use the streaming version of this function ([page.createPDFStream](#pagecreatepdfstreamoptions)) to avoid high memory usage. + ```js // Generates a PDF with 'screen' media type. await page.emulateMediaType('screen'); diff --git a/src/common/Page.ts b/src/common/Page.ts index 155dd600283..238d7bde360 100644 --- a/src/common/Page.ts +++ b/src/common/Page.ts @@ -14,6 +14,8 @@ * limitations under the License. */ +import type { Readable } from 'stream'; + import { EventEmitter } from './EventEmitter.js'; import { Connection, @@ -2546,7 +2548,7 @@ export class Page extends EventEmitter { * * @param options - options for generating the PDF. 
*/ - async pdf(options: PDFOptions = {}): Promise { + async createPDFStream(options: PDFOptions = {}): Promise { const { scale = 1, displayHeaderFooter = false, @@ -2557,7 +2559,6 @@ export class Page extends EventEmitter { pageRanges = '', preferCSSPageSize = false, margin = {}, - path = null, omitBackground = false, } = options; @@ -2605,7 +2606,17 @@ export class Page extends EventEmitter { await this._resetDefaultBackgroundColor(); } - return await helper.readProtocolStream(this._client, result.stream, path); + return helper.getReadableFromProtocolStream(this._client, result.stream); + } + + /** + * @param {!PDFOptions=} options + * @return {!Promise} + */ + async pdf(options: PDFOptions = {}): Promise { + const { path = undefined } = options; + const readable = await this.createPDFStream(options); + return await helper.getReadableAsBuffer(readable, path); } /** diff --git a/src/common/Tracing.ts b/src/common/Tracing.ts index ad075e9bacc..cc0a13c2952 100644 --- a/src/common/Tracing.ts +++ b/src/common/Tracing.ts @@ -106,10 +106,17 @@ export class Tracing { fulfill = x; reject = y; }); - this._client.once('Tracing.tracingComplete', (event) => { - helper - .readProtocolStream(this._client, event.stream, this._path) - .then(fulfill, reject); + this._client.once('Tracing.tracingComplete', async (event) => { + try { + const readable = await helper.getReadableFromProtocolStream( + this._client, + event.stream + ); + const buffer = await helper.getReadableAsBuffer(readable, this._path); + fulfill(buffer); + } catch (error) { + reject(error); + } }); await this._client.send('Tracing.end'); this._recording = false; diff --git a/src/common/helper.ts b/src/common/helper.ts index 122fa135bc9..a0f4e627ea9 100644 --- a/src/common/helper.ts +++ b/src/common/helper.ts @@ -13,6 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +import type { Readable } from 'stream'; + import { TimeoutError } from './Errors.js'; import { debug } from './Debug.js'; import { CDPSession } from './Connection.js'; @@ -307,9 +310,8 @@ async function waitWithTimeout( } } -async function readProtocolStream( - client: CDPSession, - handle: string, +async function getReadableAsBuffer( + readable: Readable, path?: string ): Promise { if (!isNode && path) { @@ -318,35 +320,58 @@ async function readProtocolStream( const fs = isNode ? await importFSModule() : null; - let eof = false; let fileHandle: import('fs').promises.FileHandle; if (path && fs) { fileHandle = await fs.promises.open(path, 'w'); } - const bufs = []; - while (!eof) { - const response = await client.send('IO.read', { handle }); - eof = response.eof; - const buf = Buffer.from( - response.data, - response.base64Encoded ? 'base64' : undefined - ); - bufs.push(buf); - if (path && fs) { - await fs.promises.writeFile(fileHandle, buf); + const buffers = []; + for await (const chunk of readable) { + buffers.push(chunk); + if (fileHandle) { + await fs.promises.writeFile(fileHandle, chunk); } } + if (path) await fileHandle.close(); - await client.send('IO.close', { handle }); let resultBuffer = null; try { - resultBuffer = Buffer.concat(bufs); + resultBuffer = Buffer.concat(buffers); } finally { return resultBuffer; } } +async function getReadableFromProtocolStream( + client: CDPSession, + handle: string +): Promise { + // TODO: + // This restriction can be lifted once https://github.com/nodejs/node/pull/39062 has landed + if (!isNode) { + throw new Error('Cannot create a stream outside of Node.js environment.'); + } + + const { Readable } = await import('stream'); + + let eof = false; + return new Readable({ + async read(size: number) { + if (eof) { + return null; + } + + const response = await client.send('IO.read', { handle, size }); + this.push(response.data, response.base64Encoded ? 
'base64' : undefined); + if (response.eof) { + this.push(null); + eof = true; + await client.send('IO.close', { handle }); + } + }, + }); +} + /** * Loads the Node fs promises API. Needed because on Node 10.17 and below, * fs.promises is experimental, and therefore not marked as enumerable. That @@ -378,7 +403,8 @@ export const helper = { pageBindingDeliverErrorString, pageBindingDeliverErrorValueString, makePredicateString, - readProtocolStream, + getReadableAsBuffer, + getReadableFromProtocolStream, waitWithTimeout, waitForEvent, isString, diff --git a/test/page.spec.ts b/test/page.spec.ts index b947b6f8b37..de8b56f370d 100644 --- a/test/page.spec.ts +++ b/test/page.spec.ts @@ -1514,6 +1514,20 @@ describe('Page', function () { expect(fs.readFileSync(outputFile).byteLength).toBeGreaterThan(0); fs.unlinkSync(outputFile); }); + + it('can print to PDF and stream the result', async () => { + // Printing to pdf is currently only supported in headless + const { isHeadless, page } = getTestState(); + + if (!isHeadless) return; + + const stream = await page.createPDFStream(); + let size = 0; + for await (const chunk of stream) { + size += chunk.length; + } + expect(size).toBeGreaterThan(0); + }); }); describe('Page.title', function () { diff --git a/utils/doclint/check_public_api/index.js b/utils/doclint/check_public_api/index.js index 5b4328ca76d..cafd0cfd877 100644 --- a/utils/doclint/check_public_api/index.js +++ b/utils/doclint/check_public_api/index.js @@ -774,6 +774,13 @@ function compareDocumentations(actual, expected) { expectedName: 'PDFOptions', }, ], + [ + 'Method Page.createPDFStream() options', + { + actualName: 'Object', + expectedName: 'PDFOptions', + }, + ], [ 'Method Page.screenshot() options', {