feat: add a streaming version for page.pdf

Co-authored-by: Jan Scheffler <janscheffler@chromium.org>
This commit is contained in:
Brian Misiak 2021-06-23 05:51:38 -07:00 committed by GitHub
parent edc18b46f9
commit e3699e248b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 137 additions and 26 deletions

View File

@ -136,6 +136,7 @@
* [page.content()](#pagecontent)
* [page.cookies([...urls])](#pagecookiesurls)
* [page.coverage](#pagecoverage)
* [page.createPDFStream([options])](#pagecreatepdfstreamoptions)
* [page.deleteCookie(...cookies)](#pagedeletecookiecookies)
* [page.emulate(options)](#pageemulateoptions)
* [page.emulateCPUThrottling(factor)](#pageemulatecputhrottlingfactor)
@ -1489,6 +1490,50 @@ If URLs are specified, only cookies for those URLs are returned.
- returns: <[Coverage]>
#### page.createPDFStream([options])
- `options` <[Object]> Options object which might have the following properties:
- `path` <[string]> Ignored by this method, which never writes the PDF to disk. The option is accepted only because the options object is shared with [page.pdf](#pagepdfoptions); to save the PDF, pipe the returned stream into a file stream (see the example below).
- `scale` <[number]> Scale of the webpage rendering. Defaults to `1`. Scale amount must be between 0.1 and 2.
- `displayHeaderFooter` <[boolean]> Display header and footer. Defaults to `false`.
- `headerTemplate` <[string]> HTML template for the print header. Should be valid HTML markup with the following classes used to inject printing values into them:
- `date` formatted print date
- `title` document title
- `url` document location
- `pageNumber` current page number
- `totalPages` total pages in the document
- `footerTemplate` <[string]> HTML template for the print footer. Should use the same format as the `headerTemplate`.
- `printBackground` <[boolean]> Print background graphics. Defaults to `false`.
- `landscape` <[boolean]> Paper orientation. Defaults to `false`.
- `pageRanges` <[string]> Page ranges to print, e.g., '1-5, 8, 11-13'. Defaults to the empty string, which means print all pages.
- `format` <[string]> Paper format. If set, takes priority over `width` or `height` options. Defaults to 'Letter'.
- `width` <[string]|[number]> Paper width, accepts values labeled with units.
- `height` <[string]|[number]> Paper height, accepts values labeled with units.
- `margin` <[Object]> Paper margins, defaults to none.
- `top` <[string]|[number]> Top margin, accepts values labeled with units.
- `right` <[string]|[number]> Right margin, accepts values labeled with units.
- `bottom` <[string]|[number]> Bottom margin, accepts values labeled with units.
- `left` <[string]|[number]> Left margin, accepts values labeled with units.
- `preferCSSPageSize` <[boolean]> Give any CSS `@page` size declared in the page priority over what is declared in `width` and `height` or `format` options. Defaults to `false`, which will scale the content to fit the paper size.
- `omitBackground` <[boolean]> Hides default white background and allows generating PDFs with transparency. Defaults to `false`.
- returns: <[Promise]<[Readable]>> Promise which resolves with a Node.js stream for the PDF file.
> **NOTE** This method is identical to [page.pdf](#pagepdfoptions), except it returns the PDF as a readable stream of binary data. If you are generating very large PDFs, it may be useful to use a stream to avoid high memory usage. This version will ignore the `path` option.
```js
const fs = require('fs');
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  try {
    // Stream a PDF into a file
    const pdfStream = await page.createPDFStream();
    const writeStream = fs.createWriteStream('test.pdf');
    // The PDF data is pulled from the browser on demand while the stream is
    // being read, so wait until everything has been written before closing
    // the browser.
    await new Promise((resolve, reject) => {
      pdfStream.on('error', reject);
      writeStream.on('error', reject);
      writeStream.on('finish', resolve);
      pdfStream.pipe(writeStream);
    });
  } finally {
    await browser.close();
  }
})();
```
#### page.deleteCookie(...cookies)
- `...cookies` <...[Object]>
@ -2000,7 +2045,6 @@ Page is guaranteed to have a main frame which persists during navigations.
- returns: <[Mouse]>
#### page.pdf([options])
- `options` <[Object]> Options object which might have the following properties:
- `path` <[string]> The file path to save the PDF to. If `path` is a relative path, then it is resolved relative to [current working directory](https://nodejs.org/api/process.html#process_process_cwd). If no path is provided, the PDF won't be saved to the disk.
- `scale` <[number]> Scale of the webpage rendering. Defaults to `1`. Scale amount must be between 0.1 and 2.
@ -2033,6 +2077,8 @@ Page is guaranteed to have a main frame which persists during navigations.
> **NOTE** By default, `page.pdf()` generates a pdf with modified colors for printing. Use the [`-webkit-print-color-adjust`](https://developer.mozilla.org/en-US/docs/Web/CSS/-webkit-print-color-adjust) property to force rendering of exact colors.
> **NOTE** If you are generating very large PDFs, it may be useful to use the streaming version of this function ([page.createPDFStream](#pagecreatepdfstreamoptions)) to avoid high memory usage.
```js
// Generates a PDF with 'screen' media type.
await page.emulateMediaType('screen');

View File

@ -14,6 +14,8 @@
* limitations under the License.
*/
import type { Readable } from 'stream';
import { EventEmitter } from './EventEmitter.js';
import {
Connection,
@ -2546,7 +2548,7 @@ export class Page extends EventEmitter {
*
* @param options - options for generating the PDF.
*/
async pdf(options: PDFOptions = {}): Promise<Buffer> {
async createPDFStream(options: PDFOptions = {}): Promise<Readable> {
const {
scale = 1,
displayHeaderFooter = false,
@ -2557,7 +2559,6 @@ export class Page extends EventEmitter {
pageRanges = '',
preferCSSPageSize = false,
margin = {},
path = null,
omitBackground = false,
} = options;
@ -2605,7 +2606,17 @@ export class Page extends EventEmitter {
await this._resetDefaultBackgroundColor();
}
return await helper.readProtocolStream(this._client, result.stream, path);
return helper.getReadableFromProtocolStream(this._client, result.stream);
}
/**
 * Generates a PDF of the page and resolves with its complete contents.
 *
 * @remarks
 * This is a thin wrapper around {@link Page.createPDFStream}: the stream it
 * produces is handed, together with `options.path`, to
 * `helper.getReadableAsBuffer`, whose result is returned.
 *
 * @param options - options for generating the PDF.
 * @returns Promise which resolves with the value produced by
 * `helper.getReadableAsBuffer` for the PDF stream.
 */
async pdf(options: PDFOptions = {}): Promise<Buffer> {
  // Read the target path up front; a missing `path` stays `undefined`.
  const outputPath = options.path;
  const pdfStream = await this.createPDFStream(options);
  const pdfBuffer = await helper.getReadableAsBuffer(pdfStream, outputPath);
  return pdfBuffer;
}
/**

View File

@ -106,10 +106,17 @@ export class Tracing {
fulfill = x;
reject = y;
});
this._client.once('Tracing.tracingComplete', (event) => {
helper
.readProtocolStream(this._client, event.stream, this._path)
.then(fulfill, reject);
this._client.once('Tracing.tracingComplete', async (event) => {
try {
const readable = await helper.getReadableFromProtocolStream(
this._client,
event.stream
);
const buffer = await helper.getReadableAsBuffer(readable, this._path);
fulfill(buffer);
} catch (error) {
reject(error);
}
});
await this._client.send('Tracing.end');
this._recording = false;

View File

@ -13,6 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type { Readable } from 'stream';
import { TimeoutError } from './Errors.js';
import { debug } from './Debug.js';
import { CDPSession } from './Connection.js';
@ -307,9 +310,8 @@ async function waitWithTimeout<T extends any>(
}
}
async function readProtocolStream(
client: CDPSession,
handle: string,
async function getReadableAsBuffer(
readable: Readable,
path?: string
): Promise<Buffer> {
if (!isNode && path) {
@ -318,35 +320,58 @@ async function readProtocolStream(
const fs = isNode ? await importFSModule() : null;
let eof = false;
let fileHandle: import('fs').promises.FileHandle;
if (path && fs) {
fileHandle = await fs.promises.open(path, 'w');
}
const bufs = [];
while (!eof) {
const response = await client.send('IO.read', { handle });
eof = response.eof;
const buf = Buffer.from(
response.data,
response.base64Encoded ? 'base64' : undefined
);
bufs.push(buf);
if (path && fs) {
await fs.promises.writeFile(fileHandle, buf);
const buffers = [];
for await (const chunk of readable) {
buffers.push(chunk);
if (fileHandle) {
await fs.promises.writeFile(fileHandle, chunk);
}
}
if (path) await fileHandle.close();
await client.send('IO.close', { handle });
let resultBuffer = null;
try {
resultBuffer = Buffer.concat(bufs);
resultBuffer = Buffer.concat(buffers);
} finally {
return resultBuffer;
}
}
/**
 * Wraps a protocol stream handle in a Node.js `Readable`.
 *
 * Data is fetched on demand: every `read()` request on the returned stream
 * issues one `IO.read` command for `handle` and pushes the response into the
 * stream. Once the protocol reports `eof`, the handle is released with
 * `IO.close` and the stream is ended.
 *
 * Any failure while talking to the browser destroys the stream with that
 * error, so consumers observe it through the stream's `'error'` event (or a
 * rejected `for await` loop) instead of an unhandled promise rejection.
 *
 * @param client - session used to send the `IO.read` / `IO.close` commands.
 * @param handle - protocol stream handle to read from.
 * @returns a readable stream yielding the contents behind `handle`.
 * @throws Error if called outside of a Node.js environment.
 */
async function getReadableFromProtocolStream(
  client: CDPSession,
  handle: string
): Promise<Readable> {
  // TODO:
  // This restriction can be lifted once https://github.com/nodejs/node/pull/39062 has landed
  if (!isNode) {
    throw new Error('Cannot create a stream outside of Node.js environment.');
  }

  const { Readable } = await import('stream');

  let eof = false;

  return new Readable({
    async read(size: number) {
      if (eof) {
        return;
      }

      try {
        const response = await client.send('IO.read', { handle, size });
        this.push(response.data, response.base64Encoded ? 'base64' : undefined);
        if (response.eof) {
          // Flip the flag before awaiting so that a read() triggered while
          // the handle is being closed does not issue another IO.read.
          eof = true;
          await client.send('IO.close', { handle });
          this.push(null);
        }
      } catch (error) {
        // read() is async, so a thrown error would otherwise surface as an
        // unhandled rejection and leave the consumer waiting forever.
        this.destroy(error instanceof Error ? error : new Error(String(error)));
      }
    },
  });
}
/**
* Loads the Node fs promises API. Needed because on Node 10.17 and below,
* fs.promises is experimental, and therefore not marked as enumerable. That
@ -378,7 +403,8 @@ export const helper = {
pageBindingDeliverErrorString,
pageBindingDeliverErrorValueString,
makePredicateString,
readProtocolStream,
getReadableAsBuffer,
getReadableFromProtocolStream,
waitWithTimeout,
waitForEvent,
isString,

View File

@ -1514,6 +1514,20 @@ describe('Page', function () {
expect(fs.readFileSync(outputFile).byteLength).toBeGreaterThan(0);
fs.unlinkSync(outputFile);
});
it('can print to PDF and stream the result', async () => {
  const { isHeadless, page } = getTestState();
  // Printing to pdf is currently only supported in headless
  if (!isHeadless) {
    return;
  }

  const pdfStream = await page.createPDFStream();

  // Drain the stream, adding up the length of every chunk it yields.
  let totalLength = 0;
  for await (const piece of pdfStream) {
    totalLength += piece.length;
  }

  expect(totalLength).toBeGreaterThan(0);
});
});
describe('Page.title', function () {

View File

@ -774,6 +774,13 @@ function compareDocumentations(actual, expected) {
expectedName: 'PDFOptions',
},
],
[
'Method Page.createPDFStream() options',
{
actualName: 'Object',
expectedName: 'PDFOptions',
},
],
[
'Method Page.screenshot() options',
{