feat: add a streaming version for page.pdf
Co-authored-by: Jan Scheffler <janscheffler@chromium.org>
This commit is contained in:
parent
edc18b46f9
commit
e3699e248b
48
docs/api.md
48
docs/api.md
@ -136,6 +136,7 @@
|
||||
* [page.content()](#pagecontent)
|
||||
* [page.cookies([...urls])](#pagecookiesurls)
|
||||
* [page.coverage](#pagecoverage)
|
||||
* [page.createPDFStream([options])](#pagecreatepdfstreamoptions)
|
||||
* [page.deleteCookie(...cookies)](#pagedeletecookiecookies)
|
||||
* [page.emulate(options)](#pageemulateoptions)
|
||||
* [page.emulateCPUThrottling(factor)](#pageemulatecputhrottlingfactor)
|
||||
@ -1489,6 +1490,50 @@ If URLs are specified, only cookies for those URLs are returned.
|
||||
|
||||
- returns: <[Coverage]>
|
||||
|
||||
#### page.createPDFStream([options])
|
||||
- `options` <[Object]> Options object which might have the following properties:
|
||||
- `path` <[string]> The file path to save the PDF to. If `path` is a relative path, then it is resolved relative to [current working directory](https://nodejs.org/api/process.html#process_process_cwd). If no path is provided, the PDF won't be saved to the disk. `page.createPDFStream` ignores this option; pipe the returned stream into a file to save the PDF.
|
||||
- `scale` <[number]> Scale of the webpage rendering. Defaults to `1`. Scale amount must be between 0.1 and 2.
|
||||
- `displayHeaderFooter` <[boolean]> Display header and footer. Defaults to `false`.
|
||||
- `headerTemplate` <[string]> HTML template for the print header. Should be valid HTML markup with following classes used to inject printing values into them:
|
||||
- `date` formatted print date
|
||||
- `title` document title
|
||||
- `url` document location
|
||||
- `pageNumber` current page number
|
||||
- `totalPages` total pages in the document
|
||||
- `footerTemplate` <[string]> HTML template for the print footer. Should use the same format as the `headerTemplate`.
|
||||
- `printBackground` <[boolean]> Print background graphics. Defaults to `false`.
|
||||
- `landscape` <[boolean]> Paper orientation. Defaults to `false`.
|
||||
- `pageRanges` <[string]> Page ranges to print, e.g., '1-5, 8, 11-13'. Defaults to the empty string, which means print all pages.
|
||||
- `format` <[string]> Paper format. If set, takes priority over `width` or `height` options. Defaults to 'Letter'.
|
||||
- `width` <[string]|[number]> Paper width, accepts values labeled with units.
|
||||
- `height` <[string]|[number]> Paper height, accepts values labeled with units.
|
||||
- `margin` <[Object]> Paper margins, defaults to none.
|
||||
- `top` <[string]|[number]> Top margin, accepts values labeled with units.
|
||||
- `right` <[string]|[number]> Right margin, accepts values labeled with units.
|
||||
- `bottom` <[string]|[number]> Bottom margin, accepts values labeled with units.
|
||||
- `left` <[string]|[number]> Left margin, accepts values labeled with units.
|
||||
- `preferCSSPageSize` <[boolean]> Give any CSS `@page` size declared in the page priority over what is declared in `width` and `height` or `format` options. Defaults to `false`, which will scale the content to fit the paper size.
|
||||
- `omitBackground` <[boolean]> Hides default white background and allows generating PDFs with transparency. Defaults to `false`.
|
||||
- returns: <[Promise]<[Readable]>> Promise which resolves with a Node.js stream for the PDF file.
|
||||
|
||||
> **NOTE** This method is identical to [page.pdf](#pagepdfoptions), except it returns the PDF as a readable stream of binary data. If you are generating very large PDFs, it may be useful to use a stream to avoid high memory usage. This version will ignore the `path` option.
|
||||
|
||||
```js
|
||||
const fs = require('fs');
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();

  // Stream a PDF into a file
  const pdfStream = await page.createPDFStream();
  const writeStream = fs.createWriteStream('test.pdf');
  pdfStream.pipe(writeStream);

  try {
    // The stream pulls its data from the browser on demand, so keep the
    // browser open until the file has been written completely.
    await new Promise((resolve, reject) => {
      pdfStream.on('error', reject);
      writeStream.on('error', reject);
      writeStream.on('finish', resolve);
    });
  } finally {
    await browser.close();
  }
})();
|
||||
```
|
||||
|
||||
#### page.deleteCookie(...cookies)
|
||||
|
||||
- `...cookies` <...[Object]>
|
||||
@ -2000,7 +2045,6 @@ Page is guaranteed to have a main frame which persists during navigations.
|
||||
- returns: <[Mouse]>
|
||||
|
||||
#### page.pdf([options])
|
||||
|
||||
- `options` <[Object]> Options object which might have the following properties:
|
||||
- `path` <[string]> The file path to save the PDF to. If `path` is a relative path, then it is resolved relative to [current working directory](https://nodejs.org/api/process.html#process_process_cwd). If no path is provided, the PDF won't be saved to the disk.
|
||||
- `scale` <[number]> Scale of the webpage rendering. Defaults to `1`. Scale amount must be between 0.1 and 2.
|
||||
@ -2033,6 +2077,8 @@ Page is guaranteed to have a main frame which persists during navigations.
|
||||
|
||||
> **NOTE** By default, `page.pdf()` generates a pdf with modified colors for printing. Use the [`-webkit-print-color-adjust`](https://developer.mozilla.org/en-US/docs/Web/CSS/-webkit-print-color-adjust) property to force rendering of exact colors.
|
||||
|
||||
> **NOTE** If you are generating very large PDFs, it may be useful to use the streaming version of this function ([page.createPDFStream](#pagecreatepdfstreamoptions)) to avoid high memory usage.
|
||||
|
||||
```js
|
||||
// Generates a PDF with 'screen' media type.
|
||||
await page.emulateMediaType('screen');
|
||||
|
@ -14,6 +14,8 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import type { Readable } from 'stream';
|
||||
|
||||
import { EventEmitter } from './EventEmitter.js';
|
||||
import {
|
||||
Connection,
|
||||
@ -2546,7 +2548,7 @@ export class Page extends EventEmitter {
|
||||
*
|
||||
* @param options - options for generating the PDF.
|
||||
*/
|
||||
async pdf(options: PDFOptions = {}): Promise<Buffer> {
|
||||
async createPDFStream(options: PDFOptions = {}): Promise<Readable> {
|
||||
const {
|
||||
scale = 1,
|
||||
displayHeaderFooter = false,
|
||||
@ -2557,7 +2559,6 @@ export class Page extends EventEmitter {
|
||||
pageRanges = '',
|
||||
preferCSSPageSize = false,
|
||||
margin = {},
|
||||
path = null,
|
||||
omitBackground = false,
|
||||
} = options;
|
||||
|
||||
@ -2605,7 +2606,17 @@ export class Page extends EventEmitter {
|
||||
await this._resetDefaultBackgroundColor();
|
||||
}
|
||||
|
||||
return await helper.readProtocolStream(this._client, result.stream, path);
|
||||
return helper.getReadableFromProtocolStream(this._client, result.stream);
|
||||
}
|
||||
|
||||
/**
 * Generates a PDF of the page and resolves with its complete contents.
 *
 * @remarks
 * Obtains the PDF through `createPDFStream` and hands the resulting stream,
 * together with `options.path`, to `helper.getReadableAsBuffer`.
 *
 * @param options - options for generating the PDF.
 * @returns the buffer produced by `helper.getReadableAsBuffer`.
 */
async pdf(options: PDFOptions = {}): Promise<Buffer> {
  const outputPath = options.path;
  const pdfStream = await this.createPDFStream(options);
  const pdfBuffer = await helper.getReadableAsBuffer(pdfStream, outputPath);
  return pdfBuffer;
}
|
||||
|
||||
/**
|
||||
|
@ -106,10 +106,17 @@ export class Tracing {
|
||||
fulfill = x;
|
||||
reject = y;
|
||||
});
|
||||
this._client.once('Tracing.tracingComplete', (event) => {
|
||||
helper
|
||||
.readProtocolStream(this._client, event.stream, this._path)
|
||||
.then(fulfill, reject);
|
||||
this._client.once('Tracing.tracingComplete', async (event) => {
|
||||
try {
|
||||
const readable = await helper.getReadableFromProtocolStream(
|
||||
this._client,
|
||||
event.stream
|
||||
);
|
||||
const buffer = await helper.getReadableAsBuffer(readable, this._path);
|
||||
fulfill(buffer);
|
||||
} catch (error) {
|
||||
reject(error);
|
||||
}
|
||||
});
|
||||
await this._client.send('Tracing.end');
|
||||
this._recording = false;
|
||||
|
@ -13,6 +13,9 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import type { Readable } from 'stream';
|
||||
|
||||
import { TimeoutError } from './Errors.js';
|
||||
import { debug } from './Debug.js';
|
||||
import { CDPSession } from './Connection.js';
|
||||
@ -307,9 +310,8 @@ async function waitWithTimeout<T extends any>(
|
||||
}
|
||||
}
|
||||
|
||||
async function readProtocolStream(
|
||||
client: CDPSession,
|
||||
handle: string,
|
||||
async function getReadableAsBuffer(
|
||||
readable: Readable,
|
||||
path?: string
|
||||
): Promise<Buffer> {
|
||||
if (!isNode && path) {
|
||||
@ -318,35 +320,58 @@ async function readProtocolStream(
|
||||
|
||||
const fs = isNode ? await importFSModule() : null;
|
||||
|
||||
let eof = false;
|
||||
let fileHandle: import('fs').promises.FileHandle;
|
||||
|
||||
if (path && fs) {
|
||||
fileHandle = await fs.promises.open(path, 'w');
|
||||
}
|
||||
const bufs = [];
|
||||
while (!eof) {
|
||||
const response = await client.send('IO.read', { handle });
|
||||
eof = response.eof;
|
||||
const buf = Buffer.from(
|
||||
response.data,
|
||||
response.base64Encoded ? 'base64' : undefined
|
||||
);
|
||||
bufs.push(buf);
|
||||
if (path && fs) {
|
||||
await fs.promises.writeFile(fileHandle, buf);
|
||||
const buffers = [];
|
||||
for await (const chunk of readable) {
|
||||
buffers.push(chunk);
|
||||
if (fileHandle) {
|
||||
await fs.promises.writeFile(fileHandle, chunk);
|
||||
}
|
||||
}
|
||||
|
||||
if (path) await fileHandle.close();
|
||||
await client.send('IO.close', { handle });
|
||||
let resultBuffer = null;
|
||||
try {
|
||||
resultBuffer = Buffer.concat(bufs);
|
||||
resultBuffer = Buffer.concat(buffers);
|
||||
} finally {
|
||||
return resultBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Wraps a protocol IO stream handle in a Node.js `Readable`.
 *
 * Every `read()` call issues one `IO.read` command for the handle and pushes
 * the returned data. When the response reports `eof`, the handle is released
 * with `IO.close` and the stream is ended.
 *
 * @param client - session used to send the `IO.read` and `IO.close` commands.
 * @param handle - protocol stream handle to read from.
 * @returns a readable stream yielding the data behind `handle`.
 * @throws Error when called outside of a Node.js environment.
 */
async function getReadableFromProtocolStream(
  client: CDPSession,
  handle: string
): Promise<Readable> {
  // TODO:
  // This restriction can be lifted once https://github.com/nodejs/node/pull/39062 has landed
  if (!isNode) {
    throw new Error('Cannot create a stream outside of Node.js environment.');
  }

  const { Readable } = await import('stream');

  let eof = false;
  return new Readable({
    async read(size: number) {
      if (eof) {
        return;
      }

      try {
        const response = await client.send('IO.read', { handle, size });
        this.push(response.data, response.base64Encoded ? 'base64' : undefined);
        if (response.eof) {
          eof = true;
          // Release the handle before signalling end-of-stream so that a
          // failing close is still reported through the stream.
          await client.send('IO.close', { handle });
          this.push(null);
        }
      } catch (error) {
        // The stream machinery ignores the promise returned by read(), so a
        // rejection here would otherwise be unhandled and leave consumers
        // waiting forever. Surface it as a stream error instead.
        this.destroy(error instanceof Error ? error : new Error(String(error)));
      }
    },
  });
}
|
||||
|
||||
/**
|
||||
* Loads the Node fs promises API. Needed because on Node 10.17 and below,
|
||||
* fs.promises is experimental, and therefore not marked as enumerable. That
|
||||
@ -378,7 +403,8 @@ export const helper = {
|
||||
pageBindingDeliverErrorString,
|
||||
pageBindingDeliverErrorValueString,
|
||||
makePredicateString,
|
||||
readProtocolStream,
|
||||
getReadableAsBuffer,
|
||||
getReadableFromProtocolStream,
|
||||
waitWithTimeout,
|
||||
waitForEvent,
|
||||
isString,
|
||||
|
@ -1514,6 +1514,20 @@ describe('Page', function () {
|
||||
expect(fs.readFileSync(outputFile).byteLength).toBeGreaterThan(0);
|
||||
fs.unlinkSync(outputFile);
|
||||
});
|
||||
|
||||
it('can print to PDF and stream the result', async () => {
  const { isHeadless, page } = getTestState();

  // Printing to pdf is currently only supported in headless
  if (!isHeadless) {
    return;
  }

  // Drain the stream and count how many bytes it delivered.
  const pdfStream = await page.createPDFStream();
  let totalBytes = 0;
  for await (const piece of pdfStream) {
    totalBytes += piece.length;
  }

  expect(totalBytes).toBeGreaterThan(0);
});
|
||||
});
|
||||
|
||||
describe('Page.title', function () {
|
||||
|
@ -774,6 +774,13 @@ function compareDocumentations(actual, expected) {
|
||||
expectedName: 'PDFOptions',
|
||||
},
|
||||
],
|
||||
[
|
||||
'Method Page.createPDFStream() options',
|
||||
{
|
||||
actualName: 'Object',
|
||||
expectedName: 'PDFOptions',
|
||||
},
|
||||
],
|
||||
[
|
||||
'Method Page.screenshot() options',
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user