From b99d478cd48adc261878836e04eac55ecc2890f2 Mon Sep 17 00:00:00 2001 From: Nikolay Vitkov <34244704+Lightning00Blade@users.noreply.github.com> Date: Tue, 30 Jan 2024 12:34:33 +0100 Subject: [PATCH] feat: add outline to PDF generation (#11779) --- docs/api/puppeteer.pdfoptions.md | 1 + packages/puppeteer-core/src/cdp/Page.ts | 2 + .../puppeteer-core/src/common/PDFOptions.ts | 12 ++++ packages/puppeteer-core/src/common/util.ts | 6 ++ .../puppeteer-core/src/node/ChromeLauncher.ts | 1 + test/TestExpectations.json | 36 ++++++++---- test/assets/pdf.html | 8 ++- test/src/cdp/pdf.spec.ts | 55 +++++++++++++++++++ test/src/page.spec.ts | 19 ------- 9 files changed, 108 insertions(+), 32 deletions(-) create mode 100644 test/src/cdp/pdf.spec.ts diff --git a/docs/api/puppeteer.pdfoptions.md b/docs/api/puppeteer.pdfoptions.md index 00cc8aa32a5..eacffd30500 100644 --- a/docs/api/puppeteer.pdfoptions.md +++ b/docs/api/puppeteer.pdfoptions.md @@ -24,6 +24,7 @@ export interface PDFOptions | landscape | optional | boolean | Whether to print in landscape orientation. | false | | margin | optional | [PDFMargin](./puppeteer.pdfmargin.md) | Set the PDF margins. | undefined no margins are set. | | omitBackground | optional | boolean | Hides default white background and allows generating pdfs with transparency. | false | +| outline | optional | boolean | Generate document outline. | false | | pageRanges | optional | string | Paper ranges to print, e.g. 1-5, 8, 11-13. | The empty string, which means all pages are printed. | | path | optional | string | The path to save the file to. | undefined, which means the PDF will not be written to disk. | | preferCSSPageSize | optional | boolean | Give any CSS @page size declared in the page priority over what is declared in the width or height or format option. | false, which will scale the content to fit the paper size. 
| diff --git a/packages/puppeteer-core/src/cdp/Page.ts b/packages/puppeteer-core/src/cdp/Page.ts index 491637f0ea3..701a204aff8 100644 --- a/packages/puppeteer-core/src/cdp/Page.ts +++ b/packages/puppeteer-core/src/cdp/Page.ts @@ -1102,6 +1102,7 @@ export class CdpPage extends Page { preferCSSPageSize, omitBackground, tagged: generateTaggedPDF, + outline: generateDocumentOutline, } = parsePDFOptions(options); if (omitBackground) { @@ -1127,6 +1128,7 @@ export class CdpPage extends Page { pageRanges, preferCSSPageSize, generateTaggedPDF, + generateDocumentOutline, } ); diff --git a/packages/puppeteer-core/src/common/PDFOptions.ts b/packages/puppeteer-core/src/common/PDFOptions.ts index 7cae9191a9d..0271faf7a5f 100644 --- a/packages/puppeteer-core/src/common/PDFOptions.ts +++ b/packages/puppeteer-core/src/common/PDFOptions.ts @@ -162,6 +162,18 @@ export interface PDFOptions { * @experimental */ tagged?: boolean; + /** + * Generate document outline. + * + * @remarks + * If this is enabled, the PDF will also be tagged (accessible). + * Currently only works in old Headless (headless = true). + * See crbug/840455#c47. + * + * @defaultValue `false` + * @experimental + */ + outline?: boolean; /** * Timeout in milliseconds. Pass `0` to disable timeout. 
* @defaultValue `30_000` diff --git a/packages/puppeteer-core/src/common/util.ts b/packages/puppeteer-core/src/common/util.ts index 2c8f76f664b..f8648d6d332 100644 --- a/packages/puppeteer-core/src/common/util.ts +++ b/packages/puppeteer-core/src/common/util.ts @@ -350,6 +350,7 @@ export function parsePDFOptions( preferCSSPageSize: false, omitBackground: false, tagged: false, + outline: false, }; let width = 8.5; @@ -375,6 +376,11 @@ export function parsePDFOptions( convertPrintParameterToInches(options.margin?.right, lengthUnit) || 0, }; + // Quirk https://bugs.chromium.org/p/chromium/issues/detail?id=840455#c44 + if (options.outline) { + options.tagged = true; + } + return { ...defaults, ...options, diff --git a/packages/puppeteer-core/src/node/ChromeLauncher.ts b/packages/puppeteer-core/src/node/ChromeLauncher.ts index 51d5a199832..74315191390 100644 --- a/packages/puppeteer-core/src/node/ChromeLauncher.ts +++ b/packages/puppeteer-core/src/node/ChromeLauncher.ts @@ -231,6 +231,7 @@ export class ChromeLauncher extends ProductLauncher { '--disable-sync', '--enable-automation', '--export-tagged-pdf', + '--generate-pdf-document-outline', '--force-color-profile=srgb', '--metrics-recording-only', '--no-first-run', diff --git a/test/TestExpectations.json b/test/TestExpectations.json index 86bf81094e7..5f8e028f988 100644 --- a/test/TestExpectations.json +++ b/test/TestExpectations.json @@ -233,6 +233,12 @@ "parameters": ["webDriverBiDi"], "expectations": ["FAIL"] }, + { + "testIdPattern": "[pdf.spec] Page.pdf *", + "platforms": ["darwin", "linux", "win32"], + "parameters": ["webDriverBiDi"], + "expectations": ["SKIP"] + }, { "testIdPattern": "[prerender.spec] *", "platforms": ["darwin", "linux", "win32"], @@ -943,12 +949,6 @@ "parameters": ["webDriverBiDi"], "expectations": ["FAIL"] }, - { - "testIdPattern": "[page.spec] Page Page.pdf can print to PDF with accessible", - "platforms": ["darwin", "linux", "win32"], - "parameters": ["webDriverBiDi"], - "expectations": 
["SKIP"] - }, { "testIdPattern": "[page.spec] Page Page.pdf should respect timeout", "platforms": ["darwin", "linux", "win32"], @@ -1003,6 +1003,12 @@ "parameters": ["webDriverBiDi"], "expectations": ["FAIL", "PASS", "TIMEOUT"] }, + { + "testIdPattern": "[pdf.spec] Page.pdf *", + "platforms": ["darwin", "linux", "win32"], + "parameters": ["cdp", "firefox"], + "expectations": ["SKIP"] + }, { "testIdPattern": "[prerender.spec] Prerender can screencast", "platforms": ["darwin", "linux", "win32"], @@ -3002,12 +3008,6 @@ "parameters": ["cdp", "firefox"], "expectations": ["FAIL"] }, - { - "testIdPattern": "[page.spec] Page Page.pdf can print to PDF with accessible", - "platforms": ["darwin", "linux", "win32"], - "parameters": ["cdp", "firefox"], - "expectations": ["SKIP"] - }, { "testIdPattern": "[page.spec] Page Page.removeExposedFunction should work", "platforms": ["darwin", "linux", "win32"], @@ -3128,6 +3128,18 @@ "parameters": ["cdp", "firefox"], "expectations": ["FAIL"] }, + { + "testIdPattern": "[pdf.spec] Page.pdf can print to PDF with outline", + "platforms": ["darwin", "linux", "win32"], + "parameters": ["chrome", "headful"], + "expectations": ["FAIL"] + }, + { + "testIdPattern": "[pdf.spec] Page.pdf can print to PDF with outline", + "platforms": ["darwin", "linux", "win32"], + "parameters": ["chrome", "new-headless"], + "expectations": ["FAIL"] + }, { "testIdPattern": "[prerender.spec] Prerender can navigate to a prerendered page via Puppeteer", "platforms": ["darwin", "linux", "win32"], diff --git a/test/assets/pdf.html b/test/assets/pdf.html index 987df27ebef..ef046e3d36a 100644 --- a/test/assets/pdf.html +++ b/test/assets/pdf.html @@ -6,6 +6,12 @@ PDF -
PDF Content
+

PDF Content

+
+

PDF Subcontent 1

+
+
+

PDF Subcontent 2

+
diff --git a/test/src/cdp/pdf.spec.ts b/test/src/cdp/pdf.spec.ts new file mode 100644 index 00000000000..12588573974 --- /dev/null +++ b/test/src/cdp/pdf.spec.ts @@ -0,0 +1,55 @@ +/** + * @license + * Copyright 2017 Google Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +import {readFile, unlink} from 'fs/promises'; + +import expect from 'expect'; + +import {getTestState, setupTestBrowserHooks} from '../mocha-utils.js'; + +describe('Page.pdf', () => { + setupTestBrowserHooks(); + + it('can print to PDF with accessible', async () => { + const {page, server} = await getTestState(); + + const outputFile = __dirname + '/../../assets/output.pdf'; + const outputFileAccessible = + __dirname + '/../../assets/output-accessible.pdf'; + await page.goto(server.PREFIX + '/pdf.html'); + await page.pdf({path: outputFile}); + await page.pdf({path: outputFileAccessible, tagged: true}); + try { + const [base, tagged] = await Promise.all([ + readFile(outputFile), + readFile(outputFileAccessible), + ]); + expect(tagged.byteLength).toBeGreaterThan(base.byteLength); + } finally { + await Promise.all([unlink(outputFile), unlink(outputFileAccessible)]); + } + }); + + it('can print to PDF with outline', async () => { + const {page, server} = await getTestState(); + + const outputFile = __dirname + '/../../assets/output.pdf'; + const outputFileOutlined = __dirname + '/../../assets/output-outlined.pdf'; + await page.goto(server.PREFIX + '/pdf.html'); + await page.pdf({path: outputFile, tagged: true}); + await page.pdf({path: outputFileOutlined, tagged: true, outline: true}); + try { + const [base, outlined] = await Promise.all([ + readFile(outputFile), + readFile(outputFileOutlined), + ]); + + expect(outlined.byteLength).toBeGreaterThan(base.byteLength); + } finally { + await Promise.all([unlink(outputFile), unlink(outputFileOutlined)]); + } + }); +}); diff --git a/test/src/page.spec.ts b/test/src/page.spec.ts index 79fc69ebbc3..8e585df9993 100644 --- a/test/src/page.spec.ts +++ 
b/test/src/page.spec.ts @@ -1940,25 +1940,6 @@ describe('Page', function () { } }); - it('can print to PDF with accessible', async () => { - const {page, server} = await getTestState(); - - const outputFile = __dirname + '/../assets/output.pdf'; - const outputFileAccessible = - __dirname + '/../assets/output-accessible.pdf'; - await page.goto(server.PREFIX + '/pdf.html'); - await page.pdf({path: outputFile}); - await page.pdf({path: outputFileAccessible, tagged: true}); - try { - expect( - fs.readFileSync(outputFileAccessible).byteLength - ).toBeGreaterThan(fs.readFileSync(outputFile).byteLength); - } finally { - fs.unlinkSync(outputFileAccessible); - fs.unlinkSync(outputFile); - } - }); - it('can print to PDF and stream the result', async () => { const {page} = await getTestState();