From 80bbd766267ab2beb55e6724a47ccc009ef246e1 Mon Sep 17 00:00:00 2001 From: Alex Rudenko Date: Mon, 10 Jun 2024 14:12:30 +0200 Subject: [PATCH] refactor: move selector parsing to Node (#12543) --- packages/puppeteer-core/Herebyfile.mjs | 13 +++- .../src/common/CSSQueryHandler.ts | 29 +++++++ .../src/common/GetQueryHandler.ts | 14 +++- .../{injected => common}/PSelectorParser.ts | 29 +++---- packages/puppeteer-core/src/common/common.ts | 1 + .../src/injected/CSSSelector.ts | 20 +++++ .../src/injected/PQuerySelector.ts | 76 +++++++++---------- .../puppeteer-core/src/injected/injected.ts | 2 + .../third_party/parsel-js/package.json | 3 + .../third_party/parsel-js/parsel-js.ts | 4 + 10 files changed, 135 insertions(+), 56 deletions(-) create mode 100644 packages/puppeteer-core/src/common/CSSQueryHandler.ts rename packages/puppeteer-core/src/{injected => common}/PSelectorParser.ts (85%) create mode 100644 packages/puppeteer-core/src/injected/CSSSelector.ts create mode 100644 packages/puppeteer-core/third_party/parsel-js/package.json create mode 100644 packages/puppeteer-core/third_party/parsel-js/parsel-js.ts diff --git a/packages/puppeteer-core/Herebyfile.mjs b/packages/puppeteer-core/Herebyfile.mjs index 426d908c0c0..17af7bccc03 100644 --- a/packages/puppeteer-core/Herebyfile.mjs +++ b/packages/puppeteer-core/Herebyfile.mjs @@ -48,9 +48,10 @@ export const generateInjectedTask = task({ entryPoints: ['src/injected/injected.ts'], bundle: true, format: 'cjs', - target: ['chrome117', 'firefox118'], + target: ['chrome125', 'firefox125'], minify: true, write: false, + legalComments: 'none', }); const template = await readFile('src/templates/injected.ts.tmpl', 'utf8'); await mkdir('src/generated', {recursive: true}); @@ -136,6 +137,16 @@ export const buildTask = task({ 'utf-8' ); break; + case 'parsel-js': + license = await readFile( + path.join( + path.dirname(require.resolve('parsel-js')), + '..', + 'LICENSE' + ), + 'utf-8' + ); + break; default: throw new Error(`Add 
license handling for ${path}`); } diff --git a/packages/puppeteer-core/src/common/CSSQueryHandler.ts b/packages/puppeteer-core/src/common/CSSQueryHandler.ts new file mode 100644 index 00000000000..5e0dda39894 --- /dev/null +++ b/packages/puppeteer-core/src/common/CSSQueryHandler.ts @@ -0,0 +1,29 @@ +/** + * @license + * Copyright 2023 Google Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +import type PuppeteerUtil from '../injected/injected.js'; + +import {QueryHandler} from './QueryHandler.js'; + +/** + * @internal + */ +export class CSSQueryHandler extends QueryHandler { + static override querySelector = ( + element: Node, + selector: string, + {cssQuerySelector}: PuppeteerUtil + ): Node | null => { + return cssQuerySelector(element, selector); + }; + static override querySelectorAll = ( + element: Node, + selector: string, + {cssQuerySelectorAll}: PuppeteerUtil + ): Iterable => { + return cssQuerySelectorAll(element, selector); + }; +} diff --git a/packages/puppeteer-core/src/common/GetQueryHandler.ts b/packages/puppeteer-core/src/common/GetQueryHandler.ts index 1d8bb014146..8a226842cbf 100644 --- a/packages/puppeteer-core/src/common/GetQueryHandler.ts +++ b/packages/puppeteer-core/src/common/GetQueryHandler.ts @@ -6,9 +6,11 @@ import {ARIAQueryHandler} from '../cdp/AriaQueryHandler.js'; +import {CSSQueryHandler} from './CSSQueryHandler.js'; import {customQueryHandlers} from './CustomQueryHandler.js'; import {PierceQueryHandler} from './PierceQueryHandler.js'; import {PQueryHandler} from './PQueryHandler.js'; +import {parsePSelectors} from './PSelectorParser.js'; import type {QueryHandler} from './QueryHandler.js'; import {TextQueryHandler} from './TextQueryHandler.js'; import {XPathQueryHandler} from './XPathQueryHandler.js'; @@ -45,5 +47,15 @@ export function getQueryHandlerAndSelector(selector: string): { } } } - return {updatedSelector: selector, QueryHandler: PQueryHandler}; + const [pSelector, isPureCSS] = parsePSelectors(selector); + if (isPureCSS) { 
+ return { + updatedSelector: selector, + QueryHandler: CSSQueryHandler, + }; + } + return { + updatedSelector: JSON.stringify(pSelector), + QueryHandler: PQueryHandler, + }; } diff --git a/packages/puppeteer-core/src/injected/PSelectorParser.ts b/packages/puppeteer-core/src/common/PSelectorParser.ts similarity index 85% rename from packages/puppeteer-core/src/injected/PSelectorParser.ts rename to packages/puppeteer-core/src/common/PSelectorParser.ts index 8044562348c..1d1d73692c4 100644 --- a/packages/puppeteer-core/src/injected/PSelectorParser.ts +++ b/packages/puppeteer-core/src/common/PSelectorParser.ts @@ -4,20 +4,18 @@ * SPDX-License-Identifier: Apache-2.0 */ -import {type Token, tokenize, TOKENS, stringify} from 'parsel-js'; - -export type CSSSelector = string; -export interface PPseudoSelector { - name: string; - value: string; -} -export const enum PCombinator { - Descendent = '>>>', - Child = '>>>>', -} -export type CompoundPSelector = Array; -export type ComplexPSelector = Array; -export type ComplexPSelectorList = ComplexPSelector[]; +import { + type Token, + tokenize, + TOKENS, + stringify, +} from '../../third_party/parsel-js/parsel-js.js'; +import type { + ComplexPSelector, + ComplexPSelectorList, + CompoundPSelector, +} from '../injected/PQuerySelector.js'; +import {PCombinator} from '../injected/PQuerySelector.js'; TOKENS['combinator'] = /\s*(>>>>?|[\s>+~])\s*/g; @@ -34,6 +32,9 @@ const unquote = (text: string): string => { }); }; +/** + * @internal + */ export function parsePSelectors( selector: string ): [selector: ComplexPSelectorList, isPureCSS: boolean] { diff --git a/packages/puppeteer-core/src/common/common.ts b/packages/puppeteer-core/src/common/common.ts index bf4274fcf17..02db46910da 100644 --- a/packages/puppeteer-core/src/common/common.ts +++ b/packages/puppeteer-core/src/common/common.ts @@ -25,6 +25,7 @@ export * from './PDFOptions.js'; export * from './PierceQueryHandler.js'; export * from './PQueryHandler.js'; export * from 
'./Product.js'; +export * from './PSelectorParser.js'; export * from './Puppeteer.js'; export * from './QueryHandler.js'; export * from './ScriptInjector.js'; diff --git a/packages/puppeteer-core/src/injected/CSSSelector.ts b/packages/puppeteer-core/src/injected/CSSSelector.ts new file mode 100644 index 00000000000..cc26c69d381 --- /dev/null +++ b/packages/puppeteer-core/src/injected/CSSSelector.ts @@ -0,0 +1,20 @@ +/** + * @license + * Copyright 2024 Google Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +export const cssQuerySelector = ( + root: Node, + selector: string +): Element | null => { + // @ts-expect-error assume element root + return root.querySelector(selector); +}; +export const cssQuerySelectorAll = function ( + root: Node, + selector: string +): Iterable { + // @ts-expect-error assume element root + return root.querySelectorAll(selector); +}; diff --git a/packages/puppeteer-core/src/injected/PQuerySelector.ts b/packages/puppeteer-core/src/injected/PQuerySelector.ts index 11499c072f3..ba5fd7e9b16 100644 --- a/packages/puppeteer-core/src/injected/PQuerySelector.ts +++ b/packages/puppeteer-core/src/injected/PQuerySelector.ts @@ -9,21 +9,43 @@ import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js'; import {ariaQuerySelectorAll} from './ARIAQuerySelector.js'; import {customQuerySelectors} from './CustomQuerySelector.js'; -import { - type ComplexPSelector, - type ComplexPSelectorList, - type CompoundPSelector, - type CSSSelector, - parsePSelectors, - PCombinator, - type PPseudoSelector, -} from './PSelectorParser.js'; import {textQuerySelectorAll} from './TextQuerySelector.js'; import {pierce, pierceAll} from './util.js'; import {xpathQuerySelectorAll} from './XPathQuerySelector.js'; const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/; +/** + * @internal + */ +export type CSSSelector = string; +/** + * @internal + */ +export interface PPseudoSelector { + name: string; + value: string; +} +/** + * @internal + */ +export const enum PCombinator { + 
Descendent = '>>>', + Child = '>>>>', +} +/** + * @internal + */ +export type CompoundPSelector = Array; +/** + * @internal + */ +export type ComplexPSelector = Array; +/** + * @internal + */ +export type ComplexPSelectorList = ComplexPSelector[]; + interface QueryableNode extends Node { querySelectorAll: typeof Document.prototype.querySelectorAll; } @@ -32,24 +54,15 @@ const isQueryableNode = (node: Node): node is QueryableNode => { return 'querySelectorAll' in node; }; -class SelectorError extends Error { - constructor(selector: string, message: string) { - super(`${selector} is not a valid selector: ${message}`); - } -} - class PQueryEngine { - #input: string; - #complexSelector: ComplexPSelector; #compoundSelector: CompoundPSelector = []; #selector: CSSSelector | PPseudoSelector | undefined = undefined; elements: AwaitableIterable; - constructor(element: Node, input: string, complexSelector: ComplexPSelector) { + constructor(element: Node, complexSelector: ComplexPSelector) { this.elements = [element]; - this.#input = input; this.#complexSelector = complexSelector; this.#next(); } @@ -71,7 +84,6 @@ class PQueryEngine { for (; this.#selector !== undefined; this.#next()) { const selector = this.#selector; - const input = this.#input; if (typeof selector === 'string') { // The regular expression tests if the selector is a type/universal // selector. 
Any other case means we want to apply the selector onto @@ -128,10 +140,7 @@ class PQueryEngine { default: const querySelector = customQuerySelectors.get(selector.name); if (!querySelector) { - throw new SelectorError( - input, - `Unknown selector type: ${selector.name}` - ); + throw new Error(`Unknown selector type: ${selector.name}`); } yield* querySelector.querySelectorAll(element, selector.value); } @@ -240,17 +249,7 @@ export const pQuerySelectorAll = function ( root: Node, selector: string ): AwaitableIterable { - let selectors: ComplexPSelectorList; - let isPureCSS: boolean; - try { - [selectors, isPureCSS] = parsePSelectors(selector); - } catch (error) { - return (root as unknown as QueryableNode).querySelectorAll(selector); - } - - if (isPureCSS) { - return (root as unknown as QueryableNode).querySelectorAll(selector); - } + const selectors = JSON.parse(selector) as ComplexPSelectorList; // If there are any empty elements, then this implies the selector has // contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we // treat as illegal, similar to existing behavior. @@ -267,15 +266,12 @@ export const pQuerySelectorAll = function ( }); }) ) { - throw new SelectorError( - selector, - 'Multiple deep combinators found in sequence.' 
- ); + throw new Error('Multiple deep combinators found in sequence.'); } return domSort( AsyncIterableUtil.flatMap(selectors, selectorParts => { - const query = new PQueryEngine(root, selector, selectorParts); + const query = new PQueryEngine(root, selectorParts); void query.run(); return query.elements; }) diff --git a/packages/puppeteer-core/src/injected/injected.ts b/packages/puppeteer-core/src/injected/injected.ts index e81d274290a..2f85389f0fa 100644 --- a/packages/puppeteer-core/src/injected/injected.ts +++ b/packages/puppeteer-core/src/injected/injected.ts @@ -8,6 +8,7 @@ import {Deferred} from '../util/Deferred.js'; import {createFunction} from '../util/Function.js'; import * as ARIAQuerySelector from './ARIAQuerySelector.js'; +import * as CSSSelector from './CSSSelector.js'; import * as CustomQuerySelectors from './CustomQuerySelector.js'; import * as PierceQuerySelector from './PierceQuerySelector.js'; import {IntervalPoller, MutationPoller, RAFPoller} from './Poller.js'; @@ -31,6 +32,7 @@ const PuppeteerUtil = Object.freeze({ ...TextQuerySelector, ...util, ...XPathQuerySelector, + ...CSSSelector, Deferred, createFunction, createTextContent, diff --git a/packages/puppeteer-core/third_party/parsel-js/package.json b/packages/puppeteer-core/third_party/parsel-js/package.json new file mode 100644 index 00000000000..3dbc1ca591c --- /dev/null +++ b/packages/puppeteer-core/third_party/parsel-js/package.json @@ -0,0 +1,3 @@ +{ + "type": "module" +} diff --git a/packages/puppeteer-core/third_party/parsel-js/parsel-js.ts b/packages/puppeteer-core/third_party/parsel-js/parsel-js.ts new file mode 100644 index 00000000000..fb998b2b557 --- /dev/null +++ b/packages/puppeteer-core/third_party/parsel-js/parsel-js.ts @@ -0,0 +1,4 @@ +// eslint-disable rulesdir/check-license +export {tokenize, TOKENS, stringify} from 'parsel-js'; + +export type * from 'parsel-js';