refactor: move selector parsing to Node (#12543)

This commit is contained in:
Alex Rudenko 2024-06-10 14:12:30 +02:00 committed by GitHub
parent 8aac8b1ccb
commit 80bbd76626
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 135 additions and 56 deletions

View File

@ -48,9 +48,10 @@ export const generateInjectedTask = task({
entryPoints: ['src/injected/injected.ts'], entryPoints: ['src/injected/injected.ts'],
bundle: true, bundle: true,
format: 'cjs', format: 'cjs',
target: ['chrome117', 'firefox118'], target: ['chrome125', 'firefox125'],
minify: true, minify: true,
write: false, write: false,
legalComments: 'none',
}); });
const template = await readFile('src/templates/injected.ts.tmpl', 'utf8'); const template = await readFile('src/templates/injected.ts.tmpl', 'utf8');
await mkdir('src/generated', {recursive: true}); await mkdir('src/generated', {recursive: true});
@ -136,6 +137,16 @@ export const buildTask = task({
'utf-8' 'utf-8'
); );
break; break;
case 'parsel-js':
license = await readFile(
path.join(
path.dirname(require.resolve('parsel-js')),
'..',
'LICENSE'
),
'utf-8'
);
break;
default: default:
throw new Error(`Add license handling for ${path}`); throw new Error(`Add license handling for ${path}`);
} }

View File

@ -0,0 +1,29 @@
/**
* @license
* Copyright 2023 Google Inc.
* SPDX-License-Identifier: Apache-2.0
*/
import type PuppeteerUtil from '../injected/injected.js';
import {QueryHandler} from './QueryHandler.js';
/**
 * Query handler whose lookups are forwarded unchanged to the
 * `cssQuerySelector` / `cssQuerySelectorAll` helpers found on the
 * `PuppeteerUtil` object passed as the third argument.
 *
 * @internal
 */
export class CSSQueryHandler extends QueryHandler {
  /** Returns whatever `cssQuerySelector` yields for `element` and `selector`. */
  static override querySelector = (
    element: Node,
    selector: string,
    util: PuppeteerUtil
  ): Node | null => util.cssQuerySelector(element, selector);

  /** Returns whatever `cssQuerySelectorAll` yields for `element` and `selector`. */
  static override querySelectorAll = (
    element: Node,
    selector: string,
    util: PuppeteerUtil
  ): Iterable<Node> => util.cssQuerySelectorAll(element, selector);
}

View File

@ -6,9 +6,11 @@
import {ARIAQueryHandler} from '../cdp/AriaQueryHandler.js'; import {ARIAQueryHandler} from '../cdp/AriaQueryHandler.js';
import {CSSQueryHandler} from './CSSQueryHandler.js';
import {customQueryHandlers} from './CustomQueryHandler.js'; import {customQueryHandlers} from './CustomQueryHandler.js';
import {PierceQueryHandler} from './PierceQueryHandler.js'; import {PierceQueryHandler} from './PierceQueryHandler.js';
import {PQueryHandler} from './PQueryHandler.js'; import {PQueryHandler} from './PQueryHandler.js';
import {parsePSelectors} from './PSelectorParser.js';
import type {QueryHandler} from './QueryHandler.js'; import type {QueryHandler} from './QueryHandler.js';
import {TextQueryHandler} from './TextQueryHandler.js'; import {TextQueryHandler} from './TextQueryHandler.js';
import {XPathQueryHandler} from './XPathQueryHandler.js'; import {XPathQueryHandler} from './XPathQueryHandler.js';
@ -45,5 +47,15 @@ export function getQueryHandlerAndSelector(selector: string): {
} }
} }
} }
return {updatedSelector: selector, QueryHandler: PQueryHandler}; const [pSelector, isPureCSS] = parsePSelectors(selector);
if (isPureCSS) {
return {
updatedSelector: selector,
QueryHandler: CSSQueryHandler,
};
}
return {
updatedSelector: JSON.stringify(pSelector),
QueryHandler: PQueryHandler,
};
} }

View File

@ -4,20 +4,18 @@
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
import {type Token, tokenize, TOKENS, stringify} from 'parsel-js'; import {
type Token,
export type CSSSelector = string; tokenize,
export interface PPseudoSelector { TOKENS,
name: string; stringify,
value: string; } from '../../third_party/parsel-js/parsel-js.js';
} import type {
export const enum PCombinator { ComplexPSelector,
Descendent = '>>>', ComplexPSelectorList,
Child = '>>>>', CompoundPSelector,
} } from '../injected/PQuerySelector.js';
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>; import {PCombinator} from '../injected/PQuerySelector.js';
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
export type ComplexPSelectorList = ComplexPSelector[];
TOKENS['combinator'] = /\s*(>>>>?|[\s>+~])\s*/g; TOKENS['combinator'] = /\s*(>>>>?|[\s>+~])\s*/g;
@ -34,6 +32,9 @@ const unquote = (text: string): string => {
}); });
}; };
/**
* @internal
*/
export function parsePSelectors( export function parsePSelectors(
selector: string selector: string
): [selector: ComplexPSelectorList, isPureCSS: boolean] { ): [selector: ComplexPSelectorList, isPureCSS: boolean] {

View File

@ -25,6 +25,7 @@ export * from './PDFOptions.js';
export * from './PierceQueryHandler.js'; export * from './PierceQueryHandler.js';
export * from './PQueryHandler.js'; export * from './PQueryHandler.js';
export * from './Product.js'; export * from './Product.js';
export * from './PSelectorParser.js';
export * from './Puppeteer.js'; export * from './Puppeteer.js';
export * from './QueryHandler.js'; export * from './QueryHandler.js';
export * from './ScriptInjector.js'; export * from './ScriptInjector.js';

View File

@ -0,0 +1,20 @@
/**
* @license
* Copyright 2024 Google Inc.
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Calls `querySelector` on `root` with the given selector and returns the
 * result. `root` is typed as a plain `Node`; the resulting compiler error is
 * suppressed on the assumption that the node actually provides the method.
 */
export const cssQuerySelector = function (
  root: Node,
  selector: string
): Element | null {
  // @ts-expect-error assume element root
  const match: Element | null = root.querySelector(selector);
  return match;
};
/**
 * Calls `querySelectorAll` on `root` with the given selector and returns the
 * resulting collection as-is. `root` is typed as a plain `Node`; the compiler
 * error is suppressed on the assumption that the node provides the method.
 */
export const cssQuerySelectorAll = (
  root: Node,
  selector: string
): Iterable<Element> => {
  // @ts-expect-error assume element root
  const matches: Iterable<Element> = root.querySelectorAll(selector);
  return matches;
};

View File

@ -9,21 +9,43 @@ import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js';
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js'; import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
import {customQuerySelectors} from './CustomQuerySelector.js'; import {customQuerySelectors} from './CustomQuerySelector.js';
import {
type ComplexPSelector,
type ComplexPSelectorList,
type CompoundPSelector,
type CSSSelector,
parsePSelectors,
PCombinator,
type PPseudoSelector,
} from './PSelectorParser.js';
import {textQuerySelectorAll} from './TextQuerySelector.js'; import {textQuerySelectorAll} from './TextQuerySelector.js';
import {pierce, pierceAll} from './util.js'; import {pierce, pierceAll} from './util.js';
import {xpathQuerySelectorAll} from './XPathQuerySelector.js'; import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/; const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/;
/**
 * A CSS selector fragment, carried around as a plain string.
 *
 * @internal
 */
export type CSSSelector = string;
/**
 * A non-CSS selector part made of a handler name and the argument for it.
 * For names without built-in handling, the query engine looks `name` up in
 * the custom query selectors and passes `value` to the selector it finds.
 *
 * @internal
 */
export interface PPseudoSelector {
/** Identifies which query selector should handle this part. */
name: string;
/** The selector text handed to that query selector. */
value: string;
}
/**
 * The two deep combinator tokens that may separate compound selectors:
 * `>>>` and `>>>>`.
 *
 * @internal
 */
export const enum PCombinator {
// Written as `>>>` in a selector.
Descendent = '>>>',
// Written as `>>>>` in a selector.
Child = '>>>>',
}
/**
 * A sequence of selector parts, each either a CSS string or a
 * {@link PPseudoSelector}.
 *
 * @internal
 */
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
/**
 * A sequence whose items are either compound selectors or the
 * {@link PCombinator} tokens between them.
 *
 * @internal
 */
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
/**
 * A list of complex selectors. `pQuerySelectorAll` obtains this structure by
 * JSON-parsing its selector argument and runs one query engine per entry.
 *
 * @internal
 */
export type ComplexPSelectorList = ComplexPSelector[];
interface QueryableNode extends Node { interface QueryableNode extends Node {
querySelectorAll: typeof Document.prototype.querySelectorAll; querySelectorAll: typeof Document.prototype.querySelectorAll;
} }
@ -32,24 +54,15 @@ const isQueryableNode = (node: Node): node is QueryableNode => {
return 'querySelectorAll' in node; return 'querySelectorAll' in node;
}; };
class SelectorError extends Error {
constructor(selector: string, message: string) {
super(`${selector} is not a valid selector: ${message}`);
}
}
class PQueryEngine { class PQueryEngine {
#input: string;
#complexSelector: ComplexPSelector; #complexSelector: ComplexPSelector;
#compoundSelector: CompoundPSelector = []; #compoundSelector: CompoundPSelector = [];
#selector: CSSSelector | PPseudoSelector | undefined = undefined; #selector: CSSSelector | PPseudoSelector | undefined = undefined;
elements: AwaitableIterable<Node>; elements: AwaitableIterable<Node>;
constructor(element: Node, input: string, complexSelector: ComplexPSelector) { constructor(element: Node, complexSelector: ComplexPSelector) {
this.elements = [element]; this.elements = [element];
this.#input = input;
this.#complexSelector = complexSelector; this.#complexSelector = complexSelector;
this.#next(); this.#next();
} }
@ -71,7 +84,6 @@ class PQueryEngine {
for (; this.#selector !== undefined; this.#next()) { for (; this.#selector !== undefined; this.#next()) {
const selector = this.#selector; const selector = this.#selector;
const input = this.#input;
if (typeof selector === 'string') { if (typeof selector === 'string') {
// The regular expression tests if the selector is a type/universal // The regular expression tests if the selector is a type/universal
// selector. Any other case means we want to apply the selector onto // selector. Any other case means we want to apply the selector onto
@ -128,10 +140,7 @@ class PQueryEngine {
default: default:
const querySelector = customQuerySelectors.get(selector.name); const querySelector = customQuerySelectors.get(selector.name);
if (!querySelector) { if (!querySelector) {
throw new SelectorError( throw new Error(`Unknown selector type: ${selector.name}`);
input,
`Unknown selector type: ${selector.name}`
);
} }
yield* querySelector.querySelectorAll(element, selector.value); yield* querySelector.querySelectorAll(element, selector.value);
} }
@ -240,17 +249,7 @@ export const pQuerySelectorAll = function (
root: Node, root: Node,
selector: string selector: string
): AwaitableIterable<Node> { ): AwaitableIterable<Node> {
let selectors: ComplexPSelectorList; const selectors = JSON.parse(selector) as ComplexPSelectorList;
let isPureCSS: boolean;
try {
[selectors, isPureCSS] = parsePSelectors(selector);
} catch (error) {
return (root as unknown as QueryableNode).querySelectorAll(selector);
}
if (isPureCSS) {
return (root as unknown as QueryableNode).querySelectorAll(selector);
}
// If there are any empty elements, then this implies the selector has // If there are any empty elements, then this implies the selector has
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we // contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
// treat as illegal, similar to existing behavior. // treat as illegal, similar to existing behavior.
@ -267,15 +266,12 @@ export const pQuerySelectorAll = function (
}); });
}) })
) { ) {
throw new SelectorError( throw new Error('Multiple deep combinators found in sequence.');
selector,
'Multiple deep combinators found in sequence.'
);
} }
return domSort( return domSort(
AsyncIterableUtil.flatMap(selectors, selectorParts => { AsyncIterableUtil.flatMap(selectors, selectorParts => {
const query = new PQueryEngine(root, selector, selectorParts); const query = new PQueryEngine(root, selectorParts);
void query.run(); void query.run();
return query.elements; return query.elements;
}) })

View File

@ -8,6 +8,7 @@ import {Deferred} from '../util/Deferred.js';
import {createFunction} from '../util/Function.js'; import {createFunction} from '../util/Function.js';
import * as ARIAQuerySelector from './ARIAQuerySelector.js'; import * as ARIAQuerySelector from './ARIAQuerySelector.js';
import * as CSSSelector from './CSSSelector.js';
import * as CustomQuerySelectors from './CustomQuerySelector.js'; import * as CustomQuerySelectors from './CustomQuerySelector.js';
import * as PierceQuerySelector from './PierceQuerySelector.js'; import * as PierceQuerySelector from './PierceQuerySelector.js';
import {IntervalPoller, MutationPoller, RAFPoller} from './Poller.js'; import {IntervalPoller, MutationPoller, RAFPoller} from './Poller.js';
@ -31,6 +32,7 @@ const PuppeteerUtil = Object.freeze({
...TextQuerySelector, ...TextQuerySelector,
...util, ...util,
...XPathQuerySelector, ...XPathQuerySelector,
...CSSSelector,
Deferred, Deferred,
createFunction, createFunction,
createTextContent, createTextContent,

View File

@ -0,0 +1,3 @@
{
"type": "module"
}

View File

@ -0,0 +1,4 @@
/* eslint-disable rulesdir/check-license */
export {tokenize, TOKENS, stringify} from 'parsel-js';
export type * from 'parsel-js';