refactor: move selector parsing to Node (#12543)

This commit is contained in:
Alex Rudenko 2024-06-10 14:12:30 +02:00 committed by GitHub
parent 8aac8b1ccb
commit 80bbd76626
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 135 additions and 56 deletions

View File

@ -48,9 +48,10 @@ export const generateInjectedTask = task({
entryPoints: ['src/injected/injected.ts'],
bundle: true,
format: 'cjs',
target: ['chrome117', 'firefox118'],
target: ['chrome125', 'firefox125'],
minify: true,
write: false,
legalComments: 'none',
});
const template = await readFile('src/templates/injected.ts.tmpl', 'utf8');
await mkdir('src/generated', {recursive: true});
@ -136,6 +137,16 @@ export const buildTask = task({
'utf-8'
);
break;
case 'parsel-js':
license = await readFile(
path.join(
path.dirname(require.resolve('parsel-js')),
'..',
'LICENSE'
),
'utf-8'
);
break;
default:
throw new Error(`Add license handling for ${path}`);
}

View File

@ -0,0 +1,29 @@
/**
* @license
* Copyright 2023 Google Inc.
* SPDX-License-Identifier: Apache-2.0
*/
import type PuppeteerUtil from '../injected/injected.js';
import {QueryHandler} from './QueryHandler.js';
/**
 * Query handler that forwards selectors to the injected
 * `cssQuerySelector` / `cssQuerySelectorAll` helpers exposed on
 * `PuppeteerUtil`.
 *
 * @internal
 */
export class CSSQueryHandler extends QueryHandler {
  /**
   * Resolves the first match for `selector` under `element`, or `null`.
   */
  static override querySelector = (
    element: Node,
    selector: string,
    util: PuppeteerUtil
  ): Node | null => util.cssQuerySelector(element, selector);

  /**
   * Resolves every match for `selector` under `element`.
   */
  static override querySelectorAll = (
    element: Node,
    selector: string,
    util: PuppeteerUtil
  ): Iterable<Node> => util.cssQuerySelectorAll(element, selector);
}

View File

@ -6,9 +6,11 @@
import {ARIAQueryHandler} from '../cdp/AriaQueryHandler.js';
import {CSSQueryHandler} from './CSSQueryHandler.js';
import {customQueryHandlers} from './CustomQueryHandler.js';
import {PierceQueryHandler} from './PierceQueryHandler.js';
import {PQueryHandler} from './PQueryHandler.js';
import {parsePSelectors} from './PSelectorParser.js';
import type {QueryHandler} from './QueryHandler.js';
import {TextQueryHandler} from './TextQueryHandler.js';
import {XPathQueryHandler} from './XPathQueryHandler.js';
@ -45,5 +47,15 @@ export function getQueryHandlerAndSelector(selector: string): {
}
}
}
return {updatedSelector: selector, QueryHandler: PQueryHandler};
const [pSelector, isPureCSS] = parsePSelectors(selector);
if (isPureCSS) {
return {
updatedSelector: selector,
QueryHandler: CSSQueryHandler,
};
}
return {
updatedSelector: JSON.stringify(pSelector),
QueryHandler: PQueryHandler,
};
}

View File

@ -4,20 +4,18 @@
* SPDX-License-Identifier: Apache-2.0
*/
import {type Token, tokenize, TOKENS, stringify} from 'parsel-js';
export type CSSSelector = string;
export interface PPseudoSelector {
name: string;
value: string;
}
export const enum PCombinator {
Descendent = '>>>',
Child = '>>>>',
}
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
export type ComplexPSelectorList = ComplexPSelector[];
import {
type Token,
tokenize,
TOKENS,
stringify,
} from '../../third_party/parsel-js/parsel-js.js';
import type {
ComplexPSelector,
ComplexPSelectorList,
CompoundPSelector,
} from '../injected/PQuerySelector.js';
import {PCombinator} from '../injected/PQuerySelector.js';
TOKENS['combinator'] = /\s*(>>>>?|[\s>+~])\s*/g;
@ -34,6 +32,9 @@ const unquote = (text: string): string => {
});
};
/**
* @internal
*/
export function parsePSelectors(
selector: string
): [selector: ComplexPSelectorList, isPureCSS: boolean] {

View File

@ -25,6 +25,7 @@ export * from './PDFOptions.js';
export * from './PierceQueryHandler.js';
export * from './PQueryHandler.js';
export * from './Product.js';
export * from './PSelectorParser.js';
export * from './Puppeteer.js';
export * from './QueryHandler.js';
export * from './ScriptInjector.js';

View File

@ -0,0 +1,20 @@
/**
* @license
* Copyright 2024 Google Inc.
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Runs `querySelector` on `root` and returns its result.
 *
 * `root` is typed as `Node`, but it is treated as an element-like node that
 * implements `querySelector`; no runtime check is performed.
 *
 * @param root - Node the query starts from.
 * @param selector - Selector string passed through unchanged.
 * @returns Whatever `root.querySelector(selector)` returns.
 */
export const cssQuerySelector = (
  root: Node,
  selector: string
): Element | null => {
  // Narrow for the type checker only: the caller is trusted to pass an
  // element-like root.
  const queryRoot = root as Element;
  return queryRoot.querySelector(selector);
};
/**
 * Runs `querySelectorAll` on `root` and returns its result.
 *
 * `root` is typed as `Node`, but it is treated as an element-like node that
 * implements `querySelectorAll`; no runtime check is performed.
 *
 * @param root - Node the query starts from.
 * @param selector - Selector string passed through unchanged.
 * @returns Whatever `root.querySelectorAll(selector)` returns.
 */
export const cssQuerySelectorAll = function (
  root: Node,
  selector: string
): Iterable<Element> {
  // Narrow for the type checker only: the caller is trusted to pass an
  // element-like root.
  const queryRoot = root as Element;
  return queryRoot.querySelectorAll(selector);
};

View File

@ -9,21 +9,43 @@ import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js';
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
import {customQuerySelectors} from './CustomQuerySelector.js';
import {
type ComplexPSelector,
type ComplexPSelectorList,
type CompoundPSelector,
type CSSSelector,
parsePSelectors,
PCombinator,
type PPseudoSelector,
} from './PSelectorParser.js';
import {textQuerySelectorAll} from './TextQuerySelector.js';
import {pierce, pierceAll} from './util.js';
import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/;
/**
 * A CSS selector, carried as a raw string.
 *
 * @internal
 */
export type CSSSelector = string;
/**
 * A named pseudo selector together with its argument.
 *
 * For names without a dedicated case, the query engine in this file looks
 * `name` up in `customQuerySelectors` and passes `value` to the matching
 * handler's `querySelectorAll`.
 *
 * @internal
 */
export interface PPseudoSelector {
name: string;
value: string;
}
/**
 * Deep combinator tokens that can sit between compound selectors in a
 * {@link ComplexPSelector}.
 *
 * NOTE(review): going by the member names, `>>>` presumably selects
 * descendants and `>>>>` direct children — confirm against the query engine.
 *
 * @internal
 */
export const enum PCombinator {
Descendent = '>>>',
Child = '>>>>',
}
/**
 * One compound selector: a list whose entries are either CSS selector strings
 * or {@link PPseudoSelector} objects.
 *
 * @internal
 */
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
/**
 * One complex selector: a list of compound selectors and the
 * {@link PCombinator} tokens between them.
 *
 * @internal
 */
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
/**
 * A list of complex selectors. `pQuerySelectorAll` obtains this shape by
 * JSON-parsing its `selector` argument and runs one query engine per entry.
 *
 * @internal
 */
export type ComplexPSelectorList = ComplexPSelector[];
/**
 * A `Node` that also exposes `querySelectorAll`, typed with the signature of
 * `Document.prototype.querySelectorAll`.
 */
interface QueryableNode extends Node {
querySelectorAll: typeof Document.prototype.querySelectorAll;
}
@ -32,24 +54,15 @@ const isQueryableNode = (node: Node): node is QueryableNode => {
return 'querySelectorAll' in node;
};
/**
 * Error describing a selector that could not be used. The message starts with
 * the selector text, followed by the reason.
 */
class SelectorError extends Error {
  /**
   * @param selector - The selector text being reported.
   * @param message - The reason the selector was rejected.
   */
  constructor(selector: string, message: string) {
    const description = `${selector} is not a valid selector: ${message}`;
    super(description);
  }
}
class PQueryEngine {
#input: string;
#complexSelector: ComplexPSelector;
#compoundSelector: CompoundPSelector = [];
#selector: CSSSelector | PPseudoSelector | undefined = undefined;
elements: AwaitableIterable<Node>;
constructor(element: Node, input: string, complexSelector: ComplexPSelector) {
constructor(element: Node, complexSelector: ComplexPSelector) {
this.elements = [element];
this.#input = input;
this.#complexSelector = complexSelector;
this.#next();
}
@ -71,7 +84,6 @@ class PQueryEngine {
for (; this.#selector !== undefined; this.#next()) {
const selector = this.#selector;
const input = this.#input;
if (typeof selector === 'string') {
// The regular expression tests if the selector is a type/universal
// selector. Any other case means we want to apply the selector onto
@ -128,10 +140,7 @@ class PQueryEngine {
default:
const querySelector = customQuerySelectors.get(selector.name);
if (!querySelector) {
throw new SelectorError(
input,
`Unknown selector type: ${selector.name}`
);
throw new Error(`Unknown selector type: ${selector.name}`);
}
yield* querySelector.querySelectorAll(element, selector.value);
}
@ -240,17 +249,7 @@ export const pQuerySelectorAll = function (
root: Node,
selector: string
): AwaitableIterable<Node> {
let selectors: ComplexPSelectorList;
let isPureCSS: boolean;
try {
[selectors, isPureCSS] = parsePSelectors(selector);
} catch (error) {
return (root as unknown as QueryableNode).querySelectorAll(selector);
}
if (isPureCSS) {
return (root as unknown as QueryableNode).querySelectorAll(selector);
}
const selectors = JSON.parse(selector) as ComplexPSelectorList;
// If there are any empty elements, then this implies the selector has
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
// treat as illegal, similar to existing behavior.
@ -267,15 +266,12 @@ export const pQuerySelectorAll = function (
});
})
) {
throw new SelectorError(
selector,
'Multiple deep combinators found in sequence.'
);
throw new Error('Multiple deep combinators found in sequence.');
}
return domSort(
AsyncIterableUtil.flatMap(selectors, selectorParts => {
const query = new PQueryEngine(root, selector, selectorParts);
const query = new PQueryEngine(root, selectorParts);
void query.run();
return query.elements;
})

View File

@ -8,6 +8,7 @@ import {Deferred} from '../util/Deferred.js';
import {createFunction} from '../util/Function.js';
import * as ARIAQuerySelector from './ARIAQuerySelector.js';
import * as CSSSelector from './CSSSelector.js';
import * as CustomQuerySelectors from './CustomQuerySelector.js';
import * as PierceQuerySelector from './PierceQuerySelector.js';
import {IntervalPoller, MutationPoller, RAFPoller} from './Poller.js';
@ -31,6 +32,7 @@ const PuppeteerUtil = Object.freeze({
...TextQuerySelector,
...util,
...XPathQuerySelector,
...CSSSelector,
Deferred,
createFunction,
createTextContent,

View File

@ -0,0 +1,3 @@
{
"type": "module"
}

View File

@ -0,0 +1,4 @@
// eslint-disable rulesdir/check-license
export {tokenize, TOKENS, stringify} from 'parsel-js';
export type * from 'parsel-js';