mirror of
https://github.com/puppeteer/puppeteer
synced 2024-06-14 14:02:48 +00:00
refactor: move selector parsing to Node (#12543)
This commit is contained in:
parent
8aac8b1ccb
commit
80bbd76626
@ -48,9 +48,10 @@ export const generateInjectedTask = task({
|
||||
entryPoints: ['src/injected/injected.ts'],
|
||||
bundle: true,
|
||||
format: 'cjs',
|
||||
target: ['chrome117', 'firefox118'],
|
||||
target: ['chrome125', 'firefox125'],
|
||||
minify: true,
|
||||
write: false,
|
||||
legalComments: 'none',
|
||||
});
|
||||
const template = await readFile('src/templates/injected.ts.tmpl', 'utf8');
|
||||
await mkdir('src/generated', {recursive: true});
|
||||
@ -136,6 +137,16 @@ export const buildTask = task({
|
||||
'utf-8'
|
||||
);
|
||||
break;
|
||||
case 'parsel-js':
|
||||
license = await readFile(
|
||||
path.join(
|
||||
path.dirname(require.resolve('parsel-js')),
|
||||
'..',
|
||||
'LICENSE'
|
||||
),
|
||||
'utf-8'
|
||||
);
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Add license handling for ${path}`);
|
||||
}
|
||||
|
29
packages/puppeteer-core/src/common/CSSQueryHandler.ts
Normal file
29
packages/puppeteer-core/src/common/CSSQueryHandler.ts
Normal file
@ -0,0 +1,29 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2023 Google Inc.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type PuppeteerUtil from '../injected/injected.js';
|
||||
|
||||
import {QueryHandler} from './QueryHandler.js';
|
||||
|
||||
/**
 * Query handler for selectors that are plain CSS.
 *
 * Both static members simply forward to the `cssQuerySelector` /
 * `cssQuerySelectorAll` helpers that are handed in through the
 * `PuppeteerUtil` argument; no parsing or rewriting of the selector happens
 * here.
 *
 * @internal
 */
export class CSSQueryHandler extends QueryHandler {
  /**
   * Returns the first match for `selector` under `element`, or `null` when
   * the helper finds none.
   */
  static override querySelector = (
    element: Node,
    selector: string,
    {cssQuerySelector}: PuppeteerUtil
  ): Node | null => {
    return cssQuerySelector(element, selector);
  };

  /**
   * Returns every match for `selector` under `element` as an iterable.
   */
  static override querySelectorAll = (
    element: Node,
    selector: string,
    {cssQuerySelectorAll}: PuppeteerUtil
  ): Iterable<Node> => {
    return cssQuerySelectorAll(element, selector);
  };
}
|
@ -6,9 +6,11 @@
|
||||
|
||||
import {ARIAQueryHandler} from '../cdp/AriaQueryHandler.js';
|
||||
|
||||
import {CSSQueryHandler} from './CSSQueryHandler.js';
|
||||
import {customQueryHandlers} from './CustomQueryHandler.js';
|
||||
import {PierceQueryHandler} from './PierceQueryHandler.js';
|
||||
import {PQueryHandler} from './PQueryHandler.js';
|
||||
import {parsePSelectors} from './PSelectorParser.js';
|
||||
import type {QueryHandler} from './QueryHandler.js';
|
||||
import {TextQueryHandler} from './TextQueryHandler.js';
|
||||
import {XPathQueryHandler} from './XPathQueryHandler.js';
|
||||
@ -45,5 +47,15 @@ export function getQueryHandlerAndSelector(selector: string): {
|
||||
}
|
||||
}
|
||||
}
|
||||
return {updatedSelector: selector, QueryHandler: PQueryHandler};
|
||||
const [pSelector, isPureCSS] = parsePSelectors(selector);
|
||||
if (isPureCSS) {
|
||||
return {
|
||||
updatedSelector: selector,
|
||||
QueryHandler: CSSQueryHandler,
|
||||
};
|
||||
}
|
||||
return {
|
||||
updatedSelector: JSON.stringify(pSelector),
|
||||
QueryHandler: PQueryHandler,
|
||||
};
|
||||
}
|
||||
|
@ -4,20 +4,18 @@
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import {type Token, tokenize, TOKENS, stringify} from 'parsel-js';
|
||||
|
||||
export type CSSSelector = string;
|
||||
export interface PPseudoSelector {
|
||||
name: string;
|
||||
value: string;
|
||||
}
|
||||
export const enum PCombinator {
|
||||
Descendent = '>>>',
|
||||
Child = '>>>>',
|
||||
}
|
||||
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
|
||||
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
|
||||
export type ComplexPSelectorList = ComplexPSelector[];
|
||||
import {
|
||||
type Token,
|
||||
tokenize,
|
||||
TOKENS,
|
||||
stringify,
|
||||
} from '../../third_party/parsel-js/parsel-js.js';
|
||||
import type {
|
||||
ComplexPSelector,
|
||||
ComplexPSelectorList,
|
||||
CompoundPSelector,
|
||||
} from '../injected/PQuerySelector.js';
|
||||
import {PCombinator} from '../injected/PQuerySelector.js';
|
||||
|
||||
TOKENS['combinator'] = /\s*(>>>>?|[\s>+~])\s*/g;
|
||||
|
||||
@ -34,6 +32,9 @@ const unquote = (text: string): string => {
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
export function parsePSelectors(
|
||||
selector: string
|
||||
): [selector: ComplexPSelectorList, isPureCSS: boolean] {
|
@ -25,6 +25,7 @@ export * from './PDFOptions.js';
|
||||
export * from './PierceQueryHandler.js';
|
||||
export * from './PQueryHandler.js';
|
||||
export * from './Product.js';
|
||||
export * from './PSelectorParser.js';
|
||||
export * from './Puppeteer.js';
|
||||
export * from './QueryHandler.js';
|
||||
export * from './ScriptInjector.js';
|
||||
|
20
packages/puppeteer-core/src/injected/CSSSelector.ts
Normal file
20
packages/puppeteer-core/src/injected/CSSSelector.ts
Normal file
@ -0,0 +1,20 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2024 Google Inc.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
 * Runs a native CSS `querySelector` on `root`.
 *
 * @param root - Node to search from. It is assumed to expose `querySelector`
 * (an element, document or document fragment); any other node makes the call
 * throw at runtime, exactly as an unchecked call would.
 * @param selector - CSS selector passed to `querySelector` unchanged.
 * @returns The first matching element, or `null` if nothing matches.
 */
export const cssQuerySelector = (
  root: Node,
  selector: string
): Element | null => {
  // Narrow assertion instead of a blanket `@ts-expect-error`, so only the
  // "root is queryable" assumption is waived and other type errors on this
  // line still surface.
  return (root as ParentNode).querySelector(selector);
};
|
||||
/**
 * Runs a native CSS `querySelectorAll` on `root`.
 *
 * @param root - Node to search from. It is assumed to expose
 * `querySelectorAll` (an element, document or document fragment); any other
 * node makes the call throw at runtime, exactly as an unchecked call would.
 * @param selector - CSS selector passed to `querySelectorAll` unchanged.
 * @returns Whatever `querySelectorAll` returns, exposed as an iterable of
 * elements.
 */
export const cssQuerySelectorAll = function (
  root: Node,
  selector: string
): Iterable<Element> {
  // Narrow assertion instead of a blanket `@ts-expect-error`, so only the
  // "root is queryable" assumption is waived and other type errors on this
  // line still surface.
  return (root as ParentNode).querySelectorAll(selector);
};
|
@ -9,21 +9,43 @@ import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js';
|
||||
|
||||
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
|
||||
import {customQuerySelectors} from './CustomQuerySelector.js';
|
||||
import {
|
||||
type ComplexPSelector,
|
||||
type ComplexPSelectorList,
|
||||
type CompoundPSelector,
|
||||
type CSSSelector,
|
||||
parsePSelectors,
|
||||
PCombinator,
|
||||
type PPseudoSelector,
|
||||
} from './PSelectorParser.js';
|
||||
import {textQuerySelectorAll} from './TextQuerySelector.js';
|
||||
import {pierce, pierceAll} from './util.js';
|
||||
import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
|
||||
|
||||
const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/;
|
||||
|
||||
/**
 * A plain CSS selector, kept as its source string.
 *
 * @internal
 */
export type CSSSelector = string;

/**
 * A non-CSS pseudo selector inside a P selector.
 *
 * `name` picks the query implementation (unknown names are looked up among
 * the registered custom query selectors) and `value` is the argument handed
 * to that implementation.
 *
 * @internal
 */
export interface PPseudoSelector {
  name: string;
  value: string;
}

/**
 * The two "deep" combinators that may separate compound selectors in a P
 * selector.
 *
 * @internal
 */
export const enum PCombinator {
  Descendent = '>>>',
  Child = '>>>>',
}

/**
 * One compound selector: an ordered list of CSS selector strings and pseudo
 * selectors.
 *
 * @internal
 */
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;

/**
 * One complex selector: compound selectors interleaved with deep
 * combinators.
 *
 * @internal
 */
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;

/**
 * A list of complex selectors; this is the shape that is serialized to JSON
 * and parsed back before querying.
 *
 * @internal
 */
export type ComplexPSelectorList = ComplexPSelector[];
|
||||
|
||||
interface QueryableNode extends Node {
|
||||
querySelectorAll: typeof Document.prototype.querySelectorAll;
|
||||
}
|
||||
@ -32,24 +54,15 @@ const isQueryableNode = (node: Node): node is QueryableNode => {
|
||||
return 'querySelectorAll' in node;
|
||||
};
|
||||
|
||||
class SelectorError extends Error {
|
||||
constructor(selector: string, message: string) {
|
||||
super(`${selector} is not a valid selector: ${message}`);
|
||||
}
|
||||
}
|
||||
|
||||
class PQueryEngine {
|
||||
#input: string;
|
||||
|
||||
#complexSelector: ComplexPSelector;
|
||||
#compoundSelector: CompoundPSelector = [];
|
||||
#selector: CSSSelector | PPseudoSelector | undefined = undefined;
|
||||
|
||||
elements: AwaitableIterable<Node>;
|
||||
|
||||
constructor(element: Node, input: string, complexSelector: ComplexPSelector) {
|
||||
constructor(element: Node, complexSelector: ComplexPSelector) {
|
||||
this.elements = [element];
|
||||
this.#input = input;
|
||||
this.#complexSelector = complexSelector;
|
||||
this.#next();
|
||||
}
|
||||
@ -71,7 +84,6 @@ class PQueryEngine {
|
||||
|
||||
for (; this.#selector !== undefined; this.#next()) {
|
||||
const selector = this.#selector;
|
||||
const input = this.#input;
|
||||
if (typeof selector === 'string') {
|
||||
// The regular expression tests if the selector is a type/universal
|
||||
// selector. Any other case means we want to apply the selector onto
|
||||
@ -128,10 +140,7 @@ class PQueryEngine {
|
||||
default:
|
||||
const querySelector = customQuerySelectors.get(selector.name);
|
||||
if (!querySelector) {
|
||||
throw new SelectorError(
|
||||
input,
|
||||
`Unknown selector type: ${selector.name}`
|
||||
);
|
||||
throw new Error(`Unknown selector type: ${selector.name}`);
|
||||
}
|
||||
yield* querySelector.querySelectorAll(element, selector.value);
|
||||
}
|
||||
@ -240,17 +249,7 @@ export const pQuerySelectorAll = function (
|
||||
root: Node,
|
||||
selector: string
|
||||
): AwaitableIterable<Node> {
|
||||
let selectors: ComplexPSelectorList;
|
||||
let isPureCSS: boolean;
|
||||
try {
|
||||
[selectors, isPureCSS] = parsePSelectors(selector);
|
||||
} catch (error) {
|
||||
return (root as unknown as QueryableNode).querySelectorAll(selector);
|
||||
}
|
||||
|
||||
if (isPureCSS) {
|
||||
return (root as unknown as QueryableNode).querySelectorAll(selector);
|
||||
}
|
||||
const selectors = JSON.parse(selector) as ComplexPSelectorList;
|
||||
// If there are any empty elements, then this implies the selector has
|
||||
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
|
||||
// treat as illegal, similar to existing behavior.
|
||||
@ -267,15 +266,12 @@ export const pQuerySelectorAll = function (
|
||||
});
|
||||
})
|
||||
) {
|
||||
throw new SelectorError(
|
||||
selector,
|
||||
'Multiple deep combinators found in sequence.'
|
||||
);
|
||||
throw new Error('Multiple deep combinators found in sequence.');
|
||||
}
|
||||
|
||||
return domSort(
|
||||
AsyncIterableUtil.flatMap(selectors, selectorParts => {
|
||||
const query = new PQueryEngine(root, selector, selectorParts);
|
||||
const query = new PQueryEngine(root, selectorParts);
|
||||
void query.run();
|
||||
return query.elements;
|
||||
})
|
||||
|
@ -8,6 +8,7 @@ import {Deferred} from '../util/Deferred.js';
|
||||
import {createFunction} from '../util/Function.js';
|
||||
|
||||
import * as ARIAQuerySelector from './ARIAQuerySelector.js';
|
||||
import * as CSSSelector from './CSSSelector.js';
|
||||
import * as CustomQuerySelectors from './CustomQuerySelector.js';
|
||||
import * as PierceQuerySelector from './PierceQuerySelector.js';
|
||||
import {IntervalPoller, MutationPoller, RAFPoller} from './Poller.js';
|
||||
@ -31,6 +32,7 @@ const PuppeteerUtil = Object.freeze({
|
||||
...TextQuerySelector,
|
||||
...util,
|
||||
...XPathQuerySelector,
|
||||
...CSSSelector,
|
||||
Deferred,
|
||||
createFunction,
|
||||
createTextContent,
|
||||
|
3
packages/puppeteer-core/third_party/parsel-js/package.json
vendored
Normal file
3
packages/puppeteer-core/third_party/parsel-js/package.json
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"type": "module"
|
||||
}
|
4
packages/puppeteer-core/third_party/parsel-js/parsel-js.ts
vendored
Normal file
4
packages/puppeteer-core/third_party/parsel-js/parsel-js.ts
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
/* eslint-disable rulesdir/check-license */
|
||||
export {tokenize, TOKENS, stringify} from 'parsel-js';
|
||||
|
||||
export type * from 'parsel-js';
|
Loading…
Reference in New Issue
Block a user