mirror of
https://github.com/puppeteer/puppeteer
synced 2024-06-14 14:02:48 +00:00
refactor: move selector parsing to Node (#12543)
This commit is contained in:
parent
8aac8b1ccb
commit
80bbd76626
@ -48,9 +48,10 @@ export const generateInjectedTask = task({
|
|||||||
entryPoints: ['src/injected/injected.ts'],
|
entryPoints: ['src/injected/injected.ts'],
|
||||||
bundle: true,
|
bundle: true,
|
||||||
format: 'cjs',
|
format: 'cjs',
|
||||||
target: ['chrome117', 'firefox118'],
|
target: ['chrome125', 'firefox125'],
|
||||||
minify: true,
|
minify: true,
|
||||||
write: false,
|
write: false,
|
||||||
|
legalComments: 'none',
|
||||||
});
|
});
|
||||||
const template = await readFile('src/templates/injected.ts.tmpl', 'utf8');
|
const template = await readFile('src/templates/injected.ts.tmpl', 'utf8');
|
||||||
await mkdir('src/generated', {recursive: true});
|
await mkdir('src/generated', {recursive: true});
|
||||||
@ -136,6 +137,16 @@ export const buildTask = task({
|
|||||||
'utf-8'
|
'utf-8'
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
|
case 'parsel-js':
|
||||||
|
license = await readFile(
|
||||||
|
path.join(
|
||||||
|
path.dirname(require.resolve('parsel-js')),
|
||||||
|
'..',
|
||||||
|
'LICENSE'
|
||||||
|
),
|
||||||
|
'utf-8'
|
||||||
|
);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Error(`Add license handling for ${path}`);
|
throw new Error(`Add license handling for ${path}`);
|
||||||
}
|
}
|
||||||
|
29
packages/puppeteer-core/src/common/CSSQueryHandler.ts
Normal file
29
packages/puppeteer-core/src/common/CSSQueryHandler.ts
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2023 Google Inc.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type PuppeteerUtil from '../injected/injected.js';
|
||||||
|
|
||||||
|
import {QueryHandler} from './QueryHandler.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export class CSSQueryHandler extends QueryHandler {
|
||||||
|
static override querySelector = (
|
||||||
|
element: Node,
|
||||||
|
selector: string,
|
||||||
|
{cssQuerySelector}: PuppeteerUtil
|
||||||
|
): Node | null => {
|
||||||
|
return cssQuerySelector(element, selector);
|
||||||
|
};
|
||||||
|
static override querySelectorAll = (
|
||||||
|
element: Node,
|
||||||
|
selector: string,
|
||||||
|
{cssQuerySelectorAll}: PuppeteerUtil
|
||||||
|
): Iterable<Node> => {
|
||||||
|
return cssQuerySelectorAll(element, selector);
|
||||||
|
};
|
||||||
|
}
|
@ -6,9 +6,11 @@
|
|||||||
|
|
||||||
import {ARIAQueryHandler} from '../cdp/AriaQueryHandler.js';
|
import {ARIAQueryHandler} from '../cdp/AriaQueryHandler.js';
|
||||||
|
|
||||||
|
import {CSSQueryHandler} from './CSSQueryHandler.js';
|
||||||
import {customQueryHandlers} from './CustomQueryHandler.js';
|
import {customQueryHandlers} from './CustomQueryHandler.js';
|
||||||
import {PierceQueryHandler} from './PierceQueryHandler.js';
|
import {PierceQueryHandler} from './PierceQueryHandler.js';
|
||||||
import {PQueryHandler} from './PQueryHandler.js';
|
import {PQueryHandler} from './PQueryHandler.js';
|
||||||
|
import {parsePSelectors} from './PSelectorParser.js';
|
||||||
import type {QueryHandler} from './QueryHandler.js';
|
import type {QueryHandler} from './QueryHandler.js';
|
||||||
import {TextQueryHandler} from './TextQueryHandler.js';
|
import {TextQueryHandler} from './TextQueryHandler.js';
|
||||||
import {XPathQueryHandler} from './XPathQueryHandler.js';
|
import {XPathQueryHandler} from './XPathQueryHandler.js';
|
||||||
@ -45,5 +47,15 @@ export function getQueryHandlerAndSelector(selector: string): {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return {updatedSelector: selector, QueryHandler: PQueryHandler};
|
const [pSelector, isPureCSS] = parsePSelectors(selector);
|
||||||
|
if (isPureCSS) {
|
||||||
|
return {
|
||||||
|
updatedSelector: selector,
|
||||||
|
QueryHandler: CSSQueryHandler,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
updatedSelector: JSON.stringify(pSelector),
|
||||||
|
QueryHandler: PQueryHandler,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
@ -4,20 +4,18 @@
|
|||||||
* SPDX-License-Identifier: Apache-2.0
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import {type Token, tokenize, TOKENS, stringify} from 'parsel-js';
|
import {
|
||||||
|
type Token,
|
||||||
export type CSSSelector = string;
|
tokenize,
|
||||||
export interface PPseudoSelector {
|
TOKENS,
|
||||||
name: string;
|
stringify,
|
||||||
value: string;
|
} from '../../third_party/parsel-js/parsel-js.js';
|
||||||
}
|
import type {
|
||||||
export const enum PCombinator {
|
ComplexPSelector,
|
||||||
Descendent = '>>>',
|
ComplexPSelectorList,
|
||||||
Child = '>>>>',
|
CompoundPSelector,
|
||||||
}
|
} from '../injected/PQuerySelector.js';
|
||||||
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
|
import {PCombinator} from '../injected/PQuerySelector.js';
|
||||||
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
|
|
||||||
export type ComplexPSelectorList = ComplexPSelector[];
|
|
||||||
|
|
||||||
TOKENS['combinator'] = /\s*(>>>>?|[\s>+~])\s*/g;
|
TOKENS['combinator'] = /\s*(>>>>?|[\s>+~])\s*/g;
|
||||||
|
|
||||||
@ -34,6 +32,9 @@ const unquote = (text: string): string => {
|
|||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
export function parsePSelectors(
|
export function parsePSelectors(
|
||||||
selector: string
|
selector: string
|
||||||
): [selector: ComplexPSelectorList, isPureCSS: boolean] {
|
): [selector: ComplexPSelectorList, isPureCSS: boolean] {
|
@ -25,6 +25,7 @@ export * from './PDFOptions.js';
|
|||||||
export * from './PierceQueryHandler.js';
|
export * from './PierceQueryHandler.js';
|
||||||
export * from './PQueryHandler.js';
|
export * from './PQueryHandler.js';
|
||||||
export * from './Product.js';
|
export * from './Product.js';
|
||||||
|
export * from './PSelectorParser.js';
|
||||||
export * from './Puppeteer.js';
|
export * from './Puppeteer.js';
|
||||||
export * from './QueryHandler.js';
|
export * from './QueryHandler.js';
|
||||||
export * from './ScriptInjector.js';
|
export * from './ScriptInjector.js';
|
||||||
|
20
packages/puppeteer-core/src/injected/CSSSelector.ts
Normal file
20
packages/puppeteer-core/src/injected/CSSSelector.ts
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
/**
|
||||||
|
* @license
|
||||||
|
* Copyright 2024 Google Inc.
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
export const cssQuerySelector = (
|
||||||
|
root: Node,
|
||||||
|
selector: string
|
||||||
|
): Element | null => {
|
||||||
|
// @ts-expect-error assume element root
|
||||||
|
return root.querySelector(selector);
|
||||||
|
};
|
||||||
|
export const cssQuerySelectorAll = function (
|
||||||
|
root: Node,
|
||||||
|
selector: string
|
||||||
|
): Iterable<Element> {
|
||||||
|
// @ts-expect-error assume element root
|
||||||
|
return root.querySelectorAll(selector);
|
||||||
|
};
|
@ -9,21 +9,43 @@ import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js';
|
|||||||
|
|
||||||
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
|
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
|
||||||
import {customQuerySelectors} from './CustomQuerySelector.js';
|
import {customQuerySelectors} from './CustomQuerySelector.js';
|
||||||
import {
|
|
||||||
type ComplexPSelector,
|
|
||||||
type ComplexPSelectorList,
|
|
||||||
type CompoundPSelector,
|
|
||||||
type CSSSelector,
|
|
||||||
parsePSelectors,
|
|
||||||
PCombinator,
|
|
||||||
type PPseudoSelector,
|
|
||||||
} from './PSelectorParser.js';
|
|
||||||
import {textQuerySelectorAll} from './TextQuerySelector.js';
|
import {textQuerySelectorAll} from './TextQuerySelector.js';
|
||||||
import {pierce, pierceAll} from './util.js';
|
import {pierce, pierceAll} from './util.js';
|
||||||
import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
|
import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
|
||||||
|
|
||||||
const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/;
|
const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export type CSSSelector = string;
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export interface PPseudoSelector {
|
||||||
|
name: string;
|
||||||
|
value: string;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export const enum PCombinator {
|
||||||
|
Descendent = '>>>',
|
||||||
|
Child = '>>>>',
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
|
||||||
|
/**
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
export type ComplexPSelectorList = ComplexPSelector[];
|
||||||
|
|
||||||
interface QueryableNode extends Node {
|
interface QueryableNode extends Node {
|
||||||
querySelectorAll: typeof Document.prototype.querySelectorAll;
|
querySelectorAll: typeof Document.prototype.querySelectorAll;
|
||||||
}
|
}
|
||||||
@ -32,24 +54,15 @@ const isQueryableNode = (node: Node): node is QueryableNode => {
|
|||||||
return 'querySelectorAll' in node;
|
return 'querySelectorAll' in node;
|
||||||
};
|
};
|
||||||
|
|
||||||
class SelectorError extends Error {
|
|
||||||
constructor(selector: string, message: string) {
|
|
||||||
super(`${selector} is not a valid selector: ${message}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class PQueryEngine {
|
class PQueryEngine {
|
||||||
#input: string;
|
|
||||||
|
|
||||||
#complexSelector: ComplexPSelector;
|
#complexSelector: ComplexPSelector;
|
||||||
#compoundSelector: CompoundPSelector = [];
|
#compoundSelector: CompoundPSelector = [];
|
||||||
#selector: CSSSelector | PPseudoSelector | undefined = undefined;
|
#selector: CSSSelector | PPseudoSelector | undefined = undefined;
|
||||||
|
|
||||||
elements: AwaitableIterable<Node>;
|
elements: AwaitableIterable<Node>;
|
||||||
|
|
||||||
constructor(element: Node, input: string, complexSelector: ComplexPSelector) {
|
constructor(element: Node, complexSelector: ComplexPSelector) {
|
||||||
this.elements = [element];
|
this.elements = [element];
|
||||||
this.#input = input;
|
|
||||||
this.#complexSelector = complexSelector;
|
this.#complexSelector = complexSelector;
|
||||||
this.#next();
|
this.#next();
|
||||||
}
|
}
|
||||||
@ -71,7 +84,6 @@ class PQueryEngine {
|
|||||||
|
|
||||||
for (; this.#selector !== undefined; this.#next()) {
|
for (; this.#selector !== undefined; this.#next()) {
|
||||||
const selector = this.#selector;
|
const selector = this.#selector;
|
||||||
const input = this.#input;
|
|
||||||
if (typeof selector === 'string') {
|
if (typeof selector === 'string') {
|
||||||
// The regular expression tests if the selector is a type/universal
|
// The regular expression tests if the selector is a type/universal
|
||||||
// selector. Any other case means we want to apply the selector onto
|
// selector. Any other case means we want to apply the selector onto
|
||||||
@ -128,10 +140,7 @@ class PQueryEngine {
|
|||||||
default:
|
default:
|
||||||
const querySelector = customQuerySelectors.get(selector.name);
|
const querySelector = customQuerySelectors.get(selector.name);
|
||||||
if (!querySelector) {
|
if (!querySelector) {
|
||||||
throw new SelectorError(
|
throw new Error(`Unknown selector type: ${selector.name}`);
|
||||||
input,
|
|
||||||
`Unknown selector type: ${selector.name}`
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
yield* querySelector.querySelectorAll(element, selector.value);
|
yield* querySelector.querySelectorAll(element, selector.value);
|
||||||
}
|
}
|
||||||
@ -240,17 +249,7 @@ export const pQuerySelectorAll = function (
|
|||||||
root: Node,
|
root: Node,
|
||||||
selector: string
|
selector: string
|
||||||
): AwaitableIterable<Node> {
|
): AwaitableIterable<Node> {
|
||||||
let selectors: ComplexPSelectorList;
|
const selectors = JSON.parse(selector) as ComplexPSelectorList;
|
||||||
let isPureCSS: boolean;
|
|
||||||
try {
|
|
||||||
[selectors, isPureCSS] = parsePSelectors(selector);
|
|
||||||
} catch (error) {
|
|
||||||
return (root as unknown as QueryableNode).querySelectorAll(selector);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isPureCSS) {
|
|
||||||
return (root as unknown as QueryableNode).querySelectorAll(selector);
|
|
||||||
}
|
|
||||||
// If there are any empty elements, then this implies the selector has
|
// If there are any empty elements, then this implies the selector has
|
||||||
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
|
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
|
||||||
// treat as illegal, similar to existing behavior.
|
// treat as illegal, similar to existing behavior.
|
||||||
@ -267,15 +266,12 @@ export const pQuerySelectorAll = function (
|
|||||||
});
|
});
|
||||||
})
|
})
|
||||||
) {
|
) {
|
||||||
throw new SelectorError(
|
throw new Error('Multiple deep combinators found in sequence.');
|
||||||
selector,
|
|
||||||
'Multiple deep combinators found in sequence.'
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return domSort(
|
return domSort(
|
||||||
AsyncIterableUtil.flatMap(selectors, selectorParts => {
|
AsyncIterableUtil.flatMap(selectors, selectorParts => {
|
||||||
const query = new PQueryEngine(root, selector, selectorParts);
|
const query = new PQueryEngine(root, selectorParts);
|
||||||
void query.run();
|
void query.run();
|
||||||
return query.elements;
|
return query.elements;
|
||||||
})
|
})
|
||||||
|
@ -8,6 +8,7 @@ import {Deferred} from '../util/Deferred.js';
|
|||||||
import {createFunction} from '../util/Function.js';
|
import {createFunction} from '../util/Function.js';
|
||||||
|
|
||||||
import * as ARIAQuerySelector from './ARIAQuerySelector.js';
|
import * as ARIAQuerySelector from './ARIAQuerySelector.js';
|
||||||
|
import * as CSSSelector from './CSSSelector.js';
|
||||||
import * as CustomQuerySelectors from './CustomQuerySelector.js';
|
import * as CustomQuerySelectors from './CustomQuerySelector.js';
|
||||||
import * as PierceQuerySelector from './PierceQuerySelector.js';
|
import * as PierceQuerySelector from './PierceQuerySelector.js';
|
||||||
import {IntervalPoller, MutationPoller, RAFPoller} from './Poller.js';
|
import {IntervalPoller, MutationPoller, RAFPoller} from './Poller.js';
|
||||||
@ -31,6 +32,7 @@ const PuppeteerUtil = Object.freeze({
|
|||||||
...TextQuerySelector,
|
...TextQuerySelector,
|
||||||
...util,
|
...util,
|
||||||
...XPathQuerySelector,
|
...XPathQuerySelector,
|
||||||
|
...CSSSelector,
|
||||||
Deferred,
|
Deferred,
|
||||||
createFunction,
|
createFunction,
|
||||||
createTextContent,
|
createTextContent,
|
||||||
|
3
packages/puppeteer-core/third_party/parsel-js/package.json
vendored
Normal file
3
packages/puppeteer-core/third_party/parsel-js/package.json
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"type": "module"
|
||||||
|
}
|
4
packages/puppeteer-core/third_party/parsel-js/parsel-js.ts
vendored
Normal file
4
packages/puppeteer-core/third_party/parsel-js/parsel-js.ts
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
/* eslint-disable rulesdir/check-license */
|
||||||
|
export {tokenize, TOKENS, stringify} from 'parsel-js';
|
||||||
|
|
||||||
|
export type * from 'parsel-js';
|
Loading…
Reference in New Issue
Block a user