chore: support commas in P selectors (#9769)

This commit is contained in:
jrandolf 2023-03-06 10:56:52 +01:00 committed by GitHub
parent 62d5f398b4
commit f84873c121
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 260 additions and 226 deletions

18
package-lock.json generated
View File

@ -61,7 +61,6 @@
"jpeg-js": "0.4.4", "jpeg-js": "0.4.4",
"mime": "3.0.0", "mime": "3.0.0",
"minimist": "1.2.7", "minimist": "1.2.7",
"mitt": "3.0.0",
"mocha": "10.2.0", "mocha": "10.2.0",
"ncp": "2.0.0", "ncp": "2.0.0",
"npm-run-all": "4.1.5", "npm-run-all": "4.1.5",
@ -6663,6 +6662,12 @@
"url": "https://github.com/sponsors/sindresorhus" "url": "https://github.com/sponsors/sindresorhus"
} }
}, },
"node_modules/parsel-js": {
"version": "1.0.3",
"resolved": "git+ssh://git@github.com/jrandolf/parsel.git#f3ac5a371c382df9e5a8425d239594802562ee0f",
"dev": true,
"license": "MIT"
},
"node_modules/path-exists": { "node_modules/path-exists": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@ -8695,6 +8700,10 @@
"unbzip2-stream": "1.4.3", "unbzip2-stream": "1.4.3",
"ws": "8.11.0" "ws": "8.11.0"
}, },
"devDependencies": {
"mitt": "3.0.0",
"parsel-js": "github:jrandolf/parsel"
},
"engines": { "engines": {
"node": ">=14.1.0" "node": ">=14.1.0"
}, },
@ -13926,6 +13935,11 @@
"lines-and-columns": "^1.1.6" "lines-and-columns": "^1.1.6"
} }
}, },
"parsel-js": {
"version": "git+ssh://git@github.com/jrandolf/parsel.git#f3ac5a371c382df9e5a8425d239594802562ee0f",
"dev": true,
"from": "parsel-js@github:jrandolf/parsel"
},
"path-exists": { "path-exists": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@ -14121,6 +14135,8 @@
"devtools-protocol": "0.0.1094867", "devtools-protocol": "0.0.1094867",
"extract-zip": "2.0.1", "extract-zip": "2.0.1",
"https-proxy-agent": "5.0.1", "https-proxy-agent": "5.0.1",
"mitt": "3.0.0",
"parsel-js": "github:jrandolf/parsel",
"proxy-from-env": "1.1.0", "proxy-from-env": "1.1.0",
"rimraf": "3.0.2", "rimraf": "3.0.2",
"tar-fs": "2.1.1", "tar-fs": "2.1.1",

View File

@ -163,7 +163,6 @@
"jpeg-js": "0.4.4", "jpeg-js": "0.4.4",
"mime": "3.0.0", "mime": "3.0.0",
"minimist": "1.2.7", "minimist": "1.2.7",
"mitt": "3.0.0",
"mocha": "10.2.0", "mocha": "10.2.0",
"ncp": "2.0.0", "ncp": "2.0.0",
"npm-run-all": "4.1.5", "npm-run-all": "4.1.5",

View File

@ -150,5 +150,9 @@
"typescript": { "typescript": {
"optional": true "optional": true
} }
},
"devDependencies": {
"mitt": "3.0.0",
"parsel-js": "github:jrandolf/parsel"
} }
} }

View File

@ -1,10 +1,34 @@
/**
* Copyright 2023 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type {AwaitableIterable} from '../common/types.js'; import type {AwaitableIterable} from '../common/types.js';
import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js'; import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js';
import {isErrorLike} from '../util/ErrorLike.js'; import {isErrorLike} from '../util/ErrorLike.js';
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js'; import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
import {customQuerySelectors} from './CustomQuerySelector.js'; import {customQuerySelectors} from './CustomQuerySelector.js';
import {parsePSelectors, PSelector} from './PSelectorParser.js'; import {
ComplexPSelector,
ComplexPSelectorList,
CompoundPSelector,
CSSSelector,
parsePSelectors,
PCombinator,
PPseudoSelector,
} from './PSelectorParser.js';
import {textQuerySelectorAll} from './TextQuerySelector.js'; import {textQuerySelectorAll} from './TextQuerySelector.js';
import {deepChildren, deepDescendents} from './util.js'; import {deepChildren, deepDescendents} from './util.js';
import {xpathQuerySelectorAll} from './XPathQuerySelector.js'; import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
@ -18,50 +42,17 @@ class SelectorError extends Error {
class PQueryEngine { class PQueryEngine {
#input: string; #input: string;
#deepShadowSelectors: PSelector[][][]; #complexSelector: ComplexPSelector;
#shadowSelectors: PSelector[][]; #compoundSelector: CompoundPSelector = [];
#selectors: PSelector[]; #selector: CSSSelector | PPseudoSelector | undefined = undefined;
#selector: PSelector | undefined;
elements: AwaitableIterable<Node>; elements: AwaitableIterable<Node>;
constructor(element: Node, selector: string) { constructor(element: Node, input: string, complexSelector: ComplexPSelector) {
this.#input = selector.trim();
if (this.#input.length === 0) {
throw new SelectorError(this.#input, 'The provided selector is empty.');
}
try {
this.#deepShadowSelectors = parsePSelectors(this.#input);
} catch (error) {
if (!isErrorLike(error)) {
throw new SelectorError(this.#input, String(error));
}
throw new SelectorError(this.#input, error.message);
}
// If there are any empty elements, then this implies the selector has
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
// treat as illegal, similar to existing behavior.
if (
this.#deepShadowSelectors.some(shadowSelectors => {
return shadowSelectors.some(selectors => {
return selectors.length === 0;
});
})
) {
throw new SelectorError(
this.#input,
'Multiple deep combinators found in sequence.'
);
}
this.#shadowSelectors = this.#deepShadowSelectors.shift() as PSelector[][];
this.#selectors = this.#shadowSelectors.shift() as PSelector[];
this.#selector = this.#selectors.shift();
this.elements = [element]; this.elements = [element];
this.#input = input;
this.#complexSelector = complexSelector;
this.#next();
} }
async run(): Promise<void> { async run(): Promise<void> {
@ -89,10 +80,10 @@ class PQueryEngine {
for (; this.#selector !== undefined; this.#next()) { for (; this.#selector !== undefined; this.#next()) {
const selector = this.#selector; const selector = this.#selector;
const input = this.#input; const input = this.#input;
this.elements = AsyncIterableUtil.flatMap( if (typeof selector === 'string') {
this.elements, this.elements = AsyncIterableUtil.flatMap(
async function* (element) { this.elements,
if (typeof selector === 'string') { async function* (element) {
if (!element.parentElement) { if (!element.parentElement) {
yield* (element as Element).querySelectorAll(selector); yield* (element as Element).querySelectorAll(selector);
return; return;
@ -108,59 +99,74 @@ class PQueryEngine {
yield* element.parentElement.querySelectorAll( yield* element.parentElement.querySelectorAll(
`:scope > :nth-child(${index})${selector}` `:scope > :nth-child(${index})${selector}`
); );
return;
} }
);
switch (selector.name) { } else {
case 'text': this.elements = AsyncIterableUtil.flatMap(
yield* textQuerySelectorAll(element, selector.value); this.elements,
break; async function* (element) {
case 'xpath': switch (selector.name) {
yield* xpathQuerySelectorAll(element, selector.value); case 'text':
break; yield* textQuerySelectorAll(element, selector.value);
case 'aria': break;
yield* ariaQuerySelectorAll(element, selector.value); case 'xpath':
break; yield* xpathQuerySelectorAll(element, selector.value);
default: break;
const querySelector = customQuerySelectors.get(selector.name); case 'aria':
if (!querySelector) { yield* ariaQuerySelectorAll(element, selector.value);
throw new SelectorError( break;
input, default:
`Unknown selector type: ${selector.name}` const querySelector = customQuerySelectors.get(selector.name);
); if (!querySelector) {
} throw new SelectorError(
yield* querySelector.querySelectorAll(element, selector.value); input,
`Unknown selector type: ${selector.name}`
);
}
yield* querySelector.querySelectorAll(element, selector.value);
}
} }
} );
); }
} }
} }
#next() { #next() {
if (this.#selectors.length === 0) { if (this.#compoundSelector.length !== 0) {
if (this.#shadowSelectors.length === 0) { this.#selector = this.#compoundSelector.shift();
if (this.#deepShadowSelectors.length === 0) { return;
this.#selector = undefined; }
return; if (this.#complexSelector.length === 0) {
} this.#selector = undefined;
return;
}
const selector = this.#complexSelector.shift();
switch (selector) {
case PCombinator.Child: {
this.elements = AsyncIterableUtil.flatMap(
this.elements,
function* (element) {
yield* deepChildren(element);
}
);
this.#next();
break;
}
case PCombinator.Descendent: {
this.elements = AsyncIterableUtil.flatMap( this.elements = AsyncIterableUtil.flatMap(
this.elements, this.elements,
function* (element) { function* (element) {
yield* deepDescendents(element); yield* deepDescendents(element);
} }
); );
this.#shadowSelectors = this.#next();
this.#deepShadowSelectors.shift() as PSelector[][]; break;
} }
this.elements = AsyncIterableUtil.flatMap( default:
this.elements, this.#compoundSelector = selector as CompoundPSelector;
function* (element) { this.#next();
yield* deepChildren(element); break;
}
);
this.#selectors = this.#shadowSelectors.shift() as PSelector[];
} }
this.#selector = this.#selectors.shift() as PSelector;
} }
} }
@ -173,9 +179,43 @@ export const pQuerySelectorAll = async function* (
root: Node, root: Node,
selector: string selector: string
): AwaitableIterable<Node> { ): AwaitableIterable<Node> {
const query = new PQueryEngine(root, selector); let selectors: ComplexPSelectorList;
query.run(); try {
yield* query.elements; selectors = parsePSelectors(selector);
} catch (error) {
if (!isErrorLike(error)) {
throw new SelectorError(selector, String(error));
}
throw new SelectorError(selector, error.message);
}
// If there are any empty elements, then this implies the selector has
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
// treat as illegal, similar to existing behavior.
if (
selectors.some(parts => {
let i = 0;
return parts.some(parts => {
if (typeof parts === 'string') {
++i;
} else {
i = 0;
}
return i > 1;
});
})
) {
throw new SelectorError(
selector,
'Multiple deep combinators found in sequence.'
);
}
for (const selectorParts of selectors) {
const query = new PQueryEngine(root, selector, selectorParts);
query.run();
yield* query.elements;
}
}; };
/** /**

View File

@ -14,145 +14,123 @@
* limitations under the License. * limitations under the License.
*/ */
type CSSSelector = string; import {tokenize, Tokens, TOKENS} from 'parsel-js';
export type PSelector = export type CSSSelector = string;
| { export type PPseudoSelector = {
name: string; name: string;
value: string; value: string;
} };
| CSSSelector; export const enum PCombinator {
Descendent = '>>>',
Child = '>>>>',
}
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
export type ComplexPSelectorList = ComplexPSelector[];
const PUPPETEER_PSEUDO_ELEMENT = /^::-p-([-a-zA-Z_]+)\(/; TOKENS['combinator'] = new RegExp(
`${/\s*(?:>{3,4})\s*|/.source}${TOKENS['combinator']!.source}`,
'g'
);
class PSelectorParser { class TokenSpan {
#input: string; #tokens: Tokens[] = [];
#escaped = false; #selector: string;
#quoted = false;
// The first level are deep roots. The second level are shallow roots. constructor(selector: string) {
#selectors: PSelector[][][] = [[[]]]; this.#selector = selector;
constructor(input: string) {
this.#input = input;
} }
get selectors(): PSelector[][][] { get length(): number {
return this.#selectors; return this.#tokens.length;
} }
parse(): void { add(token: Tokens) {
for (let i = 0; i < this.#input.length; ++i) { this.#tokens.push(token);
if (this.#escaped) { }
this.#escaped = false;
toStringAndClear() {
const startToken = this.#tokens[0] as Tokens;
const endToken = this.#tokens[this.#tokens.length - 1] as Tokens;
this.#tokens.splice(0);
return this.#selector.slice(startToken.pos[0], endToken.pos[1]);
}
}
const ESCAPE_REGEXP = /\\[\s\S]/g;
const unquote = (text: string): string => {
if (text.length > 1) {
for (const char of ['"', "'"]) {
if (!text.startsWith(char) || !text.endsWith(char)) {
continue; continue;
} }
switch (this.#input[i]) { return text
case '\\': { .slice(char.length, -char.length)
this.#escaped = true; .replace(ESCAPE_REGEXP, match => {
break; return match.slice(1);
} });
case '"': { }
this.#quoted = !this.#quoted; }
break; return text;
} };
default: {
if (this.#quoted) { export function parsePSelectors(selector: string): ComplexPSelectorList {
break; const tokens = tokenize(selector);
} if (tokens.length === 0) {
const remainder = this.#input.slice(i); return [];
if (remainder.startsWith('>>>>')) { }
this.#push(this.#input.slice(0, i)); let compoundSelector: CompoundPSelector = [];
this.#input = remainder.slice('>>>>'.length); let complexSelector: ComplexPSelector = [compoundSelector];
this.#parseDeepChild(); const selectors: ComplexPSelectorList = [complexSelector];
} else if (remainder.startsWith('>>>')) { const storage = new TokenSpan(selector);
this.#push(this.#input.slice(0, i)); for (const token of tokens) {
this.#input = remainder.slice('>>>'.length); switch (token.type) {
this.#parseDeepDescendent(); case 'combinator':
} else { switch (token.content) {
const result = PUPPETEER_PSEUDO_ELEMENT.exec(remainder); case '>>>':
if (!result) { if (storage.length) {
continue; compoundSelector.push(storage.toStringAndClear());
} }
const [match, name] = result; compoundSelector = [];
this.#push(this.#input.slice(0, i)); complexSelector.push(PCombinator.Descendent);
this.#input = remainder.slice(match.length); complexSelector.push(compoundSelector);
this.#push({ continue;
name: name as string, case '>>>>':
value: this.#scanParameter(), if (storage.length) {
}); compoundSelector.push(storage.toStringAndClear());
} }
compoundSelector = [];
complexSelector.push(PCombinator.Child);
complexSelector.push(compoundSelector);
continue;
} }
} break;
} case 'pseudo-element':
this.#push(this.#input); if (!token.name.startsWith('-p-')) {
}
#push(selector: PSelector) {
if (typeof selector === 'string') {
// We only trim the end only since `.foo` and ` .foo` are different.
selector = selector.trimEnd();
if (selector.length === 0) {
return;
}
}
const roots = this.#selectors[this.#selectors.length - 1]!;
roots[roots.length - 1]!.push(selector);
}
#parseDeepChild() {
this.#selectors[this.#selectors.length - 1]!.push([]);
}
#parseDeepDescendent() {
this.#selectors.push([[]]);
}
#scanParameter(): string {
const char = this.#input[0];
switch (char) {
case "'":
case '"':
this.#input = this.#input.slice(1);
const parameter = this.#scanEscapedValueTill(char);
if (!this.#input.startsWith(')')) {
throw new Error("Expected ')'");
}
this.#input = this.#input.slice(1);
return parameter;
default:
return this.#scanEscapedValueTill(')');
}
}
#scanEscapedValueTill(end: string): string {
let string = '';
for (let i = 0; i < this.#input.length; ++i) {
if (this.#escaped) {
this.#escaped = false;
string += this.#input[i];
continue;
}
switch (this.#input[i]) {
case '\\': {
this.#escaped = true;
break; break;
} }
case end: { if (storage.length) {
this.#input = this.#input.slice(i + 1); compoundSelector.push(storage.toStringAndClear());
return string;
} }
default: { compoundSelector.push({
string += this.#input[i]; name: token.name.slice(3),
value: unquote(token.argument ?? ''),
});
continue;
case 'comma':
if (storage.length) {
compoundSelector.push(storage.toStringAndClear());
} }
} compoundSelector = [];
complexSelector = [compoundSelector];
selectors.push(complexSelector);
continue;
} }
throw new Error(`Expected \`${end}\``); storage.add(token);
} }
} if (storage.length) {
compoundSelector.push(storage.toStringAndClear());
export function parsePSelectors(selector: string): PSelector[][][] { }
const parser = new PSelectorParser(selector); return selectors;
parser.parse();
return parser.selectors;
} }

View File

@ -1,5 +1,5 @@
/** /**
* CommonJS JavaScript code that provides the puppeteer utilities. See the * JavaScript code that provides the puppeteer utilities. See the
* [README](https://github.com/puppeteer/puppeteer/blob/main/src/injected/README.md) * [README](https://github.com/puppeteer/puppeteer/blob/main/src/injected/README.md)
* for injection for more information. * for injection for more information.
* *

View File

@ -360,6 +360,7 @@ describe('Query handler tests', function () {
beforeEach(async () => { beforeEach(async () => {
const {page} = getTestState(); const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>'); await page.setContent('<div>hello <button>world</button></div>');
Puppeteer.clearCustomQueryHandlers();
}); });
it('should work with CSS selectors', async () => { it('should work with CSS selectors', async () => {
@ -386,8 +387,6 @@ describe('Query handler tests', function () {
it('should work ARIA selectors', async () => { it('should work ARIA selectors', async () => {
const {page} = getTestState(); const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
const element = await page.$('div ::-p-aria(world)'); const element = await page.$('div ::-p-aria(world)');
assert(element, 'Could not find element'); assert(element, 'Could not find element');
expect( expect(
@ -399,8 +398,6 @@ describe('Query handler tests', function () {
it('should work XPath selectors', async () => { it('should work XPath selectors', async () => {
const {page} = getTestState(); const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
const element = await page.$('div ::-p-xpath(//button)'); const element = await page.$('div ::-p-xpath(//button)');
assert(element, 'Could not find element'); assert(element, 'Could not find element');
expect( expect(
@ -411,16 +408,14 @@ describe('Query handler tests', function () {
}); });
it('should work with custom selectors', async () => { it('should work with custom selectors', async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
Puppeteer.clearCustomQueryHandlers();
Puppeteer.registerCustomQueryHandler('div', { Puppeteer.registerCustomQueryHandler('div', {
queryOne() { queryOne() {
return document.querySelector('div'); return document.querySelector('div');
}, },
}); });
const element = await page.$('::-p-div()'); const {page} = getTestState();
const element = await page.$('::-p-div');
assert(element, 'Could not find element'); assert(element, 'Could not find element');
expect( expect(
await element.evaluate(element => { await element.evaluate(element => {
@ -431,8 +426,6 @@ describe('Query handler tests', function () {
it('should work with custom selectors with args', async () => { it('should work with custom selectors with args', async () => {
const {page} = getTestState(); const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
Puppeteer.clearCustomQueryHandlers();
Puppeteer.registerCustomQueryHandler('div', { Puppeteer.registerCustomQueryHandler('div', {
queryOne(_, selector) { queryOne(_, selector) {
if (selector === 'true') { if (selector === 'true') {
@ -471,7 +464,7 @@ describe('Query handler tests', function () {
).toBeTruthy(); ).toBeTruthy();
} }
{ {
const element = await page.$('::-p-div()'); const element = await page.$('::-p-div');
assert(element, 'Could not find element'); assert(element, 'Could not find element');
expect( expect(
await element.evaluate(element => { await element.evaluate(element => {
@ -483,8 +476,6 @@ describe('Query handler tests', function () {
it('should work with :hover', async () => { it('should work with :hover', async () => {
const {page} = getTestState(); const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
let button = await page.$('div ::-p-text(world)'); let button = await page.$('div ::-p-text(world)');
assert(button, 'Could not find element'); assert(button, 'Could not find element');
await button.hover(); await button.hover();
@ -497,5 +488,11 @@ describe('Query handler tests', function () {
}); });
expect(value).toMatchObject({textContent: 'world', tagName: 'BUTTON'}); expect(value).toMatchObject({textContent: 'world', tagName: 'BUTTON'});
}); });
it('should work with commas', async () => {
const {page} = getTestState();
const elements = await page.$$('div, ::-p-text(world)');
expect(elements.length).toStrictEqual(2);
});
}); });
}); });