chore: support commas in P selectors (#9769)

This commit is contained in:
jrandolf 2023-03-06 10:56:52 +01:00 committed by GitHub
parent 62d5f398b4
commit f84873c121
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 260 additions and 226 deletions

18
package-lock.json generated
View File

@ -61,7 +61,6 @@
"jpeg-js": "0.4.4",
"mime": "3.0.0",
"minimist": "1.2.7",
"mitt": "3.0.0",
"mocha": "10.2.0",
"ncp": "2.0.0",
"npm-run-all": "4.1.5",
@ -6663,6 +6662,12 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/parsel-js": {
"version": "1.0.3",
"resolved": "git+ssh://git@github.com/jrandolf/parsel.git#f3ac5a371c382df9e5a8425d239594802562ee0f",
"dev": true,
"license": "MIT"
},
"node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@ -8695,6 +8700,10 @@
"unbzip2-stream": "1.4.3",
"ws": "8.11.0"
},
"devDependencies": {
"mitt": "3.0.0",
"parsel-js": "github:jrandolf/parsel"
},
"engines": {
"node": ">=14.1.0"
},
@ -13926,6 +13935,11 @@
"lines-and-columns": "^1.1.6"
}
},
"parsel-js": {
"version": "git+ssh://git@github.com/jrandolf/parsel.git#f3ac5a371c382df9e5a8425d239594802562ee0f",
"dev": true,
"from": "parsel-js@github:jrandolf/parsel"
},
"path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@ -14121,6 +14135,8 @@
"devtools-protocol": "0.0.1094867",
"extract-zip": "2.0.1",
"https-proxy-agent": "5.0.1",
"mitt": "3.0.0",
"parsel-js": "github:jrandolf/parsel",
"proxy-from-env": "1.1.0",
"rimraf": "3.0.2",
"tar-fs": "2.1.1",

View File

@ -163,7 +163,6 @@
"jpeg-js": "0.4.4",
"mime": "3.0.0",
"minimist": "1.2.7",
"mitt": "3.0.0",
"mocha": "10.2.0",
"ncp": "2.0.0",
"npm-run-all": "4.1.5",

View File

@ -150,5 +150,9 @@
"typescript": {
"optional": true
}
},
"devDependencies": {
"mitt": "3.0.0",
"parsel-js": "github:jrandolf/parsel"
}
}

View File

@ -1,10 +1,34 @@
/**
* Copyright 2023 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type {AwaitableIterable} from '../common/types.js';
import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js';
import {isErrorLike} from '../util/ErrorLike.js';
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
import {customQuerySelectors} from './CustomQuerySelector.js';
import {parsePSelectors, PSelector} from './PSelectorParser.js';
import {
ComplexPSelector,
ComplexPSelectorList,
CompoundPSelector,
CSSSelector,
parsePSelectors,
PCombinator,
PPseudoSelector,
} from './PSelectorParser.js';
import {textQuerySelectorAll} from './TextQuerySelector.js';
import {deepChildren, deepDescendents} from './util.js';
import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
@ -18,50 +42,17 @@ class SelectorError extends Error {
class PQueryEngine {
#input: string;
#deepShadowSelectors: PSelector[][][];
#shadowSelectors: PSelector[][];
#selectors: PSelector[];
#selector: PSelector | undefined;
#complexSelector: ComplexPSelector;
#compoundSelector: CompoundPSelector = [];
#selector: CSSSelector | PPseudoSelector | undefined = undefined;
elements: AwaitableIterable<Node>;
constructor(element: Node, selector: string) {
this.#input = selector.trim();
if (this.#input.length === 0) {
throw new SelectorError(this.#input, 'The provided selector is empty.');
}
try {
this.#deepShadowSelectors = parsePSelectors(this.#input);
} catch (error) {
if (!isErrorLike(error)) {
throw new SelectorError(this.#input, String(error));
}
throw new SelectorError(this.#input, error.message);
}
// If there are any empty elements, then this implies the selector has
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
// treat as illegal, similar to existing behavior.
if (
this.#deepShadowSelectors.some(shadowSelectors => {
return shadowSelectors.some(selectors => {
return selectors.length === 0;
});
})
) {
throw new SelectorError(
this.#input,
'Multiple deep combinators found in sequence.'
);
}
this.#shadowSelectors = this.#deepShadowSelectors.shift() as PSelector[][];
this.#selectors = this.#shadowSelectors.shift() as PSelector[];
this.#selector = this.#selectors.shift();
constructor(element: Node, input: string, complexSelector: ComplexPSelector) {
this.elements = [element];
this.#input = input;
this.#complexSelector = complexSelector;
this.#next();
}
async run(): Promise<void> {
@ -89,10 +80,10 @@ class PQueryEngine {
for (; this.#selector !== undefined; this.#next()) {
const selector = this.#selector;
const input = this.#input;
if (typeof selector === 'string') {
this.elements = AsyncIterableUtil.flatMap(
this.elements,
async function* (element) {
if (typeof selector === 'string') {
if (!element.parentElement) {
yield* (element as Element).querySelectorAll(selector);
return;
@ -108,9 +99,12 @@ class PQueryEngine {
yield* element.parentElement.querySelectorAll(
`:scope > :nth-child(${index})${selector}`
);
return;
}
);
} else {
this.elements = AsyncIterableUtil.flatMap(
this.elements,
async function* (element) {
switch (selector.name) {
case 'text':
yield* textQuerySelectorAll(element, selector.value);
@ -135,32 +129,44 @@ class PQueryEngine {
);
}
}
}
#next() {
if (this.#selectors.length === 0) {
if (this.#shadowSelectors.length === 0) {
if (this.#deepShadowSelectors.length === 0) {
if (this.#compoundSelector.length !== 0) {
this.#selector = this.#compoundSelector.shift();
return;
}
if (this.#complexSelector.length === 0) {
this.#selector = undefined;
return;
}
this.elements = AsyncIterableUtil.flatMap(
this.elements,
function* (element) {
yield* deepDescendents(element);
}
);
this.#shadowSelectors =
this.#deepShadowSelectors.shift() as PSelector[][];
}
const selector = this.#complexSelector.shift();
switch (selector) {
case PCombinator.Child: {
this.elements = AsyncIterableUtil.flatMap(
this.elements,
function* (element) {
yield* deepChildren(element);
}
);
this.#selectors = this.#shadowSelectors.shift() as PSelector[];
this.#next();
break;
}
case PCombinator.Descendent: {
this.elements = AsyncIterableUtil.flatMap(
this.elements,
function* (element) {
yield* deepDescendents(element);
}
);
this.#next();
break;
}
default:
this.#compoundSelector = selector as CompoundPSelector;
this.#next();
break;
}
this.#selector = this.#selectors.shift() as PSelector;
}
}
@ -173,9 +179,43 @@ export const pQuerySelectorAll = async function* (
root: Node,
selector: string
): AwaitableIterable<Node> {
const query = new PQueryEngine(root, selector);
let selectors: ComplexPSelectorList;
try {
selectors = parsePSelectors(selector);
} catch (error) {
if (!isErrorLike(error)) {
throw new SelectorError(selector, String(error));
}
throw new SelectorError(selector, error.message);
}
// If there are any empty elements, then this implies the selector has
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
// treat as illegal, similar to existing behavior.
if (
selectors.some(parts => {
let i = 0;
return parts.some(parts => {
if (typeof parts === 'string') {
++i;
} else {
i = 0;
}
return i > 1;
});
})
) {
throw new SelectorError(
selector,
'Multiple deep combinators found in sequence.'
);
}
for (const selectorParts of selectors) {
const query = new PQueryEngine(root, selector, selectorParts);
query.run();
yield* query.elements;
}
};
/**

View File

@ -14,145 +14,123 @@
* limitations under the License.
*/
type CSSSelector = string;
import {tokenize, Tokens, TOKENS} from 'parsel-js';
export type PSelector =
| {
export type CSSSelector = string;
export type PPseudoSelector = {
name: string;
value: string;
}
| CSSSelector;
};
/**
 * Puppeteer-specific combinators that can separate compound selectors inside
 * a P selector. The enum values are the literal combinator tokens as written
 * in the selector string.
 */
export const enum PCombinator {
// Written as `>>>` in a selector.
Descendent = '>>>',
// Written as `>>>>` in a selector.
Child = '>>>>',
}
/**
 * A sequence of plain CSS selector strings and `-p-` pseudo selectors that
 * are not separated by a P combinator.
 */
export type CompoundPSelector = Array<CSSSelector | PPseudoSelector>;
/**
 * Compound selectors together with the P combinators found between them, in
 * source order.
 */
export type ComplexPSelector = Array<CompoundPSelector | PCombinator>;
/**
 * A list of complex selectors; the parser starts a new entry at each comma
 * token.
 */
export type ComplexPSelectorList = ComplexPSelector[];
const PUPPETEER_PSEUDO_ELEMENT = /^::-p-([-a-zA-Z_]+)\(/;
// Replace the combinator pattern in the `TOKENS` table imported from
// parsel-js. The new pattern puts an alternative for `>>>` / `>>>>` (three or
// four `>` with optional surrounding whitespace) in front of the existing
// combinator source, so it is tried first. The regex is rebuilt with the `g`
// flag. Note that this mutates the imported table at module load time.
TOKENS['combinator'] = new RegExp(
`${/\s*(?:>{3,4})\s*|/.source}${TOKENS['combinator']!.source}`,
'g'
);
class PSelectorParser {
#input: string;
#escaped = false;
#quoted = false;
class TokenSpan {
#tokens: Tokens[] = [];
#selector: string;
// The first level are deep roots. The second level are shallow roots.
#selectors: PSelector[][][] = [[[]]];
constructor(input: string) {
this.#input = input;
constructor(selector: string) {
this.#selector = selector;
}
get selectors(): PSelector[][][] {
return this.#selectors;
get length(): number {
return this.#tokens.length;
}
parse(): void {
for (let i = 0; i < this.#input.length; ++i) {
if (this.#escaped) {
this.#escaped = false;
add(token: Tokens) {
this.#tokens.push(token);
}
toStringAndClear() {
const startToken = this.#tokens[0] as Tokens;
const endToken = this.#tokens[this.#tokens.length - 1] as Tokens;
this.#tokens.splice(0);
return this.#selector.slice(startToken.pos[0], endToken.pos[1]);
}
}
const ESCAPE_REGEXP = /\\[\s\S]/g;
const unquote = (text: string): string => {
if (text.length > 1) {
for (const char of ['"', "'"]) {
if (!text.startsWith(char) || !text.endsWith(char)) {
continue;
}
switch (this.#input[i]) {
case '\\': {
this.#escaped = true;
break;
}
case '"': {
this.#quoted = !this.#quoted;
break;
}
default: {
if (this.#quoted) {
break;
}
const remainder = this.#input.slice(i);
if (remainder.startsWith('>>>>')) {
this.#push(this.#input.slice(0, i));
this.#input = remainder.slice('>>>>'.length);
this.#parseDeepChild();
} else if (remainder.startsWith('>>>')) {
this.#push(this.#input.slice(0, i));
this.#input = remainder.slice('>>>'.length);
this.#parseDeepDescendent();
} else {
const result = PUPPETEER_PSEUDO_ELEMENT.exec(remainder);
if (!result) {
continue;
}
const [match, name] = result;
this.#push(this.#input.slice(0, i));
this.#input = remainder.slice(match.length);
this.#push({
name: name as string,
value: this.#scanParameter(),
return text
.slice(char.length, -char.length)
.replace(ESCAPE_REGEXP, match => {
return match.slice(1);
});
}
}
}
}
this.#push(this.#input);
}
return text;
};
#push(selector: PSelector) {
if (typeof selector === 'string') {
// We trim only the end since `.foo` and ` .foo` are different.
selector = selector.trimEnd();
if (selector.length === 0) {
return;
export function parsePSelectors(selector: string): ComplexPSelectorList {
const tokens = tokenize(selector);
if (tokens.length === 0) {
return [];
}
let compoundSelector: CompoundPSelector = [];
let complexSelector: ComplexPSelector = [compoundSelector];
const selectors: ComplexPSelectorList = [complexSelector];
const storage = new TokenSpan(selector);
for (const token of tokens) {
switch (token.type) {
case 'combinator':
switch (token.content) {
case '>>>':
if (storage.length) {
compoundSelector.push(storage.toStringAndClear());
}
const roots = this.#selectors[this.#selectors.length - 1]!;
roots[roots.length - 1]!.push(selector);
compoundSelector = [];
complexSelector.push(PCombinator.Descendent);
complexSelector.push(compoundSelector);
continue;
case '>>>>':
if (storage.length) {
compoundSelector.push(storage.toStringAndClear());
}
#parseDeepChild() {
this.#selectors[this.#selectors.length - 1]!.push([]);
}
#parseDeepDescendent() {
this.#selectors.push([[]]);
}
#scanParameter(): string {
const char = this.#input[0];
switch (char) {
case "'":
case '"':
this.#input = this.#input.slice(1);
const parameter = this.#scanEscapedValueTill(char);
if (!this.#input.startsWith(')')) {
throw new Error("Expected ')'");
}
this.#input = this.#input.slice(1);
return parameter;
default:
return this.#scanEscapedValueTill(')');
}
}
#scanEscapedValueTill(end: string): string {
let string = '';
for (let i = 0; i < this.#input.length; ++i) {
if (this.#escaped) {
this.#escaped = false;
string += this.#input[i];
compoundSelector = [];
complexSelector.push(PCombinator.Child);
complexSelector.push(compoundSelector);
continue;
}
switch (this.#input[i]) {
case '\\': {
this.#escaped = true;
break;
case 'pseudo-element':
if (!token.name.startsWith('-p-')) {
break;
}
case end: {
this.#input = this.#input.slice(i + 1);
return string;
if (storage.length) {
compoundSelector.push(storage.toStringAndClear());
}
default: {
string += this.#input[i];
compoundSelector.push({
name: token.name.slice(3),
value: unquote(token.argument ?? ''),
});
continue;
case 'comma':
if (storage.length) {
compoundSelector.push(storage.toStringAndClear());
}
compoundSelector = [];
complexSelector = [compoundSelector];
selectors.push(complexSelector);
continue;
}
storage.add(token);
}
throw new Error(`Expected \`${end}\``);
if (storage.length) {
compoundSelector.push(storage.toStringAndClear());
}
}
export function parsePSelectors(selector: string): PSelector[][][] {
const parser = new PSelectorParser(selector);
parser.parse();
return parser.selectors;
return selectors;
}

View File

@ -1,5 +1,5 @@
/**
* CommonJS JavaScript code that provides the puppeteer utilities. See the
* JavaScript code that provides the puppeteer utilities. See the
* [README](https://github.com/puppeteer/puppeteer/blob/main/src/injected/README.md)
* for more information on injection.
*

View File

@ -360,6 +360,7 @@ describe('Query handler tests', function () {
beforeEach(async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
Puppeteer.clearCustomQueryHandlers();
});
it('should work with CSS selectors', async () => {
@ -386,8 +387,6 @@ describe('Query handler tests', function () {
it('should work ARIA selectors', async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
const element = await page.$('div ::-p-aria(world)');
assert(element, 'Could not find element');
expect(
@ -399,8 +398,6 @@ describe('Query handler tests', function () {
it('should work XPath selectors', async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
const element = await page.$('div ::-p-xpath(//button)');
assert(element, 'Could not find element');
expect(
@ -411,16 +408,14 @@ describe('Query handler tests', function () {
});
it('should work with custom selectors', async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
Puppeteer.clearCustomQueryHandlers();
Puppeteer.registerCustomQueryHandler('div', {
queryOne() {
return document.querySelector('div');
},
});
const element = await page.$('::-p-div()');
const {page} = getTestState();
const element = await page.$('::-p-div');
assert(element, 'Could not find element');
expect(
await element.evaluate(element => {
@ -431,8 +426,6 @@ describe('Query handler tests', function () {
it('should work with custom selectors with args', async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
Puppeteer.clearCustomQueryHandlers();
Puppeteer.registerCustomQueryHandler('div', {
queryOne(_, selector) {
if (selector === 'true') {
@ -471,7 +464,7 @@ describe('Query handler tests', function () {
).toBeTruthy();
}
{
const element = await page.$('::-p-div()');
const element = await page.$('::-p-div');
assert(element, 'Could not find element');
expect(
await element.evaluate(element => {
@ -483,8 +476,6 @@ describe('Query handler tests', function () {
it('should work with :hover', async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
let button = await page.$('div ::-p-text(world)');
assert(button, 'Could not find element');
await button.hover();
@ -497,5 +488,11 @@ describe('Query handler tests', function () {
});
expect(value).toMatchObject({textContent: 'world', tagName: 'BUTTON'});
});
// The selector has two comma-separated branches (`div` and
// `::-p-text(world)`); the test expects `$$` to return two elements in total.
it('should work with commas', async () => {
const {page} = getTestState();
const elements = await page.$$('div, ::-p-text(world)');
expect(elements.length).toStrictEqual(2);
});
});
});