2023-03-06 09:56:52 +00:00
|
|
|
/**
 * Copyright 2023 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
2023-02-15 18:42:32 +00:00
|
|
|
import type {AwaitableIterable} from '../common/types.js';
|
|
|
|
import {AsyncIterableUtil} from '../util/AsyncIterableUtil.js';
|
2023-02-15 23:09:31 +00:00
|
|
|
|
2023-02-15 18:42:32 +00:00
|
|
|
import {ariaQuerySelectorAll} from './ARIAQuerySelector.js';
|
|
|
|
import {customQuerySelectors} from './CustomQuerySelector.js';
|
2023-03-06 09:56:52 +00:00
|
|
|
import {
|
|
|
|
ComplexPSelector,
|
|
|
|
ComplexPSelectorList,
|
|
|
|
CompoundPSelector,
|
|
|
|
CSSSelector,
|
|
|
|
parsePSelectors,
|
|
|
|
PCombinator,
|
|
|
|
PPseudoSelector,
|
|
|
|
} from './PSelectorParser.js';
|
2023-02-15 18:42:32 +00:00
|
|
|
import {textQuerySelectorAll} from './TextQuerySelector.js';
|
|
|
|
import {deepChildren, deepDescendents} from './util.js';
|
|
|
|
import {xpathQuerySelectorAll} from './XPathQuerySelector.js';
|
|
|
|
|
2023-03-16 13:41:38 +00:00
|
|
|
/**
 * Matches a character that can begin a type or universal selector: `-`, a
 * word character, any non-ASCII code point, or `*`.
 *
 * The `u` flag is required: without it `\P{ASCII}` is not a Unicode property
 * escape, and the class would instead match the literal characters
 * `P`, `{`, `A`, `S`, `C`, `I` and `}` while rejecting non-ASCII characters.
 */
const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/u;
|
|
|
|
|
2023-02-15 18:42:32 +00:00
|
|
|
/**
 * Error raised when a selector cannot be evaluated.
 *
 * The message has the form `<selector> is not a valid selector: <message>`.
 */
class SelectorError extends Error {
  /**
   * @param selector - The selector text being reported.
   * @param message - Explanation appended after the fixed prefix.
   */
  constructor(selector: string, message: string) {
    super(`${selector} is not a valid selector: ${message}`);
    // Identify the subclass in stack traces and `toString()` output.
    this.name = 'SelectorError';
  }
}
|
|
|
|
|
|
|
|
/**
 * Evaluates a single complex P selector against a starting node.
 *
 * The engine consumes the complex selector one part at a time. Each consumed
 * part replaces `elements` with an `AsyncIterableUtil.flatMap` over the
 * previous `elements`, so after `run()` the `elements` property holds the
 * composed result for the whole selector.
 */
class PQueryEngine {
  /** Full selector text as given by the caller; used in error messages. */
  #input: string;

  /** Remaining parts of the complex selector; `#next` shifts from the front. */
  #complexSelector: ComplexPSelector;
  /** Remaining parts of the compound selector currently being consumed. */
  #compoundSelector: CompoundPSelector = [];
  /** The selector to apply next, or `undefined` once everything is consumed. */
  #selector: CSSSelector | PPseudoSelector | undefined = undefined;

  /** Current result set; starts as the single root node. */
  elements: AwaitableIterable<Node>;

  /**
   * @param element - Node the query starts from.
   * @param input - Original selector text (for error reporting).
   * @param complexSelector - Parsed selector parts. The array is consumed in
   * place (entries are removed with `shift()`).
   */
  constructor(element: Node, input: string, complexSelector: ComplexPSelector) {
    this.elements = [element];
    this.#input = input;
    this.#complexSelector = complexSelector;
    this.#next();
  }

  /**
   * Applies every remaining selector part to `elements`.
   *
   * The method body contains no `await`; it only chains generator stages onto
   * `elements`.
   */
  async run(): Promise<void> {
    if (
      typeof this.#selector === 'string' &&
      this.#selector.trimStart() === ':scope'
    ) {
      // `:scope` has some special behavior depending on the node. It always
      // represents the current node within a compound selector, but by
      // itself, it depends on the node. For example, Document is
      // represented by `<html>`, but any HTMLElement is not represented by
      // itself (i.e. `null`). This can be troublesome if our combinators
      // are used right after so we treat this selector specially.
      this.#next();
    }

    for (; this.#selector !== undefined; this.#next()) {
      // Copy into locals: the generator functions below are not arrow
      // functions, so `this` is not the engine inside them.
      const selector = this.#selector;
      const input = this.#input;
      if (typeof selector === 'string') {
        // Plain CSS selector.
        this.elements = AsyncIterableUtil.flatMap(
          this.elements,
          async function* (element) {
            // An empty CSS selector yields nothing.
            if (!selector[0]) {
              return;
            }
            // The regular expression tests if the selector is a type/universal
            // selector. Any other case means we want to apply the selector onto
            // the element itself (e.g. `element.class`, `element>div`,
            // `element:hover`, etc.).
            if (IDENT_TOKEN_START.test(selector[0]) || !element.parentElement) {
              yield* (element as Element).querySelectorAll(selector);
              return;
            }

            // Find the 1-based position of `element` among its parent's
            // element children so that it can be addressed with `:nth-child`.
            let index = 0;
            for (const child of element.parentElement.children) {
              ++index;
              if (child === element) {
                break;
              }
            }
            // Query from the parent, pinning the first compound to `element`
            // itself and appending the selector to it.
            yield* element.parentElement.querySelectorAll(
              `:scope>:nth-child(${index})${selector}`
            );
          }
        );
      } else {
        // P pseudo selector: dispatch on its name.
        this.elements = AsyncIterableUtil.flatMap(
          this.elements,
          async function* (element) {
            switch (selector.name) {
              case 'text':
                yield* textQuerySelectorAll(element, selector.value);
                break;
              case 'xpath':
                yield* xpathQuerySelectorAll(element, selector.value);
                break;
              case 'aria':
                yield* ariaQuerySelectorAll(element, selector.value);
                break;
              default: {
                // Anything else must be a registered custom query selector.
                const querySelector = customQuerySelectors.get(selector.name);
                if (!querySelector) {
                  throw new SelectorError(
                    input,
                    `Unknown selector type: ${selector.name}`
                  );
                }
                yield* querySelector.querySelectorAll(element, selector.value);
              }
            }
          }
        );
      }
    }
  }

  /**
   * Advances `#selector` to the next selector to apply.
   *
   * Parts of the current compound selector are handed out first. Once it is
   * exhausted the next entry of the complex selector is taken: a combinator
   * expands `elements` through `deepChildren`/`deepDescendents` and then
   * continues with the following entry, while any other entry becomes the new
   * compound selector. `#selector` becomes `undefined` when nothing is left.
   */
  #next() {
    if (this.#compoundSelector.length !== 0) {
      this.#selector = this.#compoundSelector.shift();
      return;
    }
    if (this.#complexSelector.length === 0) {
      this.#selector = undefined;
      return;
    }
    const selector = this.#complexSelector.shift();
    switch (selector) {
      case PCombinator.Child: {
        this.elements = AsyncIterableUtil.flatMap(
          this.elements,
          function* (element) {
            yield* deepChildren(element);
          }
        );
        this.#next();
        break;
      }
      case PCombinator.Descendent: {
        this.elements = AsyncIterableUtil.flatMap(
          this.elements,
          function* (element) {
            yield* deepDescendents(element);
          }
        );
        this.#next();
        break;
      }
      default:
        this.#compoundSelector = selector as CompoundPSelector;
        this.#next();
        break;
    }
  }
}
|
|
|
|
|
2023-03-13 15:11:16 +00:00
|
|
|
/**
 * Computes, for a node, the list of sibling indices along the path from its
 * topmost ancestor down to the node itself.
 *
 * Results are memoized per node in a `WeakMap`.
 */
class DepthCalculator {
  #cache = new WeakMap<Node, number[]>();

  /**
   * @param node - Node to locate; `null` ends the walk up the tree.
   * @param depth - Indices already collected for the levels below `node`;
   * they are appended after the indices of `node` and its ancestors.
   * @returns A new array: the indices from the topmost ancestor down to
   * `node`, followed by `depth`.
   */
  calculate(node: Node | null, depth: number[] = []): number[] {
    if (node === null) {
      return depth;
    }
    // A shadow root is positioned where its host element is.
    if (node instanceof ShadowRoot) {
      node = node.host;
    }

    const cachedDepth = this.#cache.get(node);
    if (cachedDepth) {
      return [...cachedDepth, ...depth];
    }

    // Sibling index = number of preceding sibling nodes.
    let index = 0;
    for (
      let prevSibling = node.previousSibling;
      prevSibling;
      prevSibling = prevSibling.previousSibling
    ) {
      ++index;
    }

    // `value` is the complete path of `node` itself; cache it before
    // appending the caller-provided suffix.
    const value = this.calculate(node.parentNode, [index]);
    this.#cache.set(node, value);
    return [...value, ...depth];
  }
}
|
|
|
|
|
|
|
|
/**
 * Orders two index paths (as produced by `DepthCalculator.calculate`) so that
 * sorting with this comparator yields document order.
 *
 * Paths are compared position by position from the start. At the first
 * difference the smaller index sorts first. A path that runs out first is a
 * prefix of the other (i.e. an ancestor) and also sorts first.
 *
 * @returns `-1` if `a` sorts before `b`, `1` if after, `0` if equal.
 */
const compareDepths = (a: number[], b: number[]): -1 | 0 | 1 => {
  const length = Math.max(a.length, b.length);
  for (let position = 0; position < length; ++position) {
    // A missing entry gets -1, which is below every real sibling index, so
    // the shorter (ancestor) path wins.
    const i = a[position] ?? -1;
    const j = b[position] ?? -1;
    if (i !== j) {
      return i < j ? -1 : 1;
    }
  }
  return 0;
};
|
|
|
|
|
|
|
|
/**
 * Collects every node from `elements`, drops duplicates, and yields the
 * remaining nodes ordered by `compareDepths` applied to their
 * `DepthCalculator` paths.
 *
 * The input is consumed completely before the first node is yielded.
 */
const domSort = async function* (elements: AwaitableIterable<Node>) {
  // A Set keeps one entry per node even if several selectors matched it.
  const results = new Set<Node>();
  for await (const element of elements) {
    results.add(element);
  }
  // One calculator for the whole sort so ancestor paths are shared via its
  // cache.
  const calculator = new DepthCalculator();
  yield* [...results.values()]
    .map(result => {
      return [result, calculator.calculate(result)] as const;
    })
    .sort(([, a], [, b]) => {
      return compareDepths(a, b);
    })
    .map(([result]) => {
      return result;
    });
};
|
|
|
|
|
2023-03-13 14:37:30 +00:00
|
|
|
/**
 * Minimal shape used to call `querySelectorAll` on a root whose static type is
 * only `Node`; the method type is the one declared on `Document`.
 */
type QueryableNode = Pick<Document, 'querySelectorAll'>;
|
|
|
|
|
2023-02-15 18:42:32 +00:00
|
|
|
/**
 * Queries the given node for all nodes matching the given P selector.
 *
 * If the selector cannot be parsed by `parsePSelectors`, or the parser reports
 * it as pure CSS, the query is delegated to the root's native
 * `querySelectorAll`. Otherwise every complex selector in the list is
 * evaluated with a `PQueryEngine` and the combined results are passed through
 * `domSort`.
 *
 * @param root - Node to start querying from.
 * @param selector - Selector text.
 * @returns An iterable (possibly async) of the matching nodes.
 * @throws SelectorError if two combinators appear directly after one another.
 *
 * @internal
 */
export const pQuerySelectorAll = function (
  root: Node,
  selector: string
): AwaitableIterable<Node> {
  let selectors: ComplexPSelectorList;
  let isPureCSS: boolean;
  try {
    [selectors, isPureCSS] = parsePSelectors(selector);
  } catch {
    // Deliberate best effort: let the native engine handle (or reject) what
    // the P selector parser could not parse.
    return (root as unknown as QueryableNode).querySelectorAll(selector);
  }

  if (isPureCSS) {
    return (root as unknown as QueryableNode).querySelectorAll(selector);
  }

  // Top-level parts that are strings are the combinators (presumably the
  // `PCombinator` values; compound selectors are arrays). Two or more of them
  // in a row (e.g. `>>> >>>>`) is treated as illegal, similar to existing
  // behavior.
  const hasConsecutiveCombinators = selectors.some(complexSelector => {
    let consecutive = 0;
    return complexSelector.some(part => {
      if (typeof part === 'string') {
        ++consecutive;
      } else {
        consecutive = 0;
      }
      return consecutive > 1;
    });
  });
  if (hasConsecutiveCombinators) {
    throw new SelectorError(
      selector,
      'Multiple deep combinators found in sequence.'
    );
  }

  return domSort(
    AsyncIterableUtil.flatMap(selectors, selectorParts => {
      const query = new PQueryEngine(root, selector, selectorParts);
      // `run()` has no `await` in its body, so `query.elements` is fully
      // assembled when the call returns; the promise is intentionally not
      // awaited.
      void query.run();
      return query.elements;
    })
  );
};
|
|
|
|
|
|
|
|
/**
 * Queries the given node for the first node matching the given P selector.
 *
 * @param root - Node to start querying from.
 * @param selector - Selector text.
 * @returns The first node produced by `pQuerySelectorAll`, or `null` if it
 * produces none.
 *
 * @internal
 */
export const pQuerySelector = async function (
  root: Node,
  selector: string
): Promise<Node | null> {
  // Returning from inside the loop stops iteration after the first result.
  for await (const element of pQuerySelectorAll(root, selector)) {
    return element;
  }
  return null;
};
|