puppeteer/src/common/QueryHandler.ts

297 lines
8.4 KiB
TypeScript
Raw Normal View History

/**
* Copyright 2020 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
feat!: type inference for evaluation types (#8547) This PR greatly improves the types within Puppeteer: - **Almost everything** is auto-deduced. - Parameters don't need to be specified in the function. They are deduced from the spread. - Return types don't need to be specified. They are deduced from the function. (More on this below) - Selections based on tag names correctly deduce element type, similar to TypeScript's mechanism for `getElementByTagName`. - [**BREAKING CHANGE**] We've removed the ability to declare return types in type arguments for the following reasons: 1. Setting them will indubitably break auto-deduction. 2. You can just use `as ...` in TypeScript to coerce the correct type (given it makes sense). - [**BREAKING CHANGE**] `waitFor` is officially gone. To migrate to these changes, there are only four things you may need to change: - If you set a return type using the `ReturnType` type parameter, remove it and use `as ...` and `HandleFor` (if necessary). ⛔ `evaluate<ReturnType>(a: number, b: number) => {...}, a, b)` ✅ `(await evaluate(a, b) => {...}, a, b)) as ReturnType` ⛔ `evaluateHandle<ReturnType>(a: number, b: number) => {...}, a, b)` ✅ `(await evaluateHandle(a, b) => {...}, a, b)) as HandleFor<ReturnType>` - If you set any type parameters in the *parameters* of an evaluation function, remove them. ⛔ `evaluate(a: number, b: number) => {...}, a, b)` ✅ `evaluate(a, b) => {...}, a, b)` - If you set any type parameters in the method's declaration, remove them. ⛔ `evaluate<(a: number, b: number) => void>((a, b) => {...}, a, b)` ✅ `evaluate(a, b) => {...}, a, b)`
2022-06-23 09:29:46 +00:00
import {ariaHandler} from './AriaQueryHandler.js';
import {DOMWorld, WaitForSelectorOptions} from './DOMWorld.js';
2022-06-23 09:31:43 +00:00
import {ElementHandle} from './ElementHandle.js';
import {JSHandle} from './JSHandle.js';
/**
 * Handle-level counterpart of a query handler: every member works with
 * `ElementHandle` / `JSHandle` objects instead of raw in-page nodes. All
 * members are optional, so callers must check for presence before use.
 *
 * @internal
 */
export interface InternalQueryHandler {
/**
 * Resolves to a handle for a single node matching `selector` below
 * `element`, or to `null` when there is no match.
 */
queryOne?: (
element: ElementHandle<Node>,
selector: string
) => Promise<ElementHandle<Node> | null>;
/**
 * Resolves to one handle per node matching `selector` below `element`.
 */
queryAll?: (
element: ElementHandle<Node>,
selector: string
) => Promise<Array<ElementHandle<Node>>>;
/**
 * Selector-waiting entry point. Handlers built by
 * `createInternalQueryHandler` delegate this to
 * `domWorld._waitForSelectorInPage`.
 */
waitFor?: (
domWorld: DOMWorld,
selector: string,
options: WaitForSelectorOptions
) => Promise<ElementHandle<Node> | null>;
/**
 * Like `queryAll`, but resolves to a single handle that wraps the whole
 * array of matching nodes instead of one handle per node.
 */
queryAllArray?: (
element: ElementHandle<Node>,
selector: string
) => Promise<JSHandle<Node[]>>;
}
/**
 * Contains two functions `queryOne` and `queryAll` that can
 * be {@link registerCustomQueryHandler | registered}
 * as alternative querying strategies. The functions `queryOne` and `queryAll`
 * are executed in the page context. `queryOne` should take a `Node` and a
 * selector string as arguments and return a single `Node`, or `null` if no
 * node is found. `queryAll` takes the same arguments but should instead
 * return an array with all the nodes that match the given query selector.
 *
 * Both functions are optional; a handler may provide either one or both.
 *
 * @public
 */
export interface CustomQueryHandler {
queryOne?: (element: Node, selector: string) => Node | null;
queryAll?: (element: Node, selector: string) => Node[];
}
/**
 * Adapts a {@link CustomQueryHandler}, whose functions operate on in-page
 * nodes, into an {@link InternalQueryHandler} that operates on handles.
 *
 * - When `handler.queryOne` is present, `queryOne` and `waitFor` are provided.
 * - When `handler.queryAll` is present, `queryAll` and `queryAllArray` are
 *   provided.
 *
 * Intermediate handles created while querying are disposed before the
 * wrappers settle; only the handles handed back to the caller stay alive.
 *
 * @param handler - The custom query handler to wrap.
 * @returns The handle-based handler; members whose source function is missing
 * are left undefined.
 */
function createInternalQueryHandler(
  handler: CustomQueryHandler
): InternalQueryHandler {
  const internalHandler: InternalQueryHandler = {};

  if (handler.queryOne) {
    const queryOne = handler.queryOne;
    internalHandler.queryOne = async (element, selector) => {
      const jsHandle = await element.evaluateHandle(queryOne, selector);
      const elementHandle = jsHandle.asElement();
      if (elementHandle) {
        return elementHandle;
      }
      // The result was not an element (e.g. `null`): release the handle.
      await jsHandle.dispose();
      return null;
    };
    internalHandler.waitFor = (
      domWorld: DOMWorld,
      selector: string,
      options: WaitForSelectorOptions
    ) => {
      return domWorld._waitForSelectorInPage(queryOne, selector, options);
    };
  }

  if (handler.queryAll) {
    const queryAll = handler.queryAll;
    internalHandler.queryAll = async (element, selector) => {
      const jsHandle = await element.evaluateHandle(queryAll, selector);
      try {
        const properties = await jsHandle.getProperties();
        const result = [];
        for (const property of properties.values()) {
          const elementHandle = property.asElement();
          if (elementHandle) {
            result.push(elementHandle);
          }
        }
        return result;
      } finally {
        // Release the collection handle even when `getProperties` rejects.
        await jsHandle.dispose();
      }
    };
    internalHandler.queryAllArray = async (element, selector) => {
      const resultHandle = (await element.evaluateHandle(
        queryAll,
        selector
      )) as JSHandle<Element[] | NodeListOf<Element>>;
      try {
        // `Array.from` creates a fresh array, so the returned handle does not
        // depend on `resultHandle` staying alive.
        return await resultHandle.evaluateHandle(res => {
          return Array.from(res);
        });
      } finally {
        // Previously this intermediate handle was never disposed.
        await resultHandle.dispose();
      }
    };
  }

  return internalHandler;
}
/**
 * Handler used for selectors that carry no `<name>/` prefix. It defers to the
 * node's own `querySelector` / `querySelectorAll` and throws when the node
 * does not offer them.
 *
 * NOTE(review): the callbacks are passed to `evaluateHandle` by
 * `createInternalQueryHandler`, so they presumably run in the page context;
 * keep them as self-contained arrow functions without references to module
 * scope.
 */
const defaultHandler = createInternalQueryHandler({
  queryOne: (element, selector) => {
    if ('querySelector' in element) {
      const queryable = element as unknown as {
        querySelector(selector: string): Element;
      };
      return queryable.querySelector(selector);
    }
    throw new Error(
      `Could not invoke \`querySelector\` on node of type ${element.nodeName}.`
    );
  },
  queryAll: (element, selector) => {
    if ('querySelectorAll' in element) {
      const queryable = element as unknown as {
        querySelectorAll(selector: string): NodeList;
      };
      const matches = queryable.querySelectorAll(selector);
      // Copy the node list into a plain array.
      return [...matches];
    }
    throw new Error(
      `Could not invoke \`querySelectorAll\` on node of type ${element.nodeName}.`
    );
  },
});
/**
 * Handler registered under the `pierce` prefix. It walks the element tree
 * below the starting node and additionally descends into every shadow root
 * exposed through `shadowRoot`, matching elements with `Element.matches`.
 *
 * The starting node itself (and each shadow root itself) is never reported as
 * a match. A `Document` is replaced by its `documentElement` before walking.
 *
 * NOTE(review): the callbacks are passed to `evaluateHandle` by
 * `createInternalQueryHandler`, so they presumably run in the page context;
 * keep them as self-contained arrow functions without references to module
 * scope.
 */
const pierceHandler = createInternalQueryHandler({
  queryOne: (element, selector) => {
    let match: Node | null = null;
    const visit = (root: Node) => {
      const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
      do {
        const node = walker.currentNode as HTMLElement;
        if (node.shadowRoot) {
          // Search the shadow tree before testing the host itself.
          visit(node.shadowRoot);
        }
        // Shadow roots cannot be matched, and the walk's root is excluded.
        const isCandidate = !(node instanceof ShadowRoot) && node !== root;
        if (isCandidate && !match && node.matches(selector)) {
          match = node;
        }
      } while (!match && walker.nextNode());
    };
    visit(element instanceof Document ? element.documentElement : element);
    return match;
  },
  queryAll: (element, selector) => {
    const matches: Node[] = [];
    const gather = (root: Node) => {
      const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
      do {
        const node = walker.currentNode as HTMLElement;
        if (node.shadowRoot) {
          // Matches inside the shadow tree are collected before the host.
          gather(node.shadowRoot);
        }
        // Shadow roots cannot be matched, and the walk's root is excluded.
        const isCandidate = !(node instanceof ShadowRoot) && node !== root;
        if (isCandidate && node.matches(selector)) {
          matches.push(node);
        }
      } while (walker.nextNode());
    };
    gather(element instanceof Document ? element.documentElement : element);
    return matches;
  },
});
2022-06-13 09:16:25 +00:00
/**
 * Handlers that are always available, keyed by the selector prefix that
 * selects them. These names cannot be unregistered or re-registered.
 */
const builtInHandlers = new Map<string, InternalQueryHandler>([
  ['aria', ariaHandler],
  ['pierce', pierceHandler],
]);

/**
 * Registry of every handler reachable through a `<name>/` selector prefix.
 * It starts as a copy of the built-in handlers; custom handlers are added and
 * removed at runtime.
 */
const queryHandlers = new Map<string, InternalQueryHandler>(builtInHandlers);
/**
 * Registers a {@link CustomQueryHandler | custom query handler}.
 *
 * @remarks
 * After registration, the handler can be used everywhere where a selector is
 * expected by prepending the selection string with `<name>/`. The name is only
 * allowed to consist of lower- and upper case latin letters.
 *
 * @example
 * ```
 * puppeteer.registerCustomQueryHandler('text', { … });
 * const aHandle = await page.$('text/…');
 * ```
 *
 * @param name - The name that the custom query handler will be registered
 * under.
 * @param handler - The {@link CustomQueryHandler | custom query handler}
 * to register.
 * @throws Error if a handler (custom or built-in) is already registered under
 * `name`, or if `name` contains characters other than `[a-zA-Z]`.
 *
 * @public
 */
export function registerCustomQueryHandler(
  name: string,
  handler: CustomQueryHandler
): void {
  if (queryHandlers.has(name)) {
    throw new Error(`A custom query handler named "${name}" already exists`);
  }

  const isValidName = /^[a-zA-Z]+$/.test(name);
  if (!isValidName) {
    throw new Error(`Custom query handler names may only contain [a-zA-Z]`);
  }

  queryHandlers.set(name, createInternalQueryHandler(handler));
}
/**
 * Removes a previously registered custom query handler. Names of built-in
 * handlers and names that are not registered are silently ignored.
 *
 * @param name - The name of the query handler to unregister.
 *
 * @public
 */
export function unregisterCustomQueryHandler(name: string): void {
  if (builtInHandlers.has(name)) {
    // Built-in handlers must stay available.
    return;
  }
  // `Map.delete` is a no-op for unknown names.
  queryHandlers.delete(name);
}
/**
 * Lists the names of the custom query handlers that are currently
 * registered. Built-in handler names are not included.
 *
 * @returns a list with the names of all registered custom query handlers.
 *
 * @public
 */
export function customQueryHandlerNames(): string[] {
  return [...queryHandlers.keys()].filter(name => {
    return !builtInHandlers.has(name);
  });
}
/**
 * Unregisters every custom query handler. Built-in handlers stay registered.
 *
 * @public
 */
export function clearCustomQueryHandlers(): void {
  // The names are snapshotted into an array first, so removing entries while
  // looping is safe.
  for (const name of customQueryHandlerNames()) {
    unregisterCustomQueryHandler(name);
  }
}
/**
 * Resolves which query handler a selector addresses.
 *
 * A selector that starts with `<name>/` (where `<name>` consists of latin
 * letters only) is routed to the handler registered under `<name>`, and the
 * prefix is stripped from the selector. Any other selector is returned
 * unchanged together with the default handler.
 *
 * @param selector - The selector as written by the caller.
 * @returns The selector without its handler prefix and the handler to run it
 * with.
 * @throws Error if the selector names a handler that is not registered.
 *
 * @internal
 */
export function getQueryHandlerAndSelector(selector: string): {
  updatedSelector: string;
  queryHandler: InternalQueryHandler;
} {
  const hasCustomQueryHandler = /^[a-zA-Z]+\//.test(selector);
  if (!hasCustomQueryHandler) {
    return {updatedSelector: selector, queryHandler: defaultHandler};
  }

  // The regular expression above guarantees that a `/` is present.
  const index = selector.indexOf('/');
  const name = selector.slice(0, index);
  const updatedSelector = selector.slice(index + 1);

  const queryHandler = queryHandlers.get(name);
  if (!queryHandler) {
    throw new Error(
      `Query set to use "${name}", but no query handler of that name was found`
    );
  }

  return {
    updatedSelector,
    queryHandler,
  };
}