From 56f99f7b10e7f696b92287af3abdc0fca0d51ddb Mon Sep 17 00:00:00 2001 From: jrandolf <101637635+jrandolf@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:31:30 -0800 Subject: [PATCH] refactor: migrate to iterator-based query handlers (#9676) --- docs/api/index.md | 1 + docs/api/puppeteer.awaitableiterable.md | 11 + docs/api/puppeteer.customqueryhandler.md | 8 +- .../puppeteer.customqueryhandler.queryall.md | 2 +- .../src/common/AriaQueryHandler.ts | 197 +++----- .../src/common/CSSQueryHandler.ts | 29 ++ .../src/common/CustomQueryHandler.ts | 95 ++++ .../src/common/ElementHandle.ts | 37 +- packages/puppeteer-core/src/common/Frame.ts | 4 +- .../src/common/GetQueryHandler.ts | 68 +++ .../src/common/HandleIterator.ts | 80 ++++ .../src/common/IsolatedWorld.ts | 19 +- .../puppeteer-core/src/common/IterableUtil.ts | 48 ++ .../src/common/PierceQueryHandler.ts | 38 ++ .../puppeteer-core/src/common/Puppeteer.ts | 3 +- .../puppeteer-core/src/common/QueryHandler.ts | 430 ++++++------------ .../src/common/TextQueryHandler.ts | 30 ++ .../src/common/XPathQueryHandler.ts | 30 ++ packages/puppeteer-core/src/common/common.ts | 1 + packages/puppeteer-core/src/common/types.ts | 5 + packages/puppeteer-core/src/common/util.ts | 13 +- .../src/injected/TextQuerySelector.ts | 62 +-- .../src/injected/XPathQuerySelector.ts | 22 +- .../puppeteer-core/src/injected/injected.ts | 2 + packages/puppeteer-core/src/injected/util.ts | 37 -- packages/puppeteer-core/src/puppeteer-core.ts | 2 +- packages/puppeteer-core/src/util/Function.ts | 35 ++ test/src/queryselector.spec.ts | 2 +- 28 files changed, 753 insertions(+), 558 deletions(-) create mode 100644 docs/api/puppeteer.awaitableiterable.md create mode 100644 packages/puppeteer-core/src/common/CSSQueryHandler.ts create mode 100644 packages/puppeteer-core/src/common/CustomQueryHandler.ts create mode 100644 packages/puppeteer-core/src/common/GetQueryHandler.ts create mode 100644 packages/puppeteer-core/src/common/HandleIterator.ts create mode 100644 packages/puppeteer-core/src/common/IterableUtil.ts create mode 100644 packages/puppeteer-core/src/common/PierceQueryHandler.ts create mode 100644 packages/puppeteer-core/src/common/TextQueryHandler.ts create mode 100644 packages/puppeteer-core/src/common/XPathQueryHandler.ts create mode 100644 packages/puppeteer-core/src/util/Function.ts diff --git a/docs/api/index.md b/docs/api/index.md index 091c5c53..93c5da94 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -144,6 +144,7 @@ sidebar_label: API | ------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | | [ActionResult](./puppeteer.actionresult.md) | | | [Awaitable](./puppeteer.awaitable.md) | | +| [AwaitableIterable](./puppeteer.awaitableiterable.md) | | | [ChromeReleaseChannel](./puppeteer.chromereleasechannel.md) | | | [ConsoleMessageType](./puppeteer.consolemessagetype.md) | The supported types for console messages. | | [ElementFor](./puppeteer.elementfor.md) | | diff --git a/docs/api/puppeteer.awaitableiterable.md b/docs/api/puppeteer.awaitableiterable.md new file mode 100644 index 00000000..9c3fc65e --- /dev/null +++ b/docs/api/puppeteer.awaitableiterable.md @@ -0,0 +1,11 @@ +--- +sidebar_label: AwaitableIterable +--- + +# AwaitableIterable type + +#### Signature: + +```typescript +export type AwaitableIterable = Iterable | AsyncIterable; +``` diff --git a/docs/api/puppeteer.customqueryhandler.md b/docs/api/puppeteer.customqueryhandler.md index a18849dc..7574b21f 100644 --- a/docs/api/puppeteer.customqueryhandler.md +++ b/docs/api/puppeteer.customqueryhandler.md @@ -12,7 +12,7 @@ export interface CustomQueryHandler ## Properties -| Property | Modifiers | Type | Description | Default | -| ------------------------------------------------------- | --------- | ------------------------------------------------- | ----------------- | ------- | -| [queryAll?](./puppeteer.customqueryhandler.queryall.md) | | (node: Node, selector: string) => Node\[\] | (Optional) | | -| [queryOne?](./puppeteer.customqueryhandler.queryone.md) | | (node: Node, selector: string) => Node \| null | (Optional) | | +| Property | Modifiers | Type | Description | Default | +| ------------------------------------------------------- | --------- | --------------------------------------------------------- | ----------------- | ------- | +| [queryAll?](./puppeteer.customqueryhandler.queryall.md) | | (node: Node, selector: string) => Iterable<Node> | (Optional) | | +| [queryOne?](./puppeteer.customqueryhandler.queryone.md) | | (node: Node, selector: string) => Node \| null | (Optional) | | diff --git a/docs/api/puppeteer.customqueryhandler.queryall.md b/docs/api/puppeteer.customqueryhandler.queryall.md index 167a3860..6ee8044f 100644 --- a/docs/api/puppeteer.customqueryhandler.queryall.md +++ b/docs/api/puppeteer.customqueryhandler.queryall.md @@ -8,6 +8,6 @@ sidebar_label: CustomQueryHandler.queryAll ```typescript interface CustomQueryHandler { - queryAll?: (node: Node, selector: string) => Node[]; + queryAll?: (node: Node, selector: string) => Iterable; } ``` diff --git a/packages/puppeteer-core/src/common/AriaQueryHandler.ts b/packages/puppeteer-core/src/common/AriaQueryHandler.ts index abbe6f6b..bfcea4bd 100644 --- a/packages/puppeteer-core/src/common/AriaQueryHandler.ts +++ b/packages/puppeteer-core/src/common/AriaQueryHandler.ts @@ -16,46 +16,43 @@ import {Protocol} from 'devtools-protocol'; +import {ElementHandle} from '../api/ElementHandle.js'; import {assert} from '../util/assert.js'; import {CDPSession} from './Connection.js'; -import {MAIN_WORLD, PUPPETEER_WORLD} from './IsolatedWorlds.js'; - -import type {ElementHandle} from '../api/ElementHandle.js'; -import type {PuppeteerQueryHandler} from './QueryHandler.js'; import type {Frame} from './Frame.js'; +import type {WaitForSelectorOptions} from './IsolatedWorld.js'; +import {IterableUtil} from './IterableUtil.js'; +import {QueryHandler, QuerySelector} from './QueryHandler.js'; +import {AwaitableIterable} from './types.js'; -async function queryAXTree( +const queryAXTree = async ( client: CDPSession, element: ElementHandle, accessibleName?: string, role?: string -): Promise { +): Promise => { const {nodes} = await client.send('Accessibility.queryAXTree', { objectId: element.remoteObject().objectId, accessibleName, role, }); - const filteredNodes: Protocol.Accessibility.AXNode[] = nodes.filter( - (node: Protocol.Accessibility.AXNode) => { - return !node.role || node.role.value !== 'StaticText'; - } - ); - return filteredNodes; -} + return nodes.filter((node: Protocol.Accessibility.AXNode) => { + return !node.role || node.role.value !== 'StaticText'; + }); +}; + +type ARIASelector = {name?: string; role?: string}; + +const KNOWN_ATTRIBUTES = Object.freeze(['name', 'role']); +const isKnownAttribute = ( + attribute: string +): attribute is keyof ARIASelector => { + return KNOWN_ATTRIBUTES.includes(attribute); +}; const normalizeValue = (value: string): string => { return value.replace(/ +/g, ' ').trim(); }; -const knownAttributes = new Set(['name', 'role']); -const attributeRegexp = - /\[\s*(?\w+)\s*=\s*(?"|')(?\\.|.*?(?=\k))\k\s*\]/g; - -type ARIAQueryOption = {name?: string; role?: string}; -function isKnownAttribute( - attribute: string -): attribute is keyof ARIAQueryOption { - return knownAttributes.has(attribute); -} /** * The selectors consist of an accessible name to query for and optionally @@ -68,11 +65,13 @@ function isKnownAttribute( * - 'label' queries for elements with name 'label' and any role. * - '[name=""][role="button"]' queries for elements with no name and role 'button'. */ -function parseAriaSelector(selector: string): ARIAQueryOption { - const queryOptions: ARIAQueryOption = {}; +const ATTRIBUTE_REGEXP = + /\[\s*(?\w+)\s*=\s*(?"|')(?\\.|.*?(?=\k))\k\s*\]/g; +const parseARIASelector = (selector: string): ARIASelector => { + const queryOptions: ARIASelector = {}; const defaultName = selector.replace( - attributeRegexp, - (_, attribute: string, _quote: string, value: string) => { + ATTRIBUTE_REGEXP, + (_, attribute, __, value) => { attribute = attribute.trim(); assert( isKnownAttribute(attribute), @@ -86,104 +85,56 @@ function parseAriaSelector(selector: string): ARIAQueryOption { queryOptions.name = normalizeValue(defaultName); } return queryOptions; -} - -const queryOneId = async (element: ElementHandle, selector: string) => { - const {name, role} = parseAriaSelector(selector); - const res = await queryAXTree(element.client, element, name, role); - if (!res[0] || !res[0].backendDOMNodeId) { - return null; - } - return res[0].backendDOMNodeId; -}; - -const queryOne: PuppeteerQueryHandler['queryOne'] = async ( - element, - selector -) => { - const id = await queryOneId(element, selector); - if (!id) { - return null; - } - return (await element.frame.worlds[MAIN_WORLD].adoptBackendNode( - id - )) as ElementHandle; -}; - -const waitFor: PuppeteerQueryHandler['waitFor'] = async ( - elementOrFrame, - selector, - options -) => { - let frame: Frame; - let element: ElementHandle | undefined; - if ('isOOPFrame' in elementOrFrame) { - frame = elementOrFrame; - } else { - frame = elementOrFrame.frame; - element = await frame.worlds[PUPPETEER_WORLD].adoptHandle(elementOrFrame); - } - - const ariaQuerySelector = async (selector: string) => { - const id = await queryOneId( - element || (await frame.worlds[PUPPETEER_WORLD].document()), - selector - ); - if (!id) { - return null; - } - return (await frame.worlds[PUPPETEER_WORLD].adoptBackendNode( - id - )) as ElementHandle; - }; - - const result = await frame.worlds[PUPPETEER_WORLD]._waitForSelectorInPage( - (_: Element, selector: string) => { - return ( - globalThis as unknown as { - ariaQuerySelector(selector: string): Node | null; - } - ).ariaQuerySelector(selector); - }, - element, - selector, - options, - new Map([['ariaQuerySelector', ariaQuerySelector]]) - ); - if (element) { - await element.dispose(); - } - - const handle = result?.asElement(); - if (!handle) { - await result?.dispose(); - return null; - } - return handle.frame.worlds[MAIN_WORLD].transferHandle(handle); -}; - -const queryAll: PuppeteerQueryHandler['queryAll'] = async ( - element, - selector -) => { - const exeCtx = element.executionContext(); - const {name, role} = parseAriaSelector(selector); - const res = await queryAXTree(exeCtx._client, element, name, role); - const world = exeCtx._world!; - return Promise.all( - res.map(axNode => { - return world.adoptBackendNode(axNode.backendDOMNodeId) as Promise< - ElementHandle - >; - }) - ); }; /** * @internal */ -export const ariaHandler: PuppeteerQueryHandler = { - queryOne, - waitFor, - queryAll, -}; +export interface ARIAQuerySelectorContext { + __ariaQuerySelector(node: Node, selector: string): Promise; +} + +/** + * @internal + */ +export class ARIAQueryHandler extends QueryHandler { + static override querySelector: QuerySelector = async (node, selector) => { + const context = globalThis as unknown as ARIAQuerySelectorContext; + return context.__ariaQuerySelector(node, selector); + }; + + static override async *queryAll( + element: ElementHandle, + selector: string + ): AwaitableIterable> { + const context = element.executionContext(); + const {name, role} = parseARIASelector(selector); + const results = await queryAXTree(context._client, element, name, role); + const world = context._world!; + yield* IterableUtil.map(results, node => { + return world.adoptBackendNode(node.backendDOMNodeId) as Promise< + ElementHandle + >; + }); + } + + static override queryOne = async ( + element: ElementHandle, + selector: string + ): Promise | null> => { + return (await IterableUtil.first(this.queryAll(element, selector))) ?? null; + }; + + static override async waitFor( + elementOrFrame: ElementHandle | Frame, + selector: string, + options: WaitForSelectorOptions + ): Promise | null> { + return super.waitFor( + elementOrFrame, + selector, + options, + new Map([['__ariaQuerySelector', this.queryOne]]) + ); + } +} diff --git a/packages/puppeteer-core/src/common/CSSQueryHandler.ts b/packages/puppeteer-core/src/common/CSSQueryHandler.ts new file mode 100644 index 00000000..03ca2266 --- /dev/null +++ b/packages/puppeteer-core/src/common/CSSQueryHandler.ts @@ -0,0 +1,29 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {QueryHandler, QuerySelector, QuerySelectorAll} from './QueryHandler.js'; + +/** + * @internal + */ +export class CSSQueryHandler extends QueryHandler { + static override querySelector: QuerySelector = (element, selector) => { + return (element as Element).querySelector(selector); + }; + static override querySelectorAll: QuerySelectorAll = (element, selector) => { + return (element as Element).querySelectorAll(selector); + }; +} diff --git a/packages/puppeteer-core/src/common/CustomQueryHandler.ts b/packages/puppeteer-core/src/common/CustomQueryHandler.ts new file mode 100644 index 00000000..e5aec9c2 --- /dev/null +++ b/packages/puppeteer-core/src/common/CustomQueryHandler.ts @@ -0,0 +1,95 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {QueryHandler} from './QueryHandler.js'; +import {getQueryHandlerByName} from './GetQueryHandler.js'; + +/** + * @internal + */ +export const customQueryHandlers = new Map(); + +/** + * @public + */ +export interface CustomQueryHandler { + /** + * @returns A {@link Node} matching the given `selector` from {@link node}. + */ + queryOne?: (node: Node, selector: string) => Node | null; + /** + * @returns Some {@link Node}s matching the given `selector` from {@link node}. + */ + queryAll?: (node: Node, selector: string) => Iterable; +} + +/** + * @deprecated Import {@link Puppeteer} and use the static method + * {@link Puppeteer.registerCustomQueryHandler} + * + * @public + */ +export function registerCustomQueryHandler( + name: string, + handler: CustomQueryHandler +): void { + if (getQueryHandlerByName(name)) { + throw new Error(`A query handler named "${name}" already exists`); + } + + const isValidName = /^[a-zA-Z]+$/.test(name); + if (!isValidName) { + throw new Error(`Custom query handler names may only contain [a-zA-Z]`); + } + + customQueryHandlers.set( + name, + class extends QueryHandler { + static override querySelector = handler.queryOne; + static override querySelectorAll = handler.queryAll; + } + ); +} + +/** + * @deprecated Import {@link Puppeteer} and use the static method + * {@link Puppeteer.unregisterCustomQueryHandler} + * + * @public + */ +export function unregisterCustomQueryHandler(name: string): void { + customQueryHandlers.delete(name); +} + +/** + * @deprecated Import {@link Puppeteer} and use the static method + * {@link Puppeteer.customQueryHandlerNames} + * + * @public + */ +export function customQueryHandlerNames(): string[] { + return [...customQueryHandlers.keys()]; +} + +/** + * @deprecated Import {@link Puppeteer} and use the static method + * {@link Puppeteer.clearCustomQueryHandlers} + * + * @public + */ +export function clearCustomQueryHandlers(): void { + customQueryHandlers.clear(); +} diff --git a/packages/puppeteer-core/src/common/ElementHandle.ts b/packages/puppeteer-core/src/common/ElementHandle.ts index 4562f226..19b574ea 100644 --- a/packages/puppeteer-core/src/common/ElementHandle.ts +++ b/packages/puppeteer-core/src/common/ElementHandle.ts @@ -31,9 +31,10 @@ import {CDPSession} from './Connection.js'; import {ExecutionContext} from './ExecutionContext.js'; import {Frame} from './Frame.js'; import {FrameManager} from './FrameManager.js'; +import {getQueryHandlerAndSelector} from './GetQueryHandler.js'; import {WaitForSelectorOptions} from './IsolatedWorld.js'; +import {IterableUtil} from './IterableUtil.js'; import {CDPPage} from './Page.js'; -import {getQueryHandlerAndSelector} from './QueryHandler.js'; import { ElementFor, EvaluateFuncWith, @@ -183,10 +184,6 @@ export class CDPElementHandle< ): Promise> | null> { const {updatedSelector, queryHandler} = getQueryHandlerAndSelector(selector); - assert( - queryHandler.queryOne, - 'Cannot handle queries for a single element with the given selector' - ); return (await queryHandler.queryOne( this, updatedSelector @@ -198,13 +195,9 @@ export class CDPElementHandle< ): Promise>>> { const {updatedSelector, queryHandler} = getQueryHandlerAndSelector(selector); - assert( - queryHandler.queryAll, - 'Cannot handle queries for a multiple element with the given selector' - ); - return (await queryHandler.queryAll(this, updatedSelector)) as Array< - CDPElementHandle> - >; + return IterableUtil.collect( + queryHandler.queryAll(this, updatedSelector) + ) as Promise>>>; } override async $eval< @@ -242,23 +235,14 @@ export class CDPElementHandle< pageFunction: Func | string, ...args: Params ): Promise>> { - const {updatedSelector, queryHandler} = - getQueryHandlerAndSelector(selector); - assert( - queryHandler.queryAll, - 'Cannot handle queries for a multiple element with the given selector' - ); - const handles = (await queryHandler.queryAll( - this, - updatedSelector - )) as Array>>; - const elements = (await this.evaluateHandle((_, ...elements) => { + const results = await this.$$(selector); + const elements = await this.evaluateHandle((_, ...elements) => { return elements; - }, ...handles)) as JSHandle>>; + }, ...results); const [result] = await Promise.all([ elements.evaluate(pageFunction, ...args), - ...handles.map(handle => { - return handle.dispose(); + ...results.map(results => { + return results.dispose(); }), ]); await elements.dispose(); @@ -280,7 +264,6 @@ export class CDPElementHandle< ): Promise> | null> { const {updatedSelector, queryHandler} = getQueryHandlerAndSelector(selector); - assert(queryHandler.waitFor, 'Query handler does not support waiting'); return (await queryHandler.waitFor( this, updatedSelector, diff --git a/packages/puppeteer-core/src/common/Frame.ts b/packages/puppeteer-core/src/common/Frame.ts index 3bd2c7cd..41e4bef1 100644 --- a/packages/puppeteer-core/src/common/Frame.ts +++ b/packages/puppeteer-core/src/common/Frame.ts @@ -17,11 +17,11 @@ import {Protocol} from 'devtools-protocol'; import {ElementHandle} from '../api/ElementHandle.js'; import {Page} from '../api/Page.js'; -import {assert} from '../util/assert.js'; import {isErrorLike} from '../util/ErrorLike.js'; import {CDPSession} from './Connection.js'; import {ExecutionContext} from './ExecutionContext.js'; import {FrameManager} from './FrameManager.js'; +import {getQueryHandlerAndSelector} from './GetQueryHandler.js'; import {HTTPResponse} from './HTTPResponse.js'; import {MouseButton} from './Input.js'; import { @@ -32,7 +32,6 @@ import { import {MAIN_WORLD, PUPPETEER_WORLD} from './IsolatedWorlds.js'; import {LazyArg} from './LazyArg.js'; import {LifecycleWatcher, PuppeteerLifeCycleEvent} from './LifecycleWatcher.js'; -import {getQueryHandlerAndSelector} from './QueryHandler.js'; import {EvaluateFunc, EvaluateFuncWith, HandleFor, NodeFor} from './types.js'; import {importFS} from './util.js'; @@ -620,7 +619,6 @@ export class Frame { ): Promise> | null> { const {updatedSelector, queryHandler} = getQueryHandlerAndSelector(selector); - assert(queryHandler.waitFor, 'Query handler does not support waiting'); return (await queryHandler.waitFor( this, updatedSelector, diff --git a/packages/puppeteer-core/src/common/GetQueryHandler.ts b/packages/puppeteer-core/src/common/GetQueryHandler.ts new file mode 100644 index 00000000..87745ac9 --- /dev/null +++ b/packages/puppeteer-core/src/common/GetQueryHandler.ts @@ -0,0 +1,68 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {ARIAQueryHandler} from './AriaQueryHandler.js'; +import {PierceQueryHandler} from './PierceQueryHandler.js'; +import {XPathQueryHandler} from './XPathQueryHandler.js'; +import {TextQueryHandler} from './TextQueryHandler.js'; +import {CSSQueryHandler} from './CSSQueryHandler.js'; +import {customQueryHandlers} from './CustomQueryHandler.js'; +import type {QueryHandler} from './QueryHandler.js'; + +export const BUILTIN_QUERY_HANDLERS = Object.freeze({ + aria: ARIAQueryHandler, + pierce: PierceQueryHandler, + xpath: XPathQueryHandler, + text: TextQueryHandler, +}); + +const QUERY_SEPARATORS = ['=', '/']; + +/** + * @internal + */ +export function getQueryHandlerByName( + name: string +): typeof QueryHandler | undefined { + if (name in BUILTIN_QUERY_HANDLERS) { + return BUILTIN_QUERY_HANDLERS[name as 'aria']; + } + return customQueryHandlers.get(name); +} + +/** + * @internal + */ +export function getQueryHandlerAndSelector(selector: string): { + updatedSelector: string; + queryHandler: typeof QueryHandler; +} { + for (const handlerMap of [ + customQueryHandlers, + Object.entries(BUILTIN_QUERY_HANDLERS), + ]) { + for (const [name, queryHandler] of handlerMap) { + for (const separator of QUERY_SEPARATORS) { + const prefix = `${name}${separator}`; + if (selector.startsWith(prefix)) { + selector = selector.slice(prefix.length); + return {updatedSelector: selector, queryHandler}; + } + } + } + } + return {updatedSelector: selector, queryHandler: CSSQueryHandler}; +} diff --git a/packages/puppeteer-core/src/common/HandleIterator.ts b/packages/puppeteer-core/src/common/HandleIterator.ts new file mode 100644 index 00000000..bcba2ebc --- /dev/null +++ b/packages/puppeteer-core/src/common/HandleIterator.ts @@ -0,0 +1,80 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {JSHandle} from '../api/JSHandle.js'; +import {AwaitableIterable, HandleFor} from './types.js'; + +const DEFAULT_BATCH_SIZE = 20; + +/** + * This will transpose an iterator JSHandle into a fast, Puppeteer-side iterator + * of JSHandles. + * + * @param size - The number of elements to transpose. This should be something + * reasonable. + */ +async function* fastTransposeIteratorHandle( + iterator: JSHandle>, + size = DEFAULT_BATCH_SIZE +) { + const array = await iterator.evaluateHandle(async (iterator, size) => { + const results = []; + while (results.length < size) { + const result = await iterator.next(); + if (result.done) { + break; + } + results.push(result.value); + } + return results; + }, size); + const properties = (await array.getProperties()) as Map>; + await array.dispose(); + yield* properties.values(); + return properties.size === 0; +} + +/** + * This will transpose an iterator JSHandle in batches based on the default size + * of {@link fastTransposeIteratorHandle}. + */ + +async function* transposeIteratorHandle( + iterator: JSHandle> +) { + try { + while (!(yield* fastTransposeIteratorHandle(iterator))) {} + } finally { + await iterator.dispose(); + } +} + +type AwaitableIterator = Iterator | AsyncIterator; + +/** + * @internal + */ +export async function* transposeIterableHandle( + handle: JSHandle> +): AsyncIterableIterator> { + yield* transposeIteratorHandle( + await handle.evaluateHandle(iterable => { + return (async function* () { + yield* iterable; + })(); + }) + ); +} diff --git a/packages/puppeteer-core/src/common/IsolatedWorld.ts b/packages/puppeteer-core/src/common/IsolatedWorld.ts index 20537903..64425f72 100644 --- a/packages/puppeteer-core/src/common/IsolatedWorld.ts +++ b/packages/puppeteer-core/src/common/IsolatedWorld.ts @@ -440,7 +440,7 @@ export class IsolatedWorld { return; } const node = (await PuppeteerUtil.createFunction(query)( - root || document, + root ?? document, selector, PuppeteerUtil )) as Node | null; @@ -533,9 +533,9 @@ export class IsolatedWorld { } async adoptHandle>(handle: T): Promise { - const executionContext = await this.executionContext(); + const context = await this.executionContext(); assert( - handle.executionContext() !== executionContext, + handle.executionContext() !== context, 'Cannot adopt handle that already belongs to this execution context' ); const nodeInfo = await this.#client.send('DOM.describeNode', { @@ -545,9 +545,18 @@ export class IsolatedWorld { } async transferHandle>(handle: T): Promise { - const result = await this.adoptHandle(handle); + const context = await this.executionContext(); + if (handle.executionContext() === context) { + return handle; + } + const info = await this.#client.send('DOM.describeNode', { + objectId: handle.remoteObject().objectId, + }); + const newHandle = (await this.adoptBackendNode( + info.node.backendNodeId + )) as T; await handle.dispose(); - return result; + return newHandle; } } diff --git a/packages/puppeteer-core/src/common/IterableUtil.ts b/packages/puppeteer-core/src/common/IterableUtil.ts new file mode 100644 index 00000000..f864e60f --- /dev/null +++ b/packages/puppeteer-core/src/common/IterableUtil.ts @@ -0,0 +1,48 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import type {AwaitableIterable} from './types.js'; + +/** + * @internal + */ +export class IterableUtil { + static async *map( + iterable: AwaitableIterable, + map: (item: T) => Promise + ): AwaitableIterable { + for await (const value of iterable) { + yield await map(value); + } + } + + static async collect(iterable: AwaitableIterable): Promise { + const result = []; + for await (const value of iterable) { + result.push(value); + } + return result; + } + + static async first( + iterable: AwaitableIterable + ): Promise { + for await (const value of iterable) { + return value; + } + return undefined; + } +} diff --git a/packages/puppeteer-core/src/common/PierceQueryHandler.ts b/packages/puppeteer-core/src/common/PierceQueryHandler.ts new file mode 100644 index 00000000..85d1326e --- /dev/null +++ b/packages/puppeteer-core/src/common/PierceQueryHandler.ts @@ -0,0 +1,38 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import type PuppeteerUtil from '../injected/injected.js'; +import {QueryHandler} from './QueryHandler.js'; + +/** + * @internal + */ +export class PierceQueryHandler extends QueryHandler { + static override querySelector = ( + element: Node, + selector: string, + {pierceQuerySelector}: PuppeteerUtil + ): Node | null => { + return pierceQuerySelector(element, selector); + }; + static override querySelectorAll = ( + element: Node, + selector: string, + {pierceQuerySelectorAll}: PuppeteerUtil + ): Iterable => { + return pierceQuerySelectorAll(element, selector); + }; +} diff --git a/packages/puppeteer-core/src/common/Puppeteer.ts b/packages/puppeteer-core/src/common/Puppeteer.ts index 31410e85..f5e46c58 100644 --- a/packages/puppeteer-core/src/common/Puppeteer.ts +++ b/packages/puppeteer-core/src/common/Puppeteer.ts @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + import {Browser} from '../api/Browser.js'; import { BrowserConnectOptions, @@ -25,7 +26,7 @@ import { customQueryHandlerNames, registerCustomQueryHandler, unregisterCustomQueryHandler, -} from './QueryHandler.js'; +} from './CustomQueryHandler.js'; /** * Settings that are common to the Puppeteer class, regardless of environment. diff --git a/packages/puppeteer-core/src/common/QueryHandler.ts b/packages/puppeteer-core/src/common/QueryHandler.ts index b9cc3d7e..03d68203 100644 --- a/packages/puppeteer-core/src/common/QueryHandler.ts +++ b/packages/puppeteer-core/src/common/QueryHandler.ts @@ -1,5 +1,5 @@ /** - * Copyright 2020 Google Inc. All rights reserved. + * Copyright 2023 Google Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,315 +14,177 @@ * limitations under the License. */ -import PuppeteerUtil from '../injected/injected.js'; -import {assert} from '../util/assert.js'; -import {ariaHandler} from './AriaQueryHandler.js'; import {ElementHandle} from '../api/ElementHandle.js'; -import {Frame} from './Frame.js'; -import {WaitForSelectorOptions} from './IsolatedWorld.js'; +import type PuppeteerUtil from '../injected/injected.js'; +import {assert} from '../util/assert.js'; +import {createFunction} from '../util/Function.js'; +import {transposeIterableHandle} from './HandleIterator.js'; +import type {Frame} from './Frame.js'; +import type {WaitForSelectorOptions} from './IsolatedWorld.js'; import {MAIN_WORLD, PUPPETEER_WORLD} from './IsolatedWorlds.js'; import {LazyArg} from './LazyArg.js'; - -/** - * @public - */ -export interface CustomQueryHandler { - /** - * @returns A {@link Node} matching the given `selector` from {@link node}. - */ - queryOne?: (node: Node, selector: string) => Node | null; - /** - * @returns Some {@link Node}s matching the given `selector` from {@link node}. - */ - queryAll?: (node: Node, selector: string) => Node[]; -} +import type {Awaitable, AwaitableIterable} from './types.js'; /** * @internal */ -export interface InternalQueryHandler { - /** - * @returns A {@link Node} matching the given `selector` from {@link node}. - */ - queryOne?: ( - node: Node, - selector: string, - PuppeteerUtil: PuppeteerUtil - ) => Node | null; - /** - * @returns Some {@link Node}s matching the given `selector` from {@link node}. - */ - queryAll?: ( - node: Node, - selector: string, - PuppeteerUtil: PuppeteerUtil - ) => Node[]; -} +export type QuerySelectorAll = ( + node: Node, + selector: string, + PuppeteerUtil: PuppeteerUtil +) => AwaitableIterable; /** * @internal */ -export interface PuppeteerQueryHandler { - /** - * Queries for a single node given a selector and {@link ElementHandle}. - * - * Akin to {@link Window.prototype.querySelector}. - */ - queryOne?: ( - element: ElementHandle, - selector: string - ) => Promise | null>; +export type QuerySelector = ( + node: Node, + selector: string, + PuppeteerUtil: PuppeteerUtil +) => Awaitable; + +/** + * @internal + */ +export class QueryHandler { + // Either one of these may be implemented, but at least one must be. + static querySelectorAll?: QuerySelectorAll; + static querySelector?: QuerySelector; + + static get _querySelector(): QuerySelector { + if (this.querySelector) { + return this.querySelector; + } + if (!this.querySelectorAll) { + throw new Error('Cannot create default query selector'); + } + + const querySelector: QuerySelector = async ( + node, + selector, + PuppeteerUtil + ) => { + const querySelectorAll = + 'FUNCTION_DEFINITION' as unknown as QuerySelectorAll; + const results = querySelectorAll(node, selector, PuppeteerUtil); + for await (const result of results) { + return result; + } + return null; + }; + + return (this.querySelector = createFunction( + querySelector + .toString() + .replace("'FUNCTION_DEFINITION'", this.querySelectorAll.toString()) + ) as typeof querySelector); + } + + static get _querySelectorAll(): QuerySelectorAll { + if (this.querySelectorAll) { + return this.querySelectorAll; + } + if (!this.querySelector) { + throw new Error('Cannot create default query selector'); + } + + const querySelectorAll: QuerySelectorAll = async function* ( + node, + selector, + PuppeteerUtil + ) { + const querySelector = 'FUNCTION_DEFINITION' as unknown as QuerySelector; + const result = await querySelector(node, selector, PuppeteerUtil); + if (result) { + yield result; + } + }; + + return (this.querySelectorAll = createFunction( + querySelectorAll + .toString() + .replace("'FUNCTION_DEFINITION'", this.querySelector.toString()) + ) as typeof querySelectorAll); + } + /** * Queries for multiple nodes given a selector and {@link ElementHandle}. * * Akin to {@link Window.prototype.querySelectorAll}. */ - queryAll?: ( + static async *queryAll( element: ElementHandle, selector: string - ) => Promise>>; + ): AwaitableIterable> { + const world = element.executionContext()._world; + assert(world); + const handle = await element.evaluateHandle( + this._querySelectorAll, + selector, + LazyArg.create(context => { + return context.puppeteerUtil; + }) + ); + yield* transposeIterableHandle(handle); + } + + /** + * Queries for a single node given a selector and {@link ElementHandle}. + * + * Akin to {@link Window.prototype.querySelector}. + */ + static async queryOne( + element: ElementHandle, + selector: string + ): Promise | null> { + const world = element.executionContext()._world; + assert(world); + const result = await element.evaluateHandle( + this._querySelector, + selector, + LazyArg.create(context => { + return context.puppeteerUtil; + }) + ); + if (!(result instanceof ElementHandle)) { + await result.dispose(); + return null; + } + return result; + } /** * Waits until a single node appears for a given selector and * {@link ElementHandle}. */ - waitFor?: ( + static async waitFor( elementOrFrame: ElementHandle | Frame, selector: string, - options: WaitForSelectorOptions - ) => Promise | null>; -} - -function createPuppeteerQueryHandler( - handler: InternalQueryHandler -): PuppeteerQueryHandler { - const internalHandler: PuppeteerQueryHandler = {}; - - if (handler.queryOne) { - const queryOne = handler.queryOne; - internalHandler.queryOne = async (element, selector) => { - const world = element.executionContext()._world; - assert(world); - const jsHandle = await element.evaluateHandle( - queryOne, - selector, - LazyArg.create(context => { - return context.puppeteerUtil; - }) - ); - const elementHandle = jsHandle.asElement(); - if (elementHandle) { - return elementHandle; - } - await jsHandle.dispose(); + options: WaitForSelectorOptions, + bindings = new Map unknown>() + ): Promise | null> { + let frame: Frame; + let element: ElementHandle | undefined; + if (!(elementOrFrame instanceof ElementHandle)) { + frame = elementOrFrame; + } else { + frame = elementOrFrame.frame; + element = await frame.worlds[PUPPETEER_WORLD].adoptHandle(elementOrFrame); + } + const result = await frame.worlds[PUPPETEER_WORLD]._waitForSelectorInPage( + this._querySelector, + element, + selector, + options, + bindings + ); + if (element) { + await element.dispose(); + } + if (!(result instanceof ElementHandle)) { + await result?.dispose(); return null; - }; - internalHandler.waitFor = async (elementOrFrame, selector, options) => { - let frame: Frame; - let element: ElementHandle | undefined; - if (elementOrFrame instanceof Frame) { - frame = elementOrFrame; - } else { - frame = elementOrFrame.frame; - element = await frame.worlds[PUPPETEER_WORLD].adoptHandle( - elementOrFrame - ); - } - const result = await frame.worlds[PUPPETEER_WORLD]._waitForSelectorInPage( - queryOne, - element, - selector, - options - ); - if (element) { - await element.dispose(); - } - if (!result) { - return null; - } - if (!(result instanceof ElementHandle)) { - await result.dispose(); - return null; - } - return frame.worlds[MAIN_WORLD].transferHandle(result); - }; - } - - if (handler.queryAll) { - const queryAll = handler.queryAll; - internalHandler.queryAll = async (element, selector) => { - const world = element.executionContext()._world; - assert(world); - const jsHandle = await element.evaluateHandle( - queryAll, - selector, - LazyArg.create(context => { - return context.puppeteerUtil; - }) - ); - const properties = await jsHandle.getProperties(); - await jsHandle.dispose(); - const result = []; - for (const property of properties.values()) { - const elementHandle = property.asElement(); - if (elementHandle) { - result.push(elementHandle); - } - } - return result; - }; - } - - return internalHandler; -} - -const defaultHandler = createPuppeteerQueryHandler({ - queryOne: (element, selector) => { - if (!('querySelector' in element)) { - throw new Error( - `Could not invoke \`querySelector\` on node of type ${element.nodeName}.` - ); } - return ( - element as unknown as {querySelector(selector: string): Element} - ).querySelector(selector); - }, - queryAll: (element, selector) => { - if (!('querySelectorAll' in element)) { - throw new Error( - `Could not invoke \`querySelectorAll\` on node of type ${element.nodeName}.` - ); - } - return [ - ...( - element as unknown as { - querySelectorAll(selector: string): NodeList; - } - ).querySelectorAll(selector), - ]; - }, -}); - -const pierceHandler = createPuppeteerQueryHandler({ - queryOne: (element, selector, {pierceQuerySelector}) => { - return pierceQuerySelector(element, selector); - }, - queryAll: (element, selector, {pierceQuerySelectorAll}) => { - return pierceQuerySelectorAll(element, selector); - }, -}); - -const xpathHandler = createPuppeteerQueryHandler({ - queryOne: (element, selector, {xpathQuerySelector}) => { - return xpathQuerySelector(element, selector); - }, - queryAll: (element, selector, {xpathQuerySelectorAll}) => { - return xpathQuerySelectorAll(element, selector); - }, -}); - -const textQueryHandler = createPuppeteerQueryHandler({ - queryOne: (element, selector, {textQuerySelector}) => { - return textQuerySelector(element, selector); - }, - queryAll: (element, selector, {textQuerySelectorAll}) => { - return textQuerySelectorAll(element, selector); - }, -}); - -interface RegisteredQueryHandler { - handler: PuppeteerQueryHandler; - transformSelector?: (selector: string) => string; -} - -const INTERNAL_QUERY_HANDLERS = new Map([ - ['aria', {handler: ariaHandler}], - ['pierce', {handler: pierceHandler}], - ['xpath', {handler: xpathHandler}], - ['text', {handler: textQueryHandler}], -]); -const QUERY_HANDLERS = new Map(); - -/** - * @deprecated Import {@link Puppeteer} and use the static method - * {@link Puppeteer.registerCustomQueryHandler} - * - * @public - */ -export function registerCustomQueryHandler( - name: string, - handler: CustomQueryHandler -): void { - if (INTERNAL_QUERY_HANDLERS.has(name)) { - throw new Error(`A query handler named "${name}" already exists`); + return frame.worlds[MAIN_WORLD].transferHandle(result); } - if (QUERY_HANDLERS.has(name)) { - throw new Error(`A custom query handler named "${name}" already exists`); - } - - const isValidName = /^[a-zA-Z]+$/.test(name); - if (!isValidName) { - throw new Error(`Custom query handler names may only contain [a-zA-Z]`); - } - - QUERY_HANDLERS.set(name, {handler: createPuppeteerQueryHandler(handler)}); -} - -/** - * @deprecated Import {@link Puppeteer} and use the static method - * {@link Puppeteer.unregisterCustomQueryHandler} - * - * @public - */ -export function unregisterCustomQueryHandler(name: string): void { - QUERY_HANDLERS.delete(name); -} - -/** - * @deprecated Import {@link Puppeteer} and use the static method - * {@link Puppeteer.customQueryHandlerNames} - * - * @public - */ -export function customQueryHandlerNames(): string[] { - return [...QUERY_HANDLERS.keys()]; -} - -/** - * @deprecated Import {@link Puppeteer} and use the static method - * {@link Puppeteer.clearCustomQueryHandlers} - * - * @public - */ -export function clearCustomQueryHandlers(): void { - QUERY_HANDLERS.clear(); -} - -const CUSTOM_QUERY_SEPARATORS = ['=', '/']; - -/** - * @internal - */ -export function getQueryHandlerAndSelector(selector: string): { - updatedSelector: string; - queryHandler: PuppeteerQueryHandler; -} { - for (const handlerMap of [QUERY_HANDLERS, INTERNAL_QUERY_HANDLERS]) { - for (const [ - name, - {handler: queryHandler, transformSelector}, - ] of handlerMap) { - for (const separator of CUSTOM_QUERY_SEPARATORS) { - const prefix = `${name}${separator}`; - if (selector.startsWith(prefix)) { - selector = selector.slice(prefix.length); - if (transformSelector) { - selector = transformSelector(selector); - } - return {updatedSelector: selector, queryHandler}; - } - } - } - } - return {updatedSelector: selector, queryHandler: defaultHandler}; } diff --git a/packages/puppeteer-core/src/common/TextQueryHandler.ts b/packages/puppeteer-core/src/common/TextQueryHandler.ts new file mode 100644 index 00000000..02ecdddc --- /dev/null +++ b/packages/puppeteer-core/src/common/TextQueryHandler.ts @@ -0,0 +1,30 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {QueryHandler, QuerySelectorAll} from './QueryHandler.js'; + +/** + * @internal + */ +export class TextQueryHandler extends QueryHandler { + static override querySelectorAll: QuerySelectorAll = ( + element, + selector, + {textQuerySelectorAll} + ) => { + return textQuerySelectorAll(element, selector); + }; +} diff --git a/packages/puppeteer-core/src/common/XPathQueryHandler.ts b/packages/puppeteer-core/src/common/XPathQueryHandler.ts new file mode 100644 index 00000000..34f824d5 --- /dev/null +++ b/packages/puppeteer-core/src/common/XPathQueryHandler.ts @@ -0,0 +1,30 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import {QueryHandler, QuerySelectorAll} from './QueryHandler.js'; + +/** + * @internal + */ +export class XPathQueryHandler extends QueryHandler { + static override querySelectorAll: QuerySelectorAll = ( + element, + selector, + {xpathQuerySelectorAll} + ) => { + return xpathQuerySelectorAll(element, selector); + }; +} diff --git a/packages/puppeteer-core/src/common/common.ts b/packages/puppeteer-core/src/common/common.ts index a8afa1cc..99be5931 100644 --- a/packages/puppeteer-core/src/common/common.ts +++ b/packages/puppeteer-core/src/common/common.ts @@ -25,6 +25,7 @@ export * from './Connection.js'; export * from './ConnectionTransport.js'; export * from './ConsoleMessage.js'; export * from './Coverage.js'; +export * from './CustomQueryHandler.js'; export * from './Debug.js'; export * from './Device.js'; export * from './Dialog.js'; diff --git a/packages/puppeteer-core/src/common/types.ts b/packages/puppeteer-core/src/common/types.ts index dbb3adc8..7d2635e1 100644 --- a/packages/puppeteer-core/src/common/types.ts +++ b/packages/puppeteer-core/src/common/types.ts @@ -32,6 +32,11 @@ export type BindingPayload = { isTrivial: boolean; }; +/** + * @public + */ +export type AwaitableIterable = Iterable | AsyncIterable; + /** * @public */ diff --git a/packages/puppeteer-core/src/common/util.ts b/packages/puppeteer-core/src/common/util.ts index b74cd14e..1e218069 100644 --- a/packages/puppeteer-core/src/common/util.ts +++ b/packages/puppeteer-core/src/common/util.ts @@ -14,20 +14,21 @@ * limitations under the License. */ -import {Protocol} from 'devtools-protocol'; +import type {Protocol} from 'devtools-protocol'; import type {Readable} from 'stream'; +import type {ElementHandle} from '../api/ElementHandle.js'; +import type {JSHandle} from '../api/JSHandle.js'; import {isNode} from '../environment.js'; import {assert} from '../util/assert.js'; import {isErrorLike} from '../util/ErrorLike.js'; -import {CDPSession} from './Connection.js'; +import type {CDPSession} from './Connection.js'; import {debug} from './Debug.js'; -import {ElementHandle} from '../api/ElementHandle.js'; import {CDPElementHandle} from './ElementHandle.js'; import {TimeoutError} from './Errors.js'; -import {CommonEventEmitter} from './EventEmitter.js'; -import {ExecutionContext} from './ExecutionContext.js'; -import {JSHandle} from '../api/JSHandle.js'; +import type {CommonEventEmitter} from './EventEmitter.js'; +import type {ExecutionContext} from './ExecutionContext.js'; import {CDPJSHandle} from './JSHandle.js'; + /** * @internal */ diff --git a/packages/puppeteer-core/src/injected/TextQuerySelector.ts b/packages/puppeteer-core/src/injected/TextQuerySelector.ts index 77c17a73..eebd59f6 100644 --- a/packages/puppeteer-core/src/injected/TextQuerySelector.ts +++ b/packages/puppeteer-core/src/injected/TextQuerySelector.ts @@ -19,68 +19,38 @@ import { isSuitableNodeForTextMatching, } from './TextContent.js'; -/** - * Queries the given node for a node matching the given text selector. - * - * @internal - */ -export const textQuerySelector = ( - root: Node, - selector: string -): Element | null => { - for (const node of root.childNodes) { - if (node instanceof Element && isSuitableNodeForTextMatching(node)) { - let matchedNode: Element | null; - if (node.shadowRoot) { - matchedNode = textQuerySelector(node.shadowRoot, selector); - } else { - matchedNode = textQuerySelector(node, selector); - } - if (matchedNode) { - return matchedNode; - } - } - } - - if (root instanceof Element) { - const textContent = createTextContent(root); - if (textContent.full.includes(selector)) { - return root; - } - } - return null; -}; - /** * Queries the given node for all nodes matching the given text selector. * * @internal */ -export const textQuerySelectorAll = ( +export const textQuerySelectorAll = function* ( root: Node, selector: string -): Element[] => { - let results: Element[] = []; +): Generator { + let yielded = false; for (const node of root.childNodes) { - if (node instanceof Element) { - let matchedNodes: Element[]; - if (node.shadowRoot) { - matchedNodes = textQuerySelectorAll(node.shadowRoot, selector); + if (node instanceof Element && isSuitableNodeForTextMatching(node)) { + let matches: Generator; + if (!node.shadowRoot) { + matches = textQuerySelectorAll(node, selector); } else { - matchedNodes = textQuerySelectorAll(node, selector); + matches = textQuerySelectorAll(node.shadowRoot, selector); + } + for (const match of matches) { + yield match; + yielded = true; } - results = results.concat(matchedNodes); } } - if (results.length > 0) { - return results; + if (yielded) { + return; } - if (root instanceof Element) { + if (root instanceof Element && isSuitableNodeForTextMatching(root)) { const textContent = createTextContent(root); if (textContent.full.includes(selector)) { - return [root]; + yield root; } } - return []; }; diff --git a/packages/puppeteer-core/src/injected/XPathQuerySelector.ts b/packages/puppeteer-core/src/injected/XPathQuerySelector.ts index 3def05c2..787e3afa 100644 --- a/packages/puppeteer-core/src/injected/XPathQuerySelector.ts +++ b/packages/puppeteer-core/src/injected/XPathQuerySelector.ts @@ -17,24 +17,10 @@ /** * @internal */ -export const xpathQuerySelector = ( +export const xpathQuerySelectorAll = function* ( root: Node, selector: string -): Node | null => { - const doc = root.ownerDocument || document; - const result = doc.evaluate( - selector, - root, - null, - XPathResult.FIRST_ORDERED_NODE_TYPE - ); - return result.singleNodeValue; -}; - -/** - * @internal - */ -export const xpathQuerySelectorAll = (root: Node, selector: string): Node[] => { +): Iterable { const doc = root.ownerDocument || document; const iterator = doc.evaluate( selector, @@ -42,10 +28,8 @@ export const xpathQuerySelectorAll = (root: Node, selector: string): Node[] => { null, XPathResult.ORDERED_NODE_ITERATOR_TYPE ); - const array: Node[] = []; let item; while ((item = iterator.iterateNext())) { - array.push(item); + yield item; } - return array; }; diff --git a/packages/puppeteer-core/src/injected/injected.ts b/packages/puppeteer-core/src/injected/injected.ts index d994f1f5..21d8cdee 100644 --- a/packages/puppeteer-core/src/injected/injected.ts +++ b/packages/puppeteer-core/src/injected/injected.ts @@ -15,6 +15,7 @@ */ import {createDeferredPromise} from '../util/DeferredPromise.js'; +import {createFunction} from '../util/Function.js'; import {RAFPoller, MutationPoller, IntervalPoller} from './Poller.js'; import { isSuitableNodeForTextMatching, @@ -33,6 +34,7 @@ const PuppeteerUtil = Object.freeze({ ...TextQuerySelector, ...XPathQuerySelector, ...PierceQuerySelector, + createFunction, createDeferredPromise, createTextContent, IntervalPoller, diff --git a/packages/puppeteer-core/src/injected/util.ts b/packages/puppeteer-core/src/injected/util.ts index 156f6716..d41b4eec 100644 --- a/packages/puppeteer-core/src/injected/util.ts +++ b/packages/puppeteer-core/src/injected/util.ts @@ -1,40 +1,3 @@ -/** - * Copyright 2022 Google Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -const createdFunctions = new Map unknown>(); - -/** - * Creates a function from a string. - * - * @internal - */ -export const createFunction = ( - functionValue: string -): ((...args: unknown[]) => unknown) => { - let fn = createdFunctions.get(functionValue); - if (fn) { - return fn; - } - fn = new Function(`return ${functionValue}`)() as ( - ...args: unknown[] - ) => unknown; - createdFunctions.set(functionValue, fn); - return fn; -}; - const HIDDEN_VISIBILITY_VALUES = ['hidden', 'collapse']; /** diff --git a/packages/puppeteer-core/src/puppeteer-core.ts b/packages/puppeteer-core/src/puppeteer-core.ts index c6cc6d34..1baaef13 100644 --- a/packages/puppeteer-core/src/puppeteer-core.ts +++ b/packages/puppeteer-core/src/puppeteer-core.ts @@ -25,7 +25,7 @@ export * from './util/util.js'; /** * @deprecated Use the query handler API defined on {@link Puppeteer} */ -export * from './common/QueryHandler.js'; +export * from './common/CustomQueryHandler.js'; import {PuppeteerNode} from './node/PuppeteerNode.js'; diff --git a/packages/puppeteer-core/src/util/Function.ts b/packages/puppeteer-core/src/util/Function.ts new file mode 100644 index 00000000..dc33bf4d --- /dev/null +++ b/packages/puppeteer-core/src/util/Function.ts @@ -0,0 +1,35 @@ +/** + * Copyright 2023 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +const createdFunctions = new Map unknown>(); + +/** + * Creates a function from a string. + * + * @internal + */ +export const createFunction = ( + functionValue: string +): ((...args: unknown[]) => unknown) => { + let fn = createdFunctions.get(functionValue); + if (fn) { + return fn; + } + fn = new Function(`return ${functionValue}`)() as ( + ...args: unknown[] + ) => unknown; + createdFunctions.set(functionValue, fn); + return fn; +}; diff --git a/test/src/queryselector.spec.ts b/test/src/queryselector.spec.ts index a6a08d40..8120ac28 100644 --- a/test/src/queryselector.spec.ts +++ b/test/src/queryselector.spec.ts @@ -15,7 +15,7 @@ */ import expect from 'expect'; import {Puppeteer} from 'puppeteer'; -import {CustomQueryHandler} from 'puppeteer-core/internal/common/QueryHandler.js'; +import type {CustomQueryHandler} from 'puppeteer-core/internal/common/CustomQueryHandler.js'; import { getTestState, setupTestBrowserHooks,