From 633e7cfdf99d42f420d0af381394bd1f6ac7bcd1 Mon Sep 17 00:00:00 2001 From: jrandolf <101637635+jrandolf@users.noreply.github.com> Date: Thu, 15 Sep 2022 13:12:13 +0200 Subject: [PATCH] feat: add text query handler (#8956) --- src/common/IsolatedWorld.ts | 3 +- src/common/QueryHandler.ts | 91 +++++++++++++++++++++++- src/injected/TextContent.ts | 98 ++++++++++++++++++++++++++ src/injected/injected.ts | 2 + test/src/page.spec.ts | 90 ++++++++++++++++-------- test/src/queryhandler.spec.ts | 127 ++++++++++++++++++++++++++++++++++ 6 files changed, 377 insertions(+), 34 deletions(-) create mode 100644 src/injected/TextContent.ts diff --git a/src/common/IsolatedWorld.ts b/src/common/IsolatedWorld.ts index 38a40843bcd..04dba2e82ee 100644 --- a/src/common/IsolatedWorld.ts +++ b/src/common/IsolatedWorld.ts @@ -518,7 +518,8 @@ export class IsolatedWorld { } const node = (await PuppeteerUtil.createFunction(query)( root || document, - selector + selector, + PuppeteerUtil )) as Node | null; return PuppeteerUtil.checkVisibility(node, visible); }, diff --git a/src/common/QueryHandler.ts b/src/common/QueryHandler.ts index f89fec91029..7f29ae31c9e 100644 --- a/src/common/QueryHandler.ts +++ b/src/common/QueryHandler.ts @@ -14,6 +14,7 @@ * limitations under the License. */ +import PuppeteerUtil from '../injected/injected.js'; import {ariaHandler} from './AriaQueryHandler.js'; import {ElementHandle} from './ElementHandle.js'; import {Frame} from './Frame.js'; @@ -37,6 +38,28 @@ export interface CustomQueryHandler { queryAll?: (node: Node, selector: string) => Node[]; } +/** + * @internal + */ +export interface InternalQueryHandler { + /** + * @returns A {@link Node} matching the given `selector` from {@link node}. + */ + queryOne?: ( + node: Node, + selector: string, + PuppeteerUtil: PuppeteerUtil + ) => Node | null; + /** + * @returns Some {@link Node}s matching the given `selector` from {@link node}. 
+ */ + queryAll?: ( + node: Node, + selector: string, + PuppeteerUtil: PuppeteerUtil + ) => Node[]; +} + /** * @internal */ @@ -72,14 +95,18 @@ export interface PuppeteerQueryHandler { } function createPuppeteerQueryHandler( - handler: CustomQueryHandler + handler: InternalQueryHandler ): PuppeteerQueryHandler { const internalHandler: PuppeteerQueryHandler = {}; if (handler.queryOne) { const queryOne = handler.queryOne; internalHandler.queryOne = async (element, selector) => { - const jsHandle = await element.evaluateHandle(queryOne, selector); + const jsHandle = await element.evaluateHandle( + queryOne, + selector, + await element.executionContext()._world!.puppeteerUtil + ); const elementHandle = jsHandle.asElement(); if (elementHandle) { return elementHandle; @@ -121,7 +148,11 @@ function createPuppeteerQueryHandler( if (handler.queryAll) { const queryAll = handler.queryAll; internalHandler.queryAll = async (element, selector) => { - const jsHandle = await element.evaluateHandle(queryAll, selector); + const jsHandle = await element.evaluateHandle( + queryAll, + selector, + await element.executionContext()._world!.puppeteerUtil + ); const properties = await jsHandle.getProperties(); await jsHandle.dispose(); const result = []; @@ -244,6 +275,59 @@ const xpathHandler = createPuppeteerQueryHandler({ }, }); +const textQueryHandler = createPuppeteerQueryHandler({ + queryOne: (element, selector, {createTextContent}) => { + const search = (root: Node): Node | null => { + for (const node of root.childNodes) { + if (node instanceof Element) { + let matchedNode: Node | null; + if (node.shadowRoot) { + matchedNode = search(node.shadowRoot); + } else { + matchedNode = search(node); + } + if (matchedNode) { + return matchedNode; + } + } + } + const textContent = createTextContent(root); + if (textContent.full.includes(selector)) { + return root; + } + return null; + }; + return search(element); + }, + + queryAll: (element, selector, {createTextContent}) => { + const search = 
(root: Node): Node[] => { + let results: Node[] = []; + for (const node of root.childNodes) { + if (node instanceof Element) { + let matchedNodes: Node[]; + if (node.shadowRoot) { + matchedNodes = search(node.shadowRoot); + } else { + matchedNodes = search(node); + } + results = results.concat(matchedNodes); + } + } + if (results.length > 0) { + return results; + } + + const textContent = createTextContent(root); + if (textContent.full.includes(selector)) { + return [root]; + } + return []; + }; + return search(element); + }, +}); + interface RegisteredQueryHandler { handler: PuppeteerQueryHandler; transformSelector?: (selector: string) => string; @@ -253,6 +337,7 @@ const INTERNAL_QUERY_HANDLERS = new Map([ ['aria', {handler: ariaHandler}], ['pierce', {handler: pierceHandler}], ['xpath', {handler: xpathHandler}], + ['text', {handler: textQueryHandler}], ]); const QUERY_HANDLERS = new Map(); diff --git a/src/injected/TextContent.ts b/src/injected/TextContent.ts new file mode 100644 index 00000000000..c4bb2cf10c8 --- /dev/null +++ b/src/injected/TextContent.ts @@ -0,0 +1,98 @@ +interface NonTrivialValueNode extends Node { + value: string; +} + +const TRIVIAL_VALUE_INPUT_TYPES = new Set(['checkbox', 'image', 'radio']); + +/** + * Determines if the node has a non-trivial value property. + */ +const isNonTrivialValueNode = (node: Node): node is NonTrivialValueNode => { + if (node instanceof HTMLSelectElement) { + return true; + } + if (node instanceof HTMLTextAreaElement) { + return true; + } + if ( + node instanceof HTMLInputElement && + !TRIVIAL_VALUE_INPUT_TYPES.has(node.type) + ) { + return true; + } + return false; +}; + +const UNSUITABLE_NODE_NAMES = new Set(['SCRIPT', 'STYLE']); + +/** + * Determines whether a given node is suitable for text matching. 
+ */ +const isSuitableNodeForTextMatching = (node: Node): boolean => { + return ( + !UNSUITABLE_NODE_NAMES.has(node.nodeName) && !document.head?.contains(node) + ); +}; + +/** + * @internal + */ +export type TextContent = { + // Contains the full text of the node. + full: string; + // Contains the text immediately beneath the node. + immediate: string[]; +}; + +/** + * Maps {@link Node}s to their computed {@link TextContent}. + */ +const textContentCache = new Map(); + +/** + * Builds the text content of a node using some custom logic. + * + * @remarks + * The primary reason this function exists is due to {@link ShadowRoot}s not having + * text content. + * + * @internal + */ +export const createTextContent = (root: Node): TextContent => { + let value = textContentCache.get(root); + if (value) { + return value; + } + value = {full: '', immediate: []}; + if (!isSuitableNodeForTextMatching(root)) { + return value; + } + let currentImmediate = ''; + if (isNonTrivialValueNode(root)) { + value.full = root.value; + value.immediate.push(root.value); + } else { + for (let child = root.firstChild; child; child = child.nextSibling) { + if (child.nodeType === Node.TEXT_NODE) { + value.full += child.nodeValue ?? ''; + currentImmediate += child.nodeValue ?? 
''; + continue; + } + if (currentImmediate) { + value.immediate.push(currentImmediate); + } + currentImmediate = ''; + if (child.nodeType === Node.ELEMENT_NODE) { + value.full += createTextContent(child).full; + } + } + if (currentImmediate) { + value.immediate.push(currentImmediate); + } + if (root instanceof Element && root.shadowRoot) { + value.full += createTextContent(root.shadowRoot).full; + } + } + textContentCache.set(root, value); + return value; +}; diff --git a/src/injected/injected.ts b/src/injected/injected.ts index 0b11fff763a..f15ec110cda 100644 --- a/src/injected/injected.ts +++ b/src/injected/injected.ts @@ -1,10 +1,12 @@ import {createDeferredPromise} from '../util/DeferredPromise.js'; import * as util from './util.js'; import * as Poller from './Poller.js'; +import * as TextContent from './TextContent.js'; const PuppeteerUtil = Object.freeze({ ...util, ...Poller, + ...TextContent, createDeferredPromise, }); diff --git a/test/src/page.spec.ts b/test/src/page.spec.ts index 0da27ebdf69..11e2d9e34b2 100644 --- a/test/src/page.spec.ts +++ b/test/src/page.spec.ts @@ -545,39 +545,69 @@ describe('Page', function () { it('should work', async () => { const {page} = getTestState(); - // Instantiate an object - await page.evaluate(() => { - return ((globalThis as any).set = new Set(['hello', 'world'])); + // Create a custom class + const classHandle = await page.evaluateHandle(() => { + return class CustomClass {}; }); - const prototypeHandle = await page.evaluateHandle(() => { - return Set.prototype; - }); - const objectsHandle = await page.queryObjects(prototypeHandle); - const count = await page.evaluate(objects => { - return objects.length; - }, objectsHandle); - expect(count).toBe(1); - const values = await page.evaluate(objects => { - return Array.from(objects[0]!.values()); - }, objectsHandle); - expect(values).toEqual(['hello', 'world']); - }); - it('should work for non-blank page', async () => { - const {page, server} = getTestState(); - // 
Instantiate an object - await page.goto(server.EMPTY_PAGE); - await page.evaluate(() => { - return ((globalThis as any).set = new Set(['hello', 'world'])); - }); - const prototypeHandle = await page.evaluateHandle(() => { - return Set.prototype; - }); + // Create an instance. + await page.evaluate(CustomClass => { + // @ts-expect-error: Different context. + self.customClass = new CustomClass(); + }, classHandle); + + // Validate only one has been added. + const prototypeHandle = await page.evaluateHandle(CustomClass => { + return CustomClass.prototype; + }, classHandle); const objectsHandle = await page.queryObjects(prototypeHandle); - const count = await page.evaluate(objects => { - return objects.length; - }, objectsHandle); - expect(count).toBe(1); + await expect( + page.evaluate(objects => { + return objects.length; + }, objectsHandle) + ).resolves.toBe(1); + + // Check that the queried object is the created instance. + await expect( + page.evaluate(objects => { + // @ts-expect-error: Different context. + return objects[0] === self.customClass; + }, objectsHandle) + ).resolves.toBeTruthy(); + }); + it('should work for non-trivial page', async () => { + const {page, server} = getTestState(); + await page.goto(server.EMPTY_PAGE); + + // Create a custom class + const classHandle = await page.evaluateHandle(() => { + return class CustomClass {}; + }); + + // Create an instance. + await page.evaluate(CustomClass => { + // @ts-expect-error: Different context. + self.customClass = new CustomClass(); + }, classHandle); + + // Validate only one has been added. + const prototypeHandle = await page.evaluateHandle(CustomClass => { + return CustomClass.prototype; + }, classHandle); + const objectsHandle = await page.queryObjects(prototypeHandle); + await expect( + page.evaluate(objects => { + return objects.length; + }, objectsHandle) + ).resolves.toBe(1); + + // Check that the queried object is the created instance. + await expect( + page.evaluate(objects => { + // @ts-expect-error: Different context.
+ return objects[0] === self.customClass; + }, objectsHandle) + ).resolves.toBeTruthy(); }); it('should fail for disposed handles', async () => { const {page} = getTestState(); diff --git a/test/src/queryhandler.spec.ts b/test/src/queryhandler.spec.ts index 30e5d69254d..f90c6dcf9ad 100644 --- a/test/src/queryhandler.spec.ts +++ b/test/src/queryhandler.spec.ts @@ -94,6 +94,133 @@ describe('Query handler tests', function () { }); }); + describe('Text selectors', function () { + describe('in Page', function () { + it('should query existing element', async () => { + const {page} = getTestState(); + + await page.setContent('
test
'); + + expect(await page.$('text/test')).toBeTruthy(); + expect((await page.$$('text/test')).length).toBe(1); + }); + it('should return empty array for non-existing element', async () => { + const {page} = getTestState(); + + expect(await page.$('text/test')).toBeFalsy(); + expect((await page.$$('text/test')).length).toBe(0); + }); + it('should return first element', async () => { + const {page} = getTestState(); + + await page.setContent('
a
a
'); + + const element = await page.$('text/a'); + expect( + await element?.evaluate(e => { + return e.id; + }) + ).toBe('1'); + }); + it('should return multiple elements', async () => { + const {page} = getTestState(); + + await page.setContent('
a
a
'); + + const elements = await page.$$('text/a'); + expect(elements.length).toBe(2); + }); + it('should pierce shadow DOM', async () => { + const {page} = getTestState(); + + await page.evaluate(() => { + const div = document.createElement('div'); + const shadow = div.attachShadow({mode: 'open'}); + const diva = document.createElement('div'); + shadow.append(diva); + const divb = document.createElement('div'); + shadow.append(divb); + diva.innerHTML = 'a'; + divb.innerHTML = 'b'; + document.body.append(div); + }); + + const element = await page.$('text/a'); + expect( + await element?.evaluate(e => { + return e.textContent; + }) + ).toBe('a'); + }); + it('should query deeply nested text', async () => { + const {page} = getTestState(); + + await page.setContent('
a
b
'); + + const element = await page.$('text/a'); + expect( + await element?.evaluate(e => { + return e.textContent; + }) + ).toBe('a'); + }); + it('should query inputs', async () => { + const {page} = getTestState(); + + await page.setContent(''); + + const element = (await page.$( + 'text/a' + )) as ElementHandle; + expect( + await element?.evaluate(e => { + return e.value; + }) + ).toBe('a'); + }); + it('should not query radio', async () => { + const {page} = getTestState(); + + await page.setContent(''); + + expect(await page.$('text/a')).toBeNull(); + }); + it('should query text spanning multiple elements', async () => { + const {page} = getTestState(); + + await page.setContent('
a b
'); + + const element = await page.$('text/a b'); + expect( + await element?.evaluate(e => { + return e.textContent; + }) + ).toBe('a b'); + }); + }); + describe('in ElementHandles', function () { + it('should query existing element', async () => { + const {page} = getTestState(); + + await page.setContent('
a
'); + + const elementHandle = (await page.$('div'))!; + expect(await elementHandle.$(`text/a`)).toBeTruthy(); + expect((await elementHandle.$$(`text/a`)).length).toBe(1); + }); + + it('should return null for non-existing element', async () => { + const {page} = getTestState(); + + await page.setContent('
'); + + const elementHandle = (await page.$('div'))!; + expect(await elementHandle.$(`text/a`)).toBeFalsy(); + expect((await elementHandle.$$(`text/a`)).length).toBe(0); + }); + }); + }); + describe('XPath selectors', function () { describe('in Page', function () { it('should query existing element', async () => {