feat: add text query handler (#8956)

This commit is contained in:
jrandolf 2022-09-15 13:12:13 +02:00 committed by GitHub
parent 42cd6d04d6
commit 633e7cfdf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 377 additions and 34 deletions

View File

@ -518,7 +518,8 @@ export class IsolatedWorld {
} }
const node = (await PuppeteerUtil.createFunction(query)( const node = (await PuppeteerUtil.createFunction(query)(
root || document, root || document,
selector selector,
PuppeteerUtil
)) as Node | null; )) as Node | null;
return PuppeteerUtil.checkVisibility(node, visible); return PuppeteerUtil.checkVisibility(node, visible);
}, },

View File

@ -14,6 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
import PuppeteerUtil from '../injected/injected.js';
import {ariaHandler} from './AriaQueryHandler.js'; import {ariaHandler} from './AriaQueryHandler.js';
import {ElementHandle} from './ElementHandle.js'; import {ElementHandle} from './ElementHandle.js';
import {Frame} from './Frame.js'; import {Frame} from './Frame.js';
@ -37,6 +38,28 @@ export interface CustomQueryHandler {
queryAll?: (node: Node, selector: string) => Node[]; queryAll?: (node: Node, selector: string) => Node[];
} }
/**
* Shape of the built-in query handlers given to `createPuppeteerQueryHandler`.
*
* @remarks
* `createPuppeteerQueryHandler` hands each function to
* `element.evaluateHandle` together with the selector and the
* `puppeteerUtil` of the element's world. The third parameter is therefore
* how a handler reaches injected helpers such as `createTextContent`.
*
* @internal
*/
export interface InternalQueryHandler {
/**
* @param node - The node the search starts from.
* @param selector - The selector text to match.
* @param PuppeteerUtil - The injected utilities of the world being queried.
* @returns A {@link Node} matching the given `selector` from `node`, or
* `null` when nothing matches.
*/
queryOne?: (
node: Node,
selector: string,
PuppeteerUtil: PuppeteerUtil
) => Node | null;
/**
* @param node - The node the search starts from.
* @param selector - The selector text to match.
* @param PuppeteerUtil - The injected utilities of the world being queried.
* @returns Some {@link Node}s matching the given `selector` from `node`;
* an empty array when nothing matches.
*/
queryAll?: (
node: Node,
selector: string,
PuppeteerUtil: PuppeteerUtil
) => Node[];
}
/** /**
* @internal * @internal
*/ */
@ -72,14 +95,18 @@ export interface PuppeteerQueryHandler {
} }
function createPuppeteerQueryHandler( function createPuppeteerQueryHandler(
handler: CustomQueryHandler handler: InternalQueryHandler
): PuppeteerQueryHandler { ): PuppeteerQueryHandler {
const internalHandler: PuppeteerQueryHandler = {}; const internalHandler: PuppeteerQueryHandler = {};
if (handler.queryOne) { if (handler.queryOne) {
const queryOne = handler.queryOne; const queryOne = handler.queryOne;
internalHandler.queryOne = async (element, selector) => { internalHandler.queryOne = async (element, selector) => {
const jsHandle = await element.evaluateHandle(queryOne, selector); const jsHandle = await element.evaluateHandle(
queryOne,
selector,
await element.executionContext()._world!.puppeteerUtil
);
const elementHandle = jsHandle.asElement(); const elementHandle = jsHandle.asElement();
if (elementHandle) { if (elementHandle) {
return elementHandle; return elementHandle;
@ -121,7 +148,11 @@ function createPuppeteerQueryHandler(
if (handler.queryAll) { if (handler.queryAll) {
const queryAll = handler.queryAll; const queryAll = handler.queryAll;
internalHandler.queryAll = async (element, selector) => { internalHandler.queryAll = async (element, selector) => {
const jsHandle = await element.evaluateHandle(queryAll, selector); const jsHandle = await element.evaluateHandle(
queryAll,
selector,
await element.executionContext()._world!.puppeteerUtil
);
const properties = await jsHandle.getProperties(); const properties = await jsHandle.getProperties();
await jsHandle.dispose(); await jsHandle.dispose();
const result = []; const result = [];
@ -244,6 +275,59 @@ const xpathHandler = createPuppeteerQueryHandler({
}, },
}); });
/**
 * Query handler that matches nodes by the text they contain.
 *
 * @remarks
 * Both callbacks walk the tree depth-first, descending into an element's
 * shadow root when it has one, and prefer the deepest match: a node is only
 * reported when none of its descendants matched. A node matches when the
 * `full` text computed by `createTextContent` includes `selector`.
 *
 * When the matching node is a nested {@link ShadowRoot} (its own text matches
 * but none of its child elements do), the shadow root's host element is
 * reported instead of the fragment, so callers receive an element.
 *
 * NOTE(review): these callbacks are handed to `evaluateHandle` by
 * `createPuppeteerQueryHandler`, so they presumably must stay self-contained
 * and not close over module-level values.
 */
const textQueryHandler = createPuppeteerQueryHandler({
  /**
   * @returns The first, deepest node whose text includes `selector`, or
   * `null` when there is none.
   */
  queryOne: (element, selector, {createTextContent}) => {
    const search = (root: Node): Node | null => {
      for (const node of root.childNodes) {
        if (!(node instanceof Element)) {
          continue;
        }
        const matchedNode = search(node.shadowRoot ?? node);
        if (matchedNode) {
          return matchedNode;
        }
      }
      if (!createTextContent(root).full.includes(selector)) {
        return null;
      }
      // A shadow root reached during traversal is a document fragment, not
      // an element; report the element hosting it. The node the query
      // started from is always reported as-is.
      return root instanceof ShadowRoot && root !== element ? root.host : root;
    };
    return search(element);
  },
  /**
   * @returns Every deepest node whose text includes `selector`, in document
   * order; an empty array when there is none.
   */
  queryAll: (element, selector, {createTextContent}) => {
    const search = (root: Node): Node[] => {
      const results: Node[] = [];
      for (const node of root.childNodes) {
        if (!(node instanceof Element)) {
          continue;
        }
        for (const matchedNode of search(node.shadowRoot ?? node)) {
          results.push(matchedNode);
        }
      }
      // Descendant matches take precedence over the node itself.
      if (results.length > 0) {
        return results;
      }
      if (!createTextContent(root).full.includes(selector)) {
        return [];
      }
      // See queryOne: report the host element for a nested shadow root.
      return [
        root instanceof ShadowRoot && root !== element ? root.host : root,
      ];
    };
    return search(element);
  },
});
interface RegisteredQueryHandler { interface RegisteredQueryHandler {
handler: PuppeteerQueryHandler; handler: PuppeteerQueryHandler;
transformSelector?: (selector: string) => string; transformSelector?: (selector: string) => string;
@ -253,6 +337,7 @@ const INTERNAL_QUERY_HANDLERS = new Map<string, RegisteredQueryHandler>([
['aria', {handler: ariaHandler}], ['aria', {handler: ariaHandler}],
['pierce', {handler: pierceHandler}], ['pierce', {handler: pierceHandler}],
['xpath', {handler: xpathHandler}], ['xpath', {handler: xpathHandler}],
['text', {handler: textQueryHandler}],
]); ]);
const QUERY_HANDLERS = new Map<string, RegisteredQueryHandler>(); const QUERY_HANDLERS = new Map<string, RegisteredQueryHandler>();

View File

@ -0,0 +1,98 @@
/**
 * A node that exposes a string `value` suitable for text matching.
 */
interface NonTrivialValueNode extends Node {
  value: string;
}

/**
 * `<input>` types whose `value` is not considered for text matching.
 */
const TRIVIAL_VALUE_INPUT_TYPES = new Set(['checkbox', 'image', 'radio']);

/**
 * Determines if the node has a non-trivial value property.
 *
 * @param node - The node to classify.
 * @returns `true` for `<select>` and `<textarea>` elements, and for
 * `<input>` elements whose type is not listed in
 * {@link TRIVIAL_VALUE_INPUT_TYPES}; `false` for everything else.
 */
const isNonTrivialValueNode = (node: Node): node is NonTrivialValueNode => {
  if (node instanceof HTMLInputElement) {
    return !TRIVIAL_VALUE_INPUT_TYPES.has(node.type);
  }
  return (
    node instanceof HTMLSelectElement || node instanceof HTMLTextAreaElement
  );
};
/**
 * Upper-cased names of nodes whose text is never matched.
 */
const UNSUITABLE_NODE_NAMES = new Set(['SCRIPT', 'STYLE']);

/**
 * Determines whether a given node is suitable for text matching.
 *
 * @param node - The node to check.
 * @returns `false` for nodes named in {@link UNSUITABLE_NODE_NAMES} and for
 * nodes contained in `document.head`; `true` otherwise.
 */
const isSuitableNodeForTextMatching = (node: Node): boolean => {
  // `nodeName` is only upper-cased for HTML-namespace elements; an SVG
  // `<style>` or `<script>` reports a lower-case name, so normalise before
  // the lookup.
  return (
    !UNSUITABLE_NODE_NAMES.has(node.nodeName.toUpperCase()) &&
    !document.head?.contains(node)
  );
};
/**
* Text computed for a node by `createTextContent`.
*
* @internal
*/
export type TextContent = {
/**
* Contains the full text of the node: for a node with a non-trivial `value`
* this is that value; otherwise it is the text of the node's direct text
* children and of its element children, followed by the text of its shadow
* root when it has one.
*/
full: string;
/**
* Contains the text immediately beneath the node: one entry per run of
* consecutive direct text-node children (a non-text child ends the run), or
* a single entry holding the `value` for a node with a non-trivial `value`.
*/
immediate: string[];
};
/**
 * Maps {@link Node}s to their computed {@link TextContent}.
 *
 * Keyed weakly so that caching a node does not keep it alive after it has
 * been removed from the page.
 */
const textContentCache = new WeakMap<Node, TextContent>();

/**
 * Nodes that are already registered with {@link textChangeObserver}.
 */
const observedNodes = new WeakSet<Node>();

/**
 * Observer used to invalidate cached text. Created on first use so that
 * merely loading this module does not require `MutationObserver` to exist.
 */
let textChangeObserver: MutationObserver | undefined;

/**
 * Drops the cached text of `node` and of every ancestor, crossing shadow
 * boundaries through the shadow root's host, because each ancestor's `full`
 * text embeds the text of its descendants.
 */
const eraseFromCache = (node: Node | null): void => {
  while (node) {
    textContentCache.delete(node);
    node = node instanceof ShadowRoot ? node.host : node.parentNode;
  }
};

/**
 * Invalidates the cache for every mutated node in `mutations`.
 */
const applyTextChanges = (mutations: MutationRecord[]): void => {
  for (const mutation of mutations) {
    eraseFromCache(mutation.target);
  }
};

/**
 * Registers `root` for invalidation when its children or the character data
 * beneath it change. Does nothing if `root` is already registered.
 */
const observeTextChanges = (root: Node): void => {
  if (observedNodes.has(root)) {
    return;
  }
  if (!textChangeObserver) {
    textChangeObserver = new MutationObserver(applyTextChanges);
  }
  // `subtree` is required to be told about character data changes, since
  // those are reported on the text nodes rather than on `root`.
  textChangeObserver.observe(root, {
    childList: true,
    characterData: true,
    subtree: true,
  });
  observedNodes.add(root);
};

/**
 * Computes (or reads from the cache) the text content of `root`, recursing
 * into element children and the shadow root.
 */
const buildTextContent = (root: Node): TextContent => {
  const cached = textContentCache.get(root);
  if (cached) {
    return cached;
  }
  const value: TextContent = {full: '', immediate: []};
  if (!isSuitableNodeForTextMatching(root)) {
    // Unsuitable nodes contribute no text and are not cached.
    return value;
  }
  if (isNonTrivialValueNode(root)) {
    value.full = root.value;
    value.immediate.push(root.value);
    // Value edits do not produce mutation records, so invalidate on the next
    // `input` event instead. Values assigned from script do not fire that
    // event and are therefore not detected.
    root.addEventListener(
      'input',
      () => {
        eraseFromCache(root);
      },
      {once: true, capture: true}
    );
  } else {
    let currentImmediate = '';
    for (let child = root.firstChild; child; child = child.nextSibling) {
      if (child.nodeType === Node.TEXT_NODE) {
        value.full += child.nodeValue ?? '';
        currentImmediate += child.nodeValue ?? '';
        continue;
      }
      // Any non-text child ends the current run of immediate text.
      if (currentImmediate) {
        value.immediate.push(currentImmediate);
      }
      currentImmediate = '';
      if (child.nodeType === Node.ELEMENT_NODE) {
        value.full += buildTextContent(child).full;
      }
    }
    if (currentImmediate) {
      value.immediate.push(currentImmediate);
    }
    if (root instanceof Element && root.shadowRoot) {
      value.full += buildTextContent(root.shadowRoot).full;
    }
    observeTextChanges(root);
  }
  textContentCache.set(root, value);
  return value;
};

/**
 * Builds the text content of a node using some custom logic.
 *
 * @remarks
 * The primary reason this function exists is due to {@link ShadowRoot}s not having
 * text content.
 *
 * Results are cached per node. A cached entry is discarded, together with
 * the entries of the node's ancestors, when a mutation is observed beneath
 * the node or an `input` event fires on a value-bearing node.
 *
 * @param root - The node to compute the text for.
 * @returns The computed text; empty for nodes unsuitable for text matching.
 *
 * @internal
 */
export const createTextContent = (root: Node): TextContent => {
  // Mutation records are delivered asynchronously; apply any that are still
  // queued so a lookup made right after a DOM change never reads stale text.
  if (textChangeObserver) {
    applyTextChanges(textChangeObserver.takeRecords());
  }
  return buildTextContent(root);
};

View File

@ -1,10 +1,12 @@
import {createDeferredPromise} from '../util/DeferredPromise.js'; import {createDeferredPromise} from '../util/DeferredPromise.js';
import * as util from './util.js'; import * as util from './util.js';
import * as Poller from './Poller.js'; import * as Poller from './Poller.js';
import * as TextContent from './TextContent.js';
const PuppeteerUtil = Object.freeze({ const PuppeteerUtil = Object.freeze({
...util, ...util,
...Poller, ...Poller,
...TextContent,
createDeferredPromise, createDeferredPromise,
}); });

View File

@ -545,39 +545,69 @@ describe('Page', function () {
it('should work', async () => { it('should work', async () => {
const {page} = getTestState(); const {page} = getTestState();
// Instantiate an object // Create a custom class
await page.evaluate(() => { const classHandle = await page.evaluateHandle(() => {
return ((globalThis as any).set = new Set(['hello', 'world'])); return class CustomClass {};
}); });
const prototypeHandle = await page.evaluateHandle(() => {
return Set.prototype;
});
const objectsHandle = await page.queryObjects(prototypeHandle);
const count = await page.evaluate(objects => {
return objects.length;
}, objectsHandle);
expect(count).toBe(1);
const values = await page.evaluate(objects => {
return Array.from(objects[0]!.values());
}, objectsHandle);
expect(values).toEqual(['hello', 'world']);
});
it('should work for non-blank page', async () => {
const {page, server} = getTestState();
// Instantiate an object // Create an instance.
await page.goto(server.EMPTY_PAGE); await page.evaluate(CustomClass => {
await page.evaluate(() => { // @ts-expect-error: Different context.
return ((globalThis as any).set = new Set(['hello', 'world'])); self.customClass = new CustomClass();
}); }, classHandle);
const prototypeHandle = await page.evaluateHandle(() => {
return Set.prototype; // Validate only one has been added.
}); const prototypeHandle = await page.evaluateHandle(CustomClass => {
return CustomClass.prototype;
}, classHandle);
const objectsHandle = await page.queryObjects(prototypeHandle); const objectsHandle = await page.queryObjects(prototypeHandle);
const count = await page.evaluate(objects => { await expect(
return objects.length; page.evaluate(objects => {
}, objectsHandle); return objects.length;
expect(count).toBe(1); }, objectsHandle)
).resolves.toBe(1);
// Check that instances.
await expect(
page.evaluate(objects => {
// @ts-expect-error: Different context.
return objects[0] === self.customClass;
}, objectsHandle)
).resolves.toBeTruthy();
});
it('should work for non-trivial page', async () => {
const {page, server} = getTestState();
await page.goto(server.EMPTY_PAGE);
// Create a custom class
const classHandle = await page.evaluateHandle(() => {
return class CustomClass {};
});
// Create an instance.
await page.evaluate(CustomClass => {
// @ts-expect-error: Different context.
self.customClass = new CustomClass();
}, classHandle);
// Validate only one has been added.
const prototypeHandle = await page.evaluateHandle(CustomClass => {
return CustomClass.prototype;
}, classHandle);
const objectsHandle = await page.queryObjects(prototypeHandle);
await expect(
page.evaluate(objects => {
return objects.length;
}, objectsHandle)
).resolves.toBe(1);
// Check that instances.
await expect(
page.evaluate(objects => {
// @ts-expect-error: Different context.
return objects[0] === self.customClass;
}, objectsHandle)
).resolves.toBeTruthy();
}); });
it('should fail for disposed handles', async () => { it('should fail for disposed handles', async () => {
const {page} = getTestState(); const {page} = getTestState();

View File

@ -94,6 +94,133 @@ describe('Query handler tests', function () {
}); });
}); });
// Covers the `text/` query handler through both `Page` and `ElementHandle`
// entry points.
describe('Text selectors', function () {
  describe('in Page', function () {
    it('should query existing element', async () => {
      const {page} = getTestState();

      await page.setContent('<section>test</section>');

      expect(await page.$('text/test')).toBeTruthy();
      expect((await page.$$('text/test')).length).toBe(1);
    });
    it('should return empty array for non-existing element', async () => {
      const {page} = getTestState();

      expect(await page.$('text/test')).toBeFalsy();
      expect((await page.$$('text/test')).length).toBe(0);
    });
    it('should return first element', async () => {
      const {page} = getTestState();

      await page.setContent('<div id="1">a</div><div>a</div>');

      const element = await page.$('text/a');
      expect(
        await element?.evaluate(e => {
          return e.id;
        })
      ).toBe('1');
    });
    it('should return multiple elements', async () => {
      const {page} = getTestState();

      await page.setContent('<div>a</div><div>a</div>');

      const elements = await page.$$('text/a');
      expect(elements.length).toBe(2);
    });
    it('should pierce shadow DOM', async () => {
      const {page} = getTestState();

      await page.evaluate(() => {
        const div = document.createElement('div');
        const shadow = div.attachShadow({mode: 'open'});
        const diva = document.createElement('div');
        shadow.append(diva);
        const divb = document.createElement('div');
        shadow.append(divb);
        diva.innerHTML = 'a';
        divb.innerHTML = 'b';
        document.body.append(div);
      });

      const element = await page.$('text/a');
      expect(
        await element?.evaluate(e => {
          return e.textContent;
        })
      ).toBe('a');
    });
    it('should query deeply nested text', async () => {
      const {page} = getTestState();

      await page.setContent('<div><div>a</div><div>b</div></div>');

      const element = await page.$('text/a');
      expect(
        await element?.evaluate(e => {
          return e.textContent;
        })
      ).toBe('a');
    });
    it('should query inputs', async () => {
      const {page} = getTestState();

      await page.setContent('<input value="a">');

      const element = (await page.$(
        'text/a'
      )) as ElementHandle<HTMLInputElement>;
      expect(
        await element?.evaluate(e => {
          return e.value;
        })
      ).toBe('a');
    });
    it('should not query radio', async () => {
      const {page} = getTestState();

      // Must be a real radio input: an unknown `<radio>` tag has no value
      // handling at all, which would make this assertion pass vacuously.
      await page.setContent('<input type="radio" value="a">');

      expect(await page.$('text/a')).toBeNull();
    });
    it('should query text spanning multiple elements', async () => {
      const {page} = getTestState();

      await page.setContent('<div><span>a</span> <span>b</span></div>');

      const element = await page.$('text/a b');
      expect(
        await element?.evaluate(e => {
          return e.textContent;
        })
      ).toBe('a b');
    });
  });
  describe('in ElementHandles', function () {
    it('should query existing element', async () => {
      const {page} = getTestState();

      await page.setContent('<div class="a"><span>a</span></div>');

      const elementHandle = (await page.$('div'))!;
      expect(await elementHandle.$(`text/a`)).toBeTruthy();
      expect((await elementHandle.$$(`text/a`)).length).toBe(1);
    });
    it('should return null for non-existing element', async () => {
      const {page} = getTestState();

      await page.setContent('<div class="a"></div>');

      const elementHandle = (await page.$('div'))!;
      expect(await elementHandle.$(`text/a`)).toBeFalsy();
      expect((await elementHandle.$$(`text/a`)).length).toBe(0);
    });
  });
});
describe('XPath selectors', function () { describe('XPath selectors', function () {
describe('in Page', function () { describe('in Page', function () {
it('should query existing element', async () => { it('should query existing element', async () => {