mirror of
https://github.com/puppeteer/puppeteer
synced 2024-06-14 14:02:48 +00:00
feat: add text query handler (#8956)
This commit is contained in:
parent
42cd6d04d6
commit
633e7cfdf9
@ -518,7 +518,8 @@ export class IsolatedWorld {
|
||||
}
|
||||
const node = (await PuppeteerUtil.createFunction(query)(
|
||||
root || document,
|
||||
selector
|
||||
selector,
|
||||
PuppeteerUtil
|
||||
)) as Node | null;
|
||||
return PuppeteerUtil.checkVisibility(node, visible);
|
||||
},
|
||||
|
@ -14,6 +14,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import PuppeteerUtil from '../injected/injected.js';
|
||||
import {ariaHandler} from './AriaQueryHandler.js';
|
||||
import {ElementHandle} from './ElementHandle.js';
|
||||
import {Frame} from './Frame.js';
|
||||
@ -37,6 +38,28 @@ export interface CustomQueryHandler {
|
||||
queryAll?: (node: Node, selector: string) => Node[];
|
||||
}
|
||||
|
||||
/**
 * Contract for the query handlers that ship with Puppeteer itself.
 *
 * Each callback receives the node to search from, the selector string and a
 * `PuppeteerUtil` value as its third argument. Both callbacks are optional.
 *
 * @internal
 */
export interface InternalQueryHandler {
  /**
   * Looks up a single match.
   *
   * @returns A {@link Node} matching the given `selector` from {@link node},
   * or `null` when there is none.
   */
  queryOne?: (
    node: Node,
    selector: string,
    PuppeteerUtil: PuppeteerUtil
  ) => Node | null;
  /**
   * Looks up every match.
   *
   * @returns Some {@link Node}s matching the given `selector` from {@link node}.
   */
  queryAll?: (
    node: Node,
    selector: string,
    PuppeteerUtil: PuppeteerUtil
  ) => Node[];
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
@ -72,14 +95,18 @@ export interface PuppeteerQueryHandler {
|
||||
}
|
||||
|
||||
function createPuppeteerQueryHandler(
|
||||
handler: CustomQueryHandler
|
||||
handler: InternalQueryHandler
|
||||
): PuppeteerQueryHandler {
|
||||
const internalHandler: PuppeteerQueryHandler = {};
|
||||
|
||||
if (handler.queryOne) {
|
||||
const queryOne = handler.queryOne;
|
||||
internalHandler.queryOne = async (element, selector) => {
|
||||
const jsHandle = await element.evaluateHandle(queryOne, selector);
|
||||
const jsHandle = await element.evaluateHandle(
|
||||
queryOne,
|
||||
selector,
|
||||
await element.executionContext()._world!.puppeteerUtil
|
||||
);
|
||||
const elementHandle = jsHandle.asElement();
|
||||
if (elementHandle) {
|
||||
return elementHandle;
|
||||
@ -121,7 +148,11 @@ function createPuppeteerQueryHandler(
|
||||
if (handler.queryAll) {
|
||||
const queryAll = handler.queryAll;
|
||||
internalHandler.queryAll = async (element, selector) => {
|
||||
const jsHandle = await element.evaluateHandle(queryAll, selector);
|
||||
const jsHandle = await element.evaluateHandle(
|
||||
queryAll,
|
||||
selector,
|
||||
await element.executionContext()._world!.puppeteerUtil
|
||||
);
|
||||
const properties = await jsHandle.getProperties();
|
||||
await jsHandle.dispose();
|
||||
const result = [];
|
||||
@ -244,6 +275,59 @@ const xpathHandler = createPuppeteerQueryHandler({
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Query handler that matches nodes by the text they contain.
 *
 * Both callbacks walk the element children of the starting node depth-first,
 * descending into an element's shadow root instead of its light DOM when a
 * shadow root is present. A node is reported only when none of its
 * descendants matched, so the deepest nodes whose computed text includes the
 * selector win.
 */
const textQueryHandler = createPuppeteerQueryHandler({
  queryOne: (element, selector, {createTextContent}) => {
    // Returns the first deepest match under `root`, or `root` itself when
    // only its own text contains the selector.
    const findFirst = (root: Node): Node | null => {
      for (const child of root.childNodes) {
        if (!(child instanceof Element)) {
          continue;
        }
        const hit = findFirst(child.shadowRoot ? child.shadowRoot : child);
        if (hit) {
          return hit;
        }
      }
      return createTextContent(root).full.includes(selector) ? root : null;
    };
    return findFirst(element);
  },

  queryAll: (element, selector, {createTextContent}) => {
    // Collects every deepest match under `root`; `root` is only reported
    // when no descendant matched.
    const collect = (root: Node): Node[] => {
      const fromDescendants: Node[] = [];
      for (const child of root.childNodes) {
        if (!(child instanceof Element)) {
          continue;
        }
        const nested = collect(child.shadowRoot ? child.shadowRoot : child);
        for (const match of nested) {
          fromDescendants.push(match);
        }
      }
      if (fromDescendants.length > 0) {
        return fromDescendants;
      }
      return createTextContent(root).full.includes(selector) ? [root] : [];
    };
    return collect(element);
  },
});
|
||||
|
||||
interface RegisteredQueryHandler {
|
||||
handler: PuppeteerQueryHandler;
|
||||
transformSelector?: (selector: string) => string;
|
||||
@ -253,6 +337,7 @@ const INTERNAL_QUERY_HANDLERS = new Map<string, RegisteredQueryHandler>([
|
||||
['aria', {handler: ariaHandler}],
|
||||
['pierce', {handler: pierceHandler}],
|
||||
['xpath', {handler: xpathHandler}],
|
||||
['text', {handler: textQueryHandler}],
|
||||
]);
|
||||
const QUERY_HANDLERS = new Map<string, RegisteredQueryHandler>();
|
||||
|
||||
|
98
src/injected/TextContent.ts
Normal file
98
src/injected/TextContent.ts
Normal file
@ -0,0 +1,98 @@
|
||||
interface NonTrivialValueNode extends Node {
|
||||
value: string;
|
||||
}
|
||||
|
||||
const TRIVIAL_VALUE_INPUT_TYPES = new Set(['checkbox', 'image', 'radio']);
|
||||
|
||||
/**
 * Type guard telling whether `node` is a form control whose `value` property
 * should be used as its text: any `<select>`, any `<textarea>`, or an
 * `<input>` whose `type` is not listed in `TRIVIAL_VALUE_INPUT_TYPES`.
 */
const isNonTrivialValueNode = (node: Node): node is NonTrivialValueNode => {
  return (
    node instanceof HTMLSelectElement ||
    node instanceof HTMLTextAreaElement ||
    (node instanceof HTMLInputElement &&
      !TRIVIAL_VALUE_INPUT_TYPES.has(node.type))
  );
};
|
||||
|
||||
const UNSUITABLE_NODE_NAMES = new Set(['SCRIPT', 'STYLE']);
|
||||
|
||||
/**
 * Decides whether a node may take part in text matching.
 *
 * A node is rejected when its `nodeName` is listed in
 * `UNSUITABLE_NODE_NAMES`, or when `document.head` exists and contains it.
 */
const isSuitableNodeForTextMatching = (node: Node): boolean => {
  if (UNSUITABLE_NODE_NAMES.has(node.nodeName)) {
    return false;
  }
  // `document.head` can be absent; in that case nothing is excluded here.
  const insideHead = document.head?.contains(node);
  return !insideHead;
};
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
export type TextContent = {
|
||||
// Contains the full text of the node.
|
||||
full: string;
|
||||
// Contains the text immediately beneath the node.
|
||||
immediate: string[];
|
||||
};
|
||||
|
||||
/**
 * Maps {@link Node}s to their computed {@link TextContent}.
 *
 * A `WeakMap` is used so that cached entries never keep detached DOM nodes
 * alive.
 */
const textContentCache = new WeakMap<Node, TextContent>();

/**
 * Nodes that are already registered with {@link textChangeObserver}.
 */
const observedNodes = new WeakSet<Node>();

/**
 * Observer that invalidates cached text when the DOM changes. Created lazily
 * on first use so that merely loading this module has no side effects.
 */
let textChangeObserver: MutationObserver | undefined;

/**
 * Drops the cached text of `node` and of every ancestor, because an
 * ancestor's `full` text embeds the text of its descendants. Shadow roots are
 * crossed through their host element.
 */
const eraseFromCache = (node: Node | null): void => {
  while (node) {
    textContentCache.delete(node);
    node = node instanceof ShadowRoot ? node.host : node.parentNode;
  }
};

/**
 * Starts watching `root` for child-list and character-data changes so that
 * stale cache entries are erased when its content changes.
 */
const observeTextChanges = (root: Node): void => {
  if (observedNodes.has(root)) {
    return;
  }
  if (!textChangeObserver) {
    textChangeObserver = new MutationObserver(mutations => {
      for (const mutation of mutations) {
        eraseFromCache(mutation.target);
      }
    });
  }
  textChangeObserver.observe(root, {
    childList: true,
    characterData: true,
    // Text nodes are never observed directly, so edits to them are only
    // reported when the whole subtree is watched.
    subtree: true,
  });
  observedNodes.add(root);
};

/**
 * Builds the text content of a node using some custom logic.
 *
 * @remarks
 * The primary reason this function exists is due to {@link ShadowRoot}s not having
 * text content.
 *
 * Results are cached per node. The cache entry of a node (and of its
 * ancestors) is erased when the node's subtree mutates, or, for form controls,
 * when an `input` event fires on the control, so repeated queries observe
 * up-to-date text.
 *
 * NOTE: assigning a control's `value` from script does not fire an `input`
 * event, so such a change is not picked up until another invalidation occurs.
 *
 * @param root - The node whose text should be computed.
 * @returns The `full` text of the node (including element descendants and an
 * attached shadow root) and the `immediate` runs of text directly beneath it.
 * Nodes that are unsuitable for text matching yield empty content.
 *
 * @internal
 */
export const createTextContent = (root: Node): TextContent => {
  let value = textContentCache.get(root);
  if (value) {
    return value;
  }
  value = {full: '', immediate: []};
  if (!isSuitableNodeForTextMatching(root)) {
    // Unsuitable nodes are cheap to recompute, so they are not cached.
    return value;
  }
  let currentImmediate = '';
  if (isNonTrivialValueNode(root)) {
    value.full = root.value;
    value.immediate.push(root.value);
    // The listener is re-registered each time the value is cached again.
    root.addEventListener(
      'input',
      () => {
        eraseFromCache(root);
      },
      {once: true, capture: true}
    );
  } else {
    for (let child = root.firstChild; child; child = child.nextSibling) {
      if (child.nodeType === Node.TEXT_NODE) {
        value.full += child.nodeValue ?? '';
        currentImmediate += child.nodeValue ?? '';
        continue;
      }
      // A non-text child ends the current run of immediate text.
      if (currentImmediate) {
        value.immediate.push(currentImmediate);
      }
      currentImmediate = '';
      if (child.nodeType === Node.ELEMENT_NODE) {
        value.full += createTextContent(child).full;
      }
    }
    if (currentImmediate) {
      value.immediate.push(currentImmediate);
    }
    if (root instanceof Element && root.shadowRoot) {
      value.full += createTextContent(root.shadowRoot).full;
    }
    observeTextChanges(root);
  }
  textContentCache.set(root, value);
  return value;
};
|
@ -1,10 +1,12 @@
|
||||
import {createDeferredPromise} from '../util/DeferredPromise.js';
|
||||
import * as util from './util.js';
|
||||
import * as Poller from './Poller.js';
|
||||
import * as TextContent from './TextContent.js';
|
||||
|
||||
const PuppeteerUtil = Object.freeze({
|
||||
...util,
|
||||
...Poller,
|
||||
...TextContent,
|
||||
createDeferredPromise,
|
||||
});
|
||||
|
||||
|
@ -545,39 +545,69 @@ describe('Page', function () {
|
||||
it('should work', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
// Instantiate an object
|
||||
await page.evaluate(() => {
|
||||
return ((globalThis as any).set = new Set(['hello', 'world']));
|
||||
// Create a custom class
|
||||
const classHandle = await page.evaluateHandle(() => {
|
||||
return class CustomClass {};
|
||||
});
|
||||
const prototypeHandle = await page.evaluateHandle(() => {
|
||||
return Set.prototype;
|
||||
});
|
||||
const objectsHandle = await page.queryObjects(prototypeHandle);
|
||||
const count = await page.evaluate(objects => {
|
||||
return objects.length;
|
||||
}, objectsHandle);
|
||||
expect(count).toBe(1);
|
||||
const values = await page.evaluate(objects => {
|
||||
return Array.from(objects[0]!.values());
|
||||
}, objectsHandle);
|
||||
expect(values).toEqual(['hello', 'world']);
|
||||
});
|
||||
it('should work for non-blank page', async () => {
|
||||
const {page, server} = getTestState();
|
||||
|
||||
// Instantiate an object
|
||||
await page.goto(server.EMPTY_PAGE);
|
||||
await page.evaluate(() => {
|
||||
return ((globalThis as any).set = new Set(['hello', 'world']));
|
||||
});
|
||||
const prototypeHandle = await page.evaluateHandle(() => {
|
||||
return Set.prototype;
|
||||
});
|
||||
// Create an instance.
|
||||
await page.evaluate(CustomClass => {
|
||||
// @ts-expect-error: Different context.
|
||||
self.customClass = new CustomClass();
|
||||
}, classHandle);
|
||||
|
||||
// Validate only one has been added.
|
||||
const prototypeHandle = await page.evaluateHandle(CustomClass => {
|
||||
return CustomClass.prototype;
|
||||
}, classHandle);
|
||||
const objectsHandle = await page.queryObjects(prototypeHandle);
|
||||
const count = await page.evaluate(objects => {
|
||||
return objects.length;
|
||||
}, objectsHandle);
|
||||
expect(count).toBe(1);
|
||||
await expect(
|
||||
page.evaluate(objects => {
|
||||
return objects.length;
|
||||
}, objectsHandle)
|
||||
).resolves.toBe(1);
|
||||
|
||||
// Check that the queried object is the instance created above.
|
||||
await expect(
|
||||
page.evaluate(objects => {
|
||||
// @ts-expect-error: Different context.
|
||||
return objects[0] === self.customClass;
|
||||
}, objectsHandle)
|
||||
).resolves.toBeTruthy();
|
||||
});
|
||||
it('should work for non-trivial page', async () => {
|
||||
const {page, server} = getTestState();
|
||||
await page.goto(server.EMPTY_PAGE);
|
||||
|
||||
// Create a custom class
|
||||
const classHandle = await page.evaluateHandle(() => {
|
||||
return class CustomClass {};
|
||||
});
|
||||
|
||||
// Create an instance.
|
||||
await page.evaluate(CustomClass => {
|
||||
// @ts-expect-error: Different context.
|
||||
self.customClass = new CustomClass();
|
||||
}, classHandle);
|
||||
|
||||
// Validate only one has been added.
|
||||
const prototypeHandle = await page.evaluateHandle(CustomClass => {
|
||||
return CustomClass.prototype;
|
||||
}, classHandle);
|
||||
const objectsHandle = await page.queryObjects(prototypeHandle);
|
||||
await expect(
|
||||
page.evaluate(objects => {
|
||||
return objects.length;
|
||||
}, objectsHandle)
|
||||
).resolves.toBe(1);
|
||||
|
||||
// Check that the queried object is the instance created above.
|
||||
await expect(
|
||||
page.evaluate(objects => {
|
||||
// @ts-expect-error: Different context.
|
||||
return objects[0] === self.customClass;
|
||||
}, objectsHandle)
|
||||
).resolves.toBeTruthy();
|
||||
});
|
||||
it('should fail for disposed handles', async () => {
|
||||
const {page} = getTestState();
|
||||
|
@ -94,6 +94,133 @@ describe('Query handler tests', function () {
|
||||
});
|
||||
});
|
||||
|
||||
describe('Text selectors', function () {
|
||||
describe('in Page', function () {
|
||||
it('should query existing element', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.setContent('<section>test</section>');
|
||||
|
||||
expect(await page.$('text/test')).toBeTruthy();
|
||||
expect((await page.$$('text/test')).length).toBe(1);
|
||||
});
|
||||
it('should return empty array for non-existing element', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
expect(await page.$('text/test')).toBeFalsy();
|
||||
expect((await page.$$('text/test')).length).toBe(0);
|
||||
});
|
||||
it('should return first element', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.setContent('<div id="1">a</div><div>a</div>');
|
||||
|
||||
const element = await page.$('text/a');
|
||||
expect(
|
||||
await element?.evaluate(e => {
|
||||
return e.id;
|
||||
})
|
||||
).toBe('1');
|
||||
});
|
||||
it('should return multiple elements', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.setContent('<div>a</div><div>a</div>');
|
||||
|
||||
const elements = await page.$$('text/a');
|
||||
expect(elements.length).toBe(2);
|
||||
});
|
||||
it('should pierce shadow DOM', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.evaluate(() => {
|
||||
const div = document.createElement('div');
|
||||
const shadow = div.attachShadow({mode: 'open'});
|
||||
const diva = document.createElement('div');
|
||||
shadow.append(diva);
|
||||
const divb = document.createElement('div');
|
||||
shadow.append(divb);
|
||||
diva.innerHTML = 'a';
|
||||
divb.innerHTML = 'b';
|
||||
document.body.append(div);
|
||||
});
|
||||
|
||||
const element = await page.$('text/a');
|
||||
expect(
|
||||
await element?.evaluate(e => {
|
||||
return e.textContent;
|
||||
})
|
||||
).toBe('a');
|
||||
});
|
||||
it('should query deeply nested text', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.setContent('<div><div>a</div><div>b</div></div>');
|
||||
|
||||
const element = await page.$('text/a');
|
||||
expect(
|
||||
await element?.evaluate(e => {
|
||||
return e.textContent;
|
||||
})
|
||||
).toBe('a');
|
||||
});
|
||||
it('should query inputs', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.setContent('<input value="a">');
|
||||
|
||||
const element = (await page.$(
|
||||
'text/a'
|
||||
)) as ElementHandle<HTMLInputElement>;
|
||||
expect(
|
||||
await element?.evaluate(e => {
|
||||
return e.value;
|
||||
})
|
||||
).toBe('a');
|
||||
});
|
||||
it('should not query radio', async () => {
  const {page} = getTestState();

  // Use a real radio input: an unknown `<radio>` element would never be
  // treated as a form control, so the test would pass without exercising the
  // exclusion of radio values from text matching.
  await page.setContent('<input type="radio" value="a">');

  expect(await page.$('text/a')).toBeNull();
});
|
||||
it('should query text spanning multiple elements', async () => {
  const {page} = getTestState();

  // The text "a b" only exists on the wrapping <div>, not on either <span>.
  await page.setContent('<div><span>a</span> <span>b</span></div>');

  const element = await page.$('text/a b');
  expect(
    await element?.evaluate(e => {
      return e.textContent;
    })
  ).toBe('a b');
});
|
||||
});
|
||||
describe('in ElementHandles', function () {
|
||||
it('should query existing element', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.setContent('<div class="a"><span>a</span></div>');
|
||||
|
||||
const elementHandle = (await page.$('div'))!;
|
||||
expect(await elementHandle.$(`text/a`)).toBeTruthy();
|
||||
expect((await elementHandle.$$(`text/a`)).length).toBe(1);
|
||||
});
|
||||
|
||||
it('should return null for non-existing element', async () => {
|
||||
const {page} = getTestState();
|
||||
|
||||
await page.setContent('<div class="a"></div>');
|
||||
|
||||
const elementHandle = (await page.$('div'))!;
|
||||
expect(await elementHandle.$(`text/a`)).toBeFalsy();
|
||||
expect((await elementHandle.$$(`text/a`)).length).toBe(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('XPath selectors', function () {
|
||||
describe('in Page', function () {
|
||||
it('should query existing element', async () => {
|
||||
|
Loading…
Reference in New Issue
Block a user