From 2a2af7134f267cdde8665c17c4b34277dc92ae70 Mon Sep 17 00:00:00 2001 From: jrandolf <101637635+jrandolf@users.noreply.github.com> Date: Thu, 15 Sep 2022 18:48:55 +0200 Subject: [PATCH] chore: incrementally erase cache on text change (#8961) --- src/common/QueryHandler.ts | 51 +------- src/injected/CSSSelector.ts | 198 ++++++++++++++++++++++++++++++ src/injected/PathPart.ts | 15 +++ src/injected/Poller.ts | 1 - src/injected/TextContent.ts | 43 ++++++- src/injected/TextSelector.ts | 98 +++++++++++++++ src/injected/XPath.ts | 128 +++++++++++++++++++ src/injected/injected.ts | 2 + test/src/computeselectors.spec.ts | 61 +++++++++ test/src/queryhandler.spec.ts | 67 ++++++++++ utils/generate_sources.ts | 2 +- 11 files changed, 615 insertions(+), 51 deletions(-) create mode 100644 src/injected/CSSSelector.ts create mode 100644 src/injected/PathPart.ts create mode 100644 src/injected/TextSelector.ts create mode 100644 src/injected/XPath.ts create mode 100644 test/src/computeselectors.spec.ts diff --git a/src/common/QueryHandler.ts b/src/common/QueryHandler.ts index 7f29ae31..e3290a80 100644 --- a/src/common/QueryHandler.ts +++ b/src/common/QueryHandler.ts @@ -276,55 +276,12 @@ const xpathHandler = createPuppeteerQueryHandler({ }); const textQueryHandler = createPuppeteerQueryHandler({ - queryOne: (element, selector, {createTextContent}) => { - const search = (root: Node): Node | null => { - for (const node of root.childNodes) { - if (node instanceof Element) { - let matchedNode: Node | null; - if (node.shadowRoot) { - matchedNode = search(node.shadowRoot); - } else { - matchedNode = search(node); - } - if (matchedNode) { - return matchedNode; - } - } - } - const textContent = createTextContent(root); - if (textContent.full.includes(selector)) { - return root; - } - return null; - }; - return search(element); + queryOne: (element, selector, {textQuerySelector}) => { + return textQuerySelector(selector, element); }, - queryAll: (element, selector, {createTextContent}) => { 
- const search = (root: Node): Node[] => { - let results: Node[] = []; - for (const node of root.childNodes) { - if (node instanceof Element) { - let matchedNodes: Node[]; - if (node.shadowRoot) { - matchedNodes = search(node.shadowRoot); - } else { - matchedNodes = search(node); - } - results = results.concat(matchedNodes); - } - } - if (results.length > 0) { - return results; - } - - const textContent = createTextContent(root); - if (textContent.full.includes(selector)) { - return [root]; - } - return []; - }; - return search(element); + queryAll: (element, selector, {textQuerySelectorAll}) => { + return textQuerySelectorAll(selector, element); }, }); diff --git a/src/injected/CSSSelector.ts b/src/injected/CSSSelector.ts new file mode 100644 index 00000000..3e6b705b --- /dev/null +++ b/src/injected/CSSSelector.ts @@ -0,0 +1,198 @@ +import {SelectorPart} from './PathPart.js'; + +const getCSSNodeName = (node: Node): string => { + // If node is not an element, it's case sensitive + if (!(node instanceof Element)) { + return node.nodeName; + } + + // If the names are different lengths, there is a prefix and it's case sensitive + if (node.localName.length !== node.nodeName.length) { + return node.nodeName; + } + + // Return the local name, which will be case insensitive if its an html node + return node.localName; +}; + +const getPrefixedClassNames = (node: Element): Set => { + const classAttribute = node.getAttribute('class'); + if (!classAttribute) { + return new Set(); + } + + return new Set( + classAttribute + .split(/\s+/g) + .filter(Boolean) + .map(name => { + // The prefix is required to store "__proto__" in a object-based map. 
+ return `$${name}`; + }) + ); +}; + +const idSelector = (id: string): string => { + return `#${CSS.escape(id)}`; +}; + +const attributeSelector = (name: string, value: string): string => { + return `[${name}=${CSS.escape(value)}]`; +}; + +const getSelectorPart = ( + node: Node, + optimized: boolean, + isTargetNode: boolean, + attributes: string[] = [] +): SelectorPart | undefined => { + if (!(node instanceof Element)) { + return; + } + + const id = node.id; + if (optimized) { + for (const attribute of attributes) { + const value = node.getAttribute(attribute); + if (value) { + return new SelectorPart(attributeSelector(attribute, value), true); + } + } + if (id) { + return new SelectorPart(idSelector(id), true); + } + switch (node.nodeName) { + case 'BODY': + case 'HEAD': + case 'HTML': + return new SelectorPart(getCSSNodeName(node), true); + } + } + const nodeName = getCSSNodeName(node); + + if (id) { + return new SelectorPart(`${nodeName}${idSelector(id)}`, true); + } + const parent = node.parentNode; + if (!parent) { + return new SelectorPart(nodeName, true); + } + + const classNames = getPrefixedClassNames(node); + let needsClassNames = false; + let needsNthChild = false; + let nodeIndex = -1; + const children = parent.children; + + // If there are no class names, we will use the `nth-child` selector. + if (!classNames.size) { + needsNthChild = true; + } + + for ( + let i = 0; + (nodeIndex < 0 || !needsNthChild) && i < children.length; + ++i + ) { + const child = children[i]!; + if (child === node) { + nodeIndex = i; + continue; + } + if (needsNthChild) { + continue; + } + if (getCSSNodeName(child) !== nodeName) { + continue; + } + + // Remove class names that are from children to keep things unique. 
+ needsClassNames = true; + for (const childClassName of getPrefixedClassNames(child)) { + if (!classNames.has(childClassName)) { + continue; + } + classNames.delete(childClassName); + // If we run out of unique class names, we circle back to the `nth-child` selector. + if (!classNames.size) { + needsNthChild = true; + break; + } + } + } + + let selector = nodeName; + if ( + isTargetNode && + nodeName.toLowerCase() === 'input' && + node.getAttribute('type') && + !node.getAttribute('id') && + !node.getAttribute('class') + ) { + selector += '[type=' + CSS.escape(node.getAttribute('type') || '') + ']'; + } + + if (needsNthChild) { + selector += ':nth-child(' + (nodeIndex + 1) + ')'; + } else if (needsClassNames) { + for (const prefixedName of classNames) { + selector += '.' + CSS.escape(prefixedName.slice(1)); + } + } + + return new SelectorPart(selector, false); +}; + +/** + * Computes the CSS selector for a node. + * + * @param node - The node to compute. + * @param optimized - Whether to optimize the CSS selector for the node. Does + * not imply the selector is shorter; implies the selector will be highly-scoped + * to the node. + * @returns The computed CSS selector. + * + * @internal + */ +export const computeCSSSelector = ( + node: Node | null, + optimized?: boolean, + attributes?: string[] +): {root: Node | null; selector: string} => { + const parts = []; + let contextNode: Node | null = node; + while (contextNode) { + const part = getSelectorPart( + contextNode, + !!optimized, + contextNode === node, + attributes + ); + if (!part) { + break; + } // Error - bail out early. 
+ parts.push(part); + if (part.optimized) { + break; + } + contextNode = contextNode.parentNode; + } + + parts.reverse(); + + contextNode = node; + while (contextNode) { + if (contextNode instanceof ShadowRoot) { + return { + selector: parts.join(' > '), + root: contextNode, + }; + } + contextNode = contextNode.parentNode; + } + + return { + selector: parts.join(' > '), + root: null, + }; +}; diff --git a/src/injected/PathPart.ts b/src/injected/PathPart.ts new file mode 100644 index 00000000..18b9d420 --- /dev/null +++ b/src/injected/PathPart.ts @@ -0,0 +1,15 @@ +/** + * @internal + */ +export class SelectorPart { + value: string; + optimized: boolean; + constructor(value: string, optimized: boolean) { + this.value = value; + this.optimized = optimized || false; + } + + toString(): string { + return this.value; + } +} diff --git a/src/injected/Poller.ts b/src/injected/Poller.ts index 4966f816..8abf4673 100644 --- a/src/injected/Poller.ts +++ b/src/injected/Poller.ts @@ -37,7 +37,6 @@ export class MutationPoller implements Poller { } this.#observer = new MutationObserver(async () => { - console.log(1); const result = await this.#fn(); if (!result) { return; diff --git a/src/injected/TextContent.ts b/src/injected/TextContent.ts index c4bb2cf1..60455cba 100644 --- a/src/injected/TextContent.ts +++ b/src/injected/TextContent.ts @@ -27,8 +27,10 @@ const UNSUITABLE_NODE_NAMES = new Set(['SCRIPT', 'STYLE']); /** * Determines whether a given node is suitable for text matching. + * + * @internal */ -const isSuitableNodeForTextMatching = (node: Node): boolean => { +export const isSuitableNodeForTextMatching = (node: Node): boolean => { return ( !UNSUITABLE_NODE_NAMES.has(node.nodeName) && !document.head?.contains(node) ); @@ -47,7 +49,27 @@ export type TextContent = { /** * Maps {@link Node}s to their computed {@link TextContent}. 
*/ -const textContentCache = new Map(); +const textContentCache = new WeakMap(); +const eraseFromCache = (node: Node | null) => { + while (node) { + textContentCache.delete(node); + if (node instanceof ShadowRoot) { + node = node.host; + } else { + node = node.parentNode; + } + } +}; + +/** + * Erases the cache when the tree has mutated text. + */ +const observedNodes = new WeakSet(); +const textChangeObserver = new MutationObserver(mutations => { + for (const mutation of mutations) { + eraseFromCache(mutation.target); + } +}); /** * Builds the text content of a node using some custom logic. @@ -67,10 +89,19 @@ export const createTextContent = (root: Node): TextContent => { if (!isSuitableNodeForTextMatching(root)) { return value; } + let currentImmediate = ''; if (isNonTrivialValueNode(root)) { value.full = root.value; value.immediate.push(root.value); + + root.addEventListener( + 'input', + event => { + eraseFromCache(event.target as HTMLInputElement); + }, + {once: true, capture: true} + ); } else { for (let child = root.firstChild; child; child = child.nextSibling) { if (child.nodeType === Node.TEXT_NODE) { @@ -92,6 +123,14 @@ export const createTextContent = (root: Node): TextContent => { if (root instanceof Element && root.shadowRoot) { value.full += createTextContent(root.shadowRoot).full; } + + if (!observedNodes.has(root)) { + textChangeObserver.observe(root, { + childList: true, + characterData: true, + }); + observedNodes.add(root); + } } textContentCache.set(root, value); return value; diff --git a/src/injected/TextSelector.ts b/src/injected/TextSelector.ts new file mode 100644 index 00000000..b63a8309 --- /dev/null +++ b/src/injected/TextSelector.ts @@ -0,0 +1,98 @@ +import { + createTextContent, + isSuitableNodeForTextMatching, +} from './TextContent.js'; + +/** + * Queries the given node for a node matching the given text selector. 
+ * + * @internal + */ +export const textQuerySelector = ( + selector: string, + root: Node +): Element | null => { + for (const node of root.childNodes) { + if (node instanceof Element && isSuitableNodeForTextMatching(node)) { + let matchedNode: Element | null; + if (node.shadowRoot) { + matchedNode = textQuerySelector(selector, node.shadowRoot); + } else { + matchedNode = textQuerySelector(selector, node); + } + if (matchedNode) { + return matchedNode; + } + } + } + + if (root instanceof Element) { + const textContent = createTextContent(root); + if (textContent.full.includes(selector)) { + return root; + } + } + return null; +}; + +/** + * Queries the given node for all nodes matching the given text selector. + * + * @internal + */ +export const textQuerySelectorAll = ( + selector: string, + root: Node +): Element[] => { + let results: Element[] = []; + for (const node of root.childNodes) { + if (node instanceof Element) { + let matchedNodes: Element[]; + if (node.shadowRoot) { + matchedNodes = textQuerySelectorAll(selector, node.shadowRoot); + } else { + matchedNodes = textQuerySelectorAll(selector, node); + } + results = results.concat(matchedNodes); + } + } + if (results.length > 0) { + return results; + } + + if (root instanceof Element) { + const textContent = createTextContent(root); + if (textContent.full.includes(selector)) { + return [root]; + } + } + return []; +}; + +/** + * Computes the text selector for a node. + * + * @param node - The node to compute. + * @returns The computed text selector. + * + * @internal + */ +export const computeTextSelector = (node: Node): string => { + const content = createTextContent(node).full; + + // We do a binary search for the optimal length. + let i = 0; + let j = content.length; + while (i <= j) { + const k = i + ((j - i) >> 2); + // Centering the slice. 
+ const left = (content.length - k) >> 2; + const right = k + left; + if (textQuerySelector(content.slice(left, right), document) !== node) { + i = k + 1; + } else { + j = k - 1; + } + } + return content.slice(0, i); +}; diff --git a/src/injected/XPath.ts b/src/injected/XPath.ts new file mode 100644 index 00000000..d4ed64c1 --- /dev/null +++ b/src/injected/XPath.ts @@ -0,0 +1,128 @@ +import {assert} from '../util/assert.js'; +import {SelectorPart} from './PathPart.js'; + +const getSelectorPart = (node: Node, optimized?: boolean): SelectorPart => { + let value; + switch (node.nodeType) { + case Node.ELEMENT_NODE: + assert(node instanceof Element); + if (optimized && node.getAttribute('id')) { + return new SelectorPart(`//*[@id="${node.getAttribute('id')}"]`, true); + } + value = node.localName; + break; + case Node.ATTRIBUTE_NODE: + value = '@' + node.nodeName; + break; + case Node.TEXT_NODE: + case Node.CDATA_SECTION_NODE: + value = 'text()'; + break; + case Node.PROCESSING_INSTRUCTION_NODE: + value = 'processing-instruction()'; + break; + case Node.COMMENT_NODE: + value = 'comment()'; + break; + case Node.DOCUMENT_NODE: + value = ''; + break; + default: + value = ''; + break; + } + + const index = getXPathIndexInParent(node); + if (index > 0) { + value += `[${index}]`; + } + + return new SelectorPart(value, node.nodeType === Node.DOCUMENT_NODE); +}; + +const getXPathIndexInParent = (node: Node): number => { + /** + * @returns -1 in case of error, 0 if no siblings matching the same expression, + * XPath index among the same expression-matching sibling nodes otherwise. + */ + function areNodesSimilar(left: Node, right: Node): boolean { + if (left === right) { + return true; + } + + if (left instanceof Element && right instanceof Element) { + return left.localName === right.localName; + } + + if (left.nodeType === right.nodeType) { + return true; + } + + // XPath treats CDATA as text nodes. + const leftType = + left.nodeType === Node.CDATA_SECTION_NODE + ? 
Node.TEXT_NODE + : left.nodeType; + const rightType = + right.nodeType === Node.CDATA_SECTION_NODE + ? Node.TEXT_NODE + : right.nodeType; + return leftType === rightType; + } + + const children = node.parentNode ? node.parentNode.children : null; + if (!children) { + return 0; + } + let hasSameNamedElements; + for (let i = 0; i < children.length; ++i) { + if (areNodesSimilar(node, children[i]!) && children[i] !== node) { + hasSameNamedElements = true; + break; + } + } + if (!hasSameNamedElements) { + return 0; + } + let ownIndex = 1; // XPath indices start with 1. + for (let i = 0; i < children.length; ++i) { + if (areNodesSimilar(node, children[i]!)) { + if (children[i] === node) { + return ownIndex; + } + ++ownIndex; + } + } + + assert(false, 'This is impossible; a child must be the child of the parent'); +}; + +/** + * Computes the XPath for a node. + * + * @param node - The node to compute. + * @param optimized - Whether to optimize the XPath for the node. Does not imply + * the XPath is shorter; implies the XPath will be highly-scoped to the node. + * @returns The computed XPath. + * + * @internal + */ +export const computeXPath = (node: Node, optimized?: boolean): string => { + if (node.nodeType === Node.DOCUMENT_NODE) { + return '/'; + } + + const parts = []; + let contextNode: Node | null = node; + while (contextNode) { + const part = getSelectorPart(contextNode, optimized); + parts.push(part); + if (part.optimized) { + break; + } + contextNode = contextNode.parentNode; + } + + parts.reverse(); + return (parts.length && parts[0]!.optimized ? 
'' : '/') + parts.join('/'); +}; diff --git a/src/injected/injected.ts b/src/injected/injected.ts index f15ec110..e7f86d5f 100644 --- a/src/injected/injected.ts +++ b/src/injected/injected.ts @@ -2,11 +2,13 @@ import {createDeferredPromise} from '../util/DeferredPromise.js'; import * as util from './util.js'; import * as Poller from './Poller.js'; import * as TextContent from './TextContent.js'; +import * as TextSelector from './TextSelector.js'; const PuppeteerUtil = Object.freeze({ ...util, ...Poller, ...TextContent, + ...TextSelector, createDeferredPromise, }); diff --git a/test/src/computeselectors.spec.ts b/test/src/computeselectors.spec.ts new file mode 100644 index 00000000..9f69d7dd --- /dev/null +++ b/test/src/computeselectors.spec.ts @@ -0,0 +1,61 @@ +/** + * Copyright 2022 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import expect from 'expect'; +import {MAIN_WORLD} from '../../lib/cjs/puppeteer/common/IsolatedWorld.js'; +import { + getTestState, + setupTestBrowserHooks, + setupTestPageAndContextHooks, +} from './mocha-utils.js'; + +describe('Selector computation tests', function () { + setupTestBrowserHooks(); + setupTestPageAndContextHooks(); + + describe('for text selectors', () => { + it('should compute text selectors correctly.', async () => { + const {page} = getTestState(); + + // Each element is a list of `a`s. 
Since `computeTextSelector` performs + // binary search only on the front slice, the selector should be the + // smallest number of `a`s that make the selector unique. + await page.setContent( + `
${'a'.repeat(7)}
${'a'.repeat( + 9 + )}
${'a'.repeat(5)}
${'a'.repeat( + 10 + )}
${'a'.repeat(4)}
` + ); + + const selector = await page.evaluate(({computeTextSelector}) => { + return computeTextSelector(document.getElementById('to-be-computed')!); + }, await page.mainFrame().worlds[MAIN_WORLD].puppeteerUtil); + + // Since to-be-computed has the most amount of `a`s, it just needs to have + // one more than every other element which computes to 11. + expect(selector).toBe('a'.repeat(11)); + + // Make sure the inverse operation works! + const element = await page.$(`text/${selector}`); + await expect( + element?.evaluate(e => { + return e.id; + }) + ).resolves.toBe('to-be-computed'); + }); + }); +}); diff --git a/test/src/queryhandler.spec.ts b/test/src/queryhandler.spec.ts index f90c6dcf..1d1a0b97 100644 --- a/test/src/queryhandler.spec.ts +++ b/test/src/queryhandler.spec.ts @@ -197,6 +197,73 @@ describe('Query handler tests', function () { }) ).toBe('a b'); }); + it('should clear caches', async () => { + const {page} = getTestState(); + + await page.setContent( + '
text
text
' + ); + const div = (await page.$('#target1')) as ElementHandle; + const input = (await page.$( + '#target2' + )) as ElementHandle; + + await div.evaluate(div => { + div.textContent = 'text'; + }); + expect( + await page.$eval(`text/text`, e => { + return e.id; + }) + ).toBe('target1'); + await div.evaluate(div => { + div.textContent = 'foo'; + }); + expect( + await page.$eval(`text/text`, e => { + return e.id; + }) + ).toBe('target2'); + await input.evaluate(input => { + input.value = ''; + }); + await input.type('foo'); + expect( + await page.$eval(`text/text`, e => { + return e.id; + }) + ).toBe('target3'); + + await div.evaluate(div => { + div.textContent = 'text'; + }); + await input.evaluate(input => { + input.value = ''; + }); + await input.type('text'); + expect( + await page.$$eval(`text/text`, es => { + return es.length; + }) + ).toBe(3); + await div.evaluate(div => { + div.textContent = 'foo'; + }); + expect( + await page.$$eval(`text/text`, es => { + return es.length; + }) + ).toBe(2); + await input.evaluate(input => { + input.value = ''; + }); + await input.type('foo'); + expect( + await page.$$eval(`text/text`, es => { + return es.length; + }) + ).toBe(1); + }); }); describe('in ElementHandles', function () { it('should query existing element', async () => { diff --git a/utils/generate_sources.ts b/utils/generate_sources.ts index 8aa0d502..0e0ad1f6 100644 --- a/utils/generate_sources.ts +++ b/utils/generate_sources.ts @@ -36,7 +36,7 @@ const INCLUDED_FOLDERS = ['common', 'node', 'generated', 'util', 'api']; outdir: tmp, format: 'cjs', platform: 'browser', - target: 'ES2019', + target: 'ES2022', }); const baseName = path.basename(input); const content = await readFile(