chore: incrementally erase cache on text change (#8961)

This commit is contained in:
jrandolf 2022-09-15 18:48:55 +02:00 committed by GitHub
parent fa084bcdcd
commit 2a2af7134f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 615 additions and 51 deletions

View File

@ -276,55 +276,12 @@ const xpathHandler = createPuppeteerQueryHandler({
});
const textQueryHandler = createPuppeteerQueryHandler({
queryOne: (element, selector, {createTextContent}) => {
const search = (root: Node): Node | null => {
for (const node of root.childNodes) {
if (node instanceof Element) {
let matchedNode: Node | null;
if (node.shadowRoot) {
matchedNode = search(node.shadowRoot);
} else {
matchedNode = search(node);
}
if (matchedNode) {
return matchedNode;
}
}
}
const textContent = createTextContent(root);
if (textContent.full.includes(selector)) {
return root;
}
return null;
};
return search(element);
queryOne: (element, selector, {textQuerySelector}) => {
return textQuerySelector(selector, element);
},
queryAll: (element, selector, {createTextContent}) => {
const search = (root: Node): Node[] => {
let results: Node[] = [];
for (const node of root.childNodes) {
if (node instanceof Element) {
let matchedNodes: Node[];
if (node.shadowRoot) {
matchedNodes = search(node.shadowRoot);
} else {
matchedNodes = search(node);
}
results = results.concat(matchedNodes);
}
}
if (results.length > 0) {
return results;
}
const textContent = createTextContent(root);
if (textContent.full.includes(selector)) {
return [root];
}
return [];
};
return search(element);
queryAll: (element, selector, {textQuerySelectorAll}) => {
return textQuerySelectorAll(selector, element);
},
});

198
src/injected/CSSSelector.ts Normal file
View File

@ -0,0 +1,198 @@
import {SelectorPart} from './PathPart.js';
/**
 * Picks the node name to use for a node inside a CSS selector.
 *
 * @param node - The node to name.
 * @returns `localName` for elements whose `localName` and `nodeName` have the
 * same length; `nodeName` for everything else.
 */
const getCSSNodeName = (node: Node): string => {
  // Equal lengths mean the qualified name carries no prefix, so the local
  // name (case insensitive for HTML nodes) can be used.
  if (
    node instanceof Element &&
    node.localName.length === node.nodeName.length
  ) {
    return node.localName;
  }
  // Non-elements and prefixed element names are case sensitive.
  return node.nodeName;
};
/**
 * Collects the class names of an element, each prefixed with `$`.
 *
 * @param node - The element whose `class` attribute is read.
 * @returns The set of `$`-prefixed class names; empty when the element has no
 * (or an empty) `class` attribute.
 */
const getPrefixedClassNames = (node: Element): Set<string> => {
  const prefixedNames = new Set<string>();
  const classAttribute = node.getAttribute('class');
  if (!classAttribute) {
    return prefixedNames;
  }
  for (const name of classAttribute.split(/\s+/g)) {
    // Leading/trailing whitespace produces empty entries; skip them.
    if (!name) {
      continue;
    }
    // The prefix is required to store "__proto__" in an object-based map.
    prefixedNames.add(`$${name}`);
  }
  return prefixedNames;
};
/**
 * Builds an id selector (`#id`) with the id escaped via `CSS.escape`.
 */
const idSelector = (id: string): string => {
  const escapedId = CSS.escape(id);
  return '#' + escapedId;
};
/**
 * Builds an attribute selector (`[name=value]`) with the value escaped via
 * `CSS.escape`. The attribute name is emitted as given.
 */
const attributeSelector = (name: string, value: string): string => {
  const escapedValue = CSS.escape(value);
  return '[' + name + '=' + escapedValue + ']';
};
/**
 * Computes one step of a CSS selector path for the given node.
 *
 * @param node - The node to describe. Non-element nodes yield `undefined`.
 * @param optimized - When true, prefer a part built from one of `attributes`,
 * the element id, or a `BODY`/`HEAD`/`HTML` node name.
 * @param isTargetNode - Whether `node` is the node the whole selector is being
 * computed for; only then is an `input` qualified by its `type`.
 * @param attributes - Attribute names tried, in order, in optimized mode.
 * @returns The selector part, or `undefined` if `node` is not an element. A
 * part whose `optimized` flag is true stops the upward walk in
 * `computeCSSSelector`.
 */
const getSelectorPart = (
node: Node,
optimized: boolean,
isTargetNode: boolean,
attributes: string[] = []
): SelectorPart | undefined => {
// Only elements can be described by a selector part.
if (!(node instanceof Element)) {
return;
}
const id = node.id;
if (optimized) {
// The first listed attribute with a non-empty value wins.
for (const attribute of attributes) {
const value = node.getAttribute(attribute);
if (value) {
return new SelectorPart(attributeSelector(attribute, value), true);
}
}
if (id) {
return new SelectorPart(idSelector(id), true);
}
// Any other node name falls through to the generic handling below.
switch (node.nodeName) {
case 'BODY':
case 'HEAD':
case 'HTML':
return new SelectorPart(getCSSNodeName(node), true);
}
}
const nodeName = getCSSNodeName(node);
// Non-optimized mode still anchors on an id, qualified by the node name.
if (id) {
return new SelectorPart(`${nodeName}${idSelector(id)}`, true);
}
const parent = node.parentNode;
// A parentless element has no siblings to be told apart from.
if (!parent) {
return new SelectorPart(nodeName, true);
}
const classNames = getPrefixedClassNames(node);
let needsClassNames = false;
let needsNthChild = false;
let nodeIndex = -1;
const children = parent.children;
// If there are no class names, we will use the `nth-child` selector.
if (!classNames.size) {
needsNthChild = true;
}
// Scan the element siblings. The loop keeps running until the node's own
// index is known and, unless `nth-child` is already required, until every
// sibling has been compared.
for (
let i = 0;
(nodeIndex < 0 || !needsNthChild) && i < children.length;
++i
) {
const child = children[i]!;
if (child === node) {
nodeIndex = i;
continue;
}
// Once `nth-child` is required only the node's own index is still needed.
if (needsNthChild) {
continue;
}
// Siblings with a different node name cannot clash with `nodeName`.
if (getCSSNodeName(child) !== nodeName) {
continue;
}
// A same-named sibling exists: drop every class name shared with it so
// that only class names unique to `node` remain.
needsClassNames = true;
for (const childClassName of getPrefixedClassNames(child)) {
if (!classNames.has(childClassName)) {
continue;
}
classNames.delete(childClassName);
// If we run out of unique class names, we fall back to the `nth-child`
// selector.
if (!classNames.size) {
needsNthChild = true;
break;
}
}
}
let selector = nodeName;
// Qualify the target `input` by its `type` when it has neither an id nor a
// class attribute.
if (
isTargetNode &&
nodeName.toLowerCase() === 'input' &&
node.getAttribute('type') &&
!node.getAttribute('id') &&
!node.getAttribute('class')
) {
selector += '[type=' + CSS.escape(node.getAttribute('type') || '') + ']';
}
if (needsNthChild) {
// `nodeIndex` is 0-based while `:nth-child` is 1-based.
selector += ':nth-child(' + (nodeIndex + 1) + ')';
} else if (needsClassNames) {
for (const prefixedName of classNames) {
// Strip the `$` prefix added by `getPrefixedClassNames`.
selector += '.' + CSS.escape(prefixedName.slice(1));
}
}
return new SelectorPart(selector, false);
};
/**
 * Computes the CSS selector for a node.
 *
 * @param node - The node to compute.
 * @param optimized - Whether to optimize the CSS selector for the node. Does
 * not imply the selector is shorter; implies the selector will be highly-scoped
 * to the node.
 * @param attributes - Attribute names forwarded to each selector step.
 * @returns The computed CSS selector together with the closest enclosing
 * `ShadowRoot` (`null` when the node is not inside one).
 *
 * @internal
 */
export const computeCSSSelector = (
  node: Node | null,
  optimized?: boolean,
  attributes?: string[]
): {root: Node | null; selector: string} => {
  // Collect steps from the node upwards.
  const steps = [];
  for (
    let current: Node | null = node;
    current;
    current = current.parentNode
  ) {
    const step = getSelectorPart(
      current,
      !!optimized,
      current === node,
      attributes
    );
    // No step could be produced (non-element node) - stop climbing.
    if (!step) {
      break;
    }
    steps.push(step);
    // An optimized step anchors the selector; nothing above it is needed.
    if (step.optimized) {
      break;
    }
  }
  steps.reverse();

  // Find the nearest shadow root, if any, by walking the parent chain.
  let shadowRoot: Node | null = null;
  for (
    let ancestor: Node | null = node;
    ancestor;
    ancestor = ancestor.parentNode
  ) {
    if (ancestor instanceof ShadowRoot) {
      shadowRoot = ancestor;
      break;
    }
  }

  return {
    selector: steps.join(' > '),
    root: shadowRoot,
  };
};

15
src/injected/PathPart.ts Normal file
View File

@ -0,0 +1,15 @@
/**
 * One step of a computed selector or path.
 *
 * `value` is the textual step and is also what `toString` returns, so an array
 * of parts can be combined with `join`. `optimized` is a flag supplied by the
 * code that builds the part.
 *
 * @internal
 */
export class SelectorPart {
  value: string;
  optimized: boolean;

  constructor(value: string, optimized: boolean) {
    this.value = value;
    // Falsy input is normalised to `false`.
    this.optimized = optimized ? optimized : false;
  }

  toString(): string {
    const {value} = this;
    return value;
  }
}

View File

@ -37,7 +37,6 @@ export class MutationPoller<T> implements Poller<T> {
}
this.#observer = new MutationObserver(async () => {
console.log(1);
const result = await this.#fn();
if (!result) {
return;

View File

@ -27,8 +27,10 @@ const UNSUITABLE_NODE_NAMES = new Set(['SCRIPT', 'STYLE']);
/**
* Determines whether a given node is suitable for text matching.
*
* @internal
*/
const isSuitableNodeForTextMatching = (node: Node): boolean => {
export const isSuitableNodeForTextMatching = (node: Node): boolean => {
return (
!UNSUITABLE_NODE_NAMES.has(node.nodeName) && !document.head?.contains(node)
);
@ -47,7 +49,27 @@ export type TextContent = {
/**
* Maps {@link Node}s to their computed {@link TextContent}.
*/
const textContentCache = new Map<Node, TextContent>();
const textContentCache = new WeakMap<Node, TextContent>();
/**
 * Removes the cached text content of a node and of every node above it.
 *
 * The walk follows `parentNode`, except at a `ShadowRoot`, where it continues
 * with the shadow host.
 */
const eraseFromCache = (node: Node | null) => {
  for (
    let current = node;
    current;
    current = current instanceof ShadowRoot ? current.host : current.parentNode
  ) {
    textContentCache.delete(current);
  }
};
/**
 * Nodes that have already been registered with the text change observer, so
 * that each node is only observed once.
 */
const observedNodes = new WeakSet<Node>();
/**
 * Erases the cache when the tree has mutated text: for every reported
 * mutation, the cache entries of the target and everything above it are
 * dropped.
 */
const textChangeObserver = new MutationObserver(mutations => {
  mutations.forEach(mutation => {
    eraseFromCache(mutation.target);
  });
});
/**
* Builds the text content of a node using some custom logic.
@ -67,10 +89,19 @@ export const createTextContent = (root: Node): TextContent => {
if (!isSuitableNodeForTextMatching(root)) {
return value;
}
let currentImmediate = '';
if (isNonTrivialValueNode(root)) {
value.full = root.value;
value.immediate.push(root.value);
root.addEventListener(
'input',
event => {
eraseFromCache(event.target as HTMLInputElement);
},
{once: true, capture: true}
);
} else {
for (let child = root.firstChild; child; child = child.nextSibling) {
if (child.nodeType === Node.TEXT_NODE) {
@ -92,6 +123,14 @@ export const createTextContent = (root: Node): TextContent => {
if (root instanceof Element && root.shadowRoot) {
value.full += createTextContent(root.shadowRoot).full;
}
if (!observedNodes.has(root)) {
textChangeObserver.observe(root, {
childList: true,
characterData: true,
});
observedNodes.add(root);
}
}
textContentCache.set(root, value);
return value;

View File

@ -0,0 +1,98 @@
import {
createTextContent,
isSuitableNodeForTextMatching,
} from './TextContent.js';
/**
 * Queries the given node for a node matching the given text selector.
 *
 * Suitable child elements are searched first, descending into a shadow root
 * where one exists; `root` itself is only considered when no descendant
 * matched.
 *
 * @param selector - The text to look for.
 * @param root - The node to search in.
 * @returns The first matching element, or `null` if there is none.
 *
 * @internal
 */
export const textQuerySelector = (
  selector: string,
  root: Node
): Element | null => {
  for (const child of root.childNodes) {
    if (!(child instanceof Element) || !isSuitableNodeForTextMatching(child)) {
      continue;
    }
    // Search the shadow tree instead of the light tree when there is one.
    const match = textQuerySelector(selector, child.shadowRoot ?? child);
    if (match) {
      return match;
    }
  }

  if (
    root instanceof Element &&
    createTextContent(root).full.includes(selector)
  ) {
    return root;
  }
  return null;
};
/**
 * Queries the given node for all nodes matching the given text selector.
 *
 * Child elements are searched first, descending into a shadow root where one
 * exists. If any descendant matched, only those matches are returned; `root`
 * itself is only reported when nothing below it matched.
 *
 * Elements that are unsuitable for text matching are skipped, mirroring
 * {@link textQuerySelector}.
 *
 * @param selector - The text to look for.
 * @param root - The node to search in.
 * @returns The matching elements, in document order of the traversal.
 *
 * @internal
 */
export const textQuerySelectorAll = (
  selector: string,
  root: Node
): Element[] => {
  const results: Element[] = [];
  for (const node of root.childNodes) {
    if (!(node instanceof Element) || !isSuitableNodeForTextMatching(node)) {
      continue;
    }
    // Search the shadow tree instead of the light tree when there is one.
    const matchedNodes = textQuerySelectorAll(
      selector,
      node.shadowRoot ?? node
    );
    // Append in place rather than re-copying the accumulated array each time.
    for (const matchedNode of matchedNodes) {
      results.push(matchedNode);
    }
  }
  if (results.length > 0) {
    return results;
  }
  if (root instanceof Element) {
    const textContent = createTextContent(root);
    if (textContent.full.includes(selector)) {
      return [root];
    }
  }
  return [];
};
/**
 * Computes the text selector for a node.
 *
 * Candidate selectors are centered slices of the node's full text content. A
 * binary search over the slice length looks for the shortest slice for which
 * {@link textQuerySelector} resolves back to `node`.
 *
 * @param node - The node to compute.
 * @returns The shortest slice found that resolves to `node`. If no probed
 * slice resolves to `node`, the full text content is returned.
 *
 * @internal
 */
export const computeTextSelector = (node: Node): string => {
  const content = createTextContent(node).full;

  // Fallback when no probed slice resolves to `node`.
  let best = content;

  // We do a binary search for the optimal length.
  let low = 0;
  let high = content.length;
  while (low <= high) {
    const length = low + ((high - low) >> 1);
    // Centering the slice.
    const left = (content.length - length) >> 1;
    const candidate = content.slice(left, left + length);
    if (textQuerySelector(candidate, document) === node) {
      // Remember the exact slice that was verified; this is what is returned,
      // rather than an unverified slice of the same length.
      best = candidate;
      high = length - 1;
    } else {
      low = length + 1;
    }
  }
  return best;
};

128
src/injected/XPath.ts Normal file
View File

@ -0,0 +1,128 @@
import {assert} from '../util/assert.js';
import {SelectorPart} from './PathPart.js';
/**
 * Quotes a string as an XPath 1.0 string literal.
 *
 * XPath 1.0 literals have no escape mechanism, so a value containing both
 * quote characters is expressed as a `concat(...)` of literals.
 */
const toXPathStringLiteral = (text: string): string => {
  if (!text.includes('"')) {
    return `"${text}"`;
  }
  if (!text.includes("'")) {
    return `'${text}'`;
  }
  // Split on `"` and re-insert each one as the single-quoted literal '"'.
  return `concat(${text
    .split('"')
    .map(chunk => {
      return `"${chunk}"`;
    })
    .join(`, '"', `)})`;
};
/**
 * Computes one step of an XPath for the given node.
 *
 * @param node - The node to describe.
 * @param optimized - When true, an element with an `id` attribute is
 * addressed directly as `//*[@id=...]`.
 * @returns The step. Its `optimized` flag is true for the id shortcut and for
 * document nodes.
 */
const getSelectorPart = (node: Node, optimized?: boolean): SelectorPart => {
  let value: string;
  switch (node.nodeType) {
    case Node.ELEMENT_NODE: {
      assert(node instanceof Element);
      const id = node.getAttribute('id');
      if (optimized && id) {
        // Quote the id properly so ids containing `"` still produce a valid
        // expression.
        return new SelectorPart(`//*[@id=${toXPathStringLiteral(id)}]`, true);
      }
      value = node.localName;
      break;
    }
    case Node.ATTRIBUTE_NODE:
      value = '@' + node.nodeName;
      break;
    case Node.TEXT_NODE:
    case Node.CDATA_SECTION_NODE:
      value = 'text()';
      break;
    case Node.PROCESSING_INSTRUCTION_NODE:
      value = 'processing-instruction()';
      break;
    case Node.COMMENT_NODE:
      value = 'comment()';
      break;
    case Node.DOCUMENT_NODE:
    default:
      value = '';
      break;
  }
  // Append a positional predicate only when one is reported.
  const index = getXPathIndexInParent(node);
  if (index > 0) {
    value += `[${index}]`;
  }
  return new SelectorPart(value, node.nodeType === Node.DOCUMENT_NODE);
};
/**
 * Computes the XPath position of a node among its siblings.
 *
 * All child nodes of the parent are considered (not only element children), so
 * text, CDATA and comment nodes receive a position too.
 *
 * @param node - The node to locate.
 * @returns 0 if the node has no parent or no sibling matching the same
 * expression; otherwise the 1-based XPath index among the siblings matching
 * the same expression.
 */
const getXPathIndexInParent = (node: Node): number => {
  /**
   * Whether two nodes are matched by the same XPath node test.
   */
  function areNodesSimilar(left: Node, right: Node): boolean {
    if (left === right) {
      return true;
    }
    if (left instanceof Element && right instanceof Element) {
      return left.localName === right.localName;
    }
    if (left.nodeType === right.nodeType) {
      return true;
    }
    // XPath treats CDATA as text nodes.
    const leftType =
      left.nodeType === Node.CDATA_SECTION_NODE
        ? Node.TEXT_NODE
        : left.nodeType;
    const rightType =
      right.nodeType === Node.CDATA_SECTION_NODE
        ? Node.TEXT_NODE
        : right.nodeType;
    return leftType === rightType;
  }

  // `childNodes` rather than `children`: the latter only lists elements, which
  // would leave non-element nodes without an index.
  const siblings = node.parentNode ? node.parentNode.childNodes : null;
  if (!siblings) {
    return 0;
  }

  let hasSimilarSibling = false;
  for (let i = 0; i < siblings.length; ++i) {
    const sibling = siblings[i]!;
    if (sibling !== node && areNodesSimilar(node, sibling)) {
      hasSimilarSibling = true;
      break;
    }
  }
  // A unique node needs no positional predicate.
  if (!hasSimilarSibling) {
    return 0;
  }

  let ownIndex = 1; // XPath indices start with 1.
  for (let i = 0; i < siblings.length; ++i) {
    const sibling = siblings[i]!;
    if (!areNodesSimilar(node, sibling)) {
      continue;
    }
    if (sibling === node) {
      return ownIndex;
    }
    ++ownIndex;
  }
  assert(false, 'This is impossible; a child must be the child of the parent');
};
/**
 * Computes the XPath for a node.
 *
 * @param node - The node to compute.
 * @param optimized - Whether to optimize the XPath for the node. Does not imply
 * the XPath is shorter; implies the XPath will be highly-scoped to the node.
 * @returns The computed XPath; `/` for a document node.
 *
 * @internal
 */
export const computeXPath = (node: Node, optimized?: boolean): string => {
  if (node.nodeType === Node.DOCUMENT_NODE) {
    return '/';
  }

  // Collect steps from the node upwards until an optimized step is reached or
  // the parent chain ends.
  const steps = [];
  for (
    let current: Node | null = node;
    current;
    current = current.parentNode
  ) {
    const step = getSelectorPart(current, optimized);
    steps.push(step);
    if (step.optimized) {
      break;
    }
  }
  steps.reverse();

  // A path starting at an optimized step gets no leading slash.
  const prefix = steps.length && steps[0]!.optimized ? '' : '/';
  return prefix + steps.join('/');
};

View File

@ -2,11 +2,13 @@ import {createDeferredPromise} from '../util/DeferredPromise.js';
import * as util from './util.js';
import * as Poller from './Poller.js';
import * as TextContent from './TextContent.js';
import * as TextSelector from './TextSelector.js';
const PuppeteerUtil = Object.freeze({
...util,
...Poller,
...TextContent,
...TextSelector,
createDeferredPromise,
});

View File

@ -0,0 +1,61 @@
/**
* Copyright 2022 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import expect from 'expect';
import {MAIN_WORLD} from '../../lib/cjs/puppeteer/common/IsolatedWorld.js';
import {
getTestState,
setupTestBrowserHooks,
setupTestPageAndContextHooks,
} from './mocha-utils.js';
// Exercises `computeTextSelector` as exposed on the main world's
// `puppeteerUtil`.
describe('Selector computation tests', function () {
setupTestBrowserHooks();
setupTestPageAndContextHooks();
describe('for text selectors', () => {
it('should compute text selectors correctly.', async () => {
const {page} = getTestState();
// Every element's text consists solely of `a`s, so all slices of a given
// length are the same string. The computed selector should therefore be the
// smallest number of `a`s that makes the selector unique.
await page.setContent(
`<div>${'a'.repeat(7)}</div><div>${'a'.repeat(
9
)}</div><div id="to-be-computed">${'a'.repeat(5)}<div>${'a'.repeat(
10
)}</div>${'a'.repeat(4)}</div>`
);
const selector = await page.evaluate(({computeTextSelector}) => {
return computeTextSelector(document.getElementById('to-be-computed')!);
}, await page.mainFrame().worlds[MAIN_WORLD].puppeteerUtil);
// `to-be-computed` has the most `a`s; the longest other element holds 10 (its
// nested <div>), so the selector needs one more than that: 11.
expect(selector).toBe('a'.repeat(11));
// Make sure the inverse operation works!
const element = await page.$(`text/${selector}`);
await expect(
element?.evaluate(e => {
return e.id;
})
).resolves.toBe('to-be-computed');
});
});
});

View File

@ -197,6 +197,73 @@ describe('Query handler tests', function () {
})
).toBe('a b');
});
// Mutates text between `text/` queries and checks that each query reflects
// the current content of the page rather than a stale result.
it('should clear caches', async () => {
const {page} = getTestState();
// Three nodes initially carry "text": two divs (text content) and one input
// (value).
await page.setContent(
'<div id=target1>text</div><input id=target2 value=text><div id=target3>text</div>'
);
const div = (await page.$('#target1')) as ElementHandle<HTMLDivElement>;
const input = (await page.$(
'#target2'
)) as ElementHandle<HTMLInputElement>;
// With all three nodes matching, the first match is `target1`.
await div.evaluate(div => {
div.textContent = 'text';
});
expect(
await page.$eval(`text/text`, e => {
return e.id;
})
).toBe('target1');
// Once the first div no longer matches, the input becomes the first match.
await div.evaluate(div => {
div.textContent = 'foo';
});
expect(
await page.$eval(`text/text`, e => {
return e.id;
})
).toBe('target2');
// Replace the input's value by typing; the last div is the only match left.
await input.evaluate(input => {
input.value = '';
});
await input.type('foo');
expect(
await page.$eval(`text/text`, e => {
return e.id;
})
).toBe('target3');
// Restore both nodes and repeat the sequence with the query-all variant,
// counting the matches: 3, then 2, then 1.
await div.evaluate(div => {
div.textContent = 'text';
});
await input.evaluate(input => {
input.value = '';
});
await input.type('text');
expect(
await page.$$eval(`text/text`, es => {
return es.length;
})
).toBe(3);
await div.evaluate(div => {
div.textContent = 'foo';
});
expect(
await page.$$eval(`text/text`, es => {
return es.length;
})
).toBe(2);
await input.evaluate(input => {
input.value = '';
});
await input.type('foo');
expect(
await page.$$eval(`text/text`, es => {
return es.length;
})
).toBe(1);
});
});
describe('in ElementHandles', function () {
it('should query existing element', async () => {

View File

@ -36,7 +36,7 @@ const INCLUDED_FOLDERS = ['common', 'node', 'generated', 'util', 'api'];
outdir: tmp,
format: 'cjs',
platform: 'browser',
target: 'ES2019',
target: 'ES2022',
});
const baseName = path.basename(input);
const content = await readFile(