puppeteer/src/DOMWorld.ts

754 lines
21 KiB
TypeScript
Raw Normal View History

/**
* Copyright 2019 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import * as fs from 'fs';
2020-05-07 10:54:55 +00:00
import { helper, assert } from './helper';
import { LifecycleWatcher, PuppeteerLifeCycleEvent } from './LifecycleWatcher';
import { TimeoutError } from './Errors';
import { JSHandle, ElementHandle } from './JSHandle';
import { ExecutionContext } from './ExecutionContext';
import { TimeoutSettings } from './TimeoutSettings';
import { MouseButtonInput } from './Input';
import { FrameManager, Frame } from './FrameManager';
import { getQueryHandlerAndSelector, QueryHandler } from './QueryHandler';
// `predicateQueryHandler` has no definition in this module. It is declared so
// that TypeScript accepts the reference made inside the `predicate` function
// of `_waitForSelectorOrXPath`. At runtime WaitTask generates a wrapper that
// defines a constant with exactly this name around the serialized predicate,
// so the identifier must not be renamed.
declare const predicateQueryHandler: QueryHandler;
// Promise-returning wrapper around `fs.readFile`, used to load script and
// stylesheet files passed by `path`.
const readFileAsync = helper.promisify(fs.readFile);
/**
 * Options accepted by `DOMWorld.waitForSelector` and `DOMWorld.waitForXPath`.
 */
export interface WaitForSelectorOptions {
/**
 * Wait until the matched node is visible: its computed `visibility` is not
 * `hidden` and it has a non-empty bounding box. Defaults to `false`.
 */
visible?: boolean;
/**
 * Wait until no node matches, or the matched node is not visible.
 * Defaults to `false`.
 */
hidden?: boolean;
/**
 * Maximum time to wait, in milliseconds. Defaults to the DOMWorld's
 * `TimeoutSettings.timeout()`. A falsy value (e.g. `0`) installs no timeout.
 */
timeout?: number;
}
/**
 * Gives access to one execution context of a frame.
 *
 * A DOMWorld tracks the frame's current `ExecutionContext` (which is replaced
 * whenever `_setContext` is called), caches a handle to the `document`, and
 * owns the set of pending `WaitTask`s so that they can be re-run when a new
 * context arrives or terminated when the frame is detached.
 */
export class DOMWorld {
  _frameManager: FrameManager;
  _frame: Frame;
  _timeoutSettings: TimeoutSettings;
  /** Cached promise for the `document` handle; reset when the context is cleared. */
  _documentPromise?: Promise<ElementHandle> = null;
  /** Resolves with the execution context once one has been set. */
  _contextPromise?: Promise<ExecutionContext> = null;
  /** Resolver of `_contextPromise`; `null` while a context is available. */
  _contextResolveCallback?: (x?: ExecutionContext) => void = null;
  _detached = false;
  /** Pending wait tasks; each task adds and removes itself. */
  _waitTasks = new Set<WaitTask>();

  constructor(
    frameManager: FrameManager,
    frame: Frame,
    timeoutSettings: TimeoutSettings
  ) {
    this._frameManager = frameManager;
    this._frame = frame;
    this._timeoutSettings = timeoutSettings;
    // Start without a context: this creates the pending `_contextPromise`.
    this._setContext(null);
  }

  /** Returns the frame this world belongs to. */
  frame(): Frame {
    return this._frame;
  }

  /**
   * Installs or clears the execution context.
   *
   * With a context: resolves the pending context promise and re-runs every
   * pending wait task. Without one: drops the cached document handle and
   * creates a fresh pending context promise.
   *
   * @param context - The new execution context, or a falsy value to clear it.
   */
  _setContext(context?: ExecutionContext): void {
    if (context) {
      this._contextResolveCallback.call(null, context);
      this._contextResolveCallback = null;
      for (const waitTask of this._waitTasks) waitTask.rerun();
    } else {
      this._documentPromise = null;
      this._contextPromise = new Promise((fulfill) => {
        this._contextResolveCallback = fulfill;
      });
    }
  }

  /** Returns `true` when an execution context is currently set. */
  _hasContext(): boolean {
    return !this._contextResolveCallback;
  }

  /** Marks the world as detached and terminates every pending wait task. */
  _detach(): void {
    this._detached = true;
    for (const waitTask of this._waitTasks)
      waitTask.terminate(
        new Error('waitForFunction failed: frame got detached.')
      );
  }

  /**
   * Returns a promise for the current execution context.
   *
   * @throws Error synchronously when the frame has been detached.
   */
  executionContext(): Promise<ExecutionContext> {
    if (this._detached)
      throw new Error(
        `Execution Context is not available in detached frame "${this._frame.url()}" (are you trying to evaluate?)`
      );
    return this._contextPromise;
  }

  /**
   * Evaluates `pageFunction` in this world's execution context and returns a
   * handle to the result.
   *
   * @param pageFunction - Function or expression string to evaluate.
   * @param args - Arguments forwarded to `pageFunction`.
   */
  async evaluateHandle(
    pageFunction: Function | string,
    ...args: unknown[]
  ): Promise<JSHandle> {
    const context = await this.executionContext();
    return context.evaluateHandle(pageFunction, ...args);
  }

  /**
   * Evaluates `pageFunction` in this world's execution context and returns
   * its result.
   *
   * @param pageFunction - Function or expression string to evaluate.
   * @param args - Arguments forwarded to `pageFunction`.
   */
  async evaluate<ReturnType extends any>(
    pageFunction: Function | string,
    ...args: unknown[]
  ): Promise<ReturnType> {
    const context = await this.executionContext();
    return context.evaluate<ReturnType>(pageFunction, ...args);
  }

  /**
   * Queries the document with `selector` via the document handle's `$`.
   *
   * @param selector - Selector to query for.
   */
  async $(selector: string): Promise<ElementHandle | null> {
    const document = await this._document();
    const value = await document.$(selector);
    return value;
  }

  /**
   * Returns a handle to the `document` of the current execution context.
   * The promise is cached until the context is cleared.
   */
  async _document(): Promise<ElementHandle> {
    if (this._documentPromise) return this._documentPromise;
    this._documentPromise = this.executionContext().then(async (context) => {
      const document = await context.evaluateHandle('document');
      return document.asElement();
    });
    return this._documentPromise;
  }

  /**
   * Evaluates an XPath expression via the document handle's `$x`.
   *
   * @param expression - XPath expression.
   */
  async $x(expression: string): Promise<ElementHandle[]> {
    const document = await this._document();
    const value = await document.$x(expression);
    return value;
  }

  /**
   * Delegates to the document handle's `$eval`.
   *
   * @param selector - Selector to query for.
   * @param pageFunction - Function or expression string to evaluate.
   * @param args - Extra arguments forwarded to `pageFunction`.
   */
  async $eval<ReturnType extends any>(
    selector: string,
    pageFunction: Function | string,
    ...args: unknown[]
  ): Promise<ReturnType> {
    const document = await this._document();
    return document.$eval<ReturnType>(selector, pageFunction, ...args);
  }

  /**
   * Delegates to the document handle's `$$eval`.
   *
   * @param selector - Selector to query for.
   * @param pageFunction - Function or expression string to evaluate.
   * @param args - Extra arguments forwarded to `pageFunction`.
   */
  async $$eval<ReturnType extends any>(
    selector: string,
    pageFunction: Function | string,
    ...args: unknown[]
  ): Promise<ReturnType> {
    const document = await this._document();
    const value = await document.$$eval<ReturnType>(
      selector,
      pageFunction,
      ...args
    );
    return value;
  }

  /**
   * Queries the document with `selector` via the document handle's `$$`.
   *
   * @param selector - Selector to query for.
   */
  async $$(selector: string): Promise<ElementHandle[]> {
    const document = await this._document();
    const value = await document.$$(selector);
    return value;
  }

  /**
   * Returns the serialized doctype (if any) followed by the `outerHTML` of
   * the document element (if any).
   */
  async content(): Promise<string> {
    return await this.evaluate(() => {
      let retVal = '';
      if (document.doctype)
        retVal = new XMLSerializer().serializeToString(document.doctype);
      if (document.documentElement)
        retVal += document.documentElement.outerHTML;
      return retVal;
    });
  }

  /**
   * Replaces the document's content with `html` and waits for the requested
   * lifecycle events.
   *
   * @param html - Markup written into the document.
   * @param options - `waitUntil` defaults to `['load']`; `timeout` defaults
   *   to the navigation timeout from the timeout settings.
   * @throws The error produced by the lifecycle watcher's
   *   timeout-or-termination promise, if that settles first with one.
   */
  async setContent(
    html: string,
    options: {
      timeout?: number;
      waitUntil?: PuppeteerLifeCycleEvent | PuppeteerLifeCycleEvent[];
    } = {}
  ): Promise<void> {
    const {
      waitUntil = ['load'],
      timeout = this._timeoutSettings.navigationTimeout(),
    } = options;
    // We rely upon the fact that document.open() will reset frame lifecycle with "init"
    // lifecycle event. @see https://crrev.com/608658
    await this.evaluate((html) => {
      document.open();
      document.write(html);
      document.close();
    }, html);
    const watcher = new LifecycleWatcher(
      this._frameManager,
      this._frame,
      waitUntil,
      timeout
    );
    const error = await Promise.race([
      watcher.timeoutOrTerminationPromise(),
      watcher.lifecyclePromise(),
    ]);
    watcher.dispose();
    if (error) throw error;
  }

  /**
   * Appends a `<script>` element to `document.head`.
   *
   * The source is taken from the first option that is set, checked in this
   * order: `url`, `path` (file is read and inlined), `content`.
   *
   * @param options - Script source and optional `type` attribute.
   * @returns A handle to the inserted element.
   * @throws Error when loading from `url` fails, or when none of `url`,
   *   `path`, `content` is provided.
   */
  async addScriptTag(options: {
    url?: string;
    path?: string;
    content?: string;
    type?: string;
  }): Promise<ElementHandle> {
    // NOTE: the two functions below are passed to `evaluateHandle`, so they
    // must stay self-contained and must not capture variables from this scope.
    async function addScriptUrl(
      url: string,
      type: string
    ): Promise<HTMLElement> {
      const script = document.createElement('script');
      script.src = url;
      if (type) script.type = type;
      const promise = new Promise((res, rej) => {
        script.onload = res;
        script.onerror = rej;
      });
      document.head.appendChild(script);
      await promise;
      return script;
    }

    function addScriptContent(
      content: string,
      type = 'text/javascript'
    ): HTMLElement {
      const script = document.createElement('script');
      script.type = type;
      script.text = content;
      let error = null;
      script.onerror = (e) => (error = e);
      document.head.appendChild(script);
      if (error) throw error;
      return script;
    }

    const { url = null, path = null, content = null, type = '' } = options;

    if (url !== null) {
      try {
        const context = await this.executionContext();
        return (
          await context.evaluateHandle(addScriptUrl, url, type)
        ).asElement();
      } catch (error) {
        throw new Error(`Loading script from ${url} failed`);
      }
    }

    if (path !== null) {
      let contents = await readFileAsync(path, 'utf8');
      // Name the inlined script after its file; newlines are stripped from
      // the path before it is appended.
      contents += '//# sourceURL=' + path.replace(/\n/g, '');
      const context = await this.executionContext();
      return (
        await context.evaluateHandle(addScriptContent, contents, type)
      ).asElement();
    }

    if (content !== null) {
      const context = await this.executionContext();
      return (
        await context.evaluateHandle(addScriptContent, content, type)
      ).asElement();
    }

    throw new Error(
      'Provide an object with a `url`, `path` or `content` property'
    );
  }

  /**
   * Appends a stylesheet to `document.head`: a `<link rel="stylesheet">` for
   * `url`, or a `<style>` element for `path` / `content`.
   *
   * The source is taken from the first option that is set, checked in this
   * order: `url`, `path` (file is read and inlined), `content`.
   *
   * @param options - Stylesheet source.
   * @returns A handle to the inserted element.
   * @throws Error when loading from `url` fails, or when none of `url`,
   *   `path`, `content` is provided.
   */
  async addStyleTag(options: {
    url?: string;
    path?: string;
    content?: string;
  }): Promise<ElementHandle> {
    // NOTE: the two functions below are passed to `evaluateHandle`, so they
    // must stay self-contained and must not capture variables from this scope.
    async function addStyleUrl(url: string): Promise<HTMLElement> {
      const link = document.createElement('link');
      link.rel = 'stylesheet';
      link.href = url;
      const promise = new Promise((res, rej) => {
        link.onload = res;
        link.onerror = rej;
      });
      document.head.appendChild(link);
      await promise;
      return link;
    }

    async function addStyleContent(content: string): Promise<HTMLElement> {
      const style = document.createElement('style');
      style.type = 'text/css';
      style.appendChild(document.createTextNode(content));
      const promise = new Promise((res, rej) => {
        style.onload = res;
        style.onerror = rej;
      });
      document.head.appendChild(style);
      await promise;
      return style;
    }

    const { url = null, path = null, content = null } = options;

    if (url !== null) {
      try {
        const context = await this.executionContext();
        return (await context.evaluateHandle(addStyleUrl, url)).asElement();
      } catch (error) {
        throw new Error(`Loading style from ${url} failed`);
      }
    }

    if (path !== null) {
      let contents = await readFileAsync(path, 'utf8');
      contents += '/*# sourceURL=' + path.replace(/\n/g, '') + '*/';
      const context = await this.executionContext();
      return (
        await context.evaluateHandle(addStyleContent, contents)
      ).asElement();
    }

    if (content !== null) {
      const context = await this.executionContext();
      return (
        await context.evaluateHandle(addStyleContent, content)
      ).asElement();
    }

    throw new Error(
      'Provide an object with a `url`, `path` or `content` property'
    );
  }

  /**
   * Looks up `selector`, runs `action` on the resulting handle and always
   * disposes the handle afterwards, even when `action` throws.
   *
   * @throws Assertion error with "No node found for selector: ..." when the
   *   selector matches nothing.
   */
  private async _withElementHandle<T>(
    selector: string,
    action: (handle: ElementHandle) => Promise<T>
  ): Promise<T> {
    const handle = await this.$(selector);
    assert(handle, 'No node found for selector: ' + selector);
    try {
      return await action(handle);
    } finally {
      await handle.dispose();
    }
  }

  /**
   * Clicks the first element matching `selector`.
   *
   * @param selector - Selector of the element to click.
   * @param options - Forwarded unchanged to the element handle's `click`.
   */
  async click(
    selector: string,
    options: { delay?: number; button?: MouseButtonInput; clickCount?: number }
  ): Promise<void> {
    await this._withElementHandle(selector, (handle) => handle.click(options));
  }

  /** Focuses the first element matching `selector`. */
  async focus(selector: string): Promise<void> {
    await this._withElementHandle(selector, (handle) => handle.focus());
  }

  /** Hovers over the first element matching `selector`. */
  async hover(selector: string): Promise<void> {
    await this._withElementHandle(selector, (handle) => handle.hover());
  }

  /**
   * Calls `select(...values)` on the first element matching `selector`.
   *
   * @returns Whatever the element handle's `select` resolves with.
   */
  async select(selector: string, ...values: string[]): Promise<string[]> {
    return this._withElementHandle(selector, (handle) =>
      handle.select(...values)
    );
  }

  /** Taps the first element matching `selector`. */
  async tap(selector: string): Promise<void> {
    await this._withElementHandle(selector, (handle) => handle.tap());
  }

  /**
   * Types `text` into the first element matching `selector`.
   *
   * @param options - Forwarded unchanged to the element handle's `type`.
   */
  async type(
    selector: string,
    text: string,
    options?: { delay: number }
  ): Promise<void> {
    await this._withElementHandle(selector, (handle) =>
      handle.type(text, options)
    );
  }

  /**
   * Waits for a node matching `selector`.
   *
   * @param options - See `WaitForSelectorOptions`; may be omitted.
   * @returns The element handle, or `null` when the wait resolved with a
   *   non-element value (e.g. when waiting for `hidden` and nothing matches).
   */
  waitForSelector(
    selector: string,
    options?: WaitForSelectorOptions
  ): Promise<ElementHandle | null> {
    return this._waitForSelectorOrXPath(selector, false, options);
  }

  /**
   * Waits for a node matching the XPath expression `xpath`.
   *
   * @param options - See `WaitForSelectorOptions`; may be omitted.
   * @returns The element handle, or `null` when the wait resolved with a
   *   non-element value (e.g. when waiting for `hidden` and nothing matches).
   */
  waitForXPath(
    xpath: string,
    options?: WaitForSelectorOptions
  ): Promise<ElementHandle | null> {
    return this._waitForSelectorOrXPath(xpath, true, options);
  }

  /**
   * Waits until `pageFunction` returns a truthy value in the page.
   *
   * @param pageFunction - Function or expression string used as predicate.
   * @param options - `polling` is `'raf'` (default), `'mutation'` or a
   *   positive interval in milliseconds; `timeout` defaults to the timeout
   *   settings' `timeout()`.
   * @param args - Arguments forwarded to `pageFunction`.
   * @returns A handle to the truthy value.
   */
  waitForFunction(
    pageFunction: Function | string,
    options: { polling?: string | number; timeout?: number } = {},
    ...args: unknown[]
  ): Promise<JSHandle> {
    const {
      polling = 'raf',
      timeout = this._timeoutSettings.timeout(),
    } = options;
    return new WaitTask(
      this,
      pageFunction,
      undefined,
      'function',
      polling,
      timeout,
      ...args
    ).promise;
  }

  /** Returns `document.title`. */
  async title(): Promise<string> {
    return this.evaluate(() => document.title);
  }

  /**
   * Shared implementation of `waitForSelector` and `waitForXPath`.
   *
   * Polls on animation frames when visibility matters and on DOM mutations
   * otherwise.
   */
  private async _waitForSelectorOrXPath(
    selectorOrXPath: string,
    isXPath: boolean,
    options: WaitForSelectorOptions = {}
  ): Promise<ElementHandle | null> {
    /**
     * Runs inside the page (serialized by WaitTask), so it must not capture
     * anything from the enclosing scope. `predicateQueryHandler` is supplied
     * by the wrapper WaitTask generates around this function.
     *
     * Returns the node when the wait condition holds, `true` when waiting for
     * `hidden` and nothing matches, and a falsy value otherwise.
     */
    function predicate(
      selectorOrXPath: string,
      isXPath: boolean,
      waitForVisible: boolean,
      waitForHidden: boolean
    ): Node | null | boolean {
      const node = isXPath
        ? document.evaluate(
            selectorOrXPath,
            document,
            null,
            XPathResult.FIRST_ORDERED_NODE_TYPE,
            null
          ).singleNodeValue
        : predicateQueryHandler
        ? (predicateQueryHandler(document, selectorOrXPath) as Element)
        : document.querySelector(selectorOrXPath);
      if (!node) return waitForHidden;
      if (!waitForVisible && !waitForHidden) return node;
      // A text node has no computed style of its own; use its parent element.
      const element =
        node.nodeType === Node.TEXT_NODE
          ? node.parentElement
          : (node as Element);
      const style = window.getComputedStyle(element);
      const isVisible =
        style && style.visibility !== 'hidden' && hasVisibleBoundingBox();
      const success =
        waitForVisible === isVisible || waitForHidden === !isVisible;
      return success ? node : null;

      function hasVisibleBoundingBox(): boolean {
        const rect = element.getBoundingClientRect();
        return !!(rect.top || rect.bottom || rect.width || rect.height);
      }
    }

    const {
      visible: waitForVisible = false,
      hidden: waitForHidden = false,
      timeout = this._timeoutSettings.timeout(),
    } = options;
    const polling = waitForVisible || waitForHidden ? 'raf' : 'mutation';
    // Used in the timeout error message: "waiting for <title> failed: ...".
    const title = `${isXPath ? 'XPath' : 'selector'} "${selectorOrXPath}"${
      waitForHidden ? ' to be hidden' : ''
    }`;
    const {
      updatedSelector,
      queryHandler,
    } = getQueryHandlerAndSelector(selectorOrXPath, (element, selector) =>
      document.querySelector(selector)
    );
    const waitTask = new WaitTask(
      this,
      predicate,
      queryHandler,
      title,
      polling,
      timeout,
      updatedSelector,
      isXPath,
      waitForVisible,
      waitForHidden
    );
    const handle = await waitTask.promise;
    if (!handle.asElement()) {
      await handle.dispose();
      return null;
    }
    return handle.asElement();
  }
}
/**
 * A single pending "wait for predicate" operation bound to a DOMWorld.
 *
 * The task evaluates `waitForPredicatePageFunction` in the world's execution
 * context. It registers itself in `domWorld._waitTasks` so the world can
 * re-run it when a new execution context is set, and it tracks the timeout
 * on the Node side because the in-page script is lost on navigation.
 */
class WaitTask {
  _domWorld: DOMWorld;
  _polling: string | number;
  _timeout: number;
  /** Source text of the function body evaluated in the page on each poll. */
  _predicateBody: string;
  _args: unknown[];
  /** Incremented on every `rerun`; lets stale runs detect they were superseded. */
  _runCount = 0;
  /** Settles with the predicate's truthy result handle, or rejects on error/timeout. */
  promise: Promise<JSHandle>;
  _resolve: (x: JSHandle) => void;
  _reject: (x: Error) => void;
  _timeoutTimer?: NodeJS.Timeout;
  _terminated = false;

  /**
   * @param domWorld - World whose execution context runs the predicate.
   * @param predicateBody - Predicate as a function or an expression string.
   * @param predicateQueryHandlerBody - Optional query handler; when given it
   *   is exposed to a function predicate as `predicateQueryHandler`.
   * @param title - Description used in the timeout error message.
   * @param polling - `'raf'`, `'mutation'` or a positive interval in ms.
   * @param timeout - Timeout in ms; a falsy value installs no timeout.
   * @param args - Arguments forwarded to the predicate.
   * @throws When `polling` is an unknown string, a non-positive number, or
   *   neither a string nor a number.
   */
  constructor(
    domWorld: DOMWorld,
    predicateBody: Function | string,
    predicateQueryHandlerBody: Function | string | undefined,
    title: string,
    polling: string | number,
    timeout: number,
    ...args: unknown[]
  ) {
    if (helper.isString(polling)) {
      assert(
        polling === 'raf' || polling === 'mutation',
        'Unknown polling option: ' + polling
      );
    } else if (helper.isNumber(polling)) {
      assert(polling > 0, 'Cannot poll with non-positive interval: ' + polling);
    } else {
      throw new Error('Unknown polling options: ' + polling);
    }

    // Builds the source of the function body that the page-side poller wraps
    // with `new Function('...args', body)`.
    function getPredicateBody(
      predicateBody: Function | string,
      predicateQueryHandlerBody: Function | string | undefined
    ): string {
      if (helper.isString(predicateBody)) return `return (${predicateBody});`;
      if (predicateQueryHandlerBody) {
        return `
          return (function wrapper(args) {
            const predicateQueryHandler = ${predicateQueryHandlerBody};
            return (${predicateBody})(...args);
          })(args);`;
      }
      return `return (${predicateBody})(...args);`;
    }

    this._domWorld = domWorld;
    this._polling = polling;
    this._timeout = timeout;
    this._predicateBody = getPredicateBody(
      predicateBody,
      predicateQueryHandlerBody
    );
    this._args = args;
    domWorld._waitTasks.add(this);
    this.promise = new Promise<JSHandle>((resolve, reject) => {
      this._resolve = resolve;
      this._reject = reject;
    });
    // Since page navigation requires us to re-install the pageScript, we should track
    // timeout on our end.
    if (timeout) {
      const timeoutError = new TimeoutError(
        `waiting for ${title} failed: timeout ${timeout}ms exceeded`
      );
      this._timeoutTimer = setTimeout(
        () => this.terminate(timeoutError),
        timeout
      );
    }
    this.rerun();
  }

  /**
   * Rejects the task with `error` and unregisters it.
   *
   * @param error - Reason the task is being terminated.
   */
  terminate(error: Error): void {
    this._terminated = true;
    this._reject(error);
    this._cleanup();
  }

  /**
   * Evaluates the poller in the world's current execution context and settles
   * the task from its outcome, unless the run was superseded, the task was
   * terminated, or the context went away (in which case a later rerun is
   * expected to take over).
   */
  async rerun(): Promise<void> {
    const runCount = ++this._runCount;
    let success: JSHandle | null = null;
    let error = null;
    try {
      const context = await this._domWorld.executionContext();
      success = await context.evaluateHandle(
        waitForPredicatePageFunction,
        this._predicateBody,
        this._polling,
        this._timeout,
        ...this._args
      );
    } catch (error_) {
      error = error_;
    }

    // A newer run started, or the task was terminated, while this one was in
    // flight: drop the result.
    if (this._terminated || runCount !== this._runCount) {
      if (success) await success.dispose();
      return;
    }

    // Ignore timeouts in pageScript - we track timeouts ourselves.
    // If the frame's execution context has already changed, `frame.evaluate` will
    // throw an error - ignore this predicate run altogether.
    if (
      !error &&
      (await this._domWorld.evaluate((s) => !s, success).catch(() => true))
    ) {
      await success.dispose();
      return;
    }

    // When the page is navigated, the promise is rejected.
    // We will try again in the new execution context.
    if (error && error.message.includes('Execution context was destroyed'))
      return;

    // We could have tried to evaluate in a context which was already
    // destroyed.
    if (
      error &&
      error.message.includes('Cannot find context with specified id')
    )
      return;

    if (error) this._reject(error);
    else this._resolve(success);
    this._cleanup();
  }

  /** Cancels the timeout timer and unregisters the task from its world. */
  _cleanup(): void {
    clearTimeout(this._timeoutTimer);
    this._domWorld._waitTasks.delete(this);
  }
}
2020-05-07 10:54:55 +00:00
/**
 * Polls a predicate until it returns a truthy value or the timeout elapses.
 *
 * This function is evaluated in the page, so it must stay self-contained and
 * must not reference anything outside its own body.
 *
 * @param predicateBody - Source of a function body that receives `...args`.
 * @param polling - `'raf'` (poll on animation frames), `'mutation'` (poll on
 *   DOM mutations) or a number (poll every N milliseconds).
 * @param timeout - Milliseconds after which polling stops; falsy disables it.
 * @param args - Arguments passed to the predicate on every invocation.
 * @returns The predicate's first truthy result, or `undefined` when polling
 *   stopped because of the timeout or `polling` is not a recognised value.
 */
async function waitForPredicatePageFunction(
  predicateBody: string,
  polling: string | number,
  timeout: number,
  ...args: unknown[]
): Promise<unknown> {
  const predicate = new Function('...args', predicateBody);
  let timedOut = false;
  if (timeout) setTimeout(() => (timedOut = true), timeout);
  if (polling === 'raf') return await pollRaf();
  if (polling === 'mutation') return await pollMutation();
  if (typeof polling === 'number') return await pollInterval(polling);

  /** Checks once immediately, then re-checks on every observed DOM mutation. */
  async function pollMutation(): Promise<unknown> {
    const success = await predicate(...args);
    if (success) return Promise.resolve(success);

    let fulfill: (value?: unknown) => void;
    const result = new Promise<unknown>((resolve) => (fulfill = resolve));
    const observer = new MutationObserver(async () => {
      if (timedOut) {
        observer.disconnect();
        fulfill();
        // The wait is over: do not evaluate the predicate again.
        return;
      }
      const success = await predicate(...args);
      if (success) {
        observer.disconnect();
        fulfill(success);
      }
    });
    observer.observe(document, {
      childList: true,
      subtree: true,
      attributes: true,
    });
    return result;
  }

  /** Checks once immediately, then once per animation frame. */
  async function pollRaf(): Promise<unknown> {
    let fulfill: (value?: unknown) => void;
    const result = new Promise<unknown>((resolve) => (fulfill = resolve));
    await onRaf();
    return result;

    async function onRaf(): Promise<unknown> {
      if (timedOut) {
        fulfill();
        return;
      }
      const success = await predicate(...args);
      if (success) fulfill(success);
      else requestAnimationFrame(onRaf);
    }
  }

  /** Checks once immediately, then every `pollInterval` milliseconds. */
  async function pollInterval(pollInterval: number): Promise<unknown> {
    let fulfill: (value?: unknown) => void;
    const result = new Promise<unknown>((resolve) => (fulfill = resolve));
    await onTimeout();
    return result;

    async function onTimeout(): Promise<unknown> {
      if (timedOut) {
        fulfill();
        return;
      }
      const success = await predicate(...args);
      if (success) fulfill(success);
      else setTimeout(onTimeout, pollInterval);
    }
  }
}