feat: add page.xpath (#1620)

This patch adds xpath support with the following methods:
- page.xpath
- frame.xpath
- elementHandle.xpath

Fixes #537
This commit is contained in:
JoelEinbinder 2017-12-19 16:23:45 -08:00 committed by Andrey Lushnikov
parent 1684104dc3
commit 60ba8c3af9
6 changed files with 123 additions and 26 deletions

View File

@ -92,6 +92,7 @@
* [page.waitForFunction(pageFunction[, options[, ...args]])](#pagewaitforfunctionpagefunction-options-args) * [page.waitForFunction(pageFunction[, options[, ...args]])](#pagewaitforfunctionpagefunction-options-args)
* [page.waitForNavigation(options)](#pagewaitfornavigationoptions) * [page.waitForNavigation(options)](#pagewaitfornavigationoptions)
* [page.waitForSelector(selector[, options])](#pagewaitforselectorselector-options) * [page.waitForSelector(selector[, options])](#pagewaitforselectorselector-options)
* [page.xpath(expression)](#pagexpathexpression)
- [class: Keyboard](#class-keyboard) - [class: Keyboard](#class-keyboard)
* [keyboard.down(key[, options])](#keyboarddownkey-options) * [keyboard.down(key[, options])](#keyboarddownkey-options)
* [keyboard.press(key[, options])](#keyboardpresskey-options) * [keyboard.press(key[, options])](#keyboardpresskey-options)
@ -139,6 +140,7 @@
* [frame.waitFor(selectorOrFunctionOrTimeout[, options[, ...args]])](#framewaitforselectororfunctionortimeout-options-args) * [frame.waitFor(selectorOrFunctionOrTimeout[, options[, ...args]])](#framewaitforselectororfunctionortimeout-options-args)
* [frame.waitForFunction(pageFunction[, options[, ...args]])](#framewaitforfunctionpagefunction-options-args) * [frame.waitForFunction(pageFunction[, options[, ...args]])](#framewaitforfunctionpagefunction-options-args)
* [frame.waitForSelector(selector[, options])](#framewaitforselectorselector-options) * [frame.waitForSelector(selector[, options])](#framewaitforselectorselector-options)
* [frame.xpath(expression)](#framexpathexpression)
- [class: ExecutionContext](#class-executioncontext) - [class: ExecutionContext](#class-executioncontext)
* [executionContext.evaluate(pageFunction, ...args)](#executioncontextevaluatepagefunction-args) * [executionContext.evaluate(pageFunction, ...args)](#executioncontextevaluatepagefunction-args)
* [executionContext.evaluateHandle(pageFunction, ...args)](#executioncontextevaluatehandlepagefunction-args) * [executionContext.evaluateHandle(pageFunction, ...args)](#executioncontextevaluatehandlepagefunction-args)
@ -169,6 +171,7 @@
* [elementHandle.toString()](#elementhandletostring) * [elementHandle.toString()](#elementhandletostring)
* [elementHandle.type(text[, options])](#elementhandletypetext-options) * [elementHandle.type(text[, options])](#elementhandletypetext-options)
* [elementHandle.uploadFile(...filePaths)](#elementhandleuploadfilefilepaths) * [elementHandle.uploadFile(...filePaths)](#elementhandleuploadfilefilepaths)
* [elementHandle.xpath(expression)](#elementhandlexpathexpression)
- [class: Request](#class-request) - [class: Request](#class-request)
* [request.abort([errorCode])](#requestaborterrorcode) * [request.abort([errorCode])](#requestaborterrorcode)
* [request.continue([overrides])](#requestcontinueoverrides) * [request.continue([overrides])](#requestcontinueoverrides)
@ -1202,6 +1205,14 @@ puppeteer.launch().then(async browser => {
``` ```
Shortcut for [page.mainFrame().waitForSelector(selector[, options])](#framewaitforselectorselector-options). Shortcut for [page.mainFrame().waitForSelector(selector[, options])](#framewaitforselectorselector-options).
#### page.xpath(expression)
- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate).
- returns: <[Promise]<?[ElementHandle]>> Promise which resolves to ElementHandle pointing to the page element.
The method evaluates the XPath expression. If there's no such element within the page, the method will resolve to `null`.
Shortcut for [page.mainFrame().xpath(expression)](#framexpathexpression).
### class: Keyboard ### class: Keyboard
Keyboard provides an api for managing a virtual keyboard. The high level api is [`keyboard.type`](#keyboardtypetext-options), which takes raw characters and generates proper keydown, keypress/input, and keyup events on your page. Keyboard provides an api for managing a virtual keyboard. The high level api is [`keyboard.type`](#keyboardtypetext-options), which takes raw characters and generates proper keydown, keypress/input, and keyup events on your page.
@ -1650,6 +1661,12 @@ puppeteer.launch().then(async browser => {
}); });
``` ```
#### frame.xpath(expression)
- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate).
- returns: <[Promise]<?[ElementHandle]>> Promise which resolves to ElementHandle pointing to the frame element.
The method evaluates the XPath expression. If there's no such element within the frame, the method will resolve to `null`.
### class: ExecutionContext ### class: ExecutionContext
The class represents a context for JavaScript execution. Examples of JavaScript contexts are: The class represents a context for JavaScript execution. Examples of JavaScript contexts are:
@ -1950,6 +1967,12 @@ await elementHandle.press('Enter');
This method expects `elementHandle` to point to an [input element](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input). This method expects `elementHandle` to point to an [input element](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input).
#### elementHandle.xpath(expression)
- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate).
- returns: <[Promise]<?[ElementHandle]>> Promise which resolves to ElementHandle pointing to the matching element.
The method evaluates the XPath expression relative to the elementHandle. If there's no such element, the method will resolve to `null`.
### class: Request ### class: Request
Whenever the page sends a request, the following events are emitted by puppeteer's page: Whenever the page sends a request, the following events are emitted by puppeteer's page:

View File

@ -192,6 +192,25 @@ class ElementHandle extends JSHandle {
} }
return result; return result;
} }
/**
* @param {string} expression
* @return {!Promise<?ElementHandle>}
*/
async xpath(expression) {
const handle = await this.executionContext().evaluateHandle(
(element, expression) => {
const document = element.ownerDocument || element;
return document.evaluate(expression, element, null, XPathResult.FIRST_ORDERED_NODE_TYPE).singleNodeValue;
},
this, expression
);
const element = handle.asElement();
if (element)
return element;
await handle.dispose();
return null;
}
} }
module.exports = ElementHandle; module.exports = ElementHandle;

View File

@ -33,7 +33,7 @@ class ExecutionContext {
} }
/** /**
* @param {function(*)|string} pageFunction * @param {Function|string} pageFunction
* @param {...*} args * @param {...*} args
* @return {!Promise<(!Object|undefined)>} * @return {!Promise<(!Object|undefined)>}
*/ */
@ -45,7 +45,7 @@ class ExecutionContext {
} }
/** /**
* @param {function(*)|string} pageFunction * @param {Function|string} pageFunction
* @param {...*} args * @param {...*} args
* @return {!Promise<!JSHandle>} * @return {!Promise<!JSHandle>}
*/ */

View File

@ -229,6 +229,9 @@ class Frame {
this._url = ''; this._url = '';
this._id = frameId; this._id = frameId;
/** @type {?Promise<!ElementHandle>} */
this._documentPromise = null;
/** @type {?Promise<!ExecutionContext>} */
this._contextPromise = null; this._contextPromise = null;
this._contextResolveCallback = null; this._contextResolveCallback = null;
this._setDefaultContext(null); this._setDefaultContext(null);
@ -255,6 +258,7 @@ class Frame {
for (const waitTask of this._waitTasks) for (const waitTask of this._waitTasks)
waitTask.rerun(); waitTask.rerun();
} else { } else {
this._documentPromise = null;
this._contextPromise = new Promise(fulfill => { this._contextPromise = new Promise(fulfill => {
this._contextResolveCallback = fulfill; this._contextResolveCallback = fulfill;
}); });
@ -283,13 +287,32 @@ class Frame {
* @return {!Promise<?ElementHandle>} * @return {!Promise<?ElementHandle>}
*/ */
async $(selector) { async $(selector) {
const context = await this._contextPromise; const document = await this._document();
const handle = await context.evaluateHandle(selector => document.querySelector(selector), selector); const value = await document.$(selector);
const element = handle.asElement(); return value;
if (element) }
return element;
await handle.dispose(); /**
return null; * @return {!Promise<!ElementHandle>}
*/
async _document() {
if (this._documentPromise)
return this._documentPromise;
this._documentPromise = this._contextPromise.then(async context => {
const document = await context.evaluateHandle('document');
return document.asElement();
});
return this._documentPromise;
}
/**
* @param {string} expression
* @return {!Promise<?ElementHandle>}
*/
async xpath(expression) {
const document = await this._document();
const value = await document.xpath(expression);
return value;
} }
/** /**
@ -326,17 +349,9 @@ class Frame {
* @return {!Promise<!Array<!ElementHandle>>} * @return {!Promise<!Array<!ElementHandle>>}
*/ */
async $$(selector) { async $$(selector) {
const context = await this._contextPromise; const document = await this._document();
const arrayHandle = await context.evaluateHandle(selector => document.querySelectorAll(selector), selector); const value = await document.$$(selector);
const properties = await arrayHandle.getProperties(); return value;
await arrayHandle.dispose();
const result = [];
for (const property of properties.values()) {
const elementHandle = property.asElement();
if (elementHandle)
result.push(elementHandle);
}
return result;
} }
/** /**
@ -408,7 +423,7 @@ class Frame {
const url = options.url; const url = options.url;
try { try {
const context = await this._contextPromise; const context = await this._contextPromise;
return await context.evaluateHandle(addScriptUrl, url); return (await context.evaluateHandle(addScriptUrl, url)).asElement();
} catch (error) { } catch (error) {
throw new Error(`Loading script from ${url} failed`); throw new Error(`Loading script from ${url} failed`);
} }
@ -418,12 +433,12 @@ class Frame {
let contents = await readFileAsync(options.path, 'utf8'); let contents = await readFileAsync(options.path, 'utf8');
contents += '//# sourceURL=' + options.path.replace(/\n/g, ''); contents += '//# sourceURL=' + options.path.replace(/\n/g, '');
const context = await this._contextPromise; const context = await this._contextPromise;
return context.evaluateHandle(addScriptContent, contents); return (await context.evaluateHandle(addScriptContent, contents)).asElement();
} }
if (typeof options.content === 'string') { if (typeof options.content === 'string') {
const context = await this._contextPromise; const context = await this._contextPromise;
return context.evaluateHandle(addScriptContent, options.content); return (await context.evaluateHandle(addScriptContent, options.content)).asElement();
} }
throw new Error('Provide an object with a `url`, `path` or `content` property'); throw new Error('Provide an object with a `url`, `path` or `content` property');
@ -465,7 +480,7 @@ class Frame {
const url = options.url; const url = options.url;
try { try {
const context = await this._contextPromise; const context = await this._contextPromise;
return await context.evaluateHandle(addStyleUrl, url); return (await context.evaluateHandle(addStyleUrl, url)).asElement();
} catch (error) { } catch (error) {
throw new Error(`Loading style from ${url} failed`); throw new Error(`Loading style from ${url} failed`);
} }
@ -475,12 +490,12 @@ class Frame {
let contents = await readFileAsync(options.path, 'utf8'); let contents = await readFileAsync(options.path, 'utf8');
contents += '/*# sourceURL=' + options.path.replace(/\n/g, '') + '*/'; contents += '/*# sourceURL=' + options.path.replace(/\n/g, '') + '*/';
const context = await this._contextPromise; const context = await this._contextPromise;
return await context.evaluateHandle(addStyleContent, contents); return (await context.evaluateHandle(addStyleContent, contents)).asElement();
} }
if (typeof options.content === 'string') { if (typeof options.content === 'string') {
const context = await this._contextPromise; const context = await this._contextPromise;
return await context.evaluateHandle(addStyleContent, options.content); return (await context.evaluateHandle(addStyleContent, options.content)).asElement();
} }
throw new Error('Provide an object with a `url`, `path` or `content` property'); throw new Error('Provide an object with a `url`, `path` or `content` property');

View File

@ -228,6 +228,14 @@ class Page extends EventEmitter {
return this.mainFrame().$$(selector); return this.mainFrame().$$(selector);
} }
/**
* @param {string} expression
* @return {!Promise<?Puppeteer.ElementHandle>}
*/
async xpath(expression) {
return this.mainFrame().xpath(expression);
}
/** /**
* @param {!Array<string>} urls * @param {!Array<string>} urls
* @return {!Promise<!Array<Network.Cookie>>} * @return {!Promise<!Array<Network.Cookie>>}

View File

@ -1720,6 +1720,18 @@ describe('Page', function() {
}); });
}); });
// Specs for page.xpath. The suite was previously labelled 'Path.xpath',
// a typo that made it hard to find in test reports and filters.
describe('Page.xpath', function() {
  it('should query existing element', async({page, server}) => {
    await page.setContent('<section>test</section>');
    const element = await page.xpath('/html/body/section');
    expect(element).toBeTruthy();
  });
  it('should return null for non-existing element', async({page, server}) => {
    // No content is set here; the expression names an element that is not expected to exist.
    const element = await page.xpath('/html/body/non-existing-element');
    expect(element).toBe(null);
  });
});
describe('ElementHandle.boundingBox', function() { describe('ElementHandle.boundingBox', function() {
it('should work', async({page, server}) => { it('should work', async({page, server}) => {
await page.setViewport({width: 500, height: 500}); await page.setViewport({width: 500, height: 500});
@ -1910,6 +1922,26 @@ describe('Page', function() {
}); });
}); });
// Specs for elementHandle.xpath: expressions are evaluated with the handle's
// element as the context node.
describe('ElementHandle.xpath', function() {
  it('should query existing element', async({page, server}) => {
    await page.goto(server.PREFIX + '/playground.html');
    await page.setContent('<html><body><div class="second"><div class="inner">A</div></div></body></html>');
    // Walk down from <html> in two relative steps, chaining the returned handles.
    const rootHandle = await page.$('html');
    const outerDiv = await rootHandle.xpath(`./body/div[contains(@class, 'second')]`);
    const innerDiv = await outerDiv.xpath(`./div[contains(@class, 'inner')]`);
    const text = await page.evaluate(e => e.textContent, innerDiv);
    expect(text).toBe('A');
  });

  it('should return null for non-existing element', async({page, server}) => {
    await page.setContent('<html><body><div class="second"><div class="inner">B</div></div></body></html>');
    const rootHandle = await page.$('html');
    const missing = await rootHandle.xpath(`/div[contains(@class, 'third')]`);
    expect(missing).toBe(null);
  });
});
describe('input', function() { describe('input', function() {
it('should click the button', async({page, server}) => { it('should click the button', async({page, server}) => {
await page.goto(server.PREFIX + '/input/button.html'); await page.goto(server.PREFIX + '/input/button.html');