From 60ba8c3af9f18dbceb0ce93815d378bb423b658a Mon Sep 17 00:00:00 2001 From: JoelEinbinder Date: Tue, 19 Dec 2017 16:23:45 -0800 Subject: [PATCH] feat: add page.xpath (#1620) This patch adds xpath support with the following methods: - page.xpath - frame.xpath - elementHandle.xpath Fixes #537 --- docs/api.md | 23 +++++++++++++++ lib/ElementHandle.js | 19 +++++++++++++ lib/ExecutionContext.js | 4 +-- lib/FrameManager.js | 63 +++++++++++++++++++++++++---------------- lib/Page.js | 8 ++++++ test/test.js | 32 +++++++++++++++++++++ 6 files changed, 123 insertions(+), 26 deletions(-) diff --git a/docs/api.md b/docs/api.md index c0fe4353879..7a90a79ac8d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -92,6 +92,7 @@ * [page.waitForFunction(pageFunction[, options[, ...args]])](#pagewaitforfunctionpagefunction-options-args) * [page.waitForNavigation(options)](#pagewaitfornavigationoptions) * [page.waitForSelector(selector[, options])](#pagewaitforselectorselector-options) + * [page.xpath(expression)](#pagexpathexpression) - [class: Keyboard](#class-keyboard) * [keyboard.down(key[, options])](#keyboarddownkey-options) * [keyboard.press(key[, options])](#keyboardpresskey-options) @@ -139,6 +140,7 @@ * [frame.waitFor(selectorOrFunctionOrTimeout[, options[, ...args]])](#framewaitforselectororfunctionortimeout-options-args) * [frame.waitForFunction(pageFunction[, options[, ...args]])](#framewaitforfunctionpagefunction-options-args) * [frame.waitForSelector(selector[, options])](#framewaitforselectorselector-options) + * [frame.xpath(expression)](#framexpathexpression) - [class: ExecutionContext](#class-executioncontext) * [executionContext.evaluate(pageFunction, ...args)](#executioncontextevaluatepagefunction-args) * [executionContext.evaluateHandle(pageFunction, ...args)](#executioncontextevaluatehandlepagefunction-args) @@ -169,6 +171,7 @@ * [elementHandle.toString()](#elementhandletostring) * [elementHandle.type(text[, options])](#elementhandletypetext-options) * 
[elementHandle.uploadFile(...filePaths)](#elementhandleuploadfilefilepaths) + * [elementHandle.xpath(expression)](#elementhandlexpathexpression) - [class: Request](#class-request) * [request.abort([errorCode])](#requestaborterrorcode) * [request.continue([overrides])](#requestcontinueoverrides) @@ -1202,6 +1205,14 @@ puppeteer.launch().then(async browser => { ``` Shortcut for [page.mainFrame().waitForSelector(selector[, options])](#framewaitforselectorselector-options). +#### page.xpath(expression) +- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate). +- returns: <[Promise]> Promise which resolves to ElementHandle pointing to the page element. + +The method evaluates the XPath expression. If there's no such element within the page, the method will resolve to `null`. + +Shortcut for [page.mainFrame().xpath(expression)](#framexpathexpression) + ### class: Keyboard Keyboard provides an api for managing a virtual keyboard. The high level api is [`keyboard.type`](#keyboardtypetext-options), which takes raw characters and generates proper keydown, keypress/input, and keyup events on your page. @@ -1650,6 +1661,12 @@ puppeteer.launch().then(async browser => { }); ``` +#### frame.xpath(expression) +- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate). +- returns: <[Promise]> Promise which resolves to ElementHandle pointing to the frame element. + +The method evaluates the XPath expression. If there's no such element within the frame, the method will resolve to `null`. + ### class: ExecutionContext The class represents a context for JavaScript execution. Examples of JavaScript contexts are: @@ -1950,6 +1967,12 @@ await elementHandle.press('Enter'); This method expects `elementHandle` to point to an [input element](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input). 
+#### elementHandle.xpath(expression) +- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate). +- returns: <[Promise]> Promise which resolves to ElementHandle pointing to the frame element. + +The method evaluates the XPath expression relative to the elementHandle. If there's no such element, the method will resolve to `null`. + ### class: Request Whenever the page sends a request, the following events are emitted by puppeteer's page: diff --git a/lib/ElementHandle.js b/lib/ElementHandle.js index 49e7a8db5da..517d57012b7 100644 --- a/lib/ElementHandle.js +++ b/lib/ElementHandle.js @@ -192,6 +192,25 @@ class ElementHandle extends JSHandle { } return result; } + + /** + * @param {string} expression + * @return {!Promise} + */ + async xpath(expression) { + const handle = await this.executionContext().evaluateHandle( + (element, expression) => { + const document = element.ownerDocument || element; + return document.evaluate(expression, element, null, XPathResult.FIRST_ORDERED_NODE_TYPE).singleNodeValue; + }, + this, expression + ); + const element = handle.asElement(); + if (element) + return element; + await handle.dispose(); + return null; + } } module.exports = ElementHandle; diff --git a/lib/ExecutionContext.js b/lib/ExecutionContext.js index 90b0058583b..f6ba5630972 100644 --- a/lib/ExecutionContext.js +++ b/lib/ExecutionContext.js @@ -33,7 +33,7 @@ class ExecutionContext { } /** - * @param {function(*)|string} pageFunction + * @param {Function|string} pageFunction * @param {...*} args * @return {!Promise<(!Object|undefined)>} */ @@ -45,7 +45,7 @@ class ExecutionContext { } /** - * @param {function(*)|string} pageFunction + * @param {Function|string} pageFunction * @param {...*} args * @return {!Promise} */ diff --git a/lib/FrameManager.js b/lib/FrameManager.js index 7de7d0c170a..1508e4af22b 100644 --- a/lib/FrameManager.js +++ b/lib/FrameManager.js @@ -229,6 +229,9 @@ class Frame { this._url = ''; 
this._id = frameId; + /** @type {?Promise} */ + this._documentPromise = null; + /** @type {?Promise} */ this._contextPromise = null; this._contextResolveCallback = null; this._setDefaultContext(null); @@ -255,6 +258,7 @@ class Frame { for (const waitTask of this._waitTasks) waitTask.rerun(); } else { + this._documentPromise = null; this._contextPromise = new Promise(fulfill => { this._contextResolveCallback = fulfill; }); @@ -283,13 +287,32 @@ class Frame { * @return {!Promise} */ async $(selector) { - const context = await this._contextPromise; - const handle = await context.evaluateHandle(selector => document.querySelector(selector), selector); - const element = handle.asElement(); - if (element) - return element; - await handle.dispose(); - return null; + const document = await this._document(); + const value = await document.$(selector); + return value; + } + + /** + * @return {!Promise} + */ + async _document() { + if (this._documentPromise) + return this._documentPromise; + this._documentPromise = this._contextPromise.then(async context => { + const document = await context.evaluateHandle('document'); + return document.asElement(); + }); + return this._documentPromise; + } + + /** + * @param {string} expression + * @return {!Promise} + */ + async xpath(expression) { + const document = await this._document(); + const value = await document.xpath(expression); + return value; } /** @@ -326,17 +349,9 @@ class Frame { * @return {!Promise>} */ async $$(selector) { - const context = await this._contextPromise; - const arrayHandle = await context.evaluateHandle(selector => document.querySelectorAll(selector), selector); - const properties = await arrayHandle.getProperties(); - await arrayHandle.dispose(); - const result = []; - for (const property of properties.values()) { - const elementHandle = property.asElement(); - if (elementHandle) - result.push(elementHandle); - } - return result; + const document = await this._document(); + const value = await 
document.$$(selector); + return value; } /** @@ -408,7 +423,7 @@ class Frame { const url = options.url; try { const context = await this._contextPromise; - return await context.evaluateHandle(addScriptUrl, url); + return (await context.evaluateHandle(addScriptUrl, url)).asElement(); } catch (error) { throw new Error(`Loading script from ${url} failed`); } @@ -418,12 +433,12 @@ class Frame { let contents = await readFileAsync(options.path, 'utf8'); contents += '//# sourceURL=' + options.path.replace(/\n/g, ''); const context = await this._contextPromise; - return context.evaluateHandle(addScriptContent, contents); + return (await context.evaluateHandle(addScriptContent, contents)).asElement(); } if (typeof options.content === 'string') { const context = await this._contextPromise; - return context.evaluateHandle(addScriptContent, options.content); + return (await context.evaluateHandle(addScriptContent, options.content)).asElement(); } throw new Error('Provide an object with a `url`, `path` or `content` property'); @@ -465,7 +480,7 @@ class Frame { const url = options.url; try { const context = await this._contextPromise; - return await context.evaluateHandle(addStyleUrl, url); + return (await context.evaluateHandle(addStyleUrl, url)).asElement(); } catch (error) { throw new Error(`Loading style from ${url} failed`); } @@ -475,12 +490,12 @@ class Frame { let contents = await readFileAsync(options.path, 'utf8'); contents += '/*# sourceURL=' + options.path.replace(/\n/g, '') + '*/'; const context = await this._contextPromise; - return await context.evaluateHandle(addStyleContent, contents); + return (await context.evaluateHandle(addStyleContent, contents)).asElement(); } if (typeof options.content === 'string') { const context = await this._contextPromise; - return await context.evaluateHandle(addStyleContent, options.content); + return (await context.evaluateHandle(addStyleContent, options.content)).asElement(); } throw new Error('Provide an object with a `url`, 
`path` or `content` property'); diff --git a/lib/Page.js b/lib/Page.js index 7d745dd6dbb..2146aac1a49 100644 --- a/lib/Page.js +++ b/lib/Page.js @@ -228,6 +228,14 @@ class Page extends EventEmitter { return this.mainFrame().$$(selector); } + /** + * @param {string} expression + * @return {!Promise} + */ + async xpath(expression) { + return this.mainFrame().xpath(expression); + } + /** * @param {!Array} urls * @return {!Promise>} diff --git a/test/test.js b/test/test.js index 88385316b12..206a0dc5e59 100644 --- a/test/test.js +++ b/test/test.js @@ -1720,6 +1720,18 @@ describe('Page', function() { }); }); + describe('Path.xpath', function() { + it('should query existing element', async({page, server}) => { + await page.setContent('
test
'); + const element = await page.xpath('/html/body/section'); + expect(element).toBeTruthy(); + }); + it('should return null for non-existing element', async({page, server}) => { + const element = await page.xpath('/html/body/non-existing-element'); + expect(element).toBe(null); + }); + }); + describe('ElementHandle.boundingBox', function() { it('should work', async({page, server}) => { await page.setViewport({width: 500, height: 500}); @@ -1910,6 +1922,26 @@ describe('Page', function() { }); }); + + describe('ElementHandle.xpath', function() { + it('should query existing element', async({page, server}) => { + await page.goto(server.PREFIX + '/playground.html'); + await page.setContent('
A
'); + const html = await page.$('html'); + const second = await html.xpath(`./body/div[contains(@class, 'second')]`); + const inner = await second.xpath(`./div[contains(@class, 'inner')]`); + const content = await page.evaluate(e => e.textContent, inner); + expect(content).toBe('A'); + }); + + it('should return null for non-existing element', async({page, server}) => { + await page.setContent('
B
'); + const html = await page.$('html'); + const second = await html.xpath(`/div[contains(@class, 'third')]`); + expect(second).toBe(null); + }); + }); + describe('input', function() { it('should click the button', async({page, server}) => { await page.goto(server.PREFIX + '/input/button.html');