mirror of
https://github.com/puppeteer/puppeteer
synced 2024-06-14 14:02:48 +00:00
feat: add page.xpath (#1620)
This patch adds xpath support with the following methods: - page.xpath - frame.xpath - elementHandle.xpath Fixes #537
This commit is contained in:
parent
1684104dc3
commit
60ba8c3af9
23
docs/api.md
23
docs/api.md
@ -92,6 +92,7 @@
|
||||
* [page.waitForFunction(pageFunction[, options[, ...args]])](#pagewaitforfunctionpagefunction-options-args)
|
||||
* [page.waitForNavigation(options)](#pagewaitfornavigationoptions)
|
||||
* [page.waitForSelector(selector[, options])](#pagewaitforselectorselector-options)
|
||||
* [page.xpath(expression)](#pagexpathexpression)
|
||||
- [class: Keyboard](#class-keyboard)
|
||||
* [keyboard.down(key[, options])](#keyboarddownkey-options)
|
||||
* [keyboard.press(key[, options])](#keyboardpresskey-options)
|
||||
@ -139,6 +140,7 @@
|
||||
* [frame.waitFor(selectorOrFunctionOrTimeout[, options[, ...args]])](#framewaitforselectororfunctionortimeout-options-args)
|
||||
* [frame.waitForFunction(pageFunction[, options[, ...args]])](#framewaitforfunctionpagefunction-options-args)
|
||||
* [frame.waitForSelector(selector[, options])](#framewaitforselectorselector-options)
|
||||
* [frame.xpath(expression)](#framexpathexpression)
|
||||
- [class: ExecutionContext](#class-executioncontext)
|
||||
* [executionContext.evaluate(pageFunction, ...args)](#executioncontextevaluatepagefunction-args)
|
||||
* [executionContext.evaluateHandle(pageFunction, ...args)](#executioncontextevaluatehandlepagefunction-args)
|
||||
@ -169,6 +171,7 @@
|
||||
* [elementHandle.toString()](#elementhandletostring)
|
||||
* [elementHandle.type(text[, options])](#elementhandletypetext-options)
|
||||
* [elementHandle.uploadFile(...filePaths)](#elementhandleuploadfilefilepaths)
|
||||
* [elementHandle.xpath(expression)](#elementhandlexpathexpression)
|
||||
- [class: Request](#class-request)
|
||||
* [request.abort([errorCode])](#requestaborterrorcode)
|
||||
* [request.continue([overrides])](#requestcontinueoverrides)
|
||||
@ -1202,6 +1205,14 @@ puppeteer.launch().then(async browser => {
|
||||
```
|
||||
Shortcut for [page.mainFrame().waitForSelector(selector[, options])](#framewaitforselectorselector-options).
|
||||
|
||||
#### page.xpath(expression)
|
||||
- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate).
|
||||
- returns: <[Promise]<?[ElementHandle]>> Promise which resolves to ElementHandle pointing to the page element.
|
||||
|
||||
The method evaluates the XPath expression. If there's no such element within the page, the method will resolve to `null`.
|
||||
|
||||
Shortcut for [page.mainFrame().xpath(expression)](#framexpathexpression).
|
||||
|
||||
### class: Keyboard
|
||||
|
||||
Keyboard provides an api for managing a virtual keyboard. The high level api is [`keyboard.type`](#keyboardtypetext-options), which takes raw characters and generates proper keydown, keypress/input, and keyup events on your page.
|
||||
@ -1650,6 +1661,12 @@ puppeteer.launch().then(async browser => {
|
||||
});
|
||||
```
|
||||
|
||||
#### frame.xpath(expression)
|
||||
- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate).
|
||||
- returns: <[Promise]<?[ElementHandle]>> Promise which resolves to ElementHandle pointing to the frame element.
|
||||
|
||||
The method evaluates the XPath expression. If there's no such element within the frame, the method will resolve to `null`.
|
||||
|
||||
### class: ExecutionContext
|
||||
|
||||
The class represents a context for JavaScript execution. Examples of JavaScript contexts are:
|
||||
@ -1950,6 +1967,12 @@ await elementHandle.press('Enter');
|
||||
|
||||
This method expects `elementHandle` to point to an [input element](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input).
|
||||
|
||||
#### elementHandle.xpath(expression)
|
||||
- `expression` <[string]> Expression to [evaluate](https://developer.mozilla.org/en-US/docs/Web/API/Document/evaluate).
|
||||
- returns: <[Promise]<?[ElementHandle]>> Promise which resolves to ElementHandle pointing to the matched element.
|
||||
|
||||
The method evaluates the XPath expression relative to the elementHandle. If there's no such element, the method will resolve to `null`.
|
||||
|
||||
### class: Request
|
||||
|
||||
Whenever the page sends a request, the following events are emitted by puppeteer's page:
|
||||
|
@ -192,6 +192,25 @@ class ElementHandle extends JSHandle {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} expression
|
||||
* @return {!Promise<?ElementHandle>}
|
||||
*/
|
||||
async xpath(expression) {
|
||||
const handle = await this.executionContext().evaluateHandle(
|
||||
(element, expression) => {
|
||||
const document = element.ownerDocument || element;
|
||||
return document.evaluate(expression, element, null, XPathResult.FIRST_ORDERED_NODE_TYPE).singleNodeValue;
|
||||
},
|
||||
this, expression
|
||||
);
|
||||
const element = handle.asElement();
|
||||
if (element)
|
||||
return element;
|
||||
await handle.dispose();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ElementHandle;
|
||||
|
@ -33,7 +33,7 @@ class ExecutionContext {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {function(*)|string} pageFunction
|
||||
* @param {Function|string} pageFunction
|
||||
* @param {...*} args
|
||||
* @return {!Promise<(!Object|undefined)>}
|
||||
*/
|
||||
@ -45,7 +45,7 @@ class ExecutionContext {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {function(*)|string} pageFunction
|
||||
* @param {Function|string} pageFunction
|
||||
* @param {...*} args
|
||||
* @return {!Promise<!JSHandle>}
|
||||
*/
|
||||
|
@ -229,6 +229,9 @@ class Frame {
|
||||
this._url = '';
|
||||
this._id = frameId;
|
||||
|
||||
/** @type {?Promise<!ElementHandle>} */
|
||||
this._documentPromise = null;
|
||||
/** @type {?Promise<!ExecutionContext>} */
|
||||
this._contextPromise = null;
|
||||
this._contextResolveCallback = null;
|
||||
this._setDefaultContext(null);
|
||||
@ -255,6 +258,7 @@ class Frame {
|
||||
for (const waitTask of this._waitTasks)
|
||||
waitTask.rerun();
|
||||
} else {
|
||||
this._documentPromise = null;
|
||||
this._contextPromise = new Promise(fulfill => {
|
||||
this._contextResolveCallback = fulfill;
|
||||
});
|
||||
@ -283,13 +287,32 @@ class Frame {
|
||||
* @return {!Promise<?ElementHandle>}
|
||||
*/
|
||||
async $(selector) {
|
||||
const context = await this._contextPromise;
|
||||
const handle = await context.evaluateHandle(selector => document.querySelector(selector), selector);
|
||||
const element = handle.asElement();
|
||||
if (element)
|
||||
return element;
|
||||
await handle.dispose();
|
||||
return null;
|
||||
const document = await this._document();
|
||||
const value = await document.$(selector);
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {!Promise<!ElementHandle>}
|
||||
*/
|
||||
async _document() {
|
||||
if (this._documentPromise)
|
||||
return this._documentPromise;
|
||||
this._documentPromise = this._contextPromise.then(async context => {
|
||||
const document = await context.evaluateHandle('document');
|
||||
return document.asElement();
|
||||
});
|
||||
return this._documentPromise;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} expression
|
||||
* @return {!Promise<?ElementHandle>}
|
||||
*/
|
||||
async xpath(expression) {
|
||||
const document = await this._document();
|
||||
const value = await document.xpath(expression);
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -326,17 +349,9 @@ class Frame {
|
||||
* @return {!Promise<!Array<!ElementHandle>>}
|
||||
*/
|
||||
async $$(selector) {
|
||||
const context = await this._contextPromise;
|
||||
const arrayHandle = await context.evaluateHandle(selector => document.querySelectorAll(selector), selector);
|
||||
const properties = await arrayHandle.getProperties();
|
||||
await arrayHandle.dispose();
|
||||
const result = [];
|
||||
for (const property of properties.values()) {
|
||||
const elementHandle = property.asElement();
|
||||
if (elementHandle)
|
||||
result.push(elementHandle);
|
||||
}
|
||||
return result;
|
||||
const document = await this._document();
|
||||
const value = await document.$$(selector);
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -408,7 +423,7 @@ class Frame {
|
||||
const url = options.url;
|
||||
try {
|
||||
const context = await this._contextPromise;
|
||||
return await context.evaluateHandle(addScriptUrl, url);
|
||||
return (await context.evaluateHandle(addScriptUrl, url)).asElement();
|
||||
} catch (error) {
|
||||
throw new Error(`Loading script from ${url} failed`);
|
||||
}
|
||||
@ -418,12 +433,12 @@ class Frame {
|
||||
let contents = await readFileAsync(options.path, 'utf8');
|
||||
contents += '//# sourceURL=' + options.path.replace(/\n/g, '');
|
||||
const context = await this._contextPromise;
|
||||
return context.evaluateHandle(addScriptContent, contents);
|
||||
return (await context.evaluateHandle(addScriptContent, contents)).asElement();
|
||||
}
|
||||
|
||||
if (typeof options.content === 'string') {
|
||||
const context = await this._contextPromise;
|
||||
return context.evaluateHandle(addScriptContent, options.content);
|
||||
return (await context.evaluateHandle(addScriptContent, options.content)).asElement();
|
||||
}
|
||||
|
||||
throw new Error('Provide an object with a `url`, `path` or `content` property');
|
||||
@ -465,7 +480,7 @@ class Frame {
|
||||
const url = options.url;
|
||||
try {
|
||||
const context = await this._contextPromise;
|
||||
return await context.evaluateHandle(addStyleUrl, url);
|
||||
return (await context.evaluateHandle(addStyleUrl, url)).asElement();
|
||||
} catch (error) {
|
||||
throw new Error(`Loading style from ${url} failed`);
|
||||
}
|
||||
@ -475,12 +490,12 @@ class Frame {
|
||||
let contents = await readFileAsync(options.path, 'utf8');
|
||||
contents += '/*# sourceURL=' + options.path.replace(/\n/g, '') + '*/';
|
||||
const context = await this._contextPromise;
|
||||
return await context.evaluateHandle(addStyleContent, contents);
|
||||
return (await context.evaluateHandle(addStyleContent, contents)).asElement();
|
||||
}
|
||||
|
||||
if (typeof options.content === 'string') {
|
||||
const context = await this._contextPromise;
|
||||
return await context.evaluateHandle(addStyleContent, options.content);
|
||||
return (await context.evaluateHandle(addStyleContent, options.content)).asElement();
|
||||
}
|
||||
|
||||
throw new Error('Provide an object with a `url`, `path` or `content` property');
|
||||
|
@ -228,6 +228,14 @@ class Page extends EventEmitter {
|
||||
return this.mainFrame().$$(selector);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} expression
|
||||
* @return {!Promise<?Puppeteer.ElementHandle>}
|
||||
*/
|
||||
async xpath(expression) {
|
||||
return this.mainFrame().xpath(expression);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {!Array<string>} urls
|
||||
* @return {!Promise<!Array<Network.Cookie>>}
|
||||
|
32
test/test.js
32
test/test.js
@ -1720,6 +1720,18 @@ describe('Page', function() {
|
||||
});
|
||||
});
|
||||
|
||||
// Suite for page.xpath. The label previously read 'Path.xpath', a typo that
// made the suite hard to find when filtering test names by API.
describe('Page.xpath', function() {
  it('should query existing element', async({page, server}) => {
    await page.setContent('<section>test</section>');
    // Absolute path to the <section> that was just injected.
    const element = await page.xpath('/html/body/section');
    expect(element).toBeTruthy();
  });
  it('should return null for non-existing element', async({page, server}) => {
    // No content is set here; the expression is expected to match nothing.
    const element = await page.xpath('/html/body/non-existing-element');
    expect(element).toBe(null);
  });
});
|
||||
|
||||
describe('ElementHandle.boundingBox', function() {
|
||||
it('should work', async({page, server}) => {
|
||||
await page.setViewport({width: 500, height: 500});
|
||||
@ -1910,6 +1922,26 @@ describe('Page', function() {
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
// Suite for elementHandle.xpath: expressions are evaluated relative to the
// handle they are called on.
describe('ElementHandle.xpath', function() {
  it('should query existing element', async({page, server}) => {
    await page.goto(server.PREFIX + '/playground.html');
    await page.setContent('<html><body><div class="second"><div class="inner">A</div></div></body></html>');
    const rootHandle = await page.$('html');
    // Walk down in two relative steps: <html> -> div.second -> div.inner.
    const outerHandle = await rootHandle.xpath(`./body/div[contains(@class, 'second')]`);
    const innerHandle = await outerHandle.xpath(`./div[contains(@class, 'inner')]`);
    const text = await page.evaluate(e => e.textContent, innerHandle);
    expect(text).toBe('A');
  });

  it('should return null for non-existing element', async({page, server}) => {
    await page.setContent('<html><body><div class="second"><div class="inner">B</div></div></body></html>');
    const rootHandle = await page.$('html');
    // No element carries the class 'third', so nothing should match.
    const missing = await rootHandle.xpath(`/div[contains(@class, 'third')]`);
    expect(missing).toBe(null);
  });
});
|
||||
|
||||
describe('input', function() {
|
||||
it('should click the button', async({page, server}) => {
|
||||
await page.goto(server.PREFIX + '/input/button.html');
|
||||
|
Loading…
Reference in New Issue
Block a user