puppeteer/src/node/BrowserFetcher.ts

702 lines
19 KiB
TypeScript
Raw Normal View History

/**
* Copyright 2017 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import * as os from 'os';
import * as fs from 'fs';
import * as path from 'path';
import * as util from 'util';
import * as childProcess from 'child_process';
import * as https from 'https';
import * as http from 'http';
import { Product } from '../common/Product.js';
import extractZip from 'extract-zip';
import { debug } from '../common/Debug.js';
import { promisify } from 'util';
import removeRecursive from 'rimraf';
import * as URL from 'url';
import createHttpsProxyAgent, {
HttpsProxyAgent,
HttpsProxyAgentOptions,
} from 'https-proxy-agent';
2020-05-07 10:54:55 +00:00
import { getProxyForUrl } from 'proxy-from-env';
import { assert } from '../common/assert.js';
2022-05-09 11:17:24 +00:00
import tar from 'tar-fs';
import bzip from 'unbzip2-stream';
// Environment opt-in consulted when auto-detecting the platform: when set on
// an arm64 macOS host, the `mac_arm` Chromium build is selected.
const PUPPETEER_EXPERIMENTAL_CHROMIUM_MAC_ARM =
  process.env['PUPPETEER_EXPERIMENTAL_CHROMIUM_MAC_ARM'];

// Debug logger for this module, under the `puppeteer:fetcher` namespace.
const debugFetcher = debug('puppeteer:fetcher');
2022-05-31 14:34:16 +00:00
/**
 * `util.format` templates for the archive URL of every supported
 * product/platform pair.
 *
 * The placeholders are filled, in order, with the download host, the
 * revision and the archive name (see `_downloadURL`). A platform that is
 * missing from a product's table is not supported for that product.
 */
const downloadURLs: Record<Product, Partial<Record<Platform, string>>> = {
  chrome: {
    linux: '%s/chromium-browser-snapshots/Linux_x64/%d/%s.zip',
    mac: '%s/chromium-browser-snapshots/Mac/%d/%s.zip',
    mac_arm: '%s/chromium-browser-snapshots/Mac_Arm/%d/%s.zip',
    win32: '%s/chromium-browser-snapshots/Win/%d/%s.zip',
    win64: '%s/chromium-browser-snapshots/Win_x64/%d/%s.zip',
  },
  firefox: {
    linux: '%s/firefox-%s.en-US.%s-x86_64.tar.bz2',
    mac: '%s/firefox-%s.en-US.%s.dmg',
    win32: '%s/firefox-%s.en-US.%s.zip',
    win64: '%s/firefox-%s.en-US.%s.zip',
  },
};
/**
 * Per-product defaults: the host archives are downloaded from and the name
 * of the folder (created under the project root) that holds the downloads.
 * Both can be overridden through `BrowserFetcherOptions`.
 */
const browserConfig = {
  chrome: {
    host: 'https://storage.googleapis.com',
    destination: '.local-chromium',
  },
  firefox: {
    host: 'https://archive.mozilla.org/pub/firefox/nightly/latest-mozilla-central',
    destination: '.local-firefox',
  },
} as const;
/**
 * Supported platforms.
 *
 * @remarks
 * When no platform is passed to `BrowserFetcher`, `mac_arm` is only chosen
 * for Chrome on an arm64 macOS host with the
 * `PUPPETEER_EXPERIMENTAL_CHROMIUM_MAC_ARM` environment variable set;
 * otherwise macOS hosts resolve to `mac`.
 *
 * @public
 */
export type Platform = 'linux' | 'mac' | 'mac_arm' | 'win32' | 'win64';
2020-05-07 10:54:55 +00:00
/**
 * Returns the archive-name component used in the download URL template.
 *
 * For Chrome this is also the name of the folder the executable path is
 * built from (see `BrowserFetcher.revisionInfo`). For Firefox it is simply
 * the platform identifier.
 *
 * @param product - Browser the archive belongs to.
 * @param platform - Target platform.
 * @param revision - Revision string; only consulted for Chrome on Windows.
 * @returns The archive name.
 */
function archiveName(
  product: Product,
  platform: Platform,
  revision: string
): string {
  switch (product) {
    case 'chrome':
      switch (platform) {
        case 'linux':
          return 'chrome-linux';
        case 'mac_arm':
        case 'mac':
          return 'chrome-mac';
        case 'win32':
        case 'win64':
          // Windows archive name changed at r591479.
          return parseInt(revision, 10) > 591479
            ? 'chrome-win'
            : 'chrome-win32';
      }
    // Every known Chrome platform returns above; an unrecognised platform
    // string (possible only from untyped callers) falls through and is
    // returned verbatim, exactly like the Firefox case.
    // falls through
    case 'firefox':
      return platform;
  }
}
/**
 * Builds the URL of the archive for the given product, platform and
 * revision by filling the matching `downloadURLs` template with the host,
 * the revision and the archive name.
 *
 * @param product - Browser to download.
 * @param platform - Target platform; selects the template.
 * @param host - Base URL of the download host.
 * @param revision - Revision (Chrome) or version (Firefox) string.
 * @returns The formatted download URL.
 *
 * @internal
 */
function _downloadURL(
  product: Product,
  platform: Platform,
  host: string,
  revision: string
): string {
  return util.format(
    downloadURLs[product][platform],
    host,
    revision,
    archiveName(product, platform, revision)
  );
}
/**
 * Verifies that a system Chromium binary exists at one of the well-known
 * locations (`/usr/bin/chromium-browser` or `/usr/bin/chromium`).
 *
 * Returns silently when one is found. Otherwise prints installation
 * instructions to stderr and throws.
 *
 * @throws Error when neither binary exists.
 */
function handleArm64(): void {
  const systemBinaries = ['/usr/bin/chromium-browser', '/usr/bin/chromium'];
  const installed = systemBinaries.some((binary) => {
    return fs.existsSync(binary);
  });
  if (installed) {
    return;
  }
  const summary = 'The chromium binary is not available for arm64.';
  console.error(
    summary +
      '\nIf you are on Ubuntu, you can install with: ' +
      '\n\n sudo apt install chromium\n' +
      '\n\n sudo apt install chromium-browser\n'
  );
  // Carry the reason in the error itself so callers that only see the
  // exception (and not stderr) still learn what went wrong.
  throw new Error(summary);
}
2022-06-13 09:16:25 +00:00
// Promise-returning wrappers around the callback-style `fs` functions used
// by the fetcher. Each function is bound to `fs` before being promisified.
const readdirAsync = util.promisify(fs.readdir.bind(fs));
const mkdirAsync = util.promisify(fs.mkdir.bind(fs));
const unlinkAsync = util.promisify(fs.unlink.bind(fs));
const chmodAsync = util.promisify(fs.chmod.bind(fs));
/**
 * Checks whether `filePath` is accessible to the current process.
 *
 * @param filePath - Path to probe.
 * @returns A promise resolving to `true` when `fs.access` succeeds and to
 * `false` when it fails for any reason. The promise never rejects.
 */
function existsAsync(filePath: string): Promise<boolean> {
  return fs.promises.access(filePath).then(
    () => {
      return true;
    },
    () => {
      return false;
    }
  );
}
/**
 * Options accepted by the `BrowserFetcher` constructor. Every field is
 * optional.
 *
 * @public
 */
export interface BrowserFetcherOptions {
/**
 * Platform to fetch for. When omitted, it is derived from `os.platform()`
 * and `os.arch()`.
 */
platform?: Platform;
/**
 * Browser to fetch: `chrome` or `firefox` (compared case-insensitively).
 * Defaults to `chrome`.
 */
product?: string;
/**
 * Folder that downloads are stored in. Defaults to a product-specific
 * folder inside the project root.
 */
path?: string;
/**
 * Host to download from. Defaults to the product's standard download host.
 */
host?: string;
}
/**
 * Describes a single browser revision as seen by a `BrowserFetcher`.
 *
 * @public
 */
export interface BrowserFetcherRevisionInfo {
/** Folder the revision is (or would be) extracted into. */
folderPath: string;
/** Path of the browser executable inside `folderPath`. */
executablePath: string;
/** URL the revision's archive is downloaded from. */
url: string;
/** Whether `folderPath` exists on disk. */
local: boolean;
/** The revision string this info was computed for. */
revision: string;
/** The fetcher's product (`chrome` or `firefox`). */
product: string;
}
/**
 * BrowserFetcher can download and manage different versions of Chromium and Firefox.
 *
 * @remarks
 * BrowserFetcher operates on revision strings that specify a precise version of Chromium, e.g. `"533271"`. Revision strings can be obtained from {@link http://omahaproxy.appspot.com/ | omahaproxy.appspot.com}.
 * In the Firefox case, BrowserFetcher downloads Firefox Nightly and
 * operates on version numbers such as `"75"`.
 *
 * @example
 * An example of using BrowserFetcher to download a specific version of Chromium
 * and running Puppeteer against it:
 *
 * ```js
 * const browserFetcher = puppeteer.createBrowserFetcher();
 * const revisionInfo = await browserFetcher.download('533271');
 * const browser = await puppeteer.launch({executablePath: revisionInfo.executablePath})
 * ```
 *
 * **NOTE** BrowserFetcher is not designed to work concurrently with other
 * instances of BrowserFetcher that share the same downloads directory.
 *
 * @public
 */
export class BrowserFetcher {
  #product: Product;
  #downloadsFolder: string;
  #downloadHost: string;
  #platform: Platform;

  /**
   * @param projectRoot - Folder the default downloads folder is created in.
   * @param options - Optional overrides for product, platform, downloads
   * folder and download host.
   *
   * @internal
   */
  constructor(projectRoot: string, options: BrowserFetcherOptions = {}) {
    this.#product = (options.product || 'chrome').toLowerCase() as Product;
    assert(
      this.#product === 'chrome' || this.#product === 'firefox',
      `Unknown product: "${options.product}"`
    );

    this.#downloadsFolder =
      options.path ||
      path.join(projectRoot, browserConfig[this.#product].destination);
    this.#downloadHost = options.host || browserConfig[this.#product].host;

    if (options.platform) {
      this.#platform = options.platform;
    } else {
      const platform = os.platform();
      switch (platform) {
        case 'darwin':
          switch (this.#product) {
            case 'chrome':
              // Native arm64 builds are opt-in via the environment flag.
              this.#platform =
                os.arch() === 'arm64' && PUPPETEER_EXPERIMENTAL_CHROMIUM_MAC_ARM
                  ? 'mac_arm'
                  : 'mac';
              break;
            case 'firefox':
              this.#platform = 'mac';
              break;
          }
          break;
        case 'linux':
          this.#platform = 'linux';
          break;
        case 'win32':
          this.#platform = os.arch() === 'x64' ? 'win64' : 'win32';
          // `break` (not `return`) so that Windows goes through the same
          // supported-platform assertion below as every other platform.
          break;
        default:
          assert(false, 'Unsupported platform: ' + platform);
      }
    }

    assert(
      downloadURLs[this.#product][this.#platform],
      'Unsupported platform: ' + this.#platform
    );
  }

  /**
   * @returns Returns the current `Platform`, which is one of `mac`,
   * `mac_arm`, `linux`, `win32` or `win64`.
   */
  platform(): Platform {
    return this.#platform;
  }

  /**
   * @returns Returns the current `Product`, which is one of `chrome` or
   * `firefox`.
   */
  product(): Product {
    return this.#product;
  }

  /**
   * @returns The download host being used.
   */
  host(): string {
    return this.#downloadHost;
  }

  /**
   * Initiates a HEAD request to check if the revision is available.
   * @remarks
   * This method is affected by the current `product`.
   * @param revision - The revision to check availability for.
   * @returns A promise that resolves to `true` if the revision could be downloaded
   * from the host. Request errors are logged and resolve to `false`.
   */
  canDownload(revision: string): Promise<boolean> {
    const url = _downloadURL(
      this.#product,
      this.#platform,
      this.#downloadHost,
      revision
    );
    return new Promise((resolve) => {
      const request = httpRequest(
        url,
        'HEAD',
        (response) => {
          resolve(response.statusCode === 200);
        },
        false
      );
      request.on('error', (error) => {
        console.error(error);
        resolve(false);
      });
    });
  }

  /**
   * Initiates a GET request to download the revision from the host.
   * @remarks
   * This method is affected by the current `product`.
   * @param revision - The revision to download.
   * @param progressCallback - A function that will be called with two arguments:
   * How many bytes have been downloaded and the total number of bytes of the download.
   * @returns A promise with revision information when the revision is downloaded
   * and extracted. Resolves to `undefined` on non-macOS arm64 hosts, where an
   * already installed system Chromium is expected instead of a download.
   */
  async download(
    revision: string,
    progressCallback: (x: number, y: number) => void = (): void => {}
  ): Promise<BrowserFetcherRevisionInfo | undefined> {
    const url = _downloadURL(
      this.#product,
      this.#platform,
      this.#downloadHost,
      revision
    );
    const fileName = url.split('/').pop();
    assert(fileName, `A malformed download URL was found: ${url}.`);
    const archivePath = path.join(this.#downloadsFolder, fileName);
    const outputPath = this.#getFolderPath(revision);

    // Already extracted: nothing to download.
    if (await existsAsync(outputPath)) {
      return this.revisionInfo(revision);
    }
    if (!(await existsAsync(this.#downloadsFolder))) {
      // Recursive so that a custom, nested downloads path whose parents do
      // not exist yet can still be created.
      await mkdirAsync(this.#downloadsFolder, { recursive: true });
    }

    // Use system Chromium builds on Linux ARM devices
    if (os.platform() !== 'darwin' && os.arch() === 'arm64') {
      handleArm64();
      return;
    }

    try {
      await _downloadFile(url, archivePath, progressCallback);
      await install(archivePath, outputPath);
    } finally {
      // The archive is only an intermediate artifact; remove it whether or
      // not the download and installation succeeded.
      if (await existsAsync(archivePath)) {
        await unlinkAsync(archivePath);
      }
    }

    const revisionInfo = this.revisionInfo(revision);
    if (revisionInfo) {
      await chmodAsync(revisionInfo.executablePath, 0o755);
    }
    return revisionInfo;
  }

  /**
   * @remarks
   * This method is affected by the current `product`.
   * @returns A promise with a list of all revision strings (for the current `product`)
   * available locally on disk.
   */
  async localRevisions(): Promise<string[]> {
    if (!(await existsAsync(this.#downloadsFolder))) {
      return [];
    }
    const fileNames = await readdirAsync(this.#downloadsFolder);
    return fileNames
      .map((fileName) => {
        return parseFolderPath(this.#product, fileName);
      })
      .filter(
        (
          entry
        ): entry is { product: string; platform: string; revision: string } => {
          // Keep only well-formed entries that belong to this platform.
          return (entry && entry.platform === this.#platform) ?? false;
        }
      )
      .map((entry) => {
        return entry.revision;
      });
  }

  /**
   * @remarks
   * This method is affected by the current `product`.
   * @param revision - A revision to remove for the current `product`.
   * @returns A promise that resolves when the revision has been removed or
   * throws if the revision has not been downloaded or could not be deleted.
   */
  async remove(revision: string): Promise<void> {
    const folderPath = this.#getFolderPath(revision);
    assert(
      await existsAsync(folderPath),
      `Failed to remove: revision ${revision} is not downloaded`
    );
    await new Promise<void>((fulfill, reject) => {
      removeRecursive(folderPath, (error) => {
        // Surface deletion failures instead of reporting success for a
        // folder that is still on disk.
        if (error) {
          reject(error);
        } else {
          fulfill();
        }
      });
    });
  }

  /**
   * @param revision - The revision to get info for.
   * @returns The revision info for the given revision.
   */
  revisionInfo(revision: string): BrowserFetcherRevisionInfo {
    const folderPath = this.#getFolderPath(revision);
    const executablePath = this.#getExecutablePath(folderPath, revision);
    const url = _downloadURL(
      this.#product,
      this.#platform,
      this.#downloadHost,
      revision
    );
    const local = fs.existsSync(folderPath);
    debugFetcher({
      revision,
      executablePath,
      folderPath,
      local,
      url,
      product: this.#product,
    });
    return {
      revision,
      executablePath,
      folderPath,
      local,
      url,
      product: this.#product,
    };
  }

  /**
   * Computes where the browser executable lives inside an extracted
   * revision folder for the current product and platform.
   */
  #getExecutablePath(folderPath: string, revision: string): string {
    if (this.#product === 'chrome') {
      if (this.#platform === 'mac' || this.#platform === 'mac_arm') {
        return path.join(
          folderPath,
          archiveName(this.#product, this.#platform, revision),
          'Chromium.app',
          'Contents',
          'MacOS',
          'Chromium'
        );
      } else if (this.#platform === 'linux') {
        return path.join(
          folderPath,
          archiveName(this.#product, this.#platform, revision),
          'chrome'
        );
      } else if (this.#platform === 'win32' || this.#platform === 'win64') {
        return path.join(
          folderPath,
          archiveName(this.#product, this.#platform, revision),
          'chrome.exe'
        );
      } else {
        throw new Error('Unsupported platform: ' + this.#platform);
      }
    } else if (this.#product === 'firefox') {
      if (this.#platform === 'mac' || this.#platform === 'mac_arm') {
        return path.join(
          folderPath,
          'Firefox Nightly.app',
          'Contents',
          'MacOS',
          'firefox'
        );
      } else if (this.#platform === 'linux') {
        return path.join(folderPath, 'firefox', 'firefox');
      } else if (this.#platform === 'win32' || this.#platform === 'win64') {
        return path.join(folderPath, 'firefox', 'firefox.exe');
      } else {
        throw new Error('Unsupported platform: ' + this.#platform);
      }
    } else {
      throw new Error('Unsupported product: ' + this.#product);
    }
  }

  /**
   * Absolute path of the `<platform>-<revision>` folder for a revision.
   */
  #getFolderPath(revision: string): string {
    return path.resolve(this.#downloadsFolder, `${this.#platform}-${revision}`);
  }
}
2020-05-07 10:54:55 +00:00
/**
 * Parses a downloads-folder entry named `<platform>-<revision>`.
 *
 * @param product - Product whose platform table the entry is checked against.
 * @param folderPath - Path or bare name of the entry; only the base name is used.
 * @returns The parsed parts, or `undefined` when the name does not consist of
 * exactly two non-empty `-`-separated parts or the platform is not listed for
 * the product.
 */
function parseFolderPath(
  product: Product,
  folderPath: string
): { product: string; platform: string; revision: string } | undefined {
  const name = path.basename(folderPath);
  const splits = name.split('-');
  if (splits.length !== 2) {
    return;
  }
  const [platform, revision] = splits;
  if (!revision || !platform) {
    return;
  }
  // Own-property check: the `in` operator would also accept inherited keys
  // such as `toString` or `constructor` as platform names.
  const isKnownPlatform = Object.prototype.hasOwnProperty.call(
    downloadURLs[product],
    platform
  );
  if (!isKnownPlatform) {
    return;
  }
  return { product, platform, revision };
}
/**
 * Downloads `url` into the file at `destinationPath`.
 *
 * @param url - URL to fetch with a GET request.
 * @param destinationPath - File the response body is written to.
 * @param progressCallback - Optional; invoked for every received chunk with
 * the number of bytes downloaded so far and the total announced by the
 * `content-length` header (`0` when the header is missing or not numeric).
 * @returns A promise that resolves once the file has been fully written and
 * rejects on a non-200 status, a request or response error, or a file write
 * error.
 *
 * @internal
 */
function _downloadFile(
  url: string,
  destinationPath: string,
  progressCallback?: (x: number, y: number) => void
): Promise<void> {
  debugFetcher(`Downloading binary from ${url}`);

  return new Promise<void>((fulfill, reject) => {
    let downloadedBytes = 0;
    let totalBytes = 0;

    const request = httpRequest(url, 'GET', (response) => {
      if (response.statusCode !== 200) {
        const error = new Error(
          `Download failed: server returned code ${response.statusCode}. URL: ${url}`
        );
        // consume response data to free up memory
        response.resume();
        reject(error);
        return;
      }

      const file = fs.createWriteStream(destinationPath);
      file.on('finish', () => {
        return fulfill();
      });
      file.on('error', (error) => {
        return reject(error);
      });
      // `pipe` does not forward source errors; without this handler a
      // connection that breaks mid-download would leave the promise pending.
      response.on('error', (error) => {
        file.destroy();
        reject(error);
      });
      response.pipe(file);

      // The header is optional; fall back to 0 rather than reporting NaN.
      const contentLength = response.headers['content-length'];
      totalBytes = contentLength ? parseInt(contentLength, 10) || 0 : 0;

      if (progressCallback) {
        response.on('data', (chunk: Buffer) => {
          downloadedBytes += chunk.length;
          progressCallback(downloadedBytes, totalBytes);
        });
      }
    });

    request.on('error', (error) => {
      return reject(error);
    });
  });
}
/**
 * Unpacks a downloaded archive into `folderPath`, choosing the extraction
 * strategy from the archive's file extension (`.zip`, `.tar.bz2` or `.dmg`).
 *
 * @param archivePath - Path of the downloaded archive.
 * @param folderPath - Folder to install into.
 * @returns A promise that settles when extraction has finished. It rejects
 * with an `Error` when the archive format is not supported.
 */
function install(archivePath: string, folderPath: string): Promise<unknown> {
  debugFetcher(`Installing ${archivePath} to ${folderPath}`);
  if (archivePath.endsWith('.zip')) {
    return extractZip(archivePath, { dir: folderPath });
  }
  if (archivePath.endsWith('.tar.bz2')) {
    return _extractTar(archivePath, folderPath);
  }
  if (archivePath.endsWith('.dmg')) {
    // The target folder has to exist before the app bundle is copied in.
    return mkdirAsync(folderPath).then(() => {
      return _installDMG(archivePath, folderPath);
    });
  }
  // Reject instead of throwing synchronously so that every failure of this
  // Promise-returning function is reported through the promise.
  return Promise.reject(
    new Error(`Unsupported archive format: ${archivePath}`)
  );
}
/**
 * Extracts a bzip2-compressed tarball into `folderPath`.
 *
 * @param tarPath - Path of the `.tar.bz2` archive.
 * @param folderPath - Folder the archive is extracted into.
 * @returns A promise that resolves when the extraction stream finishes and
 * rejects when reading, decompressing or extracting fails.
 *
 * @internal
 */
function _extractTar(tarPath: string, folderPath: string): Promise<unknown> {
  return new Promise((fulfill, reject) => {
    const tarStream = tar.extract(folderPath);
    tarStream.on('error', reject);
    tarStream.on('finish', fulfill);

    // `pipe` does not propagate errors downstream, so each stage needs its
    // own handler; otherwise an unreadable or corrupt archive would raise
    // an unhandled 'error' event instead of rejecting the promise.
    const readStream = fs.createReadStream(tarPath);
    readStream.on('error', reject);
    const bzipStream = bzip();
    bzipStream.on('error', reject);

    readStream.pipe(bzipStream).pipe(tarStream);
  });
}
/**
 * Installs the application bundle contained in a `.dmg` image: mounts the
 * image with `hdiutil`, copies the first `*.app` entry found on the mounted
 * volume into `folderPath`, and detaches the volume again.
 *
 * @param dmgPath - Path of the disk image.
 * @param folderPath - Existing folder the app bundle is copied into.
 * @returns A promise that resolves once the copy has completed and the
 * volume has been detached.
 * @throws Rejects when mounting fails, no volume path or `.app` entry can be
 * found, or the copy fails. A failure to detach is only logged.
 *
 * @internal
 */
async function _installDMG(dmgPath: string, folderPath: string): Promise<void> {
  // `execFile` passes arguments verbatim (no shell), so paths containing
  // quotes, `$` or other shell metacharacters are handled correctly.
  const execFileAsync = promisify(childProcess.execFile);

  const { stdout } = await execFileAsync('hdiutil', [
    'attach',
    '-nobrowse',
    '-noautoopen',
    dmgPath,
  ]);
  const volumes = stdout.match(/\/Volumes\/(.*)/m);
  if (!volumes) {
    throw new Error(`Could not find volume path in ${stdout}`);
  }
  // The whole match, i.e. the full `/Volumes/...` path.
  const mountPath = volumes[0]!;

  try {
    const fileNames = await readdirAsync(mountPath);
    const appName = fileNames.find((item) => {
      return typeof item === 'string' && item.endsWith('.app');
    });
    if (!appName) {
      throw new Error(`Cannot find app in ${mountPath}`);
    }
    const copyPath = path.join(mountPath, appName);
    debugFetcher(`Copying ${copyPath} to ${folderPath}`);
    await execFileAsync('cp', ['-R', copyPath, folderPath]);
  } finally {
    // Always detach the image once it has been mounted. A detach failure
    // must not mask the outcome of the installation, so it is only logged.
    debugFetcher(`Unmounting ${mountPath}`);
    try {
      await execFileAsync('hdiutil', ['detach', mountPath, '-quiet']);
    } catch (err) {
      console.error(`Error unmounting dmg: ${err}`);
    }
  }
}
2020-05-07 10:54:55 +00:00
/**
 * Issues an HTTP(S) request, honouring proxy settings from the environment
 * and transparently following redirects.
 *
 * @param url - Absolute URL to request.
 * @param method - HTTP method, e.g. `GET` or `HEAD`.
 * @param response - Invoked with the final (non-redirect) response.
 * @param keepAlive - Whether to send a `Connection: keep-alive` header.
 * Defaults to `true`.
 * @returns The initial request. Errors raised while following a redirect
 * are re-emitted on it, so one `'error'` listener covers the whole chain.
 */
function httpRequest(
  url: string,
  method: string,
  response: (x: http.IncomingMessage) => void,
  keepAlive = true
): http.ClientRequest {
  const urlParsed = URL.parse(url);

  type Options = Partial<URL.UrlWithStringQuery> & {
    method?: string;
    agent?: HttpsProxyAgent;
    rejectUnauthorized?: boolean;
    headers?: http.OutgoingHttpHeaders | undefined;
  };

  let options: Options = {
    ...urlParsed,
    method,
    headers: keepAlive
      ? {
          Connection: 'keep-alive',
        }
      : undefined,
  };

  const proxyURL = getProxyForUrl(url);
  if (proxyURL) {
    if (url.startsWith('http:')) {
      // Plain HTTP through a forward proxy: connect to the proxy and put
      // the absolute URL in the request line. The method and headers must
      // be carried over, otherwise a HEAD request silently becomes a GET.
      const proxy = URL.parse(proxyURL);
      options = {
        path: options.href,
        host: proxy.hostname,
        port: proxy.port,
        method,
        headers: options.headers,
      };
    } else {
      const parsedProxyURL = URL.parse(proxyURL);

      const proxyOptions = {
        ...parsedProxyURL,
        secureProxy: parsedProxyURL.protocol === 'https:',
      } as HttpsProxyAgentOptions;

      options.agent = createHttpsProxyAgent(proxyOptions);
      // NOTE(review): this disables TLS certificate verification for every
      // proxied HTTPS download. Kept as-is for compatibility with
      // intercepting proxies, but it weakens download integrity — confirm
      // this is still intended.
      options.rejectUnauthorized = false;
    }
  }

  const requestCallback = (res: http.IncomingMessage): void => {
    if (
      res.statusCode &&
      res.statusCode >= 300 &&
      res.statusCode < 400 &&
      res.headers.location
    ) {
      // Discard the redirect response's body so its socket is released.
      res.resume();
      // `Location` may be relative; resolve it against the current URL.
      // The caller's `keepAlive` choice is preserved across the redirect.
      const redirected = httpRequest(
        URL.resolve(url, res.headers.location),
        method,
        response,
        keepAlive
      );
      // Callers only hold the first request, so forward follow-up errors
      // to it instead of leaving them unhandled.
      redirected.on('error', (error) => {
        request.emit('error', error);
      });
    } else {
      response(res);
    }
  };

  const request =
    options.protocol === 'https:'
      ? https.request(options, requestCallback)
      : http.request(options, requestCallback);
  request.end();
  return request;
}