puppeteer/packages/puppeteer-core/src/node/BrowserFetcher.ts

725 lines
19 KiB
TypeScript
Raw Normal View History

/**
* Copyright 2017 Google Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {exec as execChildProcess} from 'child_process';
import extractZip from 'extract-zip';
import {createReadStream, createWriteStream, existsSync, readdirSync} from 'fs';
import {chmod, mkdir, readdir, unlink} from 'fs/promises';
import * as http from 'http';
import * as https from 'https';
import createHttpsProxyAgent, {
HttpsProxyAgent,
HttpsProxyAgentOptions,
} from 'https-proxy-agent';
import * as os from 'os';
import * as path from 'path';
import {getProxyForUrl} from 'proxy-from-env';
import removeRecursive from 'rimraf';
2022-05-09 11:17:24 +00:00
import tar from 'tar-fs';
import bzip from 'unbzip2-stream';
import * as URL from 'url';
import * as util from 'util';
import {promisify} from 'util';
import {debug} from '../common/Debug.js';
import {Product} from '../common/Product.js';
import {assert} from '../util/assert.js';
2022-05-09 11:17:24 +00:00
/**
 * Logger for this module, created by the project's `debug` helper under the
 * `puppeteer:fetcher` name.
 */
const debugFetcher = debug('puppeteer:fetcher');

/**
 * `util.format` templates for the archive URL of every supported
 * product/platform pair.
 *
 * The placeholders are filled, in order, with the download host, the
 * revision and the archive name (see `downloadURL`). A platform that is
 * missing for a product (e.g. `mac_arm` for Firefox) is not supported.
 */
const downloadURLs: Record<Product, Partial<Record<Platform, string>>> = {
  chrome: {
    linux: '%s/chromium-browser-snapshots/Linux_x64/%d/%s.zip',
    mac: '%s/chromium-browser-snapshots/Mac/%d/%s.zip',
    mac_arm: '%s/chromium-browser-snapshots/Mac_Arm/%d/%s.zip',
    win32: '%s/chromium-browser-snapshots/Win/%d/%s.zip',
    win64: '%s/chromium-browser-snapshots/Win_x64/%d/%s.zip',
  },
  firefox: {
    linux: '%s/firefox-%s.en-US.%s-x86_64.tar.bz2',
    mac: '%s/firefox-%s.en-US.%s.dmg',
    win32: '%s/firefox-%s.en-US.%s.zip',
    win64: '%s/firefox-%s.en-US.%s.zip',
  },
};

/**
 * Per-product defaults; `host` is used when the caller does not pass
 * `BrowserFetcherOptions.host`.
 */
const browserConfig = {
  chrome: {
    host: 'https://storage.googleapis.com',
  },
  firefox: {
    host: 'https://archive.mozilla.org/pub/firefox/nightly/latest-mozilla-central',
  },
} as const;

/** Promise-returning variant of `child_process.exec`. */
const exec = promisify(execChildProcess);
/**
* Supported platforms.
*
* @remarks
* The values are used as keys into the download URL templates and as the
* prefix of the on-disk folder name (`<platform>-<revision>`).
*
* @public
*/
export type Platform = 'linux' | 'mac' | 'mac_arm' | 'win32' | 'win64';
2020-05-07 10:54:55 +00:00
/**
 * Computes the archive name used in the download URL and, for Chromium, the
 * top-level directory inside the extracted archive.
 *
 * @param product - The browser the archive belongs to.
 * @param platform - The target platform.
 * @param revision - The revision; only consulted for Chromium on Windows.
 * @returns For Firefox the platform itself; for Chromium a `chrome-*` name.
 */
function archiveName(
  product: Product,
  platform: Platform,
  revision: string
): string {
  switch (product) {
    case 'firefox':
      return platform;
    case 'chrome': {
      if (platform === 'linux') {
        return 'chrome-linux';
      }
      if (platform === 'mac' || platform === 'mac_arm') {
        return 'chrome-mac';
      }
      if (platform === 'win32' || platform === 'win64') {
        // Windows archive name changed at r591479.
        const usesNewName = parseInt(revision, 10) > 591479;
        return usesNewName ? 'chrome-win' : 'chrome-win32';
      }
      // Not reachable for a valid `Platform`; any other value is passed
      // through unchanged, exactly as for Firefox.
      return platform;
    }
  }
}
2022-06-27 07:24:23 +00:00
/**
 * Builds the full archive URL for a product/platform/revision.
 *
 * The matching template from `downloadURLs` is filled with the host, the
 * revision and the archive name, in that order.
 *
 * @param product - The browser to download.
 * @param platform - The target platform.
 * @param host - The base URL of the download server.
 * @param revision - The revision to download.
 * @returns The formatted download URL.
 */
function downloadURL(
  product: Product,
  platform: Platform,
  host: string,
  revision: string
): string {
  const template = downloadURLs[product][platform];
  const archive = archiveName(product, platform, revision);
  return util.format(template, host, revision, archive);
}
/**
 * Verifies that a system-provided Chromium binary is present.
 *
 * Returns silently when either `/usr/bin/chromium-browser` or
 * `/usr/bin/chromium` exists. Otherwise it prints installation hints to
 * stderr and throws an `Error` without a message.
 */
function handleArm64(): void {
  const systemBinaries = ['/usr/bin/chromium-browser', '/usr/bin/chromium'];
  const installed = systemBinaries.some(binary => {
    return existsSync(binary);
  });
  if (installed) {
    return;
  }
  const hint = [
    'The chromium binary is not available for arm64.',
    '\nIf you are on Ubuntu, you can install with: ',
    '\n\n sudo apt install chromium\n',
    '\n\n sudo apt install chromium-browser\n',
  ].join('');
  console.error(hint);
  throw new Error();
}
2022-06-13 09:16:25 +00:00
/**
* Options accepted by the {@link BrowserFetcher} constructor.
*
* @public
*/
export interface BrowserFetcherOptions {
/**
* Determines the path to download browsers to.
*
* @remarks
* Each revision is stored in a `<platform>-<revision>` sub-folder of this
* path.
*/
path: string;
/**
* Determines which platform the browser will be suited for.
*
* @defaultValue Auto-detected from `os.platform()` and `os.arch()`.
*/
platform?: Platform;
/**
* Determines which product the {@link BrowserFetcher} is for.
*
* @defaultValue `"chrome"`.
*/
product?: 'chrome' | 'firefox';
/**
* Determines the host that will be used for downloading.
*
* @defaultValue Either
*
* - https://storage.googleapis.com (for `chrome`) or
* - https://archive.mozilla.org/pub/firefox/nightly/latest-mozilla-central
*   (for `firefox`)
*
*/
host?: string;
/**
* Enables the use of the Chromium binary for macOS ARM.
*
* @remarks
* Only consulted when `platform` is auto-detected, the product is `chrome`
* and the process runs on macOS with an `arm64` architecture.
*
* @experimental
*/
useMacOSARMBinary?: boolean;
}
/**
* Describes a single browser revision as seen by a {@link BrowserFetcher}.
*
* @public
*/
export interface BrowserFetcherRevisionInfo {
/**
* Absolute path of the `<platform>-<revision>` folder for this revision.
*/
folderPath: string;
/**
* Path of the browser executable inside `folderPath`.
*/
executablePath: string;
/**
* URL the revision's archive is downloaded from.
*/
url: string;
/**
* Whether `folderPath` exists on disk.
*/
local: boolean;
/**
* The revision this info was created for.
*/
revision: string;
/**
* The product of the fetcher that created this info.
*/
product: string;
}
/**
 * BrowserFetcher can download and manage different versions of Chromium and
 * Firefox.
 *
 * @remarks
 * BrowserFetcher operates on revision strings that specify a precise version of
 * Chromium, e.g. `"533271"`. Revision strings can be obtained from
 * {@link http://omahaproxy.appspot.com/ | omahaproxy.appspot.com}. For Firefox,
 * BrowserFetcher downloads Firefox Nightly and operates on version numbers such
 * as `"75"`.
 *
 * @remarks
 * The default constructed fetcher will always be for Chromium unless otherwise
 * specified.
 *
 * @remarks
 * BrowserFetcher is not designed to work concurrently with other instances of
 * BrowserFetcher that share the same downloads directory.
 *
 * @example
 * An example of using BrowserFetcher to download a specific version of Chromium
 * and running Puppeteer against it:
 *
 * ```ts
 * const browserFetcher = new BrowserFetcher({path: 'path/to/download/folder'});
 * const revisionInfo = await browserFetcher.download('533271');
 * const browser = await puppeteer.launch({
 *   executablePath: revisionInfo.executablePath,
 * });
 * ```
 *
 * @public
 */
export class BrowserFetcher {
  #product: Product;
  #downloadPath: string;
  #downloadHost: string;
  #platform: Platform;

  /**
   * Constructs a browser fetcher for the given options.
   *
   * @remarks
   * When `options.platform` is omitted the platform is derived from
   * `os.platform()` and `os.arch()`. The constructor throws when the
   * resulting product/platform pair has no download URL template.
   */
  constructor(options: BrowserFetcherOptions) {
    this.#product = options.product ?? 'chrome';
    this.#downloadPath = options.path;
    this.#downloadHost = options.host ?? browserConfig[this.#product].host;

    if (options.platform) {
      this.#platform = options.platform;
    } else {
      const platform = os.platform();
      switch (platform) {
        case 'darwin':
          switch (this.#product) {
            case 'chrome':
              this.#platform =
                os.arch() === 'arm64' && options.useMacOSARMBinary
                  ? 'mac_arm'
                  : 'mac';
              break;
            case 'firefox':
              this.#platform = 'mac';
              break;
          }
          break;
        case 'linux':
          this.#platform = 'linux';
          break;
        case 'win32':
          this.#platform =
            os.arch() === 'x64' ||
            // Windows 11 for ARM supports x64 emulation
            (os.arch() === 'arm64' && isWindows11(os.release()))
              ? 'win64'
              : 'win32';
          // `break` (not `return`) so that Windows goes through the same
          // supported-platform assertion below as every other platform.
          break;
        default:
          assert(false, 'Unsupported platform: ' + platform);
      }
    }

    assert(
      downloadURLs[this.#product][this.#platform],
      'Unsupported platform: ' + this.#platform
    );
  }

  /**
   * @returns Returns the current `Platform`, which is one of `linux`, `mac`,
   * `mac_arm`, `win32` or `win64`.
   */
  platform(): Platform {
    return this.#platform;
  }

  /**
   * @returns Returns the current `Product`, which is one of `chrome` or
   * `firefox`.
   */
  product(): Product {
    return this.#product;
  }

  /**
   * @returns The download host being used.
   */
  host(): string {
    return this.#downloadHost;
  }

  /**
   * Initiates a HEAD request to check if the revision is available.
   * @remarks
   * This method is affected by the current `product`.
   * @param revision - The revision to check availability for.
   * @returns A promise that resolves to `true` if the revision could be downloaded
   * from the host. Request errors are logged and resolve to `false`.
   */
  canDownload(revision: string): Promise<boolean> {
    const url = downloadURL(
      this.#product,
      this.#platform,
      this.#downloadHost,
      revision
    );
    return new Promise(resolve => {
      const request = httpRequest(
        url,
        'HEAD',
        response => {
          resolve(response.statusCode === 200);
        },
        false
      );
      request.on('error', error => {
        console.error(error);
        resolve(false);
      });
    });
  }

  /**
   * Initiates a GET request to download the revision from the host.
   * @remarks
   * This method is affected by the current `product`.
   *
   * If the revision's folder already exists, no download happens and the
   * existing revision info is returned. On Linux arm64 nothing is downloaded:
   * the method resolves to `undefined` when a system Chromium is installed
   * and throws otherwise.
   * @param revision - The revision to download.
   * @param progressCallback - A function that will be called with two arguments:
   * How many bytes have been downloaded and the total number of bytes of the download.
   * @returns A promise with revision information when the revision is downloaded
   * and extracted.
   */
  async download(
    revision: string,
    progressCallback: (x: number, y: number) => void = (): void => {}
  ): Promise<BrowserFetcherRevisionInfo | undefined> {
    const url = downloadURL(
      this.#product,
      this.#platform,
      this.#downloadHost,
      revision
    );
    const fileName = url.split('/').pop();
    assert(fileName, `A malformed download URL was found: ${url}.`);
    const archivePath = path.join(this.#downloadPath, fileName);
    const outputPath = this.#getFolderPath(revision);
    if (existsSync(outputPath)) {
      return this.revisionInfo(revision);
    }
    if (!existsSync(this.#downloadPath)) {
      await mkdir(this.#downloadPath, {recursive: true});
    }

    // Use system Chromium builds on Linux ARM devices
    if (os.platform() === 'linux' && os.arch() === 'arm64') {
      handleArm64();
      return;
    }

    try {
      await _downloadFile(url, archivePath, progressCallback);
      await install(archivePath, outputPath);
    } finally {
      // The archive is only needed for installation; remove it whether or
      // not the download/installation succeeded.
      if (existsSync(archivePath)) {
        await unlink(archivePath);
      }
    }
    const revisionInfo = this.revisionInfo(revision);
    if (revisionInfo) {
      await chmod(revisionInfo.executablePath, 0o755);
    }
    return revisionInfo;
  }

  /**
   * @remarks
   * This method is affected by the current `product`.
   * @returns A list of all revision strings (for the current `product`)
   * available locally on disk.
   */
  localRevisions(): string[] {
    if (!existsSync(this.#downloadPath)) {
      return [];
    }
    const fileNames = readdirSync(this.#downloadPath);
    return fileNames
      .map(fileName => {
        return parseFolderPath(this.#product, fileName);
      })
      .filter((entry): entry is Exclude<typeof entry, undefined> => {
        return (entry && entry.platform === this.#platform) ?? false;
      })
      .map(entry => {
        return entry.revision;
      });
  }

  /**
   * @remarks
   * This method is affected by the current `product`.
   * @param revision - A revision to remove for the current `product`.
   * @returns A promise that resolves when the revision has been removed. It
   * rejects if the revision has not been downloaded or if deleting its folder
   * fails.
   */
  async remove(revision: string): Promise<void> {
    const folderPath = this.#getFolderPath(revision);
    assert(
      existsSync(folderPath),
      `Failed to remove: revision ${revision} is not downloaded`
    );
    await new Promise<void>((fulfill, reject) => {
      // The callback receives the removal error, if any. Surface it instead
      // of reporting success for a folder that is still on disk.
      removeRecursive(folderPath, (error?: Error | null) => {
        if (error) {
          reject(error);
        } else {
          fulfill();
        }
      });
    });
  }

  /**
   * @param revision - The revision to get info for.
   * @returns The revision info for the given revision. The info is computed
   * from the fetcher's configuration; `local` tells whether the revision's
   * folder actually exists on disk.
   */
  revisionInfo(revision: string): BrowserFetcherRevisionInfo {
    const folderPath = this.#getFolderPath(revision);
    let executablePath = '';
    switch (this.#product) {
      case 'chrome':
        switch (this.#platform) {
          case 'mac':
          case 'mac_arm':
            executablePath = path.join(
              folderPath,
              archiveName(this.#product, this.#platform, revision),
              'Chromium.app',
              'Contents',
              'MacOS',
              'Chromium'
            );
            break;
          case 'linux':
            executablePath = path.join(
              folderPath,
              archiveName(this.#product, this.#platform, revision),
              'chrome'
            );
            break;
          case 'win32':
          case 'win64':
            executablePath = path.join(
              folderPath,
              archiveName(this.#product, this.#platform, revision),
              'chrome.exe'
            );
            break;
        }
        break;
      case 'firefox':
        switch (this.#platform) {
          case 'mac':
          case 'mac_arm':
            executablePath = path.join(
              folderPath,
              'Firefox Nightly.app',
              'Contents',
              'MacOS',
              'firefox'
            );
            break;
          case 'linux':
            executablePath = path.join(folderPath, 'firefox', 'firefox');
            break;
          case 'win32':
          case 'win64':
            executablePath = path.join(folderPath, 'firefox', 'firefox.exe');
            break;
        }
    }
    const url = downloadURL(
      this.#product,
      this.#platform,
      this.#downloadHost,
      revision
    );
    const local = existsSync(folderPath);
    debugFetcher({
      revision,
      executablePath,
      folderPath,
      local,
      url,
      product: this.#product,
    });
    return {
      revision,
      executablePath,
      folderPath,
      local,
      url,
      product: this.#product,
    };
  }

  /**
   * @returns The absolute `<downloadPath>/<platform>-<revision>` folder path.
   */
  #getFolderPath(revision: string): string {
    return path.resolve(this.#downloadPath, `${this.#platform}-${revision}`);
  }

  /**
   * @internal
   */
  getDownloadPath(): string {
    return this.#downloadPath;
  }
}
2020-05-07 10:54:55 +00:00
/**
 * Splits a `<platform>-<revision>` folder name into its parts.
 *
 * @param product - The product whose known platforms are accepted.
 * @param folderPath - A folder path or name; only its base name is parsed.
 * @returns The parsed parts, or `undefined` when the name does not consist
 * of exactly two non-empty `-`-separated segments or the platform has no
 * download URL entry for `product`.
 */
function parseFolderPath(
  product: Product,
  folderPath: string
): {product: string; platform: string; revision: string} | undefined {
  const segments = path.basename(folderPath).split('-');
  const [platform, revision] = segments;
  if (segments.length !== 2 || !revision || !platform) {
    return undefined;
  }
  const knownPlatform = platform in downloadURLs[product];
  return knownPlatform ? {product, platform, revision} : undefined;
}
/**
 * Tells whether an OS release string denotes Windows 11 or newer, i.e.
 * version 10.0.22000 or greater.
 *
 * @param version - A dot-separated release string such as `10.0.22000`.
 * @returns `false` when fewer than three components are present or the
 * components are not numeric.
 * @internal
 */
function isWindows11(version: string): boolean {
  const [majorText, minorText, patchText] = version.split('.');
  if (
    majorText === undefined ||
    minorText === undefined ||
    patchText === undefined
  ) {
    return false;
  }
  const major = parseInt(majorText, 10);
  const minor = parseInt(minorText, 10);
  const patch = parseInt(patchText, 10);
  if (major > 10) {
    return true;
  }
  if (major !== 10) {
    return false;
  }
  return minor > 0 || (minor === 0 && patch >= 22000);
}
/**
 * Downloads `url` into the file at `destinationPath`.
 *
 * @param url - The URL to issue the GET request against.
 * @param destinationPath - The file the response body is written to.
 * @param progressCallback - Called for every received chunk with the number
 * of bytes received so far and the total size taken from the
 * `Content-Length` header (0 when the header is missing or not numeric).
 * @returns A promise that resolves once the file has been fully written. It
 * rejects when the server answers with a status other than 200, or when the
 * request, the response stream or the file stream fails.
 * @internal
 */
function _downloadFile(
  url: string,
  destinationPath: string,
  progressCallback?: (x: number, y: number) => void
): Promise<void> {
  debugFetcher(`Downloading binary from ${url}`);

  return new Promise<void>((fulfill, reject) => {
    let downloadedBytes = 0;
    let totalBytes = 0;

    const request = httpRequest(url, 'GET', response => {
      if (response.statusCode !== 200) {
        const error = new Error(
          `Download failed: server returned code ${response.statusCode}. URL: ${url}`
        );
        // consume response data to free up memory
        response.resume();
        reject(error);
        return;
      }

      const file = createWriteStream(destinationPath);
      file.on('finish', () => {
        fulfill();
      });
      file.on('error', error => {
        reject(error);
      });
      // `pipe()` does not forward errors from the source stream. Without
      // this handler a connection that drops mid-download would never
      // settle the promise (or would raise an unhandled 'error' event).
      response.on('error', error => {
        file.destroy();
        reject(error);
      });
      response.pipe(file);

      const contentLength = Number.parseInt(
        response.headers['content-length'] ?? '',
        10
      );
      // Report 0 rather than NaN when the server sends no usable length.
      totalBytes = Number.isNaN(contentLength) ? 0 : contentLength;

      if (progressCallback) {
        // No encoding is set on the response, so chunks arrive as Buffers;
        // `length` is therefore a byte count.
        response.on('data', (chunk: Buffer | string) => {
          downloadedBytes += chunk.length;
          progressCallback(downloadedBytes, totalBytes);
        });
      }
    });
    request.on('error', error => {
      reject(error);
    });
  });
}
/**
 * Unpacks a downloaded archive into `folderPath`, choosing the extraction
 * strategy from the archive's file extension.
 *
 * @param archivePath - Path of the downloaded `.zip`, `.tar.bz2` or `.dmg`.
 * @param folderPath - Folder to install into. It is created here only for
 * `.dmg` archives.
 * @throws If the archive has any other extension.
 */
async function install(archivePath: string, folderPath: string): Promise<void> {
  debugFetcher(`Installing ${archivePath} to ${folderPath}`);

  if (archivePath.endsWith('.zip')) {
    await extractZip(archivePath, {dir: folderPath});
    return;
  }
  if (archivePath.endsWith('.tar.bz2')) {
    await extractTar(archivePath, folderPath);
    return;
  }
  if (archivePath.endsWith('.dmg')) {
    await mkdir(folderPath);
    await installDMG(archivePath, folderPath);
    return;
  }
  throw new Error(`Unsupported archive format: ${archivePath}`);
}
/**
 * Extracts a bzip2-compressed tarball into `folderPath`.
 *
 * @param tarPath - Path of the `.tar.bz2` archive to read.
 * @param folderPath - Folder the archive's entries are extracted into.
 * @returns A promise that resolves when extraction has finished and rejects
 * when reading the archive, decompressing it or extracting it fails.
 * @internal
 */
function extractTar(tarPath: string, folderPath: string): Promise<void> {
  return new Promise((fulfill, reject) => {
    const tarStream = tar.extract(folderPath);
    const readStream = createReadStream(tarPath);
    const bzipStream = bzip();

    // `pipe()` does not propagate errors between stages, so every stage
    // needs its own handler; otherwise an unreadable or corrupt archive
    // would leave this promise pending forever.
    const fail = (error: Error): void => {
      // Release the file descriptor of the archive being read.
      readStream.destroy();
      reject(error);
    };
    readStream.on('error', fail);
    bzipStream.on('error', fail);
    tarStream.on('error', fail);
    tarStream.on('finish', fulfill);

    readStream.pipe(bzipStream).pipe(tarStream);
  });
}
/**
 * Installs the application contained in a `.dmg` image: attaches the image
 * with `hdiutil`, copies the first `*.app` entry found in the mounted volume
 * into `folderPath` and detaches the image again, even when copying fails.
 *
 * @param dmgPath - Path of the disk image.
 * @param folderPath - Existing folder the application is copied into.
 * @throws If no `/Volumes/...` path appears in the `hdiutil` output or the
 * volume contains no `.app` entry.
 * @internal
 */
async function installDMG(dmgPath: string, folderPath: string): Promise<void> {
  // NOTE(review): paths are interpolated into shell commands inside double
  // quotes; a path containing `"`, `$` or a backtick would presumably break
  // them. Confirm whether such paths need to be supported.
  const attached = await exec(
    `hdiutil attach -nobrowse -noautoopen "${dmgPath}"`
  );
  const volumeMatch = /\/Volumes\/(.*)/m.exec(attached.stdout);
  if (volumeMatch === null) {
    throw new Error(`Could not find volume path in ${attached.stdout}`);
  }
  // The whole match, i.e. the `/Volumes/<name>` mount point.
  const mountPath = volumeMatch[0]!;

  try {
    const entries = await readdir(mountPath);
    const appName = entries.find(entry => {
      return typeof entry === 'string' && entry.endsWith('.app');
    });
    if (!appName) {
      throw new Error(`Cannot find app in ${mountPath}`);
    }
    const mountedPath = path.join(mountPath, appName);
    debugFetcher(`Copying ${mountedPath} to ${folderPath}`);
    await exec(`cp -R "${mountedPath}" "${folderPath}"`);
  } finally {
    debugFetcher(`Unmounting ${mountPath}`);
    await exec(`hdiutil detach "${mountPath}" -quiet`);
  }
}
2020-05-07 10:54:55 +00:00
/**
 * Issues an HTTP(S) request, honouring proxy settings from the environment
 * and following redirects.
 *
 * @param url - The URL to request.
 * @param method - The HTTP method, e.g. `GET` or `HEAD`.
 * @param response - Called with the final (non-redirect) response.
 * @param keepAlive - Whether to send a `Connection: keep-alive` header. The
 * setting is also applied to requests issued while following redirects.
 * @returns The request for `url`. Errors of requests issued while following
 * redirects are re-emitted on it as `'error'` events.
 */
function httpRequest(
  url: string,
  method: string,
  response: (x: http.IncomingMessage) => void,
  keepAlive = true
): http.ClientRequest {
  const urlParsed = URL.parse(url);

  type Options = Partial<URL.UrlWithStringQuery> & {
    method?: string;
    agent?: HttpsProxyAgent;
    rejectUnauthorized?: boolean;
    headers?: http.OutgoingHttpHeaders | undefined;
  };

  let options: Options = {
    ...urlParsed,
    method,
    headers: keepAlive ? {Connection: 'keep-alive'} : undefined,
  };

  const proxyURL = getProxyForUrl(url);
  if (proxyURL) {
    if (url.startsWith('http:')) {
      // Plain HTTP through a proxy: connect to the proxy and put the
      // absolute URL into the request line. The method and headers must be
      // carried over, otherwise e.g. a HEAD request is sent as GET.
      const proxy = URL.parse(proxyURL);
      options = {
        method: options.method,
        headers: options.headers,
        path: options.href,
        host: proxy.hostname,
        port: proxy.port,
      };
    } else {
      const parsedProxyURL = URL.parse(proxyURL);

      const proxyOptions = {
        ...parsedProxyURL,
        secureProxy: parsedProxyURL.protocol === 'https:',
      } as HttpsProxyAgentOptions;

      options.agent = createHttpsProxyAgent(proxyOptions);
      // NOTE(review): this disables TLS certificate verification for every
      // HTTPS download that goes through a proxy. Kept as-is to preserve
      // behavior; confirm whether it is still required.
      options.rejectUnauthorized = false;
    }
  }

  const requestCallback = (res: http.IncomingMessage): void => {
    if (
      res.statusCode &&
      res.statusCode >= 300 &&
      res.statusCode < 400 &&
      res.headers.location
    ) {
      // Discard the redirect's body so its socket is released.
      res.resume();
      // `Location` may be relative; resolve it against the current URL.
      const redirectedRequest = httpRequest(
        URL.resolve(url, res.headers.location),
        method,
        response,
        keepAlive
      );
      // Callers only hold the original request; forward failures of the
      // follow-up request so they do not surface as unhandled errors.
      redirectedRequest.on('error', error => {
        request.emit('error', error);
      });
    } else {
      response(res);
    }
  };
  const request =
    options.protocol === 'https:'
      ? https.request(options, requestCallback)
      : http.request(options, requestCallback);
  request.end();
  return request;
}