diff --git a/.gitignore b/.gitignore index f887a09..ebf58ad 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ /.spago .log .purs-repl +.env diff --git a/bun.lockb b/bun.lockb index cded8fb..d8c9f5d 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index 8a8eccd..976e491 100644 --- a/package.json +++ b/package.json @@ -15,9 +15,14 @@ "typescript": "^5.0.0" }, "dependencies": { + "@cliqz/adblocker-puppeteer": "1.23.8", "callsites": "^4.1.0", "puppeteer": "^21.3.5", "puppeteer-core": "^21.3.5", - "puppeteer-extra": "^3.3.6" + "puppeteer-extra": "^3.3.6", + "puppeteer-extra-plugin-adblocker": "^2.13.6", + "puppeteer-extra-plugin-anonymize-ua": "^2.4.6", + "puppeteer-extra-plugin-recaptcha": "^3.6.8", + "puppeteer-extra-plugin-stealth": "^2.11.2" } } diff --git a/packages.dhall b/packages.dhall deleted file mode 100644 index 2fafb1f..0000000 --- a/packages.dhall +++ /dev/null @@ -1,105 +0,0 @@ -{- -Welcome to your new Dhall package-set! - -Below are instructions for how to edit this file for most use -cases, so that you don't need to know Dhall to use it. - -## Use Cases - -Most will want to do one or both of these options: -1. Override/Patch a package's dependency -2. Add a package not already in the default package set - -This file will continue to work whether you use one or both options. -Instructions for each option are explained below. - -### Overriding/Patching a package - -Purpose: -- Change a package's dependency to a newer/older release than the - default package set's release -- Use your own modified version of some dependency that may - include new API, changed API, removed API by - using your custom git repo of the library rather than - the package set's repo - -Syntax: -where `entityName` is one of the following: -- dependencies -- repo -- version -------------------------------- -let upstream = -- -in upstream - with packageName.entityName = "new value" -------------------------------- - -Example: -------------------------------- -let upstream = -- -in upstream - with halogen.version = "master" - with halogen.repo = "https://example.com/path/to/git/repo.git" - - with halogen-vdom.version = "v4.0.0" - with halogen-vdom.dependencies = [ "extra-dependency" ] # halogen-vdom.dependencies -------------------------------- - -### Additions - -Purpose: -- Add packages that aren't already included in the default package set - -Syntax: -where `` is: -- a tag (i.e. "v4.0.0") -- a branch (i.e. "master") -- commit hash (i.e. "701f3e44aafb1a6459281714858fadf2c4c2a977") -------------------------------- -let upstream = -- -in upstream - with new-package-name = - { dependencies = - [ "dependency1" - , "dependency2" - ] - , repo = - "https://example.com/path/to/git/repo.git" - , version = - "" - } -------------------------------- - -Example: -------------------------------- -let upstream = -- -in upstream - with benchotron = - { dependencies = - [ "arrays" - , "exists" - , "profunctor" - , "strings" - , "quickcheck" - , "lcg" - , "transformers" - , "foldable-traversable" - , "exceptions" - , "node-fs" - , "node-buffer" - , "node-readline" - , "datetime" - , "now" - ] - , repo = - "https://github.com/hdgarrood/purescript-benchotron.git" - , version = - "v7.0.0" - } -------------------------------- --} -let upstream = - https://github.com/purescript/package-sets/releases/download/psc-0.15.10-20230921/packages.dhall - sha256:8c2123d78b41b74a5599f220cf526b48003804a490a85c324fd6a25215a94084 - -in upstream diff --git a/spago.dhall b/spago.dhall deleted file mode 100644 index fd1da6f..0000000 --- a/spago.dhall +++ /dev/null @@ -1,55 +0,0 @@ -{- -Welcome to a Spago project! -You can edit this file as you like. - -Need help? See the following resources: -- Spago documentation: https://github.com/purescript/spago -- Dhall language tour: https://docs.dhall-lang.org/tutorials/Language-Tour.html - -When creating a new Spago project, you can use -`spago init --no-comments` or `spago init -C` -to generate this file without the comments in this block. --} -{ name = "my-project" -, dependencies = - [ "aff" - , "aff-promise" - , "arrays" - , "bifunctors" - , "console" - , "control" - , "datetime" - , "effect" - , "either" - , "enums" - , "exceptions" - , "filterable" - , "foldable-traversable" - , "foreign" - , "identity" - , "integers" - , "maybe" - , "newtype" - , "node-buffer" - , "node-path" - , "node-process" - , "node-streams" - , "nullable" - , "ordered-collections" - , "parallel" - , "prelude" - , "simple-json" - , "spec" - , "st" - , "strings" - , "tailrec" - , "transformers" - , "tuples" - , "unsafe-coerce" - , "web-cssom" - , "web-dom" - , "web-html" - ] -, packages = ./packages.dhall -, sources = [ "src/**/*.purs", "test/**/*.purs" ] -} diff --git a/spago.yaml b/spago.yaml new file mode 100644 index 0000000..ee3ed61 --- /dev/null +++ b/spago.yaml @@ -0,0 +1,46 @@ +package: + dependencies: + - aff + - aff-promise + - arrays + - bifunctors + - console + - control + - datetime + - dotenv + - effect + - either + - enums + - exceptions + - filterable + - foldable-traversable + - foreign + - identity + - integers + - maybe + - newtype + - node-buffer + - node-path + - node-process + - node-streams + - nullable + - ordered-collections + - parallel + - prelude + - simple-json + - spec + - st + - strings + - tailrec + - transformers + - tuples + - unsafe-coerce + - web-cssom + - web-dom + - web-html + name: puppeteer +workspace: + extra_packages: {} + package_set: + url: https://raw.githubusercontent.com/purescript/package-sets/psc-0.15.10-20230921/packages.json + hash: sha256-pb4kxdVVOLZIDNaKgTY0oEdPEZlHuEvNj2xOz2nwMnM= diff --git a/src/Puppeteer.Base.js b/src/Puppeteer.Base.js index 9ea7f11..b2db30b 100644 --- a/src/Puppeteer.Base.js +++ b/src/Puppeteer.Base.js @@ -3,3 +3,6 @@ export const unsafeLog = a => { console.log(a) return a } + +/** @type {(a: A) => (b: B) => A & B} */ +export const unsafeUnion = a => b => ({ ...a, ...b }) diff --git a/src/Puppeteer.Base.purs b/src/Puppeteer.Base.purs index 75f23ab..4ffd1e6 100644 --- a/src/Puppeteer.Base.purs +++ b/src/Puppeteer.Base.purs @@ -4,19 +4,45 @@ import Prelude import Control.Alt ((<|>)) import Control.Monad.Error.Class (liftMaybe, try) +import Control.Monad.Except (runExcept) import Control.Parallel (parallel, sequential) -import Data.Either (hush) +import Data.Bifunctor (lmap) +import Data.Either (Either(..), hush) +import Data.Map (Map) import Data.Maybe (Maybe(..)) import Data.Time.Duration (Milliseconds) import Effect.Aff (Aff, delay) -import Effect.Exception (error) +import Effect.Exception (Error, error) import Foreign (Foreign, unsafeFromForeign) +import Foreign.Object (Object) +import Foreign.Object as Object import Prim.Row (class Union) import Puppeteer.FFI as FFI -import Simple.JSON (class ReadForeign, writeImpl) +import Simple.JSON (class ReadForeign, class WriteForeign, readImpl, writeImpl) import Web.HTML as HTML foreign import unsafeLog :: forall a. a -> a +foreign import unsafeUnion :: forall a b c. a -> b -> c + +data JsDuplex a ir = JsDuplex + { from :: ir -> Either String a + , into :: a -> ir + } + +duplex :: forall a ir. (a -> ir) -> (ir -> Either String a) -> JsDuplex a ir +duplex into from = JsDuplex { from, into } + +duplexRead :: forall a ir. ReadForeign ir => JsDuplex a ir -> Foreign -> Either Error a +duplexRead (JsDuplex { from }) = lmap error <<< flip bind from <<< lmap show <<< runExcept <<< readImpl + +duplexWrite :: forall a ir. WriteForeign ir => JsDuplex a ir -> a -> Foreign +duplexWrite (JsDuplex { into }) = writeImpl <<< into + +mapToObject :: forall v. WriteForeign v => Map String v -> Object Foreign +mapToObject = Object.fromFoldableWithIndex <<< map writeImpl + +merge :: forall a b c. Union a b c => Record a -> Record b -> Record c +merge a b = unsafeUnion a b timeout :: forall a. Milliseconds -> Aff a -> Aff (Maybe a) timeout t a = @@ -30,10 +56,10 @@ timeoutThrow t a = liftMaybe (error "timeout") =<< timeout t a newtype Context (a :: Symbol) = Context (Unit -> Aff Unit) -instance semicontext :: Semigroup (Context a) where +instance Semigroup (Context a) where append _ a = a -instance monoidcontext :: Monoid (Context a) where +instance Monoid (Context a) where mempty = Context $ const $ pure unit closeContext :: forall (a :: Symbol). Context a -> Aff Unit @@ -50,122 +76,51 @@ type Viewport = , isMobile :: Maybe Boolean } -prepareViewport :: Viewport -> Foreign -prepareViewport { deviceScaleFactor, hasTouch, height, width, isLandscape, isMobile } = - writeImpl - { deviceScaleFactor: FFI.maybeToUndefined deviceScaleFactor - , hasTouch: FFI.maybeToUndefined hasTouch - , isLandscape: FFI.maybeToUndefined isLandscape - , isMobile: FFI.maybeToUndefined isMobile - , height - , width - } +duplexViewport :: JsDuplex Viewport Viewport +duplexViewport = duplex identity pure --| [`PuppeteerNode`](https://pptr.dev/api/puppeteer.puppeteernode) foreign import data Puppeteer :: Row Type -> Type data LifecycleEvent = Load | DomContentLoaded | NetworkIdleZeroConnections | NetworkIdleAtMostTwoConnections -prepareLifecycleEvent :: LifecycleEvent -> Foreign -prepareLifecycleEvent Load = writeImpl "load" -prepareLifecycleEvent DomContentLoaded = writeImpl "domcontentloaded" -prepareLifecycleEvent NetworkIdleZeroConnections = writeImpl "networkidle0" -prepareLifecycleEvent NetworkIdleAtMostTwoConnections = writeImpl "networkidle2" - ---| A puppeteer plugin ---| ---| [`puppeteer-extra`](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin) ---| ---| `src/DebugPlugin.js` ---| ```javascript ---| import { PuppeteerExtraPlugin } from 'puppeteer-extra-plugin' ---| import { PuppeteerExtra } from 'puppeteer-extra' ---| import { Page } from 'puppeteer' ---| ---| /** @typedef {Page & {sayHello: () => void}} DebugPluginPage */ ---| ---| class DebugPlugin extends PuppeteerExtraPlugin { ---| name = 'hello-world' ---| ---| constructor(opts = {}) { ---| super(opts) ---| } ---| ---| async onPageCreated(page) { ---| page.sayHello = () => console.log('hello') ---| } ---| } ---| ---| /** @type {() => DebugPlugin} */ ---| export const makeDebugPlugin = () => new DebugPlugin() ---| ---| /** @type {(_1: DebugPlugin) => (_2: PuppeteerExtra) => () => PuppeteerExtra} */ ---| export const registerDebugPlugin = dp => p => () => p.use(dp) ---| ---| /** @type {(_1: PuppeteerExtra) => (_2: DebugPluginPage) => () => void} */ ---| export const sayHello = () => page => () => page.sayHello() ---| ``` ---| ---| `src/DebugPlugin.purs` ---| ```purescript ---| module DebugPlugin where ---| ---| import Prelude ---| import Effect (Effect) ---| import Effect.Class (class MonadEffect) ---| import Puppeteer (class Plugin, Puppeteer) ---| import Puppeteer.Page (Page) ---| ---| foreign import data DebugPlugin :: Type ---| ---| foreign import makeDebugPlugin :: Effect DebugPlugin ---| ---| foreign import registerDebugPlugin :: forall (r :: Row Type) ---| . Puppeteer r ---| -> Effect (Puppeteer (debugPlugin :: DebugPlugin | r)) ---| ---| -- Note: ---| -- The puppeteer instance used here must have been ---| -- registered with `DebugPlugin`'s `use` in order to ---| -- invoke `sayHello` ---| foreign import sayHello :: forall (r :: Row Type) ---| . Puppeteer (debugPlugin :: DebugPlugin | r) ---| -> Page ---| -> Effect Unit ---| ---| instance debugPlugin :: Plugin DebugPlugin (debugPlugin :: DebugPlugin) where ---| use pptr _ = liftEffect $ registerDebugPlugin pptr ---| ``` -class Plugin p (r :: Row Type) | p -> r where - --| Register a given puppeteer instance with plugin `p` - --| - --| The row type `r` should be used in that plugin's purescript - --| API to ensure the puppeteer instance used has had that - --| plugin registered. - use :: forall b c. Union r b c => Puppeteer r -> p -> Aff (Puppeteer c) +duplexLifecycleEvent :: JsDuplex LifecycleEvent String +duplexLifecycleEvent = + let + toString Load = "load" + toString DomContentLoaded = "domcontentloaded" + toString NetworkIdleZeroConnections = "networkidle0" + toString NetworkIdleAtMostTwoConnections = "networkidle2" + fromString "load" = Right Load + fromString "domcontentloaded" = Right DomContentLoaded + fromString "networkidle0" = Right NetworkIdleZeroConnections + fromString "networkidle2" = Right NetworkIdleAtMostTwoConnections + fromString o = Left $ "unknown lifecycle event " <> o + in + duplex toString fromString --| [`Browser`](https://pptr.dev/api/puppeteer.browser) foreign import data Browser :: Type -instance browserForeign :: ReadForeign Browser where +instance ReadForeign Browser where readImpl = pure <<< unsafeFromForeign --| [`Page`](https://pptr.dev/api/puppeteer.page) foreign import data Page :: Type -instance pageForeign :: ReadForeign Page where +instance ReadForeign Page where readImpl = pure <<< unsafeFromForeign --| [`Frame`](https://pptr.dev/api/puppeteer.frame) foreign import data Frame :: Type -instance frameForeign :: ReadForeign Frame where +instance ReadForeign Frame where readImpl = pure <<< unsafeFromForeign --| [`BrowserContext`](https://pptr.dev/api/puppeteer.browsercontext) foreign import data BrowserContext :: Type -instance browserContextForeign :: ReadForeign BrowserContext where +instance ReadForeign BrowserContext where readImpl = pure <<< unsafeFromForeign --| Represents both [`JSHandle`](https://pptr.dev/api/puppeteer.jshandle) & [`ElementHandle`](https://pptr.dev/api/puppeteer.elementhandle) @@ -174,100 +129,103 @@ foreign import data Handle :: Type -> Type --| [`Keyboard`](https://pptr.dev/api/puppeteer.keyboard) foreign import data Keyboard :: Type +instance ReadForeign Keyboard where + readImpl = pure <<< unsafeFromForeign + foreign import data Request :: Type -instance foreignRequest :: ReadForeign Request where +instance ReadForeign Request where readImpl = pure <<< unsafeFromForeign foreign import data Response :: Type -instance foreignResponse :: ReadForeign Response where +instance ReadForeign Response where readImpl = pure <<< unsafeFromForeign --| `Browser` or `BrowserContext` class PageProducer :: Type -> Constraint class PageProducer a -instance bpp :: PageProducer Browser -instance bcpp :: PageProducer BrowserContext +instance PageProducer Browser +instance PageProducer BrowserContext --| `Page` or `Handle` class EvalTarget :: Type -> Constraint class EvalTarget a -instance pet :: EvalTarget Page -instance het :: EvalTarget (Handle a) +instance EvalTarget Page +instance EvalTarget (Handle a) --| `Page` or `BrowserContext` class BrowserAccess :: Type -> Constraint class BrowserAccess a -instance pba :: BrowserAccess Browser -instance bcba :: BrowserAccess BrowserContext +instance BrowserAccess Browser +instance BrowserAccess BrowserContext class IsElement :: Type -> Constraint class IsElement e -instance anchorIsElement :: IsElement HTML.HTMLAnchorElement -instance areaIsElement :: IsElement HTML.HTMLAreaElement -instance audioIsElement :: IsElement HTML.HTMLAudioElement -instance bRIsElement :: IsElement HTML.HTMLBRElement -instance baseIsElement :: IsElement HTML.HTMLBaseElement -instance bodyIsElement :: IsElement HTML.HTMLBodyElement -instance buttonIsElement :: IsElement HTML.HTMLButtonElement -instance canvasIsElement :: IsElement HTML.HTMLCanvasElement -instance dListIsElement :: IsElement HTML.HTMLDListElement -instance dataIsElement :: IsElement HTML.HTMLDataElement -instance dataListIsElement :: IsElement HTML.HTMLDataListElement -instance divIsElement :: IsElement HTML.HTMLDivElement -instance document :: IsElement HTML.HTMLDocument -instance element :: IsElement HTML.HTMLElement -instance embedIsElement :: IsElement HTML.HTMLEmbedElement -instance fieldSetIsElement :: IsElement HTML.HTMLFieldSetElement -instance formIsElement :: IsElement HTML.HTMLFormElement -instance hRIsElement :: IsElement HTML.HTMLHRElement -instance headIsElement :: IsElement HTML.HTMLHeadElement -instance headingIsElement :: IsElement HTML.HTMLHeadingElement -instance iFrameIsElement :: IsElement HTML.HTMLIFrameElement -instance imageIsElement :: IsElement HTML.HTMLImageElement -instance inputIsElement :: IsElement HTML.HTMLInputElement -instance keygenIsElement :: IsElement HTML.HTMLKeygenElement -instance lIIsElement :: IsElement HTML.HTMLLIElement -instance labelIsElement :: IsElement HTML.HTMLLabelElement -instance legendIsElement :: IsElement HTML.HTMLLegendElement -instance linkIsElement :: IsElement HTML.HTMLLinkElement -instance mapIsElement :: IsElement HTML.HTMLMapElement -instance mediaIsElement :: IsElement HTML.HTMLMediaElement -instance metaIsElement :: IsElement HTML.HTMLMetaElement -instance meterIsElement :: IsElement HTML.HTMLMeterElement -instance modIsElement :: IsElement HTML.HTMLModElement -instance oListIsElement :: IsElement HTML.HTMLOListElement -instance objectIsElement :: IsElement HTML.HTMLObjectElement -instance optGroupIsElement :: IsElement HTML.HTMLOptGroupElement -instance optionIsElement :: IsElement HTML.HTMLOptionElement -instance outputIsElement :: IsElement HTML.HTMLOutputElement -instance paragraphIsElement :: IsElement HTML.HTMLParagraphElement -instance paramIsElement :: IsElement HTML.HTMLParamElement -instance preIsElement :: IsElement HTML.HTMLPreElement -instance progressIsElement :: IsElement HTML.HTMLProgressElement -instance quoteIsElement :: IsElement HTML.HTMLQuoteElement -instance scriptIsElement :: IsElement HTML.HTMLScriptElement -instance selectIsElement :: IsElement HTML.HTMLSelectElement -instance sourceIsElement :: IsElement HTML.HTMLSourceElement -instance spanIsElement :: IsElement HTML.HTMLSpanElement -instance styleIsElement :: IsElement HTML.HTMLStyleElement -instance tableCaptionIsElement :: IsElement HTML.HTMLTableCaptionElement -instance tableCellIsElement :: IsElement HTML.HTMLTableCellElement -instance tableColIsElement :: IsElement HTML.HTMLTableColElement -instance tableDataCellIsElement :: IsElement HTML.HTMLTableDataCellElement -instance tableIsElement :: IsElement HTML.HTMLTableElement -instance tableHeaderCellIsElement :: IsElement HTML.HTMLTableHeaderCellElement -instance tableRowIsElement :: IsElement HTML.HTMLTableRowElement -instance tableSectionIsElement :: IsElement HTML.HTMLTableSectionElement -instance templateIsElement :: IsElement HTML.HTMLTemplateElement -instance textAreaIsElement :: IsElement HTML.HTMLTextAreaElement -instance timeIsElement :: IsElement HTML.HTMLTimeElement -instance titleIsElement :: IsElement HTML.HTMLTitleElement -instance trackIsElement :: IsElement HTML.HTMLTrackElement -instance uListIsElement :: IsElement HTML.HTMLUListElement -instance videoIsElement :: IsElement HTML.HTMLVideoElement +instance IsElement HTML.HTMLAnchorElement +instance IsElement HTML.HTMLAreaElement +instance IsElement HTML.HTMLAudioElement +instance IsElement HTML.HTMLBRElement +instance IsElement HTML.HTMLBaseElement +instance IsElement HTML.HTMLBodyElement +instance IsElement HTML.HTMLButtonElement +instance IsElement HTML.HTMLCanvasElement +instance IsElement HTML.HTMLDListElement +instance IsElement HTML.HTMLDataElement +instance IsElement HTML.HTMLDataListElement +instance IsElement HTML.HTMLDivElement +instance IsElement HTML.HTMLDocument +instance IsElement HTML.HTMLElement +instance IsElement HTML.HTMLEmbedElement +instance IsElement HTML.HTMLFieldSetElement +instance IsElement HTML.HTMLFormElement +instance IsElement HTML.HTMLHRElement +instance IsElement HTML.HTMLHeadElement +instance IsElement HTML.HTMLHeadingElement +instance IsElement HTML.HTMLIFrameElement +instance IsElement HTML.HTMLImageElement +instance IsElement HTML.HTMLInputElement +instance IsElement HTML.HTMLKeygenElement +instance IsElement HTML.HTMLLIElement +instance IsElement HTML.HTMLLabelElement +instance IsElement HTML.HTMLLegendElement +instance IsElement HTML.HTMLLinkElement +instance IsElement HTML.HTMLMapElement +instance IsElement HTML.HTMLMediaElement +instance IsElement HTML.HTMLMetaElement +instance IsElement HTML.HTMLMeterElement +instance IsElement HTML.HTMLModElement +instance IsElement HTML.HTMLOListElement +instance IsElement HTML.HTMLObjectElement +instance IsElement HTML.HTMLOptGroupElement +instance IsElement HTML.HTMLOptionElement +instance IsElement HTML.HTMLOutputElement +instance IsElement HTML.HTMLParagraphElement +instance IsElement HTML.HTMLParamElement +instance IsElement HTML.HTMLPreElement +instance IsElement HTML.HTMLProgressElement +instance IsElement HTML.HTMLQuoteElement +instance IsElement HTML.HTMLScriptElement +instance IsElement HTML.HTMLSelectElement +instance IsElement HTML.HTMLSourceElement +instance IsElement HTML.HTMLSpanElement +instance IsElement HTML.HTMLStyleElement +instance IsElement HTML.HTMLTableCaptionElement +instance IsElement HTML.HTMLTableCellElement +instance IsElement HTML.HTMLTableColElement +instance IsElement HTML.HTMLTableDataCellElement +instance IsElement HTML.HTMLTableElement +instance IsElement HTML.HTMLTableHeaderCellElement +instance IsElement HTML.HTMLTableRowElement +instance IsElement HTML.HTMLTableSectionElement +instance IsElement HTML.HTMLTemplateElement +instance IsElement HTML.HTMLTextAreaElement +instance IsElement HTML.HTMLTimeElement +instance IsElement HTML.HTMLTitleElement +instance IsElement HTML.HTMLTrackElement +instance IsElement HTML.HTMLUListElement +instance IsElement HTML.HTMLVideoElement diff --git a/src/Puppeteer.Browser.purs b/src/Puppeteer.Browser.purs index e0388dd..1599533 100644 --- a/src/Puppeteer.Browser.purs +++ b/src/Puppeteer.Browser.purs @@ -3,10 +3,12 @@ module Puppeteer.Browser , Product(..) , ChromeReleaseChannel(..) , Connect + , duplexConnect + , duplexProduct + , duplexChromeReleaseChannel , disconnect , websocketEndpoint , connected - , prepareConnectOptions , get , close ) where @@ -15,63 +17,93 @@ import Prelude import Control.Promise (Promise) import Control.Promise as Promise +import Data.Either (Either(..)) import Data.Enum (fromEnum) +import Data.Generic.Rep (class Generic) import Data.Maybe (Maybe) -import Data.Time (Millisecond) +import Data.Newtype (unwrap, wrap) +import Data.Show.Generic (genericShow) +import Data.Time.Duration (Milliseconds(..)) import Effect (Effect) import Effect.Aff (Aff) import Foreign (Foreign, unsafeToForeign) import Puppeteer.Base (Browser) as X -import Puppeteer.Base (class BrowserAccess, Browser, BrowserContext, Viewport) +import Puppeteer.Base (class BrowserAccess, Browser, BrowserContext, JsDuplex(..), Viewport, duplex) import Puppeteer.FFI as FFI +import Record (modify) import Simple.JSON (writeImpl) +import Type.Prelude (Proxy(..)) data Product = Chrome | Firefox +derive instance Generic Product _ +derive instance Eq Product +instance Show Product where + show = genericShow + +duplexProduct :: JsDuplex Product String +duplexProduct = + let + toString Chrome = "chrome" + toString Firefox = "firefox" + fromString "chrome" = pure Chrome + fromString "firefox" = pure Firefox + fromString o = Left $ "unknown browser product " <> o + in + duplex toString fromString + data ChromeReleaseChannel = ChromeStable | ChromeBeta | ChromeCanary | ChromeDev +derive instance Generic ChromeReleaseChannel _ +derive instance Eq ChromeReleaseChannel +instance Show ChromeReleaseChannel where + show = genericShow + +duplexChromeReleaseChannel :: JsDuplex ChromeReleaseChannel String +duplexChromeReleaseChannel = + let + toString ChromeStable = "chrome" + toString ChromeBeta = "chrome-beta" + toString ChromeCanary = "chrome-canary" + toString ChromeDev = "chrome-dev" + fromString "chrome" = pure ChromeStable + fromString "chrome-beta" = pure ChromeBeta + fromString "chrome-canary" = pure ChromeCanary + fromString "chrome-dev" = pure ChromeDev + fromString o = Left $ "unknown chrome release channel " <> o + in + duplex toString fromString + type Connect = { defaultViewport :: Maybe Viewport , ignoreHTTPSErrors :: Maybe Boolean - , protocolTimeout :: Maybe Millisecond - , slowMo :: Maybe Millisecond + , protocolTimeout :: Maybe Milliseconds + , slowMo :: Maybe Milliseconds } -prepareViewport :: Viewport -> Foreign -prepareViewport - { deviceScaleFactor - , hasTouch - , height - , width - , isLandscape - , isMobile - } = writeImpl - { deviceScaleFactor: FFI.maybeToUndefined deviceScaleFactor - , hasTouch: FFI.maybeToUndefined hasTouch - , height - , width - , isLandscape: FFI.maybeToUndefined isLandscape - , isMobile: FFI.maybeToUndefined isMobile +type ConnectRaw = + { defaultViewport :: Maybe Viewport + , ignoreHTTPSErrors :: Maybe Boolean + , protocolTimeout :: Maybe Number + , slowMo :: Maybe Number } -prepareConnectOptions :: Connect -> Foreign -prepareConnectOptions - { defaultViewport - , ignoreHTTPSErrors - , protocolTimeout - , slowMo - } = writeImpl - { defaultViewport: FFI.maybeToUndefined $ map prepareViewport defaultViewport - , ignoreHTTPSErrors: FFI.maybeToUndefined ignoreHTTPSErrors - , protocolTimeout: FFI.maybeToUndefined $ map fromEnum protocolTimeout - , slowMo: FFI.maybeToUndefined $ map fromEnum slowMo - } +duplexConnect :: JsDuplex Connect ConnectRaw +duplexConnect = + let + into r = modify (Proxy :: Proxy "protocolTimeout") (map unwrap) + $ modify (Proxy :: Proxy "slowMo") (map unwrap) r + from r = pure + $ modify (Proxy :: Proxy "protocolTimeout") (map wrap) + $ modify (Proxy :: Proxy "slowMo") (map wrap) r + in + duplex into from foreign import _close :: Browser -> Promise Unit foreign import _get :: Foreign -> Effect Browser diff --git a/src/Puppeteer.Page.Navigate.js b/src/Puppeteer.Page.Navigate.js index 05019e5..fb2c3fe 100644 --- a/src/Puppeteer.Page.Navigate.js +++ b/src/Puppeteer.Page.Navigate.js @@ -1,13 +1,16 @@ import { Page } from 'puppeteer' -/** @type {(ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => Promise} */ -export const _forward = ev => p => p.goForward({ timeout: 0, waitUntil: ev }) +/** @type {(ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => () => Promise} */ +export const _forward = ev => p => () => + p.goForward({ timeout: 0, waitUntil: ev }) -/** @type {(ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => Promise} */ -export const _back = ev => p => p.goBack({ timeout: 0, waitUntil: ev }) +/** @type {(ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => () => Promise} */ +export const _back = ev => p => () => p.goBack({ timeout: 0, waitUntil: ev }) -/** @type {(url: string) => (ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => Promise} */ -export const _to = url => ev => p => p.goto(url, { timeout: 0, waitUntil: ev }) +/** @type {(url: string) => (ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => () => Promise} */ +export const _to = url => ev => p => () => + p.goto(url, { timeout: 0, waitUntil: ev }) -/** @type {(ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => Promise} */ -export const _reload = ev => p => p.goForward({ timeout: 0, waitUntil: ev }) +/** @type {(ev: import('puppeteer').PuppeteerLifeCycleEvent) => (p: Page) => () => Promise} */ +export const _reload = ev => p => () => + p.goForward({ timeout: 0, waitUntil: ev }) diff --git a/src/Puppeteer.Page.Navigate.purs b/src/Puppeteer.Page.Navigate.purs index e649025..5ea41a7 100644 --- a/src/Puppeteer.Page.Navigate.purs +++ b/src/Puppeteer.Page.Navigate.purs @@ -7,27 +7,28 @@ import Control.Promise as Promise import Data.Maybe (Maybe) import Data.Newtype (unwrap) import Data.Time.Duration (Milliseconds(..)) +import Effect (Effect) import Effect.Aff (Aff) import Foreign (Foreign) -import Puppeteer.Base (LifecycleEvent(..), Page, URL, prepareLifecycleEvent) +import Puppeteer.Base (LifecycleEvent(..), Page, URL, duplexLifecycleEvent, duplexWrite) import Puppeteer.HTTP as HTTP -foreign import _forward :: Foreign -> Page -> Promise (Maybe HTTP.Response) -foreign import _back :: Foreign -> Page -> Promise (Maybe HTTP.Response) -foreign import _reload :: Foreign -> Page -> Promise (Maybe HTTP.Response) -foreign import _to :: String -> Foreign -> Page -> Promise (Maybe HTTP.Response) +foreign import _forward :: Foreign -> Page -> Effect (Promise (Maybe HTTP.Response)) +foreign import _back :: Foreign -> Page -> Effect (Promise (Maybe HTTP.Response)) +foreign import _reload :: Foreign -> Page -> Effect (Promise (Maybe HTTP.Response)) +foreign import _to :: String -> Foreign -> Page -> Effect (Promise (Maybe HTTP.Response)) forward :: LifecycleEvent -> Page -> Aff (Maybe HTTP.Response) -forward ev = Promise.toAff <<< _forward (prepareLifecycleEvent ev) +forward ev = Promise.toAffE <<< _forward (duplexWrite duplexLifecycleEvent ev) back :: LifecycleEvent -> Page -> Aff (Maybe HTTP.Response) -back ev = Promise.toAff <<< _back (prepareLifecycleEvent ev) +back ev = Promise.toAffE <<< _back (duplexWrite duplexLifecycleEvent ev) -to :: URL -> LifecycleEvent -> Page -> Aff (Maybe HTTP.Response) -to url ev = Promise.toAff <<< _to url (prepareLifecycleEvent ev) +to :: LifecycleEvent -> Page -> URL -> Aff (Maybe HTTP.Response) +to ev p u = Promise.toAffE $ _to u (duplexWrite duplexLifecycleEvent ev) p reload :: LifecycleEvent -> Page -> Aff (Maybe HTTP.Response) -reload ev = Promise.toAff <<< _reload (prepareLifecycleEvent ev) +reload ev = Promise.toAffE <<< _reload (duplexWrite duplexLifecycleEvent ev) forward_ :: Page -> Aff (Maybe HTTP.Response) forward_ = forward Load @@ -35,8 +36,8 @@ forward_ = forward Load back_ :: Page -> Aff (Maybe HTTP.Response) back_ = back Load -to_ :: URL -> Page -> Aff (Maybe HTTP.Response) -to_ url = to url Load +to_ :: Page -> URL -> Aff (Maybe HTTP.Response) +to_ = to Load reload_ :: Page -> Aff (Maybe HTTP.Response) reload_ = reload Load diff --git a/src/Puppeteer.Page.WaitFor.purs b/src/Puppeteer.Page.WaitFor.purs index b1d331c..6396f5b 100644 --- a/src/Puppeteer.Page.WaitFor.purs +++ b/src/Puppeteer.Page.WaitFor.purs @@ -17,7 +17,7 @@ import Data.Time.Duration (Milliseconds(..)) import Effect (Effect) import Effect.Aff (Aff) import Foreign (Foreign) -import Puppeteer.Base (Context(..), Handle, LifecycleEvent, Page, prepareLifecycleEvent) +import Puppeteer.Base (Context(..), Handle, LifecycleEvent, Page, duplexWrite, duplexLifecycleEvent) import Puppeteer.Selector (class Selector, toCSS) newtype NetworkIdleFor = NetworkIdleFor Milliseconds @@ -35,7 +35,7 @@ foreign import _selectorToBeHidden :: String -> Page -> Promise Unit navigation :: LifecycleEvent -> Page -> Effect (Context WaitingForNavigationHint) navigation ev p = do - promise <- _navigation (prepareLifecycleEvent ev) p + promise <- _navigation (duplexWrite duplexLifecycleEvent ev) p pure $ Context (\_ -> Promise.toAff $ promise) networkIdle :: NetworkIdleFor -> Page -> Aff Unit diff --git a/src/Puppeteer.Page.purs b/src/Puppeteer.Page.purs index d1cb092..8ca5425 100644 --- a/src/Puppeteer.Page.purs +++ b/src/Puppeteer.Page.purs @@ -34,14 +34,12 @@ import Control.Promise as Promise import Data.Array as Array import Data.Either (hush) import Data.Maybe (Maybe) -import Data.Nullable (Nullable) -import Data.Nullable as Nullable import Effect (Effect) import Effect.Aff (Aff) import Foreign (Foreign, unsafeToForeign) import Node.Path (FilePath) import Puppeteer.Base (Page) as X -import Puppeteer.Base (class PageProducer, Handle, Keyboard, LifecycleEvent, Page, URL, Viewport, prepareLifecycleEvent, prepareViewport) +import Puppeteer.Base (class PageProducer, Handle, Keyboard, LifecycleEvent, Page, URL, Viewport, duplexLifecycleEvent, duplexViewport, duplexWrite) import Puppeteer.Handle (unsafeCoerceHandle) import Puppeteer.Selector (class Selector, toCSS) import Simple.JSON (readImpl, undefined, writeImpl) @@ -147,10 +145,10 @@ content :: Page -> Aff String content = Promise.toAff <<< _content setContent :: String -> LifecycleEvent -> Page -> Aff Unit -setContent s ev = Promise.toAff <<< _setContent s (prepareLifecycleEvent ev) +setContent s ev = Promise.toAff <<< _setContent s (duplexWrite duplexLifecycleEvent ev) setViewport :: Viewport -> Page -> Aff Unit -setViewport vp = Promise.toAff <<< _setViewport (prepareViewport vp) +setViewport vp = Promise.toAff <<< _setViewport (duplexWrite duplexViewport vp) title :: Page -> Aff String title = Promise.toAff <<< _title diff --git a/src/Puppeteer.Plugin.AdBlock.js b/src/Puppeteer.Plugin.AdBlock.js new file mode 100644 index 0000000..234b3f6 --- /dev/null +++ b/src/Puppeteer.Plugin.AdBlock.js @@ -0,0 +1,20 @@ +import AdBlock, { + PuppeteerExtraPluginAdblocker, +} from 'puppeteer-extra-plugin-adblocker' +import { PuppeteerExtra } from 'puppeteer-extra' + +/** @type {(_: import('puppeteer-extra-plugin-adblocker').PluginOptions) => (_: PuppeteerExtra) => () => PuppeteerExtra} */ +export const _install = o => p => () => p.use(AdBlock(o)) + +/** @type {(_: PuppeteerExtra) => () => Promise} */ +export const _blocker = p => () => { + const adblock = p.plugins.find( + pl => pl instanceof PuppeteerExtraPluginAdblocker, + ) + + if (!adblock || !(adblock instanceof PuppeteerExtraPluginAdblocker)) { + throw new Error('Adblock plugin not registered') + } else { + return adblock.getBlocker() + } +} diff --git a/src/Puppeteer.Plugin.AdBlock.purs b/src/Puppeteer.Plugin.AdBlock.purs new file mode 100644 index 0000000..37c5d60 --- /dev/null +++ b/src/Puppeteer.Plugin.AdBlock.purs @@ -0,0 +1,92 @@ +module Puppeteer.Plugin.AdBlock + ( AdBlockMode(..) + , AdBlockOptions + , AdBlockPlugin + , AdBlocker + , install + , defaultOptions + , blocker + , cspInjectedH + , htmlFilteredH + , requestAllowedH + , requestBlockedH + , requestRedirectedH + , requestWhitelistedH + , scriptInjectedH + , styleInjectedH + ) where + +import Prelude + +import Control.Promise (Promise) +import Control.Promise as Promise +import Data.Maybe (Maybe(..)) +import Effect (Effect) +import Effect.Aff (Aff) +import Foreign (Foreign) +import Node.EventEmitter (EventEmitter) +import Node.EventEmitter as EventEmitter +import Node.EventEmitter.UtilTypes (EventHandle0) as EventEmitter +import Puppeteer.Base (Puppeteer) +import Puppeteer.FFI as FFI +import Simple.JSON (writeImpl) + +-- | https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-adblocker +foreign import data AdBlockPlugin :: Type +foreign import data AdBlocker :: Type + +-- | https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-adblocker#options +data AdBlockMode + -- | Block ads, but not trackers + = BlockAds + -- | Block ads & trackers + | BlockTrackers + -- | Block ads, trackers & annoyances + | BlockAnnoyances + +type AdBlockOptions = { mode :: AdBlockMode, useDiskCache :: Boolean, cacheDir :: Maybe String } + +defaultOptions :: AdBlockOptions +defaultOptions = { mode: BlockAds, useDiskCache: true, cacheDir: Nothing } + +prepareOptions :: AdBlockOptions -> Foreign +prepareOptions { mode, useDiskCache, cacheDir } = FFI.mergeRecords + [ writeImpl case mode of + BlockAds -> { blockTrackers: false, blockTrackersAndAnnoyances: false } + BlockTrackers -> { blockTrackers: true, blockTrackersAndAnnoyances: false } + BlockAnnoyances -> { blockTrackers: true, blockTrackersAndAnnoyances: true } + , writeImpl { useCache: useDiskCache, cacheDir: FFI.maybeToUndefined cacheDir } + ] + +foreign import _install :: forall (r :: Row Type). Foreign -> Puppeteer r -> Effect (Puppeteer (adblock :: AdBlockPlugin | r)) +foreign import _blocker :: forall (r :: Row Type). Puppeteer r -> Effect (Promise AdBlocker) + +install :: forall (r :: Row Type). AdBlockOptions -> Puppeteer r -> Effect (Puppeteer (adblock :: AdBlockPlugin | r)) +install o p = _install (prepareOptions o) p + +blocker :: forall (r :: Row Type). Puppeteer (adblock :: AdBlockPlugin | r) -> Aff AdBlocker +blocker = Promise.toAffE <<< _blocker + +cspInjectedH :: EventEmitter.EventHandle0 AdBlocker +cspInjectedH = EventEmitter.EventHandle "csp-injected" identity + +htmlFilteredH :: EventEmitter.EventHandle0 AdBlocker +htmlFilteredH = EventEmitter.EventHandle "html-filtered" identity + +requestAllowedH :: EventEmitter.EventHandle0 AdBlocker +requestAllowedH = EventEmitter.EventHandle "request-allowed" identity + +requestBlockedH :: EventEmitter.EventHandle0 AdBlocker +requestBlockedH = EventEmitter.EventHandle "request-blocked" identity + +requestRedirectedH :: EventEmitter.EventHandle0 AdBlocker +requestRedirectedH = EventEmitter.EventHandle "request-redirected" identity + +requestWhitelistedH :: EventEmitter.EventHandle0 AdBlocker +requestWhitelistedH = EventEmitter.EventHandle "request-whitelisted" identity + +scriptInjectedH :: EventEmitter.EventHandle0 AdBlocker +scriptInjectedH = EventEmitter.EventHandle "script-injected" identity + +styleInjectedH :: EventEmitter.EventHandle0 AdBlocker +styleInjectedH = EventEmitter.EventHandle "style-injected" identity diff --git a/src/Puppeteer.Plugin.AnonymousUserAgent.js b/src/Puppeteer.Plugin.AnonymousUserAgent.js new file mode 100644 index 0000000..f61c0fb --- /dev/null +++ b/src/Puppeteer.Plugin.AnonymousUserAgent.js @@ -0,0 +1,5 @@ +import AnonUA from 'puppeteer-extra-plugin-anonymize-ua' +import { PuppeteerExtra } from 'puppeteer-extra' + +/** @type {(_: PuppeteerExtra) => () => PuppeteerExtra} */ +export const install = p => () => p.use(AnonUA()) diff --git a/src/Puppeteer.Plugin.AnonymousUserAgent.purs b/src/Puppeteer.Plugin.AnonymousUserAgent.purs new file mode 100644 index 0000000..89a2ab4 --- /dev/null +++ b/src/Puppeteer.Plugin.AnonymousUserAgent.purs @@ -0,0 +1,8 @@ +module Puppeteer.Plugin.AnonymousUserAgent where + +import Effect (Effect) +import Puppeteer.Base (Puppeteer) + +-- | https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-anonymize-ua +foreign import data AnonymousUserAgentPlugin :: Type +foreign import install :: forall (r :: Row Type). Puppeteer r -> Effect (Puppeteer (userAgent :: AnonymousUserAgentPlugin | r)) diff --git a/src/Puppeteer.Plugin.Captcha.js b/src/Puppeteer.Plugin.Captcha.js new file mode 100644 index 0000000..fb81846 --- /dev/null +++ b/src/Puppeteer.Plugin.Captcha.js @@ -0,0 +1,23 @@ +import { PuppeteerExtra } from 'puppeteer-extra' +import Captcha from 'puppeteer-extra-plugin-recaptcha' + +/** @typedef {import('puppeteer-extra-plugin-recaptcha/dist/types').PluginOptions} PluginOptions */ +/** @typedef {import('puppeteer-extra-plugin-recaptcha/dist/types').CaptchaInfo} CaptchaInfo */ +/** @typedef {import('puppeteer-extra-plugin-recaptcha/dist/types').CaptchaSolution} CaptchaSolution */ +/** @typedef {import('puppeteer-extra-plugin-recaptcha/dist/types').CaptchaSolved} CaptchaSolved */ +/** @typedef {import('puppeteer').Page & import('puppeteer-extra-plugin-recaptcha/dist/types').RecaptchaPluginPageAdditions} Page */ + +/** @type {(_: PluginOptions) => (_: PuppeteerExtra) => () => PuppeteerExtra} */ +export const _captcha = o => p => () => p.use(Captcha(o)) + +/** @type {(_: Page) => Promise<{captchas: CaptchaInfo[], filtered: unknown[]}>} */ +export const _findCaptchas = p => p.findRecaptchas() + +/** @type {(_: Page) => (_: CaptchaInfo[]) => Promise<{solutions: CaptchaSolution[]}>} */ +export const _getSolutions = p => cs => p.getRecaptchaSolutions(cs) + +/** @type {(_: Page) => (_: CaptchaSolution[]) => Promise<{solved: CaptchaSolved[]}>} */ +export const _enterSolutions = p => cs => p.enterRecaptchaSolutions(cs) + +/** @type {(_: Page) => Promise<{captchas: CaptchaInfo[], filtered: unknown[], solutions: CaptchaSolution[], solved: CaptchaSolved[]}>} */ +export const _solveCaptchas = p => p.solveRecaptchas() diff --git a/src/Puppeteer.Plugin.Captcha.purs b/src/Puppeteer.Plugin.Captcha.purs new file mode 100644 index 0000000..adfae40 --- /dev/null +++ b/src/Puppeteer.Plugin.Captcha.purs @@ -0,0 +1,308 @@ +module Puppeteer.Plugin.Captcha + ( install + , findCaptchas + , solveCaptchas + , defaultOptions + , CaptchaCallback(..) + , Options + , CaptchaProvider(..) + , CaptchaVendor(..) + , CaptchaPlugin + , CaptchaKind(..) + , CaptchaFiltered(..) + , Token2Captcha(..) + , CaptchaInfo + , CaptchaInfoMaybeFiltered + , CaptchaSolution + , CaptchaSolved + , CaptchaInfoDisplay + , SolveResult + , getSolutions + , enterSolutions + ) where + +import Prelude + +import Control.Monad.Error.Class (liftEither) +import Control.Monad.Except (runExcept) +import Control.Promise (Promise) +import Control.Promise as Promise +import Data.Bifunctor (lmap) +import Data.Either (Either, hush) +import Data.Generic.Rep (class Generic) +import Data.JSDate (JSDate) +import Data.Maybe (Maybe(..)) +import Data.Newtype (class Newtype, unwrap, wrap) +import Data.Show.Generic (genericShow) +import Data.Traversable (for, sequence) +import Data.Tuple (Tuple) +import Data.Tuple.Nested ((/\)) +import Data.Variant (Variant) +import Effect (Effect) +import Effect.Aff (Aff) +import Effect.Exception (Error, error) +import Effect.Unsafe (unsafePerformEffect) +import Foreign (Foreign, unsafeFromForeign, unsafeReadTagged, unsafeToForeign) +import Puppeteer.Base (JsDuplex(..), Page, Puppeteer, duplex, duplexRead, duplexWrite) +import Puppeteer.FFI as FFI +import Record (modify, rename) +import Simple.JSON (class ReadForeign, class WriteForeign, readImpl, writeImpl) +import Type.Prelude (Proxy(..)) + +newtype CoerceDate = CoerceDate (Maybe JSDate) + +derive instance Newtype CoerceDate _ + +instance ReadForeign CoerceDate where + readImpl f = pure $ CoerceDate $ hush $ runExcept $ unsafeReadTagged "Date" f + +instance WriteForeign CoerceDate where + writeImpl (CoerceDate f) = unsafeToForeign f + +newtype Token2Captcha = Token2Captcha String + +derive instance Newtype Token2Captcha _ +derive instance Generic Token2Captcha _ +instance Show Token2Captcha where + show = genericShow + +data CaptchaKind = KindCheckbox | KindInvisible | KindScore | KindOther String + +derive instance Generic CaptchaKind _ +derive instance Eq CaptchaKind +instance Show CaptchaKind where + show = genericShow + +instance WriteForeign CaptchaKind where + writeImpl = writeImpl <<< case _ of + KindCheckbox -> "checkbox" + KindInvisible -> "invisible" + KindScore -> "score" + KindOther s -> s + +instance ReadForeign CaptchaKind where + readImpl = + let + fromStr = case _ of + "checkbox" -> KindCheckbox + "invisible" -> KindInvisible + "score" -> KindScore + s -> KindOther s + in + map fromStr <<< readImpl + +data CaptchaVendor = VendorReCaptcha | VendorHCaptcha | VendorOther String + +derive instance Generic CaptchaVendor _ +derive instance Eq CaptchaVendor +instance Show CaptchaVendor where + show = genericShow + +vendorFromString :: String -> CaptchaVendor +vendorFromString = case _ of + "recaptcha" -> VendorReCaptcha + "hcaptcha" -> VendorHCaptcha + s -> VendorOther s + +instance ReadForeign CaptchaVendor where + readImpl f = vendorFromString <$> readImpl f + +instance WriteForeign CaptchaVendor where + writeImpl VendorHCaptcha = writeImpl "hcaptcha" + writeImpl VendorReCaptcha = writeImpl "recaptcha" + writeImpl (VendorOther s) = writeImpl s + +data CaptchaFiltered = FilteredScoreBased | FilteredNotInViewport | FilteredInactive + +derive instance Generic CaptchaFiltered _ +derive instance Eq CaptchaFiltered +instance Show CaptchaFiltered where + show = genericShow + +newtype CaptchaCallback = CaptchaCallback Foreign + +derive instance Newtype CaptchaCallback _ +derive newtype instance WriteForeign CaptchaCallback +derive newtype instance ReadForeign CaptchaCallback +derive instance Generic CaptchaCallback _ +instance Show CaptchaCallback where + show _ = "CaptchaCallback" + +filteredFromString :: String -> Maybe CaptchaFiltered +filteredFromString = case _ of + "solveInViewportOnly" -> Just FilteredNotInViewport + "solveScoreBased" -> Just FilteredScoreBased + "solveInactiveChallenges" -> Just FilteredInactive + _ -> Nothing + +type CaptchaInfoDisplay = + { size :: Maybe Foreign + , theme :: Maybe String + , top :: Maybe Foreign + , left :: Maybe Foreign + , width :: Maybe Foreign + , height :: Maybe Foreign + } + +type CaptchaInfoMaybeFiltered = Tuple CaptchaInfo (Maybe CaptchaFiltered) + +type CaptchaSolution = + { vendor :: Maybe CaptchaVendor + , id :: Maybe String + , text :: Maybe String + , hasSolution :: Boolean + , requestAt :: Maybe JSDate + , responseAt :: Maybe JSDate + , duration :: Maybe Number + , provider :: Maybe String + , providerCaptchaId :: Maybe String + } + +type CaptchaSolved = + { vendor :: Maybe CaptchaVendor + , id :: Maybe String + , isSolved :: Maybe Boolean + , responseElement :: Maybe Boolean + , responseCallback :: Maybe Boolean + , solvedAt :: Maybe JSDate + } + +duplexSolved :: JsDuplex CaptchaSolved _ +duplexSolved = + let + toRaw r = modify (Proxy :: Proxy "solvedAt") CoerceDate + $ r + fromRaw r = pure + $ modify (Proxy :: Proxy "solvedAt") unwrap + $ r + in + duplex toRaw fromRaw + +type SolveResult = + { captchas :: Array CaptchaInfoMaybeFiltered + , solved :: Array CaptchaSolved + , solutions :: Array CaptchaSolution + } + +data CaptchaProvider + = Provider2Captcha Token2Captcha + | ProviderCustom (Array CaptchaInfo -> Aff (Array CaptchaSolution)) + +prepareCustomProvider :: (Array CaptchaInfo -> Aff (Array CaptchaSolution)) -> Array CaptchaInfo -> Promise { solutions :: Array CaptchaSolution } +prepareCustomProvider f = unsafePerformEffect <<< Promise.fromAff <<< map (\solutions -> { solutions }) <<< f + +type Options = + { visualize :: Maybe Boolean + , skipNotInViewport :: Maybe Boolean + , skipScoreBased :: Maybe Boolean + , skipInactive :: Maybe Boolean + , provider :: CaptchaProvider + } + +defaultOptions :: Token2Captcha -> Options +defaultOptions token = { visualize: Nothing, skipNotInViewport: Nothing, skipInactive: Nothing, skipScoreBased: Nothing, provider: Provider2Captcha token } + +prepareOptions :: Options -> Foreign +prepareOptions { provider, visualize, skipInactive, skipNotInViewport, skipScoreBased } = + writeImpl + { provider: case provider of + Provider2Captcha (Token2Captcha t) -> writeImpl { id: "2captcha", token: t } + ProviderCustom f -> writeImpl { fn: unsafeToForeign $ prepareCustomProvider f } + , visualFeedback: FFI.maybeToUndefined visualize + , solveInViewportOnly: FFI.maybeToUndefined $ skipNotInViewport + , solveScoreBased: FFI.maybeToUndefined $ not <$> skipScoreBased + , solveInactiveChallenges: FFI.maybeToUndefined $ not <$> skipInactive + , throwOnError: true + } + +type CaptchaInfo = + { kind :: Maybe CaptchaKind + , vendor :: Maybe CaptchaVendor + , id :: Maybe String + , sitekey :: Maybe String + , s :: Maybe String + , isInViewport :: Maybe Boolean + , isInvisible :: Maybe Boolean + , hasActiveChallengePopup :: Maybe Boolean + , hasChallengeFrame :: Maybe Boolean + , action :: Maybe String + , callback :: CaptchaCallback + , hasResponseElement :: Maybe Boolean + , url :: Maybe String + , display :: Maybe CaptchaInfoDisplay + } + +duplexSoln :: JsDuplex CaptchaSolution _ +duplexSoln = + let + toRaw r = modify (Proxy :: Proxy "requestAt") CoerceDate + $ modify (Proxy :: Proxy "responseAt") CoerceDate + $ r + fromRaw r = pure + $ modify (Proxy :: Proxy "requestAt") (unwrap) + $ modify (Proxy :: Proxy "responseAt") (unwrap) + $ r + in + duplex toRaw fromRaw + +duplexInfo :: JsDuplex CaptchaInfo _ +duplexInfo = + let + toRaw r = rename (Proxy :: Proxy "kind") (Proxy :: Proxy "_type") $ r + fromRaw r = pure $ rename (Proxy :: Proxy "_type") (Proxy :: Proxy "kind") r + in + duplex toRaw fromRaw + +foreign import data CaptchaPlugin :: Type + +foreign import _captcha :: forall (r :: Row Type). Foreign -> Puppeteer r -> Effect (Puppeteer (captcha :: CaptchaPlugin | r)) +foreign import _findCaptchas :: Page -> Promise Foreign +foreign import _getSolutions :: Page -> Foreign -> Promise Foreign +foreign import _enterSolutions :: Page -> Foreign -> Promise Foreign +foreign import _solveCaptchas :: Page -> Promise Foreign + +read :: forall @a. ReadForeign a => Foreign -> Either Error a +read = lmap (error <<< show) <<< runExcept <<< readImpl + +install :: forall (r :: Row Type). Options -> Puppeteer r -> Effect (Puppeteer (captcha :: CaptchaPlugin | r)) +install o p = _captcha (prepareOptions o) p + +infos :: Foreign -> Either Error (Array CaptchaInfoMaybeFiltered) +infos f = do + { captchas, filtered } <- read @({ captchas :: Array Foreign, filtered :: Array Foreign }) f + captchas' <- sequence $ duplexRead duplexInfo <$> captchas + let captchas'' = (_ /\ Nothing) <$> captchas' + filtered' <- for filtered \f' -> do + c <- duplexRead duplexInfo f' + { filtered: wasF, filteredReason } <- read @({ filtered :: Boolean, filteredReason :: String }) f' + pure $ case filteredFromString filteredReason of + Just r | wasF -> c /\ (Just r) + _ -> c /\ Nothing + pure $ captchas'' <> filtered' + +findCaptchas :: forall (r :: Row Type). Puppeteer (captcha :: CaptchaPlugin | r) -> Page -> Aff (Array CaptchaInfoMaybeFiltered) +findCaptchas _ p = do + f <- Promise.toAff $ _findCaptchas p + liftEither $ infos f + +getSolutions :: forall (r :: Row Type). Puppeteer (captcha :: CaptchaPlugin | r) -> Page -> Array CaptchaInfo -> Aff (Array CaptchaSolution) +getSolutions _ p is = do + f <- Promise.toAff $ _getSolutions p (writeImpl $ duplexWrite duplexInfo <$> is) + { solutions } <- liftEither $ read @({ solutions :: Array Foreign }) f + liftEither $ for solutions $ duplexRead duplexSoln + +enterSolutions :: forall (r :: Row Type). Puppeteer (captcha :: CaptchaPlugin | r) -> Page -> Array CaptchaSolution -> Aff (Array CaptchaSolved) +enterSolutions _ p sols = do + f <- Promise.toAff $ _enterSolutions p (writeImpl $ duplexWrite duplexSoln <$> sols) + { solved } <- liftEither $ read @({ solved :: Array Foreign }) f + liftEither $ for solved $ duplexRead duplexSolved + +solveCaptchas :: forall (r :: Row Type). Puppeteer (captcha :: CaptchaPlugin | r) -> Page -> Aff SolveResult +solveCaptchas _ p = do + f <- Promise.toAff $ _solveCaptchas p + { solved, solutions } <- liftEither $ read @({ solved :: Array Foreign, solutions :: Array Foreign }) f + captchas <- liftEither $ infos f + liftEither do + solved' <- for solved $ duplexRead duplexSolved + solutions' <- for solutions $ duplexRead duplexSoln + pure $ { captchas, solved: solved', solutions: solutions' } diff --git a/src/Puppeteer.Plugin.Stealth.js b/src/Puppeteer.Plugin.Stealth.js new file mode 100644 index 0000000..6056ed8 --- /dev/null +++ b/src/Puppeteer.Plugin.Stealth.js @@ -0,0 +1,5 @@ +import { PuppeteerExtra } from 'puppeteer-extra' +import Stealth from 'puppeteer-extra-plugin-stealth' + +/** @type {(_: PuppeteerExtra) => () => PuppeteerExtra} */ +export const install = p => () => p.use(Stealth()) diff --git a/src/Puppeteer.Plugin.Stealth.purs b/src/Puppeteer.Plugin.Stealth.purs new file mode 100644 index 0000000..0fcb3a7 --- /dev/null +++ b/src/Puppeteer.Plugin.Stealth.purs @@ -0,0 +1,8 @@ +module Puppeteer.Plugin.Stealth where + +import Effect (Effect) +import Puppeteer.Base (Puppeteer) + +-- | https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth +foreign import data StealthPlugin :: Type +foreign import install :: forall (r :: Row Type). Puppeteer r -> Effect (Puppeteer (stealth :: StealthPlugin | r)) diff --git a/src/Puppeteer.purs b/src/Puppeteer.purs index ee542d6..fde2767 100644 --- a/src/Puppeteer.purs +++ b/src/Puppeteer.purs @@ -1,6 +1,6 @@ module Puppeteer ( module X - , puppeteer + , new , connect , launch , connect_ @@ -23,12 +23,12 @@ import Effect (Effect) import Effect.Aff (Aff) import Effect.Unsafe (unsafePerformEffect) import Foreign (Foreign) -import Puppeteer.Base (Puppeteer) +import Puppeteer.Base (Puppeteer, duplexWrite) import Puppeteer.Base as X -import Puppeteer.Screenshot as X import Puppeteer.Browser (Browser) import Puppeteer.Browser as Browser import Puppeteer.FFI as FFI +import Puppeteer.Screenshot as X import Simple.JSON (writeImpl) --| [https://pptr.dev/api/puppeteer.puppeteerlaunchoptions] @@ -113,7 +113,7 @@ prepareConnectOptions , headers: FFI.maybeToUndefined $ map FFI.mapToRecord headers , transport: FFI.maybeToUndefined $ map transport' transport } - , writeImpl $ map Browser.prepareConnectOptions browser + , writeImpl $ map (duplexWrite Browser.duplexConnect) browser ] prepareLaunchOptions :: Launch -> Foreign @@ -157,7 +157,7 @@ prepareLaunchOptions , headless: if headless then writeImpl "new" else writeImpl false , userDataDir: FFI.maybeToUndefined userDataDir } - , writeImpl $ FFI.maybeToUndefined $ map Browser.prepareConnectOptions browser + , writeImpl $ FFI.maybeToUndefined $ map (duplexWrite Browser.duplexConnect) browser ] foreign import _puppeteer :: Effect (Promise (Puppeteer ())) @@ -168,8 +168,8 @@ foreign import _launch :: forall p. Foreign -> Puppeteer p -> Effect (Promise Br --| --| [`PuppeteerExtra`](https://github.com/berstend/puppeteer-extra/blob/master/packages/puppeteer-extra/src/index.ts) --| [`PuppeteerNode`](https://pptr.dev/api/puppeteer.puppeteernode) -puppeteer :: Unit -> Aff (Puppeteer ()) -puppeteer _ = Promise.toAffE _puppeteer +new :: Aff (Puppeteer ()) +new = Promise.toAffE _puppeteer --| Connect to an existing browser instance --| diff --git a/test/Puppeteer.Browser.Spec.purs b/test/Puppeteer.Browser.Spec.purs index 733ae6c..5f1ea91 100644 --- a/test/Puppeteer.Browser.Spec.purs +++ b/test/Puppeteer.Browser.Spec.purs @@ -12,7 +12,7 @@ import Test.Spec.Assertions (shouldEqual, shouldNotEqual) import Test.Util (test, testE) spec :: SpecT Aff Unit Effect Unit -spec = beforeAll (Pup.launch_ =<< Pup.puppeteer unit) +spec = beforeAll (Pup.launch_ =<< Pup.new) $ describe "Browser" do testE "websocketEndpoint" $ shouldNotEqual "" <=< Pup.Browser.websocketEndpoint testE "connected" $ shouldEqual true <=< Pup.Browser.connected @@ -22,6 +22,6 @@ spec = beforeAll (Pup.launch_ =<< Pup.puppeteer unit) connected <- liftEffect $ Pup.Browser.connected b connected `shouldEqual` false - pup <- Pup.puppeteer unit + pup <- Pup.new b' <- Pup.connect (Pup.connectDefault $ Pup.BrowserWebsocket ws) pup Pup.Browser.close b' diff --git a/test/Puppeteer.Handle.Spec.purs b/test/Puppeteer.Handle.Spec.purs index 1d871eb..d383f6f 100644 --- a/test/Puppeteer.Handle.Spec.purs +++ b/test/Puppeteer.Handle.Spec.purs @@ -100,7 +100,7 @@ withPage :: SpecT Aff Pup.Page Effect Unit -> SpecT Aff Unit Effect Unit withPage = let withPage' spec' _ = do - pup <- Pup.puppeteer unit + pup <- Pup.new b <- Pup.launch_ pup page <- Pup.Page.new b failOnPageError page do diff --git a/test/Puppeteer.Page.Event.Spec.purs b/test/Puppeteer.Page.Event.Spec.purs index 64501dd..9c9d1d0 100644 --- a/test/Puppeteer.Page.Event.Spec.purs +++ b/test/Puppeteer.Page.Event.Spec.purs @@ -61,7 +61,7 @@ withPage = spec :: SpecT Aff Unit Effect Unit spec = - beforeAll (Pup.launch_ =<< Pup.puppeteer unit) + beforeAll (Pup.launch_ =<< Pup.new) $ afterAll Pup.Browser.close $ do describe "Event" do diff --git a/test/Puppeteer.Page.Spec.purs b/test/Puppeteer.Page.Spec.purs index 5ee3eb4..e82d356 100644 --- a/test/Puppeteer.Page.Spec.purs +++ b/test/Puppeteer.Page.Spec.purs @@ -75,7 +75,7 @@ inputPage = """ spec :: SpecT Aff Unit Effect Unit -spec = beforeAll (Pup.launch_ =<< Pup.puppeteer unit) +spec = beforeAll (Pup.launch_ =<< Pup.new) $ afterAll Pup.Browser.close $ describe "Page" do test "new, close, isClosed" \b -> do diff --git a/test/Puppeteer.Plugin.Spec.purs b/test/Puppeteer.Plugin.Spec.purs new file mode 100644 index 0000000..3d22857 --- /dev/null +++ b/test/Puppeteer.Plugin.Spec.purs @@ -0,0 +1,99 @@ +module Puppeteer.Plugin.Spec where + +import Prelude + +import Control.Monad.Error.Class (liftMaybe, try) +import Control.Monad.ST as ST +import Control.Monad.ST.Global as ST +import Control.Monad.ST.Ref as ST +import Control.Parallel (parallel, sequential) +import Data.Array as Array +import Data.Foldable (for_) +import Data.Maybe (Maybe(..)) +import Data.Newtype (wrap) +import Data.Traversable (for) +import Effect (Effect) +import Effect.Aff (Aff, delay) +import Effect.Class (liftEffect) +import Effect.Console (warn) +import Effect.Exception (error) +import Node.EventEmitter as EventEmitter +import Node.Process as Process +import Puppeteer as Pup +import Puppeteer.Eval as Pup.Eval +import Puppeteer.Page as Pup.Page +import Puppeteer.Page.Navigate as Pup.Page.Nav +import Puppeteer.Page.WaitFor as Pup.Page.WaitFor +import Puppeteer.Plugin.AdBlock as Pup.AdBlock +import Puppeteer.Plugin.AnonymousUserAgent as Pup.AnonUA +import Puppeteer.Plugin.Captcha as Pup.Captcha +import Puppeteer.Plugin.Stealth as Pup.Stealth +import Test.Spec (SpecT(..), describe, focus, pending) +import Test.Spec.Assertions (shouldEqual, shouldSatisfy) +import Test.Util (test) + +spec :: SpecT Aff Unit Effect Unit +spec = describe "Plugin" do + args <- liftEffect Process.argv + + let + pendingUnlessArg a t b = + if not $ Array.any (_ == a) args then do + let msg = " (skipped unless `" <> a <> "`, ex. `spago test " <> a <> "`)" + pending (t <> msg) + else + test t b + + describe "Captcha" do + test "install" do + pup <- Pup.new + pup' <- liftEffect $ Pup.Captcha.install (Pup.Captcha.defaultOptions $ wrap "") pup + void $ Pup.launch_ pup' + + pendingUnlessArg "--test-captcha" "solves captchas" do + token <- liftMaybe (error "TWOCAPTCHA_API_KEY not present") <=< liftEffect <<< Process.lookupEnv $ "TWOCAPTCHA_API_KEY" + let + urls = + [ "https://www.google.com/recaptcha/api2/demo" + , "https://accounts.hcaptcha.com/demo" + , "https://democaptcha.com/demo-form-eng/hcaptcha.html" + ] + pup <- Pup.new + pup' <- liftEffect $ Pup.Captcha.install (Pup.Captcha.defaultOptions $ wrap token) pup + b <- Pup.launch_ pup' + sequential $ for_ urls \u -> parallel do + p <- Pup.Page.new b + _ <- Pup.Page.Nav.to_ p u + { solved } <- Pup.Captcha.solveCaptchas pup' p + Array.length solved `shouldSatisfy` (_ >= 1) + pure unit + describe "Adblock" do + test "install" do + pup <- Pup.new + pup' <- liftEffect $ Pup.AdBlock.install Pup.AdBlock.defaultOptions pup + void $ Pup.AdBlock.blocker pup' + pendingUnlessArg "--test-adblock" "blocks ads" do + pup <- Pup.new + pup' <- liftEffect $ Pup.AdBlock.install Pup.AdBlock.defaultOptions pup + blocker <- Pup.AdBlock.blocker pup' + requestsBlocked <- liftEffect $ ST.toEffect (ST.new 0) + stylesInjected <- liftEffect $ ST.toEffect (ST.new 0) + let add1On st h = liftEffect $ EventEmitter.on_ h (void $ ST.toEffect $ ST.modify (_ + 1) st) blocker + add1On requestsBlocked Pup.AdBlock.requestBlockedH + add1On stylesInjected Pup.AdBlock.styleInjectedH + b <- Pup.launch_ pup' + p <- Pup.Page.new b + _ <- Pup.Page.Nav.to_ p "https://www.google.com/search?q=rent%20a%20car" + Pup.Page.WaitFor.networkIdle (Pup.Page.WaitFor.NetworkIdleFor $ wrap 200.0) p + reqs <- liftEffect $ ST.toEffect $ ST.read requestsBlocked + stys <- liftEffect $ ST.toEffect $ ST.read stylesInjected + reqs `shouldSatisfy` (_ >= 1) + stys `shouldSatisfy` (_ >= 1) + describe "Stealth" do + test "install" do + pup <- Pup.new + void $ liftEffect $ Pup.Stealth.install pup + describe "AnonymousUserAgent" do + test "install" do + pup <- Pup.new + void $ liftEffect $ Pup.AnonUA.install pup diff --git a/test/Puppeteer.Spec.purs b/test/Puppeteer.Spec.purs index 40b3fc1..0727f98 100644 --- a/test/Puppeteer.Spec.purs +++ b/test/Puppeteer.Spec.purs @@ -11,6 +11,7 @@ import Puppeteer.Browser as Pup.Browser import Puppeteer.Browser.Spec as Spec.Browser import Puppeteer.Handle.Spec as Spec.Handle import Puppeteer.Page.Spec as Spec.Page +import Puppeteer.Plugin.Spec as Spec.Plugin import Puppeteer.Selector.Spec as Spec.Selector import Test.Spec (SpecT, describe, mapSpecTree) import Test.Spec.Assertions (shouldEqual) @@ -19,11 +20,11 @@ import Test.Util (test) spec :: SpecT Aff Unit Effect Unit spec = describe "Puppeteer" do test "launch" do - pup <- Pup.puppeteer unit + pup <- Pup.new map void Pup.launch_ pup test "connect" do - pup <- Pup.puppeteer unit + pup <- Pup.new b1 <- Pup.launch_ pup ws <- liftEffect $ Pup.Browser.websocketEndpoint b1 @@ -39,4 +40,5 @@ spec = describe "Puppeteer" do Spec.Browser.spec Spec.Page.spec Spec.Handle.spec + Spec.Plugin.spec mapSpecTree (pure <<< unwrap) identity Spec.Selector.spec diff --git a/test/Test.Main.purs b/test/Test.Main.purs index 2511673..0eb310e 100644 --- a/test/Test.Main.purs +++ b/test/Test.Main.purs @@ -19,11 +19,13 @@ import Test.Spec.Config (defaultConfig) import Test.Spec.Reporter (consoleReporter) import Test.Spec.Result (Result(..)) import Test.Spec.Runner (runSpecT) +import Dotenv as Dotenv foreign import errorString :: Error -> Effect String main :: Effect Unit main = launchAff_ do + Dotenv.loadFile let cfg = defaultConfig { timeout = Nothing, exit = false } run <- liftEffect $ runSpecT cfg [ consoleReporter ] Spec.spec res <- (map (join <<< map (foldl Array.snoc [])) run) :: Aff (Array Result)