From 2b16a38a66d82cf20680e7c9b31ac0e644dac5dd Mon Sep 17 00:00:00 2001 From: Orion Kindel Date: Mon, 13 May 2024 14:00:13 -0500 Subject: [PATCH] fix: improve performance --- bun.lockb | Bin 2424 -> 2424 bytes spago.lock | 6 ++-- spago.yaml | 2 +- src/Node.Stream.CSV.Stringify.purs | 8 ++--- src/Pipes.CSV.purs | 23 ++++++------- test/Test/Main.purs | 2 +- test/Test/Pipes.CSV.purs | 52 ++++++++++++++--------------- 7 files changed, 45 insertions(+), 48 deletions(-) diff --git a/bun.lockb b/bun.lockb index 4e03452039c52b3032684937ea61ecaf11e39240..15b27c4aef862a4ade6c539c5b4ad2b1c4d20307 100755 GIT binary patch delta 21 dcmew%^h0RFbv6#hI70(H6FuY2_t|DL0RUOf2VejI delta 21 Ycmew%^h0RFbv6zr1_;=EpKT@+087RN-v9sr diff --git a/spago.lock b/spago.lock index 06d32b5..7cf26ff 100644 --- a/spago.lock +++ b/spago.lock @@ -18,7 +18,7 @@ workspace: - newtype: ">=5.0.0 <6.0.0" - node-buffer: ">=9.0.0 <10.0.0" - node-event-emitter: ">=3.0.0 <4.0.0" - - node-stream-pipes: ">=1.2.3 <2.0.0" + - node-stream-pipes: ">=1.3.0 <2.0.0" - node-streams: ">=9.0.0 <10.0.0" - nullable: ">=6.0.0 <7.0.0" - numbers: ">=9.0.1 <10.0.0" @@ -607,8 +607,8 @@ packages: - effect node-stream-pipes: type: registry - version: 1.2.3 - integrity: sha256-lXD3x6+p72uBrRHGHrob2jrrBDakhhZE9O9EYE4aFiE= + version: 1.3.0 + integrity: sha256-5Jpf0BLn0ExQWYxbTTewai4M8quEmEVHxihc9CM1Juo= dependencies: - aff - arrays diff --git a/spago.yaml b/spago.yaml index 8c76703..7cbc581 100644 --- a/spago.yaml +++ b/spago.yaml @@ -10,6 +10,7 @@ package: strict: true pedanticPackages: true dependencies: + - node-stream-pipes: ">=1.3.0 <2.0.0" - aff: ">=7.1.0 <8.0.0" - arrays: ">=7.3.0 <8.0.0" - bifunctors: ">=6.0.0 <7.0.0" @@ -25,7 +26,6 @@ package: - newtype: ">=5.0.0 <6.0.0" - node-buffer: ">=9.0.0 <10.0.0" - node-event-emitter: ">=3.0.0 <4.0.0" - - node-stream-pipes: ">=1.2.3 <2.0.0" - node-streams: ">=9.0.0 <10.0.0" - nullable: ">=6.0.0 <7.0.0" - numbers: ">=9.0.1 <10.0.0" diff --git a/src/Node.Stream.CSV.Stringify.purs b/src/Node.Stream.CSV.Stringify.purs index 06a70fc..2a3a605 100644 --- a/src/Node.Stream.CSV.Stringify.purs +++ b/src/Node.Stream.CSV.Stringify.purs @@ -54,15 +54,15 @@ recordToForeign = unsafeCoerce -- | Requires an ordered array of column names. make :: forall @config @missing @extra - . Union config missing (Config extra) + . Union config missing (Config extra) => Array String -> { | config } -> Effect (CSVStringifier ()) make columns = makeImpl - <<< unsafeToForeign - <<< Object.union (recordToForeign { columns, header: true }) - <<< recordToForeign + <<< unsafeToForeign + <<< Object.union (recordToForeign { columns, header: true }) + <<< recordToForeign -- | Convert the raw stream to a typed ObjectStream toObjectStream :: CSVStringifier () -> Object.Transform (Array String) String diff --git a/src/Pipes.CSV.purs b/src/Pipes.CSV.purs index 58555a7..9ac747c 100644 --- a/src/Pipes.CSV.purs +++ b/src/Pipes.CSV.purs @@ -2,9 +2,9 @@ module Pipes.CSV where import Prelude -import Control.Monad.Error.Class (class MonadThrow, liftEither) +import Control.Monad.Error.Class (liftEither) import Control.Monad.Except (runExcept) -import Control.Monad.Rec.Class (class MonadRec, forever) +import Control.Monad.Rec.Class (forever) import Control.Monad.ST.Global as ST import Control.Monad.ST.Ref as STRef import Data.Array as Array @@ -14,9 +14,9 @@ import Data.FunctorWithIndex (mapWithIndex) import Data.Map as Map import Data.Maybe (Maybe(..)) import Data.Tuple.Nested ((/\)) -import Effect.Aff.Class (class MonadAff) +import Effect.Aff (Aff) import Effect.Class (liftEffect) -import Effect.Exception (Error, error) +import Effect.Exception (error) import Node.Buffer (Buffer) import Node.Stream.CSV.Parse as CSV.Parse import Node.Stream.CSV.Stringify as CSV.Stringify @@ -47,13 +47,10 @@ import Type.Prelude (Proxy(..)) -- | rows `shouldEqual` [{id: 1, foo: "hi", is_deleted: false}, {id: 2, foo: "bye", is_deleted: true}] -- | ``` parse - :: forall @r rl m - . MonadAff m - => MonadThrow Error m - => MonadRec m - => RowToList r rl + :: forall @r rl + . RowToList r rl => ReadCSVRecord r rl - => Pipe (Maybe Buffer) (Maybe { | r }) m Unit + => Pipe (Maybe Buffer) (Maybe { | r }) Aff Unit parse = do raw <- liftEffect $ CSV.Parse.make {} colsST <- liftEffect $ ST.toEffect $ STRef.new Nothing @@ -77,14 +74,14 @@ parse = do -- | Transforms buffer chunks of a CSV file to parsed -- | arrays of CSV values. -parseRaw :: forall m. MonadAff m => MonadThrow Error m => Pipe (Maybe Buffer) (Maybe (Array String)) m Unit +parseRaw :: Pipe (Maybe Buffer) (Maybe (Array String)) Aff Unit parseRaw = do s <- liftEffect $ CSV.Parse.toObjectStream <$> CSV.Parse.make {} Pipes.Stream.fromTransform s -- | Transforms CSV rows into stringified CSV records -- | using the given ordered array of column names. -stringifyRaw :: forall m. MonadAff m => MonadThrow Error m => Array String -> Pipe (Maybe (Array String)) (Maybe String) m Unit +stringifyRaw :: Array String -> Pipe (Maybe (Array String)) (Maybe String) Aff Unit stringifyRaw columns = do s <- liftEffect $ CSV.Stringify.toObjectStream <$> CSV.Stringify.make columns {} Pipes.Stream.fromTransform s @@ -92,7 +89,7 @@ stringifyRaw columns = do -- | Transforms purescript records into stringified CSV records. -- | -- | Columns are inferred from the record's keys, ordered alphabetically. -stringify :: forall m r rl. MonadRec m => MonadAff m => MonadThrow Error m => WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) m Unit +stringify :: forall r rl. WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) Aff Unit stringify = do raw <- liftEffect $ CSV.Stringify.make (Array.fromFoldable $ keys $ Proxy @r) {} let diff --git a/test/Test/Main.purs b/test/Test/Main.purs index 9c56e51..01eb821 100644 --- a/test/Test/Main.purs +++ b/test/Test/Main.purs @@ -10,5 +10,5 @@ import Test.Spec.Reporter (specReporter) import Test.Spec.Runner (defaultConfig, runSpec') main :: Effect Unit -main = launchAff_ $ runSpec' (defaultConfig { timeout = Nothing }) [ specReporter ] do +main = launchAff_ $ runSpec' (defaultConfig { failFast = true, timeout = Nothing }) [ specReporter ] do Test.Pipes.CSV.spec diff --git a/test/Test/Pipes.CSV.purs b/test/Test/Pipes.CSV.purs index cfaa152..d17353a 100644 --- a/test/Test/Pipes.CSV.purs +++ b/test/Test/Pipes.CSV.purs @@ -6,22 +6,26 @@ import Control.Monad.Gen (chooseInt) import Control.Monad.Rec.Class (Step(..), tailRecM) import Data.Array as Array import Data.DateTime (DateTime) -import Data.Foldable (fold) +import Data.Foldable (fold, sum) import Data.Maybe (Maybe(..), fromJust) import Data.Newtype (wrap) import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy) +import Data.String.CodePoints as String.CodePoints +import Data.Tuple.Nested ((/\)) import Effect.Class (liftEffect) +import Effect.Console (log) import Node.Encoding (Encoding(..)) import Partial.Unsafe (unsafePartial) import Pipes (yield, (>->)) import Pipes.CSV as Pipes.CSV import Pipes.Collect as Pipes.Collect +import Pipes.Construct as Pipes.Construct import Pipes.Node.Buffer as Pipes.Buffer import Pipes.Node.Stream as Pipes.Stream -import Pipes.Prelude (map, toListM) as Pipes +import Pipes.Prelude (chain, map, toListM) as Pipes import Pipes.Util as Pipes.Util import Test.QuickCheck.Gen (randomSample') -import Test.Spec (Spec, describe, it) +import Test.Spec (Spec, before, describe, it) import Test.Spec.Assertions (shouldEqual) csv :: String @@ -62,27 +66,23 @@ spec = , { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" } , { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" } ] - it "parses large csv" do - nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9) - let - csvRows = [ "id\n" ] <> ((_ <> "\n") <$> show <$> nums) - csv' = - let - go ix - | Just a <- Array.index csvRows ix = yield a $> Loop (ix + 1) - | otherwise = pure $ Done unit - in - tailRecM go 0 - in16kbChunks = - Pipes.Util.chunked 16000 - >-> Pipes.Stream.inEOS (Pipes.map fold) - >-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8) + before + (do + nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9) + let + chars = [ "i","d","\n" ] <> join ((\n -> [show n, "\n"]) <$> nums) + bufs <- Pipes.Collect.toArray + $ Pipes.Stream.withEOS (Pipes.Construct.eachArray chars) + >-> Pipes.Util.chunked 1000 + >-> Pipes.Stream.inEOS (Pipes.map fold >-> Pipes.Buffer.fromString UTF8) + >-> Pipes.Stream.unEOS + pure $ nums /\ bufs + ) + $ it "parses large csv" \(nums /\ bufs) -> do + rows <- + Pipes.Collect.toArray + $ Pipes.Stream.withEOS (Pipes.Construct.eachArray bufs) + >-> Pipes.CSV.parse @(id :: Int) + >-> Pipes.Stream.unEOS - rows <- - Pipes.Collect.toArray - $ Pipes.Stream.withEOS csv' - >-> in16kbChunks - >-> Pipes.CSV.parse - >-> Pipes.Stream.unEOS - - rows `shouldEqual` ((\id -> { id }) <$> nums) + rows `shouldEqual` ((\id -> { id }) <$> nums)