fix: improve performance

This commit is contained in:
orion 2024-05-13 14:00:13 -05:00
parent e87d81cc1d
commit 2b16a38a66
Signed by: orion
GPG Key ID: 6D4165AE4C928719
7 changed files with 45 additions and 48 deletions

BIN
bun.lockb

Binary file not shown.

View File

@ -18,7 +18,7 @@ workspace:
- newtype: ">=5.0.0 <6.0.0"
- node-buffer: ">=9.0.0 <10.0.0"
- node-event-emitter: ">=3.0.0 <4.0.0"
- node-stream-pipes: ">=1.2.3 <2.0.0"
- node-stream-pipes: ">=1.3.0 <2.0.0"
- node-streams: ">=9.0.0 <10.0.0"
- nullable: ">=6.0.0 <7.0.0"
- numbers: ">=9.0.1 <10.0.0"
@ -607,8 +607,8 @@ packages:
- effect
node-stream-pipes:
type: registry
version: 1.2.3
integrity: sha256-lXD3x6+p72uBrRHGHrob2jrrBDakhhZE9O9EYE4aFiE=
version: 1.3.0
integrity: sha256-5Jpf0BLn0ExQWYxbTTewai4M8quEmEVHxihc9CM1Juo=
dependencies:
- aff
- arrays

View File

@ -10,6 +10,7 @@ package:
strict: true
pedanticPackages: true
dependencies:
- node-stream-pipes: ">=1.3.0 <2.0.0"
- aff: ">=7.1.0 <8.0.0"
- arrays: ">=7.3.0 <8.0.0"
- bifunctors: ">=6.0.0 <7.0.0"
@ -25,7 +26,6 @@ package:
- newtype: ">=5.0.0 <6.0.0"
- node-buffer: ">=9.0.0 <10.0.0"
- node-event-emitter: ">=3.0.0 <4.0.0"
- node-stream-pipes: ">=1.2.3 <2.0.0"
- node-streams: ">=9.0.0 <10.0.0"
- nullable: ">=6.0.0 <7.0.0"
- numbers: ">=9.0.1 <10.0.0"

View File

@ -54,15 +54,15 @@ recordToForeign = unsafeCoerce
-- | Requires an ordered array of column names.
make
:: forall @config @missing @extra
. Union config missing (Config extra)
. Union config missing (Config extra)
=> Array String
-> { | config }
-> Effect (CSVStringifier ())
make columns =
makeImpl
<<< unsafeToForeign
<<< Object.union (recordToForeign { columns, header: true })
<<< recordToForeign
<<< unsafeToForeign
<<< Object.union (recordToForeign { columns, header: true })
<<< recordToForeign
-- | Convert the raw stream to a typed ObjectStream
toObjectStream :: CSVStringifier () -> Object.Transform (Array String) String

View File

@ -2,9 +2,9 @@ module Pipes.CSV where
import Prelude
import Control.Monad.Error.Class (class MonadThrow, liftEither)
import Control.Monad.Error.Class (liftEither)
import Control.Monad.Except (runExcept)
import Control.Monad.Rec.Class (class MonadRec, forever)
import Control.Monad.Rec.Class (forever)
import Control.Monad.ST.Global as ST
import Control.Monad.ST.Ref as STRef
import Data.Array as Array
@ -14,9 +14,9 @@ import Data.FunctorWithIndex (mapWithIndex)
import Data.Map as Map
import Data.Maybe (Maybe(..))
import Data.Tuple.Nested ((/\))
import Effect.Aff.Class (class MonadAff)
import Effect.Aff (Aff)
import Effect.Class (liftEffect)
import Effect.Exception (Error, error)
import Effect.Exception (error)
import Node.Buffer (Buffer)
import Node.Stream.CSV.Parse as CSV.Parse
import Node.Stream.CSV.Stringify as CSV.Stringify
@ -47,13 +47,10 @@ import Type.Prelude (Proxy(..))
-- | rows `shouldEqual` [{id: 1, foo: "hi", is_deleted: false}, {id: 2, foo: "bye", is_deleted: true}]
-- | ```
parse
:: forall @r rl m
. MonadAff m
=> MonadThrow Error m
=> MonadRec m
=> RowToList r rl
:: forall @r rl
. RowToList r rl
=> ReadCSVRecord r rl
=> Pipe (Maybe Buffer) (Maybe { | r }) m Unit
=> Pipe (Maybe Buffer) (Maybe { | r }) Aff Unit
parse = do
raw <- liftEffect $ CSV.Parse.make {}
colsST <- liftEffect $ ST.toEffect $ STRef.new Nothing
@ -77,14 +74,14 @@ parse = do
-- | Transforms buffer chunks of a CSV file to parsed
-- | arrays of CSV values.
parseRaw :: forall m. MonadAff m => MonadThrow Error m => Pipe (Maybe Buffer) (Maybe (Array String)) m Unit
parseRaw :: Pipe (Maybe Buffer) (Maybe (Array String)) Aff Unit
parseRaw = do
s <- liftEffect $ CSV.Parse.toObjectStream <$> CSV.Parse.make {}
Pipes.Stream.fromTransform s
-- | Transforms CSV rows into stringified CSV records
-- | using the given ordered array of column names.
stringifyRaw :: forall m. MonadAff m => MonadThrow Error m => Array String -> Pipe (Maybe (Array String)) (Maybe String) m Unit
stringifyRaw :: Array String -> Pipe (Maybe (Array String)) (Maybe String) Aff Unit
stringifyRaw columns = do
s <- liftEffect $ CSV.Stringify.toObjectStream <$> CSV.Stringify.make columns {}
Pipes.Stream.fromTransform s
@ -92,7 +89,7 @@ stringifyRaw columns = do
-- | Transforms purescript records into stringified CSV records.
-- |
-- | Columns are inferred from the record's keys, ordered alphabetically.
stringify :: forall m r rl. MonadRec m => MonadAff m => MonadThrow Error m => WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) m Unit
stringify :: forall r rl. WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) Aff Unit
stringify = do
raw <- liftEffect $ CSV.Stringify.make (Array.fromFoldable $ keys $ Proxy @r) {}
let

View File

@ -10,5 +10,5 @@ import Test.Spec.Reporter (specReporter)
import Test.Spec.Runner (defaultConfig, runSpec')
main :: Effect Unit
main = launchAff_ $ runSpec' (defaultConfig { timeout = Nothing }) [ specReporter ] do
main = launchAff_ $ runSpec' (defaultConfig { failFast = true, timeout = Nothing }) [ specReporter ] do
Test.Pipes.CSV.spec

View File

@ -6,22 +6,26 @@ import Control.Monad.Gen (chooseInt)
import Control.Monad.Rec.Class (Step(..), tailRecM)
import Data.Array as Array
import Data.DateTime (DateTime)
import Data.Foldable (fold)
import Data.Foldable (fold, sum)
import Data.Maybe (Maybe(..), fromJust)
import Data.Newtype (wrap)
import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy)
import Data.String.CodePoints as String.CodePoints
import Data.Tuple.Nested ((/\))
import Effect.Class (liftEffect)
import Effect.Console (log)
import Node.Encoding (Encoding(..))
import Partial.Unsafe (unsafePartial)
import Pipes (yield, (>->))
import Pipes.CSV as Pipes.CSV
import Pipes.Collect as Pipes.Collect
import Pipes.Construct as Pipes.Construct
import Pipes.Node.Buffer as Pipes.Buffer
import Pipes.Node.Stream as Pipes.Stream
import Pipes.Prelude (map, toListM) as Pipes
import Pipes.Prelude (chain, map, toListM) as Pipes
import Pipes.Util as Pipes.Util
import Test.QuickCheck.Gen (randomSample')
import Test.Spec (Spec, describe, it)
import Test.Spec (Spec, before, describe, it)
import Test.Spec.Assertions (shouldEqual)
csv :: String
@ -62,27 +66,23 @@ spec =
, { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
]
it "parses large csv" do
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
let
csvRows = [ "id\n" ] <> ((_ <> "\n") <$> show <$> nums)
csv' =
let
go ix
| Just a <- Array.index csvRows ix = yield a $> Loop (ix + 1)
| otherwise = pure $ Done unit
in
tailRecM go 0
in16kbChunks =
Pipes.Util.chunked 16000
>-> Pipes.Stream.inEOS (Pipes.map fold)
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8)
before
(do
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
let
chars = [ "i","d","\n" ] <> join ((\n -> [show n, "\n"]) <$> nums)
bufs <- Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray chars)
>-> Pipes.Util.chunked 1000
>-> Pipes.Stream.inEOS (Pipes.map fold >-> Pipes.Buffer.fromString UTF8)
>-> Pipes.Stream.unEOS
pure $ nums /\ bufs
)
$ it "parses large csv" \(nums /\ bufs) -> do
rows <-
Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray bufs)
>-> Pipes.CSV.parse @(id :: Int)
>-> Pipes.Stream.unEOS
rows <-
Pipes.Collect.toArray
$ Pipes.Stream.withEOS csv'
>-> in16kbChunks
>-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS
rows `shouldEqual` ((\id -> { id }) <$> nums)
rows `shouldEqual` ((\id -> { id }) <$> nums)