fix: improve performance

This commit is contained in:
orion 2024-05-13 14:00:13 -05:00
parent e87d81cc1d
commit 2b16a38a66
Signed by: orion
GPG Key ID: 6D4165AE4C928719
7 changed files with 45 additions and 48 deletions

BIN
bun.lockb

Binary file not shown.

View File

@ -18,7 +18,7 @@ workspace:
- newtype: ">=5.0.0 <6.0.0" - newtype: ">=5.0.0 <6.0.0"
- node-buffer: ">=9.0.0 <10.0.0" - node-buffer: ">=9.0.0 <10.0.0"
- node-event-emitter: ">=3.0.0 <4.0.0" - node-event-emitter: ">=3.0.0 <4.0.0"
- node-stream-pipes: ">=1.2.3 <2.0.0" - node-stream-pipes: ">=1.3.0 <2.0.0"
- node-streams: ">=9.0.0 <10.0.0" - node-streams: ">=9.0.0 <10.0.0"
- nullable: ">=6.0.0 <7.0.0" - nullable: ">=6.0.0 <7.0.0"
- numbers: ">=9.0.1 <10.0.0" - numbers: ">=9.0.1 <10.0.0"
@ -607,8 +607,8 @@ packages:
- effect - effect
node-stream-pipes: node-stream-pipes:
type: registry type: registry
version: 1.2.3 version: 1.3.0
integrity: sha256-lXD3x6+p72uBrRHGHrob2jrrBDakhhZE9O9EYE4aFiE= integrity: sha256-5Jpf0BLn0ExQWYxbTTewai4M8quEmEVHxihc9CM1Juo=
dependencies: dependencies:
- aff - aff
- arrays - arrays

View File

@ -10,6 +10,7 @@ package:
strict: true strict: true
pedanticPackages: true pedanticPackages: true
dependencies: dependencies:
- node-stream-pipes: ">=1.3.0 <2.0.0"
- aff: ">=7.1.0 <8.0.0" - aff: ">=7.1.0 <8.0.0"
- arrays: ">=7.3.0 <8.0.0" - arrays: ">=7.3.0 <8.0.0"
- bifunctors: ">=6.0.0 <7.0.0" - bifunctors: ">=6.0.0 <7.0.0"
@ -25,7 +26,6 @@ package:
- newtype: ">=5.0.0 <6.0.0" - newtype: ">=5.0.0 <6.0.0"
- node-buffer: ">=9.0.0 <10.0.0" - node-buffer: ">=9.0.0 <10.0.0"
- node-event-emitter: ">=3.0.0 <4.0.0" - node-event-emitter: ">=3.0.0 <4.0.0"
- node-stream-pipes: ">=1.2.3 <2.0.0"
- node-streams: ">=9.0.0 <10.0.0" - node-streams: ">=9.0.0 <10.0.0"
- nullable: ">=6.0.0 <7.0.0" - nullable: ">=6.0.0 <7.0.0"
- numbers: ">=9.0.1 <10.0.0" - numbers: ">=9.0.1 <10.0.0"

View File

@ -54,15 +54,15 @@ recordToForeign = unsafeCoerce
-- | Requires an ordered array of column names. -- | Requires an ordered array of column names.
make make
:: forall @config @missing @extra :: forall @config @missing @extra
. Union config missing (Config extra) . Union config missing (Config extra)
=> Array String => Array String
-> { | config } -> { | config }
-> Effect (CSVStringifier ()) -> Effect (CSVStringifier ())
make columns = make columns =
makeImpl makeImpl
<<< unsafeToForeign <<< unsafeToForeign
<<< Object.union (recordToForeign { columns, header: true }) <<< Object.union (recordToForeign { columns, header: true })
<<< recordToForeign <<< recordToForeign
-- | Convert the raw stream to a typed ObjectStream -- | Convert the raw stream to a typed ObjectStream
toObjectStream :: CSVStringifier () -> Object.Transform (Array String) String toObjectStream :: CSVStringifier () -> Object.Transform (Array String) String

View File

@ -2,9 +2,9 @@ module Pipes.CSV where
import Prelude import Prelude
import Control.Monad.Error.Class (class MonadThrow, liftEither) import Control.Monad.Error.Class (liftEither)
import Control.Monad.Except (runExcept) import Control.Monad.Except (runExcept)
import Control.Monad.Rec.Class (class MonadRec, forever) import Control.Monad.Rec.Class (forever)
import Control.Monad.ST.Global as ST import Control.Monad.ST.Global as ST
import Control.Monad.ST.Ref as STRef import Control.Monad.ST.Ref as STRef
import Data.Array as Array import Data.Array as Array
@ -14,9 +14,9 @@ import Data.FunctorWithIndex (mapWithIndex)
import Data.Map as Map import Data.Map as Map
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.Tuple.Nested ((/\)) import Data.Tuple.Nested ((/\))
import Effect.Aff.Class (class MonadAff) import Effect.Aff (Aff)
import Effect.Class (liftEffect) import Effect.Class (liftEffect)
import Effect.Exception (Error, error) import Effect.Exception (error)
import Node.Buffer (Buffer) import Node.Buffer (Buffer)
import Node.Stream.CSV.Parse as CSV.Parse import Node.Stream.CSV.Parse as CSV.Parse
import Node.Stream.CSV.Stringify as CSV.Stringify import Node.Stream.CSV.Stringify as CSV.Stringify
@ -47,13 +47,10 @@ import Type.Prelude (Proxy(..))
-- | rows `shouldEqual` [{id: 1, foo: "hi", is_deleted: false}, {id: 2, foo: "bye", is_deleted: true}] -- | rows `shouldEqual` [{id: 1, foo: "hi", is_deleted: false}, {id: 2, foo: "bye", is_deleted: true}]
-- | ``` -- | ```
parse parse
:: forall @r rl m :: forall @r rl
. MonadAff m . RowToList r rl
=> MonadThrow Error m
=> MonadRec m
=> RowToList r rl
=> ReadCSVRecord r rl => ReadCSVRecord r rl
=> Pipe (Maybe Buffer) (Maybe { | r }) m Unit => Pipe (Maybe Buffer) (Maybe { | r }) Aff Unit
parse = do parse = do
raw <- liftEffect $ CSV.Parse.make {} raw <- liftEffect $ CSV.Parse.make {}
colsST <- liftEffect $ ST.toEffect $ STRef.new Nothing colsST <- liftEffect $ ST.toEffect $ STRef.new Nothing
@ -77,14 +74,14 @@ parse = do
-- | Transforms buffer chunks of a CSV file to parsed -- | Transforms buffer chunks of a CSV file to parsed
-- | arrays of CSV values. -- | arrays of CSV values.
parseRaw :: forall m. MonadAff m => MonadThrow Error m => Pipe (Maybe Buffer) (Maybe (Array String)) m Unit parseRaw :: Pipe (Maybe Buffer) (Maybe (Array String)) Aff Unit
parseRaw = do parseRaw = do
s <- liftEffect $ CSV.Parse.toObjectStream <$> CSV.Parse.make {} s <- liftEffect $ CSV.Parse.toObjectStream <$> CSV.Parse.make {}
Pipes.Stream.fromTransform s Pipes.Stream.fromTransform s
-- | Transforms CSV rows into stringified CSV records -- | Transforms CSV rows into stringified CSV records
-- | using the given ordered array of column names. -- | using the given ordered array of column names.
stringifyRaw :: forall m. MonadAff m => MonadThrow Error m => Array String -> Pipe (Maybe (Array String)) (Maybe String) m Unit stringifyRaw :: Array String -> Pipe (Maybe (Array String)) (Maybe String) Aff Unit
stringifyRaw columns = do stringifyRaw columns = do
s <- liftEffect $ CSV.Stringify.toObjectStream <$> CSV.Stringify.make columns {} s <- liftEffect $ CSV.Stringify.toObjectStream <$> CSV.Stringify.make columns {}
Pipes.Stream.fromTransform s Pipes.Stream.fromTransform s
@ -92,7 +89,7 @@ stringifyRaw columns = do
-- | Transforms purescript records into stringified CSV records. -- | Transforms purescript records into stringified CSV records.
-- | -- |
-- | Columns are inferred from the record's keys, ordered alphabetically. -- | Columns are inferred from the record's keys, ordered alphabetically.
stringify :: forall m r rl. MonadRec m => MonadAff m => MonadThrow Error m => WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) m Unit stringify :: forall r rl. WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) Aff Unit
stringify = do stringify = do
raw <- liftEffect $ CSV.Stringify.make (Array.fromFoldable $ keys $ Proxy @r) {} raw <- liftEffect $ CSV.Stringify.make (Array.fromFoldable $ keys $ Proxy @r) {}
let let

View File

@ -10,5 +10,5 @@ import Test.Spec.Reporter (specReporter)
import Test.Spec.Runner (defaultConfig, runSpec') import Test.Spec.Runner (defaultConfig, runSpec')
main :: Effect Unit main :: Effect Unit
main = launchAff_ $ runSpec' (defaultConfig { timeout = Nothing }) [ specReporter ] do main = launchAff_ $ runSpec' (defaultConfig { failFast = true, timeout = Nothing }) [ specReporter ] do
Test.Pipes.CSV.spec Test.Pipes.CSV.spec

View File

@ -6,22 +6,26 @@ import Control.Monad.Gen (chooseInt)
import Control.Monad.Rec.Class (Step(..), tailRecM) import Control.Monad.Rec.Class (Step(..), tailRecM)
import Data.Array as Array import Data.Array as Array
import Data.DateTime (DateTime) import Data.DateTime (DateTime)
import Data.Foldable (fold) import Data.Foldable (fold, sum)
import Data.Maybe (Maybe(..), fromJust) import Data.Maybe (Maybe(..), fromJust)
import Data.Newtype (wrap) import Data.Newtype (wrap)
import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy) import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy)
import Data.String.CodePoints as String.CodePoints
import Data.Tuple.Nested ((/\))
import Effect.Class (liftEffect) import Effect.Class (liftEffect)
import Effect.Console (log)
import Node.Encoding (Encoding(..)) import Node.Encoding (Encoding(..))
import Partial.Unsafe (unsafePartial) import Partial.Unsafe (unsafePartial)
import Pipes (yield, (>->)) import Pipes (yield, (>->))
import Pipes.CSV as Pipes.CSV import Pipes.CSV as Pipes.CSV
import Pipes.Collect as Pipes.Collect import Pipes.Collect as Pipes.Collect
import Pipes.Construct as Pipes.Construct
import Pipes.Node.Buffer as Pipes.Buffer import Pipes.Node.Buffer as Pipes.Buffer
import Pipes.Node.Stream as Pipes.Stream import Pipes.Node.Stream as Pipes.Stream
import Pipes.Prelude (map, toListM) as Pipes import Pipes.Prelude (chain, map, toListM) as Pipes
import Pipes.Util as Pipes.Util import Pipes.Util as Pipes.Util
import Test.QuickCheck.Gen (randomSample') import Test.QuickCheck.Gen (randomSample')
import Test.Spec (Spec, describe, it) import Test.Spec (Spec, before, describe, it)
import Test.Spec.Assertions (shouldEqual) import Test.Spec.Assertions (shouldEqual)
csv :: String csv :: String
@ -62,27 +66,23 @@ spec =
, { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" } , { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" } , { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
] ]
it "parses large csv" do before
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9) (do
let nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
csvRows = [ "id\n" ] <> ((_ <> "\n") <$> show <$> nums) let
csv' = chars = [ "i","d","\n" ] <> join ((\n -> [show n, "\n"]) <$> nums)
let bufs <- Pipes.Collect.toArray
go ix $ Pipes.Stream.withEOS (Pipes.Construct.eachArray chars)
| Just a <- Array.index csvRows ix = yield a $> Loop (ix + 1) >-> Pipes.Util.chunked 1000
| otherwise = pure $ Done unit >-> Pipes.Stream.inEOS (Pipes.map fold >-> Pipes.Buffer.fromString UTF8)
in >-> Pipes.Stream.unEOS
tailRecM go 0 pure $ nums /\ bufs
in16kbChunks = )
Pipes.Util.chunked 16000 $ it "parses large csv" \(nums /\ bufs) -> do
>-> Pipes.Stream.inEOS (Pipes.map fold) rows <-
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8) Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray bufs)
>-> Pipes.CSV.parse @(id :: Int)
>-> Pipes.Stream.unEOS
rows <- rows `shouldEqual` ((\id -> { id }) <$> nums)
Pipes.Collect.toArray
$ Pipes.Stream.withEOS csv'
>-> in16kbChunks
>-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS
rows `shouldEqual` ((\id -> { id }) <$> nums)