fix: improve performance
This commit is contained in:
parent
e87d81cc1d
commit
2b16a38a66
@ -18,7 +18,7 @@ workspace:
|
||||
- newtype: ">=5.0.0 <6.0.0"
|
||||
- node-buffer: ">=9.0.0 <10.0.0"
|
||||
- node-event-emitter: ">=3.0.0 <4.0.0"
|
||||
- node-stream-pipes: ">=1.2.3 <2.0.0"
|
||||
- node-stream-pipes: ">=1.3.0 <2.0.0"
|
||||
- node-streams: ">=9.0.0 <10.0.0"
|
||||
- nullable: ">=6.0.0 <7.0.0"
|
||||
- numbers: ">=9.0.1 <10.0.0"
|
||||
@ -607,8 +607,8 @@ packages:
|
||||
- effect
|
||||
node-stream-pipes:
|
||||
type: registry
|
||||
version: 1.2.3
|
||||
integrity: sha256-lXD3x6+p72uBrRHGHrob2jrrBDakhhZE9O9EYE4aFiE=
|
||||
version: 1.3.0
|
||||
integrity: sha256-5Jpf0BLn0ExQWYxbTTewai4M8quEmEVHxihc9CM1Juo=
|
||||
dependencies:
|
||||
- aff
|
||||
- arrays
|
||||
|
@ -10,6 +10,7 @@ package:
|
||||
strict: true
|
||||
pedanticPackages: true
|
||||
dependencies:
|
||||
- node-stream-pipes: ">=1.3.0 <2.0.0"
|
||||
- aff: ">=7.1.0 <8.0.0"
|
||||
- arrays: ">=7.3.0 <8.0.0"
|
||||
- bifunctors: ">=6.0.0 <7.0.0"
|
||||
@ -25,7 +26,6 @@ package:
|
||||
- newtype: ">=5.0.0 <6.0.0"
|
||||
- node-buffer: ">=9.0.0 <10.0.0"
|
||||
- node-event-emitter: ">=3.0.0 <4.0.0"
|
||||
- node-stream-pipes: ">=1.2.3 <2.0.0"
|
||||
- node-streams: ">=9.0.0 <10.0.0"
|
||||
- nullable: ">=6.0.0 <7.0.0"
|
||||
- numbers: ">=9.0.1 <10.0.0"
|
||||
|
@ -54,15 +54,15 @@ recordToForeign = unsafeCoerce
|
||||
-- | Requires an ordered array of column names.
|
||||
make
|
||||
:: forall @config @missing @extra
|
||||
. Union config missing (Config extra)
|
||||
. Union config missing (Config extra)
|
||||
=> Array String
|
||||
-> { | config }
|
||||
-> Effect (CSVStringifier ())
|
||||
make columns =
|
||||
makeImpl
|
||||
<<< unsafeToForeign
|
||||
<<< Object.union (recordToForeign { columns, header: true })
|
||||
<<< recordToForeign
|
||||
<<< unsafeToForeign
|
||||
<<< Object.union (recordToForeign { columns, header: true })
|
||||
<<< recordToForeign
|
||||
|
||||
-- | Convert the raw stream to a typed ObjectStream
|
||||
toObjectStream :: CSVStringifier () -> Object.Transform (Array String) String
|
||||
|
@ -2,9 +2,9 @@ module Pipes.CSV where
|
||||
|
||||
import Prelude
|
||||
|
||||
import Control.Monad.Error.Class (class MonadThrow, liftEither)
|
||||
import Control.Monad.Error.Class (liftEither)
|
||||
import Control.Monad.Except (runExcept)
|
||||
import Control.Monad.Rec.Class (class MonadRec, forever)
|
||||
import Control.Monad.Rec.Class (forever)
|
||||
import Control.Monad.ST.Global as ST
|
||||
import Control.Monad.ST.Ref as STRef
|
||||
import Data.Array as Array
|
||||
@ -14,9 +14,9 @@ import Data.FunctorWithIndex (mapWithIndex)
|
||||
import Data.Map as Map
|
||||
import Data.Maybe (Maybe(..))
|
||||
import Data.Tuple.Nested ((/\))
|
||||
import Effect.Aff.Class (class MonadAff)
|
||||
import Effect.Aff (Aff)
|
||||
import Effect.Class (liftEffect)
|
||||
import Effect.Exception (Error, error)
|
||||
import Effect.Exception (error)
|
||||
import Node.Buffer (Buffer)
|
||||
import Node.Stream.CSV.Parse as CSV.Parse
|
||||
import Node.Stream.CSV.Stringify as CSV.Stringify
|
||||
@ -47,13 +47,10 @@ import Type.Prelude (Proxy(..))
|
||||
-- | rows `shouldEqual` [{id: 1, foo: "hi", is_deleted: false}, {id: 2, foo: "bye", is_deleted: true}]
|
||||
-- | ```
|
||||
parse
|
||||
:: forall @r rl m
|
||||
. MonadAff m
|
||||
=> MonadThrow Error m
|
||||
=> MonadRec m
|
||||
=> RowToList r rl
|
||||
:: forall @r rl
|
||||
. RowToList r rl
|
||||
=> ReadCSVRecord r rl
|
||||
=> Pipe (Maybe Buffer) (Maybe { | r }) m Unit
|
||||
=> Pipe (Maybe Buffer) (Maybe { | r }) Aff Unit
|
||||
parse = do
|
||||
raw <- liftEffect $ CSV.Parse.make {}
|
||||
colsST <- liftEffect $ ST.toEffect $ STRef.new Nothing
|
||||
@ -77,14 +74,14 @@ parse = do
|
||||
|
||||
-- | Transforms buffer chunks of a CSV file to parsed
|
||||
-- | arrays of CSV values.
|
||||
parseRaw :: forall m. MonadAff m => MonadThrow Error m => Pipe (Maybe Buffer) (Maybe (Array String)) m Unit
|
||||
parseRaw :: Pipe (Maybe Buffer) (Maybe (Array String)) Aff Unit
|
||||
parseRaw = do
|
||||
s <- liftEffect $ CSV.Parse.toObjectStream <$> CSV.Parse.make {}
|
||||
Pipes.Stream.fromTransform s
|
||||
|
||||
-- | Transforms CSV rows into stringified CSV records
|
||||
-- | using the given ordered array of column names.
|
||||
stringifyRaw :: forall m. MonadAff m => MonadThrow Error m => Array String -> Pipe (Maybe (Array String)) (Maybe String) m Unit
|
||||
stringifyRaw :: Array String -> Pipe (Maybe (Array String)) (Maybe String) Aff Unit
|
||||
stringifyRaw columns = do
|
||||
s <- liftEffect $ CSV.Stringify.toObjectStream <$> CSV.Stringify.make columns {}
|
||||
Pipes.Stream.fromTransform s
|
||||
@ -92,7 +89,7 @@ stringifyRaw columns = do
|
||||
-- | Transforms purescript records into stringified CSV records.
|
||||
-- |
|
||||
-- | Columns are inferred from the record's keys, ordered alphabetically.
|
||||
stringify :: forall m r rl. MonadRec m => MonadAff m => MonadThrow Error m => WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) m Unit
|
||||
stringify :: forall r rl. WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) Aff Unit
|
||||
stringify = do
|
||||
raw <- liftEffect $ CSV.Stringify.make (Array.fromFoldable $ keys $ Proxy @r) {}
|
||||
let
|
||||
|
@ -10,5 +10,5 @@ import Test.Spec.Reporter (specReporter)
|
||||
import Test.Spec.Runner (defaultConfig, runSpec')
|
||||
|
||||
main :: Effect Unit
|
||||
main = launchAff_ $ runSpec' (defaultConfig { timeout = Nothing }) [ specReporter ] do
|
||||
main = launchAff_ $ runSpec' (defaultConfig { failFast = true, timeout = Nothing }) [ specReporter ] do
|
||||
Test.Pipes.CSV.spec
|
||||
|
@ -6,22 +6,26 @@ import Control.Monad.Gen (chooseInt)
|
||||
import Control.Monad.Rec.Class (Step(..), tailRecM)
|
||||
import Data.Array as Array
|
||||
import Data.DateTime (DateTime)
|
||||
import Data.Foldable (fold)
|
||||
import Data.Foldable (fold, sum)
|
||||
import Data.Maybe (Maybe(..), fromJust)
|
||||
import Data.Newtype (wrap)
|
||||
import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy)
|
||||
import Data.String.CodePoints as String.CodePoints
|
||||
import Data.Tuple.Nested ((/\))
|
||||
import Effect.Class (liftEffect)
|
||||
import Effect.Console (log)
|
||||
import Node.Encoding (Encoding(..))
|
||||
import Partial.Unsafe (unsafePartial)
|
||||
import Pipes (yield, (>->))
|
||||
import Pipes.CSV as Pipes.CSV
|
||||
import Pipes.Collect as Pipes.Collect
|
||||
import Pipes.Construct as Pipes.Construct
|
||||
import Pipes.Node.Buffer as Pipes.Buffer
|
||||
import Pipes.Node.Stream as Pipes.Stream
|
||||
import Pipes.Prelude (map, toListM) as Pipes
|
||||
import Pipes.Prelude (chain, map, toListM) as Pipes
|
||||
import Pipes.Util as Pipes.Util
|
||||
import Test.QuickCheck.Gen (randomSample')
|
||||
import Test.Spec (Spec, describe, it)
|
||||
import Test.Spec (Spec, before, describe, it)
|
||||
import Test.Spec.Assertions (shouldEqual)
|
||||
|
||||
csv :: String
|
||||
@ -62,27 +66,23 @@ spec =
|
||||
, { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
|
||||
, { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
|
||||
]
|
||||
it "parses large csv" do
|
||||
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
|
||||
let
|
||||
csvRows = [ "id\n" ] <> ((_ <> "\n") <$> show <$> nums)
|
||||
csv' =
|
||||
let
|
||||
go ix
|
||||
| Just a <- Array.index csvRows ix = yield a $> Loop (ix + 1)
|
||||
| otherwise = pure $ Done unit
|
||||
in
|
||||
tailRecM go 0
|
||||
in16kbChunks =
|
||||
Pipes.Util.chunked 16000
|
||||
>-> Pipes.Stream.inEOS (Pipes.map fold)
|
||||
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8)
|
||||
before
|
||||
(do
|
||||
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
|
||||
let
|
||||
chars = [ "i","d","\n" ] <> join ((\n -> [show n, "\n"]) <$> nums)
|
||||
bufs <- Pipes.Collect.toArray
|
||||
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray chars)
|
||||
>-> Pipes.Util.chunked 1000
|
||||
>-> Pipes.Stream.inEOS (Pipes.map fold >-> Pipes.Buffer.fromString UTF8)
|
||||
>-> Pipes.Stream.unEOS
|
||||
pure $ nums /\ bufs
|
||||
)
|
||||
$ it "parses large csv" \(nums /\ bufs) -> do
|
||||
rows <-
|
||||
Pipes.Collect.toArray
|
||||
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray bufs)
|
||||
>-> Pipes.CSV.parse @(id :: Int)
|
||||
>-> Pipes.Stream.unEOS
|
||||
|
||||
rows <-
|
||||
Pipes.Collect.toArray
|
||||
$ Pipes.Stream.withEOS csv'
|
||||
>-> in16kbChunks
|
||||
>-> Pipes.CSV.parse
|
||||
>-> Pipes.Stream.unEOS
|
||||
|
||||
rows `shouldEqual` ((\id -> { id }) <$> nums)
|
||||
rows `shouldEqual` ((\id -> { id }) <$> nums)
|
||||
|
Loading…
Reference in New Issue
Block a user