feat: copy csv-stream

orion 2024-05-14 11:41:29 -05:00
commit 0d1c801720
Signed by: orion
GPG Key ID: 6D4165AE4C928719
20 changed files with 1815 additions and 0 deletions

12
.gitignore vendored Normal file

@@ -0,0 +1,12 @@
bower_components/
node_modules/
.pulp-cache/
output/
output-es/
generated-docs/
.psc-package/
.psc*
.purs*
.psa*
.spago
.tmp/

1
.tool-versions Normal file

@@ -0,0 +1 @@
purescript 0.15.15

81
README.md Normal file

@@ -0,0 +1,81 @@
# purescript-csv-stream
Type-safe bindings for the streaming API of `csv-parse` and `csv-stringify`.
## Installing
```bash
spago install csv-stream
{bun|yarn|npm|pnpm} install csv-parse csv-stringify
```
## Examples
### Stream
```purescript
module Main where

import Prelude

import Data.Foldable (for_)
import Effect (Effect)
import Effect.Aff (launchAff_)
import Effect.Class (liftEffect)
import Node.Encoding (Encoding(..))
import Node.Stream as Stream
import Node.Stream.CSV.Parse as CSV.Parse
import Node.Stream.CSV.Stringify as CSV.Stringify

type MyCSVType1 = { a :: Int, b :: Int, bar :: String, baz :: Boolean }
type MyCSVType2 = { ab :: Int, bar :: String, baz :: Boolean }

atob :: MyCSVType1 -> MyCSVType2
atob { a, b, bar, baz } = { ab: a + b, bar, baz }

myCSV :: String
myCSV = "a,b,bar,baz\n1,2,\"hello, world!\",true\n3,3,,f"

main :: Effect Unit
main = launchAff_ do
  parser <- liftEffect $ CSV.Parse.make {}
  stringifier <- liftEffect $ CSV.Stringify.make [ "ab", "bar", "baz" ] {}
  input <- liftEffect $ Stream.readableFromString myCSV UTF8
  void $ liftEffect $ Stream.pipe input parser
  records :: Array MyCSVType1 <- CSV.Parse.readAll parser
  liftEffect $ for_ records \r -> CSV.Stringify.write stringifier $ atob r
  liftEffect $ Stream.end stringifier
  -- "ab,bar,baz\n3,\"hello, world!\",true\n6,,false"
  csvString <- CSV.Stringify.readAll stringifier
  pure unit
```
### Synchronous
```purescript
module Main where

import Prelude

import Effect (Effect)
import Effect.Aff (launchAff_)
import Node.Stream.CSV.Parse as CSV.Parse
import Node.Stream.CSV.Stringify as CSV.Stringify

type MyCSVType1 = { a :: Int, b :: Int, bar :: String, baz :: Boolean }
type MyCSVType2 = { ab :: Int, bar :: String, baz :: Boolean }

atob :: MyCSVType1 -> MyCSVType2
atob { a, b, bar, baz } = { ab: a + b, bar, baz }

myCSV :: String
myCSV = "a,b,bar,baz\n1,2,\"hello, world!\",true\n3,3,,f"

main :: Effect Unit
main = launchAff_ do
  records :: Array MyCSVType1 <- CSV.Parse.parse myCSV
  -- "ab,bar,baz\n3,\"hello, world!\",true\n6,,false"
  csvString <- CSV.Stringify.stringify (atob <$> records)
  pure unit
```

BIN
bun.lockb Executable file

Binary file not shown.

27
bun/fmt.js Normal file

@@ -0,0 +1,27 @@
/** @type {(parser: string, ps: string[]) => import("bun").Subprocess} */
const prettier = (parser, ps) =>
Bun.spawn(["bun", "x", "prettier", "--write", "--parser", parser, ...ps], {
stdout: "inherit",
stderr: "inherit",
});
const procs = [
prettier("babel", ["./src/**/*.js", "./bun/**/*.js", "./.prettierrc.cjs"]),
prettier("json", ["./package.json", "./jsconfig.json"]),
Bun.spawn(
[
"bun",
"x",
"purs-tidy",
"format-in-place",
"src/**/*.purs",
"test/**/*.purs",
],
{
stdout: "inherit",
stderr: "inherit",
},
),
];
await Promise.all(procs.map((p) => p.exited));

34
bun/prepare.js Normal file

@@ -0,0 +1,34 @@
import { readFile, writeFile } from "fs/promises";
import { execSync } from "child_process";
let ver = process.argv[2];
if (!ver) {
console.error(`tag required: bun bun/prepare.js v1.0.0`);
process.exit(1);
} else if (!/v\d+\.\d+\.\d+/.test(ver)) {
console.error(`invalid tag: ${ver}`);
process.exit(1);
}
ver = (/\d+\.\d+\.\d+/.exec(ver) || [])[0] || "";
const pkg = await readFile("./package.json", "utf8");
const pkgnew = pkg.replace(/"version": ".+"/, `"version": "v${ver}"`);
await writeFile("./package.json", pkgnew);
const spago = await readFile("./spago.yaml", "utf8");
const spagonew = spago.replace(/version: .+/, `version: '${ver}'`);
await writeFile("./spago.yaml", spagonew);
const readme = await readFile("./README.md", "utf8");
const readmenew = readme.replace(
/packages\/purescript-csv-stream\/.+?\//g,
`/packages/purescript-csv-stream/${ver}/`,
);
await writeFile("./README.md", readmenew);
execSync(`git add spago.yaml package.json README.md`);
execSync(`git commit -m 'chore: prepare v${ver}'`);
execSync(`git tag v${ver}`);
execSync(`git push --tags`);
execSync(`git push --mirror github-mirror`);

16
jsconfig.json Normal file

@@ -0,0 +1,16 @@
{
"compilerOptions": {
"types": ["bun-types"],
"lib": ["esnext"],
"target": "esnext",
"module": "esnext",
"moduleResolution": "bundler",
"moduleDetection": "force",
"jsx": "react",
"allowJs": true,
"checkJs": true,
"noEmit": true,
"strict": true
},
"include": ["src/**/*.js", "bun/**/*.js"]
}

13
package.json Normal file

@@ -0,0 +1,13 @@
{
"name": "purescript-csv-stream",
"version": "v2.0.1",
"type": "module",
"dependencies": {
"csv-parse": "^5.5.5",
"csv-stringify": "^6.4.6",
"decimal.js": "^10.4.3"
},
"devDependencies": {
"typescript": "^5.4.5"
}
}

1076
spago.lock Normal file

File diff suppressed because it is too large

56
spago.yaml Normal file

@@ -0,0 +1,56 @@
package:
name: csv-stream
publish:
version: '2.0.1'
license: 'GPL-3.0-or-later'
location:
githubOwner: 'cakekindel'
githubRepo: 'purescript-csv-stream'
build:
strict: true
pedanticPackages: true
dependencies:
- node-stream-pipes: ">=1.3.0 <2.0.0"
- aff: ">=7.1.0 <8.0.0"
- arrays: ">=7.3.0 <8.0.0"
- bifunctors: ">=6.0.0 <7.0.0"
- datetime: ">=6.1.0 <7.0.0"
- effect: ">=4.0.0 <5.0.0"
- exceptions: ">=6.0.0 <7.0.0"
- foldable-traversable: ">=6.0.0 <7.0.0"
- foreign: ">=7.0.0 <8.0.0"
- foreign-object: ">=4.1.0 <5.0.0"
- integers: ">=6.0.0 <7.0.0"
- lists: ">=7.0.0 <8.0.0"
- maybe: ">=6.0.0 <7.0.0"
- newtype: ">=5.0.0 <6.0.0"
- node-buffer: ">=9.0.0 <10.0.0"
- node-event-emitter: ">=3.0.0 <4.0.0"
- node-streams: ">=9.0.0 <10.0.0"
- nullable: ">=6.0.0 <7.0.0"
- numbers: ">=9.0.1 <10.0.0"
- ordered-collections: ">=3.2.0 <4.0.0"
- pipes: ">=8.0.0 <9.0.0"
- precise-datetime: ">=7.0.0 <8.0.0"
- prelude: ">=6.0.1 <7.0.0"
- record: ">=4.0.0 <5.0.0"
- record-extra: ">=5.0.1 <6.0.0"
- st: ">=6.2.0 <7.0.0"
- strings: ">=6.0.1 <7.0.0"
- tailrec: ">=6.1.0 <7.0.0"
- transformers: ">=6.0.0 <7.0.0"
- tuples: ">=7.0.0 <8.0.0"
- typelevel-prelude: ">=7.0.0 <8.0.0"
- unsafe-coerce: ">=6.0.0 <7.0.0"
test:
main: Test.Main
dependencies:
- console
- gen
- node-fs
- node-zlib
- quickcheck
- simple-json
- spec
workspace:
extraPackages: {}

50
src/Data.CSV.Record.purs Normal file

@@ -0,0 +1,50 @@
module Data.CSV.Record where
import Prelude
import Control.Monad.Error.Class (liftMaybe)
import Control.Monad.Except (Except)
import Data.Array as Array
import Data.CSV (class ReadCSV, class WriteCSV, readCSV, writeCSV)
import Data.List.NonEmpty (NonEmptyList)
import Data.Map (Map)
import Data.Map as Map
import Data.Maybe (fromMaybe)
import Data.Symbol (class IsSymbol, reflectSymbol)
import Foreign (ForeignError(..))
import Prim.Row (class Cons, class Lacks)
import Prim.RowList (class RowToList, Cons, Nil, RowList)
import Record as Record
import Type.Prelude (Proxy(..))
class WriteCSVRecord :: Row Type -> RowList Type -> Constraint
class RowToList r rl <= WriteCSVRecord r rl | rl -> r where
writeCSVRecord :: { | r } -> Array String
instance (RowToList r (Cons k v tailrl), IsSymbol k, WriteCSV v, Lacks k tail, Cons k v tail r, WriteCSVRecord tail tailrl) => WriteCSVRecord r (Cons k v tailrl) where
writeCSVRecord r =
let
val = writeCSV $ Record.get (Proxy @k) r
tail = writeCSVRecord @tail @tailrl $ Record.delete (Proxy @k) r
in
[ val ] <> tail
instance WriteCSVRecord () Nil where
writeCSVRecord _ = []
class ReadCSVRecord :: Row Type -> RowList Type -> Constraint
class RowToList r rl <= ReadCSVRecord r rl | rl -> r where
readCSVRecord :: Map String Int -> Array String -> Except (NonEmptyList ForeignError) { | r }
instance (RowToList r (Cons k v tailrl), IsSymbol k, ReadCSV v, Lacks k tail, Cons k v tail r, ReadCSVRecord tail tailrl) => ReadCSVRecord r (Cons k v tailrl) where
readCSVRecord cols vals = do
let
k = reflectSymbol (Proxy @k)
pos <- liftMaybe (pure $ ForeignError $ "reached end of row looking for column " <> k) $ Map.lookup k cols
let valraw = fromMaybe "" $ Array.index vals pos
val <- readCSV @v valraw
tail <- readCSVRecord @tail @tailrl cols vals
pure $ Record.insert (Proxy @k) val tail
instance ReadCSVRecord () Nil where
readCSVRecord _ _ = pure {}
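For context beyond the library's own instance chains, here is a minimal decoding/encoding sketch against the classes above (not part of this commit; `decodeRow`, `encodeRow`, and the sample data are hypothetical, written in the same visible-type-application style the module itself uses):

```purescript
module Example where

import Prelude

import Control.Monad.Except (runExcept)
import Data.CSV.Record (class ReadCSVRecord, class WriteCSVRecord, readCSVRecord, writeCSVRecord)
import Data.Either (Either)
import Data.List.NonEmpty (NonEmptyList)
import Data.Map (Map)
import Data.Map as Map
import Data.Tuple.Nested ((/\))
import Foreign (ForeignError)
import Prim.RowList (class RowToList)

-- Decode one raw row against a header map (column name -> index).
-- The RowToList constraint lets the compiler derive the row-list
-- from the record type alone.
decodeRow
  :: forall @r rl
   . RowToList r rl
  => ReadCSVRecord r rl
  => Map String Int
  -> Array String
  -> Either (NonEmptyList ForeignError) { | r }
decodeRow cols = runExcept <<< readCSVRecord @r @rl cols

-- Encode a record back to raw cells, in the record's row-list
-- (alphabetical) order.
encodeRow
  :: forall @r rl
   . RowToList r rl
  => WriteCSVRecord r rl
  => { | r }
  -> Array String
encodeRow = writeCSVRecord @r @rl

-- Right { id: 1, name: "ada" }
decoded :: Either (NonEmptyList ForeignError) { id :: Int, name :: String }
decoded = decodeRow (Map.fromFoldable [ "id" /\ 0, "name" /\ 1 ]) [ "1", "ada" ]

-- [ "1", "ada" ]
encoded :: Array String
encoded = encodeRow { id: 1, name: "ada" }
```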

74
src/Data.CSV.purs Normal file

@@ -0,0 +1,74 @@
module Data.CSV where
import Prelude
import Control.Monad.Error.Class (liftMaybe, throwError)
import Control.Monad.Except (Except)
import Data.DateTime (DateTime)
import Data.Int as Int
import Data.List.NonEmpty (NonEmptyList)
import Data.Maybe (Maybe(..), maybe)
import Data.Newtype (unwrap)
import Data.Number (fromString) as Number
import Data.Number.Format (toString) as Number
import Data.PreciseDateTime (fromDateTime, fromRFC3339String, toDateTimeLossy, toRFC3339String)
import Data.RFC3339String (RFC3339String(..))
import Data.String as String
import Foreign (ForeignError(..))
class ReadCSV a where
readCSV :: String -> Except (NonEmptyList ForeignError) a
class WriteCSV a where
writeCSV :: a -> String
instance ReadCSV Int where
readCSV s = liftMaybe (pure $ ForeignError $ "invalid integer: " <> s) $ Int.fromString s
instance ReadCSV Number where
readCSV s = liftMaybe (pure $ ForeignError $ "invalid number: " <> s) $ Number.fromString s
instance ReadCSV String where
readCSV = pure
instance ReadCSV DateTime where
readCSV s = map toDateTimeLossy $ liftMaybe (pure $ ForeignError $ "invalid ISO date string: " <> s) $ fromRFC3339String $ RFC3339String s
instance ReadCSV Boolean where
readCSV s =
let
inner "t" = pure true
inner "true" = pure true
inner "yes" = pure true
inner "y" = pure true
inner "1" = pure true
inner "f" = pure false
inner "false" = pure false
inner "no" = pure false
inner "n" = pure false
inner "0" = pure false
inner _ = throwError $ pure $ ForeignError $ "invalid boolean value: " <> s
in
inner $ String.toLower s
instance ReadCSV a => ReadCSV (Maybe a) where
readCSV "" = pure Nothing
readCSV s = Just <$> readCSV s
instance WriteCSV Int where
writeCSV = Int.toStringAs Int.decimal
instance WriteCSV Number where
writeCSV = Number.toString
instance WriteCSV String where
writeCSV = identity
instance WriteCSV DateTime where
writeCSV = unwrap <<< toRFC3339String <<< fromDateTime
instance WriteCSV Boolean where
writeCSV = show
instance WriteCSV a => WriteCSV (Maybe a) where
writeCSV = maybe "" writeCSV
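The instances above are easiest to read next to a few concrete values. A small sketch (not part of this commit; expected output noted in comments):

```purescript
module Example where

import Prelude

import Control.Monad.Except (runExcept)
import Data.CSV (readCSV, writeCSV)
import Data.Maybe (Maybe(..))
import Effect (Effect)
import Effect.Console (log, logShow)

main :: Effect Unit
main = do
  logShow $ runExcept $ readCSV @Int "42"       -- Right 42
  logShow $ runExcept $ readCSV @Boolean "YES"  -- Right true (lowercased, then matched)
  logShow $ runExcept $ readCSV @(Maybe Int) "" -- Right Nothing (empty cell)
  log $ writeCSV 1.5                            -- 1.5
  log $ writeCSV (Nothing :: Maybe Int)         -- (empty string)
```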

7
src/Node.Stream.CSV.Parse.js Normal file

@@ -0,0 +1,7 @@
import { Parser } from "csv-parse";
/** @type {(s: import('csv-parse').Options) => () => Parser} */
export const makeImpl = (c) => () => new Parser(c);
/** @type {(s: Parser) => () => Array<string> | null} */
export const readImpl = (p) => () => p.read();

77
src/Node.Stream.CSV.Parse.purs Normal file

@@ -0,0 +1,77 @@
module Node.Stream.CSV.Parse where
import Prelude hiding (join)
import Data.Nullable (Nullable)
import Effect (Effect)
import Effect.Uncurried (mkEffectFn1)
import Foreign (Foreign, unsafeToForeign)
import Foreign.Object (Object)
import Foreign.Object (union) as Object
import Node.Buffer (Buffer)
import Node.EventEmitter (EventHandle(..))
import Node.EventEmitter.UtilTypes (EventHandle1)
import Node.Stream (Read, Stream, Write)
import Node.Stream.Object (Transform) as Object
import Prim.Row (class Union)
import Unsafe.Coerce (unsafeCoerce)
data CSVRead
-- | Stream transforming chunks of a CSV file
-- | into parsed purescript objects.
-- |
-- | The CSV contents may be piped into this stream
-- | as Buffer or String chunks.
type CSVParser :: Row Type -> Type
type CSVParser r = Stream (read :: Read, write :: Write, csv :: CSVRead | r)
-- | https://csv.js.org/parse/options/
type Config r =
( bom :: Boolean
, group_columns_by_name :: Boolean
, comment :: String
, comment_no_infix :: Boolean
, delimiter :: String
, encoding :: String
, escape :: String
, from :: Int
, from_line :: Int
, ignore_last_delimiters :: Boolean
, info :: Boolean
, max_record_size :: Int
, quote :: String
, raw :: Boolean
, record_delimiter :: String
, relax_column_count :: Boolean
, skip_empty_lines :: Boolean
, skip_records_with_empty_values :: Boolean
, skip_records_with_error :: Boolean
, to :: Int
, to_line :: Int
, trim :: Boolean
, ltrim :: Boolean
, rtrim :: Boolean
| r
)
-- | Create a CSVParser
make
  :: forall @config @missing @extra
   . Union config missing (Config extra)
  => { | config }
  -> Effect (CSVParser ())
make =
  makeImpl
    <<< unsafeToForeign
    <<< Object.union (recordToForeign { columns: false, cast: false, cast_date: false })
    <<< recordToForeign
toObjectStream :: CSVParser () -> Object.Transform Buffer (Array String)
toObjectStream = unsafeCoerce
-- | `data` event. Emitted when a CSV record has been parsed.
dataH :: forall a. EventHandle1 (CSVParser a) (Array String)
dataH = EventHandle "data" mkEffectFn1
-- | FFI
foreign import makeImpl :: forall r. Foreign -> Effect (Stream r)
-- | FFI
foreign import readImpl :: forall r. Stream r -> Effect (Nullable (Array String))
-- | FFI
recordToForeign :: forall r. Record r -> Object Foreign
recordToForeign = unsafeCoerce
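A hypothetical end-to-end sketch of this module (not in the commit): the `on_` subscription and the `writeString`/`end` calls assume the `node-event-emitter` and `node-streams` APIs this package depends on, and the option values are illustrative:

```purescript
module Example where

import Prelude

import Effect (Effect)
import Effect.Console (logShow)
import Node.Encoding (Encoding(..))
import Node.EventEmitter (on_)
import Node.Stream as Stream
import Node.Stream.CSV.Parse as CSV.Parse

main :: Effect Unit
main = do
  -- Any subset of the Config row may be passed; omitted options
  -- fall back to csv-parse's defaults.
  parser <- CSV.Parse.make { delimiter: ";", trim: true }
  -- Each `data` event carries one parsed record as an Array String:
  -- here ["a","b"], then ["hello","world"] (cells trimmed).
  parser # on_ CSV.Parse.dataH logShow
  void $ Stream.writeString parser UTF8 "a; b\nhello; world\n"
  Stream.end parser
```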

7
src/Node.Stream.CSV.Stringify.js Normal file

@@ -0,0 +1,7 @@
import { stringify } from "csv-stringify";
/** @type {(c: import('csv-stringify').Options) => () => import('csv-stringify').Stringifier} */
export const makeImpl = (c) => () => stringify(c);
/** @type {(s: import('csv-stringify').Stringifier) => (vals: Array<string>) => () => void} */
export const writeImpl = (s) => (vals) => () => s.write(vals);

83
src/Node.Stream.CSV.Stringify.purs Normal file

@@ -0,0 +1,83 @@
module Node.Stream.CSV.Stringify where
import Prelude
import Data.CSV.Record (class WriteCSVRecord, writeCSVRecord)
import Data.String.Regex (Regex)
import Effect (Effect)
import Foreign (Foreign, unsafeToForeign)
import Foreign.Object (Object)
import Foreign.Object (union) as Object
import Node.Stream (Read, Stream, Write)
import Node.Stream.Object (Transform) as Object
import Prim.Row (class Union)
import Prim.RowList (class RowToList)
import Unsafe.Coerce (unsafeCoerce)
data CSVWrite
-- | Stream transforming rows of stringified CSV values
-- | to CSV-formatted rows.
-- |
-- | Write rows to the stream using `write`.
-- |
-- | Stringified rows are emitted on the `Readable` end as string
-- | chunks, meaning it can be treated as a `Node.Stream.Readable`
-- | that has had `setEncoding UTF8` invoked on it.
type CSVStringifier :: Row Type -> Type
type CSVStringifier r = Stream (read :: Read, write :: Write, csv :: CSVWrite | r)
-- | https://csv.js.org/stringify/options/
type Config r =
( bom :: Boolean
, delimiter :: String
, record_delimiter :: String
, escape :: String
, escape_formulas :: Boolean
, quote :: String
, quoted :: Boolean
, quoted_empty :: Boolean
, quoted_match :: Regex
, quoted_string :: Boolean
| r
)
foreign import makeImpl :: forall r. Foreign -> Effect (Stream r)
foreign import writeImpl :: forall r. Stream r -> Array String -> Effect Unit
recordToForeign :: forall r. Record r -> Object Foreign
recordToForeign = unsafeCoerce
-- | Create a raw Transform stream that accepts chunks of `Array String`,
-- | and transforms them into string CSV rows.
-- |
-- | Requires an ordered array of column names.
make
:: forall @config @missing @extra
. Union config missing (Config extra)
=> Array String
-> { | config }
-> Effect (CSVStringifier ())
make columns =
makeImpl
<<< unsafeToForeign
<<< Object.union (recordToForeign { columns, header: true })
<<< recordToForeign
-- | Convert the raw stream to a typed ObjectStream
toObjectStream :: CSVStringifier () -> Object.Transform (Array String) String
toObjectStream = unsafeCoerce
-- | Write a record to a CSVStringifier.
-- |
-- | The record will be emitted on the `Readable` end
-- | of the stream as a string chunk.
write :: forall @r rl a. RowToList r rl => WriteCSVRecord r rl => CSVStringifier a -> { | r } -> Effect Unit
write s = writeImpl s <<< writeCSVRecord @r @rl
-- | Write a record to a CSVStringifier.
-- |
-- | The record will be emitted on the `Readable` end
-- | of the stream as a string chunk.
writeRaw :: forall a. CSVStringifier a -> Array String -> Effect Unit
writeRaw = writeImpl
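A hypothetical usage sketch of the stringifier (not in the commit; option values and data are illustrative):

```purescript
module Example where

import Prelude

import Effect (Effect)
import Node.Stream as Stream
import Node.Stream.CSV.Stringify as CSV.Stringify

main :: Effect Unit
main = do
  -- The column array fixes the header row and the cell order of raw writes.
  stringifier <- CSV.Stringify.make [ "id", "name" ] { quoted_empty: true }
  -- Typed write: cells are rendered via WriteCSVRecord / WriteCSV.
  CSV.Stringify.write stringifier { id: 1, name: "ada" }
  -- Raw write: cells are already stringified, given in column order.
  CSV.Stringify.writeRaw stringifier [ "2", "grace" ]
  -- "id,name\n1,ada\n2,grace\n" can now be read from the readable end.
  Stream.end stringifier
```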

1
src/Node.Stream.CSV.purs Normal file

@@ -0,0 +1 @@
module Node.Stream.CSV where

98
src/Pipes.CSV.purs Normal file

@@ -0,0 +1,98 @@
module Pipes.CSV where
import Prelude
import Control.Monad.Error.Class (liftEither)
import Control.Monad.Except (runExcept)
import Control.Monad.Rec.Class (forever)
import Control.Monad.ST.Global as ST
import Control.Monad.ST.Ref as STRef
import Data.Array as Array
import Data.Bifunctor (lmap)
import Data.CSV.Record (class ReadCSVRecord, class WriteCSVRecord, readCSVRecord, writeCSVRecord)
import Data.FunctorWithIndex (mapWithIndex)
import Data.Map as Map
import Data.Maybe (Maybe(..))
import Data.Tuple.Nested ((/\))
import Effect.Aff (Aff)
import Effect.Class (liftEffect)
import Effect.Exception (error)
import Node.Buffer (Buffer)
import Node.Stream.CSV.Parse as CSV.Parse
import Node.Stream.CSV.Stringify as CSV.Stringify
import Pipes (await, yield, (>->))
import Pipes.Core (Pipe)
import Pipes.Node.Stream as Pipes.Stream
import Prim.RowList (class RowToList)
import Record.Extra (class Keys, keys)
import Type.Prelude (Proxy(..))
-- | Transforms buffer chunks of a CSV file to parsed
-- | records of `r`.
-- |
-- | ```
-- | -- == my-data.csv.gz ==
-- | -- id,foo,is_deleted
-- | -- 1,hi,f
-- | -- 2,bye,t
-- |
-- | rows
-- | :: Array {id :: Int, foo :: String, is_deleted :: Boolean}
-- | <- map Array.fromFoldable
-- | $ Pipes.toListM
-- | $ Pipes.Node.Stream.unEOS
-- | $ Pipes.Node.FS.read "my-data.csv.gz"
-- | >-> Pipes.Node.Zlib.gunzip
-- | >-> Pipes.CSV.parse
-- | rows `shouldEqual` [{id: 1, foo: "hi", is_deleted: false}, {id: 2, foo: "bye", is_deleted: true}]
-- | ```
parse
:: forall @r rl
. RowToList r rl
=> ReadCSVRecord r rl
=> Pipe (Maybe Buffer) (Maybe { | r }) Aff Unit
parse = do
raw <- liftEffect $ CSV.Parse.make {}
colsST <- liftEffect $ ST.toEffect $ STRef.new Nothing
let
readCols = liftEffect $ ST.toEffect $ STRef.read colsST
putCols a = void $ liftEffect $ ST.toEffect $ STRef.write (Just a) colsST
parse' a cols' = liftEither $ lmap (error <<< show) $ runExcept $ readCSVRecord @r @rl cols' a
firstRow a = putCols $ Map.fromFoldable $ mapWithIndex (flip (/\)) a
row a cols' = yield =<< parse' a cols'
unmarshal = forever do
r <- await
cols <- readCols
case cols of
Just cols' -> row r cols'
Nothing -> firstRow r
parser = Pipes.Stream.fromTransform $ CSV.Parse.toObjectStream raw
parser >-> Pipes.Stream.inEOS unmarshal
-- | Transforms buffer chunks of a CSV file to parsed
-- | arrays of CSV values.
parseRaw :: Pipe (Maybe Buffer) (Maybe (Array String)) Aff Unit
parseRaw = do
s <- liftEffect $ CSV.Parse.toObjectStream <$> CSV.Parse.make {}
Pipes.Stream.fromTransform s
-- | Transforms CSV rows into stringified CSV records
-- | using the given ordered array of column names.
stringifyRaw :: Array String -> Pipe (Maybe (Array String)) (Maybe String) Aff Unit
stringifyRaw columns = do
s <- liftEffect $ CSV.Stringify.toObjectStream <$> CSV.Stringify.make columns {}
Pipes.Stream.fromTransform s
-- | Transforms purescript records into stringified CSV records.
-- |
-- | Columns are inferred from the record's keys, ordered alphabetically.
stringify :: forall r rl. WriteCSVRecord r rl => RowToList r rl => Keys rl => Pipe (Maybe { | r }) (Maybe String) Aff Unit
stringify = do
raw <- liftEffect $ CSV.Stringify.make (Array.fromFoldable $ keys $ Proxy @r) {}
let
printer = Pipes.Stream.fromTransform $ CSV.Stringify.toObjectStream raw
marshal = forever $ yield =<< (writeCSVRecord @r @rl <$> await)
Pipes.Stream.inEOS marshal >-> printer
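For symmetry with the `parse` example above, a sketch of `stringify` (condensed from this commit's own test suite; the expected string assumes csv-stringify's default record delimiter):

```purescript
module Example where

import Prelude

import Data.Foldable (fold)
import Effect.Aff (Aff)
import Pipes ((>->))
import Pipes.CSV as Pipes.CSV
import Pipes.Collect as Pipes.Collect
import Pipes.Construct as Pipes.Construct
import Pipes.Node.Stream as Pipes.Stream

-- Should yield "id\n1\n2\n": the header is inferred from the record's keys.
csv :: Aff String
csv =
  map fold
    $ Pipes.Collect.toArray
    $ Pipes.Stream.withEOS (Pipes.Construct.eachArray [ { id: 1 }, { id: 2 } ])
    >-> Pipes.CSV.stringify
    >-> Pipes.Stream.unEOS
```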

14
test/Test/Main.purs Normal file

@@ -0,0 +1,14 @@
module Test.Main where
import Prelude
import Data.Maybe (Maybe(..))
import Effect (Effect)
import Effect.Aff (launchAff_)
import Test.Pipes.CSV as Test.Pipes.CSV
import Test.Spec.Reporter (specReporter)
import Test.Spec.Runner (defaultConfig, runSpec')
main :: Effect Unit
main = launchAff_ $ runSpec' (defaultConfig { failFast = true, timeout = Nothing }) [ specReporter ] do
Test.Pipes.CSV.spec

88
test/Test/Pipes.CSV.purs Normal file

@@ -0,0 +1,88 @@
module Test.Pipes.CSV where
import Prelude
import Control.Monad.Gen (chooseInt)
import Control.Monad.Rec.Class (Step(..), tailRecM)
import Data.Array as Array
import Data.DateTime (DateTime)
import Data.Foldable (fold, sum)
import Data.Maybe (Maybe(..), fromJust)
import Data.Newtype (wrap)
import Data.PreciseDateTime (fromRFC3339String, toDateTimeLossy)
import Data.String.CodePoints as String.CodePoints
import Data.Tuple.Nested ((/\))
import Effect.Class (liftEffect)
import Effect.Console (log)
import Node.Encoding (Encoding(..))
import Partial.Unsafe (unsafePartial)
import Pipes (yield, (>->))
import Pipes.CSV as Pipes.CSV
import Pipes.Collect as Pipes.Collect
import Pipes.Construct as Pipes.Construct
import Pipes.Node.Buffer as Pipes.Buffer
import Pipes.Node.Stream as Pipes.Stream
import Pipes.Prelude (chain, map, toListM) as Pipes
import Pipes.Util as Pipes.Util
import Test.QuickCheck.Gen (randomSample')
import Test.Spec (Spec, before, describe, it)
import Test.Spec.Assertions (shouldEqual)
csv :: String
csv =
"""created,flag,foo,id
2020-01-01T00:00:00.0Z,true,a,1
2024-02-02T08:00:00.0Z,false,apple,2
1970-01-01T00:00:00.0Z,true,hello,3
"""
dt :: String -> DateTime
dt = toDateTimeLossy <<< unsafePartial fromJust <<< fromRFC3339String <<< wrap
spec :: Spec Unit
spec =
describe "Pipes.CSV" do
it "stringify" do
let
objs =
[ { id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z" }
, { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
]
csv' <- map fold $ Pipes.Collect.toArray $ Pipes.Stream.withEOS (Pipes.Construct.eachArray objs) >-> Pipes.CSV.stringify >-> Pipes.Stream.unEOS
csv' `shouldEqual` csv
describe "parse" do
it "parses csv" do
rows <- map Array.fromFoldable
$ Pipes.toListM
$ Pipes.Stream.withEOS (yield csv)
>-> Pipes.Stream.inEOS (Pipes.Buffer.fromString UTF8)
>-> Pipes.CSV.parse
>-> Pipes.Stream.unEOS
rows `shouldEqual`
[ { id: 1, foo: "a", flag: true, created: dt "2020-01-01T00:00:00Z" }
, { id: 2, foo: "apple", flag: false, created: dt "2024-02-02T08:00:00Z" }
, { id: 3, foo: "hello", flag: true, created: dt "1970-01-01T00:00:00Z" }
]
before
(do
nums <- liftEffect $ randomSample' 100000 (chooseInt 0 9)
let
chars = [ "i","d","\n" ] <> join ((\n -> [show n, "\n"]) <$> nums)
bufs <- Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray chars)
>-> Pipes.Util.chunked 1000
>-> Pipes.Stream.inEOS (Pipes.map fold >-> Pipes.Buffer.fromString UTF8)
>-> Pipes.Stream.unEOS
pure $ nums /\ bufs
)
$ it "parses large csv" \(nums /\ bufs) -> do
rows <-
Pipes.Collect.toArray
$ Pipes.Stream.withEOS (Pipes.Construct.eachArray bufs)
>-> Pipes.CSV.parse @(id :: Int)
>-> Pipes.Stream.unEOS
rows `shouldEqual` ((\id -> { id }) <$> nums)