Skip to content

Commit

Permalink
Merge pull request #986 from bmcdonald3/parquet-tests
Browse files: browse the repository at this point in the history
Fix parquet tests for multilocale runs
  • Loading branch information
glitch authored Dec 2, 2021
2 parents 8b41dc5 + 7925ffd commit 639faef
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
6 changes: 3 additions & 3 deletions src/Parquet.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,12 @@ module Parquet {
return ArrowTypes.notimplemented;
}

proc writeDistArrayToParquet(A, filename, dsetname, rowGroupSize) {
proc writeDistArrayToParquet(A, filename, dsetname, rowGroupSize) throws {
extern proc c_writeColumnToParquet(filename, chpl_arr, colnum,
dsetname, numelems, rowGroupSize);
var filenames: [0..#A.targetLocales().size] string;
for i in 0..#A.targetLocales().size {
var suffix = i: string;
var suffix = '%04i'.format(i): string;
filenames[i] = filename + "_LOCALE" + suffix + ".parquet";
}

Expand All @@ -102,7 +102,7 @@ module Parquet {
}
}

proc write1DDistArrayParquet(filename: string, dsetname, A) {
proc write1DDistArrayParquet(filename: string, dsetname, A) throws {
writeDistArrayToParquet(A, filename, dsetname, ROWGROUPS);
return false;
}
Expand Down
17 changes: 9 additions & 8 deletions tests/parquet_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import glob, os
from context import arkouda as ak
from base_test import ArkoudaTest
import numpy as np
import pytest

SIZE = 50
SIZE = 100
NUMFILES = 5
verbose = True

Expand All @@ -13,23 +14,23 @@ def test_parquet(self):
ak_arr = ak.randint(0, 2**32, SIZE)
ak_arr.save_parquet("pq_testcorrect", "my-dset")
pq_arr = ak.read_parquet("pq_testcorrect*", "my-dset")
# get the dset from the dictionary in multi-locale cases
self.assertTrue((ak_arr == pq_arr).all())

for f in glob.glob('pq_test*'):
os.remove(f)
self.assertTrue((ak_arr == pq_arr).all())

def test_multi_file(self):
adjusted_size = int(SIZE/NUMFILES)*NUMFILES
test_arrs = []
elems = ak.randint(0, 2**32, adjusted_size)
per_arr = int(adjusted_size/NUMFILES)
for i in range(NUMFILES):
test_arrs.append(ak.randint(0, 2**32, int(adjusted_size/NUMFILES)))
test_arrs.append(elems[(i*per_arr):(i*per_arr)+per_arr])
test_arrs[i].save_parquet("pq_test" + str(i), "test-dset")

pq_arr = ak.read_parquet("pq_test*", "test-dset")
self.assertTrue(len(pq_arr) == adjusted_size)

for i in range(NUMFILES):
sz = len(test_arrs[i])
self.assertTrue((test_arrs[i] == pq_arr[(i*sz):(i*sz)+sz]).all())
self.assertTrue((elems == pq_arr).all())

for f in glob.glob('pq_test*'):
os.remove(f)

0 comments on commit 639faef

Please sign in to comment.