Skip to content

Commit c6bc27f

Browse files
committed
chore: refactor and improve test data structure struct Fragment (#5675)
This pull request does some refactoring and enhancing of our test data structures `struct Fragment1D` and `struct Fragment2D` which are used as containers for inputs to write queries and outputs of read queries. 1) we move the methods which generically create arrays using these and write fragments using these to a header file instead of `sparse_global_order_reader.cc` 2) we refactor functionality into a `template <typename DimensionTuple, typename AttributeTuple> struct Fragment` which is a common base class for `Fragment1D`, `Fragment2D`, a new `Fragment3D` and can be used with an empty dimension tuple for dense fragments 3) we implement various fixes to enable variable-length dimensions 4) some other incidentals This pull request has been refactored out of #5646 for two reasons: 1) it is all enhancements to test code which may be distracting from the functional components of #5646 ; 2) I anticipate using it to add additional tests to #5655 . Specifically, using the `Fragment` with an empty dimension tuple will be very nice for writing tests on dense arrays. There's no direct testing of this, but since the code was factored out of #5646 all of it was used in testing there. Many components are exercised due to the refactor of the existing tests. --- TYPE: NO_HISTORY DESC: refactors and enhancements to test data structure `struct Fragment`
1 parent cc4c641 commit c6bc27f

File tree

6 files changed

+633
-249
lines changed

6 files changed

+633
-249
lines changed

test/src/unit-sparse-global-order-reader.cc

Lines changed: 66 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ struct FxRun1D {
189189
if (subarray.empty()) {
190190
return true;
191191
} else {
192-
const CoordType coord = fragment.dim_[record];
192+
const CoordType coord = fragment.dimension()[record];
193193
for (const auto& range : subarray) {
194194
if (range.contains(coord)) {
195195
return true;
@@ -349,7 +349,7 @@ struct FxRun2D {
349349
if (subarray.empty() && !condition.has_value()) {
350350
return true;
351351
} else {
352-
const int r = fragment.d1_[record], c = fragment.d2_[record];
352+
const int r = fragment.d1()[record], c = fragment.d2()[record];
353353
for (const auto& range : subarray) {
354354
if (range.first.has_value() && !range.first->contains(r)) {
355355
continue;
@@ -649,36 +649,11 @@ void CSparseGlobalOrderFx::write_fragment(
649649
}
650650

651651
CApiArray& array = *existing;
652+
Context cppctx = vfs_test_setup_.ctx();
653+
Array cpparray(cppctx, array, false);
652654

653-
// Create the query.
654-
tiledb_query_t* query;
655-
auto rc = tiledb_query_alloc(context(), array, TILEDB_WRITE, &query);
656-
ASSERTER(rc == TILEDB_OK);
657-
rc = tiledb_query_set_layout(context(), query, TILEDB_UNORDERED);
658-
ASSERTER(rc == TILEDB_OK);
659-
660-
auto field_sizes = templates::query::make_field_sizes<Asserter>(fragment);
661-
templates::query::set_fields<Asserter, Fragment>(
662-
context(),
663-
query,
664-
field_sizes,
665-
fragment,
666-
[](unsigned d) { return "d" + std::to_string(d + 1); },
667-
[](unsigned a) { return "a" + std::to_string(a + 1); });
668-
669-
// Submit query.
670-
rc = tiledb_query_submit(context(), query);
671-
ASSERTER(std::optional<std::string>() == error_if_any(rc));
672-
673-
// check that sizes match what we expect
674-
const uint64_t expect_num_cells = fragment.size();
675-
const uint64_t num_cells =
676-
templates::query::num_cells<Asserter>(fragment, field_sizes);
677-
678-
ASSERTER(num_cells == expect_num_cells);
679-
680-
// Clean up.
681-
tiledb_query_free(&query);
655+
templates::query::write_fragment<Asserter, Fragment>(
656+
fragment, cpparray, TILEDB_UNORDERED);
682657
}
683658

684659
void CSparseGlobalOrderFx::write_1d_fragment_strings(
@@ -1361,23 +1336,23 @@ TEST_CASE_METHOD(
13611336

13621337
// Write a fragment F0 with unique coordinates
13631338
InstanceType::FragmentType fragment0;
1364-
fragment0.dim_.resize(fragment_size);
1365-
std::iota(fragment0.dim_.begin(), fragment0.dim_.end(), 1);
1339+
fragment0.dimension().resize(fragment_size);
1340+
std::iota(fragment0.dimension().begin(), fragment0.dimension().end(), 1);
13661341

13671342
// Write a fragment F1 with lots of duplicates
13681343
// [100,100,100,100,100,101,101,101,101,101,102,102,102,102,102,...]
13691344
InstanceType::FragmentType fragment1;
1370-
fragment1.dim_.resize(fragment0.dim_.num_cells());
1371-
for (size_t i = 0; i < fragment1.dim_.num_cells(); i++) {
1372-
fragment1.dim_[i] =
1373-
static_cast<int>((i / 10) + (fragment0.dim_.num_cells() / 2));
1345+
fragment1.dimension().resize(fragment0.dimension().num_cells());
1346+
for (size_t i = 0; i < fragment1.dimension().num_cells(); i++) {
1347+
fragment1.dimension()[i] =
1348+
static_cast<int>((i / 10) + (fragment0.dimension().num_cells() / 2));
13741349
}
13751350

13761351
// atts are whatever, used just for query condition and correctness check
13771352
auto& f0atts = std::get<0>(fragment0.atts_);
1378-
f0atts.resize(fragment0.dim_.num_cells());
1353+
f0atts.resize(fragment0.dimension().num_cells());
13791354
std::iota(f0atts.begin(), f0atts.end(), 0);
1380-
for (uint64_t i = 0; i < fragment0.dim_.num_cells(); i++) {
1355+
for (uint64_t i = 0; i < fragment0.dimension().num_cells(); i++) {
13811356
if ((i * i) % 7 == 0) {
13821357
std::get<1>(fragment0.atts_).push_back(std::nullopt);
13831358
} else {
@@ -1390,9 +1365,9 @@ TEST_CASE_METHOD(
13901365
}
13911366

13921367
auto& f1atts = std::get<0>(fragment1.atts_);
1393-
f1atts.resize(fragment1.dim_.num_cells());
1394-
std::iota(f1atts.begin(), f1atts.end(), int(fragment0.dim_.num_cells()));
1395-
for (uint64_t i = 0; i < fragment1.dim_.num_cells(); i++) {
1368+
f1atts.resize(fragment1.dimension().num_cells());
1369+
std::iota(f1atts.begin(), f1atts.end(), int(fragment0.num_cells()));
1370+
for (uint64_t i = 0; i < fragment1.num_cells(); i++) {
13961371
if ((i * i) % 11 == 0) {
13971372
std::get<1>(fragment1.atts_).push_back(std::nullopt);
13981373
} else {
@@ -1492,25 +1467,25 @@ TEST_CASE_METHOD(
14921467
templates::Fragment1D<int, int> fragment1;
14931468

14941469
// Write a fragment F0 with tiles [1,3][3,5][5,7][7,9]...
1495-
fragment0.dim_.resize(fragment_size);
1496-
fragment0.dim_[0] = 1;
1497-
for (size_t i = 1; i < fragment0.dim_.num_cells(); i++) {
1498-
fragment0.dim_[i] = static_cast<int>(1 + 2 * ((i + 1) / 2));
1470+
fragment0.dimension().resize(fragment_size);
1471+
fragment0.dimension()[0] = 1;
1472+
for (size_t i = 1; i < fragment0.dimension().num_cells(); i++) {
1473+
fragment0.dimension()[i] = static_cast<int>(1 + 2 * ((i + 1) / 2));
14991474
}
15001475

15011476
// Write a fragment F1 with tiles [2,4][4,6][6,8][8,10]...
1502-
fragment1.dim_.resize(fragment0.dim_.num_cells());
1503-
for (size_t i = 0; i < fragment1.dim_.num_cells(); i++) {
1504-
fragment1.dim_[i] = fragment0.dim_[i] + 1;
1477+
fragment1.dimension().resize(fragment0.dimension().num_cells());
1478+
for (size_t i = 0; i < fragment1.dimension().num_cells(); i++) {
1479+
fragment1.dimension()[i] = fragment0.dimension()[i] + 1;
15051480
}
15061481

15071482
// atts don't really matter
15081483
auto& f0atts = std::get<0>(fragment0.atts_);
1509-
f0atts.resize(fragment0.dim_.num_cells());
1484+
f0atts.resize(fragment0.dimension().num_cells());
15101485
std::iota(f0atts.begin(), f0atts.end(), 0);
15111486

15121487
auto& f1atts = std::get<0>(fragment1.atts_);
1513-
f1atts.resize(fragment1.dim_.num_cells());
1488+
f1atts.resize(fragment1.dimension().num_cells());
15141489
std::iota(f1atts.begin(), f1atts.end(), int(f0atts.num_cells()));
15151490

15161491
FxRun1D instance;
@@ -1614,10 +1589,10 @@ TEST_CASE_METHOD(
16141589

16151590
for (size_t f = 0; f < num_fragments; f++) {
16161591
templates::Fragment1D<int, int> fragment;
1617-
fragment.dim_.resize(fragment_size);
1592+
fragment.dimension().resize(fragment_size);
16181593
std::iota(
1619-
fragment.dim_.begin(),
1620-
fragment.dim_.end(),
1594+
fragment.dimension().begin(),
1595+
fragment.dimension().end(),
16211596
instance.array.dimension_.domain.lower_bound + static_cast<int>(f));
16221597

16231598
auto& atts = std::get<0>(fragment.atts_);
@@ -1741,10 +1716,10 @@ TEST_CASE_METHOD(
17411716

17421717
for (size_t f = 0; f < num_fragments; f++) {
17431718
templates::Fragment1D<int, int> fragment;
1744-
fragment.dim_.resize(fragment_size);
1719+
fragment.dimension().resize(fragment_size);
17451720
std::iota(
1746-
fragment.dim_.begin(),
1747-
fragment.dim_.end(),
1721+
fragment.dimension().begin(),
1722+
fragment.dimension().end(),
17481723
static_cast<int>(f * (fragment_size - 1)));
17491724

17501725
auto& atts = std::get<0>(fragment.atts_);
@@ -1922,13 +1897,13 @@ TEST_CASE_METHOD(
19221897

19231898
for (size_t f = 0; f < num_fragments; f++) {
19241899
templates::Fragment2D<int, int, int> fdata;
1925-
fdata.d1_.reserve(fragment_size);
1926-
fdata.d2_.reserve(fragment_size);
1900+
fdata.d1().reserve(fragment_size);
1901+
fdata.d2().reserve(fragment_size);
19271902
std::get<0>(fdata.atts_).reserve(fragment_size);
19281903

19291904
for (size_t i = 0; i < fragment_size; i++) {
1930-
fdata.d1_.push_back(row(f, i));
1931-
fdata.d2_.push_back(col(f, i));
1905+
fdata.d1().push_back(row(f, i));
1906+
fdata.d2().push_back(col(f, i));
19321907
std::get<0>(fdata.atts_)
19331908
.push_back(static_cast<int>(f * fragment_size + i));
19341909
}
@@ -2126,34 +2101,34 @@ TEST_CASE_METHOD(
21262101
const int tcol = instance.d2.domain.lower_bound +
21272102
static_cast<int>(f * instance.d2.extent);
21282103
for (int i = 0; i < instance.d1.extent * instance.d2.extent - 2; i++) {
2129-
fdata.d1_.push_back(trow + i / instance.d1.extent);
2130-
fdata.d2_.push_back(tcol + i % instance.d1.extent);
2104+
fdata.d1().push_back(trow + i / instance.d1.extent);
2105+
fdata.d2().push_back(tcol + i % instance.d1.extent);
21312106
std::get<0>(fdata.atts_).push_back(att++);
21322107
}
21332108

21342109
// then some sparse coords in the next space tile,
21352110
// fill the data tile (if the capacity is 4), we'll call it T
2136-
fdata.d1_.push_back(trow);
2137-
fdata.d2_.push_back(tcol + instance.d2.extent);
2111+
fdata.d1().push_back(trow);
2112+
fdata.d2().push_back(tcol + instance.d2.extent);
21382113
std::get<0>(fdata.atts_).push_back(att++);
2139-
fdata.d1_.push_back(trow + instance.d1.extent - 1);
2140-
fdata.d2_.push_back(tcol + instance.d2.extent + 2);
2114+
fdata.d1().push_back(trow + instance.d1.extent - 1);
2115+
fdata.d2().push_back(tcol + instance.d2.extent + 2);
21412116
std::get<0>(fdata.atts_).push_back(att++);
21422117

21432118
// then begin a new data tile "Tnext" which straddles the bounds of that
21442119
// space tile. this will have a low MBR.
2145-
fdata.d1_.push_back(trow + instance.d1.extent - 1);
2146-
fdata.d2_.push_back(tcol + instance.d2.extent + 3);
2120+
fdata.d1().push_back(trow + instance.d1.extent - 1);
2121+
fdata.d2().push_back(tcol + instance.d2.extent + 3);
21472122
std::get<0>(fdata.atts_).push_back(att++);
2148-
fdata.d1_.push_back(trow);
2149-
fdata.d2_.push_back(tcol + 2 * instance.d2.extent);
2123+
fdata.d1().push_back(trow);
2124+
fdata.d2().push_back(tcol + 2 * instance.d2.extent);
21502125
std::get<0>(fdata.atts_).push_back(att++);
21512126

21522127
// then add a point P which is less than the lower bound of Tnext's MBR,
21532128
// and also between the last two coordinates of T
21542129
FxRun2D::FragmentType fpoint;
2155-
fpoint.d1_.push_back(trow + instance.d1.extent - 1);
2156-
fpoint.d2_.push_back(tcol + instance.d1.extent + 1);
2130+
fpoint.d1().push_back(trow + instance.d1.extent - 1);
2131+
fpoint.d2().push_back(tcol + instance.d1.extent + 1);
21572132
std::get<0>(fpoint.atts_).push_back(att++);
21582133

21592134
instance.fragments.push_back(fdata);
@@ -2268,13 +2243,13 @@ TEST_CASE_METHOD(
22682243
for (size_t f = 0; f < num_fragments; f++) {
22692244
FxRunType::FragmentType fragment;
22702245

2271-
fragment.dim_.resize(fragment_size);
2246+
fragment.dimension().resize(fragment_size);
22722247
std::iota(
2273-
fragment.dim_.begin(),
2274-
fragment.dim_.end(),
2248+
fragment.dimension().begin(),
2249+
fragment.dimension().end(),
22752250
dimension.domain.lower_bound);
22762251

2277-
std::get<0>(fragment.atts_).resize(fragment.dim_.num_cells());
2252+
std::get<0>(fragment.atts_).resize(fragment.dimension().num_cells());
22782253
std::iota(
22792254
std::get<0>(fragment.atts_).begin(),
22802255
std::get<0>(fragment.atts_).end(),
@@ -3218,8 +3193,8 @@ TEST_CASE_METHOD(
32183193
for (uint64_t t = 0; t < fragment_same_timestamp_runs.size(); t++) {
32193194
for (uint64_t f = 0; f < fragment_same_timestamp_runs[t]; f++) {
32203195
FxRun2D::FragmentType fragment;
3221-
fragment.d1_ = {1, 2 + static_cast<int>(t)};
3222-
fragment.d2_ = {1, 2 + static_cast<int>(f)};
3196+
fragment.d1() = {1, 2 + static_cast<int>(t)};
3197+
fragment.d2() = {1, 2 + static_cast<int>(f)};
32233198
std::get<0>(fragment.atts_) = std::vector<int>{
32243199
static_cast<int>(instance.fragments.size()),
32253200
static_cast<int>(instance.fragments.size())};
@@ -3248,7 +3223,7 @@ TEST_CASE_METHOD(
32483223

32493224
CApiArray array(context(), raw_array, TILEDB_WRITE);
32503225
for (uint64_t f = 0; f < fragment_same_timestamp_runs[t]; f++, i++) {
3251-
write_fragment<Asserter, decltype(instance.fragments[i])>(
3226+
write_fragment<Asserter, FxRun2D::FragmentType>(
32523227
instance.fragments[i], &array);
32533228
}
32543229
}
@@ -3333,64 +3308,15 @@ TEST_CASE_METHOD(
33333308
*/
33343309
template <typename Asserter, InstanceType Instance>
33353310
void CSparseGlobalOrderFx::create_array(const Instance& instance) {
3336-
const auto dimensions = instance.dimensions();
3337-
const auto attributes = instance.attributes();
3338-
3339-
std::vector<std::string> dimension_names;
3340-
std::vector<tiledb_datatype_t> dimension_types;
3341-
std::vector<void*> dimension_ranges;
3342-
std::vector<void*> dimension_extents;
3343-
auto add_dimension = [&]<Datatype D>(
3344-
const templates::Dimension<D>& dimension) {
3345-
using CoordType = templates::Dimension<D>::value_type;
3346-
dimension_names.push_back("d" + std::to_string(dimension_names.size() + 1));
3347-
dimension_types.push_back(static_cast<tiledb_datatype_t>(D));
3348-
dimension_ranges.push_back(
3349-
const_cast<CoordType*>(&dimension.domain.lower_bound));
3350-
dimension_extents.push_back(const_cast<CoordType*>(&dimension.extent));
3351-
};
3352-
std::apply(
3353-
[&]<Datatype... Ds>(const templates::Dimension<Ds>&... dimension) {
3354-
(add_dimension(dimension), ...);
3355-
},
3356-
dimensions);
3357-
3358-
std::vector<std::string> attribute_names;
3359-
std::vector<tiledb_datatype_t> attribute_types;
3360-
std::vector<uint32_t> attribute_cell_val_nums;
3361-
std::vector<bool> attribute_nullables;
3362-
std::vector<std::pair<tiledb_filter_type_t, int>> attribute_compressors;
3363-
auto add_attribute = [&](Datatype datatype,
3364-
uint32_t cell_val_num,
3365-
bool nullable) {
3366-
attribute_names.push_back("a" + std::to_string(attribute_names.size() + 1));
3367-
attribute_types.push_back(static_cast<tiledb_datatype_t>(datatype));
3368-
attribute_cell_val_nums.push_back(cell_val_num);
3369-
attribute_nullables.push_back(nullable);
3370-
attribute_compressors.push_back(std::make_pair(TILEDB_FILTER_NONE, -1));
3371-
};
3372-
for (const auto& [datatype, cell_val_num, nullable] : attributes) {
3373-
add_attribute(datatype, cell_val_num, nullable);
3374-
}
3375-
3376-
tiledb::test::create_array(
3377-
context(),
3311+
templates::ddl::create_array(
33783312
array_name_,
3379-
TILEDB_SPARSE,
3380-
dimension_names,
3381-
dimension_types,
3382-
dimension_ranges,
3383-
dimension_extents,
3384-
attribute_names,
3385-
attribute_types,
3386-
attribute_cell_val_nums,
3387-
attribute_compressors,
3313+
Context(context(), false),
3314+
instance.dimensions(),
3315+
instance.attributes(),
33883316
instance.tile_order(),
33893317
instance.cell_order(),
33903318
instance.tile_capacity(),
3391-
instance.allow_duplicates(),
3392-
false,
3393-
{attribute_nullables});
3319+
instance.allow_duplicates());
33943320
}
33953321

33963322
/**
@@ -3419,13 +3345,13 @@ DeleteArrayGuard CSparseGlobalOrderFx::run_create(Instance& instance) {
34193345

34203346
// the tile extent is 2
34213347
// create_default_array_1d<Asserter>(instance.array);
3422-
create_array<Asserter, decltype(instance)>(instance);
3348+
create_array<Asserter, Instance>(instance);
34233349

34243350
DeleteArrayGuard arrayguard(context(), array_name_.c_str());
34253351

34263352
// write all fragments
34273353
for (auto& fragment : instance.fragments) {
3428-
write_fragment<Asserter, decltype(fragment)>(fragment);
3354+
write_fragment<Asserter, typename Instance::FragmentType>(fragment);
34293355
}
34303356

34313357
return arrayguard;
@@ -3435,7 +3361,7 @@ template <typename Asserter, InstanceType Instance>
34353361
void CSparseGlobalOrderFx::run_execute(Instance& instance) {
34363362
ASSERTER(instance.num_user_cells > 0);
34373363

3438-
std::decay_t<decltype(instance.fragments[0])> expect;
3364+
std::decay_t<typename Instance::FragmentType> expect;
34393365

34403366
// for de-duplicating, track the fragment that each coordinate came from
34413367
// we will use this to select the coordinate from the most recent fragment
@@ -3665,19 +3591,7 @@ void CSparseGlobalOrderFx::run_execute(Instance& instance) {
36653591
ASSERTER(num_cells == num_cells_bound);
36663592
}
36673593

3668-
std::apply(
3669-
[&](auto&... field) {
3670-
std::apply(
3671-
[&](const auto&... field_cursor) {
3672-
std::apply(
3673-
[&](const auto&... field_size) {
3674-
(field.apply_cursor(field_cursor, field_size), ...);
3675-
},
3676-
field_sizes);
3677-
},
3678-
outcursor);
3679-
},
3680-
std::tuple_cat(outdims, outatts));
3594+
templates::query::apply_cursor(out, outcursor, field_sizes);
36813595

36823596
const uint64_t cursor_cells =
36833597
templates::query::num_cells<Asserter>(out, outcursor);
@@ -4014,11 +3928,11 @@ void show<FxRun2D>(const FxRun2D& instance, std::ostream& os) {
40143928
os << "\t\t{" << std::endl;
40153929
os << "\t\t\t\"d1\": [" << std::endl;
40163930
os << "\t\t\t\t";
4017-
show(fragment.d1_, os);
3931+
show(fragment.d1(), os);
40183932
os << std::endl;
40193933
os << "\t\t\t\"d2\": [" << std::endl;
40203934
os << "\t\t\t\t";
4021-
show(fragment.d2_, os);
3935+
show(fragment.d2(), os);
40223936
os << std::endl;
40233937
os << "\t\t\t], " << std::endl;
40243938
os << "\t\t\t\"atts\": [" << std::endl;

0 commit comments

Comments
 (0)