Skip to content

Commit b22ee46

Browse files
committed
Use tile arithmetic functions in global order writer
1 parent 01d9610 commit b22ee46

File tree

1 file changed

+93
-106
lines changed

1 file changed

+93
-106
lines changed

tiledb/sm/query/writers/global_order_writer.cc

Lines changed: 93 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "tiledb/sm/query/hilbert_order.h"
5050
#include "tiledb/sm/query/query_macros.h"
5151
#include "tiledb/sm/stats/global_stats.h"
52+
#include "tiledb/sm/tile/arithmetic.h"
5253
#include "tiledb/sm/tile/generic_tile_io.h"
5354
#include "tiledb/sm/tile/tile_metadata_generator.h"
5455
#include "tiledb/sm/tile/writer_tile_tuple.h"
@@ -61,11 +62,78 @@ using namespace tiledb::sm::stats;
6162

6263
namespace tiledb::sm {
6364

64-
static NDRange domain_tile_offset(
65+
/**
66+
* See `tiledb/sm/tile/arithmetic.h` function `is_rectangular_domain`.
67+
*
68+
* When writing multiple dense fragments the domain of each fragment
69+
* must accurately reflect the coordinates contained in that fragment.
70+
* This is called in `GlobalOrderWriter::identify_fragment_tile_boundaries` for
71+
* each of the input tiles to determine whether a rectangle is formed and
72+
* including a tile in a fragment is sound.
73+
*/
74+
static bool is_rectangular_domain(
75+
const ArraySchema& arrayschema,
76+
const NDRange& domain,
77+
uint64_t start_tile,
78+
uint64_t num_tiles) {
79+
const Domain& arraydomain = arrayschema.domain();
80+
81+
auto impl = [&]<typename T>(T) {
82+
if constexpr (TileDBIntegral<T>) {
83+
std::vector<T> tile_extents;
84+
tile_extents.reserve(arraydomain.dim_num());
85+
for (uint64_t d = 0; d < arraydomain.dim_num(); d++) {
86+
tile_extents.push_back(arraydomain.tile_extent(d).rvalue_as<T>());
87+
}
88+
89+
return is_rectangular_domain<T>(
90+
arrayschema.tile_order(),
91+
tile_extents,
92+
domain,
93+
start_tile,
94+
num_tiles);
95+
} else {
96+
return false;
97+
}
98+
};
99+
return apply_with_type(impl, arraydomain.dimension_ptr(0)->type());
100+
}
101+
102+
/**
103+
* See `tiledb/sm/tile/arithmetic.h` function `domain_tile_offset`.
104+
*
105+
* When writing multiple dense fragments the domain of each fragment
106+
* must accurately reflect the coordinates contained in that fragment.
107+
* This is called when starting a new fragment to update the domain of the
108+
* previous fragment and set the correct starting domain of the new one.
109+
*/
110+
static std::optional<NDRange> domain_tile_offset(
65111
const ArraySchema& arrayschema,
66112
const NDRange& domain,
67113
uint64_t start_tile,
68-
uint64_t num_tiles);
114+
uint64_t num_tiles) {
115+
const Domain& arraydomain = arrayschema.domain();
116+
117+
auto impl = [&]<typename T>(T) {
118+
if constexpr (TileDBIntegral<T>) {
119+
std::vector<T> tile_extents;
120+
tile_extents.reserve(arraydomain.dim_num());
121+
for (uint64_t d = 0; d < arraydomain.dim_num(); d++) {
122+
tile_extents.push_back(arraydomain.tile_extent(d).rvalue_as<T>());
123+
}
124+
125+
return domain_tile_offset<T>(
126+
arrayschema.tile_order(),
127+
tile_extents,
128+
domain,
129+
start_tile,
130+
num_tiles);
131+
} else {
132+
return std::optional<NDRange>{};
133+
}
134+
};
135+
return apply_with_type(impl, arraydomain.dimension_ptr(0)->type());
136+
}
69137

70138
class GlobalOrderWriterException : public StatusException {
71139
public:
@@ -716,12 +784,13 @@ Status GlobalOrderWriter::finalize_global_write_state() {
716784
if (dense()) {
717785
const uint64_t num_tiles_in_fragment =
718786
meta->loaded_metadata()->tile_offsets()[0].size();
719-
NDRange fragment_domain = domain_tile_offset(
787+
std::optional<NDRange> fragment_domain = domain_tile_offset(
720788
array_schema_,
721789
subarray_.ndrange(0),
722790
global_write_state_->dense_.domain_tile_offset_,
723791
num_tiles_in_fragment);
724-
meta->set_domain(fragment_domain);
792+
iassert(fragment_domain.has_value());
793+
meta->set_domain(std::move(fragment_domain.value()));
725794
}
726795

727796
// Check that the same number of cells was written across attributes
@@ -1505,33 +1574,6 @@ Status GlobalOrderWriter::prepare_full_tiles_var(
15051574
return Status::Ok();
15061575
}
15071576

1508-
/**
1509-
* @return the number of tiles in a "hyper-row" of `subarray` within
1510-
* `arrayschema`
1511-
*
1512-
* If a "hyper-rectangle" is a generalization of a rectangle to N dimensions,
1513-
* then let's define a "hyper-row" to be a generalization of a row to N
1514-
* dimensions. That is, a "hyper-row" is a hyper-rectangle whose length is 1 in
1515-
* the outer-most dimension.
*
* The outer-most dimension is taken to be dimension 0 when the tile order is
* `Layout::ROW_MAJOR` and the last dimension otherwise. The count is obtained
* by collapsing a copy of `subarray` to its start coordinate in that dimension
* and asking the domain for the tile count of the collapsed range.
*
* @param arrayschema schema providing the tile order, dimension count and
* domain
* @param subarray the N-dimensional range to measure; it is copied, not
* modified
1516-
*/
1517-
static uint64_t compute_hyperrow_num_tiles(
1518-
const ArraySchema& arrayschema, const NDRange& subarray) {
1519-
// index of the outer-most dimension under the schema's tile order
const uint64_t rdim =
1520-
(arrayschema.tile_order() == Layout::ROW_MAJOR ?
1521-
0 :
1522-
arrayschema.dim_num() - 1);
1523-
1524-
// work on a copy so the caller's range is left untouched
NDRange adjusted = subarray;
1525-
1526-
// normalize `adjusted` to a single hyper-row
// (overwrites the end bound of `rdim` with its start bound; presumably the
// range stores both bounds in `size()` bytes, half each - TODO confirm)
1527-
memcpy(
1528-
adjusted[rdim].end_fixed(),
1529-
adjusted[rdim].start_fixed(),
1530-
adjusted[rdim].size() / 2);
1531-
1532-
return arrayschema.domain().tile_num(adjusted);
1533-
}
1534-
15351577
/**
15361578
* Identifies the division of input cells into target fragments,
15371579
* using `max_fragment_size_` as a hard limit on the target fragment size.
@@ -1574,26 +1616,24 @@ GlobalOrderWriter::identify_fragment_tile_boundaries(
15741616
uint64_t running_tiles_size = current_fragment_size_;
15751617
uint64_t fragment_size = current_fragment_size_;
15761618

1577-
// NB: gcc has a false positive uninitialized use warning for `fragment_end`
1619+
std::optional<uint64_t> subarray_tile_offset;
1620+
if (dense()) {
1621+
if (global_write_state_->frag_meta_) {
1622+
subarray_tile_offset = global_write_state_->dense_.domain_tile_offset_ +
1623+
global_write_state_->frag_meta_->tile_index_base();
1624+
} else {
1625+
subarray_tile_offset = 0;
1626+
}
1627+
}
1628+
1629+
// NB: gcc has a false positive uninitialized use warning for `fragment_end`
15781630
#pragma GCC diagnostic push
15791631
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
15801632
uint64_t fragment_start = 0;
15811633
std::optional<uint64_t> fragment_end;
15821634
std::vector<uint64_t> fragments;
15831635
#pragma GCC diagnostic pop
15841636

1585-
uint64_t hyperrow_offset = 0;
1586-
std::optional<uint64_t> hyperrow_num_tiles;
1587-
if (dense()) {
1588-
hyperrow_num_tiles =
1589-
compute_hyperrow_num_tiles(array_schema_, subarray_.ndrange(0));
1590-
1591-
if (global_write_state_->frag_meta_) {
1592-
hyperrow_offset = global_write_state_->dense_.domain_tile_offset_ +
1593-
global_write_state_->frag_meta_->tile_index_base();
1594-
}
1595-
}
1596-
15971637
// Make sure we don't write more than the desired fragment size.
15981638
for (uint64_t t = 0; t < tile_num; t++) {
15991639
uint64_t tile_size = 0;
@@ -1624,10 +1664,12 @@ GlobalOrderWriter::identify_fragment_tile_boundaries(
16241664
fragment_end = std::nullopt;
16251665
}
16261666

1627-
if (!hyperrow_num_tiles.has_value() ||
1628-
((hyperrow_offset + t + 1) - fragment_start) %
1629-
hyperrow_num_tiles.value() ==
1630-
0) {
1667+
if (!subarray_tile_offset.has_value() ||
1668+
is_rectangular_domain(
1669+
array_schema_,
1670+
subarray_.ndrange(0),
1671+
subarray_tile_offset.value() + fragment_start,
1672+
t - fragment_start + 1)) {
16311673
fragment_size = running_tiles_size + tile_size;
16321674
fragment_end = t + 1;
16331675
}
@@ -1645,62 +1687,6 @@ GlobalOrderWriter::identify_fragment_tile_boundaries(
16451687
.last_fragment_size_ = fragment_size};
16461688
}
16471689

1648-
/**
1649-
* Computes the domain covered by a run of tiles inside `domain` and returns
* it as a single range (a copy of `domain` narrowed in the outer-most
* dimension of the tile order).
1650-
*
1651-
* When writing multiple dense fragments the domain of each fragment
1652-
* must accurately reflect the coordinates contained in that fragment.
1653-
* This is called when starting a new fragment to update the domain of the
1654-
* previous fragment and set the correct starting domain of the new one.
1655-
*
* @param arrayschema schema providing the tile order, tile extents and domain
* @param domain the range the tile positions are relative to
* @param start_tile position of the first tile of the run
* @param num_tiles number of tiles in the run
* @return a copy of `domain` whose bounds in the outer-most dimension span
* `num_tiles / hyperrow_num_tiles` hyper-rows starting at hyper-row
* `start_tile / hyperrow_num_tiles`, clamped to the bounds of `domain`
*
1656-
* @precondition `start_tile` and `num_tiles` must both be multiples of the
1657-
* number of tiles in a hyper-row, and `num_tiles` must be non-zero, so that
* the run is itself a hyper-rectangle (checked with `iassert`)
1658-
*/
1659-
static NDRange domain_tile_offset(
1660-
const ArraySchema& arrayschema,
1661-
const NDRange& domain,
1662-
uint64_t start_tile,
1663-
uint64_t num_tiles) {
1664-
const Domain& arraydomain = arrayschema.domain();
1665-
const uint64_t domain_num_tiles = arraydomain.tile_num(domain);
1666-
const uint64_t hyperrow_num_tiles =
1667-
compute_hyperrow_num_tiles(arrayschema, domain);
1668-
// the run must start and end on hyper-row boundaries
iassert(domain_num_tiles % hyperrow_num_tiles == 0);
1669-
iassert(start_tile % hyperrow_num_tiles == 0);
1670-
iassert(num_tiles % hyperrow_num_tiles == 0);
1671-
1672-
// convert tile counts into whole hyper-row counts
const uint64_t start_hyperrow = start_tile / hyperrow_num_tiles;
1673-
const uint64_t num_hyperrows = num_tiles / hyperrow_num_tiles;
1674-
iassert(num_hyperrows > 0);
1675-
1676-
// index of the outer-most dimension under the schema's tile order
const uint64_t rdim =
1677-
(arrayschema.tile_order() == Layout::ROW_MAJOR ?
1678-
0 :
1679-
arrayschema.dim_num() - 1);
1680-
1681-
// only the bounds of `rdim` are changed; all other dimensions are copied
NDRange adjusted = domain;
1682-
1683-
auto fix_bounds = [&]<typename T>(T) {
1684-
const T extent = arraydomain.tile_extent(rdim).rvalue_as<T>();
1685-
const T lower_bound = *static_cast<const T*>(domain[rdim].start_fixed());
1686-
const T upper_bound = *static_cast<const T*>(domain[rdim].end_fixed());
1687-
T* start = static_cast<T*>(adjusted[rdim].start_fixed());
1688-
T* end = static_cast<T*>(adjusted[rdim].end_fixed());
1689-
1690-
// tiles begin at [LB, LB + E, LB + 2E, ...] where LB is lower bound, E is
1691-
// extent
1692-
auto align = [lower_bound, extent](T value) -> T {
1693-
return lower_bound + ((value - lower_bound) / extent) * extent;
1694-
};
1695-
1696-
// `*end` is derived from the already-updated `*start`, so the order of the
// next two statements matters
*start = std::max<T>(lower_bound, align(*start + extent * start_hyperrow));
1697-
*end = std::min<T>(upper_bound, align(*start + extent * num_hyperrows) - 1);
1698-
};
1699-
// dispatch on the runtime datatype of the outer-most dimension
apply_with_type(fix_bounds, arraydomain.dimension_ptr(rdim)->type());
1700-
1701-
return adjusted;
1702-
}
1703-
17041690
Status GlobalOrderWriter::start_new_fragment() {
17051691
// finish off current fragment if there is one
17061692
if (global_write_state_->frag_meta_) {
@@ -1714,12 +1700,13 @@ Status GlobalOrderWriter::start_new_fragment() {
17141700
if (dense()) {
17151701
const uint64_t num_tiles_in_fragment =
17161702
frag_meta->loaded_metadata()->tile_offsets()[0].size();
1717-
NDRange fragment_domain = domain_tile_offset(
1703+
std::optional<NDRange> fragment_domain = domain_tile_offset(
17181704
array_schema_,
17191705
subarray_.ndrange(0),
17201706
global_write_state_->dense_.domain_tile_offset_,
17211707
num_tiles_in_fragment);
1722-
frag_meta->set_domain(fragment_domain);
1708+
iassert(fragment_domain.has_value());
1709+
frag_meta->set_domain(std::move(fragment_domain.value()));
17231710

17241711
global_write_state_->dense_.domain_tile_offset_ += num_tiles_in_fragment;
17251712
}

0 commit comments

Comments
 (0)