Skip to content

Commit 13a8d3c

Browse files
committed
Merge remote-tracking branch 'origin/main' into rr/core-290-dense-consolidation-corruption-main-base
2 parents 98efba9 + 0cbb47e commit 13a8d3c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+999
-387
lines changed

.github/workflows/build-backwards-compatibility.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,11 @@ jobs:
8282
sudo rm -rf /usr/share/dotnet
8383
sudo rm -rf /opt/ghc
8484
sudo rm -rf "/usr/local/share/boost"
85+
sudo rm -rf /usr/local/lib/android
86+
sudo rm -rf /opt/hostedtoolcache/CodeQL
8587
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
88+
sudo docker image prune --all --force
89+
sudo docker builder prune -a
8690
8791
- name: 'Test backward compatibility'
8892
id: test

.github/workflows/check-lint.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
name: check-lint
22
on:
3+
workflow_dispatch:
34
push:
45
branches:
56
- main

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,6 @@ list(APPEND TILEDB_C_API_RELATIVE_HEADERS
314314
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/buffer/buffer_api_external.h"
315315
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/buffer_list/buffer_list_api_external.h"
316316
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/config/config_api_external.h"
317-
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/context/context_api_experimental.h"
318317
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/context/context_api_external.h"
319318
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/current_domain/current_domain_api_enum.h"
320319
"${CMAKE_SOURCE_DIR}/tiledb/api/c_api/current_domain/current_domain_api_external_experimental.h"

HISTORY.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
# TileDB v2.29.1 Release Notes
2+
3+
4+
## New features
5+
* Add support for Azure Data Lake Storage. [#5652](https://github.com/TileDB-Inc/TileDB/pull/5652)
6+
7+
8+
## Defects removed
9+
* Fix deserialization of relative group member URIs. [#5654](https://github.com/TileDB-Inc/TileDB/pull/5654)
10+
11+
112
# TileDB v2.29.0 Release Notes
213

314

@@ -13,7 +24,7 @@
1324

1425
* Add support for `Profiles` onto the `Config`. [#5498](https://github.com/TileDB-Inc/TileDB/pull/5498)
1526
* Add support for relative URI remote group members. [#5635](https://github.com/TileDB-Inc/TileDB/pull/5635)
16-
* Introduce `ls_recursive_v2` that includes directories in results for all backends. [#5602](https://github.com/TileDB-Inc/TileDB/pull/5602)
27+
* Add `ls_recursive_v2` that includes directories in results for all backends. [#5602](https://github.com/TileDB-Inc/TileDB/pull/5602)
1728

1829

1930
## Improvements
@@ -35,8 +46,9 @@
3546
* Refactor `class VFS` to remove `#ifdef`s. [#5589](https://github.com/TileDB-Inc/TileDB/pull/5589)
3647
* Add support for `tiledb_vfs_copy_file` and `tiledb_vfs_copy_dir` on Windows. [#5600](https://github.com/TileDB-Inc/TileDB/pull/5600)
3748
* Allow adding relative group members to remote groups. [#5623](https://github.com/TileDB-Inc/TileDB/pull/5623)
38-
* Always check return codes in Curl::get_data and throw [#5640](https://github.com/TileDB-Inc/TileDB/pull/5640)
49+
* Always check return codes in `Curl::get_data` and throw on failure. [#5640](https://github.com/TileDB-Inc/TileDB/pull/5640)
3950
* Do not retry most SSL-related failures in the REST client. [#5638](https://github.com/TileDB-Inc/TileDB/pull/5638)
51+
* Respect `TILEDB_PROFILE_DIR` environment variable in Profile save path. [#5644](https://github.com/TileDB-Inc/TileDB/pull/5644)
4052

4153

4254
## Deprecations

cmake/inputs/Config.cmake.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ if(NOT @BUILD_SHARED_LIBS@) # NOT BUILD_SHARED_LIBS
2828
if(@TILEDB_AZURE@) # TILEDB_AZURE
2929
find_dependency(azure-identity-cpp)
3030
find_dependency(azure-storage-blobs-cpp)
31+
find_dependency(azure-storage-files-datalake-cpp)
3132
endif()
3233
if(@TILEDB_GCS@) # TILEDB_GCS
3334
find_dependency(google_cloud_cpp_storage)

test/src/unit-capi-config.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
725725
all_param_values["vfs.azure.storage_account_key"] = "";
726726
all_param_values["vfs.azure.storage_sas_token"] = "";
727727
all_param_values["vfs.azure.blob_endpoint"] = "";
728+
all_param_values["vfs.azure.is_data_lake_endpoint"] = "";
728729
all_param_values["vfs.azure.block_list_block_size"] = "5242880";
729730
all_param_values["vfs.azure.max_parallel_ops"] =
730731
std::to_string(std::thread::hardware_concurrency());
@@ -797,6 +798,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
797798
vfs_param_values["azure.storage_account_key"] = "";
798799
vfs_param_values["azure.storage_sas_token"] = "";
799800
vfs_param_values["azure.blob_endpoint"] = "";
801+
vfs_param_values["azure.is_data_lake_endpoint"] = "";
800802
vfs_param_values["azure.block_list_block_size"] = "5242880";
801803
vfs_param_values["azure.max_parallel_ops"] =
802804
std::to_string(std::thread::hardware_concurrency());
@@ -863,6 +865,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
863865
azure_param_values["storage_account_key"] = "";
864866
azure_param_values["storage_sas_token"] = "";
865867
azure_param_values["blob_endpoint"] = "";
868+
azure_param_values["is_data_lake_endpoint"] = "";
866869
azure_param_values["block_list_block_size"] = "5242880";
867870
azure_param_values["max_parallel_ops"] =
868871
std::to_string(std::thread::hardware_concurrency());

test/src/unit-cppapi-config.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ TEST_CASE("C++ API: Config iterator", "[cppapi][config]") {
7777
names.push_back(it->first);
7878
}
7979
// Check number of VFS params in default config object.
80-
CHECK(names.size() == 67);
80+
CHECK(names.size() == 68);
8181
}
8282

8383
TEST_CASE("C++ API: Config Environment Variables", "[cppapi][config]") {

test/src/unit-sparse-global-order-reader.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,6 +1499,11 @@ TEST_CASE_METHOD(
14991499
CSparseGlobalOrderFx,
15001500
"Sparse global order reader: fragment skew",
15011501
"[sparse-global-order][rest]") {
1502+
#ifdef _WIN32
1503+
SKIP(
1504+
"CORE-328: This test fails on nightly [windows-latest - Sanitizer: OFF | "
1505+
"Assertions: ON | Debug] To re-enable when fixed.");
1506+
#endif
15021507
SECTION("Example") {
15031508
instance_fragment_skew<tiledb::test::AsserterCatch>(200, 8, 2);
15041509
}
@@ -1842,6 +1847,11 @@ TEST_CASE_METHOD(
18421847
CSparseGlobalOrderFx,
18431848
"Sparse global order reader: merge bound duplication",
18441849
"[sparse-global-order][rest]") {
1850+
#ifdef _WIN32
1851+
SKIP(
1852+
"CORE-328: This test fails on nightly [windows-latest - Sanitizer: OFF | "
1853+
"Assertions: ON | Debug] To re-enable when fixed.");
1854+
#endif
18451855
SECTION("Example") {
18461856
instance_merge_bound_duplication<tiledb::test::AsserterCatch>(
18471857
16, 16, 1024, 16, false);

test/src/unit-vfs.cc

Lines changed: 82 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -202,10 +202,88 @@ TEST_CASE("VFS: Test long local paths", "[vfs][long-paths]") {
202202
}
203203
}
204204

205+
TEST_CASE("VFS: copy_file", "[vfs][copy_file]") {
206+
LocalFsTest src_fs({0}), dst_fs({0});
207+
URI src_path = src_fs.temp_dir_.add_trailing_slash();
208+
URI dst_path = dst_fs.temp_dir_.add_trailing_slash();
209+
210+
ThreadPool compute_tp(4);
211+
ThreadPool io_tp(4);
212+
Config config = set_config_params();
213+
VFS vfs{
214+
&g_helper_stats, g_helper_logger().get(), &compute_tp, &io_tp, config};
215+
216+
size_t test_str_size = 0;
217+
SECTION("Filesize = 0 MB") {
218+
test_str_size = 0;
219+
}
220+
SECTION("Filesize = 1 MB") {
221+
test_str_size = 1048576;
222+
}
223+
SECTION("Filesize = 10 MB") {
224+
test_str_size = 10 * 1048576;
225+
}
226+
SECTION("Filesize = 100 MB") {
227+
test_str_size = 100 * 1048576;
228+
}
229+
SECTION("Filesize = 150 MB") {
230+
test_str_size = 150 * 1048576;
231+
}
232+
const std::string test_chars = "abcdefghijklmnopqrstuvwxyz";
233+
std::string test_str;
234+
test_str.reserve(test_str_size);
235+
std::random_device rd;
236+
std::mt19937 gen(rd());
237+
std::uniform_int_distribution<size_t> dist(0, test_chars.length() - 1);
238+
for (size_t i = 0; i < test_str_size; ++i) {
239+
test_str += test_chars[dist(gen)];
240+
}
241+
REQUIRE(test_str.size() == test_str_size);
242+
243+
// Create src_file and write data to it.
244+
auto src_file = URI(src_path.to_string() + "src_file");
245+
REQUIRE_NOTHROW(vfs.touch(src_file));
246+
test_str_size = test_str.size();
247+
REQUIRE_NOTHROW(vfs.write(src_file, test_str.data(), test_str_size));
248+
require_tiledb_ok(vfs.close_file(src_file));
249+
250+
// copy_file src -> dst using chunked-buffer I/O.
251+
// Note: it doesn't matter if the dst file exists; copy will create on write.
252+
auto dst_file = URI(dst_path.to_string() + "dst_file");
253+
REQUIRE_NOTHROW(vfs.chunked_buffer_io(src_file, dst_file));
254+
CHECK(vfs.is_file(src_file));
255+
256+
// Validate the contents are the same.
257+
if (test_str_size > 0) {
258+
CHECK(vfs.is_file(dst_file));
259+
std::string dst_file_str;
260+
dst_file_str.resize(test_str_size);
261+
require_tiledb_ok(vfs.read_exactly(
262+
dst_file, 0, (char*)dst_file_str.data(), test_str_size));
263+
CHECK(dst_file_str == test_str);
264+
}
265+
266+
// Clean up.
267+
if (src_path.is_gcs() || src_path.is_s3() || src_path.is_azure()) {
268+
REQUIRE_NOTHROW(vfs.remove_bucket(src_path));
269+
REQUIRE(!vfs.is_bucket(src_path));
270+
} else {
271+
REQUIRE_NOTHROW(vfs.remove_dir(src_path));
272+
REQUIRE(!vfs.is_dir(src_path));
273+
}
274+
if (dst_path.is_gcs() || dst_path.is_s3() || dst_path.is_azure()) {
275+
REQUIRE_NOTHROW(vfs.remove_bucket(dst_path));
276+
REQUIRE(!vfs.is_bucket(dst_path));
277+
} else {
278+
REQUIRE_NOTHROW(vfs.remove_dir(dst_path));
279+
REQUIRE(!vfs.is_dir(dst_path));
280+
}
281+
}
282+
205283
using AllBackends = std::tuple<LocalFsTest, GCSTest, GSTest, S3Test, AzureTest>;
206284
TEMPLATE_LIST_TEST_CASE(
207285
"VFS: URI semantics and file management", "[vfs][uri]", AllBackends) {
208-
TestType fs({0});
286+
TestType fs({});
209287
if (!fs.is_supported()) {
210288
return;
211289
}
@@ -218,19 +296,6 @@ TEMPLATE_LIST_TEST_CASE(
218296

219297
URI path = fs.temp_dir_.add_trailing_slash();
220298

221-
// Set up
222-
if (path.is_gcs() || path.is_s3() || path.is_azure()) {
223-
if (vfs.is_bucket(path)) {
224-
REQUIRE_NOTHROW(vfs.remove_bucket(path));
225-
}
226-
REQUIRE_NOTHROW(vfs.create_bucket(path));
227-
} else {
228-
if (vfs.is_dir(path)) {
229-
REQUIRE_NOTHROW(vfs.remove_dir(path));
230-
}
231-
REQUIRE_NOTHROW(vfs.create_dir(path));
232-
}
233-
234299
/* Create the following file hierarchy:
235300
*
236301
* path/dir1/subdir/file1
@@ -334,6 +399,7 @@ TEMPLATE_LIST_TEST_CASE(
334399
URI(children[1].path().native()) == ls_subdir.remove_trailing_slash());
335400
CHECK(children[0].file_size() == s.size());
336401
CHECK(children[1].file_size() == 0); // Directories don't get a size
402+
paths.clear();
337403

338404
// Move file
339405
auto file6 = URI(path.to_string() + "file6");
@@ -349,13 +415,11 @@ TEMPLATE_LIST_TEST_CASE(
349415
CHECK(vfs.is_dir(dir2));
350416
paths.clear();
351417

352-
// Remove files
418+
// Remove files & directories
353419
REQUIRE_NOTHROW(vfs.remove_file(file4));
354420
CHECK(!vfs.is_file(file4));
355421
REQUIRE_NOTHROW(vfs.remove_file(file6));
356422
CHECK(!vfs.is_file(file6));
357-
358-
// Remove directories
359423
REQUIRE_NOTHROW(vfs.remove_dir(dir2));
360424
CHECK(!vfs.is_file(file1));
361425
CHECK(!vfs.is_file(file2));
@@ -480,21 +544,6 @@ TEMPLATE_LIST_TEST_CASE("VFS: File I/O", "[vfs][uri][file_io]", AllBackends) {
480544
CHECK_THROWS(vfs.file_size(non_existent));
481545
}
482546

483-
// Set up
484-
if (path.is_gcs() || path.is_s3() || path.is_azure()) {
485-
if (vfs.is_bucket(path)) {
486-
REQUIRE_NOTHROW(vfs.remove_bucket(path));
487-
}
488-
REQUIRE_NOTHROW(vfs.create_bucket(path));
489-
} else {
490-
if (vfs.is_dir(path)) {
491-
REQUIRE_NOTHROW(vfs.remove_dir(path));
492-
}
493-
REQUIRE_NOTHROW(vfs.create_dir(path));
494-
// Bucket-specific operations are only valid for object store filesystems.
495-
CHECK_THROWS(vfs.create_bucket(path));
496-
}
497-
498547
// Prepare buffers
499548
uint64_t buffer_size = multiplier * max_parallel_ops * chunk_size;
500549
auto write_buffer = new char[buffer_size];
@@ -801,6 +850,7 @@ TEST_CASE("VFS: Construct Azure Blob Storage endpoint URIs", "[azure][uri]") {
801850
config.set("vfs.azure.storage_account_name", "exampleaccount"));
802851
require_tiledb_ok(config.set("vfs.azure.blob_endpoint", custom_endpoint));
803852
require_tiledb_ok(config.set("vfs.azure.storage_sas_token", sas_token));
853+
require_tiledb_ok(config.set("vfs.azure.is_data_lake_endpoint", "false"));
804854
if (sas_token.empty()) {
805855
// If the SAS token is empty, the VFS will try to connect to Microsoft Entra
806856
// ID to obtain credentials, which can take a long time because of retries.

tiledb/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,10 +524,12 @@ if (TILEDB_AZURE)
524524

525525
find_package(azure-identity-cpp CONFIG REQUIRED)
526526
find_package(azure-storage-blobs-cpp CONFIG REQUIRED)
527+
find_package(azure-storage-files-datalake-cpp CONFIG REQUIRED)
527528
target_link_libraries(TILEDB_CORE_OBJECTS_ILIB
528529
INTERFACE
529530
Azure::azure-identity
530531
Azure::azure-storage-blobs
532+
Azure::azure-storage-files-datalake
531533
)
532534
endif()
533535

0 commit comments

Comments
 (0)