From bb223666ebc5271477e67c224bae1aa25bd372d5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 8 Oct 2025 21:59:46 -0700 Subject: [PATCH 1/5] chore(crc32c): replace crc32c with google-crc32c dependency --- .pre-commit-config.yaml | 3 ++- pyproject.toml | 3 ++- src/zarr/codecs/crc32c_.py | 8 +++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da3e2be9b2..8488e38d4d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,7 +31,8 @@ repos: # Package dependencies - packaging - donfig - - numcodecs[crc32c] + - numcodecs + - google-crc32c>=1.5 - numpy==2.1 # until https://github.com/numpy/numpy/issues/28034 is resolved - typing_extensions - universal-pathlib diff --git a/pyproject.toml b/pyproject.toml index 56ba14fa7e..7f14971396 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,8 @@ requires-python = ">=3.11" dependencies = [ 'packaging>=22.0', 'numpy>=1.26', - 'numcodecs[crc32c]>=0.14', + 'numcodecs>=0.14', + 'google-crc32c>=1.5', 'typing_extensions>=4.9', 'donfig>=0.8', ] diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index b2ea356b0c..9536d0d558 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -3,9 +3,9 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, cast +import google_crc32c import numpy as np import typing_extensions -from crc32c import crc32c from zarr.abc.codec import BytesBytesCodec from zarr.core.common import JSON, parse_named_configuration @@ -42,7 +42,7 @@ async def _decode_single( # Need to do a manual cast until https://github.com/numpy/numpy/issues/26783 is resolved computed_checksum = np.uint32( - crc32c(cast("typing_extensions.Buffer", inner_bytes)) + google_crc32c.value(cast("typing_extensions.Buffer", inner_bytes)) ).tobytes() stored_checksum = bytes(crc32_bytes) if computed_checksum != stored_checksum: @@ -58,7 +58,9 @@ async def _encode_single( ) -> Buffer | None: data = chunk_bytes.as_numpy_array() # Calculate the checksum and "cast" it to a numpy array - checksum = np.array([crc32c(cast("typing_extensions.Buffer", data))], dtype=np.uint32) + checksum = np.array( + [google_crc32c.value(cast("typing_extensions.Buffer", data))], dtype=np.uint32 + ) # Append the checksum (as bytes) to the data return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B"))) From d7256b35709c13fe072e0129f63cb88bbc67fd5b Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 10 Oct 2025 21:15:26 -0700 Subject: [PATCH 2/5] handle missing numcodecs crc32c --- .gitignore | 1 + tests/test_codecs/test_numcodecs.py | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index f2f41270ca..b79ce264c8 100644 --- a/.gitignore +++ b/.gitignore @@ -90,3 +90,4 @@ tests/.hypothesis .hypothesis/ zarr/version.py +zarr.egg-info/ diff --git a/tests/test_codecs/test_numcodecs.py b/tests/test_codecs/test_numcodecs.py index 68f4e0cf7b..2a0fc358f7 100644 --- a/tests/test_codecs/test_numcodecs.py +++ b/tests/test_codecs/test_numcodecs.py @@ -7,6 +7,7 @@ import numpy as np import pytest from numcodecs import GZip +from numcodecs.errors import UnknownCodecError from zarr import config, create_array, open_array from zarr.abc.numcodec import _is_numcodec, _is_numcodec_cls @@ -243,6 +244,13 @@ def test_generic_filter_packbits() -> None: ], ) def test_generic_checksum(codec_class: type[_numcodecs._NumcodecsBytesBytesCodec]) -> None: + # Check if the codec is available in numcodecs + try: + with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): + codec_class()._codec # noqa: B018 + except UnknownCodecError as e: # pragma: no cover + pytest.skip(f"{codec_class.codec_name} is not available in numcodecs: {e}") + data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): @@ -352,8 +360,12 @@ def test_to_dict() -> None: ], ) def test_codecs_pickleable(codec_cls: type[_numcodecs._NumcodecsCodec]) -> None: - with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): - codec = codec_cls() + # Check if the codec is available in numcodecs + try: + with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): + codec = codec_cls() + except UnknownCodecError as e: # pragma: no cover + pytest.skip(f"{codec_cls.codec_name} is not available in numcodecs: {e}") expected = codec From 726c575e8f23fb45eae0c05aa32052b2ad9b2757 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 10 Oct 2025 21:32:59 -0700 Subject: [PATCH 3/5] handle missing UnknownCodecError --- tests/test_codecs/test_numcodecs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_codecs/test_numcodecs.py b/tests/test_codecs/test_numcodecs.py index 2a0fc358f7..ddfca71294 100644 --- a/tests/test_codecs/test_numcodecs.py +++ b/tests/test_codecs/test_numcodecs.py @@ -7,7 +7,12 @@ import numpy as np import pytest from numcodecs import GZip -from numcodecs.errors import UnknownCodecError + +try: + from numcodecs.errors import UnknownCodecError +except ImportError: + # Older versions of numcodecs don't have a separate errors module + UnknownCodecError = ValueError from zarr import config, create_array, open_array from zarr.abc.numcodec import _is_numcodec, _is_numcodec_cls From a999b09495369c5a3090556afaca8999327304a4 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 10 Oct 2025 22:45:49 -0700 Subject: [PATCH 4/5] changelog --- changes/3515.misc.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/3515.misc.md diff --git a/changes/3515.misc.md b/changes/3515.misc.md new file mode 100644 index 0000000000..06a94c98a0 --- /dev/null +++ b/changes/3515.misc.md @@ -0,0 +1 @@ +Replace `crc32c` dependency with `google-crc32c` to resolve licensing concerns. The `crc32c` library uses LGPL license, while `google-crc32c` uses the more permissive Apache 2.0 license. This change maintains full backward compatibility with existing CRC32C-encoded data. From 863db4bf440c0f9a9ac82cf48a78f3caf2a45d7b Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 10 Oct 2025 22:49:55 -0700 Subject: [PATCH 5/5] update docs --- docs/user-guide/installation.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index f672a348cd..89c78804b2 100644 --- a/docs/user-guide/installation.md +++ b/docs/user-guide/installation.md @@ -7,7 +7,8 @@ Required dependencies include: - [Python](https://docs.python.org/3/) (3.11 or later) - [packaging](https://packaging.pypa.io) (22.0 or later) - [numpy](https://numpy.org) (1.26 or later) -- [numcodecs[crc32c]](https://numcodecs.readthedocs.io) (0.14 or later) +- [numcodecs](https://numcodecs.readthedocs.io) (0.14 or later) +- [google-crc32c](https://github.com/googleapis/python-crc32c) (1.5 or later) - [typing_extensions](https://typing-extensions.readthedocs.io) (4.9 or later) - [donfig](https://donfig.readthedocs.io) (0.8 or later)