@@ -13,6 +13,7 @@ from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview
1313from .compat import ensure_contiguous_ndarray
1414from .abc import Codec
1515
16+ from libc.stdlib cimport malloc, realloc, free
1617
1718cdef extern from " zstd.h" :
1819
@@ -21,6 +22,23 @@ cdef extern from "zstd.h":
2122 struct ZSTD_CCtx_s:
2223 pass
2324 ctypedef ZSTD_CCtx_s ZSTD_CCtx
25+
26+ struct ZSTD_DStream_s:
27+ pass
28+ ctypedef ZSTD_DStream_s ZSTD_DStream
29+
30+ struct ZSTD_inBuffer_s:
31+ const void * src
32+ size_t size
33+ size_t pos
34+ ctypedef ZSTD_inBuffer_s ZSTD_inBuffer
35+
36+ struct ZSTD_outBuffer_s:
37+ void * dst
38+ size_t size
39+ size_t pos
40+ ctypedef ZSTD_outBuffer_s ZSTD_outBuffer
41+
2442 cdef enum ZSTD_cParameter:
2543 ZSTD_c_compressionLevel= 100
2644 ZSTD_c_checksumFlag= 201
@@ -36,12 +54,20 @@ cdef extern from "zstd.h":
3654 size_t dstCapacity,
3755 const void * src,
3856 size_t srcSize) nogil
39-
4057 size_t ZSTD_decompress(void * dst,
4158 size_t dstCapacity,
4259 const void * src,
4360 size_t compressedSize) nogil
4461
62+ size_t ZSTD_decompressStream(ZSTD_DStream* zds,
63+ ZSTD_outBuffer* output,
64+ ZSTD_inBuffer* input ) nogil
65+
66+ size_t ZSTD_DStreamOutSize() nogil
67+ ZSTD_DStream* ZSTD_createDStream() nogil
68+ size_t ZSTD_freeDStream(ZSTD_DStream* zds) nogil
69+ size_t ZSTD_initDStream(ZSTD_DStream* zds) nogil
70+
4571 cdef long ZSTD_CONTENTSIZE_UNKNOWN
4672 cdef long ZSTD_CONTENTSIZE_ERROR
4773 unsigned long long ZSTD_getFrameContentSize(const void * src,
@@ -55,7 +81,7 @@ cdef extern from "zstd.h":
5581
5682 unsigned ZSTD_isError(size_t code) nogil
5783
58- const char * ZSTD_getErrorName(size_t code)
84+ const char * ZSTD_getErrorName(size_t code) nogil
5985
6086
6187VERSION_NUMBER = ZSTD_versionNumber()
@@ -157,7 +183,10 @@ def decompress(source, dest=None):
157183 source : bytes-like
158184 Compressed data. Can be any object supporting the buffer protocol.
159185 dest : array-like, optional
160- Object to decompress into.
186+ Object to decompress into. If the content size is unknown, the
187+ length of dest must match the decompressed size. If the content size
188+ is unknown and dest is not provided, streaming decompression will be
189+ used.
161190
162191 Returns
163192 -------
@@ -174,6 +203,7 @@ def decompress(source, dest=None):
174203 char * dest_ptr
175204 size_t source_size, dest_size, decompressed_size
176205 size_t nbytes, cbytes, blocksize
206+ size_t dest_nbytes
177207
178208 # obtain source memoryview
179209 source_mv = ensure_continguous_memoryview(source)
@@ -187,9 +217,12 @@ def decompress(source, dest=None):
187217
188218 # determine uncompressed size
189219 dest_size = ZSTD_getFrameContentSize(source_ptr, source_size)
190- if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR:
220+ if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_ERROR:
191221 raise RuntimeError (' Zstd decompression error: invalid input data' )
192222
223+ if dest_size == ZSTD_CONTENTSIZE_UNKNOWN and dest is None :
224+ return stream_decompress(source_pb)
225+
193226 # setup destination buffer
194227 if dest is None :
195228 # allocate memory
@@ -203,6 +236,9 @@ def decompress(source, dest=None):
203236 dest_ptr = < char * > dest_pb.buf
204237 dest_nbytes = dest_pb.len
205238
239+ if dest_size == ZSTD_CONTENTSIZE_UNKNOWN:
240+ dest_size = dest_nbytes
241+
206242 # validate output buffer
207243 if dest_nbytes < dest_size:
208244 raise ValueError (' destination buffer too small; expected at least %s , '
@@ -225,6 +261,97 @@ def decompress(source, dest=None):
225261
226262 return dest
227263
cdef stream_decompress(const Py_buffer* source_pb):
    """Decompress data of unknown size using the Zstandard streaming API.

    The output is accumulated in a heap buffer that starts at twice the
    source size (but never less than ``ZSTD_DStreamOutSize()``) and is grown
    by ``ZSTD_DStreamOutSize()`` bytes every time it fills up while the
    decoder still has data to produce.

    Parameters
    ----------
    source_pb : const Py_buffer*
        Compressed data buffer.

    Returns
    -------
    dest : bytes
        Object containing decompressed data.

    Raises
    ------
    MemoryError
        If the initial output buffer or the decompression stream cannot be
        allocated.
    RuntimeError
        If Zstandard reports an error, if the input ends before the current
        frame is complete, or if the output buffer cannot be grown.
    """

    cdef:
        const char *source_ptr
        const char *error
        void *dest_ptr
        void *new_dst
        size_t source_size, dest_size, new_size
        size_t DEST_GROWTH_SIZE, status
        ZSTD_inBuffer in_buf
        ZSTD_outBuffer out_buf
        ZSTD_DStream *zds = NULL

    # Recommended size for output buffer, guaranteed to flush at least
    # one complete block in all circumstances
    DEST_GROWTH_SIZE = ZSTD_DStreamOutSize()

    source_ptr = <const char *>source_pb.buf
    source_size = source_pb.len

    # unknown content size, guess it is twice the size of the source
    dest_size = source_size * 2

    if dest_size < DEST_GROWTH_SIZE:
        # minimum dest_size is DEST_GROWTH_SIZE
        dest_size = DEST_GROWTH_SIZE

    dest_ptr = malloc(dest_size)
    if dest_ptr == NULL:
        raise MemoryError('Zstd stream decompression error: could not allocate output buffer')

    # Describe both buffers *before* entering the try block so that the
    # finally clause always sees a valid out_buf.dst to release.
    in_buf.src = source_ptr
    in_buf.size = source_size
    in_buf.pos = 0

    out_buf.dst = dest_ptr
    out_buf.size = dest_size
    out_buf.pos = 0

    try:

        zds = ZSTD_createDStream()
        if zds == NULL:
            raise MemoryError('Zstd stream decompression error: could not create decompression stream')

        status = ZSTD_initDStream(zds)
        if ZSTD_isError(status):
            # zds is released exactly once, in the finally block below
            error = ZSTD_getErrorName(status)
            raise RuntimeError('Zstd stream decompression error on ZSTD_initDStream: %s' % error)

        with nogil:

            # Initialize to 1 to force a loop iteration
            status = 1
            while status > 0 or in_buf.pos < in_buf.size:
                # Possible returned values of ZSTD_decompressStream:
                # 0: frame is completely decoded and fully flushed
                # error code (check with ZSTD_isError)
                # >0: more decoding or flushing is needed for this frame
                status = ZSTD_decompressStream(zds, &out_buf, &in_buf)

                if ZSTD_isError(status):
                    with gil:
                        error = ZSTD_getErrorName(status)
                        raise RuntimeError('Zstd stream decompression error on ZSTD_decompressStream: %s' % error)

                if status > 0 and out_buf.pos == out_buf.size:
                    # There is more to decompress, grow the buffer
                    new_size = out_buf.size + DEST_GROWTH_SIZE

                    if new_size < out_buf.size or new_size < DEST_GROWTH_SIZE:
                        # size_t wrapped around
                        with gil:
                            raise RuntimeError('Zstd stream decompression error: output buffer overflow')

                    new_dst = realloc(out_buf.dst, new_size)

                    if new_dst == NULL:
                        # the old block is still valid; it is freed in the
                        # finally block through out_buf.dst
                        with gil:
                            raise RuntimeError('Zstd stream decompression error on realloc: could not expand output buffer')

                    out_buf.dst = new_dst
                    out_buf.size = new_size

                elif status > 0 and in_buf.pos == in_buf.size:
                    # The decoder has flushed everything it could (output
                    # still has room) and wants more input, but there is
                    # none left: the frame is truncated. Fail now instead
                    # of calling the decoder again with nothing to do.
                    with gil:
                        raise RuntimeError('Zstd stream decompression error: input ended before the frame was complete')

        # Copy the output to a bytes object
        dest = PyBytes_FromStringAndSize(<char *>out_buf.dst, out_buf.pos)

    finally:
        if zds != NULL:
            ZSTD_freeDStream(zds)
        free(out_buf.dst)

    return dest
228355
229356class Zstd (Codec ):
230357 """ Codec providing compression using Zstandard.
0 commit comments