Anklang 0.3.0-460-gc4ef46ba
ASE — Anklang Sound Engine (C++)

« « « Anklang Documentation
Loading...
Searching...
No Matches
compress.cc
Go to the documentation of this file.
1 // This Source Code Form is licensed MPL-2.0: http://mozilla.org/MPL/2.0
2#include "compress.hh"
3#include "storage.hh"
4#include "utils.hh"
5#include "platform.hh"
6#include "internal.hh"
7#include "testing.hh"
8
9#if !__has_include(<zstd.h>)
10#error "Missing <zstd.h> from libzstd-dev, please set CXXFLAGS and LDFLAGS"
11#endif
12#include <zstd.h>
13#include "external/blake3/c/blake3.h"
14
15namespace Ase {
16
17bool
18is_aiff (const String &input)
19{
20 return input.substr (0, 4) == "FORM" && input.substr (8, 4) == "AIFF";
21}
22
23bool
24is_wav (const String &input)
25{
26 return input.substr (0, 4) == "RIFF" && input.substr (8, 4) == "WAVE";
27}
28
29bool
30is_midi (const String &input)
31{
32 return input.substr (0, 4) == "MThd";
33}
34
35bool
36is_pdf (const String &input)
37{
38 return input.substr (0, 5) == "%PDF-";
39}
40
static constexpr const size_t MB = 1024 * 1024;

// Compression level to use for inputs up to `size` bytes; entries are sorted by
// ascending size and the last entry acts as catch-all sentinel.
static struct { size_t level, size; } const zstd_adaptive_level[] = {
  { 18,  1 * MB },      // slow, use only for small sizes
  { 14,  3 * MB },
  { 11, 11 * MB },
  {  8, 20 * MB },
  {  5, 42 * MB },
  {  4, ~size_t (0) },  // acceptable fast compression
}; // each level + size combination should take roughly the same time

/// Pick the compression level of the first table entry whose size limit is not below `input_size`.
static int
guess_zstd_level (size_t input_size)
{
  size_t zal = 0;
  // Skip entries that are too small for the input; the ~size_t (0) sentinel
  // guarantees termination within the table bounds.
  while (zstd_adaptive_level[zal].size < input_size)
    zal++;
  return zstd_adaptive_level[zal].level;
}
59
61zstd_compress (const String &input, int level)
62{
63 return zstd_compress (input.data(), input.size(), level);
64}
65
67zstd_compress (const void *src, size_t src_size, int level)
68{
69 const size_t maxosize = ZSTD_compressBound (src_size);
71 data.resize (maxosize);
72 const size_t osize = ZSTD_compress (&data[0], data.size(), src, src_size, level ? level : guess_zstd_level (src_size));
73 if (ZSTD_isError (osize))
74 {
75 warning ("zstd compression failed (input=%zu): %s", src_size, ZSTD_getErrorName (osize));
76 return "";
77 }
78 data.resize (osize);
79 data.reserve (data.size());
80 return data;
81}
82
84zstd_target_size (const String &input)
85{
86 const size_t maxosize = ZSTD_getFrameContentSize (&input[0], input.size());
87 if (maxosize == ZSTD_CONTENTSIZE_ERROR)
88 return -EILSEQ;
89 if (maxosize == ZSTD_CONTENTSIZE_UNKNOWN)
90 return -EOPNOTSUPP;
91 if (maxosize >= 2 * 1024 * 1024 * 1024ull)
92 return -EFBIG;
93 return maxosize;
94}
95
97zstd_uncompress (const String &input, void *dst, size_t dst_size)
98{
99 const ssize_t target_size = zstd_target_size (input);
100 errno = ENOMEM;
101 assert_return (target_size >= 0 && target_size <= dst_size, -ENOMEM);
102 const size_t osize = ZSTD_decompress (dst, dst_size, input.data(), input.size());
103 assert_return (osize == target_size, -ENOMEM);
104 return target_size;
105}
106
107String
108zstd_uncompress (const String &input)
109{
110 const ssize_t maxosize = zstd_target_size (input);
111 assert_return (maxosize >= 0, "unknown zstd data size");
112 String data;
113 data.resize (maxosize);
114 const size_t osize = ZSTD_decompress (&data[0], data.size(), &input[0], input.size());
115 if (ZSTD_isError (osize))
116 {
117 warning ("zstd decompression failed (input=%zu): %s", input.size(), ZSTD_getErrorName (osize));
118 return "";
119 }
120 data.resize (osize);
121 data.reserve (data.size());
122 assert_return (osize == maxosize, data); // paranoid, ensured by zstd
123 return data;
124}
125
126bool
127is_zstd (const String &input)
128{
129 return (input.size() >= 4 &&
130 input[0] == char (0x28) &&
131 input[1] == char (0xb5) &&
132 input[2] == char (0x2f) &&
133 input[3] == char (0xfd));
134}
135
136bool
137is_lz4 (const String &input)
138{
139 return (input.size() >= 4 &&
140 input[0] == char (0x04) &&
141 input[1] == char (0x22) &&
142 input[2] == char (0x4d) &&
143 input[3] == char (0x18));
144}
145
146bool
147is_zip (const String &input)
148{
149 return (input.size() >= 4 &&
150 input[0] == 'P' && input[1] == 'K' &&
151 ((input[2] == 0x03 && input[3] == 0x04) ||
152 (input[2] == 0x05 && input[3] == 0x06) ||
153 (input[2] == 0x07 && input[3] == 0x08)));
154}
155
156bool
157is_arj (const String &input)
158{
159 return input.size() >= 2 && input[0] == char (0x60) && input[1] == char (0xea);
160}
161
162bool
163is_isz (const String &input)
164{
165 return input.substr (0, 4) == "IsZ!";
166}
167
168bool
169is_ogg (const String &input)
170{
171 return input.substr (0, 4) == "OggS";
172}
173
174bool
175is_avi (const String &input)
176{
177 return input.substr (0, 4) == "RIFF" && input.substr (8, 4) == "AVI ";
178}
179
180bool
181is_gz (const String &input)
182{
183 return input.size() >= 2 && input[0] == char (0x1f) && input[1] == char (0x8B);
184}
185
186bool
187is_xz (const String &input)
188{
189 return (input.size() >= 6 &&
190 input[0] == char (0xfd) &&
191 input[1] == char (0x37) &&
192 input[2] == char (0x7a) &&
193 input[3] == char (0x58) &&
194 input[4] == char (0x5a) &&
195 input[5] == char (0x00));
196}
197
198bool
199is_png (const String &input)
200{
201 return (input.size() >= 8 &&
202 input[0] == char (0x89) &&
203 input.substr (1, 3) == "PNG" &&
204 input[4] == char (0x0d) &&
205 input[5] == char (0x0a) &&
206 input[6] == char (0x1a) &&
207 input[7] == char (0x0a));
208}
209
210bool
211is_jpg (const String &input)
212{
213 return (input.size() >= 4 &&
214 input[0] == char (0xff) &&
215 input[1] == char (0xd8) &&
216 input[2] == char (0xff) &&
217 (input[3] == char (0xdb) || input[3] == char (0xe0) ||
218 input[3] == char (0xee) || input[3] == char (0xe1)));
219}
220
221bool
222is_compressed (const String &input)
223{
224 return (is_zstd (input) ||
225 is_lz4 (input) ||
226 is_zip (input) ||
227 is_arj (input) ||
228 is_isz (input) ||
229 is_ogg (input) ||
230 is_avi (input) ||
231 is_gz (input) ||
232 is_xz (input) ||
233 is_png (input) ||
234 is_jpg (input));
235
236}
237
238class StreamReaderZStd final : public StreamReader {
239 StreamReaderP istream_;
240 std::vector<uint8_t> ibuffer_;
241 ZSTD_inBuffer zinput_ = { nullptr, 0, 0 };
242 ZSTD_DCtx *dctx_ = nullptr;
243 String name_;
244public:
245 StreamReaderZStd (const StreamReaderP &istream)
246 {
247 istream_ = istream;
248 name_ = istream_->name();
249 ibuffer_.resize (ZSTD_DStreamInSize());
250 dctx_ = ZSTD_createDCtx();
251 assert_return (dctx_ != nullptr);
252 }
254 {
255 close();
256 }
257 String
258 name() const override
259 {
260 return name_;
261 }
262 ssize_t
263 read (void *buffer, size_t len) override
264 {
265 return_unless (buffer && len > 0, 0);
266 size_t ret = 0;
267 // (pos<size) is true until all decompressed data is flushed
268 if (zinput_.pos < zinput_.size) {
269 ZSTD_outBuffer zoutput = { buffer, len, 0 };
270 ret = ZSTD_decompressStream (dctx_, &zoutput, &zinput_);
271 if (ZSTD_isError (ret))
272 goto zerror;
273 if (zoutput.pos)
274 return zoutput.pos;
275 }
276 // provide more input
277 while (zinput_.pos == zinput_.size && istream_) {
278 const ssize_t l = istream_->read (&ibuffer_[0], ibuffer_.size());
279 if (l > 0) {
280 zinput_ = { &ibuffer_[0], size_t (l), 0 };
281 } else // l <= 0
282 goto done;
283 ZSTD_outBuffer zoutput = { buffer, len, 0 };
284 ret = ZSTD_decompressStream (dctx_, &zoutput, &zinput_);
285 if (ZSTD_isError (ret))
286 goto zerror;
287 if (zoutput.pos)
288 return zoutput.pos;
289 }
290 zerror:
291 printerr ("%s: ZSTD_decompressStream: %s\n", program_alias(), ZSTD_getErrorName (ret));
292 done:
293 istream_ = nullptr;
294 return 0;
295 }
296 bool
297 close() override
298 {
299 return_unless (istream_, false);
300 const bool closeok = istream_->close();
301 istream_ = nullptr;
302 ibuffer_.clear();
303 ibuffer_.reserve (0);
304 if (dctx_)
305 ZSTD_freeDCtx (dctx_);
306 dctx_ = nullptr;
307 return closeok;
308 }
309};
310
311StreamReaderP
312stream_reader_zstd (StreamReaderP &istream)
313{
314 return_unless (istream, nullptr);
315 return std::make_shared<StreamReaderZStd> (istream);
316}
317
318static constexpr bool PRINT_ADAPTIVE = false;
319
320class StreamWriterZStd final : public StreamWriter {
321 StreamWriterP ostream_;
322 std::vector<uint8_t> obuffer_;
323 String name_;
324 ZSTD_CCtx *cctx_ = nullptr;
325 size_t itotal_ = 0;
326 uint8_t zal_ = 0;
327 bool last_block_ = false;
328public:
330 {
331 close();
332 obuffer_.clear();
333 obuffer_.reserve (0);
334 if (cctx_)
335 ZSTD_freeCCtx (cctx_);
336 cctx_ = nullptr;
337 }
338 StreamWriterZStd (const StreamWriterP &ostream, int level)
339 {
340 obuffer_.resize (ZSTD_CStreamOutSize());
341 cctx_ = ZSTD_createCCtx();
342 assert_return (cctx_ != nullptr);
343 size_t pret;
344 pret = ZSTD_CCtx_setParameter (cctx_, ZSTD_c_checksumFlag, 1);
345 assert_return (!ZSTD_isError (pret)); // ZSTD_getErrorName (pret)
346 if (level == 0) {
347 while (zstd_adaptive_level[zal_].size < std::numeric_limits<decltype (zstd_adaptive_level[0].size)>::max())
348 zal_++;
349 pret = ZSTD_CCtx_setParameter (cctx_, ZSTD_c_compressionLevel, level);
350 if (PRINT_ADAPTIVE) printerr ("ZSTD_compressStream2: fixed level=%u: %s\n", level, ZSTD_getErrorName (pret));
351 } else {
352 pret = ZSTD_CCtx_setParameter (cctx_, ZSTD_c_compressionLevel, zstd_adaptive_level[zal_].level);
353 if (PRINT_ADAPTIVE) printerr ("ZSTD_compressStream2: size=%u level=%u: %s\n", itotal_, zstd_adaptive_level[zal_].level, ZSTD_getErrorName (pret));
354 }
355 assert_return (!ZSTD_isError (pret)); // ZSTD_getErrorName (pret)
356 ostream_ = ostream;
357 name_ = ostream_->name();
358 }
359 String
360 name() const override
361 {
362 return name_;
363 }
364 ssize_t
365 write (const void *buffer, size_t len) override
366 {
367 errno = EIO;
368 return_unless (ostream_, -1);
369 const ZSTD_EndDirective mode = last_block_ ? ZSTD_e_end : ZSTD_e_continue;
370 ZSTD_inBuffer zinput = { buffer, len, 0 };
371 size_t ret = 0;
372 bool block_finished = false;
373 while (!block_finished) {
374 ZSTD_outBuffer zoutput = { &obuffer_[0], obuffer_.size(), 0 };
375
376 const size_t current_total = itotal_ + zinput.pos;
377 if (!last_block_ && current_total > zstd_adaptive_level[zal_].size)
378 {
379 zal_++;
380 const size_t pret = ZSTD_CCtx_setParameter (cctx_, ZSTD_c_compressionLevel, zstd_adaptive_level[zal_].level);
381 (void) pret;
382 if (PRINT_ADAPTIVE) printerr ("ZSTD_compressStream2: size=%u level=%u: %s\n", current_total, zstd_adaptive_level[zal_].level, ZSTD_getErrorName (pret));
383 zinput.size = zinput.pos;
384 ret = ZSTD_compressStream2 (cctx_, &zoutput, &zinput, ZSTD_e_end);
385 zinput.size = len;
386 }
387 else
388 ret = ZSTD_compressStream2 (cctx_, &zoutput, &zinput, mode);
389
390 if (ZSTD_isError (ret))
391 goto zerror;
392 const char *p = (const char*) &obuffer_[0], *const e = p + zoutput.pos;
393 while (p < e) {
394 const ssize_t n = ostream_->write (p, e - p);
395 if (n < 0)
396 goto error;
397 p += n;
398 }
399 block_finished = last_block_ ? ret == 0 : zinput.pos == zinput.size;
400 }
401 itotal_ += zinput.pos;
402 return zinput.pos;
403 zerror:
404 printerr ("%s: ZSTD_compressStream2: %s\n", program_alias(), ZSTD_getErrorName (ret));
405 errno = EIO;
406 return -1;
407 error:
408 printerr ("%s: ZSTD_compressStream2: %s\n", program_alias(), strerror (errno ? errno : EIO));
409 errno = errno ? errno : EIO;
410 return -1;
411 }
412 bool
413 close() override
414 {
415 bool closedok = true;
416 errno = EIO;
417 if (ostream_)
418 {
419 last_block_ = true;
420 ssize_t l;
421 do
422 l = write (nullptr, 0);
423 while (l > 0);
424 closedok &= l >= 0;
425 closedok &= ostream_->close();
426 ostream_ = nullptr;
427 }
428 return closedok;
429 }
430};
431
432StreamWriterP
433stream_writer_zstd (const StreamWriterP &ostream, int level)
434{
435 return_unless (ostream, nullptr);
436 return std::make_shared<StreamWriterZStd> (ostream, level);
437}
438
439String
440blake3_hash_string (const String &input)
441{
442 blake3_hasher hasher;
443 blake3_hasher_init (&hasher);
444 blake3_hasher_update (&hasher, input.data(), input.size());
445 uint8_t output[BLAKE3_OUT_LEN];
446 blake3_hasher_finalize (&hasher, output, BLAKE3_OUT_LEN);
447 blake3_hasher_reset (&hasher);
448 return String ((const char*) output, BLAKE3_OUT_LEN);
449}
450
451String
452blake3_hash_file (const String &filename)
453{
454 StreamReaderP stream = stream_reader_from_file (filename);
455 return_unless (stream, "");
456 blake3_hasher hasher;
457 blake3_hasher_init (&hasher);
458 uint8_t buffer[131072];
459 ssize_t l = stream->read (buffer, sizeof (buffer));
460 while (l > 0) {
461 blake3_hasher_update (&hasher, buffer, l);
462 l = stream->read (buffer, sizeof (buffer));
463 }
464 uint8_t output[BLAKE3_OUT_LEN];
465 blake3_hasher_finalize (&hasher, output, BLAKE3_OUT_LEN);
466 blake3_hasher_reset (&hasher);
467 return String ((const char*) output, BLAKE3_OUT_LEN);
468}
469
470} // Ase
471
472namespace {
473using namespace Ase;
474
475TEST_INTEGRITY (blake3_tests);
476static void
477blake3_tests()
478{
479 String h = string_to_hex (blake3_hash_string (""));
480 TCMP (h, ==, "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262");
481 h = string_to_hex (blake3_hash_string ("Hello Blake3"));
482 TASSERT (h == "6201e8ededb2f1f2b6362119b46b404e822efbd58d7922202408025c5f527c56");
483}
484
485} // Anon
#define EILSEQ
T data(T... args)
errno
#define assert_return(expr,...)
Return from the current function if expr is unmet and issue an assertion warning.
Definition internal.hh:29
#define return_unless(cond,...)
Return silently if cond does not evaluate to true with return value ...
Definition internal.hh:71
#define TEST_INTEGRITY(FUNC)
Register func as an integrity test.
Definition internal.hh:77
typedef char
The Anklang C++ API namespace.
Definition api.hh:9
String string_to_hex(const String &input)
Convert bytes in string input to hexadecimal numbers.
Definition strings.cc:1171
String program_alias()
Retrieve the program name as used for logging or debug messages.
Definition platform.cc:849
std::string String
Convenience alias for std::string.
Definition cxxaux.hh:35
uint32_t uint
Provide 'uint' as convenience type.
Definition cxxaux.hh:18
T size(T... args)
typedef uint8_t
strerror
T substr(T... args)
typedef ssize_t
#define TASSERT(cond)
Unconditional test assertion, enters breakpoint if not fulfilled.
Definition testing.hh:24
#define TCMP(a, cmp, b)
Compare a and b according to operator cmp, verbose on failure.
Definition testing.hh:23